decoder: Fix valid SPS check in parsing SEI am: 61646846b3 am: 7f5da9039c am: e4237f852d
am: 824c19e4ca
Change-Id: I7b0970d485940b1c4c8eeea8f9cd316a84d1ac89
diff --git a/Android.bp b/Android.bp
index d74fb76..65d7127 100644
--- a/Android.bp
+++ b/Android.bp
@@ -390,10 +390,399 @@
},
sanitize: {
- cfi: true,
- diag: {
- cfi: true,
+ integer_overflow: true,
+ misc_undefined: ["bounds"],
+ // Enable CFI if this becomes a shared library.
+ // cfi: true,
+ blacklist: "libhevc_blacklist.txt",
+ },
+}
+
+cc_test {
+ name: "hevcdec",
+ cflags: [
+ "-DPROFILE_ENABLE",
+ "-DARM",
+ "-fPIC",
+ "-DMD5_DISABLE",
+ "-Wall",
+ "-Werror",
+ ],
+ srcs: ["test/decoder/main.c"],
+ static_libs: ["libhevcdec"],
+}
+
+cc_library_static {
+ name: "libhevcenc",
+ vendor_available: true,
+
+ cflags: [
+ "-DENABLE_MAIN_REXT_PROFILE",
+ "-fPIC",
+ "-O3",
+ "-Wall",
+ "-Wno-unused-variable",
+ "-Wno-unused-parameter",
+ "-Wno-switch",
+ ],
+
+ export_include_dirs: [
+ "encoder",
+ "common",
+ ],
+
+ srcs: [
+ "common/ihevc_cabac_tables.c",
+ "common/ihevc_chroma_intra_pred_filters.c",
+ "common/ihevc_chroma_itrans_recon.c",
+ "common/ihevc_chroma_itrans_recon_16x16.c",
+ "common/ihevc_chroma_itrans_recon_8x8.c",
+ "common/ihevc_common_tables.c",
+ "common/ihevc_deblk_edge_filter.c",
+ "common/ihevc_deblk_tables.c",
+ "common/ihevc_hbd_deblk_edge_filter.c",
+ "common/ihevc_inter_pred_filters.c",
+ "common/ihevc_intra_pred_filters.c",
+ "common/ihevc_iquant_recon.c",
+ "common/ihevc_itrans_recon.c",
+ "common/ihevc_itrans_recon_16x16.c",
+ "common/ihevc_itrans_recon_32x32.c",
+ "common/ihevc_itrans_recon_8x8.c",
+ "common/ihevc_mem_fns.c",
+ "common/ihevc_padding.c",
+ "common/ihevc_quant_iquant_ssd.c",
+ "common/ihevc_quant_tables.c",
+ "common/ihevc_resi_trans.c",
+ "common/ihevc_sao.c",
+ "common/ihevc_trans_tables.c",
+ "common/ihevc_weighted_pred.c",
+ "encoder/bit_allocation.c",
+ "encoder/cbr_buffer_control.c",
+ "encoder/common_rom.c",
+ "encoder/convert_float_to_fix.c",
+ "encoder/est_sad.c",
+ "encoder/fixed_point_error_bits.c",
+ "encoder/frame_info_collector.c",
+ "encoder/hme_coarse.c",
+ "encoder/hme_common_utils.c",
+ "encoder/hme_err_compute.c",
+ "encoder/hme_fullpel.c",
+ "encoder/hme_function_selector.c",
+ "encoder/hme_globals.c",
+ "encoder/hme_interface.c",
+ "encoder/hme_refine.c",
+ "encoder/hme_search_algo.c",
+ "encoder/hme_subpel.c",
+ "encoder/hme_utils.c",
+ "encoder/ihevce_bitstream.c",
+ "encoder/ihevce_bs_compute_ctb.c",
+ "encoder/ihevce_buffer_que.c",
+ "encoder/ihevce_cabac.c",
+ "encoder/ihevce_cabac_cu_pu.c",
+ "encoder/ihevce_cabac_rdo.c",
+ "encoder/ihevce_cabac_tu.c",
+ "encoder/ihevce_chroma_had_satd.c",
+ "encoder/ihevce_cmn_utils_instr_set_router.c",
+ "encoder/ihevce_coarse_me_pass.c",
+ "encoder/ihevce_common_utils.c",
+ "encoder/ihevce_deblk.c",
+ "encoder/ihevce_decomp_pre_intra_pass.c",
+ "encoder/ihevce_dep_mngr.c",
+ "encoder/ihevce_enc_cu_recursion.c",
+ "encoder/ihevce_enc_loop_inter_mode_sifter.c",
+ "encoder/ihevce_enc_loop_pass.c",
+ "encoder/ihevce_enc_loop_utils.c",
+ "encoder/ihevce_enc_sbh_funcs.c",
+ "encoder/ihevce_enc_subpel_gen.c",
+ "encoder/ihevce_encode_header.c",
+ "encoder/ihevce_encode_header_sei_vui.c",
+ "encoder/ihevce_entropy_cod.c",
+ "encoder/ihevce_entropy_interface.c",
+ "encoder/ihevce_error_check.c",
+ "encoder/ihevce_frame_process.c",
+ "encoder/ihevce_frame_process_utils.c",
+ "encoder/ihevce_function_selector.c",
+ "encoder/ihevce_global_tables.c",
+ "encoder/ihevce_had_satd.c",
+ "encoder/ihevce_hle_interface.c",
+ "encoder/ihevce_hle_q_func.c",
+ "encoder/ihevce_inter_pred.c",
+ "encoder/ihevce_ipe_instr_set_router.c",
+ "encoder/ihevce_ipe_pass.c",
+ "encoder/ihevce_lap_interface.c",
+ "encoder/ihevce_me_instr_set_router.c",
+ "encoder/ihevce_me_pass.c",
+ "encoder/ihevce_memory_init.c",
+ "encoder/ihevce_multi_thrd_funcs.c",
+ "encoder/ihevce_mv_pred.c",
+ "encoder/ihevce_mv_pred_merge.c",
+ "encoder/ihevce_nbr_avail.c",
+ "encoder/ihevce_plugin.c",
+ "encoder/ihevce_profile.c",
+ "encoder/ihevce_rc_interface.c",
+ "encoder/ihevce_recur_bracketing.c",
+ "encoder/ihevce_sao.c",
+ "encoder/ihevce_stasino_helpers.c",
+ "encoder/ihevce_sub_pic_rc.c",
+ "encoder/ihevce_sys_api.c",
+ "encoder/ihevce_tile_interface.c",
+ "encoder/ihevce_trace.c",
+ "encoder/ihevce_tu_tree_selector.c",
+ "encoder/init_qp.c",
+ "encoder/mb_model_based.c",
+ "encoder/osal.c",
+ "encoder/osal_cond_var.c",
+ "encoder/osal_error.c",
+ "encoder/osal_mutex.c",
+ "encoder/osal_semaphore.c",
+ "encoder/osal_thread.c",
+ "encoder/picture_type.c",
+ "encoder/rate_control_api.c",
+ "encoder/rc_rd_model.c",
+ "encoder/rc_rd_model_fix.c",
+ "encoder/rc_sad_acc.c",
+ "encoder/sqrt_interp.c",
+ "encoder/var_q_operator.c",
+ "encoder/vbr_storage_vbv.c",
+ "encoder/vbr_str_prms.c",
+ ],
+
+ arch: {
+ arm64: {
+
+ local_include_dirs: [
+ "encoder/arm",
+ "common/arm",
+ "common/arm64",
+ ],
+
+ srcs: [
+ "encoder/arm/ihevce_coarse_layer_sad_neon.c",
+ "encoder/arm/ihevce_common_utils_neon.c",
+ "encoder/arm/ihevce_copy_neon.c",
+ "encoder/arm/ihevce_had_compute_neon.c",
+ "encoder/arm/ihevce_hme_utils_neon.c",
+ "encoder/arm/ihevce_itrans_recon_neon.c",
+ "encoder/arm/ihevce_me_neon.c",
+ "encoder/arm/ihevce_sad_compute_neon.c",
+ "encoder/arm/ihevce_scale_by_2_neon.c",
+ "encoder/arm/ihevce_scan_coeffs_neon.c",
+ "encoder/arm/ihevce_ssd_and_sad_calculator_neon.c",
+ "encoder/arm/ihevce_ssd_calculator_neon.c",
+ "encoder/arm/ihevce_subpel_neon.c",
+ "common/arm/ihevc_resi_trans_neon.c",
+ "common/arm/ihevc_resi_trans_neon_32x32.c",
+ "common/arm/ihevc_quant_iquant_ssd_neon_intr.c",
+ "common/arm/ihevc_intra_pred_filters_neon_intr.c",
+ "common/arm/ihevc_weighted_pred_neon_intr.c",
+ "common/arm/ihevc_intra_ref_substitution_a9q.c",
+ "common/arm64/ihevc_deblk_chroma_horz.s",
+ "common/arm64/ihevc_deblk_chroma_vert.s",
+ "common/arm64/ihevc_deblk_luma_horz.s",
+ "common/arm64/ihevc_deblk_luma_vert.s",
+ "common/arm64/ihevc_inter_pred_chroma_copy.s",
+ "common/arm64/ihevc_inter_pred_chroma_copy_w16out.s",
+ "common/arm64/ihevc_inter_pred_chroma_horz.s",
+ "common/arm64/ihevc_inter_pred_chroma_horz_w16out.s",
+ "common/arm64/ihevc_inter_pred_chroma_vert.s",
+ "common/arm64/ihevc_inter_pred_chroma_vert_w16inp.s",
+ "common/arm64/ihevc_inter_pred_chroma_vert_w16inp_w16out.s",
+ "common/arm64/ihevc_inter_pred_chroma_vert_w16out.s",
+ "common/arm64/ihevc_inter_pred_filters_luma_horz.s",
+ "common/arm64/ihevc_inter_pred_filters_luma_vert.s",
+ "common/arm64/ihevc_inter_pred_filters_luma_vert_w16inp.s",
+ "common/arm64/ihevc_inter_pred_filters_luma_vert_w16out.s",
+ "common/arm64/ihevc_inter_pred_luma_copy.s",
+ "common/arm64/ihevc_inter_pred_luma_copy_w16out.s",
+ "common/arm64/ihevc_inter_pred_luma_horz_w16out.s",
+ "common/arm64/ihevc_inter_pred_luma_vert_w16inp_w16out.s",
+ "common/arm64/ihevc_intra_pred_chroma_dc.s",
+ "common/arm64/ihevc_intra_pred_chroma_horz.s",
+ "common/arm64/ihevc_intra_pred_chroma_mode2.s",
+ "common/arm64/ihevc_intra_pred_chroma_mode_18_34.s",
+ "common/arm64/ihevc_intra_pred_chroma_mode_27_to_33.s",
+ "common/arm64/ihevc_intra_pred_chroma_mode_3_to_9.s",
+ "common/arm64/ihevc_intra_pred_chroma_planar.s",
+ "common/arm64/ihevc_intra_pred_chroma_ver.s",
+ "common/arm64/ihevc_intra_pred_filters_chroma_mode_11_to_17.s",
+ "common/arm64/ihevc_intra_pred_filters_chroma_mode_19_to_25.s",
+ "common/arm64/ihevc_intra_pred_filters_luma_mode_11_to_17.s",
+ "common/arm64/ihevc_intra_pred_filters_luma_mode_19_to_25.s",
+ "common/arm64/ihevc_intra_pred_luma_dc.s",
+ "common/arm64/ihevc_intra_pred_luma_horz.s",
+ "common/arm64/ihevc_intra_pred_luma_mode2.s",
+ "common/arm64/ihevc_intra_pred_luma_mode_18_34.s",
+ "common/arm64/ihevc_intra_pred_luma_mode_27_to_33.s",
+ "common/arm64/ihevc_intra_pred_luma_mode_3_to_9.s",
+ "common/arm64/ihevc_intra_pred_luma_planar.s",
+ "common/arm64/ihevc_intra_pred_luma_vert.s",
+ "common/arm64/ihevc_itrans_recon_16x16.s",
+ "common/arm64/ihevc_itrans_recon_32x32.s",
+ "common/arm64/ihevc_itrans_recon_4x4.s",
+ "common/arm64/ihevc_itrans_recon_4x4_ttype1.s",
+ "common/arm64/ihevc_itrans_recon_8x8.s",
+ "common/arm64/ihevc_mem_fns.s",
+ "common/arm64/ihevc_padding.s",
+ "common/arm64/ihevc_sao_band_offset_chroma.s",
+ "common/arm64/ihevc_sao_band_offset_luma.s",
+ "common/arm64/ihevc_sao_edge_offset_class0.s",
+ "common/arm64/ihevc_sao_edge_offset_class0_chroma.s",
+ "common/arm64/ihevc_sao_edge_offset_class1.s",
+ "common/arm64/ihevc_sao_edge_offset_class1_chroma.s",
+ "common/arm64/ihevc_sao_edge_offset_class2.s",
+ "common/arm64/ihevc_sao_edge_offset_class2_chroma.s",
+ "common/arm64/ihevc_sao_edge_offset_class3.s",
+ "common/arm64/ihevc_sao_edge_offset_class3_chroma.s",
+ "common/arm64/ihevc_weighted_pred_bi.s",
+ "common/arm64/ihevc_weighted_pred_bi_default.s",
+ "common/arm64/ihevc_weighted_pred_uni.s",
+ ],
+
+ cflags: [
+ "-DENABLE_NEON", "-DARMV8", "-DARM",
+ ],
},
- blacklist: "cfi_blacklist.txt",
+
+ arm: {
+ local_include_dirs: [
+ "encoder/arm",
+ "common/arm",
+ ],
+
+ instruction_set: "arm",
+
+ neon: {
+ srcs: [
+ "encoder/arm/ihevce_coarse_layer_sad_neon.c",
+ "encoder/arm/ihevce_common_utils_neon.c",
+ "encoder/arm/ihevce_copy_neon.c",
+ "encoder/arm/ihevce_had_compute_neon.c",
+ "encoder/arm/ihevce_hme_utils_neon.c",
+ "encoder/arm/ihevce_itrans_recon_neon.c",
+ "encoder/arm/ihevce_me_neon.c",
+ "encoder/arm/ihevce_sad_compute_neon.c",
+ "encoder/arm/ihevce_scale_by_2_neon.c",
+ "encoder/arm/ihevce_scan_coeffs_neon.c",
+ "encoder/arm/ihevce_ssd_and_sad_calculator_neon.c",
+ "encoder/arm/ihevce_ssd_calculator_neon.c",
+ "encoder/arm/ihevce_subpel_neon.c",
+ "common/arm/ihevc_resi_trans_neon.c",
+ "common/arm/ihevc_resi_trans_neon_32x32.c",
+ "common/arm/ihevc_quant_iquant_ssd_neon_intr.c",
+ "common/arm/ihevc_intra_pred_filters_neon_intr.c",
+ "common/arm/ihevc_weighted_pred_neon_intr.c",
+ "common/arm/ihevc_intra_ref_substitution_a9q.c",
+ "common/arm/ihevc_deblk_chroma_horz.s",
+ "common/arm/ihevc_deblk_chroma_vert.s",
+ "common/arm/ihevc_deblk_luma_horz.s",
+ "common/arm/ihevc_deblk_luma_vert.s",
+ "common/arm/ihevc_inter_pred_chroma_copy.s",
+ "common/arm/ihevc_inter_pred_chroma_copy_w16out.s",
+ "common/arm/ihevc_inter_pred_chroma_horz.s",
+ "common/arm/ihevc_inter_pred_chroma_horz_w16out.s",
+ "common/arm/ihevc_inter_pred_chroma_vert.s",
+ "common/arm/ihevc_inter_pred_chroma_vert_w16inp.s",
+ "common/arm/ihevc_inter_pred_chroma_vert_w16inp_w16out.s",
+ "common/arm/ihevc_inter_pred_chroma_vert_w16out.s",
+ "common/arm/ihevc_inter_pred_filters_luma_horz.s",
+ "common/arm/ihevc_inter_pred_filters_luma_vert.s",
+ "common/arm/ihevc_inter_pred_filters_luma_vert_w16inp.s",
+ "common/arm/ihevc_inter_pred_luma_copy.s",
+ "common/arm/ihevc_inter_pred_luma_copy_w16out.s",
+ "common/arm/ihevc_inter_pred_luma_horz_w16out.s",
+ "common/arm/ihevc_inter_pred_luma_vert_w16inp_w16out.s",
+ "common/arm/ihevc_intra_pred_chroma_dc.s",
+ "common/arm/ihevc_intra_pred_chroma_horz.s",
+ "common/arm/ihevc_intra_pred_chroma_mode2.s",
+ "common/arm/ihevc_intra_pred_chroma_mode_18_34.s",
+ "common/arm/ihevc_intra_pred_chroma_mode_27_to_33.s",
+ "common/arm/ihevc_intra_pred_chroma_mode_3_to_9.s",
+ "common/arm/ihevc_intra_pred_chroma_planar.s",
+ "common/arm/ihevc_intra_pred_chroma_ver.s",
+ "common/arm/ihevc_intra_pred_filters_chroma_mode_11_to_17.s",
+ "common/arm/ihevc_intra_pred_filters_chroma_mode_19_to_25.s",
+ "common/arm/ihevc_intra_pred_filters_luma_mode_11_to_17.s",
+ "common/arm/ihevc_intra_pred_filters_luma_mode_19_to_25.s",
+ "common/arm/ihevc_intra_pred_luma_dc.s",
+ "common/arm/ihevc_intra_pred_luma_horz.s",
+ "common/arm/ihevc_intra_pred_luma_mode2.s",
+ "common/arm/ihevc_intra_pred_luma_mode_18_34.s",
+ "common/arm/ihevc_intra_pred_luma_mode_27_to_33.s",
+ "common/arm/ihevc_intra_pred_luma_mode_3_to_9.s",
+ "common/arm/ihevc_intra_pred_luma_planar.s",
+ "common/arm/ihevc_intra_pred_luma_vert.s",
+ "common/arm/ihevc_itrans_recon_16x16.s",
+ "common/arm/ihevc_itrans_recon_32x32.s",
+ "common/arm/ihevc_itrans_recon_4x4.s",
+ "common/arm/ihevc_itrans_recon_4x4_ttype1.s",
+ "common/arm/ihevc_itrans_recon_8x8.s",
+ "common/arm/ihevc_resi_trans.s",
+ "common/arm/ihevc_resi_trans_32x32_a9q.s",
+ "common/arm/ihevc_mem_fns.s",
+ "common/arm/ihevc_padding.s",
+ "common/arm/ihevc_sao_band_offset_chroma.s",
+ "common/arm/ihevc_sao_band_offset_luma.s",
+ "common/arm/ihevc_sao_edge_offset_class0.s",
+ "common/arm/ihevc_sao_edge_offset_class0_chroma.s",
+ "common/arm/ihevc_sao_edge_offset_class1.s",
+ "common/arm/ihevc_sao_edge_offset_class1_chroma.s",
+ "common/arm/ihevc_sao_edge_offset_class2.s",
+ "common/arm/ihevc_sao_edge_offset_class2_chroma.s",
+ "common/arm/ihevc_sao_edge_offset_class3.s",
+ "common/arm/ihevc_sao_edge_offset_class3_chroma.s",
+ "common/arm/ihevc_weighted_pred_bi_default.s",
+ "common/arm/ihevc_weighted_pred_bi.s",
+ "common/arm/ihevc_weighted_pred_uni.s",
+ ],
+
+ cflags: [
+ "-DENABLE_NEON", "-DARM",
+ ],
+ },
+ },
+
+ x86_64: {
+ local_include_dirs: [
+ "common/x86",
+ ],
+ },
+
+ x86: {
+ local_include_dirs: [
+ "common/x86",
+ ],
+ }
+
+ },
+ sanitize: {
+ integer_overflow: true,
+ misc_undefined: ["bounds"],
+ // Enable CFI if this becomes a shared library.
+ // cfi: true,
+ diag: {
+ integer_overflow: true,
+ misc_undefined: ["bounds"],
+ },
+ blacklist: "libhevc_blacklist.txt",
+ },
+}
+
+cc_test {
+ name: "hevcenc",
+ cflags: [
+ "-DARM",
+ "-fPIC",
+ "-Wall",
+ "-Werror",
+ ],
+ srcs: ["test/encoder/main.c"],
+ static_libs: ["libhevcenc"],
+ sanitize: {
+ integer_overflow: true,
+ misc_undefined: ["bounds"],
+ diag: {
+ integer_overflow: true,
+ misc_undefined: ["bounds"],
+ },
},
}
diff --git a/common/arm/ihevc_cmn_utils_neon.h b/common/arm/ihevc_cmn_utils_neon.h
new file mode 100644
index 0000000..488ee36
--- /dev/null
+++ b/common/arm/ihevc_cmn_utils_neon.h
@@ -0,0 +1,371 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ihevc_cmn_utils_neon.h
+*
+* @brief
+* Inline NEON helpers: strided 4x4 byte load/store and in-place transposes of 16-bit and 32-bit blocks
+*
+* @author
+* ittiam
+*
+* @par List of Functions:
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef _IHEVC_CMN_UTILS_NEON_H_
+#define _IHEVC_CMN_UTILS_NEON_H_
+
+#include <arm_neon.h>
+#include "ihevc_platform_macros.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+static INLINE uint8x16_t load_unaligned_u8q(const uint8_t *buf, int stride)
+{ // Packs four 4-byte rows, read at buf, buf + stride, buf + 2*stride and buf + 3*stride, into one 16-byte vector.
+ uint8_t a[16]; // staging area: the four rows laid out back to back
+
+ if(stride == 4) // rows are already contiguous, so all 16 bytes are loaded straight from buf
+ return vld1q_u8(buf);
+ memcpy(a, buf, 4); // row 0 -> a[0..3]
+ buf += stride;
+ memcpy(a + 4, buf, 4); // row 1 -> a[4..7]
+ buf += stride;
+ memcpy(a + 8, buf, 4); // row 2 -> a[8..11]
+ buf += stride;
+ memcpy(a + 12, buf, 4); // row 3 -> a[12..15]
+ return vld1q_u8(a);
+}
+
+static INLINE uint8x16_t load_unaligned_u8qi(const uint8_t *buf, int stride)
+{ // Gathers bytes 0, 2, 4 and 6 of each of four rows ('stride' bytes apart) into one 16-byte vector.
+ uint8_t a[16]; // staging area for the 4 x 4 selected bytes
+ uint8_t *b = a; // write cursor into a[], advanced by 4 per row
+ int j;
+
+ for(j = 0; j < 4; j++)
+ {
+ b[0] = buf[0]; // NOTE(review): presumably picks one plane out of byte-interleaved data - confirm against callers
+ b[1] = buf[2];
+ b[2] = buf[4];
+ b[3] = buf[6];
+ buf += stride; // next source row
+ b += 4;
+ }
+ return vld1q_u8(a);
+}
+
+static INLINE void store_unaligned_u8q(uint8_t *buf, int stride, uint8x16_t b0)
+{ // Scatters the 16 bytes of b0 as four 4-byte rows written at buf, buf + stride, buf + 2*stride and buf + 3*stride.
+ uint8_t a[16]; // staging area holding the vector contents in memory order
+
+ vst1q_u8(a, b0);
+ memcpy(buf, a, 4); // a[0..3] -> row 0
+ buf += stride;
+ memcpy(buf, a + 4, 4); // a[4..7] -> row 1
+ buf += stride;
+ memcpy(buf, a + 8, 4); // a[8..11] -> row 2
+ buf += stride;
+ memcpy(buf, a + 12, 4); // a[12..15] -> row 3
+}
+
+static INLINE int16x8x2_t vtrnq_s64_to_s16(int32x4_t a0, int32x4_t a1)
+{ // Swaps 64-bit halves between a0 and a1 and returns the two results viewed as int16x8_t.
+ int16x8x2_t b0;
+
+ b0.val[0] = vcombine_s16( // val[0] = low half of a0 followed by low half of a1
+ vreinterpret_s16_s32(vget_low_s32(a0)), vreinterpret_s16_s32(vget_low_s32(a1)));
+ b0.val[1] = vcombine_s16( // val[1] = high half of a0 followed by high half of a1
+ vreinterpret_s16_s32(vget_high_s32(a0)), vreinterpret_s16_s32(vget_high_s32(a1)));
+ return b0;
+}
+
+static INLINE void transpose_s16_4x4d(int16x4_t *a0, int16x4_t *a1, int16x4_t *a2, int16x4_t *a3)
+{
+ // In-place 4x4 transpose of the int16 rows *a0..*a3. Step 1: swap 16 bit elements. Goes from:
+ // a0: 00 01 02 03
+ // a1: 10 11 12 13
+ // a2: 20 21 22 23
+ // a3: 30 31 32 33
+ // to:
+ // b0.val[0]: 00 10 02 12
+ // b0.val[1]: 01 11 03 13
+ // b1.val[0]: 20 30 22 32
+ // b1.val[1]: 21 31 23 33
+
+ const int16x4x2_t b0 = vtrn_s16(*a0, *a1);
+ const int16x4x2_t b1 = vtrn_s16(*a2, *a3);
+
+ // Step 2: swap 32 bit elements resulting in:
+ // c0.val[0]: 00 10 20 30
+ // c0.val[1]: 02 12 22 32
+ // c1.val[0]: 01 11 21 31
+ // c1.val[1]: 03 13 23 33
+
+ const int32x2x2_t c0 =
+ vtrn_s32(vreinterpret_s32_s16(b0.val[0]), vreinterpret_s32_s16(b1.val[0]));
+ const int32x2x2_t c1 =
+ vtrn_s32(vreinterpret_s32_s16(b0.val[1]), vreinterpret_s32_s16(b1.val[1]));
+
+ *a0 = vreinterpret_s16_s32(c0.val[0]); // column 0: 00 10 20 30
+ *a1 = vreinterpret_s16_s32(c1.val[0]); // column 1: 01 11 21 31
+ *a2 = vreinterpret_s16_s32(c0.val[1]); // column 2: 02 12 22 32
+ *a3 = vreinterpret_s16_s32(c1.val[1]); // column 3: 03 13 23 33
+}
+
+static INLINE void transpose_s16_4x4q(int16x8_t *a0, int16x8_t *a1, int16x8_t *a2, int16x8_t *a3)
+{
+ // In place, transposes the two side-by-side 4x4 blocks (columns 0-3 and 4-7) of rows *a0..*a3. Step 1: swap 16 bit elements. Goes from:
+ // a0: 00 01 02 03 04 05 06 07
+ // a1: 10 11 12 13 14 15 16 17
+ // a2: 20 21 22 23 24 25 26 27
+ // a3: 30 31 32 33 34 35 36 37
+ // to:
+ // b0.val[0]: 00 10 02 12 04 14 06 16
+ // b0.val[1]: 01 11 03 13 05 15 07 17
+ // b1.val[0]: 20 30 22 32 24 34 26 36
+ // b1.val[1]: 21 31 23 33 25 35 27 37
+
+ const int16x8x2_t b0 = vtrnq_s16(*a0, *a1);
+ const int16x8x2_t b1 = vtrnq_s16(*a2, *a3);
+
+ // Step 2: swap 32 bit elements resulting in:
+ // c0.val[0]: 00 10 20 30 04 14 24 34
+ // c0.val[1]: 02 12 22 32 06 16 26 36
+ // c1.val[0]: 01 11 21 31 05 15 25 35
+ // c1.val[1]: 03 13 23 33 07 17 27 37
+
+ const int32x4x2_t c0 =
+ vtrnq_s32(vreinterpretq_s32_s16(b0.val[0]), vreinterpretq_s32_s16(b1.val[0]));
+ const int32x4x2_t c1 =
+ vtrnq_s32(vreinterpretq_s32_s16(b0.val[1]), vreinterpretq_s32_s16(b1.val[1]));
+
+ *a0 = vreinterpretq_s16_s32(c0.val[0]); // 00 10 20 30 | 04 14 24 34
+ *a1 = vreinterpretq_s16_s32(c1.val[0]); // 01 11 21 31 | 05 15 25 35
+ *a2 = vreinterpretq_s16_s32(c0.val[1]); // 02 12 22 32 | 06 16 26 36
+ *a3 = vreinterpretq_s16_s32(c1.val[1]); // 03 13 23 33 | 07 17 27 37
+}
+
+static INLINE void transpose_s16_8x8(
+ int16x8_t *a0,
+ int16x8_t *a1,
+ int16x8_t *a2,
+ int16x8_t *a3,
+ int16x8_t *a4,
+ int16x8_t *a5,
+ int16x8_t *a6,
+ int16x8_t *a7)
+{
+ // In-place 8x8 transpose of the int16 rows *a0..*a7. Step 1: swap 16 bit elements. Goes from:
+ // a0: 00 01 02 03 04 05 06 07
+ // a1: 10 11 12 13 14 15 16 17
+ // a2: 20 21 22 23 24 25 26 27
+ // a3: 30 31 32 33 34 35 36 37
+ // a4: 40 41 42 43 44 45 46 47
+ // a5: 50 51 52 53 54 55 56 57
+ // a6: 60 61 62 63 64 65 66 67
+ // a7: 70 71 72 73 74 75 76 77
+ // to:
+ // b0.val[0]: 00 10 02 12 04 14 06 16
+ // b0.val[1]: 01 11 03 13 05 15 07 17
+ // b1.val[0]: 20 30 22 32 24 34 26 36
+ // b1.val[1]: 21 31 23 33 25 35 27 37
+ // b2.val[0]: 40 50 42 52 44 54 46 56
+ // b2.val[1]: 41 51 43 53 45 55 47 57
+ // b3.val[0]: 60 70 62 72 64 74 66 76
+ // b3.val[1]: 61 71 63 73 65 75 67 77
+ int16x8x2_t b0, b1, b2, b3, d0, d1, d2, d3;
+ int32x4x2_t c0, c1, c2, c3;
+
+ b0 = vtrnq_s16(*a0, *a1);
+ b1 = vtrnq_s16(*a2, *a3);
+ b2 = vtrnq_s16(*a4, *a5);
+ b3 = vtrnq_s16(*a6, *a7);
+
+ // Step 2: swap 32 bit elements resulting in:
+ // c0.val[0]: 00 10 20 30 04 14 24 34
+ // c0.val[1]: 02 12 22 32 06 16 26 36
+ // c1.val[0]: 01 11 21 31 05 15 25 35
+ // c1.val[1]: 03 13 23 33 07 17 27 37
+ // c2.val[0]: 40 50 60 70 44 54 64 74
+ // c2.val[1]: 42 52 62 72 46 56 66 76
+ // c3.val[0]: 41 51 61 71 45 55 65 75
+ // c3.val[1]: 43 53 63 73 47 57 67 77
+
+ c0 = vtrnq_s32(vreinterpretq_s32_s16(b0.val[0]), vreinterpretq_s32_s16(b1.val[0]));
+ c1 = vtrnq_s32(vreinterpretq_s32_s16(b0.val[1]), vreinterpretq_s32_s16(b1.val[1]));
+ c2 = vtrnq_s32(vreinterpretq_s32_s16(b2.val[0]), vreinterpretq_s32_s16(b3.val[0]));
+ c3 = vtrnq_s32(vreinterpretq_s32_s16(b2.val[1]), vreinterpretq_s32_s16(b3.val[1]));
+
+ // Step 3: swap 64 bit elements resulting in:
+ // d0.val[0]: 00 10 20 30 40 50 60 70
+ // d0.val[1]: 04 14 24 34 44 54 64 74
+ // d1.val[0]: 01 11 21 31 41 51 61 71
+ // d1.val[1]: 05 15 25 35 45 55 65 75
+ // d2.val[0]: 02 12 22 32 42 52 62 72
+ // d2.val[1]: 06 16 26 36 46 56 66 76
+ // d3.val[0]: 03 13 23 33 43 53 63 73
+ // d3.val[1]: 07 17 27 37 47 57 67 77
+
+ d0 = vtrnq_s64_to_s16(c0.val[0], c2.val[0]);
+ d1 = vtrnq_s64_to_s16(c1.val[0], c3.val[0]);
+ d2 = vtrnq_s64_to_s16(c0.val[1], c2.val[1]);
+ d3 = vtrnq_s64_to_s16(c1.val[1], c3.val[1]);
+
+ *a0 = d0.val[0]; // column 0
+ *a1 = d1.val[0]; // column 1
+ *a2 = d2.val[0]; // column 2
+ *a3 = d3.val[0]; // column 3
+ *a4 = d0.val[1]; // column 4
+ *a5 = d1.val[1]; // column 5
+ *a6 = d2.val[1]; // column 6
+ *a7 = d3.val[1]; // column 7
+}
+
+static INLINE int32x4x2_t vtrnq_s64_to_s32(int32x4_t a0, int32x4_t a1)
+{ // Swaps 64-bit halves between a0 and a1, keeping the int32 lane type.
+ int32x4x2_t b0;
+ b0.val[0] = vcombine_s32(vget_low_s32(a0), vget_low_s32(a1)); // low half of a0, then low half of a1
+ b0.val[1] = vcombine_s32(vget_high_s32(a0), vget_high_s32(a1)); // high half of a0, then high half of a1
+ return b0;
+}
+
+static INLINE void transpose_s32_4x4(int32x4_t *a0, int32x4_t *a1, int32x4_t *a2, int32x4_t *a3)
+{
+ // In-place 4x4 transpose of the int32 rows *a0..*a3. Step 1: swap 32 bit elements. Goes from:
+ // a0: 00 01 02 03
+ // a1: 10 11 12 13
+ // a2: 20 21 22 23
+ // a3: 30 31 32 33
+ // to:
+ // b0.val[0]: 00 10 02 12
+ // b0.val[1]: 01 11 03 13
+ // b1.val[0]: 20 30 22 32
+ // b1.val[1]: 21 31 23 33
+
+ const int32x4x2_t b0 = vtrnq_s32(*a0, *a1);
+ const int32x4x2_t b1 = vtrnq_s32(*a2, *a3);
+
+ // Step 2: swap 64 bit elements resulting in:
+ // c0.val[0]: 00 10 20 30
+ // c0.val[1]: 02 12 22 32
+ // c1.val[0]: 01 11 21 31
+ // c1.val[1]: 03 13 23 33
+
+ const int32x4x2_t c0 = vtrnq_s64_to_s32(b0.val[0], b1.val[0]);
+ const int32x4x2_t c1 = vtrnq_s64_to_s32(b0.val[1], b1.val[1]);
+
+ *a0 = c0.val[0]; // column 0
+ *a1 = c1.val[0]; // column 1
+ *a2 = c0.val[1]; // column 2
+ *a3 = c1.val[1]; // column 3
+}
+
+static INLINE void transpose_s32_8x8(
+ int32x4x2_t *a0,
+ int32x4x2_t *a1,
+ int32x4x2_t *a2,
+ int32x4x2_t *a3,
+ int32x4x2_t *a4,
+ int32x4x2_t *a5,
+ int32x4x2_t *a6,
+ int32x4x2_t *a7)
+{
+ // In-place 8x8 transpose of int32 rows; each row is val[0] (columns 0-3) plus val[1] (columns 4-7). Step 1: swap 32 bit elements. Goes from:
+ // a0: 00 01 02 03 04 05 06 07
+ // a1: 10 11 12 13 14 15 16 17
+ // a2: 20 21 22 23 24 25 26 27
+ // a3: 30 31 32 33 34 35 36 37
+ // a4: 40 41 42 43 44 45 46 47
+ // a5: 50 51 52 53 54 55 56 57
+ // a6: 60 61 62 63 64 65 66 67
+ // a7: 70 71 72 73 74 75 76 77
+ // to:
+ // b0: 00 10 02 12 01 11 03 13
+ // b1: 20 30 22 32 21 31 23 33
+ // b2: 40 50 42 52 41 51 43 53
+ // b3: 60 70 62 72 61 71 63 73
+ // b4: 04 14 06 16 05 15 07 17
+ // b5: 24 34 26 36 25 35 27 37
+ // b6: 44 54 46 56 45 55 47 57
+ // b7: 64 74 66 76 65 75 67 77
+
+ const int32x4x2_t b0 = vtrnq_s32(a0->val[0], a1->val[0]);
+ const int32x4x2_t b1 = vtrnq_s32(a2->val[0], a3->val[0]);
+ const int32x4x2_t b2 = vtrnq_s32(a4->val[0], a5->val[0]);
+ const int32x4x2_t b3 = vtrnq_s32(a6->val[0], a7->val[0]);
+ const int32x4x2_t b4 = vtrnq_s32(a0->val[1], a1->val[1]);
+ const int32x4x2_t b5 = vtrnq_s32(a2->val[1], a3->val[1]);
+ const int32x4x2_t b6 = vtrnq_s32(a4->val[1], a5->val[1]);
+ const int32x4x2_t b7 = vtrnq_s32(a6->val[1], a7->val[1]);
+
+ // Step 2: swap 64 bit elements resulting in:
+ // c0: 00 10 20 30 02 12 22 32
+ // c1: 01 11 21 31 03 13 23 33
+ // c2: 40 50 60 70 42 52 62 72
+ // c3: 41 51 61 71 43 53 63 73
+ // c4: 04 14 24 34 06 16 26 36
+ // c5: 05 15 25 35 07 17 27 37
+ // c6: 44 54 64 74 46 56 66 76
+ // c7: 45 55 65 75 47 57 67 77
+ const int32x4x2_t c0 = vtrnq_s64_to_s32(b0.val[0], b1.val[0]);
+ const int32x4x2_t c1 = vtrnq_s64_to_s32(b0.val[1], b1.val[1]);
+ const int32x4x2_t c2 = vtrnq_s64_to_s32(b2.val[0], b3.val[0]);
+ const int32x4x2_t c3 = vtrnq_s64_to_s32(b2.val[1], b3.val[1]);
+ const int32x4x2_t c4 = vtrnq_s64_to_s32(b4.val[0], b5.val[0]);
+ const int32x4x2_t c5 = vtrnq_s64_to_s32(b4.val[1], b5.val[1]);
+ const int32x4x2_t c6 = vtrnq_s64_to_s32(b6.val[0], b7.val[0]);
+ const int32x4x2_t c7 = vtrnq_s64_to_s32(b6.val[1], b7.val[1]);
+
+ // Step 3: swap 128 bit elements (pair up the 4-lane halves) resulting in:
+ // a0: 00 10 20 30 40 50 60 70
+ // a1: 01 11 21 31 41 51 61 71
+ // a2: 02 12 22 32 42 52 62 72
+ // a3: 03 13 23 33 43 53 63 73
+ // a4: 04 14 24 34 44 54 64 74
+ // a5: 05 15 25 35 45 55 65 75
+ // a6: 06 16 26 36 46 56 66 76
+ // a7: 07 17 27 37 47 57 67 77
+ a0->val[0] = c0.val[0];
+ a0->val[1] = c2.val[0];
+ a1->val[0] = c1.val[0];
+ a1->val[1] = c3.val[0];
+ a2->val[0] = c0.val[1];
+ a2->val[1] = c2.val[1];
+ a3->val[0] = c1.val[1];
+ a3->val[1] = c3.val[1];
+ a4->val[0] = c4.val[0];
+ a4->val[1] = c6.val[0];
+ a5->val[0] = c5.val[0];
+ a5->val[1] = c7.val[0];
+ a6->val[0] = c4.val[1];
+ a6->val[1] = c6.val[1];
+ a7->val[0] = c5.val[1];
+ a7->val[1] = c7.val[1];
+}
+#endif /* _IHEVC_CMN_UTILS_NEON_H_ */
diff --git a/common/arm/ihevc_platform_macros.h b/common/arm/ihevc_platform_macros.h
index d6c4b48..966ffc5 100644
--- a/common/arm/ihevc_platform_macros.h
+++ b/common/arm/ihevc_platform_macros.h
@@ -71,6 +71,18 @@
return x;
}
+static __inline WORD32 CLIP_U14(WORD32 x)
+{ /* Returns x saturated to the unsigned 14-bit range [0, 16383]. */
+ asm("usat %0, #14, %1" : "=r"(x) : "r"(x)); /* ARM USAT: x is both the input and the output operand */
+ return x;
+}
+
+static __inline WORD32 CLIP_S14(WORD32 x)
+{ /* Returns x saturated to the signed 14-bit range [-8192, 8191]. */
+ asm("ssat %0, #14, %1" : "=r"(x) : "r"(x)); /* ARM SSAT: x is both the input and the output operand */
+ return x;
+}
+
static __inline WORD32 CLIP_U16(WORD32 x)
{
asm("usat %0, #16, %1" : "=r"(x) : "r"(x));
@@ -99,6 +111,9 @@
#define CLIP_U12(x) CLIP3((x), 0, 4095);
#define CLIP_S12(x) CLIP3((x), -2048, 2047);
+#define CLIP_U14(x) CLIP3((x), 0, 16383) /* limits 0 .. 2^14 - 1; no trailing ';' so the macro can be used inside expressions and before 'else' */
+#define CLIP_S14(x) CLIP3((x), -8192, 8191) /* limits -2^13 .. 2^13 - 1; no trailing ';' so the macro can be used inside expressions and before 'else' */
+
#define CLIP_U16(x) CLIP3((x), 0, 65535)
#define CLIP_S16(x) CLIP3((x), -32768, 32767)
@@ -123,7 +138,7 @@
if(u4_word)
return (__builtin_clz(u4_word));
else
- return 32;
+ return 31;
}
static INLINE UWORD32 CLZNZ(UWORD32 u4_word)
@@ -197,28 +212,27 @@
} \
}
-#if 0 /* Equivalent C code for GETRANGE */
-#define GETRANGE(r,word) \
-{ \
- UWORD32 temp; \
- r = 0; \
- temp = (UWORD32)word; \
- if(0 == word) \
- r = 1; \
- else \
- { \
- while(temp) \
- { \
- temp >>= 1; \
- r++; \
- } \
- }\
+
+/**
+*****************************************************************************************************
+* @brief sets r to the number of bits needed to represent the unsigned 64-bit value llword (r = 1 when llword is 0)
+*****************************************************************************************************
+*/
+#define GETRANGE64(r,llword) \
+{ \
+ if(llword) \
+ { \
+ r = 64 - __builtin_clzll(llword); /* 64 minus the leading-zero count = 1-based index of the highest set bit */ \
+ } \
+ else \
+ { \
+ r = 1; /* zero is handled separately: __builtin_clzll is not called with 0 */ \
+ } \
}
-#endif
-#define NOP(nop_cnt) {UWORD32 nop_i; for (nop_i = (nop_cnt) ; nop_i > 0 ; nop_i--) asm("nop");}
+#define NOP(nop_cnt) {UWORD32 nop_i; for (nop_i = 0; nop_i < (nop_cnt); nop_i++) asm("nop");} /* executes "nop" nop_cnt times; argument is parenthesized so expressions such as a ? b : c compare correctly, and it is re-evaluated on every loop test */
diff --git a/common/arm/ihevc_quant_iquant_ssd_neon_intr.c b/common/arm/ihevc_quant_iquant_ssd_neon_intr.c
new file mode 100644
index 0000000..6082c87
--- /dev/null
+++ b/common/arm/ihevc_quant_iquant_ssd_neon_intr.c
@@ -0,0 +1,697 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ihevc_quant_iquant_ssd_neon_intr.c
+*
+* @brief
+* Contains function definitions for quantization, followed by Inverse
+* quantization to find transform domain SSD
+*
+* @author
+* 100736
+*
+* @par List of Functions:
+* - ihevc_quant_iquant_ssd_flat_scale_mat_neon()
+* - ihevc_q_iq_ssd_flat_scale_mat_var_rnd_fact_neon()
+*
+* @remarks
+*
+*
+*******************************************************************************
+*/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "ihevc_macros.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_defs.h"
+#include "ihevc_debug.h"
+#include "ihevc_trans_tables.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_func_selector.h"
+#include "ihevc_trans_macros.h"
+#include "arm_neon.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+WORD32 ihevc_quant_iquant_ssd_flat_scale_mat_neon(
+ WORD16 *pi2_coeffs, /* in: coefficients, read as 4x4 tiles with row stride src_strd */
+ WORD16 *pi2_quant_coeff, /* not read in this flat-scale variant; the pointer is only advanced */
+ WORD16 *pi2_q_dst, /* out: quantized levels, row stride dst_q_strd */
+ WORD16 *pi2_iq_dst, /* out: dequantized coefficients (zeros for all-zero tiles), row stride dst_iq_strd */
+ WORD32 trans_size, /* block dimension; both loops step by 4 up to this value */
+ WORD32 qp_div, /* added to the quantization shift, subtracted from the dequantization shift */
+ WORD32 qp_rem, /* index into g_ihevc_quant_scales and g_ihevc_iquant_scales_flat_scale */
+ WORD32 q_add, /* rounding term, shifted left by (q_bits - QUANT_ROUND_FACTOR_Q) before use */
+ WORD32 *pi4_quant_round_factor_0_1, /* unused in this function */
+ WORD32 *pi4_quant_round_factor_1_2, /* unused in this function */
+ WORD32 src_strd, /* row stride of pi2_coeffs, in WORD16 elements */
+ WORD32 dst_q_strd, /* row stride of pi2_q_dst, in WORD16 elements */
+ WORD32 dst_iq_strd, /* row stride of pi2_iq_dst, in WORD16 elements */
+ UWORD8 *csbf, /* out: one flag per 4x4 tile, 1 when the tile holds a non-zero quantized level */
+ WORD32 csbf_strd, /* csbf advance per row of tiles */
+ WORD32 *zero_col, /* out: complement of a mask with 0xF << (block_col * 4) set for each coded tile */
+ WORD32 *zero_row, /* out: complement of a mask with 0xF << block_row set for each coded tile */
+ WORD16 *pi2_dequant_coeff, /* unused in this function */
+ LWORD64 *pi8_cost) /* out: sum of squared differences between input and dequantized coefficients */
+{ /* Quantizes and dequantizes the block one 4x4 tile at a time, accumulating the squared error; returns cbf (1 if any tile is coded, else 0). */
+ WORD32 i, j;
+ WORD32 log2_size;
+ WORD32 cbf = 0;
+
+ WORD16 qm = 4;
+ WORD16 bd = 8; /* presumably the bit depth (fixed at 8 here) - TODO confirm */
+ WORD32 q_bits, tr, temp;
+ WORD32 block_col = 0;
+ WORD32 block_row = 0;
+ WORD32 temp_zero_col = 0;
+ WORD32 temp_zero_row = 0;
+
+ WORD32 sh;
+ WORD32 s_iq;
+ WORD32 sh_tmp;
+
+ // ssd
+ int32x4_t ssd0 = vdupq_n_s32(0);
+ int32x2_t ssd1;
+ WORD32 ssd;
+ // const
+ const int16x4_t zero = vdup_n_s16(0);
+ const int16x4_t zero_d = vdup_n_s16(0);
+ const int16x4_t sq = vdup_n_s16(g_ihevc_quant_scales[qp_rem]);
+ const int16x4_t siq = vdup_n_s16((g_ihevc_iquant_scales_flat_scale[qp_rem]));
+ // src
+ int16x4_t s0, s1, s2, s3;
+ // q-iq
+ int16x4_t q0, q1, q2, q3;
+ int16x4_t iq0, iq1, iq2, iq3;
+ // residue
+ int32x4_t r0, r1, r2, r3;
+ // sign
+ uint16x4_t psgn0, psgn1, psgn2, psgn3;
+ uint16x4_t nsgn0, nsgn1, nsgn2, nsgn3;
+ // abs(src)
+ int16x4_t abs_s0, abs_s1, abs_s2, abs_s3;
+ // q-temp
+ int32x4_t qtmp_0, qtmp_1, qtmp_2, qtmp_3;
+ int16x4_t pq0, pq1, pq2, pq3;
+ int16x4_t nq0, nq1, nq2, nq3;
+ // iq-temp
+ int32x4_t iqtmp_0, iqtmp_1, iqtmp_2, iqtmp_3;
+
+ int32x4_t add_q;
+ int32x4_t add_iq = vdupq_n_s32(1);
+ int32x4_t sh_iq_1;
+ int32x4_t sh_iq;
+ int32x4_t q_v_bits;
+
+ (void)pi4_quant_round_factor_0_1;
+ (void)pi4_quant_round_factor_1_2;
+ (void)pi2_dequant_coeff;
+
+ GETRANGE(log2_size, trans_size);
+ log2_size -= 1;
+
+ tr = MAX_TR_DYNAMIC_RANGE - bd - log2_size;
+ q_bits = QUANT_SHIFT + qp_div + tr + SCALING_Q_SHIFT - qm - FLAT_RESCALE_MAT_Q_SHIFT;
+ temp = (((WORD32)q_add) << (q_bits - QUANT_ROUND_FACTOR_Q));
+
+ q_v_bits = vdupq_n_s32(-q_bits); /* negated count: vshlq_s32 with it implements the ">>= q_bits" below */
+ add_q = vdupq_n_s32(temp);
+
+ sh = bd + log2_size - 5;
+
+ sh_tmp = (sh - qp_div - 1);
+ sh_iq_1 = vdupq_n_s32(sh_tmp);
+ add_iq = vshlq_s32(add_iq, sh_iq_1); /* 1 shifted by (sh - qp_div - 1): rounding offset added before the dequantization shift */
+
+ s_iq = (-(sh - qp_div)); /* negated so that vshlq_s32 shifts by (sh - qp_div) in the opposite direction */
+ sh_iq = vdupq_n_s32(s_iq);
+
+ for(i = 0; i < trans_size; i += 4)
+ {
+ for(j = 0; j < trans_size; j += 4)
+ {
+ s0 = vld1_s16(pi2_coeffs + j);
+ s1 = vld1_s16(pi2_coeffs + j + (src_strd));
+ s2 = vld1_s16(pi2_coeffs + j + (2 * src_strd));
+ s3 = vld1_s16(pi2_coeffs + j + (3 * src_strd));
+
+ /* quantization */
+ /* sign masks: psgn = lanes with src >= 0, nsgn = lanes with src < 0 */
+ psgn0 = vcge_s16(s0, zero);
+ psgn1 = vcge_s16(s1, zero);
+ psgn2 = vcge_s16(s2, zero);
+ psgn3 = vcge_s16(s3, zero);
+
+ nsgn0 = vclt_s16(s0, zero);
+ nsgn1 = vclt_s16(s1, zero);
+ nsgn2 = vclt_s16(s2, zero);
+ nsgn3 = vclt_s16(s3, zero);
+
+ /* |src| */
+ abs_s0 = vabs_s16(s0);
+ abs_s1 = vabs_s16(s1);
+ abs_s2 = vabs_s16(s2);
+ abs_s3 = vabs_s16(s3);
+
+ /* tmp = |src| * g_ihevc_quant_scales[qp_rem], widened to 32 bits */
+ qtmp_0 = vmull_s16(abs_s0, sq);
+ qtmp_1 = vmull_s16(abs_s1, sq);
+ qtmp_2 = vmull_s16(abs_s2, sq);
+ qtmp_3 = vmull_s16(abs_s3, sq);
+
+ /* tmp += (((WORD32)q_add) << (q_bits - QUANT_ROUND_FACTOR_Q)) */
+ qtmp_0 = vaddq_s32(qtmp_0, add_q);
+ qtmp_1 = vaddq_s32(qtmp_1, add_q);
+ qtmp_2 = vaddq_s32(qtmp_2, add_q);
+ qtmp_3 = vaddq_s32(qtmp_3, add_q);
+
+ /* tmp >>= q_bits; */
+ qtmp_0 = vshlq_s32(qtmp_0, q_v_bits);
+ qtmp_1 = vshlq_s32(qtmp_1, q_v_bits);
+ qtmp_2 = vshlq_s32(qtmp_2, q_v_bits);
+ qtmp_3 = vshlq_s32(qtmp_3, q_v_bits);
+
+ /* clip: saturating narrow to 16 bits */
+ q0 = vqmovn_s32(qtmp_0);
+ q1 = vqmovn_s32(qtmp_1);
+ q2 = vqmovn_s32(qtmp_2);
+ q3 = vqmovn_s32(qtmp_3);
+
+ /* restore sign: (level & psgn) - (level & nsgn) negates the lanes whose source was negative */
+ pq0 = vand_s16(q0, vreinterpret_s16_u16(psgn0));
+ pq1 = vand_s16(q1, vreinterpret_s16_u16(psgn1));
+ pq2 = vand_s16(q2, vreinterpret_s16_u16(psgn2));
+ pq3 = vand_s16(q3, vreinterpret_s16_u16(psgn3));
+
+ nq0 = vand_s16(q0, vreinterpret_s16_u16(nsgn0));
+ nq1 = vand_s16(q1, vreinterpret_s16_u16(nsgn1));
+ nq2 = vand_s16(q2, vreinterpret_s16_u16(nsgn2));
+ nq3 = vand_s16(q3, vreinterpret_s16_u16(nsgn3));
+
+ q0 = vsub_s16(pq0, nq0);
+ q1 = vsub_s16(pq1, nq1);
+ q2 = vsub_s16(pq2, nq2);
+ q3 = vsub_s16(pq3, nq3);
+
+ /* store */
+ vst1_s16((pi2_q_dst + j), q0);
+ vst1_s16((pi2_q_dst + j + dst_q_strd), q1);
+ vst1_s16((pi2_q_dst + j + (2 * dst_q_strd)), q2);
+ vst1_s16((pi2_q_dst + j + (3 * dst_q_strd)), q3);
+
+ *(csbf + block_col) = 0;
+ if(vget_lane_s64(vreinterpret_s64_s16(q0), 0) || /* each row of four levels is viewed as one 64-bit lane, so a single test covers the row */
+ vget_lane_s64(vreinterpret_s64_s16(q1), 0) ||
+ vget_lane_s64(vreinterpret_s64_s16(q2), 0) ||
+ vget_lane_s64(vreinterpret_s64_s16(q3), 0))
+ {
+ *(csbf + block_col) = 1;
+ }
+
+ if(*(csbf + block_col) == 1)
+ {
+ temp_zero_col |= (0xF << block_col * 4); /* mark the four columns of this tile as non-zero */
+ temp_zero_row |= (0xF << block_row); /* block_row already advances in steps of 4 */
+
+ /* inverse quantization */
+ iqtmp_0 = vmull_s16(q0, siq);
+ iqtmp_1 = vmull_s16(q1, siq);
+ iqtmp_2 = vmull_s16(q2, siq);
+ iqtmp_3 = vmull_s16(q3, siq);
+
+ iqtmp_0 = vaddq_s32(iqtmp_0, add_iq);
+ iqtmp_1 = vaddq_s32(iqtmp_1, add_iq);
+ iqtmp_2 = vaddq_s32(iqtmp_2, add_iq);
+ iqtmp_3 = vaddq_s32(iqtmp_3, add_iq);
+
+ iqtmp_0 = vshlq_s32(iqtmp_0, sh_iq);
+ iqtmp_1 = vshlq_s32(iqtmp_1, sh_iq);
+ iqtmp_2 = vshlq_s32(iqtmp_2, sh_iq);
+ iqtmp_3 = vshlq_s32(iqtmp_3, sh_iq);
+
+ /* clip: saturating narrow to 16 bits */
+ iq0 = vqmovn_s32(iqtmp_0);
+ iq1 = vqmovn_s32(iqtmp_1);
+ iq2 = vqmovn_s32(iqtmp_2);
+ iq3 = vqmovn_s32(iqtmp_3);
+
+ /* store */
+ vst1_s16((pi2_iq_dst + j), iq0);
+ vst1_s16((pi2_iq_dst + j + dst_iq_strd), iq1);
+ vst1_s16((pi2_iq_dst + j + (2 * dst_iq_strd)), iq2);
+ vst1_s16((pi2_iq_dst + j + (3 * dst_iq_strd)), iq3);
+
+ /* ssd */
+ /* trans_coeff - inv.quant */
+ r0 = vsubl_s16(s0, iq0);
+ r1 = vsubl_s16(s1, iq1);
+ r2 = vsubl_s16(s2, iq2);
+ r3 = vsubl_s16(s3, iq3);
+
+ /* SD */
+ r0 = vmulq_s32(r0, r0);
+ r1 = vmulq_s32(r1, r1);
+ r2 = vmulq_s32(r2, r2);
+ r3 = vmulq_s32(r3, r3);
+ }
+ else
+ {
+ /* store: the whole tile quantized to zero, so the dequantized tile is zero */
+ vst1_s16((pi2_iq_dst + j), zero_d);
+ vst1_s16((pi2_iq_dst + j + dst_iq_strd), zero_d);
+ vst1_s16((pi2_iq_dst + j + (2 * dst_iq_strd)), zero_d);
+ vst1_s16((pi2_iq_dst + j + (3 * dst_iq_strd)), zero_d);
+
+ /* SD: the error is the source coefficient itself */
+ r0 = vmull_s16(s0, s0);
+ r1 = vmull_s16(s1, s1);
+ r2 = vmull_s16(s2, s2);
+ r3 = vmull_s16(s3, s3);
+ }
+
+ /* SSD */
+ r0 = vaddq_s32(r0, r1);
+ r2 = vaddq_s32(r2, r3);
+
+ r0 = vaddq_s32(r0, r2);
+
+ /* SSD Accumulation */
+ ssd0 = vaddq_s32(ssd0, r0);
+
+ cbf = cbf || (*(csbf + block_col)); // cbf update
+ block_col++;
+ }
+
+ block_col = 0;
+ block_row += 4;
+ csbf += csbf_strd;
+
+ pi2_coeffs += 4 * src_strd;
+ pi2_q_dst += 4 * dst_q_strd;
+ pi2_iq_dst += 4 * dst_iq_strd;
+ pi2_quant_coeff += 4 * trans_size;
+ }
+
+ /* SSD Computation: horizontal add of the four 32-bit lanes */
+ ssd1 = vpadd_s32(vget_low_s32(ssd0), vget_high_s32(ssd0));
+ ssd1 = vpadd_s32(ssd1, ssd1);
+ ssd = vget_lane_s32(ssd1, 0);
+
+ *zero_col = ~temp_zero_col; //final zero_col storing
+ *zero_row = ~temp_zero_row; //final zero_row storing
+
+ /* Store the cost. NOTE(review): the sum is built in 32-bit lanes and a WORD32 before widening - confirm it cannot overflow for the largest trans_size */
+ *pi8_cost = ssd;
+
+ return cbf;
+}
+
+WORD32 ihevc_q_iq_ssd_flat_scale_mat_var_rnd_fact_neon(
+ WORD16 *pi2_coeffs, /* in: coefficients, read as 4x4 tiles with row stride src_strd */
+ WORD16 *pi2_quant_coeff, /* not read in this flat-scale variant; the pointer is only advanced */
+ WORD16 *pi2_q_dst, /* out: quantized levels, row stride dst_q_strd */
+ WORD16 *pi2_iq_dst, /* out: dequantized coefficients (zeros for all-zero tiles), row stride dst_iq_strd */
+ WORD32 trans_size, /* block dimension; also the row stride of both round-factor arrays */
+ WORD32 qp_div, /* qpscaled / 6 */
+ WORD32 qp_rem, /* qpscaled % 6 */
+ WORD32 q_add, /* unused in this function */
+ WORD32 *pi4_quant_round_factor_0_1, /* in: per-coefficient rounding term used where the unrounded level is below 1 */
+ WORD32 *pi4_quant_round_factor_1_2, /* in: per-coefficient rounding term used where the unrounded level is 1 */
+ WORD32 src_strd, /* row stride of pi2_coeffs, in WORD16 elements */
+ WORD32 dst_q_strd, /* row stride of pi2_q_dst, in WORD16 elements */
+ WORD32 dst_iq_strd, /* row stride of pi2_iq_dst, in WORD16 elements */
+ UWORD8 *csbf, /* out: one flag per 4x4 tile, 1 when the tile holds a non-zero quantized level */
+ WORD32 csbf_strd, /* csbf advance per row of tiles */
+ WORD32 *zero_col, /* out: complement of a mask with 0xF << (block_col * 4) set for each coded tile */
+ WORD32 *zero_row, /* out: complement of a mask with 0xF << block_row set for each coded tile */
+ WORD16 *pi2_dequant_coeff, /* unused in this function */
+ LWORD64 *pi8_cost) /* out: sum of squared differences between input and dequantized coefficients */
+{ /* Same quantize / dequantize / SSD flow per 4x4 tile, but the rounding term is chosen per coefficient from its unrounded level; returns cbf (1 if any tile is coded, else 0). */
+ WORD32 i, j;
+ WORD32 log2_size;
+ WORD32 cbf = 0;
+
+ WORD16 qm = 4;
+ WORD16 bd = 8; /* presumably the bit depth (fixed at 8 here) - TODO confirm */
+ WORD32 q_bits, tr;
+ WORD32 block_col = 0;
+ WORD32 block_row = 0;
+ WORD32 temp_zero_col = 0;
+ WORD32 temp_zero_row = 0;
+
+ WORD32 sh;
+ WORD32 s_iq;
+ WORD32 sh_tmp;
+
+ // ssd
+ int32x4_t ssd0 = vdupq_n_s32(0);
+ int32x2_t ssd1;
+ WORD32 ssd;
+ // const
+ const int16x8_t zero = vdupq_n_s16(0);
+ const int16x4_t zero_d = vdup_n_s16(0);
+ const int16x8_t one = vdupq_n_s16(1);
+ const int16x8_t two = vdupq_n_s16(2);
+ const int16x4_t sq = vdup_n_s16(g_ihevc_quant_scales[qp_rem]);
+ const int16x4_t siq = vdup_n_s16((g_ihevc_iquant_scales_flat_scale[qp_rem]));
+ // src
+ int16x4_t s0, s1, s2, s3;
+ // sign
+ uint16x8_t psgn0, psgn1;
+ uint16x8_t nsgn0, nsgn1;
+ int16x8_t pq0, pq1;
+ int16x8_t nq0, nq1;
+ // abs(src)
+ int16x4_t abs_s0, abs_s1, abs_s2, abs_s3;
+ // q-temp
+ int32x4_t mul_0, mul_1, mul_2, mul_3;
+ int32x4_t q_tmp0, q_tmp1, q_tmp2, q_tmp3;
+ int16x8_t q_00, q_01;
+ int16x8_t q_10, q_11;
+ int16x8_t q_20, q_21;
+ int16x8_t q_30, q_31;
+ // cmp
+ uint16x8_t cmp_00, cmp_01;
+ uint16x8_t cmp_10, cmp_11;
+ uint16x8_t cmp_20, cmp_21;
+ // iq-temp
+ int32x4_t iqtmp_0, iqtmp_1, iqtmp_2, iqtmp_3;
+ int16x4_t iq0, iq1, iq2, iq3;
+ //residue
+ int32x4_t r0, r1, r2, r3;
+ // add_q
+ int32x4_t add_q;
+ int32x4_t add_q0, add_q1, add_q2, add_q3;
+ int32x4_t add_iq = vdupq_n_s32(1);
+ int32x4_t sh_iq_1;
+ int32x4_t sh_iq;
+ int32x4_t q_v_bits;
+ int32x4_t stmp;
+
+ (void)q_add;
+ (void)pi2_dequant_coeff;
+ GETRANGE(log2_size, trans_size);
+ log2_size -= 1;
+
+ tr = MAX_TR_DYNAMIC_RANGE - bd - log2_size;
+ q_bits = QUANT_SHIFT + qp_div + tr + SCALING_Q_SHIFT - qm - FLAT_RESCALE_MAT_Q_SHIFT;
+
+ stmp = vdupq_n_s32(q_bits - QUANT_ROUND_FACTOR_Q); /* shift that aligns a round factor with the product before ">>= q_bits" */
+
+ add_q = vdupq_n_s32((1 << QUANT_ROUND_FACTOR_Q) / 2);
+ add_q = vshlq_s32(add_q, stmp);
+
+ q_v_bits = vdupq_n_s32(-q_bits); /* negated count: vshlq_s32 with it implements the ">>= q_bits" below */
+
+ sh = bd + log2_size - 5;
+
+ sh_tmp = (sh - qp_div - 1);
+ sh_iq_1 = vdupq_n_s32(sh_tmp);
+ add_iq = vshlq_s32(add_iq, sh_iq_1); /* 1 shifted by (sh - qp_div - 1): rounding offset added before the dequantization shift */
+
+ s_iq = (-(sh - qp_div)); /* negated so that vshlq_s32 shifts by (sh - qp_div) in the opposite direction */
+ sh_iq = vdupq_n_s32(s_iq);
+
+ for(i = 0; i < trans_size; i += 4)
+ {
+ for(j = 0; j < trans_size; j += 4)
+ {
+ s0 = vld1_s16(pi2_coeffs + j);
+ s1 = vld1_s16(pi2_coeffs + j + (src_strd));
+ s2 = vld1_s16(pi2_coeffs + j + (2 * src_strd));
+ s3 = vld1_s16(pi2_coeffs + j + (3 * src_strd));
+
+ /* quantization */
+ /* sign masks over row pairs: psgn = lanes with src >= 0, nsgn = lanes with src < 0 */
+ psgn0 = vcgeq_s16(vcombine_s16(s0, s1), zero);
+ psgn1 = vcgeq_s16(vcombine_s16(s2, s3), zero);
+
+ nsgn0 = vcltq_s16(vcombine_s16(s0, s1), zero);
+ nsgn1 = vcltq_s16(vcombine_s16(s2, s3), zero);
+
+ /* |src| */
+ abs_s0 = vabs_s16(s0);
+ abs_s1 = vabs_s16(s1);
+ abs_s2 = vabs_s16(s2);
+ abs_s3 = vabs_s16(s3);
+
+ /* tmp = |src| * g_ihevc_quant_scales[qp_rem], widened to 32 bits */
+ mul_0 = vmull_s16(abs_s0, sq);
+ mul_1 = vmull_s16(abs_s1, sq);
+ mul_2 = vmull_s16(abs_s2, sq);
+ mul_3 = vmull_s16(abs_s3, sq);
+
+ /* qadd = 0 */
+ /* tmp >>= q_bits; */
+ q_tmp0 = vshlq_s32(mul_0, q_v_bits);
+ q_tmp1 = vshlq_s32(mul_1, q_v_bits);
+ q_tmp2 = vshlq_s32(mul_2, q_v_bits);
+ q_tmp3 = vshlq_s32(mul_3, q_v_bits);
+
+ /* clip: q_00 / q_01 hold the unrounded levels of rows 0-1 / 2-3 */
+ q_00 = vcombine_s16(vqmovn_s32(q_tmp0), vqmovn_s32(q_tmp1));
+ q_01 = vcombine_s16(vqmovn_s32(q_tmp2), vqmovn_s32(q_tmp3));
+
+ /* compare the unrounded levels q_00, q_01 with 2 */
+ cmp_00 = vcltq_s16(q_00, two);
+ cmp_01 = vcltq_s16(q_01, two);
+
+ /* qadd = (1 << QUANT_ROUND_FACTOR_Q)/2) */
+ /* tmp >>= q_bits; */
+ q_tmp0 = vaddq_s32(mul_0, add_q);
+ q_tmp1 = vaddq_s32(mul_1, add_q);
+ q_tmp2 = vaddq_s32(mul_2, add_q);
+ q_tmp3 = vaddq_s32(mul_3, add_q);
+
+ q_tmp0 = vshlq_s32(q_tmp0, q_v_bits);
+ q_tmp1 = vshlq_s32(q_tmp1, q_v_bits);
+ q_tmp2 = vshlq_s32(q_tmp2, q_v_bits);
+ q_tmp3 = vshlq_s32(q_tmp3, q_v_bits);
+
+ /* clip: q_10 / q_11 hold the levels obtained with the default rounding term */
+ q_10 = vcombine_s16(vqmovn_s32(q_tmp0), vqmovn_s32(q_tmp1));
+ q_11 = vcombine_s16(vqmovn_s32(q_tmp2), vqmovn_s32(q_tmp3));
+
+ if(vget_lane_s64(vreinterpret_s64_u16(vget_low_u16(cmp_00)), 0) || /* taken when at least one unrounded level in the tile is below 2 */
+ vget_lane_s64(vreinterpret_s64_u16(vget_high_u16(cmp_00)), 0) ||
+ vget_lane_s64(vreinterpret_s64_u16(vget_low_u16(cmp_01)), 0) ||
+ vget_lane_s64(vreinterpret_s64_u16(vget_high_u16(cmp_01)), 0))
+ {
+ /* qadd = *pi4_quant_round_factor_1_2 */
+ /* tmp >>= q_bits; */
+ add_q0 = vld1q_s32(pi4_quant_round_factor_1_2 + j);
+ add_q1 = vld1q_s32(pi4_quant_round_factor_1_2 + j + (trans_size));
+ add_q2 = vld1q_s32(pi4_quant_round_factor_1_2 + j + (2 * trans_size));
+ add_q3 = vld1q_s32(pi4_quant_round_factor_1_2 + j + (3 * trans_size));
+
+ add_q0 = vshlq_s32(add_q0, stmp);
+ add_q1 = vshlq_s32(add_q1, stmp);
+ add_q2 = vshlq_s32(add_q2, stmp);
+ add_q3 = vshlq_s32(add_q3, stmp);
+
+ q_tmp0 = vaddq_s32(mul_0, add_q0);
+ q_tmp1 = vaddq_s32(mul_1, add_q1);
+ q_tmp2 = vaddq_s32(mul_2, add_q2);
+ q_tmp3 = vaddq_s32(mul_3, add_q3);
+
+ q_tmp0 = vshlq_s32(q_tmp0, q_v_bits);
+ q_tmp1 = vshlq_s32(q_tmp1, q_v_bits);
+ q_tmp2 = vshlq_s32(q_tmp2, q_v_bits);
+ q_tmp3 = vshlq_s32(q_tmp3, q_v_bits);
+
+ /* clip: q_20 / q_21 hold the levels obtained with round_factor_1_2 */
+ q_20 = vcombine_s16(vqmovn_s32(q_tmp0), vqmovn_s32(q_tmp1));
+ q_21 = vcombine_s16(vqmovn_s32(q_tmp2), vqmovn_s32(q_tmp3));
+
+ /* qadd = *pi4_quant_round_factor_0_1 */
+ /* tmp >>= q_bits; */
+ add_q0 = vld1q_s32(pi4_quant_round_factor_0_1 + j);
+ add_q1 = vld1q_s32(pi4_quant_round_factor_0_1 + j + (trans_size));
+ add_q2 = vld1q_s32(pi4_quant_round_factor_0_1 + j + (2 * trans_size));
+ add_q3 = vld1q_s32(pi4_quant_round_factor_0_1 + j + (3 * trans_size));
+
+ add_q0 = vshlq_s32(add_q0, stmp);
+ add_q1 = vshlq_s32(add_q1, stmp);
+ add_q2 = vshlq_s32(add_q2, stmp);
+ add_q3 = vshlq_s32(add_q3, stmp);
+
+ q_tmp0 = vaddq_s32(mul_0, add_q0);
+ q_tmp1 = vaddq_s32(mul_1, add_q1);
+ q_tmp2 = vaddq_s32(mul_2, add_q2);
+ q_tmp3 = vaddq_s32(mul_3, add_q3);
+
+ q_tmp0 = vshlq_s32(q_tmp0, q_v_bits);
+ q_tmp1 = vshlq_s32(q_tmp1, q_v_bits);
+ q_tmp2 = vshlq_s32(q_tmp2, q_v_bits);
+ q_tmp3 = vshlq_s32(q_tmp3, q_v_bits);
+
+ /* clip: q_30 / q_31 hold the levels obtained with round_factor_0_1 */
+ q_30 = vcombine_s16(vqmovn_s32(q_tmp0), vqmovn_s32(q_tmp1));
+ q_31 = vcombine_s16(vqmovn_s32(q_tmp2), vqmovn_s32(q_tmp3));
+
+ /* compare the unrounded levels q_00, q_01 with 1 */
+ cmp_10 = vcltq_s16(q_00, one);
+ cmp_11 = vcltq_s16(q_01, one);
+
+ cmp_20 = vbicq_u16(cmp_00, cmp_10); /* lanes with unrounded level < 2 but not < 1 */
+ cmp_21 = vbicq_u16(cmp_01, cmp_11);
+
+ q_10 = vbslq_s16(cmp_10, q_30, q_10); /* unrounded level < 1: take the round_factor_0_1 result */
+ q_11 = vbslq_s16(cmp_11, q_31, q_11);
+
+ q_10 = vbslq_s16(cmp_20, q_20, q_10); /* remaining lanes below 2: take the round_factor_1_2 result */
+ q_11 = vbslq_s16(cmp_21, q_21, q_11);
+ }
+
+ /* restore sign: (level & psgn) - (level & nsgn) negates the lanes whose source was negative */
+ pq0 = vandq_s16(q_10, vreinterpretq_s16_u16(psgn0));
+ pq1 = vandq_s16(q_11, vreinterpretq_s16_u16(psgn1));
+
+ nq0 = vandq_s16(q_10, vreinterpretq_s16_u16(nsgn0));
+ nq1 = vandq_s16(q_11, vreinterpretq_s16_u16(nsgn1));
+
+ q_10 = vsubq_s16(pq0, nq0);
+ q_11 = vsubq_s16(pq1, nq1);
+
+ /* store */
+ vst1_s16((pi2_q_dst + j), vget_low_s16(q_10));
+ vst1_s16((pi2_q_dst + j + dst_q_strd), vget_high_s16(q_10));
+ vst1_s16((pi2_q_dst + j + (2 * dst_q_strd)), vget_low_s16(q_11));
+ vst1_s16((pi2_q_dst + j + (3 * dst_q_strd)), vget_high_s16(q_11));
+
+ *(csbf + block_col) = 0;
+ if(vget_lane_s64(vreinterpret_s64_s16(vget_low_s16(q_10)), 0) || /* each row of four levels is viewed as one 64-bit lane, so a single test covers the row */
+ vget_lane_s64(vreinterpret_s64_s16(vget_high_s16(q_10)), 0) ||
+ vget_lane_s64(vreinterpret_s64_s16(vget_low_s16(q_11)), 0) ||
+ vget_lane_s64(vreinterpret_s64_s16(vget_high_s16(q_11)), 0))
+ {
+ *(csbf + block_col) = 1;
+ }
+
+ if(*(csbf + block_col) == 1)
+ {
+ temp_zero_col |= (0xF << block_col * 4); /* mark the four columns of this tile as non-zero */
+ temp_zero_row |= (0xF << block_row); /* block_row already advances in steps of 4 */
+
+ /* inverse quantization */
+ iqtmp_0 = vmull_s16(vget_low_s16(q_10), siq);
+ iqtmp_1 = vmull_s16(vget_high_s16(q_10), siq);
+ iqtmp_2 = vmull_s16(vget_low_s16(q_11), siq);
+ iqtmp_3 = vmull_s16(vget_high_s16(q_11), siq);
+
+ iqtmp_0 = vaddq_s32(iqtmp_0, add_iq);
+ iqtmp_1 = vaddq_s32(iqtmp_1, add_iq);
+ iqtmp_2 = vaddq_s32(iqtmp_2, add_iq);
+ iqtmp_3 = vaddq_s32(iqtmp_3, add_iq);
+
+ iqtmp_0 = vshlq_s32(iqtmp_0, sh_iq);
+ iqtmp_1 = vshlq_s32(iqtmp_1, sh_iq);
+ iqtmp_2 = vshlq_s32(iqtmp_2, sh_iq);
+ iqtmp_3 = vshlq_s32(iqtmp_3, sh_iq);
+
+ /* clip: saturating narrow to 16 bits */
+ iq0 = vqmovn_s32(iqtmp_0);
+ iq1 = vqmovn_s32(iqtmp_1);
+ iq2 = vqmovn_s32(iqtmp_2);
+ iq3 = vqmovn_s32(iqtmp_3);
+
+ /* store */
+ vst1_s16((pi2_iq_dst + j), iq0);
+ vst1_s16((pi2_iq_dst + j + dst_iq_strd), iq1);
+ vst1_s16((pi2_iq_dst + j + (2 * dst_iq_strd)), iq2);
+ vst1_s16((pi2_iq_dst + j + (3 * dst_iq_strd)), iq3);
+
+ /* ssd */
+ /* trans_coeff - inv.quant */
+ r0 = vsubl_s16(s0, iq0);
+ r1 = vsubl_s16(s1, iq1);
+ r2 = vsubl_s16(s2, iq2);
+ r3 = vsubl_s16(s3, iq3);
+
+ /* SD */
+ r0 = vmulq_s32(r0, r0);
+ r1 = vmulq_s32(r1, r1);
+ r2 = vmulq_s32(r2, r2);
+ r3 = vmulq_s32(r3, r3);
+ }
+ else
+ {
+ /* store: the whole tile quantized to zero, so the dequantized tile is zero */
+ vst1_s16((pi2_iq_dst + j), zero_d);
+ vst1_s16((pi2_iq_dst + j + dst_iq_strd), zero_d);
+ vst1_s16((pi2_iq_dst + j + (2 * dst_iq_strd)), zero_d);
+ vst1_s16((pi2_iq_dst + j + (3 * dst_iq_strd)), zero_d);
+
+ /* SD: the error is the source coefficient itself */
+ r0 = vmull_s16(s0, s0);
+ r1 = vmull_s16(s1, s1);
+ r2 = vmull_s16(s2, s2);
+ r3 = vmull_s16(s3, s3);
+ }
+
+ /* SSD */
+ r0 = vaddq_s32(r0, r1);
+ r2 = vaddq_s32(r2, r3);
+
+ r0 = vaddq_s32(r0, r2);
+
+ /* SSD Accumulation */
+ ssd0 = vaddq_s32(ssd0, r0);
+
+ cbf = cbf || (*(csbf + block_col)); // cbf update
+ block_col++;
+ }
+
+ block_col = 0;
+ block_row += 4;
+ csbf += csbf_strd;
+
+ pi2_coeffs += 4 * src_strd;
+ pi2_q_dst += 4 * dst_q_strd;
+ pi2_iq_dst += 4 * dst_iq_strd;
+ pi2_quant_coeff += 4 * trans_size;
+ pi4_quant_round_factor_1_2 += 4 * trans_size;
+ pi4_quant_round_factor_0_1 += 4 * trans_size;
+ }
+
+ /* SSD Computation: horizontal add of the four 32-bit lanes */
+ ssd1 = vpadd_s32(vget_low_s32(ssd0), vget_high_s32(ssd0));
+ ssd1 = vpadd_s32(ssd1, ssd1);
+ ssd = vget_lane_s32(ssd1, 0);
+
+ *zero_col = ~temp_zero_col; //final zero_col storing
+ *zero_row = ~temp_zero_row; //final zero_row storing
+
+ /* Store the cost. NOTE(review): the sum is built in 32-bit lanes and a WORD32 before widening - confirm it cannot overflow for the largest trans_size */
+ *pi8_cost = ssd;
+
+ return cbf;
+}
diff --git a/common/arm/ihevc_resi_trans.s b/common/arm/ihevc_resi_trans.s
new file mode 100644
index 0000000..1ee269b
--- /dev/null
+++ b/common/arm/ihevc_resi_trans.s
@@ -0,0 +1,1625 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2018 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+
+.text
+.align 4
+
+@/**
+@/*******************************************************************************
+@/*
+@/* @brief
+@/* Residue calculation and Forward Transform for 4x4 block with 8-bit input
+@/*
+@/* @par Description:
+@/* Performs residue calculation by subtracting source and prediction and
+@/* followed by forward transform
+@/*
+@/* @param[in] pu1_src
+@/* Input 4x4 pixels
+@/*
+@/* @param[in] pu1_pred
+@/* Prediction data
+@/*
+@/* @param[in] pi4_tmp
+@/* Temporary buffer of size 4x4
+@/*
+@/* @param[out] pi2_dst
+@/* Output 4x4 coefficients
+@/*
+@/* @param[in] src_strd
+@/* Input stride
+@/*
+@/* @param[in] pred_strd
+@/* Prediction Stride
+@/*
+@/* @param[in] dst_strd_chr_flag
+@/* Output Stride and Chroma Flag packed in the MS and LS 16-bit
+@/*
+@/* @returns SAD (sum of absolute differences) between source and prediction, in r0
+@/*
+@/* @remarks
+@/* None
+@/*
+@/*******************************************************************************
+@/*/
+
+@/**************Variables Vs Registers*****************************************
+@ r0 => *pu1_src
+@ r1 => *pu1_pred
+@ r2 => *pi4_temp
+@ r3 => *pi2_dst
+@ r4 => src_strd
+@ r5 => pred_strd
+@ r6 => dst_strd_chr_flag
+
+ .global ihevc_resi_trans_4x4_a9q
+
+ihevc_resi_trans_4x4_a9q:
+
+ STMFD sp!, {r4-r7, r14} @ save r4-r7 and lr (5 registers = 20 bytes, hence the stack offsets below)
+ LDR r4, [sp,#20] @ r4 contains src_strd
+ LDR r5, [sp,#24] @ r5 contains pred_strd
+ LDR r6, [sp,#28] @ r6 contains dst_strd_chr_flag
+
+ ANDS r7, r6, #1 @check for chroma flag, if present interleaved data
+ CMP r7, #0 @ Z flag was already set by ANDS; this compare is redundant but harmless
+ BEQ NON_INTERLEAVE_LOAD @if flag == 0, use non-interleaving loads
+
+ VLD1.64 d0, [r0], r4 @ load row 0 src
+ VLD1.64 d4, [r0], r4 @ load row 1 src
+ VLD1.64 d1, [r0], r4 @ load row 2 src
+ VLD1.64 d5, [r0], r4 @ load row 3 src
+ VUZP.8 d0, d4 @ de-interleave: even-indexed bytes of src rows 0 and 1 end up in d0
+ VUZP.8 d1, d5 @ de-interleave: even-indexed bytes of src rows 2 and 3 end up in d1
+
+ VLD1.64 d2, [r1], r5 @ load row 0 pred
+ VLD1.64 d6, [r1], r5 @ load row 1 pred
+ VLD1.64 d3, [r1], r5 @ load row 2 pred
+ VLD1.64 d7, [r1], r5 @ load row 3 pred
+ VUZP.8 d2, d6 @ de-interleave: even-indexed bytes of pred rows 0 and 1 end up in d2
+ VUZP.8 d3, d7 @ de-interleave: even-indexed bytes of pred rows 2 and 3 end up in d3
+
+ B LOAD_END
+
+NON_INTERLEAVE_LOAD:
+ VLD1.U32 d0[0], [r0], r4 @ load row 0 src
+ VLD1.U32 d0[1], [r0], r4 @ load row 1 src
+ VLD1.U32 d1[0], [r0], r4 @ load row 2 src
+ VLD1.U32 d1[1], [r0], r4 @ load row 3 src
+
+ VLD1.U32 d2[0], [r1], r5 @ load row 0 pred
+ VLD1.U32 d2[1], [r1], r5 @ load row 1 pred
+ VLD1.U32 d3[0], [r1], r5 @ load row 2 pred
+ VLD1.U32 d3[1], [r1], r5 @ load row 3 pred
+
+LOAD_END:
+ @ Finding the residue
+ VSUBL.U8 q2, d0, d2 @ q2 contains 1st 16-bit 8 residues
+ VSUBL.U8 q3, d1, d3 @ q3 contains 2nd 16-bit 8 residues
+
+ @ SAD calculation
+ VABDL.U8 q12, d0, d2 @ q12 contains absolute differences
+ VABAL.U8 q12, d1, d3 @ q12 accumulates absolute differences
+ VADD.U16 d26, d24, d25 @ add d-registers of q12
+ VPADDL.U16 d27, d26 @ d27 contains 2 32-bit values that have to be added
+ VPADDL.U32 d28, d27 @ d28 contains 64-bit SAD, only the low 32 bits are used
+ VMOV.32 r0, d28[0] @ SAD stored in r0 for return
+ @ SAD calculation ends
+
+ @ Forward transform - step 1
+ VMOV.I16 d2, #64 @ generate immediate constant in d2 for even row multiplication
+ VTRN.16 d4, d5 @ 3-step transpose of residue matrix starts
+ VTRN.16 d6, d7 @ 2nd step of the 3-step matrix transpose
+ VMOV.I16 d0, #83 @ generate immediate constant in d0 for odd row multiplication
+ VTRN.32 q2, q3 @ Final step of matrix transpose
+
+ VMOV.I16 d1, #36 @ generate immediate constant in d1 for odd row multiplication
+ VSWP d6, d7 @ vector swap to allow even and odd row calculation using Q registers
+ VADD.S16 q10, q2, q3 @ q10 has the even array
+ VSUB.S16 q11, q2, q3 @ q11 has the odd array
+ VMULL.S16 q12, d20, d2 @ e[0]*64
+ VMLAL.S16 q12, d21, d2[0] @ row 1 of results: e[0]*64 + e[1]*64
+ VMULL.S16 q13, d20, d2 @ e[0]*64
+ VMLSL.S16 q13, d21, d2[0] @ row 3 of results: e[0]*64 - e[1]*64
+ VMULL.S16 q8, d22, d0 @ o[0]*83
+ VMLAL.S16 q8, d23, d1[0] @ row 2 of results: o[0]*83 + o[1]*36
+ VMULL.S16 q9, d22, d1 @ o[0]*36
+ VMLSL.S16 q9, d23, d0[0] @ row 4 of results: o[0]*36 - o[1]*83
+
+ @ Forward transform - step 2
+ VMOV.I32 d2, #64 @ generate immediate constant in d2 for even row multiplication
+ VMOV.I32 d0, #83 @ generate immediate constant in d0 for odd row multiplication
+ VTRN.32 q12, q8 @ 4-step transpose of residue matrix starts
+ VTRN.32 q13, q9 @ 2nd step of the 4-step matrix transpose
+
+ VMOV.I32 d1, #36 @ generate immediate constant in d1 for odd row multiplication
+ VSWP d25, d26 @ 3rd step of the 4-step matrix transpose
+ VSWP d17, d18 @ 4th step of the 4-step matrix transpose
+ VADD.S32 q2, q12, q9 @ e[0]
+ VADD.S32 q3, q8, q13 @ e[1]
+ VSUB.S32 q10, q12, q9 @ o[0]
+ VSUB.S32 q11, q8, q13 @ o[1]
+
+ VMUL.S32 q12, q2, d2[0] @ e[0]*64
+ VMLA.S32 q12, q3, d2[0] @ row 1 of results: e[0]*64 + e[1]*64
+ VMUL.S32 q13, q2, d2[0] @ e[0]*64
+ VMLS.S32 q13, q3, d2[0] @ row 3 of results: e[0]*64 - e[1]*64
+ VMUL.S32 q8, q10, d0[0] @ o[0]*83
+ VMLA.S32 q8, q11, d1[0] @ row 2 of results: o[0]*83 + o[1]*36
+ VMUL.S32 q9, q10, d1[0] @ o[0]*36
+ VMLS.S32 q9, q11, d0[0] @ row 4 of results: o[0]*36 - o[1]*83
+
+ VRSHRN.S32 d0, q12, #9 @ (row1 + 256)/512
+ VRSHRN.S32 d1, q8, #9 @ (row2 + 256)/512
+ VRSHRN.S32 d2, q13, #9 @ (row3 + 256)/512
+ VRSHRN.S32 d3, q9, #9 @ (row4 + 256)/512
+
+ LSR r7, r6, #15 @ r7 = 2*dst_strd (stride in bytes), as pi2_dst holds 2-byte (16-bit) integers
+ VST1.U16 d0, [r3], r7 @ store 1st row of result
+ VST1.U16 d1, [r3], r7 @ store 2nd row of result
+ VST1.U16 d2, [r3], r7 @ store 3rd row of result
+ VST1.U16 d3, [r3], r7 @ store 4th row of result
+
+ LDMFD sp!,{r4-r7,r15} @ restore r4-r7 and return by loading the saved lr into pc
+
+ @ Function End
+
+@/**
+@*******************************************************************************
+@*
+@* @brief
+@* This function performs residue calculation and forward transform type 1
+@* on input pixels
+@*
+@* @description
+@* Performs residue calculation by subtracting source and prediction and
+@* followed by forward transform
+@*
+@* @param[in] pu1_src
+@* Input 4x4 pixels
+@*
+@* @param[in] pu1_pred
+@* Prediction data
+@*
+@* @param[in] pi4_temp
+@* Temporary buffer of size 4x4
+@*
+@* @param[out] pi2_dst
+@* Output 4x4 coefficients
+@*
+@* @param[in] src_strd
+@* Input stride
+@*
+@* @param[in] pred_strd
+@* Prediction Stride
+@*
+@* @param[in] dst_strd_chr_flag
+@* Output Stride and Chroma Flag packed in the MS and LS 16-bit
+@*
+@* @returns SAD (sum of absolute differences) between source and prediction
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+@ UWORD32 ihevc_resi_trans_4x4_ttype1(UWORD8 *pu1_src,
+@ UWORD8 *pu1_pred,
+@ WORD32 *pi4_temp,
+@ WORD16 *pi2_dst,
+@ WORD32 src_strd,
+@ WORD32 pred_strd,
+@ WORD32 dst_strd_chr_flag);
+@
+@**************Variables Vs Registers*******************************************
+@
+@ r0 - pu1_src
+@ r1 - pu1_pred
+@ r2 - pi4_temp
+@ r3 - pi2_dst
+@
+@ [sp] - src_strd
+@ [sp+4] - pred_strd
+@ [sp+8] - dst_strd_chr_flag
+@
+@*******************************************************************************
+
+ .global ihevc_resi_trans_4x4_ttype1_a9q
+
+ihevc_resi_trans_4x4_ttype1_a9q:
+
+ PUSH {r4}
+ vpush {d8 - d15} @ save d8-d15, which are used below
+
+ LDR r2,[sp,#68] @ r2 = src_strd (68 = 4 bytes for r4 + 64 bytes for d8-d15); overwrites the incoming r2 argument
+ LDR r4,[sp,#72] @ r4 = pred_strd
+
+ VLD1.32 d2[0],[r0],r2 @ Row 1 of source in d2[0]
+ VLD1.32 d3[0],[r1],r4 @ Row 1 of prediction in d3[0]
+ VLD1.32 d2[1],[r0],r2 @ Row 2 of source in d2[1]
+ VLD1.32 d3[1],[r1],r4 @ Row 2 of prediction in d3[1]
+
+ VLD1.32 d8[0],[r0],r2 @ Row 3 of source in d8[0]
+ VABDL.U8 q0,d2,d3 @ Absolute differences of rows 1 and 2 in q0 (d0: row 1, d1: row 2)
+ @ R2:[d11[3] d11[2] d11[1] d11[0]] => Row 2 of residue
+ VLD1.32 d9[0],[r1],r4 @ Row 3 of prediction in d9[0]
+ VSUBL.U8 q5,d2,d3 @ R1:[d10[3] d10[2] d10[1] d10[0]] => Row 1 of residue
+ VLD1.32 d8[1],[r0] @ Row 4 of source in d8[1]
+ VTRN.16 d10,d11 @ Transpose step 1
+ VLD1.32 d9[1],[r1] @ Row 4 of prediction in d9[1]
+
+ VSUBL.U8 q6,d8,d9 @ R3:[d12[3] d12[2] d12[1] d12[0]] => Row 3 of residue
+ @ R4:[d13[3] d13[2] d13[1] d13[0]] => Row 4 of residue
+ VABAL.U8 q0,d8,d9 @ Absolute differences of rows 3 and 4 accumulated into q0
+ VTRN.16 d12,d13 @ Transpose step 2
+ VTRN.32 q5,q6 @ Transpose step 3, Residue block transposed
+ @ Columns are in C1:d10, C2:d11, C3:d12 and C4:d13
+ VADD.S16 d23,d11,d13 @ d23 = C2 + C4
+ VMOV.I32 d6,#55 @ Constant used for multiplication
+ VADD.S16 d22,d10,d13 @ d22 = C1 + C4
+ VADD.U16 d0,d1,d0 @ Accumulating SAD step 1
+ VMOV.I32 d7,#84 @ Constant used for multiplication
+ VMULL.S16 q7,d23,d6[0] @ q7 = 55*C2 + 55*C4
+ VMOV.I32 d4,#74 @ Constant used for multiplication
+ VMULL.S16 q9,d22,d7[0] @ q9 = 84*C1 + 84*C4
+ VADD.S16 d16,d10,d11 @ d16 = C1 + C2
+ VMUL.S16 d12,d12,d4[0] @ d12 = 74*C3
+ VMOV.I32 d5,#29 @ Constant used for multiplication
+ VPADDL.U16 d0,d0 @ Accumulating SAD step 2
+ VSUB.S16 d16,d16,d13 @ d16 = C1 + C2 - C4
+ VMLAL.S16 q7,d22,d5[0] @ q7 = 29*C1 + 55*C2 + 84*C4
+ VMLSL.S16 q9,d23,d5[0] @ q9 = 84*C1 - 29*C2 + 55*C4
+ VMULL.S16 q8,d16,d4[0] @ q8 = 74*C1 + 74*C2 - 74*C4
+ VPADDL.U32 d0,d0 @ Accumulating SAD step 3, SAD in d0
+ VSUB.S32 q10,q9,q7 @ q10 = q9 - q7 = 55*C1 - 84*C2 - 29*C4
+ VMOV.32 r0,d0[0] @ Return SAD value
+ VRSHR.S32 q8,q8,#1 @ Rounding right shift by 1 bit of q8
+
+ VADDW.S16 q7,q7,d12 @ q7 = 29*C1 + 55*C2 + 74*C3 + 84*C4
+ VSUBW.S16 q9,q9,d12 @ q9 = 84*C1 - 29*C2 - 74*C3 + 55*C4
+ VADDW.S16 q10,q10,d12 @ q10 = 55*C1 - 84*C2 + 74*C3 - 29*C4
+
+ VRSHR.S32 q7,q7,#1 @ Rounding right shift by 1 bit of q7
+ VRSHR.S32 q9,q9,#1 @ Rounding right shift by 1 bit of q9
+ VRSHR.S32 q10,q10,#1 @ Rounding right shift by 1 bit of q10
+ @ Transform stage 1 is in P1:q7, P2:q8, P3:q9 and P4:q10
+ VTRN.32 q7,q8
+ VTRN.32 q9,q10
+ VSWP d15,d18
+ VSWP d17,d20 @ Residue block transposed
+ @ Corresponding columns are in S1:q7, S2:q8, S3:q9 and S4:q10
+ VADD.S32 q13,q7,q8 @ q13 = S1 + S2
+ VADD.S32 q1,q7,q10 @ q1 = S1 + S4
+ VADD.S32 q4,q8,q10 @ q4 = S2 + S4
+ VSUB.S32 q13,q13,q10 @ q13 = S1 + S2 - S4
+ VMUL.S32 q12,q1,d5[0] @ q12 = 29*S1 + 29*S4
+ VMUL.S32 q14,q1,d7[0] @ q14 = 84*S1 + 84*S4
+ VMUL.S32 q13,q13,d4[0] @ q13 = 74*S1 + 74*S2 - 74*S4
+
+ VMLA.S32 q12,q4,d6[0] @ q12 = 29*S1 + 55*S2 + 84*S4
+ VMLS.S32 q14,q4,d5[0] @ q14 = 84*S1 - 29*S2 + 55*S4
+ VMUL.S32 q9,q9,d4[0] @ q9 = 74*S3
+
+ LDR r4,[sp,#76] @ r4 = dst_strd_chr_flag
+ ASR r4,r4,#16 @ r4 = dst_strd
+ LSL r4,r4,#1 @ r4 = 2*dst_strd (stride in bytes for 16-bit output)
+
+ VRSHRN.S32 d26,q13,#8 @ Rounding narrowing shift by 8 bits: row 2 of stage 2
+ VSUB.S32 q15,q14,q12 @ q15 = q14 - q12 = 55*S1 - 84*S2 - 29*S4
+
+ VADD.S32 q12,q12,q9 @ q12 = 29*S1 + 55*S2 + 74*S3 + 84*S4
+ VSUB.S32 q14,q14,q9 @ q14 = 84*S1 - 29*S2 - 74*S3 + 55*S4
+ VADD.S32 q15,q15,q9 @ q15 = 55*S1 - 84*S2 + 74*S3 - 29*S4
+
+ VRSHRN.S32 d24,q12,#8
+ VRSHRN.S32 d28,q14,#8
+ VRSHRN.S32 d30,q15,#8 @ Rounding narrowing shift by 8 bits to 16-bit results
+ @ Transform stage 2 is in U1:d24, U2:d26, U3:d28 and U4:d30
+ VST1.64 d24,[r3],r4 @ Storing row 1 of transform stage 2
+ VST1.64 d26,[r3],r4 @ Storing row 2 of transform stage 2
+ VST1.64 d28,[r3],r4 @ Storing row 3 of transform stage 2
+ VST1.64 d30,[r3] @ Storing row 4 of transform stage 2
+
+ vpop {d8 - d15}
+ POP {r4}
+ MOV pc,lr
+
+@/**
+@*******************************************************************************
+@*
+@* @brief
+@* This function performs residue calculation and DCT integer forward transform
+@* on 8x8 block
+@*
+@* @description
+@* Performs residue calculation by subtracting source and prediction and
+@* followed by DCT integer forward transform
+@*
+@* @param[in] pu1_src
+@* Input 8x8 pixels
+@*
+@* @param[in] pu1_pred
+@* Prediction data
+@*
+@* @param[in] pi2_tmp
+@* Temporary buffer of size 8x8
+@*
+@* @param[out] pi2_dst
+@* Output 8x8 coefficients
+@*
+@* @param[in] src_strd
+@* Input stride
+@*
+@* @param[in] pred_strd
+@* Prediction Stride
+@*
+@* @param[in] dst_strd_chr_flag
+@* Output Stride and Chroma Flag packed in the MS and LS 16-bit
+@*
+@* @returns SAD (sum of absolute differences between source and prediction) of the 8x8 block, in r0
+@*
+@* @remarks
+@* None
+@*
+@*******************************************************************************
+@*/
+@ UWORD32 ihevc_resi_trans_8x8(UWORD8 *pu1_src,
+@ UWORD8 *pu1_pred,
+@ WORD32 *pi4_temp,
+@ WORD16 *pi2_dst,
+@ WORD32 src_strd,
+@ WORD32 pred_strd,
+@ WORD32 dst_strd_chr_flag);
+@
+@**************Variables Vs Registers*******************************************
+@
+@ r0 - pu1_src
+@ r1 - pu1_pred
+@ r2 - pi4_temp
+@ r3 - pi2_dst
+@
+@ [sp] - src_strd
+@ [sp+4] - pred_strd
+@ [sp+8] - dst_strd_chr_flag
+@
+@*******************************************************************************
+
+ .global ihevc_resi_trans_8x8_a9q
+
+ihevc_resi_trans_8x8_a9q:
+
+ PUSH {r4,r5} @ Save r4, r5 (8 bytes of stack)
+ vpush {d8 - d15} @ Save d8-d15 (64 bytes); stack arguments now start at sp+72
+
+ @ Loading Prediction and Source blocks of size 8x8
+
+ LDR r4,[sp,#80] @ r4 = dst_strd_chr_flag
+ AND r4,r4,#1 @ r4 = chr_flag
+ CMP r4,#1
+ BNE CHROMA_LOAD @ chr_flag == 0: branch to the plain VLD1 load path
+
+LUMA_LOAD: @ NOTE(review): reached when chr_flag == 1, despite the label name
+
+ LDR r5,[sp,#72] @ r5 = src_strd
+ LDR r4,[sp,#76] @ r4 = pred_strd
+
+ VLD2.8 {d0,d2},[r1],r4 @ Row 1 of prediction in d0 (VLD2 de-interleaves: even-indexed bytes -> d0, odd-indexed -> d2)
+ VLD2.8 {d1,d3},[r0],r5 @ Row 1 of source in d1 (d3 is reloaded by the row 2 source load)
+
+ VABDL.U8 q15,d1,d0 @ Row 1 of absolute difference in q15
+ VLD2.8 {d2,d4},[r1],r4 @ Row 2 of prediction in d2
+ VSUBL.U8 q0,d1,d0 @ Row 1 of residue in q0
+ VLD2.8 {d3,d5},[r0],r5 @ Row 2 of source in d3
+
+ VABDL.U8 q9,d3,d2 @ Row 2 of absolute difference in q9
+ VLD2.8 {d4,d6},[r1],r4 @ Row 3 of prediction in d4
+ VSUBL.U8 q1,d3,d2 @ Row 2 of residue in q1
+ VLD2.8 {d5,d7},[r0],r5 @ Row 3 of source in d5
+
+ VABAL.U8 q15,d5,d4 @ Row 3 of absolute difference accumulated in q15
+ VLD2.8 {d6,d8},[r1],r4 @ Row 4 of prediction in d6
+ VSUBL.U8 q2,d5,d4 @ Row 3 of residue in q2
+ VLD2.8 {d7,d9},[r0],r5 @ Row 4 of source in d7
+
+ VABAL.U8 q9,d7,d6 @ Row 4 of absolute difference accumulated in q9
+ VLD2.8 {d8,d10},[r1],r4 @ Row 5 of prediction in d8
+ VSUBL.U8 q3,d7,d6 @ Row 4 of residue in q3
+ VLD2.8 {d9,d11},[r0],r5 @ Row 5 of source in d9
+
+ VABDL.U8 q10,d9,d8 @ Row 5 of absolute difference in q10
+ VLD2.8 {d10,d12},[r1],r4 @ Row 6 of prediction in d10
+ VSUBL.U8 q4,d9,d8 @ Row 5 of residue in q4
+ VLD2.8 {d11,d13},[r0],r5 @ Row 6 of source in d11
+
+ VABAL.U8 q15,d11,d10 @ Row 6 of absolute difference accumulated in q15
+ VLD2.8 {d12,d14},[r1],r4 @ Row 7 of prediction in d12
+ VSUBL.U8 q5,d11,d10 @ Row 6 of residue in q5
+ VLD2.8 {d13,d15},[r0],r5 @ Row 7 of source in d13
+
+ VABAL.U8 q9,d13,d12 @ Row 7 of absolute difference accumulated in q9
+ VLD2.8 {d14,d16},[r1] @ Row 8 of prediction in d14
+ VSUBL.U8 q6,d13,d12 @ Row 7 of residue in q6
+ VLD2.8 {d15,d17},[r0] @ Row 8 of source in d15
+
+ B CHROMA_LOAD_END @ Skip the VLD1 load path
+
+CHROMA_LOAD: @ NOTE(review): reached when chr_flag == 0, despite the label name
+
+ LDR r5,[sp,#72] @ r5 = src_strd
+ LDR r4,[sp,#76] @ r4 = pred_strd
+
+ VLD1.64 d0,[r1],r4 @ Row 1 of prediction in d0
+ VLD1.64 d1,[r0],r5 @ Row 1 of source in d1
+
+ VABDL.U8 q15,d1,d0 @ Row 1 of absolute difference in q15
+ VLD1.64 d2,[r1],r4 @ Row 2 of prediction in d2
+ VSUBL.U8 q0,d1,d0 @ Row 1 of residue in q0
+ VLD1.64 d3,[r0],r5 @ Row 2 of source in d3
+
+ VABDL.U8 q9,d3,d2 @ Row 2 of absolute difference in q9
+ VLD1.64 d4,[r1],r4 @ Row 3 of prediction in d4
+ VSUBL.U8 q1,d3,d2 @ Row 2 of residue in q1
+ VLD1.64 d5,[r0],r5 @ Row 3 of source in d5
+
+ VABAL.U8 q15,d5,d4 @ Row 3 of absolute difference accumulated in q15
+ VLD1.64 d6,[r1],r4 @ Row 4 of prediction in d6
+ VSUBL.U8 q2,d5,d4 @ Row 3 of residue in q2
+ VLD1.64 d7,[r0],r5 @ Row 4 of source in d7
+
+ VABAL.U8 q9,d7,d6 @ Row 4 of absolute difference accumulated in q9
+ VLD1.64 d8,[r1],r4 @ Row 5 of prediction in d8
+ VSUBL.U8 q3,d7,d6 @ Row 4 of residue in q3
+ VLD1.64 d9,[r0],r5 @ Row 5 of source in d9
+
+ VABDL.U8 q10,d9,d8 @ Row 5 of absolute difference in q10
+ VLD1.64 d10,[r1],r4 @ Row 6 of prediction in d10
+ VSUBL.U8 q4,d9,d8 @ Row 5 of residue in q4
+ VLD1.64 d11,[r0],r5 @ Row 6 of source in d11
+
+ VABAL.U8 q15,d11,d10 @ Row 6 of absolute difference accumulated in q15
+ VLD1.64 d12,[r1],r4 @ Row 7 of prediction in d12
+ VSUBL.U8 q5,d11,d10 @ Row 6 of residue in q5
+ VLD1.64 d13,[r0],r5 @ Row 7 of source in d13
+
+ VABAL.U8 q9,d13,d12 @ Row 7 of absolute difference accumulated in q9
+ VLD1.64 d14,[r1] @ Row 8 of prediction in d14
+ VSUBL.U8 q6,d13,d12 @ Row 7 of residue in q6
+ VLD1.64 d15,[r0] @ Row 8 of source in d15
+
+CHROMA_LOAD_END:
+
+ @ Transform stage 1
+ @ Transposing residue matrix
+
+ VABAL.U8 q10,d15,d14 @ Row 8 of absolute difference accumulated in q10
+ VTRN.16 q0,q1 @ Transpose residue matrix step (1a)
+ VSUBL.U8 q7,d15,d14 @ Row 8 of residue in q7
+ VTRN.16 q2,q3 @ Transpose residue matrix step (1b)
+
+ VTRN.16 q4,q5 @ Transpose residue matrix step (1c)
+ VTRN.16 q6,q7 @ Transpose residue matrix step (1d)
+ VTRN.32 q0,q2 @ Transpose residue matrix step (2a)
+ VTRN.32 q1,q3 @ Transpose residue matrix step (2b)
+
+ VADD.U16 q8,q15,q9 @ SAD calculation (1)
+ VTRN.32 q4,q6 @ Transpose residue matrix step (2c)
+ VTRN.32 q5,q7 @ Transpose residue matrix step (2d)
+
+ VADD.U16 q8,q8,q10 @ SAD calculation (2)
+ VSWP d1,d8 @ Transpose residue matrix step (3a)
+ VSWP d3,d10 @ Transpose residue matrix step (3b)
+
+ VADD.U16 d16,d16,d17 @ SAD calculation (3)
+ VSWP d7,d14 @ Transpose residue matrix step (3c)
+ VSWP d5,d12 @ Transpose residue matrix step (3d)
+ @ Columns of residue C0-C7 (8x8 matrix) in q0-q7
+ VPADDL.U16 d16,d16 @ SAD calculation (4)
+
+ @ Evaluating first step in Butterfly diagram
+
+ VADD.S16 q10,q0,q7 @ q10 = C0 + C7
+ VADD.S16 q11,q1,q6 @ q11 = C1 + C6
+ VPADDL.U32 d16,d16 @ SAD calculation (5)
+ VADD.S16 q12,q2,q5 @ q12 = C2 + C5
+ VADD.S16 q13,q3,q4 @ q13 = C3 + C4
+
+ VSUB.S16 q4,q3,q4 @ q4 = C3 - C4
+ VSUB.S16 q5,q2,q5 @ q5 = C2 - C5
+ VSUB.S16 q6,q1,q6 @ q6 = C1 - C6
+ VSUB.S16 q7,q0,q7 @ q7 = C0 - C7
+
+ @ Calculating F0, F2, F4 and F6
+
+ VADD.S16 q1,q11,q12 @ q1 = C1 + C2 + C5 + C6
+ VADD.S16 q2,q10,q13 @ q2 = C0 + C3 + C4 + C7
+
+ MOV r4,#50
+ LSL r4,r4,#16
+ ADD r4,r4,#18 @ r4 = (50 << 16) + 18
+ MOV r5,#89
+ LSL r5,r5,#16
+ ADD r5,r5,#75 @ r5 = (89 << 16) + 75
+ VMOV d0,r4,r5 @ 16-bit aligned, d0[3] = 89, d0[2] = 75, d0[1] = 50, d0[0]=18
+
+ MOV r4,#83
+ LSL r4,r4,#16
+ ADD r4,r4,#36 @ r4 = (83 << 16) + 36
+ VMOV d1,r4,r4 @ 16-bit aligned, d1[3] = 83, d1[2] = 36, d1[1] = 83, d1[0]=36
+
+ VSUB.S16 q10,q10,q13 @ q10 = C0 - C3 - C4 + C7
+ VSUB.S16 q11,q11,q12 @ q11 = C1 - C2 - C5 + C6
+ VMOV.32 r0,d16[0] @ SAD calculation (6) : Return value = SAD
+
+ VSUB.S16 q3,q2,q1 @ q3 = C0 - C1 - C2 + C3 + C4 - C5 - C6 + C7
+ VADD.S16 q2,q2,q1 @ q2 = C0 + C1 + C2 + C3 + C4 + C5 + C6 + C7
+
+ VMULL.S16 q14,d20,d1[1] @ q14 = [0] of 83*(C0 - C3 - C4 + C7)
+ VMULL.S16 q15,d21,d1[1] @ q15 = [1] of 83*(C0 - C3 - C4 + C7)
+ VMULL.S16 q9,d20,d1[0] @ q9 = [0] of 36*(C0 - C3 - C4 + C7)
+ VMULL.S16 q10,d21,d1[0] @ q10 = [1] of 36*(C0 - C3 - C4 + C7)
+
+ VMLAL.S16 q14,d22,d1[0] @ q14 = F2[0] = 83*(C0 - C3 - C4 + C7) + 36*(C1 - C2 - C5 + C6)
+ VSHLL.S16 q13,d6,#6 @ q13 = F4[0] = 64*(C0 - C1 - C2 + C3 + C4 - C5 - C6 + C7)
+ VMLAL.S16 q15,d23,d1[0] @ q15 = F2[1] = 83*(C0 - C3 - C4 + C7) + 36*(C1 - C2 - C5 + C6)
+ VSHLL.S16 q3,d7,#6 @ q3 = F4[1] = 64*(C0 - C1 - C2 + C3 + C4 - C5 - C6 + C7)
+ VMLSL.S16 q9,d22,d1[1] @ q9 = F6[0] = 36*(C0 - C3 - C4 + C7) - 83*(C1 - C2 - C5 + C6)
+ VSHLL.S16 q12,d4,#6 @ q12 = F0[0] = 64*(C0 + C1 + C2 + C3 + C4 + C5 + C6 + C7)
+ VMLSL.S16 q10,d23,d1[1] @ q10 = F6[1] = 36*(C0 - C3 - C4 + C7) - 83*(C1 - C2 - C5 + C6)
+ VSHLL.S16 q2,d5,#6 @ q2 = F0[1] = 64*(C0 + C1 + C2 + C3 + C4 + C5 + C6 + C7)
+
+ @ Calculating F1, F3, F5 and F7
+
+ MOV r4,#48 @ r4 = 48: after a 16-byte post-increment this moves r2 ahead by 64 bytes in total (two 32-byte rows)
+ VST1.64 {d24,d25},[r2]! @ Row 1 of transform stage 1 F0[0] stored
+ VST1.64 {d4,d5},[r2],r4 @ Row 1 of transform stage 1 F0[1] stored
+ VST1.64 {d28,d29},[r2]! @ Row 3 of transform stage 1 F2[0] stored
+ VST1.64 {d30,d31},[r2],r4 @ Row 3 of transform stage 1 F2[1] stored
+
+ VST1.64 {d26,d27},[r2]! @ Row 5 of transform stage 1 F4[0] stored
+ VMULL.S16 q1,d14,d0[3] @ q1 = [0] of 89*(C0 - C7)
+ VMULL.S16 q8,d15,d0[3] @ q8 = [1] of 89*(C0 - C7)
+ VST1.64 {d6,d7},[r2],r4 @ Row 5 of transform stage 1 F4[1] stored
+ VMULL.S16 q11,d14,d0[2] @ q11 = [0] of 75*(C0 - C7)
+ VMULL.S16 q13,d15,d0[2] @ q13 = [1] of 75*(C0 - C7)
+ VST1.64 {d18,d19},[r2]! @ Row 7 of transform stage 1 F6[0] stored
+ VMULL.S16 q3,d14,d0[1] @ q3 = [0] of 50*(C0 - C7)
+ VMULL.S16 q9,d15,d0[1] @ q9 = [1] of 50*(C0 - C7)
+ VST1.64 {d20,d21},[r2] @ Row 7 of transform stage 1 F6[1] stored
+ VMULL.S16 q10,d14,d0[0] @ q10 = [0] of 18*(C0 - C7)
+ VMULL.S16 q7,d15,d0[0] @ q7 = [1] of 18*(C0 - C7)
+
+ VMLAL.S16 q1,d12,d0[2] @ q1 = [0] of 89*(C0 - C7) + 75*(C1 - C6)
+ VMLAL.S16 q8,d13,d0[2] @ q8 = [1] of 89*(C0 - C7) + 75*(C1 - C6)
+ VMLSL.S16 q11,d12,d0[0] @ q11 = [0] of 75*(C0 - C7) - 18*(C1 - C6)
+ VMLSL.S16 q13,d13,d0[0] @ q13 = [1] of 75*(C0 - C7) - 18*(C1 - C6)
+ VMLSL.S16 q3,d12,d0[3] @ q3 = [0] of 50*(C0 - C7) - 89*(C1 - C6)
+ VMLSL.S16 q9,d13,d0[3] @ q9 = [1] of 50*(C0 - C7) - 89*(C1 - C6)
+ VMLSL.S16 q10,d12,d0[1] @ q10 = [0] of 18*(C0 - C7) - 50*(C1 - C6)
+ VMLSL.S16 q7,d13,d0[1] @ q7 = [1] of 18*(C0 - C7) - 50*(C1 - C6)
+
+ VMLAL.S16 q1,d10,d0[1] @ q1 = [0] of 89*(C0 - C7) + 75*(C1 - C6) + 50*(C2 - C5)
+ VMLAL.S16 q8,d11,d0[1] @ q8 = [1] of 89*(C0 - C7) + 75*(C1 - C6) + 50*(C2 - C5)
+ VMLSL.S16 q11,d10,d0[3] @ q11 = [0] of 75*(C0 - C7) - 18*(C1 - C6) - 89*(C2 - C5)
+ VMLSL.S16 q13,d11,d0[3] @ q13 = [1] of 75*(C0 - C7) - 18*(C1 - C6) - 89*(C2 - C5)
+ VMLAL.S16 q3,d10,d0[0] @ q3 = [0] of 50*(C0 - C7) - 89*(C1 - C6) + 18*(C2 - C5)
+ VMLAL.S16 q9,d11,d0[0] @ q9 = [1] of 50*(C0 - C7) - 89*(C1 - C6) + 18*(C2 - C5)
+ VMLAL.S16 q10,d10,d0[2] @ q10 = [0] of 18*(C0 - C7) - 50*(C1 - C6) + 75*(C2 - C5)
+ VMLAL.S16 q7,d11,d0[2] @ q7 = [1] of 18*(C0 - C7) - 50*(C1 - C6) + 75*(C2 - C5)
+
+ VMLAL.S16 q1,d8,d0[0] @ q1 = F1[0] = 89*(C0 - C7) + 75*(C1 - C6) + 50*(C2 - C5) + 18*(C3 - C4)
+ VMLAL.S16 q8,d9,d0[0] @ q8 = F1[1] = 89*(C0 - C7) + 75*(C1 - C6) + 50*(C2 - C5) + 18*(C3 - C4)
+ VMLSL.S16 q11,d8,d0[1] @ q11 = F3[0] = 75*(C0 - C7) - 18*(C1 - C6) - 89*(C2 - C5) - 50*(C3 - C4)
+ VMLSL.S16 q13,d9,d0[1] @ q13 = F3[1] = 75*(C0 - C7) - 18*(C1 - C6) - 89*(C2 - C5) - 50*(C3 - C4)
+ SUB r2,r2,#176 @ r2 now points to the second row
+ VMLAL.S16 q3,d8,d0[2] @ q3 = F5[0] = 50*(C0 - C7) - 89*(C1 - C6) + 18*(C2 - C5) + 75*(C3 - C4)
+ VMLAL.S16 q9,d9,d0[2] @ q9 = F5[1] = 50*(C0 - C7) - 89*(C1 - C6) + 18*(C2 - C5) + 75*(C3 - C4)
+ VST1.64 {d2,d3},[r2]! @ Row 2 of transform stage 1 F1[0] stored
+ VMLSL.S16 q10,d8,d0[3] @ q10 = F7[0] = 18*(C0 - C7) - 50*(C1 - C6) + 75*(C2 - C5) - 89*(C3 - C4)
+ VMLSL.S16 q7,d9,d0[3] @ q7 = F7[1] = 18*(C0 - C7) - 50*(C1 - C6) + 75*(C2 - C5) - 89*(C3 - C4)
+
+ VST1.64 {d16,d17},[r2],r4 @ Row 2 of transform stage 1 F1[1] stored
+ VST1.64 {d22,d23},[r2]! @ Row 4 of transform stage 1 F3[0] stored
+ VST1.64 {d26,d27},[r2],r4 @ Row 4 of transform stage 1 F3[1] stored
+ VST1.64 {d6,d7},[r2]! @ Row 6 of transform stage 1 F5[0] stored
+ VST1.64 {d18,d19},[r2],r4 @ Row 6 of transform stage 1 F5[1] stored
+ VST1.64 {d20,d21},[r2]! @ Row 8 of transform stage 1 F7[0] stored
+ VST1.64 {d14,d15},[r2] @ Row 8 of transform stage 1 F7[1] stored
+
+ @ Transform stage 2 (for rows 1-4 of transform stage 1)
+ @ Transposing the 4 rows (F0, F1, F2, F3)
+ @ F0 = {q2,q12}, F1 = {q8,q1}, F2 = {q15,q14} and F3 = {q13,q11}
+
+ VTRN.32 q12,q1 @ Transposing first half of transform stage 1 (1a)
+ VTRN.32 q14,q11 @ Transposing first half of transform stage 1 (1b)
+ VSWP d25,d28 @ Transposing first half of transform stage 1 (2a)
+ VSWP d22,d3 @ Transposing first half of transform stage 1 (2b)
+
+ VTRN.32 q2,q8 @ Transposing first half of transform stage 1 (3a)
+ VTRN.32 q15,q13 @ Transposing first half of transform stage 1 (3b)
+ VSWP d5,d30 @ Transposing first half of transform stage 1 (4a)
+ VSWP d26,d17 @ Transposing first half of transform stage 1 (4b)
+ @ B0:q12, B1:q1, B2:q14, B3:q11, B4:q2, B5:q8, B6:q15 and B7:q13
+
+ @ Evaluating first step in Butterfly diagram
+
+ VADD.S32 q0,q12,q13 @ q0 = B0 + B7
+ VADD.S32 q5,q11,q2 @ q5 = B3 + B4
+ VADD.S32 q3,q1,q15 @ q3 = B1 + B6
+ VADD.S32 q4,q14,q8 @ q4 = B2 + B5
+
+ VSUB.S32 q7,q14,q8 @ q7 = B2 - B5
+ VSUB.S32 q8,q1,q15 @ q8 = B1 - B6
+ VSUB.S32 q6,q11,q2 @ q6 = B3 - B4
+ VSUB.S32 q9,q12,q13 @ q9 = B0 - B7
+
+ @ Calculating G0, G2, G4 and G6
+
+ MOV r4,#18
+ MOV r5,#50
+ VMOV d2,r4,r5 @ 32-bit aligned, d2[1] = 50, d2[0] = 18
+ VSUB.S32 q2,q0,q5 @ q2 = B0 - B3 - B4 + B7
+
+ MOV r4,#75
+ MOV r5,#89
+ VMOV d3,r4,r5 @ 32-bit aligned, d3[1] = 89, d3[0] = 75
+ VADD.S32 q10,q0,q5 @ q10 = B0 + B3 + B4 + B7
+
+ MOV r4,#36
+ MOV r5,#83
+ VMOV d0,r4,r5 @ 32-bit aligned, d0[1] = 83, d0[0] = 36
+ VSUB.S32 q11,q3,q4 @ q11 = B1 - B2 - B5 + B6
+ VADD.S32 q3,q3,q4 @ q3 = B1 + B2 + B5 + B6
+
+ VMUL.S32 q12,q2,d0[1] @ q12 = 83*(B0 - B3 - B4 + B7)
+ VMUL.S32 q2,q2,d0[0] @ q2 = 36*(B0 - B3 - B4 + B7)
+ VMUL.S32 q5,q9,d3[1] @ q5 = 89*(B0 - B7)
+ VADD.S32 q14,q10,q3 @ q14 = B0 + B1 + B2 + B3 + B4 + B5 + B6 + B7
+ VMUL.S32 q4,q9,d3[0] @ q4 = 75*(B0 - B7)
+ VSUB.S32 q15,q10,q3 @ q15 = B0 - B1 - B2 + B3 + B4 - B5 - B6 + B7
+@ G0 and G4 carry a coefficient of 64 (= 1 << 6). The sums in q14/q15 are not shifted left by 6 here;
+@ instead they are narrowed below with a rounding right shift of 5 (= 11 - 6) rather than 11.
+
+ VMLA.S32 q12,q11,d0[0] @ q12 = G2 = 83*(B0 - B3 - B4 + B7) + 36*(B1 - B2 - B5 + B6)
+ VRSHRN.I32 d28,q14,#5 @ G0: rounding right shift by 5 (64x factor folded in) and narrow to 16 bits
+ VMLS.S32 q2,q11,d0[1] @ q2 = G6 = 36*(B0 - B3 - B4 + B7) - 83*(B1 - B2 - B5 + B6)
+ VRSHRN.I32 d30,q15,#5 @ G4: rounding right shift by 5 (64x factor folded in) and narrow to 16 bits
+
+ LDR r4,[sp,#80] @ r4 = dst_strd_chr_flag
+ ASR r4,r4,#16 @ r4 = dst_strd
+ LSL r4,r4,#2 @ r4 = 2*dst_strd*2 (step used between output rows 1, 3, 5, 7)
+
+ VMUL.S32 q3,q9,d2[1] @ q3 = 50*(B0 - B7)
+ VRSHRN.I32 d24,q12,#11 @ G2: rounding right shift by 11 and narrow to 16 bits
+ VMUL.S32 q9,q9,d2[0] @ q9 = 18*(B0 - B7)
+ VRSHRN.I32 d4,q2,#11 @ G6: rounding right shift by 11 and narrow to 16 bits
+
+ VMLA.S32 q5,q8,d3[0] @ q5 = 89*(B0 - B7) + 75*(B1 - B6)
+ VST1.64 d28,[r3],r4 @ First half-row of row 1 of transform stage 2 (G0) stored
+ VMLS.S32 q4,q8,d2[0] @ q4 = 75*(B0 - B7) - 18*(B1 - B6)
+
+ VMLS.S32 q3,q8,d3[1] @ q3 = 50*(B0 - B7) - 89*(B1 - B6)
+ VST1.64 d24,[r3],r4 @ First half-row of row 3 of transform stage 2 (G2) stored
+ VMLS.S32 q9,q8,d2[1] @ q9 = 18*(B0 - B7) - 50*(B1 - B6)
+
+ VMLA.S32 q5,q7,d2[1] @ q5 = 89*(B0 - B7) + 75*(B1 - B6) + 50*(B2 - B5)
+ VST1.64 d30,[r3],r4 @ First half-row of row 5 of transform stage 2 (G4) stored
+ VMLS.S32 q4,q7,d3[1] @ q4 = 75*(B0 - B7) - 18*(B1 - B6) - 89*(B2 - B5)
+
+ VMLA.S32 q3,q7,d2[0] @ q3 = 50*(B0 - B7) - 89*(B1 - B6) + 18*(B2 - B5)
+ VST1.64 d4,[r3] @ First half-row of row 7 of transform stage 2 (G6) stored
+ VMLA.S32 q9,q7,d3[0] @ q9 = 18*(B0 - B7) - 50*(B1 - B6) + 75*(B2 - B5)
+
+ VMLA.S32 q5,q6,d2[0] @ q5 = G1 = 89*(B0 - B7) + 75*(B1 - B6) + 50*(B2 - B5) + 18*(B3 - B4)
+ VMLS.S32 q4,q6,d2[1] @ q4 = G3 = 75*(B0 - B7) - 18*(B1 - B6) - 89*(B2 - B5) - 50*(B3 - B4)
+ VMLA.S32 q3,q6,d3[0] @ q3 = G5 = 50*(B0 - B7) - 89*(B1 - B6) + 18*(B2 - B5) + 75*(B3 - B4)
+ VMLS.S32 q9,q6,d3[1] @ q9 = G7 = 18*(B0 - B7) - 50*(B1 - B6) + 75*(B2 - B5) - 89*(B3 - B4)
+
+ SUB r3,r3,r4,LSL #1 @ r3 = r3 - 2*r4
+ SUB r3,r3,r4,ASR #1 @ r3 = r3 - 5*dst_strd*2
+ @ r3 is moved from row 7 to row 2
+ VRSHRN.I32 d10,q5,#11 @ G1: rounding right shift by 11 and narrow to 16 bits
+ VRSHRN.I32 d8,q4,#11 @ G3: rounding right shift by 11 and narrow to 16 bits
+ VRSHRN.I32 d6,q3,#11 @ G5: rounding right shift by 11 and narrow to 16 bits
+ VST1.64 d10,[r3],r4 @ First half-row of row 2 of transform stage 2 (G1) stored
+ VRSHRN.I32 d18,q9,#11 @ G7: rounding right shift by 11 and narrow to 16 bits
+
+ VST1.64 d8,[r3],r4 @ First half-row of row 4 of transform stage 2 (G3) stored
+ VST1.64 d6,[r3],r4 @ First half-row of row 6 of transform stage 2 (G5) stored
+ VST1.64 d18,[r3]! @ First half-row of row 8 of transform stage 2 (G7) stored; r3 advances 8 bytes to the second half-row
+
+ @ Transform stage 2 (for rows 5-8 of transform stage 1)
+ @ Loading the 4 rows (F4, F5, F6, F7)
+
+ SUB r2,r2,#112 @ r2 jumps from row 8 to row 5 in temporary memory
+ VLD1.64 {d20,d21},[r2]! @ q10 = F4[0]
+ VLD1.64 {d22,d23},[r2]! @ q11 = F4[1]
+ VLD1.64 {d8,d9},[r2]! @ q4 = F5[0]
+ @ Transposing the 4 rows
+ @ F4 = {q11,q10}, F5 = {q5,q4}, F6 = {q3,q2} and F7 = {q13,q12}
+
+ VTRN.32 q10,q4 @ Transposing second half of transform stage 1 (1a)
+ VLD1.64 {d10,d11},[r2]! @ q5 = F5[1]
+ VLD1.64 {d4,d5},[r2]! @ q2 = F6[0]
+ VLD1.64 {d6,d7},[r2]! @ q3 = F6[1]
+ VLD1.64 {d24,d25},[r2]! @ q12 = F7[0]
+ VTRN.32 q2,q12 @ Transposing second half of transform stage 1 (1b)
+ VLD1.64 {d26,d27},[r2] @ q13 = F7[1]
+
+ VSWP d21,d4 @ Transposing second half of transform stage 1 (2a)
+ VSWP d24,d9 @ Transposing second half of transform stage 1 (2b)
+
+ VTRN.32 q11,q5 @ Transposing second half of transform stage 1 (3a)
+ VTRN.32 q3,q13 @ Transposing second half of transform stage 1 (3b)
+ VSWP d26,d11 @ Transposing second half of transform stage 1 (4b)
+ VSWP d23,d6 @ Transposing second half of transform stage 1 (4a)
+ @ B0:q10, B1:q4, B2:q2, B3:q12, B4:q11, B5:q5, B6:q3 and B7:q13
+
+ @ Evaluating first step in Butterfly diagram
+
+ VADD.S32 q0,q10,q13 @ q0 = B0 + B7
+ VADD.S32 q15,q12,q11 @ q15 = B3 + B4
+ VADD.S32 q1,q4,q3 @ q1 = B1 + B6
+ VADD.S32 q14,q2,q5 @ q14 = B2 + B5
+
+ VSUB.S32 q9,q10,q13 @ q9 = B0 - B7
+ VSUB.S32 q6,q12,q11 @ q6 = B3 - B4
+ VSUB.S32 q7,q2,q5 @ q7 = B2 - B5
+ VSUB.S32 q8,q4,q3 @ q8 = B1 - B6
+
+ @ Calculating H0, H2, H4 and H6
+
+ VADD.S32 q3,q1,q14 @ q3 = B1 + B2 + B5 + B6
+ VSUB.S32 q5,q1,q14 @ q5 = B1 - B2 - B5 + B6
+
+ MOV r4,#18
+ MOV r5,#50
+ VSUB.S32 q4,q0,q15 @ q4 = B0 - B3 - B4 + B7
+ VMOV d2,r4,r5 @ 32-bit aligned, d2[1] = 50, d2[0] = 18
+
+ MOV r4,#75
+ MOV r5,#89
+ VADD.S32 q2,q0,q15 @ q2 = B0 + B3 + B4 + B7
+ VMOV d3,r4,r5 @ 32-bit aligned, d3[1] = 89, d3[0] = 75
+
+ MOV r4,#36
+ MOV r5,#83
+
+ @ Calculating H1, H3, H5 and H7
+
+ VMUL.S32 q10,q9,d3[1] @ q10 = 89*(B0 - B7)
+ VMOV d0,r4,r5 @ 32-bit aligned, d0[1] = 83, d0[0] = 36
+
+ VMUL.S32 q13,q9,d3[0] @ q13 = 75*(B0 - B7)
+
+ VMUL.S32 q12,q4,d0[1] @ q12 = 83*(B0 - B3 - B4 + B7)
+ VADD.S32 q14,q2,q3 @ q14 = B0 + B1 + B2 + B3 + B4 + B5 + B6 + B7
+ VMUL.S32 q4,q4,d0[0] @ q4 = 36*(B0 - B3 - B4 + B7)
+ VSUB.S32 q2,q2,q3 @ q2 = B0 - B1 - B2 + B3 + B4 - B5 - B6 + B7
+
+
+ VMLA.S32 q12,q5,d0[0] @ q12 = H2 = 83*(B0 - B3 - B4 + B7) + 36*(B1 - B2 - B5 + B6)
+@ H0 (sum in q14) is not shifted left by 6; its 64x factor is folded into the rounding shift of 5 below
+ VMLS.S32 q4,q5,d0[1] @ q4 = H6 = 36*(B0 - B3 - B4 + B7) - 83*(B1 - B2 - B5 + B6)
+@ H4 (sum in q2) is not shifted left by 6; its 64x factor is folded into the rounding shift of 5 below
+
+ VMUL.S32 q11,q9,d2[1] @ q11 = 50*(B0 - B7)
+ VRSHRN.I32 d28,q14,#5 @ H0: rounding right shift by 5 (64x factor folded in) and narrow to 16 bits
+ VMUL.S32 q9,q9,d2[0] @ q9 = 18*(B0 - B7)
+ VRSHRN.I32 d24,q12,#11 @ H2: rounding right shift by 11 and narrow to 16 bits
+
+ VMLA.S32 q10,q8,d3[0] @ q10 = 89*(B0 - B7) + 75*(B1 - B6)
+ VRSHRN.I32 d4,q2,#5 @ H4: rounding right shift by 5 (64x factor folded in) and narrow to 16 bits
+ VMLS.S32 q13,q8,d2[0] @ q13 = 75*(B0 - B7) - 18*(B1 - B6)
+ VRSHRN.I32 d8,q4,#11 @ H6: rounding right shift by 11 and narrow to 16 bits
+
+ LDR r4,[sp,#80] @ r4 = dst_strd_chr_flag
+ ASR r4,r4,#16 @ r4 = dst_strd
+ LSL r4,r4,#2 @ r4 = 2*dst_strd*2 (step used between output rows 1, 3, 5, 7)
+
+ SUB r3,r3,r4,LSL #2 @ r3 = r3 - 4*r4
+ ADD r3,r3,r4,ASR #1 @ r3 = r3 - 7*dst_strd*2
+ @ r3 is moved from row 8 to row 1
+ VMLS.S32 q11,q8,d3[1] @ q11 = 50*(B0 - B7) - 89*(B1 - B6)
+ VST1.64 d28,[r3],r4 @ Second half-row of row 1 of transform stage 2 (H0) stored
+ VMLS.S32 q9,q8,d2[1] @ q9 = 18*(B0 - B7) - 50*(B1 - B6)
+
+ VMLA.S32 q10,q7,d2[1] @ q10 = 89*(B0 - B7) + 75*(B1 - B6) + 50*(B2 - B5)
+ VST1.64 d24,[r3],r4 @ Second half-row of row 3 of transform stage 2 (H2) stored
+ VMLS.S32 q13,q7,d3[1] @ q13 = 75*(B0 - B7) - 18*(B1 - B6) - 89*(B2 - B5)
+
+ VMLA.S32 q11,q7,d2[0] @ q11 = 50*(B0 - B7) - 89*(B1 - B6) + 18*(B2 - B5)
+ VST1.64 d4,[r3],r4 @ Second half-row of row 5 of transform stage 2 (H4) stored
+ VMLA.S32 q9,q7,d3[0] @ q9 = 18*(B0 - B7) - 50*(B1 - B6) + 75*(B2 - B5)
+
+ VMLA.S32 q10,q6,d2[0] @ q10 = H1 = 89*(B0 - B7) + 75*(B1 - B6) + 50*(B2 - B5) + 18*(B3 - B4)
+ VST1.64 d8,[r3] @ Second half-row of row 7 of transform stage 2 (H6) stored
+ VMLS.S32 q13,q6,d2[1] @ q13 = H3 = 75*(B0 - B7) - 18*(B1 - B6) - 89*(B2 - B5) - 50*(B3 - B4)
+
+ VMLA.S32 q11,q6,d3[0] @ q11 = H5 = 50*(B0 - B7) - 89*(B1 - B6) + 18*(B2 - B5) + 75*(B3 - B4)
+ VMLS.S32 q9,q6,d3[1] @ q9 = H7 = 18*(B0 - B7) - 50*(B1 - B6) + 75*(B2 - B5) - 89*(B3 - B4)
+
+ SUB r3,r3,r4,LSL #1 @ r3 = r3 - 2*r4
+ SUB r3,r3,r4,ASR #1 @ r3 = r3 - 5*dst_strd*2
+ @ r3 is moved from row 7 to row 2
+ VRSHRN.I32 d20,q10,#11 @ H1: rounding right shift by 11 and narrow to 16 bits
+ VRSHRN.I32 d26,q13,#11 @ H3: rounding right shift by 11 and narrow to 16 bits
+ VRSHRN.I32 d22,q11,#11 @ H5: rounding right shift by 11 and narrow to 16 bits
+ VST1.64 d20,[r3],r4 @ Second half-row of row 2 of transform stage 2 (H1) stored
+ VRSHRN.I32 d18,q9,#11 @ H7: rounding right shift by 11 and narrow to 16 bits
+
+ VST1.64 d26,[r3],r4 @ Second half-row of row 4 of transform stage 2 (H3) stored
+ VST1.64 d22,[r3],r4 @ Second half-row of row 6 of transform stage 2 (H5) stored
+ VST1.64 d18,[r3] @ Second half-row of row 8 of transform stage 2 (H7) stored
+
+ vpop {d8 - d15} @ Restore d8-d15
+ POP {r4,r5} @ Restore r4, r5
+ MOV pc,lr @ Return; r0 still holds the SAD moved out of d16 earlier
+
+@/**
+@*/ *******************************************************************************
+@*/
+@*/@brief
+@*/ This function performs residue calculation and forward transform on
+@*/ input pixels
+@*/
+@*/@par Description:
+@*/ Performs residue calculation by subtracting source and prediction and
+@*/ followed by forward transform
+@*/
+@*/ @param[in] pu1_src
+@*/ Input 16x16 pixels
+@*/
+@*/ @param[in] pu1_pred
+@*/ Prediction data
+@*/
+@*/ @param[in] pi2_tmp
+@*/ Temporary buffer of size 16x16
+@*/
+@*/ @param[out] pi2_dst
+@*/ Output 16x16 coefficients
+@*/
+@*/ @param[in] src_strd
+@*/ Input stride
+@*/
+@*/ @param[in] pred_strd
+@*/ Prediction Stride
+@*/
+@*/ @param[in] dst_strd_chr_flag
+@*/ Output Stride and Chroma Flag packed in the MS and LS 16-bit
+@*/
+@*/ @returns Void
+@*/
+@*/ @remarks
+@*/ None
+@*/
+@*/*******************************************************************************
+@*/
+
+.extern g_ai2_ihevc_trans_16
+.extern g_ai4_ihevc_trans_16
+
+g_ai2_ihevc_trans_16_addr_1:
+.long g_ai2_ihevc_trans_16 - ulbl1 - 8
+
+g_ai2_ihevc_trans_16_addr_2:
+.long g_ai2_ihevc_trans_16 - ulbl2 - 8
+
+g_ai4_ihevc_trans_16_addr:
+.long g_ai4_ihevc_trans_16 - ulbl3 - 8
+
+ .global ihevc_resi_trans_16x16_a9q
+
+ihevc_resi_trans_16x16_a9q:
+
+.equ TMP_STRIDE , 64 @16*4, Stride of tmp register
+.equ SHIFT , 13 @shift = 13; // log2(iWidth) - 1 + g_uiBitIncrement
+.equ RADD , 4096 @1 << (shift - 1);
+
+.equ COFF_STD_2B , 32 @Stride for g_ai2_ihevc_trans_16 in bytes
+.equ COFF_STD_W , 32 @Stride for g_ai4_ihevc_trans_16 in bytes
+
+@ Function prologue: save registers and load the stack arguments
+ STMFD SP!,{r4-r12,LR} @stack store values of the arguments
+ vpush {d8 - d15}
+ SUB SP,SP,#32
+
+ LDR R4,[SP,#136] @get src_strd
+ LDR R5,[SP,#140] @get pred_strd
+ LDR R6,[SP,#144] @get dst_strd_chr_flag
+
+ MOV R8,#0 @Set loop counter
+ LDR R9,g_ai2_ihevc_trans_16_addr_1 @get 16 bit transform matrix
+ulbl1:
+ ADD R9, R9, PC
+ @Read [0 0] [4 0] [8 0] [12 0],[0 1] [4 1] [8 1] [12 1] values of g_ai2_ihevc_trans_16
+ @and write to stack
+ MOV R12,#COFF_STD_2B
+ LSL R12,#2
+
+ VLD1.S32 D30[0],[R9],R12
+ VLD1.S32 D30[1],[R9],R12
+ VLD1.S32 D31[0],[R9],R12
+ VLD1.S32 D31[1],[R9],R12
+
+ VTRN.S32 D30,D31
+ VTRN.S16 D30,D31
+ VST1.S16 {d30,d31},[SP]
+
+ LDR R9,g_ai2_ihevc_trans_16_addr_2 @get back 16 bit transform matrix
+ulbl2:
+ ADD R9, R9, PC
+
+ MOV R7,#TMP_STRIDE
+ AND R14,R6,#0x1
+
+ VMOV.S32 Q14,#0
+
+@R0 pu1_src
+@R1 pu1_pred
+@R2 pi4_tmp
+@R3 pi2_dst
+@R4 src_strd
+@R5 pred_strd
+@R6 dst_strd_chr_flag
+@R7 tmp_dst Nx4 block stride
+@R8 loop cntr
+@R9 g_ai2_ihevc_trans_16
+@R10 tmp_dst Nx4 block offset
+@R11 tmp register
+@R12 ------
+@R14 ------.
+@q14 shift 32 bit
+@q15 add 32 bit
+
+CORE_LOOP_16X16_HORIZ:
+
+ CMP R14,#1
+ BEQ INTERLEAVED_LOAD_S1
+
+ VLD1.U8 {d0,d1},[R0],R4 @LOAD 1-16 src row 1
+ VLD1.U8 {d2,d3},[R1],R5 @LOAD 1-16 pred row 1
+ VLD1.U8 {d4,d5},[R0],R4 @LOAD 1-16 src row 2
+ VLD1.U8 {d6,d7},[R1],R5 @LOAD 1-16 pred row 2
+ B LOAD_DONE
+
+INTERLEAVED_LOAD_S1:
+
+ VLD2.U8 {Q0,Q1},[R0],R4 @LOAD 1-16 src row 1
+ VLD2.U8 {Q1,Q2},[R1],R5 @LOAD 1-16 pred row 1
+ VLD2.U8 {Q2,Q3},[R0],R4 @LOAD 1-16 src row 2
+ VLD2.U8 {Q3,Q4},[R1],R5 @LOAD 1-16 pred row 2
+LOAD_DONE:
+
+ VSUBL.U8 Q4,D0,D2 @Get residue 1-8 row 1
+ VSUBL.U8 Q5,D1,D3 @Get residue 9-16 row 1
+ VSUBL.U8 Q6,D4,D6 @Get residue 1-8 row 2
+ VSUBL.U8 Q7,D5,D7 @Get residue 9-16 row 2
+
+ @Get blk sads
+ VABDL.U8 Q15,D0,D2
+ VABAL.U8 Q15,D1,D3
+ VABAL.U8 Q15,D4,D6
+ VABAL.U8 Q15,D5,D7
+ VADDW.S16 Q14,Q14,D30
+ VADDW.S16 Q14,Q14,D31
+
+ VREV64.S16 Q5,Q5 @Rev row 1
+ VREV64.S16 Q7,Q7 @Rev row 2
+ VSWP D10,D11
+ VSWP D14,D15
+
+ VADD.S16 Q8 ,Q4,Q5 @e[k] = resi_tmp_1 + resi_tmp_2 k -> 1-8 row 1
+ VSUB.S16 Q9 ,Q4,Q5 @o[k] = resi_tmp_1 - resi_tmp_2 k ->9-16 row 1
+ VADD.S16 Q10,Q6,Q7 @e[k] = resi_tmp_1 + resi_tmp_2 k -> 1-8 row 2
+ VSUB.S16 Q11,Q6,Q7 @o[k] = resi_tmp_1 - resi_tmp_2 k ->9-16 row 2
+
+ VREV64.S16 D24,D17 @rev e[k] k-> 4-7 row 1
+ VREV64.S16 D25,D21 @rev e[k] k-> 4-7 row 2
+ VMOV.S16 D17,D20
+
+ @arrangement OF DATA
+ @Q8 A1 A2 A3 A4 B1 B2 B3 B4
+ @Q12 A8 A7 A6 A5 B8 B7 B6 B5
+
+ VADD.S16 Q13,Q8,Q12 @ee[k] = e[k] + e[7 - k] row 1 & 2
+ VSUB.S16 Q0,Q8,Q12 @eo[k] = e[k] - e[7 - k] row 1 & 2
+
+ @D26 R1ee[0] R1ee[1] R1ee[2] R1ee[3]
+ @D27 R2ee[0] R2ee[1] R2ee[2] R2ee[3]
+ VTRN.S32 D26,D27 @1-cycle stall before it?
+ @D26 R1ee[0] R1ee[1] R2ee[0] R2ee[1]
+ @D27 R1ee[2] R1ee[3] R2ee[2] R2ee[3]
+ VREV32.16 D2,D27 @1-cycle stall before it?
+ @D26 R1ee[0] R1ee[1] R2ee[0] R2ee[1]
+ @D2 R1ee[3] R1ee[2] R2ee[3] R2ee[2]
+ VMOV.S16 D27,D26
+ VNEG.S16 D3,D2
+ @Q13 R1ee[0] R1ee[1] R2ee[0] R2ee[1] R1ee[0] R1ee[1] R2ee[0] R2ee[1]
+ @Q1 R1ee[3] R1ee[2] R2ee[3] R2ee[2] -R1ee[3] -R1ee[2] -R2ee[3] -R2ee[2]
+
+ @D8 : [0 0] [4 0] [8 0] [12 0]
+ @D9 : [0 1] [4 1] [8 1] [12 1]
+ VLD1.S16 {d8,d9},[SP] @[0 0] [4 0] [8 0] [12 0] [0 1] [4 1] [8 1] [12 1]
+ VADD.S16 Q1,Q13,Q1 @ 1-cycle stall before it?
+ @Q15 R1eee[0] R1eee[1] R2eee[0] R2eee[1] R1eeo[0] R1eeo[1] R2eeo[0] R2eeo[1]
+
+ @Q1 R1eee[0] R1eee[1] R2eee[0] R2eee[1]
+ @ R1eeo[0] R1eeo[1] R2eeo[0] R2eeo[1]
+ VTRN.S16 D2,D3 @2-cycle stall before it?
+ @Q1 R1eee[0] R1eeo[0] R2eee[0] R2eeo[0]
+ @ R1eee[1] R1eeo[1] R2eee[1] R2eeo[1]
+
+ VDUP.S32 D4,D2[0] @R1eee[0] R1eeo[0] R1eee[0] R1eeo[0] ;1-cycle stall?
+ VDUP.S32 D5,D2[1] @R2eee[0] R2eeo[0] R2eee[0] R2eeo[0]
+ VDUP.S32 D6,D3[0] @R1eee[1] R1eeo[1] R1eee[1] R1eeo[1]
+ VDUP.S32 D7,D3[1] @R2eee[1] R2eeo[1] R2eee[1] R2eeo[1]
+
+ @---------------Process EO--------------------
+ @ Early start to avoid stalls
+ MOV R12,#COFF_STD_2B @Get stride of coeffs
+
+ VMULL.S16 Q5,D4,D8 @ g_ai2_ihevc_trans_16 * R1eee[0] R1eeo[0] R1eee[0] R1eeo[0]
+ VMLAL.S16 Q5,D6,D9 @ + g_ai2_ihevc_trans_16 * R1eee[1] R1eeo[1] R1eee[1] R1eeo[1]
+ VMULL.S16 Q6,D5,D8 @ g_ai2_ihevc_trans_16 * R2eee[0] R2eeo[0] R2eee[0] R2eeo[0]
+ VMLAL.S16 Q6,D7,D9 @ + g_ai2_ihevc_trans_16 * R2eee[1] R2eeo[1] R2eee[1] R2eeo[1]
+
+ ADD R11,R9,R12,LSL #1 @Load address of g_ai2_ihevc_trans_16[2]
+ LSL R12,R12,#2
+
+ VLD1.S16 D26,[R11],R12 @LOAD g_ai2_ihevc_trans_16[2][0-4]]
+
+ VLD1.S16 D27,[R11],R12 @LOAD g_ai2_ihevc_trans_16[6][0-4]
+ VMULL.S16 Q1,D26,D0 @g_ai2_ihevc_trans_16[2][0-4] * eo[0-4] R1
+
+ VMULL.S16 Q2,D26,D1 @g_ai2_ihevc_trans_16[2][0-4] * eo[0-4] R2
+
+ VZIP.S32 Q5,Q6 @3-cycle instruction
+ VMULL.S16 Q3,D27,D0 @g_ai2_ihevc_trans_16[6][0-4] * eo[0-4] R1
+
+
+ VLD1.S16 D26,[R11],R12 @LOAD g_ai2_ihevc_trans_16[10][0-4]
+ VMULL.S16 Q4,D27,D1 @g_ai2_ihevc_trans_16[6][0-4] * eo[0-4] R2
+
+ @These values must go to 0 4 8 12 colums hence we need stride *4
+ LSL R10,R7,#2
+
+ VLD1.S16 D27,[R11],R12 @LOAD g_ai2_ihevc_trans_16[14][0-4]
+
+ VST1.32 D10,[R2],R10
+ VMULL.S16 Q8,D27,D1 @g_ai2_ihevc_trans_16[14][0-4] * eo[0-4] R2
+
+ VST1.32 D11,[R2],R10
+ VMULL.S16 Q7,D27,D0 @g_ai2_ihevc_trans_16[14][0-4] * eo[0-4] R1
+
+ VST1.32 D12,[R2],R10
+ VMULL.S16 Q5,D26,D0 @g_ai2_ihevc_trans_16[10][0-4] * eo[0-4] R1
+
+ VST1.32 D13,[R2],R10
+ VMULL.S16 Q6,D26,D1 @g_ai2_ihevc_trans_16[10][0-4] * eo[0-4] R2
+
+ SUB R2,R2,R10,LSL #2
+
+ @transpose the 4x4 matrix row1
+ VTRN.32 Q1, Q3 @R1 transpose1 -- 2 cycles
+
+ @transpose the 4x4 matrix row2
+ VTRN.32 Q2,Q4 @R2 transpose1 -- 2 cycles
+
+ VTRN.32 Q5, Q7 @R1 transpose1 -- 2 cycles
+
+ VTRN.32 Q6,Q8 @R2 transpose1 -- 2 cycles
+
+ VSWP D10,D3 @R1 transpose2
+ VSWP D14,D7 @R1 transpose2
+
+ VSWP D12,D5 @R2 transpose2
+ VSWP D16,D9 @R2 transpose2
+
+ VADD.S32 Q5,Q5,Q1 @R1 add
+ VADD.S32 Q3,Q3,Q7 @R1 add
+
+ VADD.S32 Q2,Q2,Q4 @R2 add
+ VADD.S32 Q6,Q6,Q8 @R2 add
+
+ VADD.S32 Q5,Q5,Q3 @R1 add
+
+ VADD.S32 Q4,Q6,Q2 @R2 add
+
+ @-----------------------Processing O ----------------------------
+ @ Early start to avoid stalls
+ MOV R12,#COFF_STD_2B @Get coeffs stride
+ LSL R12,R12,#1
+ ADD R11,R9,#COFF_STD_2B @Get address of g_ai2_ihevc_trans_16[1]
+
+ VLD1.S16 {d4,d5},[R11],R12 @g_ai2_ihevc_trans_16[1][0-7] -- 2 cycles
+
+ VZIP.S32 Q5,Q4 @ 3 cycle instruction
+ VMULL.S16 Q6,D18,D4 @o[0][0-3]* R1
+
+
+ VMLAL.S16 Q6,D19,D5 @o[0][4-7]* R1 ; follows MULL instruction: Multiplier accumulator forwarding
+ @write to memory
+ @this should go to 2 6 10 14
+ LSL R10,R7,#2
+ ADD R2,R2,R7,LSL #1 @move to third row
+ VST1.32 D10,[R2],R10
+ VMULL.S16 Q7,D22,D4 @o[0][0-3]* R2
+
+ VST1.32 D11,[R2],R10
+ VMLAL.S16 Q7,D23,D5 @o[0][4-7]* R2
+
+ VLD1.S16 {d4,d5},[R11],R12 @g_ai2_ihevc_trans_16[3][0-7]
+
+ VST1.32 D8,[R2],R10
+ VMULL.S16 Q8,D18,D4 @o[1][0-3]* R1
+
+ VST1.32 D9,[R2],R10
+ VMLAL.S16 Q8,D19,D5 @o[1][4-7]* R1
+ SUB R2,R2,R10,LSL #2
+ SUB R2,R2,R7,LSL #1
+
+ @--------------------Done processing EO -------------------------
+
+ @ -----------------Processing O continues------------------------
+
+ VMULL.S16 Q10,D22,D4 @o[1][0-3]* R2
+ VMLAL.S16 Q10,D23,D5 @o[1][4-7]* R2
+
+ VLD1.S16 {d4,d5},[R11],R12 @g_ai2_ihevc_trans_16[5][0-7]
+
+ VLD1.S16 {d6,d7},[R11],R12 @g_ai2_ihevc_trans_16[7][0-7]
+ VMULL.S16 Q12,D18,D4 @o[2][0-3]* R1
+
+ VMLAL.S16 Q12,D19,D5 @o[2][4-7]* R1
+ VMULL.S16 Q0,D18,D6 @o[3][0-3]* R1
+ VMLAL.S16 Q0,D19,D7 @o[3][4-7]* R1
+
+ VMULL.S16 Q13,D22,D4 @o[2][0-3]* R2
+ VMLAL.S16 Q13,D23,D5 @o[2][4-7]* R2
+ VMULL.S16 Q1,D22,D6 @o[3][0-3]* R2
+ VMLAL.S16 Q1,D23,D7 @o[3][4-7]* R2
+
+ @transpose the 4x4 matrix R1
+ VTRN.32 Q6, Q8 @ 2-cycle instruction
+
+ VTRN.32 Q12,Q0 @ 2-cycle instruction
+
+ @transpose the 4x4 matrix R2
+ VTRN.32 Q7,Q10 @ 2-cycle instruction
+
+ VTRN.32 Q13,Q1 @ 2-cycle instruction
+
+ VSWP D24,D13
+ VSWP D0, D17
+
+ VSWP D26,D15
+ VSWP D2,D21
+
+ VADD.S32 Q8 ,Q8 ,Q6
+ VADD.S32 Q12,Q12,Q0
+
+ VADD.S32 Q10,Q10,Q7
+ VADD.S32 Q13,Q13,Q1
+
+ VLD1.S16 {d4,d5},[R11],R12 @g_ai2_ihevc_trans_16[9][0-7]
+ VADD.S32 Q12 ,Q12 ,Q8
+
+ VADD.S32 Q13,Q13,Q10
+ VMULL.S16 Q3,D18,D4 @o[4][0-3]* R1
+ VMLAL.S16 Q3,D19,D5 @o[4][4-7]* R1
+
+ VZIP.S32 Q12,Q13
+ VMULL.S16 Q4,D22,D4 @o[0][0-3]* R2
+
+
+ VMLAL.S16 Q4,D23,D5 @o[0][4-7]* R2
+ @write to memory
+ @this should go to 1 3 5 7
+ ADD R2,R2,R7
+ LSL R7,R7,#1
+ VLD1.S16 {d4,d5},[R11],R12 @g_ai2_ihevc_trans_16[11][0-7]
+
+ VST1.32 D24,[R2],R7
+ VMULL.S16 Q5,D18,D4 @o[5][0-3]* R1
+
+ VST1.32 D25,[R2],R7
+ VMLAL.S16 Q5,D19,D5 @o[5][4-7]* R1
+
+ VST1.32 D26,[R2],R7
+ VMULL.S16 Q6,D22,D4 @o[0][0-3]* R2
+
+ VST1.32 D27,[R2],R7
+ VMLAL.S16 Q6,D23,D5 @o[0][4-7]* R2
+
+ VLD1.S16 {d4,d5},[R11],R12 @g_ai2_ihevc_trans_16[13][0-7]
+
+ VLD1.S16 {d2,d3},[R11],R12 @g_ai2_ihevc_trans_16[15][0-7]
+ VMULL.S16 Q7,D18,D4 @o[6][0-3]* R1
+
+ VMLAL.S16 Q7,D19,D5 @o[6][4-7]* R1
+ VMULL.S16 Q10,D18,D2 @o[7][0-3]* R1
+ VMLAL.S16 Q10,D19,D3 @o[7][4-7]* R1
+
+ VMULL.S16 Q8,D22,D4 @o[0][0-3]* R2
+ VMLAL.S16 Q8,D23,D5 @o[0][4-7]* R2
+ VMULL.S16 Q12,D22,D2 @o[0][0-3]* R2
+ VMLAL.S16 Q12,D23,D3 @o[0][4-7]* R2
+
+
+ @transpose the 4x4 matrix R1
+ VTRN.32 Q3 ,Q5 @ 2-cycle instruction
+
+ VTRN.32 Q7 ,Q10 @ transpose step 2 R1 , 2-cycle instruction
+
+ @transpose the 4x4 matrix R2
+ VTRN.32 Q4 ,Q6 @ 2-cycle instruction
+
+ VTRN.32 Q8 ,Q12 @ transpose step 2 R2 , 2-cycle instruction
+
+ VSWP D14,D7 @ transpose step 3, R1
+ VSWP D20,D11 @ transpose step 4, R1
+ VSWP D16,D9 @ transpose step 3, R2
+ VSWP D24,D13 @ transpose step 4, R2
+
+ VADD.S32 Q5 ,Q5 ,Q3
+ VADD.S32 Q10,Q10,Q7
+ VADD.S32 Q6 ,Q6 ,Q4
+ VADD.S32 Q12,Q12,Q8
+ VADD.S32 Q10,Q10,Q5
+ VADD.S32 Q12,Q12,Q6
+
+ @ 2-cycle stall
+ VZIP.S32 Q10,Q12 @ 3-cycle instruction
+
+ @ 2-cycle stall
+ @this should go to 9 11 13 15
+ VST1.32 D20,[R2],R7
+
+ VST1.32 D21,[R2],R7
+
+ VST1.32 D24,[R2],R7
+
+ VST1.32 D25,[R2],R7
+
+ SUB R2,R2,R7,LSL #3
+ LSR R7,R7,#1
+ SUB R2,R2,R7
+
+ ADD R2,R2,#8 @MOVE TO NEXT to next COLUMN - pi4_tmp
+
+ ADD R8,R8,#2 @increment loop cntr
+ CMP R8,#16 @check loop cntr
+ BNE CORE_LOOP_16X16_HORIZ @jump acc
+
+
+@*****************Vertical transform************************************
+
+@Initialization for vert transform
+@pi4_tmp will be the new src
+@tmp stride will be new src stride
+@dst will be new pi4_tmp
+@dst stride will be new tmp stride
+@trans table will be of 32 bit
+
+ LDR R9,g_ai4_ihevc_trans_16_addr @get 32 bit transform matrix
+ulbl3:
+ ADD R9, R9, PC
+
+ SUB R0,R2,#64 @set tmp as src [-64 bytes to move back to origin: 8 loop passes x 8 bytes each]
+ MOV R2,R3 @set dst as tmp
+ MOV R4,#TMP_STRIDE @set tmp stride as src stride
+ LSR R7,R6,#15 @Set dst stride as tmp stride
+ SUB R4,#48 @Adjust stride 3 previous loads
+
+ @Block SAD
+ VADD.S32 D28,D28,D29
+ VPADD.S32 D28,D28,D29
+ VMOV.S32 R3,D28[0]
+ @ SAD calculation ends -- final value in R3.
+
+ @Read [0 0] [4 0] [8 0] [12 0],[0 1] [4 1] [8 1] [12 1]
+ @values of g_ai4_ihevc_trans_16 and write to stack
+ MOV R12,#COFF_STD_W
+ LSL R12,R12,#2
+ VLD1.S32 D28,[R9],R12
+ VLD1.S32 D29,[R9],R12
+ VLD1.S32 D30,[R9],R12
+ VLD1.S32 D31,[R9],R12
+ SUB R9,R9,R12,LSL #2
+
+ VREV64.32 Q15,Q15
+ VTRN.S32 Q14,Q15
+ VST1.S32 {Q14-Q15},[SP]
+
+ VMOV.U32 Q14,#RADD @get the round factor to q14
+ VMOV.U32 Q15,#SHIFT @Get the shift to neon
+
+ MOV R8,#0 @INIT LOOP
+
+CORE_LOOP_16X16_VERT:
+
+ VLD1.S32 {D0,D1},[R0]! @LOAD 1-4 src R1
+ VLD1.S32 {D2,D3},[R0]! @LOAD 5-8 pred R1
+ VLD1.S32 {D4,D5},[R0]! @LOAD 9-12 src R1
+ VLD1.S32 {D6,D7},[R0],R4 @LOAD 12-16 pred R1
+
+ VLD1.S32 {D8,D9},[R0]! @LOAD 1-4 src R2
+ VLD1.S32 {D10,D11},[R0]! @LOAD 5-8 pred R2
+ VLD1.S32 {D12,D13},[R0]! @LOAD 9-12 src R2
+ VLD1.S32 {D14,D15},[R0],R4 @LOAD 12-16 pred R2
+
+ VREV64.S32 Q2,Q2 @Rev 9-12 R1
+ VREV64.S32 Q3,Q3 @Rev 12-16 R1
+ VREV64.S32 Q6,Q6 @Rev 9-12 R2
+ VREV64.S32 Q7,Q7 @Rev 12-16 R2
+
+ VSWP D6,D7
+ VSWP D4,D5
+ VADD.S32 Q8 ,Q0,Q3 @e[k] = resi_tmp_1 + resi_tmp_2 k -> 1-4 R1
+ VSWP D12,D13 @ dual issued with prev. instruction
+ VADD.S32 Q9 ,Q1,Q2 @e[k] = resi_tmp_1 + resi_tmp_2 k -> 5-8 R1
+ VSWP D14,D15 @ dual issued with prev. instruction
+ VSUB.S32 Q10,Q0,Q3 @o[k] = resi_tmp_1 - resi_tmp_2 k -> 1-4 R1
+ VSUB.S32 Q11,Q1,Q2 @o[k] = resi_tmp_1 - resi_tmp_2 k -> 5-8 R1
+
+ VADD.S32 Q12,Q4,Q7 @e[k] = resi_tmp_1 + resi_tmp_2 k -> 1-4 R2
+ VREV64.S32 Q9 ,Q9 @rev e[k] k-> 4-7 R1, dual issued with prev. instruction
+ VADD.S32 Q13,Q5,Q6 @e[k] = resi_tmp_1 + resi_tmp_2 k -> 5-8 R2
+ VSUB.S32 Q0 ,Q4,Q7 @o[k] = resi_tmp_1 - resi_tmp_2 k -> 1-4 R2
+ VSWP D18,D19 @ dual issued with prev. instruction
+ VSUB.S32 Q1 ,Q5,Q6 @o[k] = resi_tmp_1 - resi_tmp_2 k -> 5-8 R2
+ VREV64.S32 Q13,Q13 @rev e[k] k-> 4-7 R2, dual issued with prev. instruction
+
+ VADD.S32 Q2,Q8,Q9 @ee[k] = e[k] + e[7 - k] row R1
+ VSUB.S32 Q3,Q8,Q9 @eo[k] = e[k] - e[7 - k] row R1
+ VSWP D26,D27
+
+
+ VADD.S32 Q4,Q12,Q13 @ee[k] = e[k] + e[7 - k] row R2
+ VSUB.S32 Q5,Q12,Q13 @eo[k] = e[k] - e[7 - k] row R2
+ VREV64.S32 D5,D5 @rev ee[k] 4-7 R1, dual issued with prev. instruction
+
+ VADD.S32 D12,D4,D5 @eee[0] eee[1] R1
+ VSUB.S32 D13,D4,D5 @eeo[0] eeo[1] R1
+ VREV64.S32 D9,D9 @rev ee[k] 4-7 R2, dual issued with prev. instruction
+
+
+ VADD.S32 D14,D8,D9 @eee[0] eee[1] R2
+ VSUB.S32 D15,D8,D9 @eeo[0] eeo[1] R2
+
+ VLD1.S32 {Q12,Q13},[SP] @Load g_ai2_ihevc_trans_16[xx]-> Q12 : [0 0] [8 0] [4 0] [12 0] Q13 : [0 1] [8 1] [4 1] [12 1]
+ VREV64.S32 Q8,Q6 @Q6 : eee[0] eee[1] eeo[0] eeo[1] R1 -> ;Q8 : eee[1] eee[0] eeo[1] eeo[0] R1
+
+ VREV64.S32 Q9,Q7 @Q7 : eee[0] eee[1] eeo[0] eeo[1] R2 -> ;Q9 : eee[1] eee[0] eeo[1] eeo[0] R2
+
+
+ VMUL.S32 Q4,Q6,Q12 @g_ai2_ihevc_trans_16 * eee[0] eee[1] eeo[0] eeo[1] R1
+ VMLA.S32 Q4,Q8,Q13 @g_ai2_ihevc_trans_16 * eee[1] eee[0] eeo[1] eeo[0] R1
+
+ VMUL.S32 Q6,Q7,Q12 @g_ai2_ihevc_trans_16 * eee[0] eee[1] eeo[0] eeo[1] R2
+ VMLA.S32 Q6,Q9,Q13 @g_ai2_ihevc_trans_16 * eee[1] eee[0] eeo[1] eeo[0] R2
+
+ @Q3 :R1E00 R1E01 R1E02 R1E03
+ @Q5 :R2E00 R2E01 R2E02 R2E03
+ VSWP D7,D10 @ dual issued with prev. instruction
+ @Q3 :R1E00 R1E01 R2E00 R2E01
+ @Q5 :R1E02 R1E03 R2E02 R2E03
+ VSWP D7,D11
+ @Q3 :R1E00 R1E01 R2E02 R2E03
+ @Q5 :R1E02 R1E03 R2E00 R2E01
+
+ MOV R12,#COFF_STD_W
+ ADD R11,R9,R12,LSL #1 @Get to the 2nd row of src
+ LSL R12,R12,#2
+
+ VLD1.S32 {D14,D15},[R11],R12 @LOAD g_ai2_ihevc_trans_16[2][0-4] -> 2G0 2G1 2G2 2G3, 2-cycle instr.
+
+ VADD.S32 Q4,Q4,Q14 @ROUND R1
+ VMUL.S32 Q12,Q3,Q7 @2G0 2G1 2G2 2G3 * R1E00 R1E01 R2E02 R2E03, 4-cycle instruction
+ VSWP D14,D15 @2G0 2G1 2G2 2G3 -> 2G2 2G3 2G0 2G1, dual issued with prev. instruction
+
+ VADD.S32 Q6,Q6,Q14 @ROUND R2
+
+ VSHRN.S32 D8,Q4,#SHIFT @NARROW R1
+
+ VLD1.S32 {D16,D17},[R11],R12 @LOAD g_ai2_ihevc_trans_16[6][0-4]
+ VSHRN.S32 D9,Q6,#SHIFT @NARROW R2, dual issued in 2nd cycle
+
+ VMUL.S32 Q2,Q3,Q8 @g_ai2_ihevc_trans_16[6][0-4] * eo[0-4], 4-cycle instruction
+ VSWP D16,D17 @dual issued with prev. instr.
+
+ VZIP.S16 D8,D9 @INTERLEAVE R1 R2 R1 R2 R1 R2 to write
+ VMLA.S32 Q12,Q5,Q7 @2G2 2G3 2G0 2G1 * R1E02 R1E03 R2E00 R2E01, 4-cycle instruction
+
+
+ @WRITE INTO MEM the values or wait to be shuffled
+ @These values must go to 0 4 8 12 columns
+ LSL R10,R7,#2
+ VST1.S32 D8[0],[R2],R10
+
+ VST1.S32 D9[0],[R2],R10
+
+ VST1.S32 D8[1],[R2],R10
+ VPADD.S32 D18,D24,D25 @D18[0] -> 2G0*R1E00+2G1*R1E01 2G2*R2E02+2G3*R2E03
+ @D18[1] -> 2G2*R1E02+2G3*R1E03 2G0*R2E00+*2G1R2E01
+
+ VST1.S32 D9[1],[R2],R10
+ VMLA.S32 Q2,Q5,Q8 @g_ai2_ihevc_trans_16[2][0-4] * eo[0-4]
+ LSL R10,R10,#2
+ SUB R2,R2,R10
+
+ VLD1.S32 {D14,D15},[R11],R12 @LOAD g_ai2_ihevc_trans_16[10][0-4]
+
+ VMUL.S32 Q6,Q3,Q7 @g_ai2_ihevc_trans_16[10][0-4] * eo[0-4]
+ VSWP D14,D15 @ dual issued with prev. instruction
+ VPADD.S32 D19,D4,D5
+
+ VLD1.S32 {D16,D17},[R11],R12 @LOAD g_ai2_ihevc_trans_16[14][0-4]
+ VMUL.S32 Q2,Q3,Q8 @g_ai2_ihevc_trans_16[14][0-4] * eo[0-4]
+ VSWP D16,D17
+
+ VMLA.S32 Q6,Q5,Q7 @g_ai2_ihevc_trans_16[2][0-4] * eo[0-4]
+ VADD.S32 Q9,Q9,Q14 @Round by RADD R1
+ VMLA.S32 Q2,Q5,Q8 @g_ai2_ihevc_trans_16[2][0-4] * eo[0-4]
+ VSHRN.S32 D8,Q9,#SHIFT @Shift by SHIFT
+ VPADD.S32 D24,D12,D13
+ @---------------Processing O, Row 1 and Row 2--------------------------------------
+ @ Early start to avoid stalls
+ MOV R12,#COFF_STD_W
+ ADD R11,R9,R12 @Get 1ST row
+ LSL R12,R12,#1
+
+ LSL R10,R7,#2
+ ADD R2,R2,R7,LSL #1 @move to third row
+ @this should go to 2 6 10 14
+ VST1.S32 D8[0],[R2],R10
+
+ VST1.S32 D8[1],[R2],R10
+ VPADD.S32 D25,D4,D5 @ dual issued with prev. instruction in 2nd cycle
+
+ VLD1.S32 {Q2,Q3},[R11],R12 @g_ai2_ihevc_trans_16[1][0-7]
+ VADD.S32 Q12,Q12,Q14 @Round by RADD R2, dual issued with prev. instruction in 2nd cycle
+ VMUL.S32 Q6,Q2,Q0 @g_ai2_ihevc_trans_16[1][0-3]*o[0][0-3] R2
+ VMLA.S32 Q6,Q3,Q1 @g_ai2_ihevc_trans_16[1][4-7]*o[0][4-7] R2
+ VSHRN.S32 D9,Q12,#SHIFT @Shift by SHIFT
+
+ VMUL.S32 Q2,Q2,Q10 @g_ai2_ihevc_trans_16[1][0-3]*o[0][0-3] R1
+ VMLA.S32 Q2,Q3,Q11 @g_ai2_ihevc_trans_16[1][4-7]*o[0][4-7] R1
+ VADD.S32 D11,D12,D13 @g_ai2_ihevc_trans_16[1][k]*o[0][k]+g_ai2_ihevc_trans_16[0][7-k]*o[0][7-k] R2, dual issued with prev. instr.
+ VST1.S32 D9[0],[R2],R10
+
+ VST1.S32 D9[1],[R2],R10
+ VADD.S32 D10,D4,D5 @g_ai2_ihevc_trans_16[1][k]*o[0][k]+g_ai2_ihevc_trans_16[0][7-k]*o[0][7-k] R1, dual issued with prev. instr.
+ LSL R10,R10,#2 @go back to orgin
+ SUB R2,R2,R10
+ SUB R2,R2,R7,LSL #1
+
+ VLD1.S32 {Q2,Q3},[R11],R12 @g_ai2_ihevc_trans_16[3][0-7]
+
+ VMUL.S32 Q7,Q2,Q10 @o[0][0-3]
+ VMLA.S32 Q7,Q3,Q11 @o[0][4-7]
+ VMUL.S32 Q8,Q2,Q0 @o[0][0-3]
+ VMLA.S32 Q8,Q3,Q1 @o[0][4-7]
+
+ VLD1.S32 {Q2,Q3},[R11],R12 @g_ai2_ihevc_trans_16[5][0-7]
+ VADD.S32 D18,D14,D15
+ VMUL.S32 Q12,Q2,Q10 @o[0][0-3]
+ VMLA.S32 Q12,Q3,Q11 @o[0][4-7]
+ VADD.S32 D19,D16,D17
+ VMUL.S32 Q4,Q2,Q0
+ VMLA.S32 Q4,Q3,Q1
+ VLD1.S32 {Q2,Q3},[R11],R12 @g_ai2_ihevc_trans_16[7][0-7]
+ VADD.S32 D26,D24,D25 @ dual issued with prev. instr.
+ VMUL.S32 Q6,Q2,Q10 @o[0][0-3]
+ VMLA.S32 Q6,Q3,Q11 @o[0][4-7]
+ VADD.S32 D27,D8,D9
+ VMUL.S32 Q4,Q2,Q0
+ VMLA.S32 Q4,Q3,Q1
+ VADD.S32 D12,D12,D13
+ @Q5 Q9 Q13 Q6
+ VPADD.S32 D14,D10,D11
+ VPADD.S32 D15,D18,D19
+ VPADD.S32 D16,D26,D27
+ VADD.S32 D13,D8,D9
+ VADD.S32 Q9,Q7,Q14
+ VLD1.S32 {Q2,Q3},[R11],R12 @g_ai2_ihevc_trans_16[0][0-7]
+ VPADD.S32 D17,D12,D13 @ dual issued with prev. instr. in 2nd cycle
+
+ VMUL.S32 Q4,Q2,Q10 @o[0][0-3]
+ VMLA.S32 Q4,Q3,Q11 @o[0][4-7]
+
+ VADD.S32 Q12,Q8,Q14
+
+ VMUL.S32 Q6,Q2,Q0 @o[0][0-3]
+ VMLA.S32 Q6,Q3,Q1 @o[0][4-7]
+
+ VSHRN.S32 D26,Q9,#SHIFT
+ VSHRN.S32 D27,Q12,#SHIFT
+ VADD.S32 D10,D8,D9
+ @write to memory this should go to 1 3 5 7
+ ADD R2,R2,R7
+ LSL R7,R7,#1
+ VLD1.S32 {Q2,Q3},[R11],R12 @g_ai2_ihevc_trans_16[1][0-7]
+ VADD.S32 D11,D12,D13 @ dual issued with prev. instr.
+
+ VST1.S32 D26[0],[R2],R7
+ VMUL.S32 Q7,Q2,Q10 @o[0][0-3]
+ VMLA.S32 Q7,Q3,Q11 @o[0][4-7]
+ VST1.S32 D26[1],[R2],R7
+ VMUL.S32 Q8,Q2,Q0 @o[0][0-3]
+ VMLA.S32 Q8,Q3,Q1 @o[0][4-7]
+ VST1.S32 D27[0],[R2],R7
+ VADD.S32 D18,D14,D15
+ VST1.S32 D27[1],[R2],R7
+
+ VLD1.S32 {Q2,Q3},[R11],R12 @g_ai2_ihevc_trans_16[2][0-7]
+ VADD.S32 D19,D16,D17 @ dual issued with prev. instr.
+
+ VMUL.S32 Q12,Q2,Q10 @o[0][0-3]
+ VMLA.S32 Q12,Q3,Q11 @o[0][4-7]
+ VMUL.S32 Q4,Q2,Q0
+ VMLA.S32 Q4,Q3,Q1
+
+ VLD1.S32 {Q2,Q3},[R11],R12 @g_ai2_ihevc_trans_16[3][0-7]
+ VADD.S32 D26,D24,D25
+
+ VMUL.S32 Q6,Q2,Q10 @o[0][0-3]
+ VMLA.S32 Q6,Q3,Q11 @o[0][4-7]
+ VADD.S32 D27,D8,D9
+
+ VMUL.S32 Q4,Q2,Q0
+ VMLA.S32 Q4,Q3,Q1
+ VADD.S32 D12,D12,D13
+ @Q5 Q9 Q13 Q6
+ VPADD.S32 D14,D10,D11
+ VPADD.S32 D15,D18,D19
+ VPADD.S32 D16,D26,D27
+ VADD.S32 D13,D8,D9
+ VADD.S32 Q9,Q7,Q14
+ @ 1- cycle stall?
+ VPADD.S32 D17,D12,D13
+ VSHRN.S32 D22,Q9,#SHIFT
+ VADD.S32 Q10,Q8,Q14
+ @ 2-cycle stall?
+ VSHRN.S32 D23,Q10,#SHIFT
+
+ @this should go to 9 11 13 15
+ @LSL R11,R7,#1
+ VST1.S32 D22[0],[R2],R7
+ VST1.S32 D22[1],[R2],R7
+ VST1.S32 D23[0],[R2],R7
+ VST1.S32 D23[1],[R2],R7
+
+ SUB R2,R2,R7,LSL #3
+ LSR R7,R7,#1
+ SUB R2,R2,R7
+
+ ADD R2,R2,#4 @MOVE TO NEXT to next COLUMN
+
+ ADD R8,R8,#2 @increment loop cntr by 2 since we process loop as 2 cols
+ CMP R8,#16 @check loop cntr
+ BNE CORE_LOOP_16X16_VERT @jump acc
+
+ MOV R0,R3
+
+ ADD SP,SP,#32
+ vpop {d8 - d15}
+ LDMFD sp!,{r4-r12,PC} @restore r4-r12 and return by popping the saved return address into PC
+
diff --git a/common/arm/ihevc_resi_trans_32x32_a9q.s b/common/arm/ihevc_resi_trans_32x32_a9q.s
new file mode 100644
index 0000000..9cf7486
--- /dev/null
+++ b/common/arm/ihevc_resi_trans_32x32_a9q.s
@@ -0,0 +1,1240 @@
+@/******************************************************************************
+@ *
+@ * Copyright (C) 2018 The Android Open Source Project
+@ *
+@ * Licensed under the Apache License, Version 2.0 (the "License");
+@ * you may not use this file except in compliance with the License.
+@ * You may obtain a copy of the License at:
+@ *
+@ * http://www.apache.org/licenses/LICENSE-2.0
+@ *
+@ * Unless required by applicable law or agreed to in writing, software
+@ * distributed under the License is distributed on an "AS IS" BASIS,
+@ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ * See the License for the specific language governing permissions and
+@ * limitations under the License.
+@ *
+@ *****************************************************************************
+@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+@*/
+@/**
+@ *******************************************************************************
+@ * @file
+@ * ihevc_resi_trans_32x32_a9q.s
+@ *
+@ * @brief
+@ * Contains function definitions for forward transform 32x32
+@ *
+@ * @author
+@ * Mohit
+@ *
+@ * @par List of Functions:
+@ * - ihevc_resi_trans_32x32_a9q()
+@ *
+@ * @remarks
+@ * None
+@ *
+@ *******************************************************************************
+@*/
+@*/
+.text
+.p2align 2
+
+.extern g_ai2_ihevc_trans_32
+.extern g_ai4_ihevc_trans_32
+
+g_ai2_ihevc_trans_32_addr_1: @ PC-relative offset of the 16-bit transform table, consumed at ulbl1
+.long g_ai2_ihevc_trans_32 - ulbl1 - 8 @ -8: the ADD R9, R9, PC at ulbl1 reads PC as ulbl1 + 8 (ARM-state behaviour)
+
+g_ai2_ihevc_trans_32_addr_2: @ same 16-bit table, second load site (ulbl2), same -8 bias
+.long g_ai2_ihevc_trans_32 - ulbl2 - 8
+
+g_ai4_ihevc_trans_32_addr: @ offset of g_ai4_ihevc_trans_32; ulbl3 presumably labels a matching ADD ..., PC -- TODO confirm
+.long g_ai4_ihevc_trans_32 - ulbl3 - 8
+
+@*/
+@*/
+@/**
+@*/ *******************************************************************************
+@*/
+@*/@brief
+@*/ This function performs residue calculation and forward transform on
+@*/ input pixels
+@*/
+@*/@par Description:
+@*/ Performs residue calculation by subtracting source and prediction and
+@*/ followed by forward transform
+@*/
+@*/ @param[in] pu1_src
+@*/ Input 32x32 pixels
+@*/
+@*/ @param[in] pu1_pred
+@*/ Prediction data
+@*/
+@*/ @param[in] pi2_tmp
+@*/ Temporary buffer for the 32x32 intermediate, written as 32-bit words (listed as pi4_tmp in the register map below)
+@*/
+@*/ @param[out] pi2_dst
+@*/ Output 32x32 coefficients
+@*/
+@*/ @param[in] src_strd
+@*/ Input stride
+@*/
+@*/ @param[in] pred_strd
+@*/ Prediction Stride
+@*/
+@*/ @param[in] dst_strd_chr_flag
+@*/ Output Stride and Chroma Flag packed in the MS and LS 16-bit
+@*/
+@*/ @returns Void
+@*/
+@*/ @remarks
+@*/ None
+@*/
+@*/*******************************************************************************
+@*/
+ .global ihevc_resi_trans_32x32_a9q
+ihevc_resi_trans_32x32_a9q:
+
+.equ TMP_STRIDE_32 , 128 @32*4, row stride of the tmp buffer in bytes
+.equ SHIFT_32 , 15 @shift = 15; // log2(iWidth) - 1 + g_uiBitIncrement
+
+.equ COFF_STD_2B_32 , 64 @Stride for g_ai2_ihevc_trans_32 in bytes
+.equ COFF_STD_W_32 , 64 @Stride for g_ai4_ihevc_trans_32 in bytes
+
+@LOAD the function
+ STMFD SP!,{r4-r12,LR} @save r4-r12 and LR on the stack
+ vpush {d8 - d15}
+ SUB SP,SP,#32
+
+ LDR R4,[SP,#136] @get src_strd
+ LDR R5,[SP,#140] @get pred_strd
+ LDR R6,[SP,#144] @get dst_strd_chr_flag
+
+ MOV R8,#0 @Set loop counter
+ LDR R9,g_ai2_ihevc_trans_32_addr_1 @get 16 bit transform matrix
+ulbl1:
+ ADD R9, R9, PC
+
+ @Read [0 0] [8 0] [16 0] [24 0],[0 1] [8 1] [16 1] [24 1] values of g_ai2_ihevc_trans_32
+ @and write to stack
+ MOV R12,#COFF_STD_2B_32
+ LSL R12,#3
+
+ VLD1.S32 D30[0],[R9],R12
+ VLD1.S32 D30[1],[R9],R12 @ D30 - [0 0] [0 1] [8 0] [8 1]
+ VLD1.S32 D31[0],[R9],R12
+ VLD1.S32 D31[1],[R9],R12 @ D31 - [16 0] [16 1] [24 0] [24 1]
+
+ VTRN.S32 D30,D31 @ D30 - [0 0] [0 1] [16 0] [16 1]
+ VTRN.S16 D30,D31 @ D31 - [8 0] [8 1] [24 0] [24 1]
+ VST1.S16 {D30,D31},[SP]
+
+ LDR R9,g_ai2_ihevc_trans_32_addr_2 @get 16 bit transform matrix
+ulbl2:
+ ADD R9, R9, PC
+
+ MOV R7,#TMP_STRIDE_32
+@ AND R14,R6,#0x1
+
+ VMOV.S32 Q14,#0
+
+@R0 pu1_src
+@R1 pu1_pred
+@R2 pi4_tmp
+@R3 pi2_dst
+@R4 src_strd - 16
+@R5 pred_strd - 16
+@R6 dst_strd_chr_flag
+@R7 tmp_dst Nx4 block stride
+@R8 loop cntr
+@R9 g_ai2_ihevc_trans_32
+@R10 tmp_dst Nx4 block offset
+@R11 tmp register
+@R12 ------
+@R14 ------.
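+@q14 shift 32 bit (NOTE: inside CORE_LOOP_32X32_HORIZ q14 is the running block SAD, zeroed before the loop)
+@q15 add 32 bit (NOTE: inside CORE_LOOP_32X32_HORIZ q15 is scratch: per-pass SAD, then a product accumulator)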
+
+ SUB R4, R4, #16
+ SUB R5, R5, #16
+CORE_LOOP_32X32_HORIZ:
+
+ VLD1.U8 {D0,D1},[R0]! @LOAD 1-16 src row 1
+
+ VLD1.U8 {D4,D5},[R1]! @LOAD 1-16 pred row 1
+
+ VLD1.U8 {D2,D3},[R0],R4 @LOAD 17-32 src row 1
+ @ Residue calculation
+ VSUBL.U8 Q8,D0,D4 @ Get residue 1-8 row 1 -- dual issued with prev. instr. 2nd cycle
+
+ VLD1.U8 {D6,D7},[R1],R5 @LOAD 17-32 pred row 1
+ VSUBL.U8 Q9,D1,D5 @ Get residue 9-16 row 1 -- dual issue
+
+ VLD1.U8 {D8,D9},[R0]! @ LOAD 1-16 src row 2
+ VSUBL.U8 Q10,D2,D6 @ Get residue 17-24 row 1 -- dual issue
+
+ VLD1.U8 {D12,D13},[R1]! @ LOAD 1-16 pred row 2
+ VSUBL.U8 Q11,D3,D7 @ Get residue 25-32 row 1 -- dual issue
+
+ VLD1.U8 {D10,D11},[R0],R4 @ LOAD 17-32 src row 2
+ @ Residue - Row 2
+ VSUBL.U8 Q12,D8,D12 @ Get residue 1-8 row 2 -- dual issue
+
+ VLD1.U8 {D14,D15},[R1],R5 @ LOAD 17-32 pred row 2
+ VSUBL.U8 Q13,D9,D13 @ Get residue 9-16 row 2 -- dual issue
+ @ Get blk sads
+ VABDL.U8 Q15,D0,D4
+ VABAL.U8 Q15,D1,D5
+ VABAL.U8 Q15,D2,D6
+ VABAL.U8 Q15,D3,D7
+ VABAL.U8 Q15,D8,D12
+ VABAL.U8 Q15,D9,D13
+ VABAL.U8 Q15,D10,D14
+ VABAL.U8 Q15,D11,D15
+ VADDW.S16 Q14,Q14,D30
+ VADDW.S16 Q14,Q14,D31
+ @ SAD Ends
+
+ VREV64.S16 Q10,Q10 @ Rev 17-24 row 1 -- dual issue
+ VSUBL.U8 Q2,D10,D14 @ Get residue 17-24 row 2
+ VREV64.S16 Q11,Q11 @ Rev 25-32 row 1 -- dual issue
+ VSUBL.U8 Q3,D11,D15 @ Get residue 25-32 row 2
+
+ VSWP D20,D21 @ Q10: 24 23 22 21 20 19 18 17 row 1
+ VSWP D22,D23 @ Q11: 32 31 30 29 28 27 26 25 row 1
+
+ VREV64.S16 Q2,Q2 @ Rev 17-24 row 2
+ VADD.S16 Q5, Q9,Q10 @ e[k] = resi_tmp_1 + resi_tmp_2 k ->9-16 row 1 -- dual issue
+ VREV64.S16 Q3,Q3 @ Rev 25-32 row 2
+ VADD.S16 Q4, Q8,Q11 @ e[k] = resi_tmp_1 + resi_tmp_2 k -> 1-8 row 1 -- dual issue
+ VSWP D4,D5 @ Q2: 24 23 22 21 20 19 18 17 row 2
+ VSUB.S16 Q6, Q8,Q11 @ o[k] = resi_tmp_1 - resi_tmp_2 k -> 1-8 row 1 -- dual issue
+ VSWP D6,D7 @ Q3: 32 31 30 29 28 27 26 25 row 2
+ VSUB.S16 Q7, Q9,Q10 @ o[k] = resi_tmp_1 - resi_tmp_2 k ->9-16 row 1 -- dual issue
+
+ VREV64.16 Q5, Q5 @ Rev 9-16 of e[k], row 1
+ VADD.S16 Q9, Q13,Q2 @ e[k] = resi_tmp_1 + resi_tmp_2 k ->9-16 row 2 -- dual issue
+ VADD.S16 Q8, Q12,Q3 @ e[k] = resi_tmp_1 + resi_tmp_2 k -> 1-8 row 2
+ VSWP D10, D11 @ Q5: e[16] e[15] e[14] e[13] e[12] e[11] e[10] e[9]
+ VSUB.S16 Q10, Q12,Q3 @ o[k] = resi_tmp_1 - resi_tmp_2 k -> 1-8 row 2 -- dual issue
+ VREV64.16 Q9, Q9 @ Rev 9-16 of e[k], row 2
+ VSUB.S16 Q11, Q13,Q2 @ o[k] = resi_tmp_1 - resi_tmp_2 k ->9-16 row 2 -- dual issue
+
+ VADD.S16 Q0, Q4, Q5 @ ee[k] = e[k] + e[16-k] k->1-8 row 1
+ VSWP D18, D19 @ Q9: e[16] e[15] e[14] e[13] e[12] e[11] e[10] e[9]
+ VSUB.S16 Q1, Q4, Q5 @ eo[k] = e[k] - e[16-k] k->1-8 row 1 -- dual issue
+
+ VREV64.S16 D8,D1 @ rev ee[k] k-> 4-7 row 1
+ VADD.S16 Q2, Q8, Q9 @ ee[k] = e[k] + e[16-k] k->1-8 row 2 -- dual issue
+ VSUB.S16 Q3, Q8, Q9 @ eo[k] = e[k] - e[16-k] k->1-8 row 2
+ VMOV.S16 D1,D4
+ VREV64.S16 D9,D5 @ rev ee[k] k-> 4-7 row 2
+
+ @ arrangement OF DATA
+ @ Q0 A1 A2 A3 A4 B1 B2 B3 B4
+ @ Q4 A8 A7 A6 A5 B8 B7 B6 B5
+ @---------------Process EEO starts--------------------
+ MOV R12,#COFF_STD_2B_32 @Get stride of coeffs
+
+ ADD R11,R9,R12,LSL #2 @Load address of g_ai2_ihevc_trans_32[4]
+ LSL R12,R12,#3
+
+ VADD.S16 Q13, Q0, Q4 @ eee[k] = ee[k] + ee[7 - k] row 1 & 2
+ VLD1.S16 D24,[R11],R12 @ LOAD g_ai2_ihevc_trans_32[4][0-4]
+ VSUB.S16 Q0, Q0 ,Q4 @ eeo[k] = ee[k] - ee[7 - k] row 1 & 2 -- dual issue
+
+ @ D26 R1eee[0] R1eee[1] R1eee[2] R1eee[3]
+ @ D27 R2eee[0] R2eee[1] R2eee[2] R2eee[3]
+ VTRN.S32 D26,D27
+ @ D26 R1eee[0] R1eee[1] R2eee[0] R2eee[1]
+ @ D27 R1eee[2] R1eee[3] R2eee[2] R2eee[3]
+ VREV32.16 D4,D27
+ @ D26 R1eee[0] R1eee[1] R2eee[0] R2eee[1]
+ @ D4 R1eee[3] R1eee[2] R2eee[3] R2eee[2]
+ VMOV.S16 D27,D26
+ VNEG.S16 D5,D4
+
+ @ Q13 R1eee[0] R1eee[1] R2eee[0] R2eee[1] R1eee[0] R1eee[1] R2eee[0] R2eee[1]
+ @ Q2 R1eee[3] R1eee[2] R2eee[3] R2eee[2] -R1eee[3] -R1eee[2] -R2eee[3] -R2eee[2]
+ @ 1- cycle stall?
+ VADD.S16 Q2,Q13,Q2
+ @ Q2 R1eeee[0] R1eeee[1] R2eeee[0] R2eeee[1] R1eeeo[0] R1eeeo[1] R2eeeo[0] R2eeeo[1]
+
+ @ Q2 R1eeee[0] R1eeee[1] R2eeee[0] R2eeee[1]
+ @ R1eeeo[0] R1eeeo[1] R2eeeo[0] R2eeeo[1]
+ VMULL.S16 Q15,D24,D0 @g_ai2_ihevc_trans_32[4][0-4] * eeo[0-4] R1 -- dual issue
+ VTRN.S16 D4,D5
+ @ Q2 R1eeee[0] R1eeeo[0] R2eeee[0] R2eeeo[0]
+ @ R1eeee[1] R1eeeo[1] R2eeee[1] R2eeeo[1]
+ @ 1-cycle stall?
+ VDUP.S32 D8,D4[0] @ R1eeee[0] R1eeeo[0] R1eeee[0] R1eeeo[0]
+ VDUP.S32 D9,D4[1] @ R2eeee[0] R2eeeo[0] R2eeee[0] R2eeeo[0]
+ VDUP.S32 D10,D5[0] @ R1eeee[1] R1eeeo[1] R1eeee[1] R1eeeo[1]
+ VDUP.S32 D11,D5[1] @ R2eeee[1] R2eeeo[1] R2eeee[1] R2eeeo[1]
+
+ @D4 : [0 0] [8 0] [16 0] [24 0]
+ @D5 : [0 1] [8 1] [16 1] [24 1]
+ VLD1.S16 {D4,D5},[SP] @ [0 0] [8 0] [16 0] [24 0] [0 1] [8 1] [16 1] [24 1]
+ VMULL.S16 Q8,D8,D4 @ g_ai2_ihevc_trans_32 * R1eeee[0] R1eeeo[0] R1eeee[0] R1eeeo[0] -- dual issue 2nd cycle
+ VMLAL.S16 Q8,D10,D5 @ + g_ai2_ihevc_trans_32 * R1eeee[1] R1eeeo[1] R1eeee[1] R1eeeo[1]
+ VLD1.S16 D27,[R11],R12 @LOAD g_ai2_ihevc_trans_32[12][0-4] -- 1st cycle dual issue with prev. MLAL
+ VMULL.S16 Q9,D9,D4 @ g_ai2_ihevc_trans_32 * R2eeee[0] R2eeeo[0] R2eeee[0] R2eeeo[0] -- dual issue 2nd cycle
+ VMLAL.S16 Q9,D11,D5 @ + g_ai2_ihevc_trans_32 * R2eeee[1] R2eeeo[1] R2eeee[1] R2eeeo[1]
+
+ VMULL.S16 Q4,D24,D1 @g_ai2_ihevc_trans_32[4][0-4] * eeo[0-4] R2
+
+ VMULL.S16 Q5,D27,D0 @g_ai2_ihevc_trans_32[12][0-4] * eeo[0-4] R1
+ VZIP.S32 Q8,Q9 @ 3-cycle instruction -- 1st cycle dual issued
+ @These values must go to 0 8 16 24 rows hence we need stride *8
+ LSL R10,R7,#3
+ VMULL.S16 Q12,D27,D1 @g_ai2_ihevc_trans_32[12][0-4] * eeo[0-4] R2
+ VST1.32 D16,[R2],R10 @ -- dual issued
+
+ VST1.32 D17,[R2],R10
+
+ VLD1.S16 D26,[R11],R12 @LOAD g_ai2_ihevc_trans_32[20][0-4]
+
+ VMULL.S16 Q8,D26,D1 @g_ai2_ihevc_trans_32[20][0-4] * eeo[0-4] R2
+ VST1.32 D18,[R2],R10 @ -- dual issued
+
+ VST1.32 D19,[R2],R10
+
+ SUB R2,R2,R10,LSL #2
+ @----------------------------Process EEEO ends----------------------------------------
+
+ VLD1.S16 D27,[R11],R12 @LOAD g_ai2_ihevc_trans_32[28][0-4]
+ VMULL.S16 Q9,D26,D0 @g_ai2_ihevc_trans_32[20][0-4] * eeo[0-4] R1
+
+ VMULL.S16 Q2,D27,D1 @g_ai2_ihevc_trans_32[28][0-4] * eeo[0-4] R2
+ @transpose the 4x4 matrix row1
+ VTRN.32 Q15, Q5 @R1 transpose1 -- dual issue
+ VMULL.S16 Q13,D27,D0 @g_ai2_ihevc_trans_32[28][0-4] * eeo[0-4] R1
+
+ @transpose the 4x4 matrix row2
+ VTRN.32 Q4,Q12 @R2 transpose1
+ VTRN.32 Q8,Q2 @R2 transpose1
+
+ @-----------------------Processing EO ----------------------------
+ MOV R12,#COFF_STD_2B_32 @Get coeffs stride
+ ADD R11,R9,R12,LSL #1 @Load address of g_ai2_ihevc_trans_32[2]
+ LSL R12,R12,#2
+ VLD1.S16 {D0,D1},[R11],R12 @g_ai2_ihevc_trans_32[2][0-7]
+
+ VSWP D4,D25 @R2 transpose2
+ VSWP D16,D9 @R2 transpose2
+
+ VADD.S32 Q4,Q4,Q12 @R2 add -- dual issue 1st cycle
+ VTRN.32 Q9, Q13 @R1 transpose1
+ VADD.S32 Q8,Q8,Q2 @R2 add -- dual issue 2nd cycle
+
+ VSWP D18,D31 @R1 transpose2
+ VMULL.S16 Q2,D2,D0 @eo[0][0-3]* R1 -- dual issue
+ VMLAL.S16 Q2,D3,D1 @eo[0][4-7]* R1
+
+ VSWP D26,D11 @R1 transpose2
+ VADD.S32 Q8,Q4,Q8 @R2 add -- dual issue
+
+ VADD.S32 Q15,Q15,Q9 @R1 add
+ VADD.S32 Q5,Q5,Q13 @R1 add
+ VMULL.S16 Q4,D6,D0 @eo[0][0-3]* R2
+ VMLAL.S16 Q4,D7,D1 @eo[0][4-7]* R2
+ VADD.S32 Q15,Q15,Q5 @R1 add
+
+ VLD1.S16 {D0,D1},[R11],R12 @g_ai2_ihevc_trans_32[6][0-7]
+
+ VMULL.S16 Q5,D2,D0 @eo[1][0-3]* R1
+ VMLAL.S16 Q5,D3,D1 @eo[1][4-7]* R1
+
+
+ VZIP.S32 Q15,Q8 @ 3-cycle instruction
+ VMULL.S16 Q13,D6,D0 @eo[1][0-3]* R2 -- dual issue
+ VMLAL.S16 Q13,D7,D1 @eo[1][4-7]* R2
+
+ VLD1.S16 {D0,D1},[R11],R12 @g_ai2_ihevc_trans_32[10][0-7] -- dual issue with prev. MLAL
+
+ @write to memory
+ @this should go to 4 12 20 28
+ LSL R10,R7,#3
+ ADD R2,R2,R7,LSL #2 @move to fifth row
+ VST1.32 D30,[R2],R10
+ VMULL.S16 Q9,D2,D0 @eo[2][0-3]* R1 -- dual issue
+ VMLAL.S16 Q9,D3,D1 @eo[2][4-7]* R1
+ VST1.32 D31,[R2],R10 @ 1st cycle dual issued with MLAL
+
+ VST1.32 D16,[R2],R10
+ VMULL.S16 Q12,D6,D0 @eo[2][0-3]* R2 -- dual issue
+ VMLAL.S16 Q12,D7,D1 @eo[2][4-7]* R2
+ VST1.32 D17,[R2],R10 @ 1st cycle dual issued with MLAL
+
+ SUB R2,R2,R10,LSL #2
+ SUB R2,R2,R7,LSL #2
+ @--------------------Done processing EEO -------------------------
+
+ VLD1.S16 {D0,D1},[R11],R12 @g_ai2_ihevc_trans_32[14][0-7]
+
+ VMULL.S16 Q8,D2,D0 @eo[3][0-3]* R1
+ VMLAL.S16 Q8,D3,D1 @eo[3][4-7]* R1
+
+ @transpose the 4x4 matrix R1
+ VTRN.32 Q2, Q5 @
+ VMULL.S16 Q15,D6,D0 @eo[3][0-3]* R2 -- dual issued with 2nd cycle of TRN
+ VMLAL.S16 Q15,D7,D1 @eo[3][4-7]* R2
+ VTRN.32 Q9, Q8 @ 1st cycle dual issued
+ @transpose the 4x4 matrix R2
+ VTRN.32 Q4,Q13
+
+ VSWP D18, D5 @ R1
+ VSWP D16, D11 @ R1
+ VADD.S32 Q2, Q2, Q5 @ R1
+ VADD.S32 Q9, Q9, Q8 @ R1
+ VTRN.32 Q12,Q15 @ R2 -- dual issue
+ VADD.S32 Q9, Q2, Q9 @ R1
+
+ VSWP D24,D9 @ R2
+ VSWP D30,D27 @ R2
+
+ VLD1.S16 {D4,D5},[R11],R12 @g_ai2_ihevc_trans_32[18][0-7]
+
+ VADD.S32 Q4, Q4, Q13 @ R2
+ VADD.S32 Q12, Q12, Q15 @ R2
+ VMULL.S16 Q0,D2,D4 @eo[4][0-3]* R1
+ VMLAL.S16 Q0,D3,D5 @eo[4][4-7]* R1
+ VADD.S32 Q12, Q4, Q12 @ R2
+
+ VZIP.S32 Q9,Q12 @ 3-cycle
+ VMULL.S16 Q4,D6,D4 @eo[0][0-3]* R2 -- dual issue
+ VMLAL.S16 Q4,D7,D5 @eo[0][4-7]* R2
+
+ VLD1.S16 {D4,D5},[R11],R12 @g_ai2_ihevc_trans_32[22][0-7] -- 1st cycle dual issued with prev. instr
+
+ @write to memory
+ @this should go to 2 6 10 14
+ ADD R2,R2,R7, LSL #1
+ LSL R7,R7,#2
+ VST1.32 D18,[R2],R7
+ VMULL.S16 Q5,D2,D4 @eo[5][0-3]* R1 -- dual issue
+ VMLAL.S16 Q5,D3,D5 @eo[5][4-7]* R1
+ VST1.32 D19,[R2],R7 @ 1st cycle dual issued with prev. instr
+
+ VST1.32 D24,[R2],R7
+ VMULL.S16 Q8,D6,D4 @eo[0][0-3]* R2 -- dual issue
+ VMLAL.S16 Q8,D7,D5 @eo[0][4-7]* R2
+ VST1.32 D25,[R2],R7 @ 1st cycle dual issued with prev. instr
+
+
+ VLD1.S16 {D4,D5},[R11],R12 @g_ai2_ihevc_trans_32[26][0-7]
+ VMULL.S16 Q9,D2,D4 @eo[6][0-3]* R1
+ VMLAL.S16 Q9,D3,D5 @eo[6][4-7]* R1
+ VMULL.S16 Q12,D6,D4 @eo[0][0-3]* R2
+ VMLAL.S16 Q12,D7,D5 @eo[0][4-7]* R2
+
+ VLD1.S16 {D4,D5},[R11],R12 @g_ai2_ihevc_trans_32[30][0-7]
+ VMULL.S16 Q13,D2,D4 @eo[7][0-3]* R1
+ VMLAL.S16 Q13,D3,D5 @eo[7][4-7]* R1
+ VMULL.S16 Q15,D6,D4 @eo[0][0-3]* R2
+ VMLAL.S16 Q15,D7,D5 @eo[0][4-7]* R2
+
+ @-----------------------Processing O ----------------------------
+ MOV R12,#COFF_STD_2B_32 @Get coeffs stride
+ LSL R12,R12,#1
+ ADD R11,R9,#COFF_STD_2B_32 @Get address of g_ai2_ihevc_trans_32[1]
+ SUB R12, R12, #16
+
+ VLD1.S16 {D4,D5},[R11]! @g_ai2_ihevc_trans_32[1][0-7]
+
+ VLD1.S16 {D6,D7},[R11],R12 @g_ai2_ihevc_trans_32[1][8-15]
+ VMULL.S16 Q1,D20,D4 @o[0][0-3]* R2 -- dual issue
+ VMLAL.S16 Q1,D21,D5 @o[0][4-7]* R2
+ VMLAL.S16 Q1,D22,D6 @o[0][8-11]* R2
+ VMLAL.S16 Q1,D23,D7 @o[0][12-15]* R2
+
+ @transpose the 4x4 matrix R1
+ VTRN.32 Q0, Q5 @ R1
+ VTRN.32 Q9,Q13 @ R1
+ @transpose the 4x4 matrix R2
+ VTRN.32 Q4,Q8 @ R2
+ VSWP D18, D1 @ R1
+ VSWP D26, D11 @ R1
+ VTRN.32 Q12,Q15 @ R2
+ VADD.S32 Q0, Q0, Q5 @ R1 -- dual issue
+ VADD.S32 Q9, Q9, Q13 @ R1
+
+ VSWP D24,D9 @ R2
+ VSWP D30,D17 @ R2
+ VADD.S32 Q9, Q0, Q9 @ R1 -- dual issue
+
+ VMULL.S16 Q0,D12,D4 @o[0][0-3]* R1
+ VMLAL.S16 Q0,D13,D5 @o[0][4-7]* R1
+ VMLAL.S16 Q0,D14,D6 @o[0][8-11]* R1
+ VMLAL.S16 Q0,D15,D7 @o[0][12-15]* R1
+
+ VLD1.S16 {D4,D5},[R11]! @g_ai2_ihevc_trans_32[3][0-7]
+ VADD.S32 Q4, Q4, Q8 @ R2 -- dual issue
+ VLD1.S16 {D6,D7},[R11],R12 @g_ai2_ihevc_trans_32[3][8-15]
+ VADD.S32 Q12, Q12, Q15 @ R2 -- dual issue
+
+ VMULL.S16 Q5,D20,D4 @o[0][0-3]* R2
+ VMLAL.S16 Q5,D21,D5 @o[0][4-7]* R2
+ VMLAL.S16 Q5,D22,D6 @o[0][8-11]* R2
+ VMLAL.S16 Q5,D23,D7 @o[0][12-15]* R2
+ VADD.S32 Q12, Q4, Q12 @ R2
+
+ VZIP.S32 Q9,Q12
+ VMULL.S16 Q4,D12,D4 @o[0][0-3]* R1
+ VMLAL.S16 Q4,D13,D5 @o[0][4-7]* R1
+ VMLAL.S16 Q4,D14,D6 @o[0][8-11]* R1
+ VMLAL.S16 Q4,D15,D7 @o[0][12-15]* R1
+
+ VLD1.S16 {D4,D5},[R11]! @g_ai2_ihevc_trans_32[5][0-7] -- 1st cycle dual issued with prev. instr
+
+ VLD1.S16 {D6,D7},[R11],R12 @g_ai2_ihevc_trans_32[5][8-15]
+ VMULL.S16 Q8,D12,D4 @o[0][0-3]* R1 -- dual issue with 2nd cycle
+ VMLAL.S16 Q8,D13,D5 @o[0][4-7]* R1
+ VMLAL.S16 Q8,D14,D6 @o[0][8-11]* R1
+ VMLAL.S16 Q8,D15,D7 @o[0][12-15]* R1
+ @this should go to 18 22 26 30
+ VST1.32 D18,[R2],R7 @1st cycle dual issue
+
+ VST1.32 D19,[R2],R7
+
+ VST1.32 D24,[R2],R7
+ VMULL.S16 Q9,D20,D4 @o[0][0-3]* R2 -- dual issue with 2nd cycle
+ VMLAL.S16 Q9,D21,D5 @o[0][4-7]* R2
+ VMLAL.S16 Q9,D22,D6 @o[0][8-11]* R2
+ VMLAL.S16 Q9,D23,D7 @o[0][12-15]* R2
+
+ VST1.32 D25,[R2],R7 @ 1st cycle dual issue
+
+ SUB R2,R2,R7, LSL #3
+ LSR R7,R7,#2
+ SUB R2,R2,R7, LSL #1
+ @--------------------Done Processing EO--------------------------
+
+
+ VLD1.S16 {D4,D5},[R11]! @g_ai2_ihevc_trans_32[7][0-7]
+
+ VLD1.S16 {D6,D7},[R11],R12 @g_ai2_ihevc_trans_32[7][8-15]
+ VMULL.S16 Q12,D12,D4 @o[0][0-3]* R1 -- dual issue
+ VMLAL.S16 Q12,D13,D5 @o[0][4-7]* R1 -- dual issue
+ VMLAL.S16 Q12,D14,D6 @o[0][8-11]* R1
+ VMLAL.S16 Q12,D15,D7 @o[0][12-15]* R1
+ VMULL.S16 Q13,D20,D4 @o[0][0-3]* R2
+ VMLAL.S16 Q13,D21,D5 @o[0][4-7]* R2
+ VMLAL.S16 Q13,D22,D6 @o[0][8-11]* R2
+ VMLAL.S16 Q13,D23,D7 @o[0][12-15]* R2
+
+ @transpose the 4x4 matrix R1
+ VTRN.32 Q0, Q4 @ R1
+ VTRN.32 Q8, Q12 @ R1
+ @transpose the 4x4 matrix R2
+ VTRN.32 Q1, Q5 @ R2
+ VSWP D16, D1 @ R1
+ VSWP D24, D9 @ R1
+
+ VTRN.32 Q9, Q13 @ R2
+ VADD.S32 Q0, Q0, Q4 @ R1 -- dual issue
+ VLD1.S16 {D4,D5},[R11]! @g_ai2_ihevc_trans_32[9][0-7]
+ VADD.S32 Q8, Q8, Q12 @ R1 -- dual issue
+
+ VSWP D18, D3 @ R2
+ VSWP D26, D11 @ R2
+ VLD1.S16 {D6,D7},[R11],R12 @g_ai2_ihevc_trans_32[9][8-15]
+ VADD.S32 Q8, Q0, Q8 @ R1 -- dual issue
+
+ VADD.S32 Q1, Q1, Q5 @ R2
+ VADD.S32 Q9, Q9, Q13 @ R2
+
+ VMULL.S16 Q0,D12,D4 @o[0][0-3]* R1
+ VMLAL.S16 Q0,D13,D5 @o[0][4-7]* R1
+ VMLAL.S16 Q0,D14,D6 @o[0][8-11]* R1
+ VMLAL.S16 Q0,D15,D7 @o[0][12-15]* R1
+ VADD.S32 Q9, Q1, Q9 @ R2
+
+ VMULL.S16 Q1,D20,D4 @o[0][0-3]* R2
+ VMLAL.S16 Q1,D21,D5 @o[0][4-7]* R2
+ VMLAL.S16 Q1,D22,D6 @o[0][8-11]* R2
+ VMLAL.S16 Q1,D23,D7 @o[0][12-15]* R2
+
+ VLD1.S16 {D4,D5},[R11]! @g_ai2_ihevc_trans_32[11][0-7] -- 1st cycle dual issue
+ VLD1.S16 {D6,D7},[R11],R12 @g_ai2_ihevc_trans_32[11][8-15]
+
+ VZIP.S32 Q8, Q9
+
+ @write to memory
+ @this should go to 1 3 5 7
+ ADD R2,R2,R7
+ LSL R7,R7,#1
+ VST1.32 D16, [R2], R7
+
+ VST1.32 D17, [R2], R7
+ VMULL.S16 Q4,D12,D4 @o[0][0-3]* R1 -- dual issued with 2nd cycle
+ VMLAL.S16 Q4,D13,D5 @o[0][4-7]* R1
+ VMLAL.S16 Q4,D14,D6 @o[0][8-11]* R1
+ VMLAL.S16 Q4,D15,D7 @o[0][12-15]* R1
+
+ VST1.32 D18, [R2], R7 @ 1st cycle dual issued
+ VMULL.S16 Q5,D20,D4 @o[0][0-3]* R2 -- dual issue with 2nd cycle
+ VMLAL.S16 Q5,D21,D5 @o[0][4-7]* R2
+ VMLAL.S16 Q5,D22,D6 @o[0][8-11]* R2
+ VMLAL.S16 Q5,D23,D7 @o[0][12-15]* R2
+
+ VST1.32 D19, [R2], R7 @ 1st cycle dual issued
+
+
+ VLD1.S16 {D4,D5},[R11]! @g_ai2_ihevc_trans_32[13][0-7]
+
+ VLD1.S16 {D6,D7},[R11],R12 @g_ai2_ihevc_trans_32[13][8-15]
+ VMULL.S16 Q8,D12,D4 @o[0][0-3]* R1 -- dual issue
+ VMLAL.S16 Q8,D13,D5 @o[0][4-7]* R1
+ VMLAL.S16 Q8,D14,D6 @o[0][8-11]* R1
+ VMLAL.S16 Q8,D15,D7 @o[0][12-15]* R1
+ VMULL.S16 Q9,D20,D4 @o[0][0-3]* R2
+ VMLAL.S16 Q9,D21,D5 @o[0][4-7]* R2
+ VMLAL.S16 Q9,D22,D6 @o[0][8-11]* R2
+ VMLAL.S16 Q9,D23,D7 @o[0][12-15]* R2
+
+ VLD1.S16 {D4,D5},[R11]! @g_ai2_ihevc_trans_32[15][0-7] - 1st cycle dual issue
+ VLD1.S16 {D6,D7},[R11],R12 @g_ai2_ihevc_trans_32[15][8-15]
+ VMULL.S16 Q12,D12,D4 @o[0][0-3]* R1 -- dual issue
+ VMLAL.S16 Q12,D13,D5 @o[0][4-7]* R1
+ VMLAL.S16 Q12,D14,D6 @o[0][8-11]* R1
+ VMLAL.S16 Q12,D15,D7 @o[0][12-15]* R1
+ VMULL.S16 Q13,D20,D4 @o[0][0-3]* R2
+ VMLAL.S16 Q13,D21,D5 @o[0][4-7]* R2
+ VMLAL.S16 Q13,D22,D6 @o[0][8-11]* R2
+ VMLAL.S16 Q13,D23,D7 @o[0][12-15]* R2
+
+ @transpose the 4x4 matrix R1
+ VTRN.32 Q0, Q4 @ R1 1st cycle dual issue
+ VTRN.32 Q8, Q12 @ R1
+ @transpose the 4x4 matrix R2
+ VTRN.32 Q1, Q5 @ R2
+ VSWP D16, D1 @ R1
+ VSWP D24, D9 @ R1
+
+ VTRN.32 Q9, Q13 @ R2
+ VADD.S32 Q0, Q0, Q4 @ R1 -- dual issue
+ VLD1.S16 {D4,D5},[R11]! @g_ai2_ihevc_trans_32[17][0-7]
+ VADD.S32 Q8, Q8, Q12 @ R1 -- dual issue
+
+ VSWP D18, D3 @ R2
+ VSWP D26, D11 @ R2
+ VADD.S32 Q8, Q0, Q8 @ R1 -- dual issue with 1st cycle
+ VLD1.S16 {D6,D7},[R11],R12 @g_ai2_ihevc_trans_32[17][8-15]
+
+
+ VADD.S32 Q1, Q1, Q5 @ R2 -- dual issue with 2nd cycle
+ VADD.S32 Q9, Q9, Q13 @ R2
+
+ VMULL.S16 Q0,D12,D4 @o[0][0-3]* R1
+ VMLAL.S16 Q0,D13,D5 @o[0][4-7]* R1
+ VMLAL.S16 Q0,D14,D6 @o[0][8-11]* R1
+ VMLAL.S16 Q0,D15,D7 @o[0][12-15]* R1
+ VADD.S32 Q9, Q1, Q9 @ R2
+
+ VMULL.S16 Q1,D20,D4 @o[0][0-3]* R2
+ VMLAL.S16 Q1,D21,D5 @o[0][4-7]* R2
+ VLD1.S16 {D4,D5},[R11]! @g_ai2_ihevc_trans_32[19][0-7]
+ VMLAL.S16 Q1,D22,D6 @o[0][8-11]* R2
+ VMLAL.S16 Q1,D23,D7 @o[0][12-15]* R2
+ VLD1.S16 {D6,D7},[R11],R12 @g_ai2_ihevc_trans_32[19][8-15]
+
+ VZIP.S32 Q8, Q9
+
+ @write to memory
+ @this should go to 9 11 13 15
+ VST1.32 D16, [R2], R7
+ VMULL.S16 Q4,D12,D4 @o[0][0-3]* R1 -- dual issued with 2nd cycle
+ VMLAL.S16 Q4,D13,D5 @o[0][4-7]* R1
+ VMLAL.S16 Q4,D14,D6 @o[0][8-11]* R1
+ VMLAL.S16 Q4,D15,D7 @o[0][12-15]* R1
+
+ VST1.32 D17, [R2], R7 @ 1st cycle dual issued
+ VMULL.S16 Q5,D20,D4 @o[0][0-3]* R2 -- dual issue with 2nd cycle
+ VMLAL.S16 Q5,D21,D5 @o[0][4-7]* R2
+ VST1.32 D18, [R2], R7 @1st cycle dual issued
+ VMLAL.S16 Q5,D22,D6 @o[0][8-11]* R2 -- dual issued with 2nd cycle
+ VMLAL.S16 Q5,D23,D7 @o[0][12-15]* R2
+
+ VST1.32 D19, [R2], R7 @ 1st cycle dual issue
+
+
+ VLD1.S16 {D4,D5},[R11]! @g_ai2_ihevc_trans_32[21][0-7]
+ VLD1.S16 {D6,D7},[R11],R12 @g_ai2_ihevc_trans_32[21][8-15]
+ VMULL.S16 Q8,D12,D4 @o[0][0-3]* R1 -- dual issue
+ VMLAL.S16 Q8,D13,D5 @o[0][4-7]* R1
+ VMLAL.S16 Q8,D14,D6 @o[0][8-11]* R1
+ VMLAL.S16 Q8,D15,D7 @o[0][12-15]* R1
+ VMULL.S16 Q9,D20,D4 @o[0][0-3]* R2
+ VMLAL.S16 Q9,D21,D5 @o[0][4-7]* R2
+ VMLAL.S16 Q9,D22,D6 @o[0][8-11]* R2
+ VMLAL.S16 Q9,D23,D7 @o[0][12-15]* R2
+
+ VLD1.S16 {D4,D5},[R11]! @g_ai2_ihevc_trans_32[23][0-7]
+ VLD1.S16 {D6,D7},[R11],R12 @g_ai2_ihevc_trans_32[23][8-15]
+ VMULL.S16 Q12,D12,D4 @o[0][0-3]* R1 -- dual issue
+ VMLAL.S16 Q12,D13,D5 @o[0][4-7]* R1 -- dual issue
+ VMLAL.S16 Q12,D14,D6 @o[0][8-11]* R1
+ VMLAL.S16 Q12,D15,D7 @o[0][12-15]* R1
+ VMULL.S16 Q13,D20,D4 @o[0][0-3]* R2
+ VMLAL.S16 Q13,D21,D5 @o[0][4-7]* R2
+ VMLAL.S16 Q13,D22,D6 @o[0][8-11]* R2
+ VMLAL.S16 Q13,D23,D7 @o[0][12-15]* R2
+
+ @transpose the 4x4 matrix R1
+ VTRN.32 Q0, Q4 @ R1
+ VTRN.32 Q8, Q12 @ R1
+ @transpose the 4x4 matrix R2
+ VTRN.32 Q1, Q5 @ R2
+ VSWP D16, D1 @ R1
+ VSWP D24, D9 @ R1
+
+ VTRN.32 Q9, Q13 @ R2
+ VADD.S32 Q0, Q0, Q4 @ R1 -- dual issue
+ VLD1.S16 {D4,D5},[R11]! @g_ai2_ihevc_trans_32[25][0-7]
+ VADD.S32 Q8, Q8, Q12 @ R1 -- dual issue
+
+ VSWP D18, D3 @ R2
+ VSWP D26, D11 @ R2
+ VLD1.S16 {D6,D7},[R11],R12 @g_ai2_ihevc_trans_32[25][8-15]
+ VADD.S32 Q8, Q0, Q8 @ R1 -- dual issue
+
+ VADD.S32 Q1, Q1, Q5 @ R2
+ VADD.S32 Q9, Q9, Q13 @ R2
+
+ VMULL.S16 Q0,D12,D4 @o[0][0-3]* R1
+ VMLAL.S16 Q0,D13,D5 @o[0][4-7]* R1
+ VMLAL.S16 Q0,D14,D6 @o[0][8-11]* R1
+ VMLAL.S16 Q0,D15,D7 @o[0][12-15]* R1
+ VADD.S32 Q9, Q1, Q9 @ R2
+
+ VMULL.S16 Q1,D20,D4 @o[0][0-3]* R2
+ VMLAL.S16 Q1,D21,D5 @o[0][4-7]* R2
+ VMLAL.S16 Q1,D22,D6 @o[0][8-11]* R2
+ VMLAL.S16 Q1,D23,D7 @o[0][12-15]* R2
+
+ VLD1.S16 {D4,D5},[R11]! @g_ai2_ihevc_trans_32[27][0-7]
+ VLD1.S16 {D6,D7},[R11],R12 @g_ai2_ihevc_trans_32[27][8-15]
+
+ VZIP.S32 Q8, Q9
+ VMULL.S16 Q4,D12,D4 @o[0][0-3]* R1
+ VMLAL.S16 Q4,D13,D5 @o[0][4-7]* R1
+ VMLAL.S16 Q4,D14,D6 @o[0][8-11]* R1
+ VMLAL.S16 Q4,D15,D7 @o[0][12-15]* R1
+ @write to memory
+ @this should go to 17 19 21 23
+ VST1.32 D16, [R2], R7
+ VMULL.S16 Q5,D20,D4 @o[0][0-3]* R2 -- dual issue
+ VST1.32 D17, [R2], R7
+ VMLAL.S16 Q5,D21,D5 @o[0][4-7]* R2 -- dual issue
+ VST1.32 D18, [R2], R7
+ VMLAL.S16 Q5,D22,D6 @o[0][8-11]* R2 -- dual issue
+ VST1.32 D19, [R2], R7
+ VMLAL.S16 Q5,D23,D7 @o[0][12-15]* R2 -- dual issue
+
+ VLD1.S16 {D4,D5},[R11]! @g_ai2_ihevc_trans_32[29][0-7]
+ VLD1.S16 {D6,D7},[R11],R12 @g_ai2_ihevc_trans_32[29][8-15]
+ VMULL.S16 Q8,D12,D4 @o[0][0-3]* R1 -- dual issue
+ VMLAL.S16 Q8,D13,D5 @o[0][4-7]* R1 -- dual issue
+ VMLAL.S16 Q8,D14,D6 @o[0][8-11]* R1
+ VMLAL.S16 Q8,D15,D7 @o[0][12-15]* R1
+ VMULL.S16 Q9,D20,D4 @o[0][0-3]* R2
+ VMLAL.S16 Q9,D21,D5 @o[0][4-7]* R2
+ VMLAL.S16 Q9,D22,D6 @o[0][8-11]* R2
+ VMLAL.S16 Q9,D23,D7 @o[0][12-15]* R2
+
+ VLD1.S16 {D4,D5},[R11]! @g_ai2_ihevc_trans_32[31][0-7]
+ VLD1.S16 {D6,D7},[R11],R12 @g_ai2_ihevc_trans_32[31][8-15]
+ VMULL.S16 Q12,D12,D4 @o[0][0-3]* R1 -- dual issued
+ VMLAL.S16 Q12,D13,D5 @o[0][4-7]* R1 -- dual issued
+ VMLAL.S16 Q12,D14,D6 @o[0][8-11]* R1
+ VMLAL.S16 Q12,D15,D7 @o[0][12-15]* R1
+ VMULL.S16 Q13,D20,D4 @o[0][0-3]* R2
+ VMLAL.S16 Q13,D21,D5 @o[0][4-7]* R2
+ VMLAL.S16 Q13,D22,D6 @o[0][8-11]* R2
+ VMLAL.S16 Q13,D23,D7 @o[0][12-15]* R2
+
+ @transpose the 4x4 matrix R1
+ VTRN.32 Q0, Q4 @ R1
+ VTRN.32 Q8, Q12 @ R1
+ @transpose the 4x4 matrix R2
+ VTRN.32 Q1, Q5 @ R2
+ VSWP D16, D1 @ R1
+ VSWP D24, D9 @ R1
+
+ VTRN.32 Q9, Q13 @ R2
+ VADD.S32 Q0, Q0, Q4 @ R1 -- dual issue
+ VADD.S32 Q8, Q8, Q12 @ R1
+ @ 1-cycle stall?
+ VADD.S32 Q0, Q0, Q8 @ R1
+ VSWP D18, D3 @ R2
+ VSWP D26, D11 @ R2
+ VADD.S32 Q1, Q1, Q5 @ R2
+ VADD.S32 Q9, Q9, Q13 @ R2
+ @ 1-cycle stall?
+ VADD.S32 Q1, Q1, Q9 @ R2
+ @ 2-cycle stall?
+ VZIP.S32 Q0, Q1 @ 3-cycle instruction
+
+ @ 1-cycle stall?
+ @write to memory
+ @this should go to 25 27 29 31
+ VST1.32 D0, [R2], R7
+ VST1.32 D1, [R2], R7
+ VST1.32 D2, [R2], R7
+ VST1.32 D3, [R2], R7
+ @------------------Processing O ends-------------------------------
+
+ SUB R2,R2,R7,LSL #4
+ LSR R7,R7,#1
+ SUB R2,R2,R7
+
+ ADD R2,R2,#8 @MOVE TO NEXT to next COLUMN - pi4_tmp
+
+ ADD R8,R8,#2 @increment loop cntr
+ CMP R8,#32 @check loop cntr
+ BNE CORE_LOOP_32X32_HORIZ @jump acc
+
+
+@*****************Vertical transform************************************
+
+@Initialization for vert transform
+@pi4_tmp will be the new src
+@tmp stride will be new src stride
+@dst will be new pi4_tmp
+@dst stride will be new tmp stride
+@trans table will be of 32 bit
+
+ LDR R9,g_ai4_ihevc_trans_32_addr @get 32 bit transform matrix
+ulbl3:
+ ADD R9, R9, PC
+
+ SUB R0,R2,#128 @set tmp as src [-32 to move back to orgin]
+ MOV R2,R3 @set dst as tmp
+ MOV R4,#TMP_STRIDE_32 @set tmp stride as src stride
+ SUB R4,#112 @Adjust stride for 7 previous loads
+ LSR R7,R6,#15 @Set dst stride as tmp stride
+
+
+ @Block SAD
+ VADD.S32 D28,D28,D29
+ VPADD.S32 D28,D28,D29
+ VMOV.S32 R3,D28[0]
+
+ @Read [0 0] [8 0] [16 0] [24 0],[0 1] [8 1] [16 1] [24 1]
+ @values of g_ai4_ihevc_trans_32 and write to stack
+ MOV R12,#COFF_STD_W_32
+ LSL R12,R12,#3
+ VLD1.S32 D28,[R9],R12 @ D28: [0 0] [0 1]
+ VLD1.S32 D29,[R9],R12 @ D29: [8 0] [8 1]
+ VLD1.S32 D30,[R9],R12 @ D30: [16 0] [16 1]
+ VLD1.S32 D31,[R9],R12 @ D31: [24 0] [24 1]
+ SUB R9,R9,R12,LSL #2
+
+ VREV64.32 Q15,Q15 @ Q15: [16 1] [16 0] [24 1] [24 0]
+ VTRN.S32 Q14,Q15 @ Q14: [0 0] [16 1] [8 0] [24 1]
+ @ Q15: [0 1] [16 0] [8 1] [24 0]
+ VST1.S32 {Q14-Q15},[SP]
+
+@ VMOV.U32 Q14,#RADD ;get the round factor to q14
+@ VMOV.U32 Q15,#SHIFT ;Get the shift to neon
+
+ MOV R8,#0 @INIT LOOP
+
+CORE_LOOP_32X32_VERT:
+
+ VLD1.S32 {D0,D1},[R0]! @LOAD 1-4 src R1
+ VLD1.S32 {D2,D3},[R0]! @LOAD 5-8 src R1
+ VLD1.S32 {D4,D5},[R0]! @LOAD 9-12 src R1
+ VLD1.S32 {D6,D7},[R0]! @LOAD 13-16 src R1
+ VLD1.S32 {D8,D9},[R0]! @LOAD 17-20 src R1
+ VREV64.S32 Q4,Q4 @Rev 17-20 R1
+ VLD1.S32 {D10,D11},[R0]! @LOAD 21-24 src R1
+ VREV64.S32 Q5,Q5 @Rev 21-24 R1
+ VLD1.S32 {D12,D13},[R0]! @LOAD 25-28 src R1
+ VREV64.S32 Q6,Q6 @Rev 25-28 R1
+ VLD1.S32 {D14,D15},[R0],R4 @LOAD 29-32 src R1
+ VREV64.S32 Q7,Q7 @Rev 29-32 R1
+
+ VSWP D8,D9 @ Q4: 20 19 18 17
+ VADD.S32 Q11, Q3, Q4 @e[k] = resi_tmp_1 + resi_tmp_2 k -> 13-16 R1-- dual issue
+ VSWP D10,D11 @ Q5: 24 23 22 21
+ VADD.S32 Q10, Q2, Q5 @e[k] = resi_tmp_1 + resi_tmp_2 k -> 9-12 R1-- dual issue
+ VSWP D12,D13 @ Q6: 28 27 26 25
+ VADD.S32 Q9, Q1, Q6 @e[k] = resi_tmp_1 + resi_tmp_2 k -> 5-8 R1 -- dual issue
+ VSWP D14,D15 @ Q7: 32 31 30 29
+
+ VADD.S32 Q8, Q0, Q7 @e[k] = resi_tmp_1 + resi_tmp_2 k -> 1-4 R1 -- dual issue
+ VREV64.S32 Q11, Q11 @rev e[k] k-> 13-16 R1 -- dual issue
+ VSUB.S32 Q12, Q0, Q7 @o[k] = resi_tmp_1 - resi_tmp_2 k -> 1-4 R1
+ VREV64.S32 Q10, Q10 @rev e[k] k-> 9-12 R1 -- dual issue
+ VSUB.S32 Q13, Q1, Q6 @o[k] = resi_tmp_1 - resi_tmp_2 k -> 5-8 R1
+ VSWP D22, D23 @Q11: e[16] e[15] e[14] e[13] -- dual issue
+ VSUB.S32 Q14, Q2, Q5 @o[k] = resi_tmp_1 - resi_tmp_2 k -> 9-12 R1
+ VSWP D20, D21 @Q10: e[12] e[11] e[10] e[9] -- dual issue
+ VSUB.S32 Q15, Q3, Q4 @o[k] = resi_tmp_1 - resi_tmp_2 k -> 13-16 R1
+
+ VADD.S32 Q1, Q9, Q10 @ee[k] = e[k] + e[15- k] row R1, k-> 4-7
+ VADD.S32 Q0, Q8, Q11 @ee[k] = e[k] + e[15- k] row R1, k-> 0-3
+
+ VSUB.S32 Q2, Q8, Q11 @eo[k] = e[k] - e[15 - k] row R1, k-> 0-3
+ VSUB.S32 Q3, Q9, Q10 @eo[k] = e[k] - e[15 - k] row R1, k-> 4-7
+ VREV64.S32 Q1, Q1 @Q1: ee[5] ee[4] ee[7] ee[6] -- dual issue
+
+ VSWP D2, D3 @Q1: ee[7] ee[6] ee[5] ee[4]
+
+ VADD.S32 Q4, Q0, Q1 @eee[k] = ee[k] + ee[7-k] row R1, k-> 0-3
+ VSUB.S32 Q5, Q0, Q1 @eeo[k] = ee[k] - ee[7-k] row R1, k-> 0-3
+
+ @D8: eee[0] eee[1]
+ VLD1.S32 {Q10,Q11},[SP] @Load g_ai4_ihevc_trans_32[xx]-> Q10 : [0 0] [16 1] [8 0] [24 1] Q11 : [0 1] [16 0] [8 1] [24 0]
+ VREV64.S32 D9, D9 @D9: eee[3] eee[2]
+
+ @-----------------------Processing EEO ----------------------------
+ @Q5 :R1eeo[0] R1eeo[1] R1eeo[2] R1eeo[3]
+ MOV R12,#COFF_STD_W_32
+ ADD R11,R9,R12,LSL #2 @Get to the 4th row of src
+ LSL R12,R12,#3
+
+ VADD.S32 D12, D8, D9 @eeee[0] eeee[1] -- dual issue in 1st cycle
+ VLD1.S32 {D14,D15},[R11],R12 @LOAD g_ai4_ihevc_trans_32[4][0-4] -> 4G0 4G1 4G2 4G3
+ VSUB.S32 D13, D8, D9 @eeeo[0] eeeo[1] -- dual issue in 2nd cycle
+
+ VMUL.S32 Q0,Q5,Q7 @4G0 4G1 4G2 4G3 * R1eeo[0] R1eeo[1] R1eeo[2] R1eeo[3]
+
+ VLD1.S32 {D14,D15},[R11],R12 @LOAD g_ai4_ihevc_trans_32[12][0-4] -- 1st cycle dual issue
+ VREV64.S32 Q8,Q6 @Q6 : eeee[0] eeee[1] eeeo[0] eeeo[1] R1 -> ;Q8 : eeee[1] eeee[0] eeeo[1] eeeo[0] R1
+
+ VMUL.S32 Q4,Q6,Q10 @g_ai4_ihevc_trans_32 * eeee[0] eeee[1] eeeo[0] eeeo[1] R1 -- dual issue
+ VMLA.S32 Q4,Q8,Q11 @g_ai4_ihevc_trans_32 * eeee[1] eeee[0] eeeo[1] eeeo[0] R1
+
+ VMUL.S32 Q9,Q5,Q7 @g_ai4_ihevc_trans_32[6][0-4] * eeo[0-4]
+
+ VLD1.S32 {D14,D15},[R11],R12 @LOAD g_ai4_ihevc_trans_32[20][0-4] - 1st cycle dual issue
+ VRSHRN.S32 D8,Q4,#SHIFT_32 @ROUND NARROW R1 -- dual issued in 2nd cycle
+ @ D8: 0 16 8 24
+ @WRITE INTO MEM the values or wait to be shuffled
+ @These values must go to 0 8 16 24 colums
+ LSL R10,R7,#3
+ VST1.S16 D8[0],[R2],R10
+ VMUL.S32 Q10,Q5,Q7 @g_ai4_ihevc_trans_32[10][0-4] * eeo[0-4] -- dual issued
+
+ VLD1.S32 {D14,D15},[R11],R12 @LOAD g_ai4_ihevc_trans_32[28][0-4]
+
+ VST1.S16 D8[2],[R2],R10
+ VMUL.S32 Q11,Q5,Q7 @g_ai4_ihevc_trans_32[14][0-4] * eeo[0-4] -- dual issue
+ @transpose the 4x4 matrix R1
+ VTRN.32 Q0, Q9
+ @-----------------------Processing EO ----------------------------
+ MOV R12,#COFF_STD_W_32
+ ADD R11,R9,R12,LSL #1 @Get 1ST row
+ LSL R12,R12,#2
+
+ VLD1.S32 {Q6,Q7},[R11],R12 @g_ai4_ihevc_trans_16[2][0-7]
+
+ VMUL.S32 Q8,Q6,Q2 @g_ai4_ihevc_trans_16[2][0-3]*eo[0][0-3] R1
+ VTRN.32 Q10, Q11 @ dual issue
+ VMLA.S32 Q8,Q7,Q3 @g_ai4_ihevc_trans_16[2][4-7]*eo[0][4-7] R1
+
+
+ VSWP D20, D1
+ VSWP D22, D19
+
+ VST1.S16 D8[1],[R2],R10
+ VADD.S32 Q0, Q0, Q9 @ dual issue
+ VST1.S16 D8[3],[R2],R10
+ VADD.S32 Q10, Q10, Q11 @ dual issue
+ SUB R2,R2,R10, LSL #2
+ @-----------------------Processing EEEO complete-------------------
+
+ VLD1.S32 {Q4,Q5},[R11],R12 @g_ai4_ihevc_trans_16[6][0-7]
+ VADD.S32 Q0, Q0, Q10 @ dual issue
+
+ VMUL.S32 Q7,Q4,Q2 @eo[0][0-3]
+ VMLA.S32 Q7,Q5,Q3 @eo[0][4-7]
+ VRSHRN.S32 D0,Q0,#SHIFT_32 @ Shift by SHIFT and Round the result
+
+ VLD1.S32 {Q9,Q10},[R11],R12 @g_ai4_ihevc_trans_16[10][0-7]
+ VADD.S32 D12,D16,D17 @g_ai4_ihevc_trans_16[2][k]*eo[0][k]+g_ai4_ihevc_trans_16[2][7-k]*eo[0][7-k] R1 -- dual issue
+
+
+ VMUL.S32 Q8,Q9,Q2 @eo[0][0-3]
+ VMLA.S32 Q8,Q10,Q3 @eo[0][4-7]
+
+ @this should go to 4 12 20 28
+ LSL R10,R7,#3
+ ADD R2,R2,R7,LSL #2 @move to fifth row
+ VST1.S16 D0[0], [R2], R10
+ VADD.S32 D13,D14,D15 @ -- dual issue--
+ VST1.S16 D0[1], [R2], R10
+ VADD.S32 D10,D16,D17 @ -- dual issue --
+
+ VLD1.S32 {Q7,Q8},[R11],R12 @g_ai4_ihevc_trans_16[14][0-7]
+
+ VST1.S16 D0[2], [R2], R10
+ VMUL.S32 Q9,Q7,Q2 @eo[0][0-3] -- dual issue
+ VST1.S16 D0[3], [R2], R10
+ VMLA.S32 Q9,Q8,Q3 @eo[0][4-7] -- dual issue
+ SUB R2,R2,R10,LSL #2 @go back to orgin
+ SUB R2,R2,R7,LSL #2
+ @----------------------Processing EEO complete-------------------
+
+ VLD1.S32 {Q0,Q1},[R11],R12 @g_ai4_ihevc_trans_16[18][0-7]
+
+ VMUL.S32 Q4,Q0,Q2 @g_ai4_ihevc_trans_16[18][0-3]*eo[0][0-3] R1
+ VMLA.S32 Q4,Q1,Q3 @g_ai4_ihevc_trans_16[18][4-7]*eo[0][4-7] R1
+
+ VLD1.S32 {Q0,Q1},[R11],R12 @g_ai4_ihevc_trans_16[22][0-7]
+ VADD.S32 D11,D18,D19 @ dual issue
+
+ @Q5 Q6
+ VMUL.S32 Q7,Q0,Q2 @eo[0][0-3]
+ VMLA.S32 Q7,Q1,Q3 @eo[0][4-7]
+
+ VPADD.S32 D16,D12,D13
+ VPADD.S32 D17,D10,D11
+
+ VADD.S32 D12,D8,D9 @g_ai4_ihevc_trans_16[18][k]*eo[0][k]+g_ai4_ihevc_trans_16[18][7-k]*eo[0][7-k] R1
+ VADD.S32 D13,D14,D15
+
+ VRSHRN.S32 D14,Q8,#SHIFT_32
+ VLD1.S32 {Q0,Q1},[R11],R12 @g_ai4_ihevc_trans_16[26][0-7]
+ VLD1.S32 {Q10,Q11},[R11],R12 @g_ai4_ihevc_trans_16[30][0-7]
+ @write to memory this should go to 2 6 10 14
+ ADD R2,R2,R7,LSL #1
+ LSL R7,R7,#2
+ VST1.S16 D14[0],[R2],R7
+ VMUL.S32 Q8,Q0,Q2 @eo[0][0-3] -- dual issue
+ VST1.S16 D14[1],[R2],R7
+ VMLA.S32 Q8,Q1,Q3 @eo[0][4-7] -- dual issue
+ VST1.S16 D14[2],[R2],R7
+ VMUL.S32 Q9,Q10,Q2 @eo[0][0-3] -- dual issue
+ VST1.S16 D14[3],[R2],R7
+ VMLA.S32 Q9,Q11,Q3 @eo[0][4-7] -- dual issue
+
+ VADD.S32 D10,D16,D17
+ @---------------Processing O Row 1-----------------------------------------------
+ MOV R12,#COFF_STD_W_32
+ ADD R11,R9,R12 @Get 1ST row
+ LSL R12,R12,#1
+ SUB R12, R12, #32
+ VLD1.S32 {Q0,Q1},[R11]!
+
+ VLD1.S32 {Q2,Q3},[R11],R12 @g_ai4_ihevc_trans_32[1][0-15]
+ VADD.S32 D11,D18,D19 @ dual issue in 2nd cycle
+
+ VMUL.S32 Q4,Q0,Q12 @g_ai4_ihevc_trans_32[1][0-3]*o[0][0-3] R1
+ VMLA.S32 Q4,Q1,Q13 @g_ai4_ihevc_trans_32[1][4-7]*o[0][4-7] R1
+ VLD1.S32 {Q0,Q1},[R11]!
+ VMLA.S32 Q4,Q2,Q14 @g_ai4_ihevc_trans_32[1][8-11]*o[0][8-11] R1
+ VMLA.S32 Q4,Q3,Q15 @g_ai4_ihevc_trans_32[1][12-15]*o[0][12-15] R1
+
+ @Q5 Q6
+ VPADD.S32 D16,D12,D13
+ VPADD.S32 D17,D10,D11
+
+
+ VLD1.S32 {Q2,Q3},[R11],R12 @g_ai4_ihevc_trans_32[3][0-15]
+ VRSHRN.S32 D16,Q8,#SHIFT_32 @ dual issue
+
+ VMUL.S32 Q7,Q0,Q12 @g_ai4_ihevc_trans_32[3][0-3]*o[0][0-3] R1
+ VMLA.S32 Q7,Q1,Q13 @g_ai4_ihevc_trans_32[3][4-7]*o[0][4-7] R1
+ VMLA.S32 Q7,Q2,Q14 @g_ai4_ihevc_trans_32[3][8-11]*o[0][8-11] R1
+ VMLA.S32 Q7,Q3,Q15 @g_ai4_ihevc_trans_32[3][12-15]*o[0][12-15] R1
+
+ @write to memory this should go to 2 6 10 14
+ VST1.S16 D16[0],[R2],R7
+ VADD.S32 D10,D8,D9 @g_ai4_ihevc_trans_32[1][0-3]*o[0][0-3]+g_ai4_ihevc_trans_32[1][4-7]*o[0][4-7]+g_ai4_ihevc_trans_32[1][8-11]*o[0][8-11]+g_ai4_ihevc_trans_32[1][12-15]*o[0][12-15]
+ VLD1.S32 {Q0,Q1},[R11]!
+ VLD1.S32 {Q2,Q3},[R11],R12 @g_ai4_ihevc_trans_32[5][0-15]
+ VMUL.S32 Q4,Q0,Q12 @g_ai4_ihevc_trans_32[5][0-3]*o[0][0-3] R1 -- dual issue
+ VST1.S16 D16[1],[R2],R7
+ VMLA.S32 Q4,Q1,Q13 @g_ai4_ihevc_trans_32[5][4-7]*o[0][4-7] R1 -- dual issue
+ VST1.S16 D16[2],[R2],R7
+ VMLA.S32 Q4,Q2,Q14 @g_ai4_ihevc_trans_32[5][8-11]*o[0][8-11] R1 -- dual issue
+ VST1.S16 D16[3],[R2],R7
+ VMLA.S32 Q4,Q3,Q15 @g_ai4_ihevc_trans_32[5][12-15]*o[0][12-15] R1
+ SUB R2,R2,R7, LSL #3
+ LSR R7,R7,#2
+ SUB R2,R2,R7, LSL #1
+
+ @--------------------Done Processing EO--------------------------
+
+ VLD1.S32 {Q0,Q1},[R11]!
+ VADD.S32 D11,D14,D15 @ dual issued
+ VLD1.S32 {Q2,Q3},[R11],R12 @g_ai4_ihevc_trans_32[7][0-15]
+ VMUL.S32 Q7,Q0,Q12 @g_ai4_ihevc_trans_32[7][0-3]*o[0][0-3] R1
+ VMLA.S32 Q7,Q1,Q13 @g_ai4_ihevc_trans_32[7][4-7]*o[0][4-7] R1
+ VLD1.S32 {Q0,Q1},[R11]!
+ VMLA.S32 Q7,Q2,Q14 @g_ai4_ihevc_trans_32[7][8-11]*o[0][8-11] R1
+ VMLA.S32 Q7,Q3,Q15 @g_ai4_ihevc_trans_32[7][12-15]*o[0][12-15] R1
+
+
+ VADD.S32 D12,D8,D9 @ dual issued
+ VLD1.S32 {Q2,Q3},[R11],R12 @g_ai4_ihevc_trans_32[9][0-15]
+ @Q5 Q6
+ VPADD.S32 D16,D10,D11
+ VADD.S32 D13,D14,D15
+
+ VMUL.S32 Q4,Q0,Q12 @g_ai4_ihevc_trans_32[9][0-3]*o[0][0-3] R1
+ VMLA.S32 Q4,Q1,Q13 @g_ai4_ihevc_trans_32[9][4-7]*o[0][4-7] R1
+ VLD1.S32 {Q0,Q1},[R11]!
+ VMLA.S32 Q4,Q2,Q14 @g_ai4_ihevc_trans_32[9][8-11]*o[0][8-11] R1
+ VMLA.S32 Q4,Q3,Q15 @g_ai4_ihevc_trans_32[9][12-15]*o[0][12-15] R1
+ VPADD.S32 D17,D12,D13
+
+
+ VLD1.S32 {Q2,Q3},[R11],R12 @g_ai4_ihevc_trans_32[11][0-15]
+ VRSHRN.S32 D16,Q8,#SHIFT_32 @ duall issue
+
+ VMUL.S32 Q7,Q0,Q12 @g_ai4_ihevc_trans_32[11][0-3]*o[0][0-3] R1
+ VMLA.S32 Q7,Q1,Q13 @g_ai4_ihevc_trans_32[11][4-7]*o[0][4-7] R1
+ VLD1.S32 {Q0,Q1},[R11]!
+ VMLA.S32 Q7,Q2,Q14 @g_ai4_ihevc_trans_32[11][8-11]*o[0][8-11] R1
+ VMLA.S32 Q7,Q3,Q15 @g_ai4_ihevc_trans_32[11][12-15]*o[0][12-15] R1
+ VADD.S32 D10,D8,D9 @g_ai4_ihevc_trans_32[9][0-3]*o[0][0-3]+g_ai4_ihevc_trans_32[9][4-7]*o[0][4-7]+g_ai4_ihevc_trans_32[9][8-11]*o[0][8-11]+g_ai4_ihevc_trans_32[9][12-15]*o[0][12-15]
+ VLD1.S32 {Q2,Q3},[R11],R12 @g_ai4_ihevc_trans_32[13][0-15]
+ @write to memory this should go to 1 3 5 7
+ ADD R2,R2,R7
+ LSL R7,R7,#1
+ VST1.S16 D16[0],[R2],R7
+ VMUL.S32 Q4,Q0,Q12 @g_ai4_ihevc_trans_32[13][0-3]*o[0][0-3] R1
+ VST1.S16 D16[1],[R2],R7
+ VMLA.S32 Q4,Q1,Q13 @g_ai4_ihevc_trans_32[13][4-7]*o[0][4-7] R1
+ VST1.S16 D16[2],[R2],R7
+ VMLA.S32 Q4,Q2,Q14 @g_ai4_ihevc_trans_32[13][8-11]*o[0][8-11] R1
+ VST1.S16 D16[3],[R2],R7
+ VMLA.S32 Q4,Q3,Q15 @g_ai4_ihevc_trans_32[13][12-15]*o[0][12-15] R1
+
+ VLD1.S32 {Q0,Q1},[R11]!
+ VADD.S32 D11,D14,D15 @ dual issue
+ VLD1.S32 {Q2,Q3},[R11],R12 @g_ai4_ihevc_trans_32[15][0-15]
+ VMUL.S32 Q7,Q0,Q12 @g_ai4_ihevc_trans_32[15][0-3]*o[0][0-3] R1 -- dual issue--
+ VMLA.S32 Q7,Q1,Q13 @g_ai4_ihevc_trans_32[15][4-7]*o[0][4-7] R1
+ VMLA.S32 Q7,Q2,Q14 @g_ai4_ihevc_trans_32[15][8-11]*o[0][8-11] R1
+ VMLA.S32 Q7,Q3,Q15 @g_ai4_ihevc_trans_32[15][12-15]*o[0][12-15] R1
+
+ VLD1.S32 {Q0,Q1},[R11]!
+ VADD.S32 D12,D8,D9 @ dual issued
+
+ VLD1.S32 {Q2,Q3},[R11],R12 @g_ai4_ihevc_trans_32[17][0-15]
+ @Q5 Q6
+ VPADD.S32 D16,D10,D11
+ VADD.S32 D13,D14,D15
+
+ VMUL.S32 Q4,Q0,Q12 @g_ai4_ihevc_trans_32[17][0-3]*o[0][0-3] R1
+ VMLA.S32 Q4,Q1,Q13 @g_ai4_ihevc_trans_32[17][4-7]*o[0][4-7] R1
+ VMLA.S32 Q4,Q2,Q14 @g_ai4_ihevc_trans_32[17][8-11]*o[0][8-11] R1
+ VMLA.S32 Q4,Q3,Q15 @g_ai4_ihevc_trans_32[17][12-15]*o[0][12-15] R1
+ VPADD.S32 D17,D12,D13
+
+ VLD1.S32 {Q0,Q1},[R11]!
+ VLD1.S32 {Q2,Q3},[R11],R12 @g_ai4_ihevc_trans_32[19][0-15]
+ VRSHRN.S32 D16,Q8,#SHIFT_32 @ dual issue
+
+ VMUL.S32 Q7,Q0,Q12 @g_ai4_ihevc_trans_32[19][0-3]*o[0][0-3] R1
+ VMLA.S32 Q7,Q1,Q13 @g_ai4_ihevc_trans_32[19][4-7]*o[0][4-7] R1
+ VLD1.S32 {Q0,Q1},[R11]!
+ VMLA.S32 Q7,Q2,Q14 @g_ai4_ihevc_trans_32[19][8-11]*o[0][8-11] R1
+ VMLA.S32 Q7,Q3,Q15 @g_ai4_ihevc_trans_32[19][12-15]*o[0][12-15] R1
+ VADD.S32 D10,D8,D9 @g_ai4_ihevc_trans_32[17][0-3]*o[0][0-3]+g_ai4_ihevc_trans_32[17][4-7]*o[0][4-7]+g_ai4_ihevc_trans_32[17][8-11]*o[0][8-11]+g_ai4_ihevc_trans_32[17][12-15]*o[0][12-15]
+ VLD1.S32 {Q2,Q3},[R11],R12 @g_ai4_ihevc_trans_32[21][0-15]
+ @write to memory this should go to 9 11 13 15
+ VST1.S16 D16[0],[R2],R7
+ VMUL.S32 Q4,Q0,Q12 @g_ai4_ihevc_trans_32[21][0-3]*o[0][0-3] R1
+ VST1.S16 D16[1],[R2],R7
+ VMLA.S32 Q4,Q1,Q13 @g_ai4_ihevc_trans_32[21][4-7]*o[0][4-7] R1
+ VST1.S16 D16[2],[R2],R7
+ VMLA.S32 Q4,Q2,Q14 @g_ai4_ihevc_trans_32[21][8-11]*o[0][8-11] R1
+ VST1.S16 D16[3],[R2],R7
+ VMLA.S32 Q4,Q3,Q15 @g_ai4_ihevc_trans_32[21][12-15]*o[0][12-15] R1
+
+
+ VLD1.S32 {Q0,Q1},[R11]!
+ VADD.S32 D11,D14,D15 @ dual issue
+ VLD1.S32 {Q2,Q3},[R11],R12 @g_ai4_ihevc_trans_32[23][0-15]
+ VMUL.S32 Q7,Q0,Q12 @g_ai4_ihevc_trans_32[23][0-3]*o[0][0-3] R1
+ VMLA.S32 Q7,Q1,Q13 @g_ai4_ihevc_trans_32[23][4-7]*o[0][4-7] R1
+ VLD1.S32 {Q0,Q1},[R11]!
+ VMLA.S32 Q7,Q2,Q14 @g_ai4_ihevc_trans_32[23][8-11]*o[0][8-11] R1
+ VMLA.S32 Q7,Q3,Q15 @g_ai4_ihevc_trans_32[23][12-15]*o[0][12-15] R1
+ VADD.S32 D12,D8,D9 @ dual issued
+ VLD1.S32 {Q2,Q3},[R11],R12 @g_ai4_ihevc_trans_32[25][0-15]
+
+ @Q5 Q6
+ VPADD.S32 D16,D10,D11
+ VADD.S32 D13,D14,D15
+
+ VMUL.S32 Q4,Q0,Q12 @g_ai4_ihevc_trans_32[25][0-3]*o[0][0-3] R1
+ VMLA.S32 Q4,Q1,Q13 @g_ai4_ihevc_trans_32[25][4-7]*o[0][4-7] R1
+ VMLA.S32 Q4,Q2,Q14 @g_ai4_ihevc_trans_32[25][8-11]*o[0][8-11] R1
+ VMLA.S32 Q4,Q3,Q15 @g_ai4_ihevc_trans_32[25][12-15]*o[0][12-15] R1
+ VPADD.S32 D17,D12,D13
+
+ VLD1.S32 {Q0,Q1},[R11]!
+ VLD1.S32 {Q2,Q3},[R11],R12 @g_ai4_ihevc_trans_32[27][0-15]
+ VRSHRN.S32 D16,Q8,#SHIFT_32
+
+ VMUL.S32 Q7,Q0,Q12 @g_ai4_ihevc_trans_32[27][0-3]*o[0][0-3] R1
+ VMLA.S32 Q7,Q1,Q13 @g_ai4_ihevc_trans_32[27][4-7]*o[0][4-7] R1
+ VLD1.S32 {Q0,Q1},[R11]!
+ VMLA.S32 Q7,Q2,Q14 @g_ai4_ihevc_trans_32[27][8-11]*o[0][8-11] R1
+ VMLA.S32 Q7,Q3,Q15 @g_ai4_ihevc_trans_32[27][12-15]*o[0][12-15] R1
+ VADD.S32 D10,D8,D9 @g_ai4_ihevc_trans_32[25][0-3]*o[0][0-3]+g_ai4_ihevc_trans_32[25][4-7]*o[0][4-7]+g_ai4_ihevc_trans_32[25][8-11]*o[0][8-11]+g_ai4_ihevc_trans_32[25][12-15]*o[0][12-15]
+ VLD1.S32 {Q2,Q3},[R11],R12 @g_ai4_ihevc_trans_32[29][0-15]
+ @write to memory this should go to 17 19 21 23
+ VST1.S16 D16[0],[R2],R7
+ VMUL.S32 Q4,Q0,Q12 @g_ai4_ihevc_trans_32[29][0-3]*o[0][0-3] R1
+ VST1.S16 D16[1],[R2],R7
+ VMLA.S32 Q4,Q1,Q13 @g_ai4_ihevc_trans_32[29][4-7]*o[0][4-7] R1
+ VST1.S16 D16[2],[R2],R7
+ VMLA.S32 Q4,Q2,Q14 @g_ai4_ihevc_trans_32[29][8-11]*o[0][8-11] R1
+ VST1.S16 D16[3],[R2],R7
+ VMLA.S32 Q4,Q3,Q15 @g_ai4_ihevc_trans_32[29][12-15]*o[0][12-15] R1
+
+ VADD.S32 D11,D14,D15 @ dual issue
+ VLD1.S32 {Q0,Q1},[R11]!
+
+ VLD1.S32 {Q2,Q3},[R11],R12 @g_ai4_ihevc_trans_32[31][0-15]
+ VMUL.S32 Q7,Q0,Q12 @g_ai4_ihevc_trans_32[31][0-3]*o[0][0-3] R1
+ VMLA.S32 Q7,Q1,Q13 @g_ai4_ihevc_trans_32[31][4-7]*o[0][4-7] R1
+ VMLA.S32 Q7,Q2,Q14 @g_ai4_ihevc_trans_32[31][8-11]*o[0][8-11] R1
+ VMLA.S32 Q7,Q3,Q15 @g_ai4_ihevc_trans_32[31][12-15]*o[0][12-15] R1
+
+
+ VADD.S32 D12,D8,D9
+ @Q5 Q6
+ VPADD.S32 D16,D10,D11
+
+ VADD.S32 D13,D14,D15
+
+
+ VPADD.S32 D17,D12,D13
+
+ VRSHRN.S32 D16,Q8,#SHIFT_32
+
+ @write to memory this should go to 25 27 29 31
+ VST1.S16 D16[0],[R2],R7
+ VST1.S16 D16[1],[R2],R7
+ VST1.S16 D16[2],[R2],R7
+ VST1.S16 D16[3],[R2],R7
+
+ SUB R2,R2,R7,LSL #4
+ LSR R7,R7,#1
+ SUB R2,R2,R7
+
+ ADD R2,R2,#2 @MOVE TO NEXT to next COLUMN
+
+ ADD R8,R8,#1 @increment loop cntr by 2 since we process loop as 2 cols
+ CMP R8,#32 @check loop cntr
+ BNE CORE_LOOP_32X32_VERT @jump acc
+
+ MOV R0,R3
+
+ ADD SP,SP,#32
+ vpop {d8 - d15}
+ LDMFD sp!,{r4-r12,PC} @stack store values of the arguments
+
+
+ .section .note.GNU-stack,"",%progbits
diff --git a/common/arm/ihevc_resi_trans_neon.c b/common/arm/ihevc_resi_trans_neon.c
new file mode 100644
index 0000000..280b8e9
--- /dev/null
+++ b/common/arm/ihevc_resi_trans_neon.c
@@ -0,0 +1,1356 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+ *******************************************************************************
+ * @file
+ * ihevc_resi_trans_neon.c
+ *
+ * @brief
+ * Contains definitions of functions for computing residue and fwd transform
+ *
+ * @author
+ * Ittiam
+ *
+ * @par List of Functions:
+ * - ihevc_resi_trans_4x4_neon()
+ * - ihevc_resi_trans_4x4_ttype1_neon()
+ * - ihevc_resi_trans_8x8_neon()
+ * - ihevc_resi_trans_16x16_neon()
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+
+/* System user files */
+#include "ihevc_typedefs.h"
+#include "ihevc_macros.h"
+#include "ihevc_defs.h"
+#include "ihevc_cmn_utils_neon.h"
+
+#include "ihevc_trans_tables.h"
+#include "ihevc_resi_trans.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Computes the residue of a 4x4 block (source minus prediction), runs the
+ *  two-pass 4x4 forward transform on it and returns the block SAD
+ *
+ * @par Description:
+ *  The first pass is kept at full 32-bit precision (no intermediate rounding);
+ *  the only rounding is the rounding right shift by 9 applied while narrowing
+ *  the second pass results to 16 bit.
+ *
+ * @param[in] pu1_src
+ *  Source pixels
+ *
+ * @param[in] pu1_pred
+ *  Prediction pixels
+ *
+ * @param[in] pi4_temp
+ *  Not used by this implementation
+ *
+ * @param[out] pi2_dst
+ *  Output coefficients, four 16-bit values per row
+ *
+ * @param[in] src_strd
+ *  Source stride, forwarded to the load helper
+ *
+ * @param[in] pred_strd
+ *  Prediction stride, forwarded to the load helper
+ *
+ * @param[in] dst_strd_chr_flag
+ *  Bit 0 is the chroma flag (selects load_unaligned_u8qi instead of
+ *  load_unaligned_u8q), the bits from 16 upwards are the output stride in
+ *  WORD16 units
+ *
+ * @returns block sad
+ *
+ *******************************************************************************
+ */
+UWORD32 ihevc_resi_trans_4x4_neon(
+    UWORD8 *pu1_src,
+    UWORD8 *pu1_pred,
+    WORD32 *pi4_temp,
+    WORD16 *pi2_dst,
+    WORD32 src_strd,
+    WORD32 pred_strd,
+    WORD32 dst_strd_chr_flag)
+{
+    /* Unpack the combined argument */
+    const WORD32 is_chroma = dst_strd_chr_flag & 1;
+    const WORD32 out_strd = dst_strd_chr_flag >> 16;
+
+    /* The three transform table entries this kernel needs */
+    const WORD32 coef_even = (WORD32)g_ai2_ihevc_trans_4[0][0];
+    const WORD32 coef_odd_a = (WORD32)g_ai2_ihevc_trans_4[1][0];
+    const WORD32 coef_odd_b = (WORD32)g_ai2_ihevc_trans_4[1][1];
+
+    uint8x16_t src_px, pred_px;
+
+    (void)pi4_temp;
+
+    /* NOTE(review): the "qi" helper presumably picks one plane out of */
+    /* interleaved chroma - confirm against ihevc_cmn_utils_neon.h */
+    if(is_chroma != 0)
+    {
+        src_px = load_unaligned_u8qi(pu1_src, src_strd);
+        pred_px = load_unaligned_u8qi(pu1_pred, pred_strd);
+    }
+    else
+    {
+        src_px = load_unaligned_u8q(pu1_src, src_strd);
+        pred_px = load_unaligned_u8q(pu1_pred, pred_strd);
+    }
+
+    /* Block SAD: accumulate |src - pred| in 16 bit, then reduce pairwise */
+    uint16x8_t abs_diff = vabdl_u8(vget_low_u8(src_px), vget_low_u8(pred_px));
+    abs_diff = vabal_u8(abs_diff, vget_high_u8(src_px), vget_high_u8(pred_px));
+    const uint64x2_t sad_2x64 = vpaddlq_u32(vpaddlq_u16(abs_diff));
+    const uint32x2_t sad_lo = vreinterpret_u32_u64(vget_low_u64(sad_2x64));
+    const uint32x2_t sad_hi = vreinterpret_u32_u64(vget_high_u64(sad_2x64));
+    const UWORD32 sad = vget_lane_u32(vadd_u32(sad_lo, sad_hi), 0);
+
+    /* Signed 16-bit residue, split into four 4-lane vectors and transposed */
+    const int16x8_t resi_lo =
+        vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(src_px), vget_low_u8(pred_px)));
+    const int16x8_t resi_hi =
+        vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(src_px), vget_high_u8(pred_px)));
+    int16x4_t v0 = vget_low_s16(resi_lo);
+    int16x4_t v1 = vget_high_s16(resi_lo);
+    int16x4_t v2 = vget_low_s16(resi_hi);
+    int16x4_t v3 = vget_high_s16(resi_hi);
+
+    transpose_s16_4x4d(&v0, &v1, &v2, &v3);
+
+    /* Pair the vectors as {v0, v1} and {v3, v2} so that a single add and a */
+    /* single subtract produce all four butterfly terms */
+    const int16x8_t fwd_pair = vcombine_s16(v0, v1);
+    const int16x8_t rev_pair = vcombine_s16(v3, v2);
+    const int16x8_t even_pair = vaddq_s16(fwd_pair, rev_pair); /* {v0 + v3, v1 + v2} */
+    const int16x8_t odd_pair = vsubq_s16(fwd_pair, rev_pair); /* {v0 - v3, v1 - v2} */
+    const int16x4_t even_outer = vget_low_s16(even_pair);
+    const int16x4_t even_inner = vget_high_s16(even_pair);
+    const int16x4_t odd_outer = vget_low_s16(odd_pair);
+    const int16x4_t odd_inner = vget_high_s16(odd_pair);
+
+    /* First pass, widened to 32 bit and left unrounded */
+    int32x4_t p0 = vmulq_n_s32(vaddl_s16(even_outer, even_inner), coef_even);
+    int32x4_t p2 = vmulq_n_s32(vsubl_s16(even_outer, even_inner), coef_even);
+    int32x4_t p1 = vmlal_n_s16(vmull_n_s16(odd_outer, coef_odd_a), odd_inner, coef_odd_b);
+    int32x4_t p3 = vmlsl_n_s16(vmull_n_s16(odd_outer, coef_odd_b), odd_inner, coef_odd_a);
+
+    transpose_s32_4x4(&p0, &p1, &p2, &p3);
+
+    /* Second pass: same butterfly, now entirely on 32-bit lanes */
+    const int32x4_t e_outer = vaddq_s32(p0, p3);
+    const int32x4_t e_inner = vaddq_s32(p1, p2);
+    const int32x4_t o_outer = vsubq_s32(p0, p3);
+    const int32x4_t o_inner = vsubq_s32(p1, p2);
+
+    const int32x4_t q0 = vmulq_n_s32(vaddq_s32(e_outer, e_inner), coef_even);
+    const int32x4_t q2 = vmulq_n_s32(vsubq_s32(e_outer, e_inner), coef_even);
+    const int32x4_t q1 = vmlaq_n_s32(vmulq_n_s32(o_outer, coef_odd_a), o_inner, coef_odd_b);
+    const int32x4_t q3 = vmlsq_n_s32(vmulq_n_s32(o_outer, coef_odd_b), o_inner, coef_odd_a);
+
+    /* Round, shift right by 9, narrow to 16 bit and write one row per store */
+    vst1_s16(pi2_dst, vrshrn_n_s32(q0, 9));
+    vst1_s16(pi2_dst + out_strd, vrshrn_n_s32(q1, 9));
+    vst1_s16(pi2_dst + 2 * out_strd, vrshrn_n_s32(q2, 9));
+    vst1_s16(pi2_dst + 3 * out_strd, vrshrn_n_s32(q3, 9));
+
+    return sad;
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * This function performs residue calculation and forward transform type 1
+ * on input pixels
+ *
+ * @par Description:
+ * Performs residue calculation by subtracting source and prediction and
+ * followed by forward transform
+ *
+ * @param[in] pu1_src
+ * Input 4x4 pixels
+ *
+ * @param[in] pu1_pred
+ * Prediction data
+ *
+ * @param[in] pi4_temp
+ * Temporary buffer of size 4x4 (not used by this implementation)
+ *
+ * @param[out] pi2_dst
+ * Output 4x4 coefficients
+ *
+ * @param[in] src_strd
+ * Input stride
+ *
+ * @param[in] pred_strd
+ * Prediction Stride
+ *
+ * @param[in] dst_strd_chr_flag
+ * Output Stride and Chroma Flag packed in the MS and LS 16-bit
+ * 0 - luma transform, 1 - chroma transform. Not used for 4x4 ttype1
+ *
+ * @returns block sad
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+UWORD32 ihevc_resi_trans_4x4_ttype1_neon(
+    UWORD8 *pu1_src,
+    UWORD8 *pu1_pred,
+    WORD32 *pi4_temp,
+    WORD16 *pi2_dst,
+    WORD32 src_strd,
+    WORD32 pred_strd,
+    WORD32 dst_strd_chr_flag)
+{
+    /* Multipliers shared by both passes */
+    /* NOTE(review): these appear to mirror g_ai4_ihevc_trans_dst_intr_4 - confirm */
+    const WORD32 k29 = 29;
+    const WORD32 k55 = 55;
+    const WORD32 k74 = 74;
+
+    /* Output stride sits in the upper 16 bits; the chroma flag is ignored here */
+    const WORD32 out_strd = dst_strd_chr_flag >> 16;
+
+    (void)pi4_temp;
+
+    /* Load source and prediction */
+    const uint8x16_t src_px = load_unaligned_u8q(pu1_src, src_strd);
+    const uint8x16_t pred_px = load_unaligned_u8q(pu1_pred, pred_strd);
+
+    /* Block SAD: accumulate |src - pred| in 16 bit, then reduce pairwise */
+    uint16x8_t abs_diff = vabdl_u8(vget_low_u8(src_px), vget_low_u8(pred_px));
+    abs_diff = vabal_u8(abs_diff, vget_high_u8(src_px), vget_high_u8(pred_px));
+    const uint64x2_t sad_2x64 = vpaddlq_u32(vpaddlq_u16(abs_diff));
+    const uint32x2_t sad_lo = vreinterpret_u32_u64(vget_low_u64(sad_2x64));
+    const uint32x2_t sad_hi = vreinterpret_u32_u64(vget_high_u64(sad_2x64));
+    const UWORD32 sad = vget_lane_u32(vadd_u32(sad_lo, sad_hi), 0);
+
+    /* Signed 16-bit residue, split into four 4-lane vectors and transposed */
+    const int16x8_t resi_lo =
+        vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(src_px), vget_low_u8(pred_px)));
+    const int16x8_t resi_hi =
+        vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(src_px), vget_high_u8(pred_px)));
+    int16x4_t r0 = vget_low_s16(resi_lo);
+    int16x4_t r1 = vget_high_s16(resi_lo);
+    int16x4_t r2 = vget_low_s16(resi_hi);
+    int16x4_t r3 = vget_high_s16(resi_hi);
+
+    transpose_s16_4x4d(&r0, &r1, &r2, &r3);
+
+    /* ---------------- First pass (16-bit in, 32-bit out) ---------------- */
+    const int32x4_t s03 = vaddl_s16(r0, r3);                   /* r0 + r3 */
+    const int32x4_t s13 = vaddl_s16(r1, r3);                   /* r1 + r3 */
+    const int32x4_t d01 = vsubl_s16(r0, r1);                   /* r0 - r1 */
+    const int32x4_t m2 = vmulq_n_s32(vmovl_s16(r2), k74);      /* 74 * r2 */
+
+    /* 29*(r0+r3) + 55*(r1+r3) + 74*r2 */
+    int32x4_t y0 = vmlaq_n_s32(vmlaq_n_s32(m2, s03, k29), s13, k55);
+    /* 74*(r0 + r1 - r3); r0 + r1 is formed in 16 bit before widening */
+    int32x4_t y1 = vmulq_n_s32(vsubl_s16(vadd_s16(r0, r1), r3), k74);
+    /* 29*(r0-r1) - 74*r2 + 55*(r0+r3) */
+    int32x4_t y2 = vmlaq_n_s32(vsubq_s32(vmulq_n_s32(d01, k29), m2), s03, k55);
+    /* 74*r2 + 55*(r0-r1) - 29*(r1+r3) */
+    int32x4_t y3 = vsubq_s32(vmlaq_n_s32(m2, d01, k55), vmulq_n_s32(s13, k29));
+
+    /* (x + 1) >> 1 */
+    const int32x4_t round_1 = vdupq_n_s32(1);
+    y0 = vshrq_n_s32(vaddq_s32(y0, round_1), 1);
+    y1 = vshrq_n_s32(vaddq_s32(y1, round_1), 1);
+    y2 = vshrq_n_s32(vaddq_s32(y2, round_1), 1);
+    y3 = vshrq_n_s32(vaddq_s32(y3, round_1), 1);
+
+    transpose_s32_4x4(&y0, &y1, &y2, &y3);
+
+    /* ---------------- Second pass (32-bit throughout) ------------------- */
+    const int32x4_t t03 = vaddq_s32(y0, y3);                   /* r0 + r3 */
+    const int32x4_t t13 = vaddq_s32(y1, y3);                   /* r1 + r3 */
+    const int32x4_t u01 = vsubq_s32(y0, y1);                   /* r0 - r1 */
+    const int32x4_t n2 = vmulq_n_s32(y2, k74);                 /* 74 * r2 */
+
+    int32x4_t z0 = vmlaq_n_s32(vmlaq_n_s32(n2, t03, k29), t13, k55);
+    int32x4_t z1 = vmulq_n_s32(vsubq_s32(vaddq_s32(y0, y1), y3), k74);
+    int32x4_t z2 = vmlaq_n_s32(vsubq_s32(vmulq_n_s32(u01, k29), n2), t03, k55);
+    int32x4_t z3 = vsubq_s32(vmlaq_n_s32(n2, u01, k55), vmulq_n_s32(t13, k29));
+
+    /* (x + 128) >> 8 */
+    const int32x4_t round_128 = vdupq_n_s32(128);
+    z0 = vshrq_n_s32(vaddq_s32(z0, round_128), 8);
+    z1 = vshrq_n_s32(vaddq_s32(z1, round_128), 8);
+    z2 = vshrq_n_s32(vaddq_s32(z2, round_128), 8);
+    z3 = vshrq_n_s32(vaddq_s32(z3, round_128), 8);
+
+    /* Narrow (truncating) to 16 bit and store; row 1 is written first */
+    vst1_s16(pi2_dst + out_strd, vmovn_s32(z1));
+    vst1_s16(pi2_dst, vmovn_s32(z0));
+    vst1_s16(pi2_dst + 2 * out_strd, vmovn_s32(z2));
+    vst1_s16(pi2_dst + 3 * out_strd, vmovn_s32(z3));
+
+    return sad;
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * Computes the 8x8 residue (source - prediction), its sum of absolute
+ * differences, and the forward 8x8 transform of the residue
+ *
+ * @par Description:
+ * Subtracts prediction from source row by row while accumulating the SAD,
+ * then applies the two 1-D transform stages and stores 16-bit coefficients
+ *
+ * @param[in] pu1_src
+ * Input 8x8 pixels (only every other byte of a row is used when the chroma flag is set)
+ *
+ * @param[in] pu1_pred
+ * Prediction data, read with the same layout as pu1_src
+ *
+ * @param[in] pi4_temp
+ * Temporary buffer; not used by this implementation
+ *
+ * @param[out] pi2_dst
+ * Output 8x8 coefficients
+ *
+ * @param[in] src_strd
+ * Input stride
+ *
+ * @param[in] pred_strd
+ * Prediction Stride
+ *
+ * @param[in] dst_strd_chr_flag
+ * Output stride in the upper 16 bits (value >> 16) and chroma flag in bit 0
+ *
+ * @returns Sum of absolute differences between source and prediction
+ *
+ * @remarks
+ * Stage 1 results stay unshifted in 32 bits; rounding happens once, in stage 2
+ *
+ *******************************************************************************
+ */
+UWORD32 ihevc_resi_trans_8x8_neon(
+ UWORD8 *pu1_src,
+ UWORD8 *pu1_pred,
+ WORD32 *pi4_temp,
+ WORD16 *pi2_dst,
+ WORD32 src_strd,
+ WORD32 pred_strd,
+ WORD32 dst_strd_chr_flag)
+{
+ int16x8_t diff_16[8];
+ int16x8_t abs = vdupq_n_s16(0);
+ int32x4_t tmp_a;
+ int64x2_t tmp_b;
+ int32x2_t sad_v;
+ int32x4x2_t a0, a1, a2, a3, a4, a5, a6, a7;
+ int chroma_flag = dst_strd_chr_flag & 1; /* bit 0 selects the de-interleaving load path */
+ int dst_strd = dst_strd_chr_flag >> 16; /* output stride comes from the upper 16 bits */
+ UWORD32 sad;
+
+ (void)pi4_temp; /* parameter is not used here */
+#define RESIDUE(k, is_chroma) \
+ if(!is_chroma) \
+ { \
+ const uint8x8_t s##k = vld1_u8(pu1_src); \
+ const uint8x8_t p##k = vld1_u8(pu1_pred); \
+ diff_16[k] = vreinterpretq_s16_u16(vsubl_u8(s##k, p##k)); \
+ pu1_src += src_strd; \
+ pu1_pred += pred_strd; \
+ abs = vaddq_s16(abs, vabsq_s16(diff_16[k])); \
+ } \
+ else \
+ { \
+ const uint8x8_t s##k = vld2_u8(pu1_src).val[0]; \
+ const uint8x8_t p##k = vld2_u8(pu1_pred).val[0]; \
+ diff_16[k] = vreinterpretq_s16_u16(vsubl_u8(s##k, p##k)); \
+ pu1_src += src_strd; \
+ pu1_pred += pred_strd; \
+ abs = vaddq_s16(abs, vabsq_s16(diff_16[k])); \
+ }
+
+ // Stage 1: residue and |residue| accumulation per row, transpose, first 1-D transform
+ RESIDUE(0, chroma_flag);
+ RESIDUE(1, chroma_flag);
+ RESIDUE(2, chroma_flag);
+ RESIDUE(3, chroma_flag);
+ RESIDUE(4, chroma_flag);
+ RESIDUE(5, chroma_flag);
+ RESIDUE(6, chroma_flag);
+ RESIDUE(7, chroma_flag);
+
+ tmp_a = vpaddlq_s16(abs); /* 8 x 16-bit partial sums -> 4 x 32-bit */
+ tmp_b = vpaddlq_s32(tmp_a); /* 4 x 32-bit -> 2 x 64-bit */
+ sad_v = vadd_s32(vreinterpret_s32_s64(vget_low_s64(tmp_b)),
+ vreinterpret_s32_s64(vget_high_s64(tmp_b)));
+ sad = vget_lane_s32(sad_v, 0); /* lane 0 of the sum of both 64-bit halves */
+
+ transpose_s16_8x8(
+ &diff_16[0],
+ &diff_16[1],
+ &diff_16[2],
+ &diff_16[3],
+ &diff_16[4],
+ &diff_16[5],
+ &diff_16[6],
+ &diff_16[7]);
+
+ {
+ const int16x8_t o3 = vsubq_s16(diff_16[3], diff_16[4]); /*C3 - C4*/
+ const int16x8_t o2 = vsubq_s16(diff_16[2], diff_16[5]); /*C2 - C5*/
+ const int16x8_t o1 = vsubq_s16(diff_16[1], diff_16[6]); /*C1 - C6*/
+ const int16x8_t o0 = vsubq_s16(diff_16[0], diff_16[7]); /*C0 - C7*/
+ const int16x8_t e0 = vaddq_s16(diff_16[0], diff_16[7]); /*C0 + C7*/
+ const int16x8_t e1 = vaddq_s16(diff_16[1], diff_16[6]); /*C1 + C6*/
+ const int16x8_t e2 = vaddq_s16(diff_16[2], diff_16[5]); /*C2 + C5*/
+ const int16x8_t e3 = vaddq_s16(diff_16[3], diff_16[4]); /*C3 + C4*/
+
+ const int16x8_t ee0 = vaddq_s16(e0, e3); /*C0 + C3 + C4 + C7*/
+ const int16x8_t ee1 = vaddq_s16(e1, e2); /*C1 + C2 + C5 + C6*/
+ const int16x8_t eo0 = vsubq_s16(e0, e3); /*C0 - C3 - C4 + C7*/
+ const int16x8_t eo1 = vsubq_s16(e1, e2); /*C1 - C2 - C5 + C6*/
+
+ /*C0 + C1 + C2 + C3 + C4 + C5 + C6 + C7*/
+ const int16x8_t eee = vaddq_s16(ee1, ee0);
+ /*C0 - C1 - C2 + C3 + C4 - C5 - C6 + C7*/
+ const int16x8_t eeo = vsubq_s16(ee0, ee1);
+
+ /*F2[0] of 83*(C0 - C3 - C4 + C7)*/
+ a2.val[0] = vmull_n_s16(vget_low_s16(eo0), 83);
+ /*F6[0] of 36*(C0 - C3 - C4 + C7)*/
+ a6.val[0] = vmull_n_s16(vget_low_s16(eo0), 36);
+ /*F2[1] of 83*(C0 - C3 - C4 + C7)*/
+ a2.val[1] = vmull_n_s16(vget_high_s16(eo0), 83);
+ /*F6[1] of 36*(C0 - C3 - C4 + C7)*/
+ a6.val[1] = vmull_n_s16(vget_high_s16(eo0), 36);
+
+ /*F6[1] = 36*(C0 - C3 - C4 + C7) - 83*(C1 - C2 - C5 + C6)*/
+ a6.val[1] = vmlsl_n_s16(a6.val[1], vget_high_s16(eo1), 83);
+ /*F2[1] = 83*(C0 - C3 - C4 + C7) + 36*(C1 - C2 - C5 + C6)*/
+ a2.val[1] = vmlal_n_s16(a2.val[1], vget_high_s16(eo1), 36);
+ /*F6[0] = 36*(C0 - C3 - C4 + C7) - 83*(C1 - C2 - C5 + C6)*/
+ a6.val[0] = vmlsl_n_s16(a6.val[0], vget_low_s16(eo1), 83);
+ /*F2[0] = 83*(C0 - C3 - C4 + C7) + 36*(C1 - C2 - C5 + C6)*/
+ a2.val[0] = vmlal_n_s16(a2.val[0], vget_low_s16(eo1), 36);
+
+ /*F0[0] = 64*(C0 + C1 + C2 + C3 + C4 + C5 + C6 + C7)*/
+ a0.val[0] = vshll_n_s16(vget_low_s16(eee), 6);
+ /*F0[1] = 64*(C0 + C1 + C2 + C3 + C4 + C5 + C6 + C7)*/
+ a0.val[1] = vshll_n_s16(vget_high_s16(eee), 6);
+ /*F4[0] = 64*(C0 - C1 - C2 + C3 + C4 - C5 - C6 + C7)*/
+ a4.val[0] = vshll_n_s16(vget_low_s16(eeo), 6);
+ /*F4[1] = 64*(C0 - C1 - C2 + C3 + C4 - C5 - C6 + C7)*/
+ a4.val[1] = vshll_n_s16(vget_high_s16(eeo), 6);
+
+ a7.val[0] = vmull_n_s16(vget_low_s16(o0), 18); /*F7[0] = 18*(C0 - C7)*/
+ a5.val[0] = vmull_n_s16(vget_low_s16(o0), 50); /*F5[0] = 50*(C0 - C7)*/
+ a3.val[0] = vmull_n_s16(vget_low_s16(o0), 75); /*F3[0] = 75*(C0 - C7)*/
+ a1.val[0] = vmull_n_s16(vget_low_s16(o0), 89); /*F1[0] = 89*(C0 - C7)*/
+ a1.val[1] = vmull_n_s16(vget_high_s16(o0), 89); /*F1[1] = 89*(C0 - C7)*/
+ a3.val[1] = vmull_n_s16(vget_high_s16(o0), 75); /*F3[1] = 75*(C0 - C7)*/
+ a5.val[1] = vmull_n_s16(vget_high_s16(o0), 50); /*F5[1] = 50*(C0 - C7)*/
+ a7.val[1] = vmull_n_s16(vget_high_s16(o0), 18); /*F7[1] = 18*(C0 - C7)*/
+
+ /*F7[0] = 18*(C0 - C7) - 50*(C1 - C6)*/
+ a7.val[0] = vmlsl_n_s16(a7.val[0], vget_low_s16(o1), 50);
+ /*F5[0] = 50*(C0 - C7) - 89*(C1 - C6)*/
+ a5.val[0] = vmlsl_n_s16(a5.val[0], vget_low_s16(o1), 89);
+ /*F3[0] = 75*(C0 - C7) - 18*(C1 - C6)*/
+ a3.val[0] = vmlsl_n_s16(a3.val[0], vget_low_s16(o1), 18);
+ /*F1[0] = 89*(C0 - C7) + 75*(C1 - C6)*/
+ a1.val[0] = vmlal_n_s16(a1.val[0], vget_low_s16(o1), 75);
+ /*F1[1] = 89*(C0 - C7) + 75*(C1 - C6)*/
+ a1.val[1] = vmlal_n_s16(a1.val[1], vget_high_s16(o1), 75);
+ /*F3[1] = 75*(C0 - C7) - 18*(C1 - C6)*/
+ a3.val[1] = vmlsl_n_s16(a3.val[1], vget_high_s16(o1), 18);
+ /*F5[1] = 50*(C0 - C7) - 89*(C1 - C6)*/
+ a5.val[1] = vmlsl_n_s16(a5.val[1], vget_high_s16(o1), 89);
+ /*F7[1] = 18*(C0 - C7) - 50*(C1 - C6)*/
+ a7.val[1] = vmlsl_n_s16(a7.val[1], vget_high_s16(o1), 50);
+
+ /*F7[0] = 18*(C0 - C7) - 50*(C1 - C6) + 75*(C2 - C5)*/
+ a7.val[0] = vmlal_n_s16(a7.val[0], vget_low_s16(o2), 75);
+ /*F5[0] = 50*(C0 - C7) - 89*(C1 - C6) + 18*(C2 - C5)*/
+ a5.val[0] = vmlal_n_s16(a5.val[0], vget_low_s16(o2), 18);
+ /*F3[0] = 75*(C0 - C7) - 18*(C1 - C6) - 89*(C2 - C5)*/
+ a3.val[0] = vmlsl_n_s16(a3.val[0], vget_low_s16(o2), 89);
+ /*F1[0] = 89*(C0 - C7) + 75*(C1 - C6) + 50*(C2 - C5)*/
+ a1.val[0] = vmlal_n_s16(a1.val[0], vget_low_s16(o2), 50);
+ /*F1[1] = 89*(C0 - C7) + 75*(C1 - C6) + 50*(C2 - C5)*/
+ a1.val[1] = vmlal_n_s16(a1.val[1], vget_high_s16(o2), 50);
+ /*F3[1] = 75*(C0 - C7) - 18*(C1 - C6) - 89*(C2 - C5)*/
+ a3.val[1] = vmlsl_n_s16(a3.val[1], vget_high_s16(o2), 89);
+ /*F5[1] = 50*(C0 - C7) - 89*(C1 - C6) + 18*(C2 - C5)*/
+ a5.val[1] = vmlal_n_s16(a5.val[1], vget_high_s16(o2), 18);
+ /*F7[1] = 18*(C0 - C7) - 50*(C1 - C6) + 75*(C2 - C5)*/
+ a7.val[1] = vmlal_n_s16(a7.val[1], vget_high_s16(o2), 75);
+
+ /*F7[0] = 18*(C0 - C7) - 50*(C1 - C6) + 75*(C2 - C5) - 89*(C3 - C4)*/
+ a7.val[0] = vmlsl_n_s16(a7.val[0], vget_low_s16(o3), 89);
+ /*F5[0] = 50*(C0 - C7) - 89*(C1 - C6) + 18*(C2 - C5) + 75*(C3 - C4)*/
+ a5.val[0] = vmlal_n_s16(a5.val[0], vget_low_s16(o3), 75);
+ /*F3[0] = 75*(C0 - C7) - 18*(C1 - C6) - 89*(C2 - C5) - 50*(C3 - C4)*/
+ a3.val[0] = vmlsl_n_s16(a3.val[0], vget_low_s16(o3), 50);
+ /*F1[0] = 89*(C0 - C7) + 75*(C1 - C6) + 50*(C2 - C5) + 18*(C3 - C4)*/
+ a1.val[0] = vmlal_n_s16(a1.val[0], vget_low_s16(o3), 18);
+ /*F1[1] = 89*(C0 - C7) + 75*(C1 - C6) + 50*(C2 - C5) + 18*(C3 - C4)*/
+ a1.val[1] = vmlal_n_s16(a1.val[1], vget_high_s16(o3), 18);
+ /*F3[1] = 75*(C0 - C7) - 18*(C1 - C6) - 89*(C2 - C5) - 50*(C3 - C4)*/
+ a3.val[1] = vmlsl_n_s16(a3.val[1], vget_high_s16(o3), 50);
+ /*F5[1] = 50*(C0 - C7) - 89*(C1 - C6) + 18*(C2 - C5) + 75*(C3 - C4)*/
+ a5.val[1] = vmlal_n_s16(a5.val[1], vget_high_s16(o3), 75);
+ /*F7[1] = 18*(C0 - C7) - 50*(C1 - C6) + 75*(C2 - C5) - 89*(C3 - C4)*/
+ a7.val[1] = vmlsl_n_s16(a7.val[1], vget_high_s16(o3), 89);
+ }
+
+ //Stage 2, part 1: built from F0..F3 of stage 1, written to the first 4 columns of pi2_dst
+ {
+ int32x4_t h0, h1, h2, h3, h4, h5, h6, h7;
+ int32x4_t e0_2, e1_2, e2_2, e3_2;
+ int32x4_t o0_2, o1_2, o2_2, o3_2;
+ int32x4_t ee1_2, eo1_2, eo0_2, ee0_2;
+ int16x4_t row0, row1, row2, row3, row4, row5, row6, row7;
+
+ /*Transposing F0..F3 of transform stage 1, step 1: interleave 32-bit lanes*/
+ int32x4x2_t b1 = vtrnq_s32(a0.val[1], a1.val[1]);
+ int32x4x2_t b3 = vtrnq_s32(a2.val[1], a3.val[1]);
+ int32x4x2_t b0 = vtrnq_s32(a0.val[0], a1.val[0]);
+ int32x4x2_t b2 = vtrnq_s32(a2.val[0], a3.val[0]);
+
+ /*Transposing F0..F3 of transform stage 1, step 2: recombine 64-bit halves*/
+ a0.val[0] = vcombine_s32(vget_low_s32(b0.val[0]), vget_low_s32(b2.val[0]));
+ a2.val[0] = vcombine_s32(vget_high_s32(b0.val[0]), vget_high_s32(b2.val[0]));
+ a1.val[0] = vcombine_s32(vget_low_s32(b0.val[1]), vget_low_s32(b2.val[1]));
+ a3.val[0] = vcombine_s32(vget_high_s32(b0.val[1]), vget_high_s32(b2.val[1]));
+ a0.val[1] = vcombine_s32(vget_low_s32(b1.val[0]), vget_low_s32(b3.val[0]));
+ a2.val[1] = vcombine_s32(vget_high_s32(b1.val[0]), vget_high_s32(b3.val[0]));
+ a1.val[1] = vcombine_s32(vget_low_s32(b1.val[1]), vget_low_s32(b3.val[1]));
+ a3.val[1] = vcombine_s32(vget_high_s32(b1.val[1]), vget_high_s32(b3.val[1]));
+
+ o0_2 = vsubq_s32(a0.val[0], a3.val[1]); /*B0 - B7*/
+ o1_2 = vsubq_s32(a1.val[0], a2.val[1]); /*B1 - B6*/
+ o2_2 = vsubq_s32(a2.val[0], a1.val[1]); /*B2 - B5*/
+ o3_2 = vsubq_s32(a3.val[0], a0.val[1]); /*B3 - B4*/
+ e3_2 = vaddq_s32(a3.val[0], a0.val[1]); /*B3 + B4*/
+ e2_2 = vaddq_s32(a2.val[0], a1.val[1]); /*B2 + B5*/
+ e1_2 = vaddq_s32(a1.val[0], a2.val[1]); /*B1 + B6*/
+ e0_2 = vaddq_s32(a0.val[0], a3.val[1]); /*B0 + B7*/
+
+ eo1_2 = vsubq_s32(e1_2, e2_2); /*B1 - B2 - B5 + B6*/
+ ee1_2 = vaddq_s32(e1_2, e2_2); /*B1 + B2 + B5 + B6*/
+ eo0_2 = vsubq_s32(e0_2, e3_2); /*B0 - B3 - B4 + B7*/
+ ee0_2 = vaddq_s32(e0_2, e3_2); /*B0 + B3 + B4 + B7*/
+
+ /* H4 = B0 - B1 - B2 + B3 + B4 - B5 - B6 + B7*/
+ h4 = vsubq_s32(ee0_2, ee1_2);
+ /* H0 = B0 + B1 + B2 + B3 + B4 + B5 + B6 + B7*/
+ h0 = vaddq_s32(ee0_2, ee1_2);
+ /* H0 is a plain sum (no multiply by 64), so the rounding shift is 5 rather than 11*/
+ row0 = vrshrn_n_s32(h0, 5);
+ /*First half-row of row 1 of transform stage 2 (H0) stored*/
+ vst1_s16(pi2_dst, row0);
+ /* H4 is a plain difference (no multiply by 64), so the rounding shift is 5*/
+ row4 = vrshrn_n_s32(h4, 5);
+ /*First half-row of row 5 of transform stage 2 (H4) stored*/
+ vst1_s16(pi2_dst + 4 * dst_strd, row4);
+
+ /* H6 = 36*(B0 - B3 - B4 + B7) */
+ h6 = vmulq_n_s32(eo0_2, 36);
+ /* H2 = 83*(B0 - B3 - B4 + B7) */
+ h2 = vmulq_n_s32(eo0_2, 83);
+ /*H2 = 83*(B0 - B3 - B4 + B7) + 36*(B1 - B2 - B5 + B6)*/
+ h2 = vmlaq_n_s32(h2, eo1_2, 36);
+ /*H6 = 36*(B0 - B3 - B4 + B7) - 83*(B1 - B2 - B5 + B6)*/
+ h6 = vmlsq_n_s32(h6, eo1_2, 83);
+ /* Rounding right shift by 11 and narrowing of H6 to 16 bits*/
+ row6 = vrshrn_n_s32(h6, 11);
+ /*First half-row of row 7 of transform stage 2 (H6) stored*/
+ vst1_s16(pi2_dst + 6 * dst_strd, row6);
+ /* Rounding right shift by 11 and narrowing of H2 to 16 bits*/
+ row2 = vrshrn_n_s32(h2, 11);
+ /*First half-row of row 3 of transform stage 2 (H2) stored*/
+ vst1_s16(pi2_dst + 2 * dst_strd, row2);
+
+ h1 = vmulq_n_s32(o0_2, 89); /* H1 = 89*(B0 - B7) */
+ h3 = vmulq_n_s32(o0_2, 75); /* H3 = 75*(B0 - B7) */
+ h5 = vmulq_n_s32(o0_2, 50); /* H5 = 50*(B0 - B7) */
+ h7 = vmulq_n_s32(o0_2, 18); /* H7 = 18*(B0 - B7) */
+
+ h7 = vmlsq_n_s32(h7, o1_2, 50); /* H7 = 18*(B0 - B7) - 50*(B1 - B6) */
+ h5 = vmlsq_n_s32(h5, o1_2, 89); /* H5 = 50*(B0 - B7) - 89*(B1 - B6) */
+ h3 = vmlsq_n_s32(h3, o1_2, 18); /* H3 = 75*(B0 - B7) - 18*(B1 - B6) */
+ h1 = vmlaq_n_s32(h1, o1_2, 75); /* H1 = 89*(B0 - B7) + 75*(B1 - B6) */
+
+ /* H1 = 89*(B0 - B7) + 75*(B1 - B6) + 50*(B2 - B5) */
+ h1 = vmlaq_n_s32(h1, o2_2, 50);
+ /* H3 = 75*(B0 - B7) - 18*(B1 - B6) - 89*(B2 - B5) */
+ h3 = vmlsq_n_s32(h3, o2_2, 89);
+ /* H5 = 50*(B0 - B7) - 89*(B1 - B6) + 18*(B2 - B5) */
+ h5 = vmlaq_n_s32(h5, o2_2, 18);
+ /* H7 = 18*(B0 - B7) - 50*(B1 - B6) + 75*(B2 - B5) */
+ h7 = vmlaq_n_s32(h7, o2_2, 75);
+
+ /* H7 = 18*(B0 - B7) - 50*(B1 - B6) + 75*(B2 - B5) - 89*(B3 - B4) */
+ h7 = vmlsq_n_s32(h7, o3_2, 89);
+ /* Rounding right shift by 11 and narrowing of H7 to 16 bits*/
+ row7 = vrshrn_n_s32(h7, 11);
+ /*First half-row of row 8 of transform stage 2 (H7) stored*/
+ vst1_s16(pi2_dst + 7 * dst_strd, row7);
+ /* H5 = 50*(B0 - B7) - 89*(B1 - B6) + 18*(B2 - B5) + 75*(B3 - B4) */
+ h5 = vmlaq_n_s32(h5, o3_2, 75);
+ /* Rounding right shift by 11 and narrowing of H5 to 16 bits*/
+ row5 = vrshrn_n_s32(h5, 11);
+ /*First half-row of row 6 of transform stage 2 (H5) stored*/
+ vst1_s16(pi2_dst + 5 * dst_strd, row5);
+ /* H3 = 75*(B0 - B7) - 18*(B1 - B6) - 89*(B2 - B5) - 50*(B3 - B4) */
+ h3 = vmlsq_n_s32(h3, o3_2, 50);
+ /* Rounding right shift by 11 and narrowing of H3 to 16 bits*/
+ row3 = vrshrn_n_s32(h3, 11);
+ /*First half-row of row 4 of transform stage 2 (H3) stored*/
+ vst1_s16(pi2_dst + 3 * dst_strd, row3);
+ /* H1 = 89*(B0 - B7) + 75*(B1 - B6) + 50*(B2 - B5) + 18*(B3 - B4) */
+ h1 = vmlaq_n_s32(h1, o3_2, 18);
+ /* Rounding right shift by 11 and narrowing of H1 to 16 bits*/
+ row1 = vrshrn_n_s32(h1, 11);
+ /*First half-row of row 2 of transform stage 2 (H1) stored*/
+ vst1_s16(pi2_dst + dst_strd, row1);
+ }
+
+ pi2_dst += 4; /* stage 2, part 2 writes 4 columns further to the right */
+
+ {
+ int32x4_t h0, h1, h2, h3, h4, h5, h6, h7;
+ int32x4_t e0_2, e1_2, e2_2, e3_2;
+ int32x4_t o0_2, o1_2, o2_2, o3_2;
+ int32x4_t ee1_2, eo1_2, eo0_2, ee0_2;
+ int16x4_t row0, row1, row2, row3, row4, row5, row6, row7;
+
+ /*Transposing F4..F7 of transform stage 1, step 1: interleave 32-bit lanes*/
+ int32x4x2_t b1 = vtrnq_s32(a4.val[1], a5.val[1]);
+ int32x4x2_t b3 = vtrnq_s32(a6.val[1], a7.val[1]);
+ int32x4x2_t b0 = vtrnq_s32(a4.val[0], a5.val[0]);
+ int32x4x2_t b2 = vtrnq_s32(a6.val[0], a7.val[0]);
+
+ /*Transposing F4..F7, step 2: recombine 64-bit halves (a0..a3 are reused as scratch)*/
+ a0.val[0] = vcombine_s32(vget_low_s32(b0.val[0]), vget_low_s32(b2.val[0]));
+ a2.val[0] = vcombine_s32(vget_high_s32(b0.val[0]), vget_high_s32(b2.val[0]));
+ a1.val[0] = vcombine_s32(vget_low_s32(b0.val[1]), vget_low_s32(b2.val[1]));
+ a3.val[0] = vcombine_s32(vget_high_s32(b0.val[1]), vget_high_s32(b2.val[1]));
+ a0.val[1] = vcombine_s32(vget_low_s32(b1.val[0]), vget_low_s32(b3.val[0]));
+ a2.val[1] = vcombine_s32(vget_high_s32(b1.val[0]), vget_high_s32(b3.val[0]));
+ a1.val[1] = vcombine_s32(vget_low_s32(b1.val[1]), vget_low_s32(b3.val[1]));
+ a3.val[1] = vcombine_s32(vget_high_s32(b1.val[1]), vget_high_s32(b3.val[1]));
+
+ o0_2 = vsubq_s32(a0.val[0], a3.val[1]); /*B0 - B7*/
+ o1_2 = vsubq_s32(a1.val[0], a2.val[1]); /*B1 - B6*/
+ o2_2 = vsubq_s32(a2.val[0], a1.val[1]); /*B2 - B5*/
+ o3_2 = vsubq_s32(a3.val[0], a0.val[1]); /*B3 - B4*/
+ e3_2 = vaddq_s32(a3.val[0], a0.val[1]); /*B3 + B4*/
+ e2_2 = vaddq_s32(a2.val[0], a1.val[1]); /*B2 + B5*/
+ e1_2 = vaddq_s32(a1.val[0], a2.val[1]); /*B1 + B6*/
+ e0_2 = vaddq_s32(a0.val[0], a3.val[1]); /*B0 + B7*/
+
+ eo1_2 = vsubq_s32(e1_2, e2_2); /*B1 - B2 - B5 + B6*/
+ ee1_2 = vaddq_s32(e1_2, e2_2); /*B1 + B2 + B5 + B6*/
+ eo0_2 = vsubq_s32(e0_2, e3_2); /*B0 - B3 - B4 + B7*/
+ ee0_2 = vaddq_s32(e0_2, e3_2); /*B0 + B3 + B4 + B7*/
+
+ /* H4 = B0 - B1 - B2 + B3 + B4 - B5 - B6 + B7*/
+ h4 = vsubq_s32(ee0_2, ee1_2);
+ /* H0 = B0 + B1 + B2 + B3 + B4 + B5 + B6 + B7*/
+ h0 = vaddq_s32(ee0_2, ee1_2);
+ /* H0 is a plain sum (no multiply by 64), so the rounding shift is 5 rather than 11*/
+ row0 = vrshrn_n_s32(h0, 5);
+ /*Second half-row of row 1 of transform stage 2 (H0) stored*/
+ vst1_s16(pi2_dst, row0);
+ /* H4 is a plain difference (no multiply by 64), so the rounding shift is 5*/
+ row4 = vrshrn_n_s32(h4, 5);
+ /*Second half-row of row 5 of transform stage 2 (H4) stored*/
+ vst1_s16(pi2_dst + 4 * dst_strd, row4);
+
+ /* H6 = 36*(B0 - B3 - B4 + B7) */
+ h6 = vmulq_n_s32(eo0_2, 36);
+ /* H2 = 83*(B0 - B3 - B4 + B7) */
+ h2 = vmulq_n_s32(eo0_2, 83);
+ /*H2 = 83*(B0 - B3 - B4 + B7) + 36*(B1 - B2 - B5 + B6)*/
+ h2 = vmlaq_n_s32(h2, eo1_2, 36);
+ /*H6 = 36*(B0 - B3 - B4 + B7) - 83*(B1 - B2 - B5 + B6)*/
+ h6 = vmlsq_n_s32(h6, eo1_2, 83);
+ /* Rounding right shift by 11 and narrowing of H6 to 16 bits*/
+ row6 = vrshrn_n_s32(h6, 11);
+ /*Second half-row of row 7 of transform stage 2 (H6) stored*/
+ vst1_s16(pi2_dst + 6 * dst_strd, row6);
+ /* Rounding right shift by 11 and narrowing of H2 to 16 bits*/
+ row2 = vrshrn_n_s32(h2, 11);
+ /*Second half-row of row 3 of transform stage 2 (H2) stored*/
+ vst1_s16(pi2_dst + 2 * dst_strd, row2);
+
+ h1 = vmulq_n_s32(o0_2, 89); /* H1 = 89*(B0 - B7) */
+ h3 = vmulq_n_s32(o0_2, 75); /* H3 = 75*(B0 - B7) */
+ h5 = vmulq_n_s32(o0_2, 50); /* H5 = 50*(B0 - B7) */
+ h7 = vmulq_n_s32(o0_2, 18); /* H7 = 18*(B0 - B7) */
+
+ h7 = vmlsq_n_s32(h7, o1_2, 50); /* H7 = 18*(B0 - B7) - 50*(B1 - B6) */
+ h5 = vmlsq_n_s32(h5, o1_2, 89); /* H5 = 50*(B0 - B7) - 89*(B1 - B6) */
+ h3 = vmlsq_n_s32(h3, o1_2, 18); /* H3 = 75*(B0 - B7) - 18*(B1 - B6) */
+ h1 = vmlaq_n_s32(h1, o1_2, 75); /* H1 = 89*(B0 - B7) + 75*(B1 - B6) */
+
+ /* H1 = 89*(B0 - B7) + 75*(B1 - B6) + 50*(B2 - B5) */
+ h1 = vmlaq_n_s32(h1, o2_2, 50);
+ /* H3 = 75*(B0 - B7) - 18*(B1 - B6) - 89*(B2 - B5) */
+ h3 = vmlsq_n_s32(h3, o2_2, 89);
+ /* H5 = 50*(B0 - B7) - 89*(B1 - B6) + 18*(B2 - B5) */
+ h5 = vmlaq_n_s32(h5, o2_2, 18);
+ /* H7 = 18*(B0 - B7) - 50*(B1 - B6) + 75*(B2 - B5) */
+ h7 = vmlaq_n_s32(h7, o2_2, 75);
+
+ /* H7 = 18*(B0 - B7) - 50*(B1 - B6) + 75*(B2 - B5) - 89*(B3 - B4) */
+ h7 = vmlsq_n_s32(h7, o3_2, 89);
+ /* Rounding right shift by 11 and narrowing of H7 to 16 bits*/
+ row7 = vrshrn_n_s32(h7, 11);
+ /*Second half-row of row 8 of transform stage 2 (H7) stored*/
+ vst1_s16(pi2_dst + 7 * dst_strd, row7);
+ /* H5 = 50*(B0 - B7) - 89*(B1 - B6) + 18*(B2 - B5) + 75*(B3 - B4) */
+ h5 = vmlaq_n_s32(h5, o3_2, 75);
+ /* Rounding right shift by 11 and narrowing of H5 to 16 bits*/
+ row5 = vrshrn_n_s32(h5, 11);
+ /*Second half-row of row 6 of transform stage 2 (H5) stored*/
+ vst1_s16(pi2_dst + 5 * dst_strd, row5);
+ /* H3 = 75*(B0 - B7) - 18*(B1 - B6) - 89*(B2 - B5) - 50*(B3 - B4) */
+ h3 = vmlsq_n_s32(h3, o3_2, 50);
+ /* Rounding right shift by 11 and narrowing of H3 to 16 bits*/
+ row3 = vrshrn_n_s32(h3, 11);
+ /*Second half-row of row 4 of transform stage 2 (H3) stored*/
+ vst1_s16(pi2_dst + 3 * dst_strd, row3);
+ /* H1 = 89*(B0 - B7) + 75*(B1 - B6) + 50*(B2 - B5) + 18*(B3 - B4) */
+ h1 = vmlaq_n_s32(h1, o3_2, 18);
+ /* Rounding right shift by 11 and narrowing of H1 to 16 bits*/
+ row1 = vrshrn_n_s32(h1, 11);
+ /*Second half-row of row 2 of transform stage 2 (H1) stored*/
+ vst1_s16(pi2_dst + dst_strd, row1);
+ }
+ return sad;
+}
+
+/* Fills b[0..15] with 8 samples from each of 16 rows of `a`; rows are `stride` bytes apart. */
+static INLINE void load(const uint8_t *a, int stride, uint8x8_t *b, int is_chroma)
+{
+    const uint8_t *row_ptr = a;
+    int row = 0;
+    if(is_chroma != 0)
+    {
+        /* De-interleaving load of 16 bytes; only the even-indexed bytes are kept. */
+        while(row < 16)
+        {
+            b[row++] = vld2_u8(row_ptr).val[0];
+            row_ptr += stride;
+        }
+        return;
+    }
+    /* Plain load of 8 contiguous bytes per row. */
+    for(; row < 16; row++, row_ptr += stride)
+    {
+        b[row] = vld1_u8(row_ptr);
+    }
+}
+
+// Writes the eight vectors b[0..7] to `a`, one vector per row, rows 16 elements apart.
+static INLINE void store(WORD16 *a, int16x8_t *b /*[8]*/)
+{
+    int row;
+
+    /* The destination row stride is fixed at 16 WORD16 elements. */
+    for(row = 0; row < 8; row++)
+    {
+        vst1q_s16(a + 16 * row, b[row]);
+    }
+}
+
+/*
+ * First butterfly stage over 16 vectors of 16-bit lanes:
+ *   b[k] = a[k] + a[15 - k]   for k = 0..7   (sums)
+ *   b[k] = a[15 - k] - a[k]   for k = 8..15  (differences)
+ * All eight sums are written before the first difference is computed.
+ */
+static INLINE void cross_input_16(int16x8_t *a /*[16]*/, int16x8_t *b /*[16]*/)
+{
+    int k;
+
+    for(k = 0; k < 8; k++)
+    {
+        b[k] = vaddq_s16(a[k], a[15 - k]);
+    }
+
+    for(k = 8; k < 16; k++)
+    {
+        b[k] = vsubq_s16(a[15 - k], a[k]);
+    }
+}
+
+/*
+ * 32-bit variant of the first butterfly stage. For each half h (val[0], val[1]):
+ *   b[k].val[h] = a[k].val[h] + a[15 - k].val[h]   for k = 0..7
+ *   b[k].val[h] = a[15 - k].val[h] - a[k].val[h]   for k = 8..15
+ */
+static INLINE void cross_input_32(int32x4x2_t *a /*[16][2]*/, int32x4x2_t *b /*[16][2]*/)
+{
+    WORD32 half, k;
+
+    for(half = 0; half < 2; half++)
+    {
+        /* Sums go to the lower eight outputs. */
+        for(k = 0; k < 8; k++)
+        {
+            b[k].val[half] = vaddq_s32(a[k].val[half], a[15 - k].val[half]);
+        }
+
+        /* Differences go to the upper eight outputs. */
+        for(k = 8; k < 16; k++)
+        {
+            b[k].val[half] = vsubq_s32(a[15 - k].val[half], a[k].val[half]);
+        }
+    }
+}
+
+/* c[r] = a[r] - b[r] for 16 rows; returns the per-lane |c| totals, pairwise widened to 32 bits. */
+static INLINE int32x4_t diff(uint8x8_t *a /*[16]*/, uint8x8_t *b /*[16]*/, int16x8_t *c /*[16]*/)
+{
+    int16x8_t abs_acc = vdupq_n_s16(0);
+    int row;
+    for(row = 0; row < 16; row++)
+    {
+        c[row] = vreinterpretq_s16_u16(vsubl_u8(a[row], b[row]));
+        abs_acc = vaddq_s16(abs_acc, vabsq_s16(c[row]));
+    }
+    return vpaddlq_s16(abs_acc);
+}
+
+/* Rounds every 32-bit lane of a[0..15] by 13 bits and narrows it to 16 bits in b[0..15]. */
+static INLINE void partial_round_shift(int32x4x2_t *a, int16x8_t *b /*[16]*/)
+{
+    /* vrshrn_n_s32 adds 1 << 12, shifts right by 13 and keeps the low 16 bits of
+     * each lane. a[k].val[0] fills lanes 0..3 of b[k], a[k].val[1] lanes 4..7. */
+
+    b[0] = vcombine_s16(
+        vrshrn_n_s32(a[0].val[0], 13),
+        vrshrn_n_s32(a[0].val[1], 13));
+    b[1] = vcombine_s16(
+        vrshrn_n_s32(a[1].val[0], 13),
+        vrshrn_n_s32(a[1].val[1], 13));
+    b[2] = vcombine_s16(
+        vrshrn_n_s32(a[2].val[0], 13),
+        vrshrn_n_s32(a[2].val[1], 13));
+    b[3] = vcombine_s16(
+        vrshrn_n_s32(a[3].val[0], 13),
+        vrshrn_n_s32(a[3].val[1], 13));
+    b[4] = vcombine_s16(
+        vrshrn_n_s32(a[4].val[0], 13),
+        vrshrn_n_s32(a[4].val[1], 13));
+    b[5] = vcombine_s16(
+        vrshrn_n_s32(a[5].val[0], 13),
+        vrshrn_n_s32(a[5].val[1], 13));
+    b[6] = vcombine_s16(
+        vrshrn_n_s32(a[6].val[0], 13),
+        vrshrn_n_s32(a[6].val[1], 13));
+    b[7] = vcombine_s16(
+        vrshrn_n_s32(a[7].val[0], 13),
+        vrshrn_n_s32(a[7].val[1], 13));
+    b[8] = vcombine_s16(
+        vrshrn_n_s32(a[8].val[0], 13),
+        vrshrn_n_s32(a[8].val[1], 13));
+    b[9] = vcombine_s16(
+        vrshrn_n_s32(a[9].val[0], 13),
+        vrshrn_n_s32(a[9].val[1], 13));
+    b[10] = vcombine_s16(
+        vrshrn_n_s32(a[10].val[0], 13),
+        vrshrn_n_s32(a[10].val[1], 13));
+    b[11] = vcombine_s16(
+        vrshrn_n_s32(a[11].val[0], 13),
+        vrshrn_n_s32(a[11].val[1], 13));
+    b[12] = vcombine_s16(
+        vrshrn_n_s32(a[12].val[0], 13),
+        vrshrn_n_s32(a[12].val[1], 13));
+    b[13] = vcombine_s16(
+        vrshrn_n_s32(a[13].val[0], 13),
+        vrshrn_n_s32(a[13].val[1], 13));
+    b[14] = vcombine_s16(
+        vrshrn_n_s32(a[14].val[0], 13),
+        vrshrn_n_s32(a[14].val[1], 13));
+    b[15] = vcombine_s16(
+        vrshrn_n_s32(a[15].val[0], 13),
+        vrshrn_n_s32(a[15].val[1], 13));
+}
+
+/* Lane-wise sum of four vectors, added pairwise: (first + second) + (third + fourth). */
+static INLINE int32x4_t
+    add4(int32x4_t row1_low, int32x4_t row1_high, int32x4_t row2_low, int32x4_t row2_high)
+{
+    const int32x4_t first_pair = vaddq_s32(row1_low, row1_high);
+    const int32x4_t second_pair = vaddq_s32(row2_low, row2_high);
+    return vaddq_s32(first_pair, second_pair);
+}
+
+/* row1 = c * a + c * b and row2 = c * a - c * b, widened to 32 bits (val[0] low, val[1] high). */
+static INLINE void butterfly_one_coeff_16_32(
+    int16x8_t a, int16x8_t b, int16_t c, int32x4x2_t *row1, int32x4x2_t *row2)
+{
+    const int32x4_t ca_lo = vmull_n_s16(vget_low_s16(a), c);
+    const int32x4_t ca_hi = vmull_n_s16(vget_high_s16(a), c);
+    row1->val[0] = vmlal_n_s16(ca_lo, vget_low_s16(b), c);
+    row1->val[1] = vmlal_n_s16(ca_hi, vget_high_s16(b), c);
+    row2->val[0] = vmlsl_n_s16(ca_lo, vget_low_s16(b), c);
+    row2->val[1] = vmlsl_n_s16(ca_hi, vget_high_s16(b), c);
+}
+
+/* row1 = c1 * a + c0 * b and row2 = c0 * a - c1 * b, widened to 32 bits per lane. */
+static INLINE void butterfly_two_coeff_16_32(
+    int16x8_t a, int16x8_t b, int16_t c0, int16_t c1, int32x4x2_t *row1, int32x4x2_t *row2)
+{
+    const int16x4_t a_lo = vget_low_s16(a), a_hi = vget_high_s16(a);
+    const int16x4_t b_lo = vget_low_s16(b), b_hi = vget_high_s16(b);
+
+    row1->val[0] = vmlal_n_s16(vmull_n_s16(a_lo, c1), b_lo, c0);
+    row1->val[1] = vmlal_n_s16(vmull_n_s16(a_hi, c1), b_hi, c0);
+    row2->val[0] = vmlsl_n_s16(vmull_n_s16(a_lo, c0), b_lo, c1);
+    row2->val[1] = vmlsl_n_s16(vmull_n_s16(a_hi, c0), b_hi, c1);
+}
+
+static INLINE void butterfly_one_coeff_32_32(
+    int32x4x2_t a, int32x4x2_t b, int32_t c, int32x4x2_t *row1, int32x4x2_t *row2)
+{
+    /* row1 = c * a + c * b and row2 = c * a - c * b, per 32-bit lane. */
+    const int32x4_t ca_lo = vmulq_n_s32(a.val[0], c), ca_hi = vmulq_n_s32(a.val[1], c);
+    row1->val[0] = vmlaq_n_s32(ca_lo, b.val[0], c);
+    row1->val[1] = vmlaq_n_s32(ca_hi, b.val[1], c);
+    row2->val[0] = vmlsq_n_s32(ca_lo, b.val[0], c);
+    row2->val[1] = vmlsq_n_s32(ca_hi, b.val[1], c);
+}
+
+static INLINE void butterfly_two_coeff_32_32(
+    int32x4x2_t a, int32x4x2_t b, int32_t c0, int32_t c1, int32x4x2_t *row1, int32x4x2_t *row2)
+{
+    /* row1 = c1 * a + c0 * b and row2 = c0 * a - c1 * b, per 32-bit lane. */
+    const int32x4_t a_lo = a.val[0], a_hi = a.val[1];
+    const int32x4_t b_lo = b.val[0], b_hi = b.val[1];
+
+    row1->val[0] = vmlaq_n_s32(vmulq_n_s32(a_lo, c1), b_lo, c0);
+    row1->val[1] = vmlaq_n_s32(vmulq_n_s32(a_hi, c1), b_hi, c0);
+    row2->val[0] = vmlsq_n_s32(vmulq_n_s32(a_lo, c0), b_lo, c1);
+    row2->val[1] = vmlsq_n_s32(vmulq_n_s32(a_hi, c0), b_hi, c1);
+}
+
+// Transposes the 8x8 block of 32-bit values in a[0..7] into the separate array b[0..7].
+// The helpers in transpose_neon.h are not used because they all work in place.
+static INLINE void transpose_8x8(int32x4x2_t *a /*[8][2]*/, int32x4x2_t *b)
+{
+    const int32x4x2_t lo_r01 = vtrnq_s32(a[0].val[0], a[1].val[0]);
+    const int32x4x2_t lo_r23 = vtrnq_s32(a[2].val[0], a[3].val[0]);
+    const int32x4x2_t lo_r45 = vtrnq_s32(a[4].val[0], a[5].val[0]);
+    const int32x4x2_t lo_r67 = vtrnq_s32(a[6].val[0], a[7].val[0]);
+    const int32x4x2_t hi_r01 = vtrnq_s32(a[0].val[1], a[1].val[1]);
+    const int32x4x2_t hi_r23 = vtrnq_s32(a[2].val[1], a[3].val[1]);
+    const int32x4x2_t hi_r45 = vtrnq_s32(a[4].val[1], a[5].val[1]);
+    const int32x4x2_t hi_r67 = vtrnq_s32(a[6].val[1], a[7].val[1]);
+    /* Second step (vtrnq_s64_to_s32 is defined elsewhere) merges rows 0-3 and rows 4-7. */
+    const int32x4x2_t lo_e_0123 = vtrnq_s64_to_s32(lo_r01.val[0], lo_r23.val[0]);
+    const int32x4x2_t lo_o_0123 = vtrnq_s64_to_s32(lo_r01.val[1], lo_r23.val[1]);
+    const int32x4x2_t lo_e_4567 = vtrnq_s64_to_s32(lo_r45.val[0], lo_r67.val[0]);
+    const int32x4x2_t lo_o_4567 = vtrnq_s64_to_s32(lo_r45.val[1], lo_r67.val[1]);
+    const int32x4x2_t hi_e_0123 = vtrnq_s64_to_s32(hi_r01.val[0], hi_r23.val[0]);
+    const int32x4x2_t hi_o_0123 = vtrnq_s64_to_s32(hi_r01.val[1], hi_r23.val[1]);
+    const int32x4x2_t hi_e_4567 = vtrnq_s64_to_s32(hi_r45.val[0], hi_r67.val[0]);
+    const int32x4x2_t hi_o_4567 = vtrnq_s64_to_s32(hi_r45.val[1], hi_r67.val[1]);
+    /* val[0] of every output is built from input rows 0-3, val[1] from input rows 4-7. */
+    b[0].val[0] = lo_e_0123.val[0];
+    b[0].val[1] = lo_e_4567.val[0];
+    b[1].val[0] = lo_o_0123.val[0];
+    b[1].val[1] = lo_o_4567.val[0];
+    b[2].val[0] = lo_e_0123.val[1];
+    b[2].val[1] = lo_e_4567.val[1];
+    b[3].val[0] = lo_o_0123.val[1];
+    b[3].val[1] = lo_o_4567.val[1];
+    b[4].val[0] = hi_e_0123.val[0];
+    b[4].val[1] = hi_e_4567.val[0];
+    b[5].val[0] = hi_o_0123.val[0];
+    b[5].val[1] = hi_o_4567.val[0];
+    b[6].val[0] = hi_e_0123.val[1];
+    b[6].val[1] = hi_e_4567.val[1];
+    b[7].val[0] = hi_o_0123.val[1];
+    b[7].val[1] = hi_o_4567.val[1];
+}
+
+static void dct_body_16_32(int16x8_t *in /*[16]*/, int32x4x2_t *out /*[16]*/)
+{
+ int16x8_t s[8];
+ int16x8_t x[4];
+ int32x4x2_t tmp0, tmp1, tmp2, tmp3;
+ int32x4x2_t tmp4, tmp5, tmp6, tmp7;
+ /* Even outputs are built from in[0..7], odd outputs from in[8..15]; no rounding shift is applied here. */
+ s[0] = vaddq_s16(in[0], in[7]);
+ s[1] = vaddq_s16(in[1], in[6]);
+ s[2] = vaddq_s16(in[2], in[5]);
+ s[3] = vaddq_s16(in[3], in[4]);
+ s[4] = vsubq_s16(in[3], in[4]);
+ s[5] = vsubq_s16(in[2], in[5]);
+ s[6] = vsubq_s16(in[1], in[6]);
+ s[7] = vsubq_s16(in[0], in[7]);
+
+ x[0] = vaddq_s16(s[0], s[3]);
+ x[1] = vaddq_s16(s[1], s[2]);
+ x[2] = vsubq_s16(s[1], s[2]);
+ x[3] = vsubq_s16(s[0], s[3]);
+
+ // Type 1: outputs 0, 8, 4 and 12 from x[0..3]
+ // out[0] = 64 * x0 + 64 * x1
+ // out[8] = 64 * x0 - 64 * x1
+ butterfly_one_coeff_16_32(x[0], x[1], 64, &out[0], &out[8]);
+
+ // out[4] = 83 * x3 + 36 * x2
+ // out[12] = 36 * x3 - 83 * x2
+ butterfly_two_coeff_16_32(x[3], x[2], 36, 83, &out[4], &out[12]);
+
+ // Type 2: outputs 2, 14, 10 and 6 from s[4..7]
+ butterfly_two_coeff_16_32(s[7], s[4], 18, 89, &tmp0, &tmp1); /* tmp0 = 89*s7 + 18*s4, tmp1 = 18*s7 - 89*s4 */
+ butterfly_two_coeff_16_32(s[5], s[6], 75, 50, &tmp2, &tmp3); /* tmp2 = 50*s5 + 75*s6, tmp3 = 75*s5 - 50*s6 */
+
+ out[2].val[0] = vaddq_s32(tmp0.val[0], tmp2.val[0]); /* 89*s7 + 75*s6 + 50*s5 + 18*s4 */
+ out[2].val[1] = vaddq_s32(tmp0.val[1], tmp2.val[1]);
+
+ out[14].val[0] = vaddq_s32(tmp1.val[0], tmp3.val[0]); /* 18*s7 - 50*s6 + 75*s5 - 89*s4 */
+ out[14].val[1] = vaddq_s32(tmp1.val[1], tmp3.val[1]);
+
+ butterfly_two_coeff_16_32(s[7], s[4], 75, 50, &tmp0, &tmp1); /* tmp0 = 50*s7 + 75*s4, tmp1 = 75*s7 - 50*s4 */
+ butterfly_two_coeff_16_32(s[5], s[6], -89, 18, &tmp2, &tmp3); /* tmp2 = 18*s5 - 89*s6, tmp3 = -89*s5 - 18*s6 */
+
+ out[10].val[0] = vaddq_s32(tmp0.val[0], tmp2.val[0]); /* 50*s7 - 89*s6 + 18*s5 + 75*s4 */
+ out[10].val[1] = vaddq_s32(tmp0.val[1], tmp2.val[1]);
+
+ out[6].val[0] = vaddq_s32(tmp1.val[0], tmp3.val[0]); /* 75*s7 - 18*s6 - 89*s5 - 50*s4 */
+ out[6].val[1] = vaddq_s32(tmp1.val[1], tmp3.val[1]);
+
+ // Type 3: each odd output sums four butterflies over (in[8],in[15]), (in[9],in[14]), (in[10],in[13]), (in[11],in[12])
+ butterfly_two_coeff_16_32(in[8], in[15], 9, -90, &tmp0, &tmp1);
+ butterfly_two_coeff_16_32(in[9], in[14], 87, 25, &tmp2, &tmp3);
+ butterfly_two_coeff_16_32(in[10], in[13], 43, -80, &tmp4, &tmp5);
+ butterfly_two_coeff_16_32(in[11], in[12], 70, 57, &tmp6, &tmp7);
+
+ out[1].val[0] = add4(tmp1.val[0], tmp2.val[0], tmp5.val[0], tmp6.val[0]);
+ out[1].val[1] = add4(tmp1.val[1], tmp2.val[1], tmp5.val[1], tmp6.val[1]);
+
+ out[15].val[0] = add4(tmp0.val[0], tmp3.val[0], tmp4.val[0], tmp7.val[0]);
+ out[15].val[1] = add4(tmp0.val[1], tmp3.val[1], tmp4.val[1], tmp7.val[1]);
+
+ butterfly_two_coeff_16_32(in[8], in[15], 87, -25, &tmp0, &tmp1);
+ butterfly_two_coeff_16_32(in[9], in[14], -70, -57, &tmp2, &tmp3);
+ butterfly_two_coeff_16_32(in[10], in[13], 9, -90, &tmp4, &tmp5);
+ butterfly_two_coeff_16_32(in[11], in[12], -80, 43, &tmp6, &tmp7);
+
+ out[3].val[0] = add4(tmp0.val[0], tmp3.val[0], tmp4.val[0], tmp7.val[0]);
+ out[3].val[1] = add4(tmp0.val[1], tmp3.val[1], tmp4.val[1], tmp7.val[1]);
+
+ out[13].val[0] = add4(tmp1.val[0], tmp2.val[0], tmp5.val[0], tmp6.val[0]);
+ out[13].val[1] = add4(tmp1.val[1], tmp2.val[1], tmp5.val[1], tmp6.val[1]);
+
+ butterfly_two_coeff_16_32(in[8], in[15], 43, -80, &tmp0, &tmp1);
+ butterfly_two_coeff_16_32(in[9], in[14], 9, 90, &tmp2, &tmp3);
+ butterfly_two_coeff_16_32(in[10], in[13], 57, 70, &tmp4, &tmp5);
+ butterfly_two_coeff_16_32(in[11], in[12], -87, -25, &tmp6, &tmp7);
+
+ out[5].val[0] = add4(tmp1.val[0], tmp2.val[0], tmp5.val[0], tmp6.val[0]);
+ out[5].val[1] = add4(tmp1.val[1], tmp2.val[1], tmp5.val[1], tmp6.val[1]);
+
+ out[11].val[0] = add4(tmp0.val[0], tmp3.val[0], tmp4.val[0], tmp7.val[0]);
+ out[11].val[1] = add4(tmp0.val[1], tmp3.val[1], tmp4.val[1], tmp7.val[1]);
+
+ butterfly_two_coeff_16_32(in[8], in[15], 70, -57, &tmp0, &tmp1);
+ butterfly_two_coeff_16_32(in[9], in[14], -80, 43, &tmp2, &tmp3);
+ butterfly_two_coeff_16_32(in[10], in[13], -87, 25, &tmp4, &tmp5);
+ butterfly_two_coeff_16_32(in[11], in[12], 90, -9, &tmp6, &tmp7);
+
+ out[7].val[0] = add4(tmp0.val[0], tmp3.val[0], tmp4.val[0], tmp7.val[0]);
+ out[7].val[1] = add4(tmp0.val[1], tmp3.val[1], tmp4.val[1], tmp7.val[1]);
+
+ out[9].val[0] = add4(tmp1.val[0], tmp2.val[0], tmp5.val[0], tmp6.val[0]);
+ out[9].val[1] = add4(tmp1.val[1], tmp2.val[1], tmp5.val[1], tmp6.val[1]);
+}
+
+static void dct_body_32_32(int32x4x2_t *in /*[16]*/, int32x4x2_t *out /*[16]*/)
+{
+ int32x4x2_t s[8]; // sums / differences of the mirrored pairs in[k], in[7 - k]
+ int32x4x2_t x[4]; // sums / differences of the mirrored pairs s[k], s[3 - k]
+ int32x4x2_t tmp0, tmp1, tmp2, tmp3; // scratch outputs of the butterfly helpers
+ int32x4x2_t tmp4, tmp5, tmp6, tmp7;
+ WORD32 i;
+ // DCT body on 32-bit lanes: reads in[0..15] and writes all sixteen entries out[0..15].
+ for(i = 0; i < 2; i++) // once per int32x4_t half of every element
+ {
+ s[0].val[i] = vaddq_s32(in[0].val[i], in[7].val[i]); // s[0..3] = in[k] + in[7 - k]
+ s[1].val[i] = vaddq_s32(in[1].val[i], in[6].val[i]);
+ s[2].val[i] = vaddq_s32(in[2].val[i], in[5].val[i]);
+ s[3].val[i] = vaddq_s32(in[3].val[i], in[4].val[i]);
+ s[4].val[i] = vsubq_s32(in[3].val[i], in[4].val[i]); // s[4..7] = in[k] - in[7 - k], k = 3..0
+ s[5].val[i] = vsubq_s32(in[2].val[i], in[5].val[i]);
+ s[6].val[i] = vsubq_s32(in[1].val[i], in[6].val[i]);
+ s[7].val[i] = vsubq_s32(in[0].val[i], in[7].val[i]);
+ // x[] folds s[0..3] the same way: sums in x[0..1], differences in x[2..3].
+ x[0].val[i] = vaddq_s32(s[0].val[i], s[3].val[i]);
+ x[1].val[i] = vaddq_s32(s[1].val[i], s[2].val[i]);
+ x[2].val[i] = vsubq_s32(s[1].val[i], s[2].val[i]);
+ x[3].val[i] = vsubq_s32(s[0].val[i], s[3].val[i]);
+ }
+
+ // Type 1: outputs 0, 4, 8 and 12, built from x[]
+ // Presumably out[0] ~ (x0 + x1) * 64 and out[8] ~ (x0 - x1) * 64; the exact
+ // scaling/rounding is whatever butterfly_one_coeff_32_32 implements (TODO confirm).
+ butterfly_one_coeff_32_32(x[0], x[1], 64, &out[0], &out[8]);
+ // Presumably out[4] ~ 83 * x3 + 36 * x2 and out[12] ~ 36 * x3 - 83 * x2;
+ // confirm the operand order against butterfly_two_coeff_32_32.
+ butterfly_two_coeff_32_32(x[3], x[2], 36, 83, &out[4], &out[12]);
+
+ // Type 2: outputs 2, 6, 10 and 14, built from the differences s[4..7]
+ butterfly_two_coeff_32_32(s[7], s[4], 18, 89, &tmp0, &tmp1);
+ butterfly_two_coeff_32_32(s[5], s[6], 75, 50, &tmp2, &tmp3);
+
+ out[2].val[0] = vaddq_s32(tmp0.val[0], tmp2.val[0]);
+ out[2].val[1] = vaddq_s32(tmp0.val[1], tmp2.val[1]);
+
+ out[14].val[0] = vaddq_s32(tmp1.val[0], tmp3.val[0]);
+ out[14].val[1] = vaddq_s32(tmp1.val[1], tmp3.val[1]);
+
+ butterfly_two_coeff_32_32(s[7], s[4], 75, 50, &tmp0, &tmp1);
+ butterfly_two_coeff_32_32(s[5], s[6], -89, 18, &tmp2, &tmp3);
+
+ out[10].val[0] = vaddq_s32(tmp0.val[0], tmp2.val[0]);
+ out[10].val[1] = vaddq_s32(tmp0.val[1], tmp2.val[1]);
+
+ out[6].val[0] = vaddq_s32(tmp1.val[0], tmp3.val[0]);
+ out[6].val[1] = vaddq_s32(tmp1.val[1], tmp3.val[1]);
+
+ // Type 3: odd outputs 1..15; each of the four passes turns in[8..15] into two of them
+ butterfly_two_coeff_32_32(in[8], in[15], 9, -90, &tmp0, &tmp1);
+ butterfly_two_coeff_32_32(in[9], in[14], 87, 25, &tmp2, &tmp3);
+ butterfly_two_coeff_32_32(in[10], in[13], 43, -80, &tmp4, &tmp5);
+ butterfly_two_coeff_32_32(in[11], in[12], 70, 57, &tmp6, &tmp7);
+
+ out[1].val[0] = add4(tmp1.val[0], tmp2.val[0], tmp5.val[0], tmp6.val[0]);
+ out[1].val[1] = add4(tmp1.val[1], tmp2.val[1], tmp5.val[1], tmp6.val[1]);
+
+ out[15].val[0] = add4(tmp0.val[0], tmp3.val[0], tmp4.val[0], tmp7.val[0]);
+ out[15].val[1] = add4(tmp0.val[1], tmp3.val[1], tmp4.val[1], tmp7.val[1]);
+
+ butterfly_two_coeff_32_32(in[8], in[15], 87, -25, &tmp0, &tmp1);
+ butterfly_two_coeff_32_32(in[9], in[14], -70, -57, &tmp2, &tmp3);
+ butterfly_two_coeff_32_32(in[10], in[13], 9, -90, &tmp4, &tmp5);
+ butterfly_two_coeff_32_32(in[11], in[12], -80, 43, &tmp6, &tmp7);
+
+ out[3].val[0] = add4(tmp0.val[0], tmp3.val[0], tmp4.val[0], tmp7.val[0]);
+ out[3].val[1] = add4(tmp0.val[1], tmp3.val[1], tmp4.val[1], tmp7.val[1]);
+
+ out[13].val[0] = add4(tmp1.val[0], tmp2.val[0], tmp5.val[0], tmp6.val[0]);
+ out[13].val[1] = add4(tmp1.val[1], tmp2.val[1], tmp5.val[1], tmp6.val[1]);
+
+ butterfly_two_coeff_32_32(in[8], in[15], 43, -80, &tmp0, &tmp1);
+ butterfly_two_coeff_32_32(in[9], in[14], 9, 90, &tmp2, &tmp3);
+ butterfly_two_coeff_32_32(in[10], in[13], 57, 70, &tmp4, &tmp5);
+ butterfly_two_coeff_32_32(in[11], in[12], -87, -25, &tmp6, &tmp7);
+
+ out[5].val[0] = add4(tmp1.val[0], tmp2.val[0], tmp5.val[0], tmp6.val[0]);
+ out[5].val[1] = add4(tmp1.val[1], tmp2.val[1], tmp5.val[1], tmp6.val[1]);
+
+ out[11].val[0] = add4(tmp0.val[0], tmp3.val[0], tmp4.val[0], tmp7.val[0]);
+ out[11].val[1] = add4(tmp0.val[1], tmp3.val[1], tmp4.val[1], tmp7.val[1]);
+
+ butterfly_two_coeff_32_32(in[8], in[15], 70, -57, &tmp0, &tmp1);
+ butterfly_two_coeff_32_32(in[9], in[14], -80, 43, &tmp2, &tmp3);
+ butterfly_two_coeff_32_32(in[10], in[13], -87, 25, &tmp4, &tmp5);
+ butterfly_two_coeff_32_32(in[11], in[12], 90, -9, &tmp6, &tmp7);
+
+ out[7].val[0] = add4(tmp0.val[0], tmp3.val[0], tmp4.val[0], tmp7.val[0]);
+ out[7].val[1] = add4(tmp0.val[1], tmp3.val[1], tmp4.val[1], tmp7.val[1]);
+
+ out[9].val[0] = add4(tmp1.val[0], tmp2.val[0], tmp5.val[0], tmp6.val[0]);
+ out[9].val[1] = add4(tmp1.val[1], tmp2.val[1], tmp5.val[1], tmp6.val[1]);
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * Computes the residue of a 16x16 block and applies the forward transform
+ * to it (NEON implementation)
+ *
+ * @par Description:
+ * Takes the difference of source and prediction, transforms it in two stages
+ * (left/right halves first, then top/bottom halves) and returns u4_blk_sad
+ *
+ * @param[in] pu1_src
+ * Input 16x16 pixels
+ *
+ * @param[in] pu1_pred
+ * Prediction data
+ *
+ * @param[in] pi4_temp
+ * Temporary buffer; not used by this implementation
+ *
+ * @param[out] pi2_dst
+ * Output 16x16 coefficients
+ *
+ * @param[in] src_strd
+ * Input stride
+ *
+ * @param[in] pred_strd
+ * Prediction Stride
+ *
+ * @param[in] dst_strd_chr_flag
+ * Output stride in the upper 16 bits (>> 16), chroma flag in bit 0 (& 1)
+ *
+ * @returns u4_blk_sad, reduced from the two diff() results (presumably the block SAD)
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+UWORD32 ihevc_resi_trans_16x16_neon(
+ UWORD8 *pu1_src,
+ UWORD8 *pu1_pred,
+ WORD32 *pi4_temp,
+ WORD16 *pi2_dst,
+ WORD32 src_strd,
+ WORD32 pred_strd,
+ WORD32 dst_strd_chr_flag)
+{
+ UWORD32 u4_blk_sad = 0;
+ WORD32 chroma_flag;
+ WORD32 dst_strd;
+
+ uint8x8_t temp0[16], temp1[16]; // source / prediction data of one half block
+ int16x8_t temp2[16], temp3[16]; // 16-bit working rows
+ int32x4_t tmp_a, tmp_b; // vectors returned by diff() for the two halves
+ int64x2_t tmp_c;
+ int32x2_t sad_v;
+ int32x4x2_t out0[16], out1[16], temp4[16], temp5[16]; // 32-bit intermediates
+
+ (void)pi4_temp; // the scratch buffer is not needed here
+ chroma_flag = dst_strd_chr_flag & 1; // bit 0
+ dst_strd = dst_strd_chr_flag >> 16; // upper 16 bits
+
+ /* Residue + Forward Transform 1st stage */
+ // Left half.
+ load(pu1_src, src_strd, temp0, chroma_flag);
+ load(pu1_pred, pred_strd, temp1, chroma_flag);
+
+ tmp_a = diff(temp0, temp1, temp2); // presumably temp2 = temp0 - temp1; result feeds u4_blk_sad
+ cross_input_16(temp2, temp3);
+ dct_body_16_32(temp3, out0);
+
+ // Right half: starts 8 bytes further on, or 16 when chroma_flag is set.
+ load(pu1_src + 8 * (1 + chroma_flag), src_strd, temp0, chroma_flag);
+ load(pu1_pred + 8 * (1 + chroma_flag), pred_strd, temp1, chroma_flag);
+
+ tmp_b = diff(temp0, temp1, temp2);
+ cross_input_16(temp2, temp3);
+ dct_body_16_32(temp3, out1);
+ // Fold both diff() vectors into one scalar: add them, sum lanes pairwise, then low + high.
+ tmp_a = vaddq_s32(tmp_a, tmp_b);
+ tmp_c = vpaddlq_s32(tmp_a);
+ sad_v = vadd_s32(vreinterpret_s32_s64(vget_low_s64(tmp_c)),
+ vreinterpret_s32_s64(vget_high_s64(tmp_c)));
+ u4_blk_sad = vget_lane_s32(sad_v, 0); // only lane 0 of the folded pair is kept
+
+ /* Forward Transform 2nd stage: top half first, then bottom half */
+ // Transpose top left and top right quarters into one contiguous location to
+ // process to the top half.
+ transpose_8x8(&out0[0], &temp4[0]);
+ transpose_8x8(&out1[0], &temp4[8]);
+
+ cross_input_32(temp4, temp5);
+ dct_body_32_32(temp5, temp4);
+ partial_round_shift(temp4, temp2); // 32-bit results go back into the int16x8_t rows
+ transpose_s16_8x8(
+ &temp2[0], &temp2[1], &temp2[2], &temp2[3], &temp2[4], &temp2[5], &temp2[6], &temp2[7]);
+ transpose_s16_8x8(
+ &temp2[8], &temp2[9], &temp2[10], &temp2[11], &temp2[12], &temp2[13], &temp2[14], &temp2[15]);
+
+ store(pi2_dst, &temp2[0]);
+ store(pi2_dst + 8, &temp2[8]); // second group starts 8 coefficients to the right
+ pi2_dst += 8 * dst_strd; // advance 8 destination rows
+
+ // Transpose bottom left and bottom right quarters into one contiguous
+ // location to process to the bottom half.
+ transpose_8x8(&out0[8], &out1[0]); // out1[0..7] was already transposed into temp4 above
+ transpose_s32_8x8(
+ &out1[8], &out1[9], &out1[10], &out1[11], &out1[12], &out1[13], &out1[14], &out1[15]);
+
+ cross_input_32(out1, temp5);
+ dct_body_32_32(temp5, temp4);
+ partial_round_shift(temp4, temp2);
+ transpose_s16_8x8(
+ &temp2[0], &temp2[1], &temp2[2], &temp2[3], &temp2[4], &temp2[5], &temp2[6], &temp2[7]);
+ transpose_s16_8x8(
+ &temp2[8], &temp2[9], &temp2[10], &temp2[11], &temp2[12], &temp2[13], &temp2[14], &temp2[15]);
+ store(pi2_dst, &temp2[0]);
+ store(pi2_dst + 8, &temp2[8]);
+
+ return u4_blk_sad;
+}
diff --git a/common/arm/ihevc_resi_trans_neon_32x32.c b/common/arm/ihevc_resi_trans_neon_32x32.c
new file mode 100644
index 0000000..5270f80
--- /dev/null
+++ b/common/arm/ihevc_resi_trans_neon_32x32.c
@@ -0,0 +1,1456 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+ *******************************************************************************
+ * @file
+ * ihevc_resi_trans_neon_32x32.c
+ *
+ * @brief
+ * Contains definitions of functions for computing residue and fwd transform
+ *
+ * @author
+ * Ittiam
+ *
+ * @par List of Functions:
+ * - ihevc_resi_trans_32x32_neon()
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+
+/* System user files */
+#include "ihevc_typedefs.h"
+#include "ihevc_macros.h"
+#include "ihevc_defs.h"
+#include "ihevc_cmn_utils_neon.h"
+
+#include "ihevc_trans_tables.h"
+#include "ihevc_resi_trans.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * This function performs residue calculation and forward transform on
+ * input pixels
+ *
+ * @par Description:
+ * Performs residue calculation by subtracting source and prediction and
+ * followed by forward transform
+ *
+ * @param[in] pu1_src
+ * Input 32x32 pixels
+ *
+ * @param[in] pu1_pred
+ * Prediction data
+ *
+ * @param[in] pi4_temp
+ * Temporary WORD32 buffer that receives the first-stage results
+ *
+ * @param[out] pi2_dst
+ * Output 32x32 coefficients
+ *
+ * @param[in] src_strd
+ * Input stride
+ *
+ * @param[in] pred_strd
+ * Prediction Stride
+ *
+ * @param[in] dst_strd_chr_flag
+ * Output Stride and Chroma Flag packed in the MS and LS 16-bit
+ *
+ * @returns UWORD32 value (presumably the block SAD held in u4_blk_sad)
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+UWORD32 ihevc_resi_trans_32x32_neon(UWORD8 *pu1_src, UWORD8 *pu1_pred,
+ WORD32 *pi4_temp, WORD16 *pi2_dst, WORD32 src_strd, WORD32 pred_strd,
+ WORD32 dst_strd_chr_flag)
+{
+ int16x8_t diff_16[4][2];
+ WORD32 i;
+ int32x2_t sad;
+ int64x2_t tmp_a;
+ UWORD32 u4_blk_sad = 0;
+ WORD32 dst_strd = dst_strd_chr_flag >> 16;
+ WORD32 *pi4_temp_orig = pi4_temp;
+ int16x8_t abs = vdupq_n_s16(0);
+ int32x4_t sum_val = vdupq_n_s32(0);
+
+
+ // Stage 1
+ for(i = 0; i < 16; i++)
+ {
+
+ uint8x16_t src_buff, pred_buff;
+ abs = vdupq_n_s16(0);
+
+ src_buff = vld1q_u8(pu1_src);
+ pred_buff = vld1q_u8(pu1_pred);
+ diff_16[0][0] = vreinterpretq_s16_u16(
+ vsubl_u8(vget_low_u8(src_buff), vget_low_u8(pred_buff)));
+ diff_16[1][0] = vreinterpretq_s16_u16(
+ vsubl_u8(vget_high_u8(src_buff), vget_high_u8(pred_buff)));
+ abs = vaddq_s16(abs, vabsq_s16(diff_16[0][0]));
+ abs = vaddq_s16(abs, vabsq_s16(diff_16[1][0]));
+
+ src_buff = vld1q_u8(pu1_src + 16);
+ pred_buff = vld1q_u8(pu1_pred + 16);
+ diff_16[2][0] = vrev64q_s16(vreinterpretq_s16_u16(
+ vsubl_u8(vget_low_u8(src_buff), vget_low_u8(pred_buff))));
+ diff_16[2][0] = vcombine_s16(
+ vget_high_s16(diff_16[2][0]), vget_low_s16(diff_16[2][0]));
+ diff_16[3][0] = vrev64q_s16(vreinterpretq_s16_u16(
+ vsubl_u8(vget_high_u8(src_buff), vget_high_u8(pred_buff))));
+ diff_16[3][0] = vcombine_s16(
+ vget_high_s16(diff_16[3][0]), vget_low_s16(diff_16[3][0]));
+ abs = vaddq_s16(abs, vabsq_s16(diff_16[2][0]));
+ abs = vaddq_s16(abs, vabsq_s16(diff_16[3][0]));
+
+ pu1_src += src_strd;
+ pu1_pred += pred_strd;
+
+ src_buff = vld1q_u8(pu1_src);
+ pred_buff = vld1q_u8(pu1_pred);
+ diff_16[0][1] = vreinterpretq_s16_u16(
+ vsubl_u8(vget_low_u8(src_buff), vget_low_u8(pred_buff)));
+ diff_16[1][1] = vreinterpretq_s16_u16(
+ vsubl_u8(vget_high_u8(src_buff), vget_high_u8(pred_buff)));
+ abs = vaddq_s16(abs, vabsq_s16(diff_16[0][1]));
+ abs = vaddq_s16(abs, vabsq_s16(diff_16[1][1]));
+
+ src_buff = vld1q_u8(pu1_src + 16);
+ pred_buff = vld1q_u8(pu1_pred + 16);
+ diff_16[2][1] = vrev64q_s16(vreinterpretq_s16_u16(
+ vsubl_u8(vget_low_u8(src_buff), vget_low_u8(pred_buff))));
+ diff_16[2][1] = vcombine_s16(
+ vget_high_s16(diff_16[2][1]), vget_low_s16(diff_16[2][1]));
+ diff_16[3][1] = vrev64q_s16(vreinterpretq_s16_u16(
+ vsubl_u8(vget_high_u8(src_buff), vget_high_u8(pred_buff))));
+ diff_16[3][1] = vcombine_s16(
+ vget_high_s16(diff_16[3][1]), vget_low_s16(diff_16[3][1]));
+
+ abs = vaddq_s16(abs, vabsq_s16(diff_16[2][1]));
+ abs = vaddq_s16(abs, vabsq_s16(diff_16[3][1]));
+
+ sum_val = vaddq_s32(sum_val,vpaddlq_s16(abs));
+
+ pu1_src += src_strd;
+ pu1_pred += pred_strd;
+ {
+ static const int16x8_t g_ai2_ihevc_trans_32_01_8 = { 64, 83, 64, 36, 64, 36, -64, -83 };
+
+ static const int16x4_t g_ai2_ihevc_trans_32_4_04 = { 89, 75, 50, 18 };
+ static const int16x4_t g_ai2_ihevc_trans_32_12_04 = { 75, -18, -89, -50 };
+ static const int16x4_t g_ai2_ihevc_trans_32_20_04 = { 50, -89, 18, 75 };
+ static const int16x4_t g_ai2_ihevc_trans_32_28_04 = { 18, -50, 75, -89 };
+
+ static const int16x8_t g_ai2_ihevc_trans_32_2_07 = { 90, 87, 80, 70, 57, 43, 25, 9 };
+ static const int16x8_t g_ai2_ihevc_trans_32_6_07 = { 87, 57, 9, -43, -80, -90, -70, -25 };
+ static const int16x8_t g_ai2_ihevc_trans_32_10_07 = { 80, 9, -70, -87, -25, 57, 90, 43 };
+ static const int16x8_t g_ai2_ihevc_trans_32_14_07 = { 70, -43, -87, 9, 90, 25, -80, -57 };
+ static const int16x8_t g_ai2_ihevc_trans_32_18_07 = { 57, -80, -25, 90, -9, -87, 43, 70 };
+ static const int16x8_t g_ai2_ihevc_trans_32_22_07 = { 43, -90, 57, 25, -87, 70, 9, -80 };
+ static const int16x8_t g_ai2_ihevc_trans_32_26_07 = { 25, -70, 90, -80, 43, 9, -57, 87 };
+ static const int16x8_t g_ai2_ihevc_trans_32_30_07 = { 9, -25, 43, -57, 70, -80, 87, -90 };
+
+ static const int16x8_t g_ai2_ihevc_trans_32_1_07 = { 90, 90, 88, 85, 82, 78, 73, 67 };
+ static const int16x8_t g_ai2_ihevc_trans_32_1_815 = { 61, 54, 46, 38, 31, 22, 13, 4 };
+ static const int16x8_t g_ai2_ihevc_trans_32_3_07 = { 90, 82, 67, 46, 22, -4, -31, -54 };
+ static const int16x8_t g_ai2_ihevc_trans_32_3_815 = { -73, -85, -90, -88, -78, -61, -38, -13 };
+ static const int16x8_t g_ai2_ihevc_trans_32_5_07 = { 88, 67, 31, -13, -54, -82, -90, -78 };
+ static const int16x8_t g_ai2_ihevc_trans_32_5_815 = { -46, -4, 38, 73, 90, 85, 61, 22 };
+ static const int16x8_t g_ai2_ihevc_trans_32_7_07 = { 85, 46, -13, -67, -90, -73, -22, 38 };
+ static const int16x8_t g_ai2_ihevc_trans_32_7_815 = { 82, 88, 54, -4, -61, -90, -78, -31 };
+ static const int16x8_t g_ai2_ihevc_trans_32_9_07 = { 82, 22, -54, -90, -61, 13, 78, 85 };
+ static const int16x8_t g_ai2_ihevc_trans_32_9_815 = { 31, -46, -90, -67, 4, 73, 88, 38 };
+ static const int16x8_t g_ai2_ihevc_trans_32_11_07 = { 78, -4, -82, -73, 13, 85, 67, -22 };
+ static const int16x8_t g_ai2_ihevc_trans_32_11_815 = { -88, -61, 31, 90, 54, -38, -90, -46 };
+ static const int16x8_t g_ai2_ihevc_trans_32_13_07 = { 73, -31, -90, -22, 78, 67, -38, -90 };
+ static const int16x8_t g_ai2_ihevc_trans_32_13_815 = { -13, 82, 61, -46, -88, -4, 85, 54 };
+ static const int16x8_t g_ai2_ihevc_trans_32_15_07 = { 67, -54, -78, 38, 85, -22, -90, 4 };
+ static const int16x8_t g_ai2_ihevc_trans_32_15_815 = { 90, 13, -88, -31, 82, 46, -73, -61 };
+ static const int16x8_t g_ai2_ihevc_trans_32_17_07 = { 61, -73, -46, 82, 31, -88, -13, 90 };
+ static const int16x8_t g_ai2_ihevc_trans_32_17_815 = { -4, -90, 22, 85, -38, -78, 54, 67 };
+ static const int16x8_t g_ai2_ihevc_trans_32_19_07 = { 54, -85, -4, 88, -46, -61, 82, 13 };
+ static const int16x8_t g_ai2_ihevc_trans_32_19_815 = { -90, 38, 67, -78, -22, 90, -31, -73 };
+ static const int16x8_t g_ai2_ihevc_trans_32_21_07 = { 46, -90, 38, 54, -90, 31, 61, -88 };
+ static const int16x8_t g_ai2_ihevc_trans_32_21_815 = { 22, 67, -85, 13, 73, -82, 4, 78 };
+ static const int16x8_t g_ai2_ihevc_trans_32_23_07 = { 38, -88, 73, -4, -67, 90, -46, -31 };
+ static const int16x8_t g_ai2_ihevc_trans_32_23_815 = { 85, -78, 13, 61, -90, 54, 22, -82 };
+ static const int16x8_t g_ai2_ihevc_trans_32_25_07 = { 31, -78, 90, -61, 4, 54, -88, 82 };
+ static const int16x8_t g_ai2_ihevc_trans_32_25_815 = { -38, -22, 73, -90, 67, -13, -46, 85 };
+ static const int16x8_t g_ai2_ihevc_trans_32_27_07 = { 22, -61, 85, -90, 73, -38, -4, 46 };
+ static const int16x8_t g_ai2_ihevc_trans_32_27_815 = { -78, 90, -82, 54, -13, -31, 67, -88 };
+ static const int16x8_t g_ai2_ihevc_trans_32_29_07 = { 13, -38, 61, -78, 88, -90, 85, -73 };
+ static const int16x8_t g_ai2_ihevc_trans_32_29_815 = { 54, -31, 4, 22, -46, 67, -82, 90 };
+ static const int16x8_t g_ai2_ihevc_trans_32_31_07 = { 4, -13, 22, -31, 38, -46, 54, -61 };
+ static const int16x8_t g_ai2_ihevc_trans_32_31_815 = { 67, -73, 78, -82, 85, -88, 90, -90 };
+
+ int32x4x2_t a[32];
+
+ const int16x8_t o1_1 = vsubq_s16(
+ diff_16[1][1], diff_16[2][1]); /*R2(9-16) - R2(24-17)*/
+ const int16x8_t o1_0 = vsubq_s16(
+ diff_16[0][1], diff_16[3][1]); /*R2(1- 8) - R2(32-25)*/
+ const int16x8_t o0_1 = vsubq_s16(
+ diff_16[1][0], diff_16[2][0]); /*R1(9-16) - R1(24-17)*/
+ const int16x8_t o0_0 = vsubq_s16(
+ diff_16[0][0], diff_16[3][0]); /*R1(1- 8) - R1(32-25)*/
+ const int16x8_t e0_0 = vaddq_s16(
+ diff_16[0][0], diff_16[3][0]); /*R1(1- 8) + R1(32-25)*/
+ int16x8_t e0_1 = vrev64q_s16(vaddq_s16(
+ diff_16[1][0], diff_16[2][0])); /*R1(9-16) + R1(24-17)*/
+ e0_1 = vcombine_s16(vget_high_s16(e0_1), vget_low_s16(e0_1));
+ const int16x8_t e1_0 = vaddq_s16(
+ diff_16[0][1], diff_16[3][1]); /*R2(1- 8) + R2(32-25)*/
+ int16x8_t e1_1 = vrev64q_s16(vaddq_s16(
+ diff_16[1][1], diff_16[2][1])); /*R2(9-16) + R2(24-17)*/
+ e1_1 = vcombine_s16(vget_high_s16(e1_1), vget_low_s16(e1_1));
+
+ const int16x8_t ee0 = vaddq_s16(e0_0, e0_1); /*E1(1- 8) + E1(16-9)*/
+ const int16x8_t ee1 = vaddq_s16(e1_0, e1_1); /*E2(1- 8) + E2(16-9)*/
+ const int16x8_t eo1 = vsubq_s16(e1_0, e1_1); /*E2(1- 8) - E2(16-9)*/
+ const int16x8_t eo0 = vsubq_s16(e0_0, e0_1); /*E1(1- 8) - E1(16-9)*/
+
+ /*EE0(1-4) & EE1(1-4)*/
+ const int16x8_t ee_a =
+ vcombine_s16(vget_low_s16(ee0), vget_low_s16(ee1));
+ /*EE0(8-5) & EE1(8-5)*/
+ const int16x8_t ee_b = vcombine_s16(
+ vrev64_s16(vget_high_s16(ee0)), vrev64_s16(vget_high_s16(ee1)));
+
+ /*EE(1-4) - EE(8-5)*/
+ const int16x8_t eeo = vsubq_s16(ee_a, ee_b); //Q0
+ /*EE(1-4) + EE(8-5)*/
+ const int16x8_t eee = vaddq_s16(ee_a, ee_b); //Q13
+
+ /*EEEE Calculations*/
+ const int32x2x2_t ee =
+ vtrn_s32(vreinterpret_s32_s16(vget_low_s16(eee)),
+ vreinterpret_s32_s16(vget_high_s16(eee)));
+ const int16x8_t eeee_a =
+ vreinterpretq_s16_s32(vcombine_s32(ee.val[0], ee.val[0]));
+ const int16x8_t eeee_b =
+ vcombine_s16(vrev32_s16(vreinterpret_s16_s32(ee.val[1])),
+ vneg_s16(vrev32_s16(vreinterpret_s16_s32(ee.val[1]))));
+ const int16x8_t eeee = vaddq_s16(eeee_a, eeee_b); //q2
+ const int16x4x2_t trans_eeee =
+ vtrn_s16(vget_low_s16(eeee), vget_high_s16(eeee));
+ const int16x4_t eeee_00 = vreinterpret_s16_s32(vdup_lane_s32(
+ vreinterpret_s32_s16(trans_eeee.val[0]), 0)); //d8
+ const int16x4_t eeee_10 = vreinterpret_s16_s32(vdup_lane_s32(
+ vreinterpret_s32_s16(trans_eeee.val[0]), 1)); //d9
+ const int16x4_t eeee_01 = vreinterpret_s16_s32(vdup_lane_s32(
+ vreinterpret_s32_s16(trans_eeee.val[1]), 0)); //d10
+ const int16x4_t eeee_11 = vreinterpret_s16_s32(vdup_lane_s32(
+ vreinterpret_s32_s16(trans_eeee.val[1]), 1)); //d11
+
+ /*Calculation of values 0 8 16 24*/
+ a[0].val[0] =
+ vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_01_8), eeee_00);
+ a[0].val[0] = vmlal_s16(
+ a[0].val[0], vget_high_s16(g_ai2_ihevc_trans_32_01_8), eeee_01);
+ a[0].val[1] =
+ vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_01_8), eeee_10);
+ a[0].val[1] = vmlal_s16(
+ a[0].val[1], vget_high_s16(g_ai2_ihevc_trans_32_01_8), eeee_11);
+
+ int32x4x2_t val_8 = vzipq_s32(a[0].val[0], a[0].val[1]);
+
+ /*Store*/
+ vst1_s32(pi4_temp, vget_low_s32(val_8.val[0])); /*Value 0*/
+ vst1_s32(pi4_temp + 256, vget_high_s32(val_8.val[0])); /*Value 8*/
+ vst1_s32(pi4_temp + 512, vget_low_s32(val_8.val[1])); /*Value 16*/
+ vst1_s32(pi4_temp + 768, vget_high_s32(val_8.val[1])); /*Value 24*/
+
+ /*Calculation of values 4 12 20 28*/
+ /*Multiplications*/
+ a[4].val[0] =
+ vmull_s16(g_ai2_ihevc_trans_32_4_04, vget_low_s16(eeo));
+ a[12].val[0] =
+ vmull_s16(g_ai2_ihevc_trans_32_12_04, vget_low_s16(eeo));
+ a[20].val[0] =
+ vmull_s16(g_ai2_ihevc_trans_32_20_04, vget_low_s16(eeo));
+ a[28].val[0] =
+ vmull_s16(g_ai2_ihevc_trans_32_28_04, vget_low_s16(eeo));
+
+ a[4].val[1] =
+ vmull_s16(g_ai2_ihevc_trans_32_4_04, vget_high_s16(eeo));
+ a[12].val[1] =
+ vmull_s16(g_ai2_ihevc_trans_32_12_04, vget_high_s16(eeo));
+ a[20].val[1] =
+ vmull_s16(g_ai2_ihevc_trans_32_20_04, vget_high_s16(eeo));
+ a[28].val[1] =
+ vmull_s16(g_ai2_ihevc_trans_32_28_04, vget_high_s16(eeo));
+
+ /*Transposes*/
+ int32x4x2_t val_4_0 =
+ vtrnq_s32(a[4].val[0], a[12].val[0]); //q15 q5
+ int32x4x2_t val_4_1 =
+ vtrnq_s32(a[4].val[1], a[12].val[1]); //q4 q12
+ int32x4x2_t val_20_0 =
+ vtrnq_s32(a[20].val[0], a[28].val[0]); //q8 q2
+ int32x4x2_t val_20_1 =
+ vtrnq_s32(a[20].val[1], a[28].val[1]); //q9 q13
+
+ /*Swap*/
+ a[4].val[0] = vcombine_s32(vget_low_s32(val_4_0.val[0]),
+ vget_low_s32(val_20_0.val[0])); //q12
+ a[4].val[1] = vcombine_s32(vget_high_s32(val_4_0.val[0]),
+ vget_high_s32(val_20_0.val[0])); //q2
+
+ a[12].val[0] = vcombine_s32(vget_low_s32(val_4_0.val[1]),
+ vget_low_s32(val_20_0.val[1])); //q4
+ a[12].val[1] = vcombine_s32(vget_high_s32(val_4_0.val[1]),
+ vget_high_s32(val_20_0.val[1])); //q8
+
+ /*Additions*/
+ a[12].val[0] = vaddq_s32(a[12].val[0], a[4].val[0]); //q4
+ a[12].val[1] = vaddq_s32(a[12].val[1], a[4].val[1]); //q8
+ a[12].val[1] = vaddq_s32(a[12].val[1], a[12].val[0]); //q8
+
+ a[20].val[0] = vcombine_s32(vget_low_s32(val_4_1.val[0]),
+ vget_low_s32(val_20_1.val[0])); //q5
+ a[20].val[1] = vcombine_s32(vget_high_s32(val_4_1.val[0]),
+ vget_high_s32(val_20_1.val[0])); //q13
+
+ a[28].val[0] = vcombine_s32(vget_low_s32(val_4_1.val[1]),
+ vget_low_s32(val_20_1.val[1])); //q15
+ a[28].val[1] = vcombine_s32(vget_high_s32(val_4_1.val[1]),
+ vget_high_s32(val_20_1.val[1])); //q9
+
+ a[28].val[0] = vaddq_s32(a[28].val[0], a[20].val[0]); //q15
+ a[28].val[1] = vaddq_s32(a[28].val[1], a[20].val[1]); //q5
+ a[28].val[1] = vaddq_s32(a[28].val[1], a[28].val[0]); //q15
+
+ int32x4x2_t val_4 = vzipq_s32(a[12].val[1], a[28].val[1]);
+
+ /*Store*/
+ vst1_s32(pi4_temp + 128, vget_low_s32(val_4.val[0])); /*Value 4*/
+ vst1_s32(pi4_temp + 384, vget_high_s32(val_4.val[0])); /*Value 12*/
+ vst1_s32(pi4_temp + 640, vget_low_s32(val_4.val[1])); /*Value 20*/
+ vst1_s32(pi4_temp + 896, vget_high_s32(val_4.val[1])); /*Value 28*/
+
+ /*Calculation of value 2 6 10 14 18 22 26 30*/
+ /*Multiplications*/
+ a[2].val[0] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_2_07),
+ vget_low_s16(eo0)); //q2
+ a[6].val[0] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_6_07),
+ vget_low_s16(eo0)); //q5
+ a[10].val[0] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_10_07),
+ vget_low_s16(eo0)); //q9
+ a[14].val[0] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_14_07),
+ vget_low_s16(eo0)); //q8
+
+ a[14].val[0] = vmlal_s16(a[14].val[0],
+ vget_high_s16(g_ai2_ihevc_trans_32_14_07), vget_high_s16(eo0));
+ a[10].val[0] = vmlal_s16(a[10].val[0],
+ vget_high_s16(g_ai2_ihevc_trans_32_10_07), vget_high_s16(eo0));
+ a[6].val[0] = vmlal_s16(a[6].val[0],
+ vget_high_s16(g_ai2_ihevc_trans_32_6_07), vget_high_s16(eo0));
+ a[2].val[0] = vmlal_s16(a[2].val[0],
+ vget_high_s16(g_ai2_ihevc_trans_32_2_07), vget_high_s16(eo0));
+
+ a[2].val[1] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_2_07),
+ vget_low_s16(eo1)); //q4
+ a[6].val[1] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_6_07),
+ vget_low_s16(eo1)); //q13
+ a[10].val[1] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_10_07),
+ vget_low_s16(eo1)); //q12
+ a[14].val[1] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_14_07),
+ vget_low_s16(eo1)); //q15
+
+ a[14].val[1] = vmlal_s16(a[14].val[1],
+ vget_high_s16(g_ai2_ihevc_trans_32_14_07), vget_high_s16(eo1));
+ a[10].val[1] = vmlal_s16(a[10].val[1],
+ vget_high_s16(g_ai2_ihevc_trans_32_10_07), vget_high_s16(eo1));
+ a[6].val[1] = vmlal_s16(a[6].val[1],
+ vget_high_s16(g_ai2_ihevc_trans_32_6_07), vget_high_s16(eo1));
+ a[2].val[1] = vmlal_s16(a[2].val[1],
+ vget_high_s16(g_ai2_ihevc_trans_32_2_07), vget_high_s16(eo1));
+
+ /*Transposes*/
+ int32x4x2_t val_26_0 = vtrnq_s32(a[2].val[0], a[6].val[0]); //q2 q5
+ int32x4x2_t val_1014_0 =
+ vtrnq_s32(a[10].val[0], a[14].val[0]); //q9 q8
+ int32x4x2_t val_26_1 =
+ vtrnq_s32(a[2].val[1], a[6].val[1]); //q4 q13
+ int32x4x2_t val_1014_1 =
+ vtrnq_s32(a[10].val[1], a[14].val[1]); //q12 q15
+
+ /*Swap*/
+ a[2].val[0] = vcombine_s32(vget_low_s32(val_26_0.val[0]),
+ vget_low_s32(val_1014_0.val[0])); //q2
+ a[2].val[1] = vcombine_s32(vget_high_s32(val_26_0.val[0]),
+ vget_high_s32(val_1014_0.val[0])); //q9
+
+ a[6].val[0] = vcombine_s32(vget_low_s32(val_26_0.val[1]),
+ vget_low_s32(val_1014_0.val[1])); //q5
+ a[6].val[1] = vcombine_s32(vget_high_s32(val_26_0.val[1]),
+ vget_high_s32(val_1014_0.val[1])); //q8
+
+ a[10].val[0] = vcombine_s32(vget_low_s32(val_26_1.val[0]),
+ vget_low_s32(val_1014_1.val[0])); //q4
+ a[10].val[1] = vcombine_s32(vget_high_s32(val_26_1.val[0]),
+ vget_high_s32(val_1014_1.val[0])); //q12
+
+ a[14].val[0] = vcombine_s32(vget_low_s32(val_26_1.val[1]),
+ vget_low_s32(val_1014_1.val[1])); //q13
+ a[14].val[1] = vcombine_s32(vget_high_s32(val_26_1.val[1]),
+ vget_high_s32(val_1014_1.val[1])); //q15
+
+ /*Additions*/
+ a[2].val[0] = vaddq_s32(a[2].val[0], a[6].val[0]); //q2
+ a[2].val[1] = vaddq_s32(a[2].val[1], a[6].val[1]); //q9
+ a[2].val[1] = vaddq_s32(a[2].val[1], a[2].val[0]); //q9
+
+ a[10].val[0] = vaddq_s32(a[10].val[0], a[14].val[0]); //q4
+ a[10].val[1] = vaddq_s32(a[10].val[1], a[14].val[1]); //q12
+ a[10].val[1] = vaddq_s32(a[10].val[1], a[10].val[0]); //q12
+
+ int32x4x2_t val_2 = vzipq_s32(a[2].val[1], a[10].val[1]); //q9 q12
+
+ /*Store*/
+ vst1_s32(pi4_temp + 64, vget_low_s32(val_2.val[0])); /*Value 2*/
+ vst1_s32(pi4_temp + 192, vget_high_s32(val_2.val[0])); /*Value 6*/
+ vst1_s32(pi4_temp + 320, vget_low_s32(val_2.val[1])); /*Value 10*/
+ vst1_s32(pi4_temp + 448, vget_high_s32(val_2.val[1])); /*Value 14*/
+
+ a[18].val[0] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_18_07),
+ vget_low_s16(eo0)); //q0
+ a[22].val[0] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_22_07),
+ vget_low_s16(eo0)); //q5
+ a[26].val[0] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_26_07),
+ vget_low_s16(eo0)); //q9
+ a[30].val[0] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_30_07),
+ vget_low_s16(eo0)); //q15
+
+ a[30].val[0] = vmlal_s16(a[30].val[0],
+ vget_high_s16(g_ai2_ihevc_trans_32_30_07), vget_high_s16(eo0));
+ a[26].val[0] = vmlal_s16(a[26].val[0],
+ vget_high_s16(g_ai2_ihevc_trans_32_26_07), vget_high_s16(eo0));
+ a[22].val[0] = vmlal_s16(a[22].val[0],
+ vget_high_s16(g_ai2_ihevc_trans_32_22_07), vget_high_s16(eo0));
+ a[18].val[0] = vmlal_s16(a[18].val[0],
+ vget_high_s16(g_ai2_ihevc_trans_32_18_07), vget_high_s16(eo0));
+
+ a[18].val[1] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_18_07),
+ vget_low_s16(eo1)); //q4
+ a[22].val[1] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_22_07),
+ vget_low_s16(eo1)); //q8
+ a[26].val[1] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_26_07),
+ vget_low_s16(eo1)); //q12
+ a[30].val[1] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_30_07),
+ vget_low_s16(eo1)); //q18
+
+ a[30].val[1] = vmlal_s16(a[30].val[1],
+ vget_high_s16(g_ai2_ihevc_trans_32_30_07), vget_high_s16(eo1));
+ a[26].val[1] = vmlal_s16(a[26].val[1],
+ vget_high_s16(g_ai2_ihevc_trans_32_26_07), vget_high_s16(eo1));
+ a[22].val[1] = vmlal_s16(a[22].val[1],
+ vget_high_s16(g_ai2_ihevc_trans_32_22_07), vget_high_s16(eo1));
+ a[18].val[1] = vmlal_s16(a[18].val[1],
+ vget_high_s16(g_ai2_ihevc_trans_32_18_07), vget_high_s16(eo1));
+
+ /*Transposes*/
+ int32x4x2_t val_1822_0 =
+ vtrnq_s32(a[18].val[0], a[22].val[0]); //q2 q5
+ int32x4x2_t val_2630_0 =
+ vtrnq_s32(a[26].val[0], a[30].val[0]); //q9 q8
+ int32x4x2_t val_1822_1 =
+ vtrnq_s32(a[18].val[1], a[22].val[1]); //q4 q13
+ int32x4x2_t val_2630_1 =
+ vtrnq_s32(a[26].val[1], a[30].val[1]); //q12 q15
+
+ /*Swap*/
+ a[18].val[0] = vcombine_s32(vget_low_s32(val_1822_0.val[0]),
+ vget_low_s32(val_2630_0.val[0])); //q2
+ a[18].val[1] = vcombine_s32(vget_high_s32(val_1822_0.val[0]),
+ vget_high_s32(val_2630_0.val[0])); //q9
+
+ a[22].val[0] = vcombine_s32(vget_low_s32(val_1822_0.val[1]),
+ vget_low_s32(val_2630_0.val[1])); //q5
+ a[22].val[1] = vcombine_s32(vget_high_s32(val_1822_0.val[1]),
+ vget_high_s32(val_2630_0.val[1])); //q8
+
+ a[26].val[0] = vcombine_s32(vget_low_s32(val_1822_1.val[0]),
+ vget_low_s32(val_2630_1.val[0])); //q4
+ a[26].val[1] = vcombine_s32(vget_high_s32(val_1822_1.val[0]),
+ vget_high_s32(val_2630_1.val[0])); //q12
+
+ a[30].val[0] = vcombine_s32(vget_low_s32(val_1822_1.val[1]),
+ vget_low_s32(val_2630_1.val[1])); //q13
+ a[30].val[1] = vcombine_s32(vget_high_s32(val_1822_1.val[1]),
+ vget_high_s32(val_2630_1.val[1])); //q15
+
+ /*Additions*/
+ a[18].val[0] = vaddq_s32(a[18].val[0], a[22].val[0]); //q2
+ a[18].val[1] = vaddq_s32(a[18].val[1], a[22].val[1]); //q9
+ a[18].val[1] = vaddq_s32(a[18].val[1], a[18].val[0]); //q9
+
+ a[26].val[0] = vaddq_s32(a[26].val[0], a[30].val[0]); //q4
+ a[26].val[1] = vaddq_s32(a[26].val[1], a[30].val[1]); //q12
+ a[26].val[1] = vaddq_s32(a[26].val[1], a[26].val[0]); //q12
+
+ int32x4x2_t val_18 =
+ vzipq_s32(a[18].val[1], a[26].val[1]); //q9 q12
+
+ /*Store*/
+ vst1_s32(pi4_temp + 576, vget_low_s32(val_18.val[0])); /*Value 18*/
+ vst1_s32(pi4_temp + 704, vget_high_s32(val_18.val[0])); /*Value 22*/
+ vst1_s32(pi4_temp + 832, vget_low_s32(val_18.val[1])); /*Value 26*/
+ vst1_s32(pi4_temp + 960, vget_high_s32(val_18.val[1])); /*Value 30*/
+
+ /*Calculations for odd indexes*/
+ a[1].val[0] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_1_07),
+ vget_low_s16(o0_0)); //q1
+ a[3].val[0] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_3_07),
+ vget_low_s16(o0_0)); //q5
+ a[5].val[0] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_5_07),
+ vget_low_s16(o0_0)); //q8
+ a[7].val[0] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_7_07),
+ vget_low_s16(o0_0)); //q12
+
+ a[7].val[0] = vmlal_s16(a[7].val[0],
+ vget_high_s16(g_ai2_ihevc_trans_32_7_07), vget_high_s16(o0_0));
+ a[5].val[0] = vmlal_s16(a[5].val[0],
+ vget_high_s16(g_ai2_ihevc_trans_32_5_07), vget_high_s16(o0_0));
+ a[3].val[0] = vmlal_s16(a[3].val[0],
+ vget_high_s16(g_ai2_ihevc_trans_32_3_07), vget_high_s16(o0_0));
+ a[1].val[0] = vmlal_s16(a[1].val[0],
+ vget_high_s16(g_ai2_ihevc_trans_32_1_07), vget_high_s16(o0_0));
+
+ a[1].val[0] = vmlal_s16(a[1].val[0],
+ vget_low_s16(g_ai2_ihevc_trans_32_1_815), vget_low_s16(o0_1));
+ a[3].val[0] = vmlal_s16(a[3].val[0],
+ vget_low_s16(g_ai2_ihevc_trans_32_3_815), vget_low_s16(o0_1));
+ a[5].val[0] = vmlal_s16(a[5].val[0],
+ vget_low_s16(g_ai2_ihevc_trans_32_5_815), vget_low_s16(o0_1));
+ a[7].val[0] = vmlal_s16(a[7].val[0],
+ vget_low_s16(g_ai2_ihevc_trans_32_7_815), vget_low_s16(o0_1));
+
+ a[7].val[0] = vmlal_s16(a[7].val[0],
+ vget_high_s16(g_ai2_ihevc_trans_32_7_815), vget_high_s16(o0_1));
+ a[5].val[0] = vmlal_s16(a[5].val[0],
+ vget_high_s16(g_ai2_ihevc_trans_32_5_815), vget_high_s16(o0_1));
+ a[3].val[0] = vmlal_s16(a[3].val[0],
+ vget_high_s16(g_ai2_ihevc_trans_32_3_815), vget_high_s16(o0_1));
+ a[1].val[0] = vmlal_s16(a[1].val[0],
+ vget_high_s16(g_ai2_ihevc_trans_32_1_815), vget_high_s16(o0_1));
+
+ a[1].val[1] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_1_07),
+ vget_low_s16(o1_0)); //q0
+ a[3].val[1] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_3_07),
+ vget_low_s16(o1_0)); //q4
+ a[5].val[1] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_5_07),
+ vget_low_s16(o1_0)); //q9
+ a[7].val[1] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_7_07),
+ vget_low_s16(o1_0)); //q13
+
+ a[7].val[1] = vmlal_s16(a[7].val[1],
+ vget_high_s16(g_ai2_ihevc_trans_32_7_07), vget_high_s16(o1_0));
+ a[5].val[1] = vmlal_s16(a[5].val[1],
+ vget_high_s16(g_ai2_ihevc_trans_32_5_07), vget_high_s16(o1_0));
+ a[3].val[1] = vmlal_s16(a[3].val[1],
+ vget_high_s16(g_ai2_ihevc_trans_32_3_07), vget_high_s16(o1_0));
+ a[1].val[1] = vmlal_s16(a[1].val[1],
+ vget_high_s16(g_ai2_ihevc_trans_32_1_07), vget_high_s16(o1_0));
+
+ a[1].val[1] = vmlal_s16(a[1].val[1],
+ vget_low_s16(g_ai2_ihevc_trans_32_1_815), vget_low_s16(o1_1));
+ a[3].val[1] = vmlal_s16(a[3].val[1],
+ vget_low_s16(g_ai2_ihevc_trans_32_3_815), vget_low_s16(o1_1));
+ a[5].val[1] = vmlal_s16(a[5].val[1],
+ vget_low_s16(g_ai2_ihevc_trans_32_5_815), vget_low_s16(o1_1));
+ a[7].val[1] = vmlal_s16(a[7].val[1],
+ vget_low_s16(g_ai2_ihevc_trans_32_7_815), vget_low_s16(o1_1));
+
+ a[7].val[1] = vmlal_s16(a[7].val[1],
+ vget_high_s16(g_ai2_ihevc_trans_32_7_815), vget_high_s16(o1_1));
+ a[5].val[1] = vmlal_s16(a[5].val[1],
+ vget_high_s16(g_ai2_ihevc_trans_32_5_815), vget_high_s16(o1_1));
+ a[3].val[1] = vmlal_s16(a[3].val[1],
+ vget_high_s16(g_ai2_ihevc_trans_32_3_815), vget_high_s16(o1_1));
+ a[1].val[1] = vmlal_s16(a[1].val[1],
+ vget_high_s16(g_ai2_ihevc_trans_32_1_815), vget_high_s16(o1_1));
+
+ /*Transposes*/
+ int32x4x2_t val_13_0 = vtrnq_s32(a[1].val[0], a[3].val[0]); //q0 q4
+ int32x4x2_t val_13_1 = vtrnq_s32(a[1].val[1], a[3].val[1]); //q1 q5
+ int32x4x2_t val_57_0 =
+ vtrnq_s32(a[5].val[0], a[7].val[0]); //q8 q12
+ int32x4x2_t val_57_1 =
+ vtrnq_s32(a[5].val[1], a[7].val[1]); //q9 q13
+
+ /*Swap*/
+ a[1].val[0] = vcombine_s32(vget_low_s32(val_13_0.val[0]),
+ vget_low_s32(val_57_0.val[0])); //q0
+ a[1].val[1] = vcombine_s32(vget_high_s32(val_13_0.val[0]),
+ vget_high_s32(val_57_0.val[0])); //q8
+
+ a[3].val[0] = vcombine_s32(vget_low_s32(val_13_0.val[1]),
+ vget_low_s32(val_57_0.val[1])); //q1
+ a[3].val[1] = vcombine_s32(vget_high_s32(val_13_0.val[1]),
+ vget_high_s32(val_57_0.val[1])); //q9
+
+ a[5].val[0] = vcombine_s32(vget_low_s32(val_13_1.val[0]),
+ vget_low_s32(val_57_1.val[0])); //q4
+ a[5].val[1] = vcombine_s32(vget_high_s32(val_13_1.val[0]),
+ vget_high_s32(val_57_1.val[0])); //q12
+
+ a[7].val[0] = vcombine_s32(vget_low_s32(val_13_1.val[1]),
+ vget_low_s32(val_57_1.val[1])); //q5
+ a[7].val[1] = vcombine_s32(vget_high_s32(val_13_1.val[1]),
+ vget_high_s32(val_57_1.val[1])); //q13
+
+ /*Additions*/
+ a[1].val[0] = vaddq_s32(a[1].val[0], a[3].val[0]); //q0
+ a[1].val[1] = vaddq_s32(a[1].val[1], a[3].val[1]); //q8
+ a[1].val[1] = vaddq_s32(a[1].val[1], a[1].val[0]); //q8
+
+ a[5].val[0] = vaddq_s32(a[5].val[0], a[7].val[0]); //q1
+ a[5].val[1] = vaddq_s32(a[5].val[1], a[7].val[1]); //q9
+ a[5].val[1] = vaddq_s32(a[5].val[1], a[5].val[0]); //q9
+
+ int32x4x2_t val_1 = vzipq_s32(a[1].val[1], a[5].val[1]); //q8 q9
+
+ /*Store*/
+ vst1_s32(pi4_temp + 32, vget_low_s32(val_1.val[0])); /*Value 1*/
+ vst1_s32(pi4_temp + 96, vget_high_s32(val_1.val[0])); /*Value 3*/
+ vst1_s32(pi4_temp + 160, vget_low_s32(val_1.val[1])); /*Value 5*/
+ vst1_s32(pi4_temp + 224, vget_high_s32(val_1.val[1])); /*Value 7*/
+
+ a[9].val[0] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_9_07),
+ vget_low_s16(o0_0)); //q2
+ a[11].val[0] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_11_07),
+ vget_low_s16(o0_0)); //q2
+ a[13].val[0] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_13_07),
+ vget_low_s16(o0_0)); //q2
+ a[15].val[0] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_15_07),
+ vget_low_s16(o0_0)); //q2
+
+ a[15].val[0] = vmlal_s16(a[15].val[0],
+ vget_high_s16(g_ai2_ihevc_trans_32_15_07), vget_high_s16(o0_0));
+ a[13].val[0] = vmlal_s16(a[13].val[0],
+ vget_high_s16(g_ai2_ihevc_trans_32_13_07), vget_high_s16(o0_0));
+ a[11].val[0] = vmlal_s16(a[11].val[0],
+ vget_high_s16(g_ai2_ihevc_trans_32_11_07), vget_high_s16(o0_0));
+ a[9].val[0] = vmlal_s16(a[9].val[0],
+ vget_high_s16(g_ai2_ihevc_trans_32_9_07), vget_high_s16(o0_0));
+
+ a[9].val[0] = vmlal_s16(a[9].val[0],
+ vget_low_s16(g_ai2_ihevc_trans_32_9_815), vget_low_s16(o0_1));
+ a[11].val[0] = vmlal_s16(a[11].val[0],
+ vget_low_s16(g_ai2_ihevc_trans_32_11_815), vget_low_s16(o0_1));
+ a[13].val[0] = vmlal_s16(a[13].val[0],
+ vget_low_s16(g_ai2_ihevc_trans_32_13_815), vget_low_s16(o0_1));
+ a[15].val[0] = vmlal_s16(a[15].val[0],
+ vget_low_s16(g_ai2_ihevc_trans_32_15_815), vget_low_s16(o0_1));
+
+ a[15].val[0] = vmlal_s16(a[15].val[0],
+ vget_high_s16(g_ai2_ihevc_trans_32_15_815),
+ vget_high_s16(o0_1));
+ a[13].val[0] = vmlal_s16(a[13].val[0],
+ vget_high_s16(g_ai2_ihevc_trans_32_13_815),
+ vget_high_s16(o0_1));
+ a[11].val[0] = vmlal_s16(a[11].val[0],
+ vget_high_s16(g_ai2_ihevc_trans_32_11_815),
+ vget_high_s16(o0_1));
+ a[9].val[0] = vmlal_s16(a[9].val[0],
+ vget_high_s16(g_ai2_ihevc_trans_32_9_815), vget_high_s16(o0_1));
+
+ a[9].val[1] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_9_07),
+ vget_low_s16(o1_0)); //q2
+ a[11].val[1] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_11_07),
+ vget_low_s16(o1_0)); //q2
+ a[13].val[1] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_13_07),
+ vget_low_s16(o1_0)); //q2
+ a[15].val[1] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_15_07),
+ vget_low_s16(o1_0)); //q2
+
+ a[15].val[1] = vmlal_s16(a[15].val[1],
+ vget_high_s16(g_ai2_ihevc_trans_32_15_07), vget_high_s16(o1_0));
+ a[13].val[1] = vmlal_s16(a[13].val[1],
+ vget_high_s16(g_ai2_ihevc_trans_32_13_07), vget_high_s16(o1_0));
+ a[11].val[1] = vmlal_s16(a[11].val[1],
+ vget_high_s16(g_ai2_ihevc_trans_32_11_07), vget_high_s16(o1_0));
+ a[9].val[1] = vmlal_s16(a[9].val[1],
+ vget_high_s16(g_ai2_ihevc_trans_32_9_07), vget_high_s16(o1_0));
+
+ a[9].val[1] = vmlal_s16(a[9].val[1],
+ vget_low_s16(g_ai2_ihevc_trans_32_9_815), vget_low_s16(o1_1));
+ a[11].val[1] = vmlal_s16(a[11].val[1],
+ vget_low_s16(g_ai2_ihevc_trans_32_11_815), vget_low_s16(o1_1));
+ a[13].val[1] = vmlal_s16(a[13].val[1],
+ vget_low_s16(g_ai2_ihevc_trans_32_13_815), vget_low_s16(o1_1));
+ a[15].val[1] = vmlal_s16(a[15].val[1],
+ vget_low_s16(g_ai2_ihevc_trans_32_15_815), vget_low_s16(o1_1));
+
+ a[15].val[1] = vmlal_s16(a[15].val[1],
+ vget_high_s16(g_ai2_ihevc_trans_32_15_815),
+ vget_high_s16(o1_1));
+ a[13].val[1] = vmlal_s16(a[13].val[1],
+ vget_high_s16(g_ai2_ihevc_trans_32_13_815),
+ vget_high_s16(o1_1));
+ a[11].val[1] = vmlal_s16(a[11].val[1],
+ vget_high_s16(g_ai2_ihevc_trans_32_11_815),
+ vget_high_s16(o1_1));
+ a[9].val[1] = vmlal_s16(a[9].val[1],
+ vget_high_s16(g_ai2_ihevc_trans_32_9_815), vget_high_s16(o1_1));
+
+ int32x4x2_t val_911_0 =
+ vtrnq_s32(a[9].val[0], a[11].val[0]); //q0 q4
+ int32x4x2_t val_911_1 =
+ vtrnq_s32(a[9].val[1], a[11].val[1]); //q1 q5
+ int32x4x2_t val_1315_0 =
+ vtrnq_s32(a[13].val[0], a[15].val[0]); //q8 q12
+ int32x4x2_t val_1315_1 =
+ vtrnq_s32(a[13].val[1], a[15].val[1]); //q9 q13
+
+ a[9].val[0] = vcombine_s32(vget_low_s32(val_911_0.val[0]),
+ vget_low_s32(val_1315_0.val[0])); //q0
+ a[9].val[1] = vcombine_s32(vget_high_s32(val_911_0.val[0]),
+ vget_high_s32(val_1315_0.val[0])); //q8
+
+ a[11].val[0] = vcombine_s32(vget_low_s32(val_911_0.val[1]),
+ vget_low_s32(val_1315_0.val[1])); //q1
+ a[11].val[1] = vcombine_s32(vget_high_s32(val_911_0.val[1]),
+ vget_high_s32(val_1315_0.val[1])); //q9
+
+ a[13].val[0] = vcombine_s32(vget_low_s32(val_911_1.val[0]),
+ vget_low_s32(val_1315_1.val[0])); //q4
+ a[13].val[1] = vcombine_s32(vget_high_s32(val_911_1.val[0]),
+ vget_high_s32(val_1315_1.val[0])); //q12
+
+ a[15].val[0] = vcombine_s32(vget_low_s32(val_911_1.val[1]),
+ vget_low_s32(val_1315_1.val[1])); //q5
+ a[15].val[1] = vcombine_s32(vget_high_s32(val_911_1.val[1]),
+ vget_high_s32(val_1315_1.val[1])); //q13
+
+ a[9].val[0] = vaddq_s32(a[9].val[0], a[11].val[0]); //q0
+ a[9].val[1] = vaddq_s32(a[9].val[1], a[11].val[1]); //q8
+ a[9].val[1] = vaddq_s32(a[9].val[1], a[9].val[0]); //q8
+
+ a[13].val[0] = vaddq_s32(a[13].val[0], a[15].val[0]); //q1
+ a[13].val[1] = vaddq_s32(a[13].val[1], a[15].val[1]); //q9
+ a[13].val[1] = vaddq_s32(a[13].val[1], a[13].val[0]); //q9
+
+ int32x4x2_t val_9 = vzipq_s32(a[9].val[1], a[13].val[1]); //q8 q9
+
+ vst1_s32(pi4_temp + 288, vget_low_s32(val_9.val[0])); /*Value 9*/
+ vst1_s32(pi4_temp + 352, vget_high_s32(val_9.val[0])); /*Value 11*/
+ vst1_s32(pi4_temp + 416, vget_low_s32(val_9.val[1])); /*Value 13*/
+ vst1_s32(pi4_temp + 480, vget_high_s32(val_9.val[1])); /*Value 15*/
+
+ a[17].val[0] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_17_07),
+ vget_low_s16(o0_0)); //q2
+ a[19].val[0] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_19_07),
+ vget_low_s16(o0_0)); //q2
+ a[21].val[0] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_21_07),
+ vget_low_s16(o0_0)); //q2
+ a[23].val[0] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_23_07),
+ vget_low_s16(o0_0)); //q2
+
+ a[23].val[0] = vmlal_s16(a[23].val[0],
+ vget_high_s16(g_ai2_ihevc_trans_32_23_07), vget_high_s16(o0_0));
+ a[21].val[0] = vmlal_s16(a[21].val[0],
+ vget_high_s16(g_ai2_ihevc_trans_32_21_07), vget_high_s16(o0_0));
+ a[19].val[0] = vmlal_s16(a[19].val[0],
+ vget_high_s16(g_ai2_ihevc_trans_32_19_07), vget_high_s16(o0_0));
+ a[17].val[0] = vmlal_s16(a[17].val[0],
+ vget_high_s16(g_ai2_ihevc_trans_32_17_07), vget_high_s16(o0_0));
+
+ a[17].val[0] = vmlal_s16(a[17].val[0],
+ vget_low_s16(g_ai2_ihevc_trans_32_17_815), vget_low_s16(o0_1));
+ a[19].val[0] = vmlal_s16(a[19].val[0],
+ vget_low_s16(g_ai2_ihevc_trans_32_19_815), vget_low_s16(o0_1));
+ a[21].val[0] = vmlal_s16(a[21].val[0],
+ vget_low_s16(g_ai2_ihevc_trans_32_21_815), vget_low_s16(o0_1));
+ a[23].val[0] = vmlal_s16(a[23].val[0],
+ vget_low_s16(g_ai2_ihevc_trans_32_23_815), vget_low_s16(o0_1));
+
+ a[23].val[0] = vmlal_s16(a[23].val[0],
+ vget_high_s16(g_ai2_ihevc_trans_32_23_815),
+ vget_high_s16(o0_1));
+ a[21].val[0] = vmlal_s16(a[21].val[0],
+ vget_high_s16(g_ai2_ihevc_trans_32_21_815),
+ vget_high_s16(o0_1));
+ a[19].val[0] = vmlal_s16(a[19].val[0],
+ vget_high_s16(g_ai2_ihevc_trans_32_19_815),
+ vget_high_s16(o0_1));
+ a[17].val[0] = vmlal_s16(a[17].val[0],
+ vget_high_s16(g_ai2_ihevc_trans_32_17_815),
+ vget_high_s16(o0_1));
+
+ a[17].val[1] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_17_07),
+ vget_low_s16(o1_0)); //q2
+ a[19].val[1] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_19_07),
+ vget_low_s16(o1_0)); //q2
+ a[21].val[1] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_21_07),
+ vget_low_s16(o1_0)); //q2
+ a[23].val[1] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_23_07),
+ vget_low_s16(o1_0)); //q2
+
+ a[23].val[1] = vmlal_s16(a[23].val[1],
+ vget_high_s16(g_ai2_ihevc_trans_32_23_07), vget_high_s16(o1_0));
+ a[21].val[1] = vmlal_s16(a[21].val[1],
+ vget_high_s16(g_ai2_ihevc_trans_32_21_07), vget_high_s16(o1_0));
+ a[19].val[1] = vmlal_s16(a[19].val[1],
+ vget_high_s16(g_ai2_ihevc_trans_32_19_07), vget_high_s16(o1_0));
+ a[17].val[1] = vmlal_s16(a[17].val[1],
+ vget_high_s16(g_ai2_ihevc_trans_32_17_07), vget_high_s16(o1_0));
+
+ a[17].val[1] = vmlal_s16(a[17].val[1],
+ vget_low_s16(g_ai2_ihevc_trans_32_17_815), vget_low_s16(o1_1));
+ a[19].val[1] = vmlal_s16(a[19].val[1],
+ vget_low_s16(g_ai2_ihevc_trans_32_19_815), vget_low_s16(o1_1));
+ a[21].val[1] = vmlal_s16(a[21].val[1],
+ vget_low_s16(g_ai2_ihevc_trans_32_21_815), vget_low_s16(o1_1));
+ a[23].val[1] = vmlal_s16(a[23].val[1],
+ vget_low_s16(g_ai2_ihevc_trans_32_23_815), vget_low_s16(o1_1));
+
+ a[23].val[1] = vmlal_s16(a[23].val[1],
+ vget_high_s16(g_ai2_ihevc_trans_32_23_815),
+ vget_high_s16(o1_1));
+ a[21].val[1] = vmlal_s16(a[21].val[1],
+ vget_high_s16(g_ai2_ihevc_trans_32_21_815),
+ vget_high_s16(o1_1));
+ a[19].val[1] = vmlal_s16(a[19].val[1],
+ vget_high_s16(g_ai2_ihevc_trans_32_19_815),
+ vget_high_s16(o1_1));
+ a[17].val[1] = vmlal_s16(a[17].val[1],
+ vget_high_s16(g_ai2_ihevc_trans_32_17_815),
+ vget_high_s16(o1_1));
+
+ int32x4x2_t val_1719_0 =
+ vtrnq_s32(a[17].val[0], a[19].val[0]); //q0 q4
+ int32x4x2_t val_1719_1 =
+ vtrnq_s32(a[17].val[1], a[19].val[1]); //q1 q5
+ int32x4x2_t val_2123_0 =
+ vtrnq_s32(a[21].val[0], a[23].val[0]); //q8 q12
+ int32x4x2_t val_2123_1 =
+ vtrnq_s32(a[21].val[1], a[23].val[1]); //q9 q13
+
+ a[17].val[0] = vcombine_s32(vget_low_s32(val_1719_0.val[0]),
+ vget_low_s32(val_2123_0.val[0])); //q0
+ a[17].val[1] = vcombine_s32(vget_high_s32(val_1719_0.val[0]),
+ vget_high_s32(val_2123_0.val[0])); //q8
+
+ a[19].val[0] = vcombine_s32(vget_low_s32(val_1719_0.val[1]),
+ vget_low_s32(val_2123_0.val[1])); //q1
+ a[19].val[1] = vcombine_s32(vget_high_s32(val_1719_0.val[1]),
+ vget_high_s32(val_2123_0.val[1])); //q9
+
+ a[21].val[0] = vcombine_s32(vget_low_s32(val_1719_1.val[0]),
+ vget_low_s32(val_2123_1.val[0])); //q4
+ a[21].val[1] = vcombine_s32(vget_high_s32(val_1719_1.val[0]),
+ vget_high_s32(val_2123_1.val[0])); //q12
+
+ a[23].val[0] = vcombine_s32(vget_low_s32(val_1719_1.val[1]),
+ vget_low_s32(val_2123_1.val[1])); //q5
+ a[23].val[1] = vcombine_s32(vget_high_s32(val_1719_1.val[1]),
+ vget_high_s32(val_2123_1.val[1])); //q13
+
+ a[17].val[0] = vaddq_s32(a[17].val[0], a[19].val[0]); //q0
+ a[17].val[1] = vaddq_s32(a[17].val[1], a[19].val[1]); //q8
+ a[17].val[1] = vaddq_s32(a[17].val[1], a[17].val[0]); //q8
+
+ a[21].val[0] = vaddq_s32(a[21].val[0], a[23].val[0]); //q1
+ a[21].val[1] = vaddq_s32(a[21].val[1], a[23].val[1]); //q9
+ a[21].val[1] = vaddq_s32(a[21].val[1], a[21].val[0]); //q9
+
+ int32x4x2_t val_17 = vzipq_s32(a[17].val[1], a[21].val[1]); //q8 q9
+
+ vst1_s32(pi4_temp + 544, vget_low_s32(val_17.val[0])); /*Value 17*/
+ vst1_s32(pi4_temp + 608, vget_high_s32(val_17.val[0])); /*Value 19*/
+ vst1_s32(pi4_temp + 672, vget_low_s32(val_17.val[1])); /*Value 21*/
+ vst1_s32(pi4_temp + 736, vget_high_s32(val_17.val[1])); /*Value 23*/
+
+ a[25].val[0] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_25_07),
+ vget_low_s16(o0_0)); //q2
+ a[27].val[0] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_27_07),
+ vget_low_s16(o0_0)); //q2
+ a[29].val[0] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_29_07),
+ vget_low_s16(o0_0)); //q2
+ a[31].val[0] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_31_07),
+ vget_low_s16(o0_0)); //q2
+
+ a[31].val[0] = vmlal_s16(a[31].val[0],
+ vget_high_s16(g_ai2_ihevc_trans_32_31_07), vget_high_s16(o0_0));
+ a[29].val[0] = vmlal_s16(a[29].val[0],
+ vget_high_s16(g_ai2_ihevc_trans_32_29_07), vget_high_s16(o0_0));
+ a[27].val[0] = vmlal_s16(a[27].val[0],
+ vget_high_s16(g_ai2_ihevc_trans_32_27_07), vget_high_s16(o0_0));
+ a[25].val[0] = vmlal_s16(a[25].val[0],
+ vget_high_s16(g_ai2_ihevc_trans_32_25_07), vget_high_s16(o0_0));
+
+ a[25].val[0] = vmlal_s16(a[25].val[0],
+ vget_low_s16(g_ai2_ihevc_trans_32_25_815), vget_low_s16(o0_1));
+ a[27].val[0] = vmlal_s16(a[27].val[0],
+ vget_low_s16(g_ai2_ihevc_trans_32_27_815), vget_low_s16(o0_1));
+ a[29].val[0] = vmlal_s16(a[29].val[0],
+ vget_low_s16(g_ai2_ihevc_trans_32_29_815), vget_low_s16(o0_1));
+ a[31].val[0] = vmlal_s16(a[31].val[0],
+ vget_low_s16(g_ai2_ihevc_trans_32_31_815), vget_low_s16(o0_1));
+
+ a[31].val[0] = vmlal_s16(a[31].val[0],
+ vget_high_s16(g_ai2_ihevc_trans_32_31_815),
+ vget_high_s16(o0_1));
+ a[29].val[0] = vmlal_s16(a[29].val[0],
+ vget_high_s16(g_ai2_ihevc_trans_32_29_815),
+ vget_high_s16(o0_1));
+ a[27].val[0] = vmlal_s16(a[27].val[0],
+ vget_high_s16(g_ai2_ihevc_trans_32_27_815),
+ vget_high_s16(o0_1));
+ a[25].val[0] = vmlal_s16(a[25].val[0],
+ vget_high_s16(g_ai2_ihevc_trans_32_25_815),
+ vget_high_s16(o0_1));
+
+ a[25].val[1] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_25_07),
+ vget_low_s16(o1_0)); //q2
+ a[27].val[1] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_27_07),
+ vget_low_s16(o1_0)); //q2
+ a[29].val[1] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_29_07),
+ vget_low_s16(o1_0)); //q2
+ a[31].val[1] = vmull_s16(vget_low_s16(g_ai2_ihevc_trans_32_31_07),
+ vget_low_s16(o1_0)); //q2
+
+ a[31].val[1] = vmlal_s16(a[31].val[1],
+ vget_high_s16(g_ai2_ihevc_trans_32_31_07), vget_high_s16(o1_0));
+ a[29].val[1] = vmlal_s16(a[29].val[1],
+ vget_high_s16(g_ai2_ihevc_trans_32_29_07), vget_high_s16(o1_0));
+ a[27].val[1] = vmlal_s16(a[27].val[1],
+ vget_high_s16(g_ai2_ihevc_trans_32_27_07), vget_high_s16(o1_0));
+ a[25].val[1] = vmlal_s16(a[25].val[1],
+ vget_high_s16(g_ai2_ihevc_trans_32_25_07), vget_high_s16(o1_0));
+
+ a[25].val[1] = vmlal_s16(a[25].val[1],
+ vget_low_s16(g_ai2_ihevc_trans_32_25_815), vget_low_s16(o1_1));
+ a[27].val[1] = vmlal_s16(a[27].val[1],
+ vget_low_s16(g_ai2_ihevc_trans_32_27_815), vget_low_s16(o1_1));
+ a[29].val[1] = vmlal_s16(a[29].val[1],
+ vget_low_s16(g_ai2_ihevc_trans_32_29_815), vget_low_s16(o1_1));
+ a[31].val[1] = vmlal_s16(a[31].val[1],
+ vget_low_s16(g_ai2_ihevc_trans_32_31_815), vget_low_s16(o1_1));
+
+ a[31].val[1] = vmlal_s16(a[31].val[1],
+ vget_high_s16(g_ai2_ihevc_trans_32_31_815),
+ vget_high_s16(o1_1));
+ a[29].val[1] = vmlal_s16(a[29].val[1],
+ vget_high_s16(g_ai2_ihevc_trans_32_29_815),
+ vget_high_s16(o1_1));
+ a[27].val[1] = vmlal_s16(a[27].val[1],
+ vget_high_s16(g_ai2_ihevc_trans_32_27_815),
+ vget_high_s16(o1_1));
+ a[25].val[1] = vmlal_s16(a[25].val[1],
+ vget_high_s16(g_ai2_ihevc_trans_32_25_815),
+ vget_high_s16(o1_1));
+
+ int32x4x2_t val_2527_0 =
+ vtrnq_s32(a[25].val[0], a[27].val[0]); //q0 q4
+ int32x4x2_t val_2527_1 =
+ vtrnq_s32(a[25].val[1], a[27].val[1]); //q1 q5
+ int32x4x2_t val_2931_0 =
+ vtrnq_s32(a[29].val[0], a[31].val[0]); //q8 q12
+ int32x4x2_t val_2931_1 =
+ vtrnq_s32(a[29].val[1], a[31].val[1]); //q9 q13
+
+ a[25].val[0] = vcombine_s32(vget_low_s32(val_2527_0.val[0]),
+ vget_low_s32(val_2931_0.val[0])); //q0
+ a[25].val[1] = vcombine_s32(vget_high_s32(val_2527_0.val[0]),
+ vget_high_s32(val_2931_0.val[0])); //q8
+
+ a[27].val[0] = vcombine_s32(vget_low_s32(val_2527_0.val[1]),
+ vget_low_s32(val_2931_0.val[1])); //q1
+ a[27].val[1] = vcombine_s32(vget_high_s32(val_2527_0.val[1]),
+ vget_high_s32(val_2931_0.val[1])); //q9
+
+ a[29].val[0] = vcombine_s32(vget_low_s32(val_2527_1.val[0]),
+ vget_low_s32(val_2931_1.val[0])); //q4
+ a[29].val[1] = vcombine_s32(vget_high_s32(val_2527_1.val[0]),
+ vget_high_s32(val_2931_1.val[0])); //q12
+
+ a[31].val[0] = vcombine_s32(vget_low_s32(val_2527_1.val[1]),
+ vget_low_s32(val_2931_1.val[1])); //q5
+ a[31].val[1] = vcombine_s32(vget_high_s32(val_2527_1.val[1]),
+ vget_high_s32(val_2931_1.val[1])); //q13
+
+ a[25].val[0] = vaddq_s32(a[25].val[0], a[27].val[0]); //q0
+ a[25].val[1] = vaddq_s32(a[25].val[1], a[27].val[1]); //q8
+ a[25].val[1] = vaddq_s32(a[25].val[1], a[25].val[0]); //q8
+
+ a[29].val[0] = vaddq_s32(a[29].val[0], a[31].val[0]); //q1
+ a[29].val[1] = vaddq_s32(a[29].val[1], a[31].val[1]); //q9
+ a[29].val[1] = vaddq_s32(a[29].val[1], a[29].val[0]); //q9
+
+ int32x4x2_t val_25 = vzipq_s32(a[25].val[1], a[29].val[1]); //q8 q9
+
+ vst1_s32(pi4_temp + 800, vget_low_s32(val_25.val[0])); /*Value 25*/
+ vst1_s32(pi4_temp + 864, vget_high_s32(val_25.val[0])); /*Value 27*/
+ vst1_s32(pi4_temp + 928, vget_low_s32(val_25.val[1])); /*Value 29*/
+ vst1_s32(pi4_temp + 992, vget_high_s32(val_25.val[1])); /*Value 31*/
+
+ pi4_temp += 2;
+ }
+ }
+
+ /*sad of the block*/
+ tmp_a = vpaddlq_s32(sum_val);
+ sad = vadd_s32(vreinterpret_s32_s64(vget_low_s64(tmp_a)),
+ vreinterpret_s32_s64(vget_high_s64(tmp_a)));
+ u4_blk_sad = vget_lane_s32(sad, 0);
+
+ //Stage 2
+ {
+ static const int32x4_t g_ai4_ihevc_trans_32_0_8 = { 64, -64, 83, -83 };
+ static const int32x4_t g_ai4_ihevc_trans_32_1_8 = { 64, 64, 36, 36 };
+
+ static const int32x4_t g_ai4_ihevc_trans_32_4_04 = { 89, 75, 50, 18 };
+ static const int32x4_t g_ai4_ihevc_trans_32_12_04 = { 75, -18, -89, -50 };
+ static const int32x4_t g_ai4_ihevc_trans_32_20_04 = { 50, -89, 18, 75 };
+ static const int32x4_t g_ai4_ihevc_trans_32_28_04 = { 18, -50, 75, -89 };
+
+ static const int32x4_t g_ai4_ihevc_trans_32_2_03 = { 90, 87, 80, 70 };
+ static const int32x4_t g_ai4_ihevc_trans_32_2_47 = { 57, 43, 25, 9 };
+ static const int32x4_t g_ai4_ihevc_trans_32_6_03 = { 87, 57, 9, -43 };
+ static const int32x4_t g_ai4_ihevc_trans_32_6_47 = { -80, -90, -70,
+ -25 };
+ static const int32x4_t g_ai4_ihevc_trans_32_10_03 = { 80, 9, -70, -87 };
+ static const int32x4_t g_ai4_ihevc_trans_32_10_47 = { -25, 57, 90, 43 };
+ static const int32x4_t g_ai4_ihevc_trans_32_14_03 = { 70, -43, -87, 9 };
+ static const int32x4_t g_ai4_ihevc_trans_32_14_47 = { 90, 25, -80, -57 };
+ static const int32x4_t g_ai4_ihevc_trans_32_18_03 = { 57, -80, -25, 90 };
+ static const int32x4_t g_ai4_ihevc_trans_32_18_47 = { -9, -87, 43, 70 };
+ static const int32x4_t g_ai4_ihevc_trans_32_22_03 = { 43, -90, 57, 25 };
+ static const int32x4_t g_ai4_ihevc_trans_32_22_47 = { -87, 70, 9, -80 };
+ static const int32x4_t g_ai4_ihevc_trans_32_26_03 = { 25, -70, 90, -80 };
+ static const int32x4_t g_ai4_ihevc_trans_32_26_47 = { 43, 9, -57, 87 };
+ static const int32x4_t g_ai4_ihevc_trans_32_30_03 = { 9, -25, 43, -57 };
+ static const int32x4_t g_ai4_ihevc_trans_32_30_47 = { 70, -80, 87, -90 };
+
+ static const int32x4_t g_ai4_ihevc_trans_32_1_03 = { 90, 90, 88, 85 };
+ static const int32x4_t g_ai4_ihevc_trans_32_1_47 = { 82, 78, 73, 67 };
+ static const int32x4_t g_ai4_ihevc_trans_32_1_811 = { 61, 54, 46, 38 };
+ static const int32x4_t g_ai4_ihevc_trans_32_1_1215 = { 31, 22, 13, 4 };
+ static const int32x4_t g_ai4_ihevc_trans_32_3_03 = { 90, 82, 67, 46 };
+ static const int32x4_t g_ai4_ihevc_trans_32_3_47 = { 22, -4, -31, -54 };
+ static const int32x4_t g_ai4_ihevc_trans_32_3_811 = { -73, -85, -90, -88 };
+ static const int32x4_t g_ai4_ihevc_trans_32_3_1215 = { -78, -61, -38, -13 };
+ static const int32x4_t g_ai4_ihevc_trans_32_5_03 = { 88, 67, 31, -13 };
+ static const int32x4_t g_ai4_ihevc_trans_32_5_47 = { -54, -82, -90, -78 };
+ static const int32x4_t g_ai4_ihevc_trans_32_5_811 = { -46, -4, 38, 73 };
+ static const int32x4_t g_ai4_ihevc_trans_32_5_1215 = { 90, 85, 61, 22 };
+ static const int32x4_t g_ai4_ihevc_trans_32_7_03 = { 85, 46, -13, -67 };
+ static const int32x4_t g_ai4_ihevc_trans_32_7_47 = { -90, -73, -22, 38 };
+ static const int32x4_t g_ai4_ihevc_trans_32_7_811 = { 82, 88, 54, -4 };
+ static const int32x4_t g_ai4_ihevc_trans_32_7_1215 = { -61, -90, -78, -31 };
+ static const int32x4_t g_ai4_ihevc_trans_32_9_03 = { 82, 22, -54, -90 };
+ static const int32x4_t g_ai4_ihevc_trans_32_9_47 = { -61, 13, 78, 85 };
+ static const int32x4_t g_ai4_ihevc_trans_32_9_811 = { 31, -46, -90, -67 };
+ static const int32x4_t g_ai4_ihevc_trans_32_9_1215 = { 4, 73, 88, 38 };
+ static const int32x4_t g_ai4_ihevc_trans_32_11_03 = { 78, -4, -82, -73 };
+ static const int32x4_t g_ai4_ihevc_trans_32_11_47 = { 13, 85, 67, -22 };
+ static const int32x4_t g_ai4_ihevc_trans_32_11_811 = { -88, -61, 31, 90 };
+ static const int32x4_t g_ai4_ihevc_trans_32_11_1215 = { 54, -38, -90, -46 };
+ static const int32x4_t g_ai4_ihevc_trans_32_13_03 = { 73, -31, -90, -22 };
+ static const int32x4_t g_ai4_ihevc_trans_32_13_47 = { 78, 67, -38, -90 };
+ static const int32x4_t g_ai4_ihevc_trans_32_13_811 = { -13, 82, 61, -46 };
+ static const int32x4_t g_ai4_ihevc_trans_32_13_1215 = { -88, -4, 85, 54 };
+ static const int32x4_t g_ai4_ihevc_trans_32_15_03 = { 67, -54, -78, 38 };
+ static const int32x4_t g_ai4_ihevc_trans_32_15_47 = { 85, -22, -90, 4 };
+ static const int32x4_t g_ai4_ihevc_trans_32_15_811 = { 90, 13, -88, -31 };
+ static const int32x4_t g_ai4_ihevc_trans_32_15_1215 = { 82, 46, -73, -61 };
+ static const int32x4_t g_ai4_ihevc_trans_32_17_03 = { 61, -73, -46, 82 };
+ static const int32x4_t g_ai4_ihevc_trans_32_17_47 = { 31, -88, -13, 90 };
+ static const int32x4_t g_ai4_ihevc_trans_32_17_811 = { -4, -90, 22, 85 };
+ static const int32x4_t g_ai4_ihevc_trans_32_17_1215 = { -38, -78, 54, 67 };
+ static const int32x4_t g_ai4_ihevc_trans_32_19_03 = { 54, -85, -4, 88 };
+ static const int32x4_t g_ai4_ihevc_trans_32_19_47 = { -46, -61, 82, 13 };
+ static const int32x4_t g_ai4_ihevc_trans_32_19_811 = { -90, 38, 67, -78 };
+ static const int32x4_t g_ai4_ihevc_trans_32_19_1215 = { -22, 90, -31, -73 };
+ static const int32x4_t g_ai4_ihevc_trans_32_21_03 = { 46, -90, 38, 54 };
+ static const int32x4_t g_ai4_ihevc_trans_32_21_47 = { -90, 31, 61, -88 };
+ static const int32x4_t g_ai4_ihevc_trans_32_21_811 = { 22, 67, -85, 13 };
+ static const int32x4_t g_ai4_ihevc_trans_32_21_1215 = { 73, -82, 4, 78 };
+ static const int32x4_t g_ai4_ihevc_trans_32_23_03 = { 38, -88, 73, -4 };
+ static const int32x4_t g_ai4_ihevc_trans_32_23_47 = { -67, 90, -46, -31 };
+ static const int32x4_t g_ai4_ihevc_trans_32_23_811 = { 85, -78, 13, 61 };
+ static const int32x4_t g_ai4_ihevc_trans_32_23_1215 = { -90, 54, 22, -82 };
+ static const int32x4_t g_ai4_ihevc_trans_32_25_03 = { 31, -78, 90, -61 };
+ static const int32x4_t g_ai4_ihevc_trans_32_25_47 = { 4, 54, -88, 82 };
+ static const int32x4_t g_ai4_ihevc_trans_32_25_811 = { -38, -22, 73, -90 };
+ static const int32x4_t g_ai4_ihevc_trans_32_25_1215 = { 67, -13, -46, 85 };
+ static const int32x4_t g_ai4_ihevc_trans_32_27_03 = { 22, -61, 85, -90 };
+ static const int32x4_t g_ai4_ihevc_trans_32_27_47 = { 73, -38, -4, 46 };
+ static const int32x4_t g_ai4_ihevc_trans_32_27_811 = { -78, 90, -82, 54 };
+ static const int32x4_t g_ai4_ihevc_trans_32_27_1215 = { -13, -31, 67, -88 };
+ static const int32x4_t g_ai4_ihevc_trans_32_29_03 = { 13, -38, 61, -78 };
+ static const int32x4_t g_ai4_ihevc_trans_32_29_47 = { 88, -90, 85, -73 };
+ static const int32x4_t g_ai4_ihevc_trans_32_29_811 = { 54, -31, 4, 22 };
+ static const int32x4_t g_ai4_ihevc_trans_32_29_1215 = { -46, 67, -82, 90 };
+ static const int32x4_t g_ai4_ihevc_trans_32_31_03 = { 4, -13, 22, -31 };
+ static const int32x4_t g_ai4_ihevc_trans_32_31_47 = { 38, -46, 54, -61 };
+ static const int32x4_t g_ai4_ihevc_trans_32_31_811 = { 67, -73, 78, -82 };
+ static const int32x4_t g_ai4_ihevc_trans_32_31_1215 = { 85, -88, 90, -90 };
+
+ int32x4_t a[32];
+
+ pi4_temp = pi4_temp_orig;
+ for(i = 0; i < 32; i++)
+ {
+ int32x4_t temp_data[8];
+
+ temp_data[0] = vld1q_s32(pi4_temp);
+ temp_data[1] = vld1q_s32(pi4_temp + 4);
+ temp_data[2] = vld1q_s32(pi4_temp + 8);
+ temp_data[3] = vld1q_s32(pi4_temp + 12);
+
+ temp_data[4] = vrev64q_s32(vld1q_s32(pi4_temp + 16));
+ temp_data[4] = vcombine_s32(
+ vget_high_s32(temp_data[4]), vget_low_s32(temp_data[4]));
+
+ temp_data[5] = vrev64q_s32(vld1q_s32(pi4_temp + 20));
+ temp_data[5] = vcombine_s32(
+ vget_high_s32(temp_data[5]), vget_low_s32(temp_data[5]));
+
+ temp_data[6] = vrev64q_s32(vld1q_s32(pi4_temp + 24));
+ temp_data[6] = vcombine_s32(
+ vget_high_s32(temp_data[6]), vget_low_s32(temp_data[6]));
+
+ temp_data[7] = vrev64q_s32(vld1q_s32(pi4_temp + 28));
+ temp_data[7] = vcombine_s32(
+ vget_high_s32(temp_data[7]), vget_low_s32(temp_data[7]));
+
+ pi4_temp += 32;
+
+ const int32x4_t o0 =
+ vsubq_s32(temp_data[0], temp_data[7]); /*R2(9-16) - R2(24-17)*/
+ const int32x4_t o1 =
+ vsubq_s32(temp_data[1], temp_data[6]); /*R2(1- 8) - R2(32-25)*/
+ const int32x4_t o2 =
+ vsubq_s32(temp_data[2], temp_data[5]); /*R1(9-16) - R1(24-17)*/
+ const int32x4_t o3 =
+ vsubq_s32(temp_data[3], temp_data[4]); /*R1(1- 8) - R1(32-25)*/
+
+ int32x4_t e3 = vrev64q_s32(
+ vaddq_s32(temp_data[3], temp_data[4])); /*R1(1- 8) + R1(32-25)*/
+ e3 = vcombine_s32(vget_high_s32(e3), vget_low_s32(e3));
+ int32x4_t e2 = vrev64q_s32(
+ vaddq_s32(temp_data[2], temp_data[5])); /*R1(9-16) + R1(24-17)*/
+ e2 = vcombine_s32(vget_high_s32(e2), vget_low_s32(e2));
+
+ const int32x4_t e1 =
+ vaddq_s32(temp_data[1], temp_data[6]); /*R2(1- 8) + R2(32-25)*/
+ const int32x4_t e0 =
+ vaddq_s32(temp_data[0], temp_data[7]); /*R2(9-16) + R2(24-17)*/
+
+ const int32x4_t ee0 = vaddq_s32(e0, e3); /*E1(1- 8) + E1(16-9)*/
+ int32x4_t ee1 =
+ vrev64q_s32(vaddq_s32(e1, e2)); /*E2(1- 8) + E2(16-9)*/
+ ee1 = vcombine_s32(vget_high_s32(ee1), vget_low_s32(ee1));
+ const int32x4_t eo1 = vsubq_s32(e1, e2); /*E2(1- 8) - E2(16-9)*/
+ const int32x4_t eo0 = vsubq_s32(e0, e3); /*E1(1- 8) - E1(16-9)*/
+
+ /*EE(1-4) - EE(8-5)*/
+ const int32x4_t eeo = vsubq_s32(ee0, ee1); //Q5
+ /*EE(1-4) + EE(8-5)*/
+ const int32x4_t eee = vaddq_s32(ee0, ee1); //Q4
+
+ /*EEEE Calculations*/
+ const int32x4_t eeee = vcombine_s32(
+ vadd_s32(vget_low_s32(eee), vrev64_s32(vget_high_s32(eee))),
+ vsub_s32(
+ vget_low_s32(eee), vrev64_s32(vget_high_s32(eee)))); //q6
+
+ /*Calculation of values 0 8 16 24*/
+ /*Multiplications*/
+ a[0] = vmulq_s32(g_ai4_ihevc_trans_32_0_8, eeee);
+ a[0] = vmlaq_s32(a[0], g_ai4_ihevc_trans_32_1_8, vrev64q_s32(eeee));
+ /*Shift*/
+ int16x4_t val_0 = vrshrn_n_s32(a[0], 15);
+ /*Store*/
+ vst1_lane_s16(pi2_dst, val_0, 0); /*Value 0*/
+ vst1_lane_s16(pi2_dst + 8 * dst_strd, val_0, 2); /*Value 8*/
+ vst1_lane_s16(pi2_dst + 16 * dst_strd, val_0, 1); /*Value 16*/
+ vst1_lane_s16(pi2_dst + 24 * dst_strd, val_0, 3); /*Value 24*/
+
+ /*Calculation of values 4 12 20 28*/
+ /*Multiplications*/
+ a[4] = vmulq_s32(g_ai4_ihevc_trans_32_4_04, eeo);
+ a[12] = vmulq_s32(g_ai4_ihevc_trans_32_12_04, eeo);
+ a[20] = vmulq_s32(g_ai4_ihevc_trans_32_20_04, eeo);
+ a[28] = vmulq_s32(g_ai4_ihevc_trans_32_28_04, eeo);
+ /*Transposes*/
+ int32x4x2_t val_412 = vtrnq_s32(a[4], a[12]); //q0 q9
+ int32x4x2_t val_2028 = vtrnq_s32(a[20], a[28]); //q10 q11
+ /*Swap*/
+ a[4] = vcombine_s32(vget_low_s32(val_412.val[0]),
+ vget_low_s32(val_2028.val[0])); //q0
+ a[12] = vcombine_s32(vget_low_s32(val_412.val[1]),
+ vget_low_s32(val_2028.val[1])); //q9
+ a[20] = vcombine_s32(vget_high_s32(val_412.val[0]),
+ vget_high_s32(val_2028.val[0])); //q10
+ a[28] = vcombine_s32(vget_high_s32(val_412.val[1]),
+ vget_high_s32(val_2028.val[1])); //q11
+ /*Additions*/
+ a[4] = vaddq_s32(a[4], a[12]); //q0
+ a[20] = vaddq_s32(a[20], a[28]); //q10
+ a[4] = vaddq_s32(a[4], a[20]); //q0
+ /*Shift*/
+ int16x4_t val_4 = vrshrn_n_s32(a[4], 15);
+ /*Store*/
+ vst1_lane_s16(pi2_dst + 4 * dst_strd, val_4, 0); /*Value 4*/
+ vst1_lane_s16(pi2_dst + 12 * dst_strd, val_4, 1); /*Value 12*/
+ vst1_lane_s16(pi2_dst + 20 * dst_strd, val_4, 2); /*Value 20*/
+ vst1_lane_s16(pi2_dst + 28 * dst_strd, val_4, 3); /*Value 28*/
+
+ /*Calculation of value 2 6 10 14 18 22 26 30*/
+ /*Multiplications*/
+ a[2] = vmulq_s32(g_ai4_ihevc_trans_32_2_03, eo0); //q8
+ a[6] = vmulq_s32(g_ai4_ihevc_trans_32_6_03, eo0); //q2
+ a[10] = vmulq_s32(g_ai4_ihevc_trans_32_10_03, eo0); //q2
+ a[14] = vmulq_s32(g_ai4_ihevc_trans_32_14_03, eo0); //q2
+
+ a[14] = vmlaq_s32(a[14], g_ai4_ihevc_trans_32_14_47, eo1);
+ a[10] = vmlaq_s32(a[10], g_ai4_ihevc_trans_32_10_47, eo1);
+ a[6] = vmlaq_s32(a[6], g_ai4_ihevc_trans_32_6_47, eo1);
+ a[2] = vmlaq_s32(a[2], g_ai4_ihevc_trans_32_2_47, eo1);
+
+ int32x2_t val_2 = vadd_s32(vget_low_s32(a[2]), vget_high_s32(a[2]));
+ int32x2_t val_6 = vadd_s32(vget_low_s32(a[6]), vget_high_s32(a[6]));
+ val_2 = vpadd_s32(val_2, val_6);
+
+ int32x2_t val_10 =
+ vadd_s32(vget_low_s32(a[10]), vget_high_s32(a[10]));
+ int32x2_t val_14 =
+ vadd_s32(vget_low_s32(a[14]), vget_high_s32(a[14]));
+ val_10 = vpadd_s32(val_10, val_14);
+
+ /*Shift*/
+ int16x4_t val__2 =
+ vrshrn_n_s32(vcombine_s32(val_2, val_10), 15); //q9 q12
+
+ /*Store*/
+ vst1_lane_s16(pi2_dst + 2 * dst_strd, val__2, 0); /*Value 2*/
+ vst1_lane_s16(pi2_dst + 6 * dst_strd, val__2, 1); /*Value 6*/
+ vst1_lane_s16(pi2_dst + 10 * dst_strd, val__2, 2); /*Value 10*/
+ vst1_lane_s16(pi2_dst + 14 * dst_strd, val__2, 3); /*Value 14*/
+
+ a[18] = vmulq_s32(g_ai4_ihevc_trans_32_18_03, eo0); //q2
+ a[22] = vmulq_s32(g_ai4_ihevc_trans_32_22_03, eo0); //q2
+ a[26] = vmulq_s32(g_ai4_ihevc_trans_32_26_03, eo0); //q2
+ a[30] = vmulq_s32(g_ai4_ihevc_trans_32_30_03, eo0); //q2
+
+ a[30] = vmlaq_s32(a[30], g_ai4_ihevc_trans_32_30_47, eo1);
+ a[26] = vmlaq_s32(a[26], g_ai4_ihevc_trans_32_26_47, eo1);
+ a[22] = vmlaq_s32(a[22], g_ai4_ihevc_trans_32_22_47, eo1);
+ a[18] = vmlaq_s32(a[18], g_ai4_ihevc_trans_32_18_47, eo1);
+
+ int32x2_t val_18 =
+ vadd_s32(vget_low_s32(a[18]), vget_high_s32(a[18]));
+ int32x2_t val_22 =
+ vadd_s32(vget_low_s32(a[22]), vget_high_s32(a[22]));
+ val_18 = vpadd_s32(val_18, val_22);
+ int32x2_t val_26 =
+ vadd_s32(vget_low_s32(a[26]), vget_high_s32(a[26]));
+ int32x2_t val_30 =
+ vadd_s32(vget_low_s32(a[30]), vget_high_s32(a[30]));
+ val_26 = vpadd_s32(val_26, val_30);
+
+ int16x4_t val__18 =
+ vrshrn_n_s32(vcombine_s32(val_18, val_26), 15); //q9 q12
+
+ vst1_lane_s16(pi2_dst + 18 * dst_strd, val__18, 0); /*Value 18*/
+ vst1_lane_s16(pi2_dst + 22 * dst_strd, val__18, 1); /*Value 22*/
+ vst1_lane_s16(pi2_dst + 26 * dst_strd, val__18, 2); /*Value 26*/
+ vst1_lane_s16(pi2_dst + 30 * dst_strd, val__18, 3); /*Value 30*/
+
+ /*Calculations for odd indexes*/
+ a[7] = vmulq_s32(g_ai4_ihevc_trans_32_7_03, o0); //q1
+ a[5] = vmulq_s32(g_ai4_ihevc_trans_32_5_03, o0); //q1
+ a[3] = vmulq_s32(g_ai4_ihevc_trans_32_3_03, o0); //q1
+ a[1] = vmulq_s32(g_ai4_ihevc_trans_32_1_03, o0); //q1
+
+ a[1] = vmlaq_s32(a[1], g_ai4_ihevc_trans_32_1_47, o1);
+ a[3] = vmlaq_s32(a[3], g_ai4_ihevc_trans_32_3_47, o1);
+ a[5] = vmlaq_s32(a[5], g_ai4_ihevc_trans_32_5_47, o1);
+ a[7] = vmlaq_s32(a[7], g_ai4_ihevc_trans_32_7_47, o1);
+
+ a[7] = vmlaq_s32(a[7], g_ai4_ihevc_trans_32_7_811, o2);
+ a[5] = vmlaq_s32(a[5], g_ai4_ihevc_trans_32_5_811, o2);
+ a[3] = vmlaq_s32(a[3], g_ai4_ihevc_trans_32_3_811, o2);
+ a[1] = vmlaq_s32(a[1], g_ai4_ihevc_trans_32_1_811, o2);
+
+ a[1] = vmlaq_s32(a[1], g_ai4_ihevc_trans_32_1_1215, o3);
+ int32x2_t val_1 = vadd_s32(vget_low_s32(a[1]), vget_high_s32(a[1]));
+ a[3] = vmlaq_s32(a[3], g_ai4_ihevc_trans_32_3_1215, o3);
+ int32x2_t val_3 = vadd_s32(vget_low_s32(a[3]), vget_high_s32(a[3]));
+ val_1 = vpadd_s32(val_1, val_3);
+ a[5] = vmlaq_s32(a[5], g_ai4_ihevc_trans_32_5_1215, o3);
+ int32x2_t val_5 = vadd_s32(vget_low_s32(a[5]), vget_high_s32(a[5]));
+ a[7] = vmlaq_s32(a[7], g_ai4_ihevc_trans_32_7_1215, o3);
+ int32x2_t val_7 = vadd_s32(vget_low_s32(a[7]), vget_high_s32(a[7]));
+ val_5 = vpadd_s32(val_5, val_7);
+
+ /*Shift*/
+ int16x4_t val__1 =
+ vrshrn_n_s32(vcombine_s32(val_1, val_5), 15); //q9 q12
+
+ /*Store*/
+ vst1_lane_s16(pi2_dst + 1 * dst_strd, val__1, 0); /*Value 1*/
+ vst1_lane_s16(pi2_dst + 3 * dst_strd, val__1, 1); /*Value 3*/
+ vst1_lane_s16(pi2_dst + 5 * dst_strd, val__1, 2); /*Value 5*/
+ vst1_lane_s16(pi2_dst + 7 * dst_strd, val__1, 3); /*Value 7*/
+
+ a[15] = vmulq_s32(g_ai4_ihevc_trans_32_15_03, o0); //q1
+ a[13] = vmulq_s32(g_ai4_ihevc_trans_32_13_03, o0); //q1
+ a[11] = vmulq_s32(g_ai4_ihevc_trans_32_11_03, o0); //q1
+ a[9] = vmulq_s32(g_ai4_ihevc_trans_32_9_03, o0); //q1
+
+ a[9] = vmlaq_s32(a[9], g_ai4_ihevc_trans_32_9_47, o1);
+ a[11] = vmlaq_s32(a[11], g_ai4_ihevc_trans_32_11_47, o1);
+ a[13] = vmlaq_s32(a[13], g_ai4_ihevc_trans_32_13_47, o1);
+ a[15] = vmlaq_s32(a[15], g_ai4_ihevc_trans_32_15_47, o1);
+
+ a[15] = vmlaq_s32(a[15], g_ai4_ihevc_trans_32_15_811, o2);
+ a[13] = vmlaq_s32(a[13], g_ai4_ihevc_trans_32_13_811, o2);
+ a[11] = vmlaq_s32(a[11], g_ai4_ihevc_trans_32_11_811, o2);
+ a[9] = vmlaq_s32(a[9], g_ai4_ihevc_trans_32_9_811, o2);
+
+ a[9] = vmlaq_s32(a[9], g_ai4_ihevc_trans_32_9_1215, o3);
+ int32x2_t val_9 = vadd_s32(vget_low_s32(a[9]), vget_high_s32(a[9]));
+ a[11] = vmlaq_s32(a[11], g_ai4_ihevc_trans_32_11_1215, o3);
+ int32x2_t val_11 =
+ vadd_s32(vget_low_s32(a[11]), vget_high_s32(a[11]));
+ val_9 = vpadd_s32(val_9, val_11);
+ a[13] = vmlaq_s32(a[13], g_ai4_ihevc_trans_32_13_1215, o3);
+ int32x2_t val_13 =
+ vadd_s32(vget_low_s32(a[13]), vget_high_s32(a[13]));
+ a[15] = vmlaq_s32(a[15], g_ai4_ihevc_trans_32_15_1215, o3);
+ int32x2_t val_15 =
+ vadd_s32(vget_low_s32(a[15]), vget_high_s32(a[15]));
+ val_13 = vpadd_s32(val_13, val_15);
+
+ int16x4_t val__9 =
+ vrshrn_n_s32(vcombine_s32(val_9, val_13), 15); //q9 q12
+
+ vst1_lane_s16(pi2_dst + 9 * dst_strd, val__9, 0); /*Value 9*/
+ vst1_lane_s16(pi2_dst + 11 * dst_strd, val__9, 1); /*Value 11*/
+ vst1_lane_s16(pi2_dst + 13 * dst_strd, val__9, 2); /*Value 13*/
+ vst1_lane_s16(pi2_dst + 15 * dst_strd, val__9, 3); /*Value 15*/
+
+ a[23] = vmulq_s32(g_ai4_ihevc_trans_32_23_03, o0); //q1
+ a[21] = vmulq_s32(g_ai4_ihevc_trans_32_21_03, o0); //q1
+ a[19] = vmulq_s32(g_ai4_ihevc_trans_32_19_03, o0); //q1
+ a[17] = vmulq_s32(g_ai4_ihevc_trans_32_17_03, o0); //q1
+
+ a[17] = vmlaq_s32(a[17], g_ai4_ihevc_trans_32_17_47, o1);
+ a[19] = vmlaq_s32(a[19], g_ai4_ihevc_trans_32_19_47, o1);
+ a[21] = vmlaq_s32(a[21], g_ai4_ihevc_trans_32_21_47, o1);
+ a[23] = vmlaq_s32(a[23], g_ai4_ihevc_trans_32_23_47, o1);
+
+ a[23] = vmlaq_s32(a[23], g_ai4_ihevc_trans_32_23_811, o2);
+ a[21] = vmlaq_s32(a[21], g_ai4_ihevc_trans_32_21_811, o2);
+ a[19] = vmlaq_s32(a[19], g_ai4_ihevc_trans_32_19_811, o2);
+ a[17] = vmlaq_s32(a[17], g_ai4_ihevc_trans_32_17_811, o2);
+
+ a[17] = vmlaq_s32(a[17], g_ai4_ihevc_trans_32_17_1215, o3);
+ int32x2_t val_17 =
+ vadd_s32(vget_low_s32(a[17]), vget_high_s32(a[17]));
+ a[19] = vmlaq_s32(a[19], g_ai4_ihevc_trans_32_19_1215, o3);
+ int32x2_t val_19 =
+ vadd_s32(vget_low_s32(a[19]), vget_high_s32(a[19]));
+ val_17 = vpadd_s32(val_17, val_19);
+ a[21] = vmlaq_s32(a[21], g_ai4_ihevc_trans_32_21_1215, o3);
+ int32x2_t val_21 =
+ vadd_s32(vget_low_s32(a[21]), vget_high_s32(a[21]));
+ a[23] = vmlaq_s32(a[23], g_ai4_ihevc_trans_32_23_1215, o3);
+ int32x2_t val_23 =
+ vadd_s32(vget_low_s32(a[23]), vget_high_s32(a[23]));
+ val_21 = vpadd_s32(val_21, val_23);
+
+ int16x4_t val__17 =
+ vrshrn_n_s32(vcombine_s32(val_17, val_21), 15); //q9 q12
+
+ vst1_lane_s16(pi2_dst + 17 * dst_strd, val__17, 0); /*Value 17*/
+ vst1_lane_s16(pi2_dst + 19 * dst_strd, val__17, 1); /*Value 19*/
+ vst1_lane_s16(pi2_dst + 21 * dst_strd, val__17, 2); /*Value 21*/
+ vst1_lane_s16(pi2_dst + 23 * dst_strd, val__17, 3); /*Value 23*/
+
+ a[31] = vmulq_s32(g_ai4_ihevc_trans_32_31_03, o0); //q10
+ a[29] = vmulq_s32(g_ai4_ihevc_trans_32_29_03, o0); //q1
+ a[27] = vmulq_s32(g_ai4_ihevc_trans_32_27_03, o0); //q1
+ a[25] = vmulq_s32(g_ai4_ihevc_trans_32_25_03, o0); //q1
+
+ a[25] = vmlaq_s32(a[25], g_ai4_ihevc_trans_32_25_47, o1);
+ a[27] = vmlaq_s32(a[27], g_ai4_ihevc_trans_32_27_47, o1);
+ a[29] = vmlaq_s32(a[29], g_ai4_ihevc_trans_32_29_47, o1);
+ a[31] = vmlaq_s32(a[31], g_ai4_ihevc_trans_32_31_47, o1);
+
+ a[31] = vmlaq_s32(a[31], g_ai4_ihevc_trans_32_31_811, o2);
+ a[29] = vmlaq_s32(a[29], g_ai4_ihevc_trans_32_29_811, o2);
+ a[27] = vmlaq_s32(a[27], g_ai4_ihevc_trans_32_27_811, o2);
+ a[25] = vmlaq_s32(a[25], g_ai4_ihevc_trans_32_25_811, o2);
+
+ a[25] = vmlaq_s32(a[25], g_ai4_ihevc_trans_32_25_1215, o3);
+ int32x2_t val_25 =
+ vadd_s32(vget_low_s32(a[25]), vget_high_s32(a[25]));
+ a[27] = vmlaq_s32(a[27], g_ai4_ihevc_trans_32_27_1215, o3);
+ int32x2_t val_27 =
+ vadd_s32(vget_low_s32(a[27]), vget_high_s32(a[27]));
+ val_25 = vpadd_s32(val_25, val_27);
+ a[29] = vmlaq_s32(a[29], g_ai4_ihevc_trans_32_29_1215, o3);
+ int32x2_t val_29 =
+ vadd_s32(vget_low_s32(a[29]), vget_high_s32(a[29]));
+ a[31] = vmlaq_s32(a[31], g_ai4_ihevc_trans_32_31_1215, o3);
+ int32x2_t val_31 =
+ vadd_s32(vget_low_s32(a[31]), vget_high_s32(a[31]));
+ val_29 = vpadd_s32(val_29, val_31);
+
+ int16x4_t val__25 =
+ vrshrn_n_s32(vcombine_s32(val_25, val_29), 15); //q9 q12
+
+ vst1_lane_s16(pi2_dst + 25 * dst_strd, val__25, 0); /*Value 25*/
+ vst1_lane_s16(pi2_dst + 27 * dst_strd, val__25, 1); /*Value 27*/
+ vst1_lane_s16(pi2_dst + 29 * dst_strd, val__25, 2); /*Value 29*/
+ vst1_lane_s16(pi2_dst + 31 * dst_strd, val__25, 3); /*Value 31*/
+
+ pi2_dst++;
+ }
+ }
+ return u4_blk_sad;
+}
diff --git a/common/arm/ihevc_sao_edge_offset_class2_chroma.s b/common/arm/ihevc_sao_edge_offset_class2_chroma.s
index f7ab3f8..36ffa81 100644
--- a/common/arm/ihevc_sao_edge_offset_class2_chroma.s
+++ b/common/arm/ihevc_sao_edge_offset_class2_chroma.s
@@ -838,12 +838,14 @@
SUBS r6,r6,#16 @Decrement the wd loop count by 16
- BLE RE_ASSINING_LOOP @Jump to re-assigning loop
+ CMP r6,#8
+ BLT RE_ASSINING_LOOP @Jump to re-assigning loop
LDR r7,[sp,#wd_offset] @Loads wd
LDR r0,[sp,#2] @Loads *pu1_src
SUB r7,r7,r6
ADD r0,r0,r7
BGT WD_16_HT_4_LOOP
+ BEQ WIDTH_RESIDUE
WIDTH_RESIDUE:
diff --git a/common/arm/ihevc_sao_edge_offset_class3_chroma.s b/common/arm/ihevc_sao_edge_offset_class3_chroma.s
index 9f4eb62..d193dd8 100644
--- a/common/arm/ihevc_sao_edge_offset_class3_chroma.s
+++ b/common/arm/ihevc_sao_edge_offset_class3_chroma.s
@@ -860,12 +860,14 @@
BNE SRC_LEFT_LOOP_WD_16_HT_4
SUBS r6,r6,#16 @Decrement the wd loop count by 16
- BLE RE_ASSINING_LOOP @Jump to re-assigning loop
+ CMP r6,#8
+ BLT RE_ASSINING_LOOP @Jump to re-assigning loop
LDR r7,[sp,#wd_offset] @Loads wd
LDR r0,[sp,#0x02] @Loads *pu1_src
SUB r7,r7,r6
ADD r0,r0,r7
BGT WD_16_HT_4_LOOP @If not equal jump to width_loop
+ BEQ WIDTH_RESIDUE @If residue remains jump to residue loop
WIDTH_RESIDUE:
LDR r7,[sp,#wd_offset] @Loads wd
diff --git a/common/arm64/ihevc_itrans_recon_32x32.s b/common/arm64/ihevc_itrans_recon_32x32.s
index 51646ac..8f98503 100644
--- a/common/arm64/ihevc_itrans_recon_32x32.s
+++ b/common/arm64/ihevc_itrans_recon_32x32.s
@@ -141,9 +141,6 @@
.extern g_ai2_ihevc_trans_32_transpose
-x5_addr: .word 0xfffff000
-x9_addr: .word 0xffff0000
-
.type ihevc_itrans_recon_32x32_av8, %function
ihevc_itrans_recon_32x32_av8:
@@ -176,8 +173,8 @@
// x10,x9,x11,x12
mov x9,#0xffffff00
mov x10,#0xfffffff0
- ldr w5, x5_addr
- ldr w7, x9_addr
+ mov w5,#0xfffff000
+ mov w7,#0xffff0000
cmp x12,x10
mov x20,#1
csel x14, x20, x14,hs
@@ -1588,8 +1585,8 @@
// sub x0,x0,#512
mov x11,#0xfffffff0
mov x5, #0xffffff00
- ldr w6, x5_addr
- ldr w9, x9_addr
+ mov w6,#0xfffff000
+ mov w9,#0xffff0000
// sub x1,x1,#2048
mov x4,x1
mov x10,#240
diff --git a/common/arm64/ihevc_sao_edge_offset_class2_chroma.s b/common/arm64/ihevc_sao_edge_offset_class2_chroma.s
index 0a8a748..dd4be6f 100644
--- a/common/arm64/ihevc_sao_edge_offset_class2_chroma.s
+++ b/common/arm64/ihevc_sao_edge_offset_class2_chroma.s
@@ -935,12 +935,14 @@
SUBS x6,x6,#16 //Decrement the wd loop count by 16
- BLE RE_ASSINING_LOOP //Jump to re-assigning loop
+ CMP x6,#8 //Check whether residue remains
+ BLT RE_ASSINING_LOOP //Jump to re-assigning loop
mov w7, w24 //Loads wd
mov x0, x27 //Loads *pu1_src
SUB x7,x7,x6
ADD x0,x0,x7
BGT WD_16_HT_4_LOOP
+ BEQ WIDTH_RESIDUE //If residue remains jump to residue loop
WIDTH_RESIDUE:
diff --git a/common/arm64/ihevc_sao_edge_offset_class3_chroma.s b/common/arm64/ihevc_sao_edge_offset_class3_chroma.s
index 2e145af..bfdf50c 100644
--- a/common/arm64/ihevc_sao_edge_offset_class3_chroma.s
+++ b/common/arm64/ihevc_sao_edge_offset_class3_chroma.s
@@ -944,12 +944,14 @@
BNE SRC_LEFT_LOOP_WD_16_HT_4
SUBS x6,x6,#16 //Decrement the wd loop count by 16
- BLE RE_ASSINING_LOOP //Jump to re-assigning loop
+ CMP x6,#8 //Check whether residue remains
+ BLT RE_ASSINING_LOOP //Jump to re-assigning loop
mov w7, w24 //Loads wd
mov x0, x28 //Loads *pu1_src
SUB x7,x7,x6
ADD x0,x0,x7
BGT WD_16_HT_4_LOOP //If not equal jump to width_loop
+ BEQ WIDTH_RESIDUE //If residue remains jump to residue loop
WIDTH_RESIDUE:
mov w7, w24 //Loads wd
diff --git a/common/ihevc_hbd_deblk_edge_filter.c b/common/ihevc_hbd_deblk_edge_filter.c
new file mode 100644
index 0000000..3cbfaec
--- /dev/null
+++ b/common/ihevc_hbd_deblk_edge_filter.c
@@ -0,0 +1,1235 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ihevc_hbd_deblk_edge_filter.c
+*
+* @brief
+* Contains function definitions for deblocking filters
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+* - ihevc_hbd_deblk_luma_vert()
+* - ihevc_hbd_deblk_luma_horz()
+* - ihevc_hbd_deblk_chroma_vert()
+* - ihevc_deblk_422chroma_vert()
+* - ihevc_hbd_deblk_422chroma_vert()
+* - ihevc_hbd_deblk_chroma_horz()
+* - ihevc_deblk_422chroma_horz()
+* - ihevc_hbd_deblk_422chroma_horz()
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+#include <stdlib.h>
+#include <stdio.h>
+#include <assert.h>
+#include "ihevc_typedefs.h"
+#include "ihevc_macros.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_func_selector.h"
+#include "ihevc_deblk.h"
+#include "ihevc_deblk_tables.h"
+#include "ihevc_debug.h"
+
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Decision process and filtering for the luma block vertical edge for high bit depth.
+*
+* @par Description:
+*  The decision process for the luma block vertical edge is carried out and
+*  an appropriate filter is applied. The boundary filter strength, bs should
+*  be greater than 0. The pcm flags and the transquant bypass flags should
+*  be taken care of by the calling function.
+*
+* @param[in,out] pu2_src
+*  Pointer to the src sample q(0,0); filtered samples are written back in place
+*
+* @param[in] src_strd
+*  Source stride, in UWORD16 samples
+*
+* @param[in] bs
+*  Boundary filter strength of q(0,0); asserted to lie in 1..3
+*
+* @param[in] quant_param_p
+*  Quantization parameter of p block
+*
+* @param[in] quant_param_q
+*  Quantization parameter of q block
+*
+* @param[in] beta_offset_div2
+*  Offset that is doubled and added to the averaged QP to index the beta table
+*
+* @param[in] tc_offset_div2
+*  Offset that is doubled and added to the averaged QP to index the tc table
+*
+* @param[in] filter_flag_p
+*  Flag whether to filter the p block
+*
+* @param[in] filter_flag_q
+*  Flag whether to filter the q block
+*
+* @param[in] bit_depth
+*  Sample bit depth; scales beta/tc by (1 << (bit_depth - 8)) and bounds the clip
+*
+* @returns
+*  None
+*******************************************************************************
+*/
+
+void ihevc_hbd_deblk_luma_vert(UWORD16 *pu2_src,
+                               WORD32 src_strd,
+                               WORD32 bs,
+                               WORD32 quant_param_p,
+                               WORD32 quant_param_q,
+                               WORD32 beta_offset_div2,
+                               WORD32 tc_offset_div2,
+                               WORD32 filter_flag_p,
+                               WORD32 filter_flag_q,
+                               UWORD8 bit_depth)
+{
+    WORD32 qp_luma, beta_indx, tc_indx;
+    WORD32 beta, tc;
+    WORD32 dp0, dp3, dq0, dq3, d0, d3, dp, dq, d;
+    WORD32 d_sam0, d_sam3;
+    WORD32 de, dep, deq;
+    WORD32 row;
+    WORD32 tmp_p0, tmp_p1, tmp_p2, tmp_q0, tmp_q1, tmp_q2;
+    WORD32 delta, delta_p, delta_q;
+
+    ASSERT((bs > 0) && (bs <= 3));
+    ASSERT(filter_flag_p || filter_flag_q);
+
+    qp_luma = (quant_param_p + quant_param_q + 1) >> 1;
+    beta_indx = CLIP3(qp_luma + (beta_offset_div2 * 2), 0, 51);
+
+    /* BS can take the value 3 in this implementation for an intra/inter edge. */
+    /* The tc index adds 0 for BS = 1 and 2 for BS = 2 or 3, i.e. 2 * (bs >> 1). */
+    /* The signed offsets are doubled with '* 2' rather than '<< 1' because */
+    /* left-shifting a negative value is undefined behaviour in C. */
+
+    tc_indx = CLIP3(qp_luma + (2 * (bs >> 1)) + (tc_offset_div2 * 2), 0, 53);
+
+    beta = gai4_ihevc_beta_table[beta_indx] * (1 << (bit_depth - 8));
+    tc = gai4_ihevc_tc_table[tc_indx] * (1 << (bit_depth - 8));
+    if(0 == tc)
+    {
+        return;
+    }
+
+    dq0 = ABS(pu2_src[2] - 2 * pu2_src[1] + pu2_src[0]);
+    dq3 = ABS(pu2_src[3 * src_strd + 2] - 2 * pu2_src[3 * src_strd + 1]
+                    + pu2_src[3 * src_strd + 0]);
+    dp0 = ABS(pu2_src[-3] - 2 * pu2_src[-2] + pu2_src[-1]);
+    dp3 = ABS(pu2_src[3 * src_strd - 3] - 2 * pu2_src[3 * src_strd - 2]
+                    + pu2_src[3 * src_strd - 1]);
+
+    d0 = dp0 + dq0;
+    d3 = dp3 + dq3;
+
+    dp = dp0 + dp3;
+    dq = dq0 + dq3;
+
+    d = d0 + d3;
+
+    de = 0;
+    dep = 0;
+    deq = 0;
+
+    if(d < beta)
+    {
+        d_sam0 = 0;
+        if((2 * d0 < (beta >> 2))
+                        && (ABS(pu2_src[3] - pu2_src[0]) + ABS(pu2_src[-1] - pu2_src[-4])
+                                        < (beta >> 3))
+                        && ABS(pu2_src[0] - pu2_src[-1]) < ((5 * tc + 1) >> 1))
+        {
+            d_sam0 = 1;
+        }
+
+        pu2_src += 3 * src_strd; /* repeat the same test on row 3 */
+        d_sam3 = 0;
+        if((2 * d3 < (beta >> 2))
+                        && (ABS(pu2_src[3] - pu2_src[0]) + ABS(pu2_src[-1] - pu2_src[-4])
+                                        < (beta >> 3))
+                        && ABS(pu2_src[0] - pu2_src[-1]) < ((5 * tc + 1) >> 1))
+        {
+            d_sam3 = 1;
+        }
+        pu2_src -= 3 * src_strd; /* back to row 0 */
+
+        de = (d_sam0 == 1 && d_sam3 == 1) ? 2 : 1; /* 2 selects the de == 2 branch below */
+        dep = (dp < ((beta + (beta >> 1)) >> 3)) ? 1 : 0;
+        deq = (dq < ((beta + (beta >> 1)) >> 3)) ? 1 : 0;
+        if(tc <= 1)
+        {
+            dep = 0;
+            deq = 0;
+        }
+    }
+
+    if(de != 0)
+    {
+        for(row = 0; row < 4; row++)
+        {
+            tmp_p0 = pu2_src[-1];
+            tmp_p1 = pu2_src[-2];
+            tmp_p2 = pu2_src[-3];
+
+            tmp_q0 = pu2_src[0];
+            tmp_q1 = pu2_src[1];
+            tmp_q2 = pu2_src[2];
+
+            if(de == 2)
+            {
+                tmp_q0 = CLIP3((pu2_src[2] + 2 * pu2_src[1] +
+                                2 * pu2_src[0] + 2 * pu2_src[-1] +
+                                pu2_src[-2] + 4) >> 3,
+                               pu2_src[0] - 2 * tc,
+                               pu2_src[0] + 2 * tc);
+
+                tmp_q1 = CLIP3((pu2_src[2] + pu2_src[1] + pu2_src[0] +
+                                pu2_src[-1] + 2) >> 2,
+                               pu2_src[1] - 2 * tc,
+                               pu2_src[1] + 2 * tc);
+
+                tmp_q2 = CLIP3((2 * pu2_src[3] + 3 * pu2_src[2] +
+                                pu2_src[1] + pu2_src[0] +
+                                pu2_src[-1] + 4) >> 3,
+                               pu2_src[2] - 2 * tc,
+                               pu2_src[2] + 2 * tc);
+
+                tmp_p0 = CLIP3((pu2_src[1] + 2 * pu2_src[0] +
+                                2 * pu2_src[-1] + 2 * pu2_src[-2] +
+                                pu2_src[-3] + 4) >> 3,
+                               pu2_src[-1] - 2 * tc,
+                               pu2_src[-1] + 2 * tc);
+
+                tmp_p1 = CLIP3((pu2_src[0] + pu2_src[-1] +
+                                pu2_src[-2] + pu2_src[-3] + 2) >> 2,
+                               pu2_src[-2] - 2 * tc,
+                               pu2_src[-2] + 2 * tc);
+
+                tmp_p2 = CLIP3((pu2_src[0] + pu2_src[-1] +
+                                pu2_src[-2] + 3 * pu2_src[-3] +
+                                2 * pu2_src[-4] + 4) >> 3,
+                               pu2_src[-3] - 2 * tc,
+                               pu2_src[-3] + 2 * tc);
+            }
+            else
+            {
+                delta = (9 * (pu2_src[0] - pu2_src[-1]) -
+                                3 * (pu2_src[1] - pu2_src[-2]) + 8) >> 4;
+                if(ABS(delta) < 10 * tc)
+                {
+                    delta = CLIP3(delta, -tc, tc);
+
+                    tmp_p0 = CLIP3(pu2_src[-1] + delta, 0, ((1 << bit_depth) - 1));
+                    tmp_q0 = CLIP3(pu2_src[0] - delta, 0, ((1 << bit_depth) - 1));
+                    if(dep == 1)
+                    {
+                        delta_p = CLIP3((((pu2_src[-3] + pu2_src[-1] + 1) >> 1)
+                                        - pu2_src[-2] + delta) >> 1,
+                                        -(tc >> 1),
+                                        (tc >> 1));
+                        tmp_p1 = CLIP3(pu2_src[-2] + delta_p, 0, ((1 << bit_depth) - 1));
+                    }
+
+                    if(deq == 1)
+                    {
+                        delta_q = CLIP3((((pu2_src[2] + pu2_src[0] + 1) >> 1)
+                                        - pu2_src[1] - delta) >> 1,
+                                        -(tc >> 1),
+                                        (tc >> 1));
+                        tmp_q1 = CLIP3(pu2_src[1] + delta_q, 0, ((1 << bit_depth) - 1));
+                    }
+                }
+            }
+
+            if(filter_flag_p != 0)
+            {
+                pu2_src[-3] = tmp_p2;
+                pu2_src[-2] = tmp_p1;
+                pu2_src[-1] = tmp_p0;
+            }
+
+            if(filter_flag_q != 0)
+            {
+                pu2_src[0] = tmp_q0;
+                pu2_src[1] = tmp_q1;
+                pu2_src[2] = tmp_q2;
+            }
+
+            pu2_src += src_strd;
+        }
+    }
+}
+/**
+*******************************************************************************
+*
+* @brief
+*
+*  Decision process and filtering for the luma block horizontal edge for high bit depth
+*
+* @par Description:
+*  The decision process for the luma block horizontal edge is carried out
+*  and an appropriate filter is applied. The boundary filter strength, bs
+*  should be greater than 0. The pcm flags and the transquant bypass flags
+*  should be taken care of by the calling function.
+*
+* @param[in,out] pu2_src
+*  Pointer to the src sample q(0,0); filtered samples are written back in place
+*
+* @param[in] src_strd
+*  Source stride, in UWORD16 samples
+*
+* @param[in] bs
+*  Boundary filter strength of q(0,0); asserted to be greater than 0
+*
+* @param[in] quant_param_p
+*  Quantization parameter of p block
+*
+* @param[in] quant_param_q
+*  Quantization parameter of q block
+*
+* @param[in] beta_offset_div2
+*  Offset that is doubled and added to the averaged QP to index the beta table
+*
+* @param[in] tc_offset_div2
+*  Offset that is doubled and added to the averaged QP to index the tc table
+*
+* @param[in] filter_flag_p
+*  Flag whether to filter the p block
+*
+* @param[in] filter_flag_q
+*  Flag whether to filter the q block
+*
+* @param[in] bit_depth
+*  Sample bit depth; scales beta/tc by (1 << (bit_depth - 8)) and bounds the clip
+*
+* @returns
+*  None
+*******************************************************************************
+*/
+
+void ihevc_hbd_deblk_luma_horz(UWORD16 *pu2_src,
+                               WORD32 src_strd,
+                               WORD32 bs,
+                               WORD32 quant_param_p,
+                               WORD32 quant_param_q,
+                               WORD32 beta_offset_div2,
+                               WORD32 tc_offset_div2,
+                               WORD32 filter_flag_p,
+                               WORD32 filter_flag_q,
+                               UWORD8 bit_depth)
+{
+    WORD32 qp_luma, beta_indx, tc_indx;
+    WORD32 beta, tc;
+    WORD32 dp0, dp3, dq0, dq3, d0, d3, dp, dq, d;
+    WORD32 d_sam0, d_sam3;
+    WORD32 de, dep, deq;
+    WORD32 col;
+    WORD32 tmp_p0, tmp_p1, tmp_p2, tmp_q0, tmp_q1, tmp_q2;
+    WORD32 delta, delta_p, delta_q;
+
+    ASSERT((bs > 0));
+    ASSERT(filter_flag_p || filter_flag_q);
+
+    qp_luma = (quant_param_p + quant_param_q + 1) >> 1;
+    beta_indx = CLIP3(qp_luma + (beta_offset_div2 * 2), 0, 51);
+
+    /* BS can take the value 3 in this implementation for an intra/inter edge. */
+    /* The tc index adds 0 for BS = 1 and 2 for BS = 2 or 3, i.e. 2 * (bs >> 1). */
+    /* The signed offsets are doubled with '* 2' rather than '<< 1' because */
+    /* left-shifting a negative value is undefined behaviour in C. */
+
+    tc_indx = CLIP3(qp_luma + 2 * (bs >> 1) + (tc_offset_div2 * 2), 0, 53);
+
+    beta = gai4_ihevc_beta_table[beta_indx] * (1 << (bit_depth - 8));
+    tc = gai4_ihevc_tc_table[tc_indx] * (1 << (bit_depth - 8));
+    if(0 == tc)
+    {
+        return;
+    }
+
+    dq0 = ABS(pu2_src[2 * src_strd] - 2 * pu2_src[1 * src_strd] +
+                    pu2_src[0 * src_strd]);
+
+    dq3 = ABS(pu2_src[3 + 2 * src_strd] - 2 * pu2_src[3 + 1 * src_strd] +
+                    pu2_src[3 + 0 * src_strd]);
+
+    dp0 = ABS(pu2_src[-3 * src_strd] - 2 * pu2_src[-2 * src_strd] +
+                    pu2_src[-1 * src_strd]);
+
+    dp3 = ABS(pu2_src[3 - 3 * src_strd] - 2 * pu2_src[3 - 2 * src_strd] +
+                    pu2_src[3 - 1 * src_strd]);
+
+    d0 = dp0 + dq0;
+    d3 = dp3 + dq3;
+
+    dp = dp0 + dp3;
+    dq = dq0 + dq3;
+
+    d = d0 + d3;
+
+    de = 0;
+    dep = 0;
+    deq = 0;
+
+    if(d < beta)
+    {
+        d_sam0 = 0;
+        if((2 * d0 < (beta >> 2))
+                        && (ABS(pu2_src[3 * src_strd] - pu2_src[0 * src_strd]) +
+                                        ABS(pu2_src[-1 * src_strd] - pu2_src[-4 * src_strd])
+                                        < (beta >> 3))
+                        && ABS(pu2_src[0 * src_strd] - pu2_src[-1 * src_strd])
+                        < ((5 * tc + 1) >> 1))
+        {
+            d_sam0 = 1;
+        }
+
+        pu2_src += 3; /* repeat the same test on column 3 */
+        d_sam3 = 0;
+        if((2 * d3 < (beta >> 2))
+                        && (ABS(pu2_src[3 * src_strd] - pu2_src[0 * src_strd]) +
+                                        ABS(pu2_src[-1 * src_strd] - pu2_src[-4 * src_strd])
+                                        < (beta >> 3))
+                        && ABS(pu2_src[0 * src_strd] - pu2_src[-1 * src_strd])
+                        < ((5 * tc + 1) >> 1))
+        {
+            d_sam3 = 1;
+        }
+        pu2_src -= 3; /* back to column 0 */
+
+        de = (d_sam0 == 1 && d_sam3 == 1) ? 2 : 1; /* 2 selects the de == 2 branch below */
+        dep = (dp < ((beta + (beta >> 1)) >> 3)) ? 1 : 0;
+        deq = (dq < ((beta + (beta >> 1)) >> 3)) ? 1 : 0;
+        if(tc <= 1)
+        {
+            dep = 0;
+            deq = 0;
+        }
+    }
+
+    if(de != 0)
+    {
+        for(col = 0; col < 4; col++)
+        {
+            tmp_p0 = pu2_src[-1 * src_strd];
+            tmp_p1 = pu2_src[-2 * src_strd];
+            tmp_p2 = pu2_src[-3 * src_strd];
+
+            tmp_q0 = pu2_src[0 * src_strd];
+            tmp_q1 = pu2_src[1 * src_strd];
+            tmp_q2 = pu2_src[2 * src_strd];
+            if(de == 2)
+            {
+                tmp_q0 = CLIP3((pu2_src[2 * src_strd] +
+                                2 * pu2_src[1 * src_strd] +
+                                2 * pu2_src[0 * src_strd] +
+                                2 * pu2_src[-1 * src_strd] +
+                                pu2_src[-2 * src_strd] + 4) >> 3,
+                               pu2_src[0 * src_strd] - 2 * tc,
+                               pu2_src[0 * src_strd] + 2 * tc);
+
+                tmp_q1 = CLIP3((pu2_src[2 * src_strd] +
+                                pu2_src[1 * src_strd] +
+                                pu2_src[0 * src_strd] +
+                                pu2_src[-1 * src_strd] + 2) >> 2,
+                               pu2_src[1 * src_strd] - 2 * tc,
+                               pu2_src[1 * src_strd] + 2 * tc);
+
+                tmp_q2 = CLIP3((2 * pu2_src[3 * src_strd] +
+                                3 * pu2_src[2 * src_strd] +
+                                pu2_src[1 * src_strd] +
+                                pu2_src[0 * src_strd] +
+                                pu2_src[-1 * src_strd] + 4) >> 3,
+                               pu2_src[2 * src_strd] - 2 * tc,
+                               pu2_src[2 * src_strd] + 2 * tc);
+
+                tmp_p0 = CLIP3((pu2_src[1 * src_strd] +
+                                2 * pu2_src[0 * src_strd] +
+                                2 * pu2_src[-1 * src_strd] +
+                                2 * pu2_src[-2 * src_strd] +
+                                pu2_src[-3 * src_strd] + 4) >> 3,
+                               pu2_src[-1 * src_strd] - 2 * tc,
+                               pu2_src[-1 * src_strd] + 2 * tc);
+
+                tmp_p1 = CLIP3((pu2_src[0 * src_strd] +
+                                pu2_src[-1 * src_strd] +
+                                pu2_src[-2 * src_strd] +
+                                pu2_src[-3 * src_strd] + 2) >> 2,
+                               pu2_src[-2 * src_strd] - 2 * tc,
+                               pu2_src[-2 * src_strd] + 2 * tc);
+
+                tmp_p2 = CLIP3((pu2_src[0 * src_strd] +
+                                pu2_src[-1 * src_strd] +
+                                pu2_src[-2 * src_strd] +
+                                3 * pu2_src[-3 * src_strd] +
+                                2 * pu2_src[-4 * src_strd] + 4) >> 3,
+                               pu2_src[-3 * src_strd] - 2 * tc,
+                               pu2_src[-3 * src_strd] + 2 * tc);
+            }
+            else
+            {
+                delta = (9 * (pu2_src[0 * src_strd] - pu2_src[-1 * src_strd]) -
+                                3 * (pu2_src[1 * src_strd] - pu2_src[-2 * src_strd]) +
+                                8) >> 4;
+                if(ABS(delta) < 10 * tc)
+                {
+                    delta = CLIP3(delta, -tc, tc);
+                    tmp_p0 = CLIP3(pu2_src[-1 * src_strd] + delta, 0, ((1 << bit_depth) - 1));
+                    tmp_q0 = CLIP3(pu2_src[0 * src_strd] - delta, 0, ((1 << bit_depth) - 1));
+                    if(dep == 1)
+                    {
+                        delta_p = CLIP3((((pu2_src[-3 * src_strd] +
+                                        pu2_src[-1 * src_strd] + 1) >> 1) -
+                                        pu2_src[-2 * src_strd] + delta) >> 1,
+                                        -(tc >> 1),
+                                        (tc >> 1));
+                        tmp_p1 = CLIP3(pu2_src[-2 * src_strd] + delta_p, 0, ((1 << bit_depth) - 1));
+                    }
+
+                    if(deq == 1)
+                    {
+                        delta_q = CLIP3((((pu2_src[2 * src_strd] +
+                                        pu2_src[0 * src_strd] + 1) >> 1) -
+                                        pu2_src[1 * src_strd] - delta) >> 1,
+                                        -(tc >> 1),
+                                        (tc >> 1));
+                        tmp_q1 = CLIP3(pu2_src[1 * src_strd] + delta_q, 0, ((1 << bit_depth) - 1));
+                    }
+                }
+            }
+
+            if(filter_flag_p != 0)
+            {
+                pu2_src[-3 * src_strd] = tmp_p2;
+                pu2_src[-2 * src_strd] = tmp_p1;
+                pu2_src[-1 * src_strd] = tmp_p0;
+            }
+
+            if(filter_flag_q != 0)
+            {
+                pu2_src[0 * src_strd] = tmp_q0;
+                pu2_src[1 * src_strd] = tmp_q1;
+                pu2_src[2 * src_strd] = tmp_q2;
+            }
+
+            pu2_src += 1;
+        }
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Filtering for the chroma block vertical edge for high bit depth.
+*
+* @par Description:
+*  Filter for chroma vertical edge. This function is assumed to be called
+*  only when the boundary filter strength is 2. The pcm flags and the
+*  transquant bypass flags should be taken care of by the calling function.
+*
+* @param[in,out] pu2_src
+*  Pointer to the src sample q(0,0); even offsets feed the U filter, odd offsets the V filter
+*
+* @param[in] src_strd
+*  Source stride, in UWORD16 samples
+*
+* @param[in] quant_param_p
+*  Quantization parameter of p block
+*
+* @param[in] quant_param_q
+*  Quantization parameter of q block
+*
+* @param[in] qp_offset_u
+*  Offset added to the averaged p/q QP to form the U QP index
+*
+* @param[in] qp_offset_v
+*  Offset added to the averaged p/q QP to form the V QP index
+*
+* @param[in] tc_offset_div2
+*  Offset that is doubled and added to the chroma QP (plus 2) to index the tc table
+*
+* @param[in] filter_flag_p
+*  Flag whether to filter the p block
+*
+* @param[in] filter_flag_q
+*  Flag whether to filter the q block
+*
+* @param[in] bit_depth
+*  Sample bit depth; scales tc by (1 << (bit_depth - 8)) and bounds the clip
+*
+* @returns
+*  None
+*******************************************************************************
+*/
+
+void ihevc_hbd_deblk_chroma_vert(UWORD16 *pu2_src,
+                                 WORD32 src_strd,
+                                 WORD32 quant_param_p,
+                                 WORD32 quant_param_q,
+                                 WORD32 qp_offset_u,
+                                 WORD32 qp_offset_v,
+                                 WORD32 tc_offset_div2,
+                                 WORD32 filter_flag_p,
+                                 WORD32 filter_flag_q,
+                                 UWORD8 bit_depth)
+{
+    WORD32 qp_indx_u, qp_chroma_u;
+    WORD32 qp_indx_v, qp_chroma_v;
+    WORD32 tc_indx_u, tc_u;
+    WORD32 tc_indx_v, tc_v;
+    WORD32 delta_u, tmp_p0_u, tmp_q0_u;
+    WORD32 delta_v, tmp_p0_v, tmp_q0_v;
+    WORD32 row;
+
+    ASSERT(filter_flag_p || filter_flag_q);
+
+    /* Chroma is filtered only when BS is 2; this function assumes the caller checked that. */
+    /* Signed terms are scaled with '*' (not '<<'): left-shifting a negative value is undefined in C. */
+    qp_indx_u = qp_offset_u + ((quant_param_p + quant_param_q + 1) >> 1);
+    qp_chroma_u = qp_indx_u < 0 ? qp_indx_u : (qp_indx_u > 57 ? qp_indx_u - 6 : gai4_ihevc_qp_table[qp_indx_u]);
+
+    qp_indx_v = qp_offset_v + ((quant_param_p + quant_param_q + 1) >> 1);
+    qp_chroma_v = qp_indx_v < 0 ? qp_indx_v : (qp_indx_v > 57 ? qp_indx_v - 6 : gai4_ihevc_qp_table[qp_indx_v]);
+
+    tc_indx_u = CLIP3(qp_chroma_u + 2 + (tc_offset_div2 * 2), 0, 53);
+    tc_u = gai4_ihevc_tc_table[tc_indx_u] * (1 << (bit_depth - 8));
+
+    tc_indx_v = CLIP3(qp_chroma_v + 2 + (tc_offset_div2 * 2), 0, 53);
+    tc_v = gai4_ihevc_tc_table[tc_indx_v] * (1 << (bit_depth - 8));
+
+    if(0 == tc_u && 0 == tc_v)
+    {
+        return;
+    }
+
+    for(row = 0; row < 4; row++)
+    {
+        delta_u = CLIP3((((pu2_src[0] - pu2_src[-2]) * 4) +
+                        pu2_src[-4] - pu2_src[2] + 4) >> 3,
+                        -tc_u, tc_u);
+        tmp_p0_u = CLIP3(pu2_src[-2] + delta_u, 0, ((1 << bit_depth) - 1));
+        tmp_q0_u = CLIP3(pu2_src[0] - delta_u, 0, ((1 << bit_depth) - 1));
+
+        delta_v = CLIP3((((pu2_src[1] - pu2_src[-1]) * 4) +
+                        pu2_src[-3] - pu2_src[3] + 4) >> 3,
+                        -tc_v, tc_v);
+        tmp_p0_v = CLIP3(pu2_src[-1] + delta_v, 0, ((1 << bit_depth) - 1));
+        tmp_q0_v = CLIP3(pu2_src[1] - delta_v, 0, ((1 << bit_depth) - 1));
+        if(filter_flag_p != 0)
+        {
+            pu2_src[-2] = tmp_p0_u;
+            pu2_src[-1] = tmp_p0_v;
+        }
+
+        if(filter_flag_q != 0)
+        {
+            pu2_src[0] = tmp_q0_u;
+            pu2_src[1] = tmp_q0_v;
+        }
+
+        pu2_src += src_strd;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Filtering for the 4:2:2 chroma block vertical edge (8-bit samples).
+*
+* @par Description:
+*  Filter for chroma vertical edge. This function is assumed to be called
+*  only when the boundary filter strength is 2. The pcm flags and the
+*  transquant bypass flags should be taken care of by the calling function.
+*
+* @param[in,out] pu1_src
+*  Pointer to the src sample q(0,0); even offsets feed the U filter, odd offsets the V filter
+*
+* @param[in] src_strd
+*  Source stride, in UWORD8 samples
+*
+* @param[in] quant_param_p
+*  Quantization parameter of p block
+*
+* @param[in] quant_param_q
+*  Quantization parameter of q block
+*
+* @param[in] qp_offset_u
+*  Offset added to the averaged p/q QP to form the U QP index
+*
+* @param[in] qp_offset_v
+*  Offset added to the averaged p/q QP to form the V QP index
+*
+* @param[in] tc_offset_div2
+*  Offset that is doubled and added to the chroma QP (plus 2) to index the tc table
+*
+* @param[in] filter_flag_p
+*  Flag whether to filter the p block
+*
+* @param[in] filter_flag_q
+*  Flag whether to filter the q block
+*
+* @returns
+*  None
+* @remarks
+*  The chroma QP is MIN(QP index, 51) here; no QP table lookup is performed
+*
+*******************************************************************************
+*/
+
+void ihevc_deblk_422chroma_vert(UWORD8 *pu1_src,
+                                WORD32 src_strd,
+                                WORD32 quant_param_p,
+                                WORD32 quant_param_q,
+                                WORD32 qp_offset_u,
+                                WORD32 qp_offset_v,
+                                WORD32 tc_offset_div2,
+                                WORD32 filter_flag_p,
+                                WORD32 filter_flag_q)
+{
+    WORD32 qp_indx_u, qp_chroma_u;
+    WORD32 qp_indx_v, qp_chroma_v;
+    WORD32 tc_indx_u, tc_u;
+    WORD32 tc_indx_v, tc_v;
+    WORD32 delta_u, tmp_p0_u, tmp_q0_u;
+    WORD32 delta_v, tmp_p0_v, tmp_q0_v;
+    WORD32 row;
+
+    ASSERT(filter_flag_p || filter_flag_q);
+
+    /* Chroma is filtered only when BS is 2; this function assumes the caller checked that. */
+    /* Signed terms are scaled with '*' (not '<<'): left-shifting a negative value is undefined in C. */
+    qp_indx_u = qp_offset_u + ((quant_param_p + quant_param_q + 1) >> 1);
+    qp_chroma_u = MIN(qp_indx_u, 51);
+
+    qp_indx_v = qp_offset_v + ((quant_param_p + quant_param_q + 1) >> 1);
+    qp_chroma_v = MIN(qp_indx_v, 51);
+
+    tc_indx_u = CLIP3(qp_chroma_u + 2 + (tc_offset_div2 * 2), 0, 53);
+    tc_u = gai4_ihevc_tc_table[tc_indx_u];
+
+    tc_indx_v = CLIP3(qp_chroma_v + 2 + (tc_offset_div2 * 2), 0, 53);
+    tc_v = gai4_ihevc_tc_table[tc_indx_v];
+
+    if(0 == tc_u && 0 == tc_v)
+    {
+        return;
+    }
+
+    for(row = 0; row < 4; row++)
+    {
+        delta_u = CLIP3((((pu1_src[0] - pu1_src[-2]) * 4) +
+                        pu1_src[-4] - pu1_src[2] + 4) >> 3,
+                        -tc_u, tc_u);
+
+        tmp_p0_u = CLIP_U8(pu1_src[-2] + delta_u);
+        tmp_q0_u = CLIP_U8(pu1_src[0] - delta_u);
+
+        delta_v = CLIP3((((pu1_src[1] - pu1_src[-1]) * 4) +
+                        pu1_src[-3] - pu1_src[3] + 4) >> 3,
+                        -tc_v, tc_v);
+
+        tmp_p0_v = CLIP_U8(pu1_src[-1] + delta_v);
+        tmp_q0_v = CLIP_U8(pu1_src[1] - delta_v);
+
+        if(filter_flag_p != 0)
+        {
+            pu1_src[-2] = tmp_p0_u;
+            pu1_src[-1] = tmp_p0_v;
+        }
+
+        if(filter_flag_q != 0)
+        {
+            pu1_src[0] = tmp_q0_u;
+            pu1_src[1] = tmp_q0_v;
+        }
+
+        pu1_src += src_strd;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*     Filtering for the chroma block vertical edge (4:2:2, high bit depth).
+*
+* @par Description:
+*     Filters four rows across a vertical edge. In each row the samples at even
+*     offsets use the U threshold and the samples at odd offsets the V threshold.
+*
+* @param[in,out] pu2_src
+*  Pointer to the src sample q(0,0)
+*
+* @param[in] src_strd
+*  Source stride, in UWORD16 samples
+*
+* @param[in] quant_param_p
+*  quantization parameter of p block
+*
+* @param[in] quant_param_q
+*  quantization parameter of q block
+*
+* @param[in] qp_offset_u
+*  offset added to the averaged block QP for the U threshold
+*
+* @param[in] qp_offset_v
+*  offset added to the averaged block QP for the V threshold
+*
+* @param[in] tc_offset_div2
+*  tc offset; it is doubled and added to the tc table index
+*
+* @param[in] filter_flag_p
+*  flag whether to filter the p block
+*
+* @param[in] filter_flag_q
+*  flag whether to filter the q block
+*
+* @param[in] bit_depth
+*  sample bit depth; the tc scaling assumes bit_depth >= 8
+*
+* @returns
+*  None
+*
+*******************************************************************************
+*/
+
+void ihevc_hbd_deblk_422chroma_vert(UWORD16 *pu2_src,
+                                    WORD32 src_strd,
+                                    WORD32 quant_param_p,
+                                    WORD32 quant_param_q,
+                                    WORD32 qp_offset_u,
+                                    WORD32 qp_offset_v,
+                                    WORD32 tc_offset_div2,
+                                    WORD32 filter_flag_p,
+                                    WORD32 filter_flag_q,
+                                    UWORD8 bit_depth)
+{
+    WORD32 qp_indx_u, qp_chroma_u;
+    WORD32 qp_indx_v, qp_chroma_v;
+    WORD32 tc_indx_u, tc_u;
+    WORD32 tc_indx_v, tc_v;
+    WORD32 delta_u, tmp_p0_u, tmp_q0_u;
+    WORD32 delta_v, tmp_p0_v, tmp_q0_v;
+    WORD32 max_val, row;
+
+    ASSERT(filter_flag_p || filter_flag_q);
+
+    /* chroma is filtered only for BS 2; the caller is assumed to check it */
+    qp_indx_u = qp_offset_u + ((quant_param_p + quant_param_q + 1) >> 1);
+    qp_chroma_u = MIN(qp_indx_u, 51);
+
+    qp_indx_v = qp_offset_v + ((quant_param_p + quant_param_q + 1) >> 1);
+    qp_chroma_v = MIN(qp_indx_v, 51);
+
+    /* '* 2' rather than '<< 1': left-shifting a negative offset is undefined */
+    tc_indx_u = CLIP3(qp_chroma_u + 2 + (tc_offset_div2 * 2), 0, 53);
+    tc_u = gai4_ihevc_tc_table[tc_indx_u] * (1 << (bit_depth - 8));
+
+    tc_indx_v = CLIP3(qp_chroma_v + 2 + (tc_offset_div2 * 2), 0, 53);
+    tc_v = gai4_ihevc_tc_table[tc_indx_v] * (1 << (bit_depth - 8));
+
+    if(0 == tc_u && 0 == tc_v)
+    {
+        return;
+    }
+
+    max_val = (1 << bit_depth) - 1;
+    for(row = 0; row < 4; row++)
+    {
+        /* '* 4' rather than '<< 2': the sample difference may be negative */
+        delta_u = CLIP3((((pu2_src[0] - pu2_src[-2]) * 4) +
+                         pu2_src[-4] - pu2_src[2] + 4) >> 3, -tc_u, tc_u);
+        tmp_p0_u = CLIP3(pu2_src[-2] + delta_u, 0, max_val);
+        tmp_q0_u = CLIP3(pu2_src[0] - delta_u, 0, max_val);
+
+        delta_v = CLIP3((((pu2_src[1] - pu2_src[-1]) * 4) +
+                         pu2_src[-3] - pu2_src[3] + 4) >> 3, -tc_v, tc_v);
+        tmp_p0_v = CLIP3(pu2_src[-1] + delta_v, 0, max_val);
+        tmp_q0_v = CLIP3(pu2_src[1] - delta_v, 0, max_val);
+
+        if(filter_flag_p != 0)
+        {
+            pu2_src[-2] = tmp_p0_u;
+            pu2_src[-1] = tmp_p0_v;
+        }
+
+        if(filter_flag_q != 0)
+        {
+            pu2_src[0] = tmp_q0_u;
+            pu2_src[1] = tmp_q0_v;
+        }
+        pu2_src += src_strd;
+    }
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief
+*     Filtering for the chroma block horizontal edge (4:2:2).
+*
+* @par Description:
+*     Filters eight columns across a horizontal edge. Even columns use the U
+*     threshold and odd columns the V threshold. The p sample of a column is at
+*     pu1_src - src_strd and the q sample at pu1_src. The boundary strength,
+*     pcm and transquant bypass checks are expected to be done by the calling
+*     function.
+*
+* @param[in,out] pu1_src
+*  Pointer to the src sample q(0,0)
+*
+* @param[in] src_strd
+*  Source stride, in UWORD8 samples
+*
+* @param[in] quant_param_p
+*  quantization parameter of p block
+*
+* @param[in] quant_param_q
+*  quantization parameter of q block
+*
+* @param[in] qp_offset_u
+*  offset added to the averaged block QP for the U threshold
+*
+* @param[in] qp_offset_v
+*  offset added to the averaged block QP for the V threshold
+*
+* @param[in] tc_offset_div2
+*  tc offset; it is doubled and added to the tc table index
+*
+* @param[in] filter_flag_p
+*  flag whether to filter the p block
+*
+* @param[in] filter_flag_q
+*  flag whether to filter the q block
+*
+* @returns
+*  None
+*
+*******************************************************************************
+*/
+
+void ihevc_deblk_422chroma_horz
+    (
+        UWORD8 *pu1_src,
+        WORD32 src_strd,
+        WORD32 quant_param_p,
+        WORD32 quant_param_q,
+        WORD32 qp_offset_u,
+        WORD32 qp_offset_v,
+        WORD32 tc_offset_div2,
+        WORD32 filter_flag_p,
+        WORD32 filter_flag_q
+    )
+{
+    WORD32 qp_indx_u, qp_chroma_u;
+    WORD32 qp_indx_v, qp_chroma_v;
+    WORD32 tc_indx_u, tc_u;
+    WORD32 tc_indx_v, tc_v;
+    WORD32 tc;
+
+    WORD32 delta, tmp_p0, tmp_q0;
+    WORD32 col;
+
+    ASSERT(filter_flag_p || filter_flag_q);
+
+    /* chroma is filtered only for BS 2; the caller is assumed to check it */
+    qp_indx_u = qp_offset_u + ((quant_param_p + quant_param_q + 1) >> 1);
+    qp_chroma_u = MIN(qp_indx_u, 51);
+
+    qp_indx_v = qp_offset_v + ((quant_param_p + quant_param_q + 1) >> 1);
+    qp_chroma_v = MIN(qp_indx_v, 51);
+
+    /* '* 2' rather than '<< 1': left-shifting a negative offset is undefined */
+    tc_indx_u = CLIP3(qp_chroma_u + 2 + (tc_offset_div2 * 2), 0, 53);
+    tc_u = gai4_ihevc_tc_table[tc_indx_u];
+
+    tc_indx_v = CLIP3(qp_chroma_v + 2 + (tc_offset_div2 * 2), 0, 53);
+    tc_v = gai4_ihevc_tc_table[tc_indx_v];
+
+    if(0 == tc_u && 0 == tc_v)
+    {
+        return;
+    }
+
+    for(col = 0; col < 8; col++)
+    {
+        /* even columns use the U threshold, odd columns the V threshold */
+        tc = (col & 1) ? tc_v : tc_u;
+        /* '* 4' rather than '<< 2': the sample difference may be negative */
+        delta = CLIP3((((pu1_src[0] - pu1_src[-src_strd]) * 4) +
+                       pu1_src[-2 * src_strd] - pu1_src[src_strd] + 4) >> 3,
+                      -tc, tc);
+
+        tmp_p0 = CLIP_U8(pu1_src[-src_strd] + delta);
+        tmp_q0 = CLIP_U8(pu1_src[0] - delta);
+
+        if(filter_flag_p != 0)
+        {
+            pu1_src[-src_strd] = tmp_p0;
+        }
+
+        if(filter_flag_q != 0)
+        {
+            pu1_src[0] = tmp_q0;
+        }
+
+        pu1_src += 1;
+    }
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief
+*     Filtering for the chroma block horizontal edge (high bit depth).
+*
+* @par Description:
+*     Filters eight columns across a horizontal edge. Even columns use the U
+*     threshold and odd columns the V threshold.
+*
+* @param[in,out] pu2_src
+*  Pointer to the src sample q(0,0)
+*
+* @param[in] src_strd
+*  Source stride, in UWORD16 samples
+*
+* @param[in] quant_param_p
+*  quantization parameter of p block
+*
+* @param[in] quant_param_q
+*  quantization parameter of q block
+*
+* @param[in] qp_offset_u
+*  offset added to the averaged block QP for the U threshold
+*
+* @param[in] qp_offset_v
+*  offset added to the averaged block QP for the V threshold
+*
+* @param[in] tc_offset_div2
+*  tc offset; it is doubled and added to the tc table index
+*
+* @param[in] filter_flag_p
+*  flag whether to filter the p block
+*
+* @param[in] filter_flag_q
+*  flag whether to filter the q block
+*
+* @param[in] bit_depth
+*  sample bit depth; the tc scaling assumes bit_depth >= 8
+*
+* @returns
+*  None
+*
+*******************************************************************************
+*/
+
+void ihevc_hbd_deblk_chroma_horz(UWORD16 *pu2_src,
+                                 WORD32 src_strd,
+                                 WORD32 quant_param_p,
+                                 WORD32 quant_param_q,
+                                 WORD32 qp_offset_u,
+                                 WORD32 qp_offset_v,
+                                 WORD32 tc_offset_div2,
+                                 WORD32 filter_flag_p,
+                                 WORD32 filter_flag_q,
+                                 UWORD8 bit_depth)
+{
+    WORD32 qp_indx_u, qp_chroma_u;
+    WORD32 qp_indx_v, qp_chroma_v;
+    WORD32 tc_indx_u, tc_u;
+    WORD32 tc_indx_v, tc_v;
+    WORD32 tc, max_val;
+    WORD32 delta, tmp_p0, tmp_q0;
+    WORD32 col;
+
+    ASSERT(filter_flag_p || filter_flag_q);
+
+    /* chroma is filtered only for BS 2; the caller is assumed to check it */
+    /* QP index: kept as is below 0, table lookup for 0..57, index - 6 above */
+    qp_indx_u = qp_offset_u + ((quant_param_p + quant_param_q + 1) >> 1);
+    qp_chroma_u = qp_indx_u < 0 ? qp_indx_u : (qp_indx_u > 57 ? qp_indx_u - 6 : gai4_ihevc_qp_table[qp_indx_u]);
+
+    qp_indx_v = qp_offset_v + ((quant_param_p + quant_param_q + 1) >> 1);
+    qp_chroma_v = qp_indx_v < 0 ? qp_indx_v : (qp_indx_v > 57 ? qp_indx_v - 6 : gai4_ihevc_qp_table[qp_indx_v]);
+
+    /* '* 2' rather than '<< 1': left-shifting a negative offset is undefined */
+    tc_indx_u = CLIP3(qp_chroma_u + 2 + (tc_offset_div2 * 2), 0, 53);
+    tc_u = gai4_ihevc_tc_table[tc_indx_u] * (1 << (bit_depth - 8));
+
+    tc_indx_v = CLIP3(qp_chroma_v + 2 + (tc_offset_div2 * 2), 0, 53);
+    tc_v = gai4_ihevc_tc_table[tc_indx_v] * (1 << (bit_depth - 8));
+
+    if(0 == tc_u && 0 == tc_v)
+    {
+        return;
+    }
+
+    max_val = (1 << bit_depth) - 1;
+    for(col = 0; col < 8; col++)
+    {
+        tc = (col & 1) ? tc_v : tc_u;
+        /* '* 4' rather than '<< 2': the sample difference may be negative */
+        delta = CLIP3((((pu2_src[0] - pu2_src[-src_strd]) * 4) +
+                       pu2_src[-2 * src_strd] - pu2_src[src_strd] + 4) >> 3,
+                      -tc, tc);
+        tmp_p0 = CLIP3(pu2_src[-src_strd] + delta, 0, max_val);
+        tmp_q0 = CLIP3(pu2_src[0] - delta, 0, max_val);
+
+        if(filter_flag_p != 0)
+        {
+            pu2_src[-src_strd] = tmp_p0;
+        }
+
+        if(filter_flag_q != 0)
+        {
+            pu2_src[0] = tmp_q0;
+        }
+
+        pu2_src += 1;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*     Filtering for the chroma block horizontal edge (4:2:2, high bit depth).
+*
+* @par Description:
+*     Filters eight columns across a horizontal edge. Even columns use the U
+*     threshold and odd columns the V threshold.
+*
+* @param[in,out] pu2_src
+*  Pointer to the src sample q(0,0)
+*
+* @param[in] src_strd
+*  Source stride, in UWORD16 samples
+*
+* @param[in] quant_param_p
+*  quantization parameter of p block
+*
+* @param[in] quant_param_q
+*  quantization parameter of q block
+*
+* @param[in] qp_offset_u
+*  offset added to the averaged block QP for the U threshold
+*
+* @param[in] qp_offset_v
+*  offset added to the averaged block QP for the V threshold
+*
+* @param[in] tc_offset_div2
+*  tc offset; it is doubled and added to the tc table index
+*
+* @param[in] filter_flag_p
+*  flag whether to filter the p block
+*
+* @param[in] filter_flag_q
+*  flag whether to filter the q block
+*
+* @param[in] bit_depth
+*  sample bit depth; the tc scaling assumes bit_depth >= 8
+*
+* @returns
+*  None
+*
+*******************************************************************************
+*/
+
+void ihevc_hbd_deblk_422chroma_horz(UWORD16 *pu2_src,
+                                    WORD32 src_strd,
+                                    WORD32 quant_param_p,
+                                    WORD32 quant_param_q,
+                                    WORD32 qp_offset_u,
+                                    WORD32 qp_offset_v,
+                                    WORD32 tc_offset_div2,
+                                    WORD32 filter_flag_p,
+                                    WORD32 filter_flag_q,
+                                    UWORD8 bit_depth)
+{
+    WORD32 qp_indx_u, qp_chroma_u;
+    WORD32 qp_indx_v, qp_chroma_v;
+    WORD32 tc_indx_u, tc_u;
+    WORD32 tc_indx_v, tc_v;
+    WORD32 tc, max_val;
+
+    WORD32 delta, tmp_p0, tmp_q0;
+    WORD32 col;
+
+    ASSERT(filter_flag_p || filter_flag_q);
+
+    /* chroma is filtered only for BS 2; the caller is assumed to check it */
+    qp_indx_u = qp_offset_u + ((quant_param_p + quant_param_q + 1) >> 1);
+    qp_chroma_u = MIN(qp_indx_u, 51);
+
+    qp_indx_v = qp_offset_v + ((quant_param_p + quant_param_q + 1) >> 1);
+    qp_chroma_v = MIN(qp_indx_v, 51);
+
+    /* '* 2' rather than '<< 1': left-shifting a negative offset is undefined */
+    tc_indx_u = CLIP3(qp_chroma_u + 2 + (tc_offset_div2 * 2), 0, 53);
+    tc_u = gai4_ihevc_tc_table[tc_indx_u] * (1 << (bit_depth - 8));
+
+    tc_indx_v = CLIP3(qp_chroma_v + 2 + (tc_offset_div2 * 2), 0, 53);
+    tc_v = gai4_ihevc_tc_table[tc_indx_v] * (1 << (bit_depth - 8));
+
+    if(0 == tc_u && 0 == tc_v)
+    {
+        return;
+    }
+
+    max_val = (1 << bit_depth) - 1;
+    for(col = 0; col < 8; col++)
+    {
+        tc = (col & 1) ? tc_v : tc_u;
+        /* '* 4' rather than '<< 2': the sample difference may be negative */
+        delta = CLIP3((((pu2_src[0] - pu2_src[-src_strd]) * 4) +
+                       pu2_src[-2 * src_strd] - pu2_src[src_strd] + 4) >> 3,
+                      -tc, tc);
+        tmp_p0 = CLIP3(pu2_src[-src_strd] + delta, 0, max_val);
+        tmp_q0 = CLIP3(pu2_src[0] - delta, 0, max_val);
+
+        if(filter_flag_p != 0)
+        {
+            pu2_src[-src_strd] = tmp_p0;
+        }
+
+        if(filter_flag_q != 0)
+        {
+            pu2_src[0] = tmp_q0;
+        }
+
+        pu2_src += 1;
+    }
+}
diff --git a/common/ihevc_macros.h b/common/ihevc_macros.h
index 1e17c65..156d558 100644
--- a/common/ihevc_macros.h
+++ b/common/ihevc_macros.h
@@ -44,12 +44,15 @@
#define SIGN(x) ((x) >= 0 ? ((x)>0 ? 1: 0) : -1)
#define ABS(x) ((((WORD32)(x)) > 0) ? (x) : -(x))
+#define ALIGN1024(x) ((((x) + 1023) >> 10) << 10)
+#define ALIGN256(x) ((((x) + 255) >> 8) << 8)
#define ALIGN128(x) ((((x) + 127) >> 7) << 7)
#define ALIGN64(x) ((((x) + 63) >> 6) << 6)
#define ALIGN32(x) ((((x) + 31) >> 5) << 5)
#define ALIGN16(x) ((((x) + 15) >> 4) << 4)
#define ALIGN8(x) ((((x) + 7) >> 3) << 3)
#define ALIGN4(x) ((((x) + 3) >> 2) << 2)
+#define ALIGN2(x) ((((x) + 1) >> 1) << 1)
#define ALIGN_POW2(ptr,align) ((((WORD32)ptr)+align-1)&(~(align-1)))
diff --git a/common/ihevc_quant_iquant_ssd.c b/common/ihevc_quant_iquant_ssd.c
new file mode 100644
index 0000000..50a0e08
--- /dev/null
+++ b/common/ihevc_quant_iquant_ssd.c
@@ -0,0 +1,2217 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ * ihevc_quant_iquant_ssd.c
+ *
+ * @brief
+ * Contains function definitions for quantization, followed by Inverse
+ * quantization to find transform domain SSD
+ *
+ * @author
+ * 100453, 100578
+ *
+ * @par List of Functions:
+ * - ihevc_quant_iquant_ssd()
+ * - ihevc_quant_iquant_ssd_flat_scale_mat()
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include "ihevc_typedefs.h"
+#include "ihevc_macros.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_defs.h"
+#include "ihevc_debug.h"
+#include "ihevc_trans_tables.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_func_selector.h"
+#include "ihevc_trans_macros.h"
+#include <assert.h>
+
+/*****************************************************************************/
+/* Globals */
+/*****************************************************************************/
+
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Quantizes a transform block, inverse quantizes the resulting levels and
+ *  accumulates the transform domain SSD between the input and the inverse
+ *  quantized coefficients. Also derives the csbf flags and the zero masks.
+ *
+ * @param[in] pi2_coeffs
+ *  Input transform coefficients (trans_size rows, stride src_strd)
+ * @param[in] pi2_quant_coeff
+ *  Quantization scaling matrix (trans_size x trans_size)
+ * @param[out] pi2_q_dst
+ *  Quantized coefficients (stride dst_q_strd)
+ * @param[out] pi2_iq_dst
+ *  Inverse quantized coefficients (stride dst_iq_strd)
+ * @param[in] trans_size
+ *  Transform size; the block is trans_size x trans_size
+ * @param[in] qp_div
+ *  Quantization parameter / 6
+ * @param[in] qp_rem
+ *  Quantization parameter % 6
+ * @param[in] q_add
+ *  Rounding factor handed to QUANT()
+ * @param[in] pi4_quant_round_factor_0_1
+ *  Unused
+ * @param[in] pi4_quant_round_factor_1_2
+ *  Unused
+ * @param[in] src_strd
+ *  Input stride
+ * @param[in] dst_q_strd
+ *  Stride of the quantized output
+ * @param[in] dst_iq_strd
+ *  Stride of the inverse quantized output
+ * @param[out] csbf
+ *  Coded sub block flags, one per 4x4 block
+ * @param[in] csbf_strd
+ *  Stride of csbf between rows of 4x4 blocks
+ * @param[out] zero_col
+ *  Column mask; the bits of the 4 columns of each coded 4x4 block are cleared
+ * @param[out] zero_row
+ *  Row mask; the bits of the 4 rows of each coded 4x4 block are cleared
+ * @param[in] pi2_dequant_coeff
+ *  Dequantization scaling matrix (trans_size x trans_size)
+ * @param[out] pi8_cost
+ *  Accumulated SSD
+ *
+ * @returns cbf
+ *  coded block flag: 1 if any quantized coefficient is non zero, else 0
+ *******************************************************************************
+ */
+
+WORD32 ihevc_quant_iquant_ssd
+    (
+        WORD16 *pi2_coeffs,
+        WORD16 *pi2_quant_coeff,
+        WORD16 *pi2_q_dst,
+        WORD16 *pi2_iq_dst,
+        WORD32 trans_size,
+        WORD32 qp_div,/* qpscaled / 6 */
+        WORD32 qp_rem,/* qpscaled % 6 */
+        WORD32 q_add,
+        WORD32 *pi4_quant_round_factor_0_1,
+        WORD32 *pi4_quant_round_factor_1_2,
+        WORD32 src_strd,
+        WORD32 dst_q_strd,
+        WORD32 dst_iq_strd,
+        UWORD8 *csbf,
+        WORD32 csbf_strd,
+        WORD32 *zero_col,
+        WORD32 *zero_row,
+        WORD16 *pi2_dequant_coeff,
+        LWORD64 *pi8_cost
+    )
+{
+    WORD32 i, j;
+    WORD32 log2_size;
+    WORD16 *pi2_q_dst_orig;
+    WORD32 cbf = 0;
+    WORD32 bit_depth,shift_iq;
+    WORD32 val;
+    WORD16 i2_temp;
+    LWORD64 ssd_cost = 0;
+
+    (void)pi4_quant_round_factor_0_1;
+    (void)pi4_quant_round_factor_1_2;
+    pi2_q_dst_orig = pi2_q_dst;
+
+    /* Quant initialization */
+    GETRANGE(log2_size, trans_size);
+    log2_size -= 1;
+
+    bit_depth = 8;
+    shift_iq = bit_depth + log2_size - 5;
+
+    for(i = 0; i < trans_size; i++)
+    {
+        for(j = 0; j < trans_size; j++)
+        {
+            /* Back up the coefficients before Quantization */
+            i2_temp = pi2_coeffs[j];
+
+            /* Quantization */
+            QUANT(pi2_q_dst[j], pi2_coeffs[j],
+                  pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
+                  log2_size, q_add);
+
+            /* Inverse Quantization */
+            IQUANT(pi2_iq_dst[j],
+                   pi2_q_dst[j],
+                   pi2_dequant_coeff[j]*g_ihevc_iquant_scales[qp_rem],
+                   shift_iq,
+                   qp_div);
+
+            /* SSD accumulation. Done in 64 bits: val * val and the running */
+            /* sum can both exceed the WORD32 range                         */
+            val = i2_temp - pi2_iq_dst[j];
+            ssd_cost += (LWORD64)val * val;
+        }
+
+        /* Step to the next row of every buffer */
+        pi2_q_dst += dst_q_strd;
+        pi2_iq_dst += dst_iq_strd;
+        pi2_quant_coeff += trans_size;
+        pi2_coeffs += src_strd;
+        pi2_dequant_coeff += trans_size;
+    }
+
+    /* Store the cost */
+    *pi8_cost = ssd_cost;
+
+    /* CSBF update */
+    {
+        WORD32 block_row, block_col;
+        WORD32 row, col;
+        WORD16 *pi2_block;
+        UWORD32 temp_zero_col = 0;
+        UWORD32 temp_zero_row = 0;
+
+        pi2_q_dst = pi2_q_dst_orig;
+
+        for(block_row = 0; block_row < trans_size; block_row += 4)
+        {
+            /* block_col counts 4x4 blocks so that it indexes csbf directly */
+            for(block_col = 0; block_col < trans_size / 4; block_col++)
+            {
+                pi2_block = pi2_q_dst + block_row * dst_q_strd + block_col * 4;
+                csbf[block_col] = 0;
+
+                for(row = 0; row < 4; row++)
+                {
+                    for(col = 0; col < 4; col++)
+                    {
+                        if(pi2_block[row * dst_q_strd + col] != 0)
+                        {
+                            csbf[block_col] = 1;
+                            break;
+                        }
+                    }
+                    if(csbf[block_col] == 1)
+                    {
+                        /* temp_zero_col is the complement of zero_col. */
+                        /* All 4 columns of the 4x4 block are marked as */
+                        /* non zero, even if only some of them hold a   */
+                        /* non zero level; this could be refined.       */
+                        temp_zero_col |= (0xFU << (block_col * 4));
+
+                        /* temp_zero_row is the complement of zero_row. */
+                        /* All 4 rows of the 4x4 block are marked as    */
+                        /* non zero, even if only some of them hold a   */
+                        /* non zero level; this could be refined.       */
+                        temp_zero_row |= (0xFU << block_row);
+
+                        break;
+                    }
+                }
+
+                cbf = cbf || csbf[block_col]; /* cbf update */
+            }
+            csbf += csbf_strd;
+        }
+
+        *zero_col = ~temp_zero_col; /* final zero_col storing */
+        *zero_row = ~temp_zero_row; /* final zero_row storing */
+    }
+
+    return cbf;
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Quantizes a transform block and inverse quantizes the resulting levels.
+ *  Also derives the coded sub block flags and the zero row / column masks.
+ *  No SSD is computed; pi8_cost is not written.
+ *
+ * @param[in] pi2_coeffs
+ *  Input transform coefficients (trans_size rows, stride src_strd)
+ * @param[in] pi2_quant_coeff
+ *  Quantization scaling matrix (trans_size x trans_size)
+ * @param[out] pi2_q_dst
+ *  Quantized coefficients (stride dst_q_strd)
+ * @param[out] pi2_iq_dst
+ *  Inverse quantized coefficients (stride dst_iq_strd)
+ * @param[in] trans_size
+ *  Transform size; the block is trans_size x trans_size
+ * @param[in] qp_div
+ *  Quantization parameter / 6
+ * @param[in] qp_rem
+ *  Quantization parameter % 6
+ * @param[in] q_add
+ *  Rounding factor handed to QUANT()
+ * @param[in] pi4_quant_round_factor_0_1
+ *  Unused
+ * @param[in] pi4_quant_round_factor_1_2
+ *  Unused
+ * @param[in] src_strd
+ *  Input stride
+ * @param[in] dst_q_strd
+ *  Stride of the quantized output
+ * @param[in] dst_iq_strd
+ *  Stride of the inverse quantized output
+ * @param[out] csbf
+ *  Coded sub block flags, one per 4x4 block
+ * @param[in] csbf_strd
+ *  Stride of csbf between rows of 4x4 blocks
+ * @param[out] zero_col
+ *  Column mask; the bits of the 4 columns of each coded 4x4 block are cleared
+ * @param[out] zero_row
+ *  Row mask; the bits of the 4 rows of each coded 4x4 block are cleared
+ * @param[in] pi2_dequant_coeff
+ *  Dequantization scaling matrix (trans_size x trans_size)
+ * @param[in] pi8_cost
+ *  Unused
+ *
+ * @returns cbf
+ *  coded block flag: 1 if any quantized coefficient is non zero, else 0
+ *******************************************************************************
+ */
+
+WORD32 ihevc_quant_iquant
+    (
+        WORD16 *pi2_coeffs,
+        WORD16 *pi2_quant_coeff,
+        WORD16 *pi2_q_dst,
+        WORD16 *pi2_iq_dst,
+        WORD32 trans_size,
+        WORD32 qp_div,/* qpscaled / 6 */
+        WORD32 qp_rem,/* qpscaled % 6 */
+        WORD32 q_add,
+        WORD32 *pi4_quant_round_factor_0_1,
+        WORD32 *pi4_quant_round_factor_1_2,
+        WORD32 src_strd,
+        WORD32 dst_q_strd,
+        WORD32 dst_iq_strd,
+        UWORD8 *csbf,
+        WORD32 csbf_strd,
+        WORD32 *zero_col,
+        WORD32 *zero_row,
+        WORD16 *pi2_dequant_coeff,
+        LWORD64 *pi8_cost
+    )
+{
+    WORD32 i, j;
+    WORD32 log2_size;
+    WORD16 *pi2_q_dst_orig;
+    WORD32 cbf = 0;
+    WORD32 bit_depth,shift_iq;
+
+    /* Parameters that this variant does not use */
+    (void)pi8_cost;
+    (void)pi4_quant_round_factor_0_1;
+    (void)pi4_quant_round_factor_1_2;
+    pi2_q_dst_orig = pi2_q_dst;
+
+    /* Quant initialization */
+    GETRANGE(log2_size, trans_size);
+    log2_size -= 1;
+
+    bit_depth = 8;
+    shift_iq = bit_depth + log2_size - 5;
+
+    /* Quantize every coefficient and inverse quantize the resulting level, */
+    /* one row per outer iteration                                          */
+    for(i = 0; i < trans_size; i++)
+    {
+        for(j = 0; j < trans_size; j++)
+        {
+            /* Quantization */
+            QUANT(pi2_q_dst[j], pi2_coeffs[j],
+                  pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
+                  log2_size, q_add);
+
+            /* Inverse Quantization */
+            IQUANT(pi2_iq_dst[j],
+                   pi2_q_dst[j],
+                   pi2_dequant_coeff[j]*g_ihevc_iquant_scales[qp_rem],
+                   shift_iq,
+                   qp_div);
+        }
+
+        /* Step to the next row of every buffer */
+        pi2_q_dst += dst_q_strd;
+        pi2_iq_dst += dst_iq_strd;
+        pi2_quant_coeff += trans_size;
+        pi2_coeffs += src_strd;
+        pi2_dequant_coeff += trans_size;
+    }
+
+    /* CSBF update */
+    {
+        WORD32 block_row, block_col;
+        WORD32 row, col;
+        WORD16 *pi2_block;
+        UWORD32 temp_zero_col = 0;
+        UWORD32 temp_zero_row = 0;
+
+        pi2_q_dst = pi2_q_dst_orig;
+
+        for(block_row = 0; block_row < trans_size; block_row += 4)
+        {
+            /* block_col counts 4x4 blocks so that it indexes csbf directly */
+            for(block_col = 0; block_col < trans_size / 4; block_col++)
+            {
+                pi2_block = pi2_q_dst + block_row * dst_q_strd + block_col * 4;
+                csbf[block_col] = 0;
+
+                for(row = 0; row < 4; row++)
+                {
+                    for(col = 0; col < 4; col++)
+                    {
+                        if(pi2_block[row * dst_q_strd + col] != 0)
+                        {
+                            csbf[block_col] = 1;
+                            break;
+                        }
+                    }
+                    if(csbf[block_col] == 1)
+                    {
+                        /* temp_zero_col is the complement of zero_col. */
+                        /* All 4 columns of the 4x4 block are marked as */
+                        /* non zero, even if only some of them hold a   */
+                        /* non zero level; this could be refined.       */
+                        temp_zero_col |= (0xFU << (block_col * 4));
+
+                        /* temp_zero_row is the complement of zero_row. */
+                        /* All 4 rows of the 4x4 block are marked as    */
+                        /* non zero, even if only some of them hold a   */
+                        /* non zero level; this could be refined.       */
+                        temp_zero_row |= (0xFU << block_row);
+
+                        break;
+                    }
+                }
+
+                cbf = cbf || csbf[block_col]; /* cbf update */
+            }
+
+            csbf += csbf_strd;
+        }
+
+        *zero_col = ~temp_zero_col; /* final zero_col storing */
+        *zero_row = ~temp_zero_row; /* final zero_row storing */
+    }
+
+    return cbf;
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Quantization, inverse quantization and transform domain SSD. Levels with a
+ *  magnitude above 1 are quantized again with (1 << QUANT_ROUND_FACTOR_Q) / 2
+ *  as rounding factor. Also derives the csbf flags and the zero masks.
+ *
+ * @param[in] pi2_coeffs
+ *  Input transform coefficients (trans_size rows, stride src_strd)
+ * @param[in] pi2_quant_coeff
+ *  Quantization scaling matrix (trans_size x trans_size)
+ * @param[out] pi2_q_dst
+ *  Quantized coefficients (stride dst_q_strd)
+ * @param[out] pi2_iq_dst
+ *  Inverse quantized coefficients (stride dst_iq_strd)
+ * @param[in] trans_size
+ *  Transform size; the block is trans_size x trans_size
+ * @param[in] qp_div
+ *  Quantization parameter / 6
+ * @param[in] qp_rem
+ *  Quantization parameter % 6
+ * @param[in] q_add
+ *  Rounding factor handed to the first QUANT()
+ * @param[in] pi4_quant_round_factor_0_1
+ *  Unused
+ * @param[in] pi4_quant_round_factor_1_2
+ *  Unused
+ * @param[in] src_strd
+ *  Input stride
+ * @param[in] dst_q_strd
+ *  Stride of the quantized output
+ * @param[in] dst_iq_strd
+ *  Stride of the inverse quantized output
+ * @param[out] csbf
+ *  Coded sub block flags, one per 4x4 block
+ * @param[in] csbf_strd
+ *  Stride of csbf between rows of 4x4 blocks
+ * @param[out] zero_col
+ *  Column mask; the bits of the 4 columns of each coded 4x4 block are cleared
+ * @param[out] zero_row
+ *  Row mask; the bits of the 4 rows of each coded 4x4 block are cleared
+ * @param[in] pi2_dequant_coeff
+ *  Dequantization scaling matrix (trans_size x trans_size)
+ * @param[out] pi8_cost
+ *  Accumulated SSD
+ *
+ * @returns cbf
+ *  coded block flag: 1 if any quantized coefficient is non zero, else 0
+ *******************************************************************************
+ */
+
+WORD32 ihevc_quant_iquant_ssd_rdoq
+    (
+        WORD16 *pi2_coeffs,
+        WORD16 *pi2_quant_coeff,
+        WORD16 *pi2_q_dst,
+        WORD16 *pi2_iq_dst,
+        WORD32 trans_size,
+        WORD32 qp_div,/* qpscaled / 6 */
+        WORD32 qp_rem,/* qpscaled % 6 */
+        WORD32 q_add,
+        WORD32 *pi4_quant_round_factor_0_1,
+        WORD32 *pi4_quant_round_factor_1_2,
+        WORD32 src_strd,
+        WORD32 dst_q_strd,
+        WORD32 dst_iq_strd,
+        UWORD8 *csbf,
+        WORD32 csbf_strd,
+        WORD32 *zero_col,
+        WORD32 *zero_row,
+        WORD16 *pi2_dequant_coeff,
+        LWORD64 *pi8_cost
+    )
+{
+    WORD32 i, j;
+    WORD32 log2_size;
+    WORD16 *pi2_q_dst_orig;
+    WORD32 cbf = 0;
+    WORD32 bit_depth,shift_iq;
+    WORD32 val;
+    WORD16 i2_temp;
+    LWORD64 ssd_cost = 0;
+
+    (void)pi4_quant_round_factor_0_1;
+    (void)pi4_quant_round_factor_1_2;
+    pi2_q_dst_orig = pi2_q_dst;
+
+    /* Quant initialization */
+    GETRANGE(log2_size, trans_size);
+    log2_size -= 1;
+
+    bit_depth = 8;
+    shift_iq = bit_depth + log2_size - 5;
+
+    for(i = 0; i < trans_size; i++)
+    {
+        for(j = 0; j < trans_size; j++)
+        {
+            /* Back up the coefficients before Quantization */
+            i2_temp = pi2_coeffs[j];
+
+            /* Quantization */
+            QUANT(pi2_q_dst[j], pi2_coeffs[j],
+                  pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
+                  log2_size, q_add);
+
+            /* Levels above 1 in magnitude are quantized again from the */
+            /* backed up coefficient with a fixed rounding factor       */
+            if (abs(pi2_q_dst[j]) > 1)
+            {
+                QUANT(pi2_q_dst[j],i2_temp,
+                      pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
+                      log2_size, ((1 << QUANT_ROUND_FACTOR_Q)/2));
+            }
+
+            /* Inverse Quantization */
+            IQUANT(pi2_iq_dst[j],
+                   pi2_q_dst[j],
+                   pi2_dequant_coeff[j]*g_ihevc_iquant_scales[qp_rem],
+                   shift_iq,
+                   qp_div);
+
+            /* SSD accumulation. Done in 64 bits: val * val and the running */
+            /* sum can both exceed the WORD32 range                         */
+            val = i2_temp - pi2_iq_dst[j];
+            ssd_cost += (LWORD64)val * val;
+        }
+
+        pi2_q_dst += dst_q_strd;
+        pi2_iq_dst += dst_iq_strd;
+        pi2_quant_coeff += trans_size;
+        pi2_coeffs += src_strd;
+        pi2_dequant_coeff += trans_size;
+    }
+
+    /* Store the cost */
+    *pi8_cost = ssd_cost;
+
+    /* CSBF update */
+    {
+        WORD32 block_row, block_col;
+        WORD32 row, col;
+        WORD16 *pi2_block;
+        UWORD32 temp_zero_col = 0;
+        UWORD32 temp_zero_row = 0;
+
+        pi2_q_dst = pi2_q_dst_orig;
+
+        for(block_row = 0; block_row < trans_size; block_row += 4)
+        {
+            /* block_col counts 4x4 blocks so that it indexes csbf directly */
+            for(block_col = 0; block_col < trans_size / 4; block_col++)
+            {
+                pi2_block = pi2_q_dst + block_row * dst_q_strd + block_col * 4;
+                csbf[block_col] = 0;
+
+                for(row = 0; row < 4; row++)
+                {
+                    for(col = 0; col < 4; col++)
+                    {
+                        if(pi2_block[row * dst_q_strd + col] != 0)
+                        {
+                            csbf[block_col] = 1;
+                            break;
+                        }
+                    }
+                    if(csbf[block_col] == 1)
+                    {
+                        /* temp_zero_col is the complement of zero_col. */
+                        /* All 4 columns of the 4x4 block are marked as */
+                        /* non zero, even if only some of them hold a   */
+                        /* non zero level; this could be refined.       */
+                        temp_zero_col |= (0xFU << (block_col * 4));
+
+                        /* temp_zero_row is the complement of zero_row. */
+                        /* All 4 rows of the 4x4 block are marked as    */
+                        /* non zero, even if only some of them hold a   */
+                        /* non zero level; this could be refined.       */
+                        temp_zero_row |= (0xFU << block_row);
+
+                        break;
+                    }
+                }
+
+                cbf = cbf || csbf[block_col]; /* cbf update */
+            }
+            csbf += csbf_strd;
+        }
+
+        *zero_col = ~temp_zero_col; /* final zero_col storing */
+        *zero_row = ~temp_zero_row; /* final zero_row storing */
+    }
+
+    return cbf;
+}
+
+/**
+ *******************************************************************************
+ * @brief
+ *  Quantization followed by inverse quantization, without SSD computation.
+ *  Levels with a magnitude above 1 are quantized a second time from the
+ *  original coefficient, using (1 << QUANT_ROUND_FACTOR_Q) / 2 as rounding
+ *  factor instead of q_add. csbf, zero_col and zero_row are filled from the
+ *  quantized block; pi8_cost and both pi4_quant_round_factor arguments are
+ *  not used.
+ *
+ * @returns cbf
+ *  coded block flag: 1 if any quantized coefficient is non zero, else 0
+ *******************************************************************************
+ */
+WORD32 ihevc_quant_iquant_rdoq
+    (
+        WORD16 *pi2_coeffs,
+        WORD16 *pi2_quant_coeff,
+        WORD16 *pi2_q_dst,
+        WORD16 *pi2_iq_dst,
+        WORD32 trans_size,
+        WORD32 qp_div,/* qpscaled / 6 */
+        WORD32 qp_rem,/* qpscaled % 6 */
+        WORD32 q_add,
+        WORD32 *pi4_quant_round_factor_0_1,
+        WORD32 *pi4_quant_round_factor_1_2,
+        WORD32 src_strd,
+        WORD32 dst_q_strd,
+        WORD32 dst_iq_strd,
+        UWORD8 *csbf,
+        WORD32 csbf_strd,
+        WORD32 *zero_col,
+        WORD32 *zero_row,
+        WORD16 *pi2_dequant_coeff,
+        LWORD64 *pi8_cost
+    )
+{
+    WORD32 i, j;
+    WORD32 log2_size;
+    WORD32 bit_depth, shift_iq;
+    WORD32 cbf = 0;
+    WORD16 i2_temp;
+    WORD16 *pi2_q_dst_orig = pi2_q_dst;
+
+    (void)pi8_cost;
+    (void)pi4_quant_round_factor_0_1;
+    (void)pi4_quant_round_factor_1_2;
+
+    /* log2_size is derived from trans_size through GETRANGE() */
+    GETRANGE(log2_size, trans_size);
+    log2_size -= 1;
+
+    bit_depth = 8;
+    shift_iq = bit_depth + log2_size - 5;
+
+    for(i = 0; i < trans_size; i++)
+    {
+        for(j = 0; j < trans_size; j++)
+        {
+            /* Keep the input coefficient for the second quantization */
+            i2_temp = pi2_coeffs[j];
+
+            /* First quantization, rounding with q_add */
+            QUANT(pi2_q_dst[j], pi2_coeffs[j],
+                  pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
+                  log2_size, q_add);
+
+            /* Levels above 1 in magnitude are quantized again */
+            if(abs(pi2_q_dst[j]) > 1)
+            {
+                QUANT(pi2_q_dst[j],i2_temp,
+                      pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
+                      log2_size, ((1 << QUANT_ROUND_FACTOR_Q)/2));
+            }
+
+            /* Inverse Quantization */
+            IQUANT(pi2_iq_dst[j],
+                   pi2_q_dst[j],
+                   pi2_dequant_coeff[j]*g_ihevc_iquant_scales[qp_rem],
+                   shift_iq,
+                   qp_div);
+        }
+
+        /* Step to the next row of every buffer */
+        pi2_coeffs += src_strd;
+        pi2_quant_coeff += trans_size;
+        pi2_dequant_coeff += trans_size;
+        pi2_q_dst += dst_q_strd;
+        pi2_iq_dst += dst_iq_strd;
+    }
+
+    /* Coded sub block flags and zero row / column masks */
+    {
+        /* Complements of zero_col / zero_row: a set bit is a non zero line */
+        UWORD32 u4_nonzero_cols = 0;
+        UWORD32 u4_nonzero_rows = 0;
+        WORD32 blk_y, blk_x;
+
+        for(blk_y = 0; blk_y < trans_size; blk_y += 4)
+        {
+            /* blk_x counts 4x4 blocks so that it indexes csbf directly */
+            for(blk_x = 0; blk_x < trans_size / 4; blk_x++)
+            {
+                WORD16 *pi2_blk = pi2_q_dst_orig + blk_y * dst_q_strd + blk_x * 4;
+                UWORD8 u1_coded = 0;
+                WORD32 k;
+
+                /* Raster scan of the 4x4 block, stops at the first non zero */
+                for(k = 0; (k < 16) && (0 == u1_coded); k++)
+                {
+                    u1_coded = (0 != pi2_blk[(k >> 2) * dst_q_strd + (k & 3)]);
+                }
+                csbf[blk_x] = u1_coded;
+
+                if(u1_coded)
+                {
+                    /* All 4 columns and rows of the block are flagged, even */
+                    /* if only some of them hold a non zero level            */
+                    u4_nonzero_cols |= (0xFU << (blk_x * 4));
+                    u4_nonzero_rows |= (0xFU << blk_y);
+                    cbf = 1;
+                }
+            }
+            csbf += csbf_strd;
+        }
+
+        *zero_col = ~u4_nonzero_cols;
+        *zero_row = ~u4_nonzero_rows;
+    }
+
+    return cbf;
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ *  Quantization with a flat scaling matrix, inverse quantization and transform
+ *  domain SSD between the input and the inverse quantized coefficients. Also
+ *  derives the coded sub block flags and the zero row / column masks.
+ *
+ * @param[in] pi2_coeffs
+ *  Input transform coefficients (trans_size rows, stride src_strd)
+ * @param[in] pi2_quant_coeff
+ *  Not read by this variant; only advanced along with the other pointers
+ * @param[out] pi2_q_dst
+ *  Quantized coefficients (stride dst_q_strd)
+ * @param[out] pi2_iq_dst
+ *  Inverse quantized coefficients (stride dst_iq_strd)
+ * @param[in] trans_size
+ *  Transform size; the block is trans_size x trans_size
+ * @param[in] qp_div
+ *  Quantization parameter / 6
+ * @param[in] qp_rem
+ *  Quantization parameter % 6
+ * @param[in] q_add
+ *  Rounding factor handed to QUANT_NO_WEIGHTMAT()
+ * @param[in] pi4_quant_round_factor_0_1
+ *  Unused
+ * @param[in] pi4_quant_round_factor_1_2
+ *  Unused
+ * @param[in] src_strd
+ *  Input stride
+ * @param[in] dst_q_strd
+ *  Stride of the quantized output
+ * @param[in] dst_iq_strd
+ *  Stride of the inverse quantized output
+ * @param[out] csbf
+ *  Coded sub block flags, one per 4x4 block
+ * @param[in] csbf_strd
+ *  Stride of csbf between rows of 4x4 blocks
+ * @param[out] zero_col
+ *  Column mask; the bits of the 4 columns of each coded 4x4 block are cleared
+ * @param[out] zero_row
+ *  Row mask; the bits of the 4 rows of each coded 4x4 block are cleared
+ * @param[in] pi2_dequant_coeff
+ *  Dequantization scaling matrix (trans_size x trans_size)
+ * @param[out] pi8_cost
+ *  Accumulated SSD
+ *
+ * @returns cbf
+ *  coded block flag: 1 if any quantized coefficient is non zero, else 0
+ *******************************************************************************
+ */
+
+WORD32 ihevc_quant_iquant_ssd_flat_scale_mat
+    (
+        WORD16 *pi2_coeffs,
+        WORD16 *pi2_quant_coeff,
+        WORD16 *pi2_q_dst,
+        WORD16 *pi2_iq_dst,
+        WORD32 trans_size,
+        WORD32 qp_div,/* qpscaled / 6 */
+        WORD32 qp_rem,/* qpscaled % 6 */
+        WORD32 q_add,
+        WORD32 *pi4_quant_round_factor_0_1,
+        WORD32 *pi4_quant_round_factor_1_2,
+        WORD32 src_strd,
+        WORD32 dst_q_strd,
+        WORD32 dst_iq_strd,
+        UWORD8 *csbf,
+        WORD32 csbf_strd,
+        WORD32 *zero_col,
+        WORD32 *zero_row,
+        WORD16 *pi2_dequant_coeff,
+        LWORD64 *pi8_cost
+    )
+{
+    WORD32 i, j;
+    WORD32 log2_size;
+    WORD16 *pi2_q_dst_orig;
+    WORD32 cbf = 0;
+    WORD32 bit_depth,shift_iq;
+    WORD32 val;
+    WORD16 i2_temp;
+    /* 64 bit accumulator: the sum of squared errors can exceed WORD32 */
+    LWORD64 ssd_cost = 0;
+
+    (void)pi4_quant_round_factor_0_1;
+    (void)pi4_quant_round_factor_1_2;
+    pi2_q_dst_orig = pi2_q_dst;
+
+    /* Quant initialization */
+    GETRANGE(log2_size, trans_size);
+    log2_size -= 1;
+
+    bit_depth = 8;
+    shift_iq = bit_depth + log2_size - 5;
+
+    for(i = 0; i < trans_size; i++)
+    {
+        for(j = 0; j < trans_size; j++)
+        {
+            /* Back up the coefficients before Quantization */
+            i2_temp = pi2_coeffs[j];
+
+            /* Quantization without a weighting matrix: only the scale    */
+            /* g_ihevc_quant_scales[qp_rem] is applied, pi2_quant_coeff   */
+            /* is not read                                                */
+            QUANT_NO_WEIGHTMAT(pi2_q_dst[j], pi2_coeffs[j],
+                               g_ihevc_quant_scales[qp_rem], qp_div,
+                               log2_size, q_add);
+
+            if(pi2_q_dst[j] == 0)
+            {
+                /* A zero level is stored as zero; IQUANT() is skipped */
+                pi2_iq_dst[j] = 0;
+            }
+            else
+            {
+                /* Inverse Quantization */
+                IQUANT(pi2_iq_dst[j],
+                       pi2_q_dst[j],
+                       pi2_dequant_coeff[j]*g_ihevc_iquant_scales[qp_rem],
+                       shift_iq,
+                       qp_div);
+            }
+
+            /* SSD accumulation; the product is formed in 64 bits because */
+            /* val * val can exceed the WORD32 range                      */
+            val = i2_temp - pi2_iq_dst[j];
+            ssd_cost += (LWORD64)val * val;
+        }
+
+        /* Step to the next row of every buffer */
+        pi2_q_dst += dst_q_strd;
+        pi2_iq_dst += dst_iq_strd;
+        pi2_quant_coeff += trans_size;
+        pi2_coeffs += src_strd;
+        pi2_dequant_coeff += trans_size;
+    }
+
+    /* Store the cost */
+    *pi8_cost = ssd_cost;
+
+    /* CSBF update */
+    {
+        WORD32 block_row, block_col;
+        WORD32 row, col;
+        WORD16 *pi2_block;
+        UWORD32 temp_zero_col = 0;
+        UWORD32 temp_zero_row = 0;
+
+        pi2_q_dst = pi2_q_dst_orig;
+
+        for(block_row = 0; block_row < trans_size; block_row += 4)
+        {
+            /* block_col counts 4x4 blocks so that it indexes csbf directly */
+            for(block_col = 0; block_col < trans_size / 4; block_col++)
+            {
+                pi2_block = pi2_q_dst + block_row * dst_q_strd + block_col * 4;
+                csbf[block_col] = 0;
+
+                for(row = 0; row < 4; row++)
+                {
+                    for(col = 0; col < 4; col++)
+                    {
+                        if(pi2_block[row * dst_q_strd + col] != 0)
+                        {
+                            csbf[block_col] = 1;
+                            break;
+                        }
+                    }
+                    if(csbf[block_col] == 1)
+                    {
+                        /* temp_zero_col is the complement of zero_col. */
+                        /* All 4 columns of the 4x4 block are marked as */
+                        /* non zero, even if only some of them hold a   */
+                        /* non zero level; this could be refined.       */
+                        temp_zero_col |= (0xFU << (block_col * 4));
+
+                        /* temp_zero_row is the complement of zero_row. */
+                        /* All 4 rows of the 4x4 block are marked as    */
+                        /* non zero, even if only some of them hold a   */
+                        /* non zero level; this could be refined.       */
+                        temp_zero_row |= (0xFU << block_row);
+
+                        break;
+                    }
+                }
+
+                cbf = cbf || csbf[block_col]; /* cbf update */
+            }
+            csbf += csbf_strd;
+        }
+
+        *zero_col = ~temp_zero_col; /* final zero_col storing */
+        *zero_row = ~temp_zero_row; /* final zero_row storing */
+    }
+
+    return cbf;
+}
+
+WORD32 ihevc_quant_iquant_flat_scale_mat
+ (
+ WORD16 *pi2_coeffs, /* in: coefficients, rows src_strd apart */
+ WORD16 *pi2_quant_coeff, /* not read by this flat-matrix variant (only advanced) */
+ WORD16 *pi2_q_dst, /* out: quantized levels, rows dst_q_strd apart */
+ WORD16 *pi2_iq_dst, /* out: inverse-quantized values, rows dst_iq_strd apart */
+ WORD32 trans_size, /* block is trans_size x trans_size; scanned in 4x4 sub-blocks */
+ WORD32 qp_div,/* qpscaled / 6 */
+ WORD32 qp_rem,/* qpscaled % 6 */
+ WORD32 q_add, /* rounding offset handed to the quantizer */
+ WORD32 *pi4_quant_round_factor_0_1, /* unused */
+ WORD32 *pi4_quant_round_factor_1_2, /* unused */
+ WORD32 src_strd,
+ WORD32 dst_q_strd,
+ WORD32 dst_iq_strd,
+ UWORD8 *csbf, /* out: one flag per 4x4 sub-block */
+ WORD32 csbf_strd, /* csbf step per row of 4x4 sub-blocks */
+ WORD32 *zero_col, /* out: set bit => column lies in no coded sub-block */
+ WORD32 *zero_row, /* out: set bit => row lies in no coded sub-block */
+ WORD16 *pi2_dequant_coeff, /* in: dequant matrix, trans_size entries per row */
+ LWORD64 *pi8_cost /* unused: this variant computes no SSD */
+ )
+{
+ WORD32 i, j;
+ WORD32 log2_size;
+ WORD16 *pi2_q_dst_orig;
+ WORD32 cbf = 0;
+ WORD32 bit_depth,shift_iq;
+ WORD16 i2_temp;
+ /* Quantizes then inverse-quantizes one block without a weighting matrix; returns the coded block flag. */
+ (void)pi8_cost;
+ (void)pi4_quant_round_factor_0_1;
+ (void)pi4_quant_round_factor_1_2;
+ pi2_q_dst_orig = pi2_q_dst;
+
+ /* Quant initialization. GETRANGE is defined elsewhere; presumably a bit-length, hence the -1 (TODO confirm) */
+ GETRANGE(log2_size, trans_size);
+ log2_size -= 1;
+
+ bit_depth = 8 + 0;
+ shift_iq = bit_depth + log2_size - 5;
+
+ for(i = 0; i < trans_size; i++)
+ {
+ for(j = 0; j < trans_size; j++)
+ {
+ /* Copy of the input coefficient; it is not read again in this variant */
+ i2_temp = pi2_coeffs[j];
+
+ /* Quantization with the caller-supplied rounding offset q_add */
+ QUANT_NO_WEIGHTMAT(pi2_q_dst[j], pi2_coeffs[j],
+ g_ihevc_quant_scales[qp_rem], qp_div,
+ log2_size, q_add);
+
+ if(pi2_q_dst[j] == 0)
+ {
+ pi2_iq_dst[j] = 0;
+ }
+ else
+ {
+ /* Inverse Quantization (only needed for non-zero levels) */
+ IQUANT(pi2_iq_dst[j],
+ pi2_q_dst[j], /* quantized level */
+ pi2_dequant_coeff[j]*g_ihevc_iquant_scales[qp_rem], /* dequant matrix entry scaled by the qp_rem table entry */
+ shift_iq,
+ qp_div);
+ }
+ }
+ /* Step every pointer to the next row */
+ pi2_q_dst += dst_q_strd;
+ pi2_iq_dst += dst_iq_strd;
+ pi2_quant_coeff += trans_size;
+ pi2_coeffs += src_strd;
+ pi2_dequant_coeff += trans_size;
+ }
+
+ /* CSBF update: flag every 4x4 sub-block of the quantized output that holds a non-zero level */
+ {
+ WORD32 block_row, block_col;
+ WORD32 row, col;
+ WORD16 *pi2_block;
+ UWORD32 temp_zero_col = 0;
+ UWORD32 temp_zero_row = 0;
+ /* Rewind: the quantization loop advanced pi2_q_dst past the last row */
+ pi2_q_dst = pi2_q_dst_orig;
+
+ for(block_row = 0; block_row < trans_size; block_row += 4)
+ {
+ // block_col counts 4x4 sub-blocks (not coefficients) so it indexes csbf directly
+ for(block_col = 0; block_col < trans_size / 4; block_col++)
+ {
+ pi2_block = pi2_q_dst + block_row * dst_q_strd + block_col * 4;
+ *(csbf + block_col) = 0;
+
+ for(row = 0; row < 4; row++)
+ {
+ for(col = 0; col < 4; col++)
+ {
+ if(pi2_block[row * dst_q_strd + col] != 0)
+ {
+ *(csbf + block_col) = 1;
+ break;
+ }
+ }
+ if(*(csbf + block_col) == 1)
+ {
+ /* zero_col update *//* temp_zero_col = ~zero_col */
+ temp_zero_col = (temp_zero_col) | (0xFU << block_col * 4);
+ // zero col can be optimized further. For now all 4 bits
+ // of the 4 columns covered by this 4x4 block are marked,
+ // even if only some of those columns hold a non-zero level
+
+ /* zero row update */ /* temp_zero_row = ~zero_row */
+ temp_zero_row = (temp_zero_row) | (0xFU << block_row);
+ // zero row can be optimized further. For now all 4 bits
+ // of the 4 rows covered by this 4x4 block are marked,
+ // even if only some of those rows hold a non-zero level
+ /* Sub-block is known to be coded; skip its remaining rows */
+ break;
+ }
+ }
+
+ cbf = cbf || (*(csbf + block_col)); // cbf becomes 1 once any sub-block is coded
+ }
+ csbf += csbf_strd;
+ }
+
+ *zero_col = ~temp_zero_col; // set bit => column lies in no coded 4x4 sub-block
+ *zero_row = ~temp_zero_row; // set bit => row lies in no coded 4x4 sub-block
+ }
+
+ return cbf;
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * This function performs quantization(using flat scale matrix), followed by
+ * inverse quantization to find transform domain SSD; when we perform RDOQ.
+ * In case the quantized value turns out to be greater than 1, we then requantize
+ * using half rounding.
+ *
+ * @par Description:
+ * Performs quantization on coeffs
+ *
+ * @param[in] pi2_coeffs
+ * 4x4 Coeffs
+ *
+ * @param[in] pi2_quant_coeff
+ * Scaling Matrix
+ *
+ * @param[out] pi2_dst
+ * Output 4x4 coefficients
+ *
+ * @param[in] qp_div
+ * Quantization parameter / 6
+ *
+ * @param[in] qp_rem
+ * Quantization parameter % 6
+ *
+ * @param[in] src_strd
+ * Input stride
+ *
+ * @param[in] dst_strd
+ * Output Stride
+ *
+ * @param[out] csbf
+ * coded sub block flag
+ *
+ * @param[in] csbf_strd
+ * coded sub block flag stride
+ *
+ * @param[out] zero_col
+ * zero column flag
+ *
+ * @param[out] zero_row
+ * zero row flag
+ *
+ * @returns cbf
+ * coded block flag
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+
+WORD32 ihevc_quant_iquant_ssd_flat_scale_mat_rdoq
+ (
+ WORD16 *pi2_coeffs, /* in: coefficients, rows src_strd apart */
+ WORD16 *pi2_quant_coeff, /* not read by this flat-matrix variant (only advanced) */
+ WORD16 *pi2_q_dst, /* out: quantized levels, rows dst_q_strd apart */
+ WORD16 *pi2_iq_dst, /* out: inverse-quantized values, rows dst_iq_strd apart */
+ WORD32 trans_size, /* block is trans_size x trans_size; scanned in 4x4 sub-blocks */
+ WORD32 qp_div,/* qpscaled / 6 */
+ WORD32 qp_rem,/* qpscaled % 6 */
+ WORD32 q_add, /* rounding offset of the first quantization pass */
+ WORD32 *pi4_quant_round_factor_0_1, /* unused */
+ WORD32 *pi4_quant_round_factor_1_2, /* unused */
+ WORD32 src_strd,
+ WORD32 dst_q_strd,
+ WORD32 dst_iq_strd,
+ UWORD8 *csbf, /* out: one flag per 4x4 sub-block */
+ WORD32 csbf_strd, /* csbf step per row of 4x4 sub-blocks */
+ WORD32 *zero_col, /* out: set bit => column lies in no coded sub-block */
+ WORD32 *zero_row, /* out: set bit => row lies in no coded sub-block */
+ WORD16 *pi2_dequant_coeff, /* in: dequant matrix, trans_size entries per row */
+ LWORD64 *pi8_cost /* out: sum of squared (input - inverse-quantized) differences */
+ )
+{
+ WORD32 i, j;
+ WORD32 log2_size;
+ WORD16 *pi2_q_dst_orig;
+ WORD32 cbf = 0;
+ WORD32 bit_depth,shift_iq;
+ WORD32 val;
+ WORD16 i2_temp;
+ /* Cost starts at zero; kept in LWORD64 because one val*val can already exceed INT32_MAX */
+ LWORD64 ssd_cost = 0;
+
+ (void)pi4_quant_round_factor_0_1;
+ (void)pi4_quant_round_factor_1_2;
+ pi2_q_dst_orig = pi2_q_dst;
+
+ /* Quant initialization. GETRANGE is defined elsewhere; presumably a bit-length, hence the -1 (TODO confirm) */
+ GETRANGE(log2_size, trans_size);
+ log2_size -= 1;
+
+ bit_depth = 8 + 0;
+ shift_iq = bit_depth + log2_size - 5;
+
+ for(i = 0; i < trans_size; i++)
+ {
+ for(j = 0; j < trans_size; j++)
+ {
+ WORD16 i2_temp1;
+ /* Back up the coefficients before Quantization */
+ i2_temp = pi2_coeffs[j];
+
+ /* First pass: quantize with the caller's rounding offset q_add.
+ * The scaling matrix pi2_quant_coeff is not applied in this
+ * flat-matrix variant (QUANT_NO_WEIGHTMAT). */
+
+ /* The else-branch below is never taken: the condition is the constant 1 */
+ /* Quantization */
+
+ if (1)
+ {
+ QUANT_NO_WEIGHTMAT(pi2_q_dst[j], pi2_coeffs[j],
+ g_ihevc_quant_scales[qp_rem], qp_div,
+ log2_size, q_add);
+ }
+ else
+ { \
+ WORD16 inp = pi2_coeffs[j],out = pi2_q_dst[j];
+ WORD32 quant_coeff = g_ihevc_quant_scales[qp_rem];
+ WORD32 log2_trans_size = log2_size;
+ WORD32 tmp; \
+ WORD32 sign; \
+ WORD32 bit_depth,transform_shift; \
+ WORD32 q_bits, quant_multiplier; \
+ \
+ /* q_bits and q_add calculation*/ \
+ /* To be moved outside in neon. To be computed once per transform call */ \
+ bit_depth = 8; \
+ transform_shift = MAX_TR_DYNAMIC_RANGE - bit_depth - log2_trans_size; \
+ quant_multiplier = 4 ; /* because quant_coeff are multiplied by 16. Instead of multiplying, we can reduce the division factor q_bits by 4 */ \
+ q_bits = QUANT_SHIFT + qp_div + transform_shift + SCALING_Q_SHIFT - quant_multiplier - FLAT_RESCALE_MAT_Q_SHIFT /* 2048 */; \
+ \
+ sign = (inp)<0 ? -1:1; \
+ \
+ tmp = (WORD32)(abs(inp)); \
+ tmp = tmp * (quant_coeff); \
+ tmp = tmp + (((WORD32)q_add) << (q_bits - QUANT_ROUND_FACTOR_Q)); \
+ tmp = tmp >> q_bits; \
+ \
+ tmp = tmp * sign; \
+ out = (WORD16) CLIP_S16(tmp); \
+ }
+ i2_temp1 = pi2_q_dst[j];
+ if (abs(pi2_q_dst[j]) > 1)
+ {
+ QUANT_NO_WEIGHTMAT(pi2_q_dst[j], i2_temp,
+ g_ihevc_quant_scales[qp_rem], qp_div,
+ log2_size, ((1 << QUANT_ROUND_FACTOR_Q)/2));
+ }
+
+ /* Expected: requantizing with half rounding moves the level by at most 1 and never lowers its magnitude */
+ ASSERT(abs(i2_temp1-pi2_q_dst[j]) <= 1);
+ ASSERT(abs(i2_temp1) <= abs(pi2_q_dst[j]));
+
+
+ /* Inverse Quantization (run for every level, including zero) */
+ IQUANT(pi2_iq_dst[j],
+ pi2_q_dst[j], /* quantized level */
+ pi2_dequant_coeff[j]*g_ihevc_iquant_scales[qp_rem], /* dequant matrix entry scaled by the qp_rem table entry */
+ shift_iq,
+ qp_div);
+
+ /* SSD Computation & Accumulation; the product is widened before multiplying */
+ val = i2_temp - pi2_iq_dst[j];
+ ssd_cost += (LWORD64)val * val;
+
+ }
+ /* Step every pointer to the next row */
+ pi2_q_dst += dst_q_strd;
+ pi2_iq_dst += dst_iq_strd;
+ pi2_quant_coeff += trans_size;
+ pi2_coeffs += src_strd;
+ pi2_dequant_coeff += trans_size;
+
+ }
+ /* Store the cost */
+ *pi8_cost = ssd_cost;
+
+ /* CSBF update: flag every 4x4 sub-block of the quantized output that holds a non-zero level */
+ {
+ WORD32 block_row, block_col;
+ WORD32 row, col;
+ WORD16 *pi2_block;
+ UWORD32 temp_zero_col = 0;
+ UWORD32 temp_zero_row = 0;
+ /* Rewind: the quantization loop advanced pi2_q_dst past the last row */
+ pi2_q_dst = pi2_q_dst_orig;
+
+ for(block_row = 0; block_row < trans_size; block_row += 4)
+ {
+ // block_col counts 4x4 sub-blocks (not coefficients) so it indexes csbf directly
+ for(block_col = 0; block_col < trans_size / 4; block_col++)
+ {
+ pi2_block = pi2_q_dst + block_row * dst_q_strd + block_col * 4;
+ *(csbf + block_col) = 0;
+
+ for(row = 0; row < 4; row++)
+ {
+ for(col = 0; col < 4; col++)
+ {
+ if(pi2_block[row * dst_q_strd + col] != 0)
+ {
+ *(csbf + block_col) = 1;
+ break;
+ }
+ }
+ if(*(csbf + block_col) == 1)
+ {
+ /* zero_col update *//* temp_zero_col = ~zero_col */
+ temp_zero_col = (temp_zero_col) | (0xFU << block_col * 4);
+ // zero col can be optimized further. For now all 4 bits
+ // of the 4 columns covered by this 4x4 block are marked,
+ // even if only some of those columns hold a non-zero level
+
+ /* zero row update */ /* temp_zero_row = ~zero_row */
+ temp_zero_row = (temp_zero_row) | (0xFU << block_row);
+ // zero row can be optimized further. For now all 4 bits
+ // of the 4 rows covered by this 4x4 block are marked,
+ // even if only some of those rows hold a non-zero level
+ /* Sub-block is known to be coded; skip its remaining rows */
+ break;
+ }
+ }
+
+ cbf = cbf || (*(csbf + block_col)); // cbf becomes 1 once any sub-block is coded
+ }
+ csbf += csbf_strd;
+ }
+
+ *zero_col = ~temp_zero_col; // set bit => column lies in no coded 4x4 sub-block
+ *zero_row = ~temp_zero_row; // set bit => row lies in no coded 4x4 sub-block
+ }
+ return cbf;
+}
+
+WORD32 ihevc_quant_iquant_flat_scale_mat_rdoq
+ (
+ WORD16 *pi2_coeffs, /* in: coefficients, rows src_strd apart */
+ WORD16 *pi2_quant_coeff, /* not read by this flat-matrix variant (only advanced) */
+ WORD16 *pi2_q_dst, /* out: quantized levels, rows dst_q_strd apart */
+ WORD16 *pi2_iq_dst, /* out: inverse-quantized values, rows dst_iq_strd apart */
+ WORD32 trans_size, /* block is trans_size x trans_size; scanned in 4x4 sub-blocks */
+ WORD32 qp_div,/* qpscaled / 6 */
+ WORD32 qp_rem,/* qpscaled % 6 */
+ WORD32 q_add, /* rounding offset of the first quantization pass */
+ WORD32 *pi4_quant_round_factor_0_1, /* unused */
+ WORD32 *pi4_quant_round_factor_1_2, /* unused */
+ WORD32 src_strd,
+ WORD32 dst_q_strd,
+ WORD32 dst_iq_strd,
+ UWORD8 *csbf, /* out: one flag per 4x4 sub-block */
+ WORD32 csbf_strd, /* csbf step per row of 4x4 sub-blocks */
+ WORD32 *zero_col, /* out: set bit => column lies in no coded sub-block */
+ WORD32 *zero_row, /* out: set bit => row lies in no coded sub-block */
+ WORD16 *pi2_dequant_coeff, /* in: dequant matrix, trans_size entries per row */
+ LWORD64 *pi8_cost /* unused: this variant computes no SSD */
+ )
+{
+ WORD32 i, j;
+ WORD32 log2_size;
+ WORD16 *pi2_q_dst_orig;
+ WORD32 cbf = 0;
+ WORD32 bit_depth,shift_iq;
+ WORD16 i2_temp;
+ /* Quantizes each coefficient, requantizes levels above 1 with half rounding, then inverse-quantizes; returns cbf. */
+ (void)pi8_cost;
+ (void)pi4_quant_round_factor_0_1;
+ (void)pi4_quant_round_factor_1_2;
+ pi2_q_dst_orig = pi2_q_dst;
+
+ /* Quant initialization. GETRANGE is defined elsewhere; presumably a bit-length, hence the -1 (TODO confirm) */
+ GETRANGE(log2_size, trans_size);
+ log2_size -= 1;
+
+ bit_depth = 8 + 0;
+ shift_iq = bit_depth + log2_size - 5;
+
+ for(i = 0; i < trans_size; i++)
+ {
+ for(j = 0; j < trans_size; j++)
+ {
+ WORD16 i2_temp1;
+ /* Keep the input coefficient; it is the source of the second quantization pass */
+ i2_temp = pi2_coeffs[j];
+ /* First pass: caller-supplied rounding offset q_add */
+ QUANT_NO_WEIGHTMAT(pi2_q_dst[j], pi2_coeffs[j],
+ g_ihevc_quant_scales[qp_rem], qp_div,
+ log2_size, q_add);
+
+ i2_temp1 = pi2_q_dst[j];
+ /* Second pass, only for levels above 1: half rounding */
+ if (abs(pi2_q_dst[j]) > 1)
+ {
+ QUANT_NO_WEIGHTMAT(pi2_q_dst[j], i2_temp,
+ g_ihevc_quant_scales[qp_rem], qp_div,
+ log2_size, ((1 << QUANT_ROUND_FACTOR_Q)/2));
+ }
+ /* Expected: the second pass moves the level by at most 1 and never lowers its magnitude */
+ ASSERT(abs(i2_temp1-pi2_q_dst[j]) <= 1);
+ ASSERT(abs(i2_temp1) <= abs(pi2_q_dst[j]));
+ /* Inverse Quantization (run for every level, including zero) */
+ IQUANT(pi2_iq_dst[j],
+ pi2_q_dst[j], /* quantized level */
+ pi2_dequant_coeff[j]*g_ihevc_iquant_scales[qp_rem], /* dequant matrix entry scaled by the qp_rem table entry */
+ shift_iq,
+ qp_div);
+ }
+ /* Step every pointer to the next row */
+ pi2_q_dst += dst_q_strd;
+ pi2_iq_dst += dst_iq_strd;
+ pi2_quant_coeff += trans_size;
+ pi2_coeffs += src_strd;
+ pi2_dequant_coeff += trans_size;
+ }
+
+ /* CSBF update: flag every 4x4 sub-block of the quantized output that holds a non-zero level */
+ {
+ WORD32 block_row, block_col;
+ WORD32 row, col;
+ WORD16 *pi2_block;
+ UWORD32 temp_zero_col = 0;
+ UWORD32 temp_zero_row = 0;
+ /* Rewind: the quantization loop advanced pi2_q_dst past the last row */
+ pi2_q_dst = pi2_q_dst_orig;
+
+ for(block_row = 0; block_row < trans_size; block_row += 4)
+ {
+ // block_col counts 4x4 sub-blocks (not coefficients) so it indexes csbf directly
+ for(block_col = 0; block_col < trans_size / 4; block_col++)
+ {
+ pi2_block = pi2_q_dst + block_row * dst_q_strd + block_col * 4;
+ *(csbf + block_col) = 0;
+
+ for(row = 0; row < 4; row++)
+ {
+ for(col = 0; col < 4; col++)
+ {
+ if(pi2_block[row * dst_q_strd + col] != 0)
+ {
+ *(csbf + block_col) = 1;
+ break;
+ }
+ }
+ if(*(csbf + block_col) == 1)
+ {
+ /* zero_col update *//* temp_zero_col = ~zero_col */
+ temp_zero_col = (temp_zero_col) | (0xFU << block_col * 4);
+ // zero col can be optimized further. For now all 4 bits
+ // of the 4 columns covered by this 4x4 block are marked,
+ // even if only some of those columns hold a non-zero level
+
+ /* zero row update */ /* temp_zero_row = ~zero_row */
+ temp_zero_row = (temp_zero_row) | (0xFU << block_row);
+ // zero row can be optimized further. For now all 4 bits
+ // of the 4 rows covered by this 4x4 block are marked,
+ // even if only some of those rows hold a non-zero level
+ /* Sub-block is known to be coded; skip its remaining rows */
+ break;
+ }
+ }
+
+ cbf = cbf || (*(csbf + block_col)); // cbf becomes 1 once any sub-block is coded
+ }
+ csbf += csbf_strd;
+ }
+
+ *zero_col = ~temp_zero_col; // set bit => column lies in no coded 4x4 sub-block
+ *zero_row = ~temp_zero_row; // set bit => row lies in no coded 4x4 sub-block
+ }
+
+ return cbf;
+}
+
+
+/**
+*******************************************************************************
+*
+* @brief
+* This function performs quantization, followed by Inverse
+* quantization to find transform domain SSD
+*
+* @par Description:
+* Performs quantization on coeffs
+*
+* @param[in] pi2_coeffs
+* 4x4 Coeffs
+*
+* @param[in] pi2_quant_coeff
+* Scaling Matrix
+*
+* @param[out] pi2_dst
+* Output 4x4 coefficients
+*
+* @param[in] qp_div
+* Quantization parameter / 6
+*
+* @param[in] qp_rem
+* Quantization parameter % 6
+*
+* @param[in] src_strd
+* Input stride
+*
+* @param[in] dst_strd
+* Output Stride
+*
+* @param[out] csbf
+* coded sub block flag
+*
+* @param[in] csbf_strd
+* coded sub block flag stride
+*
+* @param[out] zero_col
+* zero column flag
+*
+* @param[out] zero_row
+* zero row flag
+*
+* @returns cbf
+* coded block flag
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+WORD32 ihevc_q_iq_ssd_var_rnd_fact
+ (
+ WORD16 *pi2_coeffs, /* in: coefficients, rows src_strd apart */
+ WORD16 *pi2_quant_coeff, /* in: scaling matrix, trans_size entries per row */
+ WORD16 *pi2_q_dst, /* out: quantized levels, rows dst_q_strd apart */
+ WORD16 *pi2_iq_dst, /* out: inverse-quantized values, rows dst_iq_strd apart */
+ WORD32 trans_size, /* block is trans_size x trans_size; scanned in 4x4 sub-blocks */
+ WORD32 qp_div,/* qpscaled / 6 */
+ WORD32 qp_rem,/* qpscaled % 6 */
+ WORD32 q_add, /* unused: rounding comes from the two factor arrays */
+ WORD32 *pi4_quant_round_factor_0_1, /* in: rounding factor per coefficient, used when the trial level is 0 */
+ WORD32 *pi4_quant_round_factor_1_2, /* in: rounding factor per coefficient, used when the trial level is 1 */
+ WORD32 src_strd,
+ WORD32 dst_q_strd,
+ WORD32 dst_iq_strd,
+ UWORD8 *csbf, /* out: one flag per 4x4 sub-block */
+ WORD32 csbf_strd, /* csbf step per row of 4x4 sub-blocks */
+ WORD32 *zero_col, /* out: set bit => column lies in no coded sub-block */
+ WORD32 *zero_row, /* out: set bit => row lies in no coded sub-block */
+ WORD16 *pi2_dequant_coeff, /* in: dequant matrix, trans_size entries per row */
+ LWORD64 *pi8_cost /* out: sum of squared (input - inverse-quantized) differences */
+ )
+{
+ WORD32 i, j;
+ WORD32 log2_size;
+ WORD16 *pi2_q_dst_orig;
+ WORD32 cbf = 0;
+ WORD32 bit_depth,shift_iq;
+ WORD32 val;
+ WORD16 i2_temp;
+ // Accumulator is LWORD64: one val*val can already exceed INT32_MAX
+ /* Initialize cost to zero */
+ LWORD64 ssd_cost = 0;
+
+ (void)q_add;
+ pi2_q_dst_orig = pi2_q_dst;
+
+
+ /* Quant initialization. GETRANGE is defined elsewhere; presumably a bit-length, hence the -1 (TODO confirm) */
+ GETRANGE(log2_size, trans_size);
+ log2_size -= 1;
+
+ bit_depth = 8 + 0;
+ shift_iq = bit_depth + log2_size - 5;
+
+ for(i = 0; i < trans_size; i++)
+ {
+ for(j = 0; j < trans_size; j++)
+ {
+ /* Back up the coefficients before Quantization */
+ i2_temp = pi2_coeffs[j];
+
+ /* Trial quantization with zero rounding picks the rounding factor for the final pass */
+ {
+ /* Quantization */
+ QUANT(pi2_q_dst[j],i2_temp,
+ pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
+ log2_size, 0);
+ if (abs(pi2_q_dst[j]) >= 2)
+ {
+ QUANT(pi2_q_dst[j],i2_temp,
+ pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
+ log2_size, ((1 << QUANT_ROUND_FACTOR_Q)/2));
+
+ }
+ else if (abs(pi2_q_dst[j]) >= 1)
+ {
+ QUANT(pi2_q_dst[j],i2_temp,
+ pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
+ log2_size, *pi4_quant_round_factor_1_2);
+ }
+
+ else
+ {
+ /* Quantization */
+ QUANT(pi2_q_dst[j],i2_temp,
+ pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
+ log2_size, *pi4_quant_round_factor_0_1);
+ }
+
+ }
+
+
+
+ /* Inverse Quantization (run for every level, including zero) */
+ IQUANT(pi2_iq_dst[j],
+ pi2_q_dst[j], /* quantized level */
+ pi2_dequant_coeff[j]*g_ihevc_iquant_scales[qp_rem],
+ /* dequant matrix entry scaled by the qp_rem table entry */
+ shift_iq,
+ qp_div);
+
+ /* SSD Computation & Accumulation; the product is widened before multiplying */
+ val = i2_temp - pi2_iq_dst[j];
+ ssd_cost += (LWORD64)val * val;
+ /* The factor arrays are walked contiguously, one entry per coefficient */
+ pi4_quant_round_factor_0_1++;
+ pi4_quant_round_factor_1_2++;
+ }
+ /* Step every row pointer to the next row */
+ pi2_q_dst += dst_q_strd;
+ pi2_iq_dst += dst_iq_strd;
+ pi2_quant_coeff += trans_size;
+ pi2_coeffs += src_strd;
+ pi2_dequant_coeff += trans_size;
+ }
+ /* Store the cost */
+ *pi8_cost = ssd_cost;
+
+ /* CSBF update: flag every 4x4 sub-block of the quantized output that holds a non-zero level */
+ {
+ WORD32 block_row, block_col;
+ WORD32 row, col;
+ WORD16 *pi2_block;
+ UWORD32 temp_zero_col = 0;
+ UWORD32 temp_zero_row = 0;
+ /* Rewind: the quantization loop advanced pi2_q_dst past the last row */
+ pi2_q_dst = pi2_q_dst_orig;
+
+ for(block_row = 0; block_row < trans_size; block_row += 4)
+ {
+ // block_col counts 4x4 sub-blocks (not coefficients) so it indexes csbf directly
+ for(block_col = 0; block_col < trans_size / 4; block_col++)
+ {
+ pi2_block = pi2_q_dst + block_row * dst_q_strd + block_col * 4;
+ *(csbf + block_col) = 0;
+
+ for(row = 0; row < 4; row++)
+ {
+ for(col = 0; col < 4; col++)
+ {
+ if(pi2_block[row * dst_q_strd + col] != 0)
+ {
+ *(csbf + block_col) = 1;
+ break;
+ }
+ }
+ if(*(csbf + block_col) == 1)
+ {
+ /* zero_col update *//* temp_zero_col = ~zero_col */
+ temp_zero_col = (temp_zero_col) | (0xFU << block_col * 4);
+ // zero col can be optimized further. For now all 4 bits
+ // of the 4 columns covered by this 4x4 block are marked,
+ // even if only some of those columns hold a non-zero level
+
+ /* zero row update */ /* temp_zero_row = ~zero_row */
+ temp_zero_row = (temp_zero_row) | (0xFU << block_row);
+ // zero row can be optimized further. For now all 4 bits
+ // of the 4 rows covered by this 4x4 block are marked,
+ // even if only some of those rows hold a non-zero level
+ /* Sub-block is known to be coded; skip its remaining rows */
+ break;
+ }
+ }
+
+ cbf = cbf || (*(csbf + block_col)); // cbf becomes 1 once any sub-block is coded
+ }
+ csbf += csbf_strd;
+ }
+
+ *zero_col = ~temp_zero_col; // set bit => column lies in no coded 4x4 sub-block
+ *zero_row = ~temp_zero_row; // set bit => row lies in no coded 4x4 sub-block
+ }
+
+ return cbf;
+}
+
+WORD32 ihevc_q_iq_var_rnd_fact
+ (
+ WORD16 *pi2_coeffs, /* in: coefficients, rows src_strd apart */
+ WORD16 *pi2_quant_coeff, /* in: scaling matrix, trans_size entries per row */
+ WORD16 *pi2_q_dst, /* out: quantized levels, rows dst_q_strd apart */
+ WORD16 *pi2_iq_dst, /* out: inverse-quantized values, rows dst_iq_strd apart */
+ WORD32 trans_size, /* block is trans_size x trans_size; scanned in 4x4 sub-blocks */
+ WORD32 qp_div,/* qpscaled / 6 */
+ WORD32 qp_rem,/* qpscaled % 6 */
+ WORD32 q_add, /* unused: rounding comes from the two factor arrays */
+ WORD32 *pi4_quant_round_factor_0_1, /* in: rounding factor per coefficient, used when the trial level is 0 */
+ WORD32 *pi4_quant_round_factor_1_2, /* in: rounding factor per coefficient, used when the trial level is 1 */
+ WORD32 src_strd,
+ WORD32 dst_q_strd,
+ WORD32 dst_iq_strd,
+ UWORD8 *csbf, /* out: one flag per 4x4 sub-block */
+ WORD32 csbf_strd, /* csbf step per row of 4x4 sub-blocks */
+ WORD32 *zero_col, /* out: set bit => column lies in no coded sub-block */
+ WORD32 *zero_row, /* out: set bit => row lies in no coded sub-block */
+ WORD16 *pi2_dequant_coeff, /* in: dequant matrix, trans_size entries per row */
+ LWORD64 *pi8_cost /* unused: this variant computes no SSD */
+ )
+{
+ WORD32 i, j;
+ WORD32 log2_size;
+ WORD16 *pi2_q_dst_orig;
+ WORD32 cbf = 0;
+ WORD32 bit_depth,shift_iq;
+ WORD16 i2_temp;
+ /* Quantizes with a per-coefficient rounding factor chosen from a zero-rounding trial level, then inverse-quantizes; returns cbf. */
+ (void)q_add;
+ (void)pi8_cost;
+ pi2_q_dst_orig = pi2_q_dst;
+ /* GETRANGE is defined elsewhere; presumably a bit-length, hence the -1 (TODO confirm) */
+ GETRANGE(log2_size, trans_size);
+ log2_size -= 1;
+
+ bit_depth = 8 + 0;
+ shift_iq = bit_depth + log2_size - 5;
+
+ for(i = 0; i < trans_size; i++)
+ {
+ for(j = 0; j < trans_size; j++)
+ {
+ i2_temp = pi2_coeffs[j];
+ /* Trial quantization with zero rounding picks the rounding factor for the final pass */
+ {
+ QUANT(pi2_q_dst[j],i2_temp,
+ pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
+ log2_size, 0);
+ /* trial level >= 2: half rounding; == 1: 1_2 factor; == 0: 0_1 factor */
+ if (abs(pi2_q_dst[j]) >= 2)
+ {
+ QUANT(pi2_q_dst[j],i2_temp,
+ pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
+ log2_size, ((1 << QUANT_ROUND_FACTOR_Q)/2));
+ }
+ else if (abs(pi2_q_dst[j]) >= 1)
+ {
+ QUANT(pi2_q_dst[j],i2_temp,
+ pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
+ log2_size, *pi4_quant_round_factor_1_2);
+ }
+ else
+ {
+ QUANT(pi2_q_dst[j],i2_temp,
+ pi2_quant_coeff[j] * g_ihevc_quant_scales[qp_rem], qp_div,
+ log2_size, *pi4_quant_round_factor_0_1);
+ }
+ }
+ /* Inverse Quantization (run for every level, including zero) */
+ IQUANT(pi2_iq_dst[j],
+ pi2_q_dst[j], /* quantized level */
+ pi2_dequant_coeff[j]*g_ihevc_iquant_scales[qp_rem],
+ shift_iq,
+ qp_div);
+ /* The factor arrays are walked contiguously, one entry per coefficient */
+ pi4_quant_round_factor_0_1++;
+ pi4_quant_round_factor_1_2++;
+ }
+ /* Step every row pointer to the next row */
+ pi2_q_dst += dst_q_strd;
+ pi2_iq_dst += dst_iq_strd;
+ pi2_quant_coeff += trans_size;
+ pi2_coeffs += src_strd;
+ pi2_dequant_coeff += trans_size;
+ }
+
+ /* CSBF update: flag every 4x4 sub-block of the quantized output that holds a non-zero level */
+ {
+ WORD32 block_row, block_col;
+ WORD32 row, col;
+ WORD16 *pi2_block;
+ UWORD32 temp_zero_col = 0;
+ UWORD32 temp_zero_row = 0;
+ /* Rewind: the quantization loop advanced pi2_q_dst past the last row */
+ pi2_q_dst = pi2_q_dst_orig;
+
+ for(block_row = 0; block_row < trans_size; block_row += 4)
+ {
+ // block_col counts 4x4 sub-blocks (not coefficients) so it indexes csbf directly
+ for(block_col = 0; block_col < trans_size / 4; block_col++)
+ {
+ pi2_block = pi2_q_dst + block_row * dst_q_strd + block_col * 4;
+ *(csbf + block_col) = 0;
+
+ for(row = 0; row < 4; row++)
+ {
+ for(col = 0; col < 4; col++)
+ {
+ if(pi2_block[row * dst_q_strd + col] != 0)
+ {
+ *(csbf + block_col) = 1;
+ break;
+ }
+ }
+ if(*(csbf + block_col) == 1)
+ {
+ /* zero_col update *//* temp_zero_col = ~zero_col */
+ temp_zero_col = (temp_zero_col) | (0xFU << block_col * 4);
+ // zero col can be optimized further. For now all 4 bits
+ // of the 4 columns covered by this 4x4 block are marked,
+ // even if only some of those columns hold a non-zero level
+
+ /* zero row update */ /* temp_zero_row = ~zero_row */
+ temp_zero_row = (temp_zero_row) | (0xFU << block_row);
+ // zero row can be optimized further. For now all 4 bits
+ // of the 4 rows covered by this 4x4 block are marked,
+ // even if only some of those rows hold a non-zero level
+ /* Sub-block is known to be coded; skip its remaining rows */
+ break;
+ }
+ }
+
+ cbf = cbf || (*(csbf + block_col)); // cbf becomes 1 once any sub-block is coded
+ }
+ csbf += csbf_strd;
+ }
+
+ *zero_col = ~temp_zero_col; // set bit => column lies in no coded 4x4 sub-block
+ *zero_row = ~temp_zero_row; // set bit => row lies in no coded 4x4 sub-block
+ }
+
+ return cbf;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* This function performs quantization(using flat scale matrix), followed by
+* inverse quantization to find transform domain SSD. Each coefficient is first
+* quantized with zero rounding; the resulting level (0, 1 or >= 2) then selects
+* the rounding factor (0_1 factor, 1_2 factor or half rounding) for requantization.
+*
+* @par Description:
+* Performs quantization on coeffs
+*
+* @param[in] pi2_coeffs
+* 4x4 Coeffs
+*
+* @param[in] pi2_quant_coeff
+* Scaling Matrix
+*
+* @param[out] pi2_dst
+* Output 4x4 coefficients
+*
+* @param[in] qp_div
+* Quantization parameter / 6
+*
+* @param[in] qp_rem
+* Quantization parameter % 6
+*
+* @param[in] src_strd
+* Input stride
+*
+* @param[in] dst_strd
+* Output Stride
+*
+* @param[out] csbf
+* coded sub block flag
+*
+* @param[in] csbf_strd
+* coded sub block flag stride
+*
+* @param[out] zero_col
+* zero column flag
+*
+* @param[out] zero_row
+* zero row flag
+*
+* @returns cbf
+* coded block flag
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+WORD32 ihevc_q_iq_ssd_flat_scale_mat_var_rnd_fact
+ (
+ WORD16 *pi2_coeffs, /* in: coefficients, rows src_strd apart */
+ WORD16 *pi2_quant_coeff, /* not read by this flat-matrix variant (only advanced) */
+ WORD16 *pi2_q_dst, /* out: quantized levels, rows dst_q_strd apart */
+ WORD16 *pi2_iq_dst, /* out: inverse-quantized values, rows dst_iq_strd apart */
+ WORD32 trans_size, /* block is trans_size x trans_size; scanned in 4x4 sub-blocks */
+ WORD32 qp_div,/* qpscaled / 6 */
+ WORD32 qp_rem,/* qpscaled % 6 */
+ WORD32 q_add, /* unused: rounding comes from the two factor arrays */
+ WORD32 *pi4_quant_round_factor_0_1, /* in: rounding factor per coefficient, used when the trial level is 0 */
+ WORD32 *pi4_quant_round_factor_1_2, /* in: rounding factor per coefficient, used when the trial level is 1 */
+ WORD32 src_strd,
+ WORD32 dst_q_strd,
+ WORD32 dst_iq_strd,
+ UWORD8 *csbf, /* out: one flag per 4x4 sub-block */
+ WORD32 csbf_strd, /* csbf step per row of 4x4 sub-blocks */
+ WORD32 *zero_col, /* out: set bit => column lies in no coded sub-block */
+ WORD32 *zero_row, /* out: set bit => row lies in no coded sub-block */
+ WORD16 *pi2_dequant_coeff, /* in: dequant matrix, trans_size entries per row */
+ LWORD64 *pi8_cost /* out: sum of squared (input - inverse-quantized) differences */
+ )
+{
+ WORD32 i, j;
+ WORD32 log2_size;
+ WORD16 *pi2_q_dst_orig;
+ WORD32 cbf = 0;
+ WORD32 bit_depth,shift_iq;
+ WORD32 val;
+ WORD16 i2_temp;
+ /* Cost starts at zero; kept in LWORD64 because one val*val can already exceed INT32_MAX */
+ LWORD64 ssd_cost = 0;
+
+ (void)q_add;
+ pi2_q_dst_orig = pi2_q_dst;
+
+ /* Quant initialization. GETRANGE is defined elsewhere; presumably a bit-length, hence the -1 (TODO confirm) */
+ GETRANGE(log2_size, trans_size);
+ log2_size -= 1;
+
+ bit_depth = 8 + 0;
+ shift_iq = bit_depth + log2_size - 5;
+
+ for(i = 0; i < trans_size; i++)
+ {
+ for(j = 0; j < trans_size; j++)
+ {
+ WORD16 i2_temp1;
+ /* Back up the coefficients before Quantization */
+ i2_temp = pi2_coeffs[j];
+
+ /* Trial quantization with zero rounding picks the rounding
+ * factor for the final pass: level >= 2 uses half rounding,
+ * level 1 the 1_2 factor, level 0 the 0_1 factor. */
+
+ /* The scaling matrix is not applied here (QUANT_NO_WEIGHTMAT) */
+ /* Quantization */
+
+
+ {
+ QUANT_NO_WEIGHTMAT(pi2_q_dst[j], pi2_coeffs[j],
+ g_ihevc_quant_scales[qp_rem], qp_div,
+ log2_size, 0);
+
+ i2_temp1 = pi2_q_dst[j];
+
+ if (abs(pi2_q_dst[j]) >= 2)
+ {
+ QUANT_NO_WEIGHTMAT(pi2_q_dst[j], i2_temp,
+ g_ihevc_quant_scales[qp_rem], qp_div,
+ log2_size, ((1 << QUANT_ROUND_FACTOR_Q)/2));
+ }
+ else if (abs(pi2_q_dst[j]) >= 1)
+ {
+ QUANT_NO_WEIGHTMAT(pi2_q_dst[j], pi2_coeffs[j],
+ g_ihevc_quant_scales[qp_rem], qp_div,
+ log2_size, *pi4_quant_round_factor_1_2);
+ }
+
+ else
+ {
+ QUANT_NO_WEIGHTMAT(pi2_q_dst[j], pi2_coeffs[j],
+ g_ihevc_quant_scales[qp_rem], qp_div,
+ log2_size, *pi4_quant_round_factor_0_1);
+ }
+
+ }
+
+
+
+ /* Expected: the final pass moves the trial level by at most 1 */
+ ASSERT(abs(i2_temp1-pi2_q_dst[j]) <= 1);
+
+
+ /* Inverse Quantization (run for every level, including zero) */
+ IQUANT(pi2_iq_dst[j],
+ pi2_q_dst[j], /* quantized level */
+ pi2_dequant_coeff[j]*g_ihevc_iquant_scales[qp_rem], /* dequant matrix entry scaled by the qp_rem table entry */
+ shift_iq,
+ qp_div);
+
+ /* SSD Computation & Accumulation; the product is widened before multiplying */
+ val = i2_temp - pi2_iq_dst[j];
+ ssd_cost += (LWORD64)val * val;
+ /* The factor arrays are walked contiguously, one entry per coefficient */
+ pi4_quant_round_factor_0_1++;
+ pi4_quant_round_factor_1_2++;
+ }
+ /* Step every row pointer to the next row */
+ pi2_q_dst += dst_q_strd;
+ pi2_iq_dst += dst_iq_strd;
+ pi2_quant_coeff += trans_size;
+ pi2_coeffs += src_strd;
+ pi2_dequant_coeff += trans_size;
+
+ }
+ /* Store the cost */
+ *pi8_cost = ssd_cost;
+
+ /* CSBF update: flag every 4x4 sub-block of the quantized output that holds a non-zero level */
+ {
+ WORD32 block_row, block_col;
+ WORD32 row, col;
+ WORD16 *pi2_block;
+ UWORD32 temp_zero_col = 0;
+ UWORD32 temp_zero_row = 0;
+ /* Rewind: the quantization loop advanced pi2_q_dst past the last row */
+ pi2_q_dst = pi2_q_dst_orig;
+
+ for(block_row = 0; block_row < trans_size; block_row += 4)
+ {
+ // block_col counts 4x4 sub-blocks (not coefficients) so it indexes csbf directly
+ for(block_col = 0; block_col < trans_size / 4; block_col++)
+ {
+ pi2_block = pi2_q_dst + block_row * dst_q_strd + block_col * 4;
+ *(csbf + block_col) = 0;
+
+ for(row = 0; row < 4; row++)
+ {
+ for(col = 0; col < 4; col++)
+ {
+ if(pi2_block[row * dst_q_strd + col] != 0)
+ {
+ *(csbf + block_col) = 1;
+ break;
+ }
+ }
+ if(*(csbf + block_col) == 1)
+ {
+ /* zero_col update *//* temp_zero_col = ~zero_col */
+ temp_zero_col = (temp_zero_col) | (0xFU << block_col * 4);
+ // zero col can be optimized further. For now all 4 bits
+ // of the 4 columns covered by this 4x4 block are marked,
+ // even if only some of those columns hold a non-zero level
+
+ /* zero row update */ /* temp_zero_row = ~zero_row */
+ temp_zero_row = (temp_zero_row) | (0xFU << block_row);
+ // zero row can be optimized further. For now all 4 bits
+ // of the 4 rows covered by this 4x4 block are marked,
+ // even if only some of those rows hold a non-zero level
+ /* Sub-block is known to be coded; skip its remaining rows */
+ break;
+ }
+ }
+
+ cbf = cbf || (*(csbf + block_col)); // cbf becomes 1 once any sub-block is coded
+ }
+ csbf += csbf_strd;
+ }
+
+ *zero_col = ~temp_zero_col; // set bit => column lies in no coded 4x4 sub-block
+ *zero_row = ~temp_zero_row; // set bit => row lies in no coded 4x4 sub-block
+ }
+ return cbf;
+}
+
+WORD32 ihevc_q_iq_flat_scale_mat_var_rnd_fact
+ (
+ WORD16 *pi2_coeffs, /* in: coefficients, rows src_strd apart */
+ WORD16 *pi2_quant_coeff, /* not read by this flat-matrix variant (only advanced) */
+ WORD16 *pi2_q_dst, /* out: quantized levels, rows dst_q_strd apart */
+ WORD16 *pi2_iq_dst, /* out: inverse-quantized values, rows dst_iq_strd apart */
+ WORD32 trans_size, /* block is trans_size x trans_size; scanned in 4x4 sub-blocks */
+ WORD32 qp_div,/* qpscaled / 6 */
+ WORD32 qp_rem,/* qpscaled % 6 */
+ WORD32 q_add, /* unused: rounding comes from the two factor arrays */
+ WORD32 *pi4_quant_round_factor_0_1, /* in: rounding factor per coefficient, used when the trial level is 0 */
+ WORD32 *pi4_quant_round_factor_1_2, /* in: rounding factor per coefficient, used when the trial level is 1 */
+ WORD32 src_strd,
+ WORD32 dst_q_strd,
+ WORD32 dst_iq_strd,
+ UWORD8 *csbf, /* out: one flag per 4x4 sub-block */
+ WORD32 csbf_strd, /* csbf step per row of 4x4 sub-blocks */
+ WORD32 *zero_col, /* out: set bit => column lies in no coded sub-block */
+ WORD32 *zero_row, /* out: set bit => row lies in no coded sub-block */
+ WORD16 *pi2_dequant_coeff, /* in: dequant matrix, trans_size entries per row */
+ LWORD64 *pi8_cost /* unused: this variant computes no SSD */
+ )
+{
+ WORD32 i, j;
+ WORD32 log2_size;
+ WORD16 *pi2_q_dst_orig;
+ WORD32 cbf = 0;
+ WORD32 bit_depth,shift_iq;
+ WORD16 i2_temp;
+ /* Flat-matrix quantization with a per-coefficient rounding factor chosen from a zero-rounding trial level, then inverse quantization; returns cbf. */
+ (void)q_add;
+ (void)pi8_cost;
+ pi2_q_dst_orig = pi2_q_dst;
+ /* GETRANGE is defined elsewhere; presumably a bit-length, hence the -1 (TODO confirm) */
+ GETRANGE(log2_size, trans_size);
+ log2_size -= 1;
+
+ bit_depth = 8 + 0;
+ shift_iq = bit_depth + log2_size - 5;
+
+ for(i = 0; i < trans_size; i++)
+ {
+ for(j = 0; j < trans_size; j++)
+ {
+ WORD16 i2_temp1;
+
+ i2_temp = pi2_coeffs[j];
+ /* Trial quantization with zero rounding picks the rounding factor for the final pass */
+ {
+ QUANT_NO_WEIGHTMAT(pi2_q_dst[j], pi2_coeffs[j],
+ g_ihevc_quant_scales[qp_rem], qp_div,
+ log2_size, 0);
+
+ i2_temp1 = pi2_q_dst[j];
+ /* trial level >= 2: half rounding; == 1: 1_2 factor; == 0: 0_1 factor */
+ if (abs(pi2_q_dst[j]) >= 2)
+ {
+ QUANT_NO_WEIGHTMAT(pi2_q_dst[j], i2_temp,
+ g_ihevc_quant_scales[qp_rem], qp_div,
+ log2_size, ((1 << QUANT_ROUND_FACTOR_Q)/2));
+ }
+ else if (abs(pi2_q_dst[j]) >= 1)
+ {
+ QUANT_NO_WEIGHTMAT(pi2_q_dst[j], pi2_coeffs[j],
+ g_ihevc_quant_scales[qp_rem], qp_div,
+ log2_size, *pi4_quant_round_factor_1_2);
+ }
+ else
+ {
+ QUANT_NO_WEIGHTMAT(pi2_q_dst[j], pi2_coeffs[j],
+ g_ihevc_quant_scales[qp_rem], qp_div,
+ log2_size, *pi4_quant_round_factor_0_1);
+ }
+ }
+ /* Expected: the final pass moves the trial level by at most 1 */
+ ASSERT(abs(i2_temp1-pi2_q_dst[j]) <= 1);
+ /* Inverse Quantization (run for every level, including zero) */
+ IQUANT(pi2_iq_dst[j],
+ pi2_q_dst[j], /* quantized level */
+ pi2_dequant_coeff[j]*g_ihevc_iquant_scales[qp_rem],
+ shift_iq,
+ qp_div);
+ /* The factor arrays are walked contiguously, one entry per coefficient */
+ pi4_quant_round_factor_0_1++;
+ pi4_quant_round_factor_1_2++;
+ }
+ /* Step every row pointer to the next row */
+ pi2_q_dst += dst_q_strd;
+ pi2_iq_dst += dst_iq_strd;
+ pi2_quant_coeff += trans_size;
+ pi2_coeffs += src_strd;
+ pi2_dequant_coeff += trans_size;
+
+ }
+
+ /* CSBF update: flag every 4x4 sub-block of the quantized output that holds a non-zero level */
+ {
+ WORD32 block_row, block_col;
+ WORD32 row, col;
+ WORD16 *pi2_block;
+ UWORD32 temp_zero_col = 0;
+ UWORD32 temp_zero_row = 0;
+ /* Rewind: the quantization loop advanced pi2_q_dst past the last row */
+ pi2_q_dst = pi2_q_dst_orig;
+
+ for(block_row = 0; block_row < trans_size; block_row += 4)
+ {
+ // block_col counts 4x4 sub-blocks (not coefficients) so it indexes csbf directly
+ for(block_col = 0; block_col < trans_size / 4; block_col++)
+ {
+ pi2_block = pi2_q_dst + block_row * dst_q_strd + block_col * 4;
+ *(csbf + block_col) = 0;
+
+ for(row = 0; row < 4; row++)
+ {
+ for(col = 0; col < 4; col++)
+ {
+ if(pi2_block[row * dst_q_strd + col] != 0)
+ {
+ *(csbf + block_col) = 1;
+ break;
+ }
+ }
+ if(*(csbf + block_col) == 1)
+ {
+ /* zero_col update *//* temp_zero_col = ~zero_col */
+ temp_zero_col = (temp_zero_col) | (0xFU << block_col * 4);
+ // zero col can be optimized further. For now all 4 bits
+ // of the 4 columns covered by this 4x4 block are marked,
+ // even if only some of those columns hold a non-zero level
+
+ /* zero row update */ /* temp_zero_row = ~zero_row */
+ temp_zero_row = (temp_zero_row) | (0xFU << block_row);
+ // zero row can be optimized further. For now all 4 bits
+ // of the 4 rows covered by this 4x4 block are marked,
+ // even if only some of those rows hold a non-zero level
+ /* Sub-block is known to be coded; skip its remaining rows */
+ break;
+ }
+ }
+
+ cbf = cbf || (*(csbf + block_col)); // cbf becomes 1 once any sub-block is coded
+ }
+ csbf += csbf_strd;
+ }
+
+ *zero_col = ~temp_zero_col; // set bit => column lies in no coded 4x4 sub-block
+ *zero_row = ~temp_zero_row; // set bit => row lies in no coded 4x4 sub-block
+ }
+ return cbf;
+}
diff --git a/common/ihevc_quant_iquant_ssd.h b/common/ihevc_quant_iquant_ssd.h
new file mode 100644
index 0000000..3129a4a
--- /dev/null
+++ b/common/ihevc_quant_iquant_ssd.h
@@ -0,0 +1,187 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ihevc_quant_iquant_ssd.h
+*
+* @brief
+* Function declarations for quantization, followed by inverse
+* quantization to find transform domain SSD
+*
+* @author
+* Ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+
+#ifndef _IHEVC_QUANT_IQUANT_SSD_H_
+#define _IHEVC_QUANT_IQUANT_SSD_H_
+
+typedef WORD32 ihevc_quant_iquant_ssd_ft /* function type (not a pointer type); the non-hbd functions in this header are declared with it */
+ (
+ WORD16 *pi2_coeffs, /* NOTE(review): presumably the input transform coefficients, rows src_strd apart - confirm */
+ WORD16 *pi2_quant_coeff, /* NOTE(review): presumably the quantization scaling matrix - confirm */
+ WORD16 *pi2_q_dst, /* NOTE(review): presumably the quantized output, rows dst_q_strd apart - confirm */
+ WORD16 *pi2_iq_dst, /* NOTE(review): presumably the inverse-quantized output, rows dst_iq_strd apart - confirm */
+ WORD32 trans_size,
+ WORD32 qp_div,/* qpscaled / 6 */
+ WORD32 qp_rem,/* qpscaled % 6 */
+ WORD32 q_add,
+ WORD32 *pi4_quant_round_factor_0_1,
+ WORD32 *pi4_quant_round_factor_1_2,
+ WORD32 src_strd,
+ WORD32 dst_q_strd,
+ WORD32 dst_iq_strd,
+ UWORD8 *csbf, /* NOTE(review): presumably one flag per 4x4 sub-block, rows csbf_strd apart - confirm */
+ WORD32 csbf_strd,
+ WORD32 *zero_col,
+ WORD32 *zero_row,
+ WORD16 *pi2_dequant_coeff,
+ LWORD64 *pi8_cost
+ );
+
+typedef ihevc_quant_iquant_ssd_ft ihevc_quant_iquant_ssd_rdoq_ft; /* alias: identical signature */
+
+typedef ihevc_quant_iquant_ssd_ft ihevc_quant_iquant_ssd_flat_scale_mat_ft; /* alias: identical signature */
+
+typedef ihevc_quant_iquant_ssd_ft ihevc_quant_iquant_ssd_flat_scale_mat_rdoq_ft; /* alias: identical signature */
+
+typedef ihevc_quant_iquant_ssd_ft ihevc_q_iq_ssd_flat_scale_mat_var_rnd_fact_ft; /* alias: identical signature */
+
+typedef ihevc_quant_iquant_ssd_ft ihevc_q_iq_ssd_var_rnd_fact_ft; /* alias: identical signature */
+
+typedef WORD32 ihevc_hbd_quant_iquant_ssd_ft /* function type; same parameter list as ihevc_quant_iquant_ssd_ft plus a trailing i4_bit_depth */
+ (
+ WORD16 *pi2_coeffs,
+ WORD16 *pi2_quant_coeff,
+ WORD16 *pi2_q_dst,
+ WORD16 *pi2_iq_dst,
+ WORD32 trans_size,
+ WORD32 qp_div,/* qpscaled / 6 */
+ WORD32 qp_rem,/* qpscaled % 6 */
+ WORD32 q_add,
+ WORD32 *pi4_quant_round_factor_0_1,
+ WORD32 *pi4_quant_round_factor_1_2,
+ WORD32 src_strd,
+ WORD32 dst_q_strd,
+ WORD32 dst_iq_strd,
+ UWORD8 *csbf,
+ WORD32 csbf_strd,
+ WORD32 *zero_col,
+ WORD32 *zero_row,
+ WORD16 *pi2_dequant_coeff,
+ LWORD64 *pi8_cost,
+ WORD32 i4_bit_depth /* the only parameter the non-hbd function type does not have */
+ );
+
+
+typedef ihevc_hbd_quant_iquant_ssd_ft ihevc_hbd_quant_iquant_ssd_rdoq_ft; /* alias: identical signature */
+
+typedef ihevc_hbd_quant_iquant_ssd_ft ihevc_hbd_quant_iquant_ssd_flat_scale_mat_ft; /* alias: identical signature */
+
+typedef ihevc_hbd_quant_iquant_ssd_ft ihevc_hbd_quant_iquant_ssd_flat_scale_mat_rdoq_ft; /* alias: identical signature */
+
+typedef ihevc_hbd_quant_iquant_ssd_ft ihevc_hbd_q_iq_ssd_flat_scale_mat_var_rnd_fact_ft; /* alias: identical signature */
+
+typedef ihevc_hbd_quant_iquant_ssd_ft ihevc_hbd_q_iq_ssd_var_rnd_fact_ft; /* alias: identical signature */
+
+/* C function declarations: each line declares a function whose signature is the function-type typedef on its left */
+ihevc_quant_iquant_ssd_ft ihevc_quant_iquant_ssd;
+ihevc_quant_iquant_ssd_rdoq_ft ihevc_quant_iquant_ssd_rdoq;
+ihevc_quant_iquant_ssd_flat_scale_mat_ft ihevc_quant_iquant_ssd_flat_scale_mat;
+ihevc_quant_iquant_ssd_flat_scale_mat_rdoq_ft ihevc_quant_iquant_ssd_flat_scale_mat_rdoq;
+ihevc_q_iq_ssd_var_rnd_fact_ft ihevc_q_iq_ssd_var_rnd_fact;
+ihevc_q_iq_ssd_flat_scale_mat_var_rnd_fact_ft ihevc_q_iq_ssd_flat_scale_mat_var_rnd_fact;
+
+ihevc_hbd_quant_iquant_ssd_ft ihevc_hbd_quant_iquant_ssd; /* hbd group: signature carries the extra i4_bit_depth argument */
+ihevc_hbd_quant_iquant_ssd_rdoq_ft ihevc_hbd_quant_iquant_ssd_rdoq;
+ihevc_hbd_quant_iquant_ssd_flat_scale_mat_ft ihevc_hbd_quant_iquant_ssd_flat_scale_mat;
+ihevc_hbd_quant_iquant_ssd_flat_scale_mat_rdoq_ft ihevc_hbd_quant_iquant_ssd_flat_scale_mat_rdoq;
+ihevc_hbd_q_iq_ssd_var_rnd_fact_ft ihevc_hbd_q_iq_ssd_var_rnd_fact;
+ihevc_hbd_q_iq_ssd_flat_scale_mat_var_rnd_fact_ft ihevc_hbd_q_iq_ssd_flat_scale_mat_var_rnd_fact;
+
+ihevc_quant_iquant_ssd_ft ihevc_quant_iquant; /* this group and the next reuse the *_ssd_* function types for names without "_ssd" */
+ihevc_quant_iquant_ssd_rdoq_ft ihevc_quant_iquant_rdoq;
+ihevc_quant_iquant_ssd_flat_scale_mat_ft ihevc_quant_iquant_flat_scale_mat;
+ihevc_quant_iquant_ssd_flat_scale_mat_rdoq_ft ihevc_quant_iquant_flat_scale_mat_rdoq;
+ihevc_q_iq_ssd_var_rnd_fact_ft ihevc_q_iq_var_rnd_fact;
+ihevc_q_iq_ssd_flat_scale_mat_var_rnd_fact_ft ihevc_q_iq_flat_scale_mat_var_rnd_fact;
+
+ihevc_hbd_quant_iquant_ssd_ft ihevc_hbd_quant_iquant;
+ihevc_hbd_quant_iquant_ssd_rdoq_ft ihevc_hbd_quant_iquant_rdoq;
+ihevc_hbd_quant_iquant_ssd_flat_scale_mat_ft ihevc_hbd_quant_iquant_flat_scale_mat;
+ihevc_hbd_quant_iquant_ssd_flat_scale_mat_rdoq_ft ihevc_hbd_quant_iquant_flat_scale_mat_rdoq;
+ihevc_hbd_q_iq_ssd_var_rnd_fact_ft ihevc_hbd_q_iq_var_rnd_fact;
+ihevc_hbd_q_iq_ssd_flat_scale_mat_var_rnd_fact_ft ihevc_hbd_q_iq_flat_scale_mat_var_rnd_fact;
+
+/* SSE42 function declarations (only the flat_scale_mat variants are declared) */
+ihevc_quant_iquant_ssd_flat_scale_mat_ft ihevc_quant_iquant_ssd_flat_scale_mat_sse42;
+ihevc_quant_iquant_ssd_flat_scale_mat_rdoq_ft ihevc_quant_iquant_ssd_flat_scale_mat_rdoq_sse42;
+ihevc_q_iq_ssd_flat_scale_mat_var_rnd_fact_ft ihevc_q_iq_ssd_flat_scale_mat_var_rnd_fact_sse42;
+
+ihevc_quant_iquant_ssd_flat_scale_mat_ft ihevc_quant_iquant_flat_scale_mat_sse42;
+ihevc_quant_iquant_ssd_flat_scale_mat_rdoq_ft ihevc_quant_iquant_flat_scale_mat_rdoq_sse42;
+ihevc_q_iq_ssd_flat_scale_mat_var_rnd_fact_ft ihevc_q_iq_flat_scale_mat_var_rnd_fact_sse42;
+
+ihevc_hbd_quant_iquant_ssd_flat_scale_mat_ft ihevc_hbd_quant_iquant_ssd_flat_scale_mat_sse42;
+ihevc_hbd_quant_iquant_ssd_flat_scale_mat_rdoq_ft ihevc_hbd_quant_iquant_ssd_flat_scale_mat_rdoq_sse42;
+ihevc_hbd_q_iq_ssd_flat_scale_mat_var_rnd_fact_ft ihevc_hbd_q_iq_ssd_flat_scale_mat_var_rnd_fact_sse42;
+
+ihevc_hbd_quant_iquant_ssd_flat_scale_mat_ft ihevc_hbd_quant_iquant_flat_scale_mat_sse42;
+ihevc_hbd_quant_iquant_ssd_flat_scale_mat_rdoq_ft ihevc_hbd_quant_iquant_flat_scale_mat_rdoq_sse42;
+ihevc_hbd_q_iq_ssd_flat_scale_mat_var_rnd_fact_ft ihevc_hbd_q_iq_flat_scale_mat_var_rnd_fact_sse42;
+
+/* AVX function declarations (same set of variants as the SSE42 group) */
+ihevc_quant_iquant_ssd_flat_scale_mat_ft ihevc_quant_iquant_ssd_flat_scale_mat_avx;
+ihevc_quant_iquant_ssd_flat_scale_mat_rdoq_ft ihevc_quant_iquant_ssd_flat_scale_mat_rdoq_avx;
+ihevc_q_iq_ssd_flat_scale_mat_var_rnd_fact_ft ihevc_q_iq_ssd_flat_scale_mat_var_rnd_fact_avx;
+ihevc_quant_iquant_ssd_flat_scale_mat_ft ihevc_quant_iquant_flat_scale_mat_avx;
+ihevc_quant_iquant_ssd_flat_scale_mat_rdoq_ft ihevc_quant_iquant_flat_scale_mat_rdoq_avx;
+ihevc_q_iq_ssd_flat_scale_mat_var_rnd_fact_ft ihevc_q_iq_flat_scale_mat_var_rnd_fact_avx;
+
+ihevc_hbd_quant_iquant_ssd_flat_scale_mat_ft ihevc_hbd_quant_iquant_ssd_flat_scale_mat_avx;
+ihevc_hbd_quant_iquant_ssd_flat_scale_mat_rdoq_ft ihevc_hbd_quant_iquant_ssd_flat_scale_mat_rdoq_avx;
+ihevc_hbd_q_iq_ssd_flat_scale_mat_var_rnd_fact_ft ihevc_hbd_q_iq_ssd_flat_scale_mat_var_rnd_fact_avx;
+
+ihevc_hbd_quant_iquant_ssd_flat_scale_mat_ft ihevc_hbd_quant_iquant_flat_scale_mat_avx;
+ihevc_hbd_quant_iquant_ssd_flat_scale_mat_rdoq_ft ihevc_hbd_quant_iquant_flat_scale_mat_rdoq_avx;
+ihevc_hbd_q_iq_ssd_flat_scale_mat_var_rnd_fact_ft ihevc_hbd_q_iq_flat_scale_mat_var_rnd_fact_avx;
+
+#ifndef DISABLE_AVX2
+/* AVX2 function declarations; omitted when DISABLE_AVX2 is defined */
+ihevc_quant_iquant_ssd_flat_scale_mat_ft ihevc_quant_iquant_ssd_flat_scale_mat_avx2;
+ihevc_quant_iquant_ssd_flat_scale_mat_rdoq_ft ihevc_quant_iquant_ssd_flat_scale_mat_rdoq_avx2;
+
+ihevc_quant_iquant_ssd_flat_scale_mat_ft ihevc_quant_iquant_flat_scale_mat_avx2;
+ihevc_quant_iquant_ssd_flat_scale_mat_rdoq_ft ihevc_quant_iquant_flat_scale_mat_rdoq_avx2;
+
+ihevc_hbd_quant_iquant_ssd_flat_scale_mat_ft ihevc_hbd_quant_iquant_ssd_flat_scale_mat_avx2;
+#endif /* DISABLE_AVX2 */
+
+/* Neon function declarations (two variants only) */
+ihevc_quant_iquant_ssd_flat_scale_mat_ft ihevc_quant_iquant_ssd_flat_scale_mat_neon;
+ihevc_q_iq_ssd_flat_scale_mat_var_rnd_fact_ft ihevc_q_iq_ssd_flat_scale_mat_var_rnd_fact_neon;
+
+#endif /*_IHEVC_QUANT_IQUANT_SSD_H_*/
diff --git a/common/ihevc_resi_trans.c b/common/ihevc_resi_trans.c
new file mode 100644
index 0000000..e1537cc
--- /dev/null
+++ b/common/ihevc_resi_trans.c
@@ -0,0 +1,1462 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ * ihevc_resi_trans.c
+ *
+ * @brief
+ * Contains function definitions for residual and forward transform
+ *
+ * @author
+ * 100470
+ *
+ * @par List of Functions:
+ * - ihevc_resi_trans_4x4_ttype1()
+ * - ihevc_resi_trans_4x4()
+ * - ihevc_resi_trans_8x8()
+ * - ihevc_resi_trans_16x16()
+ * - ihevc_resi_trans_32x32()
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include "ihevc_typedefs.h"
+#include "ihevc_macros.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_defs.h"
+#include "ihevc_trans_tables.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_func_selector.h"
+#include "ihevc_trans_macros.h"
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * This function performs residue calculation and forward transform type 1
+ * on input pixels
+ *
+ * @par Description:
+ * Performs residue calculation by subtracting source and prediction and
+ * followed by forward transform
+ *
+ * @param[in] pu1_src
+ * Input 4x4 pixels
+ *
+ * @param[in] pu1_pred
+ * Prediction data
+ *
+ * @param[in] pi4_temp
+ * Temporary buffer of size 4x4
+ *
+ * @param[out] pi2_dst
+ * Output 4x4 coefficients
+ *
+ * @param[in] src_strd
+ * Input stride
+ *
+ * @param[in] pred_strd
+ * Prediction Stride
+ *
+ * @param[in] dst_strd_chr_flag
+ * Output Stride and Chroma Flag packed in the MS and LS 16-bit
+ *
+ *
+ * @returns Sum of absolute residue (source - prediction) values of the block
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+
+UWORD32 ihevc_resi_trans_4x4_ttype1(UWORD8 *pu1_src, /* source pixels; rows are src_strd apart */
+ UWORD8 *pu1_pred, /* prediction pixels; rows are pred_strd apart */
+ WORD32 *pi4_temp, /* scratch for the first-stage output; 4x4 WORD32 entries are written */
+ WORD16 *pi2_dst, /* output coefficients; rows are dst_strd apart */
+ WORD32 src_strd,
+ WORD32 pred_strd,
+ WORD32 dst_strd_chr_flag) /* only the output stride (value >> 16) is used by this function */
+{
+ WORD32 i, c[4];
+ WORD32 add, shift;
+ WORD32 trans_size;
+ WORD32 *pi4_tmp_orig;
+ WORD16 *pi2_dst_orig; /* assigned below but never read in this function */
+ UWORD32 u4_blk_sad = 0; /* running sum of |src - pred| over the block; this is the return value */
+ // WORD32 chroma_flag; (not needed: this function always reads consecutive bytes)
+ WORD32 dst_strd;
+
+ // chroma_flag = dst_strd_chr_flag & 1; (bit 0 is ignored here)
+ dst_strd = dst_strd_chr_flag >> 16;
+
+ pi2_dst_orig = pi2_dst;
+ pi4_tmp_orig = pi4_temp; /* remembered so stage 2 can restart from the beginning of the scratch */
+ trans_size = TRANS_SIZE_4;
+
+ /* Residue + Forward Transform 1st stage */
+ shift = 1; // log2(iWidth) - 1 + g_uiBitIncrement
+ add = 1 << (shift - 1); /* rounding offset: half of the divisor implied by shift */
+
+ for(i = 0; i < trans_size; i++) /* one source/prediction row per iteration */
+ {
+ WORD32 resi_tmp_1, resi_tmp_2, resi_tmp_3;
+
+ // Intermediate Variables
+ resi_tmp_1 = pu1_src[0] - pu1_pred[0];
+ resi_tmp_2 = pu1_src[3] - pu1_pred[3];
+ c[0] = resi_tmp_1 + resi_tmp_2;
+ u4_blk_sad += abs(resi_tmp_1) + abs(resi_tmp_2); /* residues 0 and 3 are counted here */
+
+ resi_tmp_1 = pu1_src[1] - pu1_pred[1];
+ resi_tmp_2 = pu1_src[3] - pu1_pred[3];
+ c[1] = resi_tmp_1 + resi_tmp_2;
+ u4_blk_sad += abs(resi_tmp_1); /* residue 3 was already counted above, so only residue 1 is added */
+
+ resi_tmp_1 = pu1_src[0] - pu1_pred[0];
+ resi_tmp_2 = pu1_src[1] - pu1_pred[1];
+ c[2] = resi_tmp_1 - resi_tmp_2;
+
+ resi_tmp_1 = pu1_src[2] - pu1_pred[2];
+ c[3] = 74 * resi_tmp_1;
+ u4_blk_sad += abs(resi_tmp_1); /* residue 2 completes the row's contribution */
+
+ pi4_temp[0] = (29 * c[0] + 55 * c[1] + c[3] + add) >> shift; /* outputs go down column i of the scratch (transposed store) */
+
+ resi_tmp_1 = pu1_src[0] - pu1_pred[0];
+ resi_tmp_2 = pu1_src[1] - pu1_pred[1];
+ resi_tmp_3 = pu1_src[3] - pu1_pred[3];
+ pi4_temp[trans_size] =
+ (74 * (resi_tmp_1 + resi_tmp_2 - resi_tmp_3) + add)
+ >> shift;
+ pi4_temp[2 * trans_size] = (29 * c[2] + 55 * c[0] - c[3] + add) >> shift;
+ pi4_temp[3 * trans_size] = (55 * c[2] - 29 * c[1] + c[3] + add) >> shift;
+
+ pu1_src += src_strd;
+ pu1_pred += pred_strd;
+ pi4_temp++; /* next scratch column */
+ }
+
+ pi4_temp = pi4_tmp_orig;
+
+ /* Forward transform 2nd stage */
+ shift = 8; // log2(iHeight) + 6
+ add = 1 << (shift - 1);
+
+ for(i = 0; i < TRANS_SIZE_4; i++) /* one scratch row per iteration */
+ {
+ // Intermediate Variables
+ c[0] = pi4_temp[0] + pi4_temp[3];
+ c[1] = pi4_temp[1] + pi4_temp[3];
+ c[2] = pi4_temp[0] - pi4_temp[1];
+ c[3] = 74 * pi4_temp[2];
+
+ pi2_dst[0] = (29 * c[0] + 55 * c[1] + c[3] + add) >> shift; /* outputs go down column i of pi2_dst */
+ pi2_dst[dst_strd] = (74 * (pi4_temp[0] + pi4_temp[1] - pi4_temp[3]) + add)
+ >> shift;
+ pi2_dst[2 * dst_strd] = (29 * c[2] + 55 * c[0] - c[3] + add) >> shift;
+ pi2_dst[3 * dst_strd] = (55 * c[2] - 29 * c[1] + c[3] + add) >> shift;
+
+ pi4_temp += trans_size;
+ pi2_dst++;
+ }
+
+ return u4_blk_sad;
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * This function performs residue calculation and forward transform on
+ * input pixels
+ *
+ * @par Description:
+ * Performs residue calculation by subtracting source and prediction and
+ * followed by forward transform
+ *
+ * @param[in] pu1_src
+ * Input 4x4 pixels
+ *
+ * @param[in] pu1_pred
+ * Prediction data
+ *
+ * @param[in] pi4_temp
+ * Temporary buffer of size 4x4
+ *
+ * @param[out] pi2_dst
+ * Output 4x4 coefficients
+ *
+ * @param[in] src_strd
+ * Input stride
+ *
+ * @param[in] pred_strd
+ * Prediction Stride
+ *
+ * @param[in] dst_strd_chr_flag
+ * Output Stride and Chroma Flag packed in the MS and LS 16-bit
+ *
+ * @returns Sum of absolute residue (source - prediction) values of the block
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+
+UWORD32 ihevc_resi_trans_4x4(UWORD8 *pu1_src, /* source pixels; rows are src_strd apart */
+ UWORD8 *pu1_pred, /* prediction pixels; rows are pred_strd apart */
+ WORD32 *pi4_temp, /* scratch for the unrounded first-stage output; 4x4 WORD32 entries are written */
+ WORD16 *pi2_dst, /* output coefficients; rows are dst_strd apart */
+ WORD32 src_strd,
+ WORD32 pred_strd,
+ WORD32 dst_strd_chr_flag) /* bit 0: chroma flag; value >> 16: output stride */
+{
+ WORD32 i;
+ WORD32 e[2], o[2]; /* sums (e) and differences (o) of mirrored sample pairs */
+ WORD32 add, shift;
+ WORD32 trans_size;
+ WORD32 *pi4_tmp_orig;
+ WORD16 *pi2_dst_orig; /* assigned below but never read in this function */
+ UWORD32 u4_blk_sad=0; /* running sum of |src - pred| over the block; this is the return value */
+ WORD32 chroma_flag;
+ WORD32 dst_strd;
+
+ chroma_flag = dst_strd_chr_flag & 1; /* when 1, sample k is read from byte 2k (presumably interleaved chroma - confirm) */
+ dst_strd = dst_strd_chr_flag >> 16;
+
+ pi2_dst_orig = pi2_dst;
+ pi4_tmp_orig = pi4_temp; /* remembered so stage 2 can restart from the beginning of the scratch */
+ trans_size = TRANS_SIZE_4;
+
+ /* Residue + Forward Transform 1st stage */
+ shift = 1; // not applied in this stage (rounding is commented out below); reassigned before stage 2
+ add = 1 << (shift - 1);
+
+ for(i = 0; i < trans_size; i++) /* one source/prediction row per iteration */
+ {
+ WORD32 resi_tmp_1, resi_tmp_2;
+
+ /* e and o */
+ resi_tmp_1 = pu1_src[0 + 0*chroma_flag] - pu1_pred[0 + 0*chroma_flag];
+ resi_tmp_2 = pu1_src[3 + 3*chroma_flag] - pu1_pred[3 + 3*chroma_flag];
+ e[0] = resi_tmp_1 + resi_tmp_2;
+ o[0] = resi_tmp_1 - resi_tmp_2;
+ u4_blk_sad += abs(resi_tmp_1);
+ u4_blk_sad += abs(resi_tmp_2);
+
+ resi_tmp_1 = pu1_src[1 + 1*chroma_flag] - pu1_pred[1 + 1*chroma_flag];
+ resi_tmp_2 = pu1_src[2 + 2*chroma_flag] - pu1_pred[2 + 2*chroma_flag];
+ e[1] = resi_tmp_1 + resi_tmp_2;
+ o[1] = resi_tmp_1 - resi_tmp_2;
+ u4_blk_sad += abs(resi_tmp_1);
+ u4_blk_sad += abs(resi_tmp_2);
+
+ pi4_temp[0] = (g_ai2_ihevc_trans_4[0][0] * e[0] /* stored unshifted; outputs go down column i of the scratch */
+ + g_ai2_ihevc_trans_4[0][1] * e[1]);// + add) >> shift;
+ pi4_temp[2 * trans_size] = (g_ai2_ihevc_trans_4[2][0] * e[0]
+ + g_ai2_ihevc_trans_4[2][1] * e[1]);// + add) >> shift;
+ pi4_temp[trans_size] = (g_ai2_ihevc_trans_4[1][0] * o[0]
+ + g_ai2_ihevc_trans_4[1][1] * o[1]);// + add) >> shift;
+ pi4_temp[3 * trans_size] = (g_ai2_ihevc_trans_4[3][0] * o[0]
+ + g_ai2_ihevc_trans_4[3][1] * o[1]);// + add) >> shift;
+
+ pu1_src += src_strd;
+ pu1_pred += pred_strd;
+ pi4_temp++; /* next scratch column */
+ }
+
+ pi4_temp = pi4_tmp_orig;
+ /* Forward Transform 2nd stage */
+ shift = 9; // 1 (first-stage shift, not applied above) + 8 (log2(iHeight) + 6)
+ add = 1 << (shift - 1); /* single rounding offset covering both stages */
+
+ for(i = 0; i < trans_size; i++) /* one scratch row per iteration */
+ {
+
+ /* e and o */
+ e[0] = pi4_temp[0] + pi4_temp[3];
+ o[0] = pi4_temp[0] - pi4_temp[3];
+ e[1] = pi4_temp[1] + pi4_temp[2];
+ o[1] = pi4_temp[1] - pi4_temp[2];
+
+ pi2_dst[0] = (g_ai2_ihevc_trans_4[0][0] * e[0] /* outputs go down column i of pi2_dst */
+ + g_ai2_ihevc_trans_4[0][1] * e[1] + add) >> shift;
+ pi2_dst[2 * dst_strd] = (g_ai2_ihevc_trans_4[2][0] * e[0]
+ + g_ai2_ihevc_trans_4[2][1] * e[1] + add) >> shift;
+ pi2_dst[dst_strd] = (g_ai2_ihevc_trans_4[1][0] * o[0]
+ + g_ai2_ihevc_trans_4[1][1] * o[1] + add) >> shift;
+ pi2_dst[3 * dst_strd] = (g_ai2_ihevc_trans_4[3][0] * o[0]
+ + g_ai2_ihevc_trans_4[3][1] * o[1] + add) >> shift;
+
+ pi4_temp += trans_size;
+ pi2_dst++;
+ }
+
+ return u4_blk_sad;
+}
+
+void ihevc_resi_trans_4x4_16bit(WORD16 *pi2_src, /* 16-bit source samples; rows are src_strd elements apart */
+ UWORD8 *pu1_pred, /* 8-bit prediction pixels; rows are pred_strd apart */
+ WORD16 *pi2_tmp, /* scratch for the rounded first-stage output; 4x4 WORD16 entries are written */
+ WORD16 *pi2_dst, /* output coefficients; rows are dst_strd apart */
+ WORD32 src_strd,
+ WORD32 pred_strd,
+ WORD32 dst_strd) /* plain stride: no chroma flag is packed into it, unlike ihevc_resi_trans_4x4 */
+{
+ WORD32 i;
+ WORD32 e[2], o[2]; /* sums (e) and differences (o) of mirrored sample pairs */
+ WORD32 add, shift;
+ WORD32 trans_size;
+ WORD16 *pi2_tmp_orig;
+ WORD16 *pi2_dst_orig; /* assigned below but never read in this function */
+
+ pi2_dst_orig = pi2_dst;
+ pi2_tmp_orig = pi2_tmp; /* remembered so stage 2 can restart from the beginning of the scratch */
+ trans_size = TRANS_SIZE_4;
+
+ /* Residue + Forward Transform 1st stage */
+ shift = 1; // log2(iWidth) - 1 + g_uiBitIncrement
+ add = 1 << (shift - 1); /* rounding offset: half of the divisor implied by shift */
+
+ for(i = 0; i < trans_size; i++) /* one source/prediction row per iteration */
+ {
+ WORD32 resi_tmp_1, resi_tmp_2;
+
+ /* e and o */
+ resi_tmp_1 = pi2_src[0] - pu1_pred[0];
+ resi_tmp_2 = pi2_src[3] - pu1_pred[3];
+ e[0] = resi_tmp_1 + resi_tmp_2;
+ o[0] = resi_tmp_1 - resi_tmp_2;
+
+ resi_tmp_1 = pi2_src[1] - pu1_pred[1];
+ resi_tmp_2 = pi2_src[2] - pu1_pred[2];
+ e[1] = resi_tmp_1 + resi_tmp_2;
+ o[1] = resi_tmp_1 - resi_tmp_2;
+
+ pi2_tmp[0] = (g_ai2_ihevc_trans_4[0][0] * e[0] /* rounded and shifted here; outputs go down column i of the scratch */
+ + g_ai2_ihevc_trans_4[0][1] * e[1] + add) >> shift;
+ pi2_tmp[2 * trans_size] = (g_ai2_ihevc_trans_4[2][0] * e[0]
+ + g_ai2_ihevc_trans_4[2][1] * e[1] + add) >> shift;
+ pi2_tmp[trans_size] = (g_ai2_ihevc_trans_4[1][0] * o[0]
+ + g_ai2_ihevc_trans_4[1][1] * o[1] + add) >> shift;
+ pi2_tmp[3 * trans_size] = (g_ai2_ihevc_trans_4[3][0] * o[0]
+ + g_ai2_ihevc_trans_4[3][1] * o[1] + add) >> shift;
+
+ pi2_src += src_strd;
+ pu1_pred += pred_strd;
+ pi2_tmp++; /* next scratch column */
+ }
+
+ pi2_tmp = pi2_tmp_orig;
+ /* Forward Transform 2nd stage */
+ shift = 8; // log2(iHeight) + 6
+ add = 1 << (shift - 1);
+
+ for(i = 0; i < trans_size; i++) /* one scratch row per iteration */
+ {
+
+ /* e and o */
+ e[0] = pi2_tmp[0] + pi2_tmp[3];
+ o[0] = pi2_tmp[0] - pi2_tmp[3];
+ e[1] = pi2_tmp[1] + pi2_tmp[2];
+ o[1] = pi2_tmp[1] - pi2_tmp[2];
+
+ pi2_dst[0] = (g_ai2_ihevc_trans_4[0][0] * e[0] /* outputs go down column i of pi2_dst */
+ + g_ai2_ihevc_trans_4[0][1] * e[1] + add) >> shift;
+ pi2_dst[2 * dst_strd] = (g_ai2_ihevc_trans_4[2][0] * e[0]
+ + g_ai2_ihevc_trans_4[2][1] * e[1] + add) >> shift;
+ pi2_dst[dst_strd] = (g_ai2_ihevc_trans_4[1][0] * o[0]
+ + g_ai2_ihevc_trans_4[1][1] * o[1] + add) >> shift;
+ pi2_dst[3 * dst_strd] = (g_ai2_ihevc_trans_4[3][0] * o[0]
+ + g_ai2_ihevc_trans_4[3][1] * o[1] + add) >> shift;
+
+ pi2_tmp += trans_size;
+ pi2_dst++;
+ }
+}
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * This function performs residue calculation and forward transform on
+ * input pixels
+ *
+ * @par Description:
+ * Performs residue calculation by subtracting source and prediction and
+ * followed by forward transform
+ *
+ * @param[in] pu1_src
+ * Input 8x8 pixels
+ *
+ * @param[in] pu1_pred
+ * Prediction data
+ *
+ * @param[in] pi4_temp
+ * Temporary buffer of size 8x8
+ *
+ * @param[out] pi2_dst
+ * Output 8x8 coefficients
+ *
+ * @param[in] src_strd
+ * Input stride
+ *
+ * @param[in] pred_strd
+ * Prediction Stride
+ *
+ * @param[in] dst_strd_chr_flag
+ * Output Stride and Chroma Flag packed in the MS and LS 16-bit
+ *
+ * @returns Sum of absolute residue (source - prediction) values of the block
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+
+UWORD32 ihevc_resi_trans_8x8(UWORD8 *pu1_src, /* source pixels; rows are src_strd apart */
+ UWORD8 *pu1_pred, /* prediction pixels; rows are pred_strd apart */
+ WORD32 *pi4_temp, /* scratch for the unrounded first-stage output; 8x8 WORD32 entries are written */
+ WORD16 *pi2_dst, /* output coefficients; rows are dst_strd apart */
+ WORD32 src_strd,
+ WORD32 pred_strd,
+ WORD32 dst_strd_chr_flag) /* bit 0: chroma flag; value >> 16: output stride */
+{
+ WORD32 i, k;
+ WORD32 e[4], o[4]; /* sums (e) and differences (o) of mirrored sample pairs */
+ WORD32 ee[2], eo[2]; /* second butterfly level, built from e[] */
+ WORD32 add, shift;
+ WORD32 trans_size;
+ WORD32 *pi4_tmp_orig;
+// WORD16 *pi2_tmp;
+ WORD16 *pi2_dst_orig; /* assigned below but never read in this function */
+ UWORD32 u4_blk_sad=0; /* running sum of |src - pred| over the block; this is the return value */
+ WORD32 chroma_flag;
+ WORD32 dst_strd;
+
+ chroma_flag = dst_strd_chr_flag & 1; /* when 1, sample k is read from byte 2k (presumably interleaved chroma - confirm) */
+ dst_strd = dst_strd_chr_flag >> 16;
+
+ pi2_dst_orig = pi2_dst;
+ pi4_tmp_orig = pi4_temp; /* remembered so stage 2 can restart from the beginning of the scratch */
+ trans_size = TRANS_SIZE_8;
+ /* Residue + Forward Transform 1st stage */
+ shift = 2; // not applied in this stage (rounding is commented out below); reassigned before stage 2
+ add = 1 << (shift - 1);
+
+ for(i = 0; i < trans_size; i++) /* one source/prediction row per iteration */
+ {
+ WORD32 resi_tmp_1, resi_tmp_2;
+
+ /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
+ /* e and o*/
+ for(k = 0; k < 4; k++)
+ {
+ resi_tmp_1 = pu1_src[k*(1+chroma_flag)] - pu1_pred[k*(1+chroma_flag)];
+ resi_tmp_2 = pu1_src[(7-k)*(1+chroma_flag)] - pu1_pred[(7-k)*(1+chroma_flag)];
+ e[k] = resi_tmp_1 + resi_tmp_2;
+ o[k] = resi_tmp_1 - resi_tmp_2;
+ u4_blk_sad += abs(resi_tmp_1) + abs(resi_tmp_2); /* every residue of the row is counted exactly once */
+ }
+ /* ee and eo */
+ ee[0] = e[0] + e[3];
+ eo[0] = e[0] - e[3];
+ ee[1] = e[1] + e[2];
+ eo[1] = e[1] - e[2];
+
+ pi4_temp[0] = (g_ai2_ihevc_trans_8[0][0] * ee[0] /* stored unshifted; outputs go down column i of the scratch */
+ + g_ai2_ihevc_trans_8[0][1] * ee[1]);// + add) >> shift;
+ pi4_temp[4 * trans_size] = (g_ai2_ihevc_trans_8[4][0] * ee[0]
+ + g_ai2_ihevc_trans_8[4][1] * ee[1]);// + add) >> shift;
+ pi4_temp[2 * trans_size] = (g_ai2_ihevc_trans_8[2][0] * eo[0]
+ + g_ai2_ihevc_trans_8[2][1] * eo[1]);// + add) >> shift;
+ pi4_temp[6 * trans_size] = (g_ai2_ihevc_trans_8[6][0] * eo[0]
+ + g_ai2_ihevc_trans_8[6][1] * eo[1]);// + add) >> shift;
+
+ pi4_temp[trans_size] = (g_ai2_ihevc_trans_8[1][0] * o[0] /* odd outputs use all four o[] terms */
+ + g_ai2_ihevc_trans_8[1][1] * o[1]
+ + g_ai2_ihevc_trans_8[1][2] * o[2]
+ + g_ai2_ihevc_trans_8[1][3] * o[3]);// + add) >> shift;
+ pi4_temp[3 * trans_size] = (g_ai2_ihevc_trans_8[3][0] * o[0]
+ + g_ai2_ihevc_trans_8[3][1] * o[1]
+ + g_ai2_ihevc_trans_8[3][2] * o[2]
+ + g_ai2_ihevc_trans_8[3][3] * o[3]);// + add) >> shift;
+ pi4_temp[5 * trans_size] = (g_ai2_ihevc_trans_8[5][0] * o[0]
+ + g_ai2_ihevc_trans_8[5][1] * o[1]
+ + g_ai2_ihevc_trans_8[5][2] * o[2]
+ + g_ai2_ihevc_trans_8[5][3] * o[3]);// + add) >> shift;
+ pi4_temp[7 * trans_size] = (g_ai2_ihevc_trans_8[7][0] * o[0]
+ + g_ai2_ihevc_trans_8[7][1] * o[1]
+ + g_ai2_ihevc_trans_8[7][2] * o[2]
+ + g_ai2_ihevc_trans_8[7][3] * o[3]);// + add) >> shift;
+
+ pu1_src += src_strd;
+ pu1_pred += pred_strd;
+ pi4_temp++; /* next scratch column */
+ }
+
+ pi4_temp = pi4_tmp_orig;
+ /* Forward Transform 2nd stage */
+ shift = 11; // 2 (first-stage shift, not applied above) + 9 (log2(iHeight) + 6)
+ add = 1 << (shift - 1); /* single rounding offset covering both stages */
+
+ for(i = 0; i < trans_size; i++) /* one scratch row per iteration */
+ {
+ /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
+ /* e and o*/
+ for(k = 0; k < 4; k++)
+ {
+ e[k] = pi4_temp[k] + pi4_temp[7 - k];
+ o[k] = pi4_temp[k] - pi4_temp[7 - k];
+ }
+ /* ee and eo */
+ ee[0] = e[0] + e[3];
+ eo[0] = e[0] - e[3];
+ ee[1] = e[1] + e[2];
+ eo[1] = e[1] - e[2];
+
+ pi2_dst[0] = (g_ai2_ihevc_trans_8[0][0] * ee[0] /* outputs go down column i of pi2_dst */
+ + g_ai2_ihevc_trans_8[0][1] * ee[1] + add) >> shift;
+ pi2_dst[4 * dst_strd] = (g_ai2_ihevc_trans_8[4][0] * ee[0]
+ + g_ai2_ihevc_trans_8[4][1] * ee[1] + add) >> shift;
+ pi2_dst[2 * dst_strd] = (g_ai2_ihevc_trans_8[2][0] * eo[0]
+ + g_ai2_ihevc_trans_8[2][1] * eo[1] + add) >> shift;
+ pi2_dst[6 * dst_strd] = (g_ai2_ihevc_trans_8[6][0] * eo[0]
+ + g_ai2_ihevc_trans_8[6][1] * eo[1] + add) >> shift;
+
+ pi2_dst[dst_strd] = (g_ai2_ihevc_trans_8[1][0] * o[0]
+ + g_ai2_ihevc_trans_8[1][1] * o[1]
+ + g_ai2_ihevc_trans_8[1][2] * o[2]
+ + g_ai2_ihevc_trans_8[1][3] * o[3] + add) >> shift;
+ pi2_dst[3 * dst_strd] = (g_ai2_ihevc_trans_8[3][0] * o[0]
+ + g_ai2_ihevc_trans_8[3][1] * o[1]
+ + g_ai2_ihevc_trans_8[3][2] * o[2]
+ + g_ai2_ihevc_trans_8[3][3] * o[3] + add) >> shift;
+ pi2_dst[5 * dst_strd] = (g_ai2_ihevc_trans_8[5][0] * o[0]
+ + g_ai2_ihevc_trans_8[5][1] * o[1]
+ + g_ai2_ihevc_trans_8[5][2] * o[2]
+ + g_ai2_ihevc_trans_8[5][3] * o[3] + add) >> shift;
+ pi2_dst[7 * dst_strd] = (g_ai2_ihevc_trans_8[7][0] * o[0]
+ + g_ai2_ihevc_trans_8[7][1] * o[1]
+ + g_ai2_ihevc_trans_8[7][2] * o[2]
+ + g_ai2_ihevc_trans_8[7][3] * o[3] + add) >> shift;
+
+ pi4_temp += trans_size;
+ pi2_dst++;
+ }
+
+ return u4_blk_sad;
+}
+
+void ihevc_resi_trans_8x8_16bit(WORD16 *pi2_src, /* 16-bit source samples; rows are src_strd elements apart */
+ UWORD8 *pu1_pred, /* 8-bit prediction pixels; rows are pred_strd apart */
+ WORD16 *pi2_tmp, /* scratch for the rounded first-stage output; 8x8 WORD16 entries are written */
+ WORD16 *pi2_dst, /* output coefficients; rows are dst_strd apart */
+ WORD32 src_strd,
+ WORD32 pred_strd,
+ WORD32 dst_strd) /* plain stride: no chroma flag is packed into it, unlike ihevc_resi_trans_8x8 */
+{
+ WORD32 i, k;
+ WORD32 e[4], o[4]; /* sums (e) and differences (o) of mirrored sample pairs */
+ WORD32 ee[2], eo[2]; /* second butterfly level, built from e[] */
+ WORD32 add, shift;
+ WORD32 trans_size;
+ WORD16 *pi2_tmp_orig;
+ WORD16 *pi2_dst_orig; /* assigned below but never read in this function */
+
+ pi2_dst_orig = pi2_dst;
+ pi2_tmp_orig = pi2_tmp; /* remembered so stage 2 can restart from the beginning of the scratch */
+ trans_size = TRANS_SIZE_8;
+ /* Residue + Forward Transform 1st stage */
+ shift = 2; // log2(iWidth) - 1 + g_uiBitIncrement
+ add = 1 << (shift - 1); /* rounding offset: half of the divisor implied by shift */
+
+ for(i = 0; i < trans_size; i++) /* one source/prediction row per iteration */
+ {
+ WORD32 resi_tmp_1, resi_tmp_2;
+
+ /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
+ /* e and o*/
+ for(k = 0; k < 4; k++)
+ {
+ resi_tmp_1 = pi2_src[k] - pu1_pred[k];
+ resi_tmp_2 = pi2_src[7 - k] - pu1_pred[7 - k];
+ e[k] = resi_tmp_1 + resi_tmp_2;
+ o[k] = resi_tmp_1 - resi_tmp_2;
+ }
+ /* ee and eo */
+ ee[0] = e[0] + e[3];
+ eo[0] = e[0] - e[3];
+ ee[1] = e[1] + e[2];
+ eo[1] = e[1] - e[2];
+
+ pi2_tmp[0] = (g_ai2_ihevc_trans_8[0][0] * ee[0] /* rounded and shifted here; outputs go down column i of the scratch */
+ + g_ai2_ihevc_trans_8[0][1] * ee[1] + add) >> shift;
+ pi2_tmp[4 * trans_size] = (g_ai2_ihevc_trans_8[4][0] * ee[0]
+ + g_ai2_ihevc_trans_8[4][1] * ee[1] + add) >> shift;
+ pi2_tmp[2 * trans_size] = (g_ai2_ihevc_trans_8[2][0] * eo[0]
+ + g_ai2_ihevc_trans_8[2][1] * eo[1] + add) >> shift;
+ pi2_tmp[6 * trans_size] = (g_ai2_ihevc_trans_8[6][0] * eo[0]
+ + g_ai2_ihevc_trans_8[6][1] * eo[1] + add) >> shift;
+
+ pi2_tmp[trans_size] = (g_ai2_ihevc_trans_8[1][0] * o[0] /* odd outputs use all four o[] terms */
+ + g_ai2_ihevc_trans_8[1][1] * o[1]
+ + g_ai2_ihevc_trans_8[1][2] * o[2]
+ + g_ai2_ihevc_trans_8[1][3] * o[3] + add) >> shift;
+ pi2_tmp[3 * trans_size] = (g_ai2_ihevc_trans_8[3][0] * o[0]
+ + g_ai2_ihevc_trans_8[3][1] * o[1]
+ + g_ai2_ihevc_trans_8[3][2] * o[2]
+ + g_ai2_ihevc_trans_8[3][3] * o[3] + add) >> shift;
+ pi2_tmp[5 * trans_size] = (g_ai2_ihevc_trans_8[5][0] * o[0]
+ + g_ai2_ihevc_trans_8[5][1] * o[1]
+ + g_ai2_ihevc_trans_8[5][2] * o[2]
+ + g_ai2_ihevc_trans_8[5][3] * o[3] + add) >> shift;
+ pi2_tmp[7 * trans_size] = (g_ai2_ihevc_trans_8[7][0] * o[0]
+ + g_ai2_ihevc_trans_8[7][1] * o[1]
+ + g_ai2_ihevc_trans_8[7][2] * o[2]
+ + g_ai2_ihevc_trans_8[7][3] * o[3] + add) >> shift;
+
+ pi2_src += src_strd;
+ pu1_pred += pred_strd;
+ pi2_tmp++; /* next scratch column */
+ }
+
+ pi2_tmp = pi2_tmp_orig;
+ /* Forward Transform 2nd stage */
+ shift = 9; // log2(iHeight) + 6
+ add = 1 << (shift - 1);
+
+ for(i = 0; i < trans_size; i++) /* one scratch row per iteration */
+ {
+ /* Utilizing symmetry properties to the maximum to minimize the number of multiplications */
+ /* e and o*/
+ for(k = 0; k < 4; k++)
+ {
+ e[k] = pi2_tmp[k] + pi2_tmp[7 - k];
+ o[k] = pi2_tmp[k] - pi2_tmp[7 - k];
+ }
+ /* ee and eo */
+ ee[0] = e[0] + e[3];
+ eo[0] = e[0] - e[3];
+ ee[1] = e[1] + e[2];
+ eo[1] = e[1] - e[2];
+
+ pi2_dst[0] = (g_ai2_ihevc_trans_8[0][0] * ee[0] /* outputs go down column i of pi2_dst */
+ + g_ai2_ihevc_trans_8[0][1] * ee[1] + add) >> shift;
+ pi2_dst[4 * dst_strd] = (g_ai2_ihevc_trans_8[4][0] * ee[0]
+ + g_ai2_ihevc_trans_8[4][1] * ee[1] + add) >> shift;
+ pi2_dst[2 * dst_strd] = (g_ai2_ihevc_trans_8[2][0] * eo[0]
+ + g_ai2_ihevc_trans_8[2][1] * eo[1] + add) >> shift;
+ pi2_dst[6 * dst_strd] = (g_ai2_ihevc_trans_8[6][0] * eo[0]
+ + g_ai2_ihevc_trans_8[6][1] * eo[1] + add) >> shift;
+
+ pi2_dst[dst_strd] = (g_ai2_ihevc_trans_8[1][0] * o[0]
+ + g_ai2_ihevc_trans_8[1][1] * o[1]
+ + g_ai2_ihevc_trans_8[1][2] * o[2]
+ + g_ai2_ihevc_trans_8[1][3] * o[3] + add) >> shift;
+ pi2_dst[3 * dst_strd] = (g_ai2_ihevc_trans_8[3][0] * o[0]
+ + g_ai2_ihevc_trans_8[3][1] * o[1]
+ + g_ai2_ihevc_trans_8[3][2] * o[2]
+ + g_ai2_ihevc_trans_8[3][3] * o[3] + add) >> shift;
+ pi2_dst[5 * dst_strd] = (g_ai2_ihevc_trans_8[5][0] * o[0]
+ + g_ai2_ihevc_trans_8[5][1] * o[1]
+ + g_ai2_ihevc_trans_8[5][2] * o[2]
+ + g_ai2_ihevc_trans_8[5][3] * o[3] + add) >> shift;
+ pi2_dst[7 * dst_strd] = (g_ai2_ihevc_trans_8[7][0] * o[0]
+ + g_ai2_ihevc_trans_8[7][1] * o[1]
+ + g_ai2_ihevc_trans_8[7][2] * o[2]
+ + g_ai2_ihevc_trans_8[7][3] * o[3] + add) >> shift;
+
+ pi2_tmp += trans_size;
+ pi2_dst++;
+ }
+}
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * This function performs residue calculation and forward transform on
+ * input pixels
+ *
+ * @par Description:
+ * Performs residue calculation by subtracting source and prediction and
+ * followed by forward transform
+ *
+ * @param[in] pu1_src
+ * Input 16x16 pixels
+ *
+ * @param[in] pu1_pred
+ * Prediction data
+ *
+ * @param[in] pi4_temp
+ * Temporary buffer of size 16x16
+ *
+ * @param[out] pi2_dst
+ * Output 16x16 coefficients
+ *
+ * @param[in] src_strd
+ * Input stride
+ *
+ * @param[in] pred_strd
+ * Prediction Stride
+ *
+ * @param[in] dst_strd_chr_flag
+ * Output Stride and Chroma Flag packed in the MS and LS 16-bit
+ *
+ * @returns Sum of absolute residue (source - prediction) values of the block
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+
+UWORD32 ihevc_resi_trans_16x16(UWORD8 *pu1_src, /* source pixels; rows are src_strd apart */
+ UWORD8 *pu1_pred, /* prediction pixels; rows are pred_strd apart */
+ WORD32 *pi4_temp, /* scratch for the unrounded first-stage output; 16x16 WORD32 entries are written */
+ WORD16 *pi2_dst, /* output coefficients; rows are dst_strd apart */
+ WORD32 src_strd,
+ WORD32 pred_strd,
+ WORD32 dst_strd_chr_flag) /* bit 0: chroma flag; value >> 16: output stride */
+{
+ WORD32 i, k;
+ WORD32 e[8], o[8]; /* sums (e) and differences (o) of mirrored sample pairs */
+ WORD32 ee[4], eo[4]; /* second butterfly level, built from e[] */
+ WORD32 eee[2], eeo[2]; /* third butterfly level, built from ee[] */
+ WORD32 add, shift;
+ WORD32 trans_size;
+ WORD32 *pi4_tmp_orig;
+ WORD16 *pi2_dst_orig; /* assigned below but never read in this function */
+ UWORD32 u4_blk_sad = 0; /* running sum of |src - pred| over the block; this is the return value */
+ WORD32 chroma_flag;
+ WORD32 dst_strd;
+
+ chroma_flag = dst_strd_chr_flag & 1; /* when 1, sample k is read from byte 2k (presumably interleaved chroma - confirm) */
+ dst_strd = dst_strd_chr_flag >> 16;
+
+ pi2_dst_orig = pi2_dst;
+ pi4_tmp_orig = pi4_temp; /* remembered so stage 2 can restart from the beginning of the scratch */
+ trans_size = TRANS_SIZE_16;
+ /* Residue + Forward Transform 1st stage */
+ shift = 3; // not applied in this stage (rounding is commented out below); reassigned before stage 2
+ add = 1 << (shift - 1);
+
+ for(i = 0; i < trans_size; i++) /* one source/prediction row per iteration */
+ {
+ WORD32 resi_tmp_1, resi_tmp_2;
+ /* e and o*/
+ for(k = 0; k < 8; k++)
+ {
+ resi_tmp_1 = pu1_src[k*(1+chroma_flag)] - pu1_pred[k*(1+chroma_flag)];
+ resi_tmp_2 = pu1_src[(15-k)*(1+chroma_flag)] - pu1_pred[(15-k)*(1+chroma_flag)];
+ e[k] = resi_tmp_1 + resi_tmp_2;
+ o[k] = resi_tmp_1 - resi_tmp_2;
+ u4_blk_sad += abs(resi_tmp_1) + abs(resi_tmp_2); /* every residue of the row is counted exactly once */
+ }
+ /* ee and eo */
+ for(k = 0; k < 4; k++)
+ {
+ ee[k] = e[k] + e[7 - k];
+ eo[k] = e[k] - e[7 - k];
+ }
+ /* eee and eeo */
+ eee[0] = ee[0] + ee[3];
+ eeo[0] = ee[0] - ee[3];
+ eee[1] = ee[1] + ee[2];
+ eeo[1] = ee[1] - ee[2];
+
+ pi4_temp[0] = (g_ai2_ihevc_trans_16[0][0] * eee[0] /* stored unshifted; outputs go down column i of the scratch */
+ + g_ai2_ihevc_trans_16[0][1] * eee[1]);// + add) >> shift;
+ pi4_temp[8 * trans_size] = (g_ai2_ihevc_trans_16[8][0] * eee[0]
+ + g_ai2_ihevc_trans_16[8][1] * eee[1]);// + add) >> shift;
+ pi4_temp[4 * trans_size] = (g_ai2_ihevc_trans_16[4][0] * eeo[0]
+ + g_ai2_ihevc_trans_16[4][1] * eeo[1]);// + add) >> shift;
+ pi4_temp[12 * trans_size] = (g_ai2_ihevc_trans_16[12][0] * eeo[0]
+ + g_ai2_ihevc_trans_16[12][1] * eeo[1]);// + add) >> shift;
+
+ for(k = 2; k < 16; k += 4) /* outputs 2, 6, 10, 14 come from eo[] */
+ {
+ pi4_temp[k * trans_size] = (g_ai2_ihevc_trans_16[k][0] * eo[0]
+ + g_ai2_ihevc_trans_16[k][1] * eo[1]
+ + g_ai2_ihevc_trans_16[k][2] * eo[2]
+ + g_ai2_ihevc_trans_16[k][3] * eo[3]);// + add)>> shift;
+
+ }
+
+ for(k = 1; k < 16; k += 2) /* odd outputs come from o[] */
+ {
+ pi4_temp[k * trans_size] = (g_ai2_ihevc_trans_16[k][0] * o[0]
+ + g_ai2_ihevc_trans_16[k][1] * o[1]
+ + g_ai2_ihevc_trans_16[k][2] * o[2]
+ + g_ai2_ihevc_trans_16[k][3] * o[3]
+ + g_ai2_ihevc_trans_16[k][4] * o[4]
+ + g_ai2_ihevc_trans_16[k][5] * o[5]
+ + g_ai2_ihevc_trans_16[k][6] * o[6]
+ + g_ai2_ihevc_trans_16[k][7] * o[7]);// + add) >> shift;
+ }
+ pu1_src += src_strd;
+ pu1_pred += pred_strd;
+ pi4_temp++; /* next scratch column */
+ }
+
+ pi4_temp = pi4_tmp_orig;
+ /* Forward Transform 2nd stage */
+ shift = 13; // 3 (first-stage shift, not applied above) + 10 (log2(iHeight) + 6)
+ add = 1 << (shift - 1); /* single rounding offset covering both stages */
+
+ for(i = 0; i < TRANS_SIZE_16; i++) /* one scratch row per iteration */
+ {
+ /* e and o*/
+ for(k = 0; k < 8; k++)
+ {
+ e[k] = pi4_temp[k] + pi4_temp[15 - k];
+ o[k] = pi4_temp[k] - pi4_temp[15 - k];
+ }
+ /* ee and eo */
+ for(k = 0; k < 4; k++)
+ {
+ ee[k] = e[k] + e[7 - k];
+ eo[k] = e[k] - e[7 - k];
+ }
+ /* eee and eeo */
+ eee[0] = ee[0] + ee[3];
+ eeo[0] = ee[0] - ee[3];
+ eee[1] = ee[1] + ee[2];
+ eeo[1] = ee[1] - ee[2];
+
+ pi2_dst[0] = (g_ai2_ihevc_trans_16[0][0] * eee[0] /* outputs go down column i of pi2_dst */
+ + g_ai2_ihevc_trans_16[0][1] * eee[1] + add) >> shift;
+ pi2_dst[8 * dst_strd] = (g_ai2_ihevc_trans_16[8][0] * eee[0]
+ + g_ai2_ihevc_trans_16[8][1] * eee[1] + add) >> shift;
+ pi2_dst[4 * dst_strd] = (g_ai2_ihevc_trans_16[4][0] * eeo[0]
+ + g_ai2_ihevc_trans_16[4][1] * eeo[1] + add) >> shift;
+ pi2_dst[12 * dst_strd] = (g_ai2_ihevc_trans_16[12][0] * eeo[0]
+ + g_ai2_ihevc_trans_16[12][1] * eeo[1] + add) >> shift;
+
+ for(k = 2; k < 16; k += 4) /* outputs 2, 6, 10, 14 come from eo[] */
+ {
+ pi2_dst[k * dst_strd] = (g_ai2_ihevc_trans_16[k][0] * eo[0]
+ + g_ai2_ihevc_trans_16[k][1] * eo[1]
+ + g_ai2_ihevc_trans_16[k][2] * eo[2]
+ + g_ai2_ihevc_trans_16[k][3] * eo[3] + add)
+ >> shift;
+ }
+
+ for(k = 1; k < 16; k += 2) /* odd outputs come from o[] */
+ {
+ pi2_dst[k * dst_strd] = (g_ai2_ihevc_trans_16[k][0] * o[0]
+ + g_ai2_ihevc_trans_16[k][1] * o[1]
+ + g_ai2_ihevc_trans_16[k][2] * o[2]
+ + g_ai2_ihevc_trans_16[k][3] * o[3]
+ + g_ai2_ihevc_trans_16[k][4] * o[4]
+ + g_ai2_ihevc_trans_16[k][5] * o[5]
+ + g_ai2_ihevc_trans_16[k][6] * o[6]
+ + g_ai2_ihevc_trans_16[k][7] * o[7] + add) >> shift;
+ }
+
+ pi4_temp += trans_size;
+ pi2_dst++;
+ }
+
+ return u4_blk_sad;
+}
+
+
+void ihevc_resi_trans_16x16_16bit(WORD16 *pi2_src, /* 16x16 source samples; rows are src_strd elements apart */
+ UWORD8 *pu1_pred, /* 16x16 prediction samples; rows are pred_strd elements apart */
+ WORD16 *pi2_tmp, /* scratch for the stage-1 output; 16x16 entries are written */
+ WORD16 *pi2_dst, /* output coefficients; rows are dst_strd elements apart */
+ WORD32 src_strd,
+ WORD32 pred_strd,
+ WORD32 dst_strd)
+{
+ WORD32 i, k;
+ WORD32 e[8], o[8];
+ WORD32 ee[4], eo[4];
+ WORD32 eee[2], eeo[2];
+ WORD32 add, shift;
+ WORD32 trans_size;
+ WORD16 *pi2_tmp_orig;
+ WORD16 *pi2_dst_orig;
+
+ pi2_dst_orig = pi2_dst; /* not referenced again in this function */
+ pi2_tmp_orig = pi2_tmp; /* remembered so stage 2 can restart from the top of the scratch buffer */
+ trans_size = TRANS_SIZE_16;
+ /* Stage 1: residue (src - pred) and forward transform of each input row; row i is written to column i of pi2_tmp */
+ shift = 3; // log2(iWidth) - 1 + g_uiBitIncrement
+ add = 1 << (shift - 1); /* rounding offset: half of the divisor implied by shift */
+
+ for(i = 0; i < trans_size; i++)
+ {
+ WORD32 resi_tmp_1, resi_tmp_2;
+ /* e and o: sums and differences of the mirrored residue pairs (k, 15 - k) */
+ for(k = 0; k < 8; k++)
+ {
+ resi_tmp_1 = pi2_src[k] - pu1_pred[k];
+ resi_tmp_2 = pi2_src[15 - k] - pu1_pred[15 - k];
+ e[k] = resi_tmp_1 + resi_tmp_2;
+ o[k] = resi_tmp_1 - resi_tmp_2;
+ }
+ /* ee and eo: sums and differences of the mirrored e[] pairs (k, 7 - k) */
+ for(k = 0; k < 4; k++)
+ {
+ ee[k] = e[k] + e[7 - k];
+ eo[k] = e[k] - e[7 - k];
+ }
+ /* eee and eeo: sums and differences of the mirrored ee[] pairs */
+ eee[0] = ee[0] + ee[3];
+ eeo[0] = ee[0] - ee[3];
+ eee[1] = ee[1] + ee[2];
+ eeo[1] = ee[1] - ee[2];
+
+ pi2_tmp[0] = (g_ai2_ihevc_trans_16[0][0] * eee[0]
+ + g_ai2_ihevc_trans_16[0][1] * eee[1] + add) >> shift;
+ pi2_tmp[8 * trans_size] = (g_ai2_ihevc_trans_16[8][0] * eee[0]
+ + g_ai2_ihevc_trans_16[8][1] * eee[1] + add) >> shift;
+ pi2_tmp[4 * trans_size] = (g_ai2_ihevc_trans_16[4][0] * eeo[0]
+ + g_ai2_ihevc_trans_16[4][1] * eeo[1] + add) >> shift;
+ pi2_tmp[12 * trans_size] = (g_ai2_ihevc_trans_16[12][0] * eeo[0]
+ + g_ai2_ihevc_trans_16[12][1] * eeo[1] + add) >> shift;
+
+ for(k = 2; k < 16; k += 4)
+ {
+ pi2_tmp[k * trans_size] = (g_ai2_ihevc_trans_16[k][0] * eo[0]
+ + g_ai2_ihevc_trans_16[k][1] * eo[1]
+ + g_ai2_ihevc_trans_16[k][2] * eo[2]
+ + g_ai2_ihevc_trans_16[k][3] * eo[3] + add)
+ >> shift;
+ }
+
+ for(k = 1; k < 16; k += 2)
+ {
+ pi2_tmp[k * trans_size] = (g_ai2_ihevc_trans_16[k][0] * o[0]
+ + g_ai2_ihevc_trans_16[k][1] * o[1]
+ + g_ai2_ihevc_trans_16[k][2] * o[2]
+ + g_ai2_ihevc_trans_16[k][3] * o[3]
+ + g_ai2_ihevc_trans_16[k][4] * o[4]
+ + g_ai2_ihevc_trans_16[k][5] * o[5]
+ + g_ai2_ihevc_trans_16[k][6] * o[6]
+ + g_ai2_ihevc_trans_16[k][7] * o[7] + add) >> shift;
+ }
+ pi2_src += src_strd;
+ pu1_pred += pred_strd;
+ pi2_tmp++;
+ }
+
+ pi2_tmp = pi2_tmp_orig;
+ /* Stage 2: forward transform of each row of pi2_tmp; row i is written to column i of pi2_dst */
+ shift = 10; // log2(iHeight) + 6
+ add = 1 << (shift - 1); /* rounding offset: half of the divisor implied by shift */
+
+ for(i = 0; i < TRANS_SIZE_16; i++)
+ {
+ /* e and o: sums and differences of the mirrored stage-1 values (k, 15 - k) */
+ for(k = 0; k < 8; k++)
+ {
+ e[k] = pi2_tmp[k] + pi2_tmp[15 - k];
+ o[k] = pi2_tmp[k] - pi2_tmp[15 - k];
+ }
+ /* ee and eo: sums and differences of the mirrored e[] pairs (k, 7 - k) */
+ for(k = 0; k < 4; k++)
+ {
+ ee[k] = e[k] + e[7 - k];
+ eo[k] = e[k] - e[7 - k];
+ }
+ /* eee and eeo: sums and differences of the mirrored ee[] pairs */
+ eee[0] = ee[0] + ee[3];
+ eeo[0] = ee[0] - ee[3];
+ eee[1] = ee[1] + ee[2];
+ eeo[1] = ee[1] - ee[2];
+
+ pi2_dst[0] = (g_ai2_ihevc_trans_16[0][0] * eee[0]
+ + g_ai2_ihevc_trans_16[0][1] * eee[1] + add) >> shift;
+ pi2_dst[8 * dst_strd] = (g_ai2_ihevc_trans_16[8][0] * eee[0]
+ + g_ai2_ihevc_trans_16[8][1] * eee[1] + add) >> shift;
+ pi2_dst[4 * dst_strd] = (g_ai2_ihevc_trans_16[4][0] * eeo[0]
+ + g_ai2_ihevc_trans_16[4][1] * eeo[1] + add) >> shift;
+ pi2_dst[12 * dst_strd] = (g_ai2_ihevc_trans_16[12][0] * eeo[0]
+ + g_ai2_ihevc_trans_16[12][1] * eeo[1] + add) >> shift;
+
+ for(k = 2; k < 16; k += 4)
+ {
+ pi2_dst[k * dst_strd] = (g_ai2_ihevc_trans_16[k][0] * eo[0]
+ + g_ai2_ihevc_trans_16[k][1] * eo[1]
+ + g_ai2_ihevc_trans_16[k][2] * eo[2]
+ + g_ai2_ihevc_trans_16[k][3] * eo[3] + add)
+ >> shift;
+ }
+
+ for(k = 1; k < 16; k += 2)
+ {
+ pi2_dst[k * dst_strd] = (g_ai2_ihevc_trans_16[k][0] * o[0]
+ + g_ai2_ihevc_trans_16[k][1] * o[1]
+ + g_ai2_ihevc_trans_16[k][2] * o[2]
+ + g_ai2_ihevc_trans_16[k][3] * o[3]
+ + g_ai2_ihevc_trans_16[k][4] * o[4]
+ + g_ai2_ihevc_trans_16[k][5] * o[5]
+ + g_ai2_ihevc_trans_16[k][6] * o[6]
+ + g_ai2_ihevc_trans_16[k][7] * o[7] + add) >> shift;
+ }
+
+ pi2_tmp += trans_size;
+ pi2_dst++;
+ }
+}
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * This function performs residue calculation and forward transform on
+ * input pixels
+ *
+ * @par Description:
+ * Performs residue calculation by subtracting source and prediction and
+ * followed by forward transform
+ *
+ * @param[in] pu1_src
+ * Input 32x32 pixels
+ *
+ * @param[in] pu1_pred
+ * Prediction data
+ *
+ * @param[in] pi4_temp
+ * Temporary 32-bit buffer of size 32x32
+ *
+ * @param[out] pi2_dst
+ * Output 32x32 coefficients
+ *
+ * @param[in] src_strd
+ * Input stride
+ *
+ * @param[in] pred_strd
+ * Prediction Stride
+ *
+ * @param[in] dst_strd_chr_flag
+ * Output stride in the upper 16 bits (read as >> 16) and chroma flag in bit 0
+ *
+ * @returns Sum of absolute residue values (src - pred) over the 32x32 block
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+
+UWORD32 ihevc_resi_trans_32x32(UWORD8 *pu1_src,
+ UWORD8 *pu1_pred,
+ WORD32 *pi4_temp, /* 32-bit scratch for the unscaled stage-1 output; 32x32 entries are written */
+ WORD16 *pi2_dst,
+ WORD32 src_strd,
+ WORD32 pred_strd,
+ WORD32 dst_strd_chr_flag) /* output stride in bits 31..16, chroma flag in bit 0 */
+{
+ WORD32 i, k;
+ WORD32 e[16], o[16];
+ WORD32 ee[8], eo[8];
+ WORD32 eee[4], eeo[4];
+ WORD32 eeee[2], eeeo[2];
+ WORD32 add, shift;
+ WORD32 trans_size;
+ WORD32 *pi4_tmp_orig;
+ WORD16 *pi2_dst_orig;
+ UWORD32 u4_blk_sad = 0 ;
+ WORD32 chroma_flag;
+ WORD32 dst_strd;
+
+ chroma_flag = dst_strd_chr_flag & 1; /* extracted but not referenced again in this function */
+ dst_strd = dst_strd_chr_flag >> 16; /* output stride is carried in the upper 16 bits */
+
+ pi2_dst_orig = pi2_dst; /* not referenced again in this function */
+ pi4_tmp_orig = pi4_temp; /* remembered so stage 2 can restart from the top of the scratch buffer */
+ trans_size = TRANS_SIZE_32;
+ /* Stage 1: residue (src - pred) and forward transform of each input row; row i is written to column i of pi4_temp */
+ /* Shift and rounding are made zero to match with intrinsics; stage-1 sums are stored unscaled */
+ shift = 0; // 4 : log2(iWidth) - 1 + g_uiBitIncrement
+ add = 0 ; //1 << (shift - 1);
+
+ for(i = 0; i < trans_size; i++)
+ {
+ WORD32 resi_tmp_1, resi_tmp_2;
+ /* e and o: sums and differences of the mirrored residue pairs (k, 31 - k) */
+ for(k = 0; k < 16; k++)
+ {
+ resi_tmp_1 = pu1_src[k] - pu1_pred[k];
+ resi_tmp_2 = pu1_src[31 - k] - pu1_pred[31 - k];
+ e[k] = resi_tmp_1 + resi_tmp_2;
+ o[k] = resi_tmp_1 - resi_tmp_2;
+ u4_blk_sad += abs(resi_tmp_1) + abs(resi_tmp_2); /* accumulate the block SAD that is returned */
+ }
+ /* ee and eo: sums and differences of the mirrored e[] pairs (k, 15 - k) */
+ for(k = 0; k < 8; k++)
+ {
+ ee[k] = e[k] + e[15 - k];
+ eo[k] = e[k] - e[15 - k];
+ }
+ /* eee and eeo: sums and differences of the mirrored ee[] pairs (k, 7 - k) */
+ for(k = 0; k < 4; k++)
+ {
+ eee[k] = ee[k] + ee[7 - k];
+ eeo[k] = ee[k] - ee[7 - k];
+ }
+ /* eeee and eeeo: sums and differences of the mirrored eee[] pairs */
+ eeee[0] = eee[0] + eee[3];
+ eeeo[0] = eee[0] - eee[3];
+ eeee[1] = eee[1] + eee[2];
+ eeeo[1] = eee[1] - eee[2];
+
+ pi4_temp[0] = (g_ai2_ihevc_trans_32[0][0] * eeee[0]
+ + g_ai2_ihevc_trans_32[0][1] * eeee[1]);// + add) >> shift;
+ pi4_temp[16 * trans_size] = (g_ai2_ihevc_trans_32[16][0] * eeee[0]
+ + g_ai2_ihevc_trans_32[16][1] * eeee[1]);// + add) >> shift;
+ pi4_temp[8 * trans_size] = (g_ai2_ihevc_trans_32[8][0] * eeeo[0]
+ + g_ai2_ihevc_trans_32[8][1] * eeeo[1]);// + add) >> shift;
+ pi4_temp[24 * trans_size] = (g_ai2_ihevc_trans_32[24][0] * eeeo[0]
+ + g_ai2_ihevc_trans_32[24][1] * eeeo[1]);// + add) >> shift;
+ for(k = 4; k < 32; k += 8)
+ {
+ pi4_temp[k * trans_size] = (g_ai2_ihevc_trans_32[k][0] * eeo[0]
+ + g_ai2_ihevc_trans_32[k][1] * eeo[1]
+ + g_ai2_ihevc_trans_32[k][2] * eeo[2]
+ + g_ai2_ihevc_trans_32[k][3] * eeo[3]);// + add)>> shift;
+ }
+ for(k = 2; k < 32; k += 4)
+ {
+ pi4_temp[k * trans_size] = (g_ai2_ihevc_trans_32[k][0] * eo[0]
+ + g_ai2_ihevc_trans_32[k][1] * eo[1]
+ + g_ai2_ihevc_trans_32[k][2] * eo[2]
+ + g_ai2_ihevc_trans_32[k][3] * eo[3]
+ + g_ai2_ihevc_trans_32[k][4] * eo[4]
+ + g_ai2_ihevc_trans_32[k][5] * eo[5]
+ + g_ai2_ihevc_trans_32[k][6] * eo[6]
+ + g_ai2_ihevc_trans_32[k][7] * eo[7]);// + add)>> shift;
+ }
+ for(k = 1; k < 32; k += 2)
+ {
+ pi4_temp[k * trans_size] = (g_ai2_ihevc_trans_32[k][0] * o[0]
+ + g_ai2_ihevc_trans_32[k][1] * o[1]
+ + g_ai2_ihevc_trans_32[k][2] * o[2]
+ + g_ai2_ihevc_trans_32[k][3] * o[3]
+ + g_ai2_ihevc_trans_32[k][4] * o[4]
+ + g_ai2_ihevc_trans_32[k][5] * o[5]
+ + g_ai2_ihevc_trans_32[k][6] * o[6]
+ + g_ai2_ihevc_trans_32[k][7] * o[7]
+ + g_ai2_ihevc_trans_32[k][8] * o[8]
+ + g_ai2_ihevc_trans_32[k][9] * o[9]
+ + g_ai2_ihevc_trans_32[k][10] * o[10]
+ + g_ai2_ihevc_trans_32[k][11] * o[11]
+ + g_ai2_ihevc_trans_32[k][12] * o[12]
+ + g_ai2_ihevc_trans_32[k][13] * o[13]
+ + g_ai2_ihevc_trans_32[k][14] * o[14]
+ + g_ai2_ihevc_trans_32[k][15] * o[15]);// + add) >> shift;
+ }
+ pu1_src += src_strd;
+ pu1_pred += pred_strd;
+ pi4_temp++;
+ }
+
+ pi4_temp = pi4_tmp_orig;
+ /* Stage 2: forward transform of each row of pi4_temp; row i is written to column i of pi2_dst */
+ shift = 15; // 11 (log2(iHeight) + 6) + 4 (the stage-1 shift that was made zero above)
+ add = 1 << (shift - 1); /* rounding offset: half of the divisor implied by shift */
+
+ for(i = 0; i < TRANS_SIZE_32; i++)
+ {
+ /* e and o: sums and differences of the mirrored stage-1 values (k, 31 - k) */
+ for(k = 0; k < 16; k++)
+ {
+ e[k] = pi4_temp[k] + pi4_temp[31 - k];
+ o[k] = pi4_temp[k] - pi4_temp[31 - k];
+ }
+ /* ee and eo: sums and differences of the mirrored e[] pairs (k, 15 - k) */
+ for(k = 0; k < 8; k++)
+ {
+ ee[k] = e[k] + e[15 - k];
+ eo[k] = e[k] - e[15 - k];
+ }
+ /* eee and eeo: sums and differences of the mirrored ee[] pairs (k, 7 - k) */
+ for(k = 0; k < 4; k++)
+ {
+ eee[k] = ee[k] + ee[7 - k];
+ eeo[k] = ee[k] - ee[7 - k];
+ }
+ /* eeee and eeeo: sums and differences of the mirrored eee[] pairs */
+ eeee[0] = eee[0] + eee[3];
+ eeeo[0] = eee[0] - eee[3];
+ eeee[1] = eee[1] + eee[2];
+ eeeo[1] = eee[1] - eee[2];
+
+ pi2_dst[0] = (g_ai2_ihevc_trans_32[0][0] * eeee[0]
+ + g_ai2_ihevc_trans_32[0][1] * eeee[1] + add) >> shift;
+ pi2_dst[16 * dst_strd] = (g_ai2_ihevc_trans_32[16][0] * eeee[0]
+ + g_ai2_ihevc_trans_32[16][1] * eeee[1] + add) >> shift;
+ pi2_dst[8 * dst_strd] = (g_ai2_ihevc_trans_32[8][0] * eeeo[0]
+ + g_ai2_ihevc_trans_32[8][1] * eeeo[1] + add) >> shift;
+ pi2_dst[24 * dst_strd] = (g_ai2_ihevc_trans_32[24][0] * eeeo[0]
+ + g_ai2_ihevc_trans_32[24][1] * eeeo[1] + add) >> shift;
+ for(k = 4; k < 32; k += 8)
+ {
+ pi2_dst[k * dst_strd] = (g_ai2_ihevc_trans_32[k][0] * eeo[0]
+ + g_ai2_ihevc_trans_32[k][1] * eeo[1]
+ + g_ai2_ihevc_trans_32[k][2] * eeo[2]
+ + g_ai2_ihevc_trans_32[k][3] * eeo[3] + add)
+ >> shift;
+ }
+ for(k = 2; k < 32; k += 4)
+ {
+ pi2_dst[k * dst_strd] = (g_ai2_ihevc_trans_32[k][0] * eo[0]
+ + g_ai2_ihevc_trans_32[k][1] * eo[1]
+ + g_ai2_ihevc_trans_32[k][2] * eo[2]
+ + g_ai2_ihevc_trans_32[k][3] * eo[3]
+ + g_ai2_ihevc_trans_32[k][4] * eo[4]
+ + g_ai2_ihevc_trans_32[k][5] * eo[5]
+ + g_ai2_ihevc_trans_32[k][6] * eo[6]
+ + g_ai2_ihevc_trans_32[k][7] * eo[7] + add)
+ >> shift;
+ }
+ for(k = 1; k < 32; k += 2)
+ {
+ pi2_dst[k * dst_strd] = (g_ai2_ihevc_trans_32[k][0] * o[0]
+ + g_ai2_ihevc_trans_32[k][1] * o[1]
+ + g_ai2_ihevc_trans_32[k][2] * o[2]
+ + g_ai2_ihevc_trans_32[k][3] * o[3]
+ + g_ai2_ihevc_trans_32[k][4] * o[4]
+ + g_ai2_ihevc_trans_32[k][5] * o[5]
+ + g_ai2_ihevc_trans_32[k][6] * o[6]
+ + g_ai2_ihevc_trans_32[k][7] * o[7]
+ + g_ai2_ihevc_trans_32[k][8] * o[8]
+ + g_ai2_ihevc_trans_32[k][9] * o[9]
+ + g_ai2_ihevc_trans_32[k][10] * o[10]
+ + g_ai2_ihevc_trans_32[k][11] * o[11]
+ + g_ai2_ihevc_trans_32[k][12] * o[12]
+ + g_ai2_ihevc_trans_32[k][13] * o[13]
+ + g_ai2_ihevc_trans_32[k][14] * o[14]
+ + g_ai2_ihevc_trans_32[k][15] * o[15] + add)
+ >> shift;
+ }
+
+ pi4_temp += trans_size;
+ pi2_dst++;
+ }
+
+ return u4_blk_sad;
+}
+
+
+
+void ihevc_resi_trans_32x32_16bit(WORD16 *pi2_src, /* 32x32 source samples; rows are src_strd elements apart */
+ UWORD8 *pu1_pred, /* 32x32 prediction samples; rows are pred_strd elements apart */
+ WORD16 *pi2_tmp, /* scratch for the stage-1 output; 32x32 entries are written */
+ WORD16 *pi2_dst, /* output coefficients; rows are dst_strd elements apart */
+ WORD32 src_strd,
+ WORD32 pred_strd,
+ WORD32 dst_strd)
+{
+ WORD32 i, k;
+ WORD32 e[16], o[16];
+ WORD32 ee[8], eo[8];
+ WORD32 eee[4], eeo[4];
+ WORD32 eeee[2], eeeo[2];
+ WORD32 add, shift;
+ WORD32 trans_size;
+ WORD16 *pi2_tmp_orig;
+ WORD16 *pi2_dst_orig;
+
+ pi2_dst_orig = pi2_dst; /* not referenced again in this function */
+ pi2_tmp_orig = pi2_tmp; /* remembered so stage 2 can restart from the top of the scratch buffer */
+ trans_size = TRANS_SIZE_32;
+ /* Stage 1: residue (src - pred) and forward transform of each input row; row i is written to column i of pi2_tmp */
+ shift = 4; // log2(iWidth) - 1 + g_uiBitIncrement
+ add = 1 << (shift - 1); /* rounding offset: half of the divisor implied by shift */
+
+ for(i = 0; i < trans_size; i++)
+ {
+ WORD32 resi_tmp_1, resi_tmp_2;
+ /* e and o: sums and differences of the mirrored residue pairs (k, 31 - k) */
+ for(k = 0; k < 16; k++)
+ {
+ resi_tmp_1 = pi2_src[k] - pu1_pred[k];
+ resi_tmp_2 = pi2_src[31 - k] - pu1_pred[31 - k];
+ e[k] = resi_tmp_1 + resi_tmp_2;
+ o[k] = resi_tmp_1 - resi_tmp_2;
+ }
+ /* ee and eo: sums and differences of the mirrored e[] pairs (k, 15 - k) */
+ for(k = 0; k < 8; k++)
+ {
+ ee[k] = e[k] + e[15 - k];
+ eo[k] = e[k] - e[15 - k];
+ }
+ /* eee and eeo: sums and differences of the mirrored ee[] pairs (k, 7 - k) */
+ for(k = 0; k < 4; k++)
+ {
+ eee[k] = ee[k] + ee[7 - k];
+ eeo[k] = ee[k] - ee[7 - k];
+ }
+ /* eeee and eeeo: sums and differences of the mirrored eee[] pairs */
+ eeee[0] = eee[0] + eee[3];
+ eeeo[0] = eee[0] - eee[3];
+ eeee[1] = eee[1] + eee[2];
+ eeeo[1] = eee[1] - eee[2];
+
+ pi2_tmp[0] = (g_ai2_ihevc_trans_32[0][0] * eeee[0]
+ + g_ai2_ihevc_trans_32[0][1] * eeee[1] + add) >> shift;
+ pi2_tmp[16 * trans_size] = (g_ai2_ihevc_trans_32[16][0] * eeee[0]
+ + g_ai2_ihevc_trans_32[16][1] * eeee[1] + add) >> shift;
+ pi2_tmp[8 * trans_size] = (g_ai2_ihevc_trans_32[8][0] * eeeo[0]
+ + g_ai2_ihevc_trans_32[8][1] * eeeo[1] + add) >> shift;
+ pi2_tmp[24 * trans_size] = (g_ai2_ihevc_trans_32[24][0] * eeeo[0]
+ + g_ai2_ihevc_trans_32[24][1] * eeeo[1] + add) >> shift;
+ for(k = 4; k < 32; k += 8)
+ {
+ pi2_tmp[k * trans_size] = (g_ai2_ihevc_trans_32[k][0] * eeo[0]
+ + g_ai2_ihevc_trans_32[k][1] * eeo[1]
+ + g_ai2_ihevc_trans_32[k][2] * eeo[2]
+ + g_ai2_ihevc_trans_32[k][3] * eeo[3] + add)
+ >> shift;
+ }
+ for(k = 2; k < 32; k += 4)
+ {
+ pi2_tmp[k * trans_size] = (g_ai2_ihevc_trans_32[k][0] * eo[0]
+ + g_ai2_ihevc_trans_32[k][1] * eo[1]
+ + g_ai2_ihevc_trans_32[k][2] * eo[2]
+ + g_ai2_ihevc_trans_32[k][3] * eo[3]
+ + g_ai2_ihevc_trans_32[k][4] * eo[4]
+ + g_ai2_ihevc_trans_32[k][5] * eo[5]
+ + g_ai2_ihevc_trans_32[k][6] * eo[6]
+ + g_ai2_ihevc_trans_32[k][7] * eo[7] + add)
+ >> shift;
+ }
+ for(k = 1; k < 32; k += 2)
+ {
+ pi2_tmp[k * trans_size] = (g_ai2_ihevc_trans_32[k][0] * o[0]
+ + g_ai2_ihevc_trans_32[k][1] * o[1]
+ + g_ai2_ihevc_trans_32[k][2] * o[2]
+ + g_ai2_ihevc_trans_32[k][3] * o[3]
+ + g_ai2_ihevc_trans_32[k][4] * o[4]
+ + g_ai2_ihevc_trans_32[k][5] * o[5]
+ + g_ai2_ihevc_trans_32[k][6] * o[6]
+ + g_ai2_ihevc_trans_32[k][7] * o[7]
+ + g_ai2_ihevc_trans_32[k][8] * o[8]
+ + g_ai2_ihevc_trans_32[k][9] * o[9]
+ + g_ai2_ihevc_trans_32[k][10] * o[10]
+ + g_ai2_ihevc_trans_32[k][11] * o[11]
+ + g_ai2_ihevc_trans_32[k][12] * o[12]
+ + g_ai2_ihevc_trans_32[k][13] * o[13]
+ + g_ai2_ihevc_trans_32[k][14] * o[14]
+ + g_ai2_ihevc_trans_32[k][15] * o[15] + add)
+ >> shift;
+ }
+ pi2_src += src_strd;
+ pu1_pred += pred_strd;
+ pi2_tmp++;
+ }
+
+ pi2_tmp = pi2_tmp_orig;
+ /* Stage 2: forward transform of each row of pi2_tmp; row i is written to column i of pi2_dst */
+ shift = 11; // log2(iHeight) + 6
+ add = 1 << (shift - 1); /* rounding offset: half of the divisor implied by shift */
+
+ for(i = 0; i < TRANS_SIZE_32; i++)
+ {
+ /* e and o: sums and differences of the mirrored stage-1 values (k, 31 - k) */
+ for(k = 0; k < 16; k++)
+ {
+ e[k] = pi2_tmp[k] + pi2_tmp[31 - k];
+ o[k] = pi2_tmp[k] - pi2_tmp[31 - k];
+ }
+ /* ee and eo: sums and differences of the mirrored e[] pairs (k, 15 - k) */
+ for(k = 0; k < 8; k++)
+ {
+ ee[k] = e[k] + e[15 - k];
+ eo[k] = e[k] - e[15 - k];
+ }
+ /* eee and eeo: sums and differences of the mirrored ee[] pairs (k, 7 - k) */
+ for(k = 0; k < 4; k++)
+ {
+ eee[k] = ee[k] + ee[7 - k];
+ eeo[k] = ee[k] - ee[7 - k];
+ }
+ /* eeee and eeeo: sums and differences of the mirrored eee[] pairs */
+ eeee[0] = eee[0] + eee[3];
+ eeeo[0] = eee[0] - eee[3];
+ eeee[1] = eee[1] + eee[2];
+ eeeo[1] = eee[1] - eee[2];
+
+ pi2_dst[0] = (g_ai2_ihevc_trans_32[0][0] * eeee[0]
+ + g_ai2_ihevc_trans_32[0][1] * eeee[1] + add) >> shift;
+ pi2_dst[16 * dst_strd] = (g_ai2_ihevc_trans_32[16][0] * eeee[0]
+ + g_ai2_ihevc_trans_32[16][1] * eeee[1] + add) >> shift;
+ pi2_dst[8 * dst_strd] = (g_ai2_ihevc_trans_32[8][0] * eeeo[0]
+ + g_ai2_ihevc_trans_32[8][1] * eeeo[1] + add) >> shift;
+ pi2_dst[24 * dst_strd] = (g_ai2_ihevc_trans_32[24][0] * eeeo[0]
+ + g_ai2_ihevc_trans_32[24][1] * eeeo[1] + add) >> shift;
+ for(k = 4; k < 32; k += 8)
+ {
+ pi2_dst[k * dst_strd] = (g_ai2_ihevc_trans_32[k][0] * eeo[0]
+ + g_ai2_ihevc_trans_32[k][1] * eeo[1]
+ + g_ai2_ihevc_trans_32[k][2] * eeo[2]
+ + g_ai2_ihevc_trans_32[k][3] * eeo[3] + add)
+ >> shift;
+ }
+ for(k = 2; k < 32; k += 4)
+ {
+ pi2_dst[k * dst_strd] = (g_ai2_ihevc_trans_32[k][0] * eo[0]
+ + g_ai2_ihevc_trans_32[k][1] * eo[1]
+ + g_ai2_ihevc_trans_32[k][2] * eo[2]
+ + g_ai2_ihevc_trans_32[k][3] * eo[3]
+ + g_ai2_ihevc_trans_32[k][4] * eo[4]
+ + g_ai2_ihevc_trans_32[k][5] * eo[5]
+ + g_ai2_ihevc_trans_32[k][6] * eo[6]
+ + g_ai2_ihevc_trans_32[k][7] * eo[7] + add)
+ >> shift;
+ }
+ for(k = 1; k < 32; k += 2)
+ {
+ pi2_dst[k * dst_strd] = (g_ai2_ihevc_trans_32[k][0] * o[0]
+ + g_ai2_ihevc_trans_32[k][1] * o[1]
+ + g_ai2_ihevc_trans_32[k][2] * o[2]
+ + g_ai2_ihevc_trans_32[k][3] * o[3]
+ + g_ai2_ihevc_trans_32[k][4] * o[4]
+ + g_ai2_ihevc_trans_32[k][5] * o[5]
+ + g_ai2_ihevc_trans_32[k][6] * o[6]
+ + g_ai2_ihevc_trans_32[k][7] * o[7]
+ + g_ai2_ihevc_trans_32[k][8] * o[8]
+ + g_ai2_ihevc_trans_32[k][9] * o[9]
+ + g_ai2_ihevc_trans_32[k][10] * o[10]
+ + g_ai2_ihevc_trans_32[k][11] * o[11]
+ + g_ai2_ihevc_trans_32[k][12] * o[12]
+ + g_ai2_ihevc_trans_32[k][13] * o[13]
+ + g_ai2_ihevc_trans_32[k][14] * o[14]
+ + g_ai2_ihevc_trans_32[k][15] * o[15] + add)
+ >> shift;
+ }
+
+ pi2_tmp += trans_size;
+ pi2_dst++;
+ }
+}
+
diff --git a/common/ihevc_resi_trans.h b/common/ihevc_resi_trans.h
new file mode 100644
index 0000000..3ca184b
--- /dev/null
+++ b/common/ihevc_resi_trans.h
@@ -0,0 +1,235 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ihevc_resi_trans.h
+*
+* @brief
+* Function declarations for residue and forward transform
+*
+* @author
+* Ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+#ifndef _IHEVC_RESI_TRANS_H_
+#define _IHEVC_RESI_TRANS_H_
+
+typedef UWORD32 ihevc_resi_trans_4x4_ttype1_ft(UWORD8 *pu1_src,
+ UWORD8 *pu1_pred,
+ WORD32 *pi4_temp,
+ WORD16 *pi2_dst,
+ WORD32 src_strd,
+ WORD32 pred_strd,
+ WORD32 dst_strd_chr_flag);
+
+typedef UWORD32 ihevc_hbd_resi_trans_4x4_ttype1_ft(UWORD16 *pu2_src,
+ UWORD16 *pu2_pred,
+ WORD32 *pi4_temp,
+ WORD16 *pi2_dst,
+ WORD32 src_strd,
+ WORD32 pred_strd,
+ WORD32 dst_strd_chr_flag,
+ UWORD8 bit_depth);
+
+typedef UWORD32 ihevc_resi_trans_4x4_ft(UWORD8 *pu1_src,
+ UWORD8 *pu1_pred,
+ WORD32 *pi4_temp,
+ WORD16 *pi2_dst,
+ WORD32 src_strd,
+ WORD32 pred_strd,
+ WORD32 dst_strd_chr_flag);
+
+typedef UWORD32 ihevc_hbd_resi_trans_4x4_ft
+ (
+ UWORD16 *pu2_src,
+ UWORD16 *pu2_pred,
+ WORD32 *pi4_temp,
+ WORD16 *pi2_dst,
+ WORD32 src_strd,
+ WORD32 pred_strd,
+ WORD32 dst_strd_chr_flag,
+ UWORD8 bit_depth
+ );
+
+typedef UWORD32 ihevc_resi_trans_8x8_ft(UWORD8 *pu1_src,
+ UWORD8 *pu1_pred,
+ WORD32 *pi4_temp,
+ WORD16 *pi2_dst,
+ WORD32 src_strd,
+ WORD32 pred_strd,
+ WORD32 dst_strd_chr_flag);
+
+typedef UWORD32 ihevc_hbd_resi_trans_8x8_ft
+ (
+ UWORD16 *pu2_src,
+ UWORD16 *pu2_pred,
+ WORD32 *pi4_temp,
+ WORD16 *pi2_dst,
+ WORD32 src_strd,
+ WORD32 pred_strd,
+ WORD32 dst_strd_chr_flag,
+ UWORD8 bit_depth
+ );
+
+
+typedef UWORD32 ihevc_resi_trans_16x16_ft(UWORD8 *pu1_src,
+ UWORD8 *pu1_pred,
+ WORD32 *pi4_temp,
+ WORD16 *pi2_dst,
+ WORD32 src_strd,
+ WORD32 pred_strd,
+ WORD32 dst_strd_chr_flag);
+
+typedef UWORD32 ihevc_hbd_resi_trans_16x16_ft(UWORD16 *pu2_src,
+ UWORD16 *pu2_pred,
+ WORD32 *pi4_temp,
+ WORD16 *pi2_dst,
+ WORD32 src_strd,
+ WORD32 pred_strd,
+ WORD32 dst_strd_chr_flag,
+ UWORD8 bit_depth);
+
+typedef UWORD32 ihevc_resi_trans_32x32_ft(UWORD8 *pu1_src,
+ UWORD8 *pu1_pred,
+ WORD32 *pi4_temp,
+ WORD16 *pi2_dst,
+ WORD32 src_strd,
+ WORD32 pred_strd,
+ WORD32 dst_strd_chr_flag);
+
+typedef UWORD32 ihevc_hbd_resi_trans_32x32_ft(UWORD16 *pu2_src,
+ UWORD16 *pu2_pred,
+ WORD32 *pi4_temp,
+ WORD16 *pi2_dst,
+ WORD32 src_strd,
+ WORD32 pred_strd,
+ WORD32 dst_strd_chr_flag,
+ UWORD8 bit_depth);
+
+
+typedef void ihevc_resi_trans_4x4_16bit_ft(WORD16 *pi2_src,
+ UWORD8 *pu1_pred,
+ WORD16 *pi2_tmp,
+ WORD16 *pi2_dst,
+ WORD32 src_strd,
+ WORD32 pred_strd,
+ WORD32 dst_strd);
+
+typedef void ihevc_resi_trans_8x8_16bit_ft(WORD16 *pi2_src,
+ UWORD8 *pu1_pred,
+ WORD16 *pi2_tmp,
+ WORD16 *pi2_dst,
+ WORD32 src_strd,
+ WORD32 pred_strd,
+ WORD32 dst_strd);
+
+typedef void ihevc_resi_trans_16x16_16bit_ft(WORD16 *pi2_src,
+ UWORD8 *pu1_pred,
+ WORD16 *pi2_tmp,
+ WORD16 *pi2_dst,
+ WORD32 src_strd,
+ WORD32 pred_strd,
+ WORD32 dst_strd);
+
+typedef void ihevc_resi_trans_32x32_16bit_ft(WORD16 *pi2_src,
+ UWORD8 *pu1_pred,
+ WORD16 *pi2_tmp,
+ WORD16 *pi2_dst,
+ WORD32 src_strd,
+ WORD32 pred_strd,
+ WORD32 dst_strd);
+
+ihevc_resi_trans_4x4_ttype1_ft ihevc_resi_trans_4x4_ttype1;
+ihevc_resi_trans_4x4_ft ihevc_resi_trans_4x4;
+ihevc_resi_trans_8x8_ft ihevc_resi_trans_8x8;
+ihevc_resi_trans_16x16_ft ihevc_resi_trans_16x16;
+ihevc_resi_trans_32x32_ft ihevc_resi_trans_32x32;
+ihevc_resi_trans_4x4_16bit_ft ihevc_resi_trans_4x4_16bit;
+ihevc_resi_trans_8x8_16bit_ft ihevc_resi_trans_8x8_16bit;
+ihevc_resi_trans_16x16_16bit_ft ihevc_resi_trans_16x16_16bit;
+ihevc_resi_trans_32x32_16bit_ft ihevc_resi_trans_32x32_16bit;
+
+ihevc_resi_trans_4x4_ttype1_ft ihevc_resi_trans_4x4_ttype1_sse42;
+ihevc_resi_trans_4x4_ft ihevc_resi_trans_4x4_sse42;
+ihevc_resi_trans_8x8_ft ihevc_resi_trans_8x8_sse42;
+ihevc_resi_trans_16x16_ft ihevc_resi_trans_16x16_sse42;
+ihevc_resi_trans_32x32_ft ihevc_resi_trans_32x32_sse42;
+ihevc_resi_trans_4x4_16bit_ft ihevc_resi_trans_4x4_16bit_sse42;
+ihevc_resi_trans_8x8_16bit_ft ihevc_resi_trans_8x8_16bit_sse42;
+ihevc_resi_trans_16x16_16bit_ft ihevc_resi_trans_16x16_16bit_sse42;
+ihevc_resi_trans_32x32_16bit_ft ihevc_resi_trans_32x32_16bit_sse42;
+
+
+ihevc_resi_trans_4x4_ttype1_ft ihevc_resi_trans_4x4_ttype1_avx;
+ihevc_resi_trans_4x4_ft ihevc_resi_trans_4x4_avx;
+ihevc_resi_trans_8x8_ft ihevc_resi_trans_8x8_avx;
+ihevc_resi_trans_16x16_ft ihevc_resi_trans_16x16_avx;
+ihevc_resi_trans_32x32_ft ihevc_resi_trans_32x32_avx;
+ihevc_resi_trans_4x4_16bit_ft ihevc_resi_trans_4x4_16bit_avx;
+ihevc_resi_trans_8x8_16bit_ft ihevc_resi_trans_8x8_16bit_avx;
+
+#ifndef DISABLE_AVX2
+ihevc_resi_trans_8x8_ft ihevc_resi_trans_8x8_avx2;
+ihevc_resi_trans_16x16_ft ihevc_resi_trans_16x16_avx2;
+ihevc_resi_trans_32x32_ft ihevc_resi_trans_32x32_avx2;
+#endif
+
+ihevc_hbd_resi_trans_4x4_ttype1_ft ihevc_hbd_resi_trans_4x4_ttype1;
+ihevc_hbd_resi_trans_4x4_ft ihevc_hbd_resi_trans_4x4;
+ihevc_hbd_resi_trans_8x8_ft ihevc_hbd_resi_trans_8x8;
+ihevc_hbd_resi_trans_16x16_ft ihevc_hbd_resi_trans_16x16;
+ihevc_hbd_resi_trans_32x32_ft ihevc_hbd_resi_trans_32x32;
+
+ihevc_hbd_resi_trans_4x4_ttype1_ft ihevc_hbd_resi_trans_4x4_ttype1_sse42;
+ihevc_hbd_resi_trans_4x4_ft ihevc_hbd_resi_trans_4x4_sse42;
+ihevc_hbd_resi_trans_8x8_ft ihevc_hbd_resi_trans_8x8_sse42;
+ihevc_hbd_resi_trans_16x16_ft ihevc_hbd_resi_trans_16x16_sse42;
+ihevc_hbd_resi_trans_32x32_ft ihevc_hbd_resi_trans_32x32_sse42;
+
+
+ihevc_hbd_resi_trans_4x4_ttype1_ft ihevc_hbd_resi_trans_4x4_ttype1_avx;
+ihevc_hbd_resi_trans_4x4_ft ihevc_hbd_resi_trans_4x4_avx;
+ihevc_hbd_resi_trans_8x8_ft ihevc_hbd_resi_trans_8x8_avx;
+ihevc_hbd_resi_trans_16x16_ft ihevc_hbd_resi_trans_16x16_avx;
+ihevc_hbd_resi_trans_32x32_ft ihevc_hbd_resi_trans_32x32_avx;
+
+/* AVX2 declarations */
+ihevc_hbd_resi_trans_8x8_ft ihevc_hbd_resi_trans_8x8_avx2;
+ihevc_hbd_resi_trans_16x16_ft ihevc_hbd_resi_trans_16x16_avx2;
+ihevc_hbd_resi_trans_32x32_ft ihevc_hbd_resi_trans_32x32_avx2;
+
+/* ARM declarations (a9q and neon variants) */
+ihevc_resi_trans_16x16_ft ihevc_resi_trans_16x16_a9q;
+ihevc_resi_trans_4x4_ft ihevc_resi_trans_4x4_a9q;
+ihevc_resi_trans_8x8_ft ihevc_resi_trans_8x8_a9q;
+ihevc_resi_trans_4x4_ttype1_ft ihevc_resi_trans_4x4_ttype1_a9q;
+ihevc_resi_trans_32x32_ft ihevc_resi_trans_32x32_a9q;
+ihevc_resi_trans_4x4_ft ihevc_resi_trans_4x4_neon;
+ihevc_resi_trans_4x4_ttype1_ft ihevc_resi_trans_4x4_ttype1_neon;
+ihevc_resi_trans_8x8_ft ihevc_resi_trans_8x8_neon;
+ihevc_resi_trans_16x16_ft ihevc_resi_trans_16x16_neon;
+ihevc_resi_trans_32x32_ft ihevc_resi_trans_32x32_neon;
+
+#endif /*_IHEVC_RESI_TRANS_H_*/
diff --git a/common/ihevc_structs.h b/common/ihevc_structs.h
index 52dda76..55d746e 100644
--- a/common/ihevc_structs.h
+++ b/common/ihevc_structs.h
@@ -447,6 +447,24 @@
}time_code_t;
+/**
+ * @brief Structure for Content Light Level Info
+ *
+ */
+typedef struct
+{
+ /**
+ * 16bit unsigned number which indicates the maximum pixel intensity of all samples in bit-stream in units of 1 candela per square metre
+ */
+ UWORD16 u2_sei_max_cll;
+
+ /**
+ * 16bit unsigned number which indicates the average pixel intensity of all samples in bit-stream in units of 1 candela per square metre
+ */
+ UWORD16 u2_sei_avg_cll;
+
+}content_light_level_info_sei_params_t;
+
/**
* Structure to hold SEI parameters Info
@@ -468,6 +486,8 @@
WORD8 i4_sei_mastering_disp_colour_vol_params_present_flags;
+ WORD8 i1_sei_cll_enable;
+
/* Enable/Disable SEI Hash on the Decoded picture & Hash type */
/* < 3 : Checksum, 2 : CRC, 1 : MD5, 0 : disable > */
/* Other values are not supported */
@@ -490,6 +510,8 @@
hash_sei_param_t s_hash_sei_params;
+ content_light_level_info_sei_params_t s_cll_info_sei_params;
+
mastering_dis_col_vol_sei_params_t s_mastering_dis_col_vol_sei_params;
user_data_registered_itu_t_t35_t as_user_data_registered_itu_t_t35[USER_DATA_MAX];
@@ -653,7 +675,7 @@
* element units that specify HRD output times of consecutive pictures in output order is constrained
* refer to Table E-6
*/
- UWORD8 au1_elemental_duration_in_tc_minus1[VPS_MAX_SUB_LAYERS];
+ UWORD16 au2_elemental_duration_in_tc_minus1[VPS_MAX_SUB_LAYERS];
/**
* specifies the HRD operational mode
@@ -834,7 +856,7 @@
* num_ticks_poc_diff_one_minus1 plus 1 specifies the number of clock ticks
* corresponding to a difference of poc values equal to 1
*/
- UWORD8 u1_num_ticks_poc_diff_one_minus1;
+ UWORD32 u4_num_ticks_poc_diff_one_minus1;
/**
* 1, specifies that the following cvs bitstream restriction parameters are present
@@ -916,6 +938,9 @@
WORD32 u4_ts;
UWORD8 u1_used_as_ref;
+ /** Used to indicate if this buffer is needed for output */
+ UWORD8 u1_pic_output_flag;
+
UWORD8 u1_free_delay_cnt;
/**
@@ -1347,12 +1372,18 @@
*/
UWORD32 b1_cb_cbf : 1;
+#ifdef ENABLE_MAIN_REXT_PROFILE
+ UWORD32 b1_cb_cbf_subtu1 : 1;
+#endif
/**
* Cr CBF
*/
UWORD32 b1_cr_cbf : 1;
+#ifdef ENABLE_MAIN_REXT_PROFILE
+ UWORD32 b1_cr_cbf_subtu1 : 1;
+#endif
/**
* Flag to indicate if it is the first TU in a CU
@@ -2038,6 +2069,19 @@
/** delta_chroma_log2_weight_denom */
WORD8 i1_chroma_log2_weight_denom;
+#ifdef ENABLE_MAIN_REXT_PROFILE
+ /** WpOffsetBdShiftY */
+ WORD8 i1_wp_ofst_bd_shift_luma;
+
+ /** WpOffsetBdShiftC */
+ WORD8 i1_wp_ofst_bd_shift_chroma;
+
+ /** WpOffsetHalfRangeY */
+ WORD32 i4_wp_ofst_half_rng_luma;
+
+ /** WpOffsetHalfRangeC */
+ WORD32 i4_wp_ofst_half_rng_chroma;
+#endif
/** luma_weight_l0_flag[ i ] */
WORD8 i1_luma_weight_l0_flag[MAX_DPB_SIZE];
@@ -2500,6 +2544,53 @@
/*************************************************************************/
WORD16 *pi2_scaling_mat;
+#ifdef ENABLE_MAIN_REXT_PROFILE
+
+ /**
+ * transform_skip_rotation_enabled_flag
+ */
+ WORD8 i1_transform_skip_rotation_enabled_flag;
+
+ /**
+ * transform_skip_context_enabled_flag
+ */
+ WORD8 i1_transform_skip_context_enabled_flag;
+
+ /**
+ * implicit_rdpcm_enabled_flag
+ */
+ WORD8 i1_implicit_rdpcm_enabled_flag;
+
+ /**
+ * explicit_rdpcm_enabled_flag
+ */
+ WORD8 i1_explicit_rdpcm_enabled_flag;
+
+ /**
+ * extended_precision_processing_flag
+ */
+ WORD8 i1_extended_precision_processing_flag;
+
+ /**
+ * intra_smoothing_disabled_flag
+ */
+ WORD8 i1_intra_smoothing_disabled_flag;
+
+ /**
+ * high_precision_offsets_enabled_flag
+ */
+ WORD8 i1_use_high_precision_pred_wt;
+
+ /**
+ * fast_rice_adaptation_enabled_flag
+ */
+ WORD8 i1_fast_rice_adaptation_enabled_flag;
+
+ /**
+ * cabac_bypass_alignment_enabled_flag
+ */
+ WORD8 i1_align_cabac_before_bypass;
+#endif
/*
* Flag indicating if the SPS is parsed
@@ -2743,6 +2834,53 @@
*/
WORD8 i1_log2_min_cu_qp_delta_size;
+#ifdef ENABLE_MAIN_REXT_PROFILE
+ /**
+ * log2_max_transform_skip_block_size_minus2
+ */
+ WORD32 i4_log2_max_transform_skip_block_size_minus2;
+
+ /**
+ * cross_component_prediction_enabled_flag
+ */
+ WORD8 i1_cross_component_prediction_enabled_flag;
+
+ /**
+ * chroma_qp_offset_list_enabled_flag
+ */
+ WORD8 i1_chroma_qp_offset_list_enabled_flag;
+
+ /**
+ * diff_cu_chroma_qp_offset_depth
+ */
+ WORD32 i4_diff_cu_chroma_qp_offset_depth;
+
+ /**
+ * chroma_qp_offset_list_len_minus1
+ */
+ WORD32 i4_chroma_qp_offset_list_len_minus1;
+
+ /**
+ * cb_qp_offset_list[]
+ */
+ WORD32 i4_cb_qp_offset_list[6];
+
+ /**
+ * cr_qp_offset_list[]
+ */
+ WORD32 i4_cr_qp_offset_list[6];
+
+ /**
+ * log2_sao_offset_scale_luma
+ */
+ WORD8 i1_log2_sao_ofst_scale_luma;
+
+ /**
+ * log2_sao_offset_scale_chroma
+ */
+ WORD8 i1_log2_sao_ofst_scale_chroma;
+
+#endif
/*
* Flag indicating if the PPS is parsed
@@ -3026,6 +3164,12 @@
*/
WORD16 i2_independent_ctb_y;
+#ifdef ENABLE_MAIN_REXT_PROFILE
+ /**
+ * cu_chroma_qp_offset_enabled_flag
+ */
+ WORD8 i1_cu_chroma_qp_offset_enabled_flag;
+#endif
UWORD8 u1_parse_data_init_done;
diff --git a/common/mips/ihevc_platform_macros.h b/common/mips/ihevc_platform_macros.h
index 6c0d49c..d94a3f4 100644
--- a/common/mips/ihevc_platform_macros.h
+++ b/common/mips/ihevc_platform_macros.h
@@ -63,7 +63,7 @@
if(u4_word)
return (__builtin_clz(u4_word));
else
- return 32;
+ return 31;
}
static inline UWORD32 CLZNZ(UWORD32 u4_word)
diff --git a/common/x86/ihevc_intra_pred_filters_ssse3_intr.c b/common/x86/ihevc_intra_pred_filters_ssse3_intr.c
index dbab80a..ea35672 100644
--- a/common/x86/ihevc_intra_pred_filters_ssse3_intr.c
+++ b/common/x86/ihevc_intra_pred_filters_ssse3_intr.c
@@ -2890,7 +2890,7 @@
__m128i row_4x32b, two_nt_4x32b, ref_main_idx_4x32b, res_temp5_4x32b, sm3;
- UWORD8 ref_tmp[2 * MAX_CU_SIZE + 2];
+ UWORD8 ref_tmp[2 * MAX_CU_SIZE + 2] = {0};
UWORD8 *ref_main;
UWORD8 *ref_temp;
UNUSED(src_strd);
@@ -3723,7 +3723,7 @@
WORD32 inv_ang, inv_ang_sum;
//WORD32 ref_main_idx, pos, fract, idx;
WORD32 ref_idx;
- UWORD8 ref_tmp[(2 * MAX_CU_SIZE) + 2];
+ UWORD8 ref_tmp[(2 * MAX_CU_SIZE) + 2] = {0};
UWORD8 *ref_main, *ref_temp;
__m128i /*fract_8x16b,*/ const_temp_8x16b, sm3;
diff --git a/common/x86/ihevc_platform_macros.h b/common/x86/ihevc_platform_macros.h
index 7b10473..a2aa98c 100644
--- a/common/x86/ihevc_platform_macros.h
+++ b/common/x86/ihevc_platform_macros.h
@@ -36,11 +36,9 @@
#ifndef _IHEVC_PLATFORM_MACROS_H_
#define _IHEVC_PLATFORM_MACROS_H_
-//#include <immintrin.h>
-
-#define CLIP_U8(x) CLIP3((x), 0, 255)
-#define CLIP_S8(x) CLIP3((x), -128, 127)
+#define CLIP_U8(x) CLIP3((x), 0, 255);
+#define CLIP_S8(x) CLIP3((x), -128, 127);
#define CLIP_U10(x) CLIP3((x), 0, 1023);
#define CLIP_S10(x) CLIP3((x), -512, 511);
@@ -48,9 +46,11 @@
#define CLIP_U12(x) CLIP3((x), 0, 4095);
#define CLIP_S12(x) CLIP3((x), -2048, 2047);
-#define CLIP_U16(x) CLIP3((x), 0, 65535)
-#define CLIP_S16(x) CLIP3((x), -32768, 32767)
+#define CLIP_U14(x) CLIP3((x), 0, 16383);
+#define CLIP_S14(x) CLIP3((x), -8192, 8191);
+#define CLIP_U16(x) CLIP3((x), 0, 65535);
+#define CLIP_S16(x) CLIP3((x), -32768, 32767);
#define SHL(x,y) (((y) < 32) ? ((x) << (y)) : 0)
@@ -65,8 +65,7 @@
((x & 0x00ff0000) >> 8) | \
((UWORD32)x >> 24);
-
-#define NOP(nop_cnt) {UWORD32 nop_i; for (nop_i = (nop_cnt) ; nop_i > 0 ; nop_i--) asm("nop");}
+#define NOP(nop_cnt) {UWORD32 nop_i; for (nop_i = 0; nop_i < (nop_cnt); nop_i++) asm("nop");} /* executes asm("nop") nop_cnt times; argument parenthesized so expression arguments bind correctly */
#define POPCNT_U32(x) __builtin_popcount(x)
@@ -78,12 +77,14 @@
if(u4_word)
return (__builtin_clz(u4_word));
else
- return 32;
+ return 31;
}
+
static INLINE UWORD32 CLZNZ(UWORD32 u4_word)
{
return (__builtin_clz(u4_word));
}
+
static INLINE UWORD32 CTZ(UWORD32 u4_word)
{
if(0 == u4_word)
@@ -104,15 +105,15 @@
******************************************************************************
*/
#define GET_POS_MSB_32(r,word) \
-{ \
+{ \
if(word) \
- { \
+ { \
r = 31 - __builtin_clz(word); \
- } \
- else \
- { \
- r = -1; \
- } \
+ } \
+ else \
+ { \
+ r = -1; \
+ } \
}
/**
@@ -138,17 +139,36 @@
* @brief returns max number of bits required to represent input word (max 32bits)
******************************************************************************
*/
-#define GETRANGE(r,word) \
-{ \
- if(word) \
- { \
- r = 32 - __builtin_clz(word); \
- } \
- else \
- { \
- r = 1; \
- } \
+#define GETRANGE(r,word) \
+{ \
+ if(word) \
+ { \
+ r = 32 - __builtin_clz(word); \
+ } \
+ else \
+ { \
+ r = 1; \
+ } \
}
+
+/**
+*****************************************************************************************************
+* @brief returns max number of bits required to represent input unsigned long long word (max 64bits)
+*****************************************************************************************************
+*/
+#define GETRANGE64(r,llword) \
+{ \
+ if(llword) \
+ { \
+ r = 64 - __builtin_clzll(llword); \
+ } \
+ else \
+ { \
+ r = 1; \
+ } \
+}
+
+
#define GCC_ENABLE 0
#if GCC_ENABLE
@@ -160,7 +180,6 @@
#endif
-
#define PREFETCH_ENABLE 1
#if PREFETCH_ENABLE
diff --git a/common/x86/ihevc_sao_ssse3_intr.c b/common/x86/ihevc_sao_ssse3_intr.c
index cffd2a9..a8b2f30 100644
--- a/common/x86/ihevc_sao_ssse3_intr.c
+++ b/common/x86/ihevc_sao_ssse3_intr.c
@@ -158,10 +158,10 @@
band_pos_16x8b = _mm_set1_epi16((WORD16)(sao_band_pos << 3));
//value set for sao_offset extraction
- tmp_set_128i_1 = _mm_set_epi8(128, 1, 128, 1, 128, 1, 128, 1, 128, 1, 128, 1, 128, 1, 128, 1);
- tmp_set_128i_2 = _mm_set_epi8(128, 2, 128, 2, 128, 2, 128, 2, 128, 2, 128, 2, 128, 2, 128, 2);
- tmp_set_128i_3 = _mm_set_epi8(128, 3, 128, 3, 128, 3, 128, 3, 128, 3, 128, 3, 128, 3, 128, 3);
- tmp_set_128i_4 = _mm_set_epi8(128, 4, 128, 4, 128, 4, 128, 4, 128, 4, 128, 4, 128, 4, 128, 4);
+ tmp_set_128i_1 = _mm_set_epi8(-128, 1, -128, 1, -128, 1, -128, 1, -128, 1, -128, 1, -128, 1, -128, 1);
+ tmp_set_128i_2 = _mm_set_epi8(-128, 2, -128, 2, -128, 2, -128, 2, -128, 2, -128, 2, -128, 2, -128, 2);
+ tmp_set_128i_3 = _mm_set_epi8(-128, 3, -128, 3, -128, 3, -128, 3, -128, 3, -128, 3, -128, 3, -128, 3);
+ tmp_set_128i_4 = _mm_set_epi8(-128, 4, -128, 4, -128, 4, -128, 4, -128, 4, -128, 4, -128, 4, -128, 4);
//loaded sao offset values
sao_offset = _mm_loadl_epi64((__m128i *)pi1_sao_offset);
@@ -481,10 +481,10 @@
//replicating sao_band_pos as 8 bit value 16 times
band_pos_u_16x8b = _mm_set1_epi16((WORD16)(sao_band_pos_u << 3));
//value set for sao_offset extraction
- tmp_set_128i_1 = _mm_set_epi8(128, 1, 128, 1, 128, 1, 128, 1, 128, 1, 128, 1, 128, 1, 128, 1);
- tmp_set_128i_2 = _mm_set_epi8(128, 2, 128, 2, 128, 2, 128, 2, 128, 2, 128, 2, 128, 2, 128, 2);
- tmp_set_128i_3 = _mm_set_epi8(128, 3, 128, 3, 128, 3, 128, 3, 128, 3, 128, 3, 128, 3, 128, 3);
- tmp_set_128i_4 = _mm_set_epi8(128, 4, 128, 4, 128, 4, 128, 4, 128, 4, 128, 4, 128, 4, 128, 4);
+ tmp_set_128i_1 = _mm_set_epi8(-128, 1, -128, 1, -128, 1, -128, 1, -128, 1, -128, 1, -128, 1, -128, 1);
+ tmp_set_128i_2 = _mm_set_epi8(-128, 2, -128, 2, -128, 2, -128, 2, -128, 2, -128, 2, -128, 2, -128, 2);
+ tmp_set_128i_3 = _mm_set_epi8(-128, 3, -128, 3, -128, 3, -128, 3, -128, 3, -128, 3, -128, 3, -128, 3);
+ tmp_set_128i_4 = _mm_set_epi8(-128, 4, -128, 4, -128, 4, -128, 4, -128, 4, -128, 4, -128, 4, -128, 4);
//loaded sao offset values
sao_offset = _mm_loadl_epi64((__m128i *)pi1_sao_offset_u);
diff --git a/decoder/ihevcd_api.c b/decoder/ihevcd_api.c
index 8abef0f..134ed31 100644
--- a/decoder/ihevcd_api.c
+++ b/decoder/ihevcd_api.c
@@ -1169,6 +1169,7 @@
pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, sizeof(iv_obj_t));
RETURN_IF((NULL == pv_buf), IV_FAIL);
+ memset(pv_buf, 0, sizeof(iv_obj_t));
*pps_codec_obj = (iv_obj_t *)pv_buf;
ps_create_op->s_ivd_create_op_t.pv_handle = *pps_codec_obj;
@@ -1205,6 +1206,7 @@
size = MAX_PROCESS_THREADS * ithread_get_handle_size();
pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
RETURN_IF((NULL == pv_buf), IV_FAIL);
+ memset(pv_buf, 0, size);
for(i = 0; i < MAX_PROCESS_THREADS; i++)
{
@@ -1217,6 +1219,7 @@
size = MIN_BITSBUF_SIZE;
pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size + 16); //Alloc extra for parse optimization
RETURN_IF((NULL == pv_buf), IV_FAIL);
+ memset(pv_buf, 0, size + 16);
ps_codec->pu1_bitsbuf_static = pv_buf;
ps_codec->u4_bitsbuf_size_static = size;
@@ -1224,24 +1227,28 @@
size = sizeof(buf_mgr_t);
pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
RETURN_IF((NULL == pv_buf), IV_FAIL);
+ memset(pv_buf, 0, size);
ps_codec->pv_disp_buf_mgr = pv_buf;
/* size for holding dpb manager context */
size = sizeof(dpb_mgr_t);
pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
RETURN_IF((NULL == pv_buf), IV_FAIL);
+ memset(pv_buf, 0, size);
ps_codec->pv_dpb_mgr = pv_buf;
/* size for holding buffer manager context */
size = sizeof(buf_mgr_t);
pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
RETURN_IF((NULL == pv_buf), IV_FAIL);
+ memset(pv_buf, 0, size);
ps_codec->pv_pic_buf_mgr = pv_buf;
/* size for holding mv buffer manager context */
size = sizeof(buf_mgr_t);
pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
RETURN_IF((NULL == pv_buf), IV_FAIL);
+ memset(pv_buf, 0, size);
ps_codec->pv_mv_buf_mgr = pv_buf;
size = MAX_VPS_CNT * sizeof(vps_t);
@@ -1277,6 +1284,7 @@
size = (MAX_SPS_CNT + MAX_PPS_CNT) * size * sizeof(WORD16);
pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
RETURN_IF((NULL == pv_buf), IV_FAIL);
+ memset(pv_buf, 0, size);
ps_codec->pi2_scaling_mat = (WORD16 *)pv_buf;
@@ -1287,6 +1295,7 @@
size = BUF_MGR_MAX_CNT * sizeof(pic_buf_t);
pv_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
RETURN_IF((NULL == pv_buf), IV_FAIL);
+ memset(pv_buf, 0, size);
ps_codec->pv_pic_buf_base = (UWORD8 *)pv_buf;
/* TO hold scratch buffers needed for each SAO context */
@@ -1298,6 +1307,7 @@
pu1_buf = pf_aligned_alloc(pv_mem_ctxt, 128, size);
RETURN_IF((NULL == pu1_buf), IV_FAIL);
+ memset(pu1_buf, 0, size);
for(i = 0; i < MAX_PROCESS_THREADS; i++)
{
@@ -1607,6 +1617,7 @@
pv_buf = ps_codec->pf_aligned_alloc(pv_mem_ctxt, 128, size);
RETURN_IF((NULL == pv_buf), IV_FAIL);
+ memset(pv_buf, 0, size);
ps_codec->pv_proc_jobq_buf = pv_buf;
ps_codec->i4_proc_jobq_buf_size = size;
@@ -1914,6 +1925,7 @@
{
pv_buf = ps_codec->pf_aligned_alloc(pv_mem_ctxt, 128, size + 16); //Alloc extra for parse optimization
RETURN_IF((NULL == pv_buf), IV_FAIL);
+ memset(pv_buf, 0, size + 16);
ps_codec->pu1_bitsbuf_dynamic = pv_buf;
ps_codec->u4_bitsbuf_size_dynamic = size;
}
@@ -1950,6 +1962,7 @@
pv_buf = ps_codec->pf_aligned_alloc(pv_mem_ctxt, 128, size);
RETURN_IF((NULL == pv_buf), IV_FAIL);
+ memset(pv_buf, 0, size);
ps_codec->pv_mv_bank_buf_base = pv_buf;
ps_codec->i4_total_mv_bank_size = size;
@@ -1969,7 +1982,7 @@
size = ihevcd_get_total_pic_buf_size(ps_codec, wd, ht);
pv_buf = ps_codec->pf_aligned_alloc(pv_mem_ctxt, 128, size);
RETURN_IF((NULL == pv_buf), IV_FAIL);
-
+ memset(pv_buf, 0, size);
ps_codec->i4_total_pic_buf_size = size;
ps_codec->pu1_ref_pic_buf_base = (UWORD8 *)pv_buf;
@@ -2112,7 +2125,7 @@
}
}
ps_create_op->s_ivd_create_op_t.u4_error_code = IVD_MEM_ALLOC_FAILED;
- ps_create_op->s_ivd_create_op_t.u4_error_code = 1 << IVD_FATALERROR;
+ ps_create_op->s_ivd_create_op_t.u4_error_code |= 1 << IVD_FATALERROR;
return IV_FAIL;
}
@@ -2229,6 +2242,7 @@
pu1_chroma_buf = ps_codec->pf_aligned_alloc(pv_mem_ctxt, 128, size);
RETURN_IF((NULL == pu1_chroma_buf), IV_FAIL);
+ memset(pu1_chroma_buf, 0, size);
ps_codec->pu1_cur_chroma_ref_buf = pu1_chroma_buf;
}
@@ -3140,7 +3154,7 @@
ps_op->u4_vui_num_units_in_tick = ps_vui->u4_vui_num_units_in_tick;
ps_op->u4_vui_time_scale = ps_vui->u4_vui_time_scale;
ps_op->u1_poc_proportional_to_timing_flag = ps_vui->u1_poc_proportional_to_timing_flag;
- ps_op->u1_num_ticks_poc_diff_one_minus1 = ps_vui->u1_num_ticks_poc_diff_one_minus1;
+ ps_op->u4_num_ticks_poc_diff_one_minus1 = ps_vui->u4_num_ticks_poc_diff_one_minus1;
ps_op->u1_bitstream_restriction_flag = ps_vui->u1_bitstream_restriction_flag;
ps_op->u1_tiles_fixed_structure_flag = ps_vui->u1_tiles_fixed_structure_flag;
ps_op->u1_motion_vectors_over_pic_boundaries_flag = ps_vui->u1_motion_vectors_over_pic_boundaries_flag;
@@ -3175,7 +3189,7 @@
{
ps_op->au1_fixed_pic_rate_general_flag[i] = ps_vui->s_vui_hrd_parameters.au1_fixed_pic_rate_general_flag[i];
ps_op->au1_fixed_pic_rate_within_cvs_flag[i] = ps_vui->s_vui_hrd_parameters.au1_fixed_pic_rate_within_cvs_flag[i];
- ps_op->au1_elemental_duration_in_tc_minus1[i] = ps_vui->s_vui_hrd_parameters.au1_elemental_duration_in_tc_minus1[i];
+ ps_op->au2_elemental_duration_in_tc_minus1[i] = ps_vui->s_vui_hrd_parameters.au2_elemental_duration_in_tc_minus1[i];
ps_op->au1_low_delay_hrd_flag[i] = ps_vui->s_vui_hrd_parameters.au1_low_delay_hrd_flag[i];
ps_op->au1_cpb_cnt_minus1[i] = ps_vui->s_vui_hrd_parameters.au1_cpb_cnt_minus1[i];
}
diff --git a/decoder/ihevcd_bitstream.c b/decoder/ihevcd_bitstream.c
index 3b8d24f..21f1b2e 100644
--- a/decoder/ihevcd_bitstream.c
+++ b/decoder/ihevcd_bitstream.c
@@ -247,8 +247,8 @@
* then subtract abs_numbits from offset and add 32 and move cur_word to nxt_word
* and load cur_word appropriately and decrement pu4_buf
*/
- ps_bitstrm->u4_bit_ofst -= abs_numbits;
ps_bitstrm->u4_bit_ofst += 32;
+ ps_bitstrm->u4_bit_ofst -= abs_numbits;
ps_bitstrm->pu4_buf--;
val = *(ps_bitstrm->pu4_buf - 2);
diff --git a/decoder/ihevcd_cxa.h b/decoder/ihevcd_cxa.h
index 21b2023..f81a7eb 100644
--- a/decoder/ihevcd_cxa.h
+++ b/decoder/ihevcd_cxa.h
@@ -45,6 +45,7 @@
/*****************************************************************************/
/* Constant Macros */
/*****************************************************************************/
+#define IVD_ERROR_MASK 0xFF
/*****************************************************************************/
/* Function Macros */
@@ -787,7 +788,7 @@
* num_ticks_poc_diff_one_minus1 plus 1 specifies the number of clock ticks
* corresponding to a difference of poc values equal to 1
*/
- UWORD8 u1_num_ticks_poc_diff_one_minus1;
+ UWORD32 u4_num_ticks_poc_diff_one_minus1;
/**
* 1, specifies that the following cvs bitstream restriction parameters are present
@@ -955,7 +956,7 @@
* element units that specify HRD output times of consecutive pictures in output order is constrained
* refer to Table E-6
*/
- UWORD8 au1_elemental_duration_in_tc_minus1[6];
+ UWORD16 au2_elemental_duration_in_tc_minus1[6];
/**
* specifies the HRD operational mode
diff --git a/decoder/ihevcd_decode.c b/decoder/ihevcd_decode.c
index d2ea7a5..1295687 100644
--- a/decoder/ihevcd_decode.c
+++ b/decoder/ihevcd_decode.c
@@ -137,6 +137,10 @@
case IHEVCD_NUM_EXTRA_DISP_UNSUPPORTED:
case IHEVCD_INSUFFICIENT_MEM_MVBANK:
case IHEVCD_INSUFFICIENT_MEM_PICBUF:
+ case IHEVCD_UNSUPPORTED_CHROMA_FMT_IDC:
+ case IHEVCD_UNSUPPORTED_BIT_DEPTH:
+ case IVD_MEM_ALLOC_FAILED:
+ case IVD_STREAM_WIDTH_HEIGHT_NOT_SUPPORTED:
error_code |= 1 << IVD_FATALERROR;
break;
case IHEVCD_INVALID_DISP_STRD:
@@ -144,8 +148,6 @@
case IHEVCD_UNSUPPORTED_VPS_ID:
case IHEVCD_UNSUPPORTED_SPS_ID:
case IHEVCD_UNSUPPORTED_PPS_ID:
- case IHEVCD_UNSUPPORTED_CHROMA_FMT_IDC:
- case IHEVCD_UNSUPPORTED_BIT_DEPTH:
case IHEVCD_BUF_MGR_ERROR:
case IHEVCD_NO_FREE_MVBANK:
case IHEVCD_NO_FREE_PICBUF:
@@ -389,6 +391,8 @@
/* Initialize error code */
ps_codec->i4_error_code = 0;
+ /* Initialize bytes remaining */
+ ps_codec->i4_bytes_remaining = 0;
ps_dec_ip = (ivd_video_decode_ip_t *)pv_api_ip;
ps_dec_op = (ivd_video_decode_op_t *)pv_api_op;
@@ -715,7 +719,7 @@
/* Free any dynamic buffers that are allocated */
ihevcd_free_dynamic_bufs(ps_codec);
ps_codec->i4_error_code = IVD_MEM_ALLOC_FAILED;
- ps_dec_op->u4_error_code |= 1 << IVD_FATALERROR;
+ ps_dec_op->u4_error_code = 1 << IVD_FATALERROR;
ps_dec_op->u4_error_code |= IVD_MEM_ALLOC_FAILED;
return IV_FAIL;
diff --git a/decoder/ihevcd_get_mv.c b/decoder/ihevcd_get_mv.c
index 25ddbd9..5914ed4 100644
--- a/decoder/ihevcd_get_mv.c
+++ b/decoder/ihevcd_get_mv.c
@@ -509,13 +509,17 @@
WORD32 ctb_row, ctb_col, index_pic_map, index_nbr_map;
WORD32 first_pu_of_ctb;
first_pu_of_ctb = pu4_nbr_pu_idx[1 + nbr_pu_idx_strd];
+ UWORD32 cur_ctb_ht_in_min_pu = MIN(((ps_sps->i2_pic_height_in_luma_samples
+ - (ps_mv_ctxt->i4_ctb_y << ps_sps->i1_log2_ctb_size)) / MIN_PU_SIZE), ctb_size_in_min_pu);
+ UWORD32 cur_ctb_wd_in_min_pu = MIN(((ps_sps->i2_pic_width_in_luma_samples
+ - (ps_mv_ctxt->i4_ctb_x << ps_sps->i1_log2_ctb_size)) / MIN_PU_SIZE), ctb_size_in_min_pu);
index_pic_map = 0 * ctb_size_in_min_pu + 0;
index_nbr_map = (0 + 1) * nbr_pu_idx_strd + (0 + 1);
- for(ctb_row = 0; ctb_row < ctb_size_in_min_pu; ctb_row++)
+ for(ctb_row = 0; ctb_row < cur_ctb_ht_in_min_pu; ctb_row++)
{
- for(ctb_col = 0; ctb_col < ctb_size_in_min_pu; ctb_col++)
+ for(ctb_col = 0; ctb_col < cur_ctb_wd_in_min_pu; ctb_col++)
{
pu1_pic_pu_map_ctb[index_pic_map + ctb_col] = pu4_nbr_pu_idx[index_nbr_map + ctb_col]
- first_pu_of_ctb;
diff --git a/decoder/ihevcd_nal.c b/decoder/ihevcd_nal.c
index d00050d..18d9a5d 100644
--- a/decoder/ihevcd_nal.c
+++ b/decoder/ihevcd_nal.c
@@ -301,7 +301,7 @@
unused = ihevcd_bits_get(ps_bitstrm, 6);
/* Syntax : nuh_temporal_id_plus1 */
- ps_nal->i1_nuh_temporal_id = ihevcd_bits_get(ps_bitstrm, 3) - 1;
+ ps_nal->i1_nuh_temporal_id = (WORD32)ihevcd_bits_get(ps_bitstrm, 3) - 1;
return ret;
diff --git a/decoder/ihevcd_parse_headers.c b/decoder/ihevcd_parse_headers.c
index 47edb1f..2d1f567 100644
--- a/decoder/ihevcd_parse_headers.c
+++ b/decoder/ihevcd_parse_headers.c
@@ -42,6 +42,8 @@
#include <stdlib.h>
#include <string.h>
#include <assert.h>
+#include <limits.h>
+#include <stdint.h>
#include "ihevc_typedefs.h"
#include "iv.h"
@@ -150,17 +152,27 @@
{
IHEVCD_ERROR_T ret = (IHEVCD_ERROR_T)IHEVCD_SUCCESS;
WORD32 value;
+ UWORD32 u4_value;
WORD32 i;
pred_wt_ofst_t *ps_wt_ofst = &ps_slice_hdr->s_wt_ofst;
UNUSED(ps_pps);
- UEV_PARSE("luma_log2_weight_denom", value, ps_bitstrm);
- ps_wt_ofst->i1_luma_log2_weight_denom = value;
+ UEV_PARSE("luma_log2_weight_denom", u4_value, ps_bitstrm);
+ if(u4_value > 7)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
+ ps_wt_ofst->i1_luma_log2_weight_denom = u4_value;
if(ps_sps->i1_chroma_format_idc != 0)
{
SEV_PARSE("delta_chroma_log2_weight_denom", value, ps_bitstrm);
+ if(((ps_wt_ofst->i1_luma_log2_weight_denom + value) < 0) ||
+ ((ps_wt_ofst->i1_luma_log2_weight_denom + value) > 7))
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
ps_wt_ofst->i1_chroma_log2_weight_denom = ps_wt_ofst->i1_luma_log2_weight_denom + value;
}
@@ -194,11 +206,18 @@
if(ps_wt_ofst->i1_luma_weight_l0_flag[i])
{
SEV_PARSE("delta_luma_weight_l0[ i ]", value, ps_bitstrm);
-
+ if( value < -128 || value > 127 )
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
ps_wt_ofst->i2_luma_weight_l0[i] = (1 << ps_wt_ofst->i1_luma_log2_weight_denom) + value;
SEV_PARSE("luma_offset_l0[ i ]", value, ps_bitstrm);
+ if( value < -128 || value > 127 )
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
ps_wt_ofst->i2_luma_offset_l0[i] = value;
}
@@ -212,20 +231,36 @@
WORD32 ofst;
WORD32 shift = (1 << (BIT_DEPTH_CHROMA - 1));
SEV_PARSE("delta_chroma_weight_l0[ i ][ j ]", value, ps_bitstrm);
+ if(value < -128 || value > 127)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
ps_wt_ofst->i2_chroma_weight_l0_cb[i] = (1 << ps_wt_ofst->i1_chroma_log2_weight_denom) + value;
SEV_PARSE("delta_chroma_offset_l0[ i ][ j ]", value, ps_bitstrm);
+ if( value < -512 || value > 511 )
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
ofst = ((shift * ps_wt_ofst->i2_chroma_weight_l0_cb[i]) >> ps_wt_ofst->i1_chroma_log2_weight_denom);
ofst = value - ofst + shift;
ps_wt_ofst->i2_chroma_offset_l0_cb[i] = CLIP_S8(ofst);
SEV_PARSE("delta_chroma_weight_l0[ i ][ j ]", value, ps_bitstrm);
+ if(value < -128 || value > 127)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
ps_wt_ofst->i2_chroma_weight_l0_cr[i] = (1 << ps_wt_ofst->i1_chroma_log2_weight_denom) + value;
SEV_PARSE("delta_chroma_offset_l0[ i ][ j ]", value, ps_bitstrm);
+ if( value < -512 || value > 511 )
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
ofst = ((shift * ps_wt_ofst->i2_chroma_weight_l0_cr[i]) >> ps_wt_ofst->i1_chroma_log2_weight_denom);
ofst = value - ofst + shift;
@@ -270,11 +305,18 @@
if(ps_wt_ofst->i1_luma_weight_l1_flag[i])
{
SEV_PARSE("delta_luma_weight_l1[ i ]", value, ps_bitstrm);
-
+ if( value < -128 || value > 127 )
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
ps_wt_ofst->i2_luma_weight_l1[i] = (1 << ps_wt_ofst->i1_luma_log2_weight_denom) + value;
SEV_PARSE("luma_offset_l1[ i ]", value, ps_bitstrm);
+ if( value < -128 || value > 127 )
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
ps_wt_ofst->i2_luma_offset_l1[i] = value;
}
@@ -289,20 +331,36 @@
WORD32 ofst;
WORD32 shift = (1 << (BIT_DEPTH_CHROMA - 1));
SEV_PARSE("delta_chroma_weight_l1[ i ][ j ]", value, ps_bitstrm);
+ if(value < -128 || value > 127)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
ps_wt_ofst->i2_chroma_weight_l1_cb[i] = (1 << ps_wt_ofst->i1_chroma_log2_weight_denom) + value;;
SEV_PARSE("delta_chroma_offset_l1[ i ][ j ]", value, ps_bitstrm);
+ if( value < -512 || value > 511 )
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
ofst = ((shift * ps_wt_ofst->i2_chroma_weight_l1_cb[i]) >> ps_wt_ofst->i1_chroma_log2_weight_denom);
ofst = value - ofst + shift;
ps_wt_ofst->i2_chroma_offset_l1_cb[i] = CLIP_S8(ofst);;
SEV_PARSE("delta_chroma_weight_l1[ i ][ j ]", value, ps_bitstrm);
+ if(value < -128 || value > 127)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
ps_wt_ofst->i2_chroma_weight_l1_cr[i] = (1 << ps_wt_ofst->i1_chroma_log2_weight_denom) + value;
SEV_PARSE("delta_chroma_offset_l1[ i ][ j ]", value, ps_bitstrm);
+ if( value < -512 || value > 511 )
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
ofst = ((shift * ps_wt_ofst->i2_chroma_weight_l1_cr[i]) >> ps_wt_ofst->i1_chroma_log2_weight_denom);
ofst = value - ofst + shift;
@@ -358,7 +416,7 @@
stref_picset_t *ps_stref_picset)
{
IHEVCD_ERROR_T ret = (IHEVCD_ERROR_T)IHEVCD_SUCCESS;
- WORD32 value;
+ UWORD32 value;
stref_picset_t *ps_stref_picset_ref;
WORD32 delta_idx, delta_rps;
WORD32 r_idx;
@@ -383,6 +441,10 @@
if(idx == num_short_term_ref_pic_sets)
{
UEV_PARSE("delta_idx_minus1", value, ps_bitstrm);
+ if(value > num_short_term_ref_pic_sets - 1)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
delta_idx = value + 1;
}
else
@@ -398,6 +460,10 @@
delta_rps_sign = value;
UEV_PARSE("abs_delta_rps_minus1", value, ps_bitstrm);
+ if(value > 32767)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
abs_delta_rps = value + 1;
delta_rps = (1 - 2 * delta_rps_sign) * (abs_delta_rps);
@@ -492,16 +558,18 @@
WORD32 poc;
UEV_PARSE("num_negative_pics", value, ps_bitstrm);
+ if(value > MAX_DPB_SIZE - 1)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
ps_stref_picset->i1_num_neg_pics = value;
- ps_stref_picset->i1_num_neg_pics = CLIP3(ps_stref_picset->i1_num_neg_pics,
- 0,
- MAX_DPB_SIZE - 1);
UEV_PARSE("num_positive_pics", value, ps_bitstrm);
+ if(value > (MAX_DPB_SIZE - 1 - ps_stref_picset->i1_num_neg_pics))
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
ps_stref_picset->i1_num_pos_pics = value;
- ps_stref_picset->i1_num_pos_pics = CLIP3(ps_stref_picset->i1_num_pos_pics,
- 0,
- (MAX_DPB_SIZE - 1 - ps_stref_picset->i1_num_neg_pics));
ps_stref_picset->i1_num_delta_pocs =
ps_stref_picset->i1_num_neg_pics +
@@ -511,7 +579,11 @@
for(i = 0; i < ps_stref_picset->i1_num_neg_pics; i++)
{
UEV_PARSE("delta_poc_s0_minus1", value, ps_bitstrm);
- poc = prev_poc - (value + 1);
+ if(value > 32767)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
+ poc = prev_poc - ((WORD32)(value + 1));
prev_poc = poc;
ps_stref_picset->ai2_delta_poc[i] = poc;
@@ -525,6 +597,10 @@
i++)
{
UEV_PARSE("delta_poc_s1_minus1", value, ps_bitstrm);
+ if(value > 32767)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
poc = prev_poc + (value + 1);
prev_poc = poc;
ps_stref_picset->ai2_delta_poc[i] = poc;
@@ -551,12 +627,27 @@
for(i = 0; i <= cpb_cnt; i++)
{
UEV_PARSE("bit_rate_value_minus1[ i ]", ps_sub_layer_hrd_params->au4_bit_rate_value_minus1[i], ps_bitstrm);
+ if(ps_sub_layer_hrd_params->au4_bit_rate_value_minus1[i] > UINT_MAX - 1)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
UEV_PARSE("cpb_size_value_minus1[ i ]", ps_sub_layer_hrd_params->au4_cpb_size_value_minus1[i], ps_bitstrm);
-
+ if(ps_sub_layer_hrd_params->au4_cpb_size_value_minus1[i] > UINT_MAX - 1)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
if(sub_pic_cpb_params_present_flag)
{
UEV_PARSE("cpb_size_du_value_minus1[ i ]", ps_sub_layer_hrd_params->au4_cpb_size_du_value_minus1[i], ps_bitstrm);
+ if(ps_sub_layer_hrd_params->au4_cpb_size_du_value_minus1[i] > UINT_MAX - 1)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
UEV_PARSE("bit_rate_du_value_minus1[ i ]", ps_sub_layer_hrd_params->au4_bit_rate_du_value_minus1[i], ps_bitstrm);
+ if(ps_sub_layer_hrd_params->au4_bit_rate_du_value_minus1[i] > UINT_MAX - 1)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
}
BITS_PARSE("cbr_flag[ i ]", ps_sub_layer_hrd_params->au1_cbr_flag[i], ps_bitstrm, 1);
}
@@ -624,7 +715,7 @@
BITS_PARSE("fixed_pic_rate_general_flag[ i ]", ps_hrd->au1_fixed_pic_rate_general_flag[i], ps_bitstrm, 1);
ps_hrd->au1_fixed_pic_rate_within_cvs_flag[i] = 1;
- ps_hrd->au1_elemental_duration_in_tc_minus1[i] = 0;
+ ps_hrd->au2_elemental_duration_in_tc_minus1[i] = 0;
ps_hrd->au1_low_delay_hrd_flag[i] = 0;
ps_hrd->au1_cpb_cnt_minus1[i] = 0;
@@ -633,7 +724,11 @@
if(ps_hrd->au1_fixed_pic_rate_within_cvs_flag[i])
{
- UEV_PARSE("elemental_duration_in_tc_minus1[ i ]", ps_hrd->au1_elemental_duration_in_tc_minus1[i], ps_bitstrm);
+ UEV_PARSE("elemental_duration_in_tc_minus1[ i ]", ps_hrd->au2_elemental_duration_in_tc_minus1[i], ps_bitstrm);
+ if(ps_hrd->au2_elemental_duration_in_tc_minus1[i] > 2047)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
}
else
{
@@ -641,10 +736,13 @@
}
if(!ps_hrd->au1_low_delay_hrd_flag[i])
+ {
UEV_PARSE("cpb_cnt_minus1[ i ]", ps_hrd->au1_cpb_cnt_minus1[i], ps_bitstrm);
-
- if(ps_hrd->au1_cpb_cnt_minus1[i] >= (MAX_CPB_CNT - 1))
- return IHEVCD_INVALID_PARAMETER;
+ if(ps_hrd->au1_cpb_cnt_minus1[i] > (MAX_CPB_CNT - 1))
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
+ }
if(ps_hrd->u1_nal_hrd_parameters_present_flag)
ihevcd_parse_sub_layer_hrd_parameters(ps_bitstrm,
@@ -791,7 +889,15 @@
if(ps_vui->u1_chroma_loc_info_present_flag)
{
UEV_PARSE("chroma_sample_loc_type_top_field", ps_vui->u1_chroma_sample_loc_type_top_field, ps_bitstrm);
+ if(ps_vui->u1_chroma_sample_loc_type_top_field > CHROMA_FMT_IDC_YUV444_PLANES + 1)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
UEV_PARSE("chroma_sample_loc_type_bottom_field", ps_vui->u1_chroma_sample_loc_type_bottom_field, ps_bitstrm);
+ if(ps_vui->u1_chroma_sample_loc_type_bottom_field > CHROMA_FMT_IDC_YUV444_PLANES + 1)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
}
BITS_PARSE("neutral_chroma_indication_flag", ps_vui->u1_neutral_chroma_indication_flag, ps_bitstrm, 1);
@@ -817,8 +923,13 @@
BITS_PARSE("vui_time_scale", ps_vui->u4_vui_time_scale, ps_bitstrm, 32);
BITS_PARSE("vui_poc_proportional_to_timing_flag", ps_vui->u1_poc_proportional_to_timing_flag, ps_bitstrm, 1);
if(ps_vui->u1_poc_proportional_to_timing_flag)
- UEV_PARSE("vui_num_ticks_poc_diff_one_minus1", ps_vui->u1_num_ticks_poc_diff_one_minus1, ps_bitstrm);
-
+ {
+ UEV_PARSE("vui_num_ticks_poc_diff_one_minus1", ps_vui->u4_num_ticks_poc_diff_one_minus1, ps_bitstrm);
+ if(ps_vui->u4_num_ticks_poc_diff_one_minus1 > UINT_MAX - 1)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
+ }
BITS_PARSE("vui_hrd_parameters_present_flag", ps_vui->u1_vui_hrd_parameters_present_flag, ps_bitstrm, 1);
if(ps_vui->u1_vui_hrd_parameters_present_flag)
{
@@ -843,10 +954,30 @@
BITS_PARSE("restricted_ref_pic_lists_flag", ps_vui->u1_restricted_ref_pic_lists_flag, ps_bitstrm, 1);
UEV_PARSE("min_spatial_segmentation_idc", ps_vui->u4_min_spatial_segmentation_idc, ps_bitstrm);
+ if(ps_vui->u4_min_spatial_segmentation_idc > 4095)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
UEV_PARSE("max_bytes_per_pic_denom", ps_vui->u1_max_bytes_per_pic_denom, ps_bitstrm);
+ if(ps_vui->u1_max_bytes_per_pic_denom > 16)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
UEV_PARSE("max_bits_per_min_cu_denom", ps_vui->u1_max_bits_per_mincu_denom, ps_bitstrm);
+ if(ps_vui->u1_max_bits_per_mincu_denom > 16)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
UEV_PARSE("log2_max_mv_length_horizontal", ps_vui->u1_log2_max_mv_length_horizontal, ps_bitstrm);
+ if(ps_vui->u1_log2_max_mv_length_horizontal > 16) /* check the field just parsed, not max_bits_per_mincu_denom */
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
UEV_PARSE("log2_max_mv_length_vertical", ps_vui->u1_log2_max_mv_length_vertical, ps_bitstrm);
+ if(ps_vui->u1_log2_max_mv_length_vertical > 15) /* check the field just parsed, not max_bits_per_mincu_denom */
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
}
return ret;
@@ -1036,6 +1167,7 @@
WORD32 size_id;
WORD32 matrix_id;
WORD32 value, dc_value = 0;
+ UWORD32 u4_value;
WORD32 next_coef;
WORD32 coef_num;
WORD32 i, j, offset;
@@ -1058,13 +1190,16 @@
if(!scaling_list_pred_mode_flag)
{
WORD32 num_elements;
- UEV_PARSE("scaling_list_pred_matrix_id_delta", value,
+ UEV_PARSE("scaling_list_pred_matrix_id_delta", u4_value,
ps_bitstrm);
- value = CLIP3(value, 0, matrix_id);
+ if(u4_value > matrix_id)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
num_elements = (1 << (4 + (size_id << 1)));
- if(0 != value)
- memcpy(pi2_scaling_mat_offset, pi2_scaling_mat_offset - value * num_elements, num_elements * sizeof(WORD16));
+ if(0 != u4_value)
+ memmove(pi2_scaling_mat_offset, pi2_scaling_mat_offset - u4_value * num_elements, num_elements * sizeof(WORD16));
}
else
{
@@ -1075,7 +1210,10 @@
{
SEV_PARSE("scaling_list_dc_coef_minus8", value,
ps_bitstrm);
-
+ if(value < -7 || value > 247)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
next_coef = value + 8;
dc_value = next_coef;
}
@@ -1087,6 +1225,10 @@
{
SEV_PARSE("scaling_list_delta_coef",
scaling_list_delta_coef, ps_bitstrm);
+ if((scaling_list_delta_coef < -256) || (scaling_list_delta_coef > 255))
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
next_coef = (next_coef + scaling_list_delta_coef + 256)
% 256;
pi2_scaling_mat_offset[scan_table[i]] = next_coef;
@@ -1100,6 +1242,10 @@
{
SEV_PARSE("scaling_list_delta_coef",
scaling_list_delta_coef, ps_bitstrm);
+ if((scaling_list_delta_coef < -256) || (scaling_list_delta_coef > 255))
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
next_coef = (next_coef + scaling_list_delta_coef + 256)
% 256;
@@ -1120,6 +1266,10 @@
{
SEV_PARSE("scaling_list_delta_coef",
scaling_list_delta_coef, ps_bitstrm);
+ if((scaling_list_delta_coef < -256) || (scaling_list_delta_coef > 255))
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
next_coef = (next_coef + scaling_list_delta_coef + 256)
% 256;
@@ -1169,7 +1319,7 @@
{
IHEVCD_ERROR_T ret = (IHEVCD_ERROR_T)IHEVCD_SUCCESS;
WORD32 i;
- WORD32 value;
+ UWORD32 value;
WORD32 vps_id;
vps_t *ps_vps;
bitstrm_t *ps_bitstrm = &ps_codec->s_parse.s_bitstrm;
@@ -1196,6 +1346,10 @@
BITS_PARSE("vps_max_sub_layers_minus1", value, ps_bitstrm, 3);
+ if(value > SPS_MAX_SUB_LAYERS - 1)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
ps_vps->i1_vps_max_sub_layers = value + 1;
ASSERT(ps_vps->i1_vps_max_sub_layers < VPS_MAX_SUB_LAYERS);
@@ -1216,13 +1370,25 @@
for(; i < ps_vps->i1_vps_max_sub_layers; i++)
{
UEV_PARSE("vps_max_dec_pic_buffering[i]", value, ps_bitstrm);
+ if(value > MAX_DPB_SIZE)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
ps_vps->ai1_vps_max_dec_pic_buffering[i] = value;
/* vps_num_reorder_pics (no max) used in print in order to match with HM */
UEV_PARSE("vps_num_reorder_pics[i]", value, ps_bitstrm);
+ if(value >= ps_vps->ai1_vps_max_dec_pic_buffering[i])
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
ps_vps->ai1_vps_max_num_reorder_pics[i] = value;
UEV_PARSE("vps_max_latency_increase[i]", value, ps_bitstrm);
+ if(value > UINT_MAX - 2)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
ps_vps->ai1_vps_max_latency_increase[i] = value;
}
@@ -1232,6 +1398,10 @@
//ps_vps->i1_vps_max_layer_id = value;
UEV_PARSE("vps_num_layer_sets_minus1", value, ps_bitstrm);
+ if(value > 1023)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
//ps_vps->i1_vps_num_layer_sets = value + 1;
BITS_PARSE("vps_timing_info_present_flag", value, ps_bitstrm, 1);
@@ -1267,7 +1437,7 @@
IHEVCD_ERROR_T ihevcd_parse_sps(codec_t *ps_codec)
{
IHEVCD_ERROR_T ret = (IHEVCD_ERROR_T)IHEVCD_SUCCESS;
- WORD32 value;
+ UWORD32 value;
WORD32 i;
WORD32 vps_id;
@@ -1281,12 +1451,18 @@
BITS_PARSE("video_parameter_set_id", value, ps_bitstrm, 4);
+ if(value > MAX_VPS_CNT - 1)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
vps_id = value;
- vps_id = CLIP3(vps_id, 0, MAX_VPS_CNT - 1);
BITS_PARSE("sps_max_sub_layers_minus1", value, ps_bitstrm, 3);
+ if(value > SPS_MAX_SUB_LAYERS - 1)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
sps_max_sub_layers = value + 1;
- sps_max_sub_layers = CLIP3(sps_max_sub_layers, 1, 7);
BITS_PARSE("sps_temporal_id_nesting_flag", value, ps_bitstrm, 1);
sps_temporal_id_nesting_flag = value;
@@ -1297,7 +1473,6 @@
UEV_PARSE("seq_parameter_set_id", value, ps_bitstrm);
sps_id = value;
-
if((sps_id >= MAX_SPS_CNT) || (sps_id < 0))
{
if(ps_codec->i4_sps_done)
@@ -1322,6 +1497,10 @@
memcpy(&ps_sps->s_ptl, &s_ptl, sizeof(profile_tier_lvl_info_t));
UEV_PARSE("chroma_format_idc", value, ps_bitstrm);
+ if(value > 3)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
ps_sps->i1_chroma_format_idc = value;
if(ps_sps->i1_chroma_format_idc != CHROMA_FMT_IDC_YUV420)
@@ -1341,9 +1520,17 @@
}
UEV_PARSE("pic_width_in_luma_samples", value, ps_bitstrm);
+ if(value > INT16_MAX)
+ {
+ return (IHEVCD_ERROR_T)IVD_STREAM_WIDTH_HEIGHT_NOT_SUPPORTED;
+ }
ps_sps->i2_pic_width_in_luma_samples = value;
UEV_PARSE("pic_height_in_luma_samples", value, ps_bitstrm);
+ if(value > INT16_MAX)
+ {
+ return (IHEVCD_ERROR_T)IVD_STREAM_WIDTH_HEIGHT_NOT_SUPPORTED;
+ }
ps_sps->i2_pic_height_in_luma_samples = value;
if((0 >= ps_sps->i2_pic_width_in_luma_samples) || (0 >= ps_sps->i2_pic_height_in_luma_samples))
@@ -1356,28 +1543,28 @@
{
UEV_PARSE("pic_crop_left_offset", value, ps_bitstrm);
- if (value < 0 || value >= ps_sps->i2_pic_width_in_luma_samples)
+ if (value >= ps_sps->i2_pic_width_in_luma_samples)
{
return IHEVCD_INVALID_PARAMETER;
}
ps_sps->i2_pic_crop_left_offset = value;
UEV_PARSE("pic_crop_right_offset", value, ps_bitstrm);
- if (value < 0 || value >= ps_sps->i2_pic_width_in_luma_samples)
+ if (value >= ps_sps->i2_pic_width_in_luma_samples)
{
return IHEVCD_INVALID_PARAMETER;
}
ps_sps->i2_pic_crop_right_offset = value;
UEV_PARSE("pic_crop_top_offset", value, ps_bitstrm);
- if (value < 0 || value >= ps_sps->i2_pic_height_in_luma_samples)
+ if (value >= ps_sps->i2_pic_height_in_luma_samples)
{
return IHEVCD_INVALID_PARAMETER;
}
ps_sps->i2_pic_crop_top_offset = value;
UEV_PARSE("pic_crop_bottom_offset", value, ps_bitstrm);
- if (value < 0 || value >= ps_sps->i2_pic_height_in_luma_samples)
+ if (value >= ps_sps->i2_pic_height_in_luma_samples)
{
return IHEVCD_INVALID_PARAMETER;
}
@@ -1401,11 +1588,12 @@
return IHEVCD_UNSUPPORTED_BIT_DEPTH;
UEV_PARSE("log2_max_pic_order_cnt_lsb_minus4", value, ps_bitstrm);
- if(value < 0 || value > 12)
+ if(value > 12)
return IHEVCD_INVALID_PARAMETER;
ps_sps->i1_log2_max_pic_order_cnt_lsb = value + 4;
BITS_PARSE("sps_sub_layer_ordering_info_present_flag", value, ps_bitstrm, 1);
+
ps_sps->i1_sps_sub_layer_ordering_info_present_flag = value;
@@ -1413,20 +1601,24 @@
for(; i < ps_sps->i1_sps_max_sub_layers; i++)
{
UEV_PARSE("max_dec_pic_buffering", value, ps_bitstrm);
- if(value < 0 || (value + 1) > MAX_DPB_SIZE)
+ if(value > (MAX_DPB_SIZE - 1))
{
return IHEVCD_INVALID_PARAMETER;
}
ps_sps->ai1_sps_max_dec_pic_buffering[i] = value + 1;
UEV_PARSE("num_reorder_pics", value, ps_bitstrm);
- if(value < 0 || value > ps_sps->ai1_sps_max_dec_pic_buffering[i])
+ if(value >= ps_sps->ai1_sps_max_dec_pic_buffering[i])
{
return IHEVCD_INVALID_PARAMETER;
}
ps_sps->ai1_sps_max_num_reorder_pics[i] = value;
UEV_PARSE("max_latency_increase", value, ps_bitstrm);
+ if(value > UINT_MAX - 2)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
ps_sps->ai1_sps_max_latency_increase[i] = value;
}
@@ -1464,9 +1656,14 @@
return IHEVCD_INVALID_PARAMETER;
}
ps_sps->i1_log2_min_coding_block_size = value + 3;
+ if((ps_sps->i2_pic_width_in_luma_samples % (1 << ps_sps->i1_log2_min_coding_block_size) != 0) ||
+ (ps_sps->i2_pic_height_in_luma_samples % (1 << ps_sps->i1_log2_min_coding_block_size) != 0))
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
UEV_PARSE("log2_diff_max_min_coding_block_size", value, ps_bitstrm);
- if(value > (LOG2_MAX_CU_SIZE - LOG2_MIN_CU_SIZE))
+ if(value > (LOG2_MAX_CU_SIZE - ps_sps->i1_log2_min_coding_block_size))
{
return IHEVCD_INVALID_PARAMETER;
}
@@ -1474,6 +1671,12 @@
ctb_log2_size_y = ps_sps->i1_log2_min_coding_block_size + ps_sps->i1_log2_diff_max_min_coding_block_size;
+ if((ctb_log2_size_y < LOG2_MIN_CTB_SIZE) || (ctb_log2_size_y > LOG2_MAX_CTB_SIZE))
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
+ ps_sps->i1_log2_ctb_size = ctb_log2_size_y;
+
UEV_PARSE("log2_min_transform_block_size_minus2", value, ps_bitstrm);
if(value > (LOG2_MAX_TU_SIZE - 2))
{
@@ -1486,7 +1689,7 @@
}
UEV_PARSE("log2_diff_max_min_transform_block_size", value, ps_bitstrm);
- if(value > (LOG2_MAX_TU_SIZE - LOG2_MIN_TU_SIZE))
+ if(value > (LOG2_MAX_TU_SIZE - ps_sps->i1_log2_min_transform_block_size))
{
return IHEVCD_INVALID_PARAMETER;
}
@@ -1495,23 +1698,7 @@
ps_sps->i1_log2_max_transform_block_size = ps_sps->i1_log2_min_transform_block_size +
ps_sps->i1_log2_diff_max_min_transform_block_size;
- if ((ps_sps->i1_log2_max_transform_block_size < 0) ||
- (ps_sps->i1_log2_max_transform_block_size > MIN(ctb_log2_size_y, 5)))
- {
- return IHEVCD_INVALID_PARAMETER;
- }
-
- ps_sps->i1_log2_ctb_size = ps_sps->i1_log2_min_coding_block_size +
- ps_sps->i1_log2_diff_max_min_coding_block_size;
-
- if((ps_sps->i1_log2_min_coding_block_size < 3) ||
- (ps_sps->i1_log2_min_transform_block_size < 2) ||
- (ps_sps->i1_log2_diff_max_min_transform_block_size < 0) ||
- (ps_sps->i1_log2_max_transform_block_size > ps_sps->i1_log2_ctb_size) ||
- (ps_sps->i1_log2_ctb_size < 4) ||
- (ps_sps->i1_log2_ctb_size > 6) ||
- (ps_sps->i2_pic_width_in_luma_samples % (1 << ps_sps->i1_log2_min_coding_block_size) != 0) ||
- (ps_sps->i2_pic_height_in_luma_samples % (1 << ps_sps->i1_log2_min_coding_block_size) != 0))
+ if(ps_sps->i1_log2_max_transform_block_size > ps_sps->i1_log2_ctb_size)
{
return IHEVCD_INVALID_PARAMETER;
}
@@ -1520,14 +1707,14 @@
ps_sps->i1_log2_diff_max_min_pcm_coding_block_size = 0;
UEV_PARSE("max_transform_hierarchy_depth_inter", value, ps_bitstrm);
- if(value < 0 || value > (ps_sps->i1_log2_ctb_size - ps_sps->i1_log2_min_transform_block_size))
+ if(value > (ps_sps->i1_log2_ctb_size - ps_sps->i1_log2_min_transform_block_size))
{
return IHEVCD_INVALID_PARAMETER;
}
ps_sps->i1_max_transform_hierarchy_depth_inter = value;
UEV_PARSE("max_transform_hierarchy_depth_intra", value, ps_bitstrm);
- if(value < 0 || value > (ps_sps->i1_log2_ctb_size - ps_sps->i1_log2_min_transform_block_size))
+ if(value > (ps_sps->i1_log2_ctb_size - ps_sps->i1_log2_min_transform_block_size))
{
return IHEVCD_INVALID_PARAMETER;
}
@@ -1569,16 +1756,24 @@
ps_sps->i1_pcm_sample_bit_depth_chroma = value + 1;
UEV_PARSE("log2_min_pcm_coding_block_size_minus3", value, ps_bitstrm);
+ if(value < (ps_sps->i1_log2_min_coding_block_size - 3) || value > (MIN(ctb_log2_size_y, 5) - 3))
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
ps_sps->i1_log2_min_pcm_coding_block_size = value + 3;
UEV_PARSE("log2_diff_max_min_pcm_coding_block_size", value, ps_bitstrm);
+ if(value > MIN(ctb_log2_size_y, 5) - ps_sps->i1_log2_min_pcm_coding_block_size)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
ps_sps->i1_log2_diff_max_min_pcm_coding_block_size = value;
BITS_PARSE("pcm_loop_filter_disable_flag", value, ps_bitstrm, 1);
ps_sps->i1_pcm_loop_filter_disable_flag = value;
}
UEV_PARSE("num_short_term_ref_pic_sets", value, ps_bitstrm);
- if(value < 0 || value > MAX_STREF_PICS_SPS)
+ if(value > MAX_STREF_PICS_SPS)
{
return IHEVCD_INVALID_PARAMETER;
}
@@ -1599,7 +1794,7 @@
if(ps_sps->i1_long_term_ref_pics_present_flag)
{
UEV_PARSE("num_long_term_ref_pics_sps", value, ps_bitstrm);
- if(value < 0 || value > MAX_LTREF_PICS_SPS)
+ if(value > MAX_LTREF_PICS_SPS)
{
return IHEVCD_INVALID_PARAMETER;
}
@@ -1833,7 +2028,8 @@
IHEVCD_ERROR_T ihevcd_parse_pps(codec_t *ps_codec)
{
IHEVCD_ERROR_T ret = (IHEVCD_ERROR_T)IHEVCD_SUCCESS;
- WORD32 value;
+ UWORD32 value;
+ WORD32 i4_value;
WORD32 pps_id;
pps_t *ps_pps;
@@ -1861,8 +2057,11 @@
ps_pps->i1_pps_id = pps_id;
UEV_PARSE("seq_parameter_set_id", value, ps_bitstrm);
+ if(value > MAX_SPS_CNT - 2)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
ps_pps->i1_sps_id = value;
- ps_pps->i1_sps_id = CLIP3(ps_pps->i1_sps_id, 0, MAX_SPS_CNT - 2);
ps_sps = (ps_codec->s_parse.ps_sps_base + ps_pps->i1_sps_id);
@@ -1897,13 +2096,25 @@
ps_pps->i1_cabac_init_present_flag = value;
UEV_PARSE("num_ref_idx_l0_default_active_minus1", value, ps_bitstrm);
+ if(value > MAX_DPB_SIZE - 2)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
ps_pps->i1_num_ref_idx_l0_default_active = value + 1;
UEV_PARSE("num_ref_idx_l1_default_active_minus1", value, ps_bitstrm);
+ if(value > MAX_DPB_SIZE - 2)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
ps_pps->i1_num_ref_idx_l1_default_active = value + 1;
- SEV_PARSE("pic_init_qp_minus26", value, ps_bitstrm);
- ps_pps->i1_pic_init_qp = value + 26;
+ SEV_PARSE("pic_init_qp_minus26", i4_value, ps_bitstrm);
+ if(i4_value < -26 || i4_value > 25)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
+ ps_pps->i1_pic_init_qp = i4_value + 26;
BITS_PARSE("constrained_intra_pred_flag", value, ps_bitstrm, 1);
ps_pps->i1_constrained_intra_pred_flag = value;
@@ -1917,6 +2128,10 @@
if(ps_pps->i1_cu_qp_delta_enabled_flag)
{
UEV_PARSE("diff_cu_qp_delta_depth", value, ps_bitstrm);
+ if(value > ps_sps->i1_log2_diff_max_min_coding_block_size)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
ps_pps->i1_diff_cu_qp_delta_depth = value;
}
else
@@ -1925,12 +2140,20 @@
}
ps_pps->i1_log2_min_cu_qp_delta_size = ps_sps->i1_log2_ctb_size - ps_pps->i1_diff_cu_qp_delta_depth;
/* Print different */
- SEV_PARSE("cb_qp_offset", value, ps_bitstrm);
- ps_pps->i1_pic_cb_qp_offset = value;
+ SEV_PARSE("cb_qp_offset", i4_value, ps_bitstrm);
+ if(i4_value < -12 || i4_value > 12)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
+ ps_pps->i1_pic_cb_qp_offset = i4_value;
/* Print different */
- SEV_PARSE("cr_qp_offset", value, ps_bitstrm);
- ps_pps->i1_pic_cr_qp_offset = value;
+ SEV_PARSE("cr_qp_offset", i4_value, ps_bitstrm);
+ if(i4_value < -12 || i4_value > 12)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
+ ps_pps->i1_pic_cr_qp_offset = i4_value;
/* Print different */
BITS_PARSE("slicelevel_chroma_qp_flag", value, ps_bitstrm, 1);
@@ -2007,7 +2230,7 @@
{
- WORD32 start;
+ UWORD32 start;
WORD32 i, j;
@@ -2046,7 +2269,7 @@
start += value;
if((start > ps_sps->i2_pic_wd_in_ctb) ||
- (value <= 0))
+ (value == 0))
return IHEVCD_INVALID_HEADER;
}
@@ -2086,7 +2309,7 @@
start += value;
if((start > ps_sps->i2_pic_ht_in_ctb) ||
- (value <= 0))
+ (value == 0))
return IHEVCD_INVALID_HEADER;
}
}
@@ -2134,11 +2357,19 @@
if(!ps_pps->i1_pic_disable_deblocking_filter_flag)
{
- SEV_PARSE("pps_beta_offset_div2", value, ps_bitstrm);
- ps_pps->i1_beta_offset_div2 = value;
+ SEV_PARSE("pps_beta_offset_div2", i4_value, ps_bitstrm);
+ if(i4_value < -6 || i4_value > 6)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
+ ps_pps->i1_beta_offset_div2 = i4_value;
- SEV_PARSE("pps_tc_offset_div2", value, ps_bitstrm);
- ps_pps->i1_tc_offset_div2 = value;
+ SEV_PARSE("pps_tc_offset_div2", i4_value, ps_bitstrm);
+ if(i4_value < -6 || i4_value > 6)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
+ ps_pps->i1_tc_offset_div2 = i4_value;
}
}
@@ -2155,6 +2386,10 @@
BITS_PARSE("lists_modification_present_flag", value, ps_bitstrm, 1);
ps_pps->i1_lists_modification_present_flag = value;
UEV_PARSE("log2_parallel_merge_level_minus2", value, ps_bitstrm);
+ if(value > (ps_sps->i1_log2_min_coding_block_size + ps_sps->i1_log2_diff_max_min_coding_block_size))
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
ps_pps->i1_log2_parallel_merge_level = value + 2;
BITS_PARSE("slice_header_extension_present_flag", value, ps_bitstrm, 1);
@@ -2223,6 +2458,10 @@
ps_parse->s_sei_params.i1_buf_period_params_present_flag = 1;
UEV_PARSE("bp_seq_parameter_set_id", value, ps_bitstrm);
+ if(value > MAX_SPS_CNT - 2)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
ps_buf_period_sei_params->u1_bp_seq_parameter_set_id = value;
if(!ps_vui_hdr->u1_sub_pic_cpb_params_present_flag)
@@ -2415,6 +2654,10 @@
UWORD32 array_size;
UEV_PARSE("num_decoding_units_minus1", value, ps_bitstrm);
+ if(value > (ps_sps->i4_pic_size_in_ctb -1))
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
ps_pic_timing->u4_num_decoding_units_minus1 = value;
num_units_minus1 = ps_pic_timing->u4_num_decoding_units_minus1;
@@ -2440,6 +2683,10 @@
for(i = 0; i <= ps_pic_timing->u4_num_decoding_units_minus1; i++)
{
UEV_PARSE("num_nalus_in_du_minus1", value, ps_bitstrm);
+ if(value > (ps_sps->i4_pic_size_in_ctb -1))
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
ps_pic_timing->au4_num_nalus_in_du_minus1[i] = value;
if((!ps_pic_timing->u1_du_common_cpb_removal_delay_flag)
diff --git a/decoder/ihevcd_parse_residual.c b/decoder/ihevcd_parse_residual.c
index 6e2a39c..0a39f99 100644
--- a/decoder/ihevcd_parse_residual.c
+++ b/decoder/ihevcd_parse_residual.c
@@ -721,7 +721,7 @@
/* If there are zero coeffs, then shift by as many zero coeffs and decrement n */
clz = CLZ(u4_sig_coeff_map_shift);
u4_sig_coeff_map_shift <<= clz;
- n -= clz;
+ n -= (WORD32)clz;
}while(u4_sig_coeff_map_shift);
}
/* At this level u4_sig_coeff_map is non-zero i.e. has atleast one non-zero coeff */
@@ -883,7 +883,7 @@
/* If there are zero coeffs, then shift by as many zero coeffs and decrement n */
clz = CLZ(u4_sig_coeff_map_shift);
u4_sig_coeff_map_shift <<= clz;
- n -= clz;
+ n -= (WORD32)clz;
}while(u4_sig_coeff_map_shift);
diff --git a/decoder/ihevcd_parse_slice.c b/decoder/ihevcd_parse_slice.c
index aedfbe7..a3e1e69 100644
--- a/decoder/ihevcd_parse_slice.c
+++ b/decoder/ihevcd_parse_slice.c
@@ -3168,13 +3168,17 @@
WORD32 ctb_row, ctb_col, index_pic_map, index_nbr_map;
WORD32 first_pu_of_ctb;
first_pu_of_ctb = pu4_nbr_pu_idx[1 + nbr_pu_idx_strd];
+ UWORD32 cur_ctb_ht_in_min_pu = MIN(((ps_sps->i2_pic_height_in_luma_samples
+ - (ps_codec->s_parse.i4_ctb_y << ps_sps->i1_log2_ctb_size)) / MIN_PU_SIZE), ctb_size_in_min_pu);
+ UWORD32 cur_ctb_wd_in_min_pu = MIN(((ps_sps->i2_pic_width_in_luma_samples
+ - (ps_codec->s_parse.i4_ctb_x << ps_sps->i1_log2_ctb_size)) / MIN_PU_SIZE), ctb_size_in_min_pu);
index_pic_map = 0 * ctb_size_in_min_pu + 0;
index_nbr_map = (0 + 1) * nbr_pu_idx_strd + (0 + 1);
- for(ctb_row = 0; ctb_row < ctb_size_in_min_pu; ctb_row++)
+ for(ctb_row = 0; ctb_row < cur_ctb_ht_in_min_pu; ctb_row++)
{
- for(ctb_col = 0; ctb_col < ctb_size_in_min_pu; ctb_col++)
+ for(ctb_col = 0; ctb_col < cur_ctb_wd_in_min_pu; ctb_col++)
{
pu1_pic_pu_map_ctb[index_pic_map + ctb_col] = pu4_nbr_pu_idx[index_nbr_map + ctb_col]
- first_pu_of_ctb;
diff --git a/decoder/ihevcd_parse_slice_header.c b/decoder/ihevcd_parse_slice_header.c
index 2ef174c..ba2c5e8 100644
--- a/decoder/ihevcd_parse_slice_header.c
+++ b/decoder/ihevcd_parse_slice_header.c
@@ -218,7 +218,8 @@
nal_header_t *ps_nal)
{
IHEVCD_ERROR_T ret = (IHEVCD_ERROR_T)IHEVCD_SUCCESS;
- WORD32 value;
+ UWORD32 value;
+ WORD32 i4_value;
WORD32 i, j;
WORD32 sps_id;
@@ -233,7 +234,7 @@
WORD32 no_output_of_prior_pics_flag = 0;
WORD8 i1_nal_unit_type = ps_nal->i1_nal_unit_type;
WORD32 num_poc_total_curr = 0;
- WORD32 slice_address;
+ UWORD32 slice_address;
WORD32 prev_slice_incomplete_flag = 0;
if(ps_codec->i4_slice_error == 1)
@@ -250,7 +251,10 @@
BITS_PARSE("no_output_of_prior_pics_flag", no_output_of_prior_pics_flag, ps_bitstrm, 1);
}
UEV_PARSE("pic_parameter_set_id", pps_id, ps_bitstrm);
- pps_id = CLIP3(pps_id, 0, MAX_PPS_CNT - 2);
+ if(pps_id < 0 || pps_id > MAX_PPS_CNT - 2)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
/* Get the current PPS structure */
ps_pps = ps_codec->s_parse.ps_pps_base + pps_id;
@@ -357,7 +361,7 @@
slice_address = value;
/* If slice address is greater than the number of CTBs in a picture,
* ignore the slice */
- if(value >= ps_sps->i4_pic_size_in_ctb || value <= 0)
+ if(value >= ps_sps->i4_pic_size_in_ctb || value == 0)
return IHEVCD_IGNORE_SLICE;
}
else
@@ -378,6 +382,10 @@
//slice_reserved_undetermined_flag[ i ]
}
UEV_PARSE("slice_type", value, ps_bitstrm);
+ if(value > 2)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
ps_slice_hdr->i1_slice_type = value;
/* If the picture is IRAP, slice type must be equal to ISLICE */
@@ -456,16 +464,18 @@
if(ps_sps->i1_num_long_term_ref_pics_sps > 0)
{
UEV_PARSE("num_long_term_sps", value, ps_bitstrm);
+ if(value > ps_sps->i1_num_long_term_ref_pics_sps)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
ps_slice_hdr->i1_num_long_term_sps = value;
-
- ps_slice_hdr->i1_num_long_term_sps = CLIP3(ps_slice_hdr->i1_num_long_term_sps,
- 0, MAX_DPB_SIZE - num_neg_pics - num_pos_pics);
}
UEV_PARSE("num_long_term_pics", value, ps_bitstrm);
+ if((value + ps_slice_hdr->i1_num_long_term_sps + num_neg_pics + num_pos_pics) > (MAX_DPB_SIZE - 1))
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
ps_slice_hdr->i1_num_long_term_pics = value;
- ps_slice_hdr->i1_num_long_term_pics = CLIP3(ps_slice_hdr->i1_num_long_term_pics,
- 0, MAX_DPB_SIZE - num_neg_pics - num_pos_pics -
- ps_slice_hdr->i1_num_long_term_sps);
for(i = 0; i < (ps_slice_hdr->i1_num_long_term_sps +
ps_slice_hdr->i1_num_long_term_pics); i++)
@@ -566,11 +576,19 @@
if(ps_slice_hdr->i1_num_ref_idx_active_override_flag)
{
UEV_PARSE("num_ref_idx_l0_active_minus1", value, ps_bitstrm);
+ if(value > MAX_DPB_SIZE - 2)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
ps_slice_hdr->i1_num_ref_idx_l0_active = value + 1;
if(BSLICE == ps_slice_hdr->i1_slice_type)
{
UEV_PARSE("num_ref_idx_l1_active_minus1", value, ps_bitstrm);
+ if(value > MAX_DPB_SIZE - 2)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
ps_slice_hdr->i1_num_ref_idx_l1_active = value + 1;
}
@@ -578,16 +596,12 @@
else
{
ps_slice_hdr->i1_num_ref_idx_l0_active = ps_pps->i1_num_ref_idx_l0_default_active;
-
if(BSLICE == ps_slice_hdr->i1_slice_type)
{
ps_slice_hdr->i1_num_ref_idx_l1_active = ps_pps->i1_num_ref_idx_l1_default_active;
}
}
- ps_slice_hdr->i1_num_ref_idx_l0_active = CLIP3(ps_slice_hdr->i1_num_ref_idx_l0_active, 0, MAX_DPB_SIZE - 1);
- ps_slice_hdr->i1_num_ref_idx_l1_active = CLIP3(ps_slice_hdr->i1_num_ref_idx_l1_active, 0, MAX_DPB_SIZE - 1);
-
if(0 == num_poc_total_curr)
return IHEVCD_IGNORE_SLICE;
if((ps_pps->i1_lists_modification_present_flag) && (num_poc_total_curr > 1))
@@ -628,11 +642,25 @@
(!ps_slice_hdr->i1_collocated_from_l0_flag && (ps_slice_hdr->i1_num_ref_idx_l1_active > 1)))
{
UEV_PARSE("collocated_ref_idx", value, ps_bitstrm);
+ if((PSLICE == ps_slice_hdr->i1_slice_type || BSLICE == ps_slice_hdr->i1_slice_type) &&
+ ps_slice_hdr->i1_collocated_from_l0_flag)
+ {
+ if(value >= ps_slice_hdr->i1_num_ref_idx_l0_active)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
+ }
+ if(BSLICE == ps_slice_hdr->i1_slice_type && !ps_slice_hdr->i1_collocated_from_l0_flag)
+ {
+ if(value >= ps_slice_hdr->i1_num_ref_idx_l1_active)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
+ }
ps_slice_hdr->i1_collocated_ref_idx = value;
}
}
- ps_slice_hdr->i1_collocated_ref_idx = CLIP3(ps_slice_hdr->i1_collocated_ref_idx, 0, MAX_DPB_SIZE - 1);
if((ps_pps->i1_weighted_pred_flag && (PSLICE == ps_slice_hdr->i1_slice_type)) ||
(ps_pps->i1_weighted_bipred_flag && (BSLICE == ps_slice_hdr->i1_slice_type)))
@@ -640,20 +668,35 @@
ihevcd_parse_pred_wt_ofst(ps_bitstrm, ps_sps, ps_pps, ps_slice_hdr);
}
UEV_PARSE("five_minus_max_num_merge_cand", value, ps_bitstrm);
+ if(value > 4)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
ps_slice_hdr->i1_max_num_merge_cand = 5 - value;
}
- ps_slice_hdr->i1_max_num_merge_cand = CLIP3(ps_slice_hdr->i1_max_num_merge_cand, 1, 5);
- SEV_PARSE("slice_qp_delta", value, ps_bitstrm);
- ps_slice_hdr->i1_slice_qp_delta = value;
+ SEV_PARSE("slice_qp_delta", i4_value, ps_bitstrm);
+ if((i4_value + ps_pps->i1_pic_init_qp) < 0 || (i4_value + ps_pps->i1_pic_init_qp) > MAX_HEVC_QP)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
+ ps_slice_hdr->i1_slice_qp_delta = i4_value;
if(ps_pps->i1_pic_slice_level_chroma_qp_offsets_present_flag)
{
- SEV_PARSE("slice_cb_qp_offset", value, ps_bitstrm);
- ps_slice_hdr->i1_slice_cb_qp_offset = value;
+ SEV_PARSE("slice_cb_qp_offset", i4_value, ps_bitstrm);
+ if(i4_value < -12 || i4_value > 12)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
+ ps_slice_hdr->i1_slice_cb_qp_offset = i4_value;
- SEV_PARSE("slice_cr_qp_offset", value, ps_bitstrm);
- ps_slice_hdr->i1_slice_cr_qp_offset = value;
+ SEV_PARSE("slice_cr_qp_offset", i4_value, ps_bitstrm);
+ if(i4_value < -12 || i4_value > 12)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
+ ps_slice_hdr->i1_slice_cr_qp_offset = i4_value;
}
ps_slice_hdr->i1_deblocking_filter_override_flag = 0;
@@ -680,11 +723,19 @@
if(!ps_slice_hdr->i1_slice_disable_deblocking_filter_flag)
{
- SEV_PARSE("beta_offset_div2", value, ps_bitstrm);
- ps_slice_hdr->i1_beta_offset_div2 = value;
+ SEV_PARSE("beta_offset_div2", i4_value, ps_bitstrm);
+ if(i4_value < -6 || i4_value > 6)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
+ ps_slice_hdr->i1_beta_offset_div2 = i4_value;
- SEV_PARSE("tc_offset_div2", value, ps_bitstrm);
- ps_slice_hdr->i1_tc_offset_div2 = value;
+ SEV_PARSE("tc_offset_div2", i4_value, ps_bitstrm);
+ if(i4_value < -6 || i4_value > 6)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
+ ps_slice_hdr->i1_tc_offset_div2 = i4_value;
}
}
@@ -819,24 +870,30 @@
if((ps_pps->i1_tiles_enabled_flag) &&
(ps_pps->i1_entropy_coding_sync_enabled_flag))
{
- max_num_entry_point_offsets = ps_pps->i1_num_tile_columns * (ps_sps->i2_pic_ht_in_ctb - 1);
+ max_num_entry_point_offsets = ps_pps->i1_num_tile_columns * ps_sps->i2_pic_ht_in_ctb - 1;
}
else if(ps_pps->i1_tiles_enabled_flag)
{
- max_num_entry_point_offsets = ps_pps->i1_num_tile_columns * ps_pps->i1_num_tile_rows;
+ max_num_entry_point_offsets = ps_pps->i1_num_tile_columns * ps_pps->i1_num_tile_rows - 1 ;
}
else
{
max_num_entry_point_offsets = (ps_sps->i2_pic_ht_in_ctb - 1);
}
- ps_slice_hdr->i4_num_entry_point_offsets = CLIP3(ps_slice_hdr->i4_num_entry_point_offsets,
- 0, max_num_entry_point_offsets);
+ if(ps_slice_hdr->i4_num_entry_point_offsets < 0 || ps_slice_hdr->i4_num_entry_point_offsets > max_num_entry_point_offsets)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
}
if(ps_slice_hdr->i4_num_entry_point_offsets > 0)
{
UEV_PARSE("offset_len_minus1", value, ps_bitstrm);
+ if(value > 31)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
ps_slice_hdr->i1_offset_len = value + 1;
for(i = 0; i < ps_slice_hdr->i4_num_entry_point_offsets; i++)
@@ -853,6 +910,10 @@
if(ps_pps->i1_slice_header_extension_present_flag)
{
UEV_PARSE("slice_header_extension_length", value, ps_bitstrm);
+ if(value > 256)
+ {
+ return IHEVCD_INVALID_PARAMETER;
+ }
ps_slice_hdr->i2_slice_header_extension_length = value;
diff --git a/decoder/ihevcd_process_slice.c b/decoder/ihevcd_process_slice.c
index 72db2cc..c5af08e 100644
--- a/decoder/ihevcd_process_slice.c
+++ b/decoder/ihevcd_process_slice.c
@@ -760,13 +760,17 @@
WORD32 ctb_row, ctb_col, index_pic_map, index_nbr_map;
WORD32 first_pu_of_ctb;
first_pu_of_ctb = pu4_nbr_pu_idx[1 + nbr_pu_idx_strd];
+ UWORD32 cur_ctb_ht_in_min_pu = MIN(((ps_sps->i2_pic_height_in_luma_samples
+ - (ps_proc->i4_ctb_y << ps_sps->i1_log2_ctb_size)) / MIN_PU_SIZE), ctb_size_in_min_pu);
+ UWORD32 cur_ctb_wd_in_min_pu = MIN(((ps_sps->i2_pic_width_in_luma_samples
+ - (ps_proc->i4_ctb_x << ps_sps->i1_log2_ctb_size)) / MIN_PU_SIZE), ctb_size_in_min_pu);
index_pic_map = 0 * ctb_size_in_min_pu + 0;
index_nbr_map = (0 + 1) * nbr_pu_idx_strd + (0 + 1);
- for(ctb_row = 0; ctb_row < ctb_size_in_min_pu; ctb_row++)
+ for(ctb_row = 0; ctb_row < cur_ctb_ht_in_min_pu; ctb_row++)
{
- for(ctb_col = 0; ctb_col < ctb_size_in_min_pu; ctb_col++)
+ for(ctb_col = 0; ctb_col < cur_ctb_wd_in_min_pu; ctb_col++)
{
pu1_pic_pu_map_ctb[index_pic_map + ctb_col] = pu4_nbr_pu_idx[index_nbr_map + ctb_col]
- first_pu_of_ctb;
diff --git a/decoder/ihevcd_ref_list.c b/decoder/ihevcd_ref_list.c
index 0fe6aa4..5f8e135 100644
--- a/decoder/ihevcd_ref_list.c
+++ b/decoder/ihevcd_ref_list.c
@@ -188,7 +188,7 @@
i4_poc_lt = ps_slice_hdr->ai4_poc_lsb_lt[i];
if(ps_slice_hdr->ai1_delta_poc_msb_present_flag[i])
{
- i4_poc_lt += i4_pic_order_cnt_val - ps_slice_hdr->ai1_delta_poc_msb_cycle_lt[i] * u4_max_poc_lsb - ps_slice_hdr->i4_pic_order_cnt_lsb;
+ i4_poc_lt += i4_pic_order_cnt_val - ps_slice_hdr->ai1_delta_poc_msb_cycle_lt[i] * (WORD32)u4_max_poc_lsb - ps_slice_hdr->i4_pic_order_cnt_lsb;
}
if(ps_slice_hdr->ai1_used_by_curr_pic_lt_flag[i])
diff --git a/decoder/ihevcd_sao.c b/decoder/ihevcd_sao.c
index dc852c6..3940b6a 100644
--- a/decoder/ihevcd_sao.c
+++ b/decoder/ihevcd_sao.c
@@ -274,7 +274,7 @@
u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag));
pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
- tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu;
+ tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) * min_cu);
}
}
@@ -337,7 +337,7 @@
u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag));
pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
- tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu;
+ tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) * min_cu);
}
else
{
@@ -352,7 +352,7 @@
u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag));
pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
- tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu;
+ tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) * min_cu);
}
}
@@ -407,7 +407,7 @@
u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag));
pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
- tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu;
+ tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) * min_cu);
}
else
{
@@ -422,7 +422,7 @@
u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag));
pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
- tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu;
+ tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) * min_cu);
}
}
@@ -491,7 +491,7 @@
u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag));
pu1_src_tmp += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
pu1_src_copy += MIN((WORD32)CTZ(u4_no_loop_filter_flag), tmp_wd);
- tmp_wd -= CTZ(u4_no_loop_filter_flag) * min_cu;
+ tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) * min_cu);
}
else
{
@@ -506,7 +506,7 @@
u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag));
pu1_src_tmp += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
pu1_src_copy += MIN((WORD32)CTZ(~u4_no_loop_filter_flag), tmp_wd);
- tmp_wd -= CTZ(~u4_no_loop_filter_flag) * min_cu;
+ tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) * min_cu);
}
}
@@ -669,7 +669,7 @@
{
pu1_src_tmp_luma += MIN((WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
pu1_src_backup_luma += MIN((WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
- tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu;
+ tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag));
}
else
@@ -683,7 +683,7 @@
}
pu1_src_tmp_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
pu1_src_backup_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
- tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu;
+ tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag));
}
}
@@ -748,7 +748,7 @@
{
pu1_src_tmp_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
pu1_src_backup_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
- tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu;
+ tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag));
}
else
@@ -763,7 +763,7 @@
pu1_src_tmp_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
pu1_src_backup_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
- tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu;
+ tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag));
}
}
@@ -3392,7 +3392,7 @@
{
pu1_src_tmp_luma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
pu1_src_backup_luma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
- tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu;
+ tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag));
}
else
@@ -3406,7 +3406,7 @@
}
pu1_src_tmp_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
pu1_src_backup_luma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
- tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu;
+ tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag));
}
}
@@ -3470,7 +3470,7 @@
{
pu1_src_tmp_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
pu1_src_backup_chroma += MIN(((WORD32)CTZ(u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
- tmp_wd -= CTZ(u4_no_loop_filter_flag) << log2_min_cu;
+ tmp_wd -= (WORD32)(CTZ(u4_no_loop_filter_flag) << log2_min_cu);
u4_no_loop_filter_flag >>= (CTZ(u4_no_loop_filter_flag));
}
else
@@ -3485,7 +3485,7 @@
pu1_src_tmp_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
pu1_src_backup_chroma += MIN(((WORD32)CTZ(~u4_no_loop_filter_flag) << log2_min_cu), tmp_wd);
- tmp_wd -= CTZ(~u4_no_loop_filter_flag) << log2_min_cu;
+ tmp_wd -= (WORD32)(CTZ(~u4_no_loop_filter_flag) << log2_min_cu);
u4_no_loop_filter_flag >>= (CTZ(~u4_no_loop_filter_flag));
}
}
diff --git a/encoder/arm/ihevce_coarse_layer_sad_neon.c b/encoder/arm/ihevce_coarse_layer_sad_neon.c
new file mode 100644
index 0000000..85200aa
--- /dev/null
+++ b/encoder/arm/ihevce_coarse_layer_sad_neon.c
@@ -0,0 +1,766 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ihevce_coarse_layer_sad_neon.c
+*
+* @brief
+* Contains intrinsic definitions of functions for computing sad
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+*
+* @remarks
+* None
+*
+********************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <arm_neon.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevc_cmn_utils_neon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_debug.h"
+#include "ihevc_deblk.h"
+#include "ihevc_defs.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_macros.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_sao.h"
+#include "ihevc_structs.h"
+#include "ihevc_weighted_pred.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevce_api.h"
+#include "ihevce_defs.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_ipe_instr_set_router.h"
+#include "ihevce_global_tables.h"
+
+#include "hme_datatype.h"
+#include "hme_common_defs.h"
+#include "hme_interface.h"
+#include "hme_defs.h"
+#include "hme_globals.h"
+
+#include "ihevce_me_instr_set_router.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+void hme_store_4x4_sads_high_speed_neon(
+ hme_search_prms_t *ps_search_prms, /* block offsets, input stride, reference index and mv sweep range */
+ layer_ctxt_t *ps_layer_ctxt, /* reference plane pointers and reference stride */
+ range_prms_t *ps_mv_limit, /* mv limits that fix the layout of the SAD table */
+ wgt_pred_ctxt_t *ps_wt_inp_prms, /* input plane pointer per reference index */
+ S16 *pi2_sads_4x4) /* [out] SAD table, one 16-bit entry per searched mv */
+{ /* Stores the 4x4 SAD of one input block against the reference for every mv on the step-4 search grid */
+ uint8x8_t src2[4]; /* input rows; each 4-byte row is duplicated into both halves */
+ uint8x16_t src; /* input rows 0..3 packed in 16 bytes; assigned only when the x sweep has an odd step count */
+
+ S32 i, j;
+
+ /* Input and reference attributes */
+ U08 *pu1_inp, *pu1_ref;
+ S32 i4_inp_stride, i4_ref_stride, i4_ref_offset;
+
+ /* The reference is actually an array of ptrs since there are several */
+ /* reference ids, so an array gets passed from the calling function */
+ U08 **ppu1_ref, *pu1_ref_coloc;
+
+ S32 stepy, stepx, step_shift_x, step_shift_y;
+ S32 mvx, mvy, mv_x_offset, mv_y_offset, mv_x_range, mv_y_range;
+
+ /* Points to the range limits for mv */
+ range_prms_t *ps_range_prms = ps_search_prms->aps_mv_range[0];
+
+ /* Reference index to be searched */
+ S32 i4_search_idx = ps_search_prms->i1_ref_idx;
+
+ pu1_inp = ps_wt_inp_prms->apu1_wt_inp[i4_search_idx];
+ i4_inp_stride = ps_search_prms->i4_inp_stride;
+
+ /* Move to the location of the search blk in inp buffer */
+ pu1_inp += ps_search_prms->i4_cu_x_off;
+ pu1_inp += ps_search_prms->i4_cu_y_off * i4_inp_stride;
+
+ /*************************************************************************/
+ /* we use either input of previously encoded pictures as reference */
+ /* in coarse layer */
+ /*************************************************************************/
+ i4_ref_stride = ps_layer_ctxt->i4_inp_stride;
+ ppu1_ref = ps_layer_ctxt->ppu1_list_inp;
+
+ /* colocated position in reference picture */
+ i4_ref_offset = (i4_ref_stride * ps_search_prms->i4_y_off) + ps_search_prms->i4_x_off;
+ pu1_ref_coloc = ppu1_ref[i4_search_idx] + i4_ref_offset;
+
+ stepx = stepy = HME_COARSE_STEP_SIZE_HIGH_SPEED;
+ /*TODO: Calculate Step shift from the #define HME_COARSE_STEP_SIZE_HIGH_SPEED */
+ step_shift_x = step_shift_y = 2;
+
+ mv_x_offset = -(ps_mv_limit->i2_min_x >> step_shift_x); /* column of mv x == 0 in the SAD table */
+ mv_y_offset = -(ps_mv_limit->i2_min_y >> step_shift_y); /* row of mv y == 0 in the SAD table */
+ mv_x_range = (-ps_mv_limit->i2_min_x + ps_mv_limit->i2_max_x) >> step_shift_x; /* entries per table row */
+ mv_y_range = (-ps_mv_limit->i2_min_y + ps_mv_limit->i2_max_y) >> step_shift_y; /* NOTE(review): not read below */
+
+ ASSERT(4 == stepx); /* the vector paths below hard-code candidates 4 pixels apart */
+
+ /* load input */
+ {
+ S32 mv_x_sweep = ps_range_prms->i2_max_x - ps_range_prms->i2_min_x;
+ uint32x2_t a[4];
+
+ for(i = 0; i < 4; i++)
+ {
+ a[i] = vld1_dup_u32((uint32_t *)pu1_inp); /* one 4-byte input row replicated to both 32-bit lanes */
+ pu1_inp += i4_inp_stride;
+ }
+ src2[0] = vreinterpret_u8_u32(a[0]);
+ src2[1] = vreinterpret_u8_u32(a[1]);
+ src2[2] = vreinterpret_u8_u32(a[2]);
+ src2[3] = vreinterpret_u8_u32(a[3]);
+
+ if((mv_x_sweep >> step_shift_x) & 1) /* odd step count: the single-candidate 4x4 path will be needed */
+ {
+ uint32x2x2_t l = vtrn_u32(a[0], a[1]);
+ uint32x2x2_t m = vtrn_u32(a[2], a[3]);
+
+ src = vcombine_u8(vreinterpret_u8_u32(l.val[0]), vreinterpret_u8_u32(m.val[0])); /* rows 0,1 low half; rows 2,3 high half */
+ }
+ }
+
+ /* Run 2 loops to sweep over the reference area */
+ for(mvy = ps_range_prms->i2_min_y; mvy < ps_range_prms->i2_max_y; mvy += stepy)
+ {
+ for(mvx = ps_range_prms->i2_min_x; mvx < ps_range_prms->i2_max_x;) /* mvx is advanced inside the branches */
+ {
+ U16 *pu2_sad = (U16 *)&pi2_sads_4x4
+ [((mvx >> step_shift_x) + mv_x_offset) +
+ ((mvy >> step_shift_y) + mv_y_offset) * mv_x_range];
+
+ pu1_ref = pu1_ref_coloc + mvx + (mvy * i4_ref_stride);
+ if((mvx + (stepx * 4)) <= ps_range_prms->i2_max_x) // 16x4: 4 mv candidates per iteration
+ {
+ uint16x8_t abs_01 = vdupq_n_u16(0);
+ uint16x8_t abs_23 = vdupq_n_u16(0);
+ uint16x4_t tmp_a0, tmp_a1;
+
+ for(j = 0; j < 4; j++)
+ {
+ uint8x16_t ref = vld1q_u8(pu1_ref);
+
+ abs_01 = vabal_u8(abs_01, src2[j], vget_low_u8(ref)); /* candidates at x offsets 0 and 4 */
+ abs_23 = vabal_u8(abs_23, src2[j], vget_high_u8(ref)); /* candidates at x offsets 8 and 12 */
+ pu1_ref += i4_ref_stride;
+ }
+ tmp_a0 = vpadd_u16(vget_low_u16(abs_01), vget_high_u16(abs_01)); /* pairwise adds fold 4 columns per candidate */
+ tmp_a1 = vpadd_u16(vget_low_u16(abs_23), vget_high_u16(abs_23));
+ abs_01 = vcombine_u16(tmp_a0, tmp_a1);
+ tmp_a0 = vpadd_u16(vget_low_u16(abs_01), vget_high_u16(abs_01)); /* lanes 0..3 = SADs of the 4 candidates */
+ vst1_u16(pu2_sad, tmp_a0);
+ mvx += stepx * 4;
+ }
+ else if((mvx + (stepx * 2)) <= ps_range_prms->i2_max_x) // 8x4: 2 mv candidates per iteration
+ {
+ uint16x8_t abs_01 = vdupq_n_u16(0);
+ uint16x4_t tmp_a;
+ uint32x2_t tmp_b;
+
+ for(j = 0; j < 4; j++)
+ {
+ uint8x8_t ref = vld1_u8(pu1_ref);
+
+ abs_01 = vabal_u8(abs_01, src2[j], ref);
+ pu1_ref += i4_ref_stride;
+ }
+ tmp_a = vpadd_u16(vget_low_u16(abs_01), vget_high_u16(abs_01));
+ tmp_b = vpaddl_u16(tmp_a); /* lane 0 = SAD at x offset 0, lane 1 = SAD at x offset 4 */
+ pu2_sad[0] = vget_lane_u32(tmp_b, 0);
+ pu2_sad[1] = vget_lane_u32(tmp_b, 1);
+ mvx += stepx * 2;
+ }
+ else if((mvx + stepx) <= ps_range_prms->i2_max_x) // 4x4: single mv candidate
+ {
+ const uint8x16_t ref = load_unaligned_u8q(pu1_ref, i4_ref_stride); /* presumably gathers 4 rows of 4 bytes - confirm in ihevc_cmn_utils_neon.h */
+ uint16x8_t abs = vabdl_u8(vget_low_u8(src), vget_low_u8(ref));
+ uint32x4_t b;
+ uint64x2_t c;
+
+ abs = vabal_u8(abs, vget_high_u8(src), vget_high_u8(ref));
+ b = vpaddlq_u16(abs);
+ c = vpaddlq_u32(b);
+ *pu2_sad = vget_lane_u32(
+ vadd_u32(
+ vreinterpret_u32_u64(vget_low_u64(c)),
+ vreinterpret_u32_u64(vget_high_u64(c))),
+ 0);
+ mvx += stepx;
+ }
+ } /* NOTE(review): the chain above has no final else; if no branch matches, mvx is not advanced */
+ }
+}
+
+void hme_store_4x4_sads_high_quality_neon(
+ hme_search_prms_t *ps_search_prms, /* block offsets, input stride, reference index and mv sweep range */
+ layer_ctxt_t *ps_layer_ctxt, /* reference plane pointers and reference stride */
+ range_prms_t *ps_mv_limit, /* mv limits that fix the layout of the SAD table */
+ wgt_pred_ctxt_t *ps_wt_inp_prms, /* input plane pointer per reference index */
+ S16 *pi2_sads_4x4) /* [out] SAD table, one 16-bit entry per searched mv */
+{ /* Stores the 4x4 SAD of one input block against the reference for every mv on the finer search grid (shift of 1) */
+ uint8x8_t src2[4]; /* input rows; each 4-byte row is duplicated into both halves */
+ uint8x16_t src; /* input rows 0..3 packed in 16 bytes; assigned only when bit 2 of the x sweep is set */
+
+ S32 i, j;
+
+ /* Input and reference attributes */
+ U08 *pu1_inp, *pu1_ref;
+ S32 i4_inp_stride, i4_ref_stride, i4_ref_offset;
+
+ /* The reference is actually an array of ptrs since there are several */
+ /* reference ids, so an array gets passed from the calling function */
+ U08 **ppu1_ref, *pu1_ref_coloc;
+
+ S32 stepy, stepx, step_shift_x, step_shift_y;
+ S32 mvx, mvy, mv_x_offset, mv_y_offset, mv_x_range, mv_y_range;
+
+ /* Points to the range limits for mv */
+ range_prms_t *ps_range_prms = ps_search_prms->aps_mv_range[0];
+
+ /* Reference index to be searched */
+ S32 i4_search_idx = ps_search_prms->i1_ref_idx;
+
+ pu1_inp = ps_wt_inp_prms->apu1_wt_inp[i4_search_idx];
+ i4_inp_stride = ps_search_prms->i4_inp_stride;
+
+ /* Move to the location of the search blk in inp buffer */
+ pu1_inp += ps_search_prms->i4_cu_x_off;
+ pu1_inp += ps_search_prms->i4_cu_y_off * i4_inp_stride;
+
+ /*************************************************************************/
+ /* we use either input of previously encoded pictures as reference */
+ /* in coarse layer */
+ /*************************************************************************/
+ i4_ref_stride = ps_layer_ctxt->i4_inp_stride;
+ ppu1_ref = ps_layer_ctxt->ppu1_list_inp;
+
+ /* colocated position in reference picture */
+ i4_ref_offset = (i4_ref_stride * ps_search_prms->i4_y_off) + ps_search_prms->i4_x_off;
+ pu1_ref_coloc = ppu1_ref[i4_search_idx] + i4_ref_offset;
+
+ stepx = stepy = HME_COARSE_STEP_SIZE_HIGH_QUALITY;
+ /*TODO: Calculate Step shift from the #define HME_COARSE_STEP_SIZE_HIGH_QUALITY */
+ step_shift_x = step_shift_y = 1; /* presumably the step is 2 pixels; the vector paths below place candidates 2 apart */
+
+ mv_x_offset = -(ps_mv_limit->i2_min_x >> step_shift_x); /* column of mv x == 0 in the SAD table */
+ mv_y_offset = -(ps_mv_limit->i2_min_y >> step_shift_y); /* row of mv y == 0 in the SAD table */
+ mv_x_range = (-ps_mv_limit->i2_min_x + ps_mv_limit->i2_max_x) >> step_shift_x; /* entries per table row */
+ mv_y_range = (-ps_mv_limit->i2_min_y + ps_mv_limit->i2_max_y) >> step_shift_y; /* NOTE(review): not read below */
+
+ /* load input */
+ {
+ S32 mv_x_sweep = ps_range_prms->i2_max_x - ps_range_prms->i2_min_x;
+ uint32x2_t a[4];
+
+ for(i = 0; i < 4; i++)
+ {
+ a[i] = vld1_dup_u32((uint32_t *)pu1_inp); /* one 4-byte input row replicated to both 32-bit lanes */
+ pu1_inp += i4_inp_stride;
+ }
+ src2[0] = vreinterpret_u8_u32(a[0]);
+ src2[1] = vreinterpret_u8_u32(a[1]);
+ src2[2] = vreinterpret_u8_u32(a[2]);
+ src2[3] = vreinterpret_u8_u32(a[3]);
+
+ if((mv_x_sweep >> 2) & 1) /* bit 2 of the sweep set: the two-candidate 4x4 path (stepx * 2) will be needed */
+ {
+ uint32x2x2_t l = vtrn_u32(a[0], a[1]);
+ uint32x2x2_t m = vtrn_u32(a[2], a[3]);
+
+ src = vcombine_u8(vreinterpret_u8_u32(l.val[0]), vreinterpret_u8_u32(m.val[0])); /* rows 0,1 low half; rows 2,3 high half */
+ }
+ }
+
+ /* Run 2 loops to sweep over the reference area */
+ for(mvy = ps_range_prms->i2_min_y; mvy < ps_range_prms->i2_max_y; mvy += stepy)
+ {
+ for(mvx = ps_range_prms->i2_min_x; mvx < ps_range_prms->i2_max_x;) /* mvx is advanced inside the branches */
+ {
+ U16 *pu2_sad = (U16 *)&pi2_sads_4x4
+ [((mvx >> step_shift_x) + mv_x_offset) +
+ ((mvy >> step_shift_y) + mv_y_offset) * mv_x_range];
+
+ pu1_ref = pu1_ref_coloc + mvx + (mvy * i4_ref_stride);
+ if((mvx + (stepx * 8)) <= ps_range_prms->i2_max_x) // 16x4: 8 mv candidates per iteration
+ {
+ uint16x8_t abs_a_01 = vdupq_n_u16(0);
+ uint16x8_t abs_a_23 = vdupq_n_u16(0);
+ uint16x8_t abs_b_01 = vdupq_n_u16(0);
+ uint16x8_t abs_b_23 = vdupq_n_u16(0);
+ uint16x4_t tmp_b0, tmp_b1;
+ uint16x4x2_t tmp_a;
+
+ for(j = 0; j < 4; j++)
+ {
+ uint8x16_t ref_a = vld1q_u8(pu1_ref); /* candidates at x offsets 0, 4, 8, 12 */
+ uint8x16_t ref_b = vld1q_u8(pu1_ref + 2); /* candidates at x offsets 2, 6, 10, 14 */
+
+ abs_a_01 = vabal_u8(abs_a_01, src2[j], vget_low_u8(ref_a));
+ abs_a_23 = vabal_u8(abs_a_23, src2[j], vget_high_u8(ref_a));
+ abs_b_01 = vabal_u8(abs_b_01, src2[j], vget_low_u8(ref_b));
+ abs_b_23 = vabal_u8(abs_b_23, src2[j], vget_high_u8(ref_b));
+ pu1_ref += i4_ref_stride;
+ }
+ tmp_a.val[0] = vpadd_u16(vget_low_u16(abs_a_01), vget_high_u16(abs_a_01));
+ tmp_a.val[1] = vpadd_u16(vget_low_u16(abs_a_23), vget_high_u16(abs_a_23));
+ abs_a_01 = vcombine_u16(tmp_a.val[0], tmp_a.val[1]);
+ tmp_a.val[0] = vpadd_u16(vget_low_u16(abs_a_01), vget_high_u16(abs_a_01)); /* SADs of the ref_a candidates */
+ tmp_b0 = vpadd_u16(vget_low_u16(abs_b_01), vget_high_u16(abs_b_01));
+ tmp_b1 = vpadd_u16(vget_low_u16(abs_b_23), vget_high_u16(abs_b_23));
+ abs_b_01 = vcombine_u16(tmp_b0, tmp_b1);
+ tmp_a.val[1] = vpadd_u16(vget_low_u16(abs_b_01), vget_high_u16(abs_b_01)); /* SADs of the ref_b candidates */
+ vst2_u16(pu2_sad, tmp_a); /* interleaving store puts the 8 SADs in x-offset order 0, 2, 4, ... 14 */
+ mvx += stepx * 8;
+ }
+ else if((mvx + (stepx * 4)) <= ps_range_prms->i2_max_x) // 8x4: 4 mv candidates per iteration
+ {
+ uint16x8_t abs_a_01 = vdupq_n_u16(0);
+ uint16x8_t abs_b_01 = vdupq_n_u16(0);
+ uint16x4_t tmp_a, tmp_b;
+
+ for(j = 0; j < 4; j++)
+ {
+ uint8x8_t ref_a = vld1_u8(pu1_ref);
+ uint8x8_t ref_b = vld1_u8(pu1_ref + 2);
+
+ abs_a_01 = vabal_u8(abs_a_01, src2[j], ref_a);
+ abs_b_01 = vabal_u8(abs_b_01, src2[j], ref_b);
+ pu1_ref += i4_ref_stride;
+ }
+ tmp_a = vpadd_u16(vget_low_u16(abs_a_01), vget_high_u16(abs_a_01));
+ tmp_b = vpadd_u16(vget_low_u16(abs_b_01), vget_high_u16(abs_b_01));
+ tmp_a = vpadd_u16(tmp_a, tmp_b); /* lanes: SADs at x offsets 0, 4, 2, 6 */
+
+ pu2_sad[0] = vget_lane_u16(tmp_a, 0); /* lanes are read out of order to restore x-offset order 0, 2, 4, 6 */
+ pu2_sad[1] = vget_lane_u16(tmp_a, 2);
+ pu2_sad[2] = vget_lane_u16(tmp_a, 1);
+ pu2_sad[3] = vget_lane_u16(tmp_a, 3);
+ mvx += stepx * 4;
+ }
+ else if((mvx + stepx * 2) <= ps_range_prms->i2_max_x) // 4x4: 2 mv candidates per iteration
+ {
+ uint8x16_t ref = load_unaligned_u8q(pu1_ref, i4_ref_stride); /* presumably gathers 4 rows of 4 bytes - confirm in ihevc_cmn_utils_neon.h */
+ uint16x8_t abs = vabdl_u8(vget_low_u8(src), vget_low_u8(ref));
+ uint32x4_t b;
+ uint64x2_t c;
+
+ abs = vabal_u8(abs, vget_high_u8(src), vget_high_u8(ref));
+ b = vpaddlq_u16(abs);
+ c = vpaddlq_u32(b);
+ *pu2_sad = vget_lane_u32(
+ vadd_u32(
+ vreinterpret_u32_u64(vget_low_u64(c)),
+ vreinterpret_u32_u64(vget_high_u64(c))),
+ 0);
+
+ ref = load_unaligned_u8q(pu1_ref + 2, i4_ref_stride); /* second candidate, 2 pixels to the right */
+ abs = vabdl_u8(vget_low_u8(src), vget_low_u8(ref));
+ abs = vabal_u8(abs, vget_high_u8(src), vget_high_u8(ref));
+ b = vpaddlq_u16(abs);
+ c = vpaddlq_u32(b);
+ pu2_sad[1] = vget_lane_u32(
+ vadd_u32(
+ vreinterpret_u32_u64(vget_low_u64(c)),
+ vreinterpret_u32_u64(vget_high_u64(c))),
+ 0);
+ mvx += stepx * 2;
+ }
+ else
+ {
+ assert(0); /* fewer than stepx * 2 pixels remain in the x sweep; treated as unreachable */
+ }
+ }
+ }
+}
+
+#define BEST_COST(i) /* updates best 4x8 / 8x4 cost and mv from sad_array[0][i] / sad_array[1][i]; uses caller locals mvx, mvy, stepx */ \
+ if(sad_array[0][i] < min_cost_4x8) \
+ { \
+ best_mv_x_4x8 = mvx + i * stepx; \
+ best_mv_y_4x8 = mvy; \
+ min_cost_4x8 = sad_array[0][i]; \
+ } \
+ if(sad_array[1][i] < min_cost_8x4) \
+ { \
+ best_mv_x_8x4 = mvx + i * stepx; \
+ best_mv_y_8x4 = mvy; \
+ min_cost_8x4 = sad_array[1][i]; \
+ }
+
+void hme_combine_4x4_sads_and_compute_cost_high_speed_neon(
+ S08 i1_ref_idx, /* reference index; added to every mv weight */
+ range_prms_t *ps_mv_range, /* mv range that is swept */
+ range_prms_t *ps_mv_limit, /* mv limits that fix the layout of the SAD tables */
+ hme_mv_t *ps_best_mv_4x8, /* [out] mv with the lowest 4x8 cost */
+ hme_mv_t *ps_best_mv_8x4, /* [out] mv with the lowest 8x4 cost */
+ pred_ctxt_t *ps_pred_ctxt, /* supplies lambda and lambda_q_shift */
+ PF_MV_COST_FXN pf_mv_cost_compute, /* NOTE(review): not referenced in this function */
+ S16 *pi2_sads_4x4_current, /* 4x4 SAD table of the current block */
+ S16 *pi2_sads_4x4_east, /* 4x4 SAD table added to current to form the 8x4 SAD */
+ S16 *pi2_sads_4x4_south) /* 4x4 SAD table added to current to form the 4x8 SAD */
+{ /* Sums neighbouring 4x4 SADs into 4x8 / 8x4 SADs, adds the lambda-weighted mv cost, and returns the cheapest mv of each */
+ S32 best_mv_y_4x8, best_mv_x_4x8, best_mv_y_8x4, best_mv_x_8x4; /* NOTE(review): assigned only inside BEST_COST */
+
+ S32 stepy = HME_COARSE_STEP_SIZE_HIGH_SPEED;
+ S32 stepx = HME_COARSE_STEP_SIZE_HIGH_SPEED;
+ /*TODO: Calculate Step shift from the #define HME_COARSE_STEP_SIZE_HIGH_SPEED */
+ S32 step_shift_x = 2;
+ S32 step_shift_y = 2;
+
+ S32 mvx, mvy, mv_x_offset, mv_y_offset, mv_x_range, mv_y_range;
+
+ S32 lambda = ps_pred_ctxt->lambda;
+ S32 lambda_q_shift = ps_pred_ctxt->lambda_q_shift;
+ S32 rnd = 1 << (lambda_q_shift - 1); /* rounding term for the right shift; assumes lambda_q_shift >= 1 - TODO confirm */
+
+ S32 min_cost_4x8 = MAX_32BIT_VAL;
+ S32 min_cost_8x4 = MAX_32BIT_VAL;
+
+ S32 i;
+
+ const uint16x8_t v_ref_idx = vdupq_n_u16(i1_ref_idx);
+ const uint32x4_t v_lambda = vdupq_n_u32(lambda);
+ const uint32x4_t v_rnd_factor = vdupq_n_u32(rnd);
+ const int32x4_t v_lambda_q_shift = vdupq_n_s32(-lambda_q_shift); /* negative count makes vshlq_u32 shift right */
+
+ mv_x_offset = (-ps_mv_limit->i2_min_x >> step_shift_x); /* NOTE(review): negates before shifting, unlike hme_store_4x4_sads_high_speed_neon; results can differ when i2_min_x is not a multiple of the step */
+ mv_y_offset = (-ps_mv_limit->i2_min_y >> step_shift_y);
+ mv_x_range = (-ps_mv_limit->i2_min_x + ps_mv_limit->i2_max_x) >> step_shift_x; /* entries per SAD table row */
+ mv_y_range = (-ps_mv_limit->i2_min_y + ps_mv_limit->i2_max_y) >> step_shift_y; /* NOTE(review): not read below */
+
+ ASSERT(MAX_MVX_SUPPORTED_IN_COARSE_LAYER >= ABS(ps_mv_range->i2_max_x));
+ ASSERT(MAX_MVY_SUPPORTED_IN_COARSE_LAYER >= ABS(ps_mv_range->i2_max_y));
+
+ /* Run 2 loops to sweep over the reference area */
+ for(mvy = ps_mv_range->i2_min_y; mvy < ps_mv_range->i2_max_y; mvy += stepy)
+ {
+ /* LUT: (2 * hme_get_range(mv_y) - 1) + ((!mv_y) ? 0 : 1) */
+ uint16x8_t mvy_wt = vld1q_u16((U16 *)&gi2_mvy_range[ABS(mvy)][0]);
+
+ /* mvy wt + ref_idx */
+ mvy_wt = vaddq_u16(mvy_wt, v_ref_idx);
+
+ for(mvx = ps_mv_range->i2_min_x; mvx < ps_mv_range->i2_max_x;) /* mvx is advanced inside the branches */
+ {
+ S32 sad_pos = ((mvx >> step_shift_x) + mv_x_offset) +
+ ((mvy >> step_shift_y) + mv_y_offset) * mv_x_range;
+
+ if(mvx + (8 * stepx) <= ps_mv_range->i2_max_x) // 8x4: 8 mv candidates per iteration
+ {
+ uint16x8_t curr = vld1q_u16((U16 *)pi2_sads_4x4_current + sad_pos);
+ uint16x8_t south = vld1q_u16((U16 *)pi2_sads_4x4_south + sad_pos);
+ uint16x8_t east = vld1q_u16((U16 *)pi2_sads_4x4_east + sad_pos);
+ uint16x8_t sad_4x8 = vaddq_u16(curr, south);
+ uint16x8_t sad_8x4 = vaddq_u16(curr, east);
+ /* LUT: (2 * hme_get_range(mv_x) - 1) + ((!mv_x) ? 0 : 1) */
+ uint16x8_t mv_wt =
+ vld1q_u16((U16 *)&gi2_mvx_range[mvx + MAX_MVX_SUPPORTED_IN_COARSE_LAYER][0]);
+ uint32x4_t total_cost_0, total_cost_1;
+ uint16x8_t total_cost;
+ U16 sad_array[2][8];
+
+ /* mv weight + ref_idx */
+ mv_wt = vaddq_u16(mv_wt, mvy_wt);
+
+ total_cost_0 = vmulq_u32(v_lambda, vmovl_u16(vget_low_u16(mv_wt)));
+ total_cost_1 = vmulq_u32(v_lambda, vmovl_u16(vget_high_u16(mv_wt)));
+
+ total_cost_0 = vaddq_u32(total_cost_0, v_rnd_factor);
+ total_cost_1 = vaddq_u32(total_cost_1, v_rnd_factor);
+
+ total_cost_0 = vshlq_u32(total_cost_0, v_lambda_q_shift); /* (wt * lambda + rnd) >> lambda_q_shift */
+ total_cost_1 = vshlq_u32(total_cost_1, v_lambda_q_shift);
+
+ total_cost = vcombine_u16(vmovn_u32(total_cost_0), vmovn_u32(total_cost_1)); /* keeps the low 16 bits of each cost */
+
+ sad_4x8 = vaddq_u16(total_cost, sad_4x8);
+ sad_8x4 = vaddq_u16(total_cost, sad_8x4);
+
+ vst1q_u16(sad_array[0], sad_4x8);
+ vst1q_u16(sad_array[1], sad_8x4);
+
+ for(i = 0; i < 8; i++) /* scalar scan for the minimum over the 8 candidates */
+ {
+ BEST_COST(i);
+ }
+ mvx += stepx * 8;
+ }
+ else if(mvx + (4 * stepx) <= ps_mv_range->i2_max_x) // 4x4: 4 mv candidates per iteration
+ {
+ uint16x4_t curr = vld1_u16((U16 *)pi2_sads_4x4_current + sad_pos);
+ uint16x4_t south = vld1_u16((U16 *)pi2_sads_4x4_south + sad_pos);
+ uint16x4_t east = vld1_u16((U16 *)pi2_sads_4x4_east + sad_pos);
+ uint16x4_t sad_4x8 = vadd_u16(curr, south);
+ uint16x4_t sad_8x4 = vadd_u16(curr, east);
+ /* LUT: (2 * hme_get_range(mv_x) - 1) + ((!mv_x) ? 0 : 1) */
+ uint16x4_t mv_wt =
+ vld1_u16((U16 *)&gi2_mvx_range[mvx + MAX_MVX_SUPPORTED_IN_COARSE_LAYER][0]);
+ uint32x4_t total_cost;
+ U16 sad_array[2][4];
+
+ /* mv weight + ref_idx */
+ mv_wt = vadd_u16(mv_wt, vget_low_u16(mvy_wt));
+
+ total_cost = vmulq_u32(v_lambda, vmovl_u16(mv_wt));
+ total_cost = vaddq_u32(total_cost, v_rnd_factor);
+ total_cost = vshlq_u32(total_cost, v_lambda_q_shift); /* (wt * lambda + rnd) >> lambda_q_shift */
+
+ sad_4x8 = vadd_u16(vmovn_u32(total_cost), sad_4x8);
+ sad_8x4 = vadd_u16(vmovn_u32(total_cost), sad_8x4);
+
+ vst1_u16(sad_array[0], sad_4x8);
+ vst1_u16(sad_array[1], sad_8x4);
+
+ for(i = 0; i < 4; i++)
+ {
+ BEST_COST(i);
+ }
+
+ mvx += stepx * 4;
+ }
+ else
+ {
+ S16 sad_array[2][1]; /* NOTE(review): signed here, while the vector paths use U16 */
+ S32 mv_cost;
+
+ /* Get SADs by adding the current SAD to the south (4x8) and east (8x4) SADs */
+ sad_array[0][0] = pi2_sads_4x4_current[sad_pos] + pi2_sads_4x4_south[sad_pos];
+ sad_array[1][0] = pi2_sads_4x4_current[sad_pos] + pi2_sads_4x4_east[sad_pos];
+
+ mv_cost = gi2_mvy_range[ABS(mvy)][0] +
+ gi2_mvx_range[mvx + MAX_MVX_SUPPORTED_IN_COARSE_LAYER][0] + i1_ref_idx;
+
+ mv_cost = (mv_cost * lambda + rnd) >> lambda_q_shift;
+
+ sad_array[0][0] += mv_cost;
+ sad_array[1][0] += mv_cost;
+
+ BEST_COST(0);
+ mvx += stepx;
+ }
+ }
+ }
+
+ ps_best_mv_4x8->i2_mv_x = best_mv_x_4x8;
+ ps_best_mv_4x8->i2_mv_y = best_mv_y_4x8;
+
+ ps_best_mv_8x4->i2_mv_x = best_mv_x_8x4;
+ ps_best_mv_8x4->i2_mv_y = best_mv_y_8x4;
+}
+
+void hme_combine_4x4_sads_and_compute_cost_high_quality_neon(
+ S08 i1_ref_idx, /* reference index; added to every mv weight */
+ range_prms_t *ps_mv_range, /* mv range that is swept */
+ range_prms_t *ps_mv_limit, /* mv limits that fix the layout of the SAD tables */
+ hme_mv_t *ps_best_mv_4x8, /* [out] mv with the lowest 4x8 cost */
+ hme_mv_t *ps_best_mv_8x4, /* [out] mv with the lowest 8x4 cost */
+ pred_ctxt_t *ps_pred_ctxt, /* supplies lambda and lambda_q_shift */
+ PF_MV_COST_FXN pf_mv_cost_compute, /* NOTE(review): not referenced in this function */
+ S16 *pi2_sads_4x4_current, /* 4x4 SAD table of the current block */
+ S16 *pi2_sads_4x4_east, /* 4x4 SAD table added to current to form the 8x4 SAD */
+ S16 *pi2_sads_4x4_south) /* 4x4 SAD table added to current to form the 4x8 SAD */
+{ /* Sums neighbouring 4x4 SADs into 4x8 / 8x4 SADs, adds the lambda-weighted mv cost, and returns the cheapest mv of each */
+ S32 best_mv_y_4x8, best_mv_x_4x8, best_mv_y_8x4, best_mv_x_8x4; /* NOTE(review): assigned only inside BEST_COST */
+
+ S32 stepy = HME_COARSE_STEP_SIZE_HIGH_QUALITY;
+ S32 stepx = HME_COARSE_STEP_SIZE_HIGH_QUALITY;
+ /*TODO: Calculate Step shift from the #define HME_COARSE_STEP_SIZE_HIGH_QUALITY */
+ S32 step_shift_x = 1;
+ S32 step_shift_y = 1;
+
+ S32 mvx, mvy, mv_x_offset, mv_y_offset, mv_x_range, mv_y_range;
+
+ S32 lambda = ps_pred_ctxt->lambda;
+ S32 lambda_q_shift = ps_pred_ctxt->lambda_q_shift;
+ S32 rnd = 1 << (lambda_q_shift - 1); /* rounding term for the right shift; assumes lambda_q_shift >= 1 - TODO confirm */
+
+ S32 min_cost_4x8 = MAX_32BIT_VAL;
+ S32 min_cost_8x4 = MAX_32BIT_VAL;
+
+ S32 i;
+
+ const uint16x8_t v_ref_idx = vdupq_n_u16(i1_ref_idx);
+ const uint32x4_t v_lambda = vdupq_n_u32(lambda);
+ const uint32x4_t v_rnd_factor = vdupq_n_u32(rnd);
+ const int32x4_t v_lambda_q_shift = vdupq_n_s32(-lambda_q_shift); /* negative count makes vshlq_u32 shift right */
+
+ mv_x_offset = (-ps_mv_limit->i2_min_x >> step_shift_x); /* NOTE(review): negates before shifting, unlike hme_store_4x4_sads_high_quality_neon; results can differ when i2_min_x is odd */
+ mv_y_offset = (-ps_mv_limit->i2_min_y >> step_shift_y);
+ mv_x_range = (-ps_mv_limit->i2_min_x + ps_mv_limit->i2_max_x) >> step_shift_x; /* entries per SAD table row */
+ mv_y_range = (-ps_mv_limit->i2_min_y + ps_mv_limit->i2_max_y) >> step_shift_y; /* NOTE(review): not read below */
+
+ ASSERT(MAX_MVX_SUPPORTED_IN_COARSE_LAYER >= ABS(ps_mv_range->i2_max_x));
+ ASSERT(MAX_MVY_SUPPORTED_IN_COARSE_LAYER >= ABS(ps_mv_range->i2_max_y));
+
+ /* Run 2 loops to sweep over the reference area */
+ for(mvy = ps_mv_range->i2_min_y; mvy < ps_mv_range->i2_max_y; mvy += stepy)
+ {
+ /* LUT: (2 * hme_get_range(mv_y) - 1) + ((!mv_y) ? 0 : 1) */
+ uint16x8_t mvy_wt = vld1q_u16((U16 *)&gi2_mvy_range[ABS(mvy)][0]);
+
+ /* mvy wt + ref_idx */
+ mvy_wt = vaddq_u16(mvy_wt, v_ref_idx);
+
+ for(mvx = ps_mv_range->i2_min_x; mvx < ps_mv_range->i2_max_x;) /* mvx is advanced inside the branches */
+ {
+ S32 sad_pos = ((mvx >> step_shift_x) + mv_x_offset) +
+ ((mvy >> step_shift_y) + mv_y_offset) * mv_x_range;
+
+ if(mvx + (8 * stepx) <= ps_mv_range->i2_max_x) // 8x4: 8 mv candidates per iteration
+ {
+ uint16x8_t curr = vld1q_u16((U16 *)pi2_sads_4x4_current + sad_pos);
+ uint16x8_t south = vld1q_u16((U16 *)pi2_sads_4x4_south + sad_pos);
+ uint16x8_t east = vld1q_u16((U16 *)pi2_sads_4x4_east + sad_pos);
+ uint16x8_t sad_4x8 = vaddq_u16(curr, south);
+ uint16x8_t sad_8x4 = vaddq_u16(curr, east);
+ /* LUT: (2 * hme_get_range(mv_x) - 1) + ((!mv_x) ? 0 : 1) */
+ uint16x8_t mv_wt = vld1q_u16(
+ (U16 *)&gi2_mvx_range_high_quality[mvx + MAX_MVX_SUPPORTED_IN_COARSE_LAYER][0]);
+ uint32x4_t total_cost_0, total_cost_1;
+ uint16x8_t total_cost;
+ U16 sad_array[2][8];
+
+ /* mv weight + ref_idx */
+ mv_wt = vaddq_u16(mv_wt, mvy_wt);
+
+ total_cost_0 = vmulq_u32(v_lambda, vmovl_u16(vget_low_u16(mv_wt)));
+ total_cost_1 = vmulq_u32(v_lambda, vmovl_u16(vget_high_u16(mv_wt)));
+
+ total_cost_0 = vaddq_u32(total_cost_0, v_rnd_factor);
+ total_cost_1 = vaddq_u32(total_cost_1, v_rnd_factor);
+
+ total_cost_0 = vshlq_u32(total_cost_0, v_lambda_q_shift); /* (wt * lambda + rnd) >> lambda_q_shift */
+ total_cost_1 = vshlq_u32(total_cost_1, v_lambda_q_shift);
+
+ total_cost = vcombine_u16(vmovn_u32(total_cost_0), vmovn_u32(total_cost_1)); /* keeps the low 16 bits of each cost */
+
+ sad_4x8 = vaddq_u16(total_cost, sad_4x8);
+ sad_8x4 = vaddq_u16(total_cost, sad_8x4);
+
+ vst1q_u16(sad_array[0], sad_4x8);
+ vst1q_u16(sad_array[1], sad_8x4);
+
+ for(i = 0; i < 8; i++) /* scalar scan for the minimum over the 8 candidates */
+ {
+ BEST_COST(i);
+ }
+ mvx += stepx * 8;
+ }
+ else if(mvx + (4 * stepx) <= ps_mv_range->i2_max_x) // 4x4: 4 mv candidates per iteration
+ {
+ uint16x4_t curr = vld1_u16((U16 *)pi2_sads_4x4_current + sad_pos);
+ uint16x4_t south = vld1_u16((U16 *)pi2_sads_4x4_south + sad_pos);
+ uint16x4_t east = vld1_u16((U16 *)pi2_sads_4x4_east + sad_pos);
+ uint16x4_t sad_4x8 = vadd_u16(curr, south);
+ uint16x4_t sad_8x4 = vadd_u16(curr, east);
+ /* LUT: (2 * hme_get_range(mv_x) - 1) + ((!mv_x) ? 0 : 1) */
+ uint16x4_t mv_wt = vld1_u16(
+ (U16 *)&gi2_mvx_range_high_quality[mvx + MAX_MVX_SUPPORTED_IN_COARSE_LAYER][0]);
+ uint32x4_t total_cost;
+ U16 sad_array[2][4];
+
+ /* mv weight + ref_idx */
+ mv_wt = vadd_u16(mv_wt, vget_low_u16(mvy_wt));
+
+ total_cost = vmulq_u32(v_lambda, vmovl_u16(mv_wt));
+ total_cost = vaddq_u32(total_cost, v_rnd_factor);
+ total_cost = vshlq_u32(total_cost, v_lambda_q_shift); /* (wt * lambda + rnd) >> lambda_q_shift */
+
+ sad_4x8 = vadd_u16(vmovn_u32(total_cost), sad_4x8);
+ sad_8x4 = vadd_u16(vmovn_u32(total_cost), sad_8x4);
+
+ vst1_u16(sad_array[0], sad_4x8);
+ vst1_u16(sad_array[1], sad_8x4);
+
+ for(i = 0; i < 4; i++)
+ {
+ BEST_COST(i);
+ }
+
+ mvx += stepx * 4;
+ }
+ else
+ {
+ S16 sad_array[2][1]; /* NOTE(review): signed here, while the vector paths use U16 */
+ S32 mv_cost;
+
+ /* Get SADs by adding the current SAD to the south (4x8) and east (8x4) SADs */
+ sad_array[0][0] = pi2_sads_4x4_current[sad_pos] + pi2_sads_4x4_south[sad_pos];
+ sad_array[1][0] = pi2_sads_4x4_current[sad_pos] + pi2_sads_4x4_east[sad_pos];
+
+ mv_cost = gi2_mvy_range[ABS(mvy)][0] +
+ gi2_mvx_range_high_quality[mvx + MAX_MVX_SUPPORTED_IN_COARSE_LAYER][0] +
+ i1_ref_idx;
+
+ mv_cost = (mv_cost * lambda + rnd) >> lambda_q_shift;
+
+ sad_array[0][0] += mv_cost;
+ sad_array[1][0] += mv_cost;
+
+ BEST_COST(0);
+ mvx += stepx;
+ }
+ }
+ }
+
+ ps_best_mv_4x8->i2_mv_x = best_mv_x_4x8;
+ ps_best_mv_4x8->i2_mv_y = best_mv_y_4x8;
+
+ ps_best_mv_8x4->i2_mv_x = best_mv_x_8x4;
+ ps_best_mv_8x4->i2_mv_y = best_mv_y_8x4;
+}
diff --git a/encoder/arm/ihevce_common_utils_neon.c b/encoder/arm/ihevce_common_utils_neon.c
new file mode 100644
index 0000000..62fffc2
--- /dev/null
+++ b/encoder/arm/ihevce_common_utils_neon.c
@@ -0,0 +1,2059 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ihevce_common_utils_neon.c
+*
+* @brief
+* Contains intrinsic definitions of functions for sao param
+*
+* @author
+* ittiam
+*
+* @par List of Functions:
+* - ihevce_get_luma_eo_sao_params_neon()
+* - ihevce_get_chroma_eo_sao_params_neon()
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+#include <arm_neon.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_debug.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+#include "ihevc_cmn_utils_neon.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_hle_interface.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_entropy_structs.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+#include "ihevce_enc_loop_structs.h"
+#include "ihevce_common_utils.h"
+#include "ihevce_global_tables.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+static void ihevce_wt_avg_2d_16x1_neon(
+ UWORD8 *pu1_pred0, /* first predictor; 16 bytes are read */
+ UWORD8 *pu1_pred1, /* second predictor; 16 bytes are read */
+ UWORD8 *pu1_dst, /* [out] destination; 16 bytes are written */
+ WORD32 w0, /* weight applied to pred0 */
+ WORD32 w1, /* weight applied to pred1 */
+ WORD32 rnd, /* offset added after the shift (truncated to 16 bits) */
+ WORD32 shift) /* right-shift applied to the weighted sum */
+{ /* Weighted average of 16 pixels: dst = saturate_u8(((p0 * w0 + p1 * w1) >> shift) + rnd) */
+ uint8x16_t a0, a1;
+ int32x4_t a6, a7, a9;
+ int32x4_t reg0[4], reg1[4];
+ int16x8_t a2, a3, a4, a5, a8;
+
+ a8 = vdupq_n_s16((WORD16)rnd);
+
+ a6 = vdupq_n_s32(w0);
+ a7 = vdupq_n_s32(w1);
+ a9 = vdupq_n_s32(-shift); /* negative count makes vshlq_s32 shift right */
+
+ a0 = vld1q_u8(pu1_pred0);
+ a1 = vld1q_u8(pu1_pred1);
+
+ a2 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(a0))); /* widen pixels 8 -> 16 bit */
+ a3 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(a0)));
+ a4 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(a1)));
+ a5 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(a1)));
+
+ reg0[0] = vmovl_s16(vget_low_s16(a2)); /* widen 16 -> 32 bit so the products with w0/w1 are 32-bit */
+ reg0[1] = vmovl_s16(vget_high_s16(a2));
+ reg0[2] = vmovl_s16(vget_low_s16(a3));
+ reg0[3] = vmovl_s16(vget_high_s16(a3));
+
+ reg1[0] = vmovl_s16(vget_low_s16(a4));
+ reg1[1] = vmovl_s16(vget_high_s16(a4));
+ reg1[2] = vmovl_s16(vget_low_s16(a5));
+ reg1[3] = vmovl_s16(vget_high_s16(a5));
+
+ reg0[0] = vmulq_s32(reg0[0], a6); /* p0 * w0 */
+ reg0[1] = vmulq_s32(reg0[1], a6);
+ reg0[2] = vmulq_s32(reg0[2], a6);
+ reg0[3] = vmulq_s32(reg0[3], a6);
+
+ reg1[0] = vmulq_s32(reg1[0], a7); /* p1 * w1 */
+ reg1[1] = vmulq_s32(reg1[1], a7);
+ reg1[2] = vmulq_s32(reg1[2], a7);
+ reg1[3] = vmulq_s32(reg1[3], a7);
+
+ reg0[0] = vaddq_s32(reg0[0], reg1[0]);
+ reg0[1] = vaddq_s32(reg0[1], reg1[1]);
+ reg0[2] = vaddq_s32(reg0[2], reg1[2]);
+ reg0[3] = vaddq_s32(reg0[3], reg1[3]);
+
+ reg0[0] = vshlq_s32(reg0[0], a9);
+ reg0[1] = vshlq_s32(reg0[1], a9);
+ reg0[2] = vshlq_s32(reg0[2], a9);
+ reg0[3] = vshlq_s32(reg0[3], a9); // (p0*w0 + p1*w1) >> shift
+
+ a2 = vcombine_s16(vmovn_s32(reg0[0]), vmovn_s32(reg0[1])); /* narrow to 16 bit, keeping the low halves */
+ a3 = vcombine_s16(vmovn_s32(reg0[2]), vmovn_s32(reg0[3]));
+
+ a2 = vaddq_s16(a2, a8);
+ a3 = vaddq_s16(a3, a8); // ((p0*w0 + p1*w1) >> shift) + rnd
+ a0 = vcombine_u8(vqmovun_s16(a2), vqmovun_s16(a3)); /* saturate to the 0..255 range */
+
+ vst1q_u8(pu1_dst, a0);
+}
+
+static void ihevce_wt_avg_2d_8x1_neon(
+ UWORD8 *pu1_pred0, /* first predictor; 8 bytes are read */
+ UWORD8 *pu1_pred1, /* second predictor; 8 bytes are read */
+ UWORD8 *pu1_dst, /* [out] destination; 8 bytes are written */
+ WORD32 w0, /* weight applied to pred0 */
+ WORD32 w1, /* weight applied to pred1 */
+ WORD32 rnd, /* offset added after the shift (truncated to 16 bits) */
+ WORD32 shift) /* right-shift applied to the weighted sum */
+{ /* Weighted average of 8 pixels: dst = saturate_u8(((p0 * w0 + p1 * w1) >> shift) + rnd) */
+ uint8x8_t a2, a3;
+ int16x8_t a0, a1, a6;
+ int32x4_t a4, a5, a7, a8, a9, a10, a11;
+
+ a6 = vdupq_n_s16((WORD16)rnd);
+
+ a4 = vdupq_n_s32(w0);
+ a5 = vdupq_n_s32(w1);
+ a7 = vdupq_n_s32((-shift)); /* negative count makes vshlq_s32 shift right */
+
+ a2 = vld1_u8(pu1_pred0);
+ a3 = vld1_u8(pu1_pred1);
+ a0 = vreinterpretq_s16_u16(vmovl_u8(a2)); /* widen pixels 8 -> 16 bit */
+ a1 = vreinterpretq_s16_u16(vmovl_u8(a3));
+
+ a8 = vmovl_s16(vget_low_s16(a0)); /* widen 16 -> 32 bit so the products with w0/w1 are 32-bit */
+ a9 = vmovl_s16(vget_high_s16(a0));
+ a10 = vmovl_s16(vget_low_s16(a1));
+ a11 = vmovl_s16(vget_high_s16(a1));
+
+ a8 = vmulq_s32(a8, a4); /* p0 * w0 */
+ a9 = vmulq_s32(a9, a4);
+ a10 = vmulq_s32(a10, a5); /* p1 * w1 */
+ a11 = vmulq_s32(a11, a5);
+
+ a8 = vaddq_s32(a8, a10); /* weighted sum of pixels 0..3 */
+ a10 = vaddq_s32(a9, a11); /* weighted sum of pixels 4..7 (a10 is reused) */
+
+ a8 = vshlq_s32(a8, a7);
+ a10 = vshlq_s32(a10, a7);
+
+ a0 = vcombine_s16(vmovn_s32(a8), vmovn_s32(a10)); /* narrow to 16 bit, keeping the low halves */
+ a0 = vaddq_s16(a0, a6); /* rnd is added after the shift */
+ a2 = vqmovun_s16(a0); /* saturate to the 0..255 range */
+ vst1_u8(pu1_dst, a2);
+}
+
+static void ihevce_wt_avg_2d_4xn_neon(
+ UWORD8 *pu1_pred0, /* first predictor buffer */
+ UWORD8 *pu1_pred1, /* second predictor buffer */
+ WORD32 pred0_strd, /* row stride of pred0 */
+ WORD32 pred1_strd, /* row stride of pred1 */
+ WORD32 wd, /* width in pixels; consumed 4 columns at a time */
+ WORD32 ht, /* height in pixels; consumed 4 rows at a time */
+ UWORD8 *pu1_dst, /* [out] destination buffer */
+ WORD32 dst_strd, /* row stride of the destination */
+ WORD32 w0, /* weight applied to pred0 */
+ WORD32 w1, /* weight applied to pred1 */
+ WORD32 rnd, /* offset added after the shift (truncated to 16 bits) */
+ WORD32 shift) /* right-shift applied to the weighted sum */
+{ /* Weighted average over a wd x ht area in 4x4 tiles: dst = saturate_u8(((p0 * w0 + p1 * w1) >> shift) + rnd) */
+ WORD32 i, j;
+ uint8x16_t src0_u8, src1_u8;
+ uint16x8_t a0, a1, a2, a3;
+ int32x4_t reg0[4], reg1[4];
+ int32x4_t a4, a5, a7;
+ int16x8_t a8, a9, a6;
+ uint32x2_t p0, p1;
+
+ a6 = vdupq_n_s16((WORD16)rnd);
+
+ a4 = vdupq_n_s32(w0);
+ a5 = vdupq_n_s32(w1);
+ a7 = vdupq_n_s32((-shift)); /* negative count makes vshlq_s32 shift right */
+
+ for(i = 0; i < ht; i = i + 4) /* assumes ht is a multiple of 4 - TODO confirm with callers */
+ {
+ for(j = 0; j < wd; j = j + 4) /* assumes wd is a multiple of 4 - TODO confirm with callers */
+ {
+ src0_u8 = load_unaligned_u8q(pu1_pred0 + ((i * pred0_strd) + j), pred0_strd); /* presumably gathers a 4x4 tile - confirm in ihevc_cmn_utils_neon.h */
+ src1_u8 = load_unaligned_u8q(pu1_pred1 + ((i * pred1_strd) + j), pred1_strd);
+
+ a0 = vmovl_u8(vget_low_u8(src0_u8));
+ a1 = vmovl_u8(vget_high_u8(src0_u8));
+ a2 = vmovl_u8(vget_low_u8(src1_u8));
+ a3 = vmovl_u8(vget_high_u8(src1_u8));
+
+ reg0[0] = vmovl_s16(vreinterpret_s16_u16(vget_low_u16(a0))); /* widen to 32 bit for the weight multiply */
+ reg0[1] = vmovl_s16(vreinterpret_s16_u16(vget_high_u16(a0)));
+ reg0[2] = vmovl_s16(vreinterpret_s16_u16(vget_low_u16(a1)));
+ reg0[3] = vmovl_s16(vreinterpret_s16_u16(vget_high_u16(a1)));
+
+ reg1[0] = vmovl_s16(vreinterpret_s16_u16(vget_low_u16(a2)));
+ reg1[1] = vmovl_s16(vreinterpret_s16_u16(vget_high_u16(a2)));
+ reg1[2] = vmovl_s16(vreinterpret_s16_u16(vget_low_u16(a3)));
+ reg1[3] = vmovl_s16(vreinterpret_s16_u16(vget_high_u16(a3)));
+
+ reg0[0] = vmulq_s32(reg0[0], a4); /* p0 * w0 */
+ reg0[1] = vmulq_s32(reg0[1], a4);
+ reg0[2] = vmulq_s32(reg0[2], a4);
+ reg0[3] = vmulq_s32(reg0[3], a4);
+
+ reg1[0] = vmulq_s32(reg1[0], a5); /* p1 * w1 */
+ reg1[1] = vmulq_s32(reg1[1], a5);
+ reg1[2] = vmulq_s32(reg1[2], a5);
+ reg1[3] = vmulq_s32(reg1[3], a5);
+
+ reg0[0] = vaddq_s32(reg0[0], reg1[0]);
+ reg0[1] = vaddq_s32(reg0[1], reg1[1]);
+ reg0[2] = vaddq_s32(reg0[2], reg1[2]);
+ reg0[3] = vaddq_s32(reg0[3], reg1[3]);
+
+ reg0[0] = vshlq_s32(reg0[0], a7); /* (p0 * w0 + p1 * w1) >> shift */
+ reg0[1] = vshlq_s32(reg0[1], a7);
+ reg0[2] = vshlq_s32(reg0[2], a7);
+ reg0[3] = vshlq_s32(reg0[3], a7);
+
+ a8 = vcombine_s16(vmovn_s32(reg0[0]), vmovn_s32(reg0[1]));
+ a9 = vcombine_s16(vmovn_s32(reg0[2]), vmovn_s32(reg0[3]));
+
+ a8 = vaddq_s16(a8, a6); /* rnd is added after the shift */
+ a9 = vaddq_s16(a9, a6);
+
+ p0 = vreinterpret_u32_u8(vqmovun_s16(a8)); /* saturate to 0..255; each 32-bit lane holds one 4-pixel row */
+ p1 = vreinterpret_u32_u8(vqmovun_s16(a9));
+
+ *(UWORD32 *)pu1_dst = vget_lane_u32(p0, 0); /* NOTE(review): 32-bit stores through a cast byte pointer; alignment is not checked here */
+ *(UWORD32 *)(pu1_dst + dst_strd) = vget_lane_u32(p0, 1);
+ *(UWORD32 *)(pu1_dst + 2 * dst_strd) = vget_lane_u32(p1, 0);
+ *(UWORD32 *)(pu1_dst + 3 * dst_strd) = vget_lane_u32(p1, 1);
+
+ pu1_dst += 4;
+ }
+ pu1_dst = pu1_dst - wd + 4 * dst_strd; /* rewind to the row start, then move down 4 rows */
+ }
+}
+
+/**
+********************************************************************************
+*
+* @brief Weighted pred of 2 predictor buffers as per spec
+* dst = sat_u8(((pred0 * w0 + pred1 * w1) >> (log_wdc + 1)) + ((o0 + o1 + 1) >> 1))
+* @param[in] pu1_pred0 : Pred0 buffer
+*
+* @param[in] pu1_pred1 : Pred1 buffer
+*
+* @param[in] pred0_strd : Stride of pred0 buffer
+*
+* @param[in] pred1_strd : Stride of pred1 buffer
+*
+* @param[in] wd : Width of pred block; handled values are 4, 8, 12, 16, 24, 32, 48 and 64
+*
+* @param[in] ht : Height of pred block (rows are processed four at a time when wd is 4 or 12)
+*
+* @param[out] pu1_dst : Destination buffer that will hold result
+*
+* @param[in] dst_strd : Stride of dest buffer
+*
+* @param[in] w0 : Weighting factor of Pred0
+*
+* @param[in] w1 : Weighting factor of Pred1
+*
+* @param[in] o0 : Offset for pred0; only (o0 + o1 + 1) >> 1 is used
+*
+* @param[in] o1 : Offset for pred1; only (o0 + o1 + 1) >> 1 is used
+*
+* @param[in] log_wdc : Shift factor as per spec; the applied downshift is log_wdc + 1
+*
+* @return none
+*
+********************************************************************************
+*/
+void ihevce_wt_avg_2d_neon(
+ UWORD8 *pu1_pred0,
+ UWORD8 *pu1_pred1,
+ WORD32 pred0_strd,
+ WORD32 pred1_strd,
+ WORD32 wd,
+ WORD32 ht,
+ UWORD8 *pu1_dst,
+ WORD32 dst_strd,
+ WORD32 w0,
+ WORD32 w1,
+ WORD32 o0,
+ WORD32 o1,
+ WORD32 log_wdc)
+{
+ /* Total Rounding term to be added, including offset */
+ WORD32 rnd = (o0 + o1 + 1) >> 1; // << log_wdc;
+ /* Downshift */
+ WORD32 shift = log_wdc + 1;
+ /* loop counters */
+ WORD32 i, j;
+ /* Dispatch on the block width to the matching NEON kernel */
+ switch(wd)
+ {
+ case 4:
+ case 12: /* 4-pixel column tiles, four rows per pass; the kernel loops over the whole block */
+ ihevce_wt_avg_2d_4xn_neon(
+ pu1_pred0,
+ pu1_pred1,
+ pred0_strd,
+ pred1_strd,
+ wd,
+ ht,
+ pu1_dst,
+ dst_strd,
+ w0,
+ w1,
+ rnd,
+ shift);
+ break;
+ case 8:
+ case 24: /* each row is covered by 8-pixel segments */
+ for(i = 0; i < ht; i++)
+ {
+ for(j = 0; j < wd; j = j + 8)
+ {
+ ihevce_wt_avg_2d_8x1_neon(
+ pu1_pred0 + ((i * pred0_strd) + j),
+ pu1_pred1 + ((i * pred1_strd) + j),
+ pu1_dst + ((i * dst_strd) + j),
+ w0,
+ w1,
+ rnd,
+ shift);
+ }
+ }
+ break;
+ case 16: /* one 16-pixel kernel call per row */
+ for(i = 0; i < ht; i++)
+ ihevce_wt_avg_2d_16x1_neon(
+ pu1_pred0 + (i * pred0_strd),
+ pu1_pred1 + (i * pred1_strd),
+ pu1_dst + (i * dst_strd),
+ w0,
+ w1,
+ rnd,
+ shift);
+ break;
+ case 32:
+ case 64: /* each row is covered by 16-pixel segments */
+ for(i = 0; i < ht; i++)
+ {
+ for(j = 0; j < wd; j = j + 16)
+ {
+ ihevce_wt_avg_2d_16x1_neon(
+ pu1_pred0 + ((i * pred0_strd) + j),
+ pu1_pred1 + ((i * pred1_strd) + j),
+ pu1_dst + ((i * dst_strd) + j),
+ w0,
+ w1,
+ rnd,
+ shift);
+ }
+ }
+ break;
+ case 48: /* same 16-pixel tiling as the 32/64 case above */
+ for(i = 0; i < ht; i++)
+ {
+ for(j = 0; j < wd; j = j + 16)
+ {
+ ihevce_wt_avg_2d_16x1_neon(
+ pu1_pred0 + ((i * pred0_strd) + j),
+ pu1_pred1 + ((i * pred1_strd) + j),
+ pu1_dst + ((i * dst_strd) + j),
+ w0,
+ w1,
+ rnd,
+ shift);
+ }
+ }
+ break;
+ default: /* any other width: assert only, pu1_dst is left untouched */
+ assert(0);
+ break;
+ }
+ return;
+}
+
+static INLINE WORD32 sad_cal(int16x8_t temp_reg) /* Returns the sum of the eight signed 16-bit lanes of temp_reg */
+{ /* Plain signed horizontal sum: no absolute value is taken in this helper */
+ int64x2_t sad_reg = vpaddlq_s32(vpaddlq_s16(temp_reg)); /* pairwise widening adds: 8 x s16 -> 4 x s32 -> 2 x s64 */
+ /* Add lane 0 of the 32-bit view of each 64-bit partial sum and return that lane */
+ return (vget_lane_s32(
+ vadd_s32(
+ vreinterpret_s32_s64(vget_low_s64(sad_reg)),
+ vreinterpret_s32_s64(vget_high_s64(sad_reg))),
+ 0));
+}
+
+void ihevce_get_luma_eo_sao_params_neon(
+ void *pv_sao_ctxt,
+ WORD32 eo_sao_class,
+ WORD32 *pi4_acc_error_category,
+ WORD32 *pi4_category_count)
+{
+ /*temp var*/
+ UWORD8 *pu1_luma_recon_buf, *pu1_luma_src_buf;
+ UWORD8 *pu1_luma_src_buf_copy, *pu1_luma_recon_buf_copy;
+ WORD32 row_end, col_end, row, col;
+ WORD32 row_start = 0, col_start = 0;
+ WORD32 wd, rem_wd;
+ WORD32 a, b, c, edge_idx, pel_err;
+
+ int16x8_t temp_reg0, temp_reg1, temp_reg2, temp_reg3, temp_reg4;
+ int16x8_t edgeidx_reg0, edgeidx_reg1, edgeidx_reg2, edgeidx_reg3, edgeidx_reg4;
+ int16x8_t edgeidx_reg5, edgeidx_reg6, edgeidx_reg7;
+ int16x8_t pel_error, pel_error1;
+ int16x8_t sign_reg0, sign_reg1, sign_reg, sign_reg2, sign_reg3;
+ int16x8_t edgeidx, edgeidx1;
+ int16x8_t temp_reg5, temp_reg6, temp_reg7;
+ uint8x16_t src_buf_8x16, recon_buf_8x16, recon_buf0_8x16, recon_buf1_8x16;
+ uint8x8_t src_buf, recon_buf, recon_buf0, recon_buf1;
+
+ sao_ctxt_t *ps_sao_ctxt = (sao_ctxt_t *)pv_sao_ctxt;
+ const WORD32 i4_luma_recon_strd = ps_sao_ctxt->i4_cur_luma_recon_stride;
+ const WORD32 i4_luma_src_strd = ps_sao_ctxt->i4_cur_luma_src_stride;
+
+ const int16x8_t const_2 = vdupq_n_s16(2);
+ const int16x8_t const_0 = vdupq_n_s16(0);
+ const int16x8_t const_1 = vdupq_n_s16(1);
+ const int16x8_t const_3 = vdupq_n_s16(3);
+ const int16x8_t const_4 = vdupq_n_s16(4);
+
+ row_end = ps_sao_ctxt->i4_sao_blk_ht;
+ col_end = ps_sao_ctxt->i4_sao_blk_wd;
+
+ if((ps_sao_ctxt->i4_ctb_x == 0) && (eo_sao_class != SAO_EDGE_90_DEG))
+ {
+ col_start = 1;
+ }
+
+ if(((ps_sao_ctxt->i4_ctb_x + 1) == ps_sao_ctxt->ps_sps->i2_pic_wd_in_ctb) &&
+ (eo_sao_class != SAO_EDGE_90_DEG))
+ {
+ col_end = col_end - 1;
+ }
+
+ if((ps_sao_ctxt->i4_ctb_y == 0) && (eo_sao_class != SAO_EDGE_0_DEG))
+ {
+ row_start = 1;
+ }
+
+ if(((ps_sao_ctxt->i4_ctb_y + 1) == ps_sao_ctxt->ps_sps->i2_pic_ht_in_ctb) &&
+ (eo_sao_class != SAO_EDGE_0_DEG))
+ {
+ row_end = row_end - 1;
+ }
+ wd = col_end - col_start;
+ rem_wd = wd;
+ pu1_luma_recon_buf =
+ ps_sao_ctxt->pu1_cur_luma_recon_buf + col_start + (row_start * i4_luma_recon_strd);
+ pu1_luma_src_buf =
+ ps_sao_ctxt->pu1_cur_luma_src_buf + col_start + (row_start * i4_luma_src_strd);
+
+ switch(eo_sao_class)
+ {
+ case SAO_EDGE_0_DEG:
+ for(row = row_start; row < row_end; row++)
+ {
+ pu1_luma_src_buf_copy = pu1_luma_src_buf;
+ pu1_luma_recon_buf_copy = pu1_luma_recon_buf;
+ for(col = wd; col > 15; col -= 16)
+ {
+ /*load src and recon data*/
+ src_buf_8x16 = vld1q_u8(pu1_luma_src_buf);
+ recon_buf_8x16 = vld1q_u8(pu1_luma_recon_buf);
+ recon_buf0_8x16 = vld1q_u8(pu1_luma_recon_buf - 1);
+ recon_buf1_8x16 = vld1q_u8(pu1_luma_recon_buf + 1);
+
+ /*pel_error*/
+ pel_error = vreinterpretq_s16_u16(
+ vsubl_u8(vget_low_u8(src_buf_8x16), vget_low_u8(recon_buf_8x16)));
+ pel_error1 = vreinterpretq_s16_u16(
+ vsubl_u8(vget_high_u8(src_buf_8x16), vget_high_u8(recon_buf_8x16)));
+
+ /*sign*/
+ sign_reg0 = vreinterpretq_s16_u16(
+ vsubl_u8(vget_low_u8(recon_buf_8x16), vget_low_u8(recon_buf0_8x16)));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg0, const_0);
+ sign_reg0 = (int16x8_t)vcltq_s16(sign_reg0, const_0);
+ sign_reg0 = vsubq_s16(sign_reg0, sign_reg);
+
+ sign_reg1 = vreinterpretq_s16_u16(
+ vsubl_u8(vget_low_u8(recon_buf_8x16), vget_low_u8(recon_buf1_8x16)));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg1, const_0);
+ sign_reg1 = (int16x8_t)vcltq_s16(sign_reg1, const_0);
+ sign_reg1 = vsubq_s16(sign_reg1, sign_reg);
+
+ sign_reg2 = vreinterpretq_s16_u16(
+ vsubl_u8(vget_high_u8(recon_buf_8x16), vget_high_u8(recon_buf0_8x16)));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg2, const_0);
+ sign_reg2 = (int16x8_t)vcltq_s16(sign_reg2, const_0);
+ sign_reg2 = vsubq_s16(sign_reg2, sign_reg);
+
+ sign_reg3 = vreinterpretq_s16_u16(
+ vsubl_u8(vget_high_u8(recon_buf_8x16), vget_high_u8(recon_buf1_8x16)));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg3, const_0);
+ sign_reg3 = (int16x8_t)vcltq_s16(sign_reg3, const_0);
+ sign_reg3 = vsubq_s16(sign_reg3, sign_reg);
+ /*edgidx*/
+ edgeidx = vaddq_s16(vaddq_s16(sign_reg0, const_2), sign_reg1);
+ edgeidx1 = vaddq_s16(vaddq_s16(sign_reg2, const_2), sign_reg3);
+
+ edgeidx_reg0 = vmvnq_s16((int16x8_t)vceqq_s16(const_0, pel_error));
+ edgeidx = vandq_s16(edgeidx_reg0, edgeidx);
+
+ edgeidx_reg5 = vmvnq_s16((int16x8_t)vceqq_s16(const_0, pel_error1));
+ edgeidx1 = vandq_s16(edgeidx_reg5, edgeidx1);
+
+ temp_reg0 = (int16x8_t)vceqq_s16(const_0, edgeidx);
+ temp_reg4 = (int16x8_t)vceqq_s16(const_0, edgeidx1);
+ temp_reg1 = (int16x8_t)vceqq_s16(const_1, edgeidx);
+ temp_reg5 = (int16x8_t)vceqq_s16(const_1, edgeidx1);
+
+ temp_reg2 = (int16x8_t)vceqq_s16(const_3, edgeidx);
+ temp_reg6 = (int16x8_t)vceqq_s16(const_3, edgeidx1);
+ temp_reg3 = (int16x8_t)vceqq_s16(const_4, edgeidx);
+ temp_reg7 = (int16x8_t)vceqq_s16(const_4, edgeidx1);
+
+ edgeidx_reg1 = vabsq_s16(temp_reg1);
+ edgeidx_reg5 = vabsq_s16(temp_reg5);
+
+ edgeidx_reg2 = vabsq_s16(temp_reg2);
+ edgeidx_reg6 = vabsq_s16(temp_reg6);
+ edgeidx_reg3 = vabsq_s16(temp_reg3);
+ edgeidx_reg7 = vabsq_s16(temp_reg7);
+
+ temp_reg0 = vandq_s16(temp_reg0, pel_error);
+ temp_reg4 = vandq_s16(temp_reg4, pel_error1);
+ temp_reg1 = vandq_s16(temp_reg1, pel_error);
+ temp_reg5 = vandq_s16(temp_reg5, pel_error1);
+
+ temp_reg2 = vandq_s16(temp_reg2, pel_error);
+ temp_reg6 = vandq_s16(temp_reg6, pel_error1);
+ temp_reg3 = vandq_s16(temp_reg3, pel_error);
+ temp_reg7 = vandq_s16(temp_reg7, pel_error1);
+
+ edgeidx_reg0 = vaddq_s16(const_1, (int16x8_t)vceqq_s16(const_0, temp_reg0));
+ edgeidx_reg4 = vaddq_s16(const_1, (int16x8_t)vceqq_s16(const_0, temp_reg4));
+
+ temp_reg0 = vaddq_s16(temp_reg0, temp_reg4);
+ temp_reg1 = vaddq_s16(temp_reg1, temp_reg5);
+ temp_reg2 = vaddq_s16(temp_reg2, temp_reg6);
+ temp_reg3 = vaddq_s16(temp_reg3, temp_reg7);
+
+ edgeidx_reg0 = vaddq_s16(edgeidx_reg0, edgeidx_reg4);
+ edgeidx_reg1 = vaddq_s16(edgeidx_reg1, edgeidx_reg5);
+ edgeidx_reg2 = vaddq_s16(edgeidx_reg2, edgeidx_reg6);
+ edgeidx_reg3 = vaddq_s16(edgeidx_reg3, edgeidx_reg7);
+
+ /*store peel error*/
+ pi4_acc_error_category[0] += sad_cal(temp_reg0);
+ pi4_acc_error_category[1] += sad_cal(temp_reg1);
+ pi4_acc_error_category[3] += sad_cal(temp_reg2);
+ pi4_acc_error_category[4] += sad_cal(temp_reg3);
+
+ /*store edgeidx account*/
+ pi4_category_count[0] += sad_cal(edgeidx_reg0);
+ pi4_category_count[1] += sad_cal(edgeidx_reg1);
+ pi4_category_count[3] += sad_cal(edgeidx_reg2);
+ pi4_category_count[4] += sad_cal(edgeidx_reg3);
+ pu1_luma_recon_buf += 16;
+ pu1_luma_src_buf += 16;
+ }
+ rem_wd &= 0x0F;
+
+ if(rem_wd > 7)
+ {
+ /*load data*/
+ src_buf = vld1_u8(pu1_luma_src_buf);
+ recon_buf = vld1_u8(pu1_luma_recon_buf);
+ recon_buf0 = vld1_u8(pu1_luma_recon_buf - 1);
+ recon_buf1 = vld1_u8(pu1_luma_recon_buf + 1);
+ /*pel_error*/
+ pel_error = vreinterpretq_s16_u16(vsubl_u8(src_buf, recon_buf));
+ /*sign*/
+ sign_reg0 = vreinterpretq_s16_u16(vsubl_u8(recon_buf, recon_buf0));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg0, const_0);
+ sign_reg0 = (int16x8_t)vcltq_s16(sign_reg0, const_0);
+ sign_reg0 = vsubq_s16(sign_reg0, sign_reg);
+
+ sign_reg1 = vreinterpretq_s16_u16(vsubl_u8(recon_buf, recon_buf1));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg1, const_0);
+ sign_reg1 = (int16x8_t)vcltq_s16(sign_reg1, const_0);
+ sign_reg1 = vsubq_s16(sign_reg1, sign_reg);
+
+ edgeidx = vaddq_s16(vaddq_s16(sign_reg0, const_2), sign_reg1);
+
+ edgeidx_reg0 = vmvnq_s16((int16x8_t)vceqq_s16(const_0, pel_error));
+ edgeidx = vandq_s16(edgeidx_reg0, edgeidx);
+
+ temp_reg0 = (int16x8_t)vceqq_s16(const_0, edgeidx);
+ temp_reg1 = (int16x8_t)vceqq_s16(const_1, edgeidx);
+ temp_reg2 = (int16x8_t)vceqq_s16(const_3, edgeidx);
+ temp_reg3 = (int16x8_t)vceqq_s16(const_4, edgeidx);
+
+ edgeidx_reg1 = vabsq_s16(temp_reg1);
+ edgeidx_reg2 = vabsq_s16(temp_reg2);
+ edgeidx_reg3 = vabsq_s16(temp_reg3);
+
+ temp_reg0 = vandq_s16(temp_reg0, pel_error);
+ temp_reg1 = vandq_s16(temp_reg1, pel_error);
+ temp_reg2 = vandq_s16(temp_reg2, pel_error);
+ temp_reg3 = vandq_s16(temp_reg3, pel_error);
+
+ edgeidx_reg0 = vaddq_s16(const_1, (int16x8_t)vceqq_s16(const_0, temp_reg0));
+ /*store */
+ pi4_acc_error_category[0] += sad_cal(temp_reg0);
+ pi4_acc_error_category[1] += sad_cal(temp_reg1);
+ pi4_acc_error_category[3] += sad_cal(temp_reg2);
+ pi4_acc_error_category[4] += sad_cal(temp_reg3);
+
+ pi4_category_count[0] += sad_cal(edgeidx_reg0);
+ pi4_category_count[1] += sad_cal(edgeidx_reg1);
+ pi4_category_count[3] += sad_cal(edgeidx_reg2);
+ pi4_category_count[4] += sad_cal(edgeidx_reg3);
+ pu1_luma_recon_buf += 8;
+ pu1_luma_src_buf += 8;
+ }
+ rem_wd &= 0x7;
+ if(rem_wd)
+ {
+ for(col = 0; col < rem_wd; col++)
+ {
+ c = pu1_luma_recon_buf[col];
+ a = pu1_luma_recon_buf[col - 1];
+ b = pu1_luma_recon_buf[col + 1];
+ pel_err = pu1_luma_src_buf[col] - pu1_luma_recon_buf[col];
+ edge_idx = 2 + SIGN(c - a) + SIGN(c - b);
+
+ if(pel_err != 0)
+ {
+ pi4_acc_error_category[edge_idx] += pel_err;
+ pi4_category_count[edge_idx]++;
+ }
+ }
+ }
+ pu1_luma_recon_buf = pu1_luma_recon_buf_copy + i4_luma_recon_strd;
+ pu1_luma_src_buf = pu1_luma_src_buf_copy + i4_luma_src_strd;
+ rem_wd = wd;
+ }
+ break;
+ case SAO_EDGE_90_DEG:
+ for(row = row_start; row < row_end; row++)
+ {
+ pu1_luma_src_buf_copy = pu1_luma_src_buf;
+ pu1_luma_recon_buf_copy = pu1_luma_recon_buf;
+ for(col = wd; col > 15; col -= 16)
+ {
+ /*load src and recon data*/
+ src_buf_8x16 = vld1q_u8(pu1_luma_src_buf);
+ recon_buf_8x16 = vld1q_u8(pu1_luma_recon_buf);
+ recon_buf0_8x16 = vld1q_u8(pu1_luma_recon_buf - i4_luma_recon_strd);
+ recon_buf1_8x16 = vld1q_u8(pu1_luma_recon_buf + i4_luma_recon_strd);
+ /*pel_error*/
+ pel_error = vreinterpretq_s16_u16(
+ vsubl_u8(vget_low_u8(src_buf_8x16), vget_low_u8(recon_buf_8x16)));
+ pel_error1 = vreinterpretq_s16_u16(
+ vsubl_u8(vget_high_u8(src_buf_8x16), vget_high_u8(recon_buf_8x16)));
+ /*sign*/
+ sign_reg0 = vreinterpretq_s16_u16(
+ vsubl_u8(vget_low_u8(recon_buf_8x16), vget_low_u8(recon_buf0_8x16)));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg0, const_0);
+ sign_reg0 = (int16x8_t)vcltq_s16(sign_reg0, const_0);
+ sign_reg0 = vsubq_s16(sign_reg0, sign_reg);
+
+ sign_reg1 = vreinterpretq_s16_u16(
+ vsubl_u8(vget_low_u8(recon_buf_8x16), vget_low_u8(recon_buf1_8x16)));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg1, const_0);
+ sign_reg1 = (int16x8_t)vcltq_s16(sign_reg1, const_0);
+ sign_reg1 = vsubq_s16(sign_reg1, sign_reg);
+
+ sign_reg2 = vreinterpretq_s16_u16(
+ vsubl_u8(vget_high_u8(recon_buf_8x16), vget_high_u8(recon_buf0_8x16)));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg2, const_0);
+ sign_reg2 = (int16x8_t)vcltq_s16(sign_reg2, const_0);
+ sign_reg2 = vsubq_s16(sign_reg2, sign_reg);
+
+ sign_reg3 = vreinterpretq_s16_u16(
+ vsubl_u8(vget_high_u8(recon_buf_8x16), vget_high_u8(recon_buf1_8x16)));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg3, const_0);
+ sign_reg3 = (int16x8_t)vcltq_s16(sign_reg3, const_0);
+ sign_reg3 = vsubq_s16(sign_reg3, sign_reg);
+ /*edgeidx*/
+ edgeidx = vaddq_s16(vaddq_s16(sign_reg0, const_2), sign_reg1);
+ edgeidx1 = vaddq_s16(vaddq_s16(sign_reg2, const_2), sign_reg3);
+
+ edgeidx_reg0 = vmvnq_s16((int16x8_t)vceqq_s16(const_0, pel_error));
+ edgeidx = vandq_s16(edgeidx_reg0, edgeidx);
+
+ edgeidx_reg5 = vmvnq_s16((int16x8_t)vceqq_s16(const_0, pel_error1));
+ edgeidx1 = vandq_s16(edgeidx_reg5, edgeidx1);
+
+ temp_reg0 = (int16x8_t)vceqq_s16(const_0, edgeidx);
+ temp_reg4 = (int16x8_t)vceqq_s16(const_0, edgeidx1);
+ temp_reg1 = (int16x8_t)vceqq_s16(const_1, edgeidx);
+ temp_reg5 = (int16x8_t)vceqq_s16(const_1, edgeidx1);
+
+ temp_reg2 = (int16x8_t)vceqq_s16(const_3, edgeidx);
+ temp_reg6 = (int16x8_t)vceqq_s16(const_3, edgeidx1);
+ temp_reg3 = (int16x8_t)vceqq_s16(const_4, edgeidx);
+ temp_reg7 = (int16x8_t)vceqq_s16(const_4, edgeidx1);
+
+ edgeidx_reg1 = vabsq_s16(temp_reg1);
+ edgeidx_reg5 = vabsq_s16(temp_reg5);
+
+ edgeidx_reg2 = vabsq_s16(temp_reg2);
+ edgeidx_reg6 = vabsq_s16(temp_reg6);
+ edgeidx_reg3 = vabsq_s16(temp_reg3);
+ edgeidx_reg7 = vabsq_s16(temp_reg7);
+
+ temp_reg0 = vandq_s16(temp_reg0, pel_error);
+ temp_reg4 = vandq_s16(temp_reg4, pel_error1);
+ temp_reg1 = vandq_s16(temp_reg1, pel_error);
+ temp_reg5 = vandq_s16(temp_reg5, pel_error1);
+
+ temp_reg2 = vandq_s16(temp_reg2, pel_error);
+ temp_reg6 = vandq_s16(temp_reg6, pel_error1);
+ temp_reg3 = vandq_s16(temp_reg3, pel_error);
+ temp_reg7 = vandq_s16(temp_reg7, pel_error1);
+
+ edgeidx_reg0 = vaddq_s16(const_1, (int16x8_t)vceqq_s16(const_0, temp_reg0));
+ edgeidx_reg4 = vaddq_s16(const_1, (int16x8_t)vceqq_s16(const_0, temp_reg4));
+
+ temp_reg0 = vaddq_s16(temp_reg0, temp_reg4);
+ temp_reg1 = vaddq_s16(temp_reg1, temp_reg5);
+ temp_reg2 = vaddq_s16(temp_reg2, temp_reg6);
+ temp_reg3 = vaddq_s16(temp_reg3, temp_reg7);
+
+ edgeidx_reg0 = vaddq_s16(edgeidx_reg0, edgeidx_reg4);
+ edgeidx_reg1 = vaddq_s16(edgeidx_reg1, edgeidx_reg5);
+ edgeidx_reg2 = vaddq_s16(edgeidx_reg2, edgeidx_reg6);
+ edgeidx_reg3 = vaddq_s16(edgeidx_reg3, edgeidx_reg7);
+ /* store */
+ pi4_acc_error_category[0] += sad_cal(temp_reg0);
+ pi4_acc_error_category[1] += sad_cal(temp_reg1);
+ pi4_acc_error_category[3] += sad_cal(temp_reg2);
+ pi4_acc_error_category[4] += sad_cal(temp_reg3);
+ /*store account*/
+ pi4_category_count[0] += sad_cal(edgeidx_reg0);
+ pi4_category_count[1] += sad_cal(edgeidx_reg1);
+ pi4_category_count[3] += sad_cal(edgeidx_reg2);
+ pi4_category_count[4] += sad_cal(edgeidx_reg3);
+ pu1_luma_recon_buf += 16;
+ pu1_luma_src_buf += 16;
+ }
+ rem_wd &= 0x0F;
+
+ if(rem_wd > 7)
+ {
+ /*load*/
+ src_buf = vld1_u8(pu1_luma_src_buf);
+ recon_buf = vld1_u8(pu1_luma_recon_buf);
+ recon_buf0 = vld1_u8(pu1_luma_recon_buf - i4_luma_recon_strd);
+ recon_buf1 = vld1_u8(pu1_luma_recon_buf + i4_luma_recon_strd);
+ /*pel_error*/
+ pel_error = vreinterpretq_s16_u16(vsubl_u8(src_buf, recon_buf));
+ /*sign*/
+ sign_reg0 = vreinterpretq_s16_u16(vsubl_u8(recon_buf, recon_buf0));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg0, const_0);
+ sign_reg0 = (int16x8_t)vcltq_s16(sign_reg0, const_0);
+ sign_reg0 = vsubq_s16(sign_reg0, sign_reg);
+
+ sign_reg1 = vreinterpretq_s16_u16(vsubl_u8(recon_buf, recon_buf1));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg1, const_0);
+ sign_reg1 = (int16x8_t)vcltq_s16(sign_reg1, const_0);
+ sign_reg1 = vsubq_s16(sign_reg1, sign_reg);
+
+ edgeidx = vaddq_s16(vaddq_s16(sign_reg0, const_2), sign_reg1);
+ edgeidx_reg0 = vmvnq_s16((int16x8_t)vceqq_s16(const_0, pel_error));
+ edgeidx = vandq_s16(edgeidx_reg0, edgeidx);
+
+ temp_reg0 = (int16x8_t)vceqq_s16(const_0, edgeidx);
+ temp_reg1 = (int16x8_t)vceqq_s16(const_1, edgeidx);
+ temp_reg2 = (int16x8_t)vceqq_s16(const_3, edgeidx);
+ temp_reg3 = (int16x8_t)vceqq_s16(const_4, edgeidx);
+
+ edgeidx_reg1 = vabsq_s16(temp_reg1);
+ edgeidx_reg2 = vabsq_s16(temp_reg2);
+ edgeidx_reg3 = vabsq_s16(temp_reg3);
+
+ temp_reg0 = vandq_s16(temp_reg0, pel_error);
+ temp_reg1 = vandq_s16(temp_reg1, pel_error);
+ temp_reg2 = vandq_s16(temp_reg2, pel_error);
+ temp_reg3 = vandq_s16(temp_reg3, pel_error);
+
+ edgeidx_reg0 = vaddq_s16(const_1, (int16x8_t)vceqq_s16(const_0, temp_reg0));
+ /*store*/
+ pi4_acc_error_category[0] += sad_cal(temp_reg0);
+ pi4_acc_error_category[1] += sad_cal(temp_reg1);
+ pi4_acc_error_category[3] += sad_cal(temp_reg2);
+ pi4_acc_error_category[4] += sad_cal(temp_reg3);
+
+ pi4_category_count[0] += sad_cal(edgeidx_reg0);
+ pi4_category_count[1] += sad_cal(edgeidx_reg1);
+ pi4_category_count[3] += sad_cal(edgeidx_reg2);
+ pi4_category_count[4] += sad_cal(edgeidx_reg3);
+ pu1_luma_recon_buf += 8;
+ pu1_luma_src_buf += 8;
+ }
+ rem_wd &= 0x7;
+ if(rem_wd)
+ {
+ for(col = 0; col < rem_wd; col++)
+ {
+ c = pu1_luma_recon_buf[col];
+ a = pu1_luma_recon_buf[col - i4_luma_recon_strd];
+ b = pu1_luma_recon_buf[col + i4_luma_recon_strd];
+ pel_err = pu1_luma_src_buf[col] - pu1_luma_recon_buf[col];
+ edge_idx = 2 + SIGN(c - a) + SIGN(c - b);
+
+ if(pel_err != 0)
+ {
+ pi4_acc_error_category[edge_idx] += pel_err;
+ pi4_category_count[edge_idx]++;
+ }
+ }
+ }
+ pu1_luma_recon_buf = pu1_luma_recon_buf_copy + i4_luma_recon_strd;
+ pu1_luma_src_buf = pu1_luma_src_buf_copy + i4_luma_src_strd;
+ rem_wd = wd;
+ }
+ break;
+ case SAO_EDGE_135_DEG:
+ for(row = row_start; row < row_end; row++)
+ {
+ pu1_luma_src_buf_copy = pu1_luma_src_buf;
+ pu1_luma_recon_buf_copy = pu1_luma_recon_buf;
+ for(col = wd; col > 15; col -= 16)
+ {
+ /*load src and recon data*/
+ src_buf_8x16 = vld1q_u8(pu1_luma_src_buf);
+ recon_buf_8x16 = vld1q_u8(pu1_luma_recon_buf);
+ recon_buf0_8x16 = vld1q_u8(pu1_luma_recon_buf - 1 - i4_luma_recon_strd);
+ recon_buf1_8x16 = vld1q_u8(pu1_luma_recon_buf + 1 + i4_luma_recon_strd);
+ /*pel_error*/
+ pel_error = vreinterpretq_s16_u16(
+ vsubl_u8(vget_low_u8(src_buf_8x16), vget_low_u8(recon_buf_8x16)));
+ pel_error1 = vreinterpretq_s16_u16(
+ vsubl_u8(vget_high_u8(src_buf_8x16), vget_high_u8(recon_buf_8x16)));
+ /*sign*/
+ sign_reg0 = vreinterpretq_s16_u16(
+ vsubl_u8(vget_low_u8(recon_buf_8x16), vget_low_u8(recon_buf0_8x16)));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg0, const_0);
+ sign_reg0 = (int16x8_t)vcltq_s16(sign_reg0, const_0);
+ sign_reg0 = vsubq_s16(sign_reg0, sign_reg);
+
+ sign_reg1 = vreinterpretq_s16_u16(
+ vsubl_u8(vget_low_u8(recon_buf_8x16), vget_low_u8(recon_buf1_8x16)));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg1, const_0);
+ sign_reg1 = (int16x8_t)vcltq_s16(sign_reg1, const_0);
+ sign_reg1 = vsubq_s16(sign_reg1, sign_reg);
+
+ sign_reg2 = vreinterpretq_s16_u16(
+ vsubl_u8(vget_high_u8(recon_buf_8x16), vget_high_u8(recon_buf0_8x16)));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg2, const_0);
+ sign_reg2 = (int16x8_t)vcltq_s16(sign_reg2, const_0);
+ sign_reg2 = vsubq_s16(sign_reg2, sign_reg);
+
+ sign_reg3 = vreinterpretq_s16_u16(
+ vsubl_u8(vget_high_u8(recon_buf_8x16), vget_high_u8(recon_buf1_8x16)));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg3, const_0);
+ sign_reg3 = (int16x8_t)vcltq_s16(sign_reg3, const_0);
+ sign_reg3 = vsubq_s16(sign_reg3, sign_reg);
+
+ edgeidx = vaddq_s16(vaddq_s16(sign_reg0, const_2), sign_reg1);
+ edgeidx1 = vaddq_s16(vaddq_s16(sign_reg2, const_2), sign_reg3);
+
+ edgeidx_reg0 = vmvnq_s16((int16x8_t)vceqq_s16(const_0, pel_error));
+ edgeidx = vandq_s16(edgeidx_reg0, edgeidx);
+
+ edgeidx_reg5 = vmvnq_s16((int16x8_t)vceqq_s16(const_0, pel_error1));
+ edgeidx1 = vandq_s16(edgeidx_reg5, edgeidx1);
+
+ temp_reg0 = (int16x8_t)vceqq_s16(const_0, edgeidx);
+ temp_reg4 = (int16x8_t)vceqq_s16(const_0, edgeidx1);
+ temp_reg1 = (int16x8_t)vceqq_s16(const_1, edgeidx);
+ temp_reg5 = (int16x8_t)vceqq_s16(const_1, edgeidx1);
+
+ temp_reg2 = (int16x8_t)vceqq_s16(const_3, edgeidx);
+ temp_reg6 = (int16x8_t)vceqq_s16(const_3, edgeidx1);
+ temp_reg3 = (int16x8_t)vceqq_s16(const_4, edgeidx);
+ temp_reg7 = (int16x8_t)vceqq_s16(const_4, edgeidx1);
+
+ edgeidx_reg1 = vabsq_s16(temp_reg1);
+ edgeidx_reg5 = vabsq_s16(temp_reg5);
+
+ edgeidx_reg2 = vabsq_s16(temp_reg2);
+ edgeidx_reg6 = vabsq_s16(temp_reg6);
+ edgeidx_reg3 = vabsq_s16(temp_reg3);
+ edgeidx_reg7 = vabsq_s16(temp_reg7);
+
+ temp_reg0 = vandq_s16(temp_reg0, pel_error);
+ temp_reg4 = vandq_s16(temp_reg4, pel_error1);
+ temp_reg1 = vandq_s16(temp_reg1, pel_error);
+ temp_reg5 = vandq_s16(temp_reg5, pel_error1);
+
+ temp_reg2 = vandq_s16(temp_reg2, pel_error);
+ temp_reg6 = vandq_s16(temp_reg6, pel_error1);
+ temp_reg3 = vandq_s16(temp_reg3, pel_error);
+ temp_reg7 = vandq_s16(temp_reg7, pel_error1);
+
+ edgeidx_reg0 = vaddq_s16(const_1, (int16x8_t)vceqq_s16(const_0, temp_reg0));
+ edgeidx_reg4 = vaddq_s16(const_1, (int16x8_t)vceqq_s16(const_0, temp_reg4));
+
+ temp_reg0 = vaddq_s16(temp_reg0, temp_reg4);
+ temp_reg1 = vaddq_s16(temp_reg1, temp_reg5);
+ temp_reg2 = vaddq_s16(temp_reg2, temp_reg6);
+ temp_reg3 = vaddq_s16(temp_reg3, temp_reg7);
+
+ edgeidx_reg0 = vaddq_s16(edgeidx_reg0, edgeidx_reg4);
+ edgeidx_reg1 = vaddq_s16(edgeidx_reg1, edgeidx_reg5);
+ edgeidx_reg2 = vaddq_s16(edgeidx_reg2, edgeidx_reg6);
+ edgeidx_reg3 = vaddq_s16(edgeidx_reg3, edgeidx_reg7);
+ /*store*/
+ pi4_acc_error_category[0] += sad_cal(temp_reg0);
+ pi4_acc_error_category[1] += sad_cal(temp_reg1);
+ pi4_acc_error_category[3] += sad_cal(temp_reg2);
+ pi4_acc_error_category[4] += sad_cal(temp_reg3);
+
+ pi4_category_count[0] += sad_cal(edgeidx_reg0);
+ pi4_category_count[1] += sad_cal(edgeidx_reg1);
+ pi4_category_count[3] += sad_cal(edgeidx_reg2);
+ pi4_category_count[4] += sad_cal(edgeidx_reg3);
+ pu1_luma_recon_buf += 16;
+ pu1_luma_src_buf += 16;
+ }
+ rem_wd &= 0x0F;
+
+ if(rem_wd > 7)
+ {
+ /*load data*/
+ src_buf = vld1_u8(pu1_luma_src_buf);
+ recon_buf = vld1_u8(pu1_luma_recon_buf);
+ recon_buf0 = vld1_u8(pu1_luma_recon_buf - 1 - i4_luma_recon_strd);
+ recon_buf1 = vld1_u8(pu1_luma_recon_buf + 1 + i4_luma_recon_strd);
+ /*pel_error*/
+ pel_error = vreinterpretq_s16_u16(vsubl_u8(src_buf, recon_buf));
+ /*sign*/
+ sign_reg0 = vreinterpretq_s16_u16(vsubl_u8(recon_buf, recon_buf0));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg0, const_0);
+ sign_reg0 = (int16x8_t)vcltq_s16(sign_reg0, const_0);
+ sign_reg0 = vsubq_s16(sign_reg0, sign_reg);
+
+ sign_reg1 = vreinterpretq_s16_u16(vsubl_u8(recon_buf, recon_buf1));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg1, const_0);
+ sign_reg1 = (int16x8_t)vcltq_s16(sign_reg1, const_0);
+ sign_reg1 = vsubq_s16(sign_reg1, sign_reg);
+
+ edgeidx = vaddq_s16(vaddq_s16(sign_reg0, const_2), sign_reg1);
+ edgeidx_reg0 = vmvnq_s16((int16x8_t)vceqq_s16(const_0, pel_error));
+ edgeidx = vandq_s16(edgeidx_reg0, edgeidx);
+
+ temp_reg0 = (int16x8_t)vceqq_s16(const_0, edgeidx);
+ temp_reg1 = (int16x8_t)vceqq_s16(const_1, edgeidx);
+ temp_reg3 = (int16x8_t)vceqq_s16(const_3, edgeidx);
+ temp_reg4 = (int16x8_t)vceqq_s16(const_4, edgeidx);
+
+ edgeidx_reg1 = vabsq_s16(temp_reg1);
+ edgeidx_reg3 = vabsq_s16(temp_reg3);
+ edgeidx_reg4 = vabsq_s16(temp_reg4);
+
+ temp_reg0 = vandq_s16(temp_reg0, pel_error);
+ temp_reg1 = vandq_s16(temp_reg1, pel_error);
+ temp_reg3 = vandq_s16(temp_reg3, pel_error);
+ temp_reg4 = vandq_s16(temp_reg4, pel_error);
+
+ edgeidx_reg0 = vaddq_s16(const_1, (int16x8_t)vceqq_s16(const_0, temp_reg0));
+ /*store*/
+ pi4_acc_error_category[0] += sad_cal(temp_reg0);
+ pi4_acc_error_category[1] += sad_cal(temp_reg1);
+ pi4_acc_error_category[3] += sad_cal(temp_reg3);
+ pi4_acc_error_category[4] += sad_cal(temp_reg4);
+
+ pi4_category_count[0] += sad_cal(edgeidx_reg0);
+ pi4_category_count[1] += sad_cal(edgeidx_reg1);
+ pi4_category_count[3] += sad_cal(edgeidx_reg3);
+ pi4_category_count[4] += sad_cal(edgeidx_reg4);
+ pu1_luma_recon_buf += 8;
+ pu1_luma_src_buf += 8;
+ }
+ rem_wd &= 0x7;
+ if(rem_wd)
+ {
+ for(col = 0; col < rem_wd; col++)
+ {
+ c = pu1_luma_recon_buf[col];
+ a = pu1_luma_recon_buf[col - 1 - i4_luma_recon_strd];
+ b = pu1_luma_recon_buf[col + 1 + i4_luma_recon_strd];
+ pel_err = pu1_luma_src_buf[col] - pu1_luma_recon_buf[col];
+ edge_idx = 2 + SIGN(c - a) + SIGN(c - b);
+
+ if(pel_err != 0)
+ {
+ pi4_acc_error_category[edge_idx] += pel_err;
+ pi4_category_count[edge_idx]++;
+ }
+ }
+ }
+ pu1_luma_recon_buf = pu1_luma_recon_buf_copy + i4_luma_recon_strd;
+ pu1_luma_src_buf = pu1_luma_src_buf_copy + i4_luma_src_strd;
+ rem_wd = wd;
+ }
+ break;
+ case SAO_EDGE_45_DEG:
+ for(row = row_start; row < row_end; row++)
+ {
+ pu1_luma_src_buf_copy = pu1_luma_src_buf;
+ pu1_luma_recon_buf_copy = pu1_luma_recon_buf;
+ for(col = wd; col > 15; col -= 16)
+ {
+ /*load data*/
+ src_buf_8x16 = vld1q_u8(pu1_luma_src_buf);
+ recon_buf_8x16 = vld1q_u8(pu1_luma_recon_buf);
+ recon_buf0_8x16 = vld1q_u8(pu1_luma_recon_buf + 1 - i4_luma_recon_strd);
+ recon_buf1_8x16 = vld1q_u8(pu1_luma_recon_buf - 1 + i4_luma_recon_strd);
+ /*pel_error*/
+ pel_error = vreinterpretq_s16_u16(
+ vsubl_u8(vget_low_u8(src_buf_8x16), vget_low_u8(recon_buf_8x16)));
+ pel_error1 = vreinterpretq_s16_u16(
+ vsubl_u8(vget_high_u8(src_buf_8x16), vget_high_u8(recon_buf_8x16)));
+ /*sign*/
+ sign_reg0 = vreinterpretq_s16_u16(
+ vsubl_u8(vget_low_u8(recon_buf_8x16), vget_low_u8(recon_buf0_8x16)));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg0, const_0);
+ sign_reg0 = (int16x8_t)vcltq_s16(sign_reg0, const_0);
+ sign_reg0 = vsubq_s16(sign_reg0, sign_reg);
+
+ sign_reg1 = vreinterpretq_s16_u16(
+ vsubl_u8(vget_low_u8(recon_buf_8x16), vget_low_u8(recon_buf1_8x16)));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg1, const_0);
+ sign_reg1 = (int16x8_t)vcltq_s16(sign_reg1, const_0);
+ sign_reg1 = vsubq_s16(sign_reg1, sign_reg);
+
+ sign_reg2 = vreinterpretq_s16_u16(
+ vsubl_u8(vget_high_u8(recon_buf_8x16), vget_high_u8(recon_buf0_8x16)));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg2, const_0);
+ sign_reg2 = (int16x8_t)vcltq_s16(sign_reg2, const_0);
+ sign_reg2 = vsubq_s16(sign_reg2, sign_reg);
+
+ sign_reg3 = vreinterpretq_s16_u16(
+ vsubl_u8(vget_high_u8(recon_buf_8x16), vget_high_u8(recon_buf1_8x16)));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg3, const_0);
+ sign_reg3 = (int16x8_t)vcltq_s16(sign_reg3, const_0);
+ sign_reg3 = vsubq_s16(sign_reg3, sign_reg);
+
+ edgeidx = vaddq_s16(vaddq_s16(sign_reg0, const_2), sign_reg1);
+ edgeidx1 = vaddq_s16(vaddq_s16(sign_reg2, const_2), sign_reg3);
+
+ edgeidx_reg0 = vmvnq_s16((int16x8_t)vceqq_s16(const_0, pel_error));
+ edgeidx = vandq_s16(edgeidx_reg0, edgeidx);
+
+ edgeidx_reg5 = vmvnq_s16((int16x8_t)vceqq_s16(const_0, pel_error1));
+ edgeidx1 = vandq_s16(edgeidx_reg5, edgeidx1);
+
+ temp_reg0 = (int16x8_t)vceqq_s16(const_0, edgeidx);
+ temp_reg4 = (int16x8_t)vceqq_s16(const_0, edgeidx1);
+ temp_reg1 = (int16x8_t)vceqq_s16(const_1, edgeidx);
+ temp_reg5 = (int16x8_t)vceqq_s16(const_1, edgeidx1);
+
+ temp_reg2 = (int16x8_t)vceqq_s16(const_3, edgeidx);
+ temp_reg6 = (int16x8_t)vceqq_s16(const_3, edgeidx1);
+ temp_reg3 = (int16x8_t)vceqq_s16(const_4, edgeidx);
+ temp_reg7 = (int16x8_t)vceqq_s16(const_4, edgeidx1);
+
+ edgeidx_reg1 = vabsq_s16(temp_reg1);
+ edgeidx_reg5 = vabsq_s16(temp_reg5);
+
+ edgeidx_reg2 = vabsq_s16(temp_reg2);
+ edgeidx_reg6 = vabsq_s16(temp_reg6);
+ edgeidx_reg3 = vabsq_s16(temp_reg3);
+ edgeidx_reg7 = vabsq_s16(temp_reg7);
+
+ temp_reg0 = vandq_s16(temp_reg0, pel_error);
+ temp_reg4 = vandq_s16(temp_reg4, pel_error1);
+ temp_reg1 = vandq_s16(temp_reg1, pel_error);
+ temp_reg5 = vandq_s16(temp_reg5, pel_error1);
+
+ temp_reg2 = vandq_s16(temp_reg2, pel_error);
+ temp_reg6 = vandq_s16(temp_reg6, pel_error1);
+ temp_reg3 = vandq_s16(temp_reg3, pel_error);
+ temp_reg7 = vandq_s16(temp_reg7, pel_error1);
+
+ edgeidx_reg0 = vaddq_s16(const_1, (int16x8_t)vceqq_s16(const_0, temp_reg0));
+ edgeidx_reg4 = vaddq_s16(const_1, (int16x8_t)vceqq_s16(const_0, temp_reg4));
+
+ temp_reg0 = vaddq_s16(temp_reg0, temp_reg4);
+ temp_reg1 = vaddq_s16(temp_reg1, temp_reg5);
+ temp_reg2 = vaddq_s16(temp_reg2, temp_reg6);
+ temp_reg3 = vaddq_s16(temp_reg3, temp_reg7);
+
+ edgeidx_reg0 = vaddq_s16(edgeidx_reg0, edgeidx_reg4);
+ edgeidx_reg1 = vaddq_s16(edgeidx_reg1, edgeidx_reg5);
+ edgeidx_reg2 = vaddq_s16(edgeidx_reg2, edgeidx_reg6);
+ edgeidx_reg3 = vaddq_s16(edgeidx_reg3, edgeidx_reg7);
+ /*store*/
+ pi4_acc_error_category[0] += sad_cal(temp_reg0);
+ pi4_acc_error_category[1] += sad_cal(temp_reg1);
+ pi4_acc_error_category[3] += sad_cal(temp_reg2);
+ pi4_acc_error_category[4] += sad_cal(temp_reg3);
+
+ pi4_category_count[0] += sad_cal(edgeidx_reg0);
+ pi4_category_count[1] += sad_cal(edgeidx_reg1);
+ pi4_category_count[3] += sad_cal(edgeidx_reg2);
+ pi4_category_count[4] += sad_cal(edgeidx_reg3);
+ pu1_luma_recon_buf += 16;
+ pu1_luma_src_buf += 16;
+ }
+ rem_wd &= 0x0F;
+
+ if(rem_wd > 7)
+ {
+ /*load*/
+ src_buf = vld1_u8(pu1_luma_src_buf);
+ recon_buf = vld1_u8(pu1_luma_recon_buf);
+ recon_buf0 = vld1_u8(pu1_luma_recon_buf + 1 - i4_luma_recon_strd);
+ recon_buf1 = vld1_u8(pu1_luma_recon_buf - 1 + i4_luma_recon_strd);
+
+ pel_error = vreinterpretq_s16_u16(vsubl_u8(src_buf, recon_buf));
+
+ sign_reg0 = vreinterpretq_s16_u16(vsubl_u8(recon_buf, recon_buf0));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg0, const_0);
+ sign_reg0 = (int16x8_t)vcltq_s16(sign_reg0, const_0);
+ sign_reg0 = vsubq_s16(sign_reg0, sign_reg);
+
+ sign_reg1 = vreinterpretq_s16_u16(vsubl_u8(recon_buf, recon_buf1));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg1, const_0);
+ sign_reg1 = (int16x8_t)vcltq_s16(sign_reg1, const_0);
+ sign_reg1 = vsubq_s16(sign_reg1, sign_reg);
+
+ edgeidx = vaddq_s16(vaddq_s16(sign_reg0, const_2), sign_reg1);
+
+ edgeidx_reg0 = vmvnq_s16((int16x8_t)vceqq_s16(const_0, pel_error));
+ edgeidx = vandq_s16(edgeidx_reg0, edgeidx);
+
+ temp_reg0 = (int16x8_t)vceqq_s16(const_0, edgeidx);
+ temp_reg1 = (int16x8_t)vceqq_s16(const_1, edgeidx);
+ temp_reg3 = (int16x8_t)vceqq_s16(const_3, edgeidx);
+ temp_reg4 = (int16x8_t)vceqq_s16(const_4, edgeidx);
+
+ edgeidx_reg1 = vabsq_s16(temp_reg1);
+ edgeidx_reg3 = vabsq_s16(temp_reg3);
+ edgeidx_reg4 = vabsq_s16(temp_reg4);
+
+ temp_reg0 = vandq_s16(temp_reg0, pel_error);
+ temp_reg1 = vandq_s16(temp_reg1, pel_error);
+ temp_reg3 = vandq_s16(temp_reg3, pel_error);
+ temp_reg4 = vandq_s16(temp_reg4, pel_error);
+
+ edgeidx_reg0 = vaddq_s16(const_1, (int16x8_t)vceqq_s16(const_0, temp_reg0));
+ /*store*/
+ pi4_acc_error_category[0] += sad_cal(temp_reg0);
+ pi4_acc_error_category[1] += sad_cal(temp_reg1);
+ pi4_acc_error_category[3] += sad_cal(temp_reg3);
+ pi4_acc_error_category[4] += sad_cal(temp_reg4);
+
+ pi4_category_count[0] += sad_cal(edgeidx_reg0);
+ pi4_category_count[1] += sad_cal(edgeidx_reg1);
+ pi4_category_count[3] += sad_cal(edgeidx_reg3);
+ pi4_category_count[4] += sad_cal(edgeidx_reg4);
+ pu1_luma_recon_buf += 8;
+ pu1_luma_src_buf += 8;
+ }
+ rem_wd &= 0x7;
+ if(rem_wd)
+ {
+ for(col = 0; col < rem_wd; col++)
+ {
+ c = pu1_luma_recon_buf[col];
+ a = pu1_luma_recon_buf[col + 1 - i4_luma_recon_strd];
+ b = pu1_luma_recon_buf[col - 1 + i4_luma_recon_strd];
+ pel_err = pu1_luma_src_buf[col] - pu1_luma_recon_buf[col];
+ edge_idx = 2 + SIGN(c - a) + SIGN(c - b);
+ if(pel_err != 0)
+ {
+ pi4_acc_error_category[edge_idx] += pel_err;
+ pi4_category_count[edge_idx]++;
+ }
+ }
+ }
+ pu1_luma_recon_buf = pu1_luma_recon_buf_copy + i4_luma_recon_strd;
+ pu1_luma_src_buf = pu1_luma_src_buf_copy + i4_luma_src_strd;
+ rem_wd = wd;
+ }
+ break;
+ default:
+ break;
+ }
+}
+
+void ihevce_get_chroma_eo_sao_params_neon(
+ void *pv_sao_ctxt, /* in: SAO context; cast to sao_ctxt_t * below */
+ WORD32 eo_sao_class, /* in: edge class; values other than the four SAO_EDGE_*_DEG cases leave the outputs untouched */
+ WORD32 *pi4_acc_error_category, /* in/out: per-category sum of (src - recon); accumulated with += */
+ WORD32 *pi4_category_count) /* in/out: per-category count of pels with non-zero error; accumulated with += */
+{
+ /* Gathers chroma edge-offset statistics for one SAO block: per edge category, the summed (src - recon) error and the number of non-zero-error pels. Locals below are temporaries. */
+ UWORD8 *pu1_chroma_recon_buf, *pu1_chroma_src_buf;
+ UWORD8 *pu1_chroma_src_buf_copy, *pu1_chroma_recon_buf_copy;
+ WORD32 row_end, col_end, row, col;
+ WORD32 row_start = 0, col_start = 0;
+ WORD32 wd, rem_wd;
+ WORD32 a, b, c, edge_idx, pel_err;
+
+ int16x8_t temp_reg0, temp_reg1, temp_reg2, temp_reg3, temp_reg4;
+ int16x8_t edgeidx_reg0, edgeidx_reg1, edgeidx_reg2, edgeidx_reg3, edgeidx_reg4;
+ int16x8_t edgeidx_reg5, edgeidx_reg6, edgeidx_reg7;
+ int16x8_t pel_error, pel_error1;
+ int16x8_t sign_reg0, sign_reg1, sign_reg, sign_reg2, sign_reg3;
+ int16x8_t edgeidx, edgeidx1;
+ int16x8_t temp_reg5, temp_reg6, temp_reg7;
+ uint8x16_t src_buf_8x16, recon_buf_8x16, recon_buf0_8x16, recon_buf1_8x16;
+ uint8x8_t src_buf, recon_buf, recon_buf0, recon_buf1;
+
+ sao_ctxt_t *ps_sao_ctxt = (sao_ctxt_t *)pv_sao_ctxt;
+ const WORD32 i4_chroma_recon_strd = ps_sao_ctxt->i4_cur_chroma_recon_stride; /* byte step between recon rows */
+ const WORD32 i4_chroma_src_strd = ps_sao_ctxt->i4_cur_chroma_src_stride; /* byte step between source rows */
+
+ const int16x8_t const_2 = vdupq_n_s16(2); /* base of edge index = 2 + sign0 + sign1 */
+ const int16x8_t const_0 = vdupq_n_s16(0);
+ const int16x8_t const_1 = vdupq_n_s16(1);
+ const int16x8_t const_3 = vdupq_n_s16(3);
+ const int16x8_t const_4 = vdupq_n_s16(4);
+
+ row_end = ps_sao_ctxt->i4_sao_blk_ht >> 1; /* half the block height; NOTE(review): presumably 4:2:0 chroma rows - confirm */
+ col_end = ps_sao_ctxt->i4_sao_blk_wd; /* width in bytes; neighbours sit 2 bytes apart, presumably interleaved Cb/Cr - confirm */
+
+ if((ps_sao_ctxt->i4_ctb_x == 0) && (eo_sao_class != SAO_EDGE_90_DEG))
+ {
+ col_start = 2; /* CTB column 0 and the class reads a horizontal neighbour: skip the first 2 bytes of each row */
+ }
+
+ if(((ps_sao_ctxt->i4_ctb_x + 1) == ps_sao_ctxt->ps_sps->i2_pic_wd_in_ctb) &&
+ (eo_sao_class != SAO_EDGE_90_DEG))
+ {
+ col_end = col_end - 2; /* last CTB column: skip the last 2 bytes of each row */
+ }
+
+ if((ps_sao_ctxt->i4_ctb_y == 0) && (eo_sao_class != SAO_EDGE_0_DEG))
+ {
+ row_start = 1; /* CTB row 0 and the class reads the row above: skip the first row */
+ }
+
+ if(((ps_sao_ctxt->i4_ctb_y + 1) == ps_sao_ctxt->ps_sps->i2_pic_ht_in_ctb) &&
+ (eo_sao_class != SAO_EDGE_0_DEG))
+ {
+ row_end = row_end - 1; /* last CTB row: skip the last row */
+ }
+ wd = col_end - col_start; /* bytes processed per row */
+ rem_wd = wd; /* masked down after the 16-wide loop to select the 8-wide and scalar tails; reset per row */
+ pu1_chroma_recon_buf =
+ ps_sao_ctxt->pu1_cur_chroma_recon_buf + col_start + (row_start * i4_chroma_recon_strd);
+ pu1_chroma_src_buf =
+ ps_sao_ctxt->pu1_cur_chroma_src_buf + col_start + (row_start * i4_chroma_src_strd);
+
+ switch(eo_sao_class)
+ {
+ case SAO_EDGE_0_DEG: /* neighbours: recon[-2] and recon[+2] on the same row */
+ for(row = row_start; row < row_end; row++)
+ {
+ pu1_chroma_src_buf_copy = pu1_chroma_src_buf; /* remember the row start for the per-row pointer update */
+ pu1_chroma_recon_buf_copy = pu1_chroma_recon_buf;
+ for(col = wd; col > 15; col -= 16) /* 16 bytes per iteration */
+ {
+ /* load 16 bytes of source, recon and the two recon neighbours */
+ src_buf_8x16 = vld1q_u8(pu1_chroma_src_buf);
+ recon_buf_8x16 = vld1q_u8(pu1_chroma_recon_buf);
+ recon_buf0_8x16 = vld1q_u8(pu1_chroma_recon_buf - 2);
+ recon_buf1_8x16 = vld1q_u8(pu1_chroma_recon_buf + 2);
+
+ /* pel_error = src - recon as signed 16 bit: low 8 bytes, then high 8 bytes */
+ pel_error = vreinterpretq_s16_u16(
+ vsubl_u8(vget_low_u8(src_buf_8x16), vget_low_u8(recon_buf_8x16)));
+ pel_error1 = vreinterpretq_s16_u16(
+ vsubl_u8(vget_high_u8(src_buf_8x16), vget_high_u8(recon_buf_8x16)));
+
+ /* sign of (recon - neighbour) per lane: (x < 0 mask) - (x > 0 mask) gives -1, 0 or +1 */
+ sign_reg0 = vreinterpretq_s16_u16(
+ vsubl_u8(vget_low_u8(recon_buf_8x16), vget_low_u8(recon_buf0_8x16)));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg0, const_0);
+ sign_reg0 = (int16x8_t)vcltq_s16(sign_reg0, const_0);
+ sign_reg0 = vsubq_s16(sign_reg0, sign_reg);
+
+ sign_reg1 = vreinterpretq_s16_u16(
+ vsubl_u8(vget_low_u8(recon_buf_8x16), vget_low_u8(recon_buf1_8x16)));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg1, const_0);
+ sign_reg1 = (int16x8_t)vcltq_s16(sign_reg1, const_0);
+ sign_reg1 = vsubq_s16(sign_reg1, sign_reg);
+
+ sign_reg2 = vreinterpretq_s16_u16(
+ vsubl_u8(vget_high_u8(recon_buf_8x16), vget_high_u8(recon_buf0_8x16)));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg2, const_0);
+ sign_reg2 = (int16x8_t)vcltq_s16(sign_reg2, const_0);
+ sign_reg2 = vsubq_s16(sign_reg2, sign_reg);
+
+ sign_reg3 = vreinterpretq_s16_u16(
+ vsubl_u8(vget_high_u8(recon_buf_8x16), vget_high_u8(recon_buf1_8x16)));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg3, const_0);
+ sign_reg3 = (int16x8_t)vcltq_s16(sign_reg3, const_0);
+ sign_reg3 = vsubq_s16(sign_reg3, sign_reg);
+ /* edge index = 2 + sign0 + sign1, then forced to 0 in lanes where pel_error == 0 */
+ edgeidx = vaddq_s16(vaddq_s16(sign_reg0, const_2), sign_reg1);
+ edgeidx1 = vaddq_s16(vaddq_s16(sign_reg2, const_2), sign_reg3);
+
+ edgeidx_reg0 = vmvnq_s16((int16x8_t)vceqq_s16(const_0, pel_error)); /* all-ones where pel_error != 0 */
+ edgeidx = vandq_s16(edgeidx_reg0, edgeidx);
+
+ edgeidx_reg5 = vmvnq_s16((int16x8_t)vceqq_s16(const_0, pel_error1));
+ edgeidx1 = vandq_s16(edgeidx_reg5, edgeidx1);
+
+ temp_reg0 = (int16x8_t)vceqq_s16(const_0, edgeidx); /* per-category lane masks: 0, 1, 3, 4 */
+ temp_reg4 = (int16x8_t)vceqq_s16(const_0, edgeidx1);
+ temp_reg1 = (int16x8_t)vceqq_s16(const_1, edgeidx);
+ temp_reg5 = (int16x8_t)vceqq_s16(const_1, edgeidx1);
+
+ temp_reg2 = (int16x8_t)vceqq_s16(const_3, edgeidx);
+ temp_reg6 = (int16x8_t)vceqq_s16(const_3, edgeidx1);
+ temp_reg3 = (int16x8_t)vceqq_s16(const_4, edgeidx);
+ temp_reg7 = (int16x8_t)vceqq_s16(const_4, edgeidx1);
+
+ edgeidx_reg1 = vabsq_s16(temp_reg1); /* mask (-1) -> 1, i.e. a per-lane count */
+ edgeidx_reg5 = vabsq_s16(temp_reg5);
+
+ edgeidx_reg2 = vabsq_s16(temp_reg2);
+ edgeidx_reg6 = vabsq_s16(temp_reg6);
+ edgeidx_reg3 = vabsq_s16(temp_reg3);
+ edgeidx_reg7 = vabsq_s16(temp_reg7);
+
+ temp_reg0 = vandq_s16(temp_reg0, pel_error); /* keep pel_error only in lanes of that category */
+ temp_reg4 = vandq_s16(temp_reg4, pel_error1);
+ temp_reg1 = vandq_s16(temp_reg1, pel_error);
+ temp_reg5 = vandq_s16(temp_reg5, pel_error1);
+
+ temp_reg2 = vandq_s16(temp_reg2, pel_error);
+ temp_reg6 = vandq_s16(temp_reg6, pel_error1);
+ temp_reg3 = vandq_s16(temp_reg3, pel_error);
+ temp_reg7 = vandq_s16(temp_reg7, pel_error1);
+
+ edgeidx_reg0 = vaddq_s16(const_1, (int16x8_t)vceqq_s16(const_0, temp_reg0)); /* 1 where category-0 error is non-zero, else 0 */
+ edgeidx_reg4 = vaddq_s16(const_1, (int16x8_t)vceqq_s16(const_0, temp_reg4));
+
+ temp_reg0 = vaddq_s16(temp_reg0, temp_reg4); /* fold the high 8 lanes into the low 8 lanes */
+ temp_reg1 = vaddq_s16(temp_reg1, temp_reg5);
+ temp_reg2 = vaddq_s16(temp_reg2, temp_reg6);
+ temp_reg3 = vaddq_s16(temp_reg3, temp_reg7);
+
+ edgeidx_reg0 = vaddq_s16(edgeidx_reg0, edgeidx_reg4);
+ edgeidx_reg1 = vaddq_s16(edgeidx_reg1, edgeidx_reg5);
+ edgeidx_reg2 = vaddq_s16(edgeidx_reg2, edgeidx_reg6);
+ edgeidx_reg3 = vaddq_s16(edgeidx_reg3, edgeidx_reg7);
+
+ /* accumulate pel error for categories 0, 1, 3, 4; sad_cal is defined elsewhere, presumably a sum over the 8 lanes - confirm */
+ pi4_acc_error_category[0] += sad_cal(temp_reg0);
+ pi4_acc_error_category[1] += sad_cal(temp_reg1);
+ pi4_acc_error_category[3] += sad_cal(temp_reg2);
+ pi4_acc_error_category[4] += sad_cal(temp_reg3);
+
+ /* accumulate pel counts for categories 0, 1, 3, 4 */
+ pi4_category_count[0] += sad_cal(edgeidx_reg0);
+ pi4_category_count[1] += sad_cal(edgeidx_reg1);
+ pi4_category_count[3] += sad_cal(edgeidx_reg2);
+ pi4_category_count[4] += sad_cal(edgeidx_reg3);
+ pu1_chroma_recon_buf += 16;
+ pu1_chroma_src_buf += 16;
+ }
+ rem_wd &= 0x0F; /* bytes left after the 16-wide loop */
+
+ if(rem_wd > 7) /* one 8-byte step using the same scheme on 8 lanes */
+ {
+ /* load 8 bytes of source, recon and the two recon neighbours */
+ src_buf = vld1_u8(pu1_chroma_src_buf);
+ recon_buf = vld1_u8(pu1_chroma_recon_buf);
+ recon_buf0 = vld1_u8(pu1_chroma_recon_buf - 2);
+ recon_buf1 = vld1_u8(pu1_chroma_recon_buf + 2);
+ /* pel_error = src - recon as signed 16 bit */
+ pel_error = vreinterpretq_s16_u16(vsubl_u8(src_buf, recon_buf));
+ /* sign of (recon - neighbour) per lane: -1, 0 or +1 */
+ sign_reg0 = vreinterpretq_s16_u16(vsubl_u8(recon_buf, recon_buf0));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg0, const_0);
+ sign_reg0 = (int16x8_t)vcltq_s16(sign_reg0, const_0);
+ sign_reg0 = vsubq_s16(sign_reg0, sign_reg);
+
+ sign_reg1 = vreinterpretq_s16_u16(vsubl_u8(recon_buf, recon_buf1));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg1, const_0);
+ sign_reg1 = (int16x8_t)vcltq_s16(sign_reg1, const_0);
+ sign_reg1 = vsubq_s16(sign_reg1, sign_reg);
+
+ edgeidx = vaddq_s16(vaddq_s16(sign_reg0, const_2), sign_reg1);
+
+ edgeidx_reg0 = vmvnq_s16((int16x8_t)vceqq_s16(const_0, pel_error));
+ edgeidx = vandq_s16(edgeidx_reg0, edgeidx);
+
+ temp_reg0 = (int16x8_t)vceqq_s16(const_0, edgeidx);
+ temp_reg1 = (int16x8_t)vceqq_s16(const_1, edgeidx);
+ temp_reg2 = (int16x8_t)vceqq_s16(const_3, edgeidx);
+ temp_reg3 = (int16x8_t)vceqq_s16(const_4, edgeidx);
+
+ edgeidx_reg1 = vabsq_s16(temp_reg1);
+ edgeidx_reg2 = vabsq_s16(temp_reg2);
+ edgeidx_reg3 = vabsq_s16(temp_reg3);
+
+ temp_reg0 = vandq_s16(temp_reg0, pel_error);
+ temp_reg1 = vandq_s16(temp_reg1, pel_error);
+ temp_reg2 = vandq_s16(temp_reg2, pel_error);
+ temp_reg3 = vandq_s16(temp_reg3, pel_error);
+
+ edgeidx_reg0 = vaddq_s16(const_1, (int16x8_t)vceqq_s16(const_0, temp_reg0));
+ /* accumulate error sums and counts for categories 0, 1, 3, 4 */
+ pi4_acc_error_category[0] += sad_cal(temp_reg0);
+ pi4_acc_error_category[1] += sad_cal(temp_reg1);
+ pi4_acc_error_category[3] += sad_cal(temp_reg2);
+ pi4_acc_error_category[4] += sad_cal(temp_reg3);
+
+ pi4_category_count[0] += sad_cal(edgeidx_reg0);
+ pi4_category_count[1] += sad_cal(edgeidx_reg1);
+ pi4_category_count[3] += sad_cal(edgeidx_reg2);
+ pi4_category_count[4] += sad_cal(edgeidx_reg3);
+ pu1_chroma_recon_buf += 8;
+ pu1_chroma_src_buf += 8;
+ }
+ rem_wd &= 0x7; /* bytes left for the scalar loop */
+ if(rem_wd)
+ {
+ for(col = 0; col < rem_wd; col++) /* scalar tail, one byte at a time */
+ {
+ c = pu1_chroma_recon_buf[col];
+ a = pu1_chroma_recon_buf[col - 2];
+ b = pu1_chroma_recon_buf[col + 2];
+ pel_err = pu1_chroma_src_buf[col] - pu1_chroma_recon_buf[col];
+ edge_idx = 2 + SIGN(c - a) + SIGN(c - b); /* SIGN is defined elsewhere; presumably yields -1/0/+1 - confirm */
+
+ if(pel_err != 0)
+ {
+ pi4_acc_error_category[edge_idx] += pel_err; /* NOTE(review): may update index 2, which the vector paths never touch */
+ pi4_category_count[edge_idx]++;
+ }
+ }
+ }
+ pu1_chroma_recon_buf = pu1_chroma_recon_buf_copy + i4_chroma_recon_strd; /* back to the row start, one row down */
+ pu1_chroma_src_buf = pu1_chroma_src_buf_copy + i4_chroma_src_strd;
+ rem_wd = wd;
+ }
+ break;
+ case SAO_EDGE_90_DEG: /* neighbours: recon one row above and one row below, same column */
+ for(row = row_start; row < row_end; row++)
+ {
+ pu1_chroma_src_buf_copy = pu1_chroma_src_buf;
+ pu1_chroma_recon_buf_copy = pu1_chroma_recon_buf;
+ for(col = wd; col > 15; col -= 16) /* 16 bytes per iteration */
+ {
+ /* load 16 bytes of source, recon and the two recon neighbours */
+ src_buf_8x16 = vld1q_u8(pu1_chroma_src_buf);
+ recon_buf_8x16 = vld1q_u8(pu1_chroma_recon_buf);
+ recon_buf0_8x16 = vld1q_u8(pu1_chroma_recon_buf - i4_chroma_recon_strd);
+ recon_buf1_8x16 = vld1q_u8(pu1_chroma_recon_buf + i4_chroma_recon_strd);
+ /* pel_error = src - recon as signed 16 bit: low 8 bytes, then high 8 bytes */
+ pel_error = vreinterpretq_s16_u16(
+ vsubl_u8(vget_low_u8(src_buf_8x16), vget_low_u8(recon_buf_8x16)));
+ pel_error1 = vreinterpretq_s16_u16(
+ vsubl_u8(vget_high_u8(src_buf_8x16), vget_high_u8(recon_buf_8x16)));
+ /* sign of (recon - neighbour) per lane: -1, 0 or +1 */
+ sign_reg0 = vreinterpretq_s16_u16(
+ vsubl_u8(vget_low_u8(recon_buf_8x16), vget_low_u8(recon_buf0_8x16)));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg0, const_0);
+ sign_reg0 = (int16x8_t)vcltq_s16(sign_reg0, const_0);
+ sign_reg0 = vsubq_s16(sign_reg0, sign_reg);
+
+ sign_reg1 = vreinterpretq_s16_u16(
+ vsubl_u8(vget_low_u8(recon_buf_8x16), vget_low_u8(recon_buf1_8x16)));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg1, const_0);
+ sign_reg1 = (int16x8_t)vcltq_s16(sign_reg1, const_0);
+ sign_reg1 = vsubq_s16(sign_reg1, sign_reg);
+
+ sign_reg2 = vreinterpretq_s16_u16(
+ vsubl_u8(vget_high_u8(recon_buf_8x16), vget_high_u8(recon_buf0_8x16)));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg2, const_0);
+ sign_reg2 = (int16x8_t)vcltq_s16(sign_reg2, const_0);
+ sign_reg2 = vsubq_s16(sign_reg2, sign_reg);
+
+ sign_reg3 = vreinterpretq_s16_u16(
+ vsubl_u8(vget_high_u8(recon_buf_8x16), vget_high_u8(recon_buf1_8x16)));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg3, const_0);
+ sign_reg3 = (int16x8_t)vcltq_s16(sign_reg3, const_0);
+ sign_reg3 = vsubq_s16(sign_reg3, sign_reg);
+ /* edge index = 2 + sign0 + sign1, then forced to 0 in lanes where pel_error == 0 */
+ edgeidx = vaddq_s16(vaddq_s16(sign_reg0, const_2), sign_reg1);
+ edgeidx1 = vaddq_s16(vaddq_s16(sign_reg2, const_2), sign_reg3);
+
+ edgeidx_reg0 = vmvnq_s16((int16x8_t)vceqq_s16(const_0, pel_error));
+ edgeidx = vandq_s16(edgeidx_reg0, edgeidx);
+
+ edgeidx_reg5 = vmvnq_s16((int16x8_t)vceqq_s16(const_0, pel_error1));
+ edgeidx1 = vandq_s16(edgeidx_reg5, edgeidx1);
+
+ temp_reg0 = (int16x8_t)vceqq_s16(const_0, edgeidx); /* per-category lane masks: 0, 1, 3, 4 */
+ temp_reg4 = (int16x8_t)vceqq_s16(const_0, edgeidx1);
+ temp_reg1 = (int16x8_t)vceqq_s16(const_1, edgeidx);
+ temp_reg5 = (int16x8_t)vceqq_s16(const_1, edgeidx1);
+
+ temp_reg2 = (int16x8_t)vceqq_s16(const_3, edgeidx);
+ temp_reg6 = (int16x8_t)vceqq_s16(const_3, edgeidx1);
+ temp_reg3 = (int16x8_t)vceqq_s16(const_4, edgeidx);
+ temp_reg7 = (int16x8_t)vceqq_s16(const_4, edgeidx1);
+
+ edgeidx_reg1 = vabsq_s16(temp_reg1); /* mask (-1) -> 1, i.e. a per-lane count */
+ edgeidx_reg5 = vabsq_s16(temp_reg5);
+
+ edgeidx_reg2 = vabsq_s16(temp_reg2);
+ edgeidx_reg6 = vabsq_s16(temp_reg6);
+ edgeidx_reg3 = vabsq_s16(temp_reg3);
+ edgeidx_reg7 = vabsq_s16(temp_reg7);
+
+ temp_reg0 = vandq_s16(temp_reg0, pel_error); /* keep pel_error only in lanes of that category */
+ temp_reg4 = vandq_s16(temp_reg4, pel_error1);
+ temp_reg1 = vandq_s16(temp_reg1, pel_error);
+ temp_reg5 = vandq_s16(temp_reg5, pel_error1);
+
+ temp_reg2 = vandq_s16(temp_reg2, pel_error);
+ temp_reg6 = vandq_s16(temp_reg6, pel_error1);
+ temp_reg3 = vandq_s16(temp_reg3, pel_error);
+ temp_reg7 = vandq_s16(temp_reg7, pel_error1);
+
+ edgeidx_reg0 = vaddq_s16(const_1, (int16x8_t)vceqq_s16(const_0, temp_reg0)); /* 1 where category-0 error is non-zero, else 0 */
+ edgeidx_reg4 = vaddq_s16(const_1, (int16x8_t)vceqq_s16(const_0, temp_reg4));
+
+ temp_reg0 = vaddq_s16(temp_reg0, temp_reg4); /* fold the high 8 lanes into the low 8 lanes */
+ temp_reg1 = vaddq_s16(temp_reg1, temp_reg5);
+ temp_reg2 = vaddq_s16(temp_reg2, temp_reg6);
+ temp_reg3 = vaddq_s16(temp_reg3, temp_reg7);
+
+ edgeidx_reg0 = vaddq_s16(edgeidx_reg0, edgeidx_reg4);
+ edgeidx_reg1 = vaddq_s16(edgeidx_reg1, edgeidx_reg5);
+ edgeidx_reg2 = vaddq_s16(edgeidx_reg2, edgeidx_reg6);
+ edgeidx_reg3 = vaddq_s16(edgeidx_reg3, edgeidx_reg7);
+ /* accumulate pel error for categories 0, 1, 3, 4 */
+ pi4_acc_error_category[0] += sad_cal(temp_reg0);
+ pi4_acc_error_category[1] += sad_cal(temp_reg1);
+ pi4_acc_error_category[3] += sad_cal(temp_reg2);
+ pi4_acc_error_category[4] += sad_cal(temp_reg3);
+ /* accumulate pel counts for categories 0, 1, 3, 4 */
+ pi4_category_count[0] += sad_cal(edgeidx_reg0);
+ pi4_category_count[1] += sad_cal(edgeidx_reg1);
+ pi4_category_count[3] += sad_cal(edgeidx_reg2);
+ pi4_category_count[4] += sad_cal(edgeidx_reg3);
+ pu1_chroma_recon_buf += 16;
+ pu1_chroma_src_buf += 16;
+ }
+ rem_wd &= 0x0F; /* bytes left after the 16-wide loop */
+
+ if(rem_wd > 7) /* one 8-byte step using the same scheme on 8 lanes */
+ {
+ /* load 8 bytes of source, recon and the two recon neighbours */
+ src_buf = vld1_u8(pu1_chroma_src_buf);
+ recon_buf = vld1_u8(pu1_chroma_recon_buf);
+ recon_buf0 = vld1_u8(pu1_chroma_recon_buf - i4_chroma_recon_strd);
+ recon_buf1 = vld1_u8(pu1_chroma_recon_buf + i4_chroma_recon_strd);
+ /* pel_error = src - recon as signed 16 bit */
+ pel_error = vreinterpretq_s16_u16(vsubl_u8(src_buf, recon_buf));
+ /* sign of (recon - neighbour) per lane: -1, 0 or +1 */
+ sign_reg0 = vreinterpretq_s16_u16(vsubl_u8(recon_buf, recon_buf0));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg0, const_0);
+ sign_reg0 = (int16x8_t)vcltq_s16(sign_reg0, const_0);
+ sign_reg0 = vsubq_s16(sign_reg0, sign_reg);
+
+ sign_reg1 = vreinterpretq_s16_u16(vsubl_u8(recon_buf, recon_buf1));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg1, const_0);
+ sign_reg1 = (int16x8_t)vcltq_s16(sign_reg1, const_0);
+ sign_reg1 = vsubq_s16(sign_reg1, sign_reg);
+
+ edgeidx = vaddq_s16(vaddq_s16(sign_reg0, const_2), sign_reg1);
+ edgeidx_reg0 = vmvnq_s16((int16x8_t)vceqq_s16(const_0, pel_error));
+ edgeidx = vandq_s16(edgeidx_reg0, edgeidx);
+
+ temp_reg0 = (int16x8_t)vceqq_s16(const_0, edgeidx);
+ temp_reg1 = (int16x8_t)vceqq_s16(const_1, edgeidx);
+ temp_reg2 = (int16x8_t)vceqq_s16(const_3, edgeidx);
+ temp_reg3 = (int16x8_t)vceqq_s16(const_4, edgeidx);
+
+ edgeidx_reg1 = vabsq_s16(temp_reg1);
+ edgeidx_reg2 = vabsq_s16(temp_reg2);
+ edgeidx_reg3 = vabsq_s16(temp_reg3);
+
+ temp_reg0 = vandq_s16(temp_reg0, pel_error);
+ temp_reg1 = vandq_s16(temp_reg1, pel_error);
+ temp_reg2 = vandq_s16(temp_reg2, pel_error);
+ temp_reg3 = vandq_s16(temp_reg3, pel_error);
+
+ edgeidx_reg0 = vaddq_s16(const_1, (int16x8_t)vceqq_s16(const_0, temp_reg0));
+ /* accumulate error sums and counts for categories 0, 1, 3, 4 */
+ pi4_acc_error_category[0] += sad_cal(temp_reg0);
+ pi4_acc_error_category[1] += sad_cal(temp_reg1);
+ pi4_acc_error_category[3] += sad_cal(temp_reg2);
+ pi4_acc_error_category[4] += sad_cal(temp_reg3);
+
+ pi4_category_count[0] += sad_cal(edgeidx_reg0);
+ pi4_category_count[1] += sad_cal(edgeidx_reg1);
+ pi4_category_count[3] += sad_cal(edgeidx_reg2);
+ pi4_category_count[4] += sad_cal(edgeidx_reg3);
+ pu1_chroma_recon_buf += 8;
+ pu1_chroma_src_buf += 8;
+ }
+ rem_wd &= 0x7; /* bytes left for the scalar loop */
+ if(rem_wd)
+ {
+ for(col = 0; col < rem_wd; col++) /* scalar tail, one byte at a time */
+ {
+ c = pu1_chroma_recon_buf[col];
+ a = pu1_chroma_recon_buf[col - i4_chroma_recon_strd];
+ b = pu1_chroma_recon_buf[col + i4_chroma_recon_strd];
+ pel_err = pu1_chroma_src_buf[col] - pu1_chroma_recon_buf[col];
+ edge_idx = 2 + SIGN(c - a) + SIGN(c - b);
+
+ if(pel_err != 0)
+ {
+ pi4_acc_error_category[edge_idx] += pel_err; /* NOTE(review): may update index 2, which the vector paths never touch */
+ pi4_category_count[edge_idx]++;
+ }
+ }
+ }
+ pu1_chroma_recon_buf = pu1_chroma_recon_buf_copy + i4_chroma_recon_strd; /* back to the row start, one row down */
+ pu1_chroma_src_buf = pu1_chroma_src_buf_copy + i4_chroma_src_strd;
+ rem_wd = wd;
+ }
+ break;
+ case SAO_EDGE_135_DEG: /* neighbours: recon[-2 - stride] and recon[+2 + stride] */
+ for(row = row_start; row < row_end; row++)
+ {
+ pu1_chroma_src_buf_copy = pu1_chroma_src_buf;
+ pu1_chroma_recon_buf_copy = pu1_chroma_recon_buf;
+ for(col = wd; col > 15; col -= 16) /* 16 bytes per iteration */
+ {
+ /* load 16 bytes of source, recon and the two recon neighbours */
+ src_buf_8x16 = vld1q_u8(pu1_chroma_src_buf);
+ recon_buf_8x16 = vld1q_u8(pu1_chroma_recon_buf);
+ recon_buf0_8x16 = vld1q_u8(pu1_chroma_recon_buf - 2 - i4_chroma_recon_strd);
+ recon_buf1_8x16 = vld1q_u8(pu1_chroma_recon_buf + 2 + i4_chroma_recon_strd);
+ /* pel_error = src - recon as signed 16 bit: low 8 bytes, then high 8 bytes */
+ pel_error = vreinterpretq_s16_u16(
+ vsubl_u8(vget_low_u8(src_buf_8x16), vget_low_u8(recon_buf_8x16)));
+ pel_error1 = vreinterpretq_s16_u16(
+ vsubl_u8(vget_high_u8(src_buf_8x16), vget_high_u8(recon_buf_8x16)));
+ /* sign of (recon - neighbour) per lane: -1, 0 or +1 */
+ sign_reg0 = vreinterpretq_s16_u16(
+ vsubl_u8(vget_low_u8(recon_buf_8x16), vget_low_u8(recon_buf0_8x16)));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg0, const_0);
+ sign_reg0 = (int16x8_t)vcltq_s16(sign_reg0, const_0);
+ sign_reg0 = vsubq_s16(sign_reg0, sign_reg);
+
+ sign_reg1 = vreinterpretq_s16_u16(
+ vsubl_u8(vget_low_u8(recon_buf_8x16), vget_low_u8(recon_buf1_8x16)));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg1, const_0);
+ sign_reg1 = (int16x8_t)vcltq_s16(sign_reg1, const_0);
+ sign_reg1 = vsubq_s16(sign_reg1, sign_reg);
+
+ sign_reg2 = vreinterpretq_s16_u16(
+ vsubl_u8(vget_high_u8(recon_buf_8x16), vget_high_u8(recon_buf0_8x16)));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg2, const_0);
+ sign_reg2 = (int16x8_t)vcltq_s16(sign_reg2, const_0);
+ sign_reg2 = vsubq_s16(sign_reg2, sign_reg);
+
+ sign_reg3 = vreinterpretq_s16_u16(
+ vsubl_u8(vget_high_u8(recon_buf_8x16), vget_high_u8(recon_buf1_8x16)));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg3, const_0);
+ sign_reg3 = (int16x8_t)vcltq_s16(sign_reg3, const_0);
+ sign_reg3 = vsubq_s16(sign_reg3, sign_reg);
+
+ edgeidx = vaddq_s16(vaddq_s16(sign_reg0, const_2), sign_reg1); /* edge index = 2 + sign0 + sign1 */
+ edgeidx1 = vaddq_s16(vaddq_s16(sign_reg2, const_2), sign_reg3);
+
+ edgeidx_reg0 = vmvnq_s16((int16x8_t)vceqq_s16(const_0, pel_error)); /* all-ones where pel_error != 0 */
+ edgeidx = vandq_s16(edgeidx_reg0, edgeidx); /* edge index forced to 0 where pel_error == 0 */
+
+ edgeidx_reg5 = vmvnq_s16((int16x8_t)vceqq_s16(const_0, pel_error1));
+ edgeidx1 = vandq_s16(edgeidx_reg5, edgeidx1);
+
+ temp_reg0 = (int16x8_t)vceqq_s16(const_0, edgeidx); /* per-category lane masks: 0, 1, 3, 4 */
+ temp_reg4 = (int16x8_t)vceqq_s16(const_0, edgeidx1);
+ temp_reg1 = (int16x8_t)vceqq_s16(const_1, edgeidx);
+ temp_reg5 = (int16x8_t)vceqq_s16(const_1, edgeidx1);
+
+ temp_reg2 = (int16x8_t)vceqq_s16(const_3, edgeidx);
+ temp_reg6 = (int16x8_t)vceqq_s16(const_3, edgeidx1);
+ temp_reg3 = (int16x8_t)vceqq_s16(const_4, edgeidx);
+ temp_reg7 = (int16x8_t)vceqq_s16(const_4, edgeidx1);
+
+ edgeidx_reg1 = vabsq_s16(temp_reg1); /* mask (-1) -> 1, i.e. a per-lane count */
+ edgeidx_reg5 = vabsq_s16(temp_reg5);
+
+ edgeidx_reg2 = vabsq_s16(temp_reg2);
+ edgeidx_reg6 = vabsq_s16(temp_reg6);
+ edgeidx_reg3 = vabsq_s16(temp_reg3);
+ edgeidx_reg7 = vabsq_s16(temp_reg7);
+
+ temp_reg0 = vandq_s16(temp_reg0, pel_error); /* keep pel_error only in lanes of that category */
+ temp_reg4 = vandq_s16(temp_reg4, pel_error1);
+ temp_reg1 = vandq_s16(temp_reg1, pel_error);
+ temp_reg5 = vandq_s16(temp_reg5, pel_error1);
+
+ temp_reg2 = vandq_s16(temp_reg2, pel_error);
+ temp_reg6 = vandq_s16(temp_reg6, pel_error1);
+ temp_reg3 = vandq_s16(temp_reg3, pel_error);
+ temp_reg7 = vandq_s16(temp_reg7, pel_error1);
+
+ edgeidx_reg0 = vaddq_s16(const_1, (int16x8_t)vceqq_s16(const_0, temp_reg0)); /* 1 where category-0 error is non-zero, else 0 */
+ edgeidx_reg4 = vaddq_s16(const_1, (int16x8_t)vceqq_s16(const_0, temp_reg4));
+
+ temp_reg0 = vaddq_s16(temp_reg0, temp_reg4); /* fold the high 8 lanes into the low 8 lanes */
+ temp_reg1 = vaddq_s16(temp_reg1, temp_reg5);
+ temp_reg2 = vaddq_s16(temp_reg2, temp_reg6);
+ temp_reg3 = vaddq_s16(temp_reg3, temp_reg7);
+
+ edgeidx_reg0 = vaddq_s16(edgeidx_reg0, edgeidx_reg4);
+ edgeidx_reg1 = vaddq_s16(edgeidx_reg1, edgeidx_reg5);
+ edgeidx_reg2 = vaddq_s16(edgeidx_reg2, edgeidx_reg6);
+ edgeidx_reg3 = vaddq_s16(edgeidx_reg3, edgeidx_reg7);
+ /* accumulate error sums and counts for categories 0, 1, 3, 4 */
+ pi4_acc_error_category[0] += sad_cal(temp_reg0);
+ pi4_acc_error_category[1] += sad_cal(temp_reg1);
+ pi4_acc_error_category[3] += sad_cal(temp_reg2);
+ pi4_acc_error_category[4] += sad_cal(temp_reg3);
+
+ pi4_category_count[0] += sad_cal(edgeidx_reg0);
+ pi4_category_count[1] += sad_cal(edgeidx_reg1);
+ pi4_category_count[3] += sad_cal(edgeidx_reg2);
+ pi4_category_count[4] += sad_cal(edgeidx_reg3);
+ pu1_chroma_recon_buf += 16;
+ pu1_chroma_src_buf += 16;
+ }
+ rem_wd &= 0x0F; /* bytes left after the 16-wide loop */
+
+ if(rem_wd > 7) /* one 8-byte step using the same scheme on 8 lanes */
+ {
+ /* load 8 bytes of source, recon and the two recon neighbours */
+ src_buf = vld1_u8(pu1_chroma_src_buf);
+ recon_buf = vld1_u8(pu1_chroma_recon_buf);
+ recon_buf0 = vld1_u8(pu1_chroma_recon_buf - 2 - i4_chroma_recon_strd);
+ recon_buf1 = vld1_u8(pu1_chroma_recon_buf + 2 + i4_chroma_recon_strd);
+ /* pel_error = src - recon as signed 16 bit */
+ pel_error = vreinterpretq_s16_u16(vsubl_u8(src_buf, recon_buf));
+ /* sign of (recon - neighbour) per lane: -1, 0 or +1 */
+ sign_reg0 = vreinterpretq_s16_u16(vsubl_u8(recon_buf, recon_buf0));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg0, const_0);
+ sign_reg0 = (int16x8_t)vcltq_s16(sign_reg0, const_0);
+ sign_reg0 = vsubq_s16(sign_reg0, sign_reg);
+
+ sign_reg1 = vreinterpretq_s16_u16(vsubl_u8(recon_buf, recon_buf1));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg1, const_0);
+ sign_reg1 = (int16x8_t)vcltq_s16(sign_reg1, const_0);
+ sign_reg1 = vsubq_s16(sign_reg1, sign_reg);
+
+ edgeidx = vaddq_s16(vaddq_s16(sign_reg0, const_2), sign_reg1);
+ edgeidx_reg0 = vmvnq_s16((int16x8_t)vceqq_s16(const_0, pel_error));
+ edgeidx = vandq_s16(edgeidx_reg0, edgeidx);
+
+ temp_reg0 = (int16x8_t)vceqq_s16(const_0, edgeidx);
+ temp_reg1 = (int16x8_t)vceqq_s16(const_1, edgeidx);
+ temp_reg3 = (int16x8_t)vceqq_s16(const_3, edgeidx);
+ temp_reg4 = (int16x8_t)vceqq_s16(const_4, edgeidx);
+
+ edgeidx_reg1 = vabsq_s16(temp_reg1);
+ edgeidx_reg3 = vabsq_s16(temp_reg3);
+ edgeidx_reg4 = vabsq_s16(temp_reg4);
+
+ temp_reg0 = vandq_s16(temp_reg0, pel_error);
+ temp_reg1 = vandq_s16(temp_reg1, pel_error);
+ temp_reg3 = vandq_s16(temp_reg3, pel_error);
+ temp_reg4 = vandq_s16(temp_reg4, pel_error);
+
+ edgeidx_reg0 = vaddq_s16(const_1, (int16x8_t)vceqq_s16(const_0, temp_reg0));
+ /* accumulate error sums and counts for categories 0, 1, 3, 4 */
+ pi4_acc_error_category[0] += sad_cal(temp_reg0);
+ pi4_acc_error_category[1] += sad_cal(temp_reg1);
+ pi4_acc_error_category[3] += sad_cal(temp_reg3);
+ pi4_acc_error_category[4] += sad_cal(temp_reg4);
+
+ pi4_category_count[0] += sad_cal(edgeidx_reg0);
+ pi4_category_count[1] += sad_cal(edgeidx_reg1);
+ pi4_category_count[3] += sad_cal(edgeidx_reg3);
+ pi4_category_count[4] += sad_cal(edgeidx_reg4);
+ pu1_chroma_recon_buf += 8;
+ pu1_chroma_src_buf += 8;
+ }
+ rem_wd &= 0x7; /* bytes left for the scalar loop */
+ if(rem_wd)
+ {
+ for(col = 0; col < rem_wd; col++) /* scalar tail, one byte at a time */
+ {
+ c = pu1_chroma_recon_buf[col];
+ a = pu1_chroma_recon_buf[col - 2 - i4_chroma_recon_strd];
+ b = pu1_chroma_recon_buf[col + 2 + i4_chroma_recon_strd];
+ pel_err = pu1_chroma_src_buf[col] - pu1_chroma_recon_buf[col];
+ edge_idx = 2 + SIGN(c - a) + SIGN(c - b);
+
+ if(pel_err != 0)
+ {
+ pi4_acc_error_category[edge_idx] += pel_err; /* NOTE(review): may update index 2, which the vector paths never touch */
+ pi4_category_count[edge_idx]++;
+ }
+ }
+ }
+ pu1_chroma_recon_buf = pu1_chroma_recon_buf_copy + i4_chroma_recon_strd; /* back to the row start, one row down */
+ pu1_chroma_src_buf = pu1_chroma_src_buf_copy + i4_chroma_src_strd;
+ rem_wd = wd;
+ }
+ break;
+ case SAO_EDGE_45_DEG: /* neighbours: recon[+2 - stride] and recon[-2 + stride] */
+ for(row = row_start; row < row_end; row++)
+ {
+ pu1_chroma_src_buf_copy = pu1_chroma_src_buf;
+ pu1_chroma_recon_buf_copy = pu1_chroma_recon_buf;
+ for(col = wd; col > 15; col -= 16) /* 16 bytes per iteration */
+ {
+ /* load 16 bytes of source, recon and the two recon neighbours */
+ src_buf_8x16 = vld1q_u8(pu1_chroma_src_buf);
+ recon_buf_8x16 = vld1q_u8(pu1_chroma_recon_buf);
+ recon_buf0_8x16 = vld1q_u8(pu1_chroma_recon_buf + 2 - i4_chroma_recon_strd);
+ recon_buf1_8x16 = vld1q_u8(pu1_chroma_recon_buf - 2 + i4_chroma_recon_strd);
+ /* pel_error = src - recon as signed 16 bit: low 8 bytes, then high 8 bytes */
+ pel_error = vreinterpretq_s16_u16(
+ vsubl_u8(vget_low_u8(src_buf_8x16), vget_low_u8(recon_buf_8x16)));
+ pel_error1 = vreinterpretq_s16_u16(
+ vsubl_u8(vget_high_u8(src_buf_8x16), vget_high_u8(recon_buf_8x16)));
+ /* sign of (recon - neighbour) per lane: -1, 0 or +1 */
+ sign_reg0 = vreinterpretq_s16_u16(
+ vsubl_u8(vget_low_u8(recon_buf_8x16), vget_low_u8(recon_buf0_8x16)));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg0, const_0);
+ sign_reg0 = (int16x8_t)vcltq_s16(sign_reg0, const_0);
+ sign_reg0 = vsubq_s16(sign_reg0, sign_reg);
+
+ sign_reg1 = vreinterpretq_s16_u16(
+ vsubl_u8(vget_low_u8(recon_buf_8x16), vget_low_u8(recon_buf1_8x16)));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg1, const_0);
+ sign_reg1 = (int16x8_t)vcltq_s16(sign_reg1, const_0);
+ sign_reg1 = vsubq_s16(sign_reg1, sign_reg);
+
+ sign_reg2 = vreinterpretq_s16_u16(
+ vsubl_u8(vget_high_u8(recon_buf_8x16), vget_high_u8(recon_buf0_8x16)));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg2, const_0);
+ sign_reg2 = (int16x8_t)vcltq_s16(sign_reg2, const_0);
+ sign_reg2 = vsubq_s16(sign_reg2, sign_reg);
+
+ sign_reg3 = vreinterpretq_s16_u16(
+ vsubl_u8(vget_high_u8(recon_buf_8x16), vget_high_u8(recon_buf1_8x16)));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg3, const_0);
+ sign_reg3 = (int16x8_t)vcltq_s16(sign_reg3, const_0);
+ sign_reg3 = vsubq_s16(sign_reg3, sign_reg);
+
+ edgeidx = vaddq_s16(vaddq_s16(sign_reg0, const_2), sign_reg1); /* edge index = 2 + sign0 + sign1 */
+ edgeidx1 = vaddq_s16(vaddq_s16(sign_reg2, const_2), sign_reg3);
+
+ edgeidx_reg0 = vmvnq_s16((int16x8_t)vceqq_s16(const_0, pel_error)); /* all-ones where pel_error != 0 */
+ edgeidx = vandq_s16(edgeidx_reg0, edgeidx); /* edge index forced to 0 where pel_error == 0 */
+
+ edgeidx_reg5 = vmvnq_s16((int16x8_t)vceqq_s16(const_0, pel_error1));
+ edgeidx1 = vandq_s16(edgeidx_reg5, edgeidx1);
+
+ temp_reg0 = (int16x8_t)vceqq_s16(const_0, edgeidx); /* per-category lane masks: 0, 1, 3, 4 */
+ temp_reg4 = (int16x8_t)vceqq_s16(const_0, edgeidx1);
+ temp_reg1 = (int16x8_t)vceqq_s16(const_1, edgeidx);
+ temp_reg5 = (int16x8_t)vceqq_s16(const_1, edgeidx1);
+
+ temp_reg2 = (int16x8_t)vceqq_s16(const_3, edgeidx);
+ temp_reg6 = (int16x8_t)vceqq_s16(const_3, edgeidx1);
+ temp_reg3 = (int16x8_t)vceqq_s16(const_4, edgeidx);
+ temp_reg7 = (int16x8_t)vceqq_s16(const_4, edgeidx1);
+
+ edgeidx_reg1 = vabsq_s16(temp_reg1); /* mask (-1) -> 1, i.e. a per-lane count */
+ edgeidx_reg5 = vabsq_s16(temp_reg5);
+
+ edgeidx_reg2 = vabsq_s16(temp_reg2);
+ edgeidx_reg6 = vabsq_s16(temp_reg6);
+ edgeidx_reg3 = vabsq_s16(temp_reg3);
+ edgeidx_reg7 = vabsq_s16(temp_reg7);
+
+ temp_reg0 = vandq_s16(temp_reg0, pel_error); /* keep pel_error only in lanes of that category */
+ temp_reg4 = vandq_s16(temp_reg4, pel_error1);
+ temp_reg1 = vandq_s16(temp_reg1, pel_error);
+ temp_reg5 = vandq_s16(temp_reg5, pel_error1);
+
+ temp_reg2 = vandq_s16(temp_reg2, pel_error);
+ temp_reg6 = vandq_s16(temp_reg6, pel_error1);
+ temp_reg3 = vandq_s16(temp_reg3, pel_error);
+ temp_reg7 = vandq_s16(temp_reg7, pel_error1);
+
+ edgeidx_reg0 = vaddq_s16(const_1, (int16x8_t)vceqq_s16(const_0, temp_reg0)); /* 1 where category-0 error is non-zero, else 0 */
+ edgeidx_reg4 = vaddq_s16(const_1, (int16x8_t)vceqq_s16(const_0, temp_reg4));
+
+ temp_reg0 = vaddq_s16(temp_reg0, temp_reg4); /* fold the high 8 lanes into the low 8 lanes */
+ temp_reg1 = vaddq_s16(temp_reg1, temp_reg5);
+ temp_reg2 = vaddq_s16(temp_reg2, temp_reg6);
+ temp_reg3 = vaddq_s16(temp_reg3, temp_reg7);
+
+ edgeidx_reg0 = vaddq_s16(edgeidx_reg0, edgeidx_reg4);
+ edgeidx_reg1 = vaddq_s16(edgeidx_reg1, edgeidx_reg5);
+ edgeidx_reg2 = vaddq_s16(edgeidx_reg2, edgeidx_reg6);
+ edgeidx_reg3 = vaddq_s16(edgeidx_reg3, edgeidx_reg7);
+ /* accumulate error sums and counts for categories 0, 1, 3, 4 */
+ pi4_acc_error_category[0] += sad_cal(temp_reg0);
+ pi4_acc_error_category[1] += sad_cal(temp_reg1);
+ pi4_acc_error_category[3] += sad_cal(temp_reg2);
+ pi4_acc_error_category[4] += sad_cal(temp_reg3);
+
+ pi4_category_count[0] += sad_cal(edgeidx_reg0);
+ pi4_category_count[1] += sad_cal(edgeidx_reg1);
+ pi4_category_count[3] += sad_cal(edgeidx_reg2);
+ pi4_category_count[4] += sad_cal(edgeidx_reg3);
+ pu1_chroma_recon_buf += 16;
+ pu1_chroma_src_buf += 16;
+ }
+ rem_wd &= 0x0F; /* bytes left after the 16-wide loop */
+
+ if(rem_wd > 7) /* one 8-byte step using the same scheme on 8 lanes */
+ {
+ /* load 8 bytes of source, recon and the two recon neighbours */
+ src_buf = vld1_u8(pu1_chroma_src_buf);
+ recon_buf = vld1_u8(pu1_chroma_recon_buf);
+ recon_buf0 = vld1_u8(pu1_chroma_recon_buf + 2 - i4_chroma_recon_strd);
+ recon_buf1 = vld1_u8(pu1_chroma_recon_buf - 2 + i4_chroma_recon_strd);
+
+ pel_error = vreinterpretq_s16_u16(vsubl_u8(src_buf, recon_buf)); /* src - recon as signed 16 bit */
+
+ sign_reg0 = vreinterpretq_s16_u16(vsubl_u8(recon_buf, recon_buf0));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg0, const_0);
+ sign_reg0 = (int16x8_t)vcltq_s16(sign_reg0, const_0);
+ sign_reg0 = vsubq_s16(sign_reg0, sign_reg); /* sign of (recon - neighbour): -1, 0 or +1 */
+
+ sign_reg1 = vreinterpretq_s16_u16(vsubl_u8(recon_buf, recon_buf1));
+ sign_reg = (int16x8_t)vcgtq_s16(sign_reg1, const_0);
+ sign_reg1 = (int16x8_t)vcltq_s16(sign_reg1, const_0);
+ sign_reg1 = vsubq_s16(sign_reg1, sign_reg);
+
+ edgeidx = vaddq_s16(vaddq_s16(sign_reg0, const_2), sign_reg1);
+
+ edgeidx_reg0 = vmvnq_s16((int16x8_t)vceqq_s16(const_0, pel_error));
+ edgeidx = vandq_s16(edgeidx_reg0, edgeidx);
+
+ temp_reg0 = (int16x8_t)vceqq_s16(const_0, edgeidx);
+ temp_reg1 = (int16x8_t)vceqq_s16(const_1, edgeidx);
+ temp_reg3 = (int16x8_t)vceqq_s16(const_3, edgeidx);
+ temp_reg4 = (int16x8_t)vceqq_s16(const_4, edgeidx);
+
+ edgeidx_reg1 = vabsq_s16(temp_reg1);
+ edgeidx_reg3 = vabsq_s16(temp_reg3);
+ edgeidx_reg4 = vabsq_s16(temp_reg4);
+
+ temp_reg0 = vandq_s16(temp_reg0, pel_error);
+ temp_reg1 = vandq_s16(temp_reg1, pel_error);
+ temp_reg3 = vandq_s16(temp_reg3, pel_error);
+ temp_reg4 = vandq_s16(temp_reg4, pel_error);
+
+ edgeidx_reg0 = vaddq_s16(const_1, (int16x8_t)vceqq_s16(const_0, temp_reg0));
+ /* accumulate error sums and counts for categories 0, 1, 3, 4 */
+ pi4_acc_error_category[0] += sad_cal(temp_reg0);
+ pi4_acc_error_category[1] += sad_cal(temp_reg1);
+ pi4_acc_error_category[3] += sad_cal(temp_reg3);
+ pi4_acc_error_category[4] += sad_cal(temp_reg4);
+
+ pi4_category_count[0] += sad_cal(edgeidx_reg0);
+ pi4_category_count[1] += sad_cal(edgeidx_reg1);
+ pi4_category_count[3] += sad_cal(edgeidx_reg3);
+ pi4_category_count[4] += sad_cal(edgeidx_reg4);
+ pu1_chroma_recon_buf += 8;
+ pu1_chroma_src_buf += 8;
+ }
+ rem_wd &= 0x7; /* bytes left for the scalar loop */
+ if(rem_wd)
+ {
+ for(col = 0; col < rem_wd; col++) /* scalar tail, one byte at a time */
+ {
+ c = pu1_chroma_recon_buf[col];
+ a = pu1_chroma_recon_buf[col + 2 - i4_chroma_recon_strd];
+ b = pu1_chroma_recon_buf[col - 2 + i4_chroma_recon_strd];
+ pel_err = pu1_chroma_src_buf[col] - pu1_chroma_recon_buf[col];
+ edge_idx = 2 + SIGN(c - a) + SIGN(c - b);
+ if(pel_err != 0)
+ {
+ pi4_acc_error_category[edge_idx] += pel_err; /* NOTE(review): may update index 2, which the vector paths never touch */
+ pi4_category_count[edge_idx]++;
+ }
+ }
+ }
+ pu1_chroma_recon_buf = pu1_chroma_recon_buf_copy + i4_chroma_recon_strd; /* back to the row start, one row down */
+ pu1_chroma_src_buf = pu1_chroma_src_buf_copy + i4_chroma_src_strd;
+ rem_wd = wd;
+ }
+ break;
+ default:
+ break;
+ }
+}
diff --git a/encoder/arm/ihevce_copy_neon.c b/encoder/arm/ihevce_copy_neon.c
new file mode 100644
index 0000000..e404abf
--- /dev/null
+++ b/encoder/arm/ihevce_copy_neon.c
@@ -0,0 +1,301 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ihevce_copy_neon.c
+*
+* @brief
+* Contains intrinsic definitions of functions for block copy
+*
+* @author
+* ittiam
+*
+* @par List of Functions:
+* - ihevce_2d_square_copy_luma_neon()
+* - ihevce_copy_2d_neon()
+* - ihevce_chroma_interleave_2d_copy_neon()
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <string.h>
+#include <assert.h>
+#include <arm_neon.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevc_platform_macros.h"
+
+#include "ihevce_cmn_utils_instr_set_router.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+/* Copy one plane of a w x w interleaved chroma block from src into dst; other dst bytes are kept. */
+void ihevce_chroma_interleave_2d_copy_neon(
+ UWORD8 *pu1_uv_src,
+ WORD32 src_strd,
+ UWORD8 *pu1_uv_dst,
+ WORD32 dst_strd,
+ WORD32 w,
+ WORD32 h,
+ CHROMA_PLANE_ID_T e_chroma_plane)
+{
+ (void)h; /* h is only read by the assert below */
+ assert(w == h);
+ assert((e_chroma_plane == U_PLANE) || (e_chroma_plane == V_PLANE));
+
+ if(w == 4)
+ {
+ uint16x4_t select = vdup_n_u16(0xff << (e_chroma_plane << 3)); /* 0xff << (8 * plane) per pair */
+
+ for(; w > 0; w--) /* w doubles as the row counter since w == h */
+ {
+ uint8x8_t src_0, dst_0;
+
+ // one row of 8 bytes: bits set in select are taken from src, all other bits keep dst
+ src_0 = vld1_u8(pu1_uv_src);
+ dst_0 = vld1_u8(pu1_uv_dst);
+ dst_0 = vbsl_u8(vreinterpret_u8_u16(select), src_0, dst_0);
+ vst1_u8(pu1_uv_dst, dst_0);
+ pu1_uv_src += src_strd;
+ pu1_uv_dst += dst_strd;
+ }
+ }
+ else
+ {
+ uint16x8_t select = vdupq_n_u16(0xff << (e_chroma_plane << 3));
+ WORD32 i, j;
+
+ assert(w % 8 == 0);
+ for(j = 0; j < w; j += 1) /* rows; w is used as the row count because w == h */
+ {
+ UWORD8 *dst_ol = pu1_uv_dst + j * dst_strd;
+ UWORD8 *src_ol = pu1_uv_src + j * src_strd;
+
+ for(i = 0; i < w; i += 8) /* 8 samples = 16 interleaved bytes per step */
+ {
+ UWORD8 *dst_il = dst_ol + (i * 2);
+ UWORD8 *src_il = src_ol + (i * 2);
+ uint8x16_t src_0, dst_0;
+
+ // same masked merge as the w == 4 path, on 16 bytes
+ src_0 = vld1q_u8(src_il);
+ dst_0 = vld1q_u8(dst_il);
+ dst_0 = vbslq_u8(vreinterpretq_u8_u16(select), src_0, dst_0);
+ vst1q_u8(dst_il, dst_0);
+ }
+ }
+ }
+}
+/* Copies a blk_wd x blk_ht block of bytes; blk_wd must be 4, 8, 16, 32 or a multiple of 64. */
+static void copy_2d_neon(
+    UWORD8 *pu1_dst, WORD32 dst_strd, UWORD8 *pu1_src, WORD32 src_strd, WORD32 blk_wd, WORD32 blk_ht)
+{
+    assert(blk_wd == 4 || blk_wd == 8 || blk_wd == 16 || blk_wd == 32 || (blk_wd % 64 == 0));
+
+    if(blk_wd == 4)
+    {
+        assert((blk_ht & 1) == 0);
+        for(; blk_ht > 0; blk_ht -= 2)
+        {
+            // row 0: 4-byte memcpy, a uint32_t access would need 4-byte aligned rows
+            memcpy(pu1_dst, pu1_src, 4);
+            pu1_src += src_strd;
+            pu1_dst += dst_strd;
+            // row 1
+            memcpy(pu1_dst, pu1_src, 4);
+            pu1_src += src_strd;
+            pu1_dst += dst_strd;
+        }
+    }
+    else if(blk_wd == 8)
+    {
+        assert((blk_ht & 1) == 0);
+        for(; blk_ht > 0; blk_ht -= 2)
+        {
+            uint8x8_t src_0, src_1;
+
+            // row 0
+            src_0 = vld1_u8(pu1_src);
+            vst1_u8(pu1_dst, src_0);
+            // row 1
+            src_1 = vld1_u8(pu1_src + src_strd);
+            vst1_u8(pu1_dst + dst_strd, src_1);
+            pu1_src += 2 * src_strd;
+            pu1_dst += 2 * dst_strd;
+        }
+    }
+    else if(blk_wd == 16)
+    {
+        assert((blk_ht & 1) == 0);
+        for(; blk_ht > 0; blk_ht -= 2)
+        {
+            uint8x16_t src_0, src_1;
+
+            // row 0
+            src_0 = vld1q_u8(pu1_src);
+            vst1q_u8(pu1_dst, src_0);
+            // row 1
+            src_1 = vld1q_u8(pu1_src + src_strd);
+            vst1q_u8(pu1_dst + dst_strd, src_1);
+            pu1_src += 2 * src_strd;
+            pu1_dst += 2 * dst_strd;
+        }
+    }
+    else if(blk_wd == 32)
+    {
+        for(; blk_ht > 0; blk_ht--)
+        {
+            uint8x16_t src_0, src_1;
+
+            // one row per iteration, as two 16-byte halves (no even-height requirement)
+            src_0 = vld1q_u8(pu1_src);
+            vst1q_u8(pu1_dst, src_0);
+            src_1 = vld1q_u8(pu1_src + 16);
+            vst1q_u8(pu1_dst + 16, src_1);
+            pu1_src += src_strd;
+            pu1_dst += dst_strd;
+        }
+    }
+    else if(blk_wd % 64 == 0)
+    {
+        WORD32 i, j;
+
+        for(j = 0; j < blk_ht; j += 1)
+        {
+            UWORD8 *dst_ol = pu1_dst + j * dst_strd;
+            UWORD8 *src_ol = pu1_src + j * src_strd;
+
+            for(i = 0; i < blk_wd; i += 64)
+            {
+                uint8x16_t src_0, src_1, src_2, src_3;
+                UWORD8 *dst_il = dst_ol + i;
+                UWORD8 *src_il = src_ol + i;
+
+                src_0 = vld1q_u8(src_il);
+                vst1q_u8(dst_il, src_0);
+                src_1 = vld1q_u8(src_il + 16);
+                vst1q_u8(dst_il + 16, src_1);
+                src_2 = vld1q_u8(src_il + 32);
+                vst1q_u8(dst_il + 32, src_2);
+                src_3 = vld1q_u8(src_il + 48);
+                vst1q_u8(dst_il + 48, src_3);
+            }
+        }
+    }
+}
+/* Square block copy: strides and width are scaled by unit_size, num_cols_to_copy rows are copied. */
+void ihevce_2d_square_copy_luma_neon(
+    void *p_dst,
+    WORD32 dst_strd,
+    void *p_src,
+    WORD32 src_strd,
+    WORD32 num_cols_to_copy,
+    WORD32 unit_size)
+{
+    /* scale strides and width by unit_size (presumably the number of bytes per sample) */
+    const WORD32 dst_strd_bytes = dst_strd * unit_size;
+    const WORD32 src_strd_bytes = src_strd * unit_size;
+    const WORD32 wd_bytes = num_cols_to_copy * unit_size;
+    UWORD8 *pu1_to = (UWORD8 *)p_dst;
+    UWORD8 *pu1_from = (UWORD8 *)p_src;
+
+    /* the row count is num_cols_to_copy itself, it is not scaled */
+    copy_2d_neon(
+        pu1_to, dst_strd_bytes, pu1_from, src_strd_bytes, wd_bytes, num_cols_to_copy);
+}
+/**
+*******************************************************************************
+*
+* @brief
+*  Copies a blk_wd x blk_ht block of bytes from pu1_src to pu1_dst
+*
+* @par Description:
+*  The block is consumed left to right in vertical strips. Each strip uses the
+*  copy_2d_neon() kernel width selected below (64, 32, 16, 8 or 4 columns) and
+*  the function then recurses on the columns to the right of the strip. Widths
+*  below 4 are handed to ihevce_copy_2d().
+*  The 16, 8 and 4 wide kernels copy rows in pairs, so for an odd blk_ht the
+*  strips cover blk_ht - 1 rows and the last row is copied with memcpy() over
+*  the whole remaining width.
+*
+* @param[out] pu1_dst
+*  Pointer to the destination block
+*
+* @param[in] dst_strd
+*  Destination stride in bytes
+*
+* @param[in] pu1_src
+*  Pointer to the source block
+*
+* @param[in] src_strd
+*  Source stride in bytes
+*
+* @param[in] blk_wd
+*  Block width in bytes
+*
+* @param[in] blk_ht
+*  Block height in rows
+*
+* @returns
+*  None
+*
+*******************************************************************************
+*/
+void ihevce_copy_2d_neon(
+    UWORD8 *pu1_dst, WORD32 dst_strd, UWORD8 *pu1_src, WORD32 src_strd, WORD32 blk_wd, WORD32 blk_ht)
+{
+    WORD32 strip_wd;
+    WORD32 strip_ht = blk_ht;
+
+    if(blk_wd == 0)
+        return;
+
+    if(blk_wd < 4)
+    {
+        ihevce_copy_2d(pu1_dst, dst_strd, pu1_src, src_strd, blk_wd, blk_ht);
+        return;
+    }
+
+    /* strict '>' for 64 and 32: a width of exactly 64 (or 32) takes the next narrower strip */
+    strip_wd = (blk_wd > 64) ? 64 : (blk_wd > 32) ? 32 : (blk_wd >= 16) ? 16 : (blk_wd >= 8) ? 8 : 4;
+
+    /* only the 16, 8 and 4 wide kernels need an even row count */
+    if((strip_wd <= 16) && (blk_ht % 2 != 0))
+        strip_ht = blk_ht - 1;
+
+    copy_2d_neon(pu1_dst, dst_strd, pu1_src, src_strd, strip_wd, strip_ht);
+    if(strip_ht != blk_ht)
+        memcpy(pu1_dst + (blk_ht - 1) * dst_strd, pu1_src + (blk_ht - 1) * src_strd, blk_wd);
+
+    /* remaining columns to the right of the strip */
+    ihevce_copy_2d_neon(
+        pu1_dst + strip_wd, dst_strd, pu1_src + strip_wd, src_strd, blk_wd - strip_wd, strip_ht);
+}
diff --git a/encoder/arm/ihevce_decomp_pre_intra_pass_neon.c b/encoder/arm/ihevce_decomp_pre_intra_pass_neon.c
new file mode 100644
index 0000000..3baaa8f
--- /dev/null
+++ b/encoder/arm/ihevce_decomp_pre_intra_pass_neon.c
@@ -0,0 +1,302 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ihevce_decomp_pre_intra_pass_neon.c
+*
+* @brief
+* Contains functions to perform input scaling
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+*
+* @remarks
+* None
+*
+********************************************************************************
+*/
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <arm_neon.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "ihevc_macros.h"
+#include "ihevc_platform_macros.h"
+#include "itt_video_api.h"
+#include "ihevc_defs.h"
+#include "ihevc_cmn_utils_neon.h"
+#include "ihevce_ipe_instr_set_router.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+void ihevce_scaling_filter_mxn(
+ UWORD8 *pu1_src,
+ WORD32 src_strd,
+ UWORD8 *pu1_scrtch,
+ WORD32 scrtch_strd,
+ UWORD8 *pu1_dst,
+ WORD32 dst_strd,
+ WORD32 ht,
+ WORD32 wd)
+{ /* scalar 7-tap separable filter that halves width and height: src -> scratch -> dst */
+#define FILT_TAP_Q 8 /* the taps below sum to 256 = 1 << FILT_TAP_Q */
+#define N_TAPS 7
+ const WORD16 i4_ftaps[N_TAPS] = { -18, 0, 80, 132, 80, 0, -18 };
+ WORD32 i, j;
+ WORD32 tmp;
+ UWORD8 *pu1_src_tmp = pu1_src - 3 * src_strd; /* start 3 rows above the block */
+ UWORD8 *pu1_scrtch_tmp = pu1_scrtch;
+
+ /* horizontal filtering: src rows -3 .. ht + 1, even columns only, result stored at j / 2 */
+ for(i = -3; i < ht + 2; i++)
+ {
+ for(j = 0; j < wd; j += 2)
+ {
+ tmp = (i4_ftaps[3] * pu1_src_tmp[j] +
+ i4_ftaps[2] * (pu1_src_tmp[j - 1] + pu1_src_tmp[j + 1]) +
+ i4_ftaps[1] * (pu1_src_tmp[j + 2] + pu1_src_tmp[j - 2]) +
+ i4_ftaps[0] * (pu1_src_tmp[j + 3] + pu1_src_tmp[j - 3]) +
+ (1 << (FILT_TAP_Q - 1))) >>
+ FILT_TAP_Q;
+ pu1_scrtch_tmp[j >> 1] = CLIP_U8(tmp);
+ }
+ pu1_scrtch_tmp += scrtch_strd;
+ pu1_src_tmp += src_strd;
+ }
+ /* vertical filtering: every second scratch row, all wd / 2 columns, one dst row per step */
+ pu1_scrtch_tmp = pu1_scrtch + 3 * scrtch_strd; /* scratch row 3 holds filtered src row 0 */
+ for(i = 0; i < ht; i += 2)
+ {
+ for(j = 0; j < (wd >> 1); j++)
+ {
+ tmp =
+ (i4_ftaps[3] * pu1_scrtch_tmp[j] +
+ i4_ftaps[2] * (pu1_scrtch_tmp[j + scrtch_strd] + pu1_scrtch_tmp[j - scrtch_strd]) +
+ i4_ftaps[1] *
+ (pu1_scrtch_tmp[j + 2 * scrtch_strd] + pu1_scrtch_tmp[j - 2 * scrtch_strd]) +
+ i4_ftaps[0] *
+ (pu1_scrtch_tmp[j + 3 * scrtch_strd] + pu1_scrtch_tmp[j - 3 * scrtch_strd]) +
+ (1 << (FILT_TAP_Q - 1))) >>
+ FILT_TAP_Q;
+ pu1_dst[j] = CLIP_U8(tmp);
+ }
+ pu1_dst += dst_strd;
+ pu1_scrtch_tmp += (scrtch_strd << 1);
+ }
+}
+/* Scales one block of the frame down by 2 in each direction and pads the borders of the output. */
+void ihevce_scale_by_2_neon(
+ UWORD8 *pu1_src,
+ WORD32 src_strd,
+ UWORD8 *pu1_dst,
+ WORD32 dst_strd,
+ WORD32 wd,
+ WORD32 ht,
+ UWORD8 *pu1_wkg_mem,
+ WORD32 ht_offset,
+ WORD32 block_ht,
+ WORD32 wd_offset,
+ WORD32 block_wd,
+ FT_COPY_2D *pf_copy_2d)
+{
+#define MAX_BLK_SZ (MAX_CTB_SIZE + ((N_TAPS >> 1) << 1)) /* block plus a 3-sample margin per side */
+ UWORD8 au1_cpy[MAX_BLK_SZ * MAX_BLK_SZ];
+ UWORD32 cpy_strd = MAX_BLK_SZ;
+ UWORD8 *pu1_cpy = au1_cpy + cpy_strd * (N_TAPS >> 1) + (N_TAPS >> 1);
+
+ UWORD8 *pu1_in, *pu1_out;
+ WORD32 in_strd, wkg_mem_strd;
+
+ WORD32 row_start, row_end;
+ WORD32 col_start, col_end;
+ WORD32 i, fun_select;
+ WORD32 ht_tmp, wd_tmp;
+ FT_SCALING_FILTER_BY_2 *ihevce_scaling_filters[2];
+
+ assert((wd & 1) == 0);
+ assert((ht & 1) == 0);
+ assert(block_wd <= MAX_CTB_SIZE);
+ assert(block_ht <= MAX_CTB_SIZE);
+
+ /* function pointers for filtering different dimensions */
+ ihevce_scaling_filters[0] = ihevce_scaling_filter_mxn;
+ ihevce_scaling_filters[1] = ihevce_scaling_filter_mxn_neon;
+
+ /* handle boundary blks: a flag is 1 when fewer than 3 frame samples lie beyond that block edge */
+ col_start = (wd_offset < (N_TAPS >> 1)) ? 1 : 0;
+ row_start = (ht_offset < (N_TAPS >> 1)) ? 1 : 0;
+ col_end = ((wd_offset + block_wd) > (wd - (N_TAPS >> 1))) ? 1 : 0;
+ row_end = ((ht_offset + block_ht) > (ht - (N_TAPS >> 1))) ? 1 : 0;
+ if(col_end && (wd % block_wd != 0)) /* partial block: keep only the leftover columns */
+ {
+ block_wd = (wd % block_wd);
+ }
+ if(row_end && (ht % block_ht != 0)) /* partial block: keep only the leftover rows */
+ {
+ block_ht = (ht % block_ht);
+ }
+
+ /* boundary blks needs to be padded, copy src to tmp buffer */
+ if(col_start || col_end || row_end || row_start)
+ {
+ UWORD8 *pu1_src_tmp = pu1_src + wd_offset + ht_offset * src_strd;
+
+ /* step back 3 columns / rows on the sides that are not flagged as frame boundary */
+ pu1_cpy -= (3 * (1 - col_start) + cpy_strd * 3 * (1 - row_start));
+ pu1_src_tmp -= (3 * (1 - col_start) + src_strd * 3 * (1 - row_start));
+ ht_tmp = block_ht + 3 * (1 - row_start) + 3 * (1 - row_end);
+ wd_tmp = block_wd + 3 * (1 - col_start) + 3 * (1 - col_end);
+ pf_copy_2d(pu1_cpy, cpy_strd, pu1_src_tmp, src_strd, wd_tmp, ht_tmp);
+ pu1_in = au1_cpy + cpy_strd * 3 + 3; /* first sample of the block inside the copy */
+ in_strd = cpy_strd;
+ }
+ else
+ {
+ pu1_in = pu1_src + wd_offset + ht_offset * src_strd; /* interior block: filter src directly */
+ in_strd = src_strd;
+ }
+
+ /*top padding*/
+ if(row_start)
+ {
+ UWORD8 *pu1_cpy_tmp = au1_cpy + cpy_strd * 3; /* first block row, from column 0 of the copy */
+
+ pu1_cpy = au1_cpy + cpy_strd * (3 - 1);
+ memcpy(pu1_cpy, pu1_cpy_tmp, block_wd + 6);
+ pu1_cpy -= cpy_strd;
+ memcpy(pu1_cpy, pu1_cpy_tmp, block_wd + 6);
+ pu1_cpy -= cpy_strd;
+ memcpy(pu1_cpy, pu1_cpy_tmp, block_wd + 6);
+ }
+
+ /*bottom padding*/
+ if(row_end)
+ {
+ UWORD8 *pu1_cpy_tmp = au1_cpy + cpy_strd * 3 + (block_ht - 1) * cpy_strd; /* last block row */
+
+ pu1_cpy = pu1_cpy_tmp + cpy_strd;
+ memcpy(pu1_cpy, pu1_cpy_tmp, block_wd + 6);
+ pu1_cpy += cpy_strd;
+ memcpy(pu1_cpy, pu1_cpy_tmp, block_wd + 6);
+ pu1_cpy += cpy_strd;
+ memcpy(pu1_cpy, pu1_cpy_tmp, block_wd + 6);
+ }
+
+ /*left padding*/
+ if(col_start)
+ {
+ UWORD8 *pu1_cpy_tmp = au1_cpy + 3; /* first block column */
+
+ pu1_cpy = au1_cpy;
+ for(i = 0; i < block_ht + 6; i++) /* all rows of the copy, margins included */
+ {
+ pu1_cpy[0] = pu1_cpy[1] = pu1_cpy[2] = pu1_cpy_tmp[0];
+ pu1_cpy += cpy_strd;
+ pu1_cpy_tmp += cpy_strd;
+ }
+ }
+
+ /*right padding*/
+ if(col_end)
+ {
+ UWORD8 *pu1_cpy_tmp = au1_cpy + 3 + block_wd - 1; /* last block column */
+
+ pu1_cpy = au1_cpy + 3 + block_wd;
+ for(i = 0; i < block_ht + 6; i++)
+ {
+ pu1_cpy[0] = pu1_cpy[1] = pu1_cpy[2] = pu1_cpy_tmp[0];
+ pu1_cpy += cpy_strd;
+ pu1_cpy_tmp += cpy_strd;
+ }
+ }
+
+ wkg_mem_strd = block_wd >> 1;
+ pu1_out = pu1_dst + (wd_offset >> 1);
+ fun_select = (block_wd % 16 == 0); /* index 1 (the _neon filter) only for multiples of 16 */
+ ihevce_scaling_filters[fun_select](
+ pu1_in, in_strd, pu1_wkg_mem, wkg_mem_strd, pu1_out, dst_strd, block_ht, block_wd);
+
+ /* Left padding of 16 for 1st block of every row */
+ if(wd_offset == 0)
+ {
+ UWORD8 u1_val;
+ WORD32 pad_wd = 16;
+ WORD32 pad_ht = block_ht >> 1;
+ UWORD8 *dst = pu1_dst;
+
+ for(i = 0; i < pad_ht; i++)
+ {
+ u1_val = dst[0];
+ memset(&dst[-pad_wd], u1_val, pad_wd);
+ dst += dst_strd;
+ }
+ }
+
+ if(wd == wd_offset + block_wd)
+ {
+ /* Right padding of (16 + (CEIL16(wd/2))-wd/2) for last block of every row */
+ /* Right padding is done only after processing of last block of that row is done*/
+ UWORD8 u1_val;
+ WORD32 pad_wd = 16 + CEIL16((wd >> 1)) - (wd >> 1) + 4;
+ WORD32 pad_ht = block_ht >> 1;
+ UWORD8 *dst = pu1_dst + (wd >> 1) - 1; /* last output sample of the row */
+
+ for(i = 0; i < pad_ht; i++)
+ {
+ u1_val = dst[0];
+ memset(&dst[1], u1_val, pad_wd);
+ dst += dst_strd;
+ }
+
+ if(ht_offset == 0)
+ {
+ /* Top padding of 16 is done for 1st row only after we reach end of that row */
+ WORD32 pad_wd = dst_strd;
+ WORD32 pad_ht = 16;
+ UWORD8 *dst = pu1_dst - 16; /* start of the left padding of the first output row */
+
+ for(i = 1; i <= pad_ht; i++)
+ {
+ memcpy(dst - (i * dst_strd), dst, pad_wd);
+ }
+ }
+
+ /* Bottom padding of (16 + (CEIL16(ht/2)) - ht/2) is done only if we have
+ reached end of frame */
+ if(ht - ht_offset - block_ht == 0)
+ {
+ WORD32 pad_wd = dst_strd;
+ WORD32 pad_ht = 16 + CEIL16((ht >> 1)) - (ht >> 1) + 4;
+ UWORD8 *dst = pu1_dst + (((block_ht >> 1) - 1) * dst_strd) - 16; /* last output row */
+
+ for(i = 1; i <= pad_ht; i++)
+ memcpy(dst + (i * dst_strd), dst, pad_wd);
+ }
+ }
+}
diff --git a/encoder/arm/ihevce_had_compute_neon.c b/encoder/arm/ihevce_had_compute_neon.c
new file mode 100644
index 0000000..fef3313
--- /dev/null
+++ b/encoder/arm/ihevce_had_compute_neon.c
@@ -0,0 +1,1183 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ihevce_had_compute_neon.c
+*
+* @brief
+* Contains intrinsic definitions of functions for computing had
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+*
+* @remarks
+* None
+*
+********************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <string.h>
+#include <assert.h>
+#include <arm_neon.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevc_cmn_utils_neon.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+
+/*****************************************************************************/
+/* Globals */
+/*****************************************************************************/
+const int16_t gu2_dc_mask[8] = { 0, -1, -1, -1, -1, -1, -1, -1 }; /* AND mask: clears lane 0 only */
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+#define RESIDUE(k, is_chroma) /* *r<k> = 8 (src - pred) values of one row; advances both pointers */ \
+ if(!is_chroma) /* 8 consecutive bytes */ \
+ { \
+ const uint8x8_t s##k = vld1_u8(pu1_src); \
+ const uint8x8_t p##k = vld1_u8(pu1_pred); \
+ *r##k = vreinterpretq_s16_u16(vsubl_u8(s##k, p##k)); \
+ pu1_src += src_strd; \
+ pu1_pred += pred_strd; \
+ } \
+ else /* vld2 de-interleaves 16 bytes; val[0] holds the 8 even-indexed ones */ \
+ { \
+ const uint8x8_t s##k = vld2_u8(pu1_src).val[0]; \
+ const uint8x8_t p##k = vld2_u8(pu1_pred).val[0]; \
+ *r##k = vreinterpretq_s16_u16(vsubl_u8(s##k, p##k)); \
+ pu1_src += src_strd; \
+ pu1_pred += pred_strd; \
+ }
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+/* One 4-point butterfly pass across the four vectors, in place, independently for each lane. */
+static INLINE void
+ hadamard4x4_2_one_pass(int16x8_t *r0, int16x8_t *r1, int16x8_t *r2, int16x8_t *r3)
+{
+ const int16x8_t a0 = vaddq_s16(*r0, *r2); /* first stage: pairs (r0, r2) and (r1, r3) */
+ const int16x8_t a1 = vaddq_s16(*r1, *r3);
+ const int16x8_t a2 = vsubq_s16(*r0, *r2);
+ const int16x8_t a3 = vsubq_s16(*r1, *r3);
+
+ *r0 = vaddq_s16(a0, a1); /* second stage: pairs (a0, a1) and (a2, a3) */
+ *r1 = vsubq_s16(a0, a1);
+ *r2 = vaddq_s16(a2, a3);
+ *r3 = vsubq_s16(a2, a3);
+}
+/* Residual of 4 rows x 8 columns, then butterfly pass, transpose, butterfly pass; result in r0-r3. */
+static INLINE void hadamard4x4_2(
+ UWORD8 *pu1_src,
+ WORD32 src_strd,
+ UWORD8 *pu1_pred,
+ WORD32 pred_strd,
+ int16x8_t *r0,
+ int16x8_t *r1,
+ int16x8_t *r2,
+ int16x8_t *r3)
+{
+ // compute error between src and pred
+ RESIDUE(0, 0);
+ RESIDUE(1, 0);
+ RESIDUE(2, 0);
+ RESIDUE(3, 0);
+
+ // vertical hadamard tx
+ hadamard4x4_2_one_pass(r0, r1, r2, r3);
+
+ // transpose (presumably each 4x4 half on its own - confirm against transpose_s16_4x4q)
+ transpose_s16_4x4q(r0, r1, r2, r3);
+
+ // horizontal hadamard tx
+ hadamard4x4_2_one_pass(r0, r1, r2, r3);
+}
+/* Runs hadamard4x4_2() on rows 0-3 (into r0-r3) and on rows 4-7 (into r4-r7) of an 8x8 area. */
+static INLINE void hadamard4x4_4(
+ UWORD8 *pu1_src,
+ WORD32 src_strd,
+ UWORD8 *pu1_pred,
+ WORD32 pred_strd,
+ int16x8_t *r0,
+ int16x8_t *r1,
+ int16x8_t *r2,
+ int16x8_t *r3,
+ int16x8_t *r4,
+ int16x8_t *r5,
+ int16x8_t *r6,
+ int16x8_t *r7)
+{
+ // hadamard 4x4_2n
+ hadamard4x4_2(pu1_src, src_strd, pu1_pred, pred_strd, r0, r1, r2, r3);
+
+ // hadamard 4x4_2n
+ pu1_src += (4 * src_strd);
+ pu1_pred += (4 * pred_strd);
+ hadamard4x4_2(pu1_src, src_strd, pu1_pred, pred_strd, r4, r5, r6, r7);
+}
+/* Writes four rounded (sum |coeff| + 2) >> 2 values, two per pi4_hsad row, and returns their sum. */
+static INLINE WORD32 hadamard_sad4x4_4(int16x8_t *a, WORD32 *pi4_hsad, WORD32 hsad_stride)
+{
+ int16x8_t p[8];
+ int32x4_t b01, b23;
+ int64x2_t c01, c23;
+ int32x2_t d01, d23;
+
+ // satd
+ p[0] = vabsq_s16(a[0]);
+ p[1] = vabsq_s16(a[1]);
+ p[0] = vaddq_s16(p[0], p[1]);
+ p[2] = vabsq_s16(a[2]);
+ p[3] = vabsq_s16(a[3]);
+ p[2] = vaddq_s16(p[2], p[3]);
+
+ p[4] = vabsq_s16(a[4]);
+ p[5] = vabsq_s16(a[5]);
+ p[4] = vaddq_s16(p[4], p[5]);
+ p[6] = vabsq_s16(a[6]);
+ p[7] = vabsq_s16(a[7]);
+ p[6] = vaddq_s16(p[6], p[7]);
+
+ p[0] = vaddq_s16(p[0], p[2]); /* per-lane sum over a[0..3] */
+ b01 = vpaddlq_s16(p[0]);
+ c01 = vpaddlq_s32(b01); /* one 64-bit total for lanes 0-3, one for lanes 4-7 */
+ d01 = vrshrn_n_s64(c01, 2); /* rounding shift right by 2, narrowed to 32 bits */
+ vst1_s32(pi4_hsad, d01);
+ pi4_hsad += hsad_stride;
+
+ p[4] = vaddq_s16(p[4], p[6]); /* per-lane sum over a[4..7] */
+ b23 = vpaddlq_s16(p[4]);
+ c23 = vpaddlq_s32(b23);
+ d23 = vrshrn_n_s64(c23, 2);
+ vst1_s32(pi4_hsad, d23);
+
+ d01 = vadd_s32(d01, d23);
+
+ return (WORD32)(vget_lane_s64(vpaddl_s32(d01), 0));
+}
+/* 8x8 SATD from four 4x4 transforms (a[] is overwritten); sets *early_cbf on a large coefficient. */
+static INLINE WORD32 hadamard_sad8x8_using4x4(int16x8_t *a, WORD32 *early_cbf, WORD32 i4_frm_qstep)
+{
+    int16x8_t p[8];
+    const int16x8_t threshold = vdupq_n_s16((int16_t)(i4_frm_qstep >> 8));
+    uint32x4_t b;
+    uint64x2_t c;
+    uint64_t satd;
+    WORD32 i;
+
+    for(i = 0; i < 4; i++)
+    {
+        int16x8_t p0 = vaddq_s16(a[i], a[i + 4]); // butterfly between vector i and vector i + 4
+        int16x8_t p1 = vsubq_s16(a[i], a[i + 4]);
+
+        int16x4_t q0 = vadd_s16(vget_low_s16(p0), vget_high_s16(p0)); // low half vs high half
+        int16x4_t q1 = vsub_s16(vget_low_s16(p0), vget_high_s16(p0));
+        int16x4_t q2 = vadd_s16(vget_low_s16(p1), vget_high_s16(p1));
+        int16x4_t q3 = vsub_s16(vget_low_s16(p1), vget_high_s16(p1));
+
+        a[i] = vcombine_s16(q0, q2);
+        a[i + 4] = vcombine_s16(q1, q3);
+    }
+
+#define EARLY_EXIT(k) /* p[k] = |a[k]|; while *early_cbf is 0, set it if any lane > threshold */ \
+    { \
+        p[k] = vabsq_s16(a[k]); \
+        if(*early_cbf == 0) \
+        { \
+            uint16x8_t cmp; \
+            cmp = vcgtq_s16(p[k], threshold); \
+            if(vget_lane_s64(vreinterpret_s64_u16(vget_low_u16(cmp)), 0) || \
+               vget_lane_s64(vreinterpret_s64_u16(vget_high_u16(cmp)), 0)) \
+            { \
+                *early_cbf = 1; \
+            } \
+        } \
+    }
+    // satd
+    EARLY_EXIT(0);
+    EARLY_EXIT(1);
+    p[0] = vaddq_s16(p[0], p[1]);
+    EARLY_EXIT(2);
+    EARLY_EXIT(3);
+    p[2] = vaddq_s16(p[2], p[3]);
+
+    EARLY_EXIT(4);
+    EARLY_EXIT(5);
+    p[4] = vaddq_s16(p[4], p[5]);
+    EARLY_EXIT(6);
+    EARLY_EXIT(7);
+#undef EARLY_EXIT
+    p[6] = vaddq_s16(p[6], p[7]);
+    // a lane sum of 8 magnitudes can pass INT16_MAX but not UINT16_MAX: widen it as unsigned
+    p[0] = vaddq_s16(p[0], p[2]);
+    p[4] = vaddq_s16(p[4], p[6]);
+    p[0] = vaddq_s16(p[0], p[4]);
+    b = vpaddlq_u16(vreinterpretq_u16_s16(p[0]));
+    c = vpaddlq_u32(b);
+    satd = vget_lane_u64(vadd_u64(vget_low_u64(c), vget_high_u64(c)), 0);
+
+    return (WORD32)((satd + 4) >> 3);
+}
+/* One 8-point butterfly pass (three stages) across the eight vectors, in place, lane by lane. */
+static INLINE void hadamard8x8_one_pass(
+ int16x8_t *r0,
+ int16x8_t *r1,
+ int16x8_t *r2,
+ int16x8_t *r3,
+ int16x8_t *r4,
+ int16x8_t *r5,
+ int16x8_t *r6,
+ int16x8_t *r7)
+{
+ const int16x8_t a0 = vaddq_s16(*r0, *r4); /* stage 1: partners are 4 vectors apart */
+ const int16x8_t a4 = vsubq_s16(*r0, *r4);
+ const int16x8_t a1 = vaddq_s16(*r1, *r5);
+ const int16x8_t a5 = vsubq_s16(*r1, *r5);
+ const int16x8_t a2 = vaddq_s16(*r2, *r6);
+ const int16x8_t a6 = vsubq_s16(*r2, *r6);
+ const int16x8_t a3 = vaddq_s16(*r3, *r7);
+ const int16x8_t a7 = vsubq_s16(*r3, *r7);
+
+ const int16x8_t b0 = vaddq_s16(a0, a2); /* stage 2: partners are 2 apart */
+ const int16x8_t b2 = vsubq_s16(a0, a2);
+ const int16x8_t b1 = vaddq_s16(a1, a3);
+ const int16x8_t b3 = vsubq_s16(a1, a3);
+ const int16x8_t b4 = vaddq_s16(a4, a6);
+ const int16x8_t b6 = vsubq_s16(a4, a6);
+ const int16x8_t b5 = vaddq_s16(a5, a7);
+ const int16x8_t b7 = vsubq_s16(a5, a7);
+
+ *r0 = vaddq_s16(b0, b1); /* stage 3: neighbouring vectors */
+ *r1 = vsubq_s16(b0, b1);
+ *r2 = vaddq_s16(b2, b3);
+ *r3 = vsubq_s16(b2, b3);
+ *r4 = vaddq_s16(b4, b5);
+ *r5 = vsubq_s16(b4, b5);
+ *r6 = vaddq_s16(b6, b7);
+ *r7 = vsubq_s16(b6, b7);
+}
+/* 8x8 residual (src - pred) followed by butterfly pass, transpose, butterfly pass; result in r0-r7. */
+static INLINE void hadamard8x8(
+ UWORD8 *pu1_src,
+ WORD32 src_strd,
+ UWORD8 *pu1_pred,
+ WORD32 pred_strd,
+ int16x8_t *r0,
+ int16x8_t *r1,
+ int16x8_t *r2,
+ int16x8_t *r3,
+ int16x8_t *r4,
+ int16x8_t *r5,
+ int16x8_t *r6,
+ int16x8_t *r7,
+ WORD32 is_chroma)
+{
+ // compute error between src and pred (is_chroma != 0 reads every second byte, see RESIDUE)
+ RESIDUE(0, is_chroma);
+ RESIDUE(1, is_chroma);
+ RESIDUE(2, is_chroma);
+ RESIDUE(3, is_chroma);
+ RESIDUE(4, is_chroma);
+ RESIDUE(5, is_chroma);
+ RESIDUE(6, is_chroma);
+ RESIDUE(7, is_chroma);
+
+ // vertical hadamard tx
+ hadamard8x8_one_pass(r0, r1, r2, r3, r4, r5, r6, r7);
+
+ // transpose
+ transpose_s16_8x8(r0, r1, r2, r3, r4, r5, r6, r7);
+
+ // horizontal hadamard tx
+ hadamard8x8_one_pass(r0, r1, r2, r3, r4, r5, r6, r7);
+}
+/* 8x8 SATD: (sum |coeff| + 4) >> 3; is_chroma reads one interleaved plane, ac_only drops lane 0 of a0. */
+static INLINE UWORD32 ihevce_HAD_8x8_8bit_plane_neon(
+    UWORD8 *pu1_src,
+    WORD32 src_strd,
+    UWORD8 *pu1_pred,
+    WORD32 pred_strd,
+    WORD32 is_chroma,
+    WORD32 ac_only)
+{
+    int16x8_t a0, a1, a2, a3, a4, a5, a6, a7;
+    uint32x4_t b;
+    uint64x2_t c;
+    uint64_t satd;
+
+    // hadamard 8x8
+    hadamard8x8(
+        pu1_src, src_strd, pu1_pred, pred_strd, &a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7, is_chroma);
+
+    if(ac_only)
+    {
+        const int16x8_t mask = vld1q_s16(gu2_dc_mask);
+        a0 = vandq_s16(a0, mask);
+    }
+
+    // satd
+    a0 = vabsq_s16(a0);
+    a1 = vabsq_s16(a1);
+    a0 = vaddq_s16(a0, a1);
+    a2 = vabsq_s16(a2);
+    a3 = vabsq_s16(a3);
+    a2 = vaddq_s16(a2, a3);
+
+    a4 = vabsq_s16(a4);
+    a5 = vabsq_s16(a5);
+    a4 = vaddq_s16(a4, a5);
+    a6 = vabsq_s16(a6);
+    a7 = vabsq_s16(a7);
+    a6 = vaddq_s16(a6, a7);
+    // a lane sum of 8 magnitudes can pass INT16_MAX but not UINT16_MAX: widen it as unsigned
+    a0 = vaddq_s16(a0, a2);
+    a4 = vaddq_s16(a4, a6);
+    a0 = vaddq_s16(a0, a4);
+    b = vpaddlq_u16(vreinterpretq_u16_s16(a0));
+    c = vpaddlq_u32(b);
+    satd = vget_lane_u64(vadd_u64(vget_low_u64(c), vget_high_u64(c)), 0);
+
+    return (UWORD32)((satd + 4) >> 3);
+}
+/* 4x4 SATD: (sum |coeff| + 2) >> 2; is_chroma picks the interleaved load, ac_only drops lane 0. */
+static INLINE UWORD32 ihevce_HAD_4x4_8bit_plane_neon(
+ UWORD8 *pu1_src,
+ WORD32 src_strd,
+ UWORD8 *pu1_pred,
+ WORD32 pred_strd,
+ WORD32 is_chroma,
+ WORD32 ac_only)
+{
+ uint8x16_t src_u8, pred_u8;
+ int16x8_t res_01, res_23;
+ int16x4_t h[4];
+ int16x4_t v[4];
+ int16x4x2_t trans_4[2];
+ int16x8_t combined_rows[4];
+ int32x4x2_t trans_8;
+ int32x4_t sad_32_4[3];
+ int32x2_t sad_32_2;
+ int64x1_t sad_64_1;
+ int32_t sad;
+
+ if(!is_chroma)
+ {
+ src_u8 = load_unaligned_u8q(pu1_src, src_strd);
+ pred_u8 = load_unaligned_u8q(pu1_pred, pred_strd);
+ }
+ else
+ {
+ src_u8 = load_unaligned_u8qi(pu1_src, src_strd); /* presumably the de-interleaving variant */
+ pred_u8 = load_unaligned_u8qi(pu1_pred, pred_strd);
+ }
+ res_01 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(src_u8), vget_low_u8(pred_u8)));
+ res_23 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(src_u8), vget_high_u8(pred_u8)));
+
+ h[0] = vadd_s16(vget_low_s16(res_01), vget_high_s16(res_23)); /* first butterfly stage */
+ h[1] = vadd_s16(vget_high_s16(res_01), vget_low_s16(res_23));
+ h[2] = vsub_s16(vget_high_s16(res_01), vget_low_s16(res_23));
+ h[3] = vsub_s16(vget_low_s16(res_01), vget_high_s16(res_23));
+
+ v[0] = vadd_s16(h[0], h[1]); /* second butterfly stage */
+ v[1] = vadd_s16(h[3], h[2]);
+ v[2] = vsub_s16(h[0], h[1]);
+ v[3] = vsub_s16(h[3], h[2]);
+
+ trans_4[0] = vtrn_s16(v[0], v[2]);
+ trans_4[1] = vtrn_s16(v[1], v[3]);
+
+ combined_rows[0] = vcombine_s16(trans_4[0].val[0], trans_4[1].val[0]);
+ combined_rows[1] = vcombine_s16(trans_4[0].val[1], trans_4[1].val[1]);
+
+ combined_rows[2] = vaddq_s16(combined_rows[0], combined_rows[1]);
+ combined_rows[3] = vsubq_s16(combined_rows[0], combined_rows[1]);
+
+ trans_8 =
+ vtrnq_s32(vreinterpretq_s32_s16(combined_rows[2]), vreinterpretq_s32_s16(combined_rows[3]));
+
+ combined_rows[0] =
+ vaddq_s16(vreinterpretq_s16_s32(trans_8.val[0]), vreinterpretq_s16_s32(trans_8.val[1]));
+ combined_rows[0] = vabsq_s16(combined_rows[0]);
+ combined_rows[1] =
+ vsubq_s16(vreinterpretq_s16_s32(trans_8.val[0]), vreinterpretq_s16_s32(trans_8.val[1]));
+ combined_rows[1] = vabsq_s16(combined_rows[1]);
+
+ if(ac_only)
+ {
+ const int16x8_t mask = vld1q_s16(gu2_dc_mask); /* zero in lane 0, all ones elsewhere */
+ combined_rows[0] = vandq_s16(combined_rows[0], mask);
+ }
+
+ sad_32_4[0] = vpaddlq_s16(combined_rows[0]);
+ sad_32_4[1] = vpaddlq_s16(combined_rows[1]);
+ sad_32_4[2] = vaddq_s32(sad_32_4[0], sad_32_4[1]);
+ sad_32_2 = vadd_s32(vget_high_s32(sad_32_4[2]), vget_low_s32(sad_32_4[2]));
+ sad_64_1 = vpaddl_s32(sad_32_2);
+ sad = vget_lane_s64(sad_64_1, 0);
+
+ return ((sad + 2) >> 2);
+}
+/* 4x4 SATD of src - pred on contiguous samples, all coefficients; pi2_dst and dst_strd are unused. */
+UWORD32 ihevce_HAD_4x4_8bit_neon(
+ UWORD8 *pu1_src,
+ WORD32 src_strd,
+ UWORD8 *pu1_pred,
+ WORD32 pred_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd)
+{
+ (void)pi2_dst;
+ (void)dst_strd;
+ return ihevce_HAD_4x4_8bit_plane_neon(pu1_src, src_strd, pu1_pred, pred_strd, 0, 0);
+}
+/* 4x4 SATD with is_chroma = 1 and ac_only = 1 (lane 0 masked out); pi2_dst, dst_strd are unused. */
+UWORD32 ihevce_chroma_compute_AC_HAD_4x4_8bit_neon(
+ UWORD8 *pu1_origin,
+ WORD32 src_strd,
+ UWORD8 *pu1_pred_buf,
+ WORD32 pred_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd)
+{
+ (void)pi2_dst;
+ (void)dst_strd;
+ return ihevce_HAD_4x4_8bit_plane_neon(pu1_origin, src_strd, pu1_pred_buf, pred_strd, 1, 1);
+}
+/* 8x8 SATD of src - pred on contiguous samples, all coefficients; pi2_dst and dst_strd are unused. */
+UWORD32 ihevce_HAD_8x8_8bit_neon(
+ UWORD8 *pu1_src,
+ WORD32 src_strd,
+ UWORD8 *pu1_pred,
+ WORD32 pred_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd)
+{
+ (void)pi2_dst;
+ (void)dst_strd;
+ return ihevce_HAD_8x8_8bit_plane_neon(pu1_src, src_strd, pu1_pred, pred_strd, 0, 0);
+}
+/* 8x8 SATD on contiguous samples with ac_only = 1 (lane 0 masked out); pi2_dst, dst_strd unused. */
+UWORD32 ihevce_compute_ac_had_8x8_8bit_neon(
+ UWORD8 *pu1_src,
+ WORD32 src_strd,
+ UWORD8 *pu1_pred,
+ WORD32 pred_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd)
+{
+ (void)pi2_dst;
+ (void)dst_strd;
+ return ihevce_HAD_8x8_8bit_plane_neon(pu1_src, src_strd, pu1_pred, pred_strd, 0, 1);
+}
+/* 16x16 SATD built from the 8x8 transforms of the four quadrants; pi2_dst and dst_strd are unused. */
+UWORD32 ihevce_HAD_16x16_8bit_neon(
+ UWORD8 *pu1_src,
+ WORD32 src_strd,
+ UWORD8 *pu1_pred,
+ WORD32 pred_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd)
+{
+ int16x8_t b0[8];
+ int16x8_t b1[8];
+ int16x8_t b2[8];
+ int16x8_t b3[8];
+ uint32x4_t sum = vdupq_n_u32(0);
+ uint64x2_t c;
+ uint64_t satd;
+ WORD32 i;
+
+ (void)pi2_dst;
+ (void)dst_strd;
+
+ // hadamard 8x8 - b0 (top-left quadrant)
+ hadamard8x8(
+ pu1_src,
+ src_strd,
+ pu1_pred,
+ pred_strd,
+ &b0[0],
+ &b0[1],
+ &b0[2],
+ &b0[3],
+ &b0[4],
+ &b0[5],
+ &b0[6],
+ &b0[7],
+ 0);
+ // hadamard 8x8 - b1 (top-right quadrant)
+ hadamard8x8(
+ pu1_src + 8,
+ src_strd,
+ pu1_pred + 8,
+ pred_strd,
+ &b1[0],
+ &b1[1],
+ &b1[2],
+ &b1[3],
+ &b1[4],
+ &b1[5],
+ &b1[6],
+ &b1[7],
+ 0);
+ // hadamard 8x8 - b2 (bottom-left quadrant)
+ hadamard8x8(
+ pu1_src + (8 * src_strd),
+ src_strd,
+ pu1_pred + (8 * pred_strd),
+ pred_strd,
+ &b2[0],
+ &b2[1],
+ &b2[2],
+ &b2[3],
+ &b2[4],
+ &b2[5],
+ &b2[6],
+ &b2[7],
+ 0);
+ // hadamard 8x8 - b3 (bottom-right quadrant)
+ hadamard8x8(
+ pu1_src + (8 * src_strd) + 8,
+ src_strd,
+ pu1_pred + (8 * pred_strd) + 8,
+ pred_strd,
+ &b3[0],
+ &b3[1],
+ &b3[2],
+ &b3[3],
+ &b3[4],
+ &b3[5],
+ &b3[6],
+ &b3[7],
+ 0);
+
+ for(i = 0; i < 8; i++)
+ {
+ int16x8_t p0 = vhaddq_s16(b0[i], b1[i]); /* halving add / sub: (x +/- y) >> 1 */
+ int16x8_t p1 = vhsubq_s16(b0[i], b1[i]);
+ int16x8_t p2 = vhaddq_s16(b2[i], b3[i]);
+ int16x8_t p3 = vhsubq_s16(b2[i], b3[i]);
+
+ int16x8_t q0 = vaddq_s16(p0, p2);
+ int16x8_t q1 = vsubq_s16(p0, p2);
+ int16x8_t q2 = vaddq_s16(p1, p3);
+ int16x8_t q3 = vsubq_s16(p1, p3);
+
+ uint16x8_t r0 =
+ vaddq_u16(vreinterpretq_u16_s16(vabsq_s16(q0)), vreinterpretq_u16_s16(vabsq_s16(q1)));
+ uint16x8_t r1 =
+ vaddq_u16(vreinterpretq_u16_s16(vabsq_s16(q2)), vreinterpretq_u16_s16(vabsq_s16(q3)));
+
+ uint32x4_t s0 = vaddl_u16(vget_low_u16(r0), vget_high_u16(r0)); /* widen to 32 bits */
+ uint32x4_t s1 = vaddl_u16(vget_low_u16(r1), vget_high_u16(r1));
+
+ sum = vaddq_u32(sum, s0);
+ sum = vaddq_u32(sum, s1);
+ }
+
+ c = vpaddlq_u32(sum);
+ satd = vget_lane_u64(vadd_u64(vget_low_u64(c), vget_high_u64(c)), 0);
+
+ return ((satd + 4) >> 3);
+}
+/* 4x4 SATD with is_chroma = 1 (interleaved load), all coefficients; pi2_dst, dst_strd are unused. */
+UWORD32 ihevce_chroma_HAD_4x4_8bit_neon(
+ UWORD8 *pu1_src,
+ WORD32 src_strd,
+ UWORD8 *pu1_pred,
+ WORD32 pred_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd)
+{
+ (void)pi2_dst;
+ (void)dst_strd;
+ return ihevce_HAD_4x4_8bit_plane_neon(pu1_src, src_strd, pu1_pred, pred_strd, 1, 0);
+}
+
+/**
+ * 8x8 chroma SATD entry point.
+ *
+ * Forwards src/pred and their strides to ihevce_HAD_8x8_8bit_plane_neon()
+ * with the trailing arguments (1, 0) and returns its result.
+ * NOTE(review): the (1, 0) pair presumably selects the interleaved-chroma
+ * path of the helper - confirm against its definition.
+ *
+ * pi2_dst and dst_strd are not used by this variant.
+ */
+UWORD32 ihevce_chroma_HAD_8x8_8bit_neon(
+    UWORD8 *pu1_src,
+    WORD32 src_strd,
+    UWORD8 *pu1_pred,
+    WORD32 pred_strd,
+    WORD16 *pi2_dst,
+    WORD32 dst_strd)
+{
+    UWORD32 u4_satd;
+
+    /* No coefficient output is produced here */
+    (void)dst_strd;
+    (void)pi2_dst;
+
+    u4_satd = ihevce_HAD_8x8_8bit_plane_neon(pu1_src, src_strd, pu1_pred, pred_strd, 1, 0);
+
+    return u4_satd;
+}
+
+/**
+ * 16x16 chroma SATD entry point.
+ *
+ * The result is the sum of four ihevce_HAD_8x8_8bit_plane_neon() calls made
+ * at byte offsets (0, 0), (16, 0), (0, 8 rows) and (16, 8 rows) from the
+ * src/pred pointers, each with the trailing arguments (1, 0).
+ * NOTE(review): the 16-byte horizontal step suggests interleaved chroma
+ * samples - confirm against the helper.
+ *
+ * pi2_dst and dst_strd are not used by this variant.
+ */
+UWORD32 ihevce_chroma_HAD_16x16_8bit_neon(
+    UWORD8 *pu1_src,
+    WORD32 src_strd,
+    UWORD8 *pu1_pred,
+    WORD32 pred_strd,
+    WORD16 *pi2_dst,
+    WORD32 dst_strd)
+{
+    UWORD32 u4_total;
+
+    (void)dst_strd;
+    (void)pi2_dst;
+
+    /* Top-left, top-right */
+    u4_total = ihevce_HAD_8x8_8bit_plane_neon(pu1_src, src_strd, pu1_pred, pred_strd, 1, 0);
+    u4_total +=
+        ihevce_HAD_8x8_8bit_plane_neon(pu1_src + 16, src_strd, pu1_pred + 16, pred_strd, 1, 0);
+
+    /* Bottom-left, bottom-right (8 rows further down) */
+    u4_total += ihevce_HAD_8x8_8bit_plane_neon(
+        pu1_src + 8 * src_strd, src_strd, pu1_pred + 8 * pred_strd, pred_strd, 1, 0);
+    u4_total += ihevce_HAD_8x8_8bit_plane_neon(
+        pu1_src + 8 * src_strd + 16, src_strd, pu1_pred + 8 * pred_strd + 16, pred_strd, 1, 0);
+
+    return u4_total;
+}
+
+/**
+ * SATD of a 32x32 block built from sixteen hadamard8x8() results.
+ *
+ * Stage 1: hadamard8x8() is run on every 8x8 sub-block (helper defined
+ *          elsewhere; presumably it transforms the src/pred difference).
+ * Stage 2: inside each 16x16 quadrant the four 8x8 results are combined with
+ *          a halving add/sub butterfly followed by a plain add/sub butterfly,
+ *          and each output is arithmetic-shifted right by 2.
+ * Stage 3: same-position blocks of the four quadrants are combined with two
+ *          plain add/sub butterflies and the absolute values are accumulated.
+ *
+ * Returns (accumulated sum + 2) >> 2. pi2_dst and dst_strd are unused.
+ */
+UWORD32 ihevce_HAD_32x32_8bit_neon(
+    UWORD8 *pu1_src,
+    WORD32 src_strd,
+    UWORD8 *pu1_pred,
+    WORD32 pred_strd,
+    WORD16 *pi2_dst,
+    WORD32 dst_strd)
+{
+    /* coef[16x16 quadrant][8x8 sub-block][row of 8 coefficients] */
+    int16x8_t coef[4][4][8];
+    uint32x4_t acc = vdupq_n_u32(0);
+    uint64x2_t acc_pairs;
+    uint64_t satd;
+    WORD32 quad, blk, row;
+
+    (void)pi2_dst;
+    (void)dst_strd;
+
+    for(quad = 0; quad < 4; quad++)
+    {
+        UWORD8 *pu1_src_quad = pu1_src + (quad >> 1) * (src_strd * 16) + ((quad & 1) * 16);
+        UWORD8 *pu1_pred_quad = pu1_pred + (quad >> 1) * (pred_strd * 16) + ((quad & 1) * 16);
+        int16x8_t(*quad_coef)[8] = coef[quad];
+
+        /* Stage 1: four 8x8 transforms of this quadrant */
+        for(blk = 0; blk < 4; blk++)
+        {
+            UWORD8 *pu1_src_blk = pu1_src_quad + (blk >> 1) * (src_strd * 8) + ((blk & 1) * 8);
+            UWORD8 *pu1_pred_blk = pu1_pred_quad + (blk >> 1) * (pred_strd * 8) + ((blk & 1) * 8);
+            int16x8_t *blk_rows = quad_coef[blk];
+
+            hadamard8x8(
+                pu1_src_blk,
+                src_strd,
+                pu1_pred_blk,
+                pred_strd,
+                &blk_rows[0],
+                &blk_rows[1],
+                &blk_rows[2],
+                &blk_rows[3],
+                &blk_rows[4],
+                &blk_rows[5],
+                &blk_rows[6],
+                &blk_rows[7],
+                0);
+        }
+
+        /* Stage 2: 16x16 combine, results scaled down by 4 */
+        for(row = 0; row < 8; row++)
+        {
+            const int16x8_t hsum_top = vhaddq_s16(quad_coef[0][row], quad_coef[1][row]);
+            const int16x8_t hdif_top = vhsubq_s16(quad_coef[0][row], quad_coef[1][row]);
+            const int16x8_t hsum_bot = vhaddq_s16(quad_coef[2][row], quad_coef[3][row]);
+            const int16x8_t hdif_bot = vhsubq_s16(quad_coef[2][row], quad_coef[3][row]);
+
+            quad_coef[0][row] = vshrq_n_s16(vaddq_s16(hsum_top, hsum_bot), 2);
+            quad_coef[1][row] = vshrq_n_s16(vsubq_s16(hsum_top, hsum_bot), 2);
+            quad_coef[2][row] = vshrq_n_s16(vaddq_s16(hdif_top, hdif_bot), 2);
+            quad_coef[3][row] = vshrq_n_s16(vsubq_s16(hdif_top, hdif_bot), 2);
+        }
+    }
+
+    /* Stage 3: combine the four quadrants and accumulate absolute values */
+    for(blk = 0; blk < 4; blk++)
+    {
+        for(row = 0; row < 8; row++)
+        {
+            const int16x8_t sum_top = vaddq_s16(coef[0][blk][row], coef[1][blk][row]);
+            const int16x8_t dif_top = vsubq_s16(coef[0][blk][row], coef[1][blk][row]);
+            const int16x8_t sum_bot = vaddq_s16(coef[2][blk][row], coef[3][blk][row]);
+            const int16x8_t dif_bot = vsubq_s16(coef[2][blk][row], coef[3][blk][row]);
+
+            const int16x8_t out0 = vaddq_s16(sum_top, sum_bot);
+            const int16x8_t out1 = vsubq_s16(sum_top, sum_bot);
+            const int16x8_t out2 = vaddq_s16(dif_top, dif_bot);
+            const int16x8_t out3 = vsubq_s16(dif_top, dif_bot);
+
+            const uint16x8_t abs01 = vaddq_u16(
+                vreinterpretq_u16_s16(vabsq_s16(out0)), vreinterpretq_u16_s16(vabsq_s16(out1)));
+            const uint16x8_t abs23 = vaddq_u16(
+                vreinterpretq_u16_s16(vabsq_s16(out2)), vreinterpretq_u16_s16(vabsq_s16(out3)));
+
+            /* Widen to 32 bit while folding the two halves of each vector */
+            acc = vaddq_u32(acc, vaddl_u16(vget_low_u16(abs01), vget_high_u16(abs01)));
+            acc = vaddq_u32(acc, vaddl_u16(vget_low_u16(abs23), vget_high_u16(abs23)));
+        }
+    }
+
+    /* Horizontal reduction of the four 32-bit lanes */
+    acc_pairs = vpaddlq_u32(acc);
+    satd = vget_lane_u64(vadd_u64(vget_low_u64(acc_pairs), vget_high_u64(acc_pairs)), 0);
+
+    return ((satd + 2) >> 2);
+}
+
+/**
+ * Runs hadamard4x4_4() on src/pred and passes the eight result vectors to
+ * hadamard_sad4x4_4() together with pi4_hsad and hsad_stride; the value
+ * returned by hadamard_sad4x4_4() is returned unchanged.
+ * NOTE(review): per the call-site comment in this file the helper covers the
+ * four 4x4 blocks of one 8x8 area - confirm against hadamard4x4_4().
+ *
+ * pi2_dst4x4, dst_strd and i4_frm_qstep are unused.
+ */
+WORD32 ihevce_had4_4x4_neon(
+    UWORD8 *pu1_src,
+    WORD32 src_strd,
+    UWORD8 *pu1_pred,
+    WORD32 pred_strd,
+    WORD16 *pi2_dst4x4,
+    WORD32 dst_strd,
+    WORD32 *pi4_hsad,
+    WORD32 hsad_stride,
+    WORD32 i4_frm_qstep)
+{
+    int16x8_t had_rows[8];
+    WORD32 i4_sad;
+
+    (void)i4_frm_qstep;
+    (void)dst_strd;
+    (void)pi2_dst4x4;
+
+    /* Four 4x4 transforms in a single call */
+    hadamard4x4_4(
+        pu1_src,
+        src_strd,
+        pu1_pred,
+        pred_strd,
+        &had_rows[0],
+        &had_rows[1],
+        &had_rows[2],
+        &had_rows[3],
+        &had_rows[4],
+        &had_rows[5],
+        &had_rows[6],
+        &had_rows[7]);
+
+    i4_sad = hadamard_sad4x4_4(had_rows, pi4_hsad, hsad_stride);
+
+    return i4_sad;
+}
+
+/**
+ * 8x8 Hadamard cost with a 4x4-vs-8x8 TU split decision.
+ *
+ * pos_x_y_4x4 packs the x position in its low 16 bits and the y position in
+ * the next 16 bits. The positions index the hsad / tu_split / early_cbf
+ * tables (full resolution for HAD_4x4, halved for HAD_8x8).
+ *
+ * Steps:
+ *  - hadamard4x4_4() on src/pred;
+ *  - child cost  = hadamard_sad4x4_4() + (4 * lambda) >> (lambda_q_shift + 1);
+ *  - parent cost = hadamard_sad8x8_using4x4(), which also reports early_cbf;
+ *  - the eight coefficient vectors are stored to pi2_dst (row stride dst_strd);
+ *  - split is chosen when i4_cur_depth < i4_max_depth and either the child is
+ *    cheaper or i4_max_tr_size is below 8. On split, *pi4_tu_split_cost is
+ *    increased by the flag cost.
+ *
+ * Returns (cost << 2) | (split << 1) | early_cbf. pv_func_sel is unused.
+ */
+WORD32 ihevce_had_8x8_using_4_4x4_r_neon(
+    UWORD8 *pu1_src,
+    WORD32 src_strd,
+    UWORD8 *pu1_pred,
+    WORD32 pred_strd,
+    WORD16 *pi2_dst,
+    WORD32 dst_strd,
+    WORD32 **ppi4_hsad,
+    WORD32 **ppi4_tu_split,
+    WORD32 **ppi4_tu_early_cbf,
+    WORD32 pos_x_y_4x4,
+    WORD32 num_4x4_in_row,
+    WORD32 lambda,
+    WORD32 lambda_q_shift,
+    WORD32 i4_frm_qstep,
+    WORD32 i4_cur_depth,
+    WORD32 i4_max_depth,
+    WORD32 i4_max_tr_size,
+    WORD32 *pi4_tu_split_cost,
+    void *pv_func_sel)
+{
+    const WORD32 cur_tr_size = 8;
+    const WORD32 pos_x = pos_x_y_4x4 & 0xFFFF;
+    const WORD32 pos_y = (pos_x_y_4x4 >> 16) & 0xFFFF;
+    /* Cost of the 4 CBF flags; the extra shift models 0.5 bit per bin */
+    const WORD32 flag_cost = (4 * lambda) >> (lambda_q_shift + 1);
+
+    WORD32 *pi4_hsad_4x4;
+    WORD32 *pi4_hsad_8x8;
+    WORD32 *pi4_split_8x8;
+    WORD32 *pi4_cbf_8x8;
+    WORD32 blk8_idx;
+
+    int16x8_t had_rows[8];
+    WORD32 cost_child, cost_parent, best_cost;
+    WORD32 early_cbf = 0;
+    WORD32 do_split;
+    WORD32 row;
+
+    (void)pv_func_sel;
+
+    assert(pos_x >= 0);
+    assert(pos_y >= 0);
+
+    /* Locations where the 4x4 and 8x8 results are recorded */
+    blk8_idx = (pos_x >> 1) + (pos_y >> 1) * (num_4x4_in_row >> 1);
+    pi4_hsad_4x4 = ppi4_hsad[HAD_4x4] + pos_x + pos_y * num_4x4_in_row;
+    pi4_hsad_8x8 = ppi4_hsad[HAD_8x8] + blk8_idx;
+    pi4_split_8x8 = ppi4_tu_split[HAD_8x8] + blk8_idx;
+    pi4_cbf_8x8 = ppi4_tu_early_cbf[HAD_8x8] + blk8_idx;
+
+    /* Four 4x4 transforms of the 8x8 block in one call */
+    hadamard4x4_4(
+        pu1_src,
+        src_strd,
+        pu1_pred,
+        pred_strd,
+        &had_rows[0],
+        &had_rows[1],
+        &had_rows[2],
+        &had_rows[3],
+        &had_rows[4],
+        &had_rows[5],
+        &had_rows[6],
+        &had_rows[7]);
+
+    /* Child (4x4) cost first, then parent (8x8) cost from the same vectors */
+    cost_child = hadamard_sad4x4_4(had_rows, pi4_hsad_4x4, num_4x4_in_row);
+    cost_child += flag_cost;
+
+    cost_parent = hadamard_sad8x8_using4x4(had_rows, &early_cbf, i4_frm_qstep);
+
+    /* Export the coefficient vectors as left by the parent-cost helper */
+    for(row = 0; row < 8; row++)
+    {
+        vst1q_s16(pi2_dst + row * dst_strd, had_rows[row]);
+    }
+
+    do_split = (i4_cur_depth < i4_max_depth) &&
+               ((cost_child < cost_parent) || (i4_max_tr_size < cur_tr_size));
+
+    if(do_split)
+    {
+        *pi4_tu_split_cost += flag_cost;
+        best_cost = cost_child;
+    }
+    else
+    {
+        best_cost = cost_parent;
+    }
+
+    pi4_split_8x8[0] = do_split;
+    pi4_hsad_8x8[0] = best_cost;
+    pi4_cbf_8x8[0] = early_cbf;
+
+    /* Pack: cost above bit 1, split flag in bit 1, early cbf in bit 0 */
+    best_cost = (best_cost << 1) + do_split;
+
+    return ((best_cost << 1) + early_cbf);
+}
+
+/**
+ * Combines four 8x8 Hadamard blocks into a 16x16 result.
+ *
+ * Input layout (row stride had8_strd): rows 0-7 hold block b0 in columns 0-7
+ * and b1 in columns 8-15; rows 8-15 hold b2 and b3 the same way.
+ *
+ * For each of the 8 rows the blocks are combined with a halving add/sub
+ * butterfly (b0/b1 and b2/b3) followed by a plain add/sub butterfly. The four
+ * outputs are stored to pi2_dst at element offsets 0, 8, 8 * dst_strd and
+ * 8 * dst_strd + 8, and their absolute values are accumulated.
+ *
+ * Early CBF: while *pi4_cbf is 0, it is set to 1 as soon as any absolute
+ * output is greater than (int16_t)(i4_frm_qstep >> 8). This function never
+ * clears *pi4_cbf, so the caller supplies the initial value.
+ *
+ * Returns (accumulated absolute sum + 4) >> 3.
+ */
+static WORD32 ihevce_compute_16x16HAD_using_8x8_neon(
+ WORD16 *pi2_8x8_had,
+ WORD32 had8_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd,
+ WORD32 i4_frm_qstep,
+ WORD32 *pi4_cbf)
+{
+ int16x8_t b0[8];
+ int16x8_t b1[8];
+ int16x8_t b2[8];
+ int16x8_t b3[8];
+ const int16x8_t threshold = vdupq_n_s16((int16_t)(i4_frm_qstep >> 8));
+ uint32x4_t sum = vdupq_n_u32(0);
+ uint64x2_t c;
+ uint64_t satd;
+ WORD32 i;
+
+ /* Rows 0-7: left and right 8x8 blocks of the top half */
+ for(i = 0; i < 8; i++, pi2_8x8_had += had8_strd)
+ {
+ b0[i] = vld1q_s16(pi2_8x8_had);
+ b1[i] = vld1q_s16(pi2_8x8_had + 8);
+ }
+ /* Rows 8-15: the pointer simply continues from where the first loop stopped */
+ for(i = 0; i < 8; i++, pi2_8x8_had += had8_strd)
+ {
+ b2[i] = vld1q_s16(pi2_8x8_had);
+ b3[i] = vld1q_s16(pi2_8x8_had + 8);
+ }
+
+/* EARLY_EXIT(k): overwrites p<k> with |q<k>| and, while *pi4_cbf is still 0, */
+/* sets it to 1 when any lane of p<k> exceeds `threshold`. It expects p<k>, */
+/* q<k>, threshold and pi4_cbf to be in scope. The macro is not #undef'd and */
+/* is also used by ihevce_compute_32x32HAD_using_16x16_neon() further down. */
+#define EARLY_EXIT(k) \
+ { \
+ p##k = vabsq_s16(q##k); \
+ if(*pi4_cbf == 0) \
+ { \
+ uint16x8_t cmp; \
+ cmp = vcgtq_s16(p##k, threshold); \
+ if(vget_lane_s64(vreinterpret_s64_u16(vget_low_u16(cmp)), 0) || \
+ vget_lane_s64(vreinterpret_s64_u16(vget_high_u16(cmp)), 0)) \
+ { \
+ *pi4_cbf = 1; \
+ } \
+ } \
+ }
+ for(i = 0; i < 8; i++, pi2_dst += dst_strd)
+ {
+ int16x8_t p0 = vhaddq_s16(b0[i], b1[i]);
+ int16x8_t p1 = vhsubq_s16(b0[i], b1[i]);
+ int16x8_t p2 = vhaddq_s16(b2[i], b3[i]);
+ int16x8_t p3 = vhsubq_s16(b2[i], b3[i]);
+
+ int16x8_t q0 = vaddq_s16(p0, p2);
+ int16x8_t q1 = vsubq_s16(p0, p2);
+ int16x8_t q2 = vaddq_s16(p1, p3);
+ int16x8_t q3 = vsubq_s16(p1, p3);
+
+ /* Each output is stored signed; EARLY_EXIT then reuses p<k> for |q<k>| */
+ vst1q_s16(pi2_dst, q0);
+ EARLY_EXIT(0);
+ vst1q_s16(pi2_dst + 8, q1);
+ EARLY_EXIT(1);
+ vst1q_s16(pi2_dst + 8 * dst_strd, q2);
+ EARLY_EXIT(2);
+ vst1q_s16(pi2_dst + 8 * dst_strd + 8, q3);
+ EARLY_EXIT(3);
+ /* From here on p0..p3 hold absolute values, not the stage-1 terms */
+ uint16x8_t r0 = vaddq_u16(vreinterpretq_u16_s16(p0), vreinterpretq_u16_s16(p1));
+ uint16x8_t r1 = vaddq_u16(vreinterpretq_u16_s16(p2), vreinterpretq_u16_s16(p3));
+
+ uint32x4_t s0 = vaddl_u16(vget_low_u16(r0), vget_high_u16(r0));
+ uint32x4_t s1 = vaddl_u16(vget_low_u16(r1), vget_high_u16(r1));
+
+ sum = vaddq_u32(sum, s0);
+ sum = vaddq_u32(sum, s1);
+ }
+
+ /* Horizontal reduction of the four 32-bit lanes */
+ c = vpaddlq_u32(sum);
+ satd = vget_lane_u64(vadd_u64(vget_low_u64(c), vget_high_u64(c)), 0);
+
+ return ((satd + 4) >> 3);
+}
+
+/**
+ * 16x16 Hadamard cost with a recursive 8x8-vs-16x16 TU split decision.
+ *
+ * The four 8x8 quadrants are evaluated with
+ * ihevce_had_8x8_using_4_4x4_r_neon() at depth i4_cur_depth + 1. Their
+ * coefficients land in a local 16x16 buffer, which is then combined into the
+ * 16x16 transform by ihevce_compute_16x16HAD_using_8x8_neon() (written to
+ * pi2_dst).
+ *
+ * Each child returns (cost << 2) | (split << 1) | early_cbf. The child split
+ * and cbf bits are collected MSB-first, one bit per quadrant, and the 16x16
+ * level decision / cbf occupies the least significant bit of each field.
+ *
+ * Split is chosen when i4_cur_depth < i4_max_depth and either the summed
+ * child cost is lower or i4_max_tr_size is below 16. On split,
+ * *pi4_tu_split_cost is increased by the flag cost.
+ *
+ * Returns (best_cost << 10) + (tu_split_flags << 5) + early_cbf_flags.
+ */
+WORD32 ihevce_had_16x16_r_neon(
+    UWORD8 *pu1_src,
+    WORD32 src_strd,
+    UWORD8 *pu1_pred,
+    WORD32 pred_strd,
+    WORD16 *pi2_dst,
+    WORD32 dst_strd,
+    WORD32 **ppi4_hsad,
+    WORD32 **ppi4_tu_split,
+    WORD32 **ppi4_tu_early_cbf,
+    WORD32 pos_x_y_4x4,
+    WORD32 num_4x4_in_row,
+    WORD32 lambda,
+    WORD32 lambda_q_shift,
+    WORD32 i4_frm_qstep,
+    WORD32 i4_cur_depth,
+    WORD32 i4_max_depth,
+    WORD32 i4_max_tr_size,
+    WORD32 *pi4_tu_split_cost,
+    void *pv_func_sel)
+{
+    /* 16x16 scratch holding the four child 8x8 transforms, row stride 16 */
+    WORD16 ai2_8x8_had[256];
+
+    const WORD32 cur_tr_size = 16;
+    const WORD32 pos_x = pos_x_y_4x4 & 0xFFFF;
+    const WORD32 pos_y = (pos_x_y_4x4 >> 16) & 0xFFFF;
+    /* 4 TU split flags + 4 CBF flags; extra shift models 0.5 bit per bin */
+    const WORD32 flag_cost = ((4 + 4) * lambda) >> (lambda_q_shift + 1);
+
+    WORD32 *pi4_hsad_16x16;
+    WORD32 *pi4_split_16x16;
+    WORD32 *pi4_cbf_16x16;
+    WORD32 blk16_idx;
+
+    WORD32 split_bits = 0;
+    WORD32 cbf_bits = 0;
+    WORD32 parent_cbf = 0;
+    WORD32 cost_child = 0;
+    WORD32 cost_parent;
+    WORD32 best_cost;
+    WORD32 quad;
+
+    assert(pos_x >= 0);
+    assert(pos_y >= 0);
+
+    /* Locations where the 16x16 results are recorded */
+    blk16_idx = (pos_x >> 2) + (pos_y >> 2) * (num_4x4_in_row >> 2);
+    pi4_hsad_16x16 = ppi4_hsad[HAD_16x16] + blk16_idx;
+    pi4_split_16x16 = ppi4_tu_split[HAD_16x16] + blk16_idx;
+    pi4_cbf_16x16 = ppi4_tu_early_cbf[HAD_16x16] + blk16_idx;
+
+    /* Evaluate the four 8x8 quadrants in raster order */
+    for(quad = 0; quad < 4; quad++)
+    {
+        const WORD32 col = quad & 0x01;
+        const WORD32 row = quad >> 1;
+        WORD32 packed;
+
+        packed = ihevce_had_8x8_using_4_4x4_r_neon(
+            pu1_src + col * 8 + row * src_strd * 8,
+            src_strd,
+            pu1_pred + col * 8 + row * pred_strd * 8,
+            pred_strd,
+            ai2_8x8_had + col * 8 + row * 16 * 8,
+            16,
+            ppi4_hsad,
+            ppi4_tu_split,
+            ppi4_tu_early_cbf,
+            pos_x_y_4x4 + col * 2 + row * (2 << 16),
+            num_4x4_in_row,
+            lambda,
+            lambda_q_shift,
+            i4_frm_qstep,
+            i4_cur_depth + 1,
+            i4_max_depth,
+            i4_max_tr_size,
+            pi4_tu_split_cost,
+            pv_func_sel);
+
+        /* Bits 2 and up: cost, bit 1: TU split, bit 0: early cbf */
+        cost_child += (packed >> 2);
+        split_bits = (split_bits + ((packed & 0x3) >> 1)) << 1;
+        cbf_bits = (cbf_bits + (packed & 0x1)) << 1;
+    }
+
+    /* 16x16 transform from the 8x8 results; the frame qstep is forwarded */
+    /* for the early-cbf check done inside the helper */
+    cost_parent = ihevce_compute_16x16HAD_using_8x8_neon(
+        ai2_8x8_had, 16, pi2_dst, dst_strd, i4_frm_qstep, &parent_cbf);
+
+    cost_child += flag_cost;
+    cbf_bits += parent_cbf;
+
+    /* The depth limit decides how far the TU recursion may go */
+    if((i4_cur_depth < i4_max_depth) &&
+       ((cost_child < cost_parent) || (i4_max_tr_size < cur_tr_size)))
+    {
+        *pi4_tu_split_cost += flag_cost;
+        split_bits += 1;
+        best_cost = cost_child;
+    }
+    else
+    {
+        best_cost = cost_parent;
+    }
+
+    pi4_hsad_16x16[0] = best_cost;
+    pi4_split_16x16[0] = split_bits;
+    pi4_cbf_16x16[0] = cbf_bits;
+
+    /* Best cost, split flags and cbf flags packed into one return value */
+    return ((best_cost << 10) + (split_bits << 5) + cbf_bits);
+}
+
+/**
+ * Combines four 16x16 Hadamard blocks into a 32x32 cost.
+ *
+ * pi2_16x16_had is read as a 32x32 coefficient area with row stride
+ * had16_strd, addressed as 4 quadrants of 16x16 (b16), each split into four
+ * 8x8 sub-blocks (b8). Every loaded vector is arithmetic-shifted right by 2.
+ *
+ * For each sub-block position and row, the vectors of the four quadrants are
+ * combined with two add/sub butterflies and the absolute values of the four
+ * outputs are accumulated.
+ *
+ * Early CBF: uses the EARLY_EXIT macro defined earlier in this file; while
+ * *pi4_cbf is 0 it is set to 1 when any absolute output is greater than
+ * (int16_t)(i4_frm_qstep >> 8). *pi4_cbf is never cleared here.
+ *
+ * Returns the accumulated absolute sum with no rounding or normalisation
+ * applied. pi2_dst and dst_strd are unused: nothing is stored.
+ */
+UWORD32 ihevce_compute_32x32HAD_using_16x16_neon(
+ WORD16 *pi2_16x16_had,
+ WORD32 had16_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd,
+ WORD32 i4_frm_qstep,
+ WORD32 *pi4_cbf)
+{
+ /* a[16x16 quadrant][8x8 sub-block][row of 8 coefficients] */
+ int16x8_t a[4][4][8];
+ uint32x4_t sum = vdupq_n_u32(0);
+ const int16x8_t threshold = vdupq_n_s16((int16_t)(i4_frm_qstep >> 8));
+ WORD32 b8, b16;
+ uint64x2_t c;
+ WORD32 i, j;
+
+ (void)pi2_dst;
+ (void)dst_strd;
+
+ /* Load the whole 32x32 area, scaling every coefficient down by 4 */
+ for(b16 = 0; b16 < 4; b16++)
+ {
+ WORD16 *pi2_b16 = pi2_16x16_had + (b16 >> 1) * (had16_strd * 16) + ((b16 & 1) * 16);
+
+ for(b8 = 0; b8 < 4; b8++)
+ {
+ WORD16 *pi2_b8 = pi2_b16 + (b8 >> 1) * (had16_strd * 8) + ((b8 & 1) * 8);
+
+ for(i = 0; i < 8; i++, pi2_b8 += had16_strd)
+ {
+ a[b16][b8][i] = vld1q_s16(pi2_b8);
+ a[b16][b8][i] = vshrq_n_s16(a[b16][b8][i], 2);
+ }
+ }
+ }
+
+ /* Combine sub-block j of all four quadrants, one row at a time */
+ for(j = 0; j < 4; j++)
+ {
+ for(i = 0; i < 8; i++)
+ {
+ int16x8_t p0 = vaddq_s16(a[0][j][i], a[1][j][i]);
+ int16x8_t p1 = vsubq_s16(a[0][j][i], a[1][j][i]);
+ int16x8_t p2 = vaddq_s16(a[2][j][i], a[3][j][i]);
+ int16x8_t p3 = vsubq_s16(a[2][j][i], a[3][j][i]);
+
+ int16x8_t q0 = vaddq_s16(p0, p2);
+ int16x8_t q1 = vsubq_s16(p0, p2);
+ int16x8_t q2 = vaddq_s16(p1, p3);
+ int16x8_t q3 = vsubq_s16(p1, p3);
+
+ /* EARLY_EXIT(k) replaces p<k> with |q<k>| and updates *pi4_cbf */
+ EARLY_EXIT(0);
+ EARLY_EXIT(1);
+ EARLY_EXIT(2);
+ EARLY_EXIT(3);
+
+ /* p0..p3 now hold absolute values */
+ uint16x8_t r0 = vaddq_u16(vreinterpretq_u16_s16(p0), vreinterpretq_u16_s16(p1));
+ uint16x8_t r1 = vaddq_u16(vreinterpretq_u16_s16(p2), vreinterpretq_u16_s16(p3));
+
+ uint32x4_t s0 = vaddl_u16(vget_low_u16(r0), vget_high_u16(r0));
+ uint32x4_t s1 = vaddl_u16(vget_low_u16(r1), vget_high_u16(r1));
+
+ sum = vaddq_u32(sum, s0);
+ sum = vaddq_u32(sum, s1);
+ }
+ }
+ /* Horizontal reduction of the four 32-bit lanes */
+ c = vpaddlq_u32(sum);
+
+ return vget_lane_u64(vadd_u64(vget_low_u64(c), vget_high_u64(c)), 0);
+}
diff --git a/encoder/arm/ihevce_hme_utils_neon.c b/encoder/arm/ihevce_hme_utils_neon.c
new file mode 100644
index 0000000..28a6d3b
--- /dev/null
+++ b/encoder/arm/ihevce_hme_utils_neon.c
@@ -0,0 +1,792 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ihevce_hme_utils_neon.c
+*
+* @brief
+* Contains function definitions for hme utils function in neon intrinsic
+*
+*
+* @author
+* ittiam
+*
+* @par List of Functions:
+* - hme_get_wt_inp_8x8_neon()
+* - hme_get_wt_inp_ctb_neon()
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <arm_neon.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevc_cmn_utils_neon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_debug.h"
+#include "ihevc_deblk.h"
+#include "ihevc_defs.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_macros.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_sao.h"
+#include "ihevc_structs.h"
+#include "ihevc_weighted_pred.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevce_api.h"
+#include "ihevce_defs.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_ipe_instr_set_router.h"
+#include "ihevce_global_tables.h"
+
+#include "hme_datatype.h"
+#include "hme_interface.h"
+#include "hme_common_defs.h"
+#include "hme_defs.h"
+#include "ihevce_me_instr_set_router.h"
+#include "hme_globals.h"
+#include "hme_utils.h"
+#include "hme_coarse.h"
+#include "hme_refine.h"
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+#define IHEVCE_WT_PRED_SHIFT 15
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+ * Processes one strip of 4 rows x 8 columns.
+ *
+ * The strip is copied unchanged to pu1_dst. For every reference id listed in
+ * ai4_wt_refs an inverse-weighted copy is written into that reference's own
+ * buffer (ps_wt_inp_prms->apu1_wt_inp[id]) at byte offset dst_offset.
+ *
+ * Per pixel:
+ *     t   = (((inp - off) << wpred_log_wdc) * inv_wt + (1 << 14)) >> 15
+ *     out = low 16 bits of t, saturated to 0..255
+ *
+ * @param pu1_src            top-left of the strip in the input plane
+ * @param pu1_dst            destination of the unweighted copy
+ * @param ps_wt_inp_prms     weighted prediction context (inverse weights,
+ *                           offsets, log_wdc and per-reference buffers)
+ * @param u1_num_valid_refs  number of entries in ai4_wt_refs
+ * @param ai4_wt_refs        reference ids that need a weighted input
+ * @param src_stride         input row stride
+ * @param dst_stride         row stride of every destination buffer
+ * @param dst_offset         offset of this strip inside each per-reference
+ *                           buffer. Without it the second strip of an 8x8
+ *                           block would overwrite rows 0-3 of the weighted
+ *                           buffers and leave rows 4-7 unwritten.
+ */
+static INLINE void ihevce_get_wt_inp_4x8_neon(
+    const UWORD8 *pu1_src,
+    UWORD8 *pu1_dst,
+    wgt_pred_ctxt_t *ps_wt_inp_prms,
+    WORD32 u1_num_valid_refs,
+    WORD32 *ai4_wt_refs,
+    WORD32 src_stride,
+    WORD32 dst_stride,
+    WORD32 dst_offset)
+{
+    S32 inv_wt;
+    S16 off;
+    uint8x8_t src0_8x8b, src1_8x8b, src2_8x8b, src3_8x8b;
+    int16x8_t src0_8x16b, src1_8x16b, src2_8x16b, src3_8x16b;
+    int16x8_t src4_8x16b, src5_8x16b, src6_8x16b, src7_8x16b, off_8x16b;
+    int32x4_t dst0_4x32b, dst1_4x32b, dst2_4x32b, dst3_4x32b;
+    int32x4_t dst4_4x32b, dst5_4x32b, dst6_4x32b, dst7_4x32b;
+    int32x4_t add_4x32b, inv_wt_4x32b;
+    WORD32 ref;
+    int32x4_t log_wdc = vdupq_n_s32(ps_wt_inp_prms->wpred_log_wdc);
+
+    src0_8x8b = vld1_u8((pu1_src + 0 * src_stride));
+    src1_8x8b = vld1_u8((pu1_src + 1 * src_stride));
+    src2_8x8b = vld1_u8((pu1_src + 2 * src_stride));
+    src3_8x8b = vld1_u8((pu1_src + 3 * src_stride));
+
+    /* Unweighted copy */
+    vst1_u8((pu1_dst + 0 * dst_stride), src0_8x8b);
+    vst1_u8((pu1_dst + 1 * dst_stride), src1_8x8b);
+    vst1_u8((pu1_dst + 2 * dst_stride), src2_8x8b);
+    vst1_u8((pu1_dst + 3 * dst_stride), src3_8x8b);
+
+    /* Nothing more to do when no reference needs a weighted input */
+    if(u1_num_valid_refs <= 0)
+    {
+        return;
+    }
+
+    /* Widen the four rows to 16 bit once; they are reused for every ref */
+    src0_8x16b = vreinterpretq_s16_u16(vmovl_u8(src0_8x8b));
+    src1_8x16b = vreinterpretq_s16_u16(vmovl_u8(src1_8x8b));
+    src2_8x16b = vreinterpretq_s16_u16(vmovl_u8(src2_8x8b));
+    src3_8x16b = vreinterpretq_s16_u16(vmovl_u8(src3_8x8b));
+
+    /* Rounding term: 1 << 14 */
+    add_4x32b = vdupq_n_s32(0x4000);
+
+    /* Run through all ref ids, except ref==num_ref, which is already done */
+    for(ref = 0; ref < u1_num_valid_refs; ref++)
+    {
+        S32 i4_ref_idx = ai4_wt_refs[ref];
+
+        /* InvWt and off specific to this ref id */
+        inv_wt = ps_wt_inp_prms->a_inv_wpred_wt[i4_ref_idx];
+        off = (S16)ps_wt_inp_prms->a_wpred_off[i4_ref_idx];
+
+        off_8x16b = vdupq_n_s16(off);
+        inv_wt_4x32b = vdupq_n_s32(inv_wt);
+
+        /* Each ref id may have different wt/offset, so each has its own */
+        /* input buffer. dst_offset selects the strip inside that buffer. */
+        pu1_dst = ps_wt_inp_prms->apu1_wt_inp[i4_ref_idx] + dst_offset;
+
+        /* inp - off */
+        src4_8x16b = vsubq_s16(src0_8x16b, off_8x16b);
+        src5_8x16b = vsubq_s16(src1_8x16b, off_8x16b);
+        src6_8x16b = vsubq_s16(src2_8x16b, off_8x16b);
+        src7_8x16b = vsubq_s16(src3_8x16b, off_8x16b);
+
+        /* Widen to 32 bit: low half (columns 0-3) ... */
+        dst0_4x32b = vmovl_s16(vget_low_s16(src4_8x16b));
+        dst1_4x32b = vmovl_s16(vget_low_s16(src5_8x16b));
+        dst2_4x32b = vmovl_s16(vget_low_s16(src6_8x16b));
+        dst3_4x32b = vmovl_s16(vget_low_s16(src7_8x16b));
+
+        /* ... and high half (columns 4-7) */
+        dst4_4x32b = vmovl_s16(vget_high_s16(src4_8x16b));
+        dst5_4x32b = vmovl_s16(vget_high_s16(src5_8x16b));
+        dst6_4x32b = vmovl_s16(vget_high_s16(src6_8x16b));
+        dst7_4x32b = vmovl_s16(vget_high_s16(src7_8x16b));
+
+        /* (inp-off) << shift */
+        dst0_4x32b = vshlq_s32(dst0_4x32b, log_wdc);
+        dst1_4x32b = vshlq_s32(dst1_4x32b, log_wdc);
+        dst2_4x32b = vshlq_s32(dst2_4x32b, log_wdc);
+        dst3_4x32b = vshlq_s32(dst3_4x32b, log_wdc);
+
+        dst4_4x32b = vshlq_s32(dst4_4x32b, log_wdc);
+        dst5_4x32b = vshlq_s32(dst5_4x32b, log_wdc);
+        dst6_4x32b = vshlq_s32(dst6_4x32b, log_wdc);
+        dst7_4x32b = vshlq_s32(dst7_4x32b, log_wdc);
+
+        /* ((inp-off) << shift) * inv_wt + 1<<14 */
+        dst0_4x32b = vmlaq_s32(add_4x32b, dst0_4x32b, inv_wt_4x32b);
+        dst1_4x32b = vmlaq_s32(add_4x32b, dst1_4x32b, inv_wt_4x32b);
+        dst2_4x32b = vmlaq_s32(add_4x32b, dst2_4x32b, inv_wt_4x32b);
+        dst3_4x32b = vmlaq_s32(add_4x32b, dst3_4x32b, inv_wt_4x32b);
+
+        dst4_4x32b = vmlaq_s32(add_4x32b, dst4_4x32b, inv_wt_4x32b);
+        dst5_4x32b = vmlaq_s32(add_4x32b, dst5_4x32b, inv_wt_4x32b);
+        dst6_4x32b = vmlaq_s32(add_4x32b, dst6_4x32b, inv_wt_4x32b);
+        dst7_4x32b = vmlaq_s32(add_4x32b, dst7_4x32b, inv_wt_4x32b);
+
+        /* (((inp-off) << shift) * inv_wt + 1<<14) >> 15 */
+        src4_8x16b = vcombine_s16(
+            vshrn_n_s32(dst0_4x32b, IHEVCE_WT_PRED_SHIFT),
+            vshrn_n_s32(dst4_4x32b, IHEVCE_WT_PRED_SHIFT));
+        src5_8x16b = vcombine_s16(
+            vshrn_n_s32(dst1_4x32b, IHEVCE_WT_PRED_SHIFT),
+            vshrn_n_s32(dst5_4x32b, IHEVCE_WT_PRED_SHIFT));
+        src6_8x16b = vcombine_s16(
+            vshrn_n_s32(dst2_4x32b, IHEVCE_WT_PRED_SHIFT),
+            vshrn_n_s32(dst6_4x32b, IHEVCE_WT_PRED_SHIFT));
+        src7_8x16b = vcombine_s16(
+            vshrn_n_s32(dst3_4x32b, IHEVCE_WT_PRED_SHIFT),
+            vshrn_n_s32(dst7_4x32b, IHEVCE_WT_PRED_SHIFT));
+
+        /* Saturate to 8 bit and store */
+        vst1_u8((pu1_dst + 0 * dst_stride), vqmovun_s16(src4_8x16b));
+        vst1_u8((pu1_dst + 1 * dst_stride), vqmovun_s16(src5_8x16b));
+        vst1_u8((pu1_dst + 2 * dst_stride), vqmovun_s16(src6_8x16b));
+        vst1_u8((pu1_dst + 3 * dst_stride), vqmovun_s16(src7_8x16b));
+    }
+}
+
+/**
+ * Prepares the (optionally weighted) input for one 8x8 block.
+ *
+ * The block at (pos_x, pos_y) of ps_curr_layer->pu1_inp is copied to the
+ * unweighted buffer apu1_wt_inp_buf_array[num_ref]. pos_x / pos_y are first
+ * clamped to the last column / row of the layer.
+ *
+ * - u1_is_wt_pred_on == 0: every apu1_wt_inp[0..num_ref] is pointed at the
+ *   unweighted buffer.
+ * - otherwise: references with default weight and zero offset share the
+ *   unweighted buffer; every other reference gets its own buffer filled with
+ *   the inverse-weighted block (both 4-row strips).
+ *
+ * When the block crosses the right or bottom layer boundary (x_count or
+ * y_count smaller than size) each written buffer is padded with
+ * hme_pad_right() / hme_pad_bot().
+ *
+ * @param ps_curr_layer     layer context (input pointer, stride, wd, ht)
+ * @param ps_wt_inp_prms    weighted prediction context; its apu1_wt_inp
+ *                          pointer table is updated by this call
+ * @param dst_stride        row stride of the destination buffers
+ * @param pos_x, pos_y      block position in the layer
+ * @param size              block size used for the boundary / padding logic
+ * @param num_ref           number of references; index num_ref is the
+ *                          unweighted input
+ * @param u1_is_wt_pred_on  non-zero when weighted prediction is enabled
+ */
+void hme_get_wt_inp_8x8_neon(
+    layer_ctxt_t *ps_curr_layer,
+    wgt_pred_ctxt_t *ps_wt_inp_prms,
+    S32 dst_stride,
+    S32 pos_x,
+    S32 pos_y,
+    S32 size,
+    S32 num_ref,
+    U08 u1_is_wt_pred_on)
+{
+    WORD32 ref;
+    UWORD8 *pu1_src, *pu1_dst;
+    WORD32 x_count, y_count;
+    WORD32 src_stride = ps_curr_layer->i4_inp_stride;
+
+    /* Make sure the start positions of block are inside frame limits */
+    pos_x = MIN(pos_x, ps_curr_layer->i4_wd - 1);
+    pos_y = MIN(pos_y, ps_curr_layer->i4_ht - 1);
+
+    /* In case we handle incomplete CTBs, we copy only as much as reqd */
+    /* from input buffers to prevent out of bound accesses. In this    */
+    /* case, we do padding in x or y or both dirns                     */
+    x_count = MIN(size, (ps_curr_layer->i4_wd - pos_x));
+    y_count = MIN(size, (ps_curr_layer->i4_ht - pos_y));
+
+    /* Fixed source */
+    pu1_src = ps_curr_layer->pu1_inp;
+    pu1_src += (pos_x + (pos_y * src_stride));
+
+    if(!u1_is_wt_pred_on)
+    {
+        uint8x8_t src0_8x8b, src1_8x8b, src2_8x8b, src3_8x8b;
+
+        /************* Top 4x8 Processing ****************/
+        src0_8x8b = vld1_u8(pu1_src + 0 * src_stride);
+        src1_8x8b = vld1_u8(pu1_src + 1 * src_stride);
+        src2_8x8b = vld1_u8(pu1_src + 2 * src_stride);
+        src3_8x8b = vld1_u8(pu1_src + 3 * src_stride);
+
+        /* ref==num_ref */ /* last ref will be non weighted input */
+        pu1_dst = ps_wt_inp_prms->apu1_wt_inp_buf_array[num_ref];
+        vst1_u8((pu1_dst + 0 * dst_stride), src0_8x8b);
+        vst1_u8((pu1_dst + 1 * dst_stride), src1_8x8b);
+        vst1_u8((pu1_dst + 2 * dst_stride), src2_8x8b);
+        vst1_u8((pu1_dst + 3 * dst_stride), src3_8x8b);
+
+        /************* Bottom 4x8 Processing ****************/
+        pu1_src += 4 * src_stride;
+        pu1_dst = ps_wt_inp_prms->apu1_wt_inp_buf_array[num_ref] + 4 * dst_stride;
+
+        src0_8x8b = vld1_u8(pu1_src + 0 * src_stride);
+        src1_8x8b = vld1_u8(pu1_src + 1 * src_stride);
+        src2_8x8b = vld1_u8(pu1_src + 2 * src_stride);
+        src3_8x8b = vld1_u8(pu1_src + 3 * src_stride);
+
+        vst1_u8((pu1_dst + 0 * dst_stride), src0_8x8b);
+        vst1_u8((pu1_dst + 1 * dst_stride), src1_8x8b);
+        vst1_u8((pu1_dst + 2 * dst_stride), src2_8x8b);
+        vst1_u8((pu1_dst + 3 * dst_stride), src3_8x8b);
+
+        pu1_dst = ps_wt_inp_prms->apu1_wt_inp_buf_array[num_ref];
+
+        /* Check and do padding in right direction if need be */
+        if(x_count != size)
+        {
+            hme_pad_right(pu1_dst + x_count - 1, dst_stride, size - x_count, y_count);
+        }
+
+        /* Check and do padding in bottom direction if need be */
+        if(y_count != size)
+        {
+            hme_pad_bot(pu1_dst + (y_count - 1) * dst_stride, dst_stride, size - y_count, size);
+        }
+
+        /* Every reference shares the unweighted buffer */
+        for(ref = 0; ref < num_ref + 1; ref++)
+        {
+            ps_wt_inp_prms->apu1_wt_inp[ref] = ps_wt_inp_prms->apu1_wt_inp_buf_array[num_ref];
+        }
+    }
+    else
+    {
+        S32 wt, off;
+        S32 ai4_wt_refs[MAX_NUM_REF];
+        U08 u1_num_valid_refs = 0;
+
+        /* Collect the references that really need a weighted input */
+        for(ref = 0; ref < num_ref; ref++)
+        {
+            wt = ps_wt_inp_prms->a_wpred_wt[ref];
+            off = ps_wt_inp_prms->a_wpred_off[ref];
+
+            if((WGHT_DEFAULT == wt) && (0 == off))
+            {
+                ps_wt_inp_prms->apu1_wt_inp[ref] = ps_wt_inp_prms->apu1_wt_inp_buf_array[num_ref];
+            }
+            else
+            {
+                ai4_wt_refs[u1_num_valid_refs++] = ref;
+                ps_wt_inp_prms->apu1_wt_inp[ref] = ps_wt_inp_prms->apu1_wt_inp_buf_array[ref];
+            }
+        }
+
+        ps_wt_inp_prms->apu1_wt_inp[num_ref] = ps_wt_inp_prms->apu1_wt_inp_buf_array[num_ref];
+
+        /************* Top 4x8 Processing ****************/
+        /* ref==num_ref */ /* last ref will be non weighted input */
+        pu1_dst = ps_wt_inp_prms->apu1_wt_inp[num_ref];
+        ihevce_get_wt_inp_4x8_neon(
+            pu1_src,
+            pu1_dst,
+            ps_wt_inp_prms,
+            u1_num_valid_refs,
+            ai4_wt_refs,
+            src_stride,
+            dst_stride,
+            0);
+
+        /************* Bottom 4x8 Processing ****************/
+        /* The weighted buffers are advanced by the same 4 rows as the */
+        /* unweighted destination                                      */
+        pu1_src += 4 * src_stride;
+        pu1_dst = ps_wt_inp_prms->apu1_wt_inp[num_ref] + 4 * dst_stride;
+        ihevce_get_wt_inp_4x8_neon(
+            pu1_src,
+            pu1_dst,
+            ps_wt_inp_prms,
+            u1_num_valid_refs,
+            ai4_wt_refs,
+            src_stride,
+            dst_stride,
+            4 * dst_stride);
+
+        for(ref = 0; ref < u1_num_valid_refs; ref++)
+        {
+            /* Check and do padding in right direction if need be */
+            pu1_dst = ps_wt_inp_prms->apu1_wt_inp[ai4_wt_refs[ref]];
+            if(x_count != size)
+            {
+                hme_pad_right(pu1_dst + x_count - 1, dst_stride, size - x_count, y_count);
+            }
+
+            /* Check and do padding in bottom direction if need be */
+            if(y_count != size)
+            {
+                hme_pad_bot(pu1_dst + (y_count - 1) * dst_stride, dst_stride, size - y_count, size);
+            }
+        }
+
+        /* Check and do padding in right direction if need be */
+        pu1_dst = ps_wt_inp_prms->apu1_wt_inp[num_ref];
+        if(x_count != size)
+        {
+            hme_pad_right(pu1_dst + x_count - 1, dst_stride, size - x_count, y_count);
+        }
+
+        /* Check and do padding in bottom direction if need be */
+        if(y_count != size)
+        {
+            hme_pad_bot(pu1_dst + (y_count - 1) * dst_stride, dst_stride, size - y_count, size);
+        }
+    }
+}
+
+void hme_get_wt_inp_ctb_neon(
+ layer_ctxt_t *ps_curr_layer,
+ wgt_pred_ctxt_t *ps_wt_inp_prms,
+ S32 dst_stride,
+ S32 pos_x,
+ S32 pos_y,
+ S32 size,
+ S32 num_ref,
+ U08 u1_is_wt_pred_on)
+{
+ WORD32 ref, i, j;
+ UWORD8 *pu1_src, *pu1_dst;
+ WORD32 x_count, y_count;
+ WORD32 src_stride = ps_curr_layer->i4_inp_stride;
+
+ /* In case we handle imcomplete CTBs, we copy only as much as reqd */
+ /* from input buffers to prevent out of bound accesses. In this */
+ /* case, we do padding in x or y or both dirns */
+ x_count = MIN(size, (ps_curr_layer->i4_wd - pos_x));
+ y_count = MIN(size, (ps_curr_layer->i4_ht - pos_y));
+
+ /* Fixed source */
+ pu1_src = ps_curr_layer->pu1_inp;
+ pu1_src += (pos_x + (pos_y * src_stride));
+
+ if(!u1_is_wt_pred_on)
+ {
+ pu1_dst = ps_wt_inp_prms->apu1_wt_inp_buf_array[num_ref];
+
+ if(0 == (x_count & 15))
+ {
+ uint8x16_t src0_16x8b, src1_16x8b, src2_16x8b, src3_16x8b;
+
+ for(i = 0; i < y_count; i += 4) /* 4 rows at a time */
+ {
+ for(j = 0; j < x_count; j += 16) /* 16 cols at a time */
+ {
+ /* Load 4x16 Source */
+ src0_16x8b = vld1q_u8(pu1_src + 0 * src_stride);
+ src1_16x8b = vld1q_u8(pu1_src + 1 * src_stride);
+ src2_16x8b = vld1q_u8(pu1_src + 2 * src_stride);
+ src3_16x8b = vld1q_u8(pu1_src + 3 * src_stride);
+
+ /* ref==num_ref */ /* last ref will be non weighted input */
+ /* Store */
+ vst1q_u8((pu1_dst + 0 * dst_stride), src0_16x8b);
+ vst1q_u8((pu1_dst + 1 * dst_stride), src1_16x8b);
+ vst1q_u8((pu1_dst + 2 * dst_stride), src2_16x8b);
+ vst1q_u8((pu1_dst + 3 * dst_stride), src3_16x8b);
+
+ pu1_src += 16;
+ pu1_dst += 16;
+ }
+
+ pu1_src = pu1_src - x_count + 4 * src_stride;
+ pu1_dst = pu1_dst - x_count + 4 * dst_stride;
+ }
+ }
+ else if(0 == (x_count & 7)) /* wd multiple of 8 case */
+ {
+ uint8x8_t src0_8x8b, src1_8x8b, src2_8x8b, src3_8x8b;
+ for(i = 0; i < y_count; i += 4) /* 4 rows at a time */
+ {
+ for(j = 0; j < x_count; j += 8) /* 8 cols at a time */
+ {
+ /* Load 4x8 Source */
+ src0_8x8b = vld1_u8(pu1_src + 0 * src_stride);
+ src1_8x8b = vld1_u8(pu1_src + 1 * src_stride);
+ src2_8x8b = vld1_u8(pu1_src + 2 * src_stride);
+ src3_8x8b = vld1_u8(pu1_src + 3 * src_stride);
+
+ /* ref==num_ref */ /* last ref will be non weighted input */
+ /* Store */
+ vst1_u8((pu1_dst + 0 * dst_stride), src0_8x8b);
+ vst1_u8((pu1_dst + 1 * dst_stride), src1_8x8b);
+ vst1_u8((pu1_dst + 2 * dst_stride), src2_8x8b);
+ vst1_u8((pu1_dst + 3 * dst_stride), src3_8x8b);
+
+ pu1_src += 8;
+ pu1_dst += 8;
+ }
+
+ pu1_src = pu1_src - x_count + 4 * src_stride;
+ pu1_dst = pu1_dst - x_count + 4 * dst_stride;
+ }
+ }
+ else /* wd multiple of 4 case */
+ {
+ for(i = 0; i < y_count; i += 4) /* 4 rows at a time */
+ {
+ for(j = 0; j < x_count; j += 4) /* 4 cols at a time */
+ {
+ /* ref==num_ref */ /* last ref will be non weighted input */
+ *(WORD32 *)(&pu1_dst[0 * dst_stride]) = *(WORD32 *)(&pu1_src[0 * src_stride]);
+ *(WORD32 *)(&pu1_dst[1 * dst_stride]) = *(WORD32 *)(&pu1_src[1 * src_stride]);
+ *(WORD32 *)(&pu1_dst[2 * dst_stride]) = *(WORD32 *)(&pu1_src[2 * src_stride]);
+ *(WORD32 *)(&pu1_dst[3 * dst_stride]) = *(WORD32 *)(&pu1_src[3 * src_stride]);
+
+ pu1_src += 4;
+ pu1_dst += 4;
+ }
+
+ pu1_src -= x_count + 4 * src_stride;
+ pu1_dst = pu1_dst - x_count + 4 * dst_stride;
+ }
+ }
+
+ for(i = 0; i < num_ref + 1; i++)
+ {
+ ps_wt_inp_prms->apu1_wt_inp[i] = ps_wt_inp_prms->apu1_wt_inp_buf_array[num_ref];
+ }
+
+ /* Padding */
+ pu1_dst = ps_wt_inp_prms->apu1_wt_inp[num_ref];
+
+ if(x_count != size)
+ {
+ hme_pad_right(pu1_dst + x_count - 1, dst_stride, size - x_count, y_count);
+ }
+
+ /* Check and do padding in bottom directino if need be */
+ if(y_count != size)
+ {
+ hme_pad_bot(pu1_dst + (y_count - 1) * dst_stride, dst_stride, size - y_count, size);
+ }
+ }
+ else
+ {
+ S32 ai4_wt_refs[MAX_NUM_REF];
+ U08 u1_num_valid_refs = 0;
+ int32x4_t dst0_4x32b, dst1_4x32b, dst2_4x32b, dst3_4x32b;
+ int32x4_t inv_wt_4x32b, off_4x32b;
+ int16x8_t src0_8x16b, src1_8x16b, src2_8x16b, src3_8x16b;
+
+ /* add value */
+ int32x4_t add_4x32b = vdupq_n_s32(0x4000);
+ int32x4_t log_wdc = vdupq_n_s32(ps_wt_inp_prms->wpred_log_wdc);
+
+ for(i = 0; i < num_ref; i++)
+ {
+ if((WGHT_DEFAULT == (ps_wt_inp_prms->a_wpred_wt[i])) &&
+ (0 == (ps_wt_inp_prms->a_wpred_off[i])))
+ {
+ ps_wt_inp_prms->apu1_wt_inp[i] = ps_wt_inp_prms->apu1_wt_inp_buf_array[num_ref];
+ }
+ else
+ {
+ ai4_wt_refs[u1_num_valid_refs++] = i;
+ ps_wt_inp_prms->apu1_wt_inp[i] = ps_wt_inp_prms->apu1_wt_inp_buf_array[i];
+ }
+ }
+
+ ps_wt_inp_prms->apu1_wt_inp[num_ref] = ps_wt_inp_prms->apu1_wt_inp_buf_array[num_ref];
+
+ if(0 == (x_count & 7)) /* wd multiple of 8 case */
+ {
+ uint8x8_t src0_8x8b, src1_8x8b, src2_8x8b, src3_8x8b;
+ int16x8_t src4_8x16b, src5_8x16b, src6_8x16b, src7_8x16b, off_8x16b;
+ int32x4_t dst4_4x32b, dst5_4x32b, dst6_4x32b, dst7_4x32b;
+
+ for(i = 0; i < y_count; i += 4) /* 4 rows at a time */
+ {
+ for(j = 0; j < x_count; j += 8) /* 8 cols at a time */
+ {
+ /* Load 4x8 Source */
+ /* Load Source : Lower 32 bit, Upper 32 bit neglected */
+ src0_8x8b = vld1_u8(pu1_src + 0 * src_stride);
+ src1_8x8b = vld1_u8(pu1_src + 1 * src_stride);
+ src2_8x8b = vld1_u8(pu1_src + 2 * src_stride);
+ src3_8x8b = vld1_u8(pu1_src + 3 * src_stride);
+
+ /* ref==num_ref */ /* last ref will be non weighted input */
+ pu1_dst = (ps_wt_inp_prms->apu1_wt_inp[num_ref]) + (i * dst_stride) + j;
+
+ /* Store */
+ vst1_u8((pu1_dst + 0 * dst_stride), src0_8x8b);
+ vst1_u8((pu1_dst + 1 * dst_stride), src1_8x8b);
+ vst1_u8((pu1_dst + 2 * dst_stride), src2_8x8b);
+ vst1_u8((pu1_dst + 3 * dst_stride), src3_8x8b);
+
+ if(u1_num_valid_refs)
+ {
+ /* Right 4x4 Block */
+ src0_8x16b = vreinterpretq_s16_u16(vmovl_u8(src0_8x8b));
+ src1_8x16b = vreinterpretq_s16_u16(vmovl_u8(src1_8x8b));
+ src2_8x16b = vreinterpretq_s16_u16(vmovl_u8(src2_8x8b));
+ src3_8x16b = vreinterpretq_s16_u16(vmovl_u8(src3_8x8b));
+ }
+
+ /* Run thro all ref ids, except ref==num_ref, which is already done */
+ for(ref = 0; ref < u1_num_valid_refs; ref++)
+ {
+ U08 u1_ref_idx = ai4_wt_refs[ref];
+
+ /* Each ref id may have different wt/offset. */
+ /* So we have unique inp buf for each ref id */
+ pu1_dst = ps_wt_inp_prms->apu1_wt_inp[u1_ref_idx] + (i * dst_stride) + j;
+
+ /* InvWt and off specific to this ref id */
+ off_8x16b = vdupq_n_s16(ps_wt_inp_prms->a_wpred_off[u1_ref_idx]);
+ inv_wt_4x32b = vdupq_n_s32(ps_wt_inp_prms->a_inv_wpred_wt[u1_ref_idx]);
+
+ /* inp - off */
+ src4_8x16b = vsubq_s16(src0_8x16b, off_8x16b);
+ src5_8x16b = vsubq_s16(src1_8x16b, off_8x16b);
+ src6_8x16b = vsubq_s16(src2_8x16b, off_8x16b);
+ src7_8x16b = vsubq_s16(src3_8x16b, off_8x16b);
+
+ dst0_4x32b = vmovl_s16(vget_low_s16(src4_8x16b));
+ dst1_4x32b = vmovl_s16(vget_low_s16(src5_8x16b));
+ dst2_4x32b = vmovl_s16(vget_low_s16(src6_8x16b));
+ dst3_4x32b = vmovl_s16(vget_low_s16(src7_8x16b));
+
+ dst4_4x32b = vmovl_s16(vget_high_s16(src4_8x16b));
+ dst5_4x32b = vmovl_s16(vget_high_s16(src5_8x16b));
+ dst6_4x32b = vmovl_s16(vget_high_s16(src6_8x16b));
+ dst7_4x32b = vmovl_s16(vget_high_s16(src7_8x16b));
+
+ /* (inp-off) << shift */
+ dst0_4x32b = vshlq_s32(dst0_4x32b, log_wdc);
+ dst1_4x32b = vshlq_s32(dst1_4x32b, log_wdc);
+ dst2_4x32b = vshlq_s32(dst2_4x32b, log_wdc);
+ dst3_4x32b = vshlq_s32(dst3_4x32b, log_wdc);
+
+ /* (inp-off) << shift */
+ dst4_4x32b = vshlq_s32(dst4_4x32b, log_wdc);
+ dst5_4x32b = vshlq_s32(dst5_4x32b, log_wdc);
+ dst6_4x32b = vshlq_s32(dst6_4x32b, log_wdc);
+ dst7_4x32b = vshlq_s32(dst7_4x32b, log_wdc);
+
+ /* ((inp-off) << shift) * inv_wt + 1<<14 */
+ dst0_4x32b = vmlaq_s32(add_4x32b, dst0_4x32b, inv_wt_4x32b);
+ dst1_4x32b = vmlaq_s32(add_4x32b, dst1_4x32b, inv_wt_4x32b);
+ dst2_4x32b = vmlaq_s32(add_4x32b, dst2_4x32b, inv_wt_4x32b);
+ dst3_4x32b = vmlaq_s32(add_4x32b, dst3_4x32b, inv_wt_4x32b);
+
+ /* ((inp-off) << shift) * inv_wt + 1<<14 */
+ dst4_4x32b = vmlaq_s32(add_4x32b, dst4_4x32b, inv_wt_4x32b);
+ dst5_4x32b = vmlaq_s32(add_4x32b, dst5_4x32b, inv_wt_4x32b);
+ dst6_4x32b = vmlaq_s32(add_4x32b, dst6_4x32b, inv_wt_4x32b);
+ dst7_4x32b = vmlaq_s32(add_4x32b, dst7_4x32b, inv_wt_4x32b);
+
+ /* (((inp-off) << shift) * inv_wt + 1<<14) >> 15 */
+ src4_8x16b = vcombine_s16(
+ vshrn_n_s32(dst0_4x32b, IHEVCE_WT_PRED_SHIFT),
+ vshrn_n_s32(dst4_4x32b, IHEVCE_WT_PRED_SHIFT));
+ src5_8x16b = vcombine_s16(
+ vshrn_n_s32(dst1_4x32b, IHEVCE_WT_PRED_SHIFT),
+ vshrn_n_s32(dst5_4x32b, IHEVCE_WT_PRED_SHIFT));
+ src6_8x16b = vcombine_s16(
+ vshrn_n_s32(dst2_4x32b, IHEVCE_WT_PRED_SHIFT),
+ vshrn_n_s32(dst6_4x32b, IHEVCE_WT_PRED_SHIFT));
+ src7_8x16b = vcombine_s16(
+ vshrn_n_s32(dst3_4x32b, IHEVCE_WT_PRED_SHIFT),
+ vshrn_n_s32(dst7_4x32b, IHEVCE_WT_PRED_SHIFT));
+ /* Store */
+ vst1_u8((pu1_dst + 0 * dst_stride), vqmovun_s16(src4_8x16b));
+ vst1_u8((pu1_dst + 1 * dst_stride), vqmovun_s16(src5_8x16b));
+ vst1_u8((pu1_dst + 2 * dst_stride), vqmovun_s16(src6_8x16b));
+ vst1_u8((pu1_dst + 3 * dst_stride), vqmovun_s16(src7_8x16b));
+ }
+ /* Pointer update */
+ pu1_src += 8;
+ }
+ /* Pointer update */
+ pu1_src = pu1_src - x_count + 4 * src_stride;
+ }
+ }
+ else /* wd multiple of 4 case */
+ {
+ uint8x16_t src0_16x8b;
+ int32x4_t src0_4x32b, src1_4x32b, src2_4x32b, src3_4x32b;
+ WORD32 dst0, dst1, dst2, dst3;
+ pu1_dst = ps_wt_inp_prms->apu1_wt_inp[num_ref];
+ for(i = 0; i < y_count; i += 4) /* 4 rows at a time */
+ {
+ for(j = 0; j < x_count; j += 4) /* 4 cols at a time */
+ {
+ /* ref==num_ref */ /* last ref will be non weighted input */
+
+ *(WORD32 *)(&pu1_dst[0 * dst_stride]) = *(WORD32 *)(&pu1_src[0 * src_stride]);
+ *(WORD32 *)(&pu1_dst[1 * dst_stride]) = *(WORD32 *)(&pu1_src[1 * src_stride]);
+ *(WORD32 *)(&pu1_dst[2 * dst_stride]) = *(WORD32 *)(&pu1_src[2 * src_stride]);
+ *(WORD32 *)(&pu1_dst[3 * dst_stride]) = *(WORD32 *)(&pu1_src[3 * src_stride]);
+
+ /* Pointer update */
+ pu1_src += 4;
+ pu1_dst += 4;
+ }
+ /* Pointer update */
+ pu1_src = pu1_src - x_count + 4 * src_stride;
+ pu1_dst = pu1_dst - x_count + 4 * dst_stride;
+ }
+
+ if(u1_num_valid_refs)
+ {
+ pu1_src = ps_curr_layer->pu1_inp;
+ pu1_src += (pos_x + (pos_y * src_stride));
+
+ /* Run thro all ref ids, except ref==num_ref, which is already done */
+ for(ref = 0; ref < u1_num_valid_refs; ref++)
+ {
+ U08 u1_ref_idx = ai4_wt_refs[ref];
+
+ pu1_dst = ps_wt_inp_prms->apu1_wt_inp[u1_ref_idx];
+
+ /* InvWt and off specific to this ref id */
+ off_4x32b = vdupq_n_s32(ps_wt_inp_prms->a_wpred_off[u1_ref_idx]);
+ inv_wt_4x32b = vdupq_n_s32(ps_wt_inp_prms->a_inv_wpred_wt[u1_ref_idx]);
+
+ for(i = 0; i < y_count; i += 4) /* 4 rows at a time */
+ {
+ for(j = 0; j < x_count; j += 4) /* 4 cols at a time */
+ {
+ src0_16x8b = load_unaligned_u8q(pu1_src, src_stride);
+
+ src0_8x16b = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(src0_16x8b)));
+ src1_8x16b = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(src0_16x8b)));
+
+ src0_4x32b = vmovl_s16(vget_low_s16(src0_8x16b));
+ src1_4x32b = vmovl_s16(vget_high_s16(src0_8x16b));
+ src2_4x32b = vmovl_s16(vget_low_s16(src1_8x16b));
+ src3_4x32b = vmovl_s16(vget_high_s16(src1_8x16b));
+
+ /* inp - off */
+ dst0_4x32b = vsubq_s32(src0_4x32b, off_4x32b);
+ dst1_4x32b = vsubq_s32(src1_4x32b, off_4x32b);
+ dst2_4x32b = vsubq_s32(src2_4x32b, off_4x32b);
+ dst3_4x32b = vsubq_s32(src3_4x32b, off_4x32b);
+
+ /* (inp-off) << shift */
+ dst0_4x32b = vshlq_s32(dst0_4x32b, log_wdc);
+ dst1_4x32b = vshlq_s32(dst1_4x32b, log_wdc);
+ dst2_4x32b = vshlq_s32(dst2_4x32b, log_wdc);
+ dst3_4x32b = vshlq_s32(dst3_4x32b, log_wdc);
+
+ /* ((inp-off) << shift) * inv_wt */
+ dst0_4x32b = vmlaq_s32(add_4x32b, dst0_4x32b, inv_wt_4x32b);
+ dst1_4x32b = vmlaq_s32(add_4x32b, dst1_4x32b, inv_wt_4x32b);
+ dst2_4x32b = vmlaq_s32(add_4x32b, dst2_4x32b, inv_wt_4x32b);
+ dst3_4x32b = vmlaq_s32(add_4x32b, dst3_4x32b, inv_wt_4x32b);
+
+ /* (((inp-off) << shift) * inv_wt + 1<<14) >> 15 */
+ dst0 = (WORD32)vget_lane_u64(
+ vreinterpret_u64_u16(
+ vqshrun_n_s32(dst0_4x32b, IHEVCE_WT_PRED_SHIFT)),
+ 0);
+ dst1 = (WORD32)vget_lane_u64(
+ vreinterpret_u64_u16(
+ vqshrun_n_s32(dst1_4x32b, IHEVCE_WT_PRED_SHIFT)),
+ 0);
+ dst2 = (WORD32)vget_lane_u64(
+ vreinterpret_u64_u16(
+ vqshrun_n_s32(dst2_4x32b, IHEVCE_WT_PRED_SHIFT)),
+ 0);
+ dst3 = (WORD32)vget_lane_u64(
+ vreinterpret_u64_u16(
+ vqshrun_n_s32(dst3_4x32b, IHEVCE_WT_PRED_SHIFT)),
+ 0);
+
+ *(WORD32 *)(&pu1_dst[0 * dst_stride]) = dst0;
+ *(WORD32 *)(&pu1_dst[1 * dst_stride]) = dst1;
+ *(WORD32 *)(&pu1_dst[2 * dst_stride]) = dst2;
+ *(WORD32 *)(&pu1_dst[3 * dst_stride]) = dst3;
+
+ /* Pointer update */
+ pu1_src += 4;
+ pu1_dst += 4;
+ }
+ /* Pointer update */
+ pu1_src = pu1_src - x_count + 4 * src_stride;
+ pu1_dst = pu1_dst - x_count + 4 * dst_stride;
+ }
+ }
+ }
+ }
+
+ /* Padding */
+ for(ref = 0; ref < u1_num_valid_refs; ref++)
+ {
+ /* Check and do padding in right direction if need be */
+ pu1_dst = ps_wt_inp_prms->apu1_wt_inp[ai4_wt_refs[ref]];
+ if(x_count != size)
+ {
+ hme_pad_right(pu1_dst + x_count - 1, dst_stride, size - x_count, y_count);
+ }
+
+ /* Check and do padding in bottom direction if need be */
+ if(y_count != size)
+ {
+ hme_pad_bot(pu1_dst + (y_count - 1) * dst_stride, dst_stride, size - y_count, size);
+ }
+ }
+
+ /* Check and do padding in right direction if need be */
+ pu1_dst = ps_wt_inp_prms->apu1_wt_inp[num_ref];
+
+ if(x_count != size)
+ {
+ hme_pad_right(pu1_dst + x_count - 1, dst_stride, size - x_count, y_count);
+ }
+
+ /* Check and do padding in bottom direction if need be */
+ if(y_count != size)
+ {
+ hme_pad_bot(pu1_dst + (y_count - 1) * dst_stride, dst_stride, size - y_count, size);
+ }
+ }
+}
diff --git a/encoder/arm/ihevce_itrans_recon_neon.c b/encoder/arm/ihevce_itrans_recon_neon.c
new file mode 100644
index 0000000..01ac0e8
--- /dev/null
+++ b/encoder/arm/ihevce_itrans_recon_neon.c
@@ -0,0 +1,308 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ihevce_itrans_recon_neon.c
+*
+* @brief
+* Contains functions that inverse transform and add the residue to the pred buffer
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+* - ihevce_itrans_recon_dc_neon
+*
+* @remarks
+* None
+*
+********************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <string.h>
+#include <arm_neon.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevc_cmn_utils_neon.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+#include "ihevc_defs.h"
+#include "ihevc_macros.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+/* DC-only reconstruction of a 4x4 luma block: adds dc_value to every       */
+/* prediction pixel, saturates to 8 bit and writes four 4-byte rows to dst.  */
+static INLINE void ihevce_itrans_recon_dc_4x4_luma_neon(
+    UWORD8 *pu1_pred, WORD32 pred_strd, UWORD8 *pu1_dst, WORD32 dst_strd, WORD32 dc_value)
+{
+    /* The 16 loaded bytes are treated below as rows 0-1 (low half) and */
+    /* rows 2-3 (high half) of the prediction block.                    */
+    const uint8x16_t pred_u8q = load_unaligned_u8q(pu1_pred, pred_strd);
+    const int16x8_t dc_s16 = vdupq_n_s16(dc_value);
+
+    /* Widen to 16 bit so that the signed DC can be added without wrap */
+    const int16x8_t rows01_s16 =
+        vaddq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(pred_u8q))), dc_s16);
+    const int16x8_t rows23_s16 =
+        vaddq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(pred_u8q))), dc_s16);
+
+    /* Saturate back to unsigned 8 bit and view each row as one 32-bit lane */
+    const uint32x2_t rows01_u32 = vreinterpret_u32_u8(vqmovun_s16(rows01_s16));
+    const uint32x2_t rows23_u32 = vreinterpret_u32_u8(vqmovun_s16(rows23_s16));
+
+    *(UWORD32 *)(pu1_dst) = vget_lane_u32(rows01_u32, 0);
+    *(UWORD32 *)(pu1_dst + dst_strd) = vget_lane_u32(rows01_u32, 1);
+    *(UWORD32 *)(pu1_dst + 2 * dst_strd) = vget_lane_u32(rows23_u32, 0);
+    *(UWORD32 *)(pu1_dst + 3 * dst_strd) = vget_lane_u32(rows23_u32, 1);
+}
+
+/* DC-only reconstruction of one chroma plane inside 8-byte interleaved rows. */
+/* Only the bytes of the selected plane are replaced by sat_u8(pred + dc);    */
+/* the remaining bytes of each destination row are written back unchanged.    */
+static INLINE void ihevce_itrans_recon_dc_4x4_chroma_neon(
+    UWORD8 *pu1_pred,
+    WORD32 pred_strd,
+    UWORD8 *pu1_dst,
+    WORD32 dst_strd,
+    WORD32 trans_size,
+    WORD32 dc_value,
+    CHROMA_PLANE_ID_T e_chroma_plane)
+{
+    /* One byte of every 16-bit lane is selected: the mask is 0x00ff when */
+    /* e_chroma_plane is 0 and 0xff00 when it is 1.                       */
+    const uint8x8_t plane_mask_u8 =
+        vreinterpret_u8_u16(vdup_n_u16(0xff << (e_chroma_plane << 3)));
+    const int16x8_t dc_s16 = vdupq_n_s16(dc_value);
+    WORD32 row;
+
+    for(row = 0; row < trans_size; row++)
+    {
+        UWORD8 *pu1_pred_row = pu1_pred + row * pred_strd;
+        UWORD8 *pu1_dst_row = pu1_dst + row * dst_strd;
+
+        const int16x8_t pred_s16 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(pu1_pred_row)));
+        const uint8x8_t recon_u8 = vqmovun_s16(vaddq_s16(dc_s16, pred_s16));
+
+        /* Read-modify-write: keep the other plane's bytes already in dst */
+        const uint8x8_t merged_u8 = vbsl_u8(plane_mask_u8, recon_u8, vld1_u8(pu1_dst_row));
+
+        vst1_u8(pu1_dst_row, merged_u8);
+    }
+}
+
+/* DC-only reconstruction for luma rows of 8 pixels. The magnitude of        */
+/* dc_value is added to (or, for a negative DC, subtracted from) the         */
+/* prediction with unsigned 8-bit saturation. Two rows are handled per       */
+/* iteration.                                                                */
+static INLINE void ihevce_itrans_recon_dc_8x8_luma_neon(
+    UWORD8 *pu1_pred,
+    WORD32 pred_strd,
+    UWORD8 *pu1_dst,
+    WORD32 dst_strd,
+    WORD32 trans_size,
+    WORD32 dc_value)
+{
+    const WORD32 dc_is_neg = (dc_value < 0);
+    const int16x8_t dc_s16 = vdupq_n_s16(dc_value);
+
+    /* |dc| narrowed to u8; the sign is applied by choosing add or sub below */
+    const uint8x8_t dc_mag_u8 = vqmovun_s16(dc_is_neg ? vabsq_s16(dc_s16) : dc_s16);
+    const uint8x16_t dc_mag_u8q = vcombine_u8(dc_mag_u8, dc_mag_u8);
+    WORD32 row;
+
+    for(row = 0; row < trans_size; row += 2)
+    {
+        /* Pack rows (row) and (row + 1) into one 128-bit register */
+        const uint8x16_t pred_u8q = vcombine_u8(
+            vld1_u8(pu1_pred + row * pred_strd), vld1_u8(pu1_pred + (row + 1) * pred_strd));
+        const uint8x16_t recon_u8q =
+            dc_is_neg ? vqsubq_u8(pred_u8q, dc_mag_u8q) : vqaddq_u8(dc_mag_u8q, pred_u8q);
+
+        vst1_u8(pu1_dst + row * dst_strd, vget_low_u8(recon_u8q));
+        vst1_u8(pu1_dst + (row + 1) * dst_strd, vget_high_u8(recon_u8q));
+    }
+}
+
+/* DC-only reconstruction of one chroma plane in 16-byte interleaved rows    */
+/* (8 samples per plane). The selected plane is de-interleaved from pred,    */
+/* offset by dc_value with saturation, then re-interleaved with the other    */
+/* plane as currently stored in dst. Two rows are handled per iteration.     */
+static INLINE void ihevce_itrans_recon_dc_8x8_chroma_neon(
+    UWORD8 *pu1_pred,
+    WORD32 pred_strd,
+    UWORD8 *pu1_dst,
+    WORD32 dst_strd,
+    WORD32 trans_size,
+    WORD32 dc_value,
+    CHROMA_PLANE_ID_T e_chroma_plane)
+{
+    const WORD32 dc_is_neg = (dc_value < 0);
+    const int16x8_t dc_s16 = vdupq_n_s16(dc_value);
+
+    /* |dc| narrowed to u8; the sign is applied by choosing add or sub below */
+    const uint8x8_t dc_mag_u8 = vqmovun_s16(dc_is_neg ? vabsq_s16(dc_s16) : dc_s16);
+    const uint8x16_t dc_mag_u8q = vcombine_u8(dc_mag_u8, dc_mag_u8);
+    WORD32 row;
+
+    for(row = 0; row < trans_size; row += 2)
+    {
+        UWORD8 *pu1_pred_r0 = pu1_pred + row * pred_strd;
+        UWORD8 *pu1_pred_r1 = pu1_pred + (row + 1) * pred_strd;
+        UWORD8 *pu1_dst_r0 = pu1_dst + row * dst_strd;
+        UWORD8 *pu1_dst_r1 = pu1_dst + (row + 1) * dst_strd;
+        uint8x16_t cur_u8q, other_u8q;
+        uint8x16x2_t out_u8q;
+
+        /* Selected plane of both prediction rows, low half = first row */
+        cur_u8q = vcombine_u8(
+            vld2_u8(pu1_pred_r0).val[e_chroma_plane], vld2_u8(pu1_pred_r1).val[e_chroma_plane]);
+        cur_u8q = dc_is_neg ? vqsubq_u8(cur_u8q, dc_mag_u8q) : vqaddq_u8(dc_mag_u8q, cur_u8q);
+
+        /* The other plane is taken from dst so that it is preserved */
+        other_u8q = vcombine_u8(
+            vld2_u8(pu1_dst_r0).val[!e_chroma_plane], vld2_u8(pu1_dst_r1).val[!e_chroma_plane]);
+
+        /* Re-interleave with plane 0 in the even byte positions */
+        out_u8q = (e_chroma_plane == 0) ? vzipq_u8(cur_u8q, other_u8q)
+                                        : vzipq_u8(other_u8q, cur_u8q);
+
+        vst1q_u8(pu1_dst_r0, out_u8q.val[0]);
+        vst1q_u8(pu1_dst_r1, out_u8q.val[1]);
+    }
+}
+
+/* DC-only reconstruction for luma rows of 16 pixels: the magnitude of       */
+/* dc_value is added to (or, for a negative DC, subtracted from) each        */
+/* prediction row with unsigned 8-bit saturation.                            */
+static INLINE void ihevce_itrans_recon_dc_16x16_luma_neon(
+    UWORD8 *pu1_pred,
+    WORD32 pred_strd,
+    UWORD8 *pu1_dst,
+    WORD32 dst_strd,
+    WORD32 trans_size,
+    WORD32 dc_value)
+{
+    const WORD32 dc_is_neg = (dc_value < 0);
+    const int16x8_t dc_s16 = vdupq_n_s16(dc_value);
+
+    /* |dc| narrowed to u8; the sign is applied by choosing add or sub below */
+    const uint8x8_t dc_mag_u8 = vqmovun_s16(dc_is_neg ? vabsq_s16(dc_s16) : dc_s16);
+    const uint8x16_t dc_mag_u8q = vcombine_u8(dc_mag_u8, dc_mag_u8);
+    WORD32 row;
+
+    for(row = 0; row < trans_size; row++)
+    {
+        const uint8x16_t pred_u8q = vld1q_u8(pu1_pred + row * pred_strd);
+        const uint8x16_t recon_u8q =
+            dc_is_neg ? vqsubq_u8(pred_u8q, dc_mag_u8q) : vqaddq_u8(pred_u8q, dc_mag_u8q);
+
+        vst1q_u8(pu1_dst + row * dst_strd, recon_u8q);
+    }
+}
+
+/* DC-only reconstruction of one chroma plane in 32-byte interleaved rows    */
+/* (16 samples per plane). The selected plane is de-interleaved from pred,   */
+/* offset by dc_value with saturation, then re-interleaved with the other    */
+/* plane as currently stored in dst.                                         */
+static INLINE void ihevce_itrans_recon_dc_16x16_chroma_neon(
+    UWORD8 *pu1_pred,
+    WORD32 pred_strd,
+    UWORD8 *pu1_dst,
+    WORD32 dst_strd,
+    WORD32 trans_size,
+    WORD32 dc_value,
+    CHROMA_PLANE_ID_T e_chroma_plane)
+{
+    const WORD32 dc_is_neg = (dc_value < 0);
+    const int16x8_t dc_s16 = vdupq_n_s16(dc_value);
+
+    /* |dc| narrowed to u8; the sign is applied by choosing add or sub below */
+    const uint8x8_t dc_mag_u8 = vqmovun_s16(dc_is_neg ? vabsq_s16(dc_s16) : dc_s16);
+    const uint8x16_t dc_mag_u8q = vcombine_u8(dc_mag_u8, dc_mag_u8);
+    WORD32 row;
+
+    for(row = 0; row < trans_size; row++)
+    {
+        UWORD8 *pu1_pred_row = pu1_pred + row * pred_strd;
+        UWORD8 *pu1_dst_row = pu1_dst + row * dst_strd;
+        uint8x16_t cur_u8q, other_u8q;
+        uint8x16x2_t out_u8q;
+
+        cur_u8q = vld2q_u8(pu1_pred_row).val[e_chroma_plane];
+        cur_u8q = dc_is_neg ? vqsubq_u8(cur_u8q, dc_mag_u8q) : vqaddq_u8(cur_u8q, dc_mag_u8q);
+
+        /* The other plane is taken from dst so that it is preserved */
+        other_u8q = vld2q_u8(pu1_dst_row).val[!e_chroma_plane];
+
+        /* Re-interleave with plane 0 in the even byte positions */
+        out_u8q = (e_chroma_plane == 0) ? vzipq_u8(cur_u8q, other_u8q)
+                                        : vzipq_u8(other_u8q, cur_u8q);
+
+        vst1q_u8(pu1_dst_row, out_u8q.val[0]);
+        vst1q_u8(pu1_dst_row + 16, out_u8q.val[1]);
+    }
+}
+
+/**
+ * DC-only inverse transform + reconstruction.
+ *
+ * The dequantised DC coefficient is scaled by 64 and rounded/shifted twice
+ * (IT_SHIFT_STAGE_1, then IT_SHIFT_STAGE_2), with a clip to 16 bit after each
+ * stage. The resulting constant is added to the prediction block by the
+ * size-specific kernel.
+ *
+ * pu1_pred / pred_strd : prediction buffer and its stride
+ * pu1_dst / dst_strd   : reconstruction buffer and its stride
+ * trans_size           : 4, 8, 16 or 32; any other value is a no-op
+ * i2_deq_value         : dequantised DC coefficient
+ * e_chroma_plane       : NULL_PLANE selects the luma kernels, any other
+ *                        value selects the chroma kernels for that plane
+ */
+void ihevce_itrans_recon_dc_neon(
+    UWORD8 *pu1_pred,
+    WORD32 pred_strd,
+    UWORD8 *pu1_dst,
+    WORD32 dst_strd,
+    WORD32 trans_size,
+    WORD16 i2_deq_value,
+    CHROMA_PLANE_ID_T e_chroma_plane)
+{
+    const WORD32 is_luma = (NULL_PLANE == e_chroma_plane);
+    WORD32 shift, rnd;
+    WORD32 dc_value;
+
+    /* Stage 1 */
+    shift = IT_SHIFT_STAGE_1;
+    rnd = 1 << (shift - 1);
+    dc_value = CLIP_S16((i2_deq_value * 64 + rnd) >> shift);
+
+    /* Stage 2 */
+    shift = IT_SHIFT_STAGE_2;
+    rnd = 1 << (shift - 1);
+    dc_value = CLIP_S16((dc_value * 64 + rnd) >> shift);
+
+    switch(trans_size)
+    {
+    case 4:
+        if(is_luma)
+        {
+            ihevce_itrans_recon_dc_4x4_luma_neon(pu1_pred, pred_strd, pu1_dst, dst_strd, dc_value);
+        }
+        else
+        {
+            ihevce_itrans_recon_dc_4x4_chroma_neon(
+                pu1_pred, pred_strd, pu1_dst, dst_strd, trans_size, dc_value, e_chroma_plane);
+        }
+        break;
+
+    case 8:
+        if(is_luma)
+        {
+            ihevce_itrans_recon_dc_8x8_luma_neon(
+                pu1_pred, pred_strd, pu1_dst, dst_strd, trans_size, dc_value);
+        }
+        else
+        {
+            ihevce_itrans_recon_dc_8x8_chroma_neon(
+                pu1_pred, pred_strd, pu1_dst, dst_strd, trans_size, dc_value, e_chroma_plane);
+        }
+        break;
+
+    case 16:
+        if(is_luma)
+        {
+            ihevce_itrans_recon_dc_16x16_luma_neon(
+                pu1_pred, pred_strd, pu1_dst, dst_strd, trans_size, dc_value);
+        }
+        else
+        {
+            ihevce_itrans_recon_dc_16x16_chroma_neon(
+                pu1_pred, pred_strd, pu1_dst, dst_strd, trans_size, dc_value, e_chroma_plane);
+        }
+        break;
+
+    case 32:
+    {
+        /* No dedicated 32x32 kernel: run the 16x16 kernel on the four */
+        /* quadrants in the order TL, TR, BL, BR.                      */
+        WORD32 quad_y, quad_x;
+
+        for(quad_y = 0; quad_y < 2; quad_y++)
+        {
+            for(quad_x = 0; quad_x < 2; quad_x++)
+            {
+                if(is_luma)
+                {
+                    ihevce_itrans_recon_dc_16x16_luma_neon(
+                        pu1_pred + (quad_y * pred_strd * 16) + (quad_x * 16),
+                        pred_strd,
+                        pu1_dst + (quad_y * dst_strd * 16) + (quad_x * 16),
+                        dst_strd,
+                        trans_size >> 1,
+                        dc_value);
+                }
+                else
+                {
+                    /* Horizontal quadrant step is 32 bytes for chroma */
+                    ihevce_itrans_recon_dc_16x16_chroma_neon(
+                        pu1_pred + (quad_y * pred_strd * 16) + (quad_x * 32),
+                        pred_strd,
+                        pu1_dst + (quad_y * dst_strd * 16) + (quad_x * 32),
+                        dst_strd,
+                        trans_size >> 1,
+                        dc_value,
+                        e_chroma_plane);
+                }
+            }
+        }
+        break;
+    }
+
+    default:
+        /* Unsupported transform size: nothing is written */
+        break;
+    }
+}
diff --git a/encoder/arm/ihevce_me_neon.c b/encoder/arm/ihevce_me_neon.c
new file mode 100644
index 0000000..f3c4a90
--- /dev/null
+++ b/encoder/arm/ihevce_me_neon.c
@@ -0,0 +1,666 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+******************************************************************************
+* @file
+* ihevce_me_neon.c
+*
+* @brief
+* Subpel refinement modules for ME algo
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+*
+* @remarks
+* None
+*
+********************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <arm_neon.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevc_cmn_utils_neon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_debug.h"
+#include "ihevc_deblk.h"
+#include "ihevc_defs.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_macros.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_sao.h"
+#include "ihevc_structs.h"
+#include "ihevc_weighted_pred.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevce_api.h"
+#include "ihevce_defs.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_ipe_instr_set_router.h"
+#include "ihevce_global_tables.h"
+
+#include "hme_datatype.h"
+#include "hme_common_defs.h"
+#include "hme_common_utils.h"
+#include "hme_interface.h"
+#include "hme_defs.h"
+#include "hme_err_compute.h"
+#include "hme_globals.h"
+
+#include "ihevce_me_instr_set_router.h"
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+/* Signature shared by the kernels that write four UWORD32 SADs per call */
+typedef void ft_calc_sad4_nxn(
+    UWORD8 *pu1_src,
+    WORD32 src_strd,
+    UWORD8 *pu1_pred,
+    WORD32 pred_strd,
+    UWORD32 *pu4_sad);
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+/* Derives the SAD of every partition type for candidate column i from the  */
+/* sixteen sub-block SADs in as[0..15], which are indexed as a row-major 4x4 */
+/* grid (as[0..3] is the top row, as[0], as[4], as[8], as[12] the left       */
+/* column). Results are written to pps[PART_ID_*][i]. Quadrant and half      */
+/* partitions are sums of sub-blocks; the large side of each asymmetric      */
+/* partition is obtained as 2Nx2N minus the small side. Expands to a brace   */
+/* block and evaluates its arguments several times, so pass plain            */
+/* identifiers without side effects.                                         */
+#define COMBINE_SADS(pps, as, i) \
+ { \
+ pps[PART_ID_NxN_TL][i] = (as[0] + as[1] + as[4] + as[5]); \
+ pps[PART_ID_NxN_TR][i] = (as[2] + as[3] + as[6] + as[7]); \
+ pps[PART_ID_NxN_BL][i] = (as[8] + as[9] + as[12] + as[13]); \
+ pps[PART_ID_NxN_BR][i] = (as[10] + as[11] + as[14] + as[15]); \
+ \
+ pps[PART_ID_Nx2N_L][i] = pps[PART_ID_NxN_TL][i] + pps[PART_ID_NxN_BL][i]; \
+ pps[PART_ID_Nx2N_R][i] = pps[PART_ID_NxN_TR][i] + pps[PART_ID_NxN_BR][i]; \
+ pps[PART_ID_2NxN_T][i] = pps[PART_ID_NxN_TR][i] + pps[PART_ID_NxN_TL][i]; \
+ pps[PART_ID_2NxN_B][i] = pps[PART_ID_NxN_BR][i] + pps[PART_ID_NxN_BL][i]; \
+ \
+ pps[PART_ID_nLx2N_L][i] = (as[8] + as[0] + as[12] + as[4]); \
+ pps[PART_ID_nRx2N_R][i] = (as[3] + as[7] + as[15] + as[11]); \
+ pps[PART_ID_2NxnU_T][i] = (as[1] + as[0] + as[2] + as[3]); \
+ pps[PART_ID_2NxnD_B][i] = (as[15] + as[14] + as[12] + as[13]); \
+ \
+ pps[PART_ID_2Nx2N][i] = pps[PART_ID_2NxN_T][i] + pps[PART_ID_2NxN_B][i]; \
+ \
+ pps[PART_ID_2NxnU_B][i] = pps[PART_ID_2Nx2N][i] - pps[PART_ID_2NxnU_T][i]; \
+ pps[PART_ID_2NxnD_T][i] = pps[PART_ID_2Nx2N][i] - pps[PART_ID_2NxnD_B][i]; \
+ pps[PART_ID_nRx2N_L][i] = pps[PART_ID_2Nx2N][i] - pps[PART_ID_nRx2N_R][i]; \
+ pps[PART_ID_nLx2N_R][i] = pps[PART_ID_2Nx2N][i] - pps[PART_ID_nLx2N_L][i]; \
+ }
+
+/* Single-candidate form of the partition SAD combiner: derives the SAD of   */
+/* every partition type from the sixteen sub-block SADs in as[0..15]         */
+/* (row-major 4x4 grid) and writes them to the flat array ps[PART_ID_*].     */
+/* The large side of each asymmetric partition is obtained as 2Nx2N minus    */
+/* the small side. Expands to a brace block and evaluates its arguments      */
+/* several times, so pass plain identifiers without side effects.            */
+#define COMBINE_SADS_2(ps, as) \
+ { \
+ ps[PART_ID_NxN_TL] = (as[0] + as[1] + as[4] + as[5]); \
+ ps[PART_ID_NxN_TR] = (as[2] + as[3] + as[6] + as[7]); \
+ ps[PART_ID_NxN_BL] = (as[8] + as[9] + as[12] + as[13]); \
+ ps[PART_ID_NxN_BR] = (as[10] + as[11] + as[14] + as[15]); \
+ \
+ ps[PART_ID_Nx2N_L] = ps[PART_ID_NxN_TL] + ps[PART_ID_NxN_BL]; \
+ ps[PART_ID_Nx2N_R] = ps[PART_ID_NxN_TR] + ps[PART_ID_NxN_BR]; \
+ ps[PART_ID_2NxN_T] = ps[PART_ID_NxN_TR] + ps[PART_ID_NxN_TL]; \
+ ps[PART_ID_2NxN_B] = ps[PART_ID_NxN_BR] + ps[PART_ID_NxN_BL]; \
+ \
+ ps[PART_ID_nLx2N_L] = (as[8] + as[0] + as[12] + as[4]); \
+ ps[PART_ID_nRx2N_R] = (as[3] + as[7] + as[15] + as[11]); \
+ ps[PART_ID_2NxnU_T] = (as[1] + as[0] + as[2] + as[3]); \
+ ps[PART_ID_2NxnD_B] = (as[15] + as[14] + as[12] + as[13]); \
+ \
+ ps[PART_ID_2Nx2N] = ps[PART_ID_2NxN_T] + ps[PART_ID_2NxN_B]; \
+ \
+ ps[PART_ID_2NxnU_B] = ps[PART_ID_2Nx2N] - ps[PART_ID_2NxnU_T]; \
+ ps[PART_ID_2NxnD_T] = ps[PART_ID_2Nx2N] - ps[PART_ID_2NxnD_B]; \
+ ps[PART_ID_nRx2N_L] = ps[PART_ID_2Nx2N] - ps[PART_ID_nRx2N_R]; \
+ ps[PART_ID_nLx2N_R] = ps[PART_ID_2Nx2N] - ps[PART_ID_nLx2N_L]; \
+ }
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/* Computes the SADs of four horizontally adjacent 2x2 blocks (an 8x2       */
+/* region) and stores them, left to right, in pu4_sad[0..3].                */
+static void ihevce_sad4_2x2_neon(
+    UWORD8 *pu1_src, WORD32 src_strd, UWORD8 *pu1_pred, WORD32 pred_strd, UWORD32 *pu4_sad)
+{
+    /* One 16-bit accumulator per pixel column */
+    uint16x8_t col_acc_u16 = vdupq_n_u16(0);
+    WORD32 row;
+
+    for(row = 0; row < 2; row++)
+    {
+        col_acc_u16 = vabal_u8(
+            col_acc_u16, vld1_u8(pu1_src + row * src_strd), vld1_u8(pu1_pred + row * pred_strd));
+    }
+
+    /* Adjacent column pairs fold into the four per-block sums */
+    vst1q_u32(pu4_sad, vpaddlq_u16(col_acc_u16));
+}
+
+/* Computes the SADs of four horizontally adjacent 4x4 blocks (a 16x4       */
+/* region) and stores them, left to right, as 16-bit values in              */
+/* pu2_sad[0..3].                                                           */
+static void ihevce_sad4_4x4_neon(
+    UWORD8 *pu1_src, WORD32 src_strd, UWORD8 *pu1_pred, WORD32 pred_strd, UWORD16 *pu2_sad)
+{
+    /* Per-column accumulators: columns 0-7 and columns 8-15 */
+    uint16x8_t cols_lo_u16 = vdupq_n_u16(0);
+    uint16x8_t cols_hi_u16 = vdupq_n_u16(0);
+    uint16x4_t pairs_lo_u16, pairs_hi_u16;
+    WORD32 row;
+
+    for(row = 0; row < 4; row++)
+    {
+        const uint8x16_t src_u8q = vld1q_u8(pu1_src + row * src_strd);
+        const uint8x16_t pred_u8q = vld1q_u8(pu1_pred + row * pred_strd);
+
+        cols_lo_u16 = vabal_u8(cols_lo_u16, vget_low_u8(src_u8q), vget_low_u8(pred_u8q));
+        cols_hi_u16 = vabal_u8(cols_hi_u16, vget_high_u8(src_u8q), vget_high_u8(pred_u8q));
+    }
+
+    /* First fold: sums of adjacent column pairs */
+    pairs_lo_u16 = vpadd_u16(vget_low_u16(cols_lo_u16), vget_high_u16(cols_lo_u16));
+    pairs_hi_u16 = vpadd_u16(vget_low_u16(cols_hi_u16), vget_high_u16(cols_hi_u16));
+
+    /* Second fold: sums of four columns, i.e. one value per 4x4 block */
+    vst1_u16(pu2_sad, vpadd_u16(pairs_lo_u16, pairs_hi_u16));
+}
+
+/* Computes the SADs of four horizontally adjacent 8x8 blocks (a 32x8       */
+/* region) and stores them, left to right, in pu4_sad[0..3].                */
+static void ihevce_sad4_8x8_neon(
+    UWORD8 *pu1_src, WORD32 src_strd, UWORD8 *pu1_pred, WORD32 pred_strd, UWORD32 *pu4_sad)
+{
+    /* One per-column accumulator register for each of the four blocks */
+    uint16x8_t blk0_u16 = vdupq_n_u16(0);
+    uint16x8_t blk1_u16 = vdupq_n_u16(0);
+    uint16x8_t blk2_u16 = vdupq_n_u16(0);
+    uint16x8_t blk3_u16 = vdupq_n_u16(0);
+    uint16x4_t fold0_u16, fold1_u16, fold2_u16, fold3_u16;
+    uint16x4_t half01_u16, half23_u16;
+    WORD32 row;
+
+    for(row = 0; row < 8; row++)
+    {
+        UWORD8 *pu1_src_row = pu1_src + row * src_strd;
+        UWORD8 *pu1_pred_row = pu1_pred + row * pred_strd;
+        const uint8x16_t src_01 = vld1q_u8(pu1_src_row);
+        const uint8x16_t pred_01 = vld1q_u8(pu1_pred_row);
+        const uint8x16_t src_23 = vld1q_u8(pu1_src_row + 16);
+        const uint8x16_t pred_23 = vld1q_u8(pu1_pred_row + 16);
+
+        blk0_u16 = vabal_u8(blk0_u16, vget_low_u8(src_01), vget_low_u8(pred_01));
+        blk1_u16 = vabal_u8(blk1_u16, vget_high_u8(src_01), vget_high_u8(pred_01));
+        blk2_u16 = vabal_u8(blk2_u16, vget_low_u8(src_23), vget_low_u8(pred_23));
+        blk3_u16 = vabal_u8(blk3_u16, vget_high_u8(src_23), vget_high_u8(pred_23));
+    }
+
+    /* 8 columns -> 4 partial sums per block */
+    fold0_u16 = vpadd_u16(vget_low_u16(blk0_u16), vget_high_u16(blk0_u16));
+    fold1_u16 = vpadd_u16(vget_low_u16(blk1_u16), vget_high_u16(blk1_u16));
+    fold2_u16 = vpadd_u16(vget_low_u16(blk2_u16), vget_high_u16(blk2_u16));
+    fold3_u16 = vpadd_u16(vget_low_u16(blk3_u16), vget_high_u16(blk3_u16));
+
+    /* 4 partial sums -> 2 per block, two blocks per register */
+    half01_u16 = vpadd_u16(fold0_u16, fold1_u16);
+    half23_u16 = vpadd_u16(fold2_u16, fold3_u16);
+
+    /* Final widening fold gives one 32-bit SAD per block */
+    vst1q_u32(pu4_sad, vpaddlq_u16(vcombine_u16(half01_u16, half23_u16)));
+}
+
+/* Computes the SADs of four horizontally adjacent 16x16 blocks (a 64x16    */
+/* region) by delegating each block to ihevce_nxn_sad_computer_neon, and    */
+/* stores them, left to right, in pu4_sad[0..3].                            */
+static void ihevce_sad4_16x16_neon(
+    UWORD8 *pu1_src, WORD32 src_strd, UWORD8 *pu1_pred, WORD32 pred_strd, UWORD32 *pu4_sad)
+{
+    WORD32 blk;
+
+    for(blk = 0; blk < 4; blk++)
+    {
+        const WORD32 col_off = blk * 16;
+
+        pu4_sad[blk] = ihevce_nxn_sad_computer_neon(
+            pu1_src + col_off, src_strd, pu1_pred + col_off, pred_strd, 16);
+    }
+}
+
+/**
+ * Builds the candidate list for all enabled grid points and computes the
+ * SAD of every partition type for each candidate.
+ *
+ * ps_grid        : grid description (masks, centre reference pointers, mvs,
+ *                  ref indices, grid step packed as y in the upper and x in
+ *                  the lower 16 bits of grd_sz_y_x)
+ * pu1_cur_ptr    : current (input) block
+ * cur_buf_stride : stride of the current block buffer
+ * pp_part_sads   : output, pp_part_sads[PART_ID_*][candidate]
+ * ps_cand        : output candidate array, filled in grid order
+ * num_cands      : output, number of candidates written to ps_cand
+ * e_cu_size      : CU_8x8, CU_32x32 or CU_64x64. Other sizes have no SAD
+ *                  kernel in this function (see the assert below).
+ */
+void compute_part_sads_for_MxM_blk_neon(
+    grid_ctxt_t *ps_grid,
+    UWORD8 *pu1_cur_ptr,
+    WORD32 cur_buf_stride,
+    WORD32 **pp_part_sads,
+    cand_t *ps_cand,
+    WORD32 *num_cands,
+    CU_SIZE_T e_cu_size)
+{
+    WORD16 grd_sz_y = (ps_grid->grd_sz_y_x & 0xFFFF0000) >> 16;
+    WORD16 grd_sz_x = (ps_grid->grd_sz_y_x & 0xFFFF);
+
+    /* Assumes the following order: C, L, T, R, B, TL, TR, BL, BR */
+    WORD32 offset_x[NUM_CANDIDATES_IN_GRID] = { 0, -grd_sz_x, 0, grd_sz_x, 0,
+                                                -grd_sz_x, grd_sz_x, -grd_sz_x, grd_sz_x };
+    WORD32 offset_y[NUM_CANDIDATES_IN_GRID] = { 0, 0, -grd_sz_y, 0, grd_sz_y,
+                                                -grd_sz_y, -grd_sz_y, grd_sz_y, grd_sz_y };
+    WORD32 shift = (WORD32)e_cu_size;
+
+    /* Row step between two rows of sub-blocks: (2 << e_cu_size) lines */
+    WORD32 ref_buf_stride = ps_grid->ref_buf_stride;
+    WORD32 cur_buf_stride_lsN = (cur_buf_stride << (1 + shift));
+    WORD32 ref_buf_stride_lsN = (ref_buf_stride << (1 + shift));
+
+    cand_t *cand0 = ps_cand;
+
+    ft_calc_sad4_nxn *calc_sad4 = NULL;
+
+    /* for a 2Nx2N partition we evaluate (N/2)x(N/2) SADs. This is needed for
+     * AMP cases */
+    UWORD32 au4_nxn_sad[16];
+
+    WORD32 i, j;
+
+    *num_cands = 0;
+
+    /* Loop to fill up the cand_t array and to calculate num_cands */
+    for(i = 0; i < ps_grid->num_grids; i++)
+    {
+        WORD32 mask = ps_grid->pi4_grd_mask[i];
+        UWORD8 *pu1_ref_ptr_center = ps_grid->ppu1_ref_ptr[i];
+        WORD32 mv_x = ps_grid->p_mv[i].i2_mv_x;
+        WORD32 mv_y = (ps_grid->p_mv[i].i2_mv_y);
+
+        /* Bit j of the mask enables grid point j */
+        for(j = 0; j < NUM_CANDIDATES_IN_GRID; j++, mask >>= 1)
+        {
+            if(mask & 1)
+            {
+                *num_cands = *num_cands + 1;
+                cand0->grid_ix = i;
+                cand0->ref_idx = ps_grid->p_ref_idx[i];
+                cand0->pu1_ref_ptr =
+                    pu1_ref_ptr_center + offset_x[j] + ref_buf_stride * offset_y[j];
+                cand0->mv.i2_mv_x = (S16)(mv_x) + offset_x[j];
+                cand0->mv.i2_mv_y = (S16)(mv_y) + offset_y[j];
+                cand0++;
+            }
+        }
+    }
+
+    /* fn selector */
+    if(e_cu_size == CU_8x8)
+    {
+        calc_sad4 = ihevce_sad4_2x2_neon;
+    }
+    else if(e_cu_size == CU_32x32)
+    {
+        calc_sad4 = ihevce_sad4_8x8_neon;
+    }
+    else if(e_cu_size == CU_64x64)
+    {
+        calc_sad4 = ihevce_sad4_16x16_neon;
+    }
+
+    /* No kernel is selected for any other CU size; calling through the   */
+    /* NULL pointer below would crash, so make the precondition explicit. */
+    assert((NULL != calc_sad4) || (0 == *num_cands));
+
+    /* Loop to compute the SAD's */
+    for(i = 0; i < *num_cands; i++)
+    {
+        cand_t *cand = ps_cand + i;
+
+        /* Four rows of four sub-block SADs each -> 4x4 grid in raster order */
+        for(j = 0; j < 4; j++)
+        {
+            (*calc_sad4)(
+                pu1_cur_ptr + j * cur_buf_stride_lsN,
+                cur_buf_stride,
+                cand->pu1_ref_ptr + j * ref_buf_stride_lsN,
+                ref_buf_stride,
+                &au4_nxn_sad[4 * j]);
+        }
+
+        COMBINE_SADS(pp_part_sads, au4_nxn_sad, i);
+    }
+}
+
+/**
+ * 16x16 variant of the partition SAD computation: builds the candidate list
+ * for all enabled grid points, computes sixteen 4x4 SADs per candidate and
+ * combines them into the SAD of every partition type.
+ *
+ * ps_grid        : grid description (masks, centre reference pointers, mvs,
+ *                  ref indices, grid step packed as y in the upper and x in
+ *                  the lower 16 bits of grd_sz_y_x)
+ * pu1_cur_ptr    : current (input) block
+ * cur_buf_stride : stride of the current block buffer
+ * pp_part_sads   : output, pp_part_sads[PART_ID_*][candidate] (16 bit)
+ * ps_cand        : output candidate array, filled in grid order
+ * num_cands      : output, number of candidates written to ps_cand
+ */
+void compute_4x4_sads_for_16x16_blk_neon(
+    grid_ctxt_t *ps_grid,
+    UWORD8 *pu1_cur_ptr,
+    WORD32 cur_buf_stride,
+    UWORD16 **pp_part_sads,
+    cand_t *ps_cand,
+    WORD32 *num_cands)
+{
+    WORD16 grd_sz_y = (ps_grid->grd_sz_y_x & 0xFFFF0000) >> 16;
+    WORD16 grd_sz_x = (ps_grid->grd_sz_y_x & 0xFFFF);
+
+    /* Assumes the following order: C, L, T, R, B, TL, TR, BL, BR */
+    WORD32 offset_x[NUM_CANDIDATES_IN_GRID] = { 0, -grd_sz_x, 0, grd_sz_x, 0,
+                                                -grd_sz_x, grd_sz_x, -grd_sz_x, grd_sz_x };
+    WORD32 offset_y[NUM_CANDIDATES_IN_GRID] = { 0, 0, -grd_sz_y, 0, grd_sz_y,
+                                                -grd_sz_y, -grd_sz_y, grd_sz_y, grd_sz_y };
+
+    /* Step of four lines between two rows of 4x4 sub-blocks */
+    WORD32 ref_buf_stride = ps_grid->ref_buf_stride;
+    WORD32 cur_row_step = (cur_buf_stride << 2);
+    WORD32 ref_row_step = (ref_buf_stride << 2);
+
+    cand_t *ps_next_cand = ps_cand;
+
+    /* for a 2Nx2N partition we evaluate (N/2)x(N/2) SADs. This is needed for
+     * AMP cases */
+    UWORD16 au2_blk_sad[16];
+
+    WORD32 grid_ix, pt, cand_ix, blk_row;
+
+    *num_cands = 0;
+
+    /* Loop to fill up the cand_t array and to calculate num_cands */
+    for(grid_ix = 0; grid_ix < ps_grid->num_grids; grid_ix++)
+    {
+        WORD32 pt_mask = ps_grid->pi4_grd_mask[grid_ix];
+        UWORD8 *pu1_center = ps_grid->ppu1_ref_ptr[grid_ix];
+        WORD32 mv_x = ps_grid->p_mv[grid_ix].i2_mv_x;
+        WORD32 mv_y = (ps_grid->p_mv[grid_ix].i2_mv_y);
+
+        /* Bit pt of the mask enables grid point pt */
+        for(pt = 0; pt < NUM_CANDIDATES_IN_GRID; pt++, pt_mask >>= 1)
+        {
+            if(!(pt_mask & 1))
+            {
+                continue;
+            }
+
+            *num_cands = *num_cands + 1;
+            ps_next_cand->grid_ix = grid_ix;
+            ps_next_cand->ref_idx = ps_grid->p_ref_idx[grid_ix];
+            ps_next_cand->pu1_ref_ptr =
+                pu1_center + offset_x[pt] + ref_buf_stride * offset_y[pt];
+            ps_next_cand->mv.i2_mv_x = (S16)(mv_x) + offset_x[pt];
+            ps_next_cand->mv.i2_mv_y = (S16)(mv_y) + offset_y[pt];
+            ps_next_cand++;
+        }
+    }
+
+    /* Loop to compute the SAD's */
+    for(cand_ix = 0; cand_ix < *num_cands; cand_ix++)
+    {
+        UWORD8 *pu1_cand_ref = ps_cand[cand_ix].pu1_ref_ptr;
+
+        /* Four rows of four 4x4 SADs each -> 4x4 grid in raster order */
+        for(blk_row = 0; blk_row < 4; blk_row++)
+        {
+            ihevce_sad4_4x4_neon(
+                pu1_cur_ptr + blk_row * cur_row_step,
+                cur_buf_stride,
+                pu1_cand_ref + blk_row * ref_row_step,
+                ref_buf_stride,
+                &au2_blk_sad[4 * blk_row]);
+        }
+
+        COMBINE_SADS(pp_part_sads, au2_blk_sad, cand_ix);
+    }
+}
+
+/**
+ * Evaluates the block SAD at every enabled point of a 9-point grid for a
+ * single partition. i4_part_mask must have at most one bit set. Results are
+ * written consecutively, in grid-id order, starting at
+ * pi4_sad_grid[pi4_valid_part_ids[0] * number_of_enabled_points].
+ */
+void hme_evalsad_grid_npu_MxN_neon(err_prms_t *ps_prms)
+{
+    S32 *pi4_out = ps_prms->pi4_sad_grid;
+    S32 num_enabled_pts = 0;
+    S32 grid_id;
+    S32 step_x = ps_prms->i4_step;
+    S32 step_y = ps_prms->i4_step * ps_prms->i4_ref_stride;
+
+    assert((ps_prms->i4_part_mask & (ps_prms->i4_part_mask - 1)) == 0);
+
+    /* Count the enabled grid points to locate this partition's output slot */
+    for(grid_id = 0; grid_id < 9; grid_id++)
+    {
+        if(ps_prms->i4_grid_mask & (1 << grid_id))
+        {
+            num_enabled_pts++;
+        }
+    }
+    pi4_out += (ps_prms->pi4_valid_part_ids[0] * num_enabled_pts);
+
+    for(grid_id = 0; grid_id < 9; grid_id++)
+    {
+        if(ps_prms->i4_grid_mask & (1 << grid_id))
+        {
+            U08 *pu1_inp = ps_prms->pu1_inp;
+            /* Displace the reference by the grid point's x/y unit offsets */
+            U08 *pu1_ref = ps_prms->pu1_ref + step_x * gai1_grid_id_to_x[grid_id] +
+                           step_y * gai1_grid_id_to_y[grid_id];
+
+            *pi4_out++ = ihevce_4mx4n_sad_computer_neon(
+                pu1_inp,
+                pu1_ref,
+                ps_prms->i4_inp_stride,
+                ps_prms->i4_ref_stride,
+                ps_prms->i4_blk_wd,
+                ps_prms->i4_blk_ht);
+        }
+    }
+}
+
+/* Single-point variant: stores the SAD of one i4_blk_wd x i4_blk_ht block
+ * (pu1_inp against pu1_ref) in the first entry of pi4_sad_grid. */
+void hme_evalsad_pt_npu_MxN_8bit_neon(err_prms_t *ps_prms)
+{
+    S32 *pi4_out = ps_prms->pi4_sad_grid;
+
+    *pi4_out = ihevce_4mx4n_sad_computer_neon(
+        ps_prms->pu1_inp, ps_prms->pu1_ref, ps_prms->i4_inp_stride,
+        ps_prms->i4_ref_stride, ps_prms->i4_blk_wd, ps_prms->i4_blk_ht);
+}
+
+void hme_calc_sad_and_1_best_result_neon(
+ hme_search_prms_t *ps_search_prms,
+ wgt_pred_ctxt_t *ps_wt_inp_prms,
+ err_prms_t *ps_err_prms,
+ result_upd_prms_t *ps_result_prms,
+ U08 **ppu1_ref,
+ S32 i4_ref_stride)
+{
+ mv_refine_ctxt_t *refine_ctxt = ps_search_prms->ps_fullpel_refine_ctxt;
+ search_node_t *ps_search_node = ps_search_prms->ps_search_nodes;
+ S32 i4_num_nodes = ps_search_prms->i4_num_search_nodes;
+ S32 *pi4_sad_grid = ps_err_prms->pi4_sad_grid;
+ S32 cur_buf_stride = ps_err_prms->i4_inp_stride;
+ S32 ref_buf_stride = ps_err_prms->i4_ref_stride;
+ S32 cur_buf_stride_ls2 = (cur_buf_stride << 2);
+ S32 ref_buf_stride_ls2 = (ref_buf_stride << 2);
+ S32 i4_inp_off, i4_ref_off;
+ S32 i;
+ /* Full-pel 1-best update: every search node is scored (SAD + MV cost) and kept as result [0] of each valid partition it improves */
+ i4_inp_off = ps_search_prms->i4_cu_x_off;
+ i4_inp_off += (ps_search_prms->i4_cu_y_off * cur_buf_stride);
+ i4_ref_off = ps_search_prms->i4_x_off;
+ i4_ref_off += (ps_search_prms->i4_y_off * i4_ref_stride);
+ /* Run through each of the candidates in a loop: compute the partition SADs
+ * and the MV cost of the node, then update the best result per partition */
+ for(i = 0; i < i4_num_nodes; i++)
+ {
+ U16 au2_4x4_sad[16];
+ S32 i4_mv_cost;
+ S32 j;
+ /* NOTE(review): the skip below does not advance ps_search_node (the increment sits at the loop tail), so once an INTRA_MV node is met every remaining iteration re-tests it and skips too - confirm this is intended */
+ if(ps_search_node->s_mv.i2_mvx == INTRA_MV)
+ {
+ continue;
+ }
+ /* pu1_inp: apu1_wt_inp[] entry of the node's reference plus the CU offset; pu1_ref: reference plane displaced by the search offset and the node's MV */
+ ps_err_prms->pu1_inp = ps_wt_inp_prms->apu1_wt_inp[ps_search_node->i1_ref_idx] + i4_inp_off;
+ ps_err_prms->pu1_ref = ppu1_ref[ps_search_node->i1_ref_idx] + i4_ref_off;
+ ps_err_prms->pu1_ref += ps_search_node->s_mv.i2_mvx;
+ ps_err_prms->pu1_ref += (ps_search_node->s_mv.i2_mvy * i4_ref_stride);
+ /* NOTE(review): pu1_ref above is positioned with the i4_ref_stride argument, while the SAD rows below step by ps_err_prms->i4_ref_stride - presumably equal; confirm */
+ /* Loop to compute the SADs: call j starts 4 * j rows down and writes from au2_4x4_sad[4 * j] (presumably 4 SADs per call) */
+ for(j = 0; j < 4; j++)
+ {
+ UWORD8 *pu1_curr = ps_err_prms->pu1_inp;
+ UWORD8 *pu1_ref = ps_err_prms->pu1_ref;
+
+ ihevce_sad4_4x4_neon(
+ pu1_curr + j * cur_buf_stride_ls2,
+ cur_buf_stride,
+ pu1_ref + j * ref_buf_stride_ls2,
+ ref_buf_stride,
+ &au2_4x4_sad[4 * j]);
+ }
+
+ COMBINE_SADS_2(pi4_sad_grid, au2_4x4_sad);
+
+ // calculate MV cost: (range bits of the MV difference to the 2Nx2N MVP + ref idx bits + 2) * lambda, rounded, shifted, then CLIP_U16
+ {
+ S16 mvdx1, mvdy1;
+ S32 i4_ref_idx = ps_result_prms->i1_ref_idx;
+ search_results_t *ps_srch_rslts = ps_result_prms->ps_search_results;
+
+ pred_ctxt_t *ps_pred_ctxt = &ps_srch_rslts->as_pred_ctxt[i4_ref_idx];
+ pred_candt_nodes_t *ps_pred_nodes = &ps_pred_ctxt->as_pred_nodes[PART_2Nx2N];
+ search_node_t *ps_mvp_node = ps_pred_nodes->ps_mvp_node;
+
+ S32 inp_shift = 2;
+ S32 pred_shift = ps_mvp_node->u1_subpel_done ? 0 : 2;
+ S32 lambda_q_shift = ps_pred_ctxt->lambda_q_shift;
+ S32 lambda = ps_pred_ctxt->lambda;
+ S32 rnd = 1 << (lambda_q_shift - 1);
+ S32 mv_p_x = ps_mvp_node->s_mv.i2_mvx;
+ S32 mv_p_y = ps_mvp_node->s_mv.i2_mvy;
+ S32 ref_bits =
+ ps_pred_ctxt->ppu1_ref_bits_tlu[ps_pred_ctxt->pred_lx][ps_search_node->i1_ref_idx];
+
+ COMPUTE_DIFF_MV(mvdx1, mvdy1, ps_search_node, mv_p_x, mv_p_y, inp_shift, pred_shift);
+
+ mvdx1 = ABS(mvdx1);
+ mvdy1 = ABS(mvdy1);
+
+ i4_mv_cost = hme_get_range(mvdx1) + hme_get_range(mvdy1) + (mvdx1 > 0) + (mvdy1 > 0) +
+ ref_bits + 2;
+
+ i4_mv_cost *= lambda;
+ i4_mv_cost += rnd;
+ i4_mv_cost >>= lambda_q_shift;
+
+ i4_mv_cost = CLIP_U16(i4_mv_cost);
+ }
+
+ {
+ S32 i4_sad, i4_tot_cost;
+ S32 *pi4_valid_part_ids = &refine_ctxt->ai4_part_id[0];
+ S32 best_node_cost;
+
+ /* For each valid partition, overwrite the best result (index 0) of the
+ * refine context when this node has a strictly lower total cost */
+ for(j = 0; j < refine_ctxt->i4_num_valid_parts; j++)
+ {
+ S32 part_id = pi4_valid_part_ids[j];
+ S32 id = (refine_ctxt->i4_num_valid_parts > 8) ? part_id : j;
+
+ i4_sad = CLIP3(pi4_sad_grid[part_id], 0, 0x7fff);
+ i4_tot_cost = CLIP_S16(i4_sad + i4_mv_cost);
+
+ best_node_cost = CLIP_S16(refine_ctxt->i2_tot_cost[0][id]);
+
+ if(i4_tot_cost < best_node_cost)
+ {
+ refine_ctxt->i2_tot_cost[0][id] = i4_tot_cost;
+ refine_ctxt->i2_mv_cost[0][id] = i4_mv_cost;
+ refine_ctxt->i2_mv_x[0][id] = ps_search_node->s_mv.i2_mvx;
+ refine_ctxt->i2_mv_y[0][id] = ps_search_node->s_mv.i2_mvy;
+ refine_ctxt->i2_ref_idx[0][id] = ps_search_node->i1_ref_idx;
+ }
+ }
+ }
+
+ ps_search_node++;
+ }
+
+ ps_search_node = ps_search_prms->ps_search_nodes;
+ /* NOTE(review): results above are stored at index 'id' (== j unless more than 8 partitions are valid) but the checks below index by part_id - confirm */
+ for(i = 0; i < refine_ctxt->i4_num_valid_parts; i++)
+ {
+ S32 part_id = refine_ctxt->ai4_part_id[i];
+ /* Total cost still at the 16-bit maximum: the entry takes the ref idx of the first search node */
+ if(refine_ctxt->i2_tot_cost[0][part_id] >= MAX_SIGNED_16BIT_VAL)
+ {
+ assert(refine_ctxt->i2_mv_cost[0][part_id] == MAX_SIGNED_16BIT_VAL);
+ assert(refine_ctxt->i2_mv_x[0][part_id] == 0);
+ assert(refine_ctxt->i2_mv_y[0][part_id] == 0);
+
+ refine_ctxt->i2_ref_idx[0][part_id] = ps_search_node->i1_ref_idx;
+ }
+ if(refine_ctxt->i2_tot_cost[1][part_id] >= MAX_SIGNED_16BIT_VAL)
+ {
+ assert(refine_ctxt->i2_mv_cost[1][part_id] == MAX_SIGNED_16BIT_VAL);
+ assert(refine_ctxt->i2_mv_x[1][part_id] == 0);
+ assert(refine_ctxt->i2_mv_y[1][part_id] == 0);
+
+ refine_ctxt->i2_ref_idx[1][part_id] = ps_search_node->i1_ref_idx;
+ }
+ }
+}
+
+/* Sub-pel counterpart of the 1-best update: computes the SADs of the block at
+ * ps_err_prms->pu1_ref against ps_err_prms->pu1_inp and, for every valid
+ * partition, replaces the best result (index 0) of the sub-pel refine context
+ * when the total cost (clipped SAD + stored MV cost) improves. */
+void hme_calc_sad_and_1_best_result_subpel_neon(
+    err_prms_t *ps_err_prms, result_upd_prms_t *ps_result_prms)
+{
+    mv_refine_ctxt_t *refine_ctxt = ps_result_prms->ps_subpel_refine_ctxt;
+    S32 *pi4_sad_grid = ps_err_prms->pi4_sad_grid;
+    const S32 i4_inp_strd = ps_err_prms->i4_inp_stride;
+    const S32 i4_ref_strd = ps_err_prms->i4_ref_stride;
+    U16 au2_4x4_sad[16];
+    S32 i4_row, i4_part;
+
+    /* One call per group of 4 rows; call r writes from au2_4x4_sad[4 * r] */
+    for(i4_row = 0; i4_row < 4; i4_row++)
+    {
+        ihevce_sad4_4x4_neon(
+            ps_err_prms->pu1_inp + i4_row * (i4_inp_strd << 2),
+            i4_inp_strd,
+            ps_err_prms->pu1_ref + i4_row * (i4_ref_strd << 2),
+            i4_ref_strd,
+            &au2_4x4_sad[4 * i4_row]);
+    }
+
+    COMBINE_SADS_2(pi4_sad_grid, au2_4x4_sad);
+
+    /* Keep this candidate for every valid partition where it is strictly
+     * cheaper than the stored best result */
+    for(i4_part = 0; i4_part < refine_ctxt->i4_num_valid_parts; i4_part++)
+    {
+        S32 part_id = refine_ctxt->ai4_part_id[i4_part];
+        S32 id = (refine_ctxt->i4_num_valid_parts > 8) ? part_id : i4_part;
+        S32 i4_mv_cost = refine_ctxt->i2_mv_cost[0][id];
+        S32 i4_sad = CLIP3(pi4_sad_grid[part_id], 0, 0x7fff);
+        S32 i4_tot_cost = CLIP_S16(i4_sad + i4_mv_cost);
+        S32 best_node_cost = CLIP_S16(refine_ctxt->i2_tot_cost[0][id]);
+
+        if(i4_tot_cost >= best_node_cost)
+            continue;
+
+        refine_ctxt->i2_tot_cost[0][id] = i4_tot_cost;
+        refine_ctxt->i2_mv_cost[0][id] = i4_mv_cost;
+        refine_ctxt->i2_mv_x[0][id] = ps_result_prms->i2_mv_x;
+        refine_ctxt->i2_mv_y[0][id] = ps_result_prms->i2_mv_y;
+        refine_ctxt->i2_ref_idx[0][id] = ps_result_prms->i1_ref_idx;
+    }
+
+    /* Entries whose best total cost is still at the 16-bit maximum also get
+     * their full-pel SATD set to that maximum */
+    for(i4_part = 0; i4_part < TOT_NUM_PARTS; i4_part++)
+    {
+        if(refine_ctxt->i2_tot_cost[0][i4_part] >= MAX_SIGNED_16BIT_VAL)
+        {
+            refine_ctxt->ai2_fullpel_satd[0][i4_part] = MAX_SIGNED_16BIT_VAL;
+        }
+    }
+}
diff --git a/encoder/arm/ihevce_sad_compute_neon.c b/encoder/arm/ihevce_sad_compute_neon.c
new file mode 100644
index 0000000..94da3fc
--- /dev/null
+++ b/encoder/arm/ihevce_sad_compute_neon.c
@@ -0,0 +1,318 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ihevce_sad_compute_neon.c
+*
+* @brief
+* Contains definitions of functions to compute sad
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+*
+* @remarks
+* None
+*
+********************************************************************************
+*/
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <string.h>
+#include <assert.h>
+#include <arm_neon.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "ihevc_macros.h"
+#include "itt_video_api.h"
+#include "ihevc_cmn_utils_neon.h"
+#include "ihevce_ipe_instr_set_router.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+/* SAD of a 4x4 block. load_unaligned_u8q() presumably gathers the four 4-byte
+ * rows of a block into one 16-byte vector (project helper, not shown here). */
+UWORD16 ihevce_4x4_sad_computer_neon(
+    UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD32 src_strd, WORD32 pred_strd)
+{
+    const uint8x16_t src_pix = load_unaligned_u8q(pu1_src, src_strd);
+    const uint8x16_t pred_pix = load_unaligned_u8q(pu1_pred, pred_strd);
+    /* |src - pred| of the low 8 bytes, then accumulate the high 8 bytes */
+    const uint16x8_t diff_lo = vabdl_u8(vget_low_u8(src_pix), vget_low_u8(pred_pix));
+    const uint16x8_t diff = vabal_u8(diff_lo, vget_high_u8(src_pix), vget_high_u8(pred_pix));
+    /* Horizontal reduction: 8 x u16 -> 4 x u32 -> 2 x u64 -> lane 0 */
+    const uint64x2_t sum64 = vpaddlq_u32(vpaddlq_u16(diff));
+    const uint32x2_t lo = vreinterpret_u32_u64(vget_low_u64(sum64));
+    return vget_lane_u32(vadd_u32(lo, vreinterpret_u32_u64(vget_high_u64(sum64))), 0);
+}
+
+/* SAD of an 8-wide block of 'ht' rows (ht <= 8 is asserted). Each of the 8
+ * u16 lanes accumulates one column, i.e. at most ht byte differences. */
+static UWORD16 ihevce_8xn_sad_computer_neon(
+    UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD32 src_strd, WORD32 pred_strd, WORD32 ht)
+{
+    uint16x8_t col_sad = vdupq_n_u16(0);
+    uint32x4_t sum32;
+    uint64x2_t sum64;
+    WORD32 rows_left;
+
+    assert(ht <= 8);
+
+    for(rows_left = ht; rows_left > 0; rows_left--)
+    {
+        col_sad = vabal_u8(col_sad, vld1_u8(pu1_src), vld1_u8(pu1_pred));
+        pu1_src += src_strd;
+        pu1_pred += pred_strd;
+    }
+
+    /* Widening pairwise adds fold the 8 column sums into two 64-bit halves */
+    sum32 = vpaddlq_u16(col_sad);
+    sum64 = vpaddlq_u32(sum32);
+    return vget_lane_u32(
+        vadd_u32(vreinterpret_u32_u64(vget_low_u64(sum64)),
+                 vreinterpret_u32_u64(vget_high_u64(sum64))), 0);
+}
+
+/* SAD of a 16-wide block of 'ht' rows (ht <= 16 is asserted). The low and
+ * high 8 bytes of every row are accumulated in separate u16 vectors. */
+static UWORD32 ihevce_16xn_sad_computer_neon(
+    UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD32 src_strd, WORD32 pred_strd, WORD32 ht)
+{
+    uint16x8_t acc_lo = vdupq_n_u16(0);
+    uint16x8_t acc_hi = vdupq_n_u16(0);
+    uint64x2_t sum64;
+    WORD32 rows_left;
+
+    assert(ht <= 16);
+
+    for(rows_left = ht; rows_left > 0; rows_left--)
+    {
+        const uint8x16_t src_row = vld1q_u8(pu1_src);
+        const uint8x16_t pred_row = vld1q_u8(pu1_pred);
+
+        acc_lo = vabal_u8(acc_lo, vget_low_u8(src_row), vget_low_u8(pred_row));
+        acc_hi = vabal_u8(acc_hi, vget_high_u8(src_row), vget_high_u8(pred_row));
+        pu1_src += src_strd;
+        pu1_pred += pred_strd;
+    }
+
+    /* Widen to 32 bit while merging both accumulators, then to 64 bit */
+    sum64 = vpaddlq_u32(vpadalq_u16(vpaddlq_u16(acc_lo), acc_hi));
+    return vget_lane_u32(
+        vadd_u32(vreinterpret_u32_u64(vget_low_u64(sum64)),
+                 vreinterpret_u32_u64(vget_high_u64(sum64))), 0);
+}
+
+/* SAD of a 32-wide block of 'ht' rows (ht <= 32 is asserted). Each u16 lane
+ * takes 2 byte differences per row: at most 32 * 2 * 255 = 16320 per lane. */
+static UWORD32 ihevce_32xn_sad_computer_neon(
+    UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD32 src_strd, WORD32 pred_strd, WORD32 ht)
+{
+    uint16x8_t acc_left = vdupq_n_u16(0);
+    uint16x8_t acc_right = vdupq_n_u16(0);
+    uint64x2_t sum64;
+    WORD32 rows_left;
+
+    assert(ht <= 32);
+
+    for(rows_left = ht; rows_left > 0; rows_left--)
+    {
+        const uint8x16_t src_l = vld1q_u8(pu1_src);
+        const uint8x16_t pred_l = vld1q_u8(pu1_pred);
+        const uint8x16_t src_r = vld1q_u8(pu1_src + 16);
+        const uint8x16_t pred_r = vld1q_u8(pu1_pred + 16);
+
+        acc_left = vabal_u8(acc_left, vget_low_u8(src_l), vget_low_u8(pred_l));
+        acc_left = vabal_u8(acc_left, vget_high_u8(src_l), vget_high_u8(pred_l));
+        acc_right = vabal_u8(acc_right, vget_low_u8(src_r), vget_low_u8(pred_r));
+        acc_right = vabal_u8(acc_right, vget_high_u8(src_r), vget_high_u8(pred_r));
+        pu1_src += src_strd;
+        pu1_pred += pred_strd;
+    }
+
+    /* Widen to 32 bit while merging both accumulators, then to 64 bit */
+    sum64 = vpaddlq_u32(vpadalq_u16(vpaddlq_u16(acc_left), acc_right));
+    return vget_lane_u32(
+        vadd_u32(vreinterpret_u32_u64(vget_low_u64(sum64)),
+                 vreinterpret_u32_u64(vget_high_u64(sum64))), 0);
+}
+
+/* SAD of a 64-wide block of 'ht' rows (ht <= 64 is asserted). Each u16 lane
+ * takes 4 byte differences per row: at most 64 * 4 * 255 = 65280 per lane. */
+static UWORD32 ihevce_64xn_sad_computer_neon(
+    UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD32 src_strd, WORD32 pred_strd, WORD32 ht)
+{
+    uint16x8_t acc_left = vdupq_n_u16(0);
+    uint16x8_t acc_right = vdupq_n_u16(0);
+    uint64x2_t sum64;
+    WORD32 rows_left;
+
+    assert(ht <= 64);
+
+    for(rows_left = ht; rows_left > 0; rows_left--)
+    {
+        const uint8x16_t src_a = vld1q_u8(pu1_src);
+        const uint8x16_t pred_a = vld1q_u8(pu1_pred);
+        const uint8x16_t src_b = vld1q_u8(pu1_src + 16);
+        const uint8x16_t pred_b = vld1q_u8(pu1_pred + 16);
+        const uint8x16_t src_c = vld1q_u8(pu1_src + 32);
+        const uint8x16_t pred_c = vld1q_u8(pu1_pred + 32);
+        const uint8x16_t src_d = vld1q_u8(pu1_src + 48);
+        const uint8x16_t pred_d = vld1q_u8(pu1_pred + 48);
+
+        acc_left = vabal_u8(acc_left, vget_low_u8(src_a), vget_low_u8(pred_a));
+        acc_left = vabal_u8(acc_left, vget_high_u8(src_a), vget_high_u8(pred_a));
+        acc_left = vabal_u8(acc_left, vget_low_u8(src_b), vget_low_u8(pred_b));
+        acc_left = vabal_u8(acc_left, vget_high_u8(src_b), vget_high_u8(pred_b));
+        acc_right = vabal_u8(acc_right, vget_low_u8(src_c), vget_low_u8(pred_c));
+        acc_right = vabal_u8(acc_right, vget_high_u8(src_c), vget_high_u8(pred_c));
+        acc_right = vabal_u8(acc_right, vget_low_u8(src_d), vget_low_u8(pred_d));
+        acc_right = vabal_u8(acc_right, vget_high_u8(src_d), vget_high_u8(pred_d));
+        pu1_src += src_strd;
+        pu1_pred += pred_strd;
+    }
+
+    /* Widen to 32 bit while merging both accumulators, then to 64 bit */
+    sum64 = vpaddlq_u32(vpadalq_u16(vpaddlq_u16(acc_left), acc_right));
+    return vget_lane_u32(
+        vadd_u32(vreinterpret_u32_u64(vget_low_u64(sum64)),
+                 vreinterpret_u32_u64(vget_high_u64(sum64))), 0);
+}
+
+/* SAD of a blk_wd x blk_ht block (both asserted to be multiples of 4).
+ * Power-of-two widths up to 64 run the matching fixed-width kernel on bands
+ * of at most blk_wd rows; any other width is tiled in 4-row bands with the
+ * 32/16/8/4-wide kernels. An unexpected width in the switch returns -1. */
+UWORD32 ihevce_4mx4n_sad_computer_neon(
+    UWORD8 *pu1_src,
+    UWORD8 *pu1_pred,
+    WORD32 src_strd,
+    WORD32 pred_strd,
+    WORD32 blk_wd,
+    WORD32 blk_ht)
+{
+    WORD32 sad = 0;
+    WORD32 row, col;
+
+    assert(blk_wd % 4 == 0);
+    assert(blk_ht % 4 == 0);
+
+    if(((blk_wd & (blk_wd - 1)) != 0) || (blk_wd > 64))
+    {
+        // Generic case: 4-row bands, widest kernel that still fits goes first
+        for(row = 0; row < blk_ht; row += 4)
+        {
+            col = 0;
+            while(col < blk_wd)
+            {
+                UWORD8 *pu1_s = pu1_src + col;
+                UWORD8 *pu1_p = pu1_pred + col;
+                const WORD32 wd_left = blk_wd - col;
+                if(wd_left >= 32)
+                {
+                    sad += ihevce_32xn_sad_computer_neon(pu1_s, pu1_p, src_strd, pred_strd, 4);
+                    col += 32;
+                }
+                else if(wd_left >= 16)
+                {
+                    sad += ihevce_16xn_sad_computer_neon(pu1_s, pu1_p, src_strd, pred_strd, 4);
+                    col += 16;
+                }
+                else if(wd_left >= 8)
+                {
+                    sad += ihevce_8xn_sad_computer_neon(pu1_s, pu1_p, src_strd, pred_strd, 4);
+                    col += 8;
+                }
+                else
+                {
+                    sad += ihevce_4x4_sad_computer_neon(pu1_s, pu1_p, src_strd, pred_strd);
+                    col += 4;
+                }
+            }
+            pu1_src += (4 * src_strd);
+            pu1_pred += (4 * pred_strd);
+        }
+        return sad;
+    }
+
+    // blk_wd { 4, 8, 16, 32, 64 }: one kernel call per band of rows
+    row = 0;
+    while(row < blk_ht)
+    {
+        const WORD32 band_ht = MIN(blk_wd, blk_ht - row);
+        switch(blk_wd)
+        {
+        case 4:
+            sad += ihevce_4x4_sad_computer_neon(pu1_src, pu1_pred, src_strd, pred_strd);
+            break;
+        case 8:
+            sad += ihevce_8xn_sad_computer_neon(pu1_src, pu1_pred, src_strd, pred_strd, band_ht);
+            break;
+        case 16:
+            sad += ihevce_16xn_sad_computer_neon(pu1_src, pu1_pred, src_strd, pred_strd, band_ht);
+            break;
+        case 32:
+            sad += ihevce_32xn_sad_computer_neon(pu1_src, pu1_pred, src_strd, pred_strd, band_ht);
+            break;
+        case 64:
+            sad += ihevce_64xn_sad_computer_neon(pu1_src, pu1_pred, src_strd, pred_strd, band_ht);
+            break;
+        default:
+            return -1; // should not be here
+        }
+        row += band_ht;
+        pu1_src += (band_ht * src_strd);
+        pu1_pred += (band_ht * pred_strd);
+    }
+    return sad;
+}
+
+UWORD16 ihevce_8x8_sad_computer_neon(
+ UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD32 src_strd, WORD32 pred_strd)
+{ /* 8x8 SAD: the 8-wide row kernel run over 8 rows */
+ return ihevce_8xn_sad_computer_neon(pu1_src, pu1_pred, src_strd, pred_strd, 8);
+}
+
+/* Square-block SAD: trans_size picks the 4/8/16/32/64 kernel, any other size
+ * returns -1. Argument order (src, strd, pred, strd) differs from the kernels. */
+WORD32 ihevce_nxn_sad_computer_neon(
+    UWORD8 *pu1_src, WORD32 src_strd, UWORD8 *pu1_pred, WORD32 pred_strd, WORD32 trans_size)
+{
+    WORD32 sad = -1; /* unsupported size; should not be here */
+
+    if(trans_size == 4)
+        sad = ihevce_4x4_sad_computer_neon(pu1_src, pu1_pred, src_strd, pred_strd);
+    else if(trans_size == 8)
+        sad = ihevce_8xn_sad_computer_neon(pu1_src, pu1_pred, src_strd, pred_strd, 8);
+    else if(trans_size == 16)
+        sad = ihevce_16xn_sad_computer_neon(pu1_src, pu1_pred, src_strd, pred_strd, 16);
+    else if(trans_size == 32)
+        sad = ihevce_32xn_sad_computer_neon(pu1_src, pu1_pred, src_strd, pred_strd, 32);
+    else if(trans_size == 64)
+        sad = ihevce_64xn_sad_computer_neon(pu1_src, pu1_pred, src_strd, pred_strd, 64);
+
+    return sad;
+}
diff --git a/encoder/arm/ihevce_scale_by_2_neon.c b/encoder/arm/ihevce_scale_by_2_neon.c
new file mode 100644
index 0000000..55e87fd
--- /dev/null
+++ b/encoder/arm/ihevce_scale_by_2_neon.c
@@ -0,0 +1,251 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ihevce_scale_by_2_neon.c
+*
+* @brief
+* Contains definitions of functions for scale by 2
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+*
+* @remarks
+* None
+*
+********************************************************************************
+*/
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <arm_neon.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "ihevc_macros.h"
+#include "itt_video_api.h"
+#include "ihevce_ipe_instr_set_router.h"
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+#define FILT_TAP_Q 7
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/* Horizontal half of the 2:1 down-scaler. For every second source column x of
+ * a row it writes (66*a[x] + 40*(a[x-1] + a[x+1]) - 9*(a[x-3] + a[x+3])) >> 7
+ * (rounded, saturated to 8 bit), i.e. 8 output pixels per 16 input pixels.
+ * Reads start 3 bytes left of pu1_src and every step loads 32 bytes. */
+static void ihevce_horz_scale_neon_w16(
+    UWORD8 *pu1_src, WORD32 src_strd, UWORD8 *pu1_dst, WORD32 dst_strd, WORD32 wd, WORD32 ht)
+{
+    /* 8192 is taken off before the s16 sums and given back as 64 (8192 >> 7) */
+    const int16x8_t bias = vdupq_n_s16(8192);
+    const int16x8_t bias_shifted = vdupq_n_s16(64);
+    const uint8x8_t tap_c = vdup_n_u8(66);
+    const int8_t tap_1 = 40;
+    const int8_t tap_3 = 9;
+    WORD32 row, col;
+
+    for(row = 0; row < ht; row++)
+    {
+        UWORD8 *pu1_in = pu1_src + row * src_strd - 3;
+        UWORD8 *pu1_out = pu1_dst + row * dst_strd;
+
+        for(col = 0; col < wd; col += 16, pu1_in += 16, pu1_out += 8)
+        {
+            /* De-interleaved: val[0] = a[-3], a[-1], a[1], ..; val[1] = a[-2], a[0], .. */
+            const uint8x16x2_t px = vld2q_u8(pu1_in);
+            const uint8x8_t odd_lo = vget_low_u8(px.val[0]);
+            const uint8x8_t odd_hi = vget_high_u8(px.val[0]);
+            const uint8x8_t centre = vext_u8(vget_low_u8(px.val[1]), vget_high_u8(px.val[1]), 1);
+            const uint8x8_t left1 = vext_u8(odd_lo, odd_hi, 1);
+            const uint8x8_t right1 = vext_u8(odd_lo, odd_hi, 2);
+            const uint8x8_t right3 = vext_u8(odd_lo, odd_hi, 3);
+            int16x8_t term_c, term_1, term_3, acc;
+
+            term_c = vreinterpretq_s16_u16(vmull_u8(centre, tap_c)); // a[0] * 66
+            term_1 = vreinterpretq_s16_u16(vaddl_u8(left1, right1));
+            term_1 = vmulq_n_s16(term_1, tap_1); // (a[-1] + a[1]) * 40
+            term_3 = vreinterpretq_s16_u16(vaddl_u8(right3, odd_lo));
+            term_3 = vmulq_n_s16(term_3, tap_3); // (a[-3] + a[3]) * 9
+
+            // a[0] * 66 + (a[-1] + a[1]) * 40 - (a[-3] + a[3]) * 9
+            acc = vsubq_s16(term_c, bias);
+            term_1 = vsubq_s16(term_1, term_3);
+            acc = vaddq_s16(term_1, acc);
+            acc = vrshrq_n_s16(acc, FILT_TAP_Q);
+            acc = vaddq_s16(acc, bias_shifted);
+
+            // saturate to 8 bit and store the 8 output pixels
+            vst1_u8(pu1_out, vqmovun_s16(acc));
+        }
+    }
+}
+
+/* One 8-pixel group of the vertical filter:
+ * (66 * c + 40 * (t1 + b1) - 9 * (t3 + b3)) >> 7 with rounding, saturated
+ * to 8 bit. c is the centre row, t1/b1 the rows 1 above/below, t3/b3 the
+ * rows 3 above/below. */
+static uint8x8_t ihevce_vert_scale_8px_neon(
+    uint8x8_t c, uint8x8_t t1, uint8x8_t b1, uint8x8_t t3, uint8x8_t b3)
+{
+    /* 8192 is taken off before the s16 sums and given back as 64 (8192 >> 7) */
+    const int16x8_t prec = vdupq_n_s16(8192);
+    const int16x8_t inv_prec = vdupq_n_s16(64);
+    int16x8_t p, q, r, sum;
+
+    p = vreinterpretq_s16_u16(vmull_u8(c, vdup_n_u8(66))); // a[0] * 66
+    q = vreinterpretq_s16_u16(vaddl_u8(t1, b1));
+    q = vmulq_n_s16(q, 40); // (a[-1] + a[1]) * 40
+    r = vreinterpretq_s16_u16(vaddl_u8(t3, b3));
+    r = vmulq_n_s16(r, 9); // (a[-3] + a[3]) * 9
+
+    // a[0] * 66 + (a[-1] + a[1]) * 40 - (a[-3] + a[3]) * 9
+    sum = vsubq_s16(p, prec);
+    q = vsubq_s16(q, r);
+    sum = vaddq_s16(q, sum);
+    sum = vrshrq_n_s16(sum, FILT_TAP_Q);
+    sum = vaddq_s16(sum, inv_prec);
+    return vqmovun_s16(sum);
+}
+
+/* Vertical half of the 2:1 down-scaler. For every second source row it
+ * writes one output row filtered with the taps (-9, 0, 40, 66, 40, 0, -9)
+ * >> 7, so rows -3 .. ht + 1 relative to pu1_src are read. Columns are
+ * handled in groups of 16 bytes: a full group is loaded and stored per
+ * iteration of the column loop, whatever the remainder of wd is. The
+ * 8-entry array 'src' is a ring buffer of the most recently loaded rows;
+ * lut_id & 7 is the ring slot of the row three above the current centre.
+ * Output row j / 2 of the group lands at pu1_dst + (j / 2) * dst_strd + i. */
+static void ihevce_vert_scale_neon_w16(
+    UWORD8 *pu1_src, WORD32 src_strd, UWORD8 *pu1_dst, WORD32 dst_strd, WORD32 wd, WORD32 ht)
+{
+    WORD32 i, j, k;
+
+    for(i = 0; i < wd; i += 16)
+    {
+        UWORD8 *pu1_src_tmp = pu1_src - 3 * src_strd + i;
+        WORD32 lut_id = 0;
+        UWORD8 mod8 = 0;
+        uint8x16_t src[8];
+
+        /* Prime the ring with rows r[-3] .. r[1] */
+        for(k = 0; k < 5; k++)
+        {
+            src[mod8] = vld1q_u8(pu1_src_tmp);
+            pu1_src_tmp += src_strd;
+            mod8 = (mod8 + 1) & 7;
+        }
+
+        /* Each pass consumes two new input rows and emits one output row */
+        for(j = 0; j < ht; j += 2)
+        {
+            UWORD8 *pu1_dst_tmp = pu1_dst + (j >> 1) * dst_strd + i;
+            UWORD8 c, t1, b1, t3, b3;
+
+            /* Rows r[2] and r[3] relative to the current centre row */
+            for(k = 0; k < 2; k++)
+            {
+                src[mod8] = vld1q_u8(pu1_src_tmp);
+                pu1_src_tmp += src_strd;
+                mod8 = (mod8 + 1) & 7;
+            }
+
+            /* Ring slots of the five rows with a non-zero tap */
+            t3 = (lut_id & 7);
+            t1 = (lut_id + 2) & 7;
+            c = (lut_id + 3) & 7;
+            b1 = (lut_id + 4) & 7;
+            b3 = (lut_id + 6) & 7;
+            lut_id += 2;
+
+            /* Low 8 columns, then high 8 columns of the 16-byte group */
+            vst1_u8(pu1_dst_tmp, ihevce_vert_scale_8px_neon(
+                vget_low_u8(src[c]), vget_low_u8(src[t1]), vget_low_u8(src[b1]),
+                vget_low_u8(src[t3]), vget_low_u8(src[b3])));
+
+            vst1_u8(pu1_dst_tmp + 8, ihevce_vert_scale_8px_neon(
+                vget_high_u8(src[c]), vget_high_u8(src[t1]), vget_high_u8(src[b1]),
+                vget_high_u8(src[t3]), vget_high_u8(src[b3])));
+        }
+    }
+}
+
+/* 2:1 down-scaler for a wd x ht block (wd a non-zero multiple of 16, ht even).
+ * The block is processed in tiles of up to 64 rows by 64/32/16 columns: each
+ * tile is first filtered horizontally into pu1_scrtch (with 3 extra rows above
+ * and 2 below), then vertically from the scratch into pu1_dst.
+ * NOTE(review): for a 16-column tile the vertical pass is called with wd = 8
+ * but still handles a full 16-byte group, so it appears to write 8 bytes past
+ * the tile's 8 output columns - confirm dst/scratch padding covers this. */
+void ihevce_scaling_filter_mxn_neon(
+    UWORD8 *pu1_src,
+    WORD32 src_strd,
+    UWORD8 *pu1_scrtch,
+    WORD32 scrtch_strd,
+    UWORD8 *pu1_dst,
+    WORD32 dst_strd,
+    WORD32 ht,
+    WORD32 wd)
+{
+    WORD32 row, col;
+
+    assert(wd >= 16 && wd % 16 == 0);
+    assert(ht % 2 == 0);
+
+    for(row = 0; row < ht;)
+    {
+        /* At most 64 source rows (32 output rows) per band */
+        const WORD32 rows = MIN(64, (ht - row));
+        UWORD8 *pu1_src_band = pu1_src + row * src_strd;
+        UWORD8 *pu1_dst_band = pu1_dst + (row >> 1) * dst_strd;
+
+        for(col = 0; col < wd;)
+        {
+            /* Widest of 64 / 32 / 16 columns; 16 is also used for anything below 32 */
+            const WORD32 left = wd - col;
+            const WORD32 cols = (left >= 64) ? 64 : ((left >= 32) ? 32 : 16);
+
+            ihevce_horz_scale_neon_w16(
+                pu1_src_band - 3 * src_strd + col, src_strd,
+                pu1_scrtch, scrtch_strd,
+                cols, (3 + rows + 2));
+            ihevce_vert_scale_neon_w16(
+                pu1_scrtch + 3 * scrtch_strd, scrtch_strd,
+                pu1_dst_band + (col >> 1), dst_strd,
+                (cols >> 1), rows);
+            col += cols;
+        }
+        row += rows;
+    }
+}
diff --git a/encoder/arm/ihevce_scan_coeffs_neon.c b/encoder/arm/ihevce_scan_coeffs_neon.c
new file mode 100644
index 0000000..becb5bd
--- /dev/null
+++ b/encoder/arm/ihevce_scan_coeffs_neon.c
@@ -0,0 +1,496 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ihevce_scan_coeffs_neon.c
+*
+* @brief
+* Contains definitions for scanning quantized tu
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+*
+* @remarks
+* None
+*
+********************************************************************************
+*/
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+#include <arm_neon.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevc_defs.h"
+#include "ihevc_debug.h"
+#include "ihevce_api.h"
+#include "ihevce_defs.h"
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_global_tables.h"
+#include "ihevce_ipe_instr_set_router.h"
+#include "ihevce_common_utils.h"
+
+/*****************************************************************************/
+/* Function Declarations */
+/*****************************************************************************/
+FT_SCAN_COEFFS ihevce_scan_coeffs_neon; /* declared through FT_SCAN_COEFFS (presumably a function typedef from a project header) ahead of the definition below */
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+/* Byte-lane sign mask, analogous to SSE2 _mm_movemask_epi8: bit i of the
+ * 16-bit result is the most significant bit of byte lane i of 'input'. */
+static WORD32 movemask_neon(uint8x16_t input)
+{
+    /* Negative counts make vshl_u8 shift right: lane k moves bit 7 to bit k */
+    static const int8_t xr[8] = { -7, -6, -5, -4, -3, -2, -1, 0 };
+    const uint8x8_t mask_and = vdup_n_u8(0x80);
+    const int8x8_t mask_shift = vld1_s8(xr);
+    uint8x8_t lo = vget_low_u8(input);
+    uint8x8_t hi = vget_high_u8(input);
+
+    lo = vshl_u8(vand_u8(lo, mask_and), mask_shift);
+    hi = vshl_u8(vand_u8(hi, mask_and), mask_shift);
+
+    /* Three pairwise adds sum all 8 lanes (one distinct bit each) into lane 0 */
+    lo = vpadd_u8(lo, lo);
+    lo = vpadd_u8(lo, lo);
+    lo = vpadd_u8(lo, lo);
+    hi = vpadd_u8(hi, hi);
+    hi = vpadd_u8(hi, hi);
+    hi = vpadd_u8(hi, hi);
+
+    /* Lane 0 is read with the intrinsic, not with compiler-specific vector subscripts */
+    return ((vget_lane_u8(hi, 0) << 8) | (vget_lane_u8(lo, 0) & 0xFF));
+}
+
+WORD32 ihevce_scan_coeffs_neon(
+ WORD16 *pi2_quant_coeffs,
+ WORD32 *pi4_subBlock2csbfId_map,
+ WORD32 scan_idx,
+ WORD32 trans_size,
+ UWORD8 *pu1_out_data,
+ UWORD8 *pu1_csbf_buf,
+ WORD32 i4_csbf_stride)
+{
+ WORD32 i, trans_unit_idx, num_gt1_flag, num_gt0_flag;
+ UWORD16 u2_csbf0flags;
+ WORD32 num_bytes = 0;
+ UWORD8 *pu1_trans_table;
+ UWORD8 *pu1_csb_table;
+ WORD32 shift_value, mask_value;
+ WORD32 blk_row, blk_col;
+ WORD32 x_pos, y_pos;
+ WORD32 quant_coeff;
+
+ UWORD8 *pu1_out_data_header;
+ UWORD16 *pu2_out_data_coeff;
+
+ int8x16_t one, shuffle, zero;
+ int16x8_t ones;
+ int8x8x2_t quant;
+
+ (void)i4_csbf_stride;
+ pu1_out_data_header = pu1_out_data;
+ u2_csbf0flags = 0xBAD0;
+
+ pu1_csb_table = (UWORD8 *)&(g_u1_scan_table_4x4[scan_idx][0]);
+
+ GETRANGE(shift_value, trans_size);
+ shift_value = shift_value - 3;
+ mask_value = (trans_size / 4) - 1;
+
+ switch(trans_size)
+ {
+ case 32:
+ pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_8x8[scan_idx][0]);
+ break;
+ case 16:
+ pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_4x4[scan_idx][0]);
+ break;
+ case 8:
+ pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_2x2[scan_idx][0]);
+ break;
+ case 4:
+ pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_1x1[0]);
+ break;
+ }
+
+ shuffle = vld1q_s8((WORD8 *)pu1_csb_table);
+ zero = vdupq_n_s8(0);
+ one = vdupq_n_s8(1);
+ ones = vdupq_n_s16(1);
+
+ for(trans_unit_idx = (trans_size * trans_size / 16) - 1; trans_unit_idx >= 0; trans_unit_idx--)
+ {
+ if(pu1_csbf_buf[pi4_subBlock2csbfId_map[pu1_trans_table[trans_unit_idx]]])
+ {
+ WORD32 sig_coeff_abs_gt0_flags, sig_coeff_abs_gt1_flags;
+ WORD32 sign_flag, pos_last_coded;
+ UWORD8 u1_last_x, u1_last_y;
+ WORD16 *pi2_temp_quant_coeff = pi2_quant_coeffs;
+
+ int16x4_t quant0, quant1, quant2, quant3;
+ int16x8_t quant01, quant23;
+ int8x8_t a, b, c, d, shuffle_0, shuffle_1;
+ int8x16_t shuffle_out, shuffle_out_abs;
+ uint8x16_t sign, eq0, eq1;
+
+ blk_row = pu1_trans_table[trans_unit_idx] >> shift_value;
+ blk_col = pu1_trans_table[trans_unit_idx] & mask_value;
+
+ pi2_temp_quant_coeff += (blk_col * 4 + (blk_row * 4) * trans_size);
+
+ quant0 = vld1_s16(pi2_temp_quant_coeff + 0 * trans_size);
+ quant1 = vld1_s16(pi2_temp_quant_coeff + 1 * trans_size);
+ quant2 = vld1_s16(pi2_temp_quant_coeff + 2 * trans_size);
+ quant3 = vld1_s16(pi2_temp_quant_coeff + 3 * trans_size);
+
+ quant01 = vcombine_s16(quant0, quant1);
+ quant23 = vcombine_s16(quant2, quant3);
+
+ a = vqmovn_s16(quant01);
+ b = vqmovn_s16(quant23);
+
+ quant.val[0] = a;
+ quant.val[1] = b;
+
+ c = vget_low_s8(shuffle);
+ d = vget_high_s8(shuffle);
+
+ shuffle_0 = vtbl2_s8(quant, c);
+ shuffle_1 = vtbl2_s8(quant, d);
+ shuffle_out = vcombine_s8(shuffle_0, shuffle_1);
+
+ shuffle_out_abs = vabsq_s8(shuffle_out);
+
+ sign = vcgtq_s8(zero, shuffle_out);
+ eq0 = vceqq_s8(shuffle_out, zero);
+ eq1 = vceqq_s8(shuffle_out_abs, one);
+
+ sign_flag = movemask_neon(sign);
+ sig_coeff_abs_gt0_flags = movemask_neon(eq0);
+ sig_coeff_abs_gt1_flags = movemask_neon(eq1);
+
+ sig_coeff_abs_gt0_flags = ~sig_coeff_abs_gt0_flags;
+ sig_coeff_abs_gt1_flags = ~sig_coeff_abs_gt1_flags;
+ sig_coeff_abs_gt0_flags = sig_coeff_abs_gt0_flags & 0x0000FFFF;
+ sig_coeff_abs_gt1_flags = sig_coeff_abs_gt1_flags & sig_coeff_abs_gt0_flags;
+
+ ASSERT(sig_coeff_abs_gt0_flags != 0);
+ GET_POS_MSB_32(pos_last_coded, sig_coeff_abs_gt0_flags);
+
+ /* Update gt1 flag based on num_gt0_flag */
+ num_gt0_flag = ihevce_num_ones_popcnt(sig_coeff_abs_gt0_flags);
+
+ /* Find the position of 9th(MAX_GT_ONE+1) 1 in sig_coeff_abs_gt0_flags from MSB and update gt1 flag */
+ if(num_gt0_flag > MAX_GT_ONE)
+ {
+ WORD32 gt0_first_byte = sig_coeff_abs_gt0_flags & 0xFF;
+ WORD32 num_gt0_second_byte =
+ ihevce_num_ones_popcnt(sig_coeff_abs_gt0_flags & 0xFF00);
+ WORD32 pos_nineth_one; /* pos. of 9th one from MSB of sig_coeff_abs_gt0_flags */
+ WORD32 gt0_after_nineth_one, num_gt0_first_byte_to_nine;
+
+ num_gt0_first_byte_to_nine = (MAX_GT_ONE + 1) - num_gt0_second_byte;
+
+ while(num_gt0_first_byte_to_nine)
+ {
+ GET_POS_MSB_32(pos_nineth_one, gt0_first_byte);
+ gt0_first_byte = CLEAR_BIT(
+ gt0_first_byte,
+ pos_nineth_one); /*gt0_second_byte &= (~(0x1<<pos_eighth_one));*/
+ num_gt0_first_byte_to_nine--;
+ }
+
+ /* Update gt1 based on pos_nineth_one */
+ gt0_after_nineth_one = SET_BIT(gt0_first_byte, pos_nineth_one);
+ sig_coeff_abs_gt1_flags = sig_coeff_abs_gt1_flags | gt0_after_nineth_one;
+ }
+
+ /* Get x_pos & y_pos of last coded in csb wrt to TU */
+ u1_last_x = (pu1_csb_table[pos_last_coded] & 0x3) + blk_col * 4;
+ u1_last_y = (pu1_csb_table[pos_last_coded] >> 2) + blk_row * 4;
+
+ num_gt1_flag = ihevce_num_ones_popcnt(sig_coeff_abs_gt1_flags);
+
+ /* storing last_x and last_y */
+ *pu1_out_data_header = u1_last_x;
+ pu1_out_data_header++;
+
+ *pu1_out_data_header = u1_last_y;
+ pu1_out_data_header++;
+
+ /* storing the scan order */
+ *pu1_out_data_header = (UWORD8)scan_idx;
+ pu1_out_data_header++;
+
+ /* storing last_sub_block pos. in scan order count */
+ *pu1_out_data_header = (UWORD8)trans_unit_idx;
+ pu1_out_data_header++;
+
+ /*stored the first 4 bytes, now all are word16. So word16 pointer*/
+ pu2_out_data_coeff = (UWORD16 *)pu1_out_data_header;
+
+ /* u2_csbf0flags word */
+ u2_csbf0flags = 0xBAD0 | 1; /*since right&bottom csbf is 0*/
+ /* storing u2_csbf0flags word */
+ *pu2_out_data_coeff = u2_csbf0flags;
+ pu2_out_data_coeff++;
+
+ /* storing u2_sig_coeff_abs_gt0_flags 2 bytes */
+ *pu2_out_data_coeff = (UWORD16)sig_coeff_abs_gt0_flags;
+ pu2_out_data_coeff++;
+
+ /* storing u2_sig_coeff_abs_gt1_flags 2 bytes */
+ *pu2_out_data_coeff = (UWORD16)sig_coeff_abs_gt1_flags;
+ pu2_out_data_coeff++;
+
+ /* storing u2_sign_flags 2 bytes */
+ *pu2_out_data_coeff = (UWORD16)sign_flag;
+ pu2_out_data_coeff++;
+
+ /* Store the u2_abs_coeff_remaining[] */
+ for(i = 0; i < num_gt1_flag; i++)
+ {
+ volatile WORD32 bit_pos;
+ ASSERT(sig_coeff_abs_gt1_flags != 0);
+
+ GET_POS_MSB_32(bit_pos, sig_coeff_abs_gt1_flags);
+ sig_coeff_abs_gt1_flags = CLEAR_BIT(
+ sig_coeff_abs_gt1_flags,
+ bit_pos); /*sig_coeff_abs_gt1_flags &= (~(0x1<<bit_pos));*/
+
+ x_pos = (pu1_csb_table[bit_pos] & 0x3);
+ y_pos = (pu1_csb_table[bit_pos] >> 2);
+
+ quant_coeff = pi2_temp_quant_coeff[x_pos + (y_pos * trans_size)];
+
+ /* storing u2_abs_coeff_remaining[i] 2 bytes */
+ *pu2_out_data_coeff = (UWORD16)abs(quant_coeff) - 1;
+ pu2_out_data_coeff++;
+ }
+
+ break; /*We just need this loop for finding 1st non-zero csb only*/
+ }
+ }
+
+ /* go through remaining csb in the scan order */
+ for(trans_unit_idx = trans_unit_idx - 1; trans_unit_idx >= 0; trans_unit_idx--)
+ {
+ blk_row = pu1_trans_table[trans_unit_idx] >> shift_value; /*row of csb*/
+ blk_col = pu1_trans_table[trans_unit_idx] & mask_value; /*col of csb*/
+
+ /* u2_csbf0flags word */
+ u2_csbf0flags = 0xBAD0 | /* assuming csbf_buf has only 0 or 1 values */
+ (pu1_csbf_buf[pi4_subBlock2csbfId_map[pu1_trans_table[trans_unit_idx]]]);
+
+ /********************************************************************/
+ /* Minor hack: As per HEVC spec csbf is not signalled in stream for */
+ /* block0, instead sig coeff map is directly signalled. This is */
+ /* taken care by forcing csbf for block0 to be 1 even if it is 0 */
+ /********************************************************************/
+ if(0 == trans_unit_idx)
+ {
+ u2_csbf0flags |= 1;
+ }
+
+ if((blk_col + 1 < trans_size / 4)) /* checking right boundary */
+ {
+ if(pu1_csbf_buf[pi4_subBlock2csbfId_map[blk_row * trans_size / 4 + blk_col + 1]])
+ {
+ /* set the 2nd bit of u2_csbf0flags for right csbf */
+ u2_csbf0flags = u2_csbf0flags | (1 << 1);
+ }
+ }
+ if((blk_row + 1 < trans_size / 4)) /* checking bottom oundary */
+ {
+ if(pu1_csbf_buf[pi4_subBlock2csbfId_map[(blk_row + 1) * trans_size / 4 + blk_col]])
+ {
+ /* set the 3rd bit of u2_csbf0flags for bottom csbf */
+ u2_csbf0flags = u2_csbf0flags | (1 << 2);
+ }
+ }
+
+ /* storing u2_csbf0flags word */
+ *pu2_out_data_coeff = u2_csbf0flags;
+ pu2_out_data_coeff++;
+
+ /* check for the csb flag in our scan order */
+ if(u2_csbf0flags & 0x1)
+ {
+ WORD32 sig_coeff_abs_gt0_flags, sig_coeff_abs_gt1_flags;
+ WORD32 sign_flag;
+
+ int16x4_t quant0, quant1, quant2, quant3;
+ int16x8_t quant01, quant23;
+ int8x8_t a, b, c, d, shuffle_0, shuffle_1;
+ int8x16_t shuffle_out, shuffle_out_abs;
+ uint8x16_t sign, eq0, eq1;
+
+ /* x_pos=blk_col*4, y_pos=blk_row*4 */
+ WORD16 *pi2_temp_quant_coeff =
+ pi2_quant_coeffs + blk_col * 4 + (blk_row * 4) * trans_size;
+
+ /* Load Quant Values */
+ quant0 = vld1_s16(pi2_temp_quant_coeff + 0 * trans_size);
+ quant1 = vld1_s16(pi2_temp_quant_coeff + 1 * trans_size);
+ quant2 = vld1_s16(pi2_temp_quant_coeff + 2 * trans_size);
+ quant3 = vld1_s16(pi2_temp_quant_coeff + 3 * trans_size);
+
+ /* Two quant rows together */
+ quant01 = vcombine_s16(quant0, quant1);
+ quant23 = vcombine_s16(quant2, quant3);
+
+ /* All 4 rows: For sign, gt0, gt1 flags, even 8 bit version is enough! */
+ a = vqmovn_s16(quant01);
+ b = vqmovn_s16(quant23);
+
+ quant.val[0] = a;
+ quant.val[1] = b;
+
+ c = vget_low_s8(shuffle);
+ d = vget_high_s8(shuffle);
+
+ shuffle_0 = vtbl2_s8(quant, c);
+ shuffle_1 = vtbl2_s8(quant, d);
+ shuffle_out = vcombine_s8(shuffle_0, shuffle_1);
+
+ /* ABS values */
+ shuffle_out_abs = vabsq_s8(shuffle_out);
+
+ /* sign bits : Will get 0xFF if (0 > shuffle_out) */
+ sign = vcgtq_s8(zero, shuffle_out);
+ /* gt0 : Will get 0xFF if ( shuffle_out == 0 ) */
+ eq0 = vceqq_s8(shuffle_out, zero);
+ /* gt1 : Will get 0xFF if ( abs(shuffle_out) == 1 ) */
+ eq1 = vceqq_s8(shuffle_out_abs, one);
+
+ /* movemask:0 extended upper 16bits,Only low16 bits are required while storing */
+ sign_flag = movemask_neon(sign);
+ sig_coeff_abs_gt0_flags = movemask_neon(eq0);
+ sig_coeff_abs_gt1_flags = movemask_neon(eq1);
+
+ /* Update gt0 and gt1 based on ==0 and ==1 flag */
+ sig_coeff_abs_gt0_flags = ~sig_coeff_abs_gt0_flags; /* != 0 */
+ sig_coeff_abs_gt1_flags = ~sig_coeff_abs_gt1_flags; /* (abs) != 1 */
+ sig_coeff_abs_gt0_flags = sig_coeff_abs_gt0_flags & 0x0000FFFF; /* Clear high Word */
+ sig_coeff_abs_gt1_flags = sig_coeff_abs_gt1_flags & sig_coeff_abs_gt0_flags;
+
+ /* Update gt1 flag based on num_gt0_flag */
+ num_gt0_flag = ihevce_num_ones_popcnt(sig_coeff_abs_gt0_flags);
+
+ /* Find the position of 9th(MAX_GT_ONE+1) 1 in sig_coeff_abs_gt0_flags from MSB and update gt1 flag */
+ if(num_gt0_flag > MAX_GT_ONE)
+ {
+ WORD32 gt0_first_byte = sig_coeff_abs_gt0_flags & 0xFF;
+ WORD32 num_gt0_second_byte =
+ ihevce_num_ones_popcnt(sig_coeff_abs_gt0_flags & 0xFF00);
+ WORD32 pos_nineth_one; /* pos. of 9th one from MSB of sig_coeff_abs_gt0_flags */
+ WORD32 gt0_after_nineth_one, num_gt0_first_byte_to_nine;
+
+ num_gt0_first_byte_to_nine = (MAX_GT_ONE + 1) - num_gt0_second_byte;
+
+ while(num_gt0_first_byte_to_nine)
+ {
+ GET_POS_MSB_32(pos_nineth_one, gt0_first_byte);
+ gt0_first_byte = CLEAR_BIT(
+ gt0_first_byte,
+ pos_nineth_one); /*gt0_second_byte &= (~(0x1<<pos_eighth_one));*/
+ num_gt0_first_byte_to_nine--;
+ }
+
+ /* Update gt1 based on pos_nineth_one */
+ gt0_after_nineth_one = SET_BIT(gt0_first_byte, pos_nineth_one);
+ sig_coeff_abs_gt1_flags = sig_coeff_abs_gt1_flags | gt0_after_nineth_one;
+ }
+
+ num_gt1_flag = ihevce_num_ones_popcnt(sig_coeff_abs_gt1_flags);
+
+ /* storing u2_sig_coeff_abs_gt0_flags 2 bytes */
+ *pu2_out_data_coeff = (UWORD16)sig_coeff_abs_gt0_flags;
+ pu2_out_data_coeff++;
+
+ /* storing u2_sig_coeff_abs_gt1_flags 2 bytes */
+ *pu2_out_data_coeff = (UWORD16)sig_coeff_abs_gt1_flags;
+ pu2_out_data_coeff++;
+
+ /* storing u2_sign_flags 2 bytes */
+ *pu2_out_data_coeff = (UWORD16)sign_flag;
+ pu2_out_data_coeff++;
+
+ /* Store the u2_abs_coeff_remaining[] */
+ for(i = 0; i < num_gt1_flag; i++)
+ {
+ volatile WORD32 bit_pos;
+ ASSERT(sig_coeff_abs_gt1_flags != 0);
+
+ GET_POS_MSB_32(bit_pos, sig_coeff_abs_gt1_flags);
+ sig_coeff_abs_gt1_flags = CLEAR_BIT(
+ sig_coeff_abs_gt1_flags,
+ bit_pos); /*sig_coeff_abs_gt1_flags &= (~(0x1<<bit_pos));*/
+
+ x_pos = (pu1_csb_table[bit_pos] & 0x3);
+ y_pos = (pu1_csb_table[bit_pos] >> 2);
+
+ quant_coeff = pi2_temp_quant_coeff[x_pos + (y_pos * trans_size)];
+
+ /* storing u2_abs_coeff_remaining[i] 2 bytes */
+ *pu2_out_data_coeff = (UWORD16)abs(quant_coeff) - 1;
+ pu2_out_data_coeff++;
+ }
+ }
+ }
+
+ num_bytes = (UWORD8 *)pu2_out_data_coeff - pu1_out_data;
+ return num_bytes; /* Return the number of bytes written to out_data */
+}
diff --git a/encoder/arm/ihevce_ssd_and_sad_calculator_neon.c b/encoder/arm/ihevce_ssd_and_sad_calculator_neon.c
new file mode 100644
index 0000000..7ae9b74
--- /dev/null
+++ b/encoder/arm/ihevce_ssd_and_sad_calculator_neon.c
@@ -0,0 +1,317 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ihevce_ssd_and_sad_calculator_neon.c
+*
+* @brief
+* Contains intrinsic definitions of functions for ssd and sad computation
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+*
+* @remarks
+* None
+*
+********************************************************************************
+*/
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <string.h>
+#include <assert.h>
+#include <arm_neon.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevc_cmn_utils_neon.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+/**
+*******************************************************************************
+*
+* @brief
+*  Computes the sum of squared differences (SSD) and the sum of absolute
+*  differences (SAD) between a square source block and its reconstruction
+*
+* @par Description:
+*  Dedicated NEON kernels handle block sizes 4, 8, 16, 32 and 64. Any other
+*  size is handled by a scalar loop so that the SAD output is always written
+*  and the returned SSD is always meaningful.
+*
+* @param[in] pu1_recon
+*  Pointer to the top-left sample of the reconstructed block
+*
+* @param[in] recon_strd
+*  Distance in bytes between consecutive rows of pu1_recon
+*
+* @param[in] pu1_src
+*  Pointer to the top-left sample of the source block
+*
+* @param[in] src_strd
+*  Distance in bytes between consecutive rows of pu1_src
+*
+* @param[in] trans_size
+*  Width and height of the block in samples
+*
+* @param[out] pu4_blk_sad
+*  Receives the SAD of the block
+*
+* @returns
+*  SSD of the block
+*
+*******************************************************************************
+*/
+LWORD64 ihevce_ssd_and_sad_calculator_neon(
+    UWORD8 *pu1_recon,
+    WORD32 recon_strd,
+    UWORD8 *pu1_src,
+    WORD32 src_strd,
+    WORD32 trans_size,
+    UWORD32 *pu4_blk_sad)
+{
+    WORD32 i;
+
+    if(4 == trans_size)
+    {
+        /* NOTE(review): load_unaligned_u8q is defined outside this file; it is */
+        /* presumed to gather the four 4-byte rows into one 16-byte vector      */
+        const uint8x16_t src_u8 = load_unaligned_u8q(pu1_src, src_strd);
+        const uint8x16_t ref_u8 = load_unaligned_u8q(pu1_recon, recon_strd);
+        const uint8x8_t abs_l = vabd_u8(vget_low_u8(src_u8), vget_low_u8(ref_u8));
+        const uint8x8_t abs_h = vabd_u8(vget_high_u8(src_u8), vget_high_u8(ref_u8));
+        const uint16x8_t sq_abs_l = vmull_u8(abs_l, abs_l);
+        const uint16x8_t sq_abs_h = vmull_u8(abs_h, abs_h);
+        uint16x8_t abs_sum;
+        uint32x4_t b, d;
+        uint32x2_t ssd, sad;
+        uint64x2_t c;
+
+        /* SAD: widen, then pairwise-add down to two 64-bit partial sums */
+        abs_sum = vaddl_u8(abs_l, abs_h);
+        b = vpaddlq_u16(abs_sum);
+        c = vpaddlq_u32(b);
+        sad =
+            vadd_u32(vreinterpret_u32_u64(vget_low_u64(c)), vreinterpret_u32_u64(vget_high_u64(c)));
+        *pu4_blk_sad = vget_lane_u32(sad, 0);
+
+        /* SSD: widen the 16-bit squares to 32 bit before accumulating */
+        b = vaddl_u16(vget_low_u16(sq_abs_l), vget_high_u16(sq_abs_l));
+        d = vaddl_u16(vget_low_u16(sq_abs_h), vget_high_u16(sq_abs_h));
+        b = vaddq_u32(b, d);
+        ssd = vadd_u32(vget_low_u32(b), vget_high_u32(b));
+
+        return vget_lane_u64(vpaddl_u32(ssd), 0);
+    }
+    else if(8 == trans_size)
+    {
+        uint16x8_t abs_sum = vdupq_n_u16(0);
+        uint32x4_t sqabs_sum = vdupq_n_u32(0);
+        uint16x8_t abs, sqabs;
+        uint32x4_t tmp_a;
+        uint32x2_t sad, ssd;
+        uint64x2_t tmp_b;
+
+        for(i = 0; i < 8; i++)
+        {
+            const uint8x8_t src = vld1_u8(pu1_src);
+            const uint8x8_t ref = vld1_u8(pu1_recon);
+
+            abs = vabdl_u8(src, ref);
+            /* 255 * 255 = 65025 still fits in 16 bits */
+            sqabs = vmulq_u16(abs, abs);
+            /* at most 8 * 255 per lane, no 16-bit overflow */
+            abs_sum = vaddq_u16(abs_sum, abs);
+            tmp_a = vaddl_u16(vget_low_u16(sqabs), vget_high_u16(sqabs));
+            sqabs_sum = vaddq_u32(sqabs_sum, tmp_a);
+
+            pu1_src += src_strd;
+            pu1_recon += recon_strd;
+        }
+        tmp_a = vpaddlq_u16(abs_sum);
+        tmp_b = vpaddlq_u32(tmp_a);
+        sad = vadd_u32(
+            vreinterpret_u32_u64(vget_low_u64(tmp_b)), vreinterpret_u32_u64(vget_high_u64(tmp_b)));
+        *pu4_blk_sad = vget_lane_u32(sad, 0);
+        ssd = vadd_u32(vget_low_u32(sqabs_sum), vget_high_u32(sqabs_sum));
+
+        return vget_lane_u64(vpaddl_u32(ssd), 0);
+    }
+    else if(16 == trans_size)
+    {
+        uint16x8_t abs_sum_l = vdupq_n_u16(0);
+        uint16x8_t abs_sum_h = vdupq_n_u16(0);
+        uint32x4_t sqabs_sum_l = vdupq_n_u32(0);
+        uint32x4_t sqabs_sum_h = vdupq_n_u32(0);
+        uint16x8_t abs_l, abs_h;
+        uint16x8_t sqabs_l, sqabs_h;
+        uint32x4_t tmp_a, tmp_c;
+        uint64x2_t tmp_b;
+        uint32x2_t sad, ssd;
+
+        for(i = 0; i < 16; i++)
+        {
+            const uint8x16_t src = vld1q_u8(pu1_src);
+            const uint8x16_t pred = vld1q_u8(pu1_recon);
+
+            abs_l = vabdl_u8(vget_low_u8(src), vget_low_u8(pred));
+            abs_h = vabdl_u8(vget_high_u8(src), vget_high_u8(pred));
+
+            sqabs_l = vmulq_u16(abs_l, abs_l);
+            sqabs_h = vmulq_u16(abs_h, abs_h);
+
+            /* at most 16 * 255 per lane, no 16-bit overflow */
+            abs_sum_l = vaddq_u16(abs_sum_l, abs_l);
+            abs_sum_h = vaddq_u16(abs_sum_h, abs_h);
+
+            tmp_a = vaddl_u16(vget_low_u16(sqabs_l), vget_high_u16(sqabs_l));
+            tmp_c = vaddl_u16(vget_low_u16(sqabs_h), vget_high_u16(sqabs_h));
+
+            sqabs_sum_l = vaddq_u32(sqabs_sum_l, tmp_a);
+            sqabs_sum_h = vaddq_u32(sqabs_sum_h, tmp_c);
+            pu1_src += src_strd;
+            pu1_recon += recon_strd;
+        }
+        tmp_a = vpaddlq_u16(abs_sum_l);
+        tmp_a = vpadalq_u16(tmp_a, abs_sum_h);
+        tmp_b = vpaddlq_u32(tmp_a);
+        sad = vadd_u32(
+            vreinterpret_u32_u64(vget_low_u64(tmp_b)), vreinterpret_u32_u64(vget_high_u64(tmp_b)));
+        *pu4_blk_sad = vget_lane_u32(sad, 0);
+
+        sqabs_sum_l = vaddq_u32(sqabs_sum_l, sqabs_sum_h);
+        ssd = vadd_u32(vget_low_u32(sqabs_sum_l), vget_high_u32(sqabs_sum_l));
+
+        return vget_lane_u64(vpaddl_u32(ssd), 0);
+    }
+    else if(32 == trans_size)
+    {
+        uint16x8_t abs_sum = vdupq_n_u16(0);
+        uint16x8_t abs_sum_l, abs_sum_h;
+        uint32x4_t sqabs_sum_l = vdupq_n_u32(0);
+        uint32x4_t sqabs_sum_h = vdupq_n_u32(0);
+        uint8x8_t abs_l, abs_h;
+        uint16x8_t sqabs_l, sqabs_h;
+        uint32x4_t tmp_a, tmp_c;
+        uint64x2_t tmp_b;
+        uint32x2_t sad, ssd;
+
+        for(i = 0; i < 32; i++)
+        {
+            const uint8x16_t src_0 = vld1q_u8(pu1_src);
+            const uint8x16_t pred_0 = vld1q_u8(pu1_recon);
+            const uint8x16_t src_1 = vld1q_u8(pu1_src + 16);
+            const uint8x16_t pred_1 = vld1q_u8(pu1_recon + 16);
+
+            /* samples 0..15 of the row */
+            abs_l = vabd_u8(vget_low_u8(src_0), vget_low_u8(pred_0));
+            abs_h = vabd_u8(vget_high_u8(src_0), vget_high_u8(pred_0));
+
+            abs_sum_l = vaddl_u8(abs_l, abs_h);
+            sqabs_l = vmull_u8(abs_l, abs_l);
+            sqabs_h = vmull_u8(abs_h, abs_h);
+            tmp_a = vaddl_u16(vget_low_u16(sqabs_l), vget_high_u16(sqabs_l));
+            tmp_c = vaddl_u16(vget_low_u16(sqabs_h), vget_high_u16(sqabs_h));
+            sqabs_sum_l = vaddq_u32(sqabs_sum_l, tmp_a);
+            sqabs_sum_h = vaddq_u32(sqabs_sum_h, tmp_c);
+
+            /* samples 16..31 of the row */
+            abs_l = vabd_u8(vget_low_u8(src_1), vget_low_u8(pred_1));
+            abs_h = vabd_u8(vget_high_u8(src_1), vget_high_u8(pred_1));
+
+            abs_sum_h = vaddl_u8(abs_l, abs_h);
+            sqabs_l = vmull_u8(abs_l, abs_l);
+            sqabs_h = vmull_u8(abs_h, abs_h);
+            tmp_a = vaddl_u16(vget_low_u16(sqabs_l), vget_high_u16(sqabs_l));
+            tmp_c = vaddl_u16(vget_low_u16(sqabs_h), vget_high_u16(sqabs_h));
+            sqabs_sum_l = vaddq_u32(sqabs_sum_l, tmp_a);
+            sqabs_sum_h = vaddq_u32(sqabs_sum_h, tmp_c);
+
+            /* at most 4 * 255 per lane per row, 32 rows stay below 65536 */
+            abs_sum_l = vaddq_u16(abs_sum_l, abs_sum_h);
+            abs_sum = vaddq_u16(abs_sum, abs_sum_l);
+
+            pu1_src += src_strd;
+            pu1_recon += recon_strd;
+        }
+        tmp_a = vpaddlq_u16(abs_sum);
+        tmp_b = vpaddlq_u32(tmp_a);
+        sad = vadd_u32(
+            vreinterpret_u32_u64(vget_low_u64(tmp_b)), vreinterpret_u32_u64(vget_high_u64(tmp_b)));
+        *pu4_blk_sad = vget_lane_u32(sad, 0);
+
+        sqabs_sum_l = vaddq_u32(sqabs_sum_l, sqabs_sum_h);
+        ssd = vadd_u32(vget_low_u32(sqabs_sum_l), vget_high_u32(sqabs_sum_l));
+
+        return vget_lane_u64(vpaddl_u32(ssd), 0);
+    }
+    else if(64 == trans_size)
+    {
+        /* 64 rows would overflow a 16-bit SAD lane, so accumulate in 32 bit */
+        uint32x4_t abs_sum = vdupq_n_u32(0);
+        uint16x8_t abs_sum_0, abs_sum_1, abs_sum_2, abs_sum_3;
+        uint32x4_t sqabs_sum_l = vdupq_n_u32(0);
+        uint32x4_t sqabs_sum_h = vdupq_n_u32(0);
+        uint8x8_t abs_l, abs_h;
+        uint16x8_t sqabs_l, sqabs_h;
+        uint32x4_t tmp_a, tmp_c;
+        uint64x2_t tmp_b;
+        uint32x2_t sad, ssd;
+
+        for(i = 0; i < 64; i++)
+        {
+            const uint8x16_t src_0 = vld1q_u8(pu1_src);
+            const uint8x16_t pred_0 = vld1q_u8(pu1_recon);
+            const uint8x16_t src_1 = vld1q_u8(pu1_src + 16);
+            const uint8x16_t pred_1 = vld1q_u8(pu1_recon + 16);
+            const uint8x16_t src_2 = vld1q_u8(pu1_src + 32);
+            const uint8x16_t pred_2 = vld1q_u8(pu1_recon + 32);
+            const uint8x16_t src_3 = vld1q_u8(pu1_src + 48);
+            const uint8x16_t pred_3 = vld1q_u8(pu1_recon + 48);
+
+            /* samples 0..15 of the row */
+            abs_l = vabd_u8(vget_low_u8(src_0), vget_low_u8(pred_0));
+            abs_h = vabd_u8(vget_high_u8(src_0), vget_high_u8(pred_0));
+
+            abs_sum_0 = vaddl_u8(abs_l, abs_h);
+            sqabs_l = vmull_u8(abs_l, abs_l);
+            sqabs_h = vmull_u8(abs_h, abs_h);
+            tmp_a = vaddl_u16(vget_low_u16(sqabs_l), vget_high_u16(sqabs_l));
+            tmp_c = vaddl_u16(vget_low_u16(sqabs_h), vget_high_u16(sqabs_h));
+            sqabs_sum_l = vaddq_u32(sqabs_sum_l, tmp_a);
+            sqabs_sum_h = vaddq_u32(sqabs_sum_h, tmp_c);
+
+            /* samples 16..31 of the row */
+            abs_l = vabd_u8(vget_low_u8(src_1), vget_low_u8(pred_1));
+            abs_h = vabd_u8(vget_high_u8(src_1), vget_high_u8(pred_1));
+
+            abs_sum_1 = vaddl_u8(abs_l, abs_h);
+            sqabs_l = vmull_u8(abs_l, abs_l);
+            sqabs_h = vmull_u8(abs_h, abs_h);
+            tmp_a = vaddl_u16(vget_low_u16(sqabs_l), vget_high_u16(sqabs_l));
+            tmp_c = vaddl_u16(vget_low_u16(sqabs_h), vget_high_u16(sqabs_h));
+            sqabs_sum_l = vaddq_u32(sqabs_sum_l, tmp_a);
+            sqabs_sum_h = vaddq_u32(sqabs_sum_h, tmp_c);
+
+            /* samples 32..47 of the row */
+            abs_l = vabd_u8(vget_low_u8(src_2), vget_low_u8(pred_2));
+            abs_h = vabd_u8(vget_high_u8(src_2), vget_high_u8(pred_2));
+
+            abs_sum_2 = vaddl_u8(abs_l, abs_h);
+            sqabs_l = vmull_u8(abs_l, abs_l);
+            sqabs_h = vmull_u8(abs_h, abs_h);
+            tmp_a = vaddl_u16(vget_low_u16(sqabs_l), vget_high_u16(sqabs_l));
+            tmp_c = vaddl_u16(vget_low_u16(sqabs_h), vget_high_u16(sqabs_h));
+            sqabs_sum_l = vaddq_u32(sqabs_sum_l, tmp_a);
+            sqabs_sum_h = vaddq_u32(sqabs_sum_h, tmp_c);
+
+            /* samples 48..63 of the row */
+            abs_l = vabd_u8(vget_low_u8(src_3), vget_low_u8(pred_3));
+            abs_h = vabd_u8(vget_high_u8(src_3), vget_high_u8(pred_3));
+
+            abs_sum_3 = vaddl_u8(abs_l, abs_h);
+            sqabs_l = vmull_u8(abs_l, abs_l);
+            sqabs_h = vmull_u8(abs_h, abs_h);
+            tmp_a = vaddl_u16(vget_low_u16(sqabs_l), vget_high_u16(sqabs_l));
+            tmp_c = vaddl_u16(vget_low_u16(sqabs_h), vget_high_u16(sqabs_h));
+            sqabs_sum_l = vaddq_u32(sqabs_sum_l, tmp_a);
+            sqabs_sum_h = vaddq_u32(sqabs_sum_h, tmp_c);
+
+            /* fold this row's SAD (at most 8 * 255 per lane) into 32 bit */
+            abs_sum_0 = vaddq_u16(abs_sum_0, abs_sum_1);
+            abs_sum_2 = vaddq_u16(abs_sum_2, abs_sum_3);
+            abs_sum_0 = vaddq_u16(abs_sum_0, abs_sum_2);
+            tmp_a = vaddl_u16(vget_low_u16(abs_sum_0), vget_high_u16(abs_sum_0));
+            abs_sum = vaddq_u32(abs_sum, tmp_a);
+
+            pu1_src += src_strd;
+            pu1_recon += recon_strd;
+        }
+        tmp_b = vpaddlq_u32(abs_sum);
+        sad = vadd_u32(
+            vreinterpret_u32_u64(vget_low_u64(tmp_b)), vreinterpret_u32_u64(vget_high_u64(tmp_b)));
+        *pu4_blk_sad = vget_lane_u32(sad, 0);
+
+        sqabs_sum_l = vaddq_u32(sqabs_sum_l, sqabs_sum_h);
+        ssd = vadd_u32(vget_low_u32(sqabs_sum_l), vget_high_u32(sqabs_sum_l));
+
+        return vget_lane_u64(vpaddl_u32(ssd), 0);
+    }
+
+    /* No dedicated kernel for this size: scalar fallback, so that the SAD */
+    /* output is never left unwritten and the SSD is not silently zero     */
+    {
+        LWORD64 ssd_acc = 0;
+        UWORD32 sad_acc = 0;
+        WORD32 j;
+
+        for(i = 0; i < trans_size; i++)
+        {
+            for(j = 0; j < trans_size; j++)
+            {
+                const WORD32 diff = (WORD32)pu1_src[j] - (WORD32)pu1_recon[j];
+
+                sad_acc += (UWORD32)((diff < 0) ? -diff : diff);
+                ssd_acc += diff * diff;
+            }
+            pu1_src += src_strd;
+            pu1_recon += recon_strd;
+        }
+        *pu4_blk_sad = sad_acc;
+
+        return ssd_acc;
+    }
+}
diff --git a/encoder/arm/ihevce_ssd_calculator_neon.c b/encoder/arm/ihevce_ssd_calculator_neon.c
new file mode 100644
index 0000000..d62bcfc
--- /dev/null
+++ b/encoder/arm/ihevce_ssd_calculator_neon.c
@@ -0,0 +1,295 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ihevce_ssd_calculator_neon.c
+*
+* @brief
+* Contains intrinsic definitions of functions for ssd computation
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+*
+* @remarks
+* None
+*
+********************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <string.h>
+#include <assert.h>
+#include <arm_neon.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevc_cmn_utils_neon.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+/**
+ * Squared differences of a 4x4 block, returned as four 32-bit partial sums
+ * (the caller adds the lanes together to obtain the SSD).
+ *
+ * NOTE(review): load_unaligned_u8q / load_unaligned_u8qi are defined outside
+ * this file; they are presumed to gather the four rows of a planar /
+ * interleaved-chroma block into one 16-byte vector - confirm against
+ * ihevc_cmn_utils_neon.h.
+ */
+static INLINE uint32x4_t ihevce_4x4_ssd_computer_neon(
+    UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD32 src_strd, WORD32 pred_strd, WORD32 is_chroma)
+{
+    const uint8x16_t src_px = is_chroma ? load_unaligned_u8qi(pu1_src, src_strd)
+                                        : load_unaligned_u8q(pu1_src, src_strd);
+    const uint8x16_t pred_px = is_chroma ? load_unaligned_u8qi(pu1_pred, pred_strd)
+                                         : load_unaligned_u8q(pu1_pred, pred_strd);
+    const uint8x16_t diff = vabdq_u8(src_px, pred_px);
+    const uint8x8_t diff_lo = vget_low_u8(diff);
+    const uint8x8_t diff_hi = vget_high_u8(diff);
+
+    /* 8-bit differences squared into 16 bit (255 * 255 fits) */
+    const uint16x8_t sq_lo = vmull_u8(diff_lo, diff_lo);
+    const uint16x8_t sq_hi = vmull_u8(diff_hi, diff_hi);
+
+    /* widen to 32 bit while folding each 8-lane vector down to 4 lanes */
+    const uint32x4_t part_lo = vaddl_u16(vget_low_u16(sq_lo), vget_high_u16(sq_lo));
+    const uint32x4_t part_hi = vaddl_u16(vget_low_u16(sq_hi), vget_high_u16(sq_hi));
+
+    return vaddq_u32(part_lo, part_hi);
+}
+
+/**
+ * Squared differences of 8 samples from one row, returned as four 32-bit
+ * partial sums. With is_chroma set, 16 bytes are de-interleaved and only the
+ * even-indexed bytes (val[0]) are compared.
+ */
+static INLINE uint32x4_t
+    ihevce_1x8_ssd_computer_neon(UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD32 is_chroma)
+{
+    const uint8x8_t src_px = is_chroma ? vld2_u8(pu1_src).val[0] : vld1_u8(pu1_src);
+    const uint8x8_t pred_px = is_chroma ? vld2_u8(pu1_pred).val[0] : vld1_u8(pu1_pred);
+    const uint8x8_t diff = vabd_u8(src_px, pred_px);
+    const uint16x8_t sq_diff = vmull_u8(diff, diff);
+
+    /* widen to 32 bit while folding 8 lanes into 4 */
+    return vaddl_u16(vget_low_u16(sq_diff), vget_high_u16(sq_diff));
+}
+
+/**
+ * Squared differences of 16 samples from one row, returned as four 32-bit
+ * partial sums. With is_chroma set, 32 bytes are de-interleaved and only the
+ * even-indexed bytes (val[0]) are compared.
+ */
+static INLINE uint32x4_t
+    ihevce_1x16_ssd_computer_neon(UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD32 is_chroma)
+{
+    const uint8x16_t src_px = is_chroma ? vld2q_u8(pu1_src).val[0] : vld1q_u8(pu1_src);
+    const uint8x16_t pred_px = is_chroma ? vld2q_u8(pu1_pred).val[0] : vld1q_u8(pu1_pred);
+    const uint8x16_t diff = vabdq_u8(src_px, pred_px);
+    const uint8x8_t diff_lo = vget_low_u8(diff);
+    const uint8x8_t diff_hi = vget_high_u8(diff);
+    const uint16x8_t sq_lo = vmull_u8(diff_lo, diff_lo);
+    const uint16x8_t sq_hi = vmull_u8(diff_hi, diff_hi);
+
+    /* widen to 32 bit while folding each 8-lane vector down to 4 lanes */
+    const uint32x4_t part_lo = vaddl_u16(vget_low_u16(sq_lo), vget_high_u16(sq_lo));
+    const uint32x4_t part_hi = vaddl_u16(vget_low_u16(sq_hi), vget_high_u16(sq_hi));
+
+    return vaddq_u32(part_lo, part_hi);
+}
+
+/**
+ * Squared differences of 32 samples from one row, returned as four 32-bit
+ * partial sums. The row is handled as two groups of 16 samples; the second
+ * group starts 16 bytes further on for planar data and 32 bytes further on
+ * for interleaved chroma.
+ */
+static INLINE uint32x4_t
+    ihevce_1x32_ssd_computer_neon(UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD32 is_chroma)
+{
+    const WORD32 second_half = is_chroma ? 32 : 16;
+    const uint32x4_t ssd_first = ihevce_1x16_ssd_computer_neon(pu1_src, pu1_pred, is_chroma);
+    const uint32x4_t ssd_second = ihevce_1x16_ssd_computer_neon(
+        pu1_src + second_half, pu1_pred + second_half, is_chroma);
+
+    return vaddq_u32(ssd_first, ssd_second);
+}
+
+/**
+ * Squared differences of 64 samples from one row, returned as four 32-bit
+ * partial sums. The row is walked in four groups of 16 samples; consecutive
+ * groups are 16 bytes apart for planar data and 32 bytes apart for
+ * interleaved chroma, where only the even-indexed bytes (val[0]) are used.
+ */
+static INLINE uint32x4_t
+    ihevce_1x64_ssd_computer_neon(UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD32 is_chroma)
+{
+    const WORD32 group_bytes = is_chroma ? 32 : 16;
+    uint32x4_t ssd_acc = vdupq_n_u32(0);
+    WORD32 group;
+
+    for(group = 0; group < 4; group++)
+    {
+        const UWORD8 *pu1_s = pu1_src + group * group_bytes;
+        const UWORD8 *pu1_p = pu1_pred + group * group_bytes;
+        const uint8x16_t src_px = is_chroma ? vld2q_u8(pu1_s).val[0] : vld1q_u8(pu1_s);
+        const uint8x16_t pred_px = is_chroma ? vld2q_u8(pu1_p).val[0] : vld1q_u8(pu1_p);
+        const uint8x16_t diff = vabdq_u8(src_px, pred_px);
+        const uint8x8_t diff_lo = vget_low_u8(diff);
+        const uint8x8_t diff_hi = vget_high_u8(diff);
+        const uint16x8_t sq_lo = vmull_u8(diff_lo, diff_lo);
+        const uint16x8_t sq_hi = vmull_u8(diff_hi, diff_hi);
+
+        /* widen to 32 bit while folding each 8-lane vector down to 4 lanes */
+        ssd_acc = vaddq_u32(ssd_acc, vaddl_u16(vget_low_u16(sq_lo), vget_high_u16(sq_lo)));
+        ssd_acc = vaddq_u32(ssd_acc, vaddl_u16(vget_low_u16(sq_hi), vget_high_u16(sq_hi)));
+    }
+
+    return ssd_acc;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Computes the SSD between two wd x ht sample blocks
+*
+* @par Description:
+*  Widths 8, 16, 32 and 64 use a dedicated per-row helper; any other multiple
+*  of 8 is processed 8 samples at a time. Width 4 is supported for a 4x4 block
+*  only. For every other width no samples are accumulated and 0 is returned.
+*
+* @param[in] pu1_inp
+*  Pointer to the first sample of the input block
+*
+* @param[in] pu1_ref
+*  Pointer to the first sample of the reference block
+*
+* @param[in] inp_stride
+*  Distance in bytes between consecutive rows of pu1_inp
+*
+* @param[in] ref_stride
+*  Distance in bytes between consecutive rows of pu1_ref
+*
+* @param[in] wd
+*  Block width in samples (not bytes) of the plane being compared
+*
+* @param[in] ht
+*  Block height in rows
+*
+* @param[in] is_chroma
+*  0 : samples are contiguous bytes
+*  1 : samples are interleaved with another plane, i.e. 2 bytes apart
+*
+* @returns
+*  SSD of the block
+*
+*******************************************************************************
+*/
+static LWORD64 ihevce_ssd_calculator_plane_neon(
+    UWORD8 *pu1_inp,
+    UWORD8 *pu1_ref,
+    UWORD32 inp_stride,
+    UWORD32 ref_stride,
+    UWORD32 wd,
+    UWORD32 ht,
+    WORD32 is_chroma)
+{
+    uint32x4_t ssd = vdupq_n_u32(0);
+    uint32x2_t sum;
+
+    if(wd >= 8)
+    {
+        /* 8 samples span 16 bytes when the plane is interleaved; advancing */
+        /* by 8 bytes there would re-read samples and skip the row's tail   */
+        const UWORD32 bytes_per_8_samples = is_chroma ? 16 : 8;
+        UWORD32 row;
+
+        for(row = ht; row > 0; row--)
+        {
+            if(wd == 8)
+            {
+                ssd = vaddq_u32(ssd, ihevce_1x8_ssd_computer_neon(pu1_inp, pu1_ref, is_chroma));
+            }
+            else if(wd == 16)
+            {
+                ssd = vaddq_u32(ssd, ihevce_1x16_ssd_computer_neon(pu1_inp, pu1_ref, is_chroma));
+            }
+            else if(wd == 32)
+            {
+                ssd = vaddq_u32(ssd, ihevce_1x32_ssd_computer_neon(pu1_inp, pu1_ref, is_chroma));
+            }
+            else if(wd == 64)
+            {
+                ssd = vaddq_u32(ssd, ihevce_1x64_ssd_computer_neon(pu1_inp, pu1_ref, is_chroma));
+            }
+            else if(wd % 8 == 0)
+            {
+                UWORD32 col;
+                UWORD8 *inp = pu1_inp, *ref = pu1_ref;
+
+                for(col = 0; col < wd; col += 8)
+                {
+                    ssd = vaddq_u32(ssd, ihevce_1x8_ssd_computer_neon(inp, ref, is_chroma));
+                    ref = ref + bytes_per_8_samples;
+                    inp = inp + bytes_per_8_samples;
+                }
+            }
+
+            pu1_inp += inp_stride;
+            pu1_ref += ref_stride;
+        }
+    }
+    else if(wd == 4)
+    {
+        /* the 4-wide helper always consumes exactly four rows */
+        assert(ht == 4);
+        ssd = ihevce_4x4_ssd_computer_neon(pu1_inp, pu1_ref, inp_stride, ref_stride, is_chroma);
+    }
+
+    /* horizontal add of the four 32-bit partial sums into one 64-bit value */
+    sum = vadd_u32(vget_low_u32(ssd), vget_high_u32(ssd));
+    return vget_lane_u64(vpaddl_u32(sum), 0);
+}
+
+/**
+ * SSD between two wd x ht blocks whose samples are contiguous bytes.
+ * Forwards to ihevce_ssd_calculator_plane_neon with is_chroma = 0.
+ */
+LWORD64 ihevce_ssd_calculator_neon(
+    UWORD8 *pu1_inp, UWORD8 *pu1_ref, UWORD32 inp_stride, UWORD32 ref_stride, UWORD32 wd, UWORD32 ht)
+{
+    const WORD32 is_chroma = 0;
+
+    return ihevce_ssd_calculator_plane_neon(
+        pu1_inp, pu1_ref, inp_stride, ref_stride, wd, ht, is_chroma);
+}
+
+/**
+ * SSD between two wd x ht blocks of one plane stored interleaved with
+ * another (samples 2 bytes apart), starting at the byte the pointers address.
+ * Forwards to ihevce_ssd_calculator_plane_neon with is_chroma = 1.
+ */
+LWORD64 ihevce_chroma_interleave_ssd_calculator_neon(
+    UWORD8 *pu1_inp, UWORD8 *pu1_ref, UWORD32 inp_stride, UWORD32 ref_stride, UWORD32 wd, UWORD32 ht)
+{
+    const WORD32 is_chroma = 1;
+
+    return ihevce_ssd_calculator_plane_neon(
+        pu1_inp, pu1_ref, inp_stride, ref_stride, wd, ht, is_chroma);
+}
diff --git a/encoder/arm/ihevce_subpel_neon.c b/encoder/arm/ihevce_subpel_neon.c
new file mode 100644
index 0000000..583f7c0
--- /dev/null
+++ b/encoder/arm/ihevce_subpel_neon.c
@@ -0,0 +1,444 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+******************************************************************************
+* @file
+* ihevce_subpel_neon.c
+*
+* @brief
+* Subpel refinement modules for ME algo
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+*
+* @remarks
+* None
+*
+********************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <arm_neon.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevc_cmn_utils_neon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_debug.h"
+#include "ihevc_deblk.h"
+#include "ihevc_defs.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_macros.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_sao.h"
+#include "ihevc_structs.h"
+#include "ihevc_weighted_pred.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevce_api.h"
+#include "ihevce_defs.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_ipe_instr_set_router.h"
+#include "ihevce_global_tables.h"
+
+#include "hme_datatype.h"
+#include "hme_common_defs.h"
+#include "hme_interface.h"
+#include "hme_defs.h"
+
+#include "ihevce_me_instr_set_router.h"
+
+/*****************************************************************************/
+/* Function Declarations */
+/*****************************************************************************/
+FT_CALC_SATD_AND_RESULT hme_evalsatd_update_1_best_result_pt_pu_16x16_neon;
+
+WORD32 ihevce_had4_4x4_neon(
+ UWORD8 *pu1_src,
+ WORD32 src_strd,
+ UWORD8 *pu1_pred,
+ WORD32 pred_strd,
+ WORD16 *pi2_dst4x4,
+ WORD32 dst_strd,
+ WORD32 *pi4_hsad,
+ WORD32 hsad_stride,
+ WORD32 i4_frm_qstep);
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/*
+ * Writes the rounded average (vrhaddq_u8) of the two source blocks to pu1_dst.
+ * Sources are fetched with load_unaligned_u8q() and the result is written with
+ * store_unaligned_u8q(), each using the stride supplied for that buffer.
+ */
+static void hme_4x4_qpel_interp_avg_neon(
+    UWORD8 *pu1_src_a,
+    UWORD8 *pu1_src_b,
+    WORD32 src_a_strd,
+    WORD32 src_b_strd,
+    UWORD8 *pu1_dst,
+    WORD32 dst_strd)
+{
+    const uint8x16_t blk_a = load_unaligned_u8q(pu1_src_a, src_a_strd);
+    const uint8x16_t blk_b = load_unaligned_u8q(pu1_src_b, src_b_strd);
+
+    store_unaligned_u8q(pu1_dst, dst_strd, vrhaddq_u8(blk_a, blk_b));
+}
+
+/*
+ * Rounded average of two 8-sample-wide blocks, one row per iteration.
+ * ht rows are processed; each pointer advances by its own stride per row.
+ */
+static void hme_8xn_qpel_interp_avg_neon(
+    UWORD8 *pu1_src_a,
+    UWORD8 *pu1_src_b,
+    WORD32 src_a_strd,
+    WORD32 src_b_strd,
+    UWORD8 *pu1_dst,
+    WORD32 dst_strd,
+    WORD32 ht)
+{
+    WORD32 rows_left;
+
+    for(rows_left = ht; rows_left > 0; rows_left--)
+    {
+        const uint8x8_t row_a = vld1_u8(pu1_src_a);
+        const uint8x8_t row_b = vld1_u8(pu1_src_b);
+
+        vst1_u8(pu1_dst, vrhadd_u8(row_a, row_b));
+
+        pu1_dst += dst_strd;
+        pu1_src_b += src_b_strd;
+        pu1_src_a += src_a_strd;
+    }
+}
+
+/*
+ * Rounded average of two 16-sample-wide blocks, one row per iteration.
+ * ht rows are processed; each pointer advances by its own stride per row.
+ */
+static void hme_16xn_qpel_interp_avg_neon(
+    UWORD8 *pu1_src_a,
+    UWORD8 *pu1_src_b,
+    WORD32 src_a_strd,
+    WORD32 src_b_strd,
+    UWORD8 *pu1_dst,
+    WORD32 dst_strd,
+    WORD32 ht)
+{
+    WORD32 rows_left;
+
+    for(rows_left = ht; rows_left > 0; rows_left--)
+    {
+        const uint8x16_t row_a = vld1q_u8(pu1_src_a);
+        const uint8x16_t row_b = vld1q_u8(pu1_src_b);
+
+        vst1q_u8(pu1_dst, vrhaddq_u8(row_a, row_b));
+
+        pu1_dst += dst_strd;
+        pu1_src_b += src_b_strd;
+        pu1_src_a += src_a_strd;
+    }
+}
+
+/*
+ * Rounded average of two 32-sample-wide blocks, one row per iteration.
+ * Each row is handled as two 16-byte halves; both halves are loaded and
+ * averaged before either half is stored.
+ */
+static void hme_32xn_qpel_interp_avg_neon(
+    UWORD8 *pu1_src_a,
+    UWORD8 *pu1_src_b,
+    WORD32 src_a_strd,
+    WORD32 src_b_strd,
+    UWORD8 *pu1_dst,
+    WORD32 dst_strd,
+    WORD32 ht)
+{
+    WORD32 rows_left;
+
+    for(rows_left = ht; rows_left > 0; rows_left--)
+    {
+        const uint8x16_t avg_lo = vrhaddq_u8(vld1q_u8(pu1_src_a), vld1q_u8(pu1_src_b));
+        const uint8x16_t avg_hi = vrhaddq_u8(vld1q_u8(pu1_src_a + 16), vld1q_u8(pu1_src_b + 16));
+
+        vst1q_u8(pu1_dst, avg_lo);
+        vst1q_u8(pu1_dst + 16, avg_hi);
+
+        pu1_dst += dst_strd;
+        pu1_src_b += src_b_strd;
+        pu1_src_a += src_a_strd;
+    }
+}
+
+/*
+ * Rounded average of two blocks whose width and height are multiples of 4
+ * (both asserted). The block is processed in bands of 4 rows; within a band
+ * the widest kernel that still fits the remaining width (32, 16, 8, then 4)
+ * is applied from left to right.
+ */
+static void hme_4mx4n_qpel_interp_avg_neon(
+    UWORD8 *pu1_src_a,
+    UWORD8 *pu1_src_b,
+    WORD32 src_a_strd,
+    WORD32 src_b_strd,
+    UWORD8 *pu1_dst,
+    WORD32 dst_strd,
+    WORD32 blk_wd,
+    WORD32 blk_ht)
+{
+    WORD32 row, col;
+
+    assert(blk_wd % 4 == 0);
+    assert(blk_ht % 4 == 0);
+
+    for(row = 0; row < blk_ht; row += 4)
+    {
+        col = 0;
+        while(col < blk_wd)
+        {
+            UWORD8 *pu1_a = pu1_src_a + col;
+            UWORD8 *pu1_b = pu1_src_b + col;
+            UWORD8 *pu1_d = pu1_dst + col;
+            const WORD32 remaining = blk_wd - col;
+            WORD32 step;
+
+            if(remaining >= 32)
+            {
+                hme_32xn_qpel_interp_avg_neon(pu1_a, pu1_b, src_a_strd, src_b_strd, pu1_d, dst_strd, 4);
+                step = 32;
+            }
+            else if(remaining >= 16)
+            {
+                hme_16xn_qpel_interp_avg_neon(pu1_a, pu1_b, src_a_strd, src_b_strd, pu1_d, dst_strd, 4);
+                step = 16;
+            }
+            else if(remaining >= 8)
+            {
+                hme_8xn_qpel_interp_avg_neon(pu1_a, pu1_b, src_a_strd, src_b_strd, pu1_d, dst_strd, 4);
+                step = 8;
+            }
+            else
+            {
+                hme_4x4_qpel_interp_avg_neon(pu1_a, pu1_b, src_a_strd, src_b_strd, pu1_d, dst_strd);
+                step = 4;
+            }
+
+            col += step;
+        }
+
+        /* Move all three buffers down to the next band of 4 rows */
+        pu1_src_a += (4 * src_a_strd);
+        pu1_src_b += (4 * src_b_strd);
+        pu1_dst += (4 * dst_strd);
+    }
+}
+
+/*
+ * Produces the prediction for a quarter-pel motion vector.
+ *
+ * The low two bits of each MV component select an entry of
+ * gas_qpel_inp_buf_cfg; the remaining bits give the sample offset into the
+ * reference buffers. When the entry names the same buffer twice, no averaging
+ * is done and pu1_final_out simply points into that reference buffer.
+ * Otherwise the two sources are averaged into apu1_interp_out[i4_buf_id] and
+ * pu1_final_out / i4_final_out_stride describe that output buffer.
+ */
+void hme_qpel_interp_avg_neon(interp_prms_t *ps_prms, S32 i4_mv_x, S32 i4_mv_y, S32 i4_buf_id)
+{
+    const S32 i4_stride = ps_prms->i4_ref_stride;
+    const S32 i4_frac_x = i4_mv_x & 3;
+    const S32 i4_frac_y = i4_mv_y & 3;
+    const S32 i4_fpel_off = (i4_mv_x >> 2) + (i4_mv_y >> 2) * i4_stride;
+    /* Descriptor holding the buffer ids and x/y offsets for this fraction */
+    qpel_input_buf_cfg_t *ps_cfg = &gas_qpel_inp_buf_cfg[i4_frac_y][i4_frac_x];
+    U08 *pu1_a, *pu1_b, *pu1_avg;
+
+    pu1_a = ps_prms->ppu1_ref[ps_cfg->i1_buf_id1];
+    pu1_a += ps_cfg->i1_buf_xoff1 + i4_fpel_off;
+    pu1_a += (ps_cfg->i1_buf_yoff1 * i4_stride);
+
+    if(ps_cfg->i1_buf_id1 == ps_cfg->i1_buf_id2)
+    {
+        /* fxfy/hxfy/fxhy/hxhy: the first source is already the answer */
+        ps_prms->pu1_final_out = pu1_a;
+        ps_prms->i4_final_out_stride = i4_stride;
+        return;
+    }
+
+    pu1_b = ps_prms->ppu1_ref[ps_cfg->i1_buf_id2];
+    pu1_b += ps_cfg->i1_buf_xoff2 + i4_fpel_off;
+    pu1_b += (ps_cfg->i1_buf_yoff2 * i4_stride);
+
+    pu1_avg = ps_prms->apu1_interp_out[i4_buf_id];
+
+    hme_4mx4n_qpel_interp_avg_neon(
+        pu1_a,
+        pu1_b,
+        ps_prms->i4_ref_stride,
+        ps_prms->i4_ref_stride,
+        pu1_avg,
+        ps_prms->i4_out_stride,
+        ps_prms->i4_blk_wd,
+        ps_prms->i4_blk_ht);
+
+    ps_prms->i4_final_out_stride = ps_prms->i4_out_stride;
+    ps_prms->pu1_final_out = pu1_avg;
+}
+
+// TODO: Can this function and hme_qpel_interp_avg_neon be unified
+/*
+ * Averages the two sources selected by the quarter-pel fraction of the MV
+ * into apu1_interp_out[i4_buf_id]. Unlike hme_qpel_interp_avg_neon() the
+ * average is always computed, and the result is reported through
+ * ppu1_final[i4_buf_id] / pi4_final_stride[i4_buf_id] instead of ps_prms.
+ */
+void hme_qpel_interp_avg_1pt_neon(
+    interp_prms_t *ps_prms,
+    S32 i4_mv_x,
+    S32 i4_mv_y,
+    S32 i4_buf_id,
+    U08 **ppu1_final,
+    S32 *pi4_final_stride)
+{
+    const S32 i4_stride = ps_prms->i4_ref_stride;
+    const S32 i4_frac_x = i4_mv_x & 3;
+    const S32 i4_frac_y = i4_mv_y & 3;
+    const S32 i4_fpel_off = (i4_mv_x >> 2) + (i4_mv_y >> 2) * i4_stride;
+    /* Descriptor holding the buffer ids and x/y offsets for this fraction */
+    qpel_input_buf_cfg_t *ps_cfg = &gas_qpel_inp_buf_cfg[i4_frac_y][i4_frac_x];
+    U08 *pu1_a, *pu1_b, *pu1_avg;
+
+    pu1_a = ps_prms->ppu1_ref[ps_cfg->i1_buf_id1];
+    pu1_a += ps_cfg->i1_buf_xoff1 + i4_fpel_off;
+    pu1_a += (ps_cfg->i1_buf_yoff1 * i4_stride);
+
+    pu1_b = ps_prms->ppu1_ref[ps_cfg->i1_buf_id2];
+    pu1_b += ps_cfg->i1_buf_xoff2 + i4_fpel_off;
+    pu1_b += (ps_cfg->i1_buf_yoff2 * i4_stride);
+
+    pu1_avg = ps_prms->apu1_interp_out[i4_buf_id];
+
+    hme_4mx4n_qpel_interp_avg_neon(
+        pu1_a,
+        pu1_b,
+        ps_prms->i4_ref_stride,
+        ps_prms->i4_ref_stride,
+        pu1_avg,
+        ps_prms->i4_out_stride,
+        ps_prms->i4_blk_wd,
+        ps_prms->i4_blk_ht);
+
+    ppu1_final[i4_buf_id] = pu1_avg;
+    pi4_final_stride[i4_buf_id] = ps_prms->i4_out_stride;
+}
+
+/*
+ * Interpolates the two vertical quarter-pel neighbours of (i4_mv_x, i4_mv_y):
+ * mv_y + 1 goes to buffer 3 first, then mv_y - 1 goes to buffer 1.
+ */
+void hme_qpel_interp_avg_2pt_vert_with_reuse_neon(
+    interp_prms_t *ps_prms, S32 i4_mv_x, S32 i4_mv_y, U08 **ppu1_final, S32 *pi4_final_stride)
+{
+    const S32 i4_mv_y_plus = i4_mv_y + 1;
+    const S32 i4_mv_y_minus = i4_mv_y - 1;
+
+    hme_qpel_interp_avg_1pt_neon(
+        ps_prms, i4_mv_x, i4_mv_y_plus, 3, ppu1_final, pi4_final_stride);
+    hme_qpel_interp_avg_1pt_neon(
+        ps_prms, i4_mv_x, i4_mv_y_minus, 1, ppu1_final, pi4_final_stride);
+}
+
+/*
+ * Interpolates the two horizontal quarter-pel neighbours of (i4_mv_x, i4_mv_y):
+ * mv_x + 1 goes to buffer 2 first, then mv_x - 1 goes to buffer 0.
+ */
+void hme_qpel_interp_avg_2pt_horz_with_reuse_neon(
+    interp_prms_t *ps_prms, S32 i4_mv_x, S32 i4_mv_y, U08 **ppu1_final, S32 *pi4_final_stride)
+{
+    const S32 i4_mv_x_plus = i4_mv_x + 1;
+    const S32 i4_mv_x_minus = i4_mv_x - 1;
+
+    hme_qpel_interp_avg_1pt_neon(
+        ps_prms, i4_mv_x_plus, i4_mv_y, 2, ppu1_final, pi4_final_stride);
+    hme_qpel_interp_avg_1pt_neon(
+        ps_prms, i4_mv_x_minus, i4_mv_y, 0, ppu1_final, pi4_final_stride);
+}
+
+/*
+ * Evaluates SATD for one 16x16 candidate and updates the single best result
+ * kept per partition in the sub-pel refinement context.
+ *
+ * Step 1: ihevce_had4_4x4_neon() is run on each 8x8 quadrant. Its return
+ *         value is used as the quadrant's 8x8 SATD; &ai4_satd_4x4[idx] with a
+ *         stride of 4 is passed as its pi4_hsad argument (presumably filled
+ *         with the four 4x4 SATDs of that quadrant - confirm against the
+ *         helper).
+ * Step 2: the SATD grid is filled for every partition shape from those 8x8
+ *         and 4x4 values.
+ * Step 3: for each valid partition the total cost (clipped SATD + MV cost) is
+ *         compared with the stored best cost, and the stored result is
+ *         replaced when the new cost is strictly smaller.
+ */
+void hme_evalsatd_update_1_best_result_pt_pu_16x16_neon(
+    err_prms_t *ps_prms, result_upd_prms_t *ps_result_prms)
+{
+    mv_refine_ctxt_t *ps_ctxt = ps_result_prms->ps_subpel_refine_ctxt;
+    S32 *pi4_grid = ps_prms->pi4_sad_grid;
+    S32 *pi4_part_ids = &ps_ctxt->ai4_part_id[0];
+
+    U08 *pu1_inp = ps_prms->pu1_inp;
+    U08 *pu1_ref = ps_prms->pu1_ref;
+    S32 inp_stride = ps_prms->i4_inp_stride;
+    S32 ref_stride = ps_prms->i4_ref_stride;
+
+    S32 ai4_satd_4x4[16];
+    S32 ai4_satd_8x8[4];
+    S32 blk, i;
+
+    /* Step 1: one HAD call per 8x8 quadrant (TL, TR, BL, BR) */
+    for(blk = 0; blk < 4; blk++)
+    {
+        S32 blk_x = blk & 0x1;
+        S32 blk_y = blk >> 1;
+        S16 idx = blk_x * 2 + blk_y * 8;
+        U08 *pu1_src = pu1_inp + blk_x * 8 + blk_y * inp_stride * 8;
+        U08 *pu1_pred = pu1_ref + blk_x * 8 + blk_y * ref_stride * 8;
+
+        ai4_satd_8x8[blk] = ihevce_had4_4x4_neon(
+            pu1_src, inp_stride, pu1_pred, ref_stride, NULL, 0, &ai4_satd_4x4[idx], 4, 0);
+    }
+
+    /* Step 2a: 16x16 and the four 8x8 partitions */
+    pi4_grid[PART_ID_2Nx2N] =
+        ai4_satd_8x8[0] + ai4_satd_8x8[1] + ai4_satd_8x8[2] + ai4_satd_8x8[3];
+
+    pi4_grid[PART_ID_NxN_TL] = ai4_satd_8x8[0];
+    pi4_grid[PART_ID_NxN_TR] = ai4_satd_8x8[1];
+    pi4_grid[PART_ID_NxN_BL] = ai4_satd_8x8[2];
+    pi4_grid[PART_ID_NxN_BR] = ai4_satd_8x8[3];
+
+    /* Step 2b: 8x16 and 16x8 partitions from pairs of 8x8 values */
+    pi4_grid[PART_ID_Nx2N_L] = ai4_satd_8x8[0] + ai4_satd_8x8[2];
+    pi4_grid[PART_ID_Nx2N_R] = ai4_satd_8x8[1] + ai4_satd_8x8[3];
+    pi4_grid[PART_ID_2NxN_T] = ai4_satd_8x8[0] + ai4_satd_8x8[1];
+    pi4_grid[PART_ID_2NxN_B] = ai4_satd_8x8[2] + ai4_satd_8x8[3];
+
+    /* Step 2c: the narrow AMP partitions are sums of four 4x4 values ... */
+    pi4_grid[PART_ID_nLx2N_L] =
+        ai4_satd_4x4[0] + ai4_satd_4x4[2] + ai4_satd_4x4[8] + ai4_satd_4x4[10];
+    pi4_grid[PART_ID_nRx2N_R] =
+        ai4_satd_4x4[5] + ai4_satd_4x4[7] + ai4_satd_4x4[13] + ai4_satd_4x4[15];
+    pi4_grid[PART_ID_2NxnU_T] =
+        ai4_satd_4x4[0] + ai4_satd_4x4[1] + ai4_satd_4x4[4] + ai4_satd_4x4[5];
+    pi4_grid[PART_ID_2NxnD_B] =
+        ai4_satd_4x4[10] + ai4_satd_4x4[11] + ai4_satd_4x4[14] + ai4_satd_4x4[15];
+
+    /* ... and the wide AMP partitions are the 16x16 value minus the narrow one */
+    pi4_grid[PART_ID_nLx2N_R] = pi4_grid[PART_ID_2Nx2N] - pi4_grid[PART_ID_nLx2N_L];
+    pi4_grid[PART_ID_nRx2N_L] = pi4_grid[PART_ID_2Nx2N] - pi4_grid[PART_ID_nRx2N_R];
+    pi4_grid[PART_ID_2NxnU_B] = pi4_grid[PART_ID_2Nx2N] - pi4_grid[PART_ID_2NxnU_T];
+    pi4_grid[PART_ID_2NxnD_T] = pi4_grid[PART_ID_2Nx2N] - pi4_grid[PART_ID_2NxnD_B];
+
+    /* Step 3: keep the candidate wherever it beats the stored best cost */
+    for(i = 0; i < ps_ctxt->i4_num_valid_parts; i++)
+    {
+        S32 part_id = pi4_part_ids[i];
+        S32 id = i;
+        S32 i4_mv_cost, i4_sad, i4_tot_cost, best_node_cost;
+
+        /* With more than 8 valid parts the result arrays are indexed by part id */
+        if(ps_ctxt->i4_num_valid_parts > 8)
+        {
+            id = part_id;
+        }
+
+        i4_mv_cost = ps_ctxt->i2_mv_cost[0][id];
+        i4_sad = CLIP3(pi4_grid[part_id], 0, 0x7fff);
+        i4_tot_cost = CLIP_S16(i4_sad + i4_mv_cost);
+        best_node_cost = CLIP_S16(ps_ctxt->i2_tot_cost[0][id]);
+
+        if(i4_tot_cost >= best_node_cost)
+        {
+            continue;
+        }
+
+        ps_ctxt->i2_tot_cost[0][id] = i4_tot_cost;
+        ps_ctxt->i2_mv_cost[0][id] = i4_mv_cost;
+        ps_ctxt->i2_mv_x[0][id] = ps_result_prms->i2_mv_x;
+        ps_ctxt->i2_mv_y[0][id] = ps_result_prms->i2_mv_y;
+        ps_ctxt->i2_ref_idx[0][id] = ps_result_prms->i1_ref_idx;
+    }
+}
diff --git a/encoder/bit_allocation.c b/encoder/bit_allocation.c
new file mode 100644
index 0000000..df9bf26
--- /dev/null
+++ b/encoder/bit_allocation.c
@@ -0,0 +1,3235 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file bit_allocation.c
+*
+* \brief
+* This file contains bit processing functions
+*
+* \date
+*
+* \author
+* ittiam
+*
+******************************************************************************
+*/
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+
+/* User include files */
+#include "ittiam_datatypes.h"
+#include "mem_req_and_acq.h"
+#include "rc_common.h"
+#include "rc_cntrl_param.h"
+#include "var_q_operator.h"
+#include "fixed_point_error_bits.h"
+#include "cbr_buffer_control.h"
+#include "rc_rd_model.h"
+#include "est_sad.h"
+#include "cbr_buffer_control.h"
+#include "picture_type.h"
+#include "bit_allocation.h"
+#include "trace_support.h"
+#include "rc_frame_info_collector.h"
+#include "rate_control_api.h"
+
+/** Macros **/
+/* The whole expansion is parenthesised so that the result composes correctly
+ * with surrounding operators (e.g. 2 * MIN(a, b) or MAX(a, b) + 1).
+ * NOTE: each argument is evaluated more than once; do not pass expressions
+ * with side effects. */
+#define MIN(x, y) (((x) < (y)) ? (x) : (y))
+#define MAX(x, y) (((x) < (y)) ? (y) : (x))
+
+/* Remaining-bits-in-period (rbip) state; embedded in bit_allocation_t as s_rbip */
+typedef struct
+{
+ /* Running bit budget for the period: set by init_rbip()/set_rbip() from
+    bits_per_frm * tot_frms_in_gop * num_intra_frm_interval and adjusted
+    through overflow_avoided_summation() */
+ WORD32 i4_rem_bits_in_period;
+ /* Storing inputs */
+ WORD32 i4_tot_frms_in_gop;
+ WORD32 i4_num_intra_frm_interval;
+ WORD32 i4_bits_per_frm;
+} rem_bit_in_prd_t;
+
+/* State structure for the bit allocation module.
+   NOTE(review): i2_K carries an "i2" prefix but is declared WORD32 - confirm
+   which is intended. */
+typedef struct bit_allocation_t
+{
+ rem_bit_in_prd_t s_rbip;
+ WORD32
+ i2_K[MAX_PIC_TYPE]; /* A universal constant giving the relative complexity between pictures */
+ WORD32 i4_prev_frm_header_bits[MAX_PIC_TYPE]; /* To get an estimate of the header bits consumed */
+ WORD32 ai4_prev_frm_tot_bits[MAX_PIC_TYPE];
+ WORD32 ai4_prev_frm_tot_est_bits[MAX_PIC_TYPE];
+ WORD32 i4_bits_per_frm;
+ WORD32 i4_num_gops_in_period;
+ WORD32
+ i4_actual_num_gops_in_period; /* Num gops as set by rate control module */
+ WORD32 i4_saved_bits;
+ WORD32 i4_max_bits_per_frm[MAX_NUM_DRAIN_RATES];
+ WORD32 i4_min_bits_per_frm;
+ /* Error bits module */
+ error_bits_handle ps_error_bits;
+ /* Storing frame rate */
+ WORD32 i4_frame_rate;
+ WORD32 i4_bit_rate;
+ WORD32 ai4_peak_bit_rate[MAX_NUM_DRAIN_RATES];
+ WORD32 i4_max_tex_bits_for_i;
+ WORD32 i4_pels_in_frame;
+ /* Errors within GOP and across GOP */
+ WORD32 i4_gop_level_bit_error;
+ WORD32 i4_frame_level_bit_error;
+ WORD32 ai4_cur_frm_est_tex_bits[MAX_NUM_FRAME_PARALLEL];
+ WORD32 ai4_cur_frm_est_hdr_bits[MAX_NUM_FRAME_PARALLEL];
+ WORD32 i4_buffer_based_bit_error;
+ WORD32 i4_bits_from_buffer_in_cur_gop;
+ WORD32 i4_excess_bits_from_buffer;
+
+ WORD32 i4_is_hbr;
+ WORD32 i4_rem_frame_in_period;
+ /*HEVC_RC : this will be updated by rc_interface.c to have number of SCD in lap window.
+ Ni will be incremented using this to bring down buffer level and also back to back scd within lap window*/
+ WORD32 i4_num_scd_in_lap_window;
+ WORD32 i4_num_frm_b4_scd;
+ WORD32 i4_num_active_pic_type;
+ WORD32 i4_lap_window;
+ WORD32 i4_field_pic;
+ WORD32 i4_ba_rc_pass;
+ void *pv_gop_stat;
+ LWORD64 i8_cur_gop_num;
+ LWORD64
+ i8_frm_num_in_gop; /*TBD: For enc loop parallel this variable needs to be maintained outside rate control since qp will not be queried in actual bitstream order*/
+ float af_sum_weigh[MAX_PIC_TYPE][3];
+ LWORD64 i8_cur_gop_bit_consumption; /*To calculate the deviation in 2 pass*/
+ //LWORD64 i8_2pass_gop_error_accum;
+ LWORD64
+ i8_2pass_alloc_per_frm_bits; /*Per frame bits allocated to GOP in 2 pass*/
+ //LWORD64 i8_2pass_alloc_per_frm_bits_next_gop;
+
+ float f_min_complexity_cross_peak_rate; /*complexity of gop beyond which it is clipped to peak rate in 2nd pass*/
+ WORD32 i4_next_sc_i_in_rc_look_ahead;
+ /*The peak factor is multiplied to get the total bits for a gop based on squashing function*/
+ float f_cur_peak_factor_2pass;
+ LWORD64 i8_total_bits_allocated;
+ WORD32 i4_luma_pels;
+ WORD32 i4_num_gop;
+ /*The bitrate will keep changing in 2 pass due to error*/
+ LWORD64 i8_current_bitrate_2_pass;
+ /*i4_flag_no_more_set_rbip - once we have reached the end of total number of frames to be encoded in
+ 2nd pass sliding window bit allocation there is no need to set rbip again*/
+ WORD32 i4_flag_no_more_set_rbip;
+ /*i8_vbv_based_excess_for_segment will be distributed across the complex segments depending on the
+ ratio of current complexity to f_sum_complexity_segment_cross_peak*/
+ float f_sum_complexity_segment_cross_peak;
+ /*(i8_vbv_based_excess_for_segment)Buffer play excess is calculated for the entire segment of complex
+ content which may consist of multiple gop's*/
+ //LWORD64 i8_vbv_based_excess_for_segment;
+ /*I frame bit allocation during scene cuts is dependent on f_curr_i_to_sum which will signal
+ the complexity difference between current i to future i's if present in the same default gop*/
+ float f_curr_i_to_sum;
+ float f_curr_by_sum_subgop;
+ WORD32 ai4_pic_types_in_subgop[MAX_PIC_TYPE];
+ WORD32 i4_use_subgop_bit_alloc_flag;
+ WORD32 i4_num_frames_since_last_I_frame;
+ LWORD64 i8_first_pic_bits_pictype[MAX_PIC_TYPE];
+ LWORD64 i8_avg_bits_pictype[MAX_PIC_TYPE];
+ WORD32 i4_avg_qscale_gop_first_pass;
+ WORD32 i4_fp_bit_alloc_in_sp;
+ WORD32 i4_frame_level_error_ctr_update_rc;
+ float f_qscale_max_clip_in_second_pass;
+ float f_average_qscale_1st_pass;
+ float f_max_average_qscale_1st_pass;
+ LWORD64 i8_bit_consumption_so_far;
+ WORD32 i4_total_2pass_frames;
+ LWORD64 i8_2pass_avg_bit_rate;
+ WORD32 i4_br_id;
+} bit_allocation_t;
+
+/* Returns the sum of the per-picture-type frame counts reported by
+ * pic_type_get_actual_frms_in_gop() for the given pic handling instance. */
+static WORD32 get_actual_num_frames_in_gop(pic_handling_handle ps_pic_handling)
+{
+    WORD32 ai4_frms_per_type[MAX_PIC_TYPE];
+    WORD32 i4_total = 0;
+    WORD32 pic_type;
+
+    /* Start from zero so entries the callee does not write count as 0 */
+    memset(ai4_frms_per_type, 0, sizeof(ai4_frms_per_type));
+    pic_type_get_actual_frms_in_gop(ps_pic_handling, ai4_frms_per_type);
+
+    for(pic_type = 0; pic_type < MAX_PIC_TYPE; pic_type++)
+    {
+        i4_total += ai4_frms_per_type[pic_type];
+    }
+
+    return i4_total;
+}
+
+/* Accessor: returns the stored f_cur_peak_factor_2pass */
+float get_cur_peak_factor_2pass(bit_allocation_t *ps_bit_allocation)
+{
+    const float f_peak_factor = ps_bit_allocation->f_cur_peak_factor_2pass;
+
+    return f_peak_factor;
+}
+/* Accessor: returns the stored f_min_complexity_cross_peak_rate */
+float get_cur_min_complexity_factor_2pass(bit_allocation_t *ps_bit_allocation)
+{
+    const float f_min_complexity = ps_bit_allocation->f_min_complexity_cross_peak_rate;
+
+    return f_min_complexity;
+}
+/* Setter: records the total number of GOPs in i4_num_gop */
+void set_2pass_total_gops(bit_allocation_t *ps_bit_allocation, WORD32 i4_num_gop)
+{
+    bit_allocation_t *const ps_ba = ps_bit_allocation;
+
+    ps_ba->i4_num_gop = i4_num_gop;
+}
+#if NON_STEADSTATE_CODE
+/* Module for accessing remaining bits in period */
+/*****************************************************************************
+  Function Name : init_rbip
+  Description   : Initialises the remaining bits in period structure.
+                  The budget is bits_per_frm * (frames in GOP * number of
+                  intra frame intervals); no open-GOP correction is applied.
+  Inputs        : ps_rbip - remaining bits in period structure
+                  ps_pic_handling - Pic handling structure
+                  i4_bits_per_frm - num bits per frm
+                  i4_num_intra_frm_interval - num of I frm intervals in this period
+*****************************************************************************/
+static void init_rbip(
+    rem_bit_in_prd_t *ps_rbip,
+    pic_handling_handle ps_pic_handling,
+    WORD32 i4_bits_per_frm,
+    WORD32 i4_num_intra_frm_interval)
+{
+    const WORD32 i4_frms_in_gop = get_actual_num_frames_in_gop(ps_pic_handling);
+    const WORD32 i4_frms_in_period = i4_frms_in_gop * i4_num_intra_frm_interval;
+
+    /* Keep the inputs; later updates are computed relative to them */
+    ps_rbip->i4_bits_per_frm = i4_bits_per_frm;
+    ps_rbip->i4_num_intra_frm_interval = i4_num_intra_frm_interval;
+    ps_rbip->i4_tot_frms_in_gop = i4_frms_in_gop;
+
+    ps_rbip->i4_rem_bits_in_period = i4_bits_per_frm * i4_frms_in_period;
+}
+#endif /* #if NON_STEADSTATE_CODE */
+
+/*****************************************************************************
+  Function Name : check_update_rbip
+  Description   : Re-reads the number of frames in the GOP and, if it differs
+                  from the stored value, adjusts the remaining bits by
+                  bits_per_frm * num_intra_frm_interval * (new - old).
+                  The stored frame count is always refreshed.
+  Inputs        : ps_rbip - remaining bits in period structure
+                  ps_pic_handling - Pic handling structure
+*****************************************************************************/
+static void check_update_rbip(rem_bit_in_prd_t *ps_rbip, pic_handling_handle ps_pic_handling)
+{
+    /* NOTE: Intra frame interval changes after the first I frame that is encoded in a GOP */
+    const WORD32 i4_cur_frms_in_gop = get_actual_num_frames_in_gop(ps_pic_handling);
+    const WORD32 i4_prev_frms_in_gop = ps_rbip->i4_tot_frms_in_gop;
+
+    if(i4_cur_frms_in_gop != i4_prev_frms_in_gop)
+    {
+        WORD32 i4_delta_frms =
+            ps_rbip->i4_num_intra_frm_interval * (i4_cur_frms_in_gop - i4_prev_frms_in_gop);
+
+        overflow_avoided_summation(
+            &ps_rbip->i4_rem_bits_in_period, (ps_rbip->i4_bits_per_frm * i4_delta_frms));
+    }
+
+    ps_rbip->i4_tot_frms_in_gop = i4_cur_frms_in_gop;
+}
+/*****************************************************************************
+  Function Name : ret_rbip_default_preenc
+  Description   : Returns intra frame interval * bits per frame. The rbip
+                  state is only read, not modified.
+  Inputs        : ps_rbip - remaining bits in period structure
+                  ps_pic_handling - Pic handling structure
+*****************************************************************************/
+static WORD32
+    ret_rbip_default_preenc(rem_bit_in_prd_t *ps_rbip, pic_handling_handle ps_pic_handling)
+{
+    const WORD32 i4_intra_frm_interval = pic_type_get_intra_frame_interval(ps_pic_handling);
+
+    return i4_intra_frm_interval * ps_rbip->i4_bits_per_frm;
+}
+/*****************************************************************************
+  Function Name : update_rbip
+  Description   : Refreshes the GOP frame count (check_update_rbip), adds
+                  i4_num_of_bits to the remaining bits and returns the result.
+  Inputs        : ps_rbip - remaining bits in period structure
+                  ps_pic_handling - Pic handling structure
+                  i4_num_of_bits - bits to add to the remaining budget
+*****************************************************************************/
+static WORD32 update_rbip(
+    rem_bit_in_prd_t *ps_rbip, pic_handling_handle ps_pic_handling, WORD32 i4_num_of_bits)
+{
+    WORD32 *pi4_rem_bits = &ps_rbip->i4_rem_bits_in_period;
+
+    check_update_rbip(ps_rbip, ps_pic_handling);
+    overflow_avoided_summation(pi4_rem_bits, i4_num_of_bits);
+
+    return *pi4_rem_bits;
+}
+/*****************************************************************************
+  Function Name : get_rbip_and_num_frames
+  Description   : Same update as update_rbip; additionally reports the
+                  current number of frames in the GOP.
+  Inputs        : ps_rbip - remaining bits in period structure
+                  ps_pic_handling - Pic handling structure
+                  i4_num_of_bits - bits to add to the remaining budget
+                  pi4_num_frames - pointer to update number of frames
+*****************************************************************************/
+static LWORD64 get_rbip_and_num_frames(
+    rem_bit_in_prd_t *ps_rbip,
+    pic_handling_handle ps_pic_handling,
+    WORD32 i4_num_of_bits,
+    WORD32 *pi4_num_frames)
+{
+    WORD32 *pi4_rem_bits = &ps_rbip->i4_rem_bits_in_period;
+
+    check_update_rbip(ps_rbip, ps_pic_handling);
+    overflow_avoided_summation(pi4_rem_bits, i4_num_of_bits);
+
+    *pi4_num_frames = ps_rbip->i4_tot_frms_in_gop;
+
+    return *pi4_rem_bits;
+}
+/*****************************************************************************
+  Function Name : set_rbip
+  Description   : Resets the remaining bits to
+                  bits_per_frm * tot_frms_in_gop * num_intra_frm_interval
+                  plus i4_error_bits, and returns the new value.
+  Inputs        : ps_rbip - remaining bits in period structure
+                  i4_error_bits - Error bits
+*****************************************************************************/
+static WORD32 set_rbip(rem_bit_in_prd_t *ps_rbip, WORD32 i4_error_bits)
+{
+    const WORD32 i4_period_bits = ps_rbip->i4_bits_per_frm * ps_rbip->i4_tot_frms_in_gop *
+                                  ps_rbip->i4_num_intra_frm_interval;
+
+    ps_rbip->i4_rem_bits_in_period = i4_period_bits + i4_error_bits;
+
+    return ps_rbip->i4_rem_bits_in_period;
+}
+
+/*****************************************************************************
+  Function Name : multi_pass_set_rbip
+  Description   : 2 pass set RBIP. The GOP bits do not depend on bitrate or
+                  framerate here; they are taken from first pass data and
+                  scaled by (actual frames in GOP / i4_tot_frm_in_cur_gop).
+                  The product is formed in 64 bits before the division.
+  Inputs        : ps_rbip - remaining bits in period structure
+                  ps_pic_handling - Pic handling structure
+                  i4_cur_gop_bits - bits allocated for the curr gop
+                  i4_tot_frm_in_cur_gop - frames in the gop
+*****************************************************************************/
+static void multi_pass_set_rbip(
+    rem_bit_in_prd_t *ps_rbip,
+    pic_handling_handle ps_pic_handling,
+    WORD32 i4_cur_gop_bits,
+    WORD32 i4_tot_frm_in_cur_gop)
+{
+    const WORD32 i4_frms_in_gop = get_actual_num_frames_in_gop(ps_pic_handling);
+    const LWORD64 i8_scaled_bits = (LWORD64)i4_cur_gop_bits * i4_frms_in_gop / i4_tot_frm_in_cur_gop;
+
+    ps_rbip->i4_rem_bits_in_period = (WORD32)(i8_scaled_bits);
+    ps_rbip->i4_tot_frms_in_gop = i4_frms_in_gop;
+    ps_rbip->i4_bits_per_frm = ps_rbip->i4_rem_bits_in_period / i4_frms_in_gop;
+}
+/*
+ * Applies a change of bits-per-frame and/or number of intra frame intervals
+ * to the remaining-bits budget, then stores the new values.
+ *
+ * A bits-per-frame change is applied over the old number of intervals; an
+ * interval change is applied afterwards using the new bits-per-frame value.
+ */
+static void change_rbip(
+    rem_bit_in_prd_t *ps_rbip, WORD32 i4_new_bits_per_frm, WORD32 i4_new_num_intra_frm_interval)
+{
+    const WORD32 i4_old_bits_per_frm = ps_rbip->i4_bits_per_frm;
+    const WORD32 i4_old_interval = ps_rbip->i4_num_intra_frm_interval;
+    const WORD32 i4_frms_in_gop = ps_rbip->i4_tot_frms_in_gop;
+
+    if(i4_new_bits_per_frm != i4_old_bits_per_frm)
+    {
+        WORD32 i4_frms_in_period = (i4_old_interval) * i4_frms_in_gop;
+
+        overflow_avoided_summation(
+            &ps_rbip->i4_rem_bits_in_period,
+            ((i4_new_bits_per_frm - i4_old_bits_per_frm) * i4_frms_in_period));
+    }
+
+    if(i4_new_num_intra_frm_interval != i4_old_interval)
+    {
+        overflow_avoided_summation(
+            &ps_rbip->i4_rem_bits_in_period,
+            (i4_new_bits_per_frm * i4_frms_in_gop *
+             (i4_new_num_intra_frm_interval - i4_old_interval)));
+    }
+
+    ps_rbip->i4_bits_per_frm = i4_new_bits_per_frm;
+    ps_rbip->i4_num_intra_frm_interval = i4_new_num_intra_frm_interval;
+}
+
+#if NON_STEADSTATE_CODE
+/*
+ * Memtab handling for the bit allocation state.
+ *
+ * Entry 0 of ps_memtab describes bit_allocation_t itself; the following
+ * entries belong to the error bits module. For every e_func_type other than
+ * GET_NUM_MEMTAB, entry 0 is filled and passed to use_or_fill_base().
+ * Returns the number of memtab entries covered (1 + error bits entries).
+ */
+WORD32 bit_allocation_num_fill_use_free_memtab(
+    bit_allocation_t **pps_bit_allocation, itt_memtab_t *ps_memtab, ITT_FUNC_TYPE_E e_func_type)
+{
+    static bit_allocation_t s_bit_allocation_temp;
+    const WORD32 i4_only_counting = (e_func_type == GET_NUM_MEMTAB);
+    WORD32 i4_num_memtabs;
+
+    /* While counting or filling there is no state memory yet, so point at a
+       static dummy to keep the dereference below harmless */
+    if(i4_only_counting || (e_func_type == FILL_MEMTAB))
+    {
+        *pps_bit_allocation = &s_bit_allocation_temp;
+    }
+
+    /* Entry for the bit allocation state structure itself */
+    if(!i4_only_counting)
+    {
+        fill_memtab(
+            &ps_memtab[0],
+            sizeof(bit_allocation_t),
+            MEM_TAB_ALIGNMENT,
+            PERSISTENT,
+            DDR);
+        use_or_fill_base(&ps_memtab[0], (void **)pps_bit_allocation, e_func_type);
+    }
+    i4_num_memtabs = 1;
+
+    /* Entries owned by the error bits module follow */
+    i4_num_memtabs += error_bits_num_fill_use_free_memtab(
+        &(*pps_bit_allocation)->ps_error_bits, &ps_memtab[i4_num_memtabs], e_func_type);
+
+    return i4_num_memtabs;
+}
+#endif /* #if NON_STEADSTATE_CODE */
+
+/*****************************************************************************
+ Function Name : get_bits_based_on_complexity
+ Description : function calculates the bits to be allocated for the current
+ picture type given the relative complexity between different
+ picture types
+ Inputs : i4_bits_in_period
+ pi4_frms_in_period - num frames of each pictype in the period
+ pvq_complexity_estimate - complexity for each pictype
+ e_pic_type - current picture type
+ i4_call_type
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+static WORD32 get_bits_based_on_complexity(
+ bit_allocation_t *ps_bit_allocation,
+ WORD32 i4_bits_in_period,
+ WORD32 *pi4_frms_in_period,
+ number_t *pvq_complexity_estimate,
+ picture_type_e e_pic_type,
+ WORD32 i4_call_type)
+{
+ WORD32 i, i4_estimated_bits;
+ number_t vq_bits_in_period, vq_frms_in_period[MAX_PIC_TYPE], vq_comp_coeff,
+ vq_est_texture_bits_for_frm;
+ WORD32 i4_num_scd_in_LAP_window = ps_bit_allocation->i4_num_scd_in_lap_window;
+ WORD32 i4_active_pic_types = ps_bit_allocation->i4_num_active_pic_type,
+ i4_field_pic = ps_bit_allocation->i4_field_pic;
+ float af_sum_weigh[MAX_PIC_TYPE][3];
+
+ memmove(af_sum_weigh, ps_bit_allocation->af_sum_weigh, ((sizeof(float)) * MAX_PIC_TYPE * 3));
+
+ /** Increment I frame count if there is scene cut in LAP window*/
+ i4_num_scd_in_LAP_window = 0;
+ pi4_frms_in_period[I_PIC] += i4_num_scd_in_LAP_window;
+ /* Converting inputs to var_q format */
+ SET_VAR_Q(vq_bits_in_period, i4_bits_in_period, 0);
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ SET_VAR_Q(vq_frms_in_period[i], pi4_frms_in_period[i], 0);
+ }
+ /******************************************************************
+ Estimated texture bits =
+ (remaining bits) * (cur frm complexity)
+ ---------------------------------------
+ (num_i_frm*i_frm_complexity) + (num_p_frm*pfrm_complexity) + (b_frm * b_frm_cm)
+ *******************************************************************/
+ /*Taking the numerator weight into account*/
+ if(i4_call_type == 1)
+ {
+ trace_printf("1 CUrr / avg %f", af_sum_weigh[e_pic_type][0]);
+ }
+ if(af_sum_weigh[e_pic_type][0] > 4.0f)
+ af_sum_weigh[e_pic_type][0] = 4.0f;
+ if(af_sum_weigh[e_pic_type][0] < 0.3f)
+ af_sum_weigh[e_pic_type][0] = 0.3f;
+ if(i4_call_type == 1)
+ {
+ trace_printf("2 CUrr / avg %f", af_sum_weigh[e_pic_type][0]);
+ }
+
+ if((ps_bit_allocation->i4_ba_rc_pass != 2) || (i4_call_type == 0) ||
+ (ps_bit_allocation->i4_fp_bit_alloc_in_sp == 0))
+ {
+ convert_float_to_fix(af_sum_weigh[e_pic_type][0], &vq_comp_coeff);
+ mult32_var_q(vq_bits_in_period, vq_comp_coeff, &vq_bits_in_period);
+ mult32_var_q(vq_bits_in_period, pvq_complexity_estimate[e_pic_type], &vq_bits_in_period);
+ }
+ else
+ {
+ WORD32 i4_frame_num = (WORD32)ps_bit_allocation->i8_frm_num_in_gop, i4_offset;
+ gop_level_stat_t *ps_gop;
+ LWORD64 i8_firs_pass_tot_bits;
+ float f_bits_cur_pic, f_offset;
+ ps_gop =
+ (gop_level_stat_t *)ps_bit_allocation->pv_gop_stat + ps_bit_allocation->i8_cur_gop_num;
+ i8_firs_pass_tot_bits = ps_gop->ai8_tex_bits_consumed[i4_frame_num] +
+ ps_gop->ai8_head_bits_consumed[i4_frame_num];
+ i4_offset = (ps_gop->ai4_q6_frame_offsets[i4_frame_num] * 1000) >> QSCALE_Q_FAC;
+ f_offset = ((float)i4_offset) / 1000;
+ f_bits_cur_pic =
+ (float)(i8_firs_pass_tot_bits * ps_gop->ai4_first_pass_qscale[i4_frame_num]) /
+ (ps_bit_allocation->i4_avg_qscale_gop_first_pass * f_offset);
+ convert_float_to_fix(f_bits_cur_pic, &vq_comp_coeff);
+ mult32_var_q(vq_bits_in_period, vq_comp_coeff, &vq_bits_in_period);
+
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ number_t temp;
+ convert_float_to_fix((float)ps_bit_allocation->i8_avg_bits_pictype[i], &temp);
+ pvq_complexity_estimate[i] = temp;
+ }
+ }
+
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ /*For 2nd pass we will be reducing the num of pictures as they are coded so we dont want to replace 0's*/
+ if(af_sum_weigh[i][1] == 0.0 &&
+ !((i4_call_type == 1) && (ps_bit_allocation->i4_ba_rc_pass == 2)))
+ af_sum_weigh[i][1] = (float)pi4_frms_in_period[i];
+
+ convert_float_to_fix(af_sum_weigh[i][1], &vq_comp_coeff);
+ mult32_var_q(vq_comp_coeff, pvq_complexity_estimate[i], &vq_frms_in_period[i]);
+ }
+
+ /* changed the index range from active_pic to max_pic*/
+ if(i4_field_pic)
+ {
+ for(i = 1; i < i4_active_pic_types; i++)
+ {
+ add32_var_q(vq_frms_in_period[I_PIC], vq_frms_in_period[i], &vq_frms_in_period[I_PIC]);
+ add32_var_q(
+ vq_frms_in_period[I_PIC],
+ vq_frms_in_period[i + FIELD_OFFSET],
+ &vq_frms_in_period[I_PIC]);
+ }
+ }
+ else /*field case*/
+ {
+ for(i = 1; i < i4_active_pic_types; i++)
+ {
+ add32_var_q(vq_frms_in_period[I_PIC], vq_frms_in_period[i], &vq_frms_in_period[I_PIC]);
+ }
+ }
+
+ div32_var_q(vq_bits_in_period, vq_frms_in_period[I_PIC], &vq_est_texture_bits_for_frm);
+ number_t_to_word32(vq_est_texture_bits_for_frm, &i4_estimated_bits);
+
+ /* If the number of frames are zero then return zero */
+ if(!pi4_frms_in_period[e_pic_type])
+ i4_estimated_bits = 0;
+ return (i4_estimated_bits);
+}
+/*****************************************************************************
+  Function Name : assign_complexity_coeffs
+  Description   : Copies the caller supplied weight table into the
+                  af_sum_weigh table held in the bit allocation state.
+                  All three entries of every picture type row are copied.
+  Inputs        : ps_bit_allocation - state whose af_sum_weigh is overwritten
+                  af_sum_weigh      - MAX_PIC_TYPE x 3 table to copy from
+  Revision History:
+     DD MM YYYY   Author(s)       Changes (Describe the changes made)
+*****************************************************************************/
+void assign_complexity_coeffs(
+    bit_allocation_t *ps_bit_allocation, float af_sum_weigh[MAX_PIC_TYPE][3])
+{
+    WORD32 i4_pic_type, i4_col;
+
+    /* Walk the table row by row (one row per picture type) */
+    for(i4_pic_type = 0; i4_pic_type < MAX_PIC_TYPE; i4_pic_type++)
+    {
+        for(i4_col = 0; i4_col < 3; i4_col++)
+        {
+            ps_bit_allocation->af_sum_weigh[i4_pic_type][i4_col] =
+                af_sum_weigh[i4_pic_type][i4_col];
+        }
+    }
+}
+/*****************************************************************************
+  Function Name : ba_get_rbip_and_num_frames
+  Description   : Bit allocation level wrapper around get_rbip_and_num_frames().
+                  The call is made on the s_rbip member of the bit allocation
+                  state with 0 passed as the third argument.
+  Inputs        : ps_bit_allocation - bit allocation state (owner of s_rbip)
+                  ps_pic_handling   - forwarded unchanged
+                  pi4_num_frames    - forwarded unchanged
+  Returns       : Value returned by get_rbip_and_num_frames()
+  Revision History:
+     DD MM YYYY   Author(s)       Changes (Describe the changes made)
+*****************************************************************************/
+LWORD64 ba_get_rbip_and_num_frames(
+    bit_allocation_t *ps_bit_allocation,
+    pic_handling_handle ps_pic_handling,
+    WORD32 *pi4_num_frames)
+{
+    LWORD64 i8_rbip;
+
+    i8_rbip =
+        get_rbip_and_num_frames(&ps_bit_allocation->s_rbip, ps_pic_handling, 0, pi4_num_frames);
+
+    return i8_rbip;
+}
+/*****************************************************************************
+ Function Name : init_prev_header_bits
+ Description : Initialise header bits for each pic type.
+ For every picture type, the bits returned by
+ get_bits_based_on_complexity() (hardcoded bit ratios, call type 0)
+ are scaled by a Q10 header percentage derived from the bits per
+ pel, and the result is stored in i4_prev_frm_header_bits[].
+ af_sum_weigh[][] is reset to {1.0, 0.0, 0.0} for every picture
+ type before those calls.
+ Inputs : ps_bit_allocation
+ ps_pic_handling
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+void init_prev_header_bits(bit_allocation_t *ps_bit_allocation, pic_handling_handle ps_pic_handling)
+{
+ WORD32 i4_rem_bits_in_period, /* ai4_rem_frms_in_period[MAX_PIC_TYPE], */
+ ai4_frms_in_period[MAX_PIC_TYPE], i, j;
+ number_t avq_complexity_estimate[MAX_PIC_TYPE];
+ WORD32 i4_field_pic;
+ /* NOTE(review): i4_field_pic is assigned here but never read again in this function */
+ i4_field_pic = pic_type_get_field_pic(ps_pic_handling);
+ /* Assigning Percentages of I, P and B frame header bits based on actual bits allocated for I, P and B frames */
+ /* Getting the remaining bits in period */
+ i4_rem_bits_in_period = update_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling, 0);
+
+ /* Hardcoding the bit ratios between I, P and B */
+ /* Each level is the product of the ratios of all the levels below it; B2/B22 are the unit (1) */
+ SET_VAR_Q(
+ avq_complexity_estimate[I_PIC],
+ (I_TO_P_BIT_RATIO * P_TO_B_BIT_RATIO * B_TO_B1_BIT_RATO0 * B1_TO_B2_BIT_RATIO),
+ 0);
+ SET_VAR_Q(
+ avq_complexity_estimate[P_PIC],
+ (P_TO_B_BIT_RATIO * B_TO_B1_BIT_RATO0 * B1_TO_B2_BIT_RATIO),
+ 0);
+ SET_VAR_Q(
+ avq_complexity_estimate[P1_PIC],
+ (P_TO_B_BIT_RATIO * B_TO_B1_BIT_RATO0 * B1_TO_B2_BIT_RATIO),
+ 0);
+ SET_VAR_Q(avq_complexity_estimate[B_PIC], (B_TO_B1_BIT_RATO0 * B1_TO_B2_BIT_RATIO), 0);
+ SET_VAR_Q(avq_complexity_estimate[BB_PIC], (B_TO_B1_BIT_RATO0 * B1_TO_B2_BIT_RATIO), 0);
+ SET_VAR_Q(avq_complexity_estimate[B1_PIC], (B1_TO_B2_BIT_RATIO), 0);
+ SET_VAR_Q(
+ avq_complexity_estimate[B11_PIC],
+ (B1_TO_B2_BIT_RATIO),
+ 0); //temporarily treat ref and non ref as same
+ SET_VAR_Q(avq_complexity_estimate[B2_PIC], 1, 0);
+ SET_VAR_Q(avq_complexity_estimate[B22_PIC], 1, 0);
+
+ /* Get the rem_frms_in_gop & the frms_in_gop from the pic_type state struct */
+ /* pic_type_get_rem_frms_in_gop(ps_pic_handling, ai4_rem_frms_in_period); */
+ pic_type_get_frms_in_gop(ps_pic_handling, ai4_frms_in_period);
+
+ /* Depending on the number of gops in a period, find the num_frms_in_prd */
+ for(j = 0; j < MAX_PIC_TYPE; j++)
+ {
+ ai4_frms_in_period[j] = (ai4_frms_in_period[j] * ps_bit_allocation->i4_num_gops_in_period);
+ }
+
+ /* Percentage of header bits in the overall bits allocated to I, P and B frames
+ when the data is not known. Since this value is based on bitrate using a equation
+ to fit the values. Ran the header bit ratio for [1080@30] carnival, ihits and
+ football at 9, 12 and 16 mbps and based on that deriving a equation using bpp.
+ Values obtained are:
+ (bitrate/bpp) I P B
+ 9/2.87 0.358617291155 0.549124350786 0.798772545232
+ 12/3.83 0.288633642796 0.444797334749 0.693933711801
+ 16/5.11 0.284241839623 0.330152764298 0.557999732549
+ Equation for I:
+ if bpp > 3.83 hdr = 0.29
+ else hdr = -0.073*bpp + 0.569
+ Equation for P: hdr = -0.098*bpp + 0.825
+ Equation for B: hdr = -0.108*bpp + 1.108
+ */
+ {
+ /* Header percentages and i4_bpp below are fixed point values with this many fractional bits */
+#define FRAME_HEADER_BITS_Q_FACTOR (10)
+ WORD32 ai4_header_bits_percentage[MAX_PIC_TYPE];
+
+ WORD32 i4_bpp;
+ /* Bits per pel in Q10; see the commented out expression below for the intended formula */
+ X_PROD_Y_DIV_Z(
+ ps_bit_allocation->i4_bits_per_frm,
+ (1 << FRAME_HEADER_BITS_Q_FACTOR),
+ ps_bit_allocation->i4_pels_in_frame,
+ i4_bpp);
+ //ps_bit_allocation->i4_bits_per_frm*(1<<FRAME_HEADER_BITS_Q_FACTOR)/ps_bit_allocation->i4_pels_in_frame;
+
+ /* Intercepts in Q10: 297 ~ 0.29, 583 ~ 0.569, 845 ~ 0.825, 1135 ~ 1.108 (see equations above) */
+ /* The products below are negative for positive i4_bpp; right shifting a negative value is
+ implementation-defined in C, so this relies on the compiler using an arithmetic shift */
+ if(i4_bpp > 131)
+ ai4_header_bits_percentage[I_PIC] = 297;
+ else
+ ai4_header_bits_percentage[I_PIC] =
+ ((-2238 * i4_bpp) >> FRAME_HEADER_BITS_Q_FACTOR) + 583;
+ ai4_header_bits_percentage[P_PIC] = ((-2990 * i4_bpp) >> FRAME_HEADER_BITS_Q_FACTOR) + 845;
+
+ ai4_header_bits_percentage[B_PIC] = ((-3308 * i4_bpp) >> FRAME_HEADER_BITS_Q_FACTOR) + 1135;
+
+ /* Changes for 2B subGOP */
+ /* NOTE(review): P1_PIC and BB_PIC are derived from the already rescaled P_PIC / B_PIC
+ values, so they end up scaled twice (13/16 * 13/16 and 12/16 * 12/16). Confirm whether
+ this is intended or whether they should simply equal P_PIC / B_PIC. */
+ ai4_header_bits_percentage[P_PIC] = (ai4_header_bits_percentage[P_PIC] * 13) >> 4;
+ ai4_header_bits_percentage[P1_PIC] = (ai4_header_bits_percentage[P_PIC] * 13) >> 4;
+ ai4_header_bits_percentage[B_PIC] = (ai4_header_bits_percentage[B_PIC] * 12) >> 4;
+ ai4_header_bits_percentage[BB_PIC] = (ai4_header_bits_percentage[B_PIC] * 12) >> 4;
+ /*HEVC_hierarchy: temp change consider same percentage because of insufficient data*/
+ ai4_header_bits_percentage[B1_PIC] = ai4_header_bits_percentage[B_PIC];
+ ai4_header_bits_percentage[B11_PIC] = ai4_header_bits_percentage[B_PIC];
+ ai4_header_bits_percentage[B2_PIC] = ai4_header_bits_percentage[B_PIC];
+ ai4_header_bits_percentage[B22_PIC] = ai4_header_bits_percentage[B_PIC];
+
+ /* Reset the weights that get_bits_based_on_complexity() reads from the state */
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ ps_bit_allocation->af_sum_weigh[i][0] = 1.0;
+ ps_bit_allocation->af_sum_weigh[i][1] = 0.0;
+ ps_bit_allocation->af_sum_weigh[i][2] = 0.0;
+ }
+
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ /* Getting the total bits allocated for each picture type */
+ WORD32 i4_num_bits_allocated = get_bits_based_on_complexity(
+ ps_bit_allocation,
+ i4_rem_bits_in_period,
+ ai4_frms_in_period,
+ avq_complexity_estimate,
+ (picture_type_e)i,
+ 0);
+
+ /* The linear fits above can go negative for large i4_bpp; clamp to zero */
+ if(ai4_header_bits_percentage[i] < 0)
+ ai4_header_bits_percentage[i] = 0;
+
+ /* Q10 percentage * bits, computed in 64 bit and brought back to integer bits */
+ ps_bit_allocation->i4_prev_frm_header_bits[i] = (WORD32)(
+ ((LWORD64)ai4_header_bits_percentage[i] * i4_num_bits_allocated) >>
+ FRAME_HEADER_BITS_Q_FACTOR);
+ }
+ }
+}
+
+/*****************************************************************************
+ Function Name : init_bit_allocation
+ Description : Initialises the bit_allocation structure.
+ Inputs : intra_frm_interval - num frames between two I frames
+ i4_num_intra_frm_interval - num such intervals
+ i4_bit_rate - num bits per second
+ i4_frm_rate - num frms in 1000 seconds
+ i4_num_active_pic_type
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+#if NON_STEADSTATE_CODE
+/* Fills in every field of *ps_bit_allocation that is touched below from the supplied
+ configuration: per frame bit budgets, RBIP state, K ratios, error bits, initial header
+ bit estimates and the maximum texture bits for an I picture.
+ i4_peak_bit_rate is read as an array of MAX_NUM_DRAIN_RATES entries.
+ ps_bit_allocation->ps_error_bits is passed to init_error_bits(), so it has to be valid
+ before this function is called. */
+void init_bit_allocation(
+ bit_allocation_t *ps_bit_allocation,
+ pic_handling_handle ps_pic_handling,
+ WORD32 i4_num_intra_frm_interval, /* num such intervals */
+ WORD32 i4_bit_rate, /* num bits per second */
+ WORD32 i4_frm_rate, /* num frms in 1000 seconds */
+ WORD32 *i4_peak_bit_rate,
+ WORD32 i4_min_bitrate, /* The minimum bit rate that is to be satisfied for a gop */
+ WORD32 i4_pels_in_frame,
+ WORD32 i4_is_hbr,
+ WORD32 i4_num_active_pic_type,
+ WORD32 i4_lap_window,
+ WORD32 i4_field_pic,
+ WORD32 rc_pass,
+ WORD32 i4_luma_pels,
+ WORD32 i4_fp_bit_alloc_in_sp)
+{
+ WORD32 i;
+ WORD32 i4_bits_per_frm, i4_max_bits_per_frm[MAX_NUM_DRAIN_RATES];
+ /* Store the pels in frame value */
+ ps_bit_allocation->i4_pels_in_frame = i4_pels_in_frame;
+ ps_bit_allocation->i4_num_scd_in_lap_window = 0;
+ ps_bit_allocation->i4_num_frm_b4_scd = 0;
+ ps_bit_allocation->i4_num_active_pic_type = i4_num_active_pic_type;
+ ps_bit_allocation->i4_field_pic = i4_field_pic;
+ ps_bit_allocation->i4_ba_rc_pass = rc_pass;
+ ps_bit_allocation->i4_br_id = 0; /* 0 - peak, 1 - average*/
+ ps_bit_allocation->i8_cur_gop_num =
+ 0; /*Will be incremented after first frame allocation is done(during init itself)*/
+ ps_bit_allocation->i8_frm_num_in_gop = 0;
+ ps_bit_allocation->pv_gop_stat =
+ NULL; /*In 2 pass the gop stat pointer is set API parameter call*/
+ /* NOTE(review): this default is overwritten at the end of the function when rc_pass != 2,
+ which reads as the opposite of "reset ... in case of two pass" below - confirm */
+ ps_bit_allocation->f_min_complexity_cross_peak_rate =
+ 1.0; /*In single pass buffer based additional bits movement is disabled, hence set to max complexity
+ Reset to lower value in case of two pass*/
+
+ ps_bit_allocation->f_cur_peak_factor_2pass = -1.0;
+ ps_bit_allocation->i8_total_bits_allocated = -1;
+ ps_bit_allocation->i4_luma_pels = i4_luma_pels;
+ ps_bit_allocation->i4_num_gop = -1;
+ ps_bit_allocation->f_sum_complexity_segment_cross_peak = 0.0f;
+ //ps_bit_allocation->i8_vbv_based_excess_for_segment = 0;
+ ps_bit_allocation->i4_flag_no_more_set_rbip = 0;
+ ps_bit_allocation->f_curr_i_to_sum = 1.0f;
+ ps_bit_allocation->i4_fp_bit_alloc_in_sp = i4_fp_bit_alloc_in_sp;
+
+ /* Calculate the bits per frame */
+ /* The factor 1000 matches i4_frm_rate being expressed in frames per 1000 seconds */
+ X_PROD_Y_DIV_Z(i4_bit_rate, 1000, i4_frm_rate, i4_bits_per_frm);
+ for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+ {
+ X_PROD_Y_DIV_Z(i4_peak_bit_rate[i], 1000, i4_frm_rate, i4_max_bits_per_frm[i]);
+ }
+ /* Initialize the bits_per_frame */
+ ps_bit_allocation->i4_bits_per_frm = i4_bits_per_frm;
+ for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+ {
+ ps_bit_allocation->i4_max_bits_per_frm[i] = i4_max_bits_per_frm[i];
+ }
+ X_PROD_Y_DIV_Z(i4_min_bitrate, 1000, i4_frm_rate, ps_bit_allocation->i4_min_bits_per_frm);
+
+ /* Initialise the rem_bits in period
+ The first gop in case of an OPEN GOP may have fewer B_PICs,
+ That condition is not taken care of */
+ init_rbip(
+ &ps_bit_allocation->s_rbip, ps_pic_handling, i4_bits_per_frm, i4_num_intra_frm_interval);
+
+ /* Initialize the num_gops_in_period */
+ ps_bit_allocation->i4_num_gops_in_period = i4_num_intra_frm_interval;
+ ps_bit_allocation->i4_actual_num_gops_in_period = i4_num_intra_frm_interval;
+
+ /* Relative complexity between I and P frames */
+ /* Each product of n ratios is shifted down by (n - 1) * K_Q so the result stays in Q(K_Q) */
+ ps_bit_allocation->i2_K[I_PIC] = (1 << K_Q);
+ ps_bit_allocation->i2_K[P_PIC] = I_TO_P_RATIO;
+ ps_bit_allocation->i2_K[P1_PIC] = I_TO_P_RATIO;
+ ps_bit_allocation->i2_K[B_PIC] = (P_TO_B_RATIO * I_TO_P_RATIO) >> K_Q;
+ ps_bit_allocation->i2_K[BB_PIC] = (P_TO_B_RATIO * I_TO_P_RATIO) >> K_Q;
+
+ /*HEVC_hierarchy: force qp offset with one high level of hierarchy*/
+ ps_bit_allocation->i2_K[B1_PIC] = (B_TO_B1_RATIO * P_TO_B_RATIO * I_TO_P_RATIO) >> (K_Q + K_Q);
+ ps_bit_allocation->i2_K[B11_PIC] = (B_TO_B1_RATIO * P_TO_B_RATIO * I_TO_P_RATIO) >> (K_Q + K_Q);
+ ps_bit_allocation->i2_K[B2_PIC] =
+ (B1_TO_B2_RATIO * B_TO_B1_RATIO * P_TO_B_RATIO * I_TO_P_RATIO) >> (K_Q + K_Q + K_Q);
+ ps_bit_allocation->i2_K[B22_PIC] =
+ (B1_TO_B2_RATIO * B_TO_B1_RATIO * P_TO_B_RATIO * I_TO_P_RATIO) >> (K_Q + K_Q + K_Q);
+
+ /* Initialize the saved bits to 0*/
+ ps_bit_allocation->i4_saved_bits = 0;
+
+ /* Update the error bits module with average bits */
+ init_error_bits(ps_bit_allocation->ps_error_bits, i4_frm_rate, i4_bit_rate);
+ /* Store the input for implementing change in values */
+ ps_bit_allocation->i4_frame_rate = i4_frm_rate;
+ ps_bit_allocation->i4_bit_rate = i4_bit_rate;
+ for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+ ps_bit_allocation->ai4_peak_bit_rate[i] = i4_peak_bit_rate[i];
+
+ ps_bit_allocation->i4_is_hbr = i4_is_hbr;
+ /* Initialising the header bits to be used for each picture type */
+ /* Must run after i4_bits_per_frm, i4_pels_in_frame, i4_num_gops_in_period and s_rbip are
+ set above, since init_prev_header_bits() reads all of them */
+ init_prev_header_bits(ps_bit_allocation, ps_pic_handling);
+
+ /*HEVC_RC*/
+ /*remember prev frames tot bit consumption. This is required to calculate error after first sub gop where estimate is not known*/
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ ps_bit_allocation->ai4_prev_frm_tot_bits[i] =
+ -1; //-1 indicates that pic type has not been encoded
+ ps_bit_allocation->ai4_prev_frm_tot_est_bits[i] = -1;
+ }
+
+ /* #define STATIC_I_TO_P_RATIO ((STATIC_I_TO_B_RATIO)/(STATIC_P_TO_B_RATIO)) */
+ /* Calculate the max i frame bits */
+ {
+ WORD32 ai4_frms_in_period[MAX_PIC_TYPE];
+ WORD32 ai4_actual_frms_in_period[MAX_PIC_TYPE], i4_actual_frms_in_period = 0;
+ WORD32 i4_rem_texture_bits, j, i4_tot_header_bits_est = 0;
+ number_t avq_complexity_estimate[MAX_PIC_TYPE];
+ WORD32 i4_total_frms;
+
+ /* Get the rem_frms_in_gop & the frms_in_gop from the pic_type state struct */
+ pic_type_get_frms_in_gop(ps_pic_handling, ai4_frms_in_period);
+ pic_type_get_actual_frms_in_gop(ps_pic_handling, ai4_actual_frms_in_period);
+ /* Depending on the number of gops in a period, find the num_frms_in_prd */
+ /* NOTE(review): i4_total_frms is accumulated below but not read afterwards */
+ i4_total_frms = 0;
+ for(j = 0; j < MAX_PIC_TYPE; j++)
+ {
+ ai4_frms_in_period[j] *= ps_bit_allocation->i4_num_gops_in_period;
+ ai4_actual_frms_in_period[j] *= ps_bit_allocation->i4_num_gops_in_period;
+ i4_total_frms += ai4_frms_in_period[j];
+ i4_actual_frms_in_period += ai4_actual_frms_in_period[j];
+ }
+ ps_bit_allocation->i4_rem_frame_in_period = i4_actual_frms_in_period; /*i_only*/
+
+ for(j = 0; j < MAX_PIC_TYPE; j++)
+ {
+ i4_tot_header_bits_est +=
+ ai4_frms_in_period[j] * ps_bit_allocation->i4_prev_frm_header_bits[j];
+ }
+ /* Remove the header bits from the remaining bits to find how many bits you
+ can transfer.*/
+ /* NOTE(review): the product below and the header sum above are computed in 32 bit
+ (WORD32); confirm they cannot overflow for the supported bit rates and period lengths */
+ i4_rem_texture_bits =
+ ps_bit_allocation->i4_bits_per_frm * i4_actual_frms_in_period - i4_tot_header_bits_est;
+
+ /* Set the complexities for static content */
+ SET_VAR_Q(avq_complexity_estimate[I_PIC], STATIC_I_TO_B2_RATIO, 0);
+ SET_VAR_Q(avq_complexity_estimate[P_PIC], STATIC_P_TO_B2_RATIO, 0);
+ SET_VAR_Q(avq_complexity_estimate[P1_PIC], STATIC_P_TO_B2_RATIO, 0);
+ SET_VAR_Q(avq_complexity_estimate[B_PIC], STATIC_B_TO_B2_RATIO, 0);
+ SET_VAR_Q(avq_complexity_estimate[BB_PIC], STATIC_B_TO_B2_RATIO, 0);
+ SET_VAR_Q(avq_complexity_estimate[B1_PIC], STATIC_B1_TO_B2_RATIO, 0);
+ SET_VAR_Q(avq_complexity_estimate[B11_PIC], STATIC_B1_TO_B2_RATIO, 0);
+ SET_VAR_Q(avq_complexity_estimate[B2_PIC], 1, 0);
+ SET_VAR_Q(avq_complexity_estimate[B22_PIC], 1, 0);
+ /* Get the texture bits required for the current frame */
+ ps_bit_allocation->i4_max_tex_bits_for_i = get_bits_based_on_complexity(
+ ps_bit_allocation,
+ i4_rem_texture_bits,
+ ai4_frms_in_period,
+ avq_complexity_estimate,
+ I_PIC,
+ 0);
+ }
+ /* initialise the GOP and bit errors to zero */
+ ps_bit_allocation->i4_gop_level_bit_error = 0;
+ ps_bit_allocation->i4_frame_level_bit_error = 0;
+ for(i = 0; i < MAX_NUM_FRAME_PARALLEL; i++)
+ {
+ ps_bit_allocation->ai4_cur_frm_est_tex_bits[i] = 0;
+ ps_bit_allocation->ai4_cur_frm_est_hdr_bits[i] = 0;
+ }
+ ps_bit_allocation->i4_buffer_based_bit_error = 0;
+ ps_bit_allocation->i4_bits_from_buffer_in_cur_gop = 0;
+ ps_bit_allocation->i4_excess_bits_from_buffer = 0;
+ ps_bit_allocation->i4_lap_window = i4_lap_window;
+ ps_bit_allocation->i8_cur_gop_bit_consumption = 0;
+ //ps_bit_allocation->i8_2pass_gop_error_accum = 0;
+ /* Largest positive 32 bit value, i.e. effectively no clip until a caller sets one */
+ ps_bit_allocation->f_qscale_max_clip_in_second_pass = (float)0x7FFFFFFF;
+
+ /*Buffer play for single pass*/
+ if(rc_pass != 2)
+ {
+ /*Find ps_bit_allocation->f_min_complexity_cross_peak_rate*/
+ /*Find the complexity which maps to peak bit-rate*/
+ /* Only the first peak rate entry (index 0) is used here */
+ {
+ ps_bit_allocation->f_min_complexity_cross_peak_rate = ba_get_min_complexity_for_peak_br(
+ i4_peak_bit_rate[0], i4_bit_rate, 10.0f, 1.0f, 0.0f, rc_pass);
+ }
+ }
+
+ ps_bit_allocation->i4_total_2pass_frames = 0;
+ ps_bit_allocation->i8_2pass_avg_bit_rate = -1;
+}
+
+/*****************************************************************************
+  Function Name : ba_init_stat_data
+  Description   : The stat file is parsed at the end of init, i.e. after bit
+                  allocation init has already happened, and the memory for the
+                  gop stat data is allocated inside the stat file parsing code.
+                  Hence the gop stat pointer has to be updated again here.
+                  The function also pushes the picture distribution of the
+                  current gop into pic handling, sets the RBIP for the period
+                  and derives the rounded per frame bit allocation.
+  Inputs        : ps_bit_allocation
+                  ps_pic_handling
+                  pv_gop_stat             - pointer to update
+                  pi4_pic_dist_in_cur_gop - MAX_PIC_TYPE frame counts, one per
+                                            picture type
+                  i4_total_bits_in_period - bits available for the period
+                  i4_excess_bits          - not used by this function
+  Outputs       : pv_gop_stat, i8_2pass_alloc_per_frm_bits and
+                  i8_bit_consumption_so_far of ps_bit_allocation are written.
+                  i8_2pass_alloc_per_frm_bits is set to 0 when the picture
+                  distribution sums to zero frames.
+  Revision History:
+     DD MM YYYY   Author(s)       Changes (Describe the changes made)
+*****************************************************************************/
+void ba_init_stat_data(
+    bit_allocation_t *ps_bit_allocation,
+    pic_handling_handle ps_pic_handling,
+    void *pv_gop_stat,
+    WORD32 *pi4_pic_dist_in_cur_gop,
+    WORD32 i4_total_bits_in_period,
+    WORD32 i4_excess_bits)
+
+{
+    WORD32 i4_tot_frames_in_gop = 0, i;
+
+    /* Kept in the signature for the callers; nothing below reads it */
+    (void)i4_excess_bits;
+
+    ps_bit_allocation->pv_gop_stat = pv_gop_stat;
+
+    /*Init RBIP*/
+    /*Get the complexity*/
+    ASSERT(ps_bit_allocation->i8_cur_gop_num == 0);
+    ASSERT(ps_bit_allocation->i8_frm_num_in_gop == 0);
+
+    /*Init frames of each type*/
+    for(i = 0; i < MAX_PIC_TYPE; i++)
+    {
+        i4_tot_frames_in_gop += pi4_pic_dist_in_cur_gop[i];
+    }
+
+    /*ASSERT(i4_tot_frames_in_gop == i4_intra_period);*/
+    /*Also allocate data for first GOP*/
+    /*Added for getting actual gop structure*/
+    pic_type_update_frms_in_gop(ps_pic_handling, pi4_pic_dist_in_cur_gop);
+
+    multi_pass_set_rbip(
+        &ps_bit_allocation->s_rbip, ps_pic_handling, i4_total_bits_in_period, i4_tot_frames_in_gop);
+
+    /* Rounded division total_bits / total_frames.
+       - The rounding term is added in 64 bit so that a total close to the
+         32 bit limit cannot overflow before the division.
+       - An all zero picture distribution would divide by zero, so that
+         case is mapped to an allocation of 0 bits per frame. */
+    if(i4_tot_frames_in_gop != 0)
+    {
+        ps_bit_allocation->i8_2pass_alloc_per_frm_bits =
+            ((LWORD64)i4_total_bits_in_period + (i4_tot_frames_in_gop >> 1)) /
+            i4_tot_frames_in_gop;
+    }
+    else
+    {
+        ps_bit_allocation->i8_2pass_alloc_per_frm_bits = 0;
+    }
+    ps_bit_allocation->i8_bit_consumption_so_far = 0;
+
+    ASSERT(ps_bit_allocation->i4_ba_rc_pass == 2);
+}
+#endif /* #if NON_STEADSTATE_CODE */
+
+/*****************************************************************************
+  Function Name : bit_alloc_get_intra_bits
+  Description   : Estimates the bits for an I picture. The bits available in
+                  the period are prorated to a bit allocation window (limited
+                  by the buffer size, the LAP window and the gop length),
+                  clipped to a buffer based maximum, and then split either
+                  through get_bits_based_on_complexity() or through the
+                  I_to_avg_rest ratio.
+  Inputs        : ps_bit_allocation
+                  ps_pic_handling
+                  ps_cbr_buf_handling     - queried for buffer size, current
+                                            buffer position, rc type, max drain
+                                            rate and delay frames
+                  e_pic_type              - forwarded to
+                                            get_bits_based_on_complexity()
+                  pvq_complexity_estimate - forwarded to
+                                            get_bits_based_on_complexity(); when
+                                            NULL the current buffer position is
+                                            taken as 0
+                  i4_is_scd               - non zero selects the I_to_avg_rest
+                                            based estimate
+                  I_to_avg_rest
+                  i4_call_type            - 1 uses update_rbip() plus the frame
+                                            level bit error and enables traces,
+                                            any other value uses
+                                            ret_rbip_default_preenc()
+                  i4_non_I_scd
+                  f_percent_head_bits     - only read in a branch that needs
+                                            rc pass 2 (see note in the body)
+  Outputs       : i4_frame_level_bit_error may be reset to 0 and the RBIP may
+                  be reduced through update_rbip() (VBR_STREAMING, call type 1)
+  Returns       : Texture bits when the complexity based path is taken, total
+                  bits when the I_to_avg_rest (scene cut) path is taken
+  Revision History:
+     DD MM YYYY   Author(s)       Changes (Describe the changes made)
+*****************************************************************************/
+WORD32 bit_alloc_get_intra_bits(
+    bit_allocation_t *ps_bit_allocation,
+    pic_handling_handle ps_pic_handling,
+    cbr_buffer_handle ps_cbr_buf_handling,
+    picture_type_e e_pic_type,
+    number_t *pvq_complexity_estimate,
+    WORD32 i4_is_scd,
+    float I_to_avg_rest,
+    WORD32 i4_call_type,
+    WORD32 i4_non_I_scd,
+    float f_percent_head_bits)
+{
+    WORD32 ai4_frms_in_period[MAX_PIC_TYPE], ai4_frm_in_gop[MAX_PIC_TYPE], tot_frms_in_period = 0;
+    WORD32 i4_field_pic, i4_lap_window;
+    /*obtain buffer size*/
+    WORD32 i4_buffer_size =
+        ((get_cbr_buffer_size(ps_cbr_buf_handling)) >> 4) * UPPER_THRESHOLD_EBF_Q4;
+    WORD32 i4_cur_buf_pos = get_cbr_ebf(ps_cbr_buf_handling), i4_max_buffer_based,
+           i4_max_buffer_based_I_pic, i, i4_num_scaled_frms = 1;
+    WORD32 i4_bit_alloc_window =
+        (ps_bit_allocation->s_rbip.i4_tot_frms_in_gop *
+         ps_bit_allocation->s_rbip.i4_num_intra_frm_interval);
+    WORD32 i4_num_buf_frms,
+        ai4_frms_in_baw[MAX_PIC_TYPE]; //window for which I frame bit allocation is done
+    WORD32 i4_bits_in_period, i4_frames_in_buf = 0, i4_default_bits_in_period = 0;
+    WORD32 i4_est_bits_for_I, i4_peak_drain_rate, i4_subgop_size;
+    rc_type_e rc_type = get_rc_type(ps_cbr_buf_handling);
+    pic_type_get_actual_frms_in_gop(ps_pic_handling, ai4_frm_in_gop);
+
+    /* Frames of each type in the period; the bit allocation window starts out
+       identical to the period and is shrunk further below if required */
+    for(i = 0; i < MAX_PIC_TYPE; i++)
+    {
+        ai4_frms_in_baw[i] =
+            ai4_frm_in_gop[i] * ps_bit_allocation->s_rbip.i4_num_intra_frm_interval;
+        ai4_frms_in_period[i] =
+            ai4_frm_in_gop[i] * ps_bit_allocation->s_rbip.i4_num_intra_frm_interval;
+        tot_frms_in_period += ai4_frm_in_gop[i];
+    }
+
+    if(i4_call_type == 1)
+    {
+        i4_default_bits_in_period = update_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling, 0);
+        /* Drop the accumulated frame level error if it would leave less than
+           30% of the default bits for the period */
+        if((i4_default_bits_in_period + ps_bit_allocation->i4_frame_level_bit_error) <
+           (i4_default_bits_in_period * 0.30))
+        {
+            ps_bit_allocation->i4_frame_level_bit_error = 0; //-(i4_default_bits_in_period * 0.70);
+        }
+        i4_bits_in_period = i4_default_bits_in_period + ps_bit_allocation->i4_frame_level_bit_error;
+        if(i4_non_I_scd == 0)
+        {
+            /*For the first gop unnecessarily the QP is going high in order to prevent this bits corresponding to full gop instead of gop-subgop*/
+
+            WORD32 i4_intra_int = pic_type_get_intra_frame_interval(ps_pic_handling);
+            WORD32 i4_inter_int = pic_type_get_inter_frame_interval(ps_pic_handling);
+            if((tot_frms_in_period ==
+                (i4_intra_int - i4_inter_int + (1 << ps_bit_allocation->i4_field_pic))) &&
+               (i4_intra_int != 1))
+            {
+                i4_bits_in_period =
+                    (WORD32)(i4_bits_in_period * ((float)i4_intra_int / tot_frms_in_period));
+            }
+        }
+        trace_printf("\nBits in period %d", i4_bits_in_period);
+    }
+    else
+    {
+        i4_bits_in_period = ret_rbip_default_preenc(&ps_bit_allocation->s_rbip, ps_pic_handling);
+
+        if(ps_bit_allocation->i4_ba_rc_pass == 2)
+            i4_default_bits_in_period = update_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling, 0);
+    }
+
+    i4_peak_drain_rate = get_buf_max_drain_rate(ps_cbr_buf_handling);
+    /* Buffer size expressed in frames (rounded division by the bits per frame) */
+    i4_num_buf_frms =
+        (get_cbr_buffer_size(ps_cbr_buf_handling) + (ps_bit_allocation->i4_bits_per_frm >> 1)) /
+        ps_bit_allocation->i4_bits_per_frm;
+    /*In VBR encoder buffer will be drained faster, i4_num_buf_frms should correspond to maximum number of bits that can be drained
+      In CBR both peak and average must be same*/
+    i4_num_buf_frms = i4_num_buf_frms * i4_peak_drain_rate / ps_bit_allocation->i4_bits_per_frm;
+
+    /* NOTE(review): i4_field_pic is not read below (ps_bit_allocation->i4_field_pic
+       is used instead); the call is retained as in the original code */
+    i4_field_pic = pic_type_get_field_pic(ps_pic_handling);
+
+    i4_subgop_size = pic_type_get_inter_frame_interval(ps_pic_handling);
+    if(pvq_complexity_estimate == NULL)
+        i4_cur_buf_pos = 0;
+
+    /*assume minimum lap visibilty.A static I frame is given only the bits of duration for which we have visibility*/
+    i4_lap_window = ps_bit_allocation->i4_lap_window;
+    if(i4_lap_window < MINIMUM_VISIBILITY_B4_STATIC_I)
+    {
+        i4_lap_window = MINIMUM_VISIBILITY_B4_STATIC_I;
+    }
+    else if((i4_lap_window < i4_num_buf_frms) && (i4_call_type == 1))
+    {
+        /*clip buffer window to max of lap window or buffer window*/
+        i4_num_buf_frms = i4_lap_window + i4_subgop_size;
+    }
+
+    if(i4_lap_window < MINIMUM_FRM_I_TO_REST_LAP_ENABLED)
+        i4_lap_window = MINIMUM_FRM_I_TO_REST_LAP_ENABLED;
+    if(ps_bit_allocation->i4_ba_rc_pass != 2)
+    {
+        if(i4_lap_window < i4_num_buf_frms)
+            i4_num_buf_frms = i4_lap_window;
+    }
+
+    if(i4_num_buf_frms > tot_frms_in_period)
+    {
+        i4_num_buf_frms = tot_frms_in_period;
+        i4_bit_alloc_window = i4_num_buf_frms;
+    }
+    /*get picture type dist based on bit alloc window*/
+    if(i4_num_buf_frms < tot_frms_in_period)
+    {
+        /* Prorate every non I picture type to the window (rounded division);
+           i4_num_scaled_frms starts at 1, see its initialiser */
+        for(i = 1; i < ps_bit_allocation->i4_num_active_pic_type; i++)
+        {
+            ai4_frms_in_baw[i] =
+                (ai4_frms_in_period[i] * i4_num_buf_frms + (tot_frms_in_period >> 1)) /
+                tot_frms_in_period;
+            i4_num_scaled_frms += ai4_frms_in_baw[i];
+            if(ps_bit_allocation->i4_field_pic)
+            {
+                ai4_frms_in_baw[i + FIELD_OFFSET] = ai4_frms_in_baw[i];
+                i4_num_scaled_frms += ai4_frms_in_baw[i];
+            }
+        }
+        if(ps_bit_allocation->i4_field_pic)
+        {
+            /* NOTE(review): literal picture type index 5 - presumably the second
+               field counterpart of the I picture; confirm against picture_type_e */
+            ai4_frms_in_baw[5]++;
+            i4_num_scaled_frms++;
+        }
+        //if prorating is not exact account for diff with highest layer pic types
+        if(!ps_bit_allocation->i4_field_pic)
+        {
+            ai4_frms_in_baw[ps_bit_allocation->i4_num_active_pic_type - 1] +=
+                (i4_num_buf_frms - i4_num_scaled_frms);
+        }
+        else
+        {
+            ai4_frms_in_baw[ps_bit_allocation->i4_num_active_pic_type - 1] +=
+                ((i4_num_buf_frms - i4_num_scaled_frms) >> 1);
+            ai4_frms_in_baw[ps_bit_allocation->i4_num_active_pic_type - 1 + FIELD_OFFSET] +=
+                ((i4_num_buf_frms - i4_num_scaled_frms) >> 1);
+        }
+        i4_bits_in_period =
+            ((LWORD64)i4_bits_in_period * i4_num_buf_frms + (tot_frms_in_period >> 1)) /
+            tot_frms_in_period;
+        i4_bit_alloc_window = i4_num_buf_frms;
+    }
+
+    /* Space left in the buffer: upper bound for a single I picture */
+    i4_max_buffer_based_I_pic = i4_buffer_size - i4_cur_buf_pos;
+
+    for(i = 0; i < MAX_PIC_TYPE; i++)
+    {
+        i4_frames_in_buf += ai4_frms_in_baw[i];
+    }
+
+    if((rc_type == VBR_STREAMING) && (i4_call_type == 1))
+    {
+        WORD32 i4_delay_frames = cbr_get_delay_frames(ps_cbr_buf_handling);
+        i4_max_buffer_based =
+            (i4_peak_drain_rate *
+                 (ps_bit_allocation->s_rbip.i4_tot_frms_in_gop + (WORD32)(i4_delay_frames * 0.8f)) -
+             i4_cur_buf_pos);
+
+        /*RBIP is updated once it is restricted for an Intra period */
+        if(i4_default_bits_in_period > i4_max_buffer_based)
+            update_rbip(
+                &ps_bit_allocation->s_rbip,
+                ps_pic_handling,
+                i4_max_buffer_based - i4_default_bits_in_period);
+
+        i4_max_buffer_based =
+            (i4_peak_drain_rate * (i4_frames_in_buf + (WORD32)(i4_delay_frames * 0.8f)) -
+             i4_cur_buf_pos);
+    }
+    else
+    {
+        i4_max_buffer_based =
+            ((((LWORD64)i4_buffer_size - i4_cur_buf_pos) / ps_bit_allocation->i4_bits_per_frm) +
+             i4_frames_in_buf) *
+            i4_peak_drain_rate;
+    }
+
+    /*the estimated bits for total period is clipped to buffer limits*/
+    if(i4_bits_in_period > i4_max_buffer_based)
+        i4_bits_in_period = i4_max_buffer_based;
+
+    /*get I pic bits with altered bits in period*/
+    if((!i4_is_scd) &&
+       (ps_bit_allocation->i4_num_frames_since_last_I_frame <
+        (ps_bit_allocation->i4_frame_rate * 2) / 1000) &&
+       (ps_bit_allocation->i4_ba_rc_pass != 2))
+    {
+        /*returns texture bits*/
+        LWORD64 i8_header_bits_in_previous_period = 0, i8_total_bits_in_previous_period = 0,
+                i4_frames_in_header = 0;
+        WORD32 i4_texture_bits = 0;
+        float f_percent_header_bits = 0.0f;
+        /* Remove the header bits from the remaining bits to find how many bits you
+        can transfer.*/
+        for(i = 0; i < MAX_PIC_TYPE; i++)
+        {
+            /* Widen before multiplying so that the product is formed in
+               64 bit, matching the 64 bit accumulators */
+            i8_header_bits_in_previous_period +=
+                ((LWORD64)ps_bit_allocation->i4_prev_frm_header_bits[i] * ai4_frms_in_baw[i]);
+            i8_total_bits_in_previous_period +=
+                ((LWORD64)ps_bit_allocation->ai4_prev_frm_tot_bits[i] * ai4_frms_in_baw[i]);
+            i4_frames_in_header += ai4_frms_in_baw[i];
+        }
+
+        /* NOTE(review): the enclosing condition requires i4_ba_rc_pass != 2, so
+           the first branch below cannot be taken from here */
+        if((i4_call_type == 1) && (ps_bit_allocation->i4_ba_rc_pass == 2))
+        {
+            i4_texture_bits = (WORD32)(i4_bits_in_period * (1 - f_percent_head_bits));
+        }
+        else
+        {
+            /* NOTE(review): no guard for i8_total_bits_in_previous_period == 0 */
+            f_percent_header_bits =
+                (float)i8_header_bits_in_previous_period / i8_total_bits_in_previous_period;
+            i4_texture_bits =
+                i4_bits_in_period - (WORD32)(f_percent_header_bits * i4_bits_in_period);
+        }
+
+        if(i4_call_type == 1)
+        {
+            /* The cast covers the whole product (casting the fraction alone
+               truncates it before the multiply) and the 64 bit frame count is
+               narrowed to match the %d conversion */
+            trace_printf(
+                "\nHeader Bits in period %d, total_frames %d "
+                "i4_max_buffer_based %d ",
+                (WORD32)(f_percent_header_bits * i4_bits_in_period),
+                (WORD32)i4_frames_in_header,
+                i4_max_buffer_based);
+        }
+        i4_est_bits_for_I = get_bits_based_on_complexity(
+            ps_bit_allocation,
+            i4_texture_bits,
+            ai4_frms_in_baw,
+            pvq_complexity_estimate,
+            e_pic_type,
+            i4_call_type);
+        /*twice the bitrate */
+        if(i4_est_bits_for_I > ((ps_bit_allocation->i4_bit_rate << 1) -
+                                ps_bit_allocation->i4_prev_frm_header_bits[I_PIC]))
+            i4_est_bits_for_I =
+                ((ps_bit_allocation->i4_bit_rate << 1) -
+                 ps_bit_allocation->i4_prev_frm_header_bits[I_PIC]);
+
+        if(i4_est_bits_for_I >
+           (i4_max_buffer_based_I_pic - ps_bit_allocation->i4_prev_frm_header_bits[I_PIC]))
+        {
+            i4_est_bits_for_I =
+                (i4_max_buffer_based_I_pic - ps_bit_allocation->i4_prev_frm_header_bits[I_PIC]);
+        }
+    }
+    else
+    {
+        /*returns total bits incase of scene cut*/
+        ASSERT(ai4_frms_in_baw[I_PIC] != 0);
+        if((i4_non_I_scd == 1) && (i4_call_type == 1) &&
+           (ps_bit_allocation->f_curr_i_to_sum != 1.0f))
+            ai4_frms_in_baw[I_PIC]++;
+
+        /* Share of the period bits that goes to all I pictures when an I
+           picture costs I_to_avg_rest times an average non I picture */
+        i4_est_bits_for_I = (WORD32)(
+            (i4_bits_in_period * I_to_avg_rest * ai4_frms_in_baw[I_PIC]) /
+            (ai4_frms_in_baw[I_PIC] * I_to_avg_rest +
+             (i4_bit_alloc_window - ai4_frms_in_baw[I_PIC])));
+
+        if(i4_call_type == 1)
+            i4_est_bits_for_I =
+                (WORD32)((float)i4_est_bits_for_I * ps_bit_allocation->f_curr_i_to_sum);
+        else
+        {
+            if(ai4_frms_in_baw[I_PIC] > 0)
+                i4_est_bits_for_I = (WORD32)((float)i4_est_bits_for_I / ai4_frms_in_baw[I_PIC]);
+        }
+
+        if(i4_call_type == 1)
+        {
+            trace_printf(
+                "bits in period %d I_to_avg_rest %f f_curr_i_to_sum %f i "
+                "frames %d i4_non_I_scd %d ",
+                i4_bits_in_period,
+                I_to_avg_rest,
+                ps_bit_allocation->f_curr_i_to_sum,
+                ai4_frms_in_baw[I_PIC],
+                i4_non_I_scd);
+        }
+
+        if(i4_est_bits_for_I > (ps_bit_allocation->i4_bit_rate << 1))
+            i4_est_bits_for_I = (ps_bit_allocation->i4_bit_rate << 1);
+        if(i4_est_bits_for_I > i4_max_buffer_based_I_pic)
+            i4_est_bits_for_I = i4_max_buffer_based_I_pic;
+    }
+
+    return i4_est_bits_for_I;
+}
+
+/*****************************************************************************
+ Function Name : get_cur_frm_est_texture_bits
+ Description : Estimates the texture bits for the current frame from the
+ remaining bits in the period and a per-picture-type complexity
+ estimate (fixed default ratios, replaced by rd-model based values
+ when i4_use_model is set).
+ Inputs : ps_bit_allocation - bit_allocation structure (also updated, see Outputs)
+ pps_rd_model - rd model handles, indexed by picture type
+ ps_est_sad - estimated SAD handle, read only when i4_use_model is set
+ ps_pic_handling - picture handling handle (frame counts per GOP)
+ ps_cbr_buffer - cbr buffer handle (drain rate, delay, buffer size, ebf)
+ e_pic_type - picture type of the current frame
+ i4_use_model - non-zero: derive complexity from the rd model and est SAD
+ i4_is_scd_frame - non-zero: a non-I frame is budgeted as an I_PIC
+ i4_call_type - 1: start from update_rbip(..., 0) plus the frame level
+ bit error; any other value: start from
+ ret_rbip_default_preenc() less an assumed 30% header share
+ i_to_avg_ratio - clipped to [I_TO_AVG_REST_GOP_BIT_MIN,
+ I_TO_AVG_REST_GOP_BIT_MAX] and used for I_PIC only
+ i4_is_model_valid - read only when i4_call_type == 1 and i4_ba_rc_pass != 2
+ Globals :
+ Processing : 1) Seed avq_complexity_estimate with the hard-coded bit ratios.
+ 2) Fetch frames per GOP and scale them by i4_num_gops_in_period.
+ 3) Derive i4_rem_texture_bits (see i4_call_type above) and remove
+ an estimated header share from it.
+ 4) For i4_call_type == 1, cap af_sum_weigh[e_pic_type][0] when the
+ buffer is more than 60% / 80% full.
+ 5) I_PIC (or SCD frame): bit_alloc_get_intra_bits(); otherwise
+ get_bits_based_on_complexity().
+ Outputs : ps_bit_allocation->i4_excess_bits_from_buffer is set to 0.
+ ps_bit_allocation->i4_frame_level_bit_error may be reset to 0.
+ ps_bit_allocation->af_sum_weigh[e_pic_type][0] may be capped.
+ Returns : Estimated texture bits for the frame; negative results are
+ replaced by 0.
+ Issues : NOTE(review): the two float divisions that derive the header share
+ have no guard against a zero denominator; the result is later
+ converted to WORD32.
+ NOTE(review): i4_max_consumable_bits, i4_intra_frm_int,
+ i4_inter_frame_int, i4_field_pic and complexity_est are computed
+ but never read afterwards in this function.
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+WORD32 get_cur_frm_est_texture_bits(
+ bit_allocation_t *ps_bit_allocation,
+ rc_rd_model_handle *pps_rd_model,
+ est_sad_handle ps_est_sad,
+ pic_handling_handle ps_pic_handling,
+ cbr_buffer_handle ps_cbr_buffer,
+ picture_type_e e_pic_type,
+ WORD32 i4_use_model,
+ WORD32 i4_is_scd_frame,
+ WORD32 i4_call_type,
+ float i_to_avg_ratio,
+ WORD32 i4_is_model_valid)
+{
+ WORD32 i, j;
+ WORD32 i4_est_texture_bits_for_frm;
+ WORD32 i4_rem_texture_bits;
+ number_t avq_complexity_estimate[MAX_PIC_TYPE];
+ WORD32 ai4_frms_in_period[MAX_PIC_TYPE];
+ WORD32 i4_max_consumable_bits, i4_est_tot_head_bits_period = 0, i4_total_bits_prev_gop = 0;
+ WORD32 i4_field_pic, i4_inter_frame_int;
+ WORD32 complexity_est = 0;
+ float f_percent_head_bits = 0.0f;
+ WORD32 i4_intra_frm_int;
+ i4_intra_frm_int = pic_type_get_actual_intra_frame_interval(ps_pic_handling);
+ i4_inter_frame_int = pic_type_get_inter_frame_interval(ps_pic_handling);
+ i4_field_pic = pic_type_get_field_pic(ps_pic_handling); /* NOTE(review): i4_intra_frm_int, i4_inter_frame_int and i4_field_pic are not read again in this function */
+
+ /* If the complexity estimate is not filled based on
+ 1) Since not using model
+ 2) Using the module but one of the estimate values are zero
+ Then set the complexity estimate based on default values.
+ The guarding condition below is commented out, so the defaults are always
+ written first and are overwritten later only when i4_use_model is set. */
+ // if(!complexity_est)
+ {
+ /* Hardcoding the bit ratios between I, P and B same as during init time*/
+ SET_VAR_Q(
+ avq_complexity_estimate[I_PIC],
+ (I_TO_P_BIT_RATIO * P_TO_B_BIT_RATIO * B_TO_B1_BIT_RATO0 * B1_TO_B2_BIT_RATIO),
+ 0);
+ SET_VAR_Q(
+ avq_complexity_estimate[P_PIC],
+ (P_TO_B_BIT_RATIO * B_TO_B1_BIT_RATO0 * B1_TO_B2_BIT_RATIO),
+ 0);
+ SET_VAR_Q(
+ avq_complexity_estimate[P1_PIC],
+ (P_TO_B_BIT_RATIO * B_TO_B1_BIT_RATO0 * B1_TO_B2_BIT_RATIO),
+ 0);
+ SET_VAR_Q(avq_complexity_estimate[B_PIC], (B_TO_B1_BIT_RATO0 * B1_TO_B2_BIT_RATIO), 0);
+ SET_VAR_Q(avq_complexity_estimate[BB_PIC], (B_TO_B1_BIT_RATO0 * B1_TO_B2_BIT_RATIO), 0);
+ SET_VAR_Q(
+ avq_complexity_estimate[B1_PIC],
+ (B1_TO_B2_BIT_RATIO),
+ 0); //temporarily treat ref and non ref as same
+ SET_VAR_Q(avq_complexity_estimate[B11_PIC], (B1_TO_B2_BIT_RATIO), 0);
+ SET_VAR_Q(avq_complexity_estimate[B2_PIC], 1, 0);
+ SET_VAR_Q(avq_complexity_estimate[B22_PIC], 1, 0);
+ }
+ /* Get the rem_frms_in_gop & the frms_in_gop from the pic_type state struct */
+ /* pic_type_get_rem_frms_in_gop(ps_pic_handling, ai4_rem_frms_in_period); */
+ pic_type_get_frms_in_gop(ps_pic_handling, ai4_frms_in_period);
+
+ /* Depending on the number of gops in a period, find the num_frms_in_prd */
+ for(j = 0; j < MAX_PIC_TYPE; j++)
+ {
+ /* ai4_rem_frms_in_period[j] += (ai4_frms_in_period[j] * (ps_bit_allocation->i4_num_gops_in_period - 1)); */
+ ai4_frms_in_period[j] *= ps_bit_allocation->i4_num_gops_in_period; /* per-GOP count scaled to the whole period */
+ }
+
+ /* If a frame is detected as SCD and bit allocation is asked for the remaining part of the frame
+ we allocate bits assuming that frame as a I frame. So reduce 1 frame from the picture type coming in
+ and add that to I frame */
+ if(i4_is_scd_frame && e_pic_type != I_PIC)
+ {
+ /* ai4_rem_frms_in_period[0]++;ai4_rem_frms_in_period[e_pic_type]--; */
+ ai4_frms_in_period[0]++; /* index 0 is presumably I_PIC - confirm against picture_type_e */
+ ai4_frms_in_period[e_pic_type]--;
+ }
+ /*HEVC_hierarchy: calculate header bits for all frames in period*/
+ for(j = 0; j < MAX_PIC_TYPE; j++)
+ {
+ i4_est_tot_head_bits_period +=
+ ai4_frms_in_period[j] * ps_bit_allocation->i4_prev_frm_header_bits[j];
+ i4_total_bits_prev_gop +=
+ ai4_frms_in_period[j] * ps_bit_allocation->ai4_prev_frm_tot_bits[j];
+ }
+
+ {
+ WORD32 ai4_actual_frms_in_gop[MAX_PIC_TYPE], i, i4_total_frames = 0; /* this i shadows the function-level i */
+ pic_type_get_actual_frms_in_gop(ps_pic_handling, ai4_actual_frms_in_gop);
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ i4_total_frames += ai4_actual_frms_in_gop[i];
+ }
+ i4_max_consumable_bits = ps_bit_allocation->i4_max_bits_per_frm[0] * i4_total_frames; /* NOTE(review): assigned but never read in this function */
+
+ /* Remove the header bits from the remaining bits to find how many bits you
+ can transfer.*/
+ if(i4_call_type == 1)
+ {
+ if(ps_bit_allocation->i4_ba_rc_pass == 2)
+ {
+ WORD32 i4_tot_frm_remain = 0, i4_tot_head_bits_in_gop = 0,
+ i4_tot_bits_last_in_gop = 0, i4_use_default_flag = 0;
+
+ WORD32 i4_rbip = update_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling, 0);
+ if((i4_rbip + ps_bit_allocation->i4_frame_level_bit_error) < (i4_rbip * 0.30)) /* error would leave less than 30% of rbip: drop it */
+ {
+ ps_bit_allocation->i4_frame_level_bit_error = 0; //-(i4_rbip * 0.70);
+ }
+ i4_rem_texture_bits =
+ i4_rbip +
+ ps_bit_allocation->i4_frame_level_bit_error /*- i4_est_tot_head_bits_period*/
+ ;
+
+ i4_est_tot_head_bits_period = 0; /* not read again on this path */
+ for(j = 0; j < MAX_PIC_TYPE; j++)
+ {
+ if((WORD32)ps_bit_allocation->af_sum_weigh[j][1] > 0) /* weight is truncated to WORD32 before the test */
+ {
+ i4_tot_frm_remain += (WORD32)ps_bit_allocation->af_sum_weigh[j][1];
+ i4_tot_head_bits_in_gop += (WORD32)(
+ ps_bit_allocation->i4_prev_frm_header_bits[j] *
+ ps_bit_allocation->af_sum_weigh[j][1]);
+ i4_tot_bits_last_in_gop += (WORD32)(
+ ps_bit_allocation->ai4_prev_frm_tot_bits[j] *
+ ps_bit_allocation->af_sum_weigh[j][1]);
+ if(ps_bit_allocation->ai4_prev_frm_tot_bits[j] == -1) /* -1: no total recorded for this picture type */
+ {
+ i4_use_default_flag = 1;
+ }
+ }
+ }
+
+ if(i4_use_default_flag != 1)
+ {
+ f_percent_head_bits = (float)i4_tot_head_bits_in_gop / i4_tot_bits_last_in_gop; /* NOTE(review): i4_tot_bits_last_in_gop stays 0 when no picture type has a weight > 0; no guard here */
+
+ if(f_percent_head_bits > 0.7f)
+ f_percent_head_bits = 0.7f;
+
+ /*Subtracting a percentage of header bits from the remaining bits in period*/
+ i4_rem_texture_bits = (WORD32)(i4_rem_texture_bits * (1 - f_percent_head_bits));
+ }
+ else
+ {
+ /*Assuming 30% of bits will go for header bits in a gop in preenc*/
+ i4_rem_texture_bits -= (WORD32)((float)i4_rem_texture_bits * 0.3f);
+ }
+
+ trace_printf(
+ "Remaining texture bits %d fbe %d fphb %f thbg %d tblg %d",
+ i4_rem_texture_bits,
+ ps_bit_allocation->i4_frame_level_bit_error,
+ f_percent_head_bits,
+ i4_tot_head_bits_in_gop,
+ i4_tot_bits_last_in_gop);
+ }
+ else
+ {
+ /* Remove the header bits from the remaining bits to find how many bits you
+ can transfer.*/
+ WORD32 i4_rbip = update_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling, 0);
+ if((i4_rbip + ps_bit_allocation->i4_frame_level_bit_error) < (i4_rbip * 0.30)) /* error would leave less than 30% of rbip: drop it */
+ {
+ ps_bit_allocation->i4_frame_level_bit_error = 0; //-(i4_rbip * 0.70);
+ }
+ i4_rem_texture_bits = update_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling, 0) +
+ ps_bit_allocation->i4_frame_level_bit_error; /* NOTE(review): second update_rbip(..., 0) call; presumably returns the same value as i4_rbip - confirm */
+
+ i4_est_tot_head_bits_period = (WORD32)(
+ ((float)(i4_est_tot_head_bits_period) / (float)i4_total_bits_prev_gop) *
+ i4_rem_texture_bits); /* NOTE(review): no guard for i4_total_bits_prev_gop == 0 */
+
+ if(i4_is_model_valid)
+ {
+ i4_rem_texture_bits = i4_rem_texture_bits - i4_est_tot_head_bits_period;
+ }
+ else
+ {
+ /*inorder to estimate the buffer position for model invalid cases, to control
+ encoder buffer overflow*/
+ i4_rem_texture_bits = ((i4_rem_texture_bits * 3) >> 1); /* 1.5 times the remaining bits; header share is not removed */
+ }
+
+ trace_printf(
+ "Remaining texture bits %d fbe %d ethp %d",
+ i4_rem_texture_bits,
+ ps_bit_allocation->i4_frame_level_bit_error,
+ i4_est_tot_head_bits_period);
+ }
+
+ {
+ WORD32 i4_drain_bits_per_frame = get_buf_max_drain_rate(ps_cbr_buffer), i4_ebf;
+ WORD32 i4_delay = cbr_get_delay_frames(ps_cbr_buffer), max_buffer_level = 0,
+ rc_type = get_rc_type(ps_cbr_buffer);
+
+ if(rc_type == VBR_STREAMING)
+ max_buffer_level = i4_drain_bits_per_frame * i4_delay;
+ else
+ max_buffer_level = get_cbr_buffer_size(ps_cbr_buffer);
+
+ i4_ebf = get_cbr_ebf(ps_cbr_buffer);
+
+ if(i4_ebf > (WORD32)(0.8f * max_buffer_level)) /* above 80% of the buffer level: cap the weight at 1.0 */
+ {
+ if(ps_bit_allocation->af_sum_weigh[e_pic_type][0] > 1.0f)
+ ps_bit_allocation->af_sum_weigh[e_pic_type][0] = 1.0f;
+ }
+ if(i4_ebf > (WORD32)(0.6f * max_buffer_level)) /* above 60%: cap the weight at 1.5 (no effect once the 1.0 cap above was applied) */
+ {
+ if(ps_bit_allocation->af_sum_weigh[e_pic_type][0] > 1.5f)
+ ps_bit_allocation->af_sum_weigh[e_pic_type][0] = 1.5f;
+ }
+ }
+ }
+ else
+ {
+ i4_rem_texture_bits =
+ ret_rbip_default_preenc(&ps_bit_allocation->s_rbip, ps_pic_handling);
+ /*Assuming 30% of bits will go for header bits in a gop in preenc*/
+ i4_rem_texture_bits -= (WORD32)(i4_rem_texture_bits * 0.3f);
+ }
+ }
+
+ if(i4_use_model)
+ {
+ /* The bits are then allocated based on the relative complexity of the
+ current frame with respect to that of the rest of the frames in period */
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ number_t vq_lin_mod_coeff, vq_est_sad, vq_K;
+
+ if(ai4_frms_in_period[i] > 0) /*Added for field case */
+ {
+ /* Getting the linear model coefficient */
+ vq_lin_mod_coeff = get_linear_coefficient(pps_rd_model[i]);
+ /* Getting the estimated SAD */
+ SET_VAR_Q(vq_est_sad, get_est_sad(ps_est_sad, (picture_type_e)i), 0);
+ /* Making K factor a var Q format */
+ SET_VAR_Q(vq_K, ps_bit_allocation->i2_K[i], K_Q);
+ /* Complexity_estimate = [ (lin_mod_coeff * estimated_sad) / K factor ] */
+ mult32_var_q(vq_lin_mod_coeff, vq_est_sad, &vq_lin_mod_coeff);
+ div32_var_q(vq_lin_mod_coeff, vq_K, &avq_complexity_estimate[i]);
+ }
+ }
+ /*flag to check if complexity estimate is available*/
+ complexity_est = 1;
+
+ /*HEVC_hierarchy: If complexity estimate = 0 for any pic type then use default ratios*/
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ if(ai4_frms_in_period[i] > 0)
+ {
+ complexity_est = complexity_est && avq_complexity_estimate[i].sm; /* NOTE(review): complexity_est is not read after this loop, so no fallback to the default ratios happens here */
+ }
+ }
+ }
+
+ /* Make the picture type of the SCD frame a I_PIC */
+ if(i4_is_scd_frame && e_pic_type != I_PIC)
+ e_pic_type = I_PIC;
+
+ if(e_pic_type == I_PIC)
+ {
+ /*clip min max values*/
+ if(i_to_avg_ratio > I_TO_AVG_REST_GOP_BIT_MAX)
+ i_to_avg_ratio = I_TO_AVG_REST_GOP_BIT_MAX;
+
+ if(i_to_avg_ratio < I_TO_AVG_REST_GOP_BIT_MIN)
+ i_to_avg_ratio = I_TO_AVG_REST_GOP_BIT_MIN;
+
+ i4_est_texture_bits_for_frm = bit_alloc_get_intra_bits(
+ ps_bit_allocation,
+ ps_pic_handling,
+ ps_cbr_buffer,
+ e_pic_type,
+ avq_complexity_estimate,
+ 0,
+ i_to_avg_ratio,
+ i4_call_type,
+ 0,
+ f_percent_head_bits);
+ }
+ else
+ {
+ /* Get the texture bits required for the current frame */
+ i4_est_texture_bits_for_frm = get_bits_based_on_complexity(
+ ps_bit_allocation,
+ i4_rem_texture_bits,
+ ai4_frms_in_period,
+ avq_complexity_estimate,
+ e_pic_type,
+ i4_call_type);
+ }
+
+ ps_bit_allocation->i4_excess_bits_from_buffer = 0;
+
+ /* If the remaining bits in the period becomes negative then the estimated texture
+ bits would also become negative. This would send a feedback to the model which
+ may go for a toss. Thus sending the minimum possible value = 0 */
+ if(i4_est_texture_bits_for_frm < 0)
+ i4_est_texture_bits_for_frm = 0;
+
+ return (i4_est_texture_bits_for_frm);
+}
+
+/*****************************************************************************
+ Function Name : get_cur_frm_est_header_bits
+ Description   : Header bit estimate for the current frame. The estimate is
+                 the value stored in i4_prev_frm_header_bits for the given
+                 picture type.
+ Inputs        : ps_bit_allocation - bit_allocation structure
+                 e_pic_type        - picture type, used as the array index
+ Returns       : ps_bit_allocation->i4_prev_frm_header_bits[e_pic_type]
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+WORD32 get_cur_frm_est_header_bits(bit_allocation_t *ps_bit_allocation, picture_type_e e_pic_type)
+{
+    WORD32 i4_est_hdr_bits;
+
+    /* The stored header bits of this picture type serve as the estimate */
+    i4_est_hdr_bits = ps_bit_allocation->i4_prev_frm_header_bits[e_pic_type];
+
+    return i4_est_hdr_bits;
+}
+/*****************************************************************************
+ Function Name : get_rem_bits_in_period
+ Description   : Get remaining bits in period
+ Inputs        : ps_bit_allocation - bit_allocation structure
+                 ps_pic_handling   - picture handling handle, forwarded to
+                                     update_rbip()
+ Returns       : Value returned by update_rbip() for a delta of 0
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+WORD32
+    get_rem_bits_in_period(bit_allocation_t *ps_bit_allocation, pic_handling_handle ps_pic_handling)
+{
+    WORD32 i4_rem_bits;
+
+    /* Query through update_rbip() with a zero third argument */
+    i4_rem_bits = update_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling, 0);
+
+    return i4_rem_bits;
+}
+/*****************************************************************************
+ Function Name : get_bits_per_frame
+ Description   : Get Bits per frame
+ Inputs        : ps_bit_allocation - bit_allocation structure
+ Returns       : ps_bit_allocation->i4_bits_per_frm
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+WORD32 get_bits_per_frame(bit_allocation_t *ps_bit_allocation)
+{
+    return ps_bit_allocation->i4_bits_per_frm;
+}
+/*****************************************************************************
+ Function Name : ba_get_gop_bits
+ Description   : Bits allocated to the current GOP: the sum of
+                 i8_bits_allocated_to_gop and
+                 i8_buffer_play_bits_allocated_to_gop of the gop_level_stat_t
+                 entry selected by i8_cur_gop_num.
+ Inputs        : ps_bit_allocation - bit_allocation structure
+ Returns       : Sum of the two fields described above
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+LWORD64 ba_get_gop_bits(bit_allocation_t *ps_bit_allocation)
+{
+    /* pv_gop_stat is viewed as an array of gop_level_stat_t entries */
+    gop_level_stat_t *ps_gop_stats = (gop_level_stat_t *)ps_bit_allocation->pv_gop_stat;
+    gop_level_stat_t *ps_gop = &ps_gop_stats[ps_bit_allocation->i8_cur_gop_num];
+
+    return ps_gop->i8_bits_allocated_to_gop + ps_gop->i8_buffer_play_bits_allocated_to_gop;
+}
+/*****************************************************************************
+ Function Name : ba_get_gop_sad
+ Description   : Accumulated SAD (i8_acc_gop_sad) of the gop_level_stat_t
+                 entry selected by i8_cur_gop_num.
+ Inputs        : ps_bit_allocation - bit_allocation structure
+ Returns       : i8_acc_gop_sad of the current GOP entry
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+LWORD64 ba_get_gop_sad(bit_allocation_t *ps_bit_allocation)
+{
+    /* pv_gop_stat is viewed as an array of gop_level_stat_t entries */
+    gop_level_stat_t *ps_gop_stats = (gop_level_stat_t *)ps_bit_allocation->pv_gop_stat;
+    gop_level_stat_t *ps_gop = &ps_gop_stats[ps_bit_allocation->i8_cur_gop_num];
+
+    return ps_gop->i8_acc_gop_sad;
+}
+/*****************************************************************************
+ Function Name : ba_get_buffer_play_bits_for_cur_gop
+ Description   : Buffer play bits (i8_buffer_play_bits_allocated_to_gop) of
+                 the gop_level_stat_t entry selected by i8_cur_gop_num.
+ Inputs        : ps_bit_allocation - bit_allocation structure
+ Returns       : i8_buffer_play_bits_allocated_to_gop of the current GOP entry
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+LWORD64 ba_get_buffer_play_bits_for_cur_gop(bit_allocation_t *ps_bit_allocation)
+{
+    /* pv_gop_stat is viewed as an array of gop_level_stat_t entries */
+    gop_level_stat_t *ps_gop_stats = (gop_level_stat_t *)ps_bit_allocation->pv_gop_stat;
+    gop_level_stat_t *ps_gop = &ps_gop_stats[ps_bit_allocation->i8_cur_gop_num];
+
+    return ps_gop->i8_buffer_play_bits_allocated_to_gop;
+}
+
+
+/*****************************************************************************
+ Function Name : update_cur_frm_consumed_bits
+ Description : Updates the bit allocation state after a frame has been coded:
+ bit errors, GOP bit consumption, the previous-frame header/total
+ bits per picture type, and the remaining bits in period (rbip).
+ Inputs : ps_bit_allocation - bit_allocation structure (updated in place)
+ ps_pic_handling - picture handling handle
+ ps_cbr_buf_handle - cbr buffer handle (ebf, buffer size, corrections)
+ i4_total_frame_bits - total bits of the frame
+ i4_model_updation_hdr_bits - header bits stored as the new
+ previous-frame header bits
+ e_pic_type - picture type of the frame
+ u1_is_scd - non-zero: frame is a scene change; asserted to be I_PIC
+ i4_last_frm_in_period - non-zero on the last frame of the period
+ i4_lap_comp_bits_reset - NOTE(review): only cleared locally, never used
+ i4_suppress_bpic_update - non-zero: skip the estimate / error /
+ previous-frame bit updates
+ i4_buffer_based_bit_error - added to i4_buffer_based_bit_error
+ i4_stuff_bits - added to the frame bits for GOP level accounting
+ i4_lap_window_comp - lap window complexity; divided by 128 before
+ comparing with f_min_complexity_cross_peak_rate
+ e_rc_type - rate control type; VBR_STREAMING is handled specially
+ i4_num_gop - NOTE(review): not read in this function
+ i4_is_pause_to_resume - NOTE(review): only feeds i4_flag_no_error_calc,
+ which is never read
+ i4_est_text_bits_ctr_update_qp - index into ai4_cur_frm_est_hdr_bits and
+ ai4_cur_frm_est_tex_bits
+ pi4_gop_correction - written only when i4_ba_rc_pass != 2 and
+ i4_lap_window > inter frame interval
+ pi4_new_correction - decremented on the same path when the buffer
+ based correction applies
+ Globals :
+ Processing :
+ Outputs : ps_bit_allocation state; *pi4_gop_correction; *pi4_new_correction
+ Returns : None
+ Issues :
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+void update_cur_frm_consumed_bits(
+ bit_allocation_t *ps_bit_allocation,
+ pic_handling_handle ps_pic_handling,
+ cbr_buffer_handle ps_cbr_buf_handle,
+ WORD32 i4_total_frame_bits,
+ WORD32 i4_model_updation_hdr_bits,
+ picture_type_e e_pic_type,
+ UWORD8 u1_is_scd,
+ WORD32 i4_last_frm_in_period,
+ WORD32 i4_lap_comp_bits_reset,
+ WORD32 i4_suppress_bpic_update,
+ WORD32 i4_buffer_based_bit_error,
+ WORD32 i4_stuff_bits,
+ WORD32 i4_lap_window_comp,
+ rc_type_e e_rc_type,
+ WORD32 i4_num_gop,
+ WORD32 i4_is_pause_to_resume,
+ WORD32 i4_est_text_bits_ctr_update_qp,
+ WORD32 *pi4_gop_correction,
+ WORD32 *pi4_new_correction)
+{
+ WORD32 i4_error_bits = get_error_bits(ps_bit_allocation->ps_error_bits);
+ WORD32 i4_intra_frm_int, i, i4_flag_no_error_calc = 0; /*i_only*/
+ WORD32 i4_do_correction = 0;
+ i4_intra_frm_int = pic_type_get_intra_frame_interval(ps_pic_handling);
+ ps_bit_allocation->i4_rem_frame_in_period--;
+
+ /*No frame level bit error for top layer pictures*/
+
+ i4_flag_no_error_calc = /*((e_pic_type != B1_PIC && e_pic_type != B11_PIC) && ps_bit_allocation->i4_num_active_pic_type == 4)||
+ ((e_pic_type != B2_PIC && e_pic_type != B22_PIC) && ps_bit_allocation->i4_num_active_pic_type == 5) &&*/
+ (i4_is_pause_to_resume == 0); /* NOTE(review): i4_flag_no_error_calc is not read again in this function */
+
+ /* Update the remaining bits in period */
+ ps_bit_allocation->i4_bits_from_buffer_in_cur_gop +=
+ ps_bit_allocation->i4_excess_bits_from_buffer;
+ ps_bit_allocation->i4_buffer_based_bit_error -= ps_bit_allocation->i4_excess_bits_from_buffer;
+ ps_bit_allocation->i4_gop_level_bit_error +=
+ (-(i4_total_frame_bits + i4_stuff_bits) + i4_error_bits +
+ ps_bit_allocation->i4_bits_per_frm);
+ ps_bit_allocation->i8_cur_gop_bit_consumption += (i4_total_frame_bits + i4_stuff_bits);
+
+ //if(ps_bit_allocation-> == 2)ASSERT(i4_stuff_bits == 0);//No stuffing in two pass
+ //ASSERT(ps_bit_allocation->i4_prev_frm_header_bits[e_pic_type] == 0);
+ ps_bit_allocation->i4_buffer_based_bit_error += i4_buffer_based_bit_error;
+ ps_bit_allocation->i8_frm_num_in_gop++;
+ if(i4_last_frm_in_period && i4_lap_comp_bits_reset)
+ i4_lap_comp_bits_reset = 0; //end of period is always I frame boundary.
+
+ if(e_pic_type == I_PIC)
+ ps_bit_allocation->i4_num_frames_since_last_I_frame = 1;
+ else
+ ps_bit_allocation->i4_num_frames_since_last_I_frame++;
+
+ if((!i4_suppress_bpic_update))
+ {
+ //if(ps_bit_allocation->ai4_cur_frm_est_tex_bits[i4_est_text_bits_ctr_update_qp] > 0)
+ {
+ ps_bit_allocation->ai4_prev_frm_tot_est_bits[e_pic_type] =
+ ps_bit_allocation->ai4_cur_frm_est_hdr_bits[i4_est_text_bits_ctr_update_qp] +
+ ps_bit_allocation->ai4_cur_frm_est_tex_bits[i4_est_text_bits_ctr_update_qp];
+
+ ps_bit_allocation->i4_frame_level_bit_error +=
+ (ps_bit_allocation->ai4_cur_frm_est_hdr_bits[i4_est_text_bits_ctr_update_qp] +
+ ps_bit_allocation->ai4_cur_frm_est_tex_bits[i4_est_text_bits_ctr_update_qp] -
+ i4_total_frame_bits); /* estimated (header + texture) bits minus the bits passed in i4_total_frame_bits */
+ }
+
+ trace_printf(
+ "Prev frame header %d Total est %d total frame %d",
+ ps_bit_allocation->i4_prev_frm_header_bits[e_pic_type],
+ ps_bit_allocation->ai4_cur_frm_est_tex_bits[i4_est_text_bits_ctr_update_qp],
+ i4_total_frame_bits);
+ }
+
+ trace_printf(
+ " rbip = %d frame lbe = %d bbbe = %d bfbicg = %d\n",
+ update_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling, 0),
+ ps_bit_allocation->i4_frame_level_bit_error,
+ ps_bit_allocation->i4_buffer_based_bit_error,
+ ps_bit_allocation->i4_bits_from_buffer_in_cur_gop);
+
+ /* Update the header bits so that it can be used as an estimate to the next frame */
+ if(u1_is_scd)
+ {
+ /* Initialising the header bits to be used for each picture type */
+ init_prev_header_bits(ps_bit_allocation, ps_pic_handling);
+
+ /*init tot bits consumed of previous frame*/
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ ps_bit_allocation->ai4_prev_frm_tot_bits[i] = -1; /* -1: no total recorded yet for this picture type */
+ ps_bit_allocation->ai4_prev_frm_tot_est_bits[i] = -1;
+ }
+ /* In case of SCD, even though the frame type is P, it is equivalent to a I frame
+ and so the corresponding header bits is updated */
+ //ASSERT(i4_model_updation_hdr_bits == 0);
+ ps_bit_allocation->i4_prev_frm_header_bits[I_PIC] = i4_model_updation_hdr_bits;
+ ps_bit_allocation->ai4_prev_frm_tot_bits[I_PIC] = i4_total_frame_bits;
+ ps_bit_allocation->ai4_prev_frm_tot_est_bits[I_PIC] = i4_total_frame_bits;
+ /*SCD allowed only for I_PIC*/
+ ASSERT(e_pic_type == I_PIC);
+
+#define MAX_NUM_GOPS_IN_PERIOD (5) /* i4_num_gops_in_period is grown below only while it is under this value */
+ if(ps_bit_allocation->i4_num_gops_in_period != 1 &&
+ ps_bit_allocation->i4_num_gops_in_period < MAX_NUM_GOPS_IN_PERIOD)
+ {
+ /* Whenever there is a scene change increase the number of gops by 2 so that
+ the number of bits allocated is not very constrained. */
+ ps_bit_allocation->i4_num_gops_in_period += 2;
+ /* Add the extra bits in GOP to remaining bits in period */
+ change_rbip(
+ &ps_bit_allocation->s_rbip,
+ ps_bit_allocation->i4_bits_per_frm,
+ ps_bit_allocation->i4_num_gops_in_period);
+ /* printf((const WORD8*)"SCD rbp %d, ngp %d\n", update_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling,0),
+ ps_bit_allocation->i4_num_gops_in_period); */
+ }
+ }
+ else
+ {
+ //ASSERT(i4_model_updation_hdr_bits == 0);
+ if(!i4_suppress_bpic_update)
+ {
+ ps_bit_allocation->i4_prev_frm_header_bits[e_pic_type] = i4_model_updation_hdr_bits;
+ ps_bit_allocation->ai4_prev_frm_tot_bits[e_pic_type] = i4_total_frame_bits;
+ }
+ }
+
+ {
+ /* Removing the error due to buffer movement from gop level bit error */
+ WORD32 i4_gop_correction = 0;
+ WORD32 i4_cur_ebf = get_cbr_ebf(ps_cbr_buf_handle);
+ WORD32 i4_vbv_size = get_cbr_buffer_size(ps_cbr_buf_handle);
+ WORD32 i4_min_vbv_size = (WORD32)(i4_vbv_size * MIN_THRESHOLD_VBV_GOP_ERROR);
+ WORD32 i4_max_vbv_size = (WORD32)(i4_vbv_size * MAX_THRESHOLD_VBV_GOP_ERROR);
+ /*get desired buffer level so that bit error can be calculated. desired buf = 1 - lap window complexity*/
+ if(ps_bit_allocation->i4_ba_rc_pass != 2) /* everything up to the matching brace is skipped when i4_ba_rc_pass == 2 */
+ {
+ WORD32 i4_inter_frame_interval = pic_type_get_inter_frame_interval(ps_pic_handling);
+ LWORD64 vbv_buffer_based_excess = 0;
+ WORD32 i4_lap_window_comp_temp = i4_lap_window_comp;
+ if(ps_bit_allocation->i4_lap_window > i4_inter_frame_interval)
+ {
+ if(e_rc_type == VBR_STREAMING)
+ {
+ if(((float)i4_lap_window_comp / 128) >
+ ps_bit_allocation->f_min_complexity_cross_peak_rate)
+ i4_lap_window_comp_temp =
+ (WORD32)(ps_bit_allocation->f_min_complexity_cross_peak_rate * 128); /* clamp the complexity to the cross-peak-rate limit */
+
+ /*Get excess bits if any from vbv buffer*/
+ vbv_buffer_based_excess = get_vbv_buffer_based_excess(
+ ps_cbr_buf_handle,
+ ps_bit_allocation->f_min_complexity_cross_peak_rate,
+ ((float)i4_lap_window_comp / 128),
+ (i4_intra_frm_int * ps_bit_allocation->s_rbip.i4_num_intra_frm_interval),
+ 1);
+ }
+
+ i4_do_correction = 1;
+ i4_gop_correction = get_error_bits_for_desired_buf(
+ ps_cbr_buf_handle,
+ i4_lap_window_comp_temp,
+ (i4_intra_frm_int * ps_bit_allocation->s_rbip.i4_num_intra_frm_interval));
+ /*In case of VBR, don't do buffer based correction if gop_correction is less than 0, as it is less than average*/
+ if((e_rc_type == VBR_STREAMING) && (i4_gop_correction <= 0))
+ {
+ i4_do_correction = 0;
+ }
+
+ /* vbv buffer position based error correction to keep away encoder buffer overflow at GOP (I to I, not user configured)*/
+ if(i4_do_correction)
+ {
+ WORD32 i4_buffer_err_bits;
+ /*check if the ebf is greater than max ebf,
+ then account for complete error above max ebf in the current GOP itself*/
+ if(i4_cur_ebf > i4_max_vbv_size)
+ {
+ i4_gop_correction -= (i4_cur_ebf - i4_max_vbv_size);
+ *pi4_new_correction -= (i4_cur_ebf - i4_max_vbv_size);
+ i4_cur_ebf = i4_max_vbv_size; /* local copy only; the remainder is handled by the next check */
+ }
+ /* if ebf is above min but less than max, then distribute to the next GOPs*/
+ if(i4_cur_ebf > i4_min_vbv_size)
+ {
+ WORD32 i4_num_gops;
+ float f_ebf_percent;
+ /*compute the error bits to be distributed over the next GOPs*/
+ i4_buffer_err_bits = (i4_cur_ebf - i4_min_vbv_size);
+ /*compute number of GOPs the error to be distributed
+ high error -> few GOPs, less error -> more GOPs*/
+ f_ebf_percent = ((float)i4_cur_ebf / i4_vbv_size);
+ i4_num_gops = (WORD32)((1.0 - f_ebf_percent) * 10) + 2; /* 2 when ebf equals the vbv size, more as the buffer empties */
+ /*add the error bits to the period*/
+ i4_gop_correction -= (WORD32)(i4_buffer_err_bits / i4_num_gops);
+ *pi4_new_correction -= (WORD32)(i4_buffer_err_bits / i4_num_gops);
+ }
+ }
+ *pi4_gop_correction = i4_gop_correction;
+ set_rbip(
+ &ps_bit_allocation->s_rbip,
+ (i4_gop_correction + (WORD32)vbv_buffer_based_excess));
+
+ update_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling, 0);
+ ASSERT(ps_bit_allocation->i4_bits_from_buffer_in_cur_gop == 0);
+ trace_printf("\nRBIP updated ");
+ }
+ /* initialise the GOP and bit errors to zero */
+ ps_bit_allocation->i4_gop_level_bit_error = 0;
+ /*frame level error can't be carried over when it is more than VBV buffer size*/
+ if(ps_bit_allocation->i4_frame_level_bit_error > i4_max_vbv_size)
+ {
+ ps_bit_allocation->i4_frame_level_bit_error = i4_max_vbv_size;
+ }
+ if((i4_last_frm_in_period) ||
+ (i4_intra_frm_int == 1 && ps_bit_allocation->i4_rem_frame_in_period == 0))
+ { /*For 1st pass set the errors to 0 at end of a gop*/
+ ps_bit_allocation->i8_cur_gop_bit_consumption = 0;
+ ps_bit_allocation->i4_frame_level_bit_error = 0;
+ ps_bit_allocation->i4_bits_from_buffer_in_cur_gop = 0;
+ ps_bit_allocation->i4_rem_frame_in_period =
+ ps_bit_allocation->i4_num_gops_in_period *
+ i4_intra_frm_int; /*TBD: I only case*/
+ ps_bit_allocation->i8_frm_num_in_gop = 0;
+ }
+ }
+ }
+
+ if(i4_last_frm_in_period && i4_intra_frm_int != 1)
+ {
+ /* If the number of gops in period has been increased due to scene change, slowly bring in down
+ across the gops */
+ if(ps_bit_allocation->i4_num_gops_in_period >
+ ps_bit_allocation->i4_actual_num_gops_in_period)
+ {
+ ps_bit_allocation->i4_num_gops_in_period--;
+ change_rbip(
+ &ps_bit_allocation->s_rbip,
+ ps_bit_allocation->i4_bits_per_frm,
+ ps_bit_allocation->i4_num_gops_in_period);
+ }
+ }
+ /*Check for complexity based bits reset in future with GOP*/
+
+ /* Update the lower modules */
+ update_error_bits(ps_bit_allocation->ps_error_bits);
+}
+
+/*****************************************************************************
+ Function Name : change_remaining_bits_in_period
+ Description   : Applies a new bit rate, frame rate and set of peak bit rates
+                 to the bit allocation state: per-frame bit budgets, the
+                 remaining bits in period (through change_rbip), the intra
+                 texture bit limit and the error bits module.
+ Inputs        : ps_bit_allocation - bit_allocation structure (updated)
+                 i4_bit_rate       - new bit rate
+                 i4_frame_rate     - new frame rate (used as the divisor with a
+                                     multiplier of 1000)
+                 i4_peak_bit_rate  - array of MAX_NUM_DRAIN_RATES peak bit rates
+ Globals       :
+ Processing    :
+ Outputs       : ps_bit_allocation state
+ Returns       : None
+ Issues        :
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+void change_remaining_bits_in_period(
+    bit_allocation_t *ps_bit_allocation,
+    WORD32 i4_bit_rate,
+    WORD32 i4_frame_rate,
+    WORD32 *i4_peak_bit_rate)
+{
+    WORD32 i4_avg_bits_per_frm;
+    WORD32 i4_rate_idx;
+
+    /* Average bits per frame for the new bit rate and frame rate */
+    X_PROD_Y_DIV_Z(i4_bit_rate, 1000, i4_frame_rate, i4_avg_bits_per_frm);
+
+    /* Peak bits per frame for every drain rate */
+    for(i4_rate_idx = 0; i4_rate_idx < MAX_NUM_DRAIN_RATES; i4_rate_idx++)
+    {
+        WORD32 i4_peak_bits_per_frm;
+
+        X_PROD_Y_DIV_Z(i4_peak_bit_rate[i4_rate_idx], 1000, i4_frame_rate, i4_peak_bits_per_frm);
+        ps_bit_allocation->i4_max_bits_per_frm[i4_rate_idx] = i4_peak_bits_per_frm;
+    }
+
+    /* Hand the new per-frame average to the remaining-bits-in-period state */
+    change_rbip(
+        &ps_bit_allocation->s_rbip,
+        i4_avg_bits_per_frm,
+        ps_bit_allocation->i4_num_gops_in_period);
+
+    /* The new average is stored both as the average and as the minimum bits
+       per frame (original note: the minimum is a VBR storage related
+       parameter) */
+    ps_bit_allocation->i4_bits_per_frm = i4_avg_bits_per_frm;
+    ps_bit_allocation->i4_min_bits_per_frm = i4_avg_bits_per_frm;
+
+    /* Rescale the intra texture bit limit by new bit rate / old bit rate */
+    if(ps_bit_allocation->i4_bit_rate != i4_bit_rate)
+    {
+        X_PROD_Y_DIV_Z(
+            ps_bit_allocation->i4_max_tex_bits_for_i,
+            i4_bit_rate,
+            ps_bit_allocation->i4_bit_rate,
+            ps_bit_allocation->i4_max_tex_bits_for_i);
+    }
+
+    /* Propagate the new rates to the error bits module */
+    change_bitrate_in_error_bits(ps_bit_allocation->ps_error_bits, i4_bit_rate);
+    change_frm_rate_in_error_bits(ps_bit_allocation->ps_error_bits, i4_frame_rate);
+
+    /* Remember the new configuration */
+    ps_bit_allocation->i4_frame_rate = i4_frame_rate;
+    ps_bit_allocation->i4_bit_rate = i4_bit_rate;
+    for(i4_rate_idx = 0; i4_rate_idx < MAX_NUM_DRAIN_RATES; i4_rate_idx++)
+    {
+        ps_bit_allocation->ai4_peak_bit_rate[i4_rate_idx] = i4_peak_bit_rate[i4_rate_idx];
+    }
+}
+/*****************************************************************************
+ Function Name : change_ba_peak_bit_rate
+ Description   : Stores a new set of peak bit rates and recomputes the
+                 maximum bits per frame for each drain rate from the stored
+                 frame rate.
+ Inputs        : ps_bit_allocation - bit_allocation structure (updated)
+                 ai4_peak_bit_rate - array of MAX_NUM_DRAIN_RATES peak bit rates
+ Globals       :
+ Processing    :
+ Outputs       : i4_max_bits_per_frm[] and ai4_peak_bit_rate[] of
+                 ps_bit_allocation
+ Returns       : None
+ Issues        :
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+void change_ba_peak_bit_rate(bit_allocation_t *ps_bit_allocation, WORD32 *ai4_peak_bit_rate)
+{
+    WORD32 i4_rate_idx = 0;
+
+    while(i4_rate_idx < MAX_NUM_DRAIN_RATES)
+    {
+        /* Bits per frame for this drain rate */
+        X_PROD_Y_DIV_Z(
+            ai4_peak_bit_rate[i4_rate_idx],
+            1000,
+            ps_bit_allocation->i4_frame_rate,
+            ps_bit_allocation->i4_max_bits_per_frm[i4_rate_idx]);
+
+        ps_bit_allocation->ai4_peak_bit_rate[i4_rate_idx] = ai4_peak_bit_rate[i4_rate_idx];
+        i4_rate_idx++;
+    }
+}
+/*****************************************************************************
+ Function Name : check_and_update_bit_allocation
+ Description   : Keeps the remaining bits in period between the minimum
+                 promised for the period and the maximum that can be drained
+                 in it, moving the difference to or from i4_saved_bits.
+ Inputs        : ps_bit_allocation          - bit_allocation structure (updated)
+                 ps_pic_handling            - picture handling handle
+                 i4_max_bits_inflow_per_frm - maximum bits inflow per frame
+ Globals       :
+ Processing    : 1) rbip above the drain limit: remove the surplus from rbip
+                    and add it to the saved bits.
+                 2) rbip below the period minimum: add the shortfall to rbip
+                    and take it from the saved bits.
+                 3) otherwise, when saved bits are available: give rbip as
+                    many saved bits as fit under the drain limit.
+ Outputs       : rbip state and i4_saved_bits of ps_bit_allocation
+ Returns       : None
+ Issues        :
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+void check_and_update_bit_allocation(
+    bit_allocation_t *ps_bit_allocation,
+    pic_handling_handle ps_pic_handling,
+    WORD32 i4_max_bits_inflow_per_frm)
+{
+    WORD32 i4_frms_per_gop = get_actual_num_frames_in_gop(ps_pic_handling);
+    WORD32 i4_rbip = update_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling, 0);
+    WORD32 i4_drain_limit;
+    WORD32 i4_period_floor;
+
+    /* Upper bound: what can be drained over the whole period */
+    i4_drain_limit = ps_bit_allocation->i4_num_gops_in_period * i4_frms_per_gop *
+                     i4_max_bits_inflow_per_frm;
+
+    /* Lower bound: the minimum bits promised over the whole period */
+    i4_period_floor = ps_bit_allocation->i4_num_gops_in_period * i4_frms_per_gop *
+                      ps_bit_allocation->i4_min_bits_per_frm;
+
+    if(i4_rbip > i4_drain_limit)
+    {
+        /* Too many bits left: clip rbip and bank the surplus */
+        WORD32 i4_surplus = (i4_rbip - i4_drain_limit);
+
+        update_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling, -1 * i4_surplus);
+        overflow_avoided_summation(&ps_bit_allocation->i4_saved_bits, i4_surplus);
+        return;
+    }
+
+    if(i4_rbip < i4_period_floor)
+    {
+        /* Too few bits left: top rbip up and charge the saved bits */
+        WORD32 i4_shortfall = (i4_period_floor - i4_rbip);
+
+        update_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling, i4_shortfall);
+        overflow_avoided_summation(&ps_bit_allocation->i4_saved_bits, -1 * i4_shortfall);
+        return;
+    }
+
+    if(ps_bit_allocation->i4_saved_bits > 0)
+    {
+        /* Within bounds: hand out saved bits, limited by the drain headroom */
+        WORD32 i4_headroom = i4_drain_limit - i4_rbip;
+        WORD32 i4_granted = MIN(i4_headroom, ps_bit_allocation->i4_saved_bits);
+
+        update_rbip(&ps_bit_allocation->s_rbip, ps_pic_handling, i4_granted);
+        ps_bit_allocation->i4_saved_bits -= i4_granted;
+    }
+}
+/*****************************************************************************
+ Function Name : ba_get_frame_rate
+ Description   : Returns the frame rate stored in the bit allocation state.
+ Inputs        : ps_bit_allocation - bit_allocation structure
+ Returns       : ps_bit_allocation->i4_frame_rate
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+WORD32 ba_get_frame_rate(bit_allocation_t *ps_bit_allocation)
+{
+    WORD32 i4_stored_frame_rate = ps_bit_allocation->i4_frame_rate;
+
+    return i4_stored_frame_rate;
+}
+/*****************************************************************************
+ Function Name : ba_get_bit_rate
+ Description   : Returns the bit rate stored in the bit allocation state.
+ Inputs        : ps_bit_allocation - bit_allocation structure
+ Returns       : ps_bit_allocation->i4_bit_rate
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+WORD32 ba_get_bit_rate(bit_allocation_t *ps_bit_allocation)
+{
+    WORD32 i4_stored_bit_rate = ps_bit_allocation->i4_bit_rate;
+
+    return i4_stored_bit_rate;
+}
+/*****************************************************************************
+ Function Name : ba_get_2pass_avg_bit_rate
+ Description   : Returns the 2-pass average bit rate stored in the bit
+                 allocation state.
+ Inputs        : ps_bit_allocation - bit_allocation structure
+ Returns       : ps_bit_allocation->i8_2pass_avg_bit_rate
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+LWORD64 ba_get_2pass_avg_bit_rate(bit_allocation_t *ps_bit_allocation)
+{
+    LWORD64 i8_stored_avg_bit_rate = ps_bit_allocation->i8_2pass_avg_bit_rate;
+
+    return i8_stored_avg_bit_rate;
+}
+/*****************************************************************************
+ Function Name : ba_set_2pass_avg_bit_rate
+ Description   : Stores the 2-pass average bit rate in the bit allocation
+                 state.
+ Inputs        : ps_bit_allocation     - bit_allocation structure (updated)
+                 i8_2pass_avg_bit_rate - value to store
+ Returns       : None
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+void ba_set_2pass_avg_bit_rate(bit_allocation_t *ps_bit_allocation, LWORD64 i8_2pass_avg_bit_rate)
+{
+    (*ps_bit_allocation).i8_2pass_avg_bit_rate = i8_2pass_avg_bit_rate;
+}
+/*****************************************************************************
+ Function Name : ba_get_peak_bit_rate
+ Description : Copies all MAX_NUM_DRAIN_RATES entries of ai4_peak_bit_rate into pi4_peak_bit_rate
+ Inputs : ps_bit_allocation - bit_allocation structure; pi4_peak_bit_rate - output array, must hold MAX_NUM_DRAIN_RATES entries
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+void ba_get_peak_bit_rate(bit_allocation_t *ps_bit_allocation, WORD32 *pi4_peak_bit_rate)
+{
+ WORD32 i;
+ for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+ {
+ pi4_peak_bit_rate[i] = ps_bit_allocation->ai4_peak_bit_rate[i];
+ }
+}
+/*****************************************************************************
+ Function Name : init_intra_header_bits
+ Description : Stores i4_intra_header_bits in slot 0 of i4_prev_frm_header_bits (presumably the I picture slot - confirm)
+ Inputs : ps_bit_allocation - bit_allocation structure; i4_intra_header_bits - value to store
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+void init_intra_header_bits(bit_allocation_t *ps_bit_allocation, WORD32 i4_intra_header_bits)
+{
+ //ASSERT(i4_intra_header_bits == 0);
+ ps_bit_allocation->i4_prev_frm_header_bits[0] = i4_intra_header_bits;
+}
+/*****************************************************************************
+ Function Name : get_prev_header_bits
+ Description : Returns the i4_prev_frm_header_bits entry for the given picture type
+ Inputs : ps_bit_allocation - bit_allocation structure; pic_type - index into i4_prev_frm_header_bits (not range checked here)
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+WORD32 get_prev_header_bits(bit_allocation_t *ps_bit_allocation, WORD32 pic_type)
+{
+ //ASSERT(ps_bit_allocation->i4_prev_frm_header_bits[pic_type] == 0);
+ return (ps_bit_allocation->i4_prev_frm_header_bits[pic_type]);
+}
+
+#define I_TO_P_RATIO_HI_MO (16)
+#define P_TO_B_RATIO_HI_MO (18)
+#define P_TO_B_RATIO_HI_MO_HBR (16) /* NOTE(review): despite the name, this one is used in the non-HBR (else) branch below - confirm which branch is intended */
+/*****************************************************************************
+ Function Name : set_Kp_Kb_for_hi_motion
+ Description : Overwrites the I_PIC, P_PIC and B_PIC entries of i2_K with the fixed *_HI_MO ratios; the B_PIC value depends on i4_is_hbr
+ Inputs : ps_bit_allocation - bit_allocation structure
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+void set_Kp_Kb_for_hi_motion(bit_allocation_t *ps_bit_allocation)
+{
+ ps_bit_allocation->i2_K[I_PIC] = (1 << K_Q);
+ ps_bit_allocation->i2_K[P_PIC] = I_TO_P_RATIO_HI_MO;
+
+ if(ps_bit_allocation->i4_is_hbr)
+ {
+ ps_bit_allocation->i2_K[B_PIC] = (P_TO_B_RATIO_HI_MO * I_TO_P_RATIO_HI_MO) >> K_Q;
+ }
+ else
+ {
+ ps_bit_allocation->i2_K[B_PIC] = (P_TO_B_RATIO_HI_MO_HBR * I_TO_P_RATIO_HI_MO) >> K_Q;
+ }
+}
+/*****************************************************************************
+ Function Name : reset_Kp_Kb
+ Description : I_P_B_B1_B2 QP offset calculation based on hme sad
+ Inputs : ps_bit_allocation, f_hme_sad_per_pixel, f_max_hme_sad_per_pixel, i4_num_active_pic_type, pi4_complexity_bin (out); f_i_to_avg_ratio and i4_rc_pass are not read in the body
+ Globals : None
+ Processing : Forms f_max_hme_sad_per_pixel / f_hme_sad_per_pixel (as WORD32 and as float), gets one QP offset per picture type from ba_get_qp_offset_offline_data and maps each offset to a K value through ai4_offset_qp
+ Outputs : i2_K[0..4], with entries 1..4 also copied to i2_K[5..8]; *pi4_complexity_bin is written by ba_get_qp_offset_offline_data
+ Returns : None
+ Issues : NOTE(review): no check that f_hme_sad_per_pixel is non-zero is visible here - presumably guaranteed by the caller, confirm
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+
+void reset_Kp_Kb(
+ bit_allocation_t *ps_bit_allocation,
+ float f_i_to_avg_ratio,
+ WORD32 i4_num_active_pic_type,
+ float f_hme_sad_per_pixel,
+ float f_max_hme_sad_per_pixel,
+ WORD32 *pi4_complexity_bin,
+ WORD32 i4_rc_pass)
+{
+ WORD32 i, i4_ratio = (WORD32)(f_max_hme_sad_per_pixel / f_hme_sad_per_pixel);
+ WORD32 ai4_offsets[5] = { 0 };
+ float f_ratio = f_max_hme_sad_per_pixel / f_hme_sad_per_pixel;
+
+ /*Lookup table indexed by QP offset 0 - 7; each entry chains one more ratio factor and one more K_Q shift than the previous one*/
+ const WORD32 ai4_offset_qp[8] = {
+ (1 << K_Q),
+ I_TO_P_RATIO,
+ ((P_TO_B_RATIO * I_TO_P_RATIO) >> K_Q),
+ (B_TO_B1_RATIO * P_TO_B_RATIO * I_TO_P_RATIO) >> (K_Q + K_Q),
+ (B1_TO_B2_RATIO * B_TO_B1_RATIO * P_TO_B_RATIO * I_TO_P_RATIO) >> (K_Q + K_Q + K_Q),
+ (B1_TO_B2_RATIO * B1_TO_B2_RATIO * B_TO_B1_RATIO * P_TO_B_RATIO * I_TO_P_RATIO) >>
+ (K_Q + K_Q + K_Q + K_Q),
+ (B1_TO_B2_RATIO * B1_TO_B2_RATIO * B1_TO_B2_RATIO * B_TO_B1_RATIO * P_TO_B_RATIO *
+ I_TO_P_RATIO) >>
+ (K_Q + K_Q + K_Q + K_Q + K_Q),
+ (B1_TO_B2_RATIO * B1_TO_B2_RATIO * B1_TO_B2_RATIO * B1_TO_B2_RATIO * B_TO_B1_RATIO *
+ P_TO_B_RATIO * I_TO_P_RATIO) >>
+ (K_Q + K_Q + K_Q + K_Q + K_Q + K_Q)
+ };
+
+ ba_get_qp_offset_offline_data(
+ ai4_offsets, i4_ratio, f_ratio, i4_num_active_pic_type, pi4_complexity_bin);
+ for(i = 0; i < 5; i++)
+ {
+ ASSERT((ai4_offsets[i] >= 0) && (ai4_offsets[i] <= 7));
+ ps_bit_allocation->i2_K[i] = ai4_offset_qp[ai4_offsets[i]];
+
+ /*For interlaced also we are filling out the offsets: entries 1..4 are mirrored into i2_K[5..8]*/
+ if(i > 0)
+ ps_bit_allocation->i2_K[i + 4] = ai4_offset_qp[ai4_offsets[i]];
+ }
+}
+
+/*****************************************************************************
+ Function Name : ba_get_qp_offset_offline_data
+ Description : Offline model for qp offset calculation
+ Inputs : ai4_offsets - out: entries from index 1 onwards are overwritten with one table row, entry 0 is not written
+ i4_ratio - integer complexity ratio, clipped locally before binning
+ f_ratio - float complexity ratio, used for the finer thresholds inside the switch
+ i4_num_active_pic_type - selects the table: 5 -> 7B, 4 -> 3B, 3 -> 1B, anything else -> 0B
+ pi4_complexity_bin - out: receives the zero based bin index (i4_bin - 1)
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+void ba_get_qp_offset_offline_data(
+ WORD32 ai4_offsets[5],
+ WORD32 i4_ratio,
+ float f_ratio,
+ WORD32 i4_num_active_pic_type,
+ WORD32 *pi4_complexity_bin)
+{
+ WORD32 i4_bin;
+ /*Desired QP offset's for different complexity bins depending on number of temporal layers*/
+ /*The offset tables below have 7 rows (bins); the ranges named in the original note are:
+ Max_compl - Max_compl*3/4,
+ Max_compl*3/4 - Max_compl*1/2,
+ Max_compl*1/2 - Max_compl*1/4,
+ Max_compl*1/4 - Max_compl*1/8,
+ Max_compl*1/8 - Max_compl*1/16
+ <Max_compl*1/16*/
+ /*The kids_rain content was run on different resolutions and the max value for different temporal configs is the max value used*/
+
+ /*First index for complexity bin, second index for pic_types (P,B,B1,B2)*/
+ const WORD32 ai4_offset_values_7B[7][4] = { { 0, 1, 1, 2 }, { 1, 1, 2, 3 }, { 1, 2, 3, 3 },
+ { 1, 2, 3, 4 }, { 2, 2, 3, 4 }, { 2, 3, 4, 5 },
+ { 3, 4, 5, 6 } };
+ const WORD32 ai4_offset_values_3B[7][3] = { { 0, 1, 2 }, { 1, 2, 2 }, { 1, 2, 3 }, { 2, 2, 3 },
+ { 2, 3, 4 }, { 2, 4, 5 }, { 3, 4, 5 } };
+ const WORD32 ai4_offset_values_1B[7][2] = { { 1, 1 }, { 1, 2 }, { 1, 2 }, { 1, 3 },
+ { 2, 3 }, { 3, 4 }, { 3, 5 } };
+ const WORD32 ai4_offset_values_0B[7][1] = { { 0 }, { 1 }, { 2 }, { 2 }, { 3 }, { 3 }, { 4 } };
+
+ /*The ratio is clipped between 16 and 2 to put it into bins (CLIP arguments look to be value, max, min - confirm against the macro)*/
+
+ CLIP(i4_ratio, 16, 2);
+
+ for(i4_bin = 1; i4_bin < 5; i4_bin++)
+ {
+ if((i4_ratio >> i4_bin) == 1)
+ {
+ break;
+ }
+ }
+ switch(i4_bin)
+ {
+ case(1):
+ (f_ratio > 2.0f) ? (i4_bin = 3) : ((f_ratio > 1.33f) ? (i4_bin = 2) : (i4_bin = 1));
+ break;
+ case(2):
+ i4_bin = 4;
+ break;
+ case(3):
+ (f_ratio > 12.0f) ? (i4_bin = 6) : (i4_bin = 5);
+ break;
+ case(4):
+ i4_bin = 7;
+ break;
+ }
+
+ /*About case(1) of the switch above: the actual ratio could be >2.0, >1.33 or lesser, hence it is split into three different bins*/
+
+ trace_printf("1 bin %d", i4_bin);
+
+ /*Total 7 bins hence the clip*/
+ CLIP(i4_bin, 7, 1);
+
+ *pi4_complexity_bin = i4_bin - 1;
+
+ switch(i4_num_active_pic_type)
+ {
+ case 5:
+ memmove(
+ &ai4_offsets[1],
+ ai4_offset_values_7B[i4_bin - 1],
+ sizeof(ai4_offset_values_7B[i4_bin - 1]));
+ break;
+ case 4:
+ memmove(
+ &ai4_offsets[1],
+ ai4_offset_values_3B[i4_bin - 1],
+ sizeof(ai4_offset_values_3B[i4_bin - 1]));
+ break;
+ case 3:
+ memmove(
+ &ai4_offsets[1],
+ ai4_offset_values_1B[i4_bin - 1],
+ sizeof(ai4_offset_values_1B[i4_bin - 1]));
+ break;
+ case 2:
+ memmove(
+ &ai4_offsets[1],
+ ai4_offset_values_0B[i4_bin - 1],
+ sizeof(ai4_offset_values_0B[i4_bin - 1]));
+ break;
+ default:
+ memmove(
+ &ai4_offsets[1],
+ ai4_offset_values_0B[i4_bin - 1],
+ sizeof(ai4_offset_values_0B[i4_bin - 1]));
+ break;
+ }
+
+ trace_printf(
+ "Enc %d,%d,%d,%d,%d offsets",
+ ai4_offsets[0],
+ ai4_offsets[1],
+ ai4_offsets[2],
+ ai4_offsets[3],
+ ai4_offsets[4]);
+}
+
+/*****************************************************************************
+ Function Name : get_Kp_Kb
+ Description : Get the operating Kp and Kb so that scene cut sub gop can go
+ with similar qp offset; returns the i2_K entry for e_pic_type
+ Inputs : ps_bit_allocation
+ e_pic_type - index into i2_K
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+
+WORD32 get_Kp_Kb(bit_allocation_t *ps_bit_allocation, picture_type_e e_pic_type)
+{
+ return ps_bit_allocation->i2_K[e_pic_type];
+}
+/*****************************************************************************
+ Function Name : get_scene_change_tot_frm_bits
+ Description : Clamps the I to average-rest-of-GOP ratio and obtains the total
+ bit budget for the scene cut frame from bit_alloc_get_intra_bits.
+ Inputs : ps_bit_allocation - bit_allocation structure
+ ps_pic_handling, ps_cbr_buf_handling - handles passed on to the helpers; i4_f_sim_lap - used as i4_f_sim_lap / 128; i_to_avg_rest - ratio to clamp
+ i4_call_type, i4_non_I_scd - forwarded to bit_alloc_get_intra_bits; i4_is_infinite_gop - selects the upper clamp; i4_num_pixels - not read in the body
+ Globals : None
+ Processing : Limits the ratio to 8 when f_sim < 0.50, clamps it to the 1-pass or 2-pass min/max depending on i4_ba_rc_pass, then calls bit_alloc_get_intra_bits for I_PIC
+ Outputs : ps_bit_allocation->i4_excess_bits_from_buffer is reset to 0
+ Returns : Bits for the scene cut frame, floored at 0
+ Issues : NOTE(review): i4_total_bits_in_period, i4_max_consumable_bits, i4_intra_frm_int and ai4_actual_frms_in_gop are computed but not read afterwards in this function
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+WORD32 get_scene_change_tot_frm_bits(
+ bit_allocation_t *ps_bit_allocation,
+ pic_handling_handle ps_pic_handling,
+ cbr_buffer_handle ps_cbr_buf_handling,
+ WORD32 i4_num_pixels,
+ WORD32 i4_f_sim_lap,
+ float i_to_avg_rest,
+ WORD32 i4_call_type,
+ WORD32 i4_non_I_scd,
+ WORD32 i4_is_infinite_gop)
+{
+ WORD32 j;
+ WORD32 i4_tot_bits_for_scd_frame;
+ WORD32 i4_total_bits_in_period;
+ //number_t avq_complexity_estimate[MAX_PIC_TYPE];
+ WORD32 /* ai4_rem_frms_in_period[MAX_PIC_TYPE], */
+ ai4_frms_in_period[MAX_PIC_TYPE];
+ WORD32 i4_max_consumable_bits;
+ WORD32 i4_intra_frm_int;
+ WORD32 ai4_actual_frms_in_gop[MAX_PIC_TYPE], i, i4_total_frames = 0;
+ float final_ratio, f_sim = (float)i4_f_sim_lap / 128;
+
+ i4_intra_frm_int = pic_type_get_intra_frame_interval(ps_pic_handling);
+
+ /* Get the frms_in_gop from the pic_type state struct (the rem_frms_in_gop query is commented out) */
+ /* pic_type_get_rem_frms_in_gop(ps_pic_handling, ai4_rem_frms_in_period); */
+ pic_type_get_frms_in_gop(ps_pic_handling, ai4_frms_in_period);
+
+ /* Depending on the number of gops in a period, find the num_frms_in_prd */
+ for(j = 0; j < MAX_PIC_TYPE; j++)
+ {
+ /* ai4_rem_frms_in_period[j] += (ai4_frms_in_period[j] * (ps_bit_allocation->i4_num_gops_in_period - 1)); */
+ ai4_frms_in_period[j] *= ps_bit_allocation->i4_num_gops_in_period;
+ }
+
+ /* Total bits in the period = bits per frame * total frames in gop, both taken
+ from s_rbip.*/
+ {
+ i4_total_bits_in_period = ps_bit_allocation->s_rbip.i4_bits_per_frm *
+ ps_bit_allocation->s_rbip.i4_tot_frms_in_gop;
+ //trace_printf(" SCD_rbip = %d",i4_total_bits_in_period);
+ }
+ //since this marks end of previous GOP it is better to consider actual error than ps_bit_allocation->i4_frame_level_bit_error;
+
+ {
+ pic_type_get_actual_frms_in_gop(ps_pic_handling, ai4_actual_frms_in_gop);
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ i4_total_frames += ai4_frms_in_period[i];
+ }
+ i4_max_consumable_bits = ps_bit_allocation->i4_max_bits_per_frm[0] * i4_total_frames;
+ }
+ if(i4_total_bits_in_period > 0)
+ {
+ i4_total_bits_in_period = MIN(i4_total_bits_in_period, i4_max_consumable_bits);
+ }
+ final_ratio = i_to_avg_rest;
+ /*f_sim is i4_f_sim_lap / 128 (original note: if FSIM is > 126 the content is assured to be static)*/
+ /*Very low FSIM safety check*/
+ if(f_sim < 0.50 && final_ratio > 8)
+ final_ratio = 8;
+ /*Do not apply safety limits if second pass as data is reliable*/
+ if(ps_bit_allocation->i4_ba_rc_pass != 2)
+ {
+ /*clip min max values*/
+ if((i4_is_infinite_gop == 1) && (final_ratio > I_TO_AVG_REST_GOP_BIT_MAX_INFINITE))
+ {
+ final_ratio = I_TO_AVG_REST_GOP_BIT_MAX_INFINITE;
+ }
+ else
+ {
+ if(final_ratio > I_TO_AVG_REST_GOP_BIT_MAX)
+ final_ratio = I_TO_AVG_REST_GOP_BIT_MAX;
+ }
+ if(final_ratio < I_TO_AVG_REST_GOP_BIT_MIN)
+ final_ratio = I_TO_AVG_REST_GOP_BIT_MIN;
+ }
+ else
+ {
+ if(final_ratio > I_TO_AVG_REST_GOP_BIT_MAX_2_PASS)
+ final_ratio = I_TO_AVG_REST_GOP_BIT_MAX_2_PASS;
+
+ if(final_ratio < I_TO_AVG_REST_GOP_BIT_MIN_2_PASS)
+ final_ratio = I_TO_AVG_REST_GOP_BIT_MIN_2_PASS;
+ }
+
+ /*based on offline runs to find I_BITS/(AVERAGE_CONSUMPTION_OF_REST_GOP)*/
+ /* BITS FOR I
+ BITS = I_TO_AVG_REST_GOP * total_bits_period
+ -------------------------------------
+ N - (num_I_in_period) + (I_TO_AVG_REST_GOP * num_I_in_period)
+ */
+ i4_tot_bits_for_scd_frame = bit_alloc_get_intra_bits(
+ ps_bit_allocation,
+ ps_pic_handling,
+ ps_cbr_buf_handling,
+ I_PIC,
+ NULL,
+ 1,
+ final_ratio,
+ i4_call_type,
+ i4_non_I_scd,
+ 0.0f);
+ ps_bit_allocation->i4_excess_bits_from_buffer = 0;
+
+ if(i4_call_type == 1)
+ {
+ trace_printf("I_TO_AVG_REST_GOP_BIT used = %f\n", final_ratio);
+ trace_printf(" SCD DETECTED bits allocated = %d", i4_tot_bits_for_scd_frame);
+ }
+
+ /* If the remaining bits in the period becomes negative then the estimated texture
+ bits would also become negative. This would send a feedback to the model which
+ may go for a toss. Thus sending the minimum possible value = 0 */
+ if(i4_tot_bits_for_scd_frame < 0)
+ i4_tot_bits_for_scd_frame = 0;
+
+ return (i4_tot_bits_for_scd_frame);
+}
+
+/*****************************************************************************
+ Function Name : update_estimate_status
+ Description : Est texture bits in case of scene cut is obtained from offline
+ model. Stores the estimated texture and header bits in bit alloc
+ Inputs : ps_bit_allocation
+ i4_est_texture_bits, i4_hdr_bits - values stored at index i4_est_text_bits_ctr_get_qp
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+
+void update_estimate_status(
+ bit_allocation_t *ps_bit_allocation,
+ WORD32 i4_est_texture_bits,
+ WORD32 i4_hdr_bits,
+ WORD32 i4_est_text_bits_ctr_get_qp)
+{
+ ps_bit_allocation->ai4_cur_frm_est_tex_bits[i4_est_text_bits_ctr_get_qp] = i4_est_texture_bits;
+ ps_bit_allocation->ai4_cur_frm_est_hdr_bits[i4_est_text_bits_ctr_get_qp] = i4_hdr_bits;
+}
+
+/*****************************************************************************
+ Function Name : bit_allocation_set_num_scd_lap_window
+ Description : Stores the scene cut count for the lap window (capped at 2) and the number of frames before the scene cut
+ Inputs : ps_bit_allocation
+ i4_num_scd_in_lap_window, i4_num_frames_b4_Scd
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+void bit_allocation_set_num_scd_lap_window(
+ bit_allocation_t *ps_bit_allocation,
+ WORD32 i4_num_scd_in_lap_window,
+ WORD32 i4_num_frames_b4_Scd)
+{
+ ps_bit_allocation->i4_num_scd_in_lap_window = i4_num_scd_in_lap_window;
+ ps_bit_allocation->i4_num_frm_b4_scd = i4_num_frames_b4_Scd;
+ /*To avoid trashing I frame badly due to back to back scene cut limit the increment in Ni*/
+ if(ps_bit_allocation->i4_num_scd_in_lap_window > 2)
+ ps_bit_allocation->i4_num_scd_in_lap_window = 2;
+}
+/*****************************************************************************
+ Function Name : bit_allocation_set_sc_i_in_rc_look_ahead
+ Description : Stores i4_next_sc_i_in_rc_look_ahead in the bit allocation state
+ Inputs : ps_bit_allocation
+ i4_next_sc_i_in_rc_look_ahead
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+void bit_allocation_set_sc_i_in_rc_look_ahead(
+ bit_allocation_t *ps_bit_allocation, WORD32 i4_next_sc_i_in_rc_look_ahead)
+{
+ ps_bit_allocation->i4_next_sc_i_in_rc_look_ahead = i4_next_sc_i_in_rc_look_ahead;
+}
+/*****************************************************************************
+ Function Name : bit_allocation_update_gop_level_bit_error
+ Description : Adds i4_error_bits to the gop level and frame level bit error and subtracts it from the current gop bit consumption
+ Inputs : ps_bit_allocation
+ i4_error_bits
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+void bit_allocation_update_gop_level_bit_error(
+ bit_allocation_t *ps_bit_allocation, WORD32 i4_error_bits)
+{
+ ps_bit_allocation->i4_gop_level_bit_error += i4_error_bits;
+ ps_bit_allocation->i4_frame_level_bit_error += i4_error_bits;
+ /*Error is (rdopt - entropy) Hence for total bit consumption subtract error*/
+ ps_bit_allocation->i8_cur_gop_bit_consumption -= i4_error_bits;
+}
+
+/******************************************************************************
+ Function Name : rc_update_bit_distribution_gop_level_2pass
+ Description : This function distributes the bits to all the gops depending
+ on the complexities and the error bits accumulated until now
+ Arguments : ps_bit_allocation - bit allocation state; pv_gop_stat - array of gop_level_stat_t indexed by GOP number; i4_num_gop - end (exclusive) of the GOP range
+ i4_start_gop_number : GOP number from which distribution should happen
+ Return Values : None; results are written into the gop_level_stat_t entries and into ps_bit_allocation
+ Revision History:
+
+
+ Assumptions - the frame rate is used as a divisor together with a factor of 1000, so it looks to be stored scaled by 1000 - confirm
+
+ Checks - ASSERT(0) fires when extra bits have to be redistributed and e_rc_type is not VBR_STREAMING
+*****************************************************************************/
+void rc_update_bit_distribution_gop_level_2pass(
+ bit_allocation_t *ps_bit_allocation,
+ pic_handling_handle ps_pic_handle,
+ void *pv_gop_stat,
+ rc_type_e e_rc_type,
+ WORD32 i4_num_gop,
+ WORD32 i4_start_gop_number,
+ float f_avg_qscale_first_pass,
+ WORD32 i4_max_ebf,
+ WORD32 i4_ebf,
+ LWORD64 i8_tot_bits_sequence,
+ WORD32 i4_comp_error)
+{
+ float cur_peak_factor, f_bits_per_frame;
+ LWORD64 total_nbp_bits_allocated = 0;
+ LWORD64 total_bp_bits_allocated = 0;
+ LWORD64 total_bits_allocated = 0, prev_total_bits_allocated = -1;
+ WORD32
+ i4_num_loop_inter_GOP_alloc = 0, ai4_peak_bitrate[MAX_NUM_DRAIN_RATES] = { 0 },
+ temp_i; /*Loop 20 times to meet precise bitrate, after that exit the loop and distribute remaining bits equally for all GOP*/
+ gop_level_stat_t *ps_cur_gop;
+ WORD32 i4_num_frames_in_gop, i4_cur_gop_num, i4_num_frm_with_rmax, i4_num_frm_with_rmin;
+ LWORD64 i8_max_bit_for_gop, /*i8_min_bit_for_gop,*/ i8_peak_bitrate, i8_frame_rate,
+ i8_current_bitrate = (LWORD64)ba_get_2pass_avg_bit_rate(ps_bit_allocation);
+ LWORD64 i8_actual_avg_bit_rate = ba_get_bit_rate(ps_bit_allocation);
+ LWORD64 i8_num_frame_remaining = 0, i8_excess_bits = 0;
+ float min_complexity_beyond_peak /*,f_max_complexity = 1.0f,f_min_complexity = 0.0f*/
+ ; //The minimum complexity for which bit allocation exceeds peak rate
+ float f_avg_bits_complexity_based;
+ WORD32 i4_num_gop_not_rmax;
+ LWORD64 i8_bits_for_this_gop;
+
+#define MAX_LOOP_INTER_GOP_ALLOC \
+ 20 /*The below loop shall run maximum of this macro once it exits allocate the difference bits equally for all the GOPS*/
+
+ i4_ebf = MAX(i4_ebf, 0);
+ //i4_ebf = 0;
+ if(i4_start_gop_number == 0)
+ {
+ cur_peak_factor = 7.0;
+ }
+ else
+ {
+ cur_peak_factor = ps_bit_allocation->f_cur_peak_factor_2pass;
+ }
+ /*Parsing of entire log file is done and summary of GOP level data has been updated in the temp,
+ Iteratively allocate the bits to make it meet bitrate. NOTE(review): the loop right below only reassigns ps_cur_gop and has no other visible effect*/
+ for(temp_i = i4_start_gop_number; temp_i < i4_num_gop; temp_i++)
+ {
+ ps_cur_gop = (gop_level_stat_t *)((gop_level_stat_t *)pv_gop_stat + temp_i);
+ }
+ i8_frame_rate = ba_get_frame_rate(ps_bit_allocation);
+ ba_get_peak_bit_rate(ps_bit_allocation, &ai4_peak_bitrate[0]);
+ i8_peak_bitrate = (LWORD64)ai4_peak_bitrate[0];
+
+ /*Modify the bitrate depending on the error bits and total bits*/
+ //i8_current_bitrate = (LWORD64)((float)i8_tot_bits_sequence*i8_frame_rate/(1000*i8_num_frame_remaining));
+
+ f_bits_per_frame = (float)i8_current_bitrate / i8_frame_rate * 1000;
+ ps_bit_allocation->i8_current_bitrate_2_pass = i8_current_bitrate;
+ //printf("\n%d current bitrate",i8_current_bitrate);
+
+ do
+ {
+ /*Get gop level stat*/
+ /*recalculate the bits based on new scaling factor*/
+ total_bits_allocated = 0;
+ total_bp_bits_allocated = 0;
+ total_nbp_bits_allocated = 0;
+ min_complexity_beyond_peak =
+ (float)ps_bit_allocation->ai4_peak_bit_rate[0] / i8_current_bitrate;
+
+ /*min_complexity_beyond_peak = ba_get_min_complexity_for_peak_br(ps_bit_allocation->ai4_peak_bit_rate[0],
+ (WORD32)i8_current_bitrate,
+ cur_peak_factor,
+ f_max_complexity,
+ f_min_complexity,
+ ps_bit_allocation->i4_ba_rc_pass);*/
+
+ for(i4_cur_gop_num = i4_start_gop_number; i4_cur_gop_num < i4_num_gop; i4_cur_gop_num++)
+ {
+ ps_cur_gop = (gop_level_stat_t *)((gop_level_stat_t *)pv_gop_stat + i4_cur_gop_num);
+ ps_cur_gop->f_bits_complexity_l1_based_peak_factor =
+ ps_cur_gop->f_bits_complexity_l1_based * cur_peak_factor;
+ }
+ i4_num_frm_with_rmax = 0;
+ i4_num_frm_with_rmin = 0;
+ f_avg_bits_complexity_based = 0.0;
+ i4_num_gop_not_rmax = 0;
+ i8_num_frame_remaining = 0;
+ for(i4_cur_gop_num = i4_start_gop_number; i4_cur_gop_num < i4_num_gop; i4_cur_gop_num++)
+ {
+ ps_cur_gop = (gop_level_stat_t *)((gop_level_stat_t *)pv_gop_stat + i4_cur_gop_num);
+ if(!ps_cur_gop->i4_peak_br_clip)
+ {
+ f_avg_bits_complexity_based +=
+ (ps_cur_gop->f_bits_complexity_l1_based * ps_cur_gop->i4_tot_frm_in_gop);
+ i8_num_frame_remaining += ps_cur_gop->i4_tot_frm_in_gop;
+ i4_num_gop_not_rmax++;
+ }
+ }
+ f_avg_bits_complexity_based = (f_avg_bits_complexity_based / i8_num_frame_remaining);
+ for(i4_cur_gop_num = i4_start_gop_number; i4_cur_gop_num < i4_num_gop; i4_cur_gop_num++)
+ {
+ /*Parse through all the GOP*/
+ /*get current gop data*/
+ //i4_num_frames_in_gop = 0;
+ LWORD64 i8_avg_bit_rate_bits;
+ LWORD64 i8_curr_bit_rate_bits;
+ ps_cur_gop = (gop_level_stat_t *)((gop_level_stat_t *)pv_gop_stat + i4_cur_gop_num);
+
+ if(ps_cur_gop->i4_peak_br_clip)
+ {
+ i4_num_frm_with_rmax++;
+ total_nbp_bits_allocated += ps_cur_gop->i8_bits_allocated_to_gop;
+ continue;
+ }
+ ps_cur_gop->f_buffer_play_complexity = 0.;
+ //ps_cur_gop->f_gop_level_complexity_sum = -1;
+ //ps_cur_gop->i8_buffer_play_bits = 0;
+ ps_cur_gop->i8_buffer_play_bits_allocated_to_gop = 0;
+ i4_num_frames_in_gop = ps_cur_gop->i4_tot_frm_in_gop;
+
+ if(i4_num_gop_not_rmax == i4_num_gop) /* NOTE(review): the count covers [i4_start_gop_number, i4_num_gop) only, so this can hold only when the start is 0 - confirm intended */
+ {
+ i8_bits_for_this_gop =
+ (LWORD64)((i8_current_bitrate * i4_num_frames_in_gop * 1000) / i8_frame_rate);
+ if(e_rc_type == VBR_STREAMING)
+ {
+ ps_cur_gop->i8_bits_allocated_to_gop = (LWORD64)(
+ (ps_cur_gop->f_bits_complexity_l1_based / (f_avg_bits_complexity_based)) *
+ i8_bits_for_this_gop);
+ }
+ else
+ {
+ ps_cur_gop->i8_bits_allocated_to_gop =
+ (LWORD64)(i8_current_bitrate * i4_num_frames_in_gop / i8_frame_rate * 1000);
+ }
+ }
+ else
+ {
+ //i8_bits_for_this_gop = (LWORD64)((i8_excess_bits * i4_num_frames_in_gop * 1000)/(i8_frame_rate*i4_num_gop_not_rmax));
+ i8_bits_for_this_gop =
+ (LWORD64)((i8_excess_bits * i4_num_frames_in_gop) / (i8_num_frame_remaining));
+ if(e_rc_type == VBR_STREAMING)
+ {
+ ps_cur_gop->i8_bits_allocated_to_gop += (LWORD64)(
+ (ps_cur_gop->f_bits_complexity_l1_based / (f_avg_bits_complexity_based)) *
+ i8_bits_for_this_gop);
+ }
+ else
+ {
+ ASSERT(0);
+ }
+ }
+ ps_cur_gop->i8_actual_bits_allocated_to_gop = ps_cur_gop->i8_bits_allocated_to_gop;
+ /*clip based on peak rate*/
+ i8_max_bit_for_gop = i8_peak_bitrate * i4_num_frames_in_gop * 1000 / i8_frame_rate;
+ ps_cur_gop->i8_max_bit_for_gop = i8_max_bit_for_gop;
+ ps_cur_gop->i4_peak_br_clip = 0;
+ if(ps_cur_gop->i8_bits_allocated_to_gop > i8_max_bit_for_gop)
+ {
+ ps_cur_gop->i8_bits_allocated_to_gop = i8_max_bit_for_gop;
+ ps_cur_gop->i4_peak_br_clip = 1;
+ i4_num_frm_with_rmax++;
+ /*if(ps_cur_gop->f_bits_complexity_l1_based < min_complexity_beyond_peak)
+ min_complexity_beyond_peak = ps_cur_gop->f_bits_complexity_l1_based;*/
+ }
+ i8_curr_bit_rate_bits =
+ (LWORD64)(i8_current_bitrate * i4_num_frames_in_gop / i8_frame_rate * 1000);
+ i8_avg_bit_rate_bits =
+ (LWORD64)(i8_actual_avg_bit_rate * i4_num_frames_in_gop / i8_frame_rate * 1000);
+ ps_cur_gop->i4_is_below_avg_rate_gop_frame = 0;
+ if(ps_cur_gop->i8_bits_allocated_to_gop <
+ (MIN(i8_curr_bit_rate_bits, ps_cur_gop->i8_minimum_gop_bits)))
+ {
+ ps_cur_gop->i4_is_below_avg_rate_gop_frame = 1;
+ ps_cur_gop->i8_bits_allocated_to_gop =
+ MIN(i8_curr_bit_rate_bits, ps_cur_gop->i8_minimum_gop_bits);
+ i4_num_frm_with_rmin++;
+ }
+ total_nbp_bits_allocated += ps_cur_gop->i8_bits_allocated_to_gop;
+ }
+ i4_num_loop_inter_GOP_alloc++;
+ /*check for tolerance of 0.5% in terms of meeting bitrate, terminate the loop when bitrate is met or the iteration limit is exceeded*/
+ total_bits_allocated = total_nbp_bits_allocated + total_bp_bits_allocated;
+ if((total_bits_allocated < (1.005 * i8_tot_bits_sequence) &&
+ total_bits_allocated > (0.995 * i8_tot_bits_sequence)) ||
+ (i4_num_loop_inter_GOP_alloc > MAX_LOOP_INTER_GOP_ALLOC) /*|| (cur_peak_factor <= 1 )*/)
+ {
+ float error_bits = ((float)i8_tot_bits_sequence - total_bits_allocated);
+ WORD32 temp_i;
+ float f_per_frm_bits = ((float)(i8_current_bitrate)) / (i8_frame_rate / 1000); /* NOTE(review): assuming LWORD64 is an integer type, (i8_frame_rate / 1000) truncates before the float division, unlike f_bits_per_frame above - confirm intended */
+ //cur_peak_factor *= (float)i8_tot_bits_sequence/total_bits_allocated;
+ if((i4_comp_error == 1) || ((i4_comp_error == 0) && (error_bits < 0)))
+ {
+ for(temp_i = i4_start_gop_number; temp_i < i4_num_gop; temp_i++)
+ {
+ ps_cur_gop = (gop_level_stat_t *)((gop_level_stat_t *)pv_gop_stat + temp_i);
+ ps_cur_gop->i8_bits_allocated_to_gop += (LWORD64)(
+ (error_bits * ps_cur_gop->i8_bits_allocated_to_gop / total_bits_allocated));
+ }
+ }
+ for(temp_i = i4_start_gop_number; temp_i < i4_num_gop; temp_i++)
+ {
+ ps_cur_gop = (gop_level_stat_t *)((gop_level_stat_t *)pv_gop_stat + temp_i);
+ ps_cur_gop->f_avg_complexity_factor = (ps_cur_gop->f_bits_complexity_l1_based /
+ ps_cur_gop->i8_bits_allocated_to_gop) *
+ (f_per_frm_bits) *
+ (ps_cur_gop->i4_tot_frm_in_gop);
+ }
+ break;
+ }
+ else
+ {
+ /*Go for next iteration*/
+ cur_peak_factor *= (float)i8_tot_bits_sequence / total_bits_allocated;
+ //cur_peak_factor = MAX(cur_peak_factor,1);
+ prev_total_bits_allocated = total_bits_allocated;
+ i8_excess_bits = i8_tot_bits_sequence - total_bits_allocated;
+ }
+
+ } while(1);
+ ps_bit_allocation->f_cur_peak_factor_2pass = cur_peak_factor;
+ ps_bit_allocation->i8_total_bits_allocated = total_bits_allocated;
+
+ /*Store complexity beyond which bits are clipped to peak rate*/
+ /*if(i4_start_gop_number == 0)*/
+ {
+ ps_bit_allocation->f_min_complexity_cross_peak_rate = /*min_complexity_beyond_peak*/
+ (float)ps_bit_allocation->ai4_peak_bit_rate[0] / i8_current_bitrate;
+ //ba_get_min_complexity_for_peak_br(ps_bit_allocation->ai4_peak_bit_rate[0],ps_bit_allocation->i4_bit_rate,cur_peak_factor,f_max_complexity,f_min_complexity,ps_bit_allocation->i4_ba_rc_pass);
+ }
+}
+
+/*****************************************************************************
+ Function Name : get_prev_frame_total_header_bits
+ Description : Writes the previous frame total bits and header bits stored for e_pic_type to the two output pointers
+ Inputs : ps_bit_allocation
+ e_pic_type - index into ai4_prev_frm_tot_bits and i4_prev_frm_header_bits
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+void get_prev_frame_total_header_bits(
+ bit_allocation_t *ps_bit_allocation,
+ WORD32 *pi4_prev_frame_total_bits,
+ WORD32 *pi4_prev_frame_header_bits,
+ picture_type_e e_pic_type)
+{
+ *pi4_prev_frame_total_bits = ps_bit_allocation->ai4_prev_frm_tot_bits[e_pic_type];
+ *pi4_prev_frame_header_bits = ps_bit_allocation->i4_prev_frm_header_bits[e_pic_type];
+}
+
+/*****************************************************************************
+ Function Name : bit_alloc_get_gop_num
+ Description : Returns the i8_cur_gop_num field of the bit allocation state
+ Inputs : ps_bit_allocation
+
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+LWORD64 bit_alloc_get_gop_num(bit_allocation_t *ps_bit_allocation)
+{
+ return (ps_bit_allocation->i8_cur_gop_num);
+}
+/*****************************************************************************
+ Function Name : ba_get_min_bits_per_frame
+ Description : Returns the i4_min_bits_per_frm field of the bit allocation state
+ Inputs : ps_bit_allocation
+
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+WORD32 ba_get_min_bits_per_frame(bit_allocation_t *ps_bit_allocation)
+{
+ return (ps_bit_allocation->i4_min_bits_per_frm);
+}
+/*****************************************************************************
+ Function Name : set_bit_allocation_i_frames
+ Description : Sets s_rbip to the buffer based correction (get_error_bits_for_desired_buf) plus, for VBR_STREAMING only, the vbv buffer based excess bits
+ Inputs : ps_bit_allocation, ps_cbr_buffer, ps_pic_handle
+ i4_lap_window_comp - complexity, compared as i4_lap_window_comp / 128 against f_min_complexity_cross_peak_rate; i4_num_frames - forwarded to the buffer helpers
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+void set_bit_allocation_i_frames(
+ bit_allocation_t *ps_bit_allocation,
+ cbr_buffer_handle ps_cbr_buffer,
+ pic_handling_handle ps_pic_handle,
+ WORD32 i4_lap_window_comp,
+ WORD32 i4_num_frames)
+{
+ LWORD64 vbv_buffer_based_excess = 0;
+ WORD32 i4_gop_correction;
+ WORD32 i4_lap_window_comp_temp = i4_lap_window_comp;
+ rc_type_e e_rc_type = get_rc_type(ps_cbr_buffer);
+ if(e_rc_type == VBR_STREAMING)
+ {
+ if(((float)i4_lap_window_comp / 128) > ps_bit_allocation->f_min_complexity_cross_peak_rate)
+ i4_lap_window_comp_temp =
+ (WORD32)(ps_bit_allocation->f_min_complexity_cross_peak_rate * 128);
+
+ /*Get excess bits if any from vbv buffer; note that the unclipped complexity is passed here*/
+ vbv_buffer_based_excess = get_vbv_buffer_based_excess(
+ ps_cbr_buffer,
+ ps_bit_allocation->f_min_complexity_cross_peak_rate,
+ ((float)i4_lap_window_comp / 128),
+ i4_num_frames,
+ 1);
+ }
+ i4_gop_correction =
+ get_error_bits_for_desired_buf(ps_cbr_buffer, i4_lap_window_comp_temp, i4_num_frames);
+
+ update_rbip(&ps_bit_allocation->s_rbip, ps_pic_handle, 0);
+
+ set_rbip(&ps_bit_allocation->s_rbip, (i4_gop_correction + (WORD32)vbv_buffer_based_excess));
+
+ update_rbip(&ps_bit_allocation->s_rbip, ps_pic_handle, 0);
+}
+
+/*****************************************************************************
+ Function Name : bit_alloc_set_curr_i_to_sum_i
+ Description : Stores f_curr_i_to_sum in the bit allocation state
+ Inputs : ps_bit_allocation
+ f_curr_i_to_sum - value to store
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+void bit_alloc_set_curr_i_to_sum_i(bit_allocation_t *ps_bit_allocation, float f_curr_i_to_sum)
+{
+ ps_bit_allocation->f_curr_i_to_sum = f_curr_i_to_sum;
+}
+
+/*****************************************************************************
+ Function Name : ba_set_gop_stat_in_bit_alloc
+ Description : Stores the pv_gop_stat_summary pointer in the bit allocation state (the pointer is kept, the data is not copied)
+ Inputs : ps_bit_allocation
+ pv_gop_stat_summary - pointer to store
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+void ba_set_gop_stat_in_bit_alloc(bit_allocation_t *ps_bit_allocation, void *pv_gop_stat_summary)
+{
+ ps_bit_allocation->pv_gop_stat = pv_gop_stat_summary;
+}
+/*****************************************************************************
+ Function Name : ba_get_luma_pels
+ Description : Returns the i4_luma_pels field of the bit allocation state
+ Inputs : ps_bit_allocation
+
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+WORD32 ba_get_luma_pels(bit_allocation_t *ps_bit_allocation)
+{
+ return (ps_bit_allocation->i4_luma_pels);
+}
+/*****************************************************************************
+ Function Name : overflow_avoided_summation
+ Description : Adds i4_input to pi4_accumulator[0], storing 0x7fffffff or 0x80000000 instead when the sum would go past that limit
+ Inputs : pi4_accumulator - in/out: running sum, only element 0 is used
+ i4_input - value to add
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+void overflow_avoided_summation(WORD32 *pi4_accumulator, WORD32 i4_input)
+{
+ if((pi4_accumulator[0] > 0) && (((int)0x7fffffff - pi4_accumulator[0]) < i4_input))
+ pi4_accumulator[0] = 0x7fffffff;
+ else if((pi4_accumulator[0] < 0) && (((int)0x80000000 - pi4_accumulator[0]) > i4_input))
+ pi4_accumulator[0] = 0x80000000;
+ else
+ pi4_accumulator[0] += i4_input;
+}
+/*****************************************************************************
+ Function Name : ba_get_sum_complexity_segment_cross_peak
+ Description : Returns the f_sum_complexity_segment_cross_peak field of the bit allocation state
+ Inputs : ps_bit_allocation
+
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+float ba_get_sum_complexity_segment_cross_peak(bit_allocation_t *ps_bit_allocation)
+{
+ return (ps_bit_allocation->f_sum_complexity_segment_cross_peak);
+}
+/*****************************************************************************
+ Function Name : ba_get_prev_frame_tot_est_bits
+ Description : Returns the ai4_prev_frm_tot_est_bits entry at index i4_pic
+ Inputs : ps_bit_allocation
+ i4_pic - index into ai4_prev_frm_tot_est_bits (not range checked here)
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+WORD32 ba_get_prev_frame_tot_est_bits(bit_allocation_t *ps_bit_allocation, WORD32 i4_pic)
+{
+ return (ps_bit_allocation->ai4_prev_frm_tot_est_bits[i4_pic]);
+}
+/*****************************************************************************
+ Function Name : ba_get_prev_frame_tot_bits
+ Description : Returns entry i4_pic of ai4_prev_frm_tot_bits
+ Inputs : ps_bit_allocation - bit allocation state to read
+          i4_pic - index into ai4_prev_frm_tot_bits (not range checked here)
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+WORD32 ba_get_prev_frame_tot_bits(bit_allocation_t *ps_bit_allocation, WORD32 i4_pic)
+{
+    return ps_bit_allocation->ai4_prev_frm_tot_bits[i4_pic];
+}
+/*****************************************************************************
+ Function Name : ba_gop_info_average_qscale_gop_without_offset
+ Description : Returns f_hbd_avg_q_scale_gop_without_offset of the current gop's stat entry
+ Inputs : ps_bit_allocation - supplies pv_gop_stat and i8_cur_gop_num
+
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+float ba_gop_info_average_qscale_gop_without_offset(bit_allocation_t *ps_bit_allocation)
+{
+    gop_level_stat_t *ps_gop_stats = (gop_level_stat_t *)ps_bit_allocation->pv_gop_stat;
+
+    /* pv_gop_stat is treated as an array of gop_level_stat_t indexed by gop number */
+    return ps_gop_stats[ps_bit_allocation->i8_cur_gop_num].f_hbd_avg_q_scale_gop_without_offset;
+}
+/*****************************************************************************
+ Function Name : ba_get_min_complexity_for_peak_br
+ Description : compute, by bisection, min complexity above which peak rate needs to be given
+ Inputs : i4_peak_bit_rate, i4_bit_rate - their ratio is the target bits ratio
+          f_peak_rate_factor, f_max_val, f_min_val, i4_pass (2 selects the 2 pass map)
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+float ba_get_min_complexity_for_peak_br(
+    WORD32 i4_peak_bit_rate,
+    WORD32 i4_bit_rate,
+    float f_peak_rate_factor,
+    float f_max_val,
+    float f_min_val,
+    WORD32 i4_pass)
+{
+    float f_target_bits_ratio = (float)i4_peak_bit_rate / i4_bit_rate;
+    float f_avg_val = (f_max_val + f_min_val) / 2;
+    float f_at_min_val;
+    float f_at_max_val;
+    float f_at_avg_val;
+    WORD32 i4_iter;
+    const WORD32 i4_max_iter = 25;
+
+    /* Map both end points and the mid point of the initial search interval */
+    if(i4_pass == 2)
+    {
+        f_at_min_val = COMP_TO_BITS_MAP_2_PASS(f_min_val, f_peak_rate_factor);
+        f_at_max_val = COMP_TO_BITS_MAP_2_PASS(f_max_val, f_peak_rate_factor);
+        f_at_avg_val = COMP_TO_BITS_MAP_2_PASS(f_avg_val, f_peak_rate_factor);
+    }
+    else
+    {
+        f_at_min_val = COMP_TO_BITS_MAP(f_min_val, f_peak_rate_factor);
+        f_at_max_val = COMP_TO_BITS_MAP(f_max_val, f_peak_rate_factor);
+        f_at_avg_val = COMP_TO_BITS_MAP(f_avg_val, f_peak_rate_factor);
+    }
+
+    /* The interval is halved at least once; the exit test sits at the end of the body */
+    for(i4_iter = 0;; i4_iter++)
+    {
+        /* Keep the lower half only when the target lies strictly between min and mid */
+        if((f_at_min_val < f_target_bits_ratio) && (f_target_bits_ratio < f_at_avg_val))
+        {
+            f_max_val = f_avg_val;
+        }
+        else
+        {
+            f_min_val = f_avg_val;
+        }
+        f_avg_val = (f_max_val + f_min_val) / 2;
+
+        /* Re-map the narrowed interval */
+        if(i4_pass == 2)
+        {
+            f_at_min_val = COMP_TO_BITS_MAP_2_PASS(f_min_val, f_peak_rate_factor);
+            f_at_max_val = COMP_TO_BITS_MAP_2_PASS(f_max_val, f_peak_rate_factor);
+            f_at_avg_val = COMP_TO_BITS_MAP_2_PASS(f_avg_val, f_peak_rate_factor);
+        }
+        else
+        {
+            f_at_min_val = COMP_TO_BITS_MAP(f_min_val, f_peak_rate_factor);
+            f_at_max_val = COMP_TO_BITS_MAP(f_max_val, f_peak_rate_factor);
+            f_at_avg_val = COMP_TO_BITS_MAP(f_avg_val, f_peak_rate_factor);
+        }
+
+        /* Stop when the mapped mid point is within .0001 of the target or the cap is reached */
+        if((fabs((float)(f_at_avg_val - f_target_bits_ratio)) <= .0001f) ||
+           (i4_iter >= i4_max_iter))
+        {
+            break;
+        }
+    }
+
+    /* The result is the unmapped value for 1 pass and the mapped value for 2 pass */
+    return (i4_pass == 2) ? f_at_avg_val : f_avg_val;
+}
+/*****************************************************************************
+ Function Name : get_f_curr_by_sum_subgop
+ Description : Returns f_curr_by_sum_subgop from the bit allocation state
+ Inputs : ps_bit_allocation - bit allocation state to read
+
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+float get_f_curr_by_sum_subgop(bit_allocation_t *ps_bit_allocation)
+{
+    return ps_bit_allocation->f_curr_by_sum_subgop;
+}
+/*****************************************************************************
+ Function Name : ba_get_frame_number_in_gop
+ Description : Returns i8_frm_num_in_gop converted to WORD32
+ Inputs : ps_bit_allocation - bit allocation state to read
+
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+WORD32 ba_get_frame_number_in_gop(bit_allocation_t *ps_bit_allocation)
+{
+    return (WORD32)ps_bit_allocation->i8_frm_num_in_gop;
+}
+/*****************************************************************************
+ Function Name : ba_get_qscale_max_clip_in_second_pass
+ Description : Returns f_qscale_max_clip_in_second_pass from the bit allocation state
+ Inputs : ps_bit_allocation - bit allocation state to read
+
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+float ba_get_qscale_max_clip_in_second_pass(bit_allocation_t *ps_bit_allocation)
+{
+    return ps_bit_allocation->f_qscale_max_clip_in_second_pass;
+}
+/*****************************************************************************
+ Function Name : ba_set_avg_qscale_first_pass
+ Description : Stores f_average_qscale_1st_pass in the bit allocation state
+ Inputs : ps_bit_allocation - bit allocation state to update
+          f_average_qscale_1st_pass - value written to the field of the same name
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+void ba_set_avg_qscale_first_pass(
+ bit_allocation_t *ps_bit_allocation, float f_average_qscale_1st_pass)
+{
+ ps_bit_allocation->f_average_qscale_1st_pass = f_average_qscale_1st_pass;
+}
+/*****************************************************************************
+ Function Name : ba_set_max_avg_qscale_first_pass
+ Description : Stores the given value in f_max_average_qscale_1st_pass
+ Inputs : ps_bit_allocation - bit allocation state to update
+          f_average_qscale_1st_pass - value to store (written to the "max" field)
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+void ba_set_max_avg_qscale_first_pass(
+ bit_allocation_t *ps_bit_allocation, float f_average_qscale_1st_pass)
+{
+ ps_bit_allocation->f_max_average_qscale_1st_pass = f_average_qscale_1st_pass;
+}
+/*****************************************************************************
+ Function Name : ba_get_avg_qscale_first_pass
+ Description : Returns f_average_qscale_1st_pass from the bit allocation state
+ Inputs : ps_bit_allocation - bit allocation state to read
+
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+float ba_get_avg_qscale_first_pass(bit_allocation_t *ps_bit_allocation)
+{
+    return ps_bit_allocation->f_average_qscale_1st_pass;
+}
+/*****************************************************************************
+ Function Name : ba_get_max_avg_qscale_first_pass
+ Description : Returns f_max_average_qscale_1st_pass from the bit allocation state
+ Inputs : ps_bit_allocation - bit allocation state to read
+
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+float ba_get_max_avg_qscale_first_pass(bit_allocation_t *ps_bit_allocation)
+{
+    return ps_bit_allocation->f_max_average_qscale_1st_pass;
+}
+/*****************************************************************************
+ Function Name : bit_alloc_set_2pass_total_frames
+ Description : Stores i4_total_2pass_frames in the bit allocation state
+ Inputs : ps_bit_allocation - bit allocation state to update
+          i4_total_2pass_frames - value written to the field of the same name
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+void bit_alloc_set_2pass_total_frames(
+ bit_allocation_t *ps_bit_allocation, WORD32 i4_total_2pass_frames)
+{
+ ps_bit_allocation->i4_total_2pass_frames = i4_total_2pass_frames;
+}
+/*****************************************************************************
+ Function Name : ba_get_2pass_total_frames
+ Description : Returns i4_total_2pass_frames from the bit allocation state
+ Inputs : ps_bit_allocation - bit allocation state to read
+
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+WORD32 ba_get_2pass_total_frames(bit_allocation_t *ps_bit_allocation)
+{
+    return ps_bit_allocation->i4_total_2pass_frames;
+}
+/*****************************************************************************
+ Function Name : ba_set_enable_look_ahead
+ Description : Stores the given flag in i4_fp_bit_alloc_in_sp
+ Inputs : ps_bit_allocation - bit allocation state to update
+          i4_fp_bit_alloc_in_sp - value to store (bit_allocation.h names it i4_enable_look_ahead)
+ Revision History:
+ DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+void ba_set_enable_look_ahead(bit_allocation_t *ps_bit_allocation, WORD32 i4_fp_bit_alloc_in_sp)
+{
+ ps_bit_allocation->i4_fp_bit_alloc_in_sp = i4_fp_bit_alloc_in_sp;
+}
diff --git a/encoder/bit_allocation.h b/encoder/bit_allocation.h
new file mode 100644
index 0000000..5fa1ca9
--- /dev/null
+++ b/encoder/bit_allocation.h
@@ -0,0 +1,333 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file bit_allocation.h
+*
+* \brief
+* This file contains bit processing function declarations
+*
+* \date
+*
+* \author
+* ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _BIT_ALLOCATION_H_
+#define _BIT_ALLOCATION_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+#define MIN_THRESHOLD_VBV_GOP_ERROR (0.30)
+#define MAX_THRESHOLD_VBV_GOP_ERROR (0.80)
+#define MAX_THRESHOLD_VBV_FRM_ERROR (0.80)
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+typedef struct bit_allocation_t *bit_allocation_handle;
+
+/*****************************************************************************/
+/* Function Declarations */
+/*****************************************************************************/
+WORD32 bit_allocation_num_fill_use_free_memtab(
+ bit_allocation_handle *pps_bit_allocation,
+ itt_memtab_t *ps_memtab,
+ ITT_FUNC_TYPE_E e_func_type);
+
+void init_bit_allocation(
+ bit_allocation_handle ps_bit_allocation,
+ pic_handling_handle ps_pic_handling,
+ WORD32 i4_num_intra_frm_interval, /* num such intervals */
+ WORD32 i4_bit_rate, /* num bits per second */
+ WORD32 i4_frm_rate, /* num frms in 1000 seconds */
+ WORD32 *i4_peak_bit_rate,
+ WORD32 i4_min_bitrate, /* The minimum bit rate that is to be satisfied for a gop */
+ WORD32 i4_pels_in_frame,
+ WORD32 i4_is_hbr,
+ WORD32 i4_num_active_pic_type,
+ WORD32 i4_lap_window,
+ WORD32 i4_field_pic,
+ WORD32 rc_pass,
+ WORD32 i4_luma_pels,
+ WORD32 i4_enable_look_ahead);
+
+LWORD64 ba_get_rbip_and_num_frames(
+ bit_allocation_handle ps_bit_allocation,
+ pic_handling_handle ps_pic_handling,
+ WORD32 *pi4_num_frames);
+void assign_complexity_coeffs(
+ bit_allocation_handle ps_bit_allocation, float af_sum_weigh[MAX_PIC_TYPE][3]);
+
+void init_prev_header_bits(
+ bit_allocation_handle ps_bit_allocation, pic_handling_handle ps_pic_handling);
+/* Estimates the number of texture bits required by the current frame */
+WORD32 get_cur_frm_est_texture_bits(
+ bit_allocation_handle ps_bit_allocation,
+ rc_rd_model_handle *pps_rd_model,
+ est_sad_handle ps_est_sad,
+ pic_handling_handle ps_pic_handling,
+ cbr_buffer_handle ps_cbr_buffer,
+ picture_type_e e_pic_type,
+ WORD32 i4_use_model,
+ WORD32 i4_is_scd_frame,
+ WORD32 i4_call_type,
+ float i_to_avg_ratio,
+ WORD32 i4_is_model_valid);
+
+WORD32 bit_alloc_get_intra_bits(
+ bit_allocation_handle ps_bit_allocation,
+ pic_handling_handle ps_pic_handling,
+ cbr_buffer_handle ps_cbr_buf_handling,
+ picture_type_e e_pic_type,
+ number_t *pvq_complexity_estimate,
+ WORD32 i4_is_scd,
+ float scd_ratio,
+ WORD32 i4_call_type,
+ WORD32 i4_non_I_scd,
+ float f_percent_head_bits);
+
+/* Estimate the number of header bits required by the current frame */
+WORD32
+ get_cur_frm_est_header_bits(bit_allocation_handle ps_bit_allocation, picture_type_e e_pic_type);
+
+/* Get the remaining bits allocated in the period */
+WORD32 get_rem_bits_in_period(
+ bit_allocation_handle ps_bit_allocation, pic_handling_handle ps_pic_handling);
+
+WORD32 ba_get_frame_rate(bit_allocation_handle ps_bit_allocation);
+
+WORD32 get_bits_per_frame(bit_allocation_handle ps_bit_allocation);
+
+WORD32 ba_get_bit_rate(bit_allocation_handle ps_bit_allocation);
+void ba_get_peak_bit_rate(bit_allocation_handle ps_bit_allocation, WORD32 *pi4_peak_bit_rate);
+
+LWORD64 ba_get_buffer_play_bits_for_cur_gop(bit_allocation_handle ps_bit_allocation);
+LWORD64 ba_get_gop_bits(bit_allocation_handle ps_bit_allocation);
+LWORD64 ba_get_gop_sad(bit_allocation_handle ps_bit_allocation);
+
+/* Updates the bit allocation module with the actual encoded values */
+void update_cur_frm_consumed_bits(
+ bit_allocation_handle ps_bit_allocation,
+ pic_handling_handle ps_pic_handling,
+ cbr_buffer_handle ps_cbr_buf_handle,
+ WORD32 i4_total_frame_bits,
+ WORD32 i4_model_updation_hdr_bits,
+ picture_type_e e_pic_type,
+ UWORD8 u1_is_scd,
+ WORD32 i4_last_frm_in_gop,
+ WORD32 i4_lap_comp_bits_reset,
+ WORD32 i4_suppress_bpic_update,
+ WORD32 i4_buffer_based_bit_error,
+ WORD32 i4_stuff_bits,
+ WORD32 i4_lap_window_comp,
+ rc_type_e e_rc_type,
+ WORD32 i4_num_gop,
+ WORD32 i4_is_pause_to_resume,
+ WORD32 i4_est_text_bits_ctr_update_qp,
+ WORD32 *pi4_gop_correction,
+ WORD32 *pi4_new_correction);
+
+void check_and_update_bit_allocation(
+ bit_allocation_handle ps_bit_allocation,
+ pic_handling_handle ps_pic_handling,
+ WORD32 i4_max_bits_inflow_per_frm);
+
+/* Based on the change in frame/bit rate update the remaining bits in period */
+void change_remaining_bits_in_period(
+ bit_allocation_handle ps_bit_allocation,
+ WORD32 i4_bit_rate,
+ WORD32 i4_frame_rate,
+ WORD32 *i4_peak_bit_rate);
+
+/* Change the gop size in the middle of a current gop */
+void change_gop_size(
+ bit_allocation_handle ps_bit_allocation,
+ WORD32 i4_intra_frm_interval,
+ WORD32 i4_inter_frm_interval,
+ WORD32 i4_num_intra_frm_interval);
+
+void update_rem_frms_in_period(
+ bit_allocation_handle ps_bit_allocation,
+ picture_type_e e_pic_type,
+ UWORD8 u1_is_first_frm,
+ WORD32 i4_intra_frm_interval,
+ WORD32 i4_num_intra_frm_interval);
+
+void change_rem_bits_in_prd_at_force_I_frame(
+ bit_allocation_handle ps_bit_allocation, pic_handling_handle ps_pic_handling);
+
+void change_ba_peak_bit_rate(bit_allocation_handle ps_bit_allocation, WORD32 *ai4_peak_bit_rate);
+
+void init_intra_header_bits(bit_allocation_handle ps_bit_allocation, WORD32 i4_intra_header_bits);
+WORD32 get_prev_header_bits(bit_allocation_handle ps_bit_allocation, WORD32 pic_type);
+void set_Kp_Kb_for_hi_motion(bit_allocation_handle ps_bit_allocation);
+
+void ba_get_qp_offset_offline_data(
+ WORD32 ai4_offsets[5],
+ WORD32 i4_ratio,
+ float f_ratio,
+ WORD32 i4_num_active_pic_type,
+ WORD32 *pi4_complexity_bin);
+
+void reset_Kp_Kb(
+ bit_allocation_handle ps_bit_allocation,
+ float f_i_to_avg_ratio,
+ WORD32 i4_num_active_pic_type,
+ float f_hme_sad_per_pixel,
+ float f_max_hme_sad_per_pixel,
+ WORD32 *pi4_complexity_bin,
+ WORD32 i4_rc_pass);
+
+WORD32 get_Kp_Kb(bit_allocation_handle ps_bit_allocation, picture_type_e e_pic_type);
+
+/*get total bits for scene cut frame*/
+WORD32 get_scene_change_tot_frm_bits(
+ bit_allocation_handle ps_bit_allocation,
+ pic_handling_handle ps_pic_handling,
+ cbr_buffer_handle ps_cbr_buf_handling,
+ WORD32 i4_num_pixels,
+ WORD32 i4_f_sim_lap,
+ float i_to_avg_rest,
+ WORD32 i4_call_type,
+ WORD32 i4_non_I_scd,
+ WORD32 i4_is_infinite_gop);
+
+void update_estimate_status(
+ bit_allocation_handle ps_bit_allocation,
+ WORD32 i4_est_texture_bits,
+ WORD32 i4_hdr_bits,
+ WORD32 i4_est_text_bits_ctr_get_qp);
+
+void bit_allocation_set_num_scd_lap_window(
+ bit_allocation_handle ps_bit_allocation,
+ WORD32 i4_num_scd_in_lap_window,
+ WORD32 i4_next_sc_i_in_rc_look_ahead);
+
+void bit_allocation_set_sc_i_in_rc_look_ahead(
+ bit_allocation_handle ps_bit_allocation, WORD32 i4_num_scd_in_lap_window);
+
+/*updates gop based bit error entropy and rdopt estimate*/
+void bit_allocation_update_gop_level_bit_error(
+ bit_allocation_handle ps_bit_allocation, WORD32 i4_error_bits);
+/*
+The stat file is parsed at the end of init, by which time bit allocation init has already happened.
+The memory for gop stat data is allocated inside the stat file parsing code, hence the pointer has to be updated again.
+*/
+
+void ba_init_stat_data(
+ bit_allocation_handle ps_bit_allocation,
+ pic_handling_handle ps_pic_handling,
+ void *pv_gop_stat,
+ WORD32 *pi4_pic_dist_in_cur_gop,
+ WORD32 i4_total_bits_in_period,
+ WORD32 i4_excess_bits);
+
+void get_prev_frame_total_header_bits(
+ bit_allocation_handle ps_bit_allocation,
+ WORD32 *pi4_prev_frame_total_bits,
+ WORD32 *pi4_prev_frame_header_bits,
+ picture_type_e e_pic_type);
+
+void rc_update_bit_distribution_gop_level_2pass(
+ bit_allocation_handle ps_bit_allocation,
+ pic_handling_handle ps_pic_handle,
+ void *pv_gop_stat,
+ rc_type_e e_rc_type,
+ WORD32 i4_num_gop,
+ WORD32 i4_start_gop_number,
+ float f_avg_qscale_first_pass,
+ WORD32 i4_max_ebf,
+ WORD32 i4_ebf,
+ LWORD64 i8_tot_bits_sequence,
+ WORD32 i4_comp_error);
+
+LWORD64 bit_alloc_get_gop_num(bit_allocation_handle ps_bit_allocation);
+
+float get_cur_peak_factor_2pass(bit_allocation_handle ps_bit_allocation);
+float get_cur_min_complexity_factor_2pass(bit_allocation_handle ps_bit_allocation);
+
+void set_2pass_total_gops(bit_allocation_handle ps_bit_allocation, WORD32 i4_num_gop);
+WORD32 ba_get_min_bits_per_frame(bit_allocation_handle ps_bit_allocation);
+
+void set_bit_allocation_i_frames(
+ bit_allocation_handle ps_bit_allocation,
+ cbr_buffer_handle ps_cbr_buffer,
+ pic_handling_handle ps_pic_handle,
+ WORD32 i4_lap_window_comp,
+ WORD32 i4_num_frames);
+
+void bit_alloc_set_curr_i_to_sum_i(bit_allocation_handle ps_bit_allocation, float f_i_to_sum);
+
+void ba_set_gop_stat_in_bit_alloc(
+ bit_allocation_handle ps_bit_allocation, void *pv_gop_stat_summary);
+
+WORD32 ba_get_luma_pels(bit_allocation_handle ps_bit_allocation);
+
+void overflow_avoided_summation(WORD32 *pi4_accumulator, WORD32 i4_input);
+
+float ba_get_sum_complexity_segment_cross_peak(bit_allocation_handle ps_bit_allocation);
+
+WORD32 ba_get_prev_frame_tot_est_bits(bit_allocation_handle ps_bit_allocation, WORD32 i4_pic);
+
+WORD32 ba_get_prev_frame_tot_bits(bit_allocation_handle ps_bit_allocation, WORD32 i4_pic);
+
+void ba_set_avg_qscale_first_pass(
+ bit_allocation_handle ps_bit_allocation, float f_average_qscale_1st_pass);
+
+void ba_set_max_avg_qscale_first_pass(
+ bit_allocation_handle ps_bit_allocation, float f_average_qscale_1st_pass);
+
+float ba_get_max_avg_qscale_first_pass(bit_allocation_handle ps_bit_allocation);
+
+float ba_get_avg_qscale_first_pass(bit_allocation_handle ps_bit_allocation);
+
+float ba_get_min_complexity_for_peak_br(
+ WORD32 i4_peak_bit_rate,
+ WORD32 i4_bit_rate,
+ float f_peak_rate_factor,
+ float f_max_val,
+ float f_min_val,
+ WORD32 i4_pass);
+
+float ba_gop_info_average_qscale_gop_without_offset(bit_allocation_handle ps_bit_allocation);
+
+float ba_get_qscale_max_clip_in_second_pass(bit_allocation_handle ps_bit_allocation);
+
+float ba_gop_info_average_qscale_gop(bit_allocation_handle ps_bit_allocation);
+WORD32 ba_get_frame_number_in_gop(bit_allocation_handle ps_bit_allocation);
+
+void bit_alloc_set_2pass_total_frames(
+ bit_allocation_handle ps_bit_allocation, WORD32 i4_total_2pass_frames);
+
+WORD32 ba_get_2pass_total_frames(bit_allocation_handle ps_bit_allocation);
+
+WORD32 ba_get_2pass_bit_rate(bit_allocation_handle ps_bit_allocation);
+
+void ba_set_2pass_bit_rate(bit_allocation_handle ps_bit_allocation, WORD32 i4_2pass_bit_rate);
+
+void ba_set_2pass_avg_bit_rate(
+ bit_allocation_handle ps_bit_allocation, LWORD64 i8_2pass_avg_bit_rate);
+
+void ba_set_enable_look_ahead(bit_allocation_handle ps_bit_allocation, WORD32 i4_enable_look_ahead);
+#endif
diff --git a/encoder/cast_types.h b/encoder/cast_types.h
new file mode 100644
index 0000000..ca8021c
--- /dev/null
+++ b/encoder/cast_types.h
@@ -0,0 +1,85 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* */
+/* File Name : cast_types.h */
+/* */
+/* Description : This file contains all the necessary constants and */
+/* type definitions according to CAST specifications. */
+/* */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 28 09 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+#ifndef CAST_TYPES_H
+#define CAST_TYPES_H
+
+#include "ihevc_typedefs.h"
+
+/*****************************************************************************/
+/* Constants */
+/*****************************************************************************/
+
+/* The following definitions indicates the input parameter / argument state */
+
+/* Parameter declared with IN, will be used to hold INput value */
+#define IN
+
+/* Parameter declared with OUT, will be used to hold OUTput value */
+#define OUT
+
+/* Parameter declared with INOUT, will have INput value and will hold */
+/* OUTput value */
+#define INOUT
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+
+/* Typedef's for built-in datatypes */
+
+typedef float FLOAT;
+
+/*****************************************************************************/
+/* Structures */
+/*****************************************************************************/
+
+/* Unsigned 64 bit value, held as two UWORD32 halves */
+typedef struct
+{
+ UWORD32 lsw; /* Holds lower 32 bits */
+ UWORD32 msw; /* Holds upper 32 bits */
+} UWORD64;
+
+/* Signed 64 bit value, held as two halves; only the upper half is declared signed */
+typedef struct
+{
+ UWORD32 lsw; /* Holds lower 32 bits */
+ WORD32 msw; /* Holds upper 32 bits (WORD32, unlike lsw) */
+} WORD64;
+
+#endif /* CAST_TYPES_H */
diff --git a/encoder/cbr_buffer_control.c b/encoder/cbr_buffer_control.c
new file mode 100644
index 0000000..29aebf3
--- /dev/null
+++ b/encoder/cbr_buffer_control.c
@@ -0,0 +1,1384 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file cbr_buffer_control.c
+*
+* \brief
+* This file contains all functions needed for cbr buffer control
+* \date
+* 06/05/2008
+*
+* \author
+* ittiam
+*
+* \List of Functions
+* init_cbr_buffer
+* cbr_buffer_constraint_check
+* get_cbr_buffer_status
+* update_cbr_buffer
+*
+******************************************************************************
+*/
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+
+/* User include files */
+#include "assert.h"
+#include "ittiam_datatypes.h"
+#include "rc_cntrl_param.h"
+#include "rc_common.h"
+#include "mem_req_and_acq.h"
+#include "fixed_point_error_bits.h"
+#include "cbr_buffer_control.h"
+#include "trace_support.h"
+#include "var_q_operator.h"
+
+#define MIN(x, y) (((x) < (y)) ? (x) : (y)) /* whole expansion parenthesised; arguments may be evaluated twice */
+/*allow a maximum of 20 percent deviation when input is very large*/
+#define VBR_MAX_BIT_DEV_SEC 50LL
+
+typedef struct cbr_buffer_t
+{
+ WORD32 i4_buffer_size; /* Buffer size; init_cbr_buffer sets it from u4_vbv_buf_size */
+ WORD32
+ i4_drain_bits_per_frame[MAX_NUM_DRAIN_RATES]; /* Constant drain rate */
+ WORD32 i4_ebf; /* Encoder Buffer Fullness */
+ LWORD64
+ i8_ebf_bit_alloc; /* current encoder buffer fullness that accounts for precise bit consumption (not truncated to max buffer size at skip) */
+ LWORD64 i8_credit_level;
+ WORD32 i4_upr_thr[MAX_PIC_TYPE]; /* Upper threshold of the Buffer */
+ WORD32 i4_low_thr[MAX_PIC_TYPE]; /* Lower threshold of the Buffer */
+ error_bits_handle
+ aps_bpf_error_bits[MAX_NUM_DRAIN_RATES]; /* For error due to bits per frame calculation */
+ WORD32
+ i4_is_cbr_mode; /* Whether the buffer model is used for CBR or VBR streaming */
+ /* Input parameters stored for initialisation */
+ WORD32 ai4_bit_rate[MAX_NUM_DRAIN_RATES];
+ WORD32 i4_max_delay;
+ WORD32 ai4_num_pics_in_delay_period[MAX_PIC_TYPE];
+ WORD32 i4_tgt_frm_rate;
+ UWORD32 u4_max_vbv_buf_size;
+ WORD32 i4_peak_drain_rate_frame;
+ WORD32 u4_num_frms_in_delay; /* NOTE(review): declared WORD32 despite the u4_ prefix */
+ UWORD32 u4_vbr_max_bit_deviation;
+ rc_type_e e_rc_type;
+ WORD32 i4_vbr_no_peak_rate_duration_limit; /* 1: peak rate may be sustained without a duration limit */
+ LWORD64 i8_tot_frm_to_be_encoded;
+ LWORD64
+ i8_num_frames_encoded; /* need to track the number of frames encoded to calculate the possible deviation allowed */
+ WORD32 i4_cbr_rc_pass;
+ WORD32 i4_inter_frame_int;
+ WORD32 i4_intra_frame_int;
+ WORD32 i4_capped_vbr_on;
+ float f_max_dur_peak_rate;
+ LWORD64 i4_ebf_estimate; /* NOTE(review): declared LWORD64 despite the i4_ prefix */
+} cbr_buffer_t;
+
+#if NON_STEADSTATE_CODE
+WORD32 cbr_buffer_num_fill_use_free_memtab(
+    cbr_buffer_t **pps_cbr_buffer, itt_memtab_t *ps_memtab, ITT_FUNC_TYPE_E e_func_type)
+{
+    static cbr_buffer_t s_cbr_buffer_temp;
+    WORD32 i4_rate_idx, i4_mem_tab_idx = 0;
+
+    /* For GET_NUM_MEMTAB and FILL_MEMTAB there is no state memory yet, so point the
+       handle at a static scratch instance to keep the dereferences below valid */
+    if((e_func_type == GET_NUM_MEMTAB) || (e_func_type == FILL_MEMTAB))
+        *pps_cbr_buffer = &s_cbr_buffer_temp;
+
+    /* memtab entry 0 is sized for the cbr_buffer_t state */
+    if(e_func_type != GET_NUM_MEMTAB)
+    {
+        fill_memtab(&ps_memtab[0], sizeof(cbr_buffer_t), MEM_TAB_ALIGNMENT, PERSISTENT, DDR);
+        use_or_fill_base(&ps_memtab[0], (void **)pps_cbr_buffer, e_func_type);
+    }
+    i4_mem_tab_idx = 1;
+
+    for(i4_rate_idx = 0; i4_rate_idx < MAX_NUM_DRAIN_RATES; i4_rate_idx++)
+    {
+        i4_mem_tab_idx += error_bits_num_fill_use_free_memtab(
+            &(*pps_cbr_buffer)->aps_bpf_error_bits[i4_rate_idx], &ps_memtab[i4_mem_tab_idx], e_func_type);
+    }
+    return (i4_mem_tab_idx);
+}
+static void set_upper_lower_vbv_threshold(cbr_buffer_t *ps_cbr_buffer, WORD32 i4_bits_per_frm)
+{
+    WORD32 i;
+    WORD32 i4_buf_size = ps_cbr_buffer->i4_buffer_size;
+    WORD32 i4_upr_thr = (WORD32)(((LWORD64)i4_buf_size >> 4) * UPPER_THRESHOLD_EBF_Q4);
+    WORD32 i4_low_thr;
+    WORD32 i4_low_thr_is_zero;
+
+    /* Lower threshold starts at zero for VBR_STREAMING without a peak rate duration limit (no
+       stuffing problem in that mode) and, for other rc types, when the inter frame interval is 1 */
+    if(ps_cbr_buffer->e_rc_type == VBR_STREAMING)
+        i4_low_thr_is_zero = (ps_cbr_buffer->i4_vbr_no_peak_rate_duration_limit != 0);
+    else
+        i4_low_thr_is_zero = (ps_cbr_buffer->i4_inter_frame_int == 1);
+
+    if(i4_low_thr_is_zero)
+        i4_low_thr = 0;
+    else
+        i4_low_thr = ps_cbr_buffer->i4_inter_frame_int * i4_bits_per_frm;
+
+    /* For huge buffer low limit can be higher: raise to size >> 6, then cap at size >> 3 */
+    if(i4_low_thr < (i4_buf_size >> 6))
+        i4_low_thr = (i4_buf_size >> 6);
+    if(i4_low_thr > (i4_buf_size >> 3))
+        i4_low_thr = (i4_buf_size >> 3);
+
+    /* Every picture type receives the same pair of thresholds */
+    for(i = 0; i < MAX_PIC_TYPE; i++)
+    {
+        ps_cbr_buffer->i4_upr_thr[i] = i4_upr_thr;
+        ps_cbr_buffer->i4_low_thr[i] = i4_low_thr;
+        ASSERT(ps_cbr_buffer->i4_upr_thr[i] > ps_cbr_buffer->i4_low_thr[i]);
+    }
+}
+/* ******************************************************************************/
+/**
+ * @brief Initialise the CBR VBV buffer state.
+ * This could however be used for VBR streaming VBV also
+ *
+ * @param ps_cbr_buffer
+ * @param i4_buffer_delay
+ * @param i4_tgt_frm_rate
+ * @param i4_bit_rate
+ * @param u4_num_pics_in_delay_prd
+ * @param u4_vbv_buf_size
+ */
+/* ******************************************************************************/
+void init_cbr_buffer(
+    cbr_buffer_t *ps_cbr_buffer,
+    WORD32 i4_buffer_delay,
+    WORD32 i4_tgt_frm_rate,
+    UWORD32 u4_bit_rate,
+    UWORD32 *u4_num_pics_in_delay_prd,
+    UWORD32 u4_vbv_buf_size,
+    UWORD32 u4_intra_frm_int,
+    rc_type_e e_rc_type,
+    UWORD32 u4_peak_bit_rate,
+    UWORD32 u4_num_frames_in_delay,
+    float f_max_dur_peak_rate,
+    LWORD64 i8_num_frames_to_encode,
+    WORD32 i4_inter_frm_int,
+    WORD32 i4_cbr_rc_pass,
+    WORD32 i4_capped_vbr_flag)
+{
+    /* Bits drained per frame, one entry per drain rate */
+    WORD32 ai4_frame_bits[MAX_NUM_DRAIN_RATES];
+    WORD32 i4_idx;
+
+    /* Configuration that is stored exactly as supplied by the caller */
+    ps_cbr_buffer->e_rc_type = e_rc_type;
+    ps_cbr_buffer->i4_capped_vbr_on = i4_capped_vbr_flag;
+    ps_cbr_buffer->i4_cbr_rc_pass = i4_cbr_rc_pass;
+    ps_cbr_buffer->i4_inter_frame_int = i4_inter_frm_int;
+    ps_cbr_buffer->i4_intra_frame_int = u4_intra_frm_int;
+    ps_cbr_buffer->f_max_dur_peak_rate = f_max_dur_peak_rate;
+    ps_cbr_buffer->i8_tot_frm_to_be_encoded = i8_num_frames_to_encode;
+
+    /* The encoder buffer starts empty and nothing has been encoded yet */
+    ps_cbr_buffer->i4_ebf = 0;
+    ps_cbr_buffer->i4_ebf_estimate = 0;
+    ps_cbr_buffer->i8_ebf_bit_alloc = 0;
+    ps_cbr_buffer->i8_credit_level = 0;
+    ps_cbr_buffer->i8_num_frames_encoded = 0;
+
+    /* When set to 1 the peak rate may be sustained indefinitely (similar to
+       storage VBR). Only the VBR streaming path below may turn it on. */
+    ps_cbr_buffer->i4_vbr_no_peak_rate_duration_limit = 0;
+
+    /* Drain rate = bitrate / (framerate / 1000); every drain rate starts from
+       the same bit rate, and its error-bits state is initialised alongside */
+    for(i4_idx = 0; i4_idx < MAX_NUM_DRAIN_RATES; i4_idx++)
+    {
+        X_PROD_Y_DIV_Z(u4_bit_rate, 1000, i4_tgt_frm_rate, ai4_frame_bits[i4_idx]);
+        ps_cbr_buffer->i4_drain_bits_per_frame[i4_idx] = ai4_frame_bits[i4_idx];
+        init_error_bits(ps_cbr_buffer->aps_bpf_error_bits[i4_idx], i4_tgt_frm_rate, u4_bit_rate);
+    }
+
+    switch(e_rc_type)
+    {
+    case CBR_NLDRC:
+    {
+        /* CBR: the buffer size is the VBV size, independent of the initial delay */
+        ps_cbr_buffer->i4_buffer_size = (WORD32)u4_vbv_buf_size;
+        ps_cbr_buffer->i4_is_cbr_mode = 1;
+        ps_cbr_buffer->i4_peak_drain_rate_frame = ai4_frame_bits[0];
+        /* In CBR the allowed file size deviation is the buffer size itself */
+        ps_cbr_buffer->u4_vbr_max_bit_deviation = ps_cbr_buffer->i4_buffer_size;
+        break;
+    }
+    case VBR_STREAMING:
+    {
+        ULWORD64 u8_max_dev;
+        /* Bits per frame multiplied by the number of frames to encode */
+        ULWORD64 u8_est_file_bits = (ULWORD64)(
+            (((LWORD64)u4_bit_rate * 1000) / i4_tgt_frm_rate) * i8_num_frames_to_encode);
+
+        /* Raw VBV buffer size */
+        ps_cbr_buffer->i4_buffer_size = (WORD32)u4_vbv_buf_size;
+
+        /* A negative duration means there is no limit on how long the peak
+           rate may be sustained, so bits can be moved between any regions */
+        if(f_max_dur_peak_rate < 0)
+        {
+            ps_cbr_buffer->i4_vbr_no_peak_rate_duration_limit = 1;
+        }
+
+        /* Start from the requested peak duration when it is positive,
+           otherwise from the VBR_MAX_BIT_DEV_SEC default */
+        if(f_max_dur_peak_rate > 0)
+        {
+            u8_max_dev = (ULWORD64)(f_max_dur_peak_rate * u4_bit_rate);
+        }
+        else
+        {
+            u8_max_dev = (ULWORD64)(VBR_MAX_BIT_DEV_SEC * u4_bit_rate);
+        }
+
+        if(i8_num_frames_to_encode <= 0)
+        {
+            /* Total frame count is not available (e.g. live encoding):
+               start with one second worth of bits */
+            u8_max_dev = u4_bit_rate;
+        }
+        else
+        {
+            /* Cap the deviation at one eighth of the estimated file size ... */
+            if(u8_max_dev > (u8_est_file_bits >> 3))
+            {
+                u8_max_dev = (UWORD32)(u8_est_file_bits >> 3);
+            }
+            /* ... but always allow at least one second worth of bits */
+            if(u8_max_dev < u4_bit_rate)
+            {
+                u8_max_dev = u4_bit_rate;
+            }
+        }
+        ps_cbr_buffer->u4_vbr_max_bit_deviation = u8_max_dev;
+
+        ps_cbr_buffer->i4_is_cbr_mode = 0;
+        X_PROD_Y_DIV_Z(
+            u4_peak_bit_rate, 1000, i4_tgt_frm_rate, ps_cbr_buffer->i4_peak_drain_rate_frame);
+        break;
+    }
+    default:
+    {
+        /* Apart from the two modes above only constant QP is expected */
+        ASSERT(e_rc_type == CONST_QP);
+        break;
+    }
+    }
+
+    /* The buffer may never be larger than the VBV buffer */
+    if(ps_cbr_buffer->i4_buffer_size > (WORD32)u4_vbv_buf_size)
+    {
+        ps_cbr_buffer->i4_buffer_size = u4_vbv_buf_size;
+    }
+
+    /* Upper and lower VBV thresholds are derived from the per-frame drain
+       bits; they are not set up for constant QP */
+    if(e_rc_type != CONST_QP)
+    {
+        set_upper_lower_vbv_threshold(ps_cbr_buffer, ai4_frame_bits[0]);
+    }
+
+    /* Keep the inputs around; the change_* functions rely on them */
+    for(i4_idx = 0; i4_idx < MAX_NUM_DRAIN_RATES; i4_idx++)
+    {
+        ps_cbr_buffer->ai4_bit_rate[i4_idx] = u4_bit_rate;
+    }
+    for(i4_idx = 0; i4_idx < MAX_PIC_TYPE; i4_idx++)
+    {
+        ps_cbr_buffer->ai4_num_pics_in_delay_period[i4_idx] = u4_num_pics_in_delay_prd[i4_idx];
+    }
+    ps_cbr_buffer->i4_tgt_frm_rate = i4_tgt_frm_rate;
+    ps_cbr_buffer->i4_max_delay = i4_buffer_delay;
+    ps_cbr_buffer->u4_max_vbv_buf_size = u4_vbv_buf_size;
+    ps_cbr_buffer->u4_num_frms_in_delay = u4_num_frames_in_delay;
+}
+#endif /* #if NON_STEADSTATE_CODE */
+
+/* ******************************************************************************/
+/**
+ * @brief Constrains the bits allocated to a picture based on the buffer
+ *        thresholds (CBR)
+ *
+ * @param ps_cbr_buffer     buffer model state
+ * @param i4_tgt_bits       target bits proposed for the picture
+ * @param e_pic_type        picture type; selects the thresholds and drain rate
+ * @param pi4_max_tgt_bits  [out] upper limit that was applied
+ * @param pi4_min_tgt_bits  [out] lower limit that was applied
+ *
+ * @return i4_tgt_bits after CLIP() against the two limits
+ */
+/* ******************************************************************************/
+WORD32 cbr_buffer_constraint_check(
+    cbr_buffer_t *ps_cbr_buffer,
+    WORD32 i4_tgt_bits,
+    picture_type_e e_pic_type,
+    WORD32 *pi4_max_tgt_bits,
+    WORD32 *pi4_min_tgt_bits)
+{
+    /* I pictures use drain rate 0, every other picture type uses drain rate 1 */
+    const WORD32 i4_rate_idx = (e_pic_type == I_PIC) ? 0 : 1;
+    WORD32 i4_frame_drain = ps_cbr_buffer->i4_drain_bits_per_frame[i4_rate_idx];
+    WORD32 i4_err_bits = get_error_bits(ps_cbr_buffer->aps_bpf_error_bits[i4_rate_idx]);
+    WORD32 i4_upper_limit, i4_lower_limit;
+
+    /* Upper limit = upper threshold - estimated encoder buffer fullness,
+       floored at zero */
+    i4_upper_limit =
+        (WORD32)(ps_cbr_buffer->i4_upr_thr[e_pic_type] - ps_cbr_buffer->i4_ebf_estimate);
+    if(i4_upper_limit < 0)
+    {
+        i4_upper_limit = 0;
+    }
+
+    /* Lower limit = bits needed so that, after draining, the buffer stays
+       above the lower threshold; floored at zero */
+    i4_lower_limit = (WORD32)(
+        ps_cbr_buffer->i4_low_thr[e_pic_type] -
+        (ps_cbr_buffer->i4_ebf_estimate - i4_frame_drain - i4_err_bits));
+    if(i4_lower_limit < 0)
+    {
+        i4_lower_limit = 0;
+    }
+
+    /* Bring the target between the two limits and report the limits used */
+    CLIP(i4_tgt_bits, i4_upper_limit, i4_lower_limit);
+    *pi4_min_tgt_bits = i4_lower_limit;
+    *pi4_max_tgt_bits = i4_upper_limit;
+
+    return i4_tgt_bits;
+}
+
+/* ******************************************************************************/
+/**
+ * @brief Constrains the bit allocation of a picture based on the buffer state
+ *        (VBR streaming)
+ *
+ * @param ps_cbr_buffer  buffer model state
+ * @param i4_tgt_bits    target bits proposed for the picture
+ * @param e_pic_type     picture type; selects the upper threshold
+ * @param pi4_max_bits   [out] upper limit that was applied
+ * @param pi4_min_bits   [out] lower limit that was applied
+ *
+ * @return i4_tgt_bits raised to the lower limit and then capped at the upper
+ *         limit
+ */
+/* ******************************************************************************/
+WORD32 vbr_stream_buffer_constraint_check(
+    cbr_buffer_t *ps_cbr_buffer,
+    WORD32 i4_tgt_bits,
+    picture_type_e e_pic_type,
+    WORD32 *pi4_max_bits,
+    WORD32 *pi4_min_bits)
+{
+    WORD32 i4_floor_bits = 0;
+    WORD32 i4_ceil_bits;
+
+    /* Bits that can be sent to the decoder buffer within the delay at the
+       peak drain rate, minus what is already estimated to be in the buffer */
+    i4_ceil_bits = (WORD32)(
+        (ps_cbr_buffer->u4_num_frms_in_delay * ps_cbr_buffer->i4_peak_drain_rate_frame) -
+        ps_cbr_buffer->i4_ebf_estimate);
+
+    /* A single frame must also fit below the upper threshold */
+    if(i4_ceil_bits > ps_cbr_buffer->i4_upr_thr[e_pic_type] - ps_cbr_buffer->i4_ebf_estimate)
+    {
+        i4_ceil_bits =
+            (WORD32)(ps_cbr_buffer->i4_upr_thr[e_pic_type] - ps_cbr_buffer->i4_ebf_estimate);
+    }
+
+    if(!ps_cbr_buffer->i4_vbr_no_peak_rate_duration_limit)
+    {
+        /* Ask for enough bits to reach the lower threshold so that stuffing
+           is avoided */
+        i4_floor_bits =
+            (WORD32)(ps_cbr_buffer->i4_low_thr[0] - ps_cbr_buffer->i8_ebf_bit_alloc);
+    }
+    else if(ps_cbr_buffer->i8_ebf_bit_alloc < 0 && ps_cbr_buffer->i4_cbr_rc_pass != 2)
+    {
+        /* Content has under-consumed: force at least half of the per-frame
+           bits so the under-consumption does not pile up */
+        i4_floor_bits = (ps_cbr_buffer->i4_drain_bits_per_frame[0] >> 1);
+    }
+
+    /* Neither limit may be negative */
+    if(i4_floor_bits < 0)
+    {
+        i4_floor_bits = 0;
+    }
+    if(i4_ceil_bits < 0)
+    {
+        i4_ceil_bits = 0;
+    }
+
+    /* Apply the lower limit first, then the upper limit */
+    if(i4_tgt_bits < i4_floor_bits)
+    {
+        i4_tgt_bits = i4_floor_bits;
+    }
+    if(i4_tgt_bits > i4_ceil_bits)
+    {
+        i4_tgt_bits = i4_ceil_bits;
+    }
+
+    *pi4_min_bits = i4_floor_bits;
+    *pi4_max_bits = i4_ceil_bits;
+
+    return i4_tgt_bits;
+}
+
+/* ******************************************************************************/
+/**
+ * @brief Checks the buffer state that would result from a frame and reports
+ *        whether it is overflowing, underflowing or normal
+ *
+ * The model tracks the encoder buffer while the returned status describes the
+ * VBV (decoder) buffer, so an encoder buffer overflow is reported as
+ * VBV_UNDERFLOW and an encoder buffer underflow as VBV_OVERFLOW.
+ *
+ * @param ps_cbr_buffer                     buffer model state (not modified)
+ * @param i4_tot_consumed_bits              bits consumed by the frame
+ * @param pi4_num_bits_to_prevent_overflow  [out] buffer size minus the
+ *                                          resulting (clamped) buffer level
+ * @param e_pic_type                        picture type; selects the drain rate
+ * @param e_rc_type                         rate control type
+ *
+ * @return VBV_NORMAL, VBV_UNDERFLOW or VBV_OVERFLOW
+ */
+/* ******************************************************************************/
+vbv_buf_status_e get_cbr_buffer_status(
+    cbr_buffer_t *ps_cbr_buffer,
+    WORD32 i4_tot_consumed_bits,
+    WORD32 *pi4_num_bits_to_prevent_overflow,
+    picture_type_e e_pic_type,
+    rc_type_e e_rc_type)
+{
+    vbv_buf_status_e e_vbv_status;
+    WORD32 i4_level;
+    /* I pictures use drain rate 0, every other picture type uses drain rate 1 */
+    const WORD32 i4_rate_idx = (e_pic_type == I_PIC) ? 0 : 1;
+    WORD32 i4_err_bits = get_error_bits(ps_cbr_buffer->aps_bpf_error_bits[i4_rate_idx]);
+    WORD32 i4_frame_drain = ps_cbr_buffer->i4_drain_bits_per_frame[i4_rate_idx];
+
+    /* Encoder buffer level once the frame's bits have been added */
+    i4_level = ps_cbr_buffer->i4_ebf + i4_tot_consumed_bits;
+
+    if(i4_level > ps_cbr_buffer->i4_buffer_size)
+    {
+        /* Encoder buffer overflow corresponds to a VBV underflow */
+        e_vbv_status = VBV_UNDERFLOW;
+        i4_level = ps_cbr_buffer->i4_buffer_size;
+    }
+    else
+    {
+        /* Remove the constant drain and the fixed point error bits */
+        i4_level -= (i4_frame_drain + i4_err_bits);
+
+        if(i4_level >= 0)
+        {
+            e_vbv_status = VBV_NORMAL;
+        }
+        else
+        {
+            /* In VBR streaming a negative level is simply treated as empty;
+               otherwise the encoder buffer underflow is a VBV overflow */
+            e_vbv_status = (e_rc_type == VBR_STREAMING) ? VBV_NORMAL : VBV_OVERFLOW;
+            i4_level = 0;
+        }
+    }
+
+    *pi4_num_bits_to_prevent_overflow = (ps_cbr_buffer->i4_buffer_size - i4_level);
+
+    return e_vbv_status;
+}
+
+/* ******************************************************************************/
+/**
+ * @brief Updates the buffer model with the bits consumed by a frame
+ *
+ * @param ps_cbr_buffer         buffer model state, updated in place
+ * @param i4_tot_consumed_bits  bits consumed by the frame
+ * @param e_pic_type            picture type; selects the drain rate and
+ *                              error bits used in CBR mode
+ */
+/* ******************************************************************************/
+void update_cbr_buffer(
+    cbr_buffer_t *ps_cbr_buffer, WORD32 i4_tot_consumed_bits, picture_type_e e_pic_type)
+{
+    WORD32 i;
+    WORD32 i4_error_bits = (e_pic_type == I_PIC)
+                               ? get_error_bits(ps_cbr_buffer->aps_bpf_error_bits[0])
+                               : get_error_bits(ps_cbr_buffer->aps_bpf_error_bits[1]);
+    WORD32 i4_drain_bits_per_frame = (e_pic_type == I_PIC)
+                                         ? ps_cbr_buffer->i4_drain_bits_per_frame[0]
+                                         : ps_cbr_buffer->i4_drain_bits_per_frame[1];
+
+    ps_cbr_buffer->i8_num_frames_encoded++;
+
+    /* VBR streaming without a known total frame count: let the allowed
+       deviation grow with the amount of data encoded so far */
+    if(ps_cbr_buffer->e_rc_type == VBR_STREAMING && ps_cbr_buffer->i8_tot_frm_to_be_encoded < 0)
+    {
+        /* Start from one second worth of bits */
+        LWORD64 i8_max_bit_dev_allowed = ps_cbr_buffer->ai4_bit_rate[0];
+        LWORD64 approx_file_size = ps_cbr_buffer->i8_num_frames_encoded *
+                                   ps_cbr_buffer->ai4_bit_rate[0] * 1000 /
+                                   ps_cbr_buffer->i4_tgt_frm_rate;
+
+        /* Allow 1/16th of the bits produced so far if that is larger */
+        if(i8_max_bit_dev_allowed < (approx_file_size >> 4))
+            i8_max_bit_dev_allowed = (approx_file_size >> 4);
+
+        /* Upper limit so that the deviation does not keep growing for very
+           long sequences. The bit rate is widened to 64 bits so that the
+           product cannot overflow 32-bit arithmetic at high bit rates. */
+        if(i8_max_bit_dev_allowed >
+           (VBR_MAX_BIT_DEV_SEC * (LWORD64)ps_cbr_buffer->ai4_bit_rate[0]))
+            i8_max_bit_dev_allowed =
+                (VBR_MAX_BIT_DEV_SEC * (LWORD64)ps_cbr_buffer->ai4_bit_rate[0]);
+
+        /* NOTE(review): the value is stored as the maximum VBV buffer size,
+           not as u4_vbr_max_bit_deviation - confirm this is intended */
+        ps_cbr_buffer->u4_max_vbv_buf_size = (UWORD32)i8_max_bit_dev_allowed;
+    }
+
+    if(ps_cbr_buffer->i4_is_cbr_mode != 0)
+    {
+        /* CBR: add the consumed bits, then remove the drain bits and the
+           error bits due to the fixed point implementation */
+        ps_cbr_buffer->i4_ebf += i4_tot_consumed_bits;
+        ps_cbr_buffer->i8_ebf_bit_alloc += i4_tot_consumed_bits;
+
+        ps_cbr_buffer->i4_ebf -= (i4_drain_bits_per_frame + i4_error_bits);
+        ps_cbr_buffer->i8_ebf_bit_alloc -= (i4_drain_bits_per_frame + i4_error_bits);
+    }
+    else
+    {
+        /* The real buffer drains at most at the peak rate and never more
+           than it currently holds */
+        ps_cbr_buffer->i4_ebf += i4_tot_consumed_bits;
+        ps_cbr_buffer->i4_ebf -=
+            ((MIN(ps_cbr_buffer->i4_peak_drain_rate_frame, ps_cbr_buffer->i4_ebf)) + i4_error_bits);
+
+        /* The bit allocation buffer and the credit level drain at drain rate 0 */
+        ps_cbr_buffer->i8_ebf_bit_alloc += i4_tot_consumed_bits;
+        ps_cbr_buffer->i8_ebf_bit_alloc -=
+            (ps_cbr_buffer->i4_drain_bits_per_frame[0] + i4_error_bits);
+
+        ps_cbr_buffer->i8_credit_level += i4_tot_consumed_bits;
+        ps_cbr_buffer->i8_credit_level -=
+            (ps_cbr_buffer->i4_drain_bits_per_frame[0] + i4_error_bits);
+
+        /* To limit how long the peak rate can be sustained, bits saved in
+           simpler regions are not accumulated: the VBR buffer treats them
+           as lost */
+        if(!ps_cbr_buffer->i4_vbr_no_peak_rate_duration_limit)
+        {
+            if(ps_cbr_buffer->i8_ebf_bit_alloc < 0)
+                ps_cbr_buffer->i8_ebf_bit_alloc = 0;
+        }
+    }
+
+    /* Keep the buffer fullness inside [0, buffer size] */
+    if(ps_cbr_buffer->i4_ebf < 0)
+    {
+        ps_cbr_buffer->i4_ebf = 0;
+    }
+
+    if(ps_cbr_buffer->i4_ebf > ps_cbr_buffer->i4_buffer_size)
+    {
+        ps_cbr_buffer->i4_ebf = ps_cbr_buffer->i4_buffer_size;
+    }
+
+    ps_cbr_buffer->i4_ebf_estimate = ps_cbr_buffer->i4_ebf;
+
+    /* i8_ebf_bit_alloc is a 64-bit value; it must not be passed for a %d
+       conversion */
+    trace_printf(
+        "VBR ebf = %d bebf = %lld ",
+        ps_cbr_buffer->i4_ebf,
+        (long long)ps_cbr_buffer->i8_ebf_bit_alloc);
+
+    /* Update the error bits */
+    for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+        update_error_bits(ps_cbr_buffer->aps_bpf_error_bits[i]);
+}
+
+/* ******************************************************************************/
+/**
+ * @brief Returns the number of bits to stuff so that the buffer does not
+ *        underflow after the frame has been drained
+ *
+ * @param ps_cbr_buffer         buffer model state (not modified)
+ * @param i4_tot_consumed_bits  bits consumed by the frame
+ * @param e_pic_type            picture type; selects the drain rate
+ *
+ * @return drain bits + error bits - (buffer fullness + consumed bits)
+ */
+/* ******************************************************************************/
+WORD32 get_cbr_bits_to_stuff(
+    cbr_buffer_t *ps_cbr_buffer, WORD32 i4_tot_consumed_bits, picture_type_e e_pic_type)
+{
+    /* I pictures use drain rate 0, every other picture type uses drain rate 1 */
+    const WORD32 i4_rate_idx = (e_pic_type == I_PIC) ? 0 : 1;
+    WORD32 i4_err_bits = get_error_bits(ps_cbr_buffer->aps_bpf_error_bits[i4_rate_idx]);
+    WORD32 i4_frame_drain = ps_cbr_buffer->i4_drain_bits_per_frame[i4_rate_idx];
+
+    /* From: stuffing threshold = ebf + consumed - drain - error + stuffing */
+    return i4_frame_drain + i4_err_bits - (ps_cbr_buffer->i4_ebf + i4_tot_consumed_bits);
+}
+
+/* ******************************************************************************/
+/**
+ * @brief Updates the buffer model state for a change in bit rate
+ *
+ * @param ps_cbr_buffer    buffer model state, updated in place
+ * @param i4_bit_rate      new bit rates, one entry per drain rate
+ *                         (MAX_NUM_DRAIN_RATES entries are read)
+ * @param i4_peak_bitrate  new peak bit rate
+ */
+/* ******************************************************************************/
+void change_cbr_vbv_bit_rate(
+    cbr_buffer_t *ps_cbr_buffer, WORD32 *i4_bit_rate, WORD32 i4_peak_bitrate)
+{
+    WORD32 i4_bits_per_frm[MAX_NUM_DRAIN_RATES];
+    int i;
+
+    for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+    {
+        /* Drain rate = bitrate/(framerate/1000) */
+        X_PROD_Y_DIV_Z(i4_bit_rate[i], 1000, ps_cbr_buffer->i4_tgt_frm_rate, i4_bits_per_frm[i]);
+        ps_cbr_buffer->i4_drain_bits_per_frame[i] = i4_bits_per_frm[i];
+
+        /* Propagate the new bit rate to the bits per frame error calculation */
+        change_bitrate_in_error_bits(ps_cbr_buffer->aps_bpf_error_bits[i], i4_bit_rate[i]);
+    }
+    X_PROD_Y_DIV_Z(
+        i4_peak_bitrate,
+        1000,
+        ps_cbr_buffer->i4_tgt_frm_rate,
+        ps_cbr_buffer->i4_peak_drain_rate_frame);
+
+    /* Bitrate * delay = buffer size, divide by 1000 as delay is in ms.
+       The delay term is supposed to remain constant. */
+    X_PROD_Y_DIV_Z(
+        i4_bit_rate[0], ps_cbr_buffer->i4_max_delay, 1000, ps_cbr_buffer->i4_buffer_size);
+    ps_cbr_buffer->u4_max_vbv_buf_size = ps_cbr_buffer->i4_buffer_size;
+
+    if(ps_cbr_buffer->i4_buffer_size > (WORD32)ps_cbr_buffer->u4_max_vbv_buf_size)
+    {
+        ps_cbr_buffer->i4_buffer_size = ps_cbr_buffer->u4_max_vbv_buf_size;
+    }
+    set_upper_lower_vbv_threshold(ps_cbr_buffer, i4_bits_per_frm[0]);
+
+    if(ps_cbr_buffer->e_rc_type == CBR_NLDRC)
+    {
+        /* In CBR the allowed deviation is the buffer size itself */
+        ps_cbr_buffer->u4_vbr_max_bit_deviation = ps_cbr_buffer->i4_buffer_size;
+    }
+    else
+    {
+        /* The deviation must follow the bit rate in the VBR case, otherwise
+           quality might suffer when the bit rate is lowered */
+        ULWORD64 u8_vbr_max_bit_deviation;
+
+        /* A non-positive f_max_dur_peak_rate means the user does not limit
+           the peak duration, so use the maximum. The float product is only
+           converted when it is positive: converting a negative value to an
+           unsigned type is undefined. */
+        if(ps_cbr_buffer->f_max_dur_peak_rate > 0)
+            u8_vbr_max_bit_deviation =
+                (ULWORD64)(ps_cbr_buffer->f_max_dur_peak_rate * i4_bit_rate[0]);
+        else
+            u8_vbr_max_bit_deviation =
+                (ULWORD64)(VBR_MAX_BIT_DEV_SEC * (LWORD64)i4_bit_rate[0]);
+
+        if(ps_cbr_buffer->i8_tot_frm_to_be_encoded > 0)
+        {
+            /* Estimated size of the part of the file still to be encoded */
+            ULWORD64 file_size = (ULWORD64)(
+                (((LWORD64)i4_bit_rate[0] * 1000) / ps_cbr_buffer->i4_tgt_frm_rate) *
+                (ps_cbr_buffer->i8_tot_frm_to_be_encoded - ps_cbr_buffer->i8_num_frames_encoded));
+
+            /* Cap the deviation at one eighth of that size */
+            if(u8_vbr_max_bit_deviation > (file_size >> 3))
+                u8_vbr_max_bit_deviation = (UWORD32)(file_size >> 3);
+        }
+        else
+        {
+            /* The total number of frames is not available (as in live
+               encoding), so no file size can be estimated: fall back to one
+               second worth of bits, as init_cbr_buffer does */
+            u8_vbr_max_bit_deviation = (ULWORD64)i4_bit_rate[0];
+        }
+
+        /* Always allow at least one second worth of bits */
+        if(u8_vbr_max_bit_deviation < (ULWORD64)i4_bit_rate[0])
+            u8_vbr_max_bit_deviation = (ULWORD64)i4_bit_rate[0];
+        ps_cbr_buffer->u4_vbr_max_bit_deviation = u8_vbr_max_bit_deviation;
+    }
+
+    /* Storing the input parameters for using it for change functions */
+    for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+        ps_cbr_buffer->ai4_bit_rate[i] = i4_bit_rate[i];
+}
+/* ******************************************************************************/
+/**
+ * @brief Updates the state for a change in the number of pictures in the
+ *        delay period. Does nothing in CBR mode.
+ *
+ * @param ps_cbr_buffer             buffer model state, updated in place
+ * @param u4_num_pics_in_delay_prd  pictures in the delay period, one entry
+ *                                  per picture type (MAX_PIC_TYPE entries
+ *                                  are read)
+ */
+/* ******************************************************************************/
+void change_cbr_vbv_num_pics_in_delay_period(
+    cbr_buffer_t *ps_cbr_buffer, UWORD32 *u4_num_pics_in_delay_prd)
+{
+    WORD32 i4_type;
+
+    if(ps_cbr_buffer->i4_is_cbr_mode)
+    {
+        return;
+    }
+
+    /* Buffer size = pictures in the delay period times their drain bits,
+       summed over the first two entries */
+    ps_cbr_buffer->i4_buffer_size =
+        u4_num_pics_in_delay_prd[0] * ps_cbr_buffer->i4_drain_bits_per_frame[0] +
+        u4_num_pics_in_delay_prd[1] * ps_cbr_buffer->i4_drain_bits_per_frame[1];
+
+    /* Never exceed the maximum VBV buffer size */
+    if(ps_cbr_buffer->i4_buffer_size > (WORD32)ps_cbr_buffer->u4_max_vbv_buf_size)
+    {
+        ps_cbr_buffer->i4_buffer_size = ps_cbr_buffer->u4_max_vbv_buf_size;
+    }
+
+    for(i4_type = 0; i4_type < MAX_PIC_TYPE; i4_type++)
+    {
+        /* Upper threshold is the buffer size minus one eighth of it */
+        ps_cbr_buffer->i4_upr_thr[i4_type] =
+            ps_cbr_buffer->i4_buffer_size - (ps_cbr_buffer->i4_buffer_size >> 3);
+
+        /* Remember the new number of pictures in the delay period */
+        ps_cbr_buffer->ai4_num_pics_in_delay_period[i4_type] = u4_num_pics_in_delay_prd[i4_type];
+    }
+}
+/* ******************************************************************************/
+/**
+ * @brief Sets the estimated buffer fullness to the actual fullness plus a bit
+ *        error, clamped to [0, buffer size]
+ *
+ * @param ps_cbr_buffer  buffer model state; only i4_ebf_estimate is written
+ * @param i4_bit_error   error added to the actual buffer fullness
+ */
+/* ******************************************************************************/
+void cbr_modify_ebf_estimate(cbr_buffer_t *ps_cbr_buffer, WORD32 i4_bit_error)
+{
+    WORD32 i4_estimate = ps_cbr_buffer->i4_ebf + i4_bit_error;
+
+    if(i4_estimate < 0)
+    {
+        i4_estimate = 0;
+    }
+    else if(i4_estimate > ps_cbr_buffer->i4_buffer_size)
+    {
+        i4_estimate = ps_cbr_buffer->i4_buffer_size;
+    }
+
+    ps_cbr_buffer->i4_ebf_estimate = i4_estimate;
+}
+
+/* ******************************************************************************/
+/**
+ * @brief Returns the buffer size of the model
+ *
+ * @param ps_cbr_buffer  buffer model state
+ *
+ * @return i4_buffer_size
+ */
+/* ******************************************************************************/
+WORD32 get_cbr_buffer_size(cbr_buffer_t *ps_cbr_buffer)
+{
+    WORD32 i4_size = ps_cbr_buffer->i4_buffer_size;
+
+    return i4_size;
+}
+
+#if NON_STEADSTATE_CODE
+/* ******************************************************************************/
+/**
+ * @brief Updates the state for a change in target frame rate
+ *
+ * @param ps_cbr_buffer    buffer model state, updated in place
+ * @param i4_tgt_frm_rate  new target frame rate
+ */
+/* ******************************************************************************/
+void change_cbr_vbv_tgt_frame_rate(cbr_buffer_t *ps_cbr_buffer, WORD32 i4_tgt_frm_rate)
+{
+    WORD32 ai4_frame_bits[MAX_NUM_DRAIN_RATES];
+    WORD32 i4_upper_limit;
+    WORD32 i4_idx;
+
+    for(i4_idx = 0; i4_idx < MAX_NUM_DRAIN_RATES; i4_idx++)
+    {
+        /* Drain rate = bitrate/(framerate/1000), using the stored bit rates */
+        X_PROD_Y_DIV_Z(
+            ps_cbr_buffer->ai4_bit_rate[i4_idx], 1000, i4_tgt_frm_rate, ai4_frame_bits[i4_idx]);
+        ps_cbr_buffer->i4_drain_bits_per_frame[i4_idx] = ai4_frame_bits[i4_idx];
+
+        /* Propagate the new frame rate to the error bits calculation */
+        change_frm_rate_in_error_bits(ps_cbr_buffer->aps_bpf_error_bits[i4_idx], i4_tgt_frm_rate);
+    }
+
+    if(!ps_cbr_buffer->i4_is_cbr_mode)
+    {
+        /* Outside CBR mode the buffer size follows from the pictures in the
+           delay period and the drain rates of the first two entries */
+        ps_cbr_buffer->i4_buffer_size = ps_cbr_buffer->ai4_num_pics_in_delay_period[0] *
+                                            ps_cbr_buffer->i4_drain_bits_per_frame[0] +
+                                        ps_cbr_buffer->ai4_num_pics_in_delay_period[1] *
+                                            ps_cbr_buffer->i4_drain_bits_per_frame[1];
+    }
+
+    /* Never exceed the maximum VBV buffer size */
+    if(ps_cbr_buffer->i4_buffer_size > (WORD32)ps_cbr_buffer->u4_max_vbv_buf_size)
+    {
+        ps_cbr_buffer->i4_buffer_size = ps_cbr_buffer->u4_max_vbv_buf_size;
+    }
+
+    /* Upper threshold is the buffer size minus one eighth of it, identical
+       for every picture type */
+    i4_upper_limit = ps_cbr_buffer->i4_buffer_size - (ps_cbr_buffer->i4_buffer_size >> 3);
+
+    for(i4_idx = 0; i4_idx < MAX_PIC_TYPE; i4_idx++)
+    {
+        ps_cbr_buffer->i4_upr_thr[i4_idx] = i4_upper_limit;
+
+        /* Lower threshold equals the drain rate (entry 0 for picture type 0,
+           entry 1 for all others): even if the encoder consumes zero bits
+           there should be enough bits to drain */
+        ps_cbr_buffer->i4_low_thr[i4_idx] = ai4_frame_bits[(i4_idx > 0) ? 1 : 0];
+    }
+
+    /* Keep the new frame rate for later change functions */
+    ps_cbr_buffer->i4_tgt_frm_rate = i4_tgt_frm_rate;
+}
+/* ******************************************************************************/
+/**
+ * @brief Updates the state for a change in buffer delay
+ *
+ * @param ps_cbr_buffer    buffer model state, updated in place
+ * @param i4_buffer_delay  new buffer delay (in ms, per the buffer size formula)
+ */
+/* ******************************************************************************/
+void change_cbr_buffer_delay(cbr_buffer_t *ps_cbr_buffer, WORD32 i4_buffer_delay)
+{
+    WORD32 i4_type;
+    WORD32 i4_upper_limit;
+
+    if(ps_cbr_buffer->i4_is_cbr_mode)
+    {
+        /* Bitrate * delay = buffer size, divide by 1000 as delay is in ms */
+        X_PROD_Y_DIV_Z(
+            ps_cbr_buffer->ai4_bit_rate[0], i4_buffer_delay, 1000, ps_cbr_buffer->i4_buffer_size);
+    }
+
+    /* Never exceed the maximum VBV buffer size */
+    if(ps_cbr_buffer->i4_buffer_size > (WORD32)ps_cbr_buffer->u4_max_vbv_buf_size)
+    {
+        ps_cbr_buffer->i4_buffer_size = ps_cbr_buffer->u4_max_vbv_buf_size;
+    }
+
+    /* Upper threshold is the buffer size minus one eighth of it, identical
+       for every picture type */
+    i4_upper_limit = ps_cbr_buffer->i4_buffer_size - (ps_cbr_buffer->i4_buffer_size >> 3);
+    for(i4_type = 0; i4_type < MAX_PIC_TYPE; i4_type++)
+    {
+        ps_cbr_buffer->i4_upr_thr[i4_type] = i4_upper_limit;
+    }
+
+    /* Keep the new delay for later change functions */
+    ps_cbr_buffer->i4_max_delay = i4_buffer_delay;
+}
+/* ******************************************************************************/
+/**
+ * @brief Returns the stored buffer delay
+ *
+ * @param ps_cbr_buffer  buffer model state
+ *
+ * @return i4_max_delay
+ */
+/* ******************************************************************************/
+WORD32 get_cbr_buffer_delay(cbr_buffer_t *ps_cbr_buffer)
+{
+    WORD32 i4_delay = ps_cbr_buffer->i4_max_delay;
+
+    return i4_delay;
+}
+/* ******************************************************************************/
+/**
+ * @brief Returns the current encoder buffer fullness
+ *
+ * @param ps_cbr_buffer  buffer model state
+ *
+ * @return i4_ebf
+ */
+/* ******************************************************************************/
+WORD32 get_cbr_ebf(cbr_buffer_t *ps_cbr_buffer)
+{
+    WORD32 i4_fullness = ps_cbr_buffer->i4_ebf;
+
+    return i4_fullness;
+}
+/* ******************************************************************************/
+/**
+ * @brief Returns the maximum encoder buffer fullness, i.e. the upper
+ *        threshold stored for picture type 0
+ *
+ * @param ps_cbr_buffer  buffer model state
+ *
+ * @return i4_upr_thr[0]
+ */
+/* ******************************************************************************/
+WORD32 get_cbr_max_ebf(cbr_buffer_t *ps_cbr_buffer)
+{
+    WORD32 i4_upper_limit = ps_cbr_buffer->i4_upr_thr[0];
+
+    return i4_upper_limit;
+}
+/* ******************************************************************************/
+/**
+ * @brief Overwrites the encoder buffer fullness
+ *
+ * Only i4_ebf is written; the estimate and the bit allocation buffer are
+ * left untouched.
+ *
+ * @param ps_cbr_buffer  buffer model state
+ * @param i32_init_ebf   new encoder buffer fullness
+ */
+/* ******************************************************************************/
+void set_cbr_ebf(cbr_buffer_t *ps_cbr_buffer, WORD32 i32_init_ebf)
+{
+    cbr_buffer_t *const ps_state = ps_cbr_buffer;
+
+    ps_state->i4_ebf = i32_init_ebf;
+}
+/* ******************************************************************************/
+/**
+ * @brief Removes mismatch bits from the buffer fullness, the bit allocation
+ *        buffer and the credit level
+ *
+ * @param ps_cbr_buffer  buffer model state, updated in place
+ * @param i4_error_bits  bits subtracted from each of the three levels
+ */
+/* ******************************************************************************/
+void update_cbr_buf_mismatch_bit(cbr_buffer_t *ps_cbr_buffer, WORD32 i4_error_bits)
+{
+    const WORD32 i4_mismatch = i4_error_bits;
+
+    ps_cbr_buffer->i8_credit_level -= i4_mismatch;
+    ps_cbr_buffer->i8_ebf_bit_alloc -= i4_mismatch;
+    ps_cbr_buffer->i4_ebf -= i4_mismatch;
+}
+/* ******************************************************************************/
+/**
+ * @brief Returns the number of frames encoded so far
+ *
+ * @param ps_cbr_buffer  buffer model state
+ *
+ * @return i8_num_frames_encoded
+ */
+/* ******************************************************************************/
+LWORD64 get_num_frms_encoded(cbr_buffer_t *ps_cbr_buffer)
+{
+    LWORD64 i8_encoded = ps_cbr_buffer->i8_num_frames_encoded;
+
+    return i8_encoded;
+}
+/* ******************************************************************************/
+/**
+ * @brief Returns the total number of frames to be encoded, as supplied at
+ *        initialisation
+ *
+ * @param ps_cbr_buffer  buffer model state
+ *
+ * @return i8_tot_frm_to_be_encoded
+ */
+/* ******************************************************************************/
+LWORD64 get_num_frms_to_encode(cbr_buffer_t *ps_cbr_buffer)
+{
+    LWORD64 i8_total = ps_cbr_buffer->i8_tot_frm_to_be_encoded;
+
+    return i8_total;
+}
+/* ******************************************************************************/
+/**
+ * @brief Returns the maximum drain rate per frame
+ *
+ * The buffer limit in bit allocation should be according to the peak bitrate,
+ * so VBR streaming reports the peak drain rate. All other modes report drain
+ * rate 0; for modes other than constant QP it is asserted to equal the peak
+ * drain rate.
+ *
+ * @param ps_cbr_buffer  buffer model state
+ *
+ * @return drain bits per frame as described above
+ */
+/* ******************************************************************************/
+WORD32 get_buf_max_drain_rate(cbr_buffer_t *ps_cbr_buffer)
+{
+    if(ps_cbr_buffer->e_rc_type == VBR_STREAMING)
+    {
+        return ps_cbr_buffer->i4_peak_drain_rate_frame;
+    }
+
+    if(ps_cbr_buffer->e_rc_type != CONST_QP)
+    {
+        ASSERT(
+            ps_cbr_buffer->i4_peak_drain_rate_frame == ps_cbr_buffer->i4_drain_bits_per_frame[0]);
+    }
+
+    return ps_cbr_buffer->i4_drain_bits_per_frame[0];
+}
+/* ******************************************************************************/
+/**
+ * @brief Returns excess bits obtained by moving in the VBV buffer, to enable a
+ *        bitrate greater than the peak rate for a shorter duration in very
+ *        complex content
+ *
+ * @param ps_cbr_buffer           buffer model state (not modified)
+ * @param f_complexity_peak_rate  complexity at or below which no excess bits
+ *                                are given
+ * @param f_cur_bits_complexity   complexity of the current content; values
+ *                                above 0.9 are treated as 0.9
+ * @param bit_alloc_period        bit allocation period in frames
+ * @param i4_num_gops_for_excess  for VBR streaming, multiplier applied to the
+ *                                excess bits when greater than 1
+ *
+ * @return excess bits (>= 0); 0 when the complexity is below
+ *         f_complexity_peak_rate or below 0.1, or when the buffer is smaller
+ *         than one second of bits
+ */
+/* ******************************************************************************/
+WORD32 get_vbv_buffer_based_excess(
+    cbr_buffer_t *ps_cbr_buffer,
+    float f_complexity_peak_rate,
+    float f_cur_bits_complexity,
+    WORD32 bit_alloc_period,
+    WORD32 i4_num_gops_for_excess)
+{
+    LWORD64 max_buffer_level = (LWORD64)((float)ps_cbr_buffer->i4_buffer_size * 0.8f);
+    /* 80% of the bits that can be drained at the peak rate within the delay.
+       The product is formed in floating point so that it cannot wrap around
+       in 32-bit unsigned arithmetic. */
+    LWORD64 i8_peak_delay_limit = (LWORD64)(
+        (float)ps_cbr_buffer->i4_peak_drain_rate_frame * ps_cbr_buffer->u4_num_frms_in_delay *
+        0.8f);
+    LWORD64 i8_excess_bits;
+    float f_complexity_span;
+    /* Number of frames over which the excess bits are distributed; currently
+       the number of frames corresponding to the buffer size */
+    WORD32 num_frm_to_be_distributed;
+
+    /* Max allowed level is min(upper threshold, 80% of buffer) ... */
+    if(ps_cbr_buffer->i4_upr_thr[0] < max_buffer_level)
+        max_buffer_level = ps_cbr_buffer->i4_upr_thr[0];
+
+    /* ... except for VBR streaming, which uses the peak-rate based limit */
+    if(ps_cbr_buffer->e_rc_type == VBR_STREAMING)
+        max_buffer_level = i8_peak_delay_limit;
+
+    /* Clip the current complexity to avoid a dangerous buffer level by the
+       end of the GOP */
+    if(f_cur_bits_complexity > 0.9f)
+        f_cur_bits_complexity = 0.9f;
+
+    /* No buffer movement for low complexity content, and none when the
+       buffer holds less than one second of bits */
+    if(f_cur_bits_complexity < f_complexity_peak_rate || f_cur_bits_complexity < 0.1f ||
+       ps_cbr_buffer->i4_buffer_size < ps_cbr_buffer->ai4_bit_rate[0])
+    {
+        return 0;
+    }
+
+    /* Scale the free space linearly with the complexity between the peak
+       rate complexity and 0.9. When f_complexity_peak_rate is itself 0.9 the
+       span is zero and the ratio would be 0/0; give no excess bits then. */
+    f_complexity_span = 0.9f - f_complexity_peak_rate;
+    if(f_complexity_span > 0.0f)
+    {
+        i8_excess_bits = (LWORD64)(
+            ((f_cur_bits_complexity - f_complexity_peak_rate) / f_complexity_span) *
+            (max_buffer_level - ps_cbr_buffer->i4_ebf));
+    }
+    else
+    {
+        i8_excess_bits = 0;
+    }
+
+    if(i8_excess_bits < 0)
+        i8_excess_bits = 0;
+
+    num_frm_to_be_distributed = (WORD32)(
+        ((float)ps_cbr_buffer->i4_buffer_size / ps_cbr_buffer->ai4_bit_rate[0] *
+         ps_cbr_buffer->i4_tgt_frm_rate / 1000) +
+        0.5);
+
+    /* Excess bits are proportional to the bit allocation period: a shorter
+       period gets a smaller incentive */
+    if(bit_alloc_period < num_frm_to_be_distributed)
+        i8_excess_bits =
+            (LWORD64)((float)i8_excess_bits * bit_alloc_period / num_frm_to_be_distributed);
+
+    if(ps_cbr_buffer->e_rc_type == VBR_STREAMING)
+    {
+        if(i4_num_gops_for_excess > 1)
+            i8_excess_bits = i8_excess_bits * i4_num_gops_for_excess;
+
+        /* Never hand out more than the peak-rate based limit */
+        if(i8_excess_bits > i8_peak_delay_limit)
+            i8_excess_bits = i8_peak_delay_limit;
+    }
+
+    /* i8_excess_bits is a 64-bit value; it must not be passed for a %d
+       conversion */
+    trace_printf(
+        "Excess bits %lld %f %f num gops %d",
+        (long long)i8_excess_bits,
+        f_cur_bits_complexity,
+        f_complexity_peak_rate,
+        i4_num_gops_for_excess);
+
+    return ((WORD32)i8_excess_bits);
+}
+/* ******************************************************************************/
+/**
+ * @brief get gop correction error bits for the current gop. This will be added to rbip.
+ *
+ *  - CBR_NLDRC: the lap complexity is mapped to a desired buffer level and the gap to the
+ *    current level (i8_ebf_bit_alloc) is given out or taken back in clipped steps.
+ *  - any other rc type: excess bits are derived from the complexity to bits mapping, the
+ *    bit deviation so far and the credit level, clipped by the peak rate and a 0.6x floor.
+ *
+ * @param ps_cbr_buffer        cbr buffer state, only read by this function
+ * @param i4_lap_complexity_q7 lap complexity in Q7 (divided by 128 before use)
+ * @param i4_bit_alloc_period  length of the bit allocation period in frames
+ *
+ * @return signed bit correction: positive gives extra bits, negative takes bits away
+ *
+ * @note Products of rates and periods are evaluated in 64 bit; the value is narrowed to
+ *       WORD32 only on return.
+ */
+/* ******************************************************************************/
+WORD32 get_error_bits_for_desired_buf(
+    cbr_buffer_t *ps_cbr_buffer, WORD32 i4_lap_complexity_q7, WORD32 i4_bit_alloc_period)
+{
+    /*CBR: steer the buffer towards a level chosen from the lap complexity*/
+    if(ps_cbr_buffer->e_rc_type == CBR_NLDRC)
+    {
+        LWORD64 error_bits = 0, complexity_mov_buf_size = 0;
+        LWORD64 i8_default_bits_in_period, i8_max_additional_bits_in_period;
+        LWORD64 i8_buf_based_limit_red, i8_buf_based_limit_inc, i8_buf_diff_bits;
+        float buf_diff, abs_lap_complexity;
+
+        /*calculate default allocation*/
+        i8_default_bits_in_period = (LWORD64)ps_cbr_buffer->ai4_bit_rate[0] * 1000 *
+                                    i4_bit_alloc_period / ps_cbr_buffer->i4_tgt_frm_rate;
+
+        /*The rc type is CBR_NLDRC here, so there is no peak rate based head room: the additional
+        bits are capped at the default allocation (2x bitrate in total) since it is too risky to
+        give more than that in single pass encoding where long future is not known*/
+        i8_max_additional_bits_in_period = i8_default_bits_in_period;
+        {
+            float X = ((float)i4_lap_complexity_q7 / 128);
+            float desired_buf_level;
+            /*For CBR the buffer size used for the complexity mapping is the upper threshold*/
+            complexity_mov_buf_size = (LWORD64)ps_cbr_buffer->i4_upr_thr[0];
+            abs_lap_complexity = X;
+
+            /*map complexity to buffer level*/
+            if(ps_cbr_buffer->i4_cbr_rc_pass == 2)
+                desired_buf_level = COMP_TO_BITS_MAP_2_PASS(X, complexity_mov_buf_size);
+            else
+                desired_buf_level = COMP_TO_BITS_MAP(X, complexity_mov_buf_size);
+
+            if(desired_buf_level < 0)
+                desired_buf_level = 0;
+
+            error_bits = (LWORD64)(desired_buf_level - ps_cbr_buffer->i8_ebf_bit_alloc);
+            i8_buf_diff_bits = error_bits;
+            /*express the gap to the desired level as a fraction of the buffer size*/
+            buf_diff = (float)error_bits / complexity_mov_buf_size;
+
+            /*clipping based on buffer size should depend on gop size. Assuming a gop of 32, give at most
+            about 7% of the buffer and steal at most about 10% of it; other GOP intervals scale linearly*/
+            /*be conservative when giving extra bits to a gop; the limit while reducing bits needs to be higher in order to stay buffer compliant*/
+            i8_buf_based_limit_red =
+                ((LWORD64)complexity_mov_buf_size * i4_bit_alloc_period * 12) >> 12;
+            i8_buf_based_limit_inc = ((LWORD64)complexity_mov_buf_size * i4_bit_alloc_period * 8) >>
+                                     12;
+
+            /*never let either limit drop below 10/128 (about 7%) of the buffer, even if GOP size goes lesser*/
+            if(i8_buf_based_limit_red < (((LWORD64)complexity_mov_buf_size * 10) >> 7))
+                i8_buf_based_limit_red = (((LWORD64)complexity_mov_buf_size * 10) >> 7);
+            if(i8_buf_based_limit_inc < (((LWORD64)complexity_mov_buf_size * 10) >> 7))
+                i8_buf_based_limit_inc = (((LWORD64)complexity_mov_buf_size * 10) >> 7);
+
+            /*if error bits is too high it is given in stages so that buffer is utilized for entire complex content*/
+            /*error bits should not exceed the buffer based limit computed above*/
+            /*error bits can at most be equal to the default allocation of the period*/
+            if(error_bits > 0)
+            {
+                /*if lap complexity is higher and buffer allows give the bits*/
+                /*converted in 64 bit: the product can exceed the WORD32 range at high bitrates*/
+                error_bits = (LWORD64)(abs_lap_complexity * i8_max_additional_bits_in_period);
+                /*if lap complexity is too simple do not give additional bits to make sure that simple scenes never get additional bits whatsoever*/
+                if(abs_lap_complexity < 0.2f && ps_cbr_buffer->i8_ebf_bit_alloc >= 0)
+                {
+                    error_bits = 0;
+                }
+                if(error_bits > i8_buf_diff_bits)
+                    error_bits = i8_buf_diff_bits;
+
+                if(error_bits > i8_buf_based_limit_inc)
+                {
+                    error_bits = i8_buf_based_limit_inc;
+                }
+                /*If buffer is already more than 75% filled be conservative. Allocate 1.5 times bits
+                else allocate twice the bits*/
+                if(ps_cbr_buffer->i8_ebf_bit_alloc >
+                   (LWORD64)(ps_cbr_buffer->i4_buffer_size * 0.75))
+                {
+                    if(error_bits > (i8_max_additional_bits_in_period >> 1))
+                    {
+                        error_bits = (i8_max_additional_bits_in_period >> 1);
+                    }
+                }
+                else
+                {
+                    if(error_bits > i8_max_additional_bits_in_period)
+                    {
+                        error_bits = i8_max_additional_bits_in_period;
+                    }
+                }
+            }
+            else
+            {
+                /*buffer is at or above the desired level: take back bits in proportion to the gap*/
+                error_bits = (LWORD64)(buf_diff * (i8_default_bits_in_period >> 1));
+                if(error_bits < -i8_buf_based_limit_red)
+                {
+                    error_bits = -i8_buf_based_limit_red;
+                }
+                /*when buffer level needs to reduce bits in period*/
+                /*If current level is less than half, min bits in period = about 70% of constant bits in period, else 50%*/
+                if(ps_cbr_buffer->i8_ebf_bit_alloc > (ps_cbr_buffer->i4_buffer_size >> 1))
+                {
+                    if(error_bits < -(i8_default_bits_in_period >> 1))
+                    {
+                        error_bits = -(i8_default_bits_in_period >> 1);
+                    }
+                }
+                else
+                {
+                    if(error_bits < -((i8_default_bits_in_period * 5) >> 4))
+                    {
+                        error_bits = -((i8_default_bits_in_period * 5) >> 4);
+                    }
+                }
+            }
+        }
+        return (WORD32)error_bits;
+    }
+    else
+    {
+        /*all other rc types: excess bits from complexity, bit deviation and credit level*/
+        LWORD64 max_excess_bits, default_allocation_for_period, comp_based_excess = 0;
+        LWORD64 i8_excess_bits = 0, bit_dev_so_far, credit_limit_level;
+        /*Ravg_dur: duration for which Ravg has to be met, for shorter clips this can be equal to clip duration*/
+        LWORD64 Ravg_dur, num_intra_period_in_Ravg_dur;
+        LWORD64 i8_buf_based_limit_red, i8_buf_based_limit_inc;
+        float comp_to_bit_mapped, X;
+
+        /*default allocation for period in absence of complexity based bit allocation*/
+        /*widened before multiplying so that the product cannot overflow 32 bit*/
+        default_allocation_for_period =
+            (LWORD64)ps_cbr_buffer->i4_drain_bits_per_frame[0] * i4_bit_alloc_period;
+
+        bit_dev_so_far = ps_cbr_buffer->i8_ebf_bit_alloc;
+        credit_limit_level = ps_cbr_buffer->i8_credit_level;
+        /*frames needed to drain 5x the max bit deviation at the per frame drain rate, limited
+        to 20% of the frames to be encoded and to at least one frame*/
+        Ravg_dur = (LWORD64)ps_cbr_buffer->u4_vbr_max_bit_deviation * 5 /
+                   ps_cbr_buffer->i4_drain_bits_per_frame[0];
+        if(Ravg_dur > 20 * ps_cbr_buffer->i8_tot_frm_to_be_encoded / 100)
+            Ravg_dur = 20 * ps_cbr_buffer->i8_tot_frm_to_be_encoded / 100;
+        if(Ravg_dur <= 0)
+            Ravg_dur = 1;
+        /*map the complexity to bits ratio*/
+        X = (float)i4_lap_complexity_q7 / 128;
+        if(ps_cbr_buffer->i4_cbr_rc_pass == 2)
+            comp_to_bit_mapped = COMP_TO_BITS_MAP_2_PASS(X, 1.0f);
+        else
+            comp_to_bit_mapped = COMP_TO_BITS_MAP(X, 1.0f);
+
+        comp_to_bit_mapped *= 10; //mapping it to absolute peak bitrate
+
+        /*calculate the number of bit alloc periods over which the credit limit needs to build up*/
+        num_intra_period_in_Ravg_dur = Ravg_dur / i4_bit_alloc_period;
+        //ASSERT(ps_cbr_buffer->i8_tot_frm_to_be_encoded > i4_bit_alloc_period);
+        if(ps_cbr_buffer->i8_tot_frm_to_be_encoded < i4_bit_alloc_period)
+        {
+            num_intra_period_in_Ravg_dur = 1;
+        }
+        if(num_intra_period_in_Ravg_dur <= 0)
+        {
+            num_intra_period_in_Ravg_dur = 1;
+        }
+        /*max excess bits possible according to given peak bitrate*/
+        {
+            /*widened before multiplying so that the product cannot overflow 32 bit*/
+            max_excess_bits = ((LWORD64)ps_cbr_buffer->i4_peak_drain_rate_frame -
+                               ps_cbr_buffer->i4_drain_bits_per_frame[0]) *
+                              i4_bit_alloc_period;
+            /*constrain max excess bits allocated to a region if buffer is already at critical level*/
+            /*assume room for 20% over-consumption due to mismatch between allocation and consumption*/
+            if(ps_cbr_buffer->i4_ebf >
+               ((LWORD64)ps_cbr_buffer->i4_upr_thr[0] - (LWORD64)(max_excess_bits * 0.2)))
+            {
+                max_excess_bits = (LWORD64)(max_excess_bits * 0.8);
+            }
+        }
+        /*clipping based on buffer size should depend on gop size. Assuming a gop of 32, give at most
+        about 7% of the max bit deviation and steal at most about 10% of it; other GOP intervals scale linearly*/
+        /*be conservative when giving extra bits to a gop; the limit while reducing bits needs to be higher in order to stay buffer compliant*/
+        i8_buf_based_limit_red =
+            ((LWORD64)ps_cbr_buffer->u4_vbr_max_bit_deviation * i4_bit_alloc_period * 12) >> 12;
+        i8_buf_based_limit_inc =
+            ((LWORD64)ps_cbr_buffer->u4_vbr_max_bit_deviation * i4_bit_alloc_period * 8) >> 12;
+
+        /*never let either limit drop below 10/128 (about 7%) of the max bit deviation, even if GOP size goes lesser*/
+        if(i8_buf_based_limit_red < (((LWORD64)ps_cbr_buffer->u4_vbr_max_bit_deviation * 10) >> 7))
+            i8_buf_based_limit_red = (((LWORD64)ps_cbr_buffer->u4_vbr_max_bit_deviation * 10) >> 7);
+        if(i8_buf_based_limit_inc < (((LWORD64)ps_cbr_buffer->u4_vbr_max_bit_deviation * 10) >> 7))
+            i8_buf_based_limit_inc = (((LWORD64)ps_cbr_buffer->u4_vbr_max_bit_deviation * 10) >> 7);
+
+        /*The credit limit is not completely built, hence the average operating bitrate will be lesser than average*/
+        //if(ps_cbr_buffer->i8_ebf_bit_alloc >= 0)
+        //Disabling this to avoid under-consumption of bits since mostly contents will end with simpler sequence
+        if(1 != ps_cbr_buffer->i4_capped_vbr_on)
+        {
+            /*adjust the excess bits to account for deviation in bitrate
+            If bit deviation is positive then overconsumption, hence reduce the default bit allocation*/
+
+            /* In capped vbr mode this is not calculated as there is no constraint to meet the configured bitrate */
+            i8_excess_bits -= (bit_dev_so_far / num_intra_period_in_Ravg_dur);
+        }
+        /*allocate bits based on complexity*/
+        /*comp_to_bit_mapped less than 1 implies a content that requires less than average bitrate,
+        hence due to sign reversal we tend to steal bits*/
+        comp_based_excess = (LWORD64)((comp_to_bit_mapped - 1) * default_allocation_for_period);
+
+        if(1 != ps_cbr_buffer->i4_capped_vbr_on)
+        {
+            /*clip the complexity based on intra period and credit limit buffer size so that when credit limit is lower not everything is used for first GOP*/
+            if(comp_based_excess > i8_buf_based_limit_inc)
+            {
+                comp_based_excess = i8_buf_based_limit_inc;
+            }
+            else if(comp_based_excess < -i8_buf_based_limit_red)
+            {
+                comp_based_excess = -i8_buf_based_limit_red;
+            }
+
+            /*when the credit limit is fully used, stop giving extra*/
+            if(credit_limit_level > ps_cbr_buffer->u4_vbr_max_bit_deviation)
+            {
+                if(comp_based_excess < 0)
+                    i8_excess_bits += comp_based_excess;
+            }
+            /*when credit limit is almost full (80 percent full)*/
+            else if(credit_limit_level > (LWORD64)(ps_cbr_buffer->u4_vbr_max_bit_deviation * 0.8f))
+            {
+                /*follow smooth transition, at 80% utilized the excess should be 100 percent, it should move to zero percent as it approaches 100% utilization*/
+                if(comp_based_excess > 0)
+                    i8_excess_bits += (LWORD64)(
+                        ((ps_cbr_buffer->u4_vbr_max_bit_deviation - credit_limit_level) /
+                         (0.2f * ps_cbr_buffer->u4_vbr_max_bit_deviation)) *
+                        comp_based_excess);
+                else
+                    i8_excess_bits += comp_based_excess;
+            }
+            else if(credit_limit_level > (LWORD64)(ps_cbr_buffer->u4_vbr_max_bit_deviation * 0.2f))
+            {
+                i8_excess_bits += comp_based_excess;
+            }
+            /*When credit limit is almost unutilized*/
+            /*the bound is inclusive and uses the same LWORD64 threshold as the rung above, so a
+            level exactly at 20% is handled here instead of falling through to ASSERT(0)*/
+            else if(
+                credit_limit_level <= (LWORD64)(ps_cbr_buffer->u4_vbr_max_bit_deviation * 0.2f) &&
+                credit_limit_level > 0)
+            {
+                if(comp_based_excess < 0)
+                    i8_excess_bits += (LWORD64)(
+                        (credit_limit_level / (0.2f * ps_cbr_buffer->u4_vbr_max_bit_deviation)) *
+                        comp_based_excess);
+                else
+                    i8_excess_bits += comp_based_excess;
+            }
+            /*If the credit limit is still unutilized stop drawing bits from simpler content*/
+            else if(credit_limit_level <= 0)
+            {
+                if(comp_based_excess > 0)
+                    i8_excess_bits += comp_based_excess;
+            }
+            else
+                ASSERT(0);
+        }
+        else
+        {
+            /* In capped vbr mode excess bits will be based on complexity of content alone*/
+            i8_excess_bits = comp_based_excess;
+        }
+
+        /*Clip the excess bits such that it will never violate peak bitrate and also Rmin*/
+        if(i8_excess_bits > max_excess_bits)
+            i8_excess_bits = max_excess_bits;
+        /*assuming at least 0.4 times average bitrate even for the simplest content*/
+        if(i8_excess_bits < -(default_allocation_for_period * 0.6f))
+            i8_excess_bits = (LWORD64)(-(default_allocation_for_period * 0.6f));
+
+        ASSERT(i8_excess_bits <= 0x7FFFFFFF);
+        /*returned as WORD32: saturate instead of wrapping in builds without asserts*/
+        if(i8_excess_bits > 0x7FFFFFFF)
+            i8_excess_bits = 0x7FFFFFFF;
+        return (WORD32)i8_excess_bits;
+    }
+}
+/* ******************************************************************************/
+/**
+ * @brief Return the rate control type (e_rc_type) recorded in the cbr buffer state.
+ * @param ps_cbr_buffer cbr buffer state to query; it is not modified
+ */
+/* ******************************************************************************/
+rc_type_e get_rc_type(cbr_buffer_t *ps_cbr_buffer)
+{
+    const rc_type_e e_mode = ps_cbr_buffer->e_rc_type;
+    return e_mode;
+}
+/* ******************************************************************************/
+/**
+ * @brief Return the u4_num_frms_in_delay field of the cbr buffer state.
+ * @param ps_cbr_buffer cbr buffer state to query; it is not modified
+ */
+/* ******************************************************************************/
+UWORD32 cbr_get_delay_frames(cbr_buffer_t *ps_cbr_buffer)
+{
+    const UWORD32 u4_delay_frames = ps_cbr_buffer->u4_num_frms_in_delay;
+    return u4_delay_frames;
+}
+#endif /* #if NON_STEADSTATE_CODE */
diff --git a/encoder/cbr_buffer_control.h b/encoder/cbr_buffer_control.h
new file mode 100644
index 0000000..3cb3b1e
--- /dev/null
+++ b/encoder/cbr_buffer_control.h
@@ -0,0 +1,140 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file cbr_buffer_control.h
+*
+* \brief
+* This file contains all the necessary declarations for
+* cbr_buffer_control functions
+*
+* \date
+* 06/05/2008
+*
+* \author
+* ittiam
+*
+******************************************************************************
+*/
+#ifndef CBR_BUFFER_CONTROL_H
+#define CBR_BUFFER_CONTROL_H
+
+/*****************************************************************************/
+/* Macros */
+/*****************************************************************************/
+/* Clip Number in place between two extremes; braced so a following else cannot bind to it. Arguments are evaluated more than once */
+#define CLIP(Number, Max, Min) \
+    { \
+        if((Number) > (Max)) (Number) = (Max); \
+        else if((Number) < (Min)) (Number) = (Min); \
+    }
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+typedef struct cbr_buffer_t *cbr_buffer_handle; /* handle: pointer to the cbr buffer state */
+
+/*****************************************************************************/
+/* Function Declarations */
+/*****************************************************************************/
+WORD32 cbr_buffer_num_fill_use_free_memtab(
+ cbr_buffer_handle *pps_cbr_buffer, itt_memtab_t *ps_memtab, ITT_FUNC_TYPE_E e_func_type);
+/* Initialize the CBR buffer */
+void init_cbr_buffer(
+ cbr_buffer_handle ps_cbr_buffer,
+ WORD32 i4_buffer_delay,
+ WORD32 i4_tgt_frm_rate,
+ UWORD32 u4_bit_rate,
+ UWORD32 *u4_num_pics_in_delay_prd,
+ UWORD32 u4_vbv_buf_size,
+ UWORD32 u4_intra_frm_int,
+ rc_type_e u4_rc_type,
+ UWORD32 u4_peak_bit_rate,
+ UWORD32 u4_num_frames_in_delay,
+ float f_max_peak_rate_dur,
+ LWORD64 i8_num_frames_to_encode,
+ WORD32 i4_inter_frm_int,
+ WORD32 i4_cbr_rc_pass,
+ WORD32 i4_capped_vbr_flag);
+
+/* Check whether the target bits are within the CBR buffer constraints */
+WORD32 cbr_buffer_constraint_check(
+ cbr_buffer_handle ps_cbr_buffer,
+ WORD32 i4_tgt_bits,
+ picture_type_e e_pic_type,
+ WORD32 *pi4_max_tgt_bits,
+ WORD32 *pi4_min_tgt_bits);
+
+/* Get the buffer status for the currently consumed bits */
+vbv_buf_status_e get_cbr_buffer_status(
+ cbr_buffer_handle ps_cbr_buffer,
+ WORD32 i4_tot_consumed_bits,
+ WORD32 *pi4_num_bits_to_prevent_overflow,
+ picture_type_e e_pic_type,
+ rc_type_e e_rc_type);
+
+/* Update the CBR buffer at the end of the VOP */
+void update_cbr_buffer(
+ cbr_buffer_handle ps_cbr_buffer, WORD32 i4_tot_consumed_bits, picture_type_e e_pic_type);
+
+/* Get the number of bits that need to be stuffed in case of underflow */
+WORD32 get_cbr_bits_to_stuff(
+ cbr_buffer_handle ps_cbr_buffer, WORD32 i4_tot_consumed_bits, picture_type_e e_pic_type);
+WORD32 get_cbr_buffer_delay(cbr_buffer_handle ps_cbr_buffer);
+WORD32 get_cbr_buffer_size(cbr_buffer_handle ps_cbr_buffer);
+WORD32 get_cbr_ebf(cbr_buffer_handle ps_cbr_buffer);
+WORD32 get_cbr_max_ebf(cbr_buffer_handle ps_cbr_buffer);
+void update_cbr_buf_mismatch_bit(cbr_buffer_handle ps_cbr_buffer, WORD32 i4_error_bits);
+/* Bit correction for the current bit allocation period; the returned value may be negative */
+WORD32 get_error_bits_for_desired_buf(
+ cbr_buffer_handle ps_cbr_buffer, WORD32 i4_lap_complexity_q7, WORD32 i4_bit_alloc_period);
+
+WORD32 get_buf_max_drain_rate(cbr_buffer_handle ps_cbr_buffer);
+
+WORD32 vbr_stream_buffer_constraint_check(
+ cbr_buffer_handle ps_cbr_buffer,
+ WORD32 i4_tgt_bits,
+ picture_type_e e_pic_type,
+ WORD32 *pi4_max_tgt_bits,
+ WORD32 *pi4_min_tgt_bits);
+
+void change_cbr_vbv_bit_rate(
+ cbr_buffer_handle ps_cbr_buffer, WORD32 *i4_bit_rate, WORD32 i4_peak_bitrate);
+void change_cbr_vbv_tgt_frame_rate(cbr_buffer_handle ps_cbr_buffer, WORD32 i4_tgt_frm_rate);
+void change_cbr_vbv_num_pics_in_delay_period(
+ cbr_buffer_handle ps_cbr_buffer, UWORD32 *u4_num_pics_in_delay_prd);
+void change_cbr_buffer_delay(cbr_buffer_handle ps_cbr_buffer, WORD32 i4_buffer_delay);
+void set_cbr_ebf(cbr_buffer_handle ps_cbr_buffer, WORD32 i32_init_ebf);
+LWORD64 get_num_frms_encoded(cbr_buffer_handle ps_cbr_buffer);
+
+LWORD64 get_num_frms_to_encode(cbr_buffer_handle ps_cbr_buffer);
+
+WORD32 get_vbv_buffer_based_excess(
+ cbr_buffer_handle ps_cbr_buffer,
+ float f_complexity_peak_rate,
+ float f_cur_bits_complexity,
+ WORD32 bit_alloc_period,
+ WORD32 i4_num_gops_for_excess);
+/* Accessor for the rc type (e_rc_type) stored in the cbr buffer state */
+rc_type_e get_rc_type(cbr_buffer_handle ps_cbr_buffer);
+
+void cbr_modify_ebf_estimate(cbr_buffer_handle ps_cbr_buffer, WORD32 i4_bit_error);
+/* Accessor for u4_num_frms_in_delay of the cbr buffer state */
+UWORD32 cbr_get_delay_frames(cbr_buffer_handle ps_cbr_buffer);
+#endif /* CBR_BUFFER_CONTROL_H */
diff --git a/encoder/common_rom.c b/encoder/common_rom.c
new file mode 100644
index 0000000..d193f90
--- /dev/null
+++ b/encoder/common_rom.c
@@ -0,0 +1,1073 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file common_rom.c
+*
+* \brief
+* This file contains the square root table
+*
+* \date
+*
+* \author
+* ittiam
+*
+******************************************************************************
+*/
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <stdlib.h>
+
+/* User include files */
+#include "ia_type_def.h"
+
+const WORD32 gi4_sqrt_tab[513] = /*sqrt_tab in Q15*/
+ {
+
+ 1073741824,
+
+ 1074789888,
+
+ 1075836928,
+
+ 1076882944,
+
+ 1077927936,
+
+ 1078971904,
+
+ 1080014976,
+
+ 1081056896,
+
+ 1082097920,
+
+ 1083137920,
+
+ 1084176896,
+
+ 1085214848,
+
+ 1086251904,
+
+ 1087287808,
+
+ 1088322944,
+
+ 1089356928,
+
+ 1090390016,
+
+ 1091422080,
+
+ 1092453120,
+
+ 1093483264,
+
+ 1094512512,
+
+ 1095540608,
+
+ 1096567936,
+
+ 1097594112,
+
+ 1098619392,
+
+ 1099643776,
+
+ 1100667264,
+
+ 1101689600,
+
+ 1102711168,
+
+ 1103731712,
+
+ 1104751360,
+
+ 1105769984,
+
+ 1106787712,
+
+ 1107804544,
+
+ 1108820352,
+
+ 1109835392,
+
+ 1110849408,
+
+ 1111862400,
+
+ 1112874624,
+
+ 1113885824,
+
+ 1114896128,
+
+ 1115905536,
+
+ 1116914048,
+
+ 1117921664,
+
+ 1118928384,
+
+ 1119934208,
+
+ 1120939008,
+
+ 1121943040,
+
+ 1122946048,
+
+ 1123948288,
+
+ 1124949504,
+
+ 1125949952,
+
+ 1126949504,
+
+ 1127948032,
+
+ 1128945792,
+
+ 1129942656,
+
+ 1130938624,
+
+ 1131933824,
+
+ 1132928000,
+
+ 1133921408,
+
+ 1134913920,
+
+ 1135905536,
+
+ 1136896256,
+
+ 1137886208,
+
+ 1138875136,
+
+ 1139863424,
+
+ 1140850688,
+
+ 1141837184,
+
+ 1142822784,
+
+ 1143807488,
+
+ 1144791424,
+
+ 1145774592,
+
+ 1146756736,
+
+ 1147738112,
+
+ 1148718720,
+
+ 1149698432,
+
+ 1150677376,
+
+ 1151655424,
+
+ 1152632576,
+
+ 1153608960,
+
+ 1154584576,
+
+ 1155559296,
+
+ 1156533248,
+
+ 1157506304,
+
+ 1158478592,
+
+ 1159450112,
+
+ 1160420736,
+
+ 1161390592,
+
+ 1162359680,
+
+ 1163327872,
+
+ 1164295296,
+
+ 1165261952,
+
+ 1166227712,
+
+ 1167192704,
+
+ 1168156928,
+
+ 1169120384,
+
+ 1170083072,
+
+ 1171044864,
+
+ 1172005888,
+
+ 1172966144,
+
+ 1173925632,
+
+ 1174884352,
+
+ 1175842304,
+
+ 1176799488,
+
+ 1177755776,
+
+ 1178711424,
+
+ 1179666176,
+
+ 1180620160,
+
+ 1181573504,
+
+ 1182525952,
+
+ 1183477760,
+
+ 1184428672,
+
+ 1185378816,
+
+ 1186328320,
+
+ 1187277056,
+
+ 1188224896,
+
+ 1189172096,
+
+ 1190118528,
+
+ 1191064192,
+
+ 1192009088,
+
+ 1192953216,
+
+ 1193896704,
+
+ 1194839296,
+
+ 1195781248,
+
+ 1196722432,
+
+ 1197662976,
+
+ 1198602624,
+
+ 1199541632,
+
+ 1200479872,
+
+ 1201417344,
+
+ 1202354176,
+
+ 1203290240,
+
+ 1204225536,
+
+ 1205160064,
+
+ 1206093952,
+
+ 1207027072,
+
+ 1207959552,
+
+ 1208891264,
+
+ 1209822208,
+
+ 1210752512,
+
+ 1211682048,
+
+ 1212610944,
+
+ 1213539072,
+
+ 1214466560,
+
+ 1215393280,
+
+ 1216319232,
+
+ 1217244544,
+
+ 1218169088,
+
+ 1219092992,
+
+ 1220016256,
+
+ 1220938752,
+
+ 1221860608,
+
+ 1222781696,
+
+ 1223702144,
+
+ 1224621824,
+
+ 1225540864,
+
+ 1226459264,
+
+ 1227376896,
+
+ 1228293888,
+
+ 1229210240,
+
+ 1230125824,
+
+ 1231040768,
+
+ 1231954944,
+
+ 1232868608,
+
+ 1233781504,
+
+ 1234693632,
+
+ 1235605248,
+
+ 1236516096,
+
+ 1237426304,
+
+ 1238335872,
+
+ 1239244672,
+
+ 1240152960,
+
+ 1241060480,
+
+ 1241967360,
+
+ 1242873600,
+
+ 1243779072,
+
+ 1244684032,
+
+ 1245588224,
+
+ 1246491776,
+
+ 1247394816,
+
+ 1248297088,
+
+ 1249198592,
+
+ 1250099584,
+
+ 1250999936,
+
+ 1251899648,
+
+ 1252798592,
+
+ 1253697024,
+
+ 1254594816,
+
+ 1255491840,
+
+ 1256388352,
+
+ 1257284224,
+
+ 1258179328,
+
+ 1259073920,
+
+ 1259967744,
+
+ 1260861056,
+
+ 1261753728,
+
+ 1262645760,
+
+ 1263537152,
+
+ 1264427904,
+
+ 1265318016,
+
+ 1266207488,
+
+ 1267096448,
+
+ 1267984640,
+
+ 1268872320,
+
+ 1269759232,
+
+ 1270645632,
+
+ 1271531520,
+
+ 1272416640,
+
+ 1273301120,
+
+ 1274185088,
+
+ 1275068416,
+
+ 1275951104,
+
+ 1276833280,
+
+ 1277714688,
+
+ 1278595584,
+
+ 1279475840,
+
+ 1280355584,
+
+ 1281234560,
+
+ 1282113024,
+
+ 1282990848,
+
+ 1283868160,
+
+ 1284744832,
+
+ 1285620864,
+
+ 1286496384,
+
+ 1287371264,
+
+ 1288245504,
+
+ 1289119232,
+
+ 1289992320,
+
+ 1290864768,
+
+ 1291736704,
+
+ 1292608000,
+
+ 1293478784,
+
+ 1294348928,
+
+ 1295218432,
+
+ 1296087424,
+
+ 1296955904,
+
+ 1297823616,
+
+ 1298690944,
+
+ 1299557504,
+
+ 1300423680,
+
+ 1301289216,
+
+ 1302154112,
+
+ 1303018496,
+
+ 1303882240,
+
+ 1304745472,
+
+ 1305608064,
+
+ 1306470144,
+
+ 1307331712,
+
+ 1308192640,
+
+ 1309052928,
+
+ 1309912704,
+
+ 1310771968,
+
+ 1311630720,
+
+ 1312488832,
+
+ 1313346304,
+
+ 1314203392,
+
+ 1315059840,
+
+ 1315915648,
+
+ 1316770944,
+
+ 1317625728,
+
+ 1318480000,
+
+ 1319333632,
+
+ 1320186752,
+
+ 1321039360,
+
+ 1321891328,
+
+ 1322742784,
+
+ 1323593728,
+
+ 1324444032,
+
+ 1325293824,
+
+ 1326143104,
+
+ 1326991872,
+
+ 1327840128,
+
+ 1328687744,
+
+ 1329534848,
+
+ 1330381440,
+
+ 1331227392,
+
+ 1332072960,
+
+ 1332917888,
+
+ 1333762304,
+
+ 1334606208,
+
+ 1335449472,
+
+ 1336292352,
+
+ 1337134592,
+
+ 1337976448,
+
+ 1338817664,
+
+ 1339658368,
+
+ 1340498560,
+
+ 1341338112,
+
+ 1342177280,
+
+ 1343015936,
+
+ 1343853952,
+
+ 1344691456,
+
+ 1345528576,
+
+ 1346365056,
+
+ 1347201024,
+
+ 1348036480,
+
+ 1348871424,
+
+ 1349705856,
+
+ 1350539776,
+
+ 1351373184,
+
+ 1352206080,
+
+ 1353038464,
+
+ 1353870336,
+
+ 1354701696,
+
+ 1355532544,
+
+ 1356363008,
+
+ 1357192832,
+
+ 1358022144,
+
+ 1358850944,
+
+ 1359679232,
+
+ 1360507008,
+
+ 1361334400,
+
+ 1362161152,
+
+ 1362987520,
+
+ 1363813248,
+
+ 1364638592,
+
+ 1365463424,
+
+ 1366287744,
+
+ 1367111552,
+
+ 1367934848,
+
+ 1368757632,
+
+ 1369579904,
+
+ 1370401792,
+
+ 1371223168,
+
+ 1372043904,
+
+ 1372864384,
+
+ 1373684224,
+
+ 1374503552,
+
+ 1375322496,
+
+ 1376140800,
+
+ 1376958720,
+
+ 1377776256,
+
+ 1378593152,
+
+ 1379409536,
+
+ 1380225536,
+
+ 1381041024,
+
+ 1381856128,
+
+ 1382670592,
+
+ 1383484672,
+
+ 1384298240,
+
+ 1385111296,
+
+ 1385923968,
+
+ 1386736128,
+
+ 1387547776,
+
+ 1388358912,
+
+ 1389169664,
+
+ 1389979904,
+
+ 1390789760,
+
+ 1391598976,
+
+ 1392407808,
+
+ 1393216256,
+
+ 1394024064,
+
+ 1394831488,
+
+ 1395638528,
+
+ 1396445056,
+
+ 1397251072,
+
+ 1398056576,
+
+ 1398861696,
+
+ 1399666304,
+
+ 1400470528,
+
+ 1401274240,
+
+ 1402077440,
+
+ 1402880256,
+
+ 1403682560,
+
+ 1404484480,
+
+ 1405285888,
+
+ 1406086912,
+
+ 1406887296,
+
+ 1407687424,
+
+ 1408487040,
+
+ 1409286144,
+
+ 1410084864,
+
+ 1410883072,
+
+ 1411680896,
+
+ 1412478208,
+
+ 1413275136,
+
+ 1414071552,
+
+ 1414867456,
+
+ 1415662976,
+
+ 1416458112,
+
+ 1417252736,
+
+ 1418046976,
+
+ 1418840704,
+
+ 1419634048,
+
+ 1420426880,
+
+ 1421219328,
+
+ 1422011392,
+
+ 1422802816,
+
+ 1423593984,
+
+ 1424384640,
+
+ 1425174912,
+
+ 1425964672,
+
+ 1426754048,
+
+ 1427542912,
+
+ 1428331392,
+
+ 1429119488,
+
+ 1429907072,
+
+ 1430694272,
+
+ 1431480960,
+
+ 1432267264,
+
+ 1433053184,
+
+ 1433838592,
+
+ 1434623616,
+
+ 1435408256,
+
+ 1436192384,
+
+ 1436976128,
+
+ 1437759488,
+
+ 1438542336,
+
+ 1439324800,
+
+ 1440106752,
+
+ 1440888448,
+
+ 1441669632,
+
+ 1442450304,
+
+ 1443230720,
+
+ 1444010624,
+
+ 1444790144,
+
+ 1445569152,
+
+ 1446347776,
+
+ 1447126016,
+
+ 1447903872,
+
+ 1448681216,
+
+ 1449458304,
+
+ 1450234880,
+
+ 1451010944,
+
+ 1451786752,
+
+ 1452562048,
+
+ 1453336960,
+
+ 1454111360,
+
+ 1454885504,
+
+ 1455659136,
+
+ 1456432384,
+
+ 1457205248,
+
+ 1457977728,
+
+ 1458749696,
+
+ 1459521408,
+
+ 1460292608,
+
+ 1461063424,
+
+ 1461833728,
+
+ 1462603776,
+
+ 1463373312,
+
+ 1464142592,
+
+ 1464911360,
+
+ 1465679744,
+
+ 1466447744,
+
+ 1467215232,
+
+ 1467982464,
+
+ 1468749184,
+
+ 1469515520,
+
+ 1470281600,
+
+ 1471047168,
+
+ 1471812352,
+
+ 1472577024,
+
+ 1473341440,
+
+ 1474105472,
+
+ 1474868992,
+
+ 1475632256,
+
+ 1476395008,
+
+ 1477157376,
+
+ 1477919360,
+
+ 1478681088,
+
+ 1479442304,
+
+ 1480203136,
+
+ 1480963584,
+
+ 1481723648,
+
+ 1482483200,
+
+ 1483242496,
+
+ 1484001408,
+
+ 1484759936,
+
+ 1485518080,
+
+ 1486275712,
+
+ 1487033088,
+
+ 1487790080,
+
+ 1488546560,
+
+ 1489302784,
+
+ 1490058624,
+
+ 1490813952,
+
+ 1491569024,
+
+ 1492323712,
+
+ 1493078016,
+
+ 1493831808,
+
+ 1494585344,
+
+ 1495338496,
+
+ 1496091264,
+
+ 1496843648,
+
+ 1497595648,
+
+ 1498347264,
+
+ 1499098496,
+
+ 1499849344,
+
+ 1500599808,
+
+ 1501349888,
+
+ 1502099712,
+
+ 1502849024,
+
+ 1503598080,
+
+ 1504346624,
+
+ 1505094912,
+
+ 1505842816,
+
+ 1506590208,
+
+ 1507337344,
+
+ 1508084096,
+
+ 1508830592,
+
+ 1509576576,
+
+ 1510322176,
+
+ 1511067520,
+
+ 1511812480,
+
+ 1512556928,
+
+ 1513301120,
+
+ 1514044928,
+
+ 1514788480,
+
+ 1515531520,
+
+ 1516274304,
+
+ 1517016576,
+
+ 1517758592,
+
+ 1518500249
+
+ };
diff --git a/encoder/common_rom.h b/encoder/common_rom.h
new file mode 100644
index 0000000..e892a00
--- /dev/null
+++ b/encoder/common_rom.h
@@ -0,0 +1,48 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file common_rom.h
+*
+* \brief
+* This file contains the square root table declarations
+*
+* \date
+*
+* \author
+* ittiam
+*
+******************************************************************************
+*/
+#ifndef SQRT_TAB_H
+#define SQRT_TAB_H
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+#define INV_SQRT_2_Q31 1518500250 /* 1/sqrt(2) in Q31 */
+#define INV_SQRT_2_Q15 23170 /* 1/sqrt(2) in Q15 (23170 / 32768 ~= 0.7071) */
+
+#define Q_SQRT_TAB 15 /* presumably the Q format associated with gi4_sqrt_tab - TODO confirm against its users */
+
+extern const WORD32 gi4_sqrt_tab[513]; /*sqrt_tab in Q15*/
+
+#endif
diff --git a/encoder/convert_float_to_fix.c b/encoder/convert_float_to_fix.c
new file mode 100644
index 0000000..f1be8ae
--- /dev/null
+++ b/encoder/convert_float_to_fix.c
@@ -0,0 +1,76 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file convert_float_to_fix.c
+*
+* \brief
+* This file contains the float to fix and fix to float conversion functions
+*
+* \date
+*
+* \author
+* ittiam
+*
+******************************************************************************
+*/
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <time.h>
+#include <string.h>
+
+/* User include files */
+#include "ia_type_def.h"
+/* #include "num_struct.h" */
+#include "var_q_operator.h"
+
+#define ABS(x) ((!((x) > 0)) ? (-(x)) : (x)) /* magnitude of x; x is evaluated twice */
+
+void convert_float_to_fix(float a_f, number_t *a)
+{
+    double exp_log2; /* log2(|a_f|), only computed for non-zero input */
+
+    /* Zero has no logarithm: store it as mantissa 0 with exponent 0 */
+    if(a_f == 0)
+    {
+        a->sm = 0;
+        a->e = 0;
+        return;
+    }
+    exp_log2 = log(ABS(a_f)) / log(2);
+    /* Exponent chosen from ceil(log2|a_f|) so that a_f * 2^e has a magnitude of at most about 2^30 */
+    a->e = 30 - (WORD32)ceil(exp_log2);
+    a->sm = (WORD32)(a_f * pow(2, a->e));
+}
+
+void convert_fix_to_float(number_t a, float *a_f)
+{
+    const double d_scale = pow(2, a.e); /* 2^e */
+    *a_f = (float)(a.sm / d_scale);     /* sm / 2^e, narrowed to float */
+}
+#ifdef ITT_C6678
+#pragma CODE_SECTION(convert_fix_to_float, "itt_varq_l1pram");
+#pragma CODE_SECTION(convert_float_to_fix, "itt_varq_l1pram");
+#endif
diff --git a/encoder/defs.h b/encoder/defs.h
new file mode 100644
index 0000000..29d4ac9
--- /dev/null
+++ b/encoder/defs.h
@@ -0,0 +1,44 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file defs.h
+*
+* \brief
+* This file contains macro definitions of the 32-bit maximum and minimum constants
+*
+* \date
+*
+* \author
+* ittiam
+*
+******************************************************************************
+*/
+
+#ifndef DEFS_H
+#define DEFS_H
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+#define MAX_32 0x7fffffff /* Largest signed 32-bit value (2^31 - 1) */
+#define MIN_32 0x80000000 /* Bit pattern of the smallest signed 32-bit value; NOTE(review): a hex constant this large is unsigned when int is 32 bits - confirm users cast it */
+
+#endif
diff --git a/encoder/est_sad.c b/encoder/est_sad.c
new file mode 100644
index 0000000..87fe5a1
--- /dev/null
+++ b/encoder/est_sad.c
@@ -0,0 +1,319 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file est_sad.c
+*
+* \brief
+* This file contains the SAD estimation related functions
+*
+* \date
+*
+* \author
+* ittiam
+*
+******************************************************************************
+*/
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* User include files */
+#include "ittiam_datatypes.h"
+#include "rc_common.h"
+#include "rc_cntrl_param.h"
+#include "var_q_operator.h"
+#include "mem_req_and_acq.h"
+#include "est_sad.h"
+
+typedef struct est_sad_t
+{
+ WORD32 i4_use_est_intra_sad; /* Non-zero: get_est_sad() scales the I frame SAD; zero: it returns the stored SAD */
+ UWORD32 au4_prev_frm_sad[MAX_PIC_TYPE]; /* Last stored SAD, indexed by picture type */
+ UWORD32 u4_n_p_frm_ifi_avg_sad; /* Current (nth) ifi: running sum of non-I frame SADs, averaged at the next I frame */
+ UWORD32 u4_n_1_p_frm_ifi_avg_sad; /* (n-1)th ifi average P frame SAD */
+ UWORD32 u4_n_2_p_frm_ifi_avg_sad; /* (n-2)th ifi average P frame SAD */
+ WORD32 i4_num_ifi_encoded; /* number of ifi encoded till now, saturating at 2 */
+ WORD32 i4_num_p_frm_in_cur_ifi; /* number of non-I frames accumulated in the current IFI */
+} est_sad_t;
+
+#if NON_STEADSTATE_CODE
+WORD32 est_sad_num_fill_use_free_memtab(
+ est_sad_t **pps_est_sad, itt_memtab_t *ps_memtab, ITT_FUNC_TYPE_E e_func_type)
+{
+ WORD32 i4_mem_tab_idx = 0; /* memtab entries consumed so far; returned at the end */
+ static est_sad_t s_est_sad; /* stand-in state for calls made before memory exists */
+
+ /* For GET_NUM_MEMTAB and FILL_MEMTAB no state memory is available yet, so the
+ handle is pointed at the static stand-in to keep later dereferences valid */
+ if(e_func_type == GET_NUM_MEMTAB || e_func_type == FILL_MEMTAB)
+ (*pps_est_sad) = &s_est_sad;
+
+ /* Describe the est_sad_t state buffer and hand it to use_or_fill_base() */
+ if(e_func_type != GET_NUM_MEMTAB)
+ {
+ fill_memtab(
+ &ps_memtab[i4_mem_tab_idx], sizeof(est_sad_t), MEM_TAB_ALIGNMENT, PERSISTENT, DDR);
+ use_or_fill_base(&ps_memtab[0], (void **)pps_est_sad, e_func_type);
+ }
+ i4_mem_tab_idx++; /* this module always accounts for exactly one memtab entry */
+
+ return (i4_mem_tab_idx);
+}
+#endif /* #if NON_STEADSTATE_CODE */
+/****************************************************************************
+Function Name : init_est_sad
+Description   : Puts the SAD estimator in its start-up state: all stored SADs,
+                averages and counters are zeroed and the estimation mode is set
+Inputs        : ps_est_sad           - estimator state to initialise
+                i4_use_est_intra_sad - non-zero enables I frame SAD estimation
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+ *****************************************************************************/
+void init_est_sad(est_sad_t *ps_est_sad, WORD32 i4_use_est_intra_sad)
+{
+    WORD32 i4_pic_type = MAX_PIC_TYPE;
+
+    /* No frame of any picture type has been recorded yet */
+    while(i4_pic_type-- > 0)
+        ps_est_sad->au4_prev_frm_sad[i4_pic_type] = 0;
+
+    /* No intra frame interval (ifi) history and an empty current interval */
+    ps_est_sad->i4_num_ifi_encoded = 0;
+    ps_est_sad->i4_num_p_frm_in_cur_ifi = 0;
+    ps_est_sad->u4_n_p_frm_ifi_avg_sad = ps_est_sad->u4_n_1_p_frm_ifi_avg_sad = ps_est_sad->u4_n_2_p_frm_ifi_avg_sad = 0;
+    ps_est_sad->i4_use_est_intra_sad = i4_use_est_intra_sad;
+}
+/****************************************************************************
+Function Name : reset_est_sad
+Description : Re-runs init_est_sad() while keeping the current i4_use_est_intra_sad
+Inputs : ps_est_sad
+
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+ *****************************************************************************/
+void reset_est_sad(est_sad_t *ps_est_sad)
+{
+ init_est_sad(ps_est_sad, ps_est_sad->i4_use_est_intra_sad);
+}
+
+/****************************************************************************
+Function Name : get_est_sad
+Description : Returns the estimated SAD for a frame of type e_pic_type
+Inputs : ps_est_sad, e_pic_type
+
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+ *****************************************************************************/
+/*
+get_est_sad can be called at any point. The various use cases are:
+1) When an I frame is getting encoded,
+ - get the estimate for P => No issues since we use the last coded P frame value
+ - get the estimate for I => There are two cases:
+ => a) if num_ifi_encoded is less than 2
+ then return the previous encoded I frame sad
+ => b) if num_ifi_encoded has reached 2, then we scale
+ the prev I sad by the ratio of (n-1) ifi P to (n-2) ifi P
+2) When a P frame is getting encoded,
+ - get the estimate for P => No issues since we use the last coded P frame value
+ - get the estimate for I => Similar to 1) we have two cases. To handle the b) case
+ the frames already accumulated in the current ifi (i4_num_p_frm_in_cur_ifi
+ non-zero) are averaged and used as the (n-1) ifi value
+*/
+UWORD32 get_est_sad(est_sad_t *ps_est_sad, picture_type_e e_pic_type)
+{
+ if(ps_est_sad->i4_use_est_intra_sad)
+ {
+ UWORD32 u4_estimated_sad;
+ if(e_pic_type == P_PIC)
+ {
+ u4_estimated_sad = ps_est_sad->au4_prev_frm_sad[P_PIC];
+ }
+ else if(e_pic_type == B_PIC)
+ {
+ u4_estimated_sad = ps_est_sad->au4_prev_frm_sad[B_PIC];
+ }
+ else /* every remaining picture type takes the I frame path */
+ {
+ if(ps_est_sad->i4_num_ifi_encoded < 2)
+ {
+ /* Fewer than two I frames recorded so far: use the previous I frame's SAD */
+ u4_estimated_sad = ps_est_sad->au4_prev_frm_sad[I_PIC];
+ }
+ else
+ {
+ /* The current IFI only holds a running sum of its frame SADs, so it is
+ averaged here before being used */
+ UWORD32 u4_n_1_p_frm_ifi_avg_sad, u4_n_2_p_frm_ifi_avg_sad;
+ number_t vq_n_1_p_frm_ifi_avg_sad, vq_n_2_p_frm_ifi_avg_sad;
+ number_t vq_prev_frm_sad_i;
+ /* If there are frames in the current IFI, its average becomes the (n-1) value and the stored (n-1) value the (n-2) value */
+ if(ps_est_sad->i4_num_p_frm_in_cur_ifi)
+ {
+ u4_n_1_p_frm_ifi_avg_sad =
+ (ps_est_sad->u4_n_p_frm_ifi_avg_sad / ps_est_sad->i4_num_p_frm_in_cur_ifi);
+ u4_n_2_p_frm_ifi_avg_sad = ps_est_sad->u4_n_1_p_frm_ifi_avg_sad;
+ }
+ else
+ {
+ u4_n_1_p_frm_ifi_avg_sad = ps_est_sad->u4_n_1_p_frm_ifi_avg_sad;
+ u4_n_2_p_frm_ifi_avg_sad = ps_est_sad->u4_n_2_p_frm_ifi_avg_sad;
+ }
+ /* If either of the two average SADs is zero we just return the previous
+ I frame SAD */
+ if(u4_n_1_p_frm_ifi_avg_sad && u4_n_2_p_frm_ifi_avg_sad)
+ {
+ SET_VAR_Q(vq_prev_frm_sad_i, ps_est_sad->au4_prev_frm_sad[I_PIC], 0);
+ SET_VAR_Q(vq_n_1_p_frm_ifi_avg_sad, u4_n_1_p_frm_ifi_avg_sad, 0);
+ SET_VAR_Q(vq_n_2_p_frm_ifi_avg_sad, u4_n_2_p_frm_ifi_avg_sad, 0);
+ /**************************************************************************
+ Estimated SAD =
+ (n-1)th intra frame interval(ifi) P frame Avg SAD *
+ (prev I frame SAD / (n-2)th intra frame interval(ifi) P frame Avg SAD)
+ **************************************************************************/
+ mult32_var_q(vq_prev_frm_sad_i, vq_n_1_p_frm_ifi_avg_sad, &vq_prev_frm_sad_i);
+ div32_var_q(vq_prev_frm_sad_i, vq_n_2_p_frm_ifi_avg_sad, &vq_prev_frm_sad_i);
+ number_t_to_word32(vq_prev_frm_sad_i, (WORD32 *)&u4_estimated_sad);
+ }
+ else
+ {
+ u4_estimated_sad = ps_est_sad->au4_prev_frm_sad[I_PIC];
+ }
+ }
+ }
+ return u4_estimated_sad;
+ }
+ else
+ {
+ return ps_est_sad->au4_prev_frm_sad[e_pic_type]; /* estimation off: stored SAD as is */
+ }
+}
+/****************************************************************************
+Function Name : update_ppic_sad
+Description : Scales the stored P frame SAD by i4_est_sad / i4_prev_p_sad (Q4 ratio)
+Inputs : ps_est_sad, i4_est_sad, i4_prev_p_sad (0 => nothing is updated)
+Returns : 0 if the stored P frame SAD was raised, 1 if it was left unchanged
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+ *****************************************************************************/
+WORD32 update_ppic_sad(est_sad_t *ps_est_sad, WORD32 i4_est_sad, WORD32 i4_prev_p_sad)
+{
+    /* A zero reference SAD would divide by zero below; leave the estimate untouched. */
+    if(i4_prev_p_sad == 0)
+        return 1;
+    i4_est_sad = ((ps_est_sad->au4_prev_frm_sad[P_PIC]) * ((i4_est_sad << 4) / i4_prev_p_sad)) >> 4;
+    if(i4_est_sad <= (WORD32)ps_est_sad->au4_prev_frm_sad[P_PIC])
+        return 1;
+    if(4 * i4_est_sad > 5 * i4_prev_p_sad) /* cap the raise at 1.25 * i4_prev_p_sad */
+        i4_est_sad = (5 * i4_prev_p_sad) >> 2;
+    ps_est_sad->au4_prev_frm_sad[P_PIC] = i4_est_sad;
+    return 0;
+}
+/****************************************************************************
+Function Name : update_actual_sad
+Description : Stores the actual SAD of a coded frame and, in estimation mode,
+              updates the per intra frame interval (ifi) SAD statistics
+Inputs : ps_est_sad, u4_actual_sad, e_pic_type
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+ *****************************************************************************/
+void update_actual_sad(est_sad_t *ps_est_sad, UWORD32 u4_actual_sad, picture_type_e e_pic_type)
+{
+    UWORD32 u4_ifi_avg_sad = 0;
+
+    ps_est_sad->au4_prev_frm_sad[e_pic_type] = u4_actual_sad;
+
+    /* The interval statistics are only maintained in estimation mode */
+    if(!ps_est_sad->i4_use_est_intra_sad)
+        return;
+
+    if(e_pic_type != I_PIC)
+    {
+        /* Non-I frame: add it to the running sum of the current interval */
+        ps_est_sad->u4_n_p_frm_ifi_avg_sad += u4_actual_sad;
+        ps_est_sad->i4_num_p_frm_in_cur_ifi++;
+        return;
+    }
+
+    /* I frame: the current interval is complete. Count it, saturating at the two
+       intervals needed before I frame SAD estimation starts */
+    if(ps_est_sad->i4_num_ifi_encoded < 2)
+        ps_est_sad->i4_num_ifi_encoded++;
+
+    /* Turn the running sum into an average (0 when the interval had no frames) */
+    if(ps_est_sad->i4_num_p_frm_in_cur_ifi)
+        u4_ifi_avg_sad =
+            ps_est_sad->u4_n_p_frm_ifi_avg_sad / ps_est_sad->i4_num_p_frm_in_cur_ifi;
+
+    /* Age the history: (n-1) -> (n-2), n -> (n-1) */
+    ps_est_sad->u4_n_2_p_frm_ifi_avg_sad = ps_est_sad->u4_n_1_p_frm_ifi_avg_sad;
+    ps_est_sad->u4_n_1_p_frm_ifi_avg_sad = u4_ifi_avg_sad;
+
+    /* Start a new, empty interval */
+    ps_est_sad->u4_n_p_frm_ifi_avg_sad = 0;
+    ps_est_sad->i4_num_p_frm_in_cur_ifi = 0;
+}
+/****************************************************************************
+Function Name : update_prev_frame_intra_sad
+Description : Overwrites the stored previous I frame SAD with i4_intra_frm_sad
+Inputs : ps_est_sad, i4_intra_frm_sad
+
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+ *****************************************************************************/
+void update_prev_frame_intra_sad(est_sad_t *ps_est_sad, WORD32 i4_intra_frm_sad)
+{
+ ps_est_sad->au4_prev_frm_sad[I_PIC] = i4_intra_frm_sad; /* signed input, stored as UWORD32 */
+}
+/****************************************************************************
+Function Name : get_prev_frame_intra_sad
+Description : Returns the stored previous I frame SAD
+Inputs : ps_est_sad
+
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+ *****************************************************************************/
+WORD32 get_prev_frame_intra_sad(est_sad_t *ps_est_sad)
+{
+ return ps_est_sad->au4_prev_frm_sad[I_PIC]; /* UWORD32 value returned as WORD32 */
+}
+/****************************************************************************
+Function Name : update_prev_frame_sad
+Description : Overwrites the stored previous SAD of picture type e_pic_type
+Inputs : ps_est_sad, i4_frm_sad, e_pic_type
+
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+ *****************************************************************************/
+void update_prev_frame_sad(est_sad_t *ps_est_sad, WORD32 i4_frm_sad, picture_type_e e_pic_type)
+{
+ ps_est_sad->au4_prev_frm_sad[e_pic_type] = i4_frm_sad; /* signed input, stored as UWORD32 */
+}
+/****************************************************************************
+Function Name : get_prev_frame_sad
+Description : Returns the stored previous SAD of picture type e_pic_type
+Inputs : ps_est_sad, e_pic_type
+
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+ *****************************************************************************/
+WORD32 get_prev_frame_sad(est_sad_t *ps_est_sad, picture_type_e e_pic_type)
+{
+ return ps_est_sad->au4_prev_frm_sad[e_pic_type]; /* UWORD32 value returned as WORD32 */
+}
diff --git a/encoder/est_sad.h b/encoder/est_sad.h
new file mode 100644
index 0000000..bba8c53
--- /dev/null
+++ b/encoder/est_sad.h
@@ -0,0 +1,88 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file est_sad.h
+*
+* \brief
+* This file contains the declarations of the SAD estimation functions
+*
+* \date
+*
+* \author
+* ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _EST_SAD_H_
+#define _EST_SAD_H_
+
+/* "est_sad_t->i4_use_est_intra_sad" Flag to control how the I frame SAD is estimated.
+If set to zero
+ - it uses the Intra sad calculated by the previous P frame as
+ the estimated sad for the current I frame
+else
+ - it uses the ratio of P frame sads of the previous two GOPS and
+ scales the I Frame sad with this ratio to estimate the current
+ I frame SAD
+*/
+
+/* Estimating the Average SAD for the current picture type is done by:
+1) if picture_type is I
+ - Estimated SAD = (n-1)th intra frame interval(ifi) P frame Avg SAD *
+ ( prev I frame SAD / (n-2)th intra frame interval(ifi) P frame Avg SAD)
+ - if fewer than two IFIs have been encoded, use the previous I frame SAD
+2) if picture type is P
+ - Estimated SAD is the previous P frame SAD
+3) The first P frame in an IFI could use a little better logic to decide the
+ estimated SAD but currently we assume the last coded P frame's SAD */
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+typedef struct est_sad_t *est_sad_handle; /* opaque handle; struct est_sad_t is defined in est_sad.c */
+
+/*****************************************************************************/
+/* Function Declarations */
+/*****************************************************************************/
+WORD32 est_sad_num_fill_use_free_memtab(
+ est_sad_handle *est_sad, itt_memtab_t *ps_memtab, ITT_FUNC_TYPE_E e_func_type); /* returns the number of memtab entries used */
+/* Zeroes all SAD state and selects the I frame estimation mode */
+void init_est_sad(est_sad_handle est_sad, WORD32 i4_use_est_frame_sad);
+/* Returns the estimated SAD for a frame of type e_pic_type */
+UWORD32 get_est_sad(est_sad_handle est_sad, picture_type_e e_pic_type);
+/* Stores the actual SAD of a coded frame and updates the per-ifi statistics */
+void update_actual_sad(est_sad_handle est_sad, UWORD32 u4_actual_sad, picture_type_e e_pic_type);
+/* NOTE(review): est_sad.c contains no definition of this function - confirm it is still needed */
+void update_actual_sad_for_intra(est_sad_handle est_sad, WORD32 i4_intra_frm_cost);
+/* Overwrites the stored previous I frame SAD */
+void update_prev_frame_intra_sad(est_sad_handle ps_est_sad, WORD32 i4_intra_frm_sad);
+/* Returns the stored previous I frame SAD */
+WORD32 get_prev_frame_intra_sad(est_sad_handle ps_est_sad);
+/* Overwrites the stored previous SAD of picture type e_pic_type */
+void update_prev_frame_sad(est_sad_handle ps_est_sad, WORD32 i4_frm_sad, picture_type_e e_pic_type);
+/* Returns the stored previous SAD of picture type e_pic_type */
+WORD32 get_prev_frame_sad(est_sad_handle ps_est_sad, picture_type_e e_pic_type);
+/* Re-initialises the state, keeping the current estimation mode */
+void reset_est_sad(est_sad_handle ps_est_sad);
+/* Scales the stored P frame SAD by i4_est_sad / i4_prev_p_sad; returns 0 if it was raised, else 1 */
+WORD32 update_ppic_sad(est_sad_handle est_sad, WORD32 i4_est_sad, WORD32 i4_prev_p_sad);
+#endif
diff --git a/encoder/fixed_point_error_bits.c b/encoder/fixed_point_error_bits.c
new file mode 100644
index 0000000..0964303
--- /dev/null
+++ b/encoder/fixed_point_error_bits.c
@@ -0,0 +1,248 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file fixed_point_error_bits.c
+*
+* \brief
+* This file contains the error bits processing functions
+*
+* \date
+* 15/02/2012
+*
+* \author
+* ittiam
+*
+******************************************************************************
+*/
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+
+/* User include files */
+#include "ittiam_datatypes.h"
+#include "rc_common.h"
+#include "rc_cntrl_param.h"
+#include "var_q_operator.h"
+#include "mem_req_and_acq.h"
+#include "fixed_point_error_bits.h"
+
+/* i4_max_tgt_frm_rate and i4_tgt_frm_rate hold the same value, so only one of them is kept */
+typedef struct error_bits_t
+{
+ /* WORD32 i4_max_tgt_frm_rate; */ /*Max tgt frm rate so that dynamic change in frm rate can be handled */
+ WORD32 i4_accum_frm_rate; /* Grows by i4_tgt_frm_rate_incr per frame; reaching i4_tgt_frm_rate marks one second */
+ WORD32 i4_tgt_frm_rate; /* tgt frame rate; NOTE(review): looks like units of 1/1000 fps (e.g. 29970) - confirm */
+ WORD32 i4_tgt_frm_rate_incr; /* per-frame increment of i4_accum_frm_rate (1000 after init) */
+ UWORD8 u1_compute_error_bits; /* 1 when one second is up, 0 otherwise */
+ WORD32 i4_accum_bitrate; /* Sum of the per-frame bit budgets added since the last one-second mark */
+ WORD32 i4_bitrate; /* bitrate */
+} error_bits_t;
+
+#if NON_STEADSTATE_CODE
+WORD32 error_bits_num_fill_use_free_memtab(
+ error_bits_t **pps_error_bits, itt_memtab_t *ps_memtab, ITT_FUNC_TYPE_E e_func_type)
+{
+ WORD32 i4_mem_tab_idx = 0; /* memtab entries consumed so far; returned at the end */
+ static error_bits_t s_error_bits_temp; /* stand-in state for calls made before memory exists */
+
+ /* For GET_NUM_MEMTAB and FILL_MEMTAB no state memory is available yet, so the
+ handle is pointed at the static stand-in to keep later dereferences valid */
+ if(e_func_type == GET_NUM_MEMTAB || e_func_type == FILL_MEMTAB)
+ (*pps_error_bits) = &s_error_bits_temp;
+
+ /* Describe the error_bits_t state buffer and hand it to use_or_fill_base() */
+ if(e_func_type != GET_NUM_MEMTAB)
+ {
+ fill_memtab(
+ &ps_memtab[i4_mem_tab_idx], sizeof(error_bits_t), MEM_TAB_ALIGNMENT, PERSISTENT, DDR);
+ use_or_fill_base(&ps_memtab[0], (void **)pps_error_bits, e_func_type);
+ }
+ i4_mem_tab_idx++; /* this module always accounts for exactly one memtab entry */
+
+ return (i4_mem_tab_idx);
+}
+
+/* ******************************************************************************/
+/**
+ * @brief Initialises the error bits state for a given frame rate and bitrate
+ *
+ * @param ps_error_bits   state to initialise
+ * @param i4_tgt_frm_rate target frame rate
+ * @param i4_bitrate      bitrate
+ */
+/* ******************************************************************************/
+void init_error_bits(error_bits_t *ps_error_bits, WORD32 i4_tgt_frm_rate, WORD32 i4_bitrate)
+{
+    /* Configuration supplied by the caller */
+    ps_error_bits->i4_tgt_frm_rate = i4_tgt_frm_rate;
+    ps_error_bits->i4_bitrate = i4_bitrate;
+
+    /* Step added to i4_accum_frm_rate after every frame */
+    ps_error_bits->i4_tgt_frm_rate_incr = 1000;
+
+    /* Nothing has been accumulated yet in the current one-second window */
+    ps_error_bits->i4_accum_frm_rate = 0;
+    ps_error_bits->i4_accum_bitrate = 0;
+
+    /* Becomes 1 once a one-second window has been completed */
+    ps_error_bits->u1_compute_error_bits = 0;
+}
+
+#endif /* #if NON_STEADSTATE_CODE */
+
+/* ******************************************************************************/
+/**
+ * @brief Advances the error state by one frame
+ *
+ * @param ps_error_bits error bits state
+ */
+/* ******************************************************************************/
+void update_error_bits(error_bits_t *ps_error_bits)
+{
+    WORD32 i4_bits_per_frame;
+
+    /* Bit budget of one frame: bitrate * 1000 / target frame rate */
+    X_PROD_Y_DIV_Z(
+        ps_error_bits->i4_bitrate, 1000, ps_error_bits->i4_tgt_frm_rate, i4_bits_per_frame);
+
+    /* The previous call completed a one-second window: start a new one */
+    if(ps_error_bits->u1_compute_error_bits == 1)
+    {
+        ps_error_bits->i4_accum_frm_rate = 0;
+        ps_error_bits->i4_accum_bitrate = 0;
+    }
+
+    /* Account for the frame that has just ended */
+    ps_error_bits->i4_accum_bitrate += i4_bits_per_frame;
+    ps_error_bits->i4_accum_frm_rate += ps_error_bits->i4_tgt_frm_rate_incr;
+
+    /* One second is up once the accumulated frame rate reaches the target frame
+       rate; the flag tells get_error_bits() to report the error for that window */
+    if(ps_error_bits->i4_accum_frm_rate < ps_error_bits->i4_tgt_frm_rate)
+    {
+        ps_error_bits->u1_compute_error_bits = 0;
+    }
+    else
+    {
+        ps_error_bits->u1_compute_error_bits = 1;
+    }
+}
+
+/* ******************************************************************************/
+/**
+ * @brief Returns the error bits for the current frame if there are any
+ *
+ * @param ps_error_bits error bits state
+ *
+ * @return error bits of the completed one-second window, 0 while none is complete
+ */
+/* ******************************************************************************/
+WORD32 get_error_bits(error_bits_t *ps_error_bits)
+{
+    WORD32 i4_cur_bitrate;
+    WORD32 i4_cur_frame_rate = ps_error_bits->i4_accum_frm_rate;
+
+    /* Nothing to report until a one-second window has been completed */
+    if(ps_error_bits->u1_compute_error_bits != 1)
+        return 0;
+
+    /* For frame rates like 29.970 the accumulated frame rate is a multiple of 1000
+       and every 100 seconds 3 frames would be dropped, so the error is calculated
+       on the actual frame rate: e.g. for a window of 30 frames the bits allocated
+       would be (30/29.970)*bitrate */
+    X_PROD_Y_DIV_Z(
+        ps_error_bits->i4_bitrate,
+        i4_cur_frame_rate,
+        ps_error_bits->i4_tgt_frm_rate,
+        i4_cur_bitrate);
+
+    /* Error = Actual bitrate - bits_per_frame * num of frames */
+    return (i4_cur_bitrate - ps_error_bits->i4_accum_bitrate);
+}
+/* ******************************************************************************/
+/**
+ * @brief Change the bitrate value for error bits module
+ *
+ * @param ps_error_bits error bits state
+ * @param i4_bitrate    new bitrate
+ */
+/* ******************************************************************************/
+void change_bitrate_in_error_bits(error_bits_t *ps_error_bits, WORD32 i4_bitrate)
+{
+    /* i4_accum_bitrate was accumulated with the old bitrate, whereas the error at the
+     * end of the second is calculated with the new one, which would give a huge error
+     * value. To avoid this i4_accum_bitrate is recalculated assuming the new bitrate.
+     */
+    WORD32 i4_old_bits_per_frame;
+    WORD32 i4_new_bits_per_frame;
+    WORD32 i4_frame_count = 0;
+
+    X_PROD_Y_DIV_Z(
+        ps_error_bits->i4_bitrate, 1000, ps_error_bits->i4_tgt_frm_rate, i4_old_bits_per_frame);
+    X_PROD_Y_DIV_Z(i4_bitrate, 1000, ps_error_bits->i4_tgt_frm_rate, i4_new_bits_per_frame);
+
+    /* Frames accumulated so far. With a zero old per-frame budget the count cannot
+       be recovered and the division would be undefined, so it is taken as 0 */
+    if(i4_old_bits_per_frame != 0)
+        i4_frame_count = ps_error_bits->i4_accum_bitrate / i4_old_bits_per_frame;
+    ps_error_bits->i4_accum_bitrate = i4_frame_count * i4_new_bits_per_frame;
+    ps_error_bits->i4_bitrate = i4_bitrate;
+}
+/* ******************************************************************************/
+/**
+ * @brief Change the frame rate parameter for the error bits state
+ *
+ * @param ps_error_bits error bits state
+ * @param i4_tgt_frm_rate new target frame rate
+ */
+/* ******************************************************************************/
+/*IMPLEMENTATION NOT TESTED*/
+
+void change_frm_rate_in_error_bits(error_bits_t *ps_error_bits, WORD32 i4_tgt_frm_rate)
+{
+ /* Rescales the accumulated frame rate to the new target, then stores the new target */
+ /* accum_frm_rate is used to mark the one second mark, so a change in frame rate could alter this mark,
+ * leading to a very high accum bitrate value. To avoid this accum_frm_rate is recalculated
+ * according to the new value
+ */
+ /* Only rescale when the target frame rate actually changes */
+
+ /* accum_frm_rate = accum_frm_rate * new tgt_frm_rate / old tgt_frm_rate */
+
+ if(ps_error_bits->i4_tgt_frm_rate != i4_tgt_frm_rate)
+ X_PROD_Y_DIV_Z(
+ ps_error_bits->i4_accum_frm_rate,
+ i4_tgt_frm_rate,
+ ps_error_bits->i4_tgt_frm_rate,
+ ps_error_bits->i4_accum_frm_rate);
+
+ /* truncate the accum value to a multiple of 1000 (integer divide, then multiply) */
+ ps_error_bits->i4_accum_frm_rate = ps_error_bits->i4_accum_frm_rate / 1000;
+ ps_error_bits->i4_accum_frm_rate = ps_error_bits->i4_accum_frm_rate * 1000;
+
+ /* i4_tgt_frm_rate_incr is left as it is; an earlier, disabled variant rescaled
+ * it by (old tgt_frm_rate * 1000) / new tgt_frm_rate
+*/
+ ps_error_bits->i4_tgt_frm_rate = i4_tgt_frm_rate;
+}
diff --git a/encoder/fixed_point_error_bits.h b/encoder/fixed_point_error_bits.h
new file mode 100644
index 0000000..8b1ddea
--- /dev/null
+++ b/encoder/fixed_point_error_bits.h
@@ -0,0 +1,57 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file fixed_point_error_bits.h
+*
+* \brief
+* This file contains all the necessary declarations for
+* error bits processing functions
+*
+* \date
+*
+* \author
+* ittiam
+*
+******************************************************************************
+*/
+
+#ifndef FIXED_POINT_ERROR_BITS_H
+#define FIXED_POINT_ERROR_BITS_H
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+typedef struct error_bits_t *error_bits_handle; /* opaque handle; struct error_bits_t is defined in fixed_point_error_bits.c */
+
+/*****************************************************************************/
+/* Function Declarations */
+/*****************************************************************************/
+
+WORD32 error_bits_num_fill_use_free_memtab(
+ error_bits_handle *pps_error_bits, itt_memtab_t *ps_memtab, ITT_FUNC_TYPE_E e_func_type); /* returns the number of memtab entries used */
+void init_error_bits(error_bits_handle ps_error_bits, WORD32 i4_max_tgt_frm_rate, WORD32 i4_bitrate); /* resets the state for a frame rate and bitrate */
+void update_error_bits(error_bits_handle ps_error_bits); /* advances the state by one frame */
+WORD32 get_error_bits(error_bits_handle ps_error_bits); /* error bits of a completed one-second window, else 0 */
+/* Run-time changes of the frame rate and bitrate */
+void change_frm_rate_in_error_bits(error_bits_handle ps_error_bits, WORD32 i4_tgt_frm_rate);
+void change_bitrate_in_error_bits(error_bits_handle ps_error_bits, WORD32 i4_bitrate);
+
+#endif
diff --git a/encoder/frame_info_collector.c b/encoder/frame_info_collector.c
new file mode 100644
index 0000000..b2e24d4
--- /dev/null
+++ b/encoder/frame_info_collector.c
@@ -0,0 +1,93 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file frame_info_collector.c
+*
+* \brief
+* This file contains the frame info initialization function
+*
+* \date
+*
+* \author
+* ittiam
+*
+******************************************************************************
+*/
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+
+/* User include files */
+#include "ittiam_datatypes.h"
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "trace_support.h"
+#include "assert.h"
+
+/*
+******************************************************************************
+* \if Function name : init_frame_info
+*
+* \brief
+* this function sets every field of a frame info struct to its initial value
+*
+* \param[in]
+* *ps_frame_info -> frame level info to initialize
+*
+* \return
+* none
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void init_frame_info(frame_info_t *ps_frame_info)
+{
+ ps_frame_info->i8_frame_num = -1; /* -1 is presumably a "not filled yet" marker - TODO confirm */
+ ps_frame_info->e_pic_type = BUF_PIC;
+ ps_frame_info->f_8bit_q_scale = -1;
+ ps_frame_info->f_8bit_q_scale_without_offset = -1;
+ ps_frame_info->f_hbd_q_scale = -1;
+ ps_frame_info->f_hbd_q_scale_without_offset = -1;
+ ps_frame_info->i4_scene_type = -1;
+ ps_frame_info->i4_rc_hevc_qp = -1;
+ ps_frame_info->i8_cl_sad = -1;
+ ps_frame_info->i8_header_bits = -1;
+ ps_frame_info->i8_tex_bits = -1;
+ ps_frame_info->i4_poc = -1;
+ ps_frame_info->i8_L1_ipe_raw_sad = -1;
+ ps_frame_info->i8_L1_me_sad = -1;
+ ps_frame_info->i4_num_entries = 0; /* the only field that starts at 0 rather than -1 */
+ ps_frame_info->i8_est_texture_bits = -1;
+ ps_frame_info->i4_lap_complexity_q7 = -1;
+ ps_frame_info->i4_lap_f_sim = -1;
+ ps_frame_info->i4_lap_var = -1;
+ ps_frame_info->i8_frame_acc_coarse_me_cost = -1;
+ ps_frame_info->i_to_avg_bit_ratio = -1;
+ ps_frame_info->i4_num_scd_in_lap_window = -1;
+ ps_frame_info->i4_num_frames_b4_scd = -1;
+ ps_frame_info->i1_is_complexity_based_bits_reset = -1;
+}
diff --git a/encoder/hme_coarse.c b/encoder/hme_coarse.c
new file mode 100644
index 0000000..f5cf9f8
--- /dev/null
+++ b/encoder/hme_coarse.c
@@ -0,0 +1,1680 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file hme_coarse.c
+*
+* @brief
+* Contains ME algorithm for the coarse layer.
+*
+* @author
+* Ittiam
+*
+*
+* List of Functions
+* hme_update_mv_bank_coarse()
+* hme_coarse()
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+#include <limits.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_multi_thrd_funcs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_entropy_structs.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+#include "ihevce_enc_loop_structs.h"
+#include "ihevce_bs_compute_ctb.h"
+#include "ihevce_global_tables.h"
+#include "ihevce_dep_mngr_interface.h"
+#include "hme_datatype.h"
+#include "hme_interface.h"
+#include "hme_common_defs.h"
+#include "hme_defs.h"
+#include "ihevce_me_instr_set_router.h"
+#include "hme_globals.h"
+#include "hme_utils.h"
+#include "hme_coarse.h"
+#include "hme_refine.h"
+#include "hme_err_compute.h"
+#include "hme_common_utils.h"
+#include "hme_search_algo.h"
+
+/*******************************************************************************
+* MACROS
+*******************************************************************************/
+#define COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, ps_search_node, shift) \
+ { /* copies the node's mv (right shifted by 'shift') and its ref idx */ \
+ (ps_mv)->i2_mv_x = (ps_search_node)->s_mv.i2_mvx >> (shift); \
+ (ps_mv)->i2_mv_y = (ps_search_node)->s_mv.i2_mvy >> (shift); \
+ *(pi1_ref_idx) = (ps_search_node)->i1_ref_idx; \
+ }
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+********************************************************************************
+* @fn void hme_update_mv_bank_coarse(search_results_t *ps_search_results,
+* layer_mv_t *ps_layer_mv,
+* S32 i4_search_blk_x,
+* S32 i4_search_blk_y,
+* search_node_t *ps_search_node_4x8_t,
+* search_node_t *ps_search_node_8x4_l,
+* S08 i1_ref_idx,
+* mvbank_update_prms_t *ps_prms)
+*
+* @brief Updates the coarse layer MV Bank for a given ref id and blk pos
+*
+* @param[in] ps_search_results: Search results data structure
+*
+* @param[in, out] ps_layer_mv : MV Bank for this layer
+*
+* @param[in] i4_search_blk_x: column number of the 4x4 blk searched
+*
+* @param[in] i4_search_blk_y: row number of the 4x4 blk searched
+*
+* @param[in] ps_search_node_4x8_t: Best MV of the 4x8T blk
+*
+* @param[in] ps_search_node_8x4_l: Best MV of the 8x4L blk
+*
+* @param[in] i1_ref_idx : Reference ID that has been searched
+*
+* @param[in] ps_prms : Parameters pertaining to the MV Bank update
+*
+* @return None
+********************************************************************************
+*/
+void hme_update_mv_bank_coarse(
+ search_results_t *ps_search_results,
+ layer_mv_t *ps_layer_mv,
+ S32 i4_search_blk_x,
+ S32 i4_search_blk_y,
+ search_node_t *ps_search_node_4x8_t,
+ search_node_t *ps_search_node_8x4_l,
+ S08 i1_ref_idx,
+ mvbank_update_prms_t *ps_prms)
+{
+ /* These point to the MV and ref idx posn to be updated */
+ hme_mv_t *ps_mv;
+ S08 *pi1_ref_idx;
+
+ /* Offset within the bank */
+ S32 i4_offset;
+
+ S32 i, j, i4_blk_x, i4_blk_y;
+
+ /* Best results for 8x4R and 4x8B blocks */
+ search_node_t *ps_search_node_8x4_r, *ps_search_node_4x8_b;
+
+ /* Number of MVs stored per reference (i4_num_mvs_per_ref) */
+ S32 num_mvs = ps_layer_mv->i4_num_mvs_per_ref;
+
+ search_node_t *aps_search_nodes[4];
+
+ /* The search blk may be different in size from the blk used to hold MV */
+ i4_blk_x = i4_search_blk_x << ps_prms->i4_shift;
+ i4_blk_y = i4_search_blk_y << ps_prms->i4_shift;
+
+ /* Compute the offset in the MV bank */
+ i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row;
+ i4_offset *= ps_layer_mv->i4_num_mvs_per_blk;
+
+ /* Identify the correct offset in the mvbank and the reference id buf */
+ ps_mv = ps_layer_mv->ps_mv + (i4_offset + (num_mvs * i1_ref_idx));
+ pi1_ref_idx = ps_layer_mv->pi1_ref_idx + (i4_offset + (num_mvs * i1_ref_idx));
+
+ /*************************************************************************/
+ /* We have at least 4 distinct results: the 4x8 top (coming from top blk) */
+ /* 8x4 left (coming from left blk), 8x4 and 4x8 right and bot resp. */
+ /* If number of results to be stored is 4, then we store all these 4 */
+ /* results, else we pick best ones */
+ /*************************************************************************/
+ ps_search_node_8x4_r = ps_search_results->aps_part_results[i1_ref_idx][PART_ID_2NxN_B];
+ ps_search_node_4x8_b = ps_search_results->aps_part_results[i1_ref_idx][PART_ID_Nx2N_R];
+
+ ASSERT(num_mvs <= 4);
+
+ /* Gather the 4 candidates in one array so that they can be sorted by cost */
+ aps_search_nodes[0] = ps_search_node_8x4_r;
+ aps_search_nodes[1] = ps_search_node_4x8_b;
+ aps_search_nodes[2] = ps_search_node_8x4_l;
+ aps_search_nodes[3] = ps_search_node_4x8_t;
+ if(num_mvs == 4)
+ {
+ COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, aps_search_nodes[0], 0);
+ ps_mv++;
+ pi1_ref_idx++;
+ COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, aps_search_nodes[1], 0);
+ ps_mv++;
+ pi1_ref_idx++;
+ COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, aps_search_nodes[2], 0);
+ ps_mv++;
+ pi1_ref_idx++;
+ COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, aps_search_nodes[3], 0);
+ ps_mv++;
+ pi1_ref_idx++;
+ return;
+ }
+
+ /* Partial selection sort on i4_tot_cost: store the num_mvs cheapest results, cheapest first */
+ for(i = 0; i < num_mvs; i++)
+ {
+ for(j = i + 1; j < 4; j++)
+ {
+ if(aps_search_nodes[j]->i4_tot_cost < aps_search_nodes[i]->i4_tot_cost)
+ {
+ SWAP_HME(aps_search_nodes[j], aps_search_nodes[i], search_node_t *);
+ }
+ }
+ COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, aps_search_nodes[i], 0);
+ ps_mv++;
+ pi1_ref_idx++;
+ }
+}
+
+/**
+********************************************************************************
+* @fn void hme_coarse_frm_init(coarse_me_ctxt_t *ps_ctxt, coarse_prms_t *ps_coarse_prms)
+*
+* @brief Frame init entry point Coarse ME: initializes the mv bank of the layer
+*
+* @param[in,out] ps_ctxt: ME Handle
+*
+* @param[in] ps_coarse_prms : Coarse layer config params
+*
+* @return None
+********************************************************************************
+*/
+void hme_coarse_frm_init(coarse_me_ctxt_t *ps_ctxt, coarse_prms_t *ps_coarse_prms)
+{
+ layer_ctxt_t *ps_curr_layer;
+
+ S32 i4_pic_wd, i4_pic_ht;
+
+ S32 num_blks_in_pic, num_blks_in_row;
+
+ BLK_SIZE_T e_search_blk_size = BLK_4x4; /* not read below */
+
+ S32 blk_size_shift = 2, blk_wd = 4, blk_ht = 4; /* only blk_size_shift is read below */
+
+ /* Number of references to search */
+ S32 i4_num_ref;
+
+ ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_coarse_prms->i4_layer_id];
+ i4_num_ref = ps_coarse_prms->i4_num_ref;
+
+ i4_pic_wd = ps_curr_layer->i4_wd;
+ i4_pic_ht = ps_curr_layer->i4_ht;
+ /* Macro updates num_blks_in_pic and num_blks_in_row (neither is read again below) */
+ GET_NUM_BLKS_IN_PIC(i4_pic_wd, i4_pic_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic);
+
+ /************************************************************************/
+ /* Initialize the mv bank that holds results of this layer. */
+ /************************************************************************/
+ hme_init_mv_bank(
+ ps_curr_layer,
+ BLK_4x4,
+ i4_num_ref,
+ ps_coarse_prms->num_results,
+ ps_ctxt->u1_encode[ps_coarse_prms->i4_layer_id]);
+
+ return;
+}
+
+/**
+********************************************************************************
+* @fn void hme_derive_worst_case_search_range(range_prms_t *ps_range,
+* range_prms_t *ps_pic_limit,
+* range_prms_t *ps_mv_limit,
+* S32 i4_x,
+* S32 i4_y,
+* S32 blk_wd,
+* S32 blk_ht)
+*
+* @brief given picture limits and blk dimensions and mv search limits, obtains
+* the valid search range such that the blk stays within pic boundaries,
+* where picture boundaries include padded portions of picture
+*
+* @param[out] ps_range: updated with actual search range
+*
+* @param[in] ps_pic_limit : picture boundaries
+*
+* @param[in] ps_mv_limit: Search range limits for the mvs
+*
+* @param[in] i4_x : x coordinate of the blk
+*
+* @param[in] i4_y : y coordinate of the blk
+*
+* @param[in] blk_wd : blk width
+*
+* @param[in] blk_ht : blk height
+*
+* @return void
+********************************************************************************
+*/
+void hme_derive_worst_case_search_range(
+ range_prms_t *ps_range,
+ range_prms_t *ps_pic_limit,
+ range_prms_t *ps_mv_limit,
+ S32 i4_x,
+ S32 i4_y,
+ S32 blk_wd,
+ S32 blk_ht)
+{
+ /* Max x is derived for the left block (i4_x - 4), min x for the current block */
+ ps_range->i2_max_x =
+ MIN((ps_pic_limit->i2_max_x - (S16)blk_wd - (S16)(i4_x - 4)), ps_mv_limit->i2_max_x);
+ ps_range->i2_min_x = MAX((ps_pic_limit->i2_min_x - (S16)i4_x), ps_mv_limit->i2_min_x);
+ /* Max y is derived for the top block (i4_y - 4), min y for the current block */
+ ps_range->i2_max_y =
+ MIN((ps_pic_limit->i2_max_y - (S16)blk_ht - (S16)(i4_y - 4)), ps_mv_limit->i2_max_y);
+ ps_range->i2_min_y = MAX((ps_pic_limit->i2_min_y - (S16)i4_y), ps_mv_limit->i2_min_y);
+}
+
+/**
+********************************************************************************
+* @fn void hme_combine_4x4_sads_and_compute_cost_high_quality(S08 i1_ref_idx,
+* range_prms_t *ps_mv_range,
+* range_prms_t *ps_mv_limit,
+* hme_mv_t *ps_best_mv_4x8,
+* hme_mv_t *ps_best_mv_8x4,
+* pred_ctxt_t *ps_pred_ctxt,
+* PF_MV_COST_FXN pf_mv_cost_compute,
+* S16 *pi2_sads_4x4_current,
+* S16 *pi2_sads_4x4_east,
+* S16 *pi2_sads_4x4_south)
+*
+* @brief Sweeps ps_mv_range with the high quality step, combines the stored
+* 4x4 SADs into 4x8 (current + south) and 8x4 (current + east) SADs, adds
+* the mv cost and returns the lowest cost mv for each of the two shapes
+*
+* @param[in] i1_ref_idx : Cur ref idx
+* @param[in] ps_mv_range : mv range to sweep
+* @param[in] ps_mv_limit : mv limits used to index the stored SAD buffers
+* @param[out] ps_best_mv_4x8, ps_best_mv_8x4 : best mv (x, y) per shape;
+* (0, 0) when ps_mv_range is empty
+* @param[in] ps_pred_ctxt : Prediction ctxt for cost computation
+* @param[in] pf_mv_cost_compute : mv cost computation function
+* @param[in] pi2_sads_4x4_current/_east/_south : stored 4x4 SADs that are
+* summed per mv position (current + south, current + east)
+*
+* @return void
+********************************************************************************
+*/
+void hme_combine_4x4_sads_and_compute_cost_high_quality(
+ S08 i1_ref_idx,
+ range_prms_t *ps_mv_range,
+ range_prms_t *ps_mv_limit,
+ hme_mv_t *ps_best_mv_4x8,
+ hme_mv_t *ps_best_mv_8x4,
+ pred_ctxt_t *ps_pred_ctxt,
+ PF_MV_COST_FXN pf_mv_cost_compute,
+ S16 *pi2_sads_4x4_current,
+ S16 *pi2_sads_4x4_east,
+ S16 *pi2_sads_4x4_south)
+{
+ /* Step sizes and best mvs; the best mvs stay (0, 0) if the sweep below is empty */
+ S32 stepy, stepx, best_mv_y_4x8 = 0, best_mv_x_4x8 = 0, best_mv_y_8x4 = 0, best_mv_x_8x4 = 0;
+ S32 step_shift_x, step_shift_y;
+ S32 mvx, mvy, mv_x_offset, mv_y_offset, mv_x_range, mv_y_range;
+
+ S32 min_cost_4x8 = MAX_32BIT_VAL;
+ S32 min_cost_8x4 = MAX_32BIT_VAL;
+
+ search_node_t s_search_node;
+ s_search_node.i1_ref_idx = i1_ref_idx;
+
+ stepx = stepy = HME_COARSE_STEP_SIZE_HIGH_QUALITY;
+ /*TODO: Calculate Step shift from the #define HME_COARSE_STEP_SIZE_HIGH_QUALITY */
+ step_shift_x = step_shift_y = 1;
+ /* Offsets are derived exactly as in hme_store_4x4_sads_high_quality() so the indices match */
+ mv_x_offset = -(ps_mv_limit->i2_min_x >> step_shift_x);
+ mv_y_offset = -(ps_mv_limit->i2_min_y >> step_shift_y);
+ mv_x_range = (-ps_mv_limit->i2_min_x + ps_mv_limit->i2_max_x) >> step_shift_x;
+ mv_y_range = (-ps_mv_limit->i2_min_y + ps_mv_limit->i2_max_y) >> step_shift_y;
+
+ /* Run 2loops to sweep over the reference area */
+ for(mvy = ps_mv_range->i2_min_y; mvy < ps_mv_range->i2_max_y; mvy += stepy)
+ {
+ for(mvx = ps_mv_range->i2_min_x; mvx < ps_mv_range->i2_max_x; mvx += stepx)
+ {
+ S32 sad_4x8, cost_4x8, sad_8x4, cost_8x4;
+ S32 sad_pos = ((mvx >> step_shift_x) + mv_x_offset) +
+ ((mvy >> step_shift_y) + mv_y_offset) * mv_x_range;
+
+ /* Get SAD by adding SAD for current and neighbour S */
+ sad_4x8 = pi2_sads_4x4_current[sad_pos] + pi2_sads_4x4_south[sad_pos];
+ sad_8x4 = pi2_sads_4x4_current[sad_pos] + pi2_sads_4x4_east[sad_pos];
+
+ /* Candidate mv handed to the cost function; the mv cost is shared by both shapes */
+ s_search_node.s_mv.i2_mvx = mvx;
+ s_search_node.s_mv.i2_mvy = mvy;
+
+ cost_4x8 = cost_8x4 =
+ pf_mv_cost_compute(&s_search_node, ps_pred_ctxt, PART_ID_2Nx2N, MV_RES_FPEL);
+
+ cost_4x8 += sad_4x8;
+ cost_8x4 += sad_8x4;
+
+ if(cost_4x8 < min_cost_4x8)
+ {
+ best_mv_x_4x8 = mvx;
+ best_mv_y_4x8 = mvy;
+ min_cost_4x8 = cost_4x8;
+ }
+ if(cost_8x4 < min_cost_8x4)
+ {
+ best_mv_x_8x4 = mvx;
+ best_mv_y_8x4 = mvy;
+ min_cost_8x4 = cost_8x4;
+ }
+ }
+ }
+
+ ps_best_mv_4x8->i2_mv_x = best_mv_x_4x8;
+ ps_best_mv_4x8->i2_mv_y = best_mv_y_4x8;
+
+ ps_best_mv_8x4->i2_mv_x = best_mv_x_8x4;
+ ps_best_mv_8x4->i2_mv_y = best_mv_y_8x4;
+}
+/* High speed variant: sweeps ps_mv_range with HME_COARSE_STEP_SIZE_HIGH_SPEED, mv cost computed inline */
+void hme_combine_4x4_sads_and_compute_cost_high_speed(
+ S08 i1_ref_idx,
+ range_prms_t *ps_mv_range,
+ range_prms_t *ps_mv_limit,
+ hme_mv_t *ps_best_mv_4x8,
+ hme_mv_t *ps_best_mv_8x4,
+ pred_ctxt_t *ps_pred_ctxt,
+ PF_MV_COST_FXN pf_mv_cost_compute,
+ S16 *pi2_sads_4x4_current,
+ S16 *pi2_sads_4x4_east,
+ S16 *pi2_sads_4x4_south)
+{
+ /* Step sizes and best mvs; the best mvs stay (0, 0) if the sweep below is empty */
+ S32 stepy, stepx, best_mv_y_4x8 = 0, best_mv_x_4x8 = 0, best_mv_y_8x4 = 0, best_mv_x_8x4 = 0;
+ S32 step_shift_x, step_shift_y;
+ S32 mvx, mvy, mv_x_offset, mv_y_offset, mv_x_range, mv_y_range;
+
+ S32 rnd, lambda, lambda_q_shift;
+
+ S32 min_cost_4x8 = MAX_32BIT_VAL;
+ S32 min_cost_8x4 = MAX_32BIT_VAL;
+
+ (void)pf_mv_cost_compute;
+ stepx = stepy = HME_COARSE_STEP_SIZE_HIGH_SPEED;
+ /*TODO: Calculate Step shift from the #define HME_COARSE_STEP_SIZE_HIGH_SPEED */
+ step_shift_x = step_shift_y = 2;
+ /* Offsets are derived exactly as in hme_store_4x4_sads_high_speed() so the indices match */
+ mv_x_offset = -(ps_mv_limit->i2_min_x >> step_shift_x);
+ mv_y_offset = -(ps_mv_limit->i2_min_y >> step_shift_y);
+ mv_x_range = (-ps_mv_limit->i2_min_x + ps_mv_limit->i2_max_x) >> step_shift_x;
+ mv_y_range = (-ps_mv_limit->i2_min_y + ps_mv_limit->i2_max_y) >> step_shift_y;
+
+ lambda = ps_pred_ctxt->lambda;
+ lambda_q_shift = ps_pred_ctxt->lambda_q_shift;
+ rnd = 1 << (lambda_q_shift - 1);
+
+ ASSERT(MAX_MVX_SUPPORTED_IN_COARSE_LAYER >= ABS(ps_mv_range->i2_max_x));
+ ASSERT(MAX_MVY_SUPPORTED_IN_COARSE_LAYER >= ABS(ps_mv_range->i2_max_y));
+
+ /* Run 2loops to sweep over the reference area */
+ for(mvy = ps_mv_range->i2_min_y; mvy < ps_mv_range->i2_max_y; mvy += stepy)
+ {
+ for(mvx = ps_mv_range->i2_min_x; mvx < ps_mv_range->i2_max_x; mvx += stepx)
+ {
+ S32 sad_4x8, cost_4x8, sad_8x4, cost_8x4;
+
+ S32 sad_pos = ((mvx >> step_shift_x) + mv_x_offset) +
+ ((mvy >> step_shift_y) + mv_y_offset) * mv_x_range;
+
+ /* Get SAD by adding SAD for current and neighbour S */
+ sad_4x8 = pi2_sads_4x4_current[sad_pos] + pi2_sads_4x4_south[sad_pos];
+ sad_8x4 = pi2_sads_4x4_current[sad_pos] + pi2_sads_4x4_east[sad_pos];
+
+ /* mv cost is computed inline from hme_get_range(); pf_mv_cost_compute is not called */
+
+ cost_4x8 = cost_8x4 =
+ (2 * hme_get_range(ABS(mvx)) - 1) + (2 * hme_get_range(ABS(mvy)) - 1) + i1_ref_idx;
+
+ cost_4x8 += (mvx != 0) ? 1 : 0;
+ cost_4x8 += (mvy != 0) ? 1 : 0;
+ cost_4x8 = (cost_4x8 * lambda + rnd) >> lambda_q_shift;
+
+ cost_8x4 += (mvx != 0) ? 1 : 0;
+ cost_8x4 += (mvy != 0) ? 1 : 0;
+ cost_8x4 = (cost_8x4 * lambda + rnd) >> lambda_q_shift;
+
+ cost_4x8 += sad_4x8;
+ cost_8x4 += sad_8x4;
+
+ if(cost_4x8 < min_cost_4x8)
+ {
+ best_mv_x_4x8 = mvx;
+ best_mv_y_4x8 = mvy;
+ min_cost_4x8 = cost_4x8;
+ }
+ if(cost_8x4 < min_cost_8x4)
+ {
+ best_mv_x_8x4 = mvx;
+ best_mv_y_8x4 = mvy;
+ min_cost_8x4 = cost_8x4;
+ }
+ }
+ }
+
+ ps_best_mv_4x8->i2_mv_x = best_mv_x_4x8;
+ ps_best_mv_4x8->i2_mv_y = best_mv_y_4x8;
+
+ ps_best_mv_8x4->i2_mv_x = best_mv_x_8x4;
+ ps_best_mv_8x4->i2_mv_y = best_mv_y_8x4;
+}
+
+/**
+********************************************************************************
+* @fn hme_store_4x4_sads_high_quality(hme_search_prms_t *ps_search_prms,
+* layer_ctxt_t *ps_layer_ctxt, range_prms_t *ps_mv_limit, ...)
+*
+* @brief Does a 4x4 sad computation on a given range and stores it in memory
+*
+* @param[in] ps_search_prms : Search prms structure containing info like
+* blk dimensions, search range etc
+*
+* @param[in] ps_layer_ctxt: All info about this layer
+*
+* @param[in] ps_mv_limit: mv limits from which the SAD buffer index is derived
+*
+* @param[in] ps_wt_inp_prms: All info about weighted input
+*
+* @param[out] pi2_sads_4x4: Memory to store all 4x4 SADs for given range
+*
+* @return void
+********************************************************************************
+*/
+
+void hme_store_4x4_sads_high_quality(
+ hme_search_prms_t *ps_search_prms,
+ layer_ctxt_t *ps_layer_ctxt,
+ range_prms_t *ps_mv_limit,
+ wgt_pred_ctxt_t *ps_wt_inp_prms,
+ S16 *pi2_sads_4x4)
+{
+ S32 sad, i, j;
+
+ /* Input and reference attributes */
+ U08 *pu1_inp, *pu1_inp_orig, *pu1_ref;
+ S32 i4_inp_stride, i4_ref_stride, i4_ref_offset;
+
+ /* The reference is actually an array of ptrs since there are several */
+ /* reference id. So an array gets passed from the calling function */
+ U08 **ppu1_ref, *pu1_ref_coloc;
+
+ S32 stepy, stepx, step_shift_x, step_shift_y;
+ S32 mvx, mvy, mv_x_offset, mv_y_offset, mv_x_range, mv_y_range;
+
+ /* Points to the range limits for mv */
+ range_prms_t *ps_range_prms;
+
+ /* Reference index to be searched */
+ S32 i4_search_idx = ps_search_prms->i1_ref_idx;
+ /* Using the member 0 to store for all ref. idx. */
+ ps_range_prms = ps_search_prms->aps_mv_range[0];
+ pu1_inp_orig = ps_wt_inp_prms->apu1_wt_inp[i4_search_idx];
+ i4_inp_stride = ps_search_prms->i4_inp_stride;
+
+ /* Move to the location of the search blk in inp buffer */
+ pu1_inp_orig += ps_search_prms->i4_cu_x_off;
+ pu1_inp_orig += ps_search_prms->i4_cu_y_off * i4_inp_stride;
+
+ /*************************************************************************/
+ /* the input buffers of the layer (ppu1_list_inp) are used as reference */
+ /* in coarse layer */
+ /*************************************************************************/
+ i4_ref_stride = ps_layer_ctxt->i4_inp_stride;
+ ppu1_ref = ps_layer_ctxt->ppu1_list_inp;
+
+ /* colocated position in reference picture */
+ i4_ref_offset = (i4_ref_stride * ps_search_prms->i4_y_off) + ps_search_prms->i4_x_off;
+ pu1_ref_coloc = ppu1_ref[i4_search_idx] + i4_ref_offset;
+
+ stepx = stepy = HME_COARSE_STEP_SIZE_HIGH_QUALITY;
+ /*TODO: Calculate Step shift from the #define HME_COARSE_STEP_SIZE_HIGH_QUALITY */
+ step_shift_x = step_shift_y = 1;
+
+ mv_x_offset = -(ps_mv_limit->i2_min_x >> step_shift_x);
+ mv_y_offset = -(ps_mv_limit->i2_min_y >> step_shift_y);
+ mv_x_range = (-ps_mv_limit->i2_min_x + ps_mv_limit->i2_max_x) >> step_shift_x;
+ mv_y_range = (-ps_mv_limit->i2_min_y + ps_mv_limit->i2_max_y) >> step_shift_y;
+
+ /* Run 2loops to sweep over the reference area */
+ for(mvy = ps_range_prms->i2_min_y; mvy < ps_range_prms->i2_max_y; mvy += stepy)
+ {
+ for(mvx = ps_range_prms->i2_min_x; mvx < ps_range_prms->i2_max_x; mvx += stepx)
+ {
+ /* Set up the reference and inp ptr */
+ pu1_ref = pu1_ref_coloc + mvx + (mvy * i4_ref_stride);
+ pu1_inp = pu1_inp_orig;
+ /* SAD computation: sum of absolute differences over 4 rows of 4 pixels */
+ {
+ sad = 0;
+ for(i = 0; i < 4; i++)
+ {
+ for(j = 0; j < 4; j++)
+ {
+ sad += (ABS(((S32)pu1_inp[j] - (S32)pu1_ref[j])));
+ }
+ pu1_inp += i4_inp_stride;
+ pu1_ref += i4_ref_stride;
+ }
+ }
+
+ pi2_sads_4x4
+ [((mvx >> step_shift_x) + mv_x_offset) +
+ ((mvy >> step_shift_y) + mv_y_offset) * mv_x_range] = sad;
+ }
+ }
+}
+/* High speed variant: computes and stores the 4x4 SADs with step HME_COARSE_STEP_SIZE_HIGH_SPEED */
+void hme_store_4x4_sads_high_speed(
+ hme_search_prms_t *ps_search_prms,
+ layer_ctxt_t *ps_layer_ctxt,
+ range_prms_t *ps_mv_limit,
+ wgt_pred_ctxt_t *ps_wt_inp_prms,
+ S16 *pi2_sads_4x4)
+{
+ S32 sad, i, j;
+
+ /* Input and reference attributes */
+ U08 *pu1_inp, *pu1_inp_orig, *pu1_ref;
+ S32 i4_inp_stride, i4_ref_stride, i4_ref_offset;
+
+ /* The reference is actually an array of ptrs since there are several */
+ /* reference id. So an array gets passed from the calling function */
+ U08 **ppu1_ref, *pu1_ref_coloc;
+
+ S32 stepy, stepx, step_shift_x, step_shift_y;
+ S32 mvx, mvy, mv_x_offset, mv_y_offset, mv_x_range, mv_y_range;
+
+ /* Points to the range limits for mv */
+ range_prms_t *ps_range_prms;
+
+ /* Reference index to be searched */
+ S32 i4_search_idx = ps_search_prms->i1_ref_idx;
+
+ /* Using the member 0 for all ref. idx */
+ ps_range_prms = ps_search_prms->aps_mv_range[0];
+ pu1_inp_orig = ps_wt_inp_prms->apu1_wt_inp[i4_search_idx];
+ i4_inp_stride = ps_search_prms->i4_inp_stride;
+
+ /* Move to the location of the search blk in inp buffer */
+ pu1_inp_orig += ps_search_prms->i4_cu_x_off;
+ pu1_inp_orig += ps_search_prms->i4_cu_y_off * i4_inp_stride;
+
+ /*************************************************************************/
+ /* the input buffers of the layer (ppu1_list_inp) are used as reference */
+ /* in coarse layer */
+ /*************************************************************************/
+ i4_ref_stride = ps_layer_ctxt->i4_inp_stride;
+ ppu1_ref = ps_layer_ctxt->ppu1_list_inp;
+
+ /* colocated position in reference picture */
+ i4_ref_offset = (i4_ref_stride * ps_search_prms->i4_y_off) + ps_search_prms->i4_x_off;
+ pu1_ref_coloc = ppu1_ref[i4_search_idx] + i4_ref_offset;
+
+ stepx = stepy = HME_COARSE_STEP_SIZE_HIGH_SPEED;
+ /*TODO: Calculate Step shift from the #define HME_COARSE_STEP_SIZE_HIGH_SPEED */
+ step_shift_x = step_shift_y = 2;
+
+ mv_x_offset = -(ps_mv_limit->i2_min_x >> step_shift_x);
+ mv_y_offset = -(ps_mv_limit->i2_min_y >> step_shift_y);
+ mv_x_range = (-ps_mv_limit->i2_min_x + ps_mv_limit->i2_max_x) >> step_shift_x;
+ mv_y_range = (-ps_mv_limit->i2_min_y + ps_mv_limit->i2_max_y) >> step_shift_y;
+
+ /* Run 2loops to sweep over the reference area */
+ for(mvy = ps_range_prms->i2_min_y; mvy < ps_range_prms->i2_max_y; mvy += stepy)
+ {
+ for(mvx = ps_range_prms->i2_min_x; mvx < ps_range_prms->i2_max_x; mvx += stepx)
+ {
+ /* Set up the reference and inp ptr */
+ pu1_ref = pu1_ref_coloc + mvx + (mvy * i4_ref_stride);
+ pu1_inp = pu1_inp_orig;
+ /* SAD computation: sum of absolute differences over 4 rows of 4 pixels */
+ {
+ sad = 0;
+ for(i = 0; i < 4; i++)
+ {
+ for(j = 0; j < 4; j++)
+ {
+ sad += (ABS(((S32)pu1_inp[j] - (S32)pu1_ref[j])));
+ }
+ pu1_inp += i4_inp_stride;
+ pu1_ref += i4_ref_stride;
+ }
+ }
+
+ pi2_sads_4x4
+ [((mvx >> step_shift_x) + mv_x_offset) +
+ ((mvy >> step_shift_y) + mv_y_offset) * mv_x_range] = sad;
+ }
+ }
+}
+/**
+********************************************************************************
+* @fn void hme_coarsest(me_ctxt_t *ps_ctxt, coarse_prms_t *ps_coarse_prms)
+*
+* @brief Top level entry point for Coarse ME. Runs across blks and searches
+* at a 4x4 blk granularity by using 4x8 and 8x4 patterns.
+*
+* @param[in,out] ps_ctxt: ME Handle
+*
+* @param[in] ps_coarse_prms : Coarse layer config params
+*
+* @param[in] ps_multi_thrd_ctxt : Multi thread context
+*
+* @return None
+********************************************************************************
+*/
+void hme_coarsest(
+ coarse_me_ctxt_t *ps_ctxt,
+ coarse_prms_t *ps_coarse_prms,
+ multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
+ WORD32 i4_ping_pong,
+ void **ppv_dep_mngr_hme_sync)
+{
+ S16 *pi2_cur_ref_sads_4x4;
+ S32 ai4_sad_4x4_block_size[MAX_NUM_REF], ai4_sad_4x4_block_stride[MAX_NUM_REF];
+ S32 num_rows_coarse;
+ S32 sad_top_offset, sad_current_offset;
+ S32 search_node_top_offset, search_node_left_offset;
+
+ ME_QUALITY_PRESETS_T e_me_quality_preset =
+ ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets;
+
+ search_results_t *ps_search_results;
+ mvbank_update_prms_t s_mv_update_prms;
+ BLK_SIZE_T e_search_blk_size = BLK_4x4;
+ hme_search_prms_t s_search_prms_4x8, s_search_prms_8x4, s_search_prms_4x4;
+
+ S32 global_id_8x4, global_id_4x8;
+
+ /*************************************************************************/
+ /* These directly point to the best search result nodes that will be */
+ /* updated by the search algorithm, rather than have to go through an */
+ /* elaborate structure */
+ /*************************************************************************/
+ search_node_t *aps_best_search_node_8x4[MAX_NUM_REF];
+ search_node_t *aps_best_search_node_4x8[MAX_NUM_REF];
+
+ /* These point to various spatial candts */
+ search_node_t *ps_candt_8x4_l, *ps_candt_8x4_t, *ps_candt_8x4_tl;
+ search_node_t *ps_candt_4x8_l, *ps_candt_4x8_t, *ps_candt_4x8_tl;
+ search_node_t *ps_candt_zeromv_8x4, *ps_candt_zeromv_4x8;
+ search_node_t *ps_candt_fs_8x4, *ps_candt_fs_4x8;
+ search_node_t as_top_neighbours[4], as_left_neighbours[3];
+
+ /* Holds the global mv for a given ref index */
+ search_node_t s_candt_global[MAX_NUM_REF];
+
+ /* All the search candidates */
+ search_candt_t as_search_candts_8x4[MAX_INIT_CANDTS];
+ search_candt_t as_search_candts_4x8[MAX_INIT_CANDTS];
+ search_candt_t *ps_search_candts_8x4, *ps_search_candts_4x8;
+
+ /* Actual range per blk and the pic level boundaries */
+ range_prms_t s_range_prms, s_pic_limit, as_mv_limit[MAX_NUM_REF];
+
+ /* Current and prev pic layer ctxt at the coarsest layer */
+ layer_ctxt_t *ps_curr_layer, *ps_prev_layer;
+
+ /* best mv of full search */
+ hme_mv_t best_mv_4x8, best_mv_8x4;
+
+ /* Book keeping at blk level */
+ S32 blk_x, num_blks_in_pic, num_blks_in_row, num_4x4_blks_in_row;
+
+ S32 blk_y;
+
+ /* Block dimensions */
+ S32 blk_size_shift = 2, blk_wd = 4, blk_ht = 4;
+
+ S32 lambda = ps_coarse_prms->lambda;
+
+ /* Number of references to search */
+ S32 i4_num_ref;
+
+ S32 i4_i, id, i;
+ S08 i1_ref_idx;
+
+ S32 i4_pic_wd, i4_pic_ht;
+ S32 i4_layer_id;
+
+ S32 end_of_frame;
+
+ pf_get_wt_inp fp_get_wt_inp;
+
+ /* Maximum search iterations around any candidate */
+ S32 i4_max_iters = ps_coarse_prms->i4_max_iters;
+
+ ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_coarse_prms->i4_layer_id];
+ ps_prev_layer = hme_coarse_get_past_layer_ctxt(ps_ctxt, ps_coarse_prms->i4_layer_id);
+
+ /* We need only one instance of search results structure */
+ ps_search_results = &ps_ctxt->s_search_results_8x8;
+
+ ps_search_candts_8x4 = &as_search_candts_8x4[0];
+ ps_search_candts_4x8 = &as_search_candts_4x8[0];
+
+ end_of_frame = 0;
+
+ i4_pic_wd = ps_curr_layer->i4_wd;
+ i4_pic_ht = ps_curr_layer->i4_ht;
+
+ fp_get_wt_inp = ((ihevce_me_optimised_function_list_t *)ps_ctxt->pv_me_optimised_function_list)
+ ->pf_get_wt_inp_8x8;
+
+ num_rows_coarse = ps_ctxt->i4_num_row_bufs;
+
+ /*************************************************************************/
+ /* Coarse Layer always does explicit search. Number of reference frames */
+ /* to search is a configurable parameter supplied by the application */
+ /*************************************************************************/
+ i4_num_ref = ps_coarse_prms->i4_num_ref;
+ i4_layer_id = ps_coarse_prms->i4_layer_id;
+
+ /*************************************************************************/
+ /* The search algorithm goes as follows: */
+ /* */
+ /* ___ */
+ /* | e | */
+ /* ___|___|___ */
+ /* | c | a | b | */
+ /* |___|___|___| */
+ /* | d | */
+ /* |___| */
+ /* */
+ /* For the target block a, we collect best results from 2 8x4 blks */
+ /* These are c-a and a-b. The 4x8 blks are e-a and a-d */
+ /* c-a result is already available from results of blk c. a-b is */
+ /* evaluated in this blk. Likewise e-a result is stored in a row buffer */
+ /* a-d is evaluated this blk */
+ /* So we store a row buffer which stores best 4x8 results of all top blk */
+ /*************************************************************************/
+
+ /************************************************************************/
+ /* Initialize the pointers to the best node. */
+ /************************************************************************/
+ for(i4_i = 0; i4_i < i4_num_ref; i4_i++)
+ {
+ aps_best_search_node_8x4[i4_i] = ps_search_results->aps_part_results[i4_i][PART_ID_2NxN_B];
+ aps_best_search_node_4x8[i4_i] = ps_search_results->aps_part_results[i4_i][PART_ID_Nx2N_R];
+ }
+
+ /************************************************************************/
+ /* Initialize the "searchresults" structure. This will set up the number*/
+ /* of search types, result updates etc */
+ /************************************************************************/
+ {
+ S32 num_results_per_part;
+ /* We evaluate 4 types of results per 4x4 blk. 8x4L and 8x4R and */
+ /* 4x8 T and 4x8B. So if we are to give 4 results, then we need to */
+ /* only evaluate 1 result per part. In the coarse layer, we are */
+ /* limited to 2 results max per part, and max of 8 results. */
+ num_results_per_part = (ps_coarse_prms->num_results + 3) >> 2;
+ hme_init_search_results(
+ ps_search_results,
+ i4_num_ref,
+ ps_coarse_prms->num_results,
+ num_results_per_part,
+ BLK_8x8,
+ 0,
+ 0,
+ ps_ctxt->au1_is_past);
+ }
+
+ /* Macro updates num_blks_in_pic and num_blks_in_row*/
+ GET_NUM_BLKS_IN_PIC(i4_pic_wd, i4_pic_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic);
+
+ num_4x4_blks_in_row = num_blks_in_row + 1;
+
+ s_mv_update_prms.e_search_blk_size = e_search_blk_size;
+ s_mv_update_prms.i4_num_ref = i4_num_ref;
+ s_mv_update_prms.i4_shift = 0;
+
+ /* For full search, support 2 or 4 step size */
+ if(ps_coarse_prms->do_full_search)
+ {
+ ASSERT((ps_coarse_prms->full_search_step == 2) || (ps_coarse_prms->full_search_step == 4));
+ }
+
+ for(i4_i = 0; i4_i < i4_num_ref; i4_i++)
+ {
+ S32 blk, delta_poc;
+ S32 mv_x_clip, mv_y_clip;
+ /* Initialize only the first row */
+ for(blk = 0; blk < num_blks_in_row; blk++)
+ {
+ INIT_SEARCH_NODE(&ps_ctxt->aps_best_search_nodes_4x8_n_rows[i4_i][blk], i4_i);
+ }
+
+ delta_poc = ABS(ps_curr_layer->i4_poc - ps_curr_layer->ai4_ref_id_to_poc_lc[i4_i]);
+
+ /* Setting search range for different references based on the delta poc */
+ /*************************************************************************/
+ /* set the MV limit per ref. pic. */
+ /* - P pic. : Based on the config params. */
+ /* - B/b pic: Based on the Max/Min MV from prev. P and config. param. */
+ /*************************************************************************/
+ {
+ /* TO DO : Remove hard coding of P-P dist. of 4 */
+ mv_x_clip = (ps_curr_layer->i2_max_mv_x * delta_poc) / 4;
+
+ /* Only for B/b pic. */
+ if(1 == ps_ctxt->s_frm_prms.bidir_enabled)
+ {
+ WORD16 i2_mv_y_per_poc;
+
+ /* Get abs MAX for symmetric search */
+ i2_mv_y_per_poc =
+ MAX(ps_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[i4_layer_id],
+ (ABS(ps_ctxt->s_coarse_dyn_range_prms.i2_dyn_min_y_per_poc[i4_layer_id])));
+
+ mv_y_clip = i2_mv_y_per_poc * delta_poc;
+ }
+ /* Set the Config. File Params for P pic. */
+ else
+ {
+ /* TO DO : Remove hard coding of P-P dist. of 4 */
+ mv_y_clip = (ps_curr_layer->i2_max_mv_y * delta_poc) / 4;
+ }
+
+ /* Making mv_x and mv_y range multiple of 4 */
+ mv_x_clip = (((mv_x_clip + 3) >> 2) << 2);
+ mv_y_clip = (((mv_y_clip + 3) >> 2) << 2);
+ /* Clipping the range of mv_x and mv_y */
+ mv_x_clip = CLIP3(mv_x_clip, 4, MAX_MVX_SUPPORTED_IN_COARSE_LAYER);
+ mv_y_clip = CLIP3(mv_y_clip, 4, MAX_MVY_SUPPORTED_IN_COARSE_LAYER);
+
+ as_mv_limit[i4_i].i2_min_x = -mv_x_clip;
+ as_mv_limit[i4_i].i2_min_y = -mv_y_clip;
+ as_mv_limit[i4_i].i2_max_x = mv_x_clip;
+ as_mv_limit[i4_i].i2_max_y = mv_y_clip;
+ }
+ /*Populating SAD block size based on search range */
+ ai4_sad_4x4_block_size[i4_i] = ((2 * mv_x_clip) / ps_coarse_prms->full_search_step) *
+ ((2 * mv_y_clip) / ps_coarse_prms->full_search_step);
+ ai4_sad_4x4_block_stride[i4_i] = (num_blks_in_row + 1) * ai4_sad_4x4_block_size[i4_i];
+ }
+
+ for(i = 0; i < 2 * MAX_INIT_CANDTS; i++)
+ {
+ search_node_t *ps_search_node;
+ ps_search_node = &ps_ctxt->s_init_search_node[i];
+ INIT_SEARCH_NODE(ps_search_node, 0);
+ }
+ for(i = 0; i < 3; i++)
+ {
+ search_node_t *ps_search_node;
+ ps_search_node = &as_left_neighbours[i];
+ INIT_SEARCH_NODE(ps_search_node, 0);
+ ps_search_node = &as_top_neighbours[i];
+ INIT_SEARCH_NODE(ps_search_node, 0);
+ }
+ INIT_SEARCH_NODE(&as_top_neighbours[3], 0);
+ /* Set up place holders to hold the search nodes of each initial candt */
+ for(i = 0; i < MAX_INIT_CANDTS; i++)
+ {
+ ps_search_candts_8x4[i].ps_search_node = &ps_ctxt->s_init_search_node[i];
+
+ ps_search_candts_4x8[i].ps_search_node = &ps_ctxt->s_init_search_node[MAX_INIT_CANDTS + i];
+
+ ps_search_candts_8x4[i].u1_num_steps_refine = (U08)i4_max_iters;
+ ps_search_candts_4x8[i].u1_num_steps_refine = (U08)i4_max_iters;
+ }
+
+ /* For Top,TopLeft and Left cand., no need for refinement */
+ id = 0;
+ if((ps_coarse_prms->do_full_search) && (ME_XTREME_SPEED_25 == e_me_quality_preset))
+ {
+ /* This search candt has the full search result */
+ ps_candt_fs_8x4 = ps_search_candts_8x4[id].ps_search_node;
+ id++;
+ }
+
+ ps_candt_8x4_l = ps_search_candts_8x4[id].ps_search_node;
+ ps_search_candts_8x4[id].u1_num_steps_refine = 0;
+ id++;
+ ps_candt_8x4_t = ps_search_candts_8x4[id].ps_search_node;
+ ps_search_candts_8x4[id].u1_num_steps_refine = 0;
+ id++;
+ ps_candt_8x4_tl = ps_search_candts_8x4[id].ps_search_node;
+ ps_search_candts_8x4[id].u1_num_steps_refine = 0;
+ id++;
+ /* This search candt stores the global candt */
+ global_id_8x4 = id;
+ id++;
+
+ if((ps_coarse_prms->do_full_search) && (ME_XTREME_SPEED_25 != e_me_quality_preset))
+ {
+ /* This search candt has the full search result */
+ ps_candt_fs_8x4 = ps_search_candts_8x4[id].ps_search_node;
+ id++;
+ }
+ /* Don't increment id as (0,0) is removed from cand. list. Initializing */
+ /* the pointer for hme_init_pred_ctxt_no_encode() */
+ ps_candt_zeromv_8x4 = ps_search_candts_8x4[id].ps_search_node;
+
+ /* For Top,TopLeft and Left cand., no need for refinement */
+ id = 0;
+ if((ps_coarse_prms->do_full_search) && (ME_XTREME_SPEED_25 == e_me_quality_preset))
+ {
+ /* This search candt has the full search result */
+ ps_candt_fs_4x8 = ps_search_candts_4x8[id].ps_search_node;
+ id++;
+ }
+
+ ps_candt_4x8_l = ps_search_candts_4x8[id].ps_search_node;
+ ps_search_candts_4x8[id].u1_num_steps_refine = 0;
+ id++;
+ ps_candt_4x8_t = ps_search_candts_4x8[id].ps_search_node;
+ ps_search_candts_4x8[id].u1_num_steps_refine = 0;
+ id++;
+ ps_candt_4x8_tl = ps_search_candts_4x8[id].ps_search_node;
+ ps_search_candts_4x8[id].u1_num_steps_refine = 0;
+ id++;
+ /* This search candt stores the global candt */
+ global_id_4x8 = id;
+ id++;
+ if((ps_coarse_prms->do_full_search) && (ME_XTREME_SPEED_25 != e_me_quality_preset))
+ {
+ /* This search candt has the full search result */
+ ps_candt_fs_4x8 = ps_search_candts_4x8[id].ps_search_node;
+ id++;
+ }
+ /* Don't increment id as (0,0) is removed from cand. list. Initializing */
+ /* the pointer for hme_init_pred_ctxt_no_encode() */
+ ps_candt_zeromv_4x8 = ps_search_candts_4x8[id].ps_search_node;
+
+ /* Zero mv always has 0 mvx and mvy components, ref idx initialized inside */
+ ps_candt_zeromv_8x4->s_mv.i2_mvx = 0;
+ ps_candt_zeromv_8x4->s_mv.i2_mvy = 0;
+ ps_candt_zeromv_4x8->s_mv.i2_mvx = 0;
+ ps_candt_zeromv_4x8->s_mv.i2_mvy = 0;
+
+ /* SET UP THE PRED CTXT FOR L0 AND L1 */
+ {
+ S32 pred_lx;
+
+ /* Bottom left always not available */
+ as_left_neighbours[2].u1_is_avail = 0;
+
+ for(pred_lx = 0; pred_lx < 2; pred_lx++)
+ {
+ pred_ctxt_t *ps_pred_ctxt;
+
+ ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
+ hme_init_pred_ctxt_no_encode(
+ ps_pred_ctxt,
+ ps_search_results,
+ as_top_neighbours,
+ as_left_neighbours,
+ NULL,
+ ps_candt_zeromv_8x4,
+ ps_candt_zeromv_8x4,
+ pred_lx,
+ lambda,
+ ps_coarse_prms->lambda_q_shift,
+ ps_ctxt->apu1_ref_bits_tlu_lc,
+ ps_ctxt->ai2_ref_scf);
+ }
+ }
+
+ /*************************************************************************/
+ /* Initialize the search parameters for search algo with the following */
+ /* parameters: No SATD, calculated number of initial candidates, */
+ /* No post refinement, initial step size and number of iterations as */
+ /* passed by the calling function. */
+ /* Also, we use input for this layer search, and not recon. */
+ /*************************************************************************/
+ if(e_me_quality_preset == ME_XTREME_SPEED_25)
+ s_search_prms_8x4.i4_num_init_candts = 1;
+ else
+ s_search_prms_8x4.i4_num_init_candts = id;
+ s_search_prms_8x4.i4_use_satd = 0;
+ s_search_prms_8x4.i4_start_step = ps_coarse_prms->i4_start_step;
+ s_search_prms_8x4.i4_num_steps_post_refine = 0;
+ s_search_prms_8x4.i4_use_rec = 0;
+ s_search_prms_8x4.ps_search_candts = ps_search_candts_8x4;
+ s_search_prms_8x4.e_blk_size = BLK_8x4;
+ s_search_prms_8x4.i4_max_iters = ps_coarse_prms->i4_max_iters;
+ /* Coarse layer is always explicit */
+ if(ME_MEDIUM_SPEED > e_me_quality_preset)
+ {
+ s_search_prms_8x4.pf_mv_cost_compute = compute_mv_cost_coarse;
+ }
+ else
+ {
+ s_search_prms_8x4.pf_mv_cost_compute = compute_mv_cost_coarse_high_speed;
+ }
+
+ s_search_prms_8x4.i4_inp_stride = 8;
+ s_search_prms_8x4.i4_cu_x_off = s_search_prms_8x4.i4_cu_y_off = 0;
+ if(ps_coarse_prms->do_full_search)
+ s_search_prms_8x4.i4_max_iters = 1;
+ s_search_prms_8x4.i4_part_mask = (1 << PART_ID_2NxN_B);
+ /* Using the member 0 to store for all ref. idx. */
+ s_search_prms_8x4.aps_mv_range[0] = &s_range_prms;
+ s_search_prms_8x4.ps_search_results = ps_search_results;
+ s_search_prms_8x4.full_search_step = ps_coarse_prms->full_search_step;
+
+ s_search_prms_4x8 = s_search_prms_8x4;
+ s_search_prms_4x8.ps_search_candts = ps_search_candts_4x8;
+ s_search_prms_4x8.e_blk_size = BLK_4x8;
+ s_search_prms_4x8.i4_part_mask = (1 << PART_ID_Nx2N_R);
+
+ s_search_prms_4x4 = s_search_prms_8x4;
+ /* Since s_search_prms_4x4 is used only to compute SAD at 4x4 level, search candidate is not used */
+ s_search_prms_4x4.ps_search_candts = ps_search_candts_4x8;
+ s_search_prms_4x4.e_blk_size = BLK_4x4;
+ s_search_prms_4x4.i4_part_mask = (1 << PART_ID_2Nx2N);
+ /*************************************************************************/
+ /* Picture limit on all 4 sides. This will be used to set mv limits for */
+ /* every block given its coordinate. */
+ /*************************************************************************/
+ SET_PIC_LIMIT(
+ s_pic_limit,
+ ps_curr_layer->i4_pad_x_inp,
+ ps_curr_layer->i4_pad_y_inp,
+ ps_curr_layer->i4_wd,
+ ps_curr_layer->i4_ht,
+ s_search_prms_4x4.i4_num_steps_post_refine);
+
+ /* Pick the global mv from previous reference */
+ for(i1_ref_idx = 0; i1_ref_idx < i4_num_ref; i1_ref_idx++)
+ {
+ if(ME_XTREME_SPEED_25 != e_me_quality_preset)
+ {
+ /* Distance of current pic from reference */
+ S32 i4_delta_poc;
+
+ hme_mv_t s_mv;
+ i4_delta_poc = ps_curr_layer->i4_poc - ps_curr_layer->ai4_ref_id_to_poc_lc[i1_ref_idx];
+
+ hme_get_global_mv(ps_prev_layer, &s_mv, i4_delta_poc);
+
+ s_candt_global[i1_ref_idx].s_mv.i2_mvx = s_mv.i2_mv_x;
+ s_candt_global[i1_ref_idx].s_mv.i2_mvy = s_mv.i2_mv_y;
+ s_candt_global[i1_ref_idx].i1_ref_idx = i1_ref_idx;
+
+ /*********************************************************************/
+ /* Initialize the histogram for each reference index in current */
+ /* layer ctxt */
+ /*********************************************************************/
+ hme_init_histogram(
+ ps_ctxt->aps_mv_hist[i1_ref_idx],
+ (S32)as_mv_limit[i1_ref_idx].i2_max_x,
+ (S32)as_mv_limit[i1_ref_idx].i2_max_y);
+ }
+
+ /*********************************************************************/
+ /* Initialize the dyn. search range params. for each reference index */
+ /* in current layer ctxt */
+ /*********************************************************************/
+ /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
+ if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
+ {
+ INIT_DYN_SEARCH_PRMS(
+ &ps_ctxt->s_coarse_dyn_range_prms.as_dyn_range_prms[i4_layer_id][i1_ref_idx],
+ ps_curr_layer->ai4_ref_id_to_poc_lc[i1_ref_idx]);
+ }
+ }
+
+ /*************************************************************************/
+ /* if exhaustive algorithm then we use only 1 candt 0, 0 */
+ /* else we use a lot of causal and non causal candts */
+ /* finally set number to the configured number of candts */
+ /*************************************************************************/
+
+ /* Loop in raster order over each 4x4 blk in a given row till end of frame */
+ while(0 == end_of_frame)
+ {
+ job_queue_t *ps_job;
+ void *pv_hme_dep_mngr;
+ WORD32 offset_val, check_dep_pos, set_dep_pos;
+
+ /* Get the current layer HME Dep Mngr */
+ /* Note : Use layer_id - 1 in HME layers */
+ pv_hme_dep_mngr = ppv_dep_mngr_hme_sync[ps_coarse_prms->i4_layer_id - 1];
+
+ /* Get the current row from the job queue */
+ ps_job = (job_queue_t *)ihevce_pre_enc_grp_get_next_job(
+ ps_multi_thrd_ctxt, ps_multi_thrd_ctxt->i4_me_coarsest_lyr_type, 1, i4_ping_pong);
+
+ /* If all rows are done, set the end of process flag to 1, */
+ /* and the current row to -1 */
+ if(NULL == ps_job)
+ {
+ blk_y = -1;
+ end_of_frame = 1;
+ }
+ else
+ {
+ ASSERT(ps_multi_thrd_ctxt->i4_me_coarsest_lyr_type == ps_job->i4_pre_enc_task_type);
+
+ /* Obtain the current row's details from the job */
+ blk_y = ps_job->s_job_info.s_me_job_info.i4_vert_unit_row_no;
+
+ if(1 == ps_ctxt->s_frm_prms.is_i_pic)
+ {
+ /* set the output dependency of current row */
+ ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong);
+ continue;
+ }
+
+ /* Set Variables for Dep. Checking and Setting */
+ set_dep_pos = blk_y + 1;
+ if(blk_y > 0)
+ {
+ offset_val = 2;
+ check_dep_pos = blk_y - 1;
+ }
+ else
+ {
+ /* First row should run without waiting */
+ offset_val = -1;
+ check_dep_pos = 0;
+ }
+
+ /* Loop over all the blocks in current row */
+ /* One block extra, since the last block in a row needs East block */
+ for(blk_x = 0; blk_x < (num_blks_in_row + 1); blk_x++)
+ {
+ /* Wait till top row block is processed */
+ /* Currently checking till top right block*/
+ if(blk_x < (num_blks_in_row))
+ {
+ ihevce_dmgr_chk_row_row_sync(
+ pv_hme_dep_mngr,
+ blk_x,
+ offset_val,
+ check_dep_pos,
+ 0, /* Col Tile No. : Not supported in PreEnc*/
+ ps_ctxt->thrd_id);
+ }
+
+ /***************************************************************/
+ /* Get Weighted input for all references */
+ /***************************************************************/
+ fp_get_wt_inp(
+ ps_curr_layer,
+ &ps_ctxt->s_wt_pred,
+ 1 << (blk_size_shift + 1),
+ blk_x << blk_size_shift,
+ (blk_y - 1) << blk_size_shift,
+ 1 << (blk_size_shift + 1),
+ i4_num_ref,
+ ps_ctxt->i4_wt_pred_enable_flag);
+
+ /* RESET ALL SEARCH RESULTS FOR THE NEW BLK */
+ hme_reset_search_results(
+ ps_search_results,
+ s_search_prms_8x4.i4_part_mask | s_search_prms_4x8.i4_part_mask,
+ MV_RES_FPEL);
+
+ /* Compute the search node offsets */
+ /* MAX is used to clip when left and top neighbours are not available at coarse boundaries */
+ search_node_top_offset =
+ blk_x + ps_ctxt->ai4_row_index[MAX((blk_y - 2), 0)] * num_blks_in_row;
+ search_node_left_offset =
+ MAX((blk_x - 1), 0) +
+ ps_ctxt->ai4_row_index[MAX((blk_y - 1), 0)] * num_blks_in_row;
+
+ /* Input offset: wrt CU start. Offset for South block */
+ s_search_prms_4x4.i4_cu_x_off = 0;
+ s_search_prms_4x4.i4_cu_y_off = 4;
+ s_search_prms_4x4.i4_inp_stride = 8;
+ s_search_prms_4x4.i4_x_off = blk_x << blk_size_shift;
+ s_search_prms_4x4.i4_y_off = blk_y << blk_size_shift;
+
+ s_search_prms_4x8.i4_x_off = s_search_prms_8x4.i4_x_off = blk_x << blk_size_shift;
+ s_search_prms_4x8.i4_y_off = s_search_prms_8x4.i4_y_off = (blk_y - 1)
+ << blk_size_shift;
+
+ /* This layer will always use explicit ME */
+ /* Loop across different Ref IDx */
+ for(i1_ref_idx = 0; i1_ref_idx < i4_num_ref; i1_ref_idx++)
+ {
+ sad_top_offset = (blk_x * ai4_sad_4x4_block_size[i1_ref_idx]) +
+ ps_ctxt->ai4_row_index[MAX((blk_y - 1), 0)] *
+ ai4_sad_4x4_block_stride[i1_ref_idx];
+ sad_current_offset =
+ (blk_x * ai4_sad_4x4_block_size[i1_ref_idx]) +
+ ps_ctxt->ai4_row_index[blk_y] * ai4_sad_4x4_block_stride[i1_ref_idx];
+
+ /* Initialize search node if blk_x == 0, as it doesn't have left neighbours */
+ if(0 == blk_x)
+ INIT_SEARCH_NODE(
+ &ps_ctxt->aps_best_search_nodes_8x4_n_rows[i1_ref_idx][blk_x],
+ i1_ref_idx);
+
+ pi2_cur_ref_sads_4x4 = ps_ctxt->api2_sads_4x4_n_rows[i1_ref_idx];
+
+ /* Initialize changing params here */
+ s_search_prms_8x4.i1_ref_idx = i1_ref_idx;
+ s_search_prms_4x8.i1_ref_idx = i1_ref_idx;
+ s_search_prms_4x4.i1_ref_idx = i1_ref_idx;
+
+ if(num_blks_in_row == blk_x)
+ {
+ S16 *pi2_sads_4x4_current;
+ /* Since the current 4x4 block will be a padded region, which may not match with any of the reference */
+ pi2_sads_4x4_current = pi2_cur_ref_sads_4x4 + sad_current_offset;
+
+ memset(pi2_sads_4x4_current, 0, ai4_sad_4x4_block_size[i1_ref_idx]);
+ }
+
+ /* SAD to be computed and stored for the 4x4 block in 1st row and the last block of all rows*/
+ if((0 == blk_y) || (num_blks_in_row == blk_x))
+ {
+ S16 *pi2_sads_4x4_current;
+ /* Computer 4x4 SADs for current block */
+ /* Pointer to store SADs */
+ pi2_sads_4x4_current = pi2_cur_ref_sads_4x4 + sad_current_offset;
+
+ hme_derive_worst_case_search_range(
+ &s_range_prms,
+ &s_pic_limit,
+ &as_mv_limit[i1_ref_idx],
+ blk_x << blk_size_shift,
+ blk_y << blk_size_shift,
+ blk_wd,
+ blk_ht);
+
+ if(ME_PRISTINE_QUALITY >= e_me_quality_preset)
+ {
+ ((ihevce_me_optimised_function_list_t *)
+ ps_ctxt->pv_me_optimised_function_list)
+ ->pf_store_4x4_sads_high_quality(
+ &s_search_prms_4x4,
+ ps_curr_layer,
+ &as_mv_limit[i1_ref_idx],
+ &ps_ctxt->s_wt_pred,
+ pi2_sads_4x4_current);
+ }
+ else
+ {
+ ((ihevce_me_optimised_function_list_t *)
+ ps_ctxt->pv_me_optimised_function_list)
+ ->pf_store_4x4_sads_high_speed(
+ &s_search_prms_4x4,
+ ps_curr_layer,
+ &as_mv_limit[i1_ref_idx],
+ &ps_ctxt->s_wt_pred,
+ pi2_sads_4x4_current);
+ }
+ }
+ else
+ {
+ /* For the zero mv candt, the ref idx to be modified */
+ ps_candt_zeromv_8x4->i1_ref_idx = i1_ref_idx;
+ ps_candt_zeromv_4x8->i1_ref_idx = i1_ref_idx;
+
+ if(ME_XTREME_SPEED_25 != e_me_quality_preset)
+ {
+ /* For the global mvs alone, the search node points to a local variable */
+ ps_search_candts_8x4[global_id_8x4].ps_search_node =
+ &s_candt_global[i1_ref_idx];
+ ps_search_candts_4x8[global_id_4x8].ps_search_node =
+ &s_candt_global[i1_ref_idx];
+ }
+
+ hme_get_spatial_candt(
+ ps_curr_layer,
+ BLK_4x4,
+ blk_x,
+ blk_y - 1,
+ i1_ref_idx,
+ as_top_neighbours,
+ as_left_neighbours,
+ 0,
+ 1,
+ 0,
+ 0);
+ /* set up the various candts */
+ *ps_candt_4x8_l = as_left_neighbours[0];
+ *ps_candt_4x8_t = as_top_neighbours[1];
+ *ps_candt_4x8_tl = as_top_neighbours[0];
+ *ps_candt_8x4_l = *ps_candt_4x8_l;
+ *ps_candt_8x4_tl = *ps_candt_4x8_tl;
+ *ps_candt_8x4_t = *ps_candt_4x8_t;
+
+ {
+ S32 pred_lx;
+ S16 *pi2_sads_4x4_current, *pi2_sads_4x4_top;
+ pred_ctxt_t *ps_pred_ctxt;
+ PF_MV_COST_FXN pf_mv_cost_compute;
+
+ /* Computer 4x4 SADs for current block */
+ /* Pointer to store SADs */
+ pi2_sads_4x4_current = pi2_cur_ref_sads_4x4 + sad_current_offset;
+
+ hme_derive_worst_case_search_range(
+ &s_range_prms,
+ &s_pic_limit,
+ &as_mv_limit[i1_ref_idx],
+ blk_x << blk_size_shift,
+ blk_y << blk_size_shift,
+ blk_wd,
+ blk_ht);
+ if(i4_pic_ht == blk_y)
+ {
+ memset(pi2_sads_4x4_current, 0, ai4_sad_4x4_block_size[i1_ref_idx]);
+ }
+ else
+ {
+ if(ME_PRISTINE_QUALITY >= e_me_quality_preset)
+ {
+ ((ihevce_me_optimised_function_list_t *)
+ ps_ctxt->pv_me_optimised_function_list)
+ ->pf_store_4x4_sads_high_quality(
+ &s_search_prms_4x4,
+ ps_curr_layer,
+ &as_mv_limit[i1_ref_idx],
+ &ps_ctxt->s_wt_pred,
+ pi2_sads_4x4_current);
+ }
+ else
+ {
+ ((ihevce_me_optimised_function_list_t *)
+ ps_ctxt->pv_me_optimised_function_list)
+ ->pf_store_4x4_sads_high_speed(
+ &s_search_prms_4x4,
+ ps_curr_layer,
+ &as_mv_limit[i1_ref_idx],
+ &ps_ctxt->s_wt_pred,
+ pi2_sads_4x4_current);
+ }
+ }
+ /* Set pred direction to L0 or L1 */
+ pred_lx = 1 - ps_search_results->pu1_is_past[i1_ref_idx];
+
+ /* Suitable context (L0 or L1) */
+ ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
+
+ /* Coarse layer is always explicit */
+ if(ME_PRISTINE_QUALITY > e_me_quality_preset)
+ {
+ pf_mv_cost_compute = compute_mv_cost_coarse;
+ }
+ else
+ {
+ /* Cost function is not called in high speed case. Below one is just a dummy function */
+ pf_mv_cost_compute = compute_mv_cost_coarse_high_speed;
+ }
+
+ /*********************************************************************/
+ /* Now, compute the mv for the top block */
+ /*********************************************************************/
+ pi2_sads_4x4_top = pi2_cur_ref_sads_4x4 + sad_top_offset;
+
+ /*********************************************************************/
+ /* For every blk in the picture, the search range needs to be derived*/
+ /* Any blk can have any mv, but practical search constraints are */
+ /* imposed by the picture boundary and amt of padding. */
+ /*********************************************************************/
+ hme_derive_search_range(
+ &s_range_prms,
+ &s_pic_limit,
+ &as_mv_limit[i1_ref_idx],
+ blk_x << blk_size_shift,
+ (blk_y - 1) << blk_size_shift,
+ blk_wd,
+ blk_ht);
+
+ /* Computer the mv for the top block */
+ if(ME_PRISTINE_QUALITY >= e_me_quality_preset)
+ {
+ ((ihevce_me_optimised_function_list_t *)
+ ps_ctxt->pv_me_optimised_function_list)
+ ->pf_combine_4x4_sads_and_compute_cost_high_quality(
+ i1_ref_idx,
+ &s_range_prms, /* Both 4x8 and 8x4 has same search range */
+ &as_mv_limit[i1_ref_idx],
+ &best_mv_4x8,
+ &best_mv_8x4,
+ ps_pred_ctxt,
+ pf_mv_cost_compute,
+ pi2_sads_4x4_top, /* Current SAD block */
+ (pi2_sads_4x4_top +
+ ai4_sad_4x4_block_size[i1_ref_idx]), /* East SAD block */
+ pi2_sads_4x4_current); /* South SAD block */
+ }
+ else
+ {
+ ((ihevce_me_optimised_function_list_t *)
+ ps_ctxt->pv_me_optimised_function_list)
+ ->pf_combine_4x4_sads_and_compute_cost_high_speed(
+ i1_ref_idx,
+ &s_range_prms, /* Both 4x8 and 8x4 has same search range */
+ &as_mv_limit[i1_ref_idx],
+ &best_mv_4x8,
+ &best_mv_8x4,
+ ps_pred_ctxt,
+ pf_mv_cost_compute,
+ pi2_sads_4x4_top, /* Current SAD block */
+ (pi2_sads_4x4_top +
+ ai4_sad_4x4_block_size[i1_ref_idx]), /* East SAD block */
+ pi2_sads_4x4_current); /* South SAD block */
+ }
+
+ ps_candt_fs_4x8->s_mv.i2_mvx = best_mv_4x8.i2_mv_x;
+ ps_candt_fs_4x8->s_mv.i2_mvy = best_mv_4x8.i2_mv_y;
+ ps_candt_fs_4x8->i1_ref_idx = i1_ref_idx;
+
+ ps_candt_fs_8x4->s_mv.i2_mvx = best_mv_8x4.i2_mv_x;
+ ps_candt_fs_8x4->s_mv.i2_mvy = best_mv_8x4.i2_mv_y;
+ ps_candt_fs_8x4->i1_ref_idx = i1_ref_idx;
+ }
+
+ /* call the appropriate Search Algo for 4x8S. The 4x8N would */
+ /* have already been called by top block */
+ hme_pred_search_square_stepn(
+ &s_search_prms_8x4,
+ ps_curr_layer,
+ &ps_ctxt->s_wt_pred,
+ e_me_quality_preset,
+ (ihevce_me_optimised_function_list_t *)
+ ps_ctxt->pv_me_optimised_function_list
+
+ );
+
+ /* Call the appropriate search algo for 8x4E */
+ hme_pred_search_square_stepn(
+ &s_search_prms_4x8,
+ ps_curr_layer,
+ &ps_ctxt->s_wt_pred,
+ e_me_quality_preset,
+ (ihevce_me_optimised_function_list_t *)
+ ps_ctxt->pv_me_optimised_function_list);
+
+ if(ME_XTREME_SPEED_25 != e_me_quality_preset)
+ {
+ /* Histogram updates across different Ref ID for global MV */
+ hme_update_histogram(
+ ps_ctxt->aps_mv_hist[i1_ref_idx],
+ aps_best_search_node_8x4[i1_ref_idx]->s_mv.i2_mvx,
+ aps_best_search_node_8x4[i1_ref_idx]->s_mv.i2_mvy);
+ hme_update_histogram(
+ ps_ctxt->aps_mv_hist[i1_ref_idx],
+ aps_best_search_node_4x8[i1_ref_idx]->s_mv.i2_mvx,
+ aps_best_search_node_4x8[i1_ref_idx]->s_mv.i2_mvy);
+ }
+
+ /* update the best results to the mv bank */
+ hme_update_mv_bank_coarse(
+ ps_search_results,
+ ps_curr_layer->ps_layer_mvbank,
+ blk_x,
+ (blk_y - 1),
+ ps_ctxt->aps_best_search_nodes_4x8_n_rows[i1_ref_idx] +
+ search_node_top_offset, /* Top Candidate */
+ ps_ctxt->aps_best_search_nodes_8x4_n_rows[i1_ref_idx] +
+ search_node_left_offset, /* Left candidate */
+ i1_ref_idx,
+ &s_mv_update_prms);
+
+ /* Copy the best search result to 5 row array for future use */
+ *(ps_ctxt->aps_best_search_nodes_4x8_n_rows[i1_ref_idx] + blk_x +
+ ps_ctxt->ai4_row_index[blk_y - 1] * num_blks_in_row) =
+ *(aps_best_search_node_4x8[i1_ref_idx]);
+
+ *(ps_ctxt->aps_best_search_nodes_8x4_n_rows[i1_ref_idx] + blk_x +
+ ps_ctxt->ai4_row_index[blk_y - 1] * num_blks_in_row) =
+ *(aps_best_search_node_8x4[i1_ref_idx]);
+
+ /* UPDATE the MIN and MAX MVs for Dynamical Search Range for each ref. pic. */
+ /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
+ if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
+ {
+ WORD32 num_mvs, i, j;
+ search_node_t *aps_search_nodes[4];
+ /* Best results for 8x4R and 4x8B blocks */
+ search_node_t *ps_search_node_8x4_r, *ps_search_node_4x8_b;
+
+ num_mvs = ps_curr_layer->ps_layer_mvbank->i4_num_mvs_per_ref;
+
+ /*************************************************************************/
+ /* We have atleast 4 distinct results: the 4x8 top (coming from top blk) */
+ /* 8x4 left (coming from left blk), 8x4 and 4x8 right and bot resp. */
+ /* If number of results to be stored is 4, then we store all these 4 */
+ /* results, else we pick best ones */
+ /*************************************************************************/
+ ps_search_node_8x4_r =
+ ps_search_results->aps_part_results[i1_ref_idx][PART_ID_2NxN_B];
+ ps_search_node_4x8_b =
+ ps_search_results->aps_part_results[i1_ref_idx][PART_ID_Nx2N_R];
+
+ ASSERT(num_mvs <= 4);
+
+ /* Doing this to sort best results */
+ aps_search_nodes[0] = ps_search_node_8x4_r;
+ aps_search_nodes[1] = ps_search_node_4x8_b;
+ aps_search_nodes[2] =
+ ps_ctxt->aps_best_search_nodes_8x4_n_rows[i1_ref_idx] +
+ search_node_left_offset; /* Left candidate */
+ aps_search_nodes[3] =
+ ps_ctxt->aps_best_search_nodes_4x8_n_rows[i1_ref_idx] +
+ search_node_top_offset; /* Top Candidate */
+
+ /* Note : Need to be resolved!!! */
+ /* Added this to match with "hme_update_mv_bank_coarse" */
+ if(num_mvs != 4)
+ {
+ /* Run through the results, store them in best to worst order */
+ for(i = 0; i < num_mvs; i++)
+ {
+ for(j = i + 1; j < 4; j++)
+ {
+ if(aps_search_nodes[j]->i4_tot_cost <
+ aps_search_nodes[i]->i4_tot_cost)
+ {
+ SWAP_HME(
+ aps_search_nodes[j],
+ aps_search_nodes[i],
+ search_node_t *);
+ }
+ }
+ }
+ }
+
+ /* UPDATE the MIN and MAX MVs for Dynamical Search Range for each ref. pic. */
+ for(i = 0; i < num_mvs; i++)
+ {
+ hme_update_dynamic_search_params(
+ &ps_ctxt->s_coarse_dyn_range_prms
+ .as_dyn_range_prms[i4_layer_id][i1_ref_idx],
+ aps_search_nodes[i]->s_mv.i2_mvy);
+ }
+ }
+ }
+ }
+
+ /* Update the number of blocks processed in the current row */
+ ihevce_dmgr_set_row_row_sync(
+ pv_hme_dep_mngr,
+ (blk_x + 1),
+ blk_y,
+ 0 /* Col Tile No. : Not supported in PreEnc*/);
+ }
+
+ /* set the output dependency after completion of row */
+ ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong);
+ }
+ }
+
+ return;
+}
diff --git a/encoder/hme_coarse.h b/encoder/hme_coarse.h
new file mode 100644
index 0000000..6c617e4
--- /dev/null
+++ b/encoder/hme_coarse.h
@@ -0,0 +1,80 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file hme_coarse.h
+*
+* \brief
+* Prototypes for coarse layer functions
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _HME_COARSE_H_
+#define _HME_COARSE_H_
+
+/*****************************************************************************/
+/* Functions */
+/*****************************************************************************/
+
+/**
+********************************************************************************
+* @fn void hme_coarse_frm_init(coarse_me_ctxt_t *ps_ctxt, coarse_prms_t *ps_coarse_prms)
+*
+* @brief Frame init entry point for Coarse ME.
+*
+* @param[in,out] ps_ctxt : Coarse ME context handle
+*
+* @param[in] ps_coarse_prms : Coarse layer config params
+*
+* @return None
+********************************************************************************
+*/
+void hme_coarse_frm_init(coarse_me_ctxt_t *ps_ctxt, coarse_prms_t *ps_coarse_prms);
+
+/**
+********************************************************************************
+* @fn void hme_coarsest(coarse_me_ctxt_t *ps_ctxt, coarse_prms_t *ps_coarse_prms, ...)
+*
+* @brief Top level entry point for Coarse ME. Runs across blks and searches
+* at a 4x4 blk granularity by using 4x8 and 8x4 patterns.
+*
+* @param[in,out] ps_ctxt : Coarse ME context handle
+* @param[in] ps_coarse_prms : Coarse layer config params
+* @param ps_multi_thrd_ctxt : Multi-thread ctxt; the per-row pre-enc jobs are fetched through it
+* @param[in] i4_ping_pong : Ping-pong index passed on to the pre-enc job queue calls
+* @param[in] ppv_dep_mngr_hme_sync : HME dependency manager handles, indexed by (layer id - 1)
+*
+* @return None
+********************************************************************************
+*/
+void hme_coarsest(
+ coarse_me_ctxt_t *ps_ctxt,
+ coarse_prms_t *ps_coarse_prms,
+ multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
+ WORD32 i4_ping_pong,
+ void **ppv_dep_mngr_hme_sync);
+
+#endif /* #ifndef _HME_COARSE_H_ */
diff --git a/encoder/hme_common_defs.h b/encoder/hme_common_defs.h
new file mode 100644
index 0000000..eb44d1e
--- /dev/null
+++ b/encoder/hme_common_defs.h
@@ -0,0 +1,114 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file hme_common_defs.h
+*
+* \brief
+* Important definitions, enumerations, macros and structures used by ME
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _HME_COMMON_DEFS_H_
+#define _HME_COMMON_DEFS_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+#define MAX_32BIT_VAL (0x7FFFFFFF) /* largest signed 32-bit value (2^31 - 1) */
+#define MAX_SIGNED_16BIT_VAL (0x07FFF) /* largest signed 16-bit value (32767) */
+#define INTERP_INTERMED_BUF_SIZE (72 * 72 * 2) /* = 10368; units and the 72x72x2 layout are not visible here - TODO confirm */
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+#define HME_CLIP(x, min, max) (((x) < (min)) ? (min) : (((x) > (max)) ? (max) : (x))) /* yields x clamped to [min, max]; does not modify x */
+
+#define ARG_NOT_USED(x) ((void)(x)) /* evaluates x and discards the value; typically marks an intentionally unused argument */
+/**
+******************************************************************************
+ * @brief Average of 2 integers, rounded up at .5: (x + y + 1) >> 1 (integer types only)
+******************************************************************************
+*/
+#define AVG2(x, y) (((x) + (y) + 1) >> 1)
+
+#define FLOOR16(x) ((x) & (~15)) /* x with its low 4 bits cleared, i.e. rounded down to a multiple of 16 */
+#define FLOOR8(x) ((x) & (~7)) /* x with its low 3 bits cleared, i.e. rounded down to a multiple of 8 */
+
+#define SET_PIC_LIMIT(s_pic_limit, pad_x, pad_y, wd, ht, num_post_refine) \
+    { /* Limits = padded picture, pulled in by num_post_refine on all 4 sides; stored as S16 */ \
+        (s_pic_limit).i2_min_x = (S16)(-(pad_x) + (num_post_refine)); /* any struct lvalue works, e.g. *ps_lim */ \
+        (s_pic_limit).i2_min_y = (S16)(-(pad_y) + (num_post_refine)); \
+        (s_pic_limit).i2_max_x = (S16)((wd) + (pad_x) - (num_post_refine)); \
+        (s_pic_limit).i2_max_y = (S16)((ht) + (pad_y) - (num_post_refine)); \
+    }
+
+#define SCALE_FOR_POC_DELTA(x, y, node, ref_tgt, pi2_ref_scf) \
+    { /* x,y = node's MV times the 1/256-unit factor for (ref_tgt, node ref idx), rounded, clipped to 16 bit */ \
+        x = (node)->s_mv.i2_mvx; /* x and y are assigned: pass lvalues (presumably 32-bit, so the product fits) */ \
+        y = (node)->s_mv.i2_mvy; \
+        x = x * (pi2_ref_scf)[(ref_tgt) * MAX_NUM_REF + (node)->i1_ref_idx]; \
+        y = y * (pi2_ref_scf)[(ref_tgt) * MAX_NUM_REF + (node)->i1_ref_idx]; \
+        x = (x + 128) >> 8; /* add half (128/256) then drop the 8 fraction bits */ \
+        y = (y + 128) >> 8; \
+        x = HME_CLIP(x, -32768, 32767); /* HME_CLIP only yields a value; it must be assigned back */ \
+        y = HME_CLIP(y, -32768, 32767); \
+    }
+
+#define SWAP_HME(a, b, data_type) \
+ { /* exchange the values of lvalues a and b, both of type data_type */ \
+ data_type temp = a; /* local is named temp, so neither a nor b may be an expression using a variable called temp */ \
+ a = b; \
+ b = temp; \
+ }
+
+/**
+******************************************************************************
+ * @brief Check if MV (x, y) lies strictly inside *range (values equal to a min/max bound fail)
+******************************************************************************
+*/
+#define CHECK_MV_WITHIN_RANGE(x, y, range) \
+ (((x) > (range)->i2_min_x) && ((x) < (range)->i2_max_x) && ((y) > (range)->i2_min_y) && \
+ ((y) < (range)->i2_max_y))
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+
+/**
+ ******************************************************************************
+ * @struct hme_mv_t
+ * @brief Basic Motion vector structure (x and y components)
+ ******************************************************************************
+*/
+typedef struct
+{
+ S16 i2_mv_x; /* x component of the motion vector */
+ S16 i2_mv_y; /* y component of the motion vector */
+} hme_mv_t;
+
+#endif /* #ifndef _HME_COMMON_DEFS_H_ */
diff --git a/encoder/hme_common_utils.c b/encoder/hme_common_utils.c
new file mode 100644
index 0000000..8509bb7
--- /dev/null
+++ b/encoder/hme_common_utils.c
@@ -0,0 +1,188 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <math.h>
+#include <time.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "ihevc_macros.h"
+#include "ihevc_debug.h"
+#include "ihevc_platform_macros.h"
+
+#include "hme_datatype.h"
+#include "hme_common_defs.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+********************************************************************************
+* @fn S16 median4_s16(S16 i2_n1, S16 i2_n2, S16 i2_n3, S16 i2_n4);
+*
+* @brief Returns the median of four 16-bit signed numbers
+*
+* @param[in] i2_n1 : first number
+*
+* @param[in] i2_n2 : 2nd number
+*
+* @param[in] i2_n3 : 3rd number
+*
+* @param[in] i2_n4 : 4th number (order does not matter)
+*
+* @return median of the four inputs: sum of the two middle values, shifted right by 1
+********************************************************************************
+*/
+S16 median4_s16(S16 i2_n1, S16 i2_n2, S16 i2_n3, S16 i2_n4)
+{
+ S16 i2_max, i2_min;
+
+ i2_max = MAX(i2_n1, i2_n2);
+ i2_max = MAX(i2_max, i2_n3);
+ i2_max = MAX(i2_max, i2_n4);
+
+ i2_min = MIN(i2_n1, i2_n2);
+ i2_min = MIN(i2_min, i2_n3);
+ i2_min = MIN(i2_min, i2_n4);
+ /* Remove the largest and smallest from the total; what is left is the two middle values */
+ return ((S16)((i2_n1 + i2_n2 + i2_n3 + i2_n4 - i2_max - i2_min) >> 1));
+}
+
+U32 hme_compute_2d_sum_u08(U08 *pu1_inp, S32 i4_wd, S32 i4_ht, S32 i4_stride)
+{
+ S32 i, j;
+ U32 u4_sum = 0;
+ /* Sum i4_wd U08 entries from each of i4_ht rows; consecutive rows are i4_stride entries apart */
+ for(i = 0; i < i4_ht; i++)
+ {
+ for(j = 0; j < i4_wd; j++)
+ u4_sum += (U32)pu1_inp[j];
+
+ pu1_inp += i4_stride;
+ }
+
+ return (u4_sum);
+}
+U32 hme_compute_2d_sum_u16(U16 *pu2_inp, S32 i4_wd, S32 i4_ht, S32 i4_stride)
+{
+ S32 i, j;
+ U32 u4_sum = 0;
+ /* Sum i4_wd U16 entries from each of i4_ht rows; consecutive rows are i4_stride entries apart */
+ for(i = 0; i < i4_ht; i++)
+ {
+ for(j = 0; j < i4_wd; j++)
+ u4_sum += (U32)pu2_inp[j];
+
+ pu2_inp += i4_stride;
+ }
+
+ return (u4_sum);
+}
+U32 hme_compute_2d_sum_u32(U32 *pu4_inp, S32 i4_wd, S32 i4_ht, S32 i4_stride)
+{
+ S32 i, j;
+ U32 u4_sum = 0;
+ /* Sum i4_wd U32 entries from each of i4_ht rows; consecutive rows are i4_stride entries apart */
+ for(i = 0; i < i4_ht; i++)
+ {
+ for(j = 0; j < i4_wd; j++)
+ u4_sum += (U32)pu4_inp[j];
+
+ pu4_inp += i4_stride;
+ }
+
+ return (u4_sum);
+}
+/**
+********************************************************************************
+* @fn U32 hme_compute_2d_sum_unsigned(void *pv_inp,
+* S32 i4_blk_wd,
+* S32 i4_blk_ht,
+* S32 i4_stride,
+* S32 i4_datatype)
+*
+* @brief Computes and returns the 2D sum of an unsigned 2D buffer, with datatype
+* equal to 8/16/32 bit.
+*
+* @param[in] pv_inp : input pointer
+*
+* @param[in] i4_blk_wd : block width
+*
+* @param[in] i4_blk_ht : block ht
+*
+* @param[in] i4_stride : stride, in entries of the selected datatype
+*
+* @param[in] i4_datatype : datatype 1 - 8 bit, 2 - 16 bit, 4 - 32 bit
+*
+* @return sum of i4_blk_wd * i4_blk_ht number of entries starting at pv_inp
+********************************************************************************
+*/
+
+U32 hme_compute_2d_sum_unsigned(
+ void *pv_inp, S32 i4_blk_wd, S32 i4_blk_ht, S32 i4_stride, S32 i4_datatype)
+{
+ if(i4_datatype == sizeof(U08))
+ return (hme_compute_2d_sum_u08((U08 *)pv_inp, i4_blk_wd, i4_blk_ht, i4_stride));
+ else if(i4_datatype == sizeof(U16))
+ return (hme_compute_2d_sum_u16((U16 *)pv_inp, i4_blk_wd, i4_blk_ht, i4_stride));
+ else if(i4_datatype == sizeof(U32))
+ return (hme_compute_2d_sum_u32((U32 *)pv_inp, i4_blk_wd, i4_blk_ht, i4_stride));
+ else
+ ASSERT(0);
+ /* Fallback value for an unsupported i4_datatype, reached only if ASSERT(0) returns */
+ return 0;
+}
+
+/**
+********************************************************************************
+* @fn S32 get_rand_num(S32 low, S32 high)
+*
+* @brief returns a random integer in the closed interval [low, high - 1]
+*
+* @param[in] low : lower limit (inclusive)
+*
+* @param[in] high : upper limit (exclusive; results are clamped to high - 1)
+*
+* @return S32 result: the random number
+********************************************************************************
+*/
+S32 get_rand_num(S32 low, S32 high)
+{
+ double num;
+ S32 result;
+ num = (double)rand() / (double)RAND_MAX;
+ num = num * (high - low) + low;
+ /* Round to the nearest integer, then clamp into [low, high - 1] */
+ result = (S32)floor((num + 0.5));
+ if(result < low)
+ result = low;
+ if(result >= high)
+ result = high - 1;
+
+ return (result);
+}
diff --git a/encoder/hme_common_utils.h b/encoder/hme_common_utils.h
new file mode 100644
index 0000000..422dea6
--- /dev/null
+++ b/encoder/hme_common_utils.h
@@ -0,0 +1,133 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*!
+******************************************************************************
+* \file hme_common_utils.h
+*
+* \brief
+* Common utility functions used by ME
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _HME_COMMON_UTILS_H_
+#define _HME_COMMON_UTILS_H_
+
+#include "ihevc_platform_macros.h"
+
+/*****************************************************************************/
+/* Macros */
+/*****************************************************************************/
+
+#define MEDIAN4(a, b, c, d, e) (median4_##e(a, b, c, d))
+
+/*****************************************************************************/
+/* Functions */
+/*****************************************************************************/
+/**
+********************************************************************************
+* @fn S32 median4_s16(S16 i2_n1, S16 i2_n2, S16 i2_n3, S16 i2_n4);
+*
+* @brief Returns the median of four 16-bit signed numbers
+*
+* @param[in] i2_n1 : first number
+*
+* @param[in] i2_n2 : 2nd number
+*
+* @param[in] i2_n3 : 3rd number
+*
+* @param[in] i2_n4 : 4th number (order does not matter)
+*
+* @return median of the four input numbers
+********************************************************************************
+*/
+S16 median4_s16(S16 i2_n1, S16 i2_n2, S16 i2_n3, S16 i2_n4);
+
+/**
+********************************************************************************
+* @fn S32 hme_get_range(U32 u4_num);
+*
+* @brief Returns the range of the number
+*
+* @param[in] u4_num : number whose range is to be found
+*
+* @return range of the number
+********************************************************************************
+*/
+
+static INLINE S32 hme_get_range(U32 u4_num)
+{
+ S32 r;
+
+ GETRANGE(r, u4_num);
+ return (r);
+}
+
+/**
+********************************************************************************
+* @fn U32 hme_compute_2d_sum_unsigned(void *pv_inp,
+* S32 i4_blk_wd,
+* S32 i4_blk_ht,
+* S32 i4_stride,
+* S32 i4_datatype)
+*
+* @brief Computes and returns 2D sum of a unsigned 2d buffer, with datatype
+* equal to 8/16/32 bit.
+*
+* @param[in] pv_inp : input pointer
+*
+* @param[in] i4_blk_wd : block width
+*
+* @param[in] i4_blk_ht : block ht
+*
+* @param[in] i4_stride : stride
+*
+* @param[in] i4_datatype : datatype 1 - 8 bit, 2 - 16 bit, 4 - 32 bit
+*
+* @return sum of i4_blk_wd * i4_blk_ht number of entries starting at pv_inp
+********************************************************************************
+*/
+
+U32 hme_compute_2d_sum_unsigned(
+ void *pv_inp, S32 i4_blk_wd, S32 i4_blk_ht, S32 i4_stride, S32 i4_datatype);
+
+/**
+********************************************************************************
+* @fn S32 get_rand_num(S32 low, S32 high)
+*
+* @brief returns a random integer in the closed interval [low, high - 1]
+*
+* @param[in] low : lower limit
+*
+* @param[in] high : higher limit
+*
+* @return S32 result: the random number
+********************************************************************************
+*/
+S32 get_rand_num(S32 low, S32 high);
+
+#endif /* #ifndef _HME_COMMON_UTILS_H_ */
diff --git a/encoder/hme_datatype.h b/encoder/hme_datatype.h
new file mode 100644
index 0000000..8e73e28
--- /dev/null
+++ b/encoder/hme_datatype.h
@@ -0,0 +1,48 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file hme_datatype.h
+*
+* \brief
+* Alias for data types used by hme
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+#ifndef _HME_DATATYPE_H_
+#define _HME_DATATYPE_H_
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+typedef UWORD8 U08;
+typedef WORD8 S08;
+typedef UWORD16 U16;
+typedef WORD16 S16;
+typedef UWORD32 U32;
+typedef WORD32 S32;
+
+#endif
diff --git a/encoder/hme_defs.h b/encoder/hme_defs.h
new file mode 100644
index 0000000..7f7a9d9
--- /dev/null
+++ b/encoder/hme_defs.h
@@ -0,0 +1,3419 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file hme_defs.h
+*
+* \brief
+* Important definitions, enumerations, macros and structures used by ME
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _HME_DEFS_H_
+#define _HME_DEFS_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+/**
+*******************************************************************************
+@brief Blk size of the CTB in the max possible case
+*******************************************************************************
+ */
+#define CTB_BLK_SIZE 64
+
+/**
+*******************************************************************************
+@brief Maximum number of results per partition
+*******************************************************************************
+ */
+#define MAX_RESULTS_PER_PART 2
+
+/**
+*******************************************************************************
+@brief Not used currently
+*******************************************************************************
+ */
+#define MAX_NUM_UNIFIED_RESULTS 10
+#define MAX_NUM_CTB_NODES 10
+
+/**
+*******************************************************************************
+@brief For 64x64 CTB, we have 16x16 MV grid for prediction purposes (cost calc)
+This has 1 padding at boundaries for causal neighbours
+*******************************************************************************
+ */
+#define CTB_MV_GRID_PAD 1
+
+/**
+*******************************************************************************
+@brief number of bits per bin
+*******************************************************************************
+ */
+#define HME_CABAC_BITS_PER_BIN 0.5
+
+/**
+*******************************************************************************
+@brief bin count to bit count conversion: scales x by HME_CABAC_BITS_PER_BIN, adds 0.5, casts to U08
+*******************************************************************************
+ */
+#define HME_GET_CAB_BIT(x) ((U08)(((x)*HME_CABAC_BITS_PER_BIN + 0.5)))
+
+/**
+*******************************************************************************
+@brief Columns in the MV grid
+*******************************************************************************
+ */
+#define NUM_COLUMNS_IN_CTB_GRID (((CTB_BLK_SIZE) >> 2) + (2 * CTB_MV_GRID_PAD))
+
+/**
+*******************************************************************************
+@brief Rows in MV grid
+*******************************************************************************
+ */
+#define NUM_ROWS_IN_CTB_GRID (NUM_COLUMNS_IN_CTB_GRID)
+
+/**
+*******************************************************************************
+@brief Total number of MVs held in CTB grid for prediction purposes
+*******************************************************************************
+ */
+#define NUM_MVS_IN_CTB_GRID ((NUM_COLUMNS_IN_CTB_GRID) * (NUM_ROWS_IN_CTB_GRID))
+
+/**
+*******************************************************************************
+@brief Max number of candidates used for refinement during CU merge stage
+*******************************************************************************
+ */
+#define MAX_MERGE_CANDTS 64
+
+/**
+*******************************************************************************
+@brief For BIDIR refinement, we use 2I-P0 as input, done max at CTB level, so
+stride for this input is 64
+*******************************************************************************
+ */
+#define BACK_PREDICTION_INPUT_STRIDE 64
+
+/**
+*******************************************************************************
+@brief We basically store an impossible and unique MV to identify intra blks
+or CUs
+*******************************************************************************
+ */
+#define INTRA_MV 0x4000
+
+/**
+*******************************************************************************
+@brief Defines the largest CTB supported by HME
+*******************************************************************************
+ */
+#define HME_MAX_CTB_SIZE 64
+
+/**
+*******************************************************************************
+@brief Maximum number of 16x16 blks possible in a CTB. The basic search unit
+in the encode layer is 16x16
+*******************************************************************************
+ */
+#define HME_MAX_16x16_IN_CTB ((HME_MAX_CTB_SIZE >> 4) * (HME_MAX_CTB_SIZE >> 4))
+
+/**
+*******************************************************************************
+@brief Max number of 8x8s possible in a CTB, this in other words is also the
+maximum number of CUs possible in a CTB
+*******************************************************************************
+ */
+#define HME_MAX_8x8_IN_CTB ((HME_MAX_CTB_SIZE >> 3) * (HME_MAX_CTB_SIZE >> 3))
+
+/**
+*******************************************************************************
+@brief Maximum number of init candts supported for refinement search.
+*******************************************************************************
+ */
+#define MAX_INIT_CANDTS 60
+
+/**
+*******************************************************************************
+@brief Maximum MV in X and Y directions in fullpel units allowed in any layer
+Any computed range for MV has to be within this
+*******************************************************************************
+ */
+#define MAX_MV_X_FINEST 1024
+#define MAX_MV_Y_FINEST 512
+
+#define MAX_NUM_RESULTS 10
+
+#define USE_MODIFIED 1
+
+#define ENABLE_EXPLICIT_SEARCH_IN_P_IN_L0 1
+
+#define ENABLE_EXPLICIT_SEARCH_IN_PQ 0
+
+/**
+*******************************************************************************
+@brief Driven by reasoning that we can tolerate an error of 4 in global mv
+ in coarsest layer per comp, assuming we have search range of 1024x512, the mv
+ range in coarse layer is 128x64, total bins is then 256/4 x 128/4 or 2K bins
+*******************************************************************************
+ */
+#define LOG_MAX_NUM_BINS 11
+#define MAX_NUM_BINS (1 << LOG_MAX_NUM_BINS)
+
+#define NEXT_BLOCK_OFFSET_IN_L0_ME 22
+
+#define PREV_BLOCK_OFFSET_IN_L0_ME 6
+
+#define COLOCATED_BLOCK_OFFSET 2
+
+#define COLOCATED_4X4_NEXT_BLOCK_OFFSET 14
+
+#define MAP_X_MAX 16
+
+#define MAP_Y_MAX 16
+
+#define NUM_POINTS_IN_RECTANGULAR_GRID 9
+
+/*
+******************************************************************************
+@brief Maximum number of elements in the sigmaX and sigmaX-Square array
+computed at 4x4 level for any CU size
+******************************************************************************
+*/
+#define MAX_NUM_SIGMAS_4x4 256
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+@brief Calculates number of blks in picture, given width, ht, and a variable
+shift that controls basic blk size
+*******************************************************************************
+ */
+#define GET_NUM_BLKS_IN_PIC(wd, ht, shift, num_cols, num_blks) \
+ { \
+ S32 y, rnd; \
+ rnd = (1 << shift) - 1; \
+ num_cols = (wd + rnd) >> shift; \
+ y = (ht + rnd) >> shift; \
+ num_blks = num_cols * y; \
+ }
+
+#define COUNT_CANDS(a, b) \
+ { \
+ b = (((a) & (1))) + (((a >> 1) & (1))) + (((a >> 2) & (1))) + (((a >> 3) & (1))) + \
+ (((a >> 4) & (1))) + (((a >> 5) & (1))) + (((a >> 6) & (1))) + (((a >> 7) & (1))) + \
+ (((a >> 8) & (1))); \
+ }
+
+#define COPY_MV_TO_SEARCH_NODE(node, mv, pref, refid, shift) \
+ { \
+ (node)->s_mv.i2_mvx = (mv)->i2_mv_x; \
+ (node)->s_mv.i2_mvy = (mv)->i2_mv_y; \
+ (node)->i1_ref_idx = *pref; \
+ (node)->u1_is_avail = 1; \
+ \
+ /* Can set the availability flag for MV Pred purposes */ \
+ if(((node)->i1_ref_idx < 0) || ((node)->s_mv.i2_mvx == INTRA_MV)) \
+ { \
+ (node)->u1_is_avail = 0; \
+ (node)->i1_ref_idx = refid; \
+ (node)->s_mv.i2_mvx = 0; \
+ (node)->s_mv.i2_mvy = 0; \
+ } \
+ (node)->s_mv.i2_mvx >>= (shift); \
+ (node)->s_mv.i2_mvy >>= (shift); \
+ (node)->u1_subpel_done = (shift) ? 0 : 1; \
+ }
+
+#define COMPUTE_MVD(ps_mv, ps_data, cumulative_mv_distance) \
+ { \
+ S32 mvx_q8 = (ps_mv)->mvx << 8; \
+ S32 mvy_q8 = (ps_mv)->mvy << 8; \
+ S32 mvcx_q8 = (ps_data)->s_centroid.i4_pos_x_q8; \
+ S32 mvcy_q8 = (ps_data)->s_centroid.i4_pos_y_q8; \
+ \
+ S32 mvdx_q8 = mvx_q8 - mvcx_q8; \
+ S32 mvdy_q8 = mvy_q8 - mvcy_q8; \
+ \
+ S32 mvdx = (mvdx_q8 + (1 << 7)) >> 8; \
+ S32 mvdy = (mvdy_q8 + (1 << 7)) >> 8; \
+ \
+ S32 mvd = ABS(mvdx) + ABS(mvdy); \
+ \
+ cumulative_mv_distance += mvd; \
+ }
+
+#define STATS_COLLECTOR_MV_INSERT( \
+ ps_mv_store, num_mvs_stored, mvx_cur, mvy_cur, stats_struct, check_for_duplicate, ref_idx) \
+ { \
+ S32 i4_j; \
+ (stats_struct).f_num_cands_being_processed++; \
+ check_for_duplicate = 0; \
+ /* NOTE(review): check_for_duplicate is written as 0 on both the hit and miss paths - confirm intent */ \
+ for(i4_j = 0; i4_j < (num_mvs_stored); i4_j++) \
+ { \
+ if(((ps_mv_store)[i4_j].s_mv.i2_mvx == (mvx_cur)) && \
+ ((ps_mv_store)[i4_j].s_mv.i2_mvy == (mvy_cur)) && \
+ ((ps_mv_store)[i4_j].i1_ref_idx == ref_idx)) \
+ { \
+ (stats_struct).f_num_duplicates_amongst_processed++; \
+ check_for_duplicate = 0; \
+ break; \
+ } \
+ } \
+ /* Loop ran to completion: (mvx_cur, mvy_cur, ref_idx) is new, so append it to the store */ \
+ if(i4_j == (num_mvs_stored)) \
+ { \
+ (ps_mv_store)[i4_j].s_mv.i2_mvx = (mvx_cur); \
+ (ps_mv_store)[i4_j].s_mv.i2_mvy = (mvy_cur); \
+ (ps_mv_store)[i4_j].i1_ref_idx = ref_idx; \
+ (num_mvs_stored)++; \
+ } \
+ }
+
+#define UPDATE_CLUSTER_METADATA_POST_MERGE(ps_cluster) \
+ { \
+ S32 m; \
+ \
+ S32 num_clusters_evaluated = 0; \
+ \
+ for(m = 0; num_clusters_evaluated < (ps_cluster)->num_clusters; m++) \
+ { \
+ if(!((ps_cluster)->as_cluster_data[m].is_valid_cluster)) \
+ { \
+ if(-1 != (ps_cluster)->as_cluster_data[m].ref_id) \
+ { \
+ (ps_cluster)->au1_num_clusters[(ps_cluster)->as_cluster_data[m].ref_id]--; \
+ } \
+ } \
+ else \
+ { \
+ num_clusters_evaluated++; \
+ } \
+ } \
+ }
+
+#define SET_VALUES_FOR_TOP_REF_IDS(ps_cluster_blk, best_uni_ref, best_alt_ref, num_ref) \
+ { \
+ ps_cluster_blk->best_uni_ref = best_uni_ref; \
+ ps_cluster_blk->best_alt_ref = best_alt_ref; \
+ ps_cluster_blk->num_refs = num_ref; \
+ }
+
+#define MAP_X_MAX 16
+#define MAP_Y_MAX 16
+
+#define CHECK_FOR_DUPES_AND_INSERT_UNIQUE_NODES( \
+ ps_dedup_enabler, num_cands, mvx, mvy, check_for_duplicate) \
+ { \
+ S32 center_mvx; \
+ S32 center_mvy; \
+ S32 mvdx; \
+ S32 mvdy; \
+ U32 *pu4_node_map; \
+ S32 columnar_presence; \
+ \
+ (check_for_duplicate) = 0; \
+ { \
+ subpel_dedup_enabler_t *ps_dedup = &(ps_dedup_enabler)[0]; \
+ center_mvx = ps_dedup->i2_mv_x; \
+ center_mvy = ps_dedup->i2_mv_y; \
+ pu4_node_map = ps_dedup->au4_node_map; \
+ \
+ mvdx = (mvx)-center_mvx; \
+ mvdy = (mvy)-center_mvy; \
+ \
+ if(((mvdx < MAP_X_MAX) && (mvdx >= -MAP_X_MAX)) && \
+ ((mvdy < MAP_Y_MAX) && (mvdy >= -MAP_Y_MAX))) \
+ { \
+ columnar_presence = pu4_node_map[MAP_X_MAX + mvdx]; \
+ \
+ if(0 == (columnar_presence & (1U << (MAP_Y_MAX + mvdy)))) \
+ { \
+ columnar_presence |= (1U << (MAP_Y_MAX + mvdy)); \
+ pu4_node_map[MAP_X_MAX + mvdx] = columnar_presence; \
+ } \
+ else \
+ { \
+ (check_for_duplicate) = 1; \
+ } \
+ } \
+ } \
+ }
+
+#define BUMP_OUTLIER_CLUSTERS(ps_cluster_blk, sdi_threshold) \
+ { \
+ outlier_data_t as_outliers[MAX_NUM_CLUSTERS_64x64 + 1]; \
+ \
+ S32 j, k; \
+ \
+ S32 num_clusters_evaluated = 0; \
+ S32 num_clusters = ps_cluster_blk->num_clusters; \
+ S32 num_outliers_present = 0; \
+ \
+ for(j = 0; num_clusters_evaluated < num_clusters; j++) \
+ { \
+ cluster_data_t *ps_data = &ps_cluster_blk->as_cluster_data[j]; \
+ \
+ if(!ps_data->is_valid_cluster) \
+ { \
+ continue; \
+ } \
+ \
+ num_clusters_evaluated++; \
+ \
+ if((ps_data->num_mvs == 1) && (ps_data->as_mv[0].sdi < sdi_threshold) && \
+ (ps_cluster_blk->au1_num_clusters[ps_data->ref_id] > \
+ MAX_NUM_CLUSTERS_IN_ONE_REF_IDX)) \
+ { \
+ as_outliers[num_outliers_present].cluster_id = j; \
+ as_outliers[num_outliers_present].ref_idx = ps_data->ref_id; \
+ as_outliers[num_outliers_present].sdi = ps_data->as_mv[0].sdi; \
+ num_outliers_present++; \
+ } \
+ } \
+ \
+ for(j = 0; j < (num_outliers_present - 1); j++) \
+ { \
+ for(k = (j + 1); k < num_outliers_present; k++) \
+ { \
+ if(as_outliers[j].sdi > as_outliers[k].sdi) \
+ { \
+ as_outliers[MAX_NUM_CLUSTERS_64x64] = as_outliers[j]; \
+ as_outliers[j] = as_outliers[k]; \
+ as_outliers[k] = as_outliers[MAX_NUM_CLUSTERS_64x64]; \
+ } \
+ } \
+ } \
+ \
+ for(j = 0; j < (num_outliers_present); j++) \
+ { \
+ S32 ref_idx = as_outliers[j].ref_idx; \
+ \
+ if((ps_cluster_blk->au1_num_clusters[ref_idx] > MAX_NUM_CLUSTERS_IN_ONE_REF_IDX)) \
+ { \
+ ps_cluster_blk->as_cluster_data[as_outliers[j].cluster_id].is_valid_cluster = 0; \
+ ps_cluster_blk->num_clusters--; \
+ ps_cluster_blk->au1_num_clusters[ref_idx]--; \
+ } \
+ } \
+ }
+
+#define ADD_CLUSTER_CENTROID_AS_CANDS_FOR_BLK_MERGE( \
+ ps_cluster_data, ps_range_prms, ps_list, ps_mv, is_ref_in_l0, ref_idx) \
+ { \
+ ps_list = &(ps_cluster_data)->as_mv_list[!(is_ref_in_l0)][(ref_idx)]; \
+ ps_mv = &ps_list->as_mv[ps_list->num_mvs]; \
+ /* ps_centroid is not a macro parameter; it must be in scope where the macro is expanded */ \
+ ps_mv->i2_mvx = (ps_centroid->i4_pos_x_q8 + (1 << 7)) >> 8; \
+ ps_mv->i2_mvy = (ps_centroid->i4_pos_y_q8 + (1 << 7)) >> 8; \
+ \
+ CLIP_MV_WITHIN_RANGE(ps_mv->i2_mvx, ps_mv->i2_mvy, (ps_range_prms), 0, 0, 0); \
+ \
+ ps_cluster_data->ai4_ref_id_valid[!(is_ref_in_l0)][(ref_idx)] = 1; \
+ \
+ ps_list->num_mvs++; \
+ }
+
+#define COPY_SEARCH_CANDIDATE_DATA(node, mv, pref, refid, shift) \
+ { \
+ (node)->ps_mv->i2_mvx = (mv)->i2_mv_x; \
+ (node)->ps_mv->i2_mvy = (mv)->i2_mv_y; \
+ (node)->i1_ref_idx = *pref; \
+ (node)->u1_is_avail = 1; \
+ \
+ /* Can set the availability flag for MV Pred purposes */ \
+ if(((node)->i1_ref_idx < 0) || ((node)->ps_mv->i2_mvx == INTRA_MV)) \
+ { \
+ (node)->u1_is_avail = 0; \
+ (node)->i1_ref_idx = refid; \
+ (node)->ps_mv->i2_mvx = 0; \
+ (node)->ps_mv->i2_mvy = 0; \
+ } \
+ (node)->ps_mv->i2_mvx >>= (shift); \
+ (node)->ps_mv->i2_mvy >>= (shift); \
+ (node)->u1_subpel_done = (shift) ? 0 : 1; \
+ }
+/**
+*******************************************************************************
+* @macro MIN_NODE
+* @brief Returns the search node with lesser cost
+*******************************************************************************
+ */
+#define MIN_NODE(a, b) (((a)->i4_tot_cost < (b)->i4_tot_cost) ? (a) : (b))
+
+/**
+*******************************************************************************
+* @macro MAX_NODE
+* @brief Returns search node with higher cost
+*******************************************************************************
+ */
+#define MAX_NODE(a, b) (((a)->i4_tot_cost >= (b)->i4_tot_cost) ? (a) : (b))
+
+/**
+******************************************************************************
+ * @macro HME_INV_WT_PRED
+ * @brief Implements inverse of wt pred formula. Actual wt pred formula is
+ * ((input * wt) + rnd) >> shift) + offset
+******************************************************************************
+*/
+#define HME_INV_WT_PRED(inp, wt, off, shift) (((((inp) - (off)) << (shift)) + ((wt) >> 1)) / (wt))
+#define HME_INV_WT_PRED1(inp, wt, off, shift) \
+ (((((inp) - (off)) << (shift)) * wt + (1 << 14)) >> 15)
+
+/**
+******************************************************************************
+ * @macro HME_WT_PRED
+ * @brief Implements wt pred formula as per spec
+******************************************************************************
+*/
+#define HME_WT_PRED(p0, p1, w0, w1, rnd, shift) \
+ (((((S32)w0) * ((S32)p0) + ((S32)w1) * ((S32)p1)) >> shift) + rnd)
+
+/**
+******************************************************************************
+ * @macro PREFETCH_BLK
+ * @brief Prefetches a block of data into cache beforehand
+******************************************************************************
+*/
+
+/**
+******************************************************************************
+ * @macro INSERT_UNIQUE_NODE (defined below, after PREFETCH_BLK)
+ * @brief Inserts a new search node in a list if it is unique; helps in
+ removing duplicate nodes/candidates
+******************************************************************************
+*/
+#define PREFETCH_BLK(pu1_src, src_stride, lines, type) \
+ { \
+ WORD32 ctr; \
+ for(ctr = 0; ctr < lines; ctr++) \
+ { \
+ PREFETCH((char const *)pu1_src, type); \
+ pu1_src += src_stride; \
+ } \
+ }
+
+#define INSERT_UNIQUE_NODE( \
+ as_nodes, num_nodes, new_node, au4_map, center_x, center_y, use_hashing) \
+ { \
+ WORD32 k; \
+ UWORD32 map; \
+ WORD32 delta_x, delta_y; \
+ delta_x = (new_node).ps_mv->i2_mvx - (center_x); \
+ delta_y = (new_node).ps_mv->i2_mvy - (center_y); \
+ map = 0; \
+ \
+ if((use_hashing) && (delta_x < MAP_X_MAX) && (delta_x >= (-MAP_X_MAX)) && \
+ (delta_y < MAP_Y_MAX) && (delta_y >= (-MAP_Y_MAX))) \
+ { \
+ map = (au4_map)[delta_x + MAP_X_MAX]; \
+ if(0 == (map & (1U << (delta_y + MAP_Y_MAX)))) \
+ { \
+ (new_node).s_mv = (new_node).ps_mv[0]; \
+ (as_nodes)[(num_nodes)] = (new_node); \
+ ((num_nodes))++; \
+ map |= 1U << (delta_y + MAP_Y_MAX); \
+ (au4_map)[delta_x + MAP_X_MAX] = map; \
+ } \
+ } \
+ else \
+ { \
+ for(k = 0; k < ((num_nodes)); k++) \
+ { \
+ /* Search is this node is already present in unique list */ \
+ if(((as_nodes)[k].s_mv.i2_mvx == (new_node).ps_mv->i2_mvx) && \
+ ((as_nodes)[k].s_mv.i2_mvy == (new_node).ps_mv->i2_mvy) && \
+ ((as_nodes)[k].i1_ref_idx == (new_node).i1_ref_idx)) \
+ { \
+ /* This is duplicate node; need not be inserted */ \
+ break; \
+ } \
+ } \
+ if(k == ((num_nodes))) \
+ { \
+ /* Insert new node only if it is not duplicate node */ \
+ (new_node).s_mv = (new_node).ps_mv[0]; \
+ (as_nodes)[k] = (new_node); \
+ ((num_nodes))++; \
+ } \
+ } \
+ }
+
+/**
+******************************************************************************
+ * @macro INSERT_NEW_NODE_NOMAP
+ * @brief Inserts a new search node in a list if it is unique; helps in
+ removing duplicate nodes/candidates
+******************************************************************************
+*/
+#define INSERT_NEW_NODE_NOMAP(as_nodes, num_nodes, new_node, implicit_layer) \
+ { \
+ WORD32 k; \
+ if(!implicit_layer) \
+ { \
+ for(k = 0; k < (num_nodes); k++) \
+ { \
+ /* Search is this node is already present in unique list */ \
+ if((as_nodes[k].s_mv.i2_mvx == new_node.s_mv.i2_mvx) && \
+ (as_nodes[k].s_mv.i2_mvy == new_node.s_mv.i2_mvy)) \
+ { \
+ /* This is duplicate node; need not be inserted */ \
+ break; \
+ } \
+ } \
+ } \
+ else \
+ { \
+ for(k = 0; k < (num_nodes); k++) \
+ { \
+ /* Search is this node is already present in unique list */ \
+ if((as_nodes[k].s_mv.i2_mvx == new_node.s_mv.i2_mvx) && \
+ (as_nodes[k].s_mv.i2_mvy == new_node.s_mv.i2_mvy) && \
+ (as_nodes[k].i1_ref_idx == new_node.i1_ref_idx)) \
+ { \
+ /* This is duplicate node; need not be inserted */ \
+ break; \
+ } \
+ } \
+ } \
+ \
+ if(k == (num_nodes)) \
+ { \
+ /* Insert new node only if it is not duplicate node */ \
+ as_nodes[k] = new_node; \
+ (num_nodes)++; \
+ } \
+ }
+/**
+******************************************************************************
+ * @macro INSERT_NEW_NODE_NOMAP_ALTERNATE
+ * @brief Appends entry [result_num][part_id_1] of the result arrays reached through the pointer new_node
+ to as_nodes, unless a node with the same mv and ref idx is already present (removes duplicate candidates)
+******************************************************************************
+*/
+#define INSERT_NEW_NODE_NOMAP_ALTERNATE(as_nodes, num_nodes, new_node, result_num, part_id) \
+ { /* part_id_1 indexes the result arrays: ai4_part_id[part_id] if more than 8 parts are valid, else part_id */ \
+ WORD32 k; \
+ WORD32 part_id_1 = (new_node->i4_num_valid_parts > 8) ? new_node->ai4_part_id[part_id] \
+ : part_id; \
+ for(k = 0; k < (num_nodes); k++) \
+ { \
+ /* A node is a duplicate when its mv and its ref idx both match the selected result entry */ \
+ if((as_nodes[k].s_mv.i2_mvx == new_node->i2_mv_x[result_num][part_id_1]) && \
+ (as_nodes[k].s_mv.i2_mvy == new_node->i2_mv_y[result_num][part_id_1]) && \
+ (as_nodes[k].i1_ref_idx == new_node->i2_ref_idx[result_num][part_id_1])) \
+ { \
+ /* Duplicate found; leave the loop with k < num_nodes so nothing is inserted */ \
+ break; \
+ } \
+ } \
+ \
+ if(k == (num_nodes)) \
+ { \
+ /* No duplicate: copy costs, mv, ref idx. NOTE(review): u1_part_id always uses ai4_part_id[part_id], not part_id_1 - confirm */ \
+ as_nodes[k].i4_tot_cost = (WORD32)new_node->i2_tot_cost[result_num][part_id_1]; \
+ as_nodes[k].i4_mv_cost = (WORD32)new_node->i2_mv_cost[result_num][part_id_1]; \
+ as_nodes[k].s_mv.i2_mvx = new_node->i2_mv_x[result_num][part_id_1]; \
+ as_nodes[k].s_mv.i2_mvy = new_node->i2_mv_y[result_num][part_id_1]; \
+ as_nodes[k].i1_ref_idx = (WORD8)new_node->i2_ref_idx[result_num][part_id_1]; \
+ as_nodes[k].u1_part_id = new_node->ai4_part_id[part_id]; \
+ (num_nodes)++; \
+ } \
+ }
+
+#define INSERT_NEW_NODE( \
+ as_nodes, num_nodes, new_node, implicit_layer, au4_map, center_x, center_y, use_hashing) \
+ { /* Appends new_node to as_nodes unless it is a duplicate. NOTE(review): center_x / center_y are expanded without parentheses */ \
+ WORD32 k; \
+ UWORD32 map; \
+ WORD32 delta_x, delta_y; \
+ delta_x = (new_node).s_mv.i2_mvx - center_x; \
+ delta_y = (new_node).s_mv.i2_mvy - center_y; \
+ map = 0; \
+ if((delta_x < MAP_X_MAX) && (delta_x >= (-MAP_X_MAX)) && (delta_y < MAP_Y_MAX) && \
+ (delta_y >= (-MAP_Y_MAX)) && (use_hashing)) \
+ { /* Bitmap path: au4_map has one word per x offset and one bit per y offset from the centre; ref idx is not consulted here */ \
+ map = (au4_map)[delta_x + MAP_X_MAX]; \
+ if(0 == (map & (1U << (delta_y + MAP_Y_MAX)))) \
+ { /* Bit not yet set: this mv has not been seen, so append the node and mark the bit */ \
+ (as_nodes)[(num_nodes)] = (new_node); \
+ (num_nodes)++; \
+ map |= 1U << (delta_y + MAP_Y_MAX); \
+ (au4_map)[delta_x + MAP_X_MAX] = map; \
+ } \
+ } \
+ else if(!(implicit_layer)) \
+ { /* Linear search path, implicit_layer zero: duplicates are detected on mv only */ \
+ for(k = 0; k < (num_nodes); k++) \
+ { \
+ /* Check whether a node with the same mv is already present in the unique list */ \
+ if(((as_nodes)[k].s_mv.i2_mvx == (new_node).s_mv.i2_mvx) && \
+ ((as_nodes)[k].s_mv.i2_mvy == (new_node).s_mv.i2_mvy)) \
+ { \
+ /* Duplicate found; leave the loop with k < num_nodes so nothing is inserted */ \
+ break; \
+ } \
+ } \
+ if(k == (num_nodes)) \
+ { \
+ /* No duplicate found: append new_node (no capacity check is done here) */ \
+ (as_nodes)[k] = (new_node); \
+ (num_nodes)++; \
+ } \
+ } \
+ else \
+ { /* Linear search path, implicit_layer non-zero: duplicates are detected on mv and ref idx */ \
+ for(k = 0; k < (num_nodes); k++) \
+ { \
+ /* Check whether a node with the same mv and ref idx is already present in the unique list */ \
+ if(((as_nodes)[k].s_mv.i2_mvx == (new_node).s_mv.i2_mvx) && \
+ ((as_nodes)[k].s_mv.i2_mvy == (new_node).s_mv.i2_mvy) && \
+ ((as_nodes)[k].i1_ref_idx == (new_node).i1_ref_idx)) \
+ { \
+ /* Duplicate found; leave the loop with k < num_nodes so nothing is inserted */ \
+ break; \
+ } \
+ } \
+ if(k == (num_nodes)) \
+ { \
+ /* No duplicate found: append new_node (no capacity check is done here) */ \
+ (as_nodes)[k] = (new_node); \
+ (num_nodes)++; \
+ } \
+ } \
+ }
+
+#define COMPUTE_DIFF_MV(mvdx, mvdy, inp_node, mv_p_x, mv_p_y, inp_sh, pred_sh) \
+ { /* mvd = (inp_node->s_mv << inp_sh) - (mv_p << pred_sh), per component; mvdx and mvdy are assigned to */ \
+ mvdx = (inp_node)->s_mv.i2_mvx << (inp_sh); \
+ mvdy = (inp_node)->s_mv.i2_mvy << (inp_sh); \
+ mvdx -= ((mv_p_x) << (pred_sh)); \
+ mvdy -= ((mv_p_y) << (pred_sh)); \
+ }
+
+#define COMPUTE_MV_DIFFERENCE(mvdx, mvdy, inp_node, mv_p_x, mv_p_y, inp_sh, pred_sh) \
+ { /* Same as COMPUTE_DIFF_MV, but the input mv is read through the inp_node->ps_mv pointer instead of s_mv */ \
+ mvdx = (inp_node)->ps_mv->i2_mvx << (inp_sh); \
+ mvdy = (inp_node)->ps_mv->i2_mvy << (inp_sh); \
+ mvdx -= ((mv_p_x) << (pred_sh)); \
+ mvdy -= ((mv_p_y) << (pred_sh)); \
+ }
+
+/**
+******************************************************************************
+ * @enum CU_MERGE_RESULT_T
+ * @brief Describes the result of a merge, i.e. whether it was successful (CU_MERGED) or not (CU_SPLIT)
+******************************************************************************
+*/
+typedef enum
+{
+ CU_MERGED,
+ CU_SPLIT
+} CU_MERGE_RESULT_T;
+
+/**
+******************************************************************************
+ * @enum PART_ORIENT_T
+ * @brief Describes the orientation of a partition (vertical: left/right, horizontal: top/bottom)
+******************************************************************************
+*/
+typedef enum
+{
+ VERT_LEFT,
+ VERT_RIGHT,
+ HORZ_TOP,
+ HORZ_BOT
+} PART_ORIENT_T;
+
+/**
+******************************************************************************
+ * @enum GRID_PT_T
+ * @brief For a 3x3 rect grid, numbers each point as shown (0 = centre); NUM_GRID_PTS is the point count
+* 5 2 6
+* 1 0 3
+* 7 4 8
+******************************************************************************
+*/
+typedef enum
+{
+ PT_C = 0,
+ PT_L = 1,
+ PT_T = 2,
+ PT_R = 3,
+ PT_B = 4,
+ PT_TL = 5,
+ PT_TR = 6,
+ PT_BL = 7,
+ PT_BR = 8,
+ NUM_GRID_PTS
+} GRID_PT_T;
+
+/**
+******************************************************************************
+ * @macro IS_POW_2
+ * @brief Returns whether a number is a power of 2. Note: the expression also evaluates to 1 for x == 0
+******************************************************************************
+*/
+#define IS_POW_2(x) (!((x) & ((x)-1)))
+
+/**
+******************************************************************************
+ * @macro GRID_ALL_PTS_VALID
+ * @brief For a 3x3 rect grid, this mask (the 9 low bits set) can be used to enable all pts in the grid
+******************************************************************************
+*/
+#define GRID_ALL_PTS_VALID 0x1ff
+
+/**
+******************************************************************************
+ * @macro GRID_DIAMOND_ENABLE_ALL
+ * @brief For a diamond search, this enables all 5 pts of the diamond: centre, left, top, right and bottom
+******************************************************************************
+*/
+#define GRID_DIAMOND_ENABLE_ALL \
+ (BIT_EN(PT_C) | BIT_EN(PT_L) | BIT_EN(PT_T) | BIT_EN(PT_R) | BIT_EN(PT_B))
+
+/**
+******************************************************************************
+ * @macro GRID_RT_3_INVALID, GRID_LT_3_INVALID, GRID_TOP_3_INVALID, GRID_BOT_3_INVALID
+ * @brief For a square grid search, depending on where the best result is, we can optimise the search for the
+ * next iteration by invalidating some pts: each mask is GRID_ALL_PTS_VALID XOR-ed with the 3 pts of one side
+******************************************************************************
+*/
+#define GRID_RT_3_INVALID ((GRID_ALL_PTS_VALID) ^ (BIT_EN(PT_TR) | BIT_EN(PT_R) | BIT_EN(PT_BR)))
+#define GRID_LT_3_INVALID ((GRID_ALL_PTS_VALID) ^ (BIT_EN(PT_TL) | BIT_EN(PT_L) | BIT_EN(PT_BL)))
+#define GRID_TOP_3_INVALID ((GRID_ALL_PTS_VALID) ^ (BIT_EN(PT_TL) | BIT_EN(PT_T) | BIT_EN(PT_TR)))
+#define GRID_BOT_3_INVALID ((GRID_ALL_PTS_VALID) ^ (BIT_EN(PT_BL) | BIT_EN(PT_B) | BIT_EN(PT_BR)))
+
+/**
+******************************************************************************
+ * @enum GMV_MVTYPE_T
+ * @brief Defines what type of GMV we need (thin lobe for a very spiky
+ * distribution of mvs or thick lobe for a blurred distribution of mvs); NUM_GMV_LOBES is the lobe count
+******************************************************************************
+*/
+typedef enum
+{
+ GMV_THICK_LOBE,
+ GMV_THIN_LOBE,
+ NUM_GMV_LOBES
+} GMV_MVTYPE_T;
+
+/**
+******************************************************************************
+ * @enum BLK_SIZE_T
+ * @brief Defines all possible inter blk sizes (4x4 up to 64x64); BLK_INVALID is -1, NUM_BLK_SIZES is the count
+******************************************************************************
+*/
+typedef enum
+{
+ BLK_INVALID = -1,
+ BLK_4x4 = 0,
+ BLK_4x8,
+ BLK_8x4,
+ BLK_8x8,
+ BLK_4x16,
+ BLK_8x16,
+ BLK_12x16,
+ BLK_16x4,
+ BLK_16x8,
+ BLK_16x12,
+ BLK_16x16,
+ BLK_8x32,
+ BLK_16x32,
+ BLK_24x32,
+ BLK_32x8,
+ BLK_32x16,
+ BLK_32x24,
+ BLK_32x32,
+ BLK_16x64,
+ BLK_32x64,
+ BLK_48x64,
+ BLK_64x16,
+ BLK_64x32,
+ BLK_64x48,
+ BLK_64x64,
+ NUM_BLK_SIZES
+} BLK_SIZE_T;
+
+/**
+******************************************************************************
+ * @enum SEARCH_COMPLEXITY_T
+ * @brief For the refinement layer, this decides the number of refinement candidates (low / medium / high)
+******************************************************************************
+*/
+typedef enum
+{
+ SEARCH_CX_LOW = 0,
+ SEARCH_CX_MED = 1,
+ SEARCH_CX_HIGH = 2
+} SEARCH_COMPLEXITY_T;
+
+/**
+******************************************************************************
+ * @enum CTB_BOUNDARY_TYPES_T
+ * @brief For pictures whose size is not a multiple of the CTB size horizontally or vertically, we
+ * define 4 unique cases: centre (full CTBs), bottom boundary (64x8k CTBs),
+ * right boundary (8mx64 CTBs), and bottom right corner (8mx8k CTB)
+******************************************************************************
+*/
+typedef enum
+{
+ CTB_CENTRE,
+ CTB_BOT_PIC_BOUNDARY,
+ CTB_RT_PIC_BOUNDARY,
+ CTB_BOT_RT_PIC_BOUNDARY,
+ NUM_CTB_BOUNDARY_TYPES,
+} CTB_BOUNDARY_TYPES_T;
+
+/**
+******************************************************************************
+ * @enum SEARCH_CANDIDATE_TYPE_T
+ * @brief Names for all the search candidate types used in ME; ILLUSORY_CANDIDATE is -1, NUM_SEARCH_CAND_TYPES is the count
+******************************************************************************
+*/
+typedef enum
+{
+ ILLUSORY_CANDIDATE = -1,
+ ZERO_MV = 0,
+ ZERO_MV_ALTREF,
+ SPATIAL_LEFT0,
+ SPATIAL_TOP0,
+ SPATIAL_TOP_RIGHT0,
+ SPATIAL_TOP_LEFT0,
+ SPATIAL_LEFT1,
+ SPATIAL_TOP1,
+ SPATIAL_TOP_RIGHT1,
+ SPATIAL_TOP_LEFT1,
+ PROJECTED_COLOC0,
+ PROJECTED_COLOC1,
+ PROJECTED_COLOC2,
+ PROJECTED_COLOC3,
+ PROJECTED_COLOC4,
+ PROJECTED_COLOC5,
+ PROJECTED_COLOC6,
+ PROJECTED_COLOC7,
+ PROJECTED_COLOC_TR0,
+ PROJECTED_COLOC_TR1,
+ PROJECTED_COLOC_BL0,
+ PROJECTED_COLOC_BL1,
+ PROJECTED_COLOC_BR0,
+ PROJECTED_COLOC_BR1,
+ PROJECTED_TOP0,
+ PROJECTED_TOP1,
+ PROJECTED_TOP_RIGHT0,
+ PROJECTED_TOP_RIGHT1,
+ PROJECTED_TOP_LEFT0,
+ PROJECTED_TOP_LEFT1,
+ PROJECTED_RIGHT0,
+ PROJECTED_RIGHT1,
+ PROJECTED_BOTTOM0,
+ PROJECTED_BOTTOM1,
+ PROJECTED_BOTTOM_RIGHT0,
+ PROJECTED_BOTTOM_RIGHT1,
+ PROJECTED_BOTTOM_LEFT0,
+ PROJECTED_BOTTOM_LEFT1,
+ COLOCATED_GLOBAL_MV0,
+ COLOCATED_GLOBAL_MV1,
+ PROJECTED_TOP2,
+ PROJECTED_TOP3,
+ PROJECTED_TOP_RIGHT2,
+ PROJECTED_TOP_RIGHT3,
+ PROJECTED_TOP_LEFT2,
+ PROJECTED_TOP_LEFT3,
+ PROJECTED_RIGHT2,
+ PROJECTED_RIGHT3,
+ PROJECTED_BOTTOM2,
+ PROJECTED_BOTTOM3,
+ PROJECTED_BOTTOM_RIGHT2,
+ PROJECTED_BOTTOM_RIGHT3,
+ PROJECTED_BOTTOM_LEFT2,
+ PROJECTED_BOTTOM_LEFT3,
+ NUM_SEARCH_CAND_TYPES
+} SEARCH_CANDIDATE_TYPE_T;
+
+typedef enum /* Search candidate locations; NUM_SEARCH_CAND_LOCATIONS is the count and sizes per-location arrays */
+{
+ ILLUSORY_LOCATION = -1,
+ COLOCATED,
+ COLOCATED_4x4_TR,
+ COLOCATED_4x4_BL,
+ COLOCATED_4x4_BR,
+ LEFT,
+ TOPLEFT,
+ TOP,
+ TOPRIGHT,
+ RIGHT,
+ BOTTOMRIGHT,
+ BOTTOM,
+ BOTTOMLEFT,
+ NUM_SEARCH_CAND_LOCATIONS
+} SEARCH_CAND_LOCATIONS_T;
+
+/**
+******************************************************************************
+ * @macros ENABLE_mxn
+ * @brief Enables a type or a group of partitions. ENABLE_ALL_PARTS enables all
+ * partitions, while the others enable selected partitions (AMP = the four asymmetric types, SMP = 2NxN and
+ * Nx2N, SQUARE = 2Nx2N and NxN). These can be used to set the mask of active partitions
+******************************************************************************
+*/
+#define ENABLE_2Nx2N (BIT_EN(PART_ID_2Nx2N))
+#define ENABLE_2NxN (BIT_EN(PART_ID_2NxN_T) | BIT_EN(PART_ID_2NxN_B))
+#define ENABLE_Nx2N (BIT_EN(PART_ID_Nx2N_L) | BIT_EN(PART_ID_Nx2N_R))
+#define ENABLE_NxN \
+ (BIT_EN(PART_ID_NxN_TL) | BIT_EN(PART_ID_NxN_TR) | BIT_EN(PART_ID_NxN_BL) | \
+ BIT_EN(PART_ID_NxN_BR))
+#define ENABLE_2NxnU (BIT_EN(PART_ID_2NxnU_T) | BIT_EN(PART_ID_2NxnU_B))
+#define ENABLE_2NxnD (BIT_EN(PART_ID_2NxnD_T) | BIT_EN(PART_ID_2NxnD_B))
+#define ENABLE_nLx2N (BIT_EN(PART_ID_nLx2N_L) | BIT_EN(PART_ID_nLx2N_R))
+#define ENABLE_nRx2N (BIT_EN(PART_ID_nRx2N_L) | BIT_EN(PART_ID_nRx2N_R))
+#define ENABLE_AMP ((ENABLE_2NxnU) | (ENABLE_2NxnD) | (ENABLE_nLx2N) | (ENABLE_nRx2N))
+#define ENABLE_SMP ((ENABLE_2NxN) | (ENABLE_Nx2N))
+#define ENABLE_ALL_PARTS \
+ ((ENABLE_2Nx2N) | (ENABLE_NxN) | (ENABLE_2NxN) | (ENABLE_Nx2N) | (ENABLE_AMP))
+#define ENABLE_SQUARE_PARTS ((ENABLE_2Nx2N) | (ENABLE_NxN))
+
+/**
+******************************************************************************
+ * @enum MV_PEL_RES_T
+ * @brief Resolution of MV: fpel/hpel/qpel units. Useful for maintaining
+ * predictors. During fpel search, candidates, predictors etc. are in fpel units;
+ * in subpel search, they are in subpel units
+******************************************************************************
+*/
+typedef enum
+{
+ MV_RES_FPEL,
+ MV_RES_HPEL,
+ MV_RES_QPEL
+} MV_PEL_RES_T;
+
+/**
+******************************************************************************
+ * @macro HME_SET_MVPRED_RES
+ * @brief Sets the resolution (fpel/qpel/hpel units) of a pred ctxt by writing mv_pel_res to its mv_pel member
+******************************************************************************
+*/
+#define HME_SET_MVPRED_RES(ps_pred_ctxt, mv_pel_res) ((ps_pred_ctxt)->mv_pel = mv_pel_res)
+
+/**
+******************************************************************************
+ * @macro HME_SET_MVPRED_DIR
+ * @brief Sets the direction (L0/L1) by writing pred_dir to the pred_lx member of the pred ctxt. Since L0 and L1
+ * use separate candidates, the pred ctxt for them has to be maintained separately. The value parameter must not be named pred_lx, else the member name is macro-substituted too
+******************************************************************************
+*/
+#define HME_SET_MVPRED_DIR(ps_pred_ctxt, pred_dir) ((ps_pred_ctxt)->pred_lx = (pred_dir))
+
+/**
+******************************************************************************
+ * @brief Macros to check an mv against a range and to convert range limits; CHECK_MV_WITHIN_RANGE is true only if x and y lie strictly between the min and max limits
+******************************************************************************
+ */
+#define CHECK_MV_WITHIN_RANGE(x, y, range) \
+ (((x) > (range)->i2_min_x) && ((x) < (range)->i2_max_x) && ((y) > (range)->i2_min_y) && \
+ ((y) < (range)->i2_max_y))
+
+#define CONVERT_MV_LIMIT_TO_QPEL(range) \
+ { /* Scales all four limits of range in place by 4 (<< 2). NOTE(review): left shift of a negative limit - confirm this is acceptable */ \
+ (range)->i2_max_x <<= 2; \
+ (range)->i2_max_y <<= 2; \
+ (range)->i2_min_x <<= 2; \
+ (range)->i2_min_y <<= 2; \
+ }
+
+#define CONVERT_MV_LIMIT_TO_FPEL(range) \
+ { /* Inverse of CONVERT_MV_LIMIT_TO_QPEL: shifts all four limits of range right by 2, in place */ \
+ (range)->i2_max_x >>= 2; \
+ (range)->i2_max_y >>= 2; \
+ (range)->i2_min_x >>= 2; \
+ (range)->i2_min_y >>= 2; \
+ }
+
+/**
+******************************************************************************
+ * @brief Switch to debug the number of subpel search nodes (defined as 0 here)
+******************************************************************************
+*/
+#define DEBUG_SUBPEL_SEARCH_NODE_HS_COUNT 0
+
+/**
+******************************************************************************
+ * @typedef SAD_GRID_T
+ * @brief Defines a 2D array type used to store SADs across the grid (first index, 9 entries) and across
+ * partition types (second index, MAX_NUM_PARTS entries)
+******************************************************************************
+*/
+typedef S32 SAD_GRID_T[9][MAX_NUM_PARTS];
+
+/*****************************************************************************/
+/* Structures */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ * @struct grid_node_t
+ * @brief Stores the complete info for a candidate: mv x and y components and ref idx
+******************************************************************************
+*/
+typedef struct
+{
+ S16 i2_mv_x;
+ S16 i2_mv_y;
+ S08 i1_ref_idx;
+} grid_node_t;
+
+/**
+******************************************************************************
+ * @struct search_node_t
+ * @brief Basic structure used for storage of search results, specification
+ * of init candidates for search etc. This structure is complete for the
+ * specification of mv and cost for a given direction of search (L0/L1) but
+ * does not carry information about what type of partition it represents.
+******************************************************************************
+ */
+typedef struct
+{
+ /** Motion vector */
+ mv_t s_mv;
+
+ /** Used in the hme_mv_clipper function to reduce loads and stores */
+ mv_t *ps_mv;
+
+ /** Ref id, as specified in terms of Lc, the unified list */
+ S08 i1_ref_idx;
+
+ /** Flag to indicate whether mv is in fpel or qpel units */
+ U08 u1_subpel_done;
+
+ /**
+ * Indicates whether this node constitutes a valid predictor candidate.
+ * Since this structure is also used for predictor candidates, some candidates may
+ * not be available (anti causal or outside pic boundary). Availability
+ * can be inferred using this flag.
+ */
+ U08 u1_is_avail;
+
+ /**
+ * Indicates the partition id to which this node belongs. Useful during
+ * subpel / fullpel refinement search to identify the partition whose
+ * cost needs to be minimized
+ */
+ U08 u1_part_id;
+
+ /** SAD / SATD stored here */
+ S32 i4_sad;
+
+ /**
+ * Cost related to coding the MV, multiplied by lambda
+ * TODO : Entry may be redundant, can be removed
+ */
+ S32 i4_mv_cost;
+
+ /** Total cost, (SAD + MV Cost) */
+ S32 i4_tot_cost;
+
+ /** Subpel_Dist_Improvement.
+ It is the reduction in distortion (SAD or SATD) achieved
+ from the full-pel stage to the sub-pel stage
+ */
+ S32 i4_sdi;
+
+} search_node_t;
+
+/**
+******************************************************************************
+ * @macro INIT_SEARCH_NODE
+ * @brief Initializes the search_node_t pointed to by x: mv = (0, 0), ref idx = a, total cost and SAD =
+ * MAX_32BIT_VAL, subpel_done = 0, is_avail = 1. ps_mv, u1_part_id, i4_mv_cost and i4_sdi are not written
+******************************************************************************
+ */
+#define INIT_SEARCH_NODE(x, a) \
+ { \
+ (x)->s_mv.i2_mvx = 0; \
+ (x)->s_mv.i2_mvy = 0; \
+ (x)->i1_ref_idx = a; \
+ (x)->i4_tot_cost = MAX_32BIT_VAL; \
+ (x)->i4_sad = MAX_32BIT_VAL; \
+ (x)->u1_subpel_done = 0; \
+ (x)->u1_is_avail = 1; \
+ }
+
+/**
+******************************************************************************
+ * @struct part_attr_t
+ * @brief Geometric description of a partition w.r.t. CU start. Note that
+ * since this is used across various CU sizes, the interpretation of
+ * these members is to be done in the context of the specific usage
+******************************************************************************
+ */
+typedef struct
+{
+ /** Start of partition w.r.t. CU start in x dirn */
+ U08 u1_x_start;
+ /** Size of partition in x dirn */
+ U08 u1_x_count;
+ /** Start of partition w.r.t. CU start in y dirn */
+ U08 u1_y_start;
+ /** Size of partition in y dirn */
+ U08 u1_y_count;
+} part_attr_t;
+
+/**
+******************************************************************************
+ * @struct search_candt_t
+ * @brief Complete information for a given candidate in any refinement search
+******************************************************************************
+ */
+typedef struct
+{
+ /** Points to the mv, ref id info. */
+ search_node_t *ps_search_node;
+ /** Number of refinements to be done for this candidate */
+ U08 u1_num_steps_refine;
+} search_candt_t;
+
+/**
+******************************************************************************
+ * @struct result_node_t
+ * @brief Contains the complete search result for a CU for a given type of
+ * partition split. Holds ptrs to the results for each partition, with
+ * information about the partition type.
+******************************************************************************
+ */
+typedef struct
+{
+ /**
+ * Type of partition that the CU is split into, for which this
+ * result is relevant
+ */
+ PART_TYPE_T e_part_type;
+
+ /**
+ * Total cost of coding the CU (sum of costs of individual partitions
+ * plus other possible CU level overheads)
+ */
+ S32 i4_tot_cost;
+
+ /**
+ * Pointers to the results of each individual partition. Note that the max
+ * number of partitions a CU can be split into is MAX_NUM_PARTS
+ */
+ search_node_t *ps_part_result[MAX_NUM_PARTS];
+
+ /* TU split flag : ai4_tu_split_flag[0] represents the transform splits
+ * for CU size <= 32, for 64x64 each ai4_tu_split_flag entry corresponds
+ * to the respective 32x32 */
+ S32 ai4_tu_split_flag[4];
+
+} result_node_t;
+
+/**
+******************************************************************************
+ * @struct ctb_node_t
+ * @brief Finalized information for a given CU or CTB. This is a recursive
+ * structure and can hence start at CTB level, recursing for every
+ * level of split till we hit leaf CUs in the CTB. At a leaf node
+ * it contains info for the coded non split CU, with child nodes being
+ * set to NULL
+******************************************************************************
+ */
+typedef struct ctb_node_t
+{
+ /** x offset of this CU w.r.t. CTB start (0-63) */
+ U08 u1_x_off;
+ /** y offset of this CU w.r.t. CTB start (0-63) */
+ U08 u1_y_off;
+ /** Results of each partition in both directions L0,L1 */
+ search_node_t as_part_results[MAX_NUM_PARTS][2];
+ /**
+ * Pointers to pred buffers. Note that the buffer may be allocated
+ * at parent level or at this level
+ */
+ U08 *apu1_pred[2];
+ /** Prediction direction for each partition: 0-L0, 1-L1, 2-BI */
+ U08 u1_pred_dir[MAX_NUM_PARTS];
+ /**
+ * When pred direction is decided to be BI, we still store the best
+ * uni pred dir (L0/L1) in this array, for RD Opt purposes
+ */
+ U08 u1_best_uni_dir[MAX_NUM_PARTS];
+ /** Stride of pred buffer pointed to by apu1_pred member */
+ S32 i4_pred_stride;
+ /** Size of the CU that this node represents */
+ CU_SIZE_T e_cu_size;
+ /** For leaf CUs, this indicates the type of partition (for e.g. PRT_2NxN) */
+ PART_TYPE_T e_part_type;
+ /** Below entries are at CU level */
+ S32 i4_sad;
+ S32 i4_satd;
+ S32 i4_mv_cost;
+ S32 i4_rate;
+ S32 i4_dist;
+ S32 i4_tot_cost;
+ /** Best cost of each partition; if a partition is BI, then the best cost across uni/bi */
+ S32 ai4_part_costs[4];
+
+ /* TU split flag : ai4_tu_split_flag[0] represents the transform splits
+ * for CU size <= 32, for 64x64 each ai4_tu_split_flag entry corresponds
+ * to the respective 32x32 */
+ /* For a 8x8 TU - 1 bit used to indicate split */
+ /* For a 16x16 TU - LSB used to indicate winner between 16 and 8 TUs. 4 other bits used to indicate split in each 8x8 quadrant */
+ /* For a 32x32 TU - See above */
+ S32 ai4_tu_split_flag[4];
+
+ /**
+ * Pointers to child nodes. If this node is split, then the below point
+ * to the children nodes (TL, TR, BL, BR), each of quarter size (w/2, h/2).
+ * If this node is not split, then the below are null
+ */
+ struct ctb_node_t *ps_tl;
+ struct ctb_node_t *ps_tr;
+ struct ctb_node_t *ps_bl;
+ struct ctb_node_t *ps_br;
+} ctb_node_t;
+
+/**
+******************************************************************************
+ * @struct ctb_mem_mgr_t
+ * @brief Memory manager structure for CTB level memory allocations of CTB
+ * nodes
+******************************************************************************
+ */
+typedef struct
+{
+ /** Base memory ptr */
+ U08 *pu1_mem;
+ /** Amount used so far (running value) */
+ S32 i4_used;
+ /** Total memory available for this mem mgr */
+ S32 i4_tot;
+
+ /** Size of a CTB node, and its alignment requirement */
+ S32 i4_size;
+ S32 i4_align;
+} ctb_mem_mgr_t;
+
+/**
+******************************************************************************
+ * @struct buf_mgr_t
+ * @brief Memory manager structure for CTB level buffer allocations on the
+ * fly, especially useful for pred bufs and working memory
+******************************************************************************
+ */
+typedef struct
+{
+ /** Base memory ptr */
+ U08 *pu1_wkg_mem;
+ /** Total memory available */
+ S32 i4_total;
+ /** Memory used so far */
+ S32 i4_used;
+} buf_mgr_t;
+
+/**
+******************************************************************************
+ * @struct pred_candt_nodes_t
+ * @brief For a given partition and a given CU/blk, this has pointers to
+ * all the neighbouring and coloc pred candidates. All the pred candidates
+ * are themselves stored as search_node_t structures; pps_proj_coloc is the only double pointer here.
+******************************************************************************
+ */
+typedef struct
+{
+ search_node_t *ps_tl;
+ search_node_t *ps_t;
+ search_node_t *ps_tr;
+ search_node_t *ps_bl;
+ search_node_t *ps_l;
+ search_node_t *ps_coloc;
+ search_node_t *ps_zeromv;
+ search_node_t **pps_proj_coloc;
+
+ search_node_t *ps_mvp_node;
+} pred_candt_nodes_t;
+
+/**
+******************************************************************************
+ * @struct pred_ctxt_t
+ * @brief For a given CU/blk, has complete prediction information for all
+ * types of partitions. Note that the pred candidates are only pointed
+ * to, not actually stored here. This indirection is to avoid
+ * copies after each partition search; this way, the result of
+ * a partition is updated and the causally next partition
+ * automatically uses this result
+******************************************************************************
+ */
+typedef struct
+{
+ pred_candt_nodes_t as_pred_nodes[TOT_NUM_PARTS];
+
+ /**
+ * We use S + lambda * R to evaluate cost. Here S = SAD/SATD, lambda
+ * is the scaling of bits to S, and R is the bits of overhead (MV + mode).
+ * Choice of lambda depends on open loop / closed loop, Qp, temporal id
+ * and possibly CU depth. It is the caller's responsibility to pass
+ * the appropriate lambda to this module.
+ */
+ S32 lambda;
+
+ /** lambda is in Q format, so this is the downshift required */
+ S32 lambda_q_shift;
+
+ /** Prediction direction : PRED_L0 or PRED_L1 */
+ S32 pred_lx;
+
+ /** MV resolution: FPEL, HPEL or QPEL */
+ S32 mv_pel;
+
+ /** Points to the ref bits lookup, 1 ptr for each PRED_Lx */
+ U08 **ppu1_ref_bits_tlu;
+
+ /**
+ * Points to the ref scale factor; for a given ref id k,
+ * to scale as per ref id m, we use entry k+MAX_NUM_REF*m
+ */
+ S16 *pi2_ref_scf;
+
+ /**
+ * Flag that indicates whether the T, TR and TL candidates used
+ * are causal or projected
+ */
+ U08 proj_used;
+
+} pred_ctxt_t;
+
+/**
+******************************************************************************
+ * @struct search_results_t
+ * @brief For a given CU/blk, stores all the results of the ME search. Results
+ * are stored per partition; also the best results for the CU are stored
+ * across partitions.
+******************************************************************************
+ */
+typedef struct
+{
+ /** Size of the CU for which this structure is used */
+ CU_SIZE_T e_cu_size;
+
+ /**
+ * X and y offsets w.r.t. CTB start in encode layers. For non encode
+ * layers, these may typically be 0
+ */
+ U08 u1_x_off;
+ U08 u1_y_off;
+
+ /** Number of best results stored for this CU */
+ U08 u1_num_best_results;
+
+ /** Number of results stored per partition. */
+ U08 u1_num_results_per_part;
+
+ /**
+ * Number of result planes active. This may be different from the total
+ * number of active references during search. For example, we may
+ * have 4 active refs, 2 in each dirn, but the active result planes may
+ * only be 2, one for L0 and one for L1
+ */
+ U08 u1_num_active_ref;
+ /**
+ * Mask of active partitions, totally 17 bits. For a given partition
+ * id, as per the PART_ID_T enum, the corresponding bit position is 1/0
+ * indicating that the partition is active or inactive
+ */
+ S32 i4_part_mask;
+
+ /** Points to partial results for each partition id
+ * Temporary hack for the bug: If +1 is not kept,
+ * it doesn't bit match with older version. NOTE(review): no +1 is visible in the declaration below - confirm
+ */
+ search_node_t *aps_part_results[MAX_NUM_REF][TOT_NUM_PARTS];
+
+ /**
+ * Ptr to the best results for the current CU post bi pred evaluation and
+ * intra mode insertions
+ */
+ inter_cu_results_t *ps_cu_results;
+
+ /** 2 pred ctxts, one for L0 and one for L1 */
+ pred_ctxt_t as_pred_ctxt[2];
+
+ /**
+ * Pointer to a table that indicates whether the ref id
+ * corresponds to the past or future dirn. Input is the ref id in Lc form
+ */
+
+ U08 *pu1_is_past;
+
+ /**
+ * Overall best CU cost. While other entries store CU costs
+ * in a single direction, this is the best CU cost, where each
+ * partition cost is evaluated as the best of uni/bi
+ */
+ S32 best_cu_cost;
+
+ /**
+ * Split flag which is used for deciding if a 16x16 CU is split or not
+ */
+ U08 u1_split_flag;
+} search_results_t;
+
+/**
+******************************************************************************
+ * @struct ctb_list_t
+ * @brief Tree structure containing info for the entire CTB. At the top level
+ * it points to the entire CTB results, with children nodes at each level
+ * being non null if split.
+******************************************************************************
+ */
+typedef struct ctb_list_t
+{
+ /** Indicates whether this level is split further */
+ U08 u1_is_split;
+
+ /** Number of result candidates present */
+ U08 u1_num_candts;
+
+ /**
+ * Whether this level is valid. E.g. if we are at a boundary, where only
+ * the left 2 32x32 are within the pic boundary, then the parent is force split;
+ * at the children level, TR and BR are invalid.
+ */
+ U08 u1_is_valid;
+
+ /**
+ * If this level is 16x16 then this mask indicates which 8x8 blks
+ * are valid
+ */
+ U08 u1_8x8_mask;
+
+ /** Search results of this CU */
+ search_results_t *ps_search_results;
+
+ /** Inter CU results (inter_cu_results_t) of this CU */
+ inter_cu_results_t *ps_cu_results;
+
+ /** Pointers to child nodes, if the CU is split further, else null */
+ struct ctb_list_t *ps_tl;
+ struct ctb_list_t *ps_tr;
+ struct ctb_list_t *ps_bl;
+ struct ctb_list_t *ps_br;
+} ctb_list_t;
+
+/**
+******************************************************************************
+ * @struct layer_mv_t
+ * @brief MV bank structure for a particular layer
+******************************************************************************
+ */
+typedef struct
+{
+ /** Number of mvs for a given ref/pred dirn */
+ S32 i4_num_mvs_per_ref;
+ /** Number of references for which results are stored */
+ S32 i4_num_ref;
+ /** Number of mvs stored per blk. Product of the above two */
+ S32 i4_num_mvs_per_blk;
+ /** Block size of the unit for which MVs are stored */
+ BLK_SIZE_T e_blk_size;
+ /** Number of blocks present per row */
+ S32 i4_num_blks_per_row;
+
+ /** Number of mvs stored every row */
+ S32 i4_num_mvs_per_row;
+
+ /**
+ * Max number of mvs allowed per row. The main purpose of this variable
+ * is to resolve or detect discrepancies between allocation time mem
+ * and run time mem, when the alloc time resolution and run time resolution
+ * may be different
+ */
+ S32 max_num_mvs_per_row;
+
+ /**
+ * Pointer to the mvs of the 0, 0 blk. This is different from the base since the
+ * mv bank is padded on all sides
+ */
+ hme_mv_t *ps_mv;
+
+ /** Pointer to the base of the mv bank mvs */
+ hme_mv_t *ps_mv_base;
+
+ /** Pointer to ref idx. One to one correspondence between this and ps_mv */
+ S08 *pi1_ref_idx;
+ /** Base of ref ids, just like in the case of ps_mv */
+ S08 *pi1_ref_idx_base;
+
+ /** Part mask for every blk, if stored, 1 per blk */
+ U08 *pu1_part_mask;
+} layer_mv_t;
+
+/**
+******************************************************************************
+ * @struct mv_hist_t
+ * @brief Histogram structure used to calculate global mvs; ai4_bin_count holds MAX_NUM_BINS bin counters
+******************************************************************************
+ */
+typedef struct
+{
+ S32 i4_num_rows;
+ S32 i4_num_cols;
+ S32 i4_shift_x;
+ S32 i4_shift_y;
+ S32 i4_lobe1_size;
+ S32 i4_lobe2_size;
+ S32 i4_min_x;
+ S32 i4_min_y;
+ S32 i4_num_bins;
+ S32 ai4_bin_count[MAX_NUM_BINS];
+} mv_hist_t;
+
+typedef struct /* Attributes of a reference; holds only the u1_is_past flag */
+{
+ U08 u1_is_past;
+} ref_attr_t;
+
+/**
+******************************************************************************
+ * @struct layer_ctxt_t
+ * @brief Complete information for a layer
+******************************************************************************
+ */
+typedef struct
+{
+ /** Display width of this layer */
+ S32 i4_disp_wd;
+ /** Display height of this layer */
+ S32 i4_disp_ht;
+ /** Width of this layer */
+ S32 i4_wd;
+ /** Height of this layer */
+ S32 i4_ht;
+ /** Amount of padding of input in x dirn */
+ S32 i4_pad_x_inp;
+ /** Amount of padding of input in y dirn */
+ S32 i4_pad_y_inp;
+ /** Amount of padding of recon in x dirn */
+ S32 i4_pad_x_rec;
+ /** Amount of padding of recon in y dirn */
+ S32 i4_pad_y_rec;
+
+ /**
+ * Offset for recon. Since recon has padding, the 0, 0 start differs
+ * from the base of the buffer
+ */
+ S32 i4_rec_offset;
+ /** Offset for input, same explanation as recon */
+ S32 i4_inp_offset;
+ /** Stride of input buffer */
+ S32 i4_inp_stride;
+ /** Stride of recon buffer */
+ S32 i4_rec_stride;
+ /** Pic order count */
+ S32 i4_poc;
+ /** Input pointer. */
+ U08 *pu1_inp;
+ /** Base of input. Add inp_offset to go to the 0, 0 locn */
+ U08 *pu1_inp_base;
+
+ /** Pointers to the 4 hpel recon planes */
+ U08 *pu1_rec_fxfy;
+ U08 *pu1_rec_hxfy;
+ U08 *pu1_rec_fxhy;
+ U08 *pu1_rec_hxhy;
+
+ /** Global mv, one set per reference searched */
+ hme_mv_t s_global_mv[MAX_NUM_REF][NUM_GMV_LOBES];
+
+ /** Layer MV bank */
+ layer_mv_t *ps_layer_mvbank;
+
+ /** Pointers to the lists of recon buffers for each ref id, one ptr per plane */
+ U08 **ppu1_list_rec_fxfy;
+ U08 **ppu1_list_rec_hxfy;
+ U08 **ppu1_list_rec_fxhy;
+ U08 **ppu1_list_rec_hxhy;
+
+ void **ppv_dep_mngr_recon;
+
+ /** Pointer to the list of input buffers for each ref id */
+ U08 **ppu1_list_inp;
+
+ /** Max MV in x and y direction supported at this layer resolution */
+ S16 i2_max_mv_x;
+ S16 i2_max_mv_y;
+
+ /** Converts ref id (as per Lc list) to POC */
+ S32 ai4_ref_id_to_poc_lc[MAX_NUM_REF];
+
+ S32 ai4_ref_id_to_disp_num[MAX_NUM_REF];
+
+ /** Status of the buffer */
+ S32 i4_is_free;
+
+ /** IDR gop number */
+ S32 i4_idr_gop_num;
+
+ /** Is reference picture */
+ S32 i4_is_reference;
+
+ /** Is non reference picture processed by ME */
+ S32 i4_non_ref_free;
+
+} layer_ctxt_t;
+
+typedef S32 (*PF_MV_COST_FXN)(search_node_t *, pred_ctxt_t *, PART_ID_T, S32); /* MV cost function pointer type, held in the pf_mv_cost_compute members below */
+
+/**
+ ******************************************************************************
+ * @struct refine_prms_t
+ * @brief All the configurable input parameters for the refinement layer
+ *
+ * @param i4_encode: Whether this layer is encoded or not
+ * @param explicit_ref: If enabled, then the number of reference frames to
+ * be searched is a function of coarsest layer num ref
+ frames. Else, number of references collapsed to 1/2
+ * @param i4_num_fpel_results : Number of full pel results to be allowed
+ * @param i4_num_results_per_part: Number of results stored per partition
+ * @param e_search_complexity: Decides the number of initial candts, refer
+ * to SEARCH_COMPLEXITY_T
+ * @param i4_use_rec_in_fpel: Whether to use input buf or recon buf in fpel
+ * @param i4_enable_4x4_part : if i4_encode is 0, we use 8x8 blks, if this param
+ enabled, then we do 4x4 partial sad update
+ * @param i4_layer_id : id of this layer (0 = finest)
+ * @param i4_num_32x32_merge_results: number of 32x32 merged results stored
+ * @param i4_num_64x64_merge_results: number of 64x64 merged results stored
+ * @param i4_use_satd_cu_merge: Use SATD during CU merge
+ * @param i4_num_steps_hpel_refine : Number of steps during hpel refinement
+ * @param i4_num_steps_qpel_refine : Same as above but for qpel
+ * @param i4_use_satd_subpel : Use of SATD or SAD for subpel
+ ******************************************************************************
+*/
+typedef struct
+{
+ /* This array is used to place upper bounds on the number of search candidates */
+ /* that can be used per 'search cand location' */
+ U08 au1_num_fpel_search_cands[NUM_SEARCH_CAND_LOCATIONS];
+
+ U08 u1_max_2nx2n_tu_recur_cands;
+
+ U08 u1_max_num_fpel_refine_centers;
+
+ U08 u1_max_num_subpel_refine_centers;
+
+ S32 i4_encode;
+ S32 explicit_ref;
+ S32 i4_num_ref_fpel;
+ S32 i4_num_fpel_results;
+
+ S32 i4_num_results_per_part;
+
+ S32 i4_num_mvbank_results;
+ SEARCH_COMPLEXITY_T e_search_complexity;
+ S32 i4_use_rec_in_fpel;
+
+ S32 i4_enable_4x4_part;
+ S32 i4_layer_id;
+
+ S32 i4_num_32x32_merge_results;
+ S32 i4_num_64x64_merge_results;
+
+ S32 i4_use_satd_cu_merge;
+
+ S32 i4_num_steps_post_refine_fpel;
+ S32 i4_num_steps_fpel_refine;
+ S32 i4_num_steps_hpel_refine;
+ S32 i4_num_steps_qpel_refine;
+ S32 i4_use_satd_subpel;
+
+ double *pd_intra_costs;
+ S32 bidir_enabled;
+ S32 lambda_inp;
+ S32 lambda_recon;
+ S32 lambda_q_shift;
+
+ S32 limit_active_partitions;
+
+ S32 sdi_threshold;
+
+ U08 u1_use_lambda_derived_from_min_8x8_act_in_ctb;
+
+ U08 u1_max_subpel_candts;
+
+ U08 u1_max_subpel_candts_2Nx2N;
+ U08 u1_max_subpel_candts_NxN;
+
+ U08 u1_subpel_candt_threshold;
+
+ /* Pointer to the array which holds the number of best results for
+ fpel refinement */
+ U08 *pu1_num_best_results;
+
+} refine_prms_t;
+
+/**
+******************************************************************************
+ * @struct coarse_prms_t
+ * @brief All the parameters passed to coarse layer search
+******************************************************************************
+ */
+typedef struct
+{
+ /** ID of this layer, typically N-1 where N is total number of layers */
+ S32 i4_layer_id;
+
+ /** Initial step size, valid if full search disabled */
+ S32 i4_start_step;
+
+ /** Maximum number of iterations to consider if full search disabled */
+ S32 i4_max_iters;
+
+ /** Number of reference frames to search */
+ S32 i4_num_ref;
+
+ /** Number of best results to maintain at this layer for projection */
+ S32 num_results;
+
+ /**
+ * Enable or disable full search. If disabled, we search around the initial
+ * candidates with early exit
+ */
+ S32 do_full_search;
+
+ /** Values of lambda and the Q format */
+ S32 lambda;
+ S32 lambda_q_shift;
+
+ /** Step size for full search 2/4 */
+ S32 full_search_step;
+
+} coarse_prms_t;
+
+typedef struct
+{
+ /**
+ * These pointers point to modified input, one each for one ref idx.
+ * Instead of weighting the reference, we weight the input with inverse
+ * wt and offset.
+ * +1 for storing non weighted input
+ */
+ U08 *apu1_wt_inp[MAX_NUM_REF + 1];
+
+ /* These are allocated once at the start of encoding */
+ /* These are necessary only if wt_pred is switched on */
+ /* Else, only a single buffer is used to store the */
+ /* unweighted input */
+ U08 *apu1_wt_inp_buf_array[MAX_NUM_REF + 1];
+
+ /** Stores the weights and offsets for each ref */
+ S32 a_wpred_wt[MAX_NUM_REF];
+ S32 a_inv_wpred_wt[MAX_NUM_REF];
+ S32 a_wpred_off[MAX_NUM_REF];
+ S32 wpred_log_wdc;
+
+ S32 ai4_shift_val[MAX_NUM_REF];
+} wgt_pred_ctxt_t;
+
+/**
+******************************************************************************
+ * @struct mv_refine_ctxt_t
+ * @brief This structure contains important parameters used in motion vector
+ refinement
+******************************************************************************
+ */
+typedef struct
+{
+ /* Added +7 in the array sizes below to make every array dimension
+ 16-byte aligned */
+ /** Cost of best candidate for each partition */
+ MEM_ALIGN16 WORD16 i2_tot_cost[2][TOT_NUM_PARTS + 7];
+
+ MEM_ALIGN16 WORD16 i2_stim_injected_cost[2][TOT_NUM_PARTS + 7];
+
+ /** Motion vector cost for the best candidate of each partition */
+ MEM_ALIGN16 WORD16 i2_mv_cost[2][TOT_NUM_PARTS + 7];
+ /** X component of the motion vector of the best candidate of each partition */
+ MEM_ALIGN16 WORD16 i2_mv_x[2][TOT_NUM_PARTS + 7];
+ /** Y component of the motion vector of the best candidate of each partition */
+ MEM_ALIGN16 WORD16 i2_mv_y[2][TOT_NUM_PARTS + 7];
+ /** Reference index of the best candidate of each partition */
+ MEM_ALIGN16 WORD16 i2_ref_idx[2][TOT_NUM_PARTS + 7];
+
+ /** Partition id for the various partitions */
+ WORD32 ai4_part_id[TOT_NUM_PARTS + 1];
+ /** Indicates the total number of valid partitions */
+ WORD32 i4_num_valid_parts;
+
+ /** Number of candidates to refine through */
+ WORD32 i4_num_search_nodes;
+
+ /** Stores the satd at the end of fullpel refinement */
+ WORD16 ai2_fullpel_satd[2][TOT_NUM_PARTS];
+} mv_refine_ctxt_t;
+
+typedef mv_refine_ctxt_t fullpel_refine_ctxt_t; /* alias: same layout, named for fullpel refinement */
+typedef mv_refine_ctxt_t subpel_refine_ctxt_t; /* alias: same layout, named for subpel refinement */
+/**
+******************************************************************************
+ * @struct hme_search_prms_t
+ * @brief All prms going to any fpel search
+******************************************************************************
+ */
+typedef struct
+{
+ /** for explicit search, indicates which ref frm to search */
+ /** for implicit search, indicates the prediction direction for search */
+ S08 i1_ref_idx;
+
+ /** Blk size used for search, and for which the search is done */
+ BLK_SIZE_T e_blk_size;
+
+ /** Number of init candts being searched */
+ S32 i4_num_init_candts;
+
+ S32 i4_num_steps_post_refine;
+
+ /**
+ * For coarser searches, bigger refinement is done around each candt;
+ * in these cases, this prm has the start step
+ */
+ S32 i4_start_step;
+
+ /** whether SATD is to be used for search */
+ S32 i4_use_satd;
+
+ /** if 1, we use recon frm for search (closed loop) */
+ S32 i4_use_rec;
+
+ /** bitmask of active partitions */
+ S32 i4_part_mask;
+
+ /** x and y offset of blk w.r.t. pic start */
+ S32 i4_x_off;
+ S32 i4_y_off;
+
+ /**
+ * max number of iterations to search if early exit not hit
+ * relevant only for coarser searches
+ */
+ S32 i4_max_iters;
+
+ /** pointer to str holding all results for this blk */
+ search_results_t *ps_search_results;
+
+ /** pts to str having all search candt with refinement info */
+ search_candt_t *ps_search_candts;
+ /** pts to str having valid mv range info for this blk */
+ range_prms_t *aps_mv_range[MAX_NUM_REF];
+ /** cost compute fxnptr */
+ PF_MV_COST_FXN pf_mv_cost_compute;
+
+ /** when this str is set up for full search, indicates step size for same */
+ S32 full_search_step;
+
+ /** stride of inp buffer */
+ S32 i4_inp_stride;
+
+ /** x and y offset of cu w.r.t. ctb start, set to 0 for non enc layer */
+ S32 i4_cu_x_off;
+ S32 i4_cu_y_off;
+
+ /** base pointer to the de-duplicated search nodes */
+ search_node_t *ps_search_nodes;
+
+ /** number of de-duplicated nodes to be searched */
+ S32 i4_num_search_nodes;
+
+ fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt;
+
+ U32 au4_src_variance[TOT_NUM_PARTS];
+
+ S32 i4_alpha_stim_multiplier;
+
+ U08 u1_is_cu_noisy;
+
+ ULWORD64 *pu8_part_src_sigmaX;
+ ULWORD64 *pu8_part_src_sigmaXSquared;
+
+} hme_search_prms_t;
+
+/**
+******************************************************************************
+ * @struct err_prms_t
+ * @brief This is input prms struct for SAD/SATD computation
+******************************************************************************
+ */
+typedef struct
+{
+ /** Ptr to input blk for which err computed */
+ U08 *pu1_inp;
+
+ U16 *pu2_inp; /* NOTE(review): presumably the 16-bit counterpart of pu1_inp - confirm */
+
+ /** Ptr to ref blk after adjusting for mv and coordinates in pic */
+ U08 *pu1_ref;
+
+ U16 *pu2_ref; /* NOTE(review): presumably the 16-bit counterpart of pu1_ref - confirm */
+
+ /** Stride of input buffer */
+ S32 i4_inp_stride;
+ /** Stride of ref buffer */
+ S32 i4_ref_stride;
+ /** Mask of active partitions. */
+ S32 i4_part_mask;
+ /** Mask of active grid pts. Refer to GRID_PT_T enum for bit posns */
+ S32 i4_grid_mask;
+ /**
+ * Pointer to SAD Grid where SADs for each partition are stored.
+ * The layout is as follows: If there are M total partitions
+ * and N active pts in the grid, then the first N results contain
+ * first partition, e.g. 2Nx2N. Next N results contain 2nd partition
+ * sad, e.g. 2NxN_T. Totally we have MxN results.
+ * Note: The active partition count may be lesser than M, still we
+ * have results for M partitions
+ */
+ S32 *pi4_sad_grid;
+
+ /** Pointer to TU_SPLIT grid flags */
+ S32 *pi4_tu_split_flags;
+
+ /** Pointer to the child's satd cost */
+ S32 *pi4_child_cost;
+
+ /** Pointer to the child's TU_split flags */
+ S32 *pi4_child_tu_split_flags;
+
+ /** Pointer to the child's TU_early_cbf flags */
+ S32 *pi4_child_tu_early_cbf;
+
+ /** Pointer to TU early CBF flags */
+ S32 *pi4_tu_early_cbf;
+
+ /** Pointer to the early cbf thresholds */
+ S32 *pi4_tu_early_cbf_threshold;
+
+ /** Store the DC value */
+ S32 i4_dc_val;
+
+ /** Block width and ht of the block being evaluated for SAD */
+ S32 i4_blk_wd;
+ S32 i4_blk_ht;
+
+ /**
+ * Array of valid partition ids. E.g. if 2 partitions active,
+ * then there will be 3 entries, 3rd entry being -1
+ */
+ S32 *pi4_valid_part_ids;
+ /** Step size of the grid */
+ S32 i4_step;
+
+ /* Number of partitions */
+ S32 i4_num_partitions;
+
+ /** Store the tu_split_flag cost */
+ S32 i4_tu_split_cost;
+
+ /** The max_depth for inter tu_tree */
+ U08 u1_max_tr_depth;
+
+ U08 u1_max_tr_size;
+
+ /** Scratch memory for doing hadamard */
+ U08 *pu1_wkg_mem;
+
+ ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list;
+
+} err_prms_t;
+
+typedef struct grid
+{
+ WORD32 num_grids; /* Number of grids to work with */
+ WORD32 ref_buf_stride; /* Buffer stride of reference buffer */
+ WORD32
+ grd_sz_y_x; /* Packed 16 bits each indicating grid spacing in y & x direction <--grid-size-y--><--grid-size-x--> */
+ UWORD8 **ppu1_ref_ptr; /* Center point for the grid search */
+ WORD32 *pi4_grd_mask; /* Mask indicating which grid points need to be evaluated */
+ hme_mv_t *p_mv; /* <--MVy--><--MVx--> */
+ WORD32 *p_ref_idx; /* Ref idx to which the grid is pointing */
+} grid_ctxt_t;
+
+typedef struct cand /* One search candidate: its MV, ref idx, owning grid and pointer */
+{
+ hme_mv_t mv; /* MV corresponding to the candidate <--MVy--><--MVx--> */
+ WORD32 ref_idx; /* Ref idx corresponding to the candidate */
+ WORD32 grid_ix; /* Index of the grid to which this candidate belongs */
+ UWORD8 *pu1_ref_ptr; /* Pointer to the candidate */
+} cand_t;
+
+/**
+******************************************************************************
+ * @struct hme_ctb_prms_t
+ * @brief Parameters to create the CTB list, which is a tree structure
+******************************************************************************
+ */
+typedef struct
+{
+ /**
+ * These parameters cover number of input 16x16, 32x32 and 64x64 results
+ * and the number of output results that are mix of all above CU sizes.
+ * i4_num_kxk_unified_out is relevant only if we are sending multiple CU
+ * sizes for same region for RD Opt.
+ */
+ S32 i4_num_16x16_in;
+ S32 i4_num_32x32_in;
+ S32 i4_num_32x32_unified_out;
+ S32 i4_num_64x64_in;
+ S32 i4_num_64x64_unified_out;
+
+ /** Pointers to results at different CU sizes */
+ search_results_t *ps_search_results_16x16;
+ search_results_t *ps_search_results_32x32;
+ search_results_t *ps_search_results_64x64;
+
+ S32 i4_num_part_type;
+
+ /** Indicates whether we have split at 64x64 level */
+ S32 i4_cu_64x64_split;
+ /** Indicates whether each of the 32x32 CUs is split */
+ S32 ai4_cu_32x32_split[4];
+
+ /** X and y offset of the CTB */
+ S32 i4_ctb_x;
+ S32 i4_ctb_y;
+
+ /**
+ * Memory manager for the CTB that is responsible for node allocation
+ * at a CU level
+ */
+ ctb_mem_mgr_t *ps_ctb_mem_mgr;
+
+ /** Buffer manager that is responsible for memory allocation (pred bufs) */
+ buf_mgr_t *ps_buf_mgr;
+} hme_ctb_prms_t;
+
+/**
+******************************************************************************
+ * @struct result_upd_prms_t
+ * @brief Parameters used when updating search results
+******************************************************************************
+ */
+typedef struct
+{
+ /** Cost computation function pointer */
+ PF_MV_COST_FXN pf_mv_cost_compute;
+
+ /** Points to the SAD grid updated during SAD compute fxn */
+ S32 *pi4_sad_grid;
+
+ /** Points to the TU_SPLIT grid updated during the SATD TU REC fxn */
+ S32 *pi4_tu_split_flags;
+
+ /**
+ * This is the central mv of the grid. For e.g. if we have a 3x3 grid,
+ * this covers the central pt's mv in the grid.
+ */
+ const search_node_t *ps_search_node_base;
+
+ /** Search results structure updated by the result update fxn */
+ search_results_t *ps_search_results;
+
+ /** List of active partitions, only these are processed and updated */
+ S32 *pi4_valid_part_ids;
+
+ /** Reference id for this candt and grid */
+ S08 i1_ref_idx;
+
+ /** Mask of active pts in the grid */
+ S32 i4_grid_mask;
+
+ /**
+ * For early exit reasons we may want to know the id of the least candt
+ * This will correspond to id of candt with least cost for 2Nx2N part,
+ * if multiple partitions enabled, or if 1 part enabled, it will be for
+ * id of candt of that partition
+ */
+ S32 i4_min_id;
+
+ /** Step size of the grid */
+ S32 i4_step;
+
+ /** Mask of active partitions */
+ S32 i4_part_mask;
+
+ /** Min cost corresponding to min id */
+ S32 i4_min_cost;
+
+ /** Store the motion vectors in qpel unit */
+ S16 i2_mv_x;
+
+ S16 i2_mv_y;
+
+ U08 u1_pred_lx;
+
+ subpel_refine_ctxt_t *ps_subpel_refine_ctxt;
+
+ /** Current candidate in the subpel refinement process */
+ search_node_t *ps_search_node;
+
+} result_upd_prms_t;
+
+/**
+******************************************************************************
+ * @struct mv_grid_t
+ * @brief Grid of MVs storing results for a CTB and neighbours. For a CTB
+ * of size 64x64, we may store up to 16x16 mvs (one for each 4x4)
+ * along with 1 neighbour on each side. Valid only for encode layer
+******************************************************************************
+ */
+typedef struct
+{
+ /** All the mvs in the grid */
+ search_node_t as_node[NUM_MVS_IN_CTB_GRID];
+
+ /** Stride of the grid */
+ S32 i4_stride;
+
+ /** Start offset of the 0,0 locn in CTB. */
+ S32 i4_start_offset;
+} mv_grid_t;
+
+typedef struct
+{
+ /* Centroid's (x, y) coordinates in Q8 format (applies to both members) */
+ WORD32 i4_pos_x_q8;
+
+ WORD32 i4_pos_y_q8;
+} centroid_t;
+
+typedef struct
+{
+ S16 min_x; /* NOTE(review): min/max x,y presumably bound the mvs in this cluster - confirm */
+
+ S16 min_y;
+
+ S16 max_x;
+
+ S16 max_y;
+
+ /* The cumulative sum of partition sizes of the mvs */
+ /* in this cluster */
+ S16 area_in_pixels;
+
+ S16 uni_mv_pixel_area;
+
+ S16 bi_mv_pixel_area;
+
+ mv_data_t as_mv[128];
+
+ U08 num_mvs; /* NOTE(review): presumably number of valid entries in as_mv - confirm */
+
+ /* Weighted average of all mvs in the cluster */
+ centroid_t s_centroid;
+
+ S08 ref_id;
+
+ S32 max_dist_from_centroid;
+
+ U08 is_valid_cluster;
+
+} cluster_data_t;
+
+typedef struct
+{
+ cluster_data_t as_cluster_data[MAX_NUM_CLUSTERS_16x16];
+
+ U08 num_clusters; /* NOTE(review): presumably number of entries used in as_cluster_data - confirm */
+
+ U08 au1_num_clusters[MAX_NUM_REF]; /* one count per ref (array sized MAX_NUM_REF) */
+
+ S16 intra_mv_area;
+
+ S32 best_inter_cost;
+
+} cluster_16x16_blk_t;
+
+typedef struct
+{
+ cluster_data_t as_cluster_data[MAX_NUM_CLUSTERS_32x32];
+
+ U08 num_clusters; /* NOTE(review): presumably number of entries used in as_cluster_data - confirm */
+
+ U08 au1_num_clusters[MAX_NUM_REF]; /* one count per ref (array sized MAX_NUM_REF) */
+
+ S16 intra_mv_area;
+
+ S08 best_uni_ref;
+
+ S08 best_alt_ref;
+
+ S32 best_inter_cost;
+
+ U08 num_refs;
+
+ U08 num_clusters_with_weak_sdi_density;
+
+} cluster_32x32_blk_t;
+
+typedef struct
+{
+ cluster_data_t as_cluster_data[MAX_NUM_CLUSTERS_64x64];
+
+ U08 num_clusters; /* NOTE(review): presumably number of entries used in as_cluster_data - confirm */
+
+ U08 au1_num_clusters[MAX_NUM_REF]; /* one count per ref (array sized MAX_NUM_REF) */
+
+ S16 intra_mv_area;
+
+ S08 best_uni_ref;
+
+ S08 best_alt_ref;
+
+ S32 best_inter_cost;
+
+ U08 num_refs;
+
+} cluster_64x64_blk_t;
+
+typedef struct
+{
+ cluster_16x16_blk_t *ps_16x16_blk; /* cluster data at 16x16 blk level */
+
+ cluster_32x32_blk_t *ps_32x32_blk; /* cluster data at 32x32 blk level */
+
+ cluster_64x64_blk_t *ps_64x64_blk; /* cluster data at 64x64 blk level */
+
+ cur_ctb_cu_tree_t *ps_cu_tree_root;
+ ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb;
+ S32 nodes_created_in_cu_tree;
+
+ S32 *pi4_blk_8x8_mask;
+
+ S32 blk_32x32_mask;
+
+ S32 sdi_threshold;
+
+ S32 i4_frame_qstep;
+
+ S32 i4_frame_qstep_multiplier;
+
+ U08 au1_is_16x16_blk_split[16]; /* NOTE(review): presumably one flag per 16x16 blk of the CTB - confirm */
+
+ S32 ai4_part_mask[16]; /* NOTE(review): presumably one partition mask per 16x16 blk - confirm */
+
+} ctb_cluster_info_t;
+
+/**
+******************************************************************************
+ * @struct hme_merge_prms_t
+ * @brief All parameters related to the merge process
+******************************************************************************
+ */
+typedef struct
+{
+ /**
+ * MV Range prms for the merged CU, this may have to be conservative
+ * in comparison to individual CUs
+ */
+ range_prms_t *aps_mv_range[MAX_NUM_REF];
+
+ /** Pointers to search results of 4 children CUs to be merged */
+ search_results_t *ps_results_tl;
+ search_results_t *ps_results_tr;
+ search_results_t *ps_results_bl;
+ search_results_t *ps_results_br;
+
+ search_results_t *ps_results_grandchild;
+
+ /** Pointer to search results of the parent CU updated during merge */
+ search_results_t *ps_results_merge;
+
+ inter_cu_results_t *ps_8x8_cu_results;
+
+ /** Layer related context */
+ layer_ctxt_t *ps_layer_ctxt;
+
+ inter_ctb_prms_t *ps_inter_ctb_prms;
+
+ /**
+ * Points to an array of pointers. This array in turn points to
+ * the active mv grid in each direction (L0/L1)
+ */
+ mv_grid_t **pps_mv_grid;
+
+ ctb_cluster_info_t *ps_cluster_info;
+
+ S08 *pi1_past_list;
+
+ S08 *pi1_future_list;
+
+ /** MV cost compute function */
+ PF_MV_COST_FXN pf_mv_cost_compute;
+
+ /** If segmentation info available for the parent block */
+ S32 i4_seg_info_avail;
+
+ /** Partition mask (if segmentation info available) */
+ S32 i4_part_mask;
+
+ /** Number of input results available for the merge proc from children */
+ S32 i4_num_inp_results;
+
+ /** Whether SATD to be used for fpel searches */
+ S32 i4_use_satd;
+
+ /**
+ * Number of result planes valid for this merge process. For example,
+ * for fpel search in encode layer, we may have only L0 and L1
+ */
+ S32 i4_num_ref;
+
+ /** Whether to use input or recon frm for search */
+ S32 i4_use_rec;
+
+ /** optimized mv grid flag : indicates if same mvgrid is used for both fpel and qpel
+ * This helps in copying fpel and qpel mv grid in pred context mv grid
+ */
+ S32 i4_mv_grid_opt;
+
+ /** ctb size, typically 32 or 64. NOTE(review): name suggests log2 of the size - confirm */
+ S32 log_ctb_size;
+
+ S32 i4_ctb_x_off;
+
+ S32 i4_ctb_y_off;
+
+ ME_QUALITY_PRESETS_T e_quality_preset;
+
+ S32 i4_num_pred_dir_actual;
+
+ U08 au1_pred_dir_searched[2];
+
+ S32 i4_alpha_stim_multiplier;
+
+ U08 u1_is_cu_noisy;
+
+} hme_merge_prms_t;
+
+/**
+******************************************************************************
+ * @struct mvbank_update_prms_t
+ * @brief Useful prms for updating the mv bank
+******************************************************************************
+ */
+typedef struct
+{
+ /** Number of references for which update to be done */
+ S32 i4_num_ref;
+
+ /**
+ * Search blk size that was used. If this is different from the blk
+ * size used in mv bank, then some replications or reductions may
+ * have to be done. E.g. if search blk size is 8x8 and result blk
+ * size is 4x4, then we have to update part NxN results to be
+ * used for update along with replication of 2Nx2N result in each
+ * of the 4 4x4 blks.
+ */
+ BLK_SIZE_T e_search_blk_size;
+
+ /**
+ * Redundant prm as it reflects differences between search blk size
+ * and mv blk size if any
+ */
+ S32 i4_shift;
+
+ S32 i4_num_active_ref_l0; /* number of active refs in L0 (per the name) */
+
+ S32 i4_num_active_ref_l1; /* number of active refs in L1 (per the name) */
+
+ S32 i4_num_results_to_store;
+} mvbank_update_prms_t;
+
+/**
+******************************************************************************
+ * @struct hme_subpel_prms_t
+ * @brief input and control prms for subpel refinement
+******************************************************************************
+ */
+typedef struct
+{
+ /** Relevant only for the case where we mix up results of diff cu sizes */
+ S32 i4_num_16x16_candts;
+ S32 i4_num_32x32_candts;
+ S32 i4_num_64x64_candts;
+
+ /** X and y offset of ctb w.r.t. start of pic */
+ S32 i4_ctb_x_off;
+ S32 i4_ctb_y_off;
+
+ /** Max Number of diamond steps for hpel and qpel refinement */
+ S32 i4_num_steps_hpel_refine;
+ S32 i4_num_steps_qpel_refine;
+
+ /** Whether SATD to be used or SAD to be used */
+ S32 i4_use_satd;
+
+ /**
+ * Input ptr. This is updated inside the subpel refinement by picking
+ * up correct address
+ */
+ void *pv_inp;
+
+ /**
+ * Pred buffer ptr, updated inside subpel refinement process. This
+ * location passed to the leaf fxn for copying the winner pred buf
+ */
+ U08 *pu1_pred;
+
+ /** Interpolation fxn sent by top layer, should exact qpel be desired */
+ PF_INTERP_FXN_T pf_qpel_interp;
+
+ /** Working mem passed to leaf fxns */
+ U08 *pu1_wkg_mem;
+
+ /** prediction buffer stride for leaf fxns to copy the pred winner buf */
+ S32 i4_pred_stride;
+
+ /** Type of input ; sizeof(UWORD8) => unidir refinement, else BIDIR */
+ S32 i4_inp_type;
+
+ /** Stride of input buf, updated inside subpel fxn */
+ S32 i4_inp_stride;
+
+ /**
+ * Pointer to the backward input ptr. This is also updated inside
+ * the subpel fxn. Needed for BIDIR refinement where modified input
+ * is 2I - P0
+ */
+ S16 *pi2_inp_bck;
+
+ /** Indicates if CU merge uses SATD / SAD */
+ S32 i4_use_satd_cu_merge;
+
+ /** valid MV range in hpel and qpel units */
+ range_prms_t *aps_mv_range_hpel[MAX_NUM_REF];
+ range_prms_t *aps_mv_range_qpel[MAX_NUM_REF];
+ /** Relevant only for mixed CU cases */
+ search_results_t *ps_search_results_16x16;
+ search_results_t *ps_search_results_32x32;
+ search_results_t *ps_search_results_64x64;
+
+ /** Cost computation fxn ptr */
+ PF_MV_COST_FXN pf_mv_cost_compute;
+
+ /** Whether BI mode is allowed for this pic (not allowed in P) */
+ S32 bidir_enabled;
+
+ /**
+ * Total number of references of current picture which is encoded
+ */
+ U08 u1_num_ref;
+
+ /**
+ * Number of candidates used for refinement
+ * If given 1 candidate, then 2Nx2N is chosen as the best candidate
+ */
+ U08 u1_max_subpel_candts;
+
+ U08 u1_subpel_candt_threshold;
+
+ ME_QUALITY_PRESETS_T e_me_quality_presets;
+
+ U08 u1_max_subpel_candts_2Nx2N;
+ U08 u1_max_subpel_candts_NxN;
+
+ U08 u1_max_num_subpel_refine_centers;
+
+ subpel_refine_ctxt_t *ps_subpel_refine_ctxt;
+
+ S32 i4_num_act_ref_l0;
+
+ S32 i4_num_act_ref_l1;
+
+ U08 u1_is_cu_noisy;
+} hme_subpel_prms_t;
+
+/**
+******************************************************************************
+ * @struct layers_descr_t
+ * @brief One such str exists for each ref and curr input in the me ctxt
+ * Has ctxt handles for all layers of a given POC
+******************************************************************************
+ */
+typedef struct
+{
+ /** Handles for all layers (array sized MAX_NUM_LAYERS). Entry 0 is finest layer */
+ layer_ctxt_t *aps_layers[MAX_NUM_LAYERS];
+} layers_descr_t;
+
+/**
+******************************************************************************
+ * @struct blk_ctb_attrs_t
+ * @brief The CTB is split into 16x16 blks. For each such blk, this str
+ * stores attributes of this blk w.r.t. ctb
+******************************************************************************
+ */
+typedef struct
+{
+ /**
+ * ID of the blk in the full ctb. Assuming the full ctb were coded,
+ * this indicates what is the blk num of this blk (in encode order)
+ * within the full ctb
+ */
+ U08 u1_blk_id_in_full_ctb;
+
+ /** x and y coordinates of this blk w.r.t. ctb base */
+ U08 u1_blk_x;
+ U08 u1_blk_y;
+ /**
+ * Mask of 8x8 blks that are active. Bits 0-3 for blks 0-3 in raster order
+ * within a 16x16 blk. This will be 0xf in interiors and < 0xf at right/bottom
+ * boundaries or at bottom-right corners, where we may not have full 16x16 blk
+ */
+ U08 u1_blk_8x8_mask;
+} blk_ctb_attrs_t;
+
+/**
+******************************************************************************
+ * @struct ctb_boundary_attrs_t
+ * @brief Depending on the location of ctb (rt boundary, bot boundary,
+ * bot rt corner, elsewhere) this picks out the appropriate
+ * attributes of the ctb
+******************************************************************************
+ */
+typedef struct
+{
+ /**
+ * 4 bit variable, one for each of the 4 possible 32x32s in a full ctb
+ * If any 32x32 is partially present / not present at boundaries, that
+ * bit posn will be 0
+ */
+ U08 u1_merge_to_32x32_flag;
+
+ /**
+ * 1 bit flag indicating whether it is a complete ctb or not, and
+ * consequently whether it can be merged to a full 64x64
+ */
+ U08 u1_merge_to_64x64_flag;
+
+ /** Number of valid 16x16 blks (includes those partially/fully present) */
+ U08 u1_num_blks_in_ctb;
+
+ /** 16 bit variable (held in an S32) indicating whether the corresponding 16x16 is valid */
+ S32 cu_16x16_valid_flag;
+
+ /**
+ * For possible 16 16x16 blks in a CTB, we have one attribute str for
+ * every valid blk. Tightly packed structure. For example,
+ * 0 1 4 5
+ * 2 3 6 7
+ * 8 9 12 13
+ * 10 11 14 15
+ * Assuming the ctb width is only 48, blks 5,7,13,15 are invalid
+ * Then we store attributes in the order: 0,1,2,3,4,6,8,9,10,11,12,14
+ */
+ blk_ctb_attrs_t as_blk_attrs[16];
+} ctb_boundary_attrs_t;
+
+typedef struct
+{
+ S32 sdi; /* NOTE(review): "sdi" is not expanded anywhere nearby - document the metric */
+
+ S32 ref_idx; /* NOTE(review): presumably ref idx of the outlier - confirm */
+
+ S32 cluster_id; /* NOTE(review): presumably id of the cluster concerned - confirm */
+} outlier_data_t;
+
+/**
+******************************************************************************
+ * @struct coarse_dyn_range_prms_t
+ * @brief The parameters for Dyn. Search Range in coarse ME
+******************************************************************************
+ */
+
+typedef struct
+{
+ /* TODO : size can be reduced, as not getting used for L0 */
+
+ /** Dynamic Search Range parameters per layer & ref_pic */
+ dyn_range_prms_t as_dyn_range_prms[MAX_NUM_LAYERS][MAX_NUM_REF];
+
+ /** Min y value normalized per POC distance, one per layer */
+ WORD16 i2_dyn_min_y_per_poc[MAX_NUM_LAYERS];
+ /** Max y value normalized per POC distance, one per layer */
+ WORD16 i2_dyn_max_y_per_poc[MAX_NUM_LAYERS];
+
+} coarse_dyn_range_prms_t;
+
+/**
+******************************************************************************
+ * @struct coarse_me_ctxt_t
+ * @brief Handle for Coarse ME
+******************************************************************************
+ */
+typedef struct
+{
+ /** Init search candts, 2 sets, one for 4x8 and one for 8x4 */
+ search_node_t s_init_search_node[MAX_INIT_CANDTS * 2];
+
+ /** For non enc layer, we search 8x8 blks and store results here */
+ search_results_t s_search_results_8x8;
+ /**
+ * Below arrays store input planes for each ref pic.
+ * These are duplications, and are present within layer ctxts, but
+ * kept here together for faster indexing during search
+ */
+ U08 *apu1_list_inp[MAX_NUM_LAYERS][MAX_NUM_REF];
+
+ /** Ptr to all layer context placeholder for curr pic encoded */
+ layers_descr_t *ps_curr_descr;
+
+ /** Ptr to all layer ctxt place holder for all pics */
+ layers_descr_t as_ref_descr[MAX_NUM_REF + 1 + NUM_BUFS_DECOMP_HME];
+
+ /**
+ * ME uses ref id lc to search multi ref. This TLU gets POC of
+ * the pic w.r.t. a given ref id
+ */
+ S32 ai4_ref_idx_to_poc_lc[MAX_NUM_REF];
+
+ /** use this array to get disp num from ref_idx. Used for L1 traqo **/
+ S32 ai4_ref_idx_to_disp_num[MAX_NUM_REF];
+
+ /** POC of pic encoded just before current */
+ S32 i4_prev_poc;
+
+ /** POC of current pic being encoded */
+ S32 i4_curr_poc;
+
+ /** Number of HME layers encode + non encode */
+ S32 num_layers;
+
+ /** Alloc time parameter, max ref frms used for this session */
+ S32 max_num_ref;
+
+ /**
+ * Number of layers that use explicit search. Explicit search means
+ * that each ref id is searched separately
+ */
+ S32 num_layers_explicit_search;
+
+ /**
+ * Maximum number of results maintained at any refinement layer
+ * search. Important from mem alloc perspective
+ */
+ S32 max_num_results;
+
+ /** Same as above but for coarse layer */
+ S32 max_num_results_coarse;
+
+ /** Array of flags, one per layer indicating whether layer is encoded */
+ U08 u1_encode[MAX_NUM_LAYERS];
+
+ /** Init prms sent by encoder during create time */
+ hme_init_prms_t s_init_prms;
+
+ /**
+ * Array look up created each frm, maintaining the corresponding
+ * layer descr look up for each ref id
+ */
+ S32 a_ref_to_descr_id[MAX_NUM_REF];
+
+ /**
+ * Array lookup created each frame that maps a given ref id
+ * pertaining to unified list to a L0/L1 list. Encoder searches in terms
+ * of LC list or in other words does not differentiate between L0
+ * and L1 frames for most of search. Finally to report results to
+ * encoder, the ref id has to be remapped to suitable list
+ */
+ S32 a_ref_idx_lc_to_l0[MAX_NUM_REF];
+ S32 a_ref_idx_lc_to_l1[MAX_NUM_REF];
+
+ /** Width and ht of each layer */
+ S32 a_wd[MAX_NUM_LAYERS];
+ S32 a_ht[MAX_NUM_LAYERS];
+
+ /** Histogram, one for each ref, allocated during create time */
+ mv_hist_t *aps_mv_hist[MAX_NUM_REF];
+
+ /** Whether a given ref id in Lc list is past frm or future frm */
+ U08 au1_is_past[MAX_NUM_REF];
+
+ /** These are L0 and L1 lists, storing ref id Lc in them */
+ S08 ai1_past_list[MAX_NUM_REF];
+ S08 ai1_future_list[MAX_NUM_REF];
+
+ /** Number of past and future ref pics sent this frm */
+ S32 num_ref_past;
+ S32 num_ref_future;
+
+ void *pv_ext_frm_prms;
+
+ hme_frm_prms_t *ps_hme_frm_prms;
+
+ hme_ref_map_t *ps_hme_ref_map;
+ /**
+ * Scale factor of any given ref lc to another ref in Q8
+ * First MAX_NUM_REF entries are to scale an mv of ref id k
+ * w.r.t. ref id 0 (approx 256 * POC delta(0) / POC delta(k))
+ * Next MAX_NUM_REF entries are to scale mv of ref id 1 w.r.t. 0
+ * And so on
+ */
+ S16 ai2_ref_scf[MAX_NUM_REF * MAX_NUM_REF];
+
+ /** bits for a given ref id, in either list L0/L1 */
+ U08 au1_ref_bits_tlu_lc[2][MAX_NUM_REF];
+
+ /** Points to above: 1 ptr for each list */
+ U08 *apu1_ref_bits_tlu_lc[2];
+
+ /** number of b frames between P, depends on number of hierarchy layers */
+ S32 num_b_frms;
+
+ /** Frame level qp passed every frame by ME's caller */
+ S32 frm_qstep;
+
+ /** Backup of frame parameters */
+ hme_frm_prms_t s_frm_prms;
+
+ /** Weighted prediction parameters for all references are stored
+ * Scratch buffers for populated weighted inputs are also stored in this
+ */
+ wgt_pred_ctxt_t s_wt_pred;
+
+ /** Weighted pred enable flag */
+ S32 i4_wt_pred_enable_flag;
+
+ /* Pointer to hold 5 rows of best search node information */
+ search_node_t *aps_best_search_nodes_4x8_n_rows[MAX_NUM_REF];
+
+ search_node_t *aps_best_search_nodes_8x4_n_rows[MAX_NUM_REF];
+
+ /* Pointer to hold rows of 4x4 SADs, one per ref (row count: see i4_num_row_bufs) */
+ S16 *api2_sads_4x4_n_rows[MAX_NUM_REF];
+
+ /* Number of row buffers to store SADs and best search nodes */
+ S32 i4_num_row_bufs;
+
+ /* (HEVCE_MAX_HEIGHT>>1) assuming layer 1 is coarse layer and >>2 assuming block size is 4x4 */
+ S32 ai4_row_index[(HEVCE_MAX_HEIGHT >> 1) >> 2];
+
+ /* store L1 cost required for rate control for enc decision */
+ S32 i4_L1_hme_best_cost;
+
+ /* store L1 cost required for modulation index calc*/
+ //S32 i4_L1_hme_best_cost_for_ref;
+
+ /* store L1 sad. NOTE(review): comment previously said "satd", name says sad - confirm */
+ S32 i4_L1_hme_sad;
+ /* EIID: layer1 buffer to store the early inter intra costs and decisions */
+ /* pic_level pointer stored here */
+ ihevce_ed_blk_t *ps_ed_blk;
+ /* EIID: layer1 buffer to store the sad/cost information for rate control
+ or cu level qp modulation */
+ ihevce_ed_ctb_l1_t *ps_ed_ctb_l1;
+ /** Dynamic Search Range parameters */
+ coarse_dyn_range_prms_t s_coarse_dyn_range_prms;
+
+ /** Dependency manager for Row level sync in HME pass */
+ void *apv_dep_mngr_hme_sync[MAX_NUM_HME_LAYERS - 1];
+
+ /* pointer buffers for memory mapping */
+ UWORD8 *pu1_me_reverse_map_info;
+
+ /* blk count which has higher SAD */
+ S32 i4_num_blks_high_sad;
+
+ /* num of 8x8 blocks in nearest poc */
+ S32 i4_num_blks;
+
+ /* thread id of the current context */
+ WORD32 thrd_id;
+
+ /* Should be typecast to a struct of type 'ihevce_me_optimised_function_list_t' */
+ void *pv_me_optimised_function_list;
+
+ ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list;
+
+} coarse_me_ctxt_t;
+
+/**
+******************************************************************************
+ * @struct l0_dyn_range_prms_t
+ * @brief The parameters for Dyn. Search Range in L0 ME
+******************************************************************************
+ */
+typedef struct
+{
+ /** Dynamical Search Range parameters per ref_pic (MAX_NUM_REF entries) */
+ dyn_range_prms_t as_dyn_range_prms[MAX_NUM_REF];
+
+ /** Min y value Normalized per POC distance */
+ WORD16 i2_dyn_min_y_per_poc;
+ /** Max y value Normalized per POC distance */
+ WORD16 i2_dyn_max_y_per_poc;
+
+ /* The number of ref. pic. actually used in L0. Used to communicate */
+ /* to ihevce_l0_me_frame_end and frame process */
+ WORD32 i4_num_act_ref_in_l0;
+
+ /* Display number */
+ WORD32 i4_display_num;
+
+} l0_dyn_range_prms_t;
+
+/**
+******************************************************************************
+ * @brief Inter prediction (MC) context for the ME loop
+******************************************************************************
+ */
+/* IMPORTANT: please keep inter_pred_ctxt_t and inter_pred_me_ctxt_t identical */
+typedef struct
+{
+ /** pointer to reference lists (array of HEVCE_MAX_REF_PICS * 2 recon_pic_buf_t pointers) */
+ recon_pic_buf_t *(*ps_ref_list)[HEVCE_MAX_REF_PICS * 2];
+
+ /** scratch buffer for horizontal interpolation destination */
+ WORD16 MEM_ALIGN16 ai2_horz_scratch[MAX_CTB_SIZE * (MAX_CTB_SIZE + 8)];
+
+ /** scratch 16 bit buffer for interpolation in l0 direction */
+ WORD16 MEM_ALIGN16 ai2_scratch_buf_l0[MAX_CTB_SIZE * MAX_CTB_SIZE];
+
+ /** scratch 16 bit buffer for interpolation in l1 direction */
+ WORD16 MEM_ALIGN16 ai2_scratch_buf_l1[MAX_CTB_SIZE * MAX_CTB_SIZE];
+
+ /** Pointer to struct containing function pointers to
+ functions in the 'common' library */
+ func_selector_t *ps_func_selector;
+
+ /** common denominator used for luma weights */
+ WORD32 i4_log2_luma_wght_denom;
+
+ /** common denominator used for chroma weights */
+ WORD32 i4_log2_chroma_wght_denom;
+
+ /** offset w.r.t frame start in horz direction (pels) */
+ WORD32 i4_ctb_frm_pos_x;
+
+ /** offset w.r.t frame start in vert direction (pels) */
+ WORD32 i4_ctb_frm_pos_y;
+
+ /* Bit Depth of Input */
+ WORD32 i4_bit_depth;
+
+ /* Chroma array type: 0 - 400; 1 - 420; 2 - 422; 3 - 444 */
+ UWORD8 u1_chroma_array_type;
+
+ /** weighted_pred_flag */
+ WORD8 i1_weighted_pred_flag;
+
+ /** weighted_bipred_flag */
+ WORD8 i1_weighted_bipred_flag;
+
+ /** Array describing extra CTBs around frame due to search
+ range associated with distributed-mode. Entries are top, left,
+ right and bottom */
+ WORD32 ai4_tile_xtra_pel[4];
+
+} inter_pred_me_ctxt_t;
+
+typedef void FT_CALC_SATD_AND_RESULT(err_prms_t *ps_prms, result_upd_prms_t *ps_result_prms);
+/* Table of ME function pointers: six entries sharing the FT_CALC_SATD_AND_RESULT signature above, followed by three HAD routines */
+typedef struct
+{
+ FT_CALC_SATD_AND_RESULT *pf_evalsatd_update_1_best_result_pt_pu_16x16_num_part_eq_1;
+ FT_CALC_SATD_AND_RESULT *pf_evalsatd_update_1_best_result_pt_pu_16x16_num_part_lt_9;
+ FT_CALC_SATD_AND_RESULT *pf_evalsatd_update_1_best_result_pt_pu_16x16_num_part_lt_17;
+ FT_CALC_SATD_AND_RESULT *pf_evalsatd_update_2_best_results_pt_pu_16x16_num_part_eq_1;
+ FT_CALC_SATD_AND_RESULT *pf_evalsatd_update_2_best_results_pt_pu_16x16_num_part_lt_9;
+ FT_CALC_SATD_AND_RESULT *pf_evalsatd_update_2_best_results_pt_pu_16x16_num_part_lt_17;
+ FT_HAD_8X8_USING_4_4X4_R *pf_had_8x8_using_4_4x4_r;
+ FT_HAD_16X16_R *pf_had_16x16_r;
+ FT_HAD_32X32_USING_16X16 *pf_compute_32x32HAD_using_16x16;
+} me_func_selector_t;
+
+/**
+******************************************************************************
+ * @struct me_frm_ctxt_t
+ * @brief Frame level context for ME
+******************************************************************************
+ */
+typedef struct
+{
+ /** Initial search candidates, 2 sets, one for 4x8 and one for 8x4 */
+ search_node_t s_init_search_node[MAX_INIT_CANDTS];
+
+ /** Motion Vectors array */
+ mv_t as_search_cand_mv[MAX_INIT_CANDTS];
+
+ /** Results of 16 16x16 blks within a CTB used in enc layer */
+ search_results_t as_search_results_16x16[16];
+
+ /** Results of 4 32x32 blks in a ctb for enc layer merge stage */
+ search_results_t as_search_results_32x32[4];
+
+ /** Same as above but for 64x64 blk */
+ search_results_t s_search_results_64x64;
+
+ /**
+ * Below arrays store input, 4 recon planes for each ref pic.
+ * These are duplications, and are present within layer ctxts, but
+ * kept here together for faster indexing during search
+ */
+
+ U08 *apu1_list_rec_fxfy[MAX_NUM_LAYERS][MAX_NUM_REF];
+ U08 *apu1_list_rec_hxfy[MAX_NUM_LAYERS][MAX_NUM_REF];
+ U08 *apu1_list_rec_fxhy[MAX_NUM_LAYERS][MAX_NUM_REF];
+ U08 *apu1_list_rec_hxhy[MAX_NUM_LAYERS][MAX_NUM_REF];
+ U08 *apu1_list_inp[MAX_NUM_LAYERS][MAX_NUM_REF];
+
+ void *apv_list_dep_mngr[MAX_NUM_LAYERS][MAX_NUM_REF];
+
+ /** Ptr to all layer context placeholder for curr pic encoded */
+ layers_descr_t *ps_curr_descr;
+
+ /**
+ * ME uses ref id lc to search multi ref. This TLU gets POC of
+ * the pic w.r.t. a given ref id
+ */
+ S32 ai4_ref_idx_to_poc_lc[MAX_NUM_REF];
+
+ /** POC of pic encoded just before current */
+ S32 i4_prev_poc;
+
+ /** POC of current pic being encoded */
+ S32 i4_curr_poc;
+
+ /** Buf mgr for memory allocation */
+ buf_mgr_t s_buf_mgr;
+
+ /** MV Grid for L0 and L1, this is active one used */
+ mv_grid_t as_mv_grid[2];
+
+ /**
+ * MV grid for FPEL and QPEL maintained separately. Depending on the
+ * correct prediction res. being used, copy appropriate results to
+ * the as_mv_grid structure
+ */
+ mv_grid_t as_mv_grid_fpel[2];
+ mv_grid_t as_mv_grid_qpel[2];
+
+ /** Number of HME layers encode + non encode */
+ S32 num_layers;
+
+ /** Alloc time parameter, max ref frms used for this session */
+ S32 max_num_ref;
+
+ /**
+ * Number of layers that use explicit search. Explicit search means
+ * that each ref id is searched separately
+ */
+ S32 num_layers_explicit_search;
+
+ /**
+ * Maximum number of results maintained at any refinement layer
+ * search. Important from mem alloc perspective
+ */
+ S32 max_num_results;
+
+ /** Same as above but for coarse layer */
+ S32 max_num_results_coarse;
+
+ /** Array of flags, one per layer indicating whether layer is encoded */
+ U08 u1_encode[MAX_NUM_LAYERS];
+
+ /* Parameters used for lambda computation */
+ frm_lambda_ctxt_t s_frm_lambda_ctxt;
+
+ /**
+ * Array look up created each frm, maintaining the corresponding
+ * layer descr look up for each ref id
+ */
+ S32 a_ref_to_descr_id[MAX_NUM_REF];
+
+ /**
+ * Array lookup created each frame that maps a given ref id
+ * pertaining to unified list to a L0/L1 list. Encoder searches in terms
+ * of LC list or in other words does not differentiate between L0
+ * and L1 frames for most of search. Finally to report results to
+ * encoder, the ref id has to be remapped to suitable list
+ */
+ S32 a_ref_idx_lc_to_l0[MAX_NUM_REF];
+ S32 a_ref_idx_lc_to_l1[MAX_NUM_REF];
+
+ /** Width and ht of each layer */
+ S32 i4_wd;
+ S32 i4_ht;
+
+ /** Histogram, one for each ref, allocated during create time */
+ mv_hist_t *aps_mv_hist[MAX_NUM_REF];
+
+ /**
+ * Back input requiring > 8 bit precision, allocated during
+ * create time, storing 2I-P0 for Bidir refinement
+ */
+ S16 *pi2_inp_bck;
+ ctb_boundary_attrs_t as_ctb_bound_attrs[NUM_CTB_BOUNDARY_TYPES];
+
+ /** Whether a given ref id in Lc list is past frm or future frm */
+ U08 au1_is_past[MAX_NUM_REF];
+
+ /** These are L0 and L1 lists, storing ref id Lc in them */
+ S08 ai1_past_list[MAX_NUM_REF];
+ S08 ai1_future_list[MAX_NUM_REF];
+
+ /** Number of past and future ref pics sent this frm */
+ S32 num_ref_past;
+ S32 num_ref_future;
+
+ /**
+ * Passed by encoder: base pointers to the ctb_analyse_t, cu tree and
+ * ME ctb data structures, and the corresponding running (current row)
+ * ptrs. NOTE(review): earlier text said these are "stored as void" to
+ * avoid encoder header inclusion, but the members below are typed.
+ */
+
+ ctb_analyse_t *ps_ctb_analyse_base;
+ cur_ctb_cu_tree_t *ps_cu_tree_base;
+ me_ctb_data_t *ps_me_ctb_data_base;
+
+ ctb_analyse_t *ps_ctb_analyse_curr_row;
+ cu_analyse_t *ps_cu_analyse_curr_row;
+ cur_ctb_cu_tree_t *ps_cu_tree_curr_row;
+ me_ctb_data_t *ps_me_ctb_data_curr_row;
+
+ /** Log2 of ctb size e.g. for 64 size, it will be 6 */
+ S32 log_ctb_size;
+
+ hme_frm_prms_t *ps_hme_frm_prms;
+
+ hme_ref_map_t *ps_hme_ref_map;
+
+ /**
+ * Scale factor of any given ref lc to another ref in Q8
+ * First MAX_NUM_REF entries are to scale an mv of ref id k
+ * w.r.t. ref id 0 (approx 256 * POC delta(0) / POC delta(k))
+ * Next MAX_NUM_REF entries are to scale mv of ref id 1 w.r.t. 0
+ * And so on
+ */
+ S16 ai2_ref_scf[MAX_NUM_REF * MAX_NUM_REF];
+
+ /** bits for a given ref id, in either list L0/L1 */
+ U08 au1_ref_bits_tlu_lc[2][MAX_NUM_REF];
+
+ /** Points to above: 1 ptr for each list */
+ U08 *apu1_ref_bits_tlu_lc[2];
+
+ /**
+ * Frame level base pointer to L0 IPE ctb analyze structures.
+ * These structures include the following
+ *
+ * 1. Best costs and modes at all levels of CTB (CU=8,16,32,64)
+ * 2. Recommended IPE intra CU sizes for this CTB size
+ * 3. Early intra/inter decision structures for all 8x8 blocks of CTB
+ * populated by L1-ME and L1-IPE
+ *
+ */
+ ipe_l0_ctb_analyse_for_me_t *ps_ipe_l0_ctb_frm_base;
+
+ /** array of ptrs to intra cost per layer encoded, stored at 8x8 */
+ double *apd_intra_cost[MAX_NUM_LAYERS];
+
+ /** number of b frames between P, depends on number of hierarchy layers */
+ S32 num_b_frms;
+
+ /** Frame level qp passed every frame by ME's caller */
+ S32 frm_qstep;
+
+ /** Frame level qp with higher precision : left shifted by 8 */
+ S32 qstep_ls8;
+
+ /** Backup of frame parameters */
+ hme_frm_prms_t s_frm_prms;
+
+ /** Weighted prediction parameters for all references are stored
+ * Scratch buffers for populated weighted inputs are also stored in this
+ */
+ wgt_pred_ctxt_t s_wt_pred;
+
+ /** Weighted pred enable flag */
+ S32 i4_wt_pred_enable_flag;
+
+ /** Results of 16 16x16 blks within a CTB used in enc layer */
+ inter_cu_results_t as_cu16x16_results[16];
+
+ /** Results of 4 32x32 blks in a ctb for enc layer merge stage */
+ inter_cu_results_t as_cu32x32_results[4];
+
+ /** Same as above but for 64x64 blk */
+ inter_cu_results_t s_cu64x64_results;
+
+ /** Results of 64 8x8 blks within a CTB used in enc layer */
+ inter_cu_results_t as_cu8x8_results[64];
+
+ WORD32 i4_is_prev_frame_reference;
+
+ rc_quant_t *ps_rc_quant_ctxt;
+
+ /** Dynamical Search Range parameters */
+ l0_dyn_range_prms_t as_l0_dyn_range_prms[NUM_SG_INTERLEAVED];
+
+ /** Dependency manager for Row level sync in L0 ME pass */
+ void *pv_dep_mngr_l0_me_sync;
+
+ /** Pointer to structure containing function pointers of encoder */
+ me_func_selector_t *ps_func_selector;
+
+ cluster_16x16_blk_t *ps_blk_16x16;
+
+ cluster_32x32_blk_t *ps_blk_32x32;
+
+ cluster_64x64_blk_t *ps_blk_64x64;
+
+ ctb_cluster_info_t *ps_ctb_cluster_info;
+
+ fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt;
+
+ /* thread id of the current context */
+ WORD32 thrd_id;
+
+ /* dependency manager for forward ME sync */
+ void *pv_dep_mngr_encloop_dep_me;
+ WORD32 i4_l0me_qp_mod;
+
+ /* MC ctxt to reuse luma inter pred function
+ for the purpose of TRAQO */
+ inter_pred_me_ctxt_t s_mc_ctxt;
+
+ WORD32 i4_rc_pass;
+ /* pic type */
+ WORD32 i4_pic_type;
+
+ WORD32 i4_temporal_layer;
+
+ WORD32 i4_count;
+
+ WORD32 i4_use_const_lamda_modifier;
+
+ double f_i_pic_lamda_modifier;
+
+ UWORD8 u1_is_curFrame_a_refFrame;
+
+ /* src_var related variables */
+ U32 au4_4x4_src_sigmaX[MAX_NUM_SIGMAS_4x4];
+ U32 au4_4x4_src_sigmaXSquared[MAX_NUM_SIGMAS_4x4];
+} me_frm_ctxt_t;
+
+/**
+******************************************************************************
+ * @struct me_ctxt_t
+ * @brief Handle for ME
+******************************************************************************
+ */
+typedef struct
+{
+ /** Init prms sent by encoder during create time */
+ hme_init_prms_t s_init_prms;
+
+ /** Not used in encoder, relevant to test bench */
+ U08 *pu1_debug_out;
+
+ void *pv_ext_frm_prms;
+
+ /* Frame level ME ctxt, one pointer per ME instance running in parallel (MAX_NUM_ME_PARALLEL) */
+ me_frm_ctxt_t *aps_me_frm_prms[MAX_NUM_ME_PARALLEL];
+
+ /** All layer ctxt place holders for all pics */
+ /** number of reference descriptors should be equal to max number of active references **/
+ layers_descr_t as_ref_descr[((DEFAULT_MAX_REFERENCE_PICS << 1) * MAX_NUM_ME_PARALLEL) + 1];
+
+ /* Should be typecast to a struct of type 'ihevce_me_optimised_function_list_t' */
+ void *pv_me_optimised_function_list;
+
+ ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list;
+
+ /* Pointer to Tile params base */
+ void *pv_tile_params_base;
+
+} me_ctxt_t;
+
+typedef struct
+{
+ /** array of coarse ME context pointers, one for each thread */
+ coarse_me_ctxt_t *aps_me_ctxt[MAX_NUM_FRM_PROC_THRDS_PRE_ENC];
+
+ /** memtabs storage memory */
+ hme_memtab_t as_memtabs[HME_COARSE_TOT_MEMTABS];
+
+ /** Frame level parameters for ME */
+ hme_frm_prms_t s_frm_prms;
+
+ /** Holds all reference mapping */
+ hme_ref_map_t s_ref_map;
+
+ /** number of threads created at run time */
+ WORD32 i4_num_proc_thrds;
+
+ /** Dependency manager for Row level sync in HME pass */
+ /* Note : Indexing should be like layer_id - 1 */
+ void *apv_dep_mngr_hme_sync[MAX_NUM_HME_LAYERS - 1];
+ /* Should be typecast to a struct of type 'ihevce_me_optimised_function_list_t' */
+ void *pv_me_optimised_function_list;
+
+ ihevce_cmn_opt_func_t s_cmn_opt_func;
+} coarse_me_master_ctxt_t;
+
+typedef struct
+{
+ /** array of ME context pointers, one for each thread */
+ me_ctxt_t *aps_me_ctxt[MAX_NUM_FRM_PROC_THRDS_ENC];
+
+ /** memtabs storage memory */
+ hme_memtab_t as_memtabs[MAX_HME_ENC_TOT_MEMTABS];
+
+ /** Frame level parameters for ME, one per ME frame running in parallel */
+ hme_frm_prms_t as_frm_prms[MAX_NUM_ME_PARALLEL];
+
+ /** Holds all reference mapping, one per ME frame running in parallel */
+ hme_ref_map_t as_ref_map[MAX_NUM_ME_PARALLEL];
+
+ /** number of threads created at run time */
+ WORD32 i4_num_proc_thrds;
+
+ /** number of me frames running in parallel */
+ WORD32 i4_num_me_frm_pllel;
+
+ /** Structure (held by value) containing function pointers of encoder */
+ me_func_selector_t s_func_selector;
+ /* Should be typecast to a struct of type 'ihevce_me_optimised_function_list_t' */
+ void *pv_me_optimised_function_list;
+
+ ihevce_cmn_opt_func_t s_cmn_opt_func;
+
+ /* Pointer to Tile params base */
+ void *pv_tile_params_base;
+
+} me_master_ctxt_t;
+/* Holds an mv (x, y), a ref idx and a node map; presumably used to skip already evaluated subpel points - TODO confirm */
+typedef struct
+{
+ S16 i2_mv_x;
+
+ S16 i2_mv_y;
+
+ U08 u1_ref_idx;
+ /* 2 * MAP_Y_MAX 32-bit words; NOTE(review): looks like a bitmap of visited nodes - confirm against users */
+ U32 au4_node_map[2 * MAP_Y_MAX];
+
+} subpel_dedup_enabler_t;
+
+typedef subpel_dedup_enabler_t hme_dedup_enabler_t; /* alias: same layout under an hme_ prefixed name */
+/* Bundle of inputs for setting up fullpel search candidates (going by the type name - TODO confirm against users) */
+typedef struct
+{
+ layer_ctxt_t *ps_curr_layer;
+
+ layer_ctxt_t *ps_coarse_layer;
+
+ U08 *pu1_num_fpel_search_cands;
+
+ S32 *pi4_ref_id_lc_to_l0_map;
+
+ S32 *pi4_ref_id_lc_to_l1_map;
+
+ S32 i4_pos_x;
+
+ S32 i4_pos_y;
+
+ S32 i4_num_act_ref_l0;
+
+ S32 i4_num_act_ref_l1;
+
+ search_candt_t *ps_search_cands;
+
+ U08 u1_search_candidate_list_index;
+
+ S32 i4_max_num_init_cands;
+
+ U08 u1_pred_dir;
+
+ /* Indicates the position of the current predDir in the processing order of predDir */
+ U08 u1_pred_dir_ctr;
+
+ /* The following 4 flags apply exclusively to spatial candidates */
+ U08 u1_is_topRight_available;
+
+ U08 u1_is_topLeft_available;
+
+ U08 u1_is_top_available;
+
+ U08 u1_is_left_available;
+
+ S08 i1_default_ref_id;
+
+ S08 i1_alt_default_ref_id;
+
+ U08 u1_num_results_in_mvbank;
+
+ BLK_SIZE_T e_search_blk_size;
+
+} fpel_srch_cand_init_data_t;
+/* Describes one prediction buffer: its pointer, its stride and an id of the buffer array it comes from (per member names) */
+typedef struct
+{
+ U08 *pu1_pred;
+
+ S32 i4_pred_stride;
+
+ U08 u1_pred_buf_array_id;
+
+} hme_pred_buf_info_t;
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+typedef void (*PF_SAD_FXN_T)(err_prms_t *); /* error fxn driven by err_prms_t alone */
+/* Same as PF_SAD_FXN_T but additionally receives the result update params */
+typedef void (*PF_SAD_RESULT_FXN_T)(err_prms_t *, result_upd_prms_t *ps_result_prms);
+/* Error fxn that also takes lambda, its Q shift, the frame qstep and the ME function table; returns a WORD32 */
+typedef WORD32 (*PF_SAD_FXN_TU_REC)(
+ err_prms_t *,
+ WORD32 lambda,
+ WORD32 lamda_q_shift,
+ WORD32 i4_frm_qstep,
+ me_func_selector_t *ps_func_selector);
+/* Result update fxn driven by result_upd_prms_t alone */
+typedef void (*PF_RESULT_FXN_T)(result_upd_prms_t *);
+/* Combined SAD and result fxn; the trailing U08 ** and S32 args are unnamed here, see the implementations for their meaning */
+typedef void (*PF_CALC_SAD_AND_RESULT)(
+ hme_search_prms_t *, wgt_pred_ctxt_t *, err_prms_t *, result_upd_prms_t *, U08 **, S32);
+
+#endif /* _HME_DEFS_H_ */
diff --git a/encoder/hme_err_compute.c b/encoder/hme_err_compute.c
new file mode 100644
index 0000000..febffce
--- /dev/null
+++ b/encoder/hme_err_compute.c
@@ -0,0 +1,3797 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*!
+***************************************************************************
+* \file hme_err_compute.c
+*
+* \brief
+* SAD / SATD routines for error computation
+*
+* Detailed_description : Contains various types of SAD/SATD routines for
+* error computation between a given input and reference ptr. The SAD
+* routines can evaluate for either a single point or a grid, and can
+* evaluate with either partial updates or no partial updates. Partial
+* updates means evaluating sub block SADs, e.g. 4 4x4 subblock SAD in
+* addition to the main 8x8 block SAD.
+*
+* \date
+* 22/9/2012
+*
+* \author Ittiam
+***************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+#include <limits.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_multi_thrd_funcs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_entropy_structs.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+#include "ihevce_enc_loop_structs.h"
+#include "ihevce_bs_compute_ctb.h"
+#include "ihevce_global_tables.h"
+#include "ihevce_dep_mngr_interface.h"
+#include "hme_datatype.h"
+#include "hme_interface.h"
+#include "hme_common_defs.h"
+#include "hme_defs.h"
+#include "ihevce_me_instr_set_router.h"
+#include "hme_globals.h"
+#include "hme_utils.h"
+#include "hme_coarse.h"
+#include "hme_refine.h"
+#include "hme_err_compute.h"
+#include "hme_common_utils.h"
+#include "hme_search_algo.h"
+#include "ihevce_stasino_helpers.h"
+
+/******************************************************************************
+* MACRO DEFINITIONS
+******************************************************************************/
+
+/*****************************************************************************/
+/* Theoretically, various types of SAD functions are needed for reasons of */
+/* optimality. SADs that are to be evaluated at a single pt can be more */
+/* optimal than SADs that are to be evaluated for a grid of 3x3. The SADs */
+/* to be evaluated at a grid are classified as separate functions, since */
+/* evaluating them on a single function call helps reuse inputs for a small */
+/* grid of 3x3. Also, if no partial updates are required, there are 3 basic */
+/* functions, width 4K (K = odd number), width 8K (K = odd number) and width */
+/* 16K, K any number. For partial updates, it is assumed that the block size */
+/* is square (8x8, 16x16, 32x32, 64x64) and further differentiation is done */
+/* based on the basic evaluation unit. E.g. if 16x16 blk size requires part */
+/* updates on AMP partitions, then basic SAD unit is 4x4, if it doesn't, */
+/* then basic SAD unit is 8x8. */
+/*****************************************************************************/
+/* Note: every UPD_RES_* variant below currently maps to the same routine, hme_update_results_grid_pu_bestn */
+#define UPD_RES_PT_NPU_BEST1 hme_update_results_grid_pu_bestn
+#define UPD_RES_PT_NPU_BESTN hme_update_results_grid_pu_bestn
+#define UPD_RES_PT_PU_BEST1 hme_update_results_grid_pu_bestn
+#define UPD_RES_PT_PU_BESTN hme_update_results_grid_pu_bestn
+#define UPD_RES_GRID_NPU_BEST1 hme_update_results_grid_pu_bestn
+#define UPD_RES_GRID_NPU_BESTN hme_update_results_grid_pu_bestn
+#define UPD_RES_GRID_PU_BEST1 hme_update_results_grid_pu_bestn
+#define UPD_RES_GRID_PU_BESTN hme_update_results_grid_pu_bestn
+
+/*******************************************************************************
+* FUNCTION DEFINITIONS
+*******************************************************************************/
+S32 hme_cmp_nodes(search_node_t *ps_best_node1, search_node_t *ps_best_node2)
+{ /* Equality test on two search nodes: returns 0 if they match, -1 otherwise (no ordering is implied) */
+ if((ps_best_node1->s_mv.i2_mvx == ps_best_node2->s_mv.i2_mvx) &&
+ (ps_best_node1->s_mv.i2_mvy == ps_best_node2->s_mv.i2_mvy) &&
+ (ps_best_node1->i1_ref_idx == ps_best_node2->i1_ref_idx))
+ {
+ return 0; /* same mvx, mvy and ref idx */
+ }
+ return -1; /* at least one of the three fields differs */
+}
+/* Builds the candidate list from the grid masks in ps_grid, then for each candidate computes 4x4 SADs against the current block and folds them into the 17 partition SADs. */
+void compute_4x4_sads_for_16x16_blk(
+ grid_ctxt_t *ps_grid, /* Grid ctxt */
+ UWORD8 *pu1_cur_ptr, /* Pointer to top-left of current block */
+ WORD32 cur_buf_stride, /* Buffer stride of current buffer */
+ UWORD16 **
+ u2_part_sads, /* 2D Array containing SADs for all 17 partitions. As many rows as partitions. SADs in a row correspond to each of the candidates */
+ cand_t *ps_cand, /* Return the list of candidates evaluated */
+ WORD32 *num_cands /* Number of candidates that were processed */
+)
+{
+ WORD32 a, b, c, d, i;
+ WORD16 grd_sz_y = (ps_grid->grd_sz_y_x & 0xFFFF0000) >> 16;
+ WORD16 grd_sz_x = (ps_grid->grd_sz_y_x & 0xFFFF);
+ /* grd_sz_y_x packs both grid steps: y in the upper 16 bits, x in the lower 16 bits (unpacked above). */
+ /* The two tables below hold each grid point's x/y displacement from the centre; index j matches mask bit j. */
+ /* Assumes the following order: C, L, T, R, B, TL, TR, BL, BR */
+ WORD32 offset_x[9] = { 0, -grd_sz_x, 0, grd_sz_x, 0, -grd_sz_x, grd_sz_x, -grd_sz_x, grd_sz_x };
+ WORD32 offset_y[9] = { 0, 0, -grd_sz_y, 0, grd_sz_y, -grd_sz_y, -grd_sz_y, grd_sz_y, grd_sz_y };
+ WORD32 ref_buf_stride = ps_grid->ref_buf_stride;
+ WORD32 cur_buf_stride_ls2 = (cur_buf_stride << 2); /* 4 rows of the current buffer */
+ WORD32 ref_buf_stride_ls2 = (ref_buf_stride << 2); /* 4 rows of the reference buffer */
+ cand_t *cand0 = ps_cand;
+ UWORD16 au2_4x4_sad[NUM_4X4]; /* per sub-block SAD accumulators, reused for every candidate */
+
+ *num_cands = 0;
+
+ /* Loop to fill up the cand_t array and to calculate num_cands */
+ for(i = 0; i < ps_grid->num_grids; i++)
+ {
+ WORD32 j;
+ WORD32 mask = ps_grid->pi4_grd_mask[i];
+ UWORD8 *pu1_ref_ptr_center = ps_grid->ppu1_ref_ptr[i];
+ WORD32 mv_x = ps_grid->p_mv[i].i2_mv_x;
+ WORD32 mv_y = (ps_grid->p_mv[i].i2_mv_y);
+
+ for(j = 0; j < NUM_CANDIDATES_IN_GRID; j++, mask >>= 1)
+ {
+ if(mask & 1) /* bit j set: grid point j of grid i becomes a candidate */
+ {
+ *num_cands = *num_cands + 1;
+ cand0->grid_ix = i;
+ cand0->ref_idx = ps_grid->p_ref_idx[i];
+ cand0->pu1_ref_ptr =
+ pu1_ref_ptr_center + offset_x[j] + ref_buf_stride * offset_y[j];
+ cand0->mv.i2_mv_x = (S16)(mv_x) + offset_x[j];
+ cand0->mv.i2_mv_y = (S16)(mv_y) + offset_y[j];
+ cand0++;
+ }
+ }
+ }
+
+ /* Loop to compute the SAD's */
+ for(a = 0; a < *num_cands; a++)
+ {
+ cand_t *cand = ps_cand + a;
+ memset(&au2_4x4_sad[0], 0, NUM_4X4 * sizeof(UWORD16));
+ for(b = 0; b < NUM_4X4; b++) /* sub-block b sits at column (b % 4), row (b >> 2) */
+ {
+ WORD32 t1 = (b % 4) * NUM_PIXELS_IN_ROW + (b >> 2) * cur_buf_stride_ls2;
+ WORD32 t2 = (b % 4) * NUM_PIXELS_IN_ROW + (b >> 2) * ref_buf_stride_ls2;
+
+ for(c = 0; c < NUM_ROWS_IN_4X4; c++)
+ {
+ WORD32 z_cur = (cur_buf_stride)*c + t1;
+ WORD32 z_ref = (ref_buf_stride)*c + t2;
+ for(d = 0; d < NUM_PIXELS_IN_ROW; d++)
+ {
+ au2_4x4_sad[b] += (UWORD16)ABS(
+ (((S32)cand->pu1_ref_ptr[(z_ref + d)]) - ((S32)pu1_cur_ptr[(z_cur + d)])));
+ }
+ }
+ }
+ /* Fold the sub-block SADs (4 per row, row-major index) into the 17 partition SADs of candidate a */
+ u2_part_sads[PART_ID_NxN_TL][a] =
+ (au2_4x4_sad[0] + au2_4x4_sad[1] + au2_4x4_sad[4] + au2_4x4_sad[5]);
+ u2_part_sads[PART_ID_NxN_TR][a] =
+ (au2_4x4_sad[2] + au2_4x4_sad[3] + au2_4x4_sad[6] + au2_4x4_sad[7]);
+ u2_part_sads[PART_ID_NxN_BL][a] =
+ (au2_4x4_sad[8] + au2_4x4_sad[9] + au2_4x4_sad[12] + au2_4x4_sad[13]);
+ u2_part_sads[PART_ID_NxN_BR][a] =
+ (au2_4x4_sad[10] + au2_4x4_sad[11] + au2_4x4_sad[14] + au2_4x4_sad[15]);
+ u2_part_sads[PART_ID_Nx2N_L][a] =
+ u2_part_sads[PART_ID_NxN_TL][a] + u2_part_sads[PART_ID_NxN_BL][a];
+ u2_part_sads[PART_ID_Nx2N_R][a] =
+ u2_part_sads[PART_ID_NxN_TR][a] + u2_part_sads[PART_ID_NxN_BR][a];
+ u2_part_sads[PART_ID_2NxN_T][a] =
+ u2_part_sads[PART_ID_NxN_TR][a] + u2_part_sads[PART_ID_NxN_TL][a];
+ u2_part_sads[PART_ID_2NxN_B][a] =
+ u2_part_sads[PART_ID_NxN_BR][a] + u2_part_sads[PART_ID_NxN_BL][a];
+ u2_part_sads[PART_ID_nLx2N_L][a] =
+ (au2_4x4_sad[8] + au2_4x4_sad[0] + au2_4x4_sad[12] + au2_4x4_sad[4]); /* leftmost column of sub-blocks */
+ u2_part_sads[PART_ID_nRx2N_R][a] =
+ (au2_4x4_sad[3] + au2_4x4_sad[7] + au2_4x4_sad[15] + au2_4x4_sad[11]); /* rightmost column of sub-blocks */
+ u2_part_sads[PART_ID_2NxnU_T][a] =
+ (au2_4x4_sad[1] + au2_4x4_sad[0] + au2_4x4_sad[2] + au2_4x4_sad[3]); /* top row of sub-blocks */
+ u2_part_sads[PART_ID_2NxnD_B][a] =
+ (au2_4x4_sad[15] + au2_4x4_sad[14] + au2_4x4_sad[12] + au2_4x4_sad[13]); /* bottom row of sub-blocks */
+ u2_part_sads[PART_ID_2Nx2N][a] =
+ u2_part_sads[PART_ID_2NxN_T][a] + u2_part_sads[PART_ID_2NxN_B][a];
+ u2_part_sads[PART_ID_2NxnU_B][a] =
+ u2_part_sads[PART_ID_2Nx2N][a] - u2_part_sads[PART_ID_2NxnU_T][a]; /* larger AMP halves = 2Nx2N minus the narrow strip */
+ u2_part_sads[PART_ID_2NxnD_T][a] =
+ u2_part_sads[PART_ID_2Nx2N][a] - u2_part_sads[PART_ID_2NxnD_B][a];
+ u2_part_sads[PART_ID_nRx2N_L][a] =
+ u2_part_sads[PART_ID_2Nx2N][a] - u2_part_sads[PART_ID_nRx2N_R][a];
+ u2_part_sads[PART_ID_nLx2N_R][a] =
+ u2_part_sads[PART_ID_2Nx2N][a] - u2_part_sads[PART_ID_nLx2N_L][a];
+ }
+}
+
+/**
+********************************************************************************
+* @fn compute_part_sads_for_MxM_blk(grid_ctxt_t *ps_grid,
+* UWORD8 *pu1_cur_ptr,
+* WORD32 cur_buf_stride,
+* WORD32 **pp_part_sads,
+* cand_t *ps_cand,
+* WORD32 *num_cands,
+* CU_SIZE_T e_cu_size)
+* @brief Computes partial SADs and updates partition results for an MxM blk
+* and does so for several grids of points. This can be used for
+* 32x32/64x64 blks with 17 partition updates
+*
+* @param[in] ps_grid : Pointer to grid ctxt that has multiple grid of max
+* 9 pts per grid
+* @param[in] pu1_cur_ptr : Top left of input buffer
+* @param[in] cur_buf_stride : Stride of the input buffer
+* @param[out] pp_part_sads : array of pointers, each entry pointing to
+* results to be updated for a given partition (one SAD per candidate)
+* @param[out] ps_cand : list of candidates evaluated (grid ix, ref idx, ref ptr, mv)
+* @param[out] num_cands : number of candidates written to ps_cand
+* @param[in] e_cu_size : CU size, used directly as a shift: nxn unit has n = 2 << e_cu_size
+*
+* @return None. Results are written through pp_part_sads, ps_cand and num_cands
+
+********************************************************************************
+*/
+void compute_part_sads_for_MxM_blk(
+ grid_ctxt_t *ps_grid,
+ UWORD8 *pu1_cur_ptr,
+ WORD32 cur_buf_stride,
+ WORD32 **pp_part_sads,
+ cand_t *ps_cand,
+ WORD32 *num_cands,
+ CU_SIZE_T e_cu_size)
+{
+ WORD32 a, b, c, d, i;
+ WORD16 grd_sz_y = (ps_grid->grd_sz_y_x & 0xFFFF0000) >> 16; /* upper 16 bits: grid step in y */
+ WORD16 grd_sz_x = (ps_grid->grd_sz_y_x & 0xFFFF); /* lower 16 bits: grid step in x */
+
+ /* Assumes the following order: C, L, T, R, B, TL, TR, BL, BR */
+ WORD32 offset_x[9] = { 0, -grd_sz_x, 0, grd_sz_x, 0, -grd_sz_x, grd_sz_x, -grd_sz_x, grd_sz_x };
+ WORD32 offset_y[9] = { 0, 0, -grd_sz_y, 0, grd_sz_y, -grd_sz_y, -grd_sz_y, grd_sz_y, grd_sz_y };
+ WORD32 shift = (WORD32)e_cu_size;
+
+ WORD32 ref_buf_stride = ps_grid->ref_buf_stride;
+ WORD32 cur_buf_stride_lsN = (cur_buf_stride << (1 + shift)); /* one row of nxn units in the current buffer */
+ WORD32 ref_buf_stride_lsN = (ref_buf_stride << (1 + shift)); /* one row of nxn units in the reference buffer */
+ /* Num rows and pixels per row: 8 for CU_32x32 and 16 for CU_64x64 */
+ WORD32 num_rows_in_nxn = 2 << shift;
+ WORD32 num_pixels_in_row = 2 << shift;
+ cand_t *cand0 = ps_cand;
+ /* for a 2Nx2N partition we evaluate nxn SADs, where n = N/2. This is */
+ /* needed for AMP cases. */
+ WORD32 a_nxn_sad[NUM_4X4];
+ *num_cands = 0;
+
+ /* Loop to fill up the cand_t array and to calculate num_cands */
+ for(i = 0; i < ps_grid->num_grids; i++)
+ {
+ WORD32 j;
+ WORD32 mask = ps_grid->pi4_grd_mask[i];
+ UWORD8 *pu1_ref_ptr_center = ps_grid->ppu1_ref_ptr[i];
+ WORD32 mv_x = ps_grid->p_mv[i].i2_mv_x;
+ WORD32 mv_y = (ps_grid->p_mv[i].i2_mv_y);
+
+ for(j = 0; j < NUM_CANDIDATES_IN_GRID; j++, mask >>= 1)
+ {
+ if(mask & 1) /* bit j set: grid point j of grid i becomes a candidate */
+ {
+ *num_cands = *num_cands + 1;
+ cand0->grid_ix = i;
+ cand0->ref_idx = ps_grid->p_ref_idx[i];
+ cand0->pu1_ref_ptr =
+ pu1_ref_ptr_center + offset_x[j] + ref_buf_stride * offset_y[j];
+ cand0->mv.i2_mv_x = (S16)(mv_x) + offset_x[j];
+ cand0->mv.i2_mv_y = (S16)(mv_y) + offset_y[j];
+ cand0++;
+ }
+ }
+ }
+
+ /* Loop to compute the SAD's */
+ for(a = 0; a < *num_cands; a++)
+ {
+ cand_t *cand = ps_cand + a;
+ memset(&a_nxn_sad[0], 0, NUM_4X4 * sizeof(WORD32));
+ for(b = 0; b < NUM_4X4; b++) /* nxn unit b sits at column (b % 4), row (b >> 2) */
+ {
+ WORD32 t1 = (b % 4) * num_pixels_in_row + (b >> 2) * cur_buf_stride_lsN;
+ WORD32 t2 = (b % 4) * num_pixels_in_row + (b >> 2) * ref_buf_stride_lsN;
+
+ for(c = 0; c < num_rows_in_nxn; c++)
+ {
+ WORD32 z_cur = (cur_buf_stride)*c + t1;
+ WORD32 z_ref = (ref_buf_stride)*c + t2;
+ for(d = 0; d < num_pixels_in_row; d++)
+ {
+ a_nxn_sad[b] += (WORD32)ABS(
+ (((WORD32)cand->pu1_ref_ptr[(z_ref + d)]) -
+ ((WORD32)pu1_cur_ptr[(z_cur + d)])));
+ }
+ }
+ }
+ /* Fold the nxn SADs (4 per row, row-major index) into the 17 partition SADs of candidate a */
+ pp_part_sads[PART_ID_NxN_TL][a] =
+ (a_nxn_sad[0] + a_nxn_sad[1] + a_nxn_sad[4] + a_nxn_sad[5]);
+ pp_part_sads[PART_ID_NxN_TR][a] =
+ (a_nxn_sad[2] + a_nxn_sad[3] + a_nxn_sad[6] + a_nxn_sad[7]);
+ pp_part_sads[PART_ID_NxN_BL][a] =
+ (a_nxn_sad[8] + a_nxn_sad[9] + a_nxn_sad[12] + a_nxn_sad[13]);
+ pp_part_sads[PART_ID_NxN_BR][a] =
+ (a_nxn_sad[10] + a_nxn_sad[11] + a_nxn_sad[14] + a_nxn_sad[15]);
+ pp_part_sads[PART_ID_Nx2N_L][a] =
+ pp_part_sads[PART_ID_NxN_TL][a] + pp_part_sads[PART_ID_NxN_BL][a];
+ pp_part_sads[PART_ID_Nx2N_R][a] =
+ pp_part_sads[PART_ID_NxN_TR][a] + pp_part_sads[PART_ID_NxN_BR][a];
+ pp_part_sads[PART_ID_2NxN_T][a] =
+ pp_part_sads[PART_ID_NxN_TR][a] + pp_part_sads[PART_ID_NxN_TL][a];
+ pp_part_sads[PART_ID_2NxN_B][a] =
+ pp_part_sads[PART_ID_NxN_BR][a] + pp_part_sads[PART_ID_NxN_BL][a];
+ pp_part_sads[PART_ID_nLx2N_L][a] =
+ (a_nxn_sad[8] + a_nxn_sad[0] + a_nxn_sad[12] + a_nxn_sad[4]); /* leftmost column of nxn units */
+ pp_part_sads[PART_ID_nRx2N_R][a] =
+ (a_nxn_sad[3] + a_nxn_sad[7] + a_nxn_sad[15] + a_nxn_sad[11]); /* rightmost column of nxn units */
+ pp_part_sads[PART_ID_2NxnU_T][a] =
+ (a_nxn_sad[1] + a_nxn_sad[0] + a_nxn_sad[2] + a_nxn_sad[3]); /* top row of nxn units */
+ pp_part_sads[PART_ID_2NxnD_B][a] =
+ (a_nxn_sad[15] + a_nxn_sad[14] + a_nxn_sad[12] + a_nxn_sad[13]); /* bottom row of nxn units */
+ pp_part_sads[PART_ID_2Nx2N][a] =
+ pp_part_sads[PART_ID_2NxN_T][a] + pp_part_sads[PART_ID_2NxN_B][a];
+ pp_part_sads[PART_ID_2NxnU_B][a] =
+ pp_part_sads[PART_ID_2Nx2N][a] - pp_part_sads[PART_ID_2NxnU_T][a]; /* larger AMP halves = 2Nx2N minus the narrow strip */
+ pp_part_sads[PART_ID_2NxnD_T][a] =
+ pp_part_sads[PART_ID_2Nx2N][a] - pp_part_sads[PART_ID_2NxnD_B][a];
+ pp_part_sads[PART_ID_nRx2N_L][a] =
+ pp_part_sads[PART_ID_2Nx2N][a] - pp_part_sads[PART_ID_nRx2N_R][a];
+ pp_part_sads[PART_ID_nLx2N_R][a] =
+ pp_part_sads[PART_ID_2Nx2N][a] - pp_part_sads[PART_ID_nLx2N_L][a];
+ }
+}
+
+/**
+********************************************************************************
+*  @fn     hme_evalsad_grid_pu_16x16(err_prms_t *ps_prms)
+*
+*  @brief  Wraps compute_4x4_sads_for_16x16_blk(): builds a one-grid context
+*          from ps_prms, collects 16-bit SADs for every partition and every
+*          enabled grid point in a local table, then copies that table,
+*          widened to 32 bit, into ps_prms->pi4_sad_grid.
+*
+*  @param[in,out] ps_prms : reads pu1_inp, pu1_ref, i4_inp_stride,
+*                 i4_ref_stride, i4_step and i4_grid_mask; writes pi4_sad_grid
+*
+*  @return None
+********************************************************************************
+*/
+void hme_evalsad_grid_pu_16x16(err_prms_t *ps_prms)
+{
+    grid_ctxt_t s_ctxt;
+    cand_t as_cands[9];
+    U16 au2_sads[TOT_NUM_PARTS * 9];
+    U16 *apu2_part_sads[TOT_NUM_PARTS];
+    U16 *pu2_row;
+    hme_mv_t s_centre_mv = { 0, 0 };
+    S32 i4_centre_ref = 0;
+    S32 i4_num_cands = 0;
+    S32 i4_total;
+    S32 idx;
+
+    /* Single grid centred on a zero mv / ref idx 0, step taken from ps_prms */
+    s_ctxt.num_grids = 1;
+    s_ctxt.ref_buf_stride = ps_prms->i4_ref_stride;
+    s_ctxt.grd_sz_y_x = ((ps_prms->i4_step << 16) | ps_prms->i4_step);
+    s_ctxt.ppu1_ref_ptr = &ps_prms->pu1_ref;
+    s_ctxt.pi4_grd_mask = &ps_prms->i4_grid_mask;
+    s_ctxt.p_mv = &s_centre_mv;
+    s_ctxt.p_ref_idx = &i4_centre_ref;
+
+    /* Number of grid points enabled in the 9-bit mask */
+    for(idx = 0; idx < 9; idx++)
+    {
+        if(ps_prms->i4_grid_mask & (1 << idx))
+        {
+            i4_num_cands++;
+        }
+    }
+
+    /* Row k of the SAD table holds i4_num_cands entries for partition k */
+    pu2_row = &au2_sads[0];
+    for(idx = 0; idx < TOT_NUM_PARTS; idx++)
+    {
+        apu2_part_sads[idx] = pu2_row;
+        pu2_row += i4_num_cands;
+    }
+
+    compute_4x4_sads_for_16x16_blk(
+        &s_ctxt,
+        ps_prms->pu1_inp,
+        ps_prms->i4_inp_stride,
+        apu2_part_sads,
+        as_cands,
+        &i4_num_cands);
+
+    /* The callee receives i4_num_cands by pointer, so use its updated value */
+    i4_total = TOT_NUM_PARTS * i4_num_cands;
+    for(idx = 0; idx < i4_total; idx++)
+    {
+        ps_prms->pi4_sad_grid[idx] = au2_sads[idx];
+    }
+}
+
+/**
+********************************************************************************
+*  @fn     hme_evalsad_grid_npu_MxN(err_prms_t *ps_prms)
+*
+*  @brief  Computes the SAD of one i4_blk_wd x i4_blk_ht block against the
+*          reference at each grid point enabled in i4_grid_mask. Results are
+*          written consecutively, in increasing grid id order, starting at
+*          pi4_sad_grid[pi4_valid_part_ids[0] * number_of_enabled_points].
+*
+*  @param[in,out] ps_prms : i4_part_mask must have at most one bit set
+*                 (asserted); reads the input / reference pointers and strides,
+*                 i4_step, i4_grid_mask and the block size
+*
+*  @return None
+********************************************************************************
+*/
+void hme_evalsad_grid_npu_MxN(err_prms_t *ps_prms)
+{
+    S32 *pi4_dst = ps_prms->pi4_sad_grid;
+    S32 i4_step = ps_prms->i4_step;
+    S32 i4_dx = i4_step;
+    S32 i4_dy = i4_step * ps_prms->i4_ref_stride;
+    S32 i4_num_pts = 0;
+    S32 pt;
+    U08 *pu1_src_blk, *pu1_ref_centre;
+
+    ASSERT((ps_prms->i4_part_mask & (ps_prms->i4_part_mask - 1)) == 0);
+
+    /* Count enabled grid points to locate this partition's output slot */
+    for(pt = 0; pt < 9; pt++)
+    {
+        if(ps_prms->i4_grid_mask & (1 << pt))
+        {
+            i4_num_pts++;
+        }
+    }
+    pi4_dst += (ps_prms->pi4_valid_part_ids[0] * i4_num_pts);
+
+    pu1_src_blk = ps_prms->pu1_inp;
+    pu1_ref_centre = ps_prms->pu1_ref;
+
+    for(pt = 0; pt < 9; pt++)
+    {
+        if(ps_prms->i4_grid_mask & (1 << pt))
+        {
+            U08 *pu1_src_row = pu1_src_blk;
+            U08 *pu1_ref_row = pu1_ref_centre + i4_dx * gai1_grid_id_to_x[pt];
+            S32 i4_sad = 0;
+            S32 row, col;
+
+            pu1_ref_row += i4_dy * gai1_grid_id_to_y[pt];
+
+            for(row = 0; row < ps_prms->i4_blk_ht; row++)
+            {
+                for(col = 0; col < ps_prms->i4_blk_wd; col++)
+                {
+                    i4_sad += (ABS((pu1_src_row[col] - pu1_ref_row[col])));
+                }
+                pu1_src_row += ps_prms->i4_inp_stride;
+                pu1_ref_row += ps_prms->i4_ref_stride;
+            }
+
+            *pi4_dst = i4_sad;
+            pi4_dst++;
+        }
+    }
+}
+
+/**
+********************************************************************************
+*  @fn     hme_evalsad_pt_npu_MxN_8bit_compute(ht, wd, pu1_inp, pu1_ref,
+*                                              i4_inp_stride, i4_ref_stride)
+*
+*  @brief  Sum of absolute differences between two wd x ht blocks of 8-bit
+*          samples.
+*
+*  @param[in] ht            : number of rows to compare
+*  @param[in] wd            : number of samples per row to compare
+*  @param[in] pu1_inp       : top-left sample of the first block
+*  @param[in] pu1_ref       : top-left sample of the second block
+*  @param[in] i4_inp_stride : distance between rows of pu1_inp
+*  @param[in] i4_ref_stride : distance between rows of pu1_ref
+*
+*  @return Accumulated SAD (0 when ht or wd is not positive)
+********************************************************************************
+*/
+WORD32 hme_evalsad_pt_npu_MxN_8bit_compute(
+    WORD32 ht,
+    WORD32 wd,
+    UWORD8 *pu1_inp,
+    UWORD8 *pu1_ref,
+    WORD32 i4_inp_stride,
+    WORD32 i4_ref_stride)
+{
+    WORD32 i4_acc = 0;
+    WORD32 row = 0;
+
+    while(row < ht)
+    {
+        WORD32 col;
+
+        for(col = 0; col < wd; col++)
+        {
+            i4_acc += (ABS(((S32)pu1_inp[col] - (S32)pu1_ref[col])));
+        }
+
+        /* advance both blocks to the next row */
+        pu1_inp += i4_inp_stride;
+        pu1_ref += i4_ref_stride;
+        row++;
+    }
+
+    return i4_acc;
+}
+
+/**
+********************************************************************************
+*  @fn     hme_evalsad_pt_npu_MxN_8bit(err_prms_t *ps_prms)
+*
+*  @brief  Single-point SAD: forwards the block size, pointers and strides in
+*          ps_prms to hme_evalsad_pt_npu_MxN_8bit_compute() and stores the
+*          result in ps_prms->pi4_sad_grid[0].
+*
+*  @param[in,out] ps_prms : error computation parameters
+*
+*  @return None
+********************************************************************************
+*/
+void hme_evalsad_pt_npu_MxN_8bit(err_prms_t *ps_prms)
+{
+    ps_prms->pi4_sad_grid[0] = hme_evalsad_pt_npu_MxN_8bit_compute(
+        ps_prms->i4_blk_ht,
+        ps_prms->i4_blk_wd,
+        ps_prms->pu1_inp,
+        ps_prms->pu1_ref,
+        ps_prms->i4_inp_stride,
+        ps_prms->i4_ref_stride);
+}
+
+/**
+********************************************************************************
+*  @fn     compute_satd_8bit(err_prms_t *ps_prms)
+*
+*  @brief  Tiles the i4_blk_wd x i4_blk_ht block with square transforms and
+*          sums the values returned by the HAD function pointers of
+*          ps_cmn_utils_optimised_function_list. The total is written to
+*          ps_prms->pi4_sad_grid[0].
+*
+*          Tile size: 8x8 (pf_HAD_8x8_8bit) when either block dimension
+*          exceeds 16, otherwise 4x4 (pf_HAD_4x4_8bit).
+*
+*  @param[in,out] ps_prms : reads pu1_inp, pu1_ref, both strides and the block
+*                 size; writes pi4_sad_grid[0]
+*
+*  @return None
+********************************************************************************
+*/
+void compute_satd_8bit(err_prms_t *ps_prms)
+{
+    U08 *pu1_src_row = ps_prms->pu1_inp;
+    U08 *pu1_pred_row = ps_prms->pu1_ref;
+    S32 i4_src_strd = ps_prms->i4_inp_stride;
+    S32 i4_pred_strd = ps_prms->i4_ref_stride;
+    S32 i4_wd = ps_prms->i4_blk_wd;
+    S32 i4_ht = ps_prms->i4_blk_ht;
+    S32 i4_tile = ((i4_wd > 16) || (i4_ht > 16)) ? 8 : 4;
+    U32 u4_total = 0;
+    WORD32 col, row;
+
+    for(row = 0; row < i4_ht; row += i4_tile)
+    {
+        for(col = 0; col < i4_wd; col += i4_tile)
+        {
+            if(8 == i4_tile)
+            {
+                u4_total += ps_prms->ps_cmn_utils_optimised_function_list->pf_HAD_8x8_8bit(
+                    &pu1_src_row[col], i4_src_strd, &pu1_pred_row[col], i4_pred_strd, NULL, 1);
+            }
+            else
+            {
+                u4_total += ps_prms->ps_cmn_utils_optimised_function_list->pf_HAD_4x4_8bit(
+                    &pu1_src_row[col], i4_src_strd, &pu1_pred_row[col], i4_pred_strd, NULL, 1);
+            }
+        }
+
+        /* step both buffers down by one row of tiles */
+        pu1_src_row += i4_src_strd * i4_tile;
+        pu1_pred_row += i4_pred_strd * i4_tile;
+    }
+
+    ps_prms->pi4_sad_grid[0] = (S32)u4_total;
+}
+
+/**
+********************************************************************************
+*  @fn     hme_init_pred_part(ps_pred_ctxt, ps_tl, ps_t, ps_tr, ps_l, ps_bl,
+*                             ps_coloc, ps_zeromv, pps_proj_coloc, e_part_id)
+*
+*  @brief  Records the predictor candidate pointers of one partition in
+*          ps_pred_ctxt->as_pred_nodes[e_part_id]. The pointers are stored
+*          as given; nothing is copied or dereferenced.
+*
+*  @param[out] ps_pred_ctxt  : prediction context to update
+*  @param[in] ps_tl          : top left candidate
+*  @param[in] ps_t           : top candidate
+*  @param[in] ps_tr          : top right candidate
+*  @param[in] ps_l           : left candidate
+*  @param[in] ps_bl          : bottom left candidate
+*  @param[in] ps_coloc       : colocated candidate
+*  @param[in] ps_zeromv      : zero mv candidate
+*  @param[in] pps_proj_coloc : projected colocated candidates (may be NULL)
+*  @param[in] e_part_id      : partition whose entry is written
+*
+*  @return None
+********************************************************************************
+*/
+void hme_init_pred_part(
+    pred_ctxt_t *ps_pred_ctxt,
+    search_node_t *ps_tl,
+    search_node_t *ps_t,
+    search_node_t *ps_tr,
+    search_node_t *ps_l,
+    search_node_t *ps_bl,
+    search_node_t *ps_coloc,
+    search_node_t *ps_zeromv,
+    search_node_t **pps_proj_coloc,
+    PART_ID_T e_part_id)
+{
+    pred_candt_nodes_t *ps_entry = &ps_pred_ctxt->as_pred_nodes[e_part_id];
+
+    /* Spatial neighbours */
+    ps_entry->ps_l = ps_l;
+    ps_entry->ps_bl = ps_bl;
+    ps_entry->ps_t = ps_t;
+    ps_entry->ps_tl = ps_tl;
+    ps_entry->ps_tr = ps_tr;
+
+    /* Non-spatial candidates */
+    ps_entry->ps_coloc = ps_coloc;
+    ps_entry->ps_zeromv = ps_zeromv;
+    ps_entry->pps_proj_coloc = pps_proj_coloc;
+}
+
+/**
+********************************************************************************
+*  @fn     hme_init_pred_ctxt_no_encode(ps_pred_ctxt, ps_search_results,
+*              ps_top_candts, ps_left_candts, pps_proj_coloc_candts,
+*              ps_coloc_candts, ps_zeromv_candt, pred_lx, lambda,
+*              lambda_q_shift, ppu1_ref_bits_tlu, pi2_ref_scf)
+*
+*  @brief  Fills the prediction context for a layer that is not encoded:
+*          stores the cost parameters and registers the neighbour candidates
+*          of the 2Nx2N partition and of the four NxN partitions.
+*
+*  @param[out] ps_pred_ctxt         : context to initialise
+*  @param[in] ps_search_results     : its aps_part_results[pred_lx][] entries
+*                                     serve as neighbours of later NxN blocks
+*  @param[in] ps_top_candts         : top row candidates; entries 0..3 are used
+*  @param[in] ps_left_candts        : left column candidates; entries 0..2 are
+*                                     used and entry 2 must be unavailable
+*  @param[in] pps_proj_coloc_candts : projected colocated candidates
+*  @param[in] ps_coloc_candts       : colocated candidate
+*  @param[in] ps_zeromv_candt       : zero mv candidate
+*  @param[in] pred_lx               : prediction list (PRED_L0 / PRED_L1)
+*  @param[in] lambda                : lambda used in the mv cost
+*  @param[in] lambda_q_shift        : Q format of lambda
+*  @param[in] ppu1_ref_bits_tlu     : per list, per ref idx bit table
+*  @param[in] pi2_ref_scf           : scale factor table, stored as is
+*
+*  @return None
+********************************************************************************
+*/
+void hme_init_pred_ctxt_no_encode(
+    pred_ctxt_t *ps_pred_ctxt,
+    search_results_t *ps_search_results,
+    search_node_t *ps_top_candts,
+    search_node_t *ps_left_candts,
+    search_node_t **pps_proj_coloc_candts,
+    search_node_t *ps_coloc_candts,
+    search_node_t *ps_zeromv_candt,
+    S32 pred_lx,
+    S32 lambda,
+    S32 lambda_q_shift,
+    U08 **ppu1_ref_bits_tlu,
+    S16 *pi2_ref_scf)
+{
+    /* Names follow the diagram below */
+    search_node_t *ps_nbr_tl = ps_top_candts;
+    search_node_t *ps_nbr_t = ps_top_candts + 1;
+    search_node_t *ps_nbr_t1 = ps_top_candts + 2;
+    search_node_t *ps_nbr_tr = ps_top_candts + 3;
+    search_node_t *ps_nbr_l = ps_left_candts;
+    search_node_t *ps_nbr_l1 = ps_left_candts + 1;
+    search_node_t *ps_nbr_bl = &ps_left_candts[2];
+    search_node_t *ps_blk0, *ps_blk1, *ps_blk2;
+
+    /* mv_pel of 0 is marked FPEL in the original code */
+    ps_pred_ctxt->mv_pel = 0;
+
+    /* lambda and pred_lx (PRED_L0/PRED_L1) */
+    ps_pred_ctxt->lambda = lambda;
+    ps_pred_ctxt->lambda_q_shift = lambda_q_shift;
+    ps_pred_ctxt->pred_lx = pred_lx;
+    ps_pred_ctxt->ppu1_ref_bits_tlu = ppu1_ref_bits_tlu;
+    ps_pred_ctxt->pi2_ref_scf = pi2_ref_scf;
+    ps_pred_ctxt->proj_used = 0;
+
+    /* Bottom left must be unavailable; it doubles as the "invalid" node */
+    ASSERT(ps_left_candts[2].u1_is_avail == 0);
+
+    /*************************************************************************/
+    /* Candidate layout for the no-encode case:                              */
+    /*                                                                       */
+    /*      ____ ______________                                              */
+    /*     | TL | T  | T1 | TR |                                             */
+    /*     |____|____|____|____|                                             */
+    /*     | L  | b0 | b1 |                                                  */
+    /*     |____|____|____|                                                  */
+    /*     | L1 | b2 | b3 |                                                  */
+    /*     |____|____|____|                                                  */
+    /*     | BL |                                                            */
+    /*     |____|                                                            */
+    /*                                                                       */
+    /* Neighbours registered below ("inv" is the unavailable BL node):       */
+    /*                                                                       */
+    /*  Block   Left   Top   Top Left   Top Right   Bottom Left              */
+    /*  8x8     L1     T1    TL         TR          inv                      */
+    /*  b0      L      T     TL         T1          L1                       */
+    /*  b1      b0     T1    T          TR          inv                      */
+    /*  b2      L1     b0    L          b1          inv                      */
+    /*  b3      b2     b1    b0         inv         inv                      */
+    /*                                                                       */
+    /* b0..b2 refer to ps_search_results->aps_part_results[pred_lx][] of the */
+    /* NxN_TL, NxN_TR and NxN_BL partitions respectively.                    */
+    /*************************************************************************/
+    ps_blk0 = ps_search_results->aps_part_results[pred_lx][PART_ID_NxN_TL];
+    ps_blk1 = ps_search_results->aps_part_results[pred_lx][PART_ID_NxN_TR];
+    ps_blk2 = ps_search_results->aps_part_results[pred_lx][PART_ID_NxN_BL];
+
+    /* TODO : replace incoming coloc from global to genuine coloc */
+
+    /* 8x8 block */
+    hme_init_pred_part(
+        ps_pred_ctxt,
+        ps_nbr_tl,
+        ps_nbr_t1,
+        ps_nbr_tr,
+        ps_nbr_l1,
+        ps_nbr_bl,
+        ps_coloc_candts,
+        ps_zeromv_candt,
+        pps_proj_coloc_candts,
+        PART_ID_2Nx2N);
+
+    /* 4x4 top left block (b0) */
+    hme_init_pred_part(
+        ps_pred_ctxt,
+        ps_nbr_tl,
+        ps_nbr_t,
+        ps_nbr_t1,
+        ps_nbr_l,
+        ps_nbr_l1,
+        ps_coloc_candts,
+        ps_zeromv_candt,
+        pps_proj_coloc_candts,
+        PART_ID_NxN_TL);
+
+    /* 4x4 top right block (b1) */
+    hme_init_pred_part(
+        ps_pred_ctxt,
+        ps_nbr_t,
+        ps_nbr_t1,
+        ps_nbr_tr,
+        ps_blk0,
+        ps_nbr_bl,
+        ps_coloc_candts,
+        ps_zeromv_candt,
+        pps_proj_coloc_candts,
+        PART_ID_NxN_TR);
+
+    /* 4x4 bottom left block (b2) */
+    hme_init_pred_part(
+        ps_pred_ctxt,
+        ps_nbr_l,
+        ps_blk0,
+        ps_blk1,
+        ps_nbr_l1,
+        ps_nbr_bl,
+        ps_coloc_candts,
+        ps_zeromv_candt,
+        pps_proj_coloc_candts,
+        PART_ID_NxN_BL);
+
+    /* 4x4 bottom right block (b3): top right and bottom left are invalid */
+    hme_init_pred_part(
+        ps_pred_ctxt,
+        ps_blk0,
+        ps_blk1,
+        ps_nbr_bl,
+        ps_blk2,
+        ps_nbr_bl,
+        ps_coloc_candts,
+        ps_zeromv_candt,
+        pps_proj_coloc_candts,
+        PART_ID_NxN_BR);
+}
+
+/**
+********************************************************************************
+*  @fn     hme_init_pred_ctxt_encode(ps_pred_ctxt, ps_search_results,
+*              ps_coloc_candts, ps_zeromv_candt, ps_mv_grid, pred_lx, lambda,
+*              lambda_q_shift, ppu1_ref_bits_tlu, pi2_ref_scf)
+*
+*  @brief  Fills the prediction context for an encoded layer: stores the cost
+*          parameters and, for every partition id, registers the five spatial
+*          neighbours taken from the mv grid (or from search results of
+*          earlier partitions of the same CU).
+*
+*  @param[out] ps_pred_ctxt     : context to initialise
+*  @param[in] ps_search_results : supplies e_cu_size, u1_x_off, u1_y_off and
+*                                 the aps_part_results[pred_lx][] nodes
+*  @param[in] ps_coloc_candts   : colocated candidate
+*  @param[in] ps_zeromv_candt   : zero mv candidate
+*  @param[in] ps_mv_grid        : mv grid holding the neighbour nodes
+*  @param[in] pred_lx           : prediction list (PRED_L0 / PRED_L1)
+*  @param[in] lambda            : lambda used in the mv cost
+*  @param[in] lambda_q_shift    : Q format of lambda
+*  @param[in] ppu1_ref_bits_tlu : per list, per ref idx bit table
+*  @param[in] pi2_ref_scf       : scale factor table, stored as is
+*
+*  @return None
+*
+*  @remarks The projected colocated list is registered as NULL for every
+*           partition.
+********************************************************************************
+*/
+void hme_init_pred_ctxt_encode(
+    pred_ctxt_t *ps_pred_ctxt,
+    search_results_t *ps_search_results,
+    search_node_t *ps_coloc_candts,
+    search_node_t *ps_zeromv_candt,
+    mv_grid_t *ps_mv_grid,
+    S32 pred_lx,
+    S32 lambda,
+    S32 lambda_q_shift,
+    U08 **ppu1_ref_bits_tlu,
+    S16 *pi2_ref_scf)
+{
+    search_node_t *ps_invalid, *ps_l, *ps_t, *ps_tl, *ps_tr, *ps_bl;
+    search_node_t *ps_grid_cu_base;
+    CU_SIZE_T e_cu_size = ps_search_results->e_cu_size;
+
+    /* Partition id being set up */
+    S32 part_id;
+
+    /* Coordinates of the CU in 4x4 units */
+    S32 cu_start_x, cu_start_y;
+
+    /* Scales the part attributes of gas_part_attr_in_cu to this CU size */
+    S32 part_shift = e_cu_size;
+
+    /* Converts a 4x4-unit CU position to an index of the CU validity maps. */
+    /* Kept under its own name: the original reused the name "shift" in a  */
+    /* nested scope, shadowing the partition shift above.                  */
+    S32 cu_pos_shift = 1 + e_cu_size;
+
+    /* top right and bot left validity at CU level */
+    S32 cu_tr_valid, cu_bl_valid;
+
+    /* stride of the grid */
+    S32 grid_stride = ps_mv_grid->i4_stride;
+
+    ps_pred_ctxt->lambda = lambda;
+    ps_pred_ctxt->lambda_q_shift = lambda_q_shift;
+    ps_pred_ctxt->pred_lx = pred_lx;
+    ps_pred_ctxt->mv_pel = 0;
+    ps_pred_ctxt->ppu1_ref_bits_tlu = ppu1_ref_bits_tlu;
+    ps_pred_ctxt->pi2_ref_scf = pi2_ref_scf;
+    ps_pred_ctxt->proj_used = 1;
+
+    cu_start_x = ps_search_results->u1_x_off >> 2;
+    cu_start_y = ps_search_results->u1_y_off >> 2;
+
+    /* Go to base of the CU in the MV Grid */
+    ps_grid_cu_base = &ps_mv_grid->as_node[0];
+    ps_grid_cu_base += (ps_mv_grid->i4_start_offset + cu_start_x);
+    ps_grid_cu_base += (grid_stride * cu_start_y);
+
+    /* points to the real bottom left of the grid, will never be valid */
+    ps_invalid = &ps_mv_grid->as_node[0];
+    ps_invalid += (grid_stride * 17);
+
+    cu_tr_valid = gau1_cu_tr_valid[cu_start_y >> cu_pos_shift][cu_start_x >> cu_pos_shift];
+    cu_bl_valid = gau1_cu_bl_valid[cu_start_y >> cu_pos_shift][cu_start_x >> cu_pos_shift];
+
+    /*************************************************************************/
+    /* For the case of encode, the candidates are set up as follows          */
+    /*                                                                       */
+    /*    ____ ______________ ____ ____                                      */
+    /*   | T0 | T1 | T2 | T3 | T4 | T5 |                                     */
+    /*   |____|____|____|____|____|____|                                     */
+    /*   | L1 |    |              |                                          */
+    /*   |____|    |              |                                          */
+    /*   | L2 | p0 |     p1       |                                          */
+    /*   |____|    |              |                                          */
+    /*   | L3 |    |              |                                          */
+    /*   |____|    |              |                                          */
+    /*   | L4 | L' |              |                                          */
+    /*   |____|____|______________|                                          */
+    /*   | BL |                                                              */
+    /*   |____|                                                              */
+    /*                                                                       */
+    /* The example shows a 16x16 CU with two partitions p0 and p1.           */
+    /* With the partition's top-left 4x4 node as origin, and part_wd and     */
+    /* part_ht as the partition size in 4x4 units, the code below takes:     */
+    /*   top left     : (-1, -1)                                             */
+    /*   top          : (part_wd - 1, -1)                                    */
+    /*   top right    : (part_wd, -1)                                        */
+    /*   left         : (-1, part_ht - 1)                                    */
+    /*   bottom left  : (-1, part_ht)                                        */
+    /* Top right and bottom left are replaced by the invalid node when the   */
+    /* partition-level and CU-level validity flags do not both allow them.   */
+    /* Later partitions of a CU take some neighbours from the search results */
+    /* of earlier partitions instead of from the grid.                       */
+    /*************************************************************************/
+
+    /* Loop over all partitions, and identify the 5 neighbours */
+    for(part_id = 0; part_id < TOT_NUM_PARTS; part_id++)
+    {
+        part_attr_t *ps_part_attr = &gas_part_attr_in_cu[part_id];
+        PART_TYPE_T e_part_type = ge_part_id_to_part_type[part_id];
+        PART_ID_T first_part = ge_part_type_to_part_id[e_part_type][0];
+        S32 is_vert = gau1_is_vert_part[e_part_type];
+        S32 part_num = gau1_part_id_to_part_num[part_id];
+        S32 tr_valid = gau1_partid_tr_valid[part_id] & cu_tr_valid;
+        S32 bl_valid = gau1_partid_bl_valid[part_id] & cu_bl_valid;
+
+        /* Part start and part size in 4x4 units */
+        S32 part_start_x = (ps_part_attr->u1_x_start << part_shift) >> 2;
+        S32 part_start_y = (ps_part_attr->u1_y_start << part_shift) >> 2;
+        S32 part_wd = (ps_part_attr->u1_x_count << part_shift) >> 2;
+        S32 part_ht = (ps_part_attr->u1_y_count << part_shift) >> 2;
+
+        /* go to top left of part */
+        search_node_t *ps_grid_pu_base = ps_grid_cu_base + part_start_x;
+
+        ps_grid_pu_base += (part_start_y * grid_stride);
+
+        ps_tl = ps_grid_pu_base - 1 - grid_stride;
+        ps_t = ps_grid_pu_base - grid_stride + part_wd - 1;
+        ps_l = ps_grid_pu_base - 1 + ((part_ht - 1) * grid_stride);
+        ps_tr = ps_t + 1;
+        ps_bl = ps_l + grid_stride;
+
+        if(!tr_valid)
+            ps_tr = ps_invalid;
+        if(!bl_valid)
+            ps_bl = ps_invalid;
+
+        switch(part_num)
+        {
+        case 1:
+            /* 2nd partition has the 1st partition as a candidate: as its */
+            /* left candidate for a vertical type, as its top candidate   */
+            /* otherwise.                                                 */
+            if(is_vert)
+            {
+                ps_l = ps_search_results->aps_part_results[pred_lx][first_part];
+            }
+            else
+            {
+                ps_t = ps_search_results->aps_part_results[pred_lx][first_part];
+            }
+            break;
+
+        case 2:
+            /* only possible for NxN_BL */
+            ps_t = ps_search_results->aps_part_results[pred_lx][PART_ID_NxN_TL];
+            ps_tr = ps_search_results->aps_part_results[pred_lx][PART_ID_NxN_TR];
+            break;
+
+        case 3:
+            /* only possible for NxN_BR */
+            ps_t = ps_search_results->aps_part_results[pred_lx][PART_ID_NxN_TR];
+            ps_tl = ps_search_results->aps_part_results[pred_lx][PART_ID_NxN_TL];
+            ps_l = ps_search_results->aps_part_results[pred_lx][PART_ID_NxN_BL];
+            break;
+
+        default:
+            break;
+        }
+
+        /* Coloc always points to the fixed global candidate passed in */
+        hme_init_pred_part(
+            ps_pred_ctxt,
+            ps_tl,
+            ps_t,
+            ps_tr,
+            ps_l,
+            ps_bl,
+            ps_coloc_candts,
+            ps_zeromv_candt,
+            NULL,
+            (PART_ID_T)part_id);
+    }
+}
+
+/**
+********************************************************************************
+*  @fn     compute_mv_cost_explicit(search_node_t *ps_node,
+*                                   pred_ctxt_t *ps_pred_ctxt,
+*                                   PART_ID_T e_part_id,
+*                                   S32 inp_mv_pel)
+*
+*  @brief  MV cost for explicit search in layers not encoded
+*
+*  @param[in] ps_node: search node having mv and ref id for which to eval cost
+*
+*  @param[in] ps_pred_ctxt : mv pred context
+*
+*  @param[in] e_part_id : Partition id.
+*
+*  @param[in] inp_mv_pel : resolution indicator of the mv in ps_node; passed
+*             to COMPUTE_DIFF_MV as a shift of (2 - inp_mv_pel)
+*
+*  @return Cost value: bits estimate of the cheaper of two predictors,
+*          multiplied by lambda and rounded to lambda_q_shift
+*
+********************************************************************************
+*/
+S32 compute_mv_cost_explicit(
+    search_node_t *ps_node, pred_ctxt_t *ps_pred_ctxt, PART_ID_T e_part_id, S32 inp_mv_pel)
+{
+#define RETURN_FIXED_COST 0
+    pred_candt_nodes_t *ps_pred_nodes = &ps_pred_ctxt->as_pred_nodes[e_part_id];
+    search_node_t *ps_pred_node_a = NULL;
+    search_node_t *ps_pred_node_b = NULL;
+    S32 inp_shift = 2 - inp_mv_pel;
+    S32 pred_shift = 2 - ps_pred_ctxt->mv_pel;
+    S32 mv_p_x, mv_p_y;
+    S16 mvdx1, mvdx2, mvdy1, mvdy2;
+    S16 mvdx_best, mvdy_best;
+    S32 cost, ref_bits, rnd;
+
+    /*************************************************************************/
+    /* In an explicit search all predictor candidates are expected to carry  */
+    /* the same ref id as the node, so no predictor is scaled here; only     */
+    /* availability is checked.                                              */
+    /*************************************************************************/
+    ref_bits = ps_pred_ctxt->ppu1_ref_bits_tlu[ps_pred_ctxt->pred_lx][ps_node->i1_ref_idx];
+
+    /*************************************************************************/
+    /* Predictor A: top left if available, else left, else stays NULL.       */
+    /* NOTE(review): the original comment here described bottom-left         */
+    /* priority, but the code tests ps_tl. Confirm which one is intended.    */
+    /*************************************************************************/
+    if(ps_pred_nodes->ps_tl->u1_is_avail)
+    {
+        ps_pred_node_a = ps_pred_nodes->ps_tl;
+    }
+    else if(ps_pred_nodes->ps_l->u1_is_avail)
+    {
+        ps_pred_node_a = ps_pred_nodes->ps_l;
+    }
+
+    /*************************************************************************/
+    /* Predictor B: top right if available, else top, else stays NULL.       */
+    /*************************************************************************/
+    if(ps_pred_nodes->ps_tr->u1_is_avail)
+    {
+        ps_pred_node_b = ps_pred_nodes->ps_tr;
+    }
+    else if(ps_pred_nodes->ps_t->u1_is_avail)
+    {
+        ps_pred_node_b = ps_pred_nodes->ps_t;
+    }
+
+    /* Fill the gaps with coloc / zero mv; replace a duplicate B with coloc */
+    if(NULL == ps_pred_node_a)
+    {
+        ps_pred_node_a = ps_pred_nodes->ps_coloc;
+        if(NULL == ps_pred_node_b)
+        {
+            ps_pred_node_b = ps_pred_nodes->ps_zeromv;
+        }
+    }
+    else if(NULL == ps_pred_node_b)
+    {
+        ps_pred_node_b = ps_pred_nodes->ps_coloc;
+    }
+    else if(0 == hme_cmp_nodes(ps_pred_node_a, ps_pred_node_b))
+    {
+        ps_pred_node_b = ps_pred_nodes->ps_coloc;
+    }
+
+    /* Absolute mv difference w.r.t. predictor A */
+    mv_p_x = ps_pred_node_a->s_mv.i2_mvx;
+    mv_p_y = ps_pred_node_a->s_mv.i2_mvy;
+    COMPUTE_DIFF_MV(mvdx1, mvdy1, ps_node, mv_p_x, mv_p_y, inp_shift, pred_shift);
+    mvdx1 = ABS(mvdx1);
+    mvdy1 = ABS(mvdy1);
+
+    /* Absolute mv difference w.r.t. predictor B */
+    mv_p_x = ps_pred_node_b->s_mv.i2_mvx;
+    mv_p_y = ps_pred_node_b->s_mv.i2_mvy;
+    COMPUTE_DIFF_MV(mvdx2, mvdy2, ps_node, mv_p_x, mv_p_y, inp_shift, pred_shift);
+    mvdx2 = ABS(mvdx2);
+    mvdy2 = ABS(mvdy2);
+
+    /* Keep the predictor with the strictly smaller difference; ties go to B */
+    if((mvdx1 + mvdy1) < (mvdx2 + mvdy2))
+    {
+        mvdx_best = mvdx1;
+        mvdy_best = mvdy1;
+    }
+    else
+    {
+        mvdx_best = mvdx2;
+        mvdy_best = mvdy2;
+    }
+
+    cost = hme_get_range(mvdx_best) + hme_get_range(mvdy_best) + (mvdx_best > 0) +
+           (mvdy_best > 0) + ref_bits + 2;
+
+    rnd = 1 << (ps_pred_ctxt->lambda_q_shift - 1);
+    return ((cost * ps_pred_ctxt->lambda + rnd) >> ps_pred_ctxt->lambda_q_shift);
+}
+/**
+********************************************************************************
+*  @fn     compute_mv_cost_coarse(search_node_t *ps_node,
+*                                 pred_ctxt_t *ps_pred_ctxt,
+*                                 PART_ID_T e_part_id,
+*                                 S32 inp_mv_pel)
+*
+*  @brief  MV cost for coarse explicit search in coarsest layer. Delegates to
+*          compute_mv_cost_explicit() with the partition fixed to 2Nx2N.
+*
+*  @param[in] ps_node: search node having mv and ref id for which to eval cost
+*
+*  @param[in] ps_pred_ctxt : mv pred context
+*
+*  @param[in] e_part_id : Partition id (ignored)
+*
+*  @param[in] inp_mv_pel : forwarded unchanged to compute_mv_cost_explicit()
+*
+*  @return Cost value
+*
+********************************************************************************
+*/
+S32 compute_mv_cost_coarse(
+    search_node_t *ps_node, pred_ctxt_t *ps_pred_ctxt, PART_ID_T e_part_id, S32 inp_mv_pel)
+{
+    S32 i4_cost;
+
+    ARG_NOT_USED(e_part_id);
+
+    i4_cost = compute_mv_cost_explicit(ps_node, ps_pred_ctxt, PART_ID_2Nx2N, inp_mv_pel);
+
+    return (i4_cost);
+}
+
+/**
+********************************************************************************
+*  @fn     compute_mv_cost_coarse_high_speed(search_node_t *ps_node,
+*                                            pred_ctxt_t *ps_pred_ctxt,
+*                                            PART_ID_T e_part_id,
+*                                            S32 inp_mv_pel)
+*
+*  @brief  Predictor-free MV cost: the estimate depends only on the magnitude
+*          of the node's mv components and on its ref idx, and is then
+*          multiplied by lambda and rounded to lambda_q_shift.
+*
+*  @param[in] ps_node: search node having mv and ref id for which to eval cost
+*
+*  @param[in] ps_pred_ctxt : mv pred context; only lambda and lambda_q_shift
+*             are read
+*
+*  @param[in] e_part_id : Partition id (not used)
+*
+*  @param[in] inp_mv_pel : mv resolution indicator (not used)
+*
+*  @return Cost value
+*
+********************************************************************************
+*/
+S32 compute_mv_cost_coarse_high_speed(
+    search_node_t *ps_node, pred_ctxt_t *ps_pred_ctxt, PART_ID_T e_part_id, S32 inp_mv_pel)
+{
+    S32 rnd, mvx, mvy, i4_search_idx;
+    S32 cost;
+
+    /* Marked explicitly, as compute_mv_cost_coarse() does for its unused arg */
+    ARG_NOT_USED(e_part_id);
+    ARG_NOT_USED(inp_mv_pel);
+
+    mvx = ps_node->s_mv.i2_mvx;
+    mvy = ps_node->s_mv.i2_mvy;
+    i4_search_idx = ps_node->i1_ref_idx;
+
+    /* (2 * range - 1) per component, plus the ref idx itself */
+    cost = (2 * hme_get_range(ABS(mvx)) - 1) + (2 * hme_get_range(ABS(mvy)) - 1) + i4_search_idx;
+
+    /* one extra unit for every non-zero component */
+    cost += (mvx != 0) ? 1 : 0;
+    cost += (mvy != 0) ? 1 : 0;
+
+    rnd = 1 << (ps_pred_ctxt->lambda_q_shift - 1);
+    cost = (cost * ps_pred_ctxt->lambda + rnd) >> ps_pred_ctxt->lambda_q_shift;
+    return cost;
+}
+
+/**
+********************************************************************************
+*  @fn     compute_mv_cost_explicit_refine(search_node_t *ps_node,
+*                                          pred_ctxt_t *ps_pred_ctxt,
+*                                          PART_ID_T e_part_id,
+*                                          S32 inp_mv_pel)
+*
+*  @brief  MV cost for explicit search in layers not encoded. Always returns
+*          cost of the projected colocated candidate
+*
+*  @param[in] ps_node: search node having mv and ref id for which to eval cost
+*
+*  @param[in] ps_pred_ctxt : mv pred context
+*
+*  @param[in] e_part_id : Partition id.
+*
+*  @param[in] inp_mv_pel : resolution indicator of the mv in ps_node; passed
+*             to COMPUTE_DIFF_MV as a shift of (2 - inp_mv_pel)
+*
+*  @return Cost value
+*
+*  @remarks pps_proj_coloc[0] of the selected partition is dereferenced
+*           without a check. hme_init_pred_ctxt_encode() registers NULL for
+*           pps_proj_coloc, so a context prepared by it must not be used here.
+*
+********************************************************************************
+*/
+S32 compute_mv_cost_explicit_refine(
+    search_node_t *ps_node, pred_ctxt_t *ps_pred_ctxt, PART_ID_T e_part_id, S32 inp_mv_pel)
+{
+    pred_candt_nodes_t *ps_pred_nodes = &ps_pred_ctxt->as_pred_nodes[e_part_id];
+    search_node_t *ps_proj_coloc;
+    S32 inp_shift = 2 - inp_mv_pel;
+    S32 pred_shift = 2 - ps_pred_ctxt->mv_pel;
+    S32 mv_p_x, mv_p_y;
+    S16 mvdx1, mvdy1;
+    S32 cost, ref_bits, rnd;
+
+    ref_bits = ps_pred_ctxt->ppu1_ref_bits_tlu[ps_pred_ctxt->pred_lx][ps_node->i1_ref_idx];
+
+    /* The only predictor: first projected colocated candidate */
+    ps_proj_coloc = ps_pred_nodes->pps_proj_coloc[0];
+    mv_p_x = ps_proj_coloc->s_mv.i2_mvx;
+    mv_p_y = ps_proj_coloc->s_mv.i2_mvy;
+
+    COMPUTE_DIFF_MV(mvdx1, mvdy1, ps_node, mv_p_x, mv_p_y, inp_shift, pred_shift);
+    mvdx1 = ABS(mvdx1);
+    mvdy1 = ABS(mvdy1);
+
+    cost = hme_get_range(mvdx1) + hme_get_range(mvdy1) + (mvdx1 > 0) + (mvdy1 > 0) + ref_bits + 2;
+
+    rnd = 1 << (ps_pred_ctxt->lambda_q_shift - 1);
+    return ((cost * ps_pred_ctxt->lambda + rnd) >> ps_pred_ctxt->lambda_q_shift);
+}
+
+/**
+********************************************************************************
+*  @fn     compute_mv_cost_refine(search_node_t *ps_node,
+*                                 pred_ctxt_t *ps_pred_ctxt,
+*                                 PART_ID_T e_part_id,
+*                                 S32 inp_mv_pel)
+*
+*  @brief  MV cost for refinement: forwards every argument unchanged to
+*          compute_mv_cost_explicit_refine().
+*
+*  @param[in] ps_node: search node having mv and ref id for which to eval cost
+*
+*  @param[in] ps_pred_ctxt : mv pred context
+*
+*  @param[in] e_part_id : Partition id.
+*
+*  @param[in] inp_mv_pel : resolution indicator of the mv in ps_node
+*
+*  @return Cost value
+*
+********************************************************************************
+*/
+S32 compute_mv_cost_refine(
+    search_node_t *ps_node, pred_ctxt_t *ps_pred_ctxt, PART_ID_T e_part_id, S32 inp_mv_pel)
+{
+    S32 i4_cost = compute_mv_cost_explicit_refine(ps_node, ps_pred_ctxt, e_part_id, inp_mv_pel);
+
+    return (i4_cost);
+}
+
+/**
+********************************************************************************
+*  @fn     compute_mv_cost_implicit(search_node_t *ps_node,
+*                                   pred_ctxt_t *ps_pred_ctxt,
+*                                   PART_ID_T e_part_id,
+*                                   S32 inp_mv_pel)
+*
+*  @brief  MV cost for a search in which the neighbours may carry a ref idx
+*          different from the node's. Two predictors are chosen among the
+*          spatial neighbours (with coloc / zero mv as fall-backs); a predictor
+*          with a different ref idx goes through SCALE_FOR_POC_DELTA. The cost
+*          of the closer predictor is combined with the partition bits of
+*          gau1_bits_for_part_id_q1[] and weighted by lambda.
+*
+*  @param[in] ps_node: search node having mv and ref id for which to eval cost
+*
+*  @param[in] ps_pred_ctxt : mv pred context
+*
+*  @param[in] e_part_id : Partition id.
+*
+*  @param[in] inp_mv_pel : resolution indicator of the mv in ps_node; passed
+*             to COMPUTE_DIFF_MV as a shift of (2 - inp_mv_pel)
+*
+*  @return Cost value
+*
+********************************************************************************
+*/
+S32 compute_mv_cost_implicit(
+    search_node_t *ps_node, pred_ctxt_t *ps_pred_ctxt, PART_ID_T e_part_id, S32 inp_mv_pel)
+{
+    search_node_t *ps_pred_node_a = NULL, *ps_pred_node_b = NULL;
+    pred_candt_nodes_t *ps_pred_nodes;
+    S08 i1_ref_idx = ps_node->i1_ref_idx;
+    /* Ref idx of each neighbour; stays -1 while the neighbour is unavailable */
+    S08 i1_ref_bl = -1, i1_ref_l = -1;
+    S08 i1_ref_tr = -1, i1_ref_t = -1, i1_ref_tl = -1;
+    S32 inp_shift = 2 - inp_mv_pel;
+    S32 pred_shift;
+    S32 ref_bits, cost;
+    S32 mv_p_x, mv_p_y;
+    S16 mvdx1, mvdx2, mvdy1, mvdy2;
+    S16 mvdx_best, mvdy_best;
+    S32 rnd, tot_cost;
+
+    ps_pred_nodes = &ps_pred_ctxt->as_pred_nodes[e_part_id];
+    ref_bits = ps_pred_ctxt->ppu1_ref_bits_tlu[ps_pred_ctxt->pred_lx][i1_ref_idx];
+
+    /*************************************************************************/
+    /* Predictor A comes from bottom left / left. A neighbour whose ref idx  */
+    /* equals the node's is preferred (bottom left first); failing that, any */
+    /* available neighbour is taken (bottom left first). Otherwise A stays   */
+    /* NULL.                                                                 */
+    /*************************************************************************/
+    if(ps_pred_nodes->ps_bl->u1_is_avail)
+    {
+        i1_ref_bl = ps_pred_nodes->ps_bl->i1_ref_idx;
+    }
+    if(ps_pred_nodes->ps_l->u1_is_avail)
+    {
+        i1_ref_l = ps_pred_nodes->ps_l->i1_ref_idx;
+    }
+
+    if(i1_ref_bl == i1_ref_idx)
+    {
+        ps_pred_node_a = ps_pred_nodes->ps_bl;
+    }
+    else if(i1_ref_l == i1_ref_idx)
+    {
+        ps_pred_node_a = ps_pred_nodes->ps_l;
+    }
+    else if(i1_ref_bl != -1)
+    {
+        ps_pred_node_a = ps_pred_nodes->ps_bl;
+    }
+    else if(i1_ref_l != -1)
+    {
+        ps_pred_node_a = ps_pred_nodes->ps_l;
+    }
+
+    /*************************************************************************/
+    /* Predictor B comes from top right / top / top left, with the same two  */
+    /* stage preference, in that order.                                      */
+    /*************************************************************************/
+    if(ps_pred_nodes->ps_tr->u1_is_avail)
+    {
+        i1_ref_tr = ps_pred_nodes->ps_tr->i1_ref_idx;
+    }
+    if(ps_pred_nodes->ps_t->u1_is_avail)
+    {
+        i1_ref_t = ps_pred_nodes->ps_t->i1_ref_idx;
+    }
+    if(ps_pred_nodes->ps_tl->u1_is_avail)
+    {
+        i1_ref_tl = ps_pred_nodes->ps_tl->i1_ref_idx;
+    }
+
+    if(i1_ref_tr == i1_ref_idx)
+    {
+        ps_pred_node_b = ps_pred_nodes->ps_tr;
+    }
+    else if(i1_ref_t == i1_ref_idx)
+    {
+        ps_pred_node_b = ps_pred_nodes->ps_t;
+    }
+    else if(i1_ref_tl == i1_ref_idx)
+    {
+        ps_pred_node_b = ps_pred_nodes->ps_tl;
+    }
+    else if(i1_ref_tr != -1)
+    {
+        ps_pred_node_b = ps_pred_nodes->ps_tr;
+    }
+    else if(i1_ref_t != -1)
+    {
+        ps_pred_node_b = ps_pred_nodes->ps_t;
+    }
+    else if(i1_ref_tl != -1)
+    {
+        ps_pred_node_b = ps_pred_nodes->ps_tl;
+    }
+
+    /* Fill the gaps with coloc / zero mv; replace a duplicate B with coloc */
+    if(NULL == ps_pred_node_a)
+    {
+        ps_pred_node_a = ps_pred_nodes->ps_coloc;
+        if(NULL == ps_pred_node_b)
+        {
+            ps_pred_node_b = ps_pred_nodes->ps_zeromv;
+        }
+    }
+    else if(NULL == ps_pred_node_b)
+    {
+        ps_pred_node_b = ps_pred_nodes->ps_coloc;
+    }
+    else if(0 == hme_cmp_nodes(ps_pred_node_a, ps_pred_node_b))
+    {
+        ps_pred_node_b = ps_pred_nodes->ps_coloc;
+    }
+
+    /* Absolute mv difference w.r.t. predictor A */
+    if(ps_pred_node_a->i1_ref_idx != i1_ref_idx)
+    {
+        SCALE_FOR_POC_DELTA(mv_p_x, mv_p_y, ps_pred_node_a, i1_ref_idx, ps_pred_ctxt->pi2_ref_scf);
+    }
+    else
+    {
+        mv_p_x = ps_pred_node_a->s_mv.i2_mvx;
+        mv_p_y = ps_pred_node_a->s_mv.i2_mvy;
+    }
+    /* No shift once the predictor has gone through subpel refinement */
+    pred_shift = ps_pred_node_a->u1_subpel_done ? 0 : 2;
+    COMPUTE_DIFF_MV(mvdx1, mvdy1, ps_node, mv_p_x, mv_p_y, inp_shift, pred_shift);
+    mvdx1 = ABS(mvdx1);
+    mvdy1 = ABS(mvdy1);
+
+    /* Absolute mv difference w.r.t. predictor B */
+    if(ps_pred_node_b->i1_ref_idx != i1_ref_idx)
+    {
+        SCALE_FOR_POC_DELTA(mv_p_x, mv_p_y, ps_pred_node_b, i1_ref_idx, ps_pred_ctxt->pi2_ref_scf);
+    }
+    else
+    {
+        mv_p_x = ps_pred_node_b->s_mv.i2_mvx;
+        mv_p_y = ps_pred_node_b->s_mv.i2_mvy;
+    }
+    pred_shift = ps_pred_node_b->u1_subpel_done ? 0 : 2;
+    COMPUTE_DIFF_MV(mvdx2, mvdy2, ps_node, mv_p_x, mv_p_y, inp_shift, pred_shift);
+    mvdx2 = ABS(mvdx2);
+    mvdy2 = ABS(mvdy2);
+
+    /* Keep the predictor with the strictly smaller difference; ties go to B */
+    if((mvdx1 + mvdy1) < (mvdx2 + mvdy2))
+    {
+        mvdx_best = mvdx1;
+        mvdy_best = mvdy1;
+    }
+    else
+    {
+        mvdx_best = mvdx2;
+        mvdy_best = mvdy2;
+    }
+
+    cost = 2 * hme_get_range(mvdx_best) + 2 * hme_get_range(mvdy_best) + 2 * (mvdx_best > 0) +
+           2 * (mvdy_best > 0) + ref_bits + 2;
+
+    /* Part bits in Q1, so evaluate cost as ((mv_cost<<1) + partbitsQ1 + rnd)>>(q+1)*/
+    rnd = 1 << (ps_pred_ctxt->lambda_q_shift);
+    tot_cost = (cost * ps_pred_ctxt->lambda) << 1;
+    tot_cost += (gau1_bits_for_part_id_q1[e_part_id] * ps_pred_ctxt->lambda);
+
+    return ((tot_cost + rnd) >> (ps_pred_ctxt->lambda_q_shift + 1));
+}
+
+/**
+********************************************************************************
+*  @fn     compute_mv_cost_implicit_high_speed(search_node_t *ps_node,
+*                                              pred_ctxt_t *ps_pred_ctxt,
+*                                              PART_ID_T e_part_id,
+*                                              S32 inp_mv_pel)
+*
+*  @brief  Reduced version of compute_mv_cost_implicit(): only the left and
+*          top right neighbours are considered as spatial predictors, and no
+*          partition bits are added to the cost.
+*
+*  @param[in] ps_node: search node having mv and ref id for which to eval cost
+*
+*  @param[in] ps_pred_ctxt : mv pred context
+*
+*  @param[in] e_part_id : Partition id.
+*
+*  @param[in] inp_mv_pel : resolution indicator of the mv in ps_node; passed
+*             to COMPUTE_DIFF_MV as a shift of (2 - inp_mv_pel)
+*
+*  @return Cost value
+*
+********************************************************************************
+*/
+S32 compute_mv_cost_implicit_high_speed(
+    search_node_t *ps_node, pred_ctxt_t *ps_pred_ctxt, PART_ID_T e_part_id, S32 inp_mv_pel)
+{
+    search_node_t *ps_pred_node_a = NULL, *ps_pred_node_b = NULL;
+    pred_candt_nodes_t *ps_pred_nodes;
+    S08 i1_ref_idx;
+    S32 inp_shift = 2 - inp_mv_pel;
+    S32 pred_shift; /* = 2 - ps_pred_ctxt->mv_pel; */
+    S32 ref_bits, cost;
+    S32 mv_p_x, mv_p_y;
+    S16 mvdx1, mvdx2, mvdy1, mvdy2;
+
+    i1_ref_idx = ps_node->i1_ref_idx;
+
+    ps_pred_nodes = &ps_pred_ctxt->as_pred_nodes[e_part_id];
+    ref_bits = ps_pred_ctxt->ppu1_ref_bits_tlu[ps_pred_ctxt->pred_lx][i1_ref_idx];
+
+    /*************************************************************************/
+    /* Predictor A: the left neighbour if it is available, whatever its ref  */
+    /* idx. Otherwise A stays NULL and is replaced by coloc further below.   */
+    /*************************************************************************/
+    if(ps_pred_nodes->ps_l->u1_is_avail)
+    {
+        ps_pred_node_a = ps_pred_nodes->ps_l;
+    }
+
+    /*************************************************************************/
+    /* Predictor B: the top right neighbour, but only when the context does  */
+    /* not use projected candidates (proj_used == 0) and the neighbour is    */
+    /* available. In every other case B is the colocated candidate.          */
+    /*************************************************************************/
+    if((!(ps_pred_ctxt->proj_used) && (ps_pred_nodes->ps_tr->u1_is_avail)))
+    {
+        ps_pred_node_b = ps_pred_nodes->ps_tr;
+    }
+    else
+    {
+        ps_pred_node_b = ps_pred_nodes->ps_coloc;
+    }
+
+    if(ps_pred_node_a == NULL)
+    {
+        ps_pred_node_a = ps_pred_nodes->ps_coloc;
+
+        /* Avoid evaluating coloc twice: B falls back to zero mv */
+        if(ps_pred_node_b == ps_pred_nodes->ps_coloc)
+            ps_pred_node_b = ps_pred_nodes->ps_zeromv;
+    }
+
+    /* Absolute mv difference w.r.t. predictor A */
+    if(ps_pred_node_a->i1_ref_idx != i1_ref_idx)
+    {
+        SCALE_FOR_POC_DELTA(mv_p_x, mv_p_y, ps_pred_node_a, i1_ref_idx, ps_pred_ctxt->pi2_ref_scf);
+    }
+    else
+    {
+        mv_p_x = ps_pred_node_a->s_mv.i2_mvx;
+        mv_p_y = ps_pred_node_a->s_mv.i2_mvy;
+    }
+
+    /* No shift once the predictor has gone through subpel refinement */
+    pred_shift = ps_pred_node_a->u1_subpel_done ? 0 : 2;
+    COMPUTE_DIFF_MV(mvdx1, mvdy1, ps_node, mv_p_x, mv_p_y, inp_shift, pred_shift);
+    mvdx1 = ABS(mvdx1);
+    mvdy1 = ABS(mvdy1);
+
+    /* Absolute mv difference w.r.t. predictor B */
+    if(ps_pred_node_b->i1_ref_idx != i1_ref_idx)
+    {
+        SCALE_FOR_POC_DELTA(mv_p_x, mv_p_y, ps_pred_node_b, i1_ref_idx, ps_pred_ctxt->pi2_ref_scf);
+    }
+    else
+    {
+        mv_p_x = ps_pred_node_b->s_mv.i2_mvx;
+        mv_p_y = ps_pred_node_b->s_mv.i2_mvy;
+    }
+
+    pred_shift = ps_pred_node_b->u1_subpel_done ? 0 : 2;
+    COMPUTE_DIFF_MV(mvdx2, mvdy2, ps_node, mv_p_x, mv_p_y, inp_shift, pred_shift);
+    mvdx2 = ABS(mvdx2);
+    mvdy2 = ABS(mvdy2);
+
+    /* Cost of the predictor with the strictly smaller difference; ties -> B */
+    if((mvdx1 + mvdy1) < (mvdx2 + mvdy2))
+    {
+        cost =
+            hme_get_range(mvdx1) + hme_get_range(mvdy1) + (mvdx1 > 0) + (mvdy1 > 0) + ref_bits + 2;
+    }
+    else
+    {
+        cost =
+            hme_get_range(mvdx2) + hme_get_range(mvdy2) + (mvdx2 > 0) + (mvdy2 > 0) + ref_bits + 2;
+    }
+    {
+        /* No partition bits here: weight by lambda and round to q format */
+        S32 rnd = 1 << (ps_pred_ctxt->lambda_q_shift - 1);
+        S32 tot_cost = (cost * ps_pred_ctxt->lambda);
+
+        return ((tot_cost + rnd) >> (ps_pred_ctxt->lambda_q_shift));
+    }
+}
+
+/**
+********************************************************************************
+* @fn compute_mv_cost_implicit_high_speed_modified
+*
+* @brief Estimates the lambda weighted cost of a candidate mv against one
+*        predictor only: the ps_mvp_node stored for the given partition in
+*        the prediction context.
+*
+* @param[in] ps_node : candidate whose mv and ref idx are costed
+*
+* @param[in] ps_pred_ctxt : supplies the predictor nodes, the ref bits lookup,
+*            lambda and lambda_q_shift
+*
+* @param[in] e_part_id : selects the entry of as_pred_nodes[] that is used
+*
+* @param[in] inp_mv_pel : (2 - inp_mv_pel) is handed to COMPUTE_DIFF_MV as the
+*            shift for the candidate mv
+*
+* @return (bits * lambda + rnd) >> lambda_q_shift, where rnd is half of
+*         (1 << lambda_q_shift)
+********************************************************************************
+*/
+S32 compute_mv_cost_implicit_high_speed_modified(
+    search_node_t *ps_node, pred_ctxt_t *ps_pred_ctxt, PART_ID_T e_part_id, S32 inp_mv_pel)
+{
+    /* Only the mvp node of this partition acts as predictor */
+    search_node_t *ps_mvp = ps_pred_ctxt->as_pred_nodes[e_part_id].ps_mvp_node;
+    S32 i4_ref_bits = ps_pred_ctxt->ppu1_ref_bits_tlu[ps_pred_ctxt->pred_lx][ps_node->i1_ref_idx];
+    S32 i4_cand_shift = 2 - inp_mv_pel;
+    /* A predictor that has not been through subpel is shifted by 2 */
+    S32 i4_mvp_shift = ps_mvp->u1_subpel_done ? 0 : 2;
+    S32 i4_mvp_x = ps_mvp->s_mv.i2_mvx;
+    S32 i4_mvp_y = ps_mvp->s_mv.i2_mvy;
+    S16 i2_dx, i2_dy;
+    S32 i4_bits, i4_rnd;
+
+    COMPUTE_DIFF_MV(i2_dx, i2_dy, ps_node, i4_mvp_x, i4_mvp_y, i4_cand_shift, i4_mvp_shift);
+    i2_dx = ABS(i2_dx);
+    i2_dy = ABS(i2_dy);
+
+    /* range of each component + 1 per non zero component + ref bits + 2 */
+    i4_bits = hme_get_range(i2_dx) + hme_get_range(i2_dy) + (i2_dx > 0) + (i2_dy > 0) +
+              i4_ref_bits + 2;
+
+    i4_rnd = 1 << (ps_pred_ctxt->lambda_q_shift - 1);
+
+    return ((i4_bits * ps_pred_ctxt->lambda + i4_rnd) >> ps_pred_ctxt->lambda_q_shift);
+}
+
+/**
+********************************************************************************
+* @fn hme_update_results_grid_pu_bestn_xtreme_speed(result_upd_prms_t *ps_result_prms)
+*
+* @brief Grid result update restricted to the first entry of
+*        pi4_valid_part_ids and to a single stored result (both are
+*        asserted). Every active grid point is costed with
+*        compute_mv_cost_coarse_high_speed() and replaces result 0 of that
+*        partition whenever its total cost is lower than the stored one.
+*
+* @param[in,out] ps_result_prms : input parameters; i4_min_cost is written for
+*                every accepted point and i4_min_id receives the id of the
+*                last accepted point (PT_C when none was accepted)
+*
+* @return none
+********************************************************************************
+*/
+void hme_update_results_grid_pu_bestn_xtreme_speed(result_upd_prms_t *ps_result_prms)
+{
+    /*The function modified with assumption that only 2NxN_B and Nx2N_R is modified */
+
+    search_results_t *ps_results = ps_result_prms->ps_search_results;
+    const search_node_t *ps_centre = ps_result_prms->ps_search_node_base;
+    S32 *pi4_sad_cursor = ps_result_prms->pi4_sad_grid;
+    S32 *pi4_part_ids = ps_result_prms->pi4_valid_part_ids;
+    S32 i4_step_size = ps_result_prms->i4_step;
+    S32 i4_mask = ps_result_prms->i4_grid_mask;
+    S32 i4_num_best = (S32)ps_results->u1_num_results_per_part;
+    S32 i4_ref = (S32)ps_result_prms->i1_ref_idx;
+    S32 i4_lx = 1 - ps_results->pu1_is_past[i4_ref];
+    /* Only the first valid part id is processed by this variant */
+    S32 i4_part = pi4_part_ids[0];
+    /* Best result list of that partition for this reference */
+    search_node_t *ps_best = ps_results->aps_part_results[i4_ref][i4_part];
+    /* Working copy of the centre candidate; only its mv changes per point */
+    search_node_t s_cand = *ps_centre;
+    S32 i4_best_pt = (S32)PT_C;
+    S32 i4_active_pts = 0;
+    S32 i4_pt;
+
+    /* Number of active points among the low 9 bits of the mask; this is */
+    /* the distance between the SADs of consecutive part ids             */
+    for(i4_pt = 0; i4_pt < 9; i4_pt++)
+    {
+        if(i4_mask & (1 << i4_pt))
+        {
+            i4_active_pts++;
+        }
+    }
+
+    /*************************************************************************/
+    /* Walk over all grid points, skipping the inactive ones                 */
+    /*************************************************************************/
+    for(i4_pt = 0; i4_pt < (S32)NUM_GRID_PTS; i4_pt++)
+    {
+        S32 i4_sad, i4_mv_cost, i4_tot_cost;
+
+        if(0 == (i4_mask & (1 << i4_pt)))
+        {
+            continue;
+        }
+
+        /* mv of this grid point = centre mv + step * grid offset (FPEL) */
+        s_cand.s_mv.i2_mvx = ps_centre->s_mv.i2_mvx;
+        s_cand.s_mv.i2_mvy = ps_centre->s_mv.i2_mvy;
+        s_cand.s_mv.i2_mvx += (S16)(i4_step_size * gai1_grid_id_to_x[i4_pt]);
+        s_cand.s_mv.i2_mvy += (S16)(i4_step_size * gai1_grid_id_to_y[i4_pt]);
+
+        i4_mv_cost = compute_mv_cost_coarse_high_speed(
+            &s_cand, &ps_results->as_pred_ctxt[i4_lx], (PART_ID_T)i4_part, MV_RES_FPEL);
+
+        i4_sad = pi4_sad_cursor[i4_active_pts * i4_part];
+        i4_tot_cost = i4_sad + i4_mv_cost;
+
+        ASSERT(pi4_part_ids[0] == i4_part);
+        ASSERT(i4_num_best == 1);
+
+        /* Accept the point only if it beats the last stored result */
+        if(i4_tot_cost < ps_best[i4_num_best - 1].i4_tot_cost)
+        {
+            i4_best_pt = i4_pt;
+            ps_result_prms->i4_min_cost = i4_tot_cost;
+
+            ps_best[0] = s_cand;
+            ps_best[0].i4_sad = i4_sad;
+            ps_best[0].i4_mv_cost = i4_mv_cost;
+            ps_best[0].i4_tot_cost = i4_tot_cost;
+        }
+
+        /* The SAD cursor advances once per active point only */
+        pi4_sad_cursor++;
+    }
+
+    ps_result_prms->i4_min_id = i4_best_pt;
+}
+
+/**
+********************************************************************************
+* @fn hme_update_results_grid_pu_bestn(result_upd_prms_t *ps_result_prms)
+*
+* @brief Grid result update for several partitions and several stored
+*        results. Every active grid point is costed for every valid part id
+*        (list terminated by a negative id) through pf_mv_cost_compute and
+*        inserted into the result list of that partition ahead of the first
+*        stored result with a higher total cost.
+*
+* @param[in,out] ps_result_prms : input parameters; for the first valid part
+*                id, i4_min_cost is lowered whenever a point beats it and
+*                i4_min_id receives the id of that point (PT_C otherwise)
+*
+* @return none
+********************************************************************************
+*/
+void hme_update_results_grid_pu_bestn(result_upd_prms_t *ps_result_prms)
+{
+    search_results_t *ps_results = ps_result_prms->ps_search_results;
+    const search_node_t *ps_centre = ps_result_prms->ps_search_node_base;
+    S32 *pi4_sad_cursor = ps_result_prms->pi4_sad_grid;
+    S32 *pi4_part_ids = ps_result_prms->pi4_valid_part_ids;
+    S32 i4_step_size = ps_result_prms->i4_step;
+    S32 i4_mask = ps_result_prms->i4_grid_mask;
+    S32 i4_num_best = (S32)ps_results->u1_num_results_per_part;
+    S32 i4_ref = (S32)ps_result_prms->i1_ref_idx;
+    S32 i4_lx = 1 - ps_results->pu1_is_past[i4_ref];
+    /* Part id whose best point / min cost is reported back to the caller */
+    S32 i4_first_part = pi4_part_ids[0];
+    /* Working copy of the centre candidate; only its mv changes per point */
+    search_node_t s_cand = *ps_centre;
+    S32 i4_best_pt = (S32)PT_C;
+    S32 i4_active_pts = 0;
+    S32 i4_pt;
+
+    /* Number of active points among the low 9 bits of the mask; this is */
+    /* the distance between the SADs of consecutive part ids             */
+    for(i4_pt = 0; i4_pt < 9; i4_pt++)
+    {
+        if(i4_mask & (1 << i4_pt))
+        {
+            i4_active_pts++;
+        }
+    }
+
+    /*************************************************************************/
+    /* Walk over all grid points, skipping the inactive ones                 */
+    /*************************************************************************/
+    for(i4_pt = 0; i4_pt < (S32)NUM_GRID_PTS; i4_pt++)
+    {
+        S32 i4_k, i4_part;
+
+        if(0 == (i4_mask & (1 << i4_pt)))
+        {
+            continue;
+        }
+
+        /* mv of this grid point = centre mv + step * grid offset (FPEL) */
+        s_cand.s_mv.i2_mvx = ps_centre->s_mv.i2_mvx;
+        s_cand.s_mv.i2_mvy = ps_centre->s_mv.i2_mvy;
+        s_cand.s_mv.i2_mvx += (S16)(i4_step_size * gai1_grid_id_to_x[i4_pt]);
+        s_cand.s_mv.i2_mvy += (S16)(i4_step_size * gai1_grid_id_to_y[i4_pt]);
+
+        for(i4_k = 0; (i4_part = pi4_part_ids[i4_k]) >= 0; i4_k++)
+        {
+            /* Result list of this partition for this reference */
+            search_node_t *ps_list = ps_results->aps_part_results[i4_ref][i4_part];
+            S32 i4_mv_cost = ps_result_prms->pf_mv_cost_compute(
+                &s_cand, &ps_results->as_pred_ctxt[i4_lx], (PART_ID_T)i4_part, MV_RES_FPEL);
+            S32 i4_sad = pi4_sad_cursor[i4_active_pts * i4_part];
+            S32 i4_tot_cost = i4_sad + i4_mv_cost;
+
+            /* Track the best grid point for the first valid part id only */
+            if((i4_part == i4_first_part) && (i4_tot_cost < ps_result_prms->i4_min_cost))
+            {
+                i4_best_pt = i4_pt;
+                ps_result_prms->i4_min_cost = i4_tot_cost;
+            }
+
+            /* Nothing to do unless the point beats the last stored result */
+            if(i4_tot_cost < ps_list[i4_num_best - 1].i4_tot_cost)
+            {
+                S32 i4_slot = 0;
+
+                while(i4_slot < i4_num_best - 1)
+                {
+                    S32 i4_slot_cost = ps_list[i4_slot].i4_tot_cost;
+
+                    if(i4_tot_cost < i4_slot_cost)
+                    {
+                        /* Make room: push this and all later results down */
+                        memmove(
+                            &ps_list[i4_slot + 1],
+                            &ps_list[i4_slot],
+                            sizeof(search_node_t) * (i4_num_best - 1 - i4_slot));
+                        break;
+                    }
+
+                    /* Equal cost and hme_cmp_nodes() reports 0: the slot is */
+                    /* overwritten in place instead of shifting the list     */
+                    if((i4_tot_cost == i4_slot_cost) &&
+                       (0 == hme_cmp_nodes(&s_cand, &ps_list[i4_slot])))
+                    {
+                        break;
+                    }
+
+                    i4_slot++;
+                }
+
+                ps_list[i4_slot] = s_cand;
+                ps_list[i4_slot].i4_sad = i4_sad;
+                ps_list[i4_slot].i4_mv_cost = i4_mv_cost;
+                ps_list[i4_slot].i4_tot_cost = i4_tot_cost;
+            }
+        }
+
+        /* The SAD cursor advances once per active point only */
+        pi4_sad_cursor++;
+    }
+
+    ps_result_prms->i4_min_id = i4_best_pt;
+}
+
+/**
+********************************************************************************
+* @fn hme_update_results_grid_pu_bestn_no_encode(result_upd_prms_t *ps_result_prms)
+*
+* @brief Updates the list of best results (u1_num_results_per_part entries)
+* of every valid partition with every active point of a search grid
+* Note : The function is replicated for CLIPing the cost to 16bit to make
+* bit match with SIMD version
+*
+* @param[in] result_upd_prms_t : Contains the input parameters to this fxn
+*
+* @return The result_upd_prms_t structure is updated for all the active
+* parts in case the current candt has results for any given part
+* that is better than the last stored result for that part.
+* i4_min_id / i4_min_cost are tracked for the first valid part id
+********************************************************************************
+*/
+void hme_update_results_grid_pu_bestn_no_encode(result_upd_prms_t *ps_result_prms)
+{
+ search_node_t s_search_node_grid;
+ const search_node_t *ps_search_node_base;
+ search_node_t *ps_search_node_grid, *ps_best_node;
+ S32 i4_min_cost = (MAX_32BIT_VAL), i4_search_idx;
+ S32 num_results, i4_unique_id = -1, i4_grid_pt;
+ search_results_t *ps_search_results;
+ S32 *pi4_valid_part_ids;
+ S32 i4_step = ps_result_prms->i4_step;
+ S32 i4_grid_mask, i4_count, i, i4_min_id;
+ S32 i4_tot_cost, i4_mv_cost, i4_sad, id;
+ S32 *pi4_sad_grid = ps_result_prms->pi4_sad_grid;
+ S32 grid_count = 0;
+ S32 pred_lx;
+
+ i4_min_id = (S32)PT_C;
+ /* NOTE(review): i4_min_cost is never read in this function; the running */
+ /* minimum is kept in ps_result_prms->i4_min_cost instead. */
+ i4_min_cost = MAX_32BIT_VAL;
+ /* Work on a local copy of the centre node; only its mv is changed below */
+ ps_search_node_grid = &s_search_node_grid;
+ ps_search_node_base = ps_result_prms->ps_search_node_base;
+ *ps_search_node_grid = *ps_search_node_base;
+ pi4_valid_part_ids = ps_result_prms->pi4_valid_part_ids;
+ ps_search_results = ps_result_prms->ps_search_results;
+ num_results = (S32)ps_search_results->u1_num_results_per_part;
+ i4_grid_mask = ps_result_prms->i4_grid_mask;
+
+ /* Count the active points among the low 9 bits of the grid mask. The */
+ /* count is the distance between the SADs of consecutive part ids. */
+ for(i = 0; i < 9; i++)
+ {
+ if(i4_grid_mask & (1 << i))
+ grid_count++;
+ }
+
+ /* Any number of active grid pts, valid parts and stored results is */
+ /* handled by the loops below. */
+
+ i4_search_idx = (S32)ps_result_prms->i1_ref_idx;
+ pred_lx = 1 - ps_search_results->pu1_is_past[i4_search_idx];
+
+ /*************************************************************************/
+ /* Supposing we do the result update for a unique partid, we can */
+ /* store the best pt id in the grid and also min cost is return */
+ /* param. This will be useful for early exit cases. */
+ /* TODO : once we have separate fxn for unique part+grid, we can */
+ /* do away with this code here */
+ /*************************************************************************/
+ //if (pi4_valid_part_ids[1] == -1)
+ i4_unique_id = pi4_valid_part_ids[0];
+
+ /*************************************************************************/
+ /* Outer loop runs through all active pts in the grid */
+ /*************************************************************************/
+ for(i4_grid_pt = 0; i4_grid_pt < (S32)NUM_GRID_PTS; i4_grid_pt++)
+ {
+ /* Inactive pts are skipped without advancing pi4_sad_grid */
+ if(!(i4_grid_mask & (1 << i4_grid_pt)))
+ continue;
+
+ /* For the pt in the grid, update mvx and y depending on */
+ /* location of pt. Updates are in FPEL units. */
+ ps_search_node_grid->s_mv.i2_mvx = ps_search_node_base->s_mv.i2_mvx;
+ ps_search_node_grid->s_mv.i2_mvy = ps_search_node_base->s_mv.i2_mvy;
+ ps_search_node_grid->s_mv.i2_mvx += (S16)(i4_step * gai1_grid_id_to_x[i4_grid_pt]);
+ ps_search_node_grid->s_mv.i2_mvy += (S16)(i4_step * gai1_grid_id_to_y[i4_grid_pt]);
+
+ i4_count = 0;
+
+ /* pi4_valid_part_ids contains all the valid ids. We loop through */
+ /* this till we encounter -1. This is easier than having to */
+ /* figure out part by part, besides, active part decision is */
+ /* usually fixed for a given duration of search, e.g. entire fpel */
+ /* refinement for a blk/cu will use fixed valid part mask */
+
+ while((id = pi4_valid_part_ids[i4_count]) >= 0)
+ {
+ //ps_search_node_grid->e_part_type = (PART_TYPE_T)id;
+
+ /*****************************************************************/
+ /* points to the best search results corresponding to this */
+ /* specific part type. */
+ /*****************************************************************/
+ ps_best_node = ps_search_results->aps_part_results[i4_search_idx][id];
+
+ /* evaluate mv cost and totalcost for this part for this given mv*/
+ i4_mv_cost = ps_result_prms->pf_mv_cost_compute(
+ ps_search_node_grid,
+ &ps_search_results->as_pred_ctxt[pred_lx],
+ (PART_ID_T)id,
+ MV_RES_FPEL);
+
+ /* SAD of this part for the current pt: parts are grid_count apart */
+ i4_sad = pi4_sad_grid[grid_count * id];
+
+ /* Clipping to 16 bit to bit match with SIMD version */
+ i4_mv_cost = CLIP_S16(i4_mv_cost);
+ i4_sad = CLIP_S16(i4_sad);
+
+ i4_tot_cost = i4_sad + i4_mv_cost;
+ /* Clipping to 16 bit to bit match with SIMD version */
+ i4_tot_cost = CLIP_S16(i4_tot_cost);
+
+ /* Best pt / min cost are reported for the first valid part id only */
+ if(i4_unique_id == id)
+ {
+ if(i4_tot_cost < ps_result_prms->i4_min_cost)
+ {
+ i4_min_id = i4_grid_pt;
+ ps_result_prms->i4_min_cost = i4_tot_cost;
+ }
+ }
+
+ /*****************************************************************/
+ /* We do not labor through the results if the total cost worse */
+ /* than the last of the results. */
+ /*****************************************************************/
+ if(i4_tot_cost < ps_best_node[num_results - 1].i4_tot_cost)
+ {
+ S32 eq_cost = 0;
+ /*************************************************************/
+ /* Identify where the current result is to be placed. */
+ /* Basically find the node which has cost just higher than */
+ /* the node under test */
+ /*************************************************************/
+ for(i = 0; i < num_results - 1; i++)
+ {
+ if(i4_tot_cost < ps_best_node[i].i4_tot_cost)
+ {
+ memmove(
+ ps_best_node + i + 1,
+ ps_best_node + i,
+ sizeof(search_node_t) * (num_results - 1 - i));
+ break;
+ }
+ else if(i4_tot_cost == ps_best_node[i].i4_tot_cost)
+ {
+ //if (0 == hme_cmp_nodes(ps_search_node_grid, ps_best_node+i))
+ // break;
+ /* When cost is same we comp. the nodes and if it's same skip. */
+ /* We don't want to add this code to intrinsic. So we are */
+ /* commenting it. The quality impact was minor when we did the */
+ /* regression. */
+ eq_cost = 1;
+ }
+ }
+ /* NOTE(review): eq_cost does not stop the search above. If a */
+ /* later entry is costlier, the memmove still shifts the list */
+ /* but nothing is inserted, which duplicates that entry and */
+ /* drops the last one. Presumably kept to bit match the SIMD */
+ /* version - confirm before changing. */
+ if(!eq_cost)
+ {
+ ps_best_node[i] = *ps_search_node_grid;
+ ps_best_node[i].i4_sad = i4_sad;
+ ps_best_node[i].i4_mv_cost = i4_mv_cost;
+ ps_best_node[i].i4_tot_cost = i4_tot_cost;
+ }
+ }
+ i4_count++;
+ }
+ /* Next active pt: move to the next SAD of every part */
+ pi4_sad_grid++;
+ }
+ ps_result_prms->i4_min_id = i4_min_id;
+}
+
+/**
+********************************************************************************
+* @fn hme_update_results_pt_pu_best1_subpel_hs(err_prms_t *ps_err_prms,
+*                                              result_upd_prms_t *ps_result_prms)
+*
+* @brief Single point result update for several parts. The candidate
+*        ps_search_node_base is merged into the result list of every valid
+*        partition. The mv cost is not evaluated afresh; the i4_mv_cost
+*        already stored in the first result of the partition is reused.
+*
+* @param[in] ps_err_prms : not referenced by this function
+*
+* @param[in,out] ps_result_prms. Contains the input parameters to this fxn
+*            ::pi4_sad_grid : SAD of the candidate, indexed by part id
+*            ::ps_search_results: Search results structure
+*            ::u1_pred_lx : selects the set of result lists that is updated
+*            ::i4_grid_mask: must be 1 (asserted)
+*            ::pi4_valid_part_ids : valid part ids, ended by a negative id
+*            ::ps_search_node_base: candidate that is copied into the list
+*
+* @return The ps_search_results structure is updated for all the active
+*         parts for which the candidate beats the last stored result
+********************************************************************************
+*/
+
+void hme_update_results_pt_pu_best1_subpel_hs(
+    err_prms_t *ps_err_prms, result_upd_prms_t *ps_result_prms)
+{
+    search_results_t *ps_results = ps_result_prms->ps_search_results;
+    search_node_t *ps_cand = (search_node_t *)ps_result_prms->ps_search_node_base;
+    S32 *pi4_part_ids = ps_result_prms->pi4_valid_part_ids;
+    S32 i4_list_set = ps_result_prms->u1_pred_lx;
+    S32 i4_num_best = (S32)ps_results->u1_num_results_per_part;
+    S32 i4_k, i4_part;
+
+    /* This variant handles a single point only */
+    ASSERT(ps_result_prms->i4_grid_mask == 1);
+
+    for(i4_k = 0; (i4_part = pi4_part_ids[i4_k]) >= 0; i4_k++)
+    {
+        search_node_t *ps_list = ps_results->aps_part_results[i4_list_set][i4_part];
+        /* Pre-computed mv cost of the current best result is reused */
+        S32 i4_mv_cost = ps_list->i4_mv_cost;
+        S32 i4_sad = ps_result_prms->pi4_sad_grid[i4_part];
+        S32 i4_tot_cost = i4_sad + i4_mv_cost;
+        S32 i4_skip = 0;
+        S32 i4_slot;
+
+        /* Candidates not better than the last stored result are ignored */
+        if(i4_tot_cost >= ps_list[i4_num_best - 1].i4_tot_cost)
+        {
+            continue;
+        }
+
+        /* Locate the slot of the candidate */
+        for(i4_slot = 0; i4_slot < i4_num_best - 1; i4_slot++)
+        {
+            /* A slot with ref idx -1 is taken over without shifting */
+            if(-1 == ps_list[i4_slot].i1_ref_idx)
+            {
+                break;
+            }
+
+            if(i4_tot_cost < ps_list[i4_slot].i4_tot_cost)
+            {
+                /* Make room: push this and all later results down */
+                memmove(
+                    &ps_list[i4_slot + 1],
+                    &ps_list[i4_slot],
+                    sizeof(search_node_t) * (i4_num_best - 1 - i4_slot));
+                break;
+            }
+
+            /* Same cost as a stored result: candidate is dropped */
+            if(i4_tot_cost == ps_list[i4_slot].i4_tot_cost)
+            {
+                i4_skip = 1;
+                break;
+            }
+        }
+
+        if(!i4_skip)
+        {
+            ps_list[i4_slot] = *ps_cand;
+            ps_list[i4_slot].i4_sad = i4_sad;
+            ps_list[i4_slot].i4_mv_cost = i4_mv_cost;
+            ps_list[i4_slot].i4_tot_cost = i4_tot_cost;
+        }
+    }
+}
+
+/**
+********************************************************************************
+* @fn hme_update_results_pt_pu_best1_subpel_hs_1(err_prms_t *ps_err_prms,
+*                                                result_upd_prms_t *ps_result_prms)
+*
+* @brief Single point update of the best and second best entries kept in the
+*        subpel refine context. For every valid partition the candidate
+*        (i2_mv_x, i2_mv_y, i1_ref_idx of ps_result_prms) is costed with the
+*        SAD from pi4_sad_grid plus the mv cost already stored in the first
+*        search result of that partition, and compared against the first two
+*        search results of that partition.
+*
+* @param[in] ps_err_prms : not referenced by this function
+*
+* @param[in,out] ps_result_prms : input parameters; the arrays of
+*                ps_subpel_refine_ctxt are written at position i4_count, the
+*                running index into pi4_valid_part_ids
+*
+* @return none
+*
+* @note Entry [1] of the partition's search results is read unconditionally.
+*       NOTE(review): assumes at least two results per part - confirm with
+*       the callers.
+********************************************************************************
+*/
+void hme_update_results_pt_pu_best1_subpel_hs_1(
+    err_prms_t *ps_err_prms, result_upd_prms_t *ps_result_prms)
+{
+    search_results_t *ps_search_results = ps_result_prms->ps_search_results;
+    subpel_refine_ctxt_t *ps_subpel_refine_ctxt = ps_result_prms->ps_subpel_refine_ctxt;
+    S32 *pi4_valid_part_ids = ps_result_prms->pi4_valid_part_ids;
+    S32 i4_search_idx = ps_result_prms->u1_pred_lx;
+    S32 i4_count, id;
+
+    /* This variant handles a single point only */
+    ASSERT(ps_result_prms->i4_grid_mask == 1);
+
+    for(i4_count = 0; (id = pi4_valid_part_ids[i4_count]) >= 0; i4_count++)
+    {
+        search_node_t *ps_best_node = ps_search_results->aps_part_results[i4_search_idx][id];
+        /* Use a pre-computed cost instead of freshly evaluating subpel cost */
+        S32 i4_mv_cost = ps_best_node->i4_mv_cost;
+        S32 i4_sad = ps_result_prms->pi4_sad_grid[id];
+        S32 i4_tot_cost = i4_sad + i4_mv_cost;
+
+        /* Not better than the second best result: nothing to update */
+        if(i4_tot_cost >= ps_best_node[1].i4_tot_cost)
+        {
+            continue;
+        }
+
+        if(i4_tot_cost < ps_best_node[0].i4_tot_cost)
+        {
+            /* New best: the current best entry is demoted to second best */
+            ps_subpel_refine_ctxt->i2_tot_cost[1][i4_count] =
+                ps_subpel_refine_ctxt->i2_tot_cost[0][i4_count];
+            ps_subpel_refine_ctxt->i2_mv_cost[1][i4_count] =
+                ps_subpel_refine_ctxt->i2_mv_cost[0][i4_count];
+            ps_subpel_refine_ctxt->i2_mv_x[1][i4_count] =
+                ps_subpel_refine_ctxt->i2_mv_x[0][i4_count];
+            ps_subpel_refine_ctxt->i2_mv_y[1][i4_count] =
+                ps_subpel_refine_ctxt->i2_mv_y[0][i4_count];
+            ps_subpel_refine_ctxt->i2_ref_idx[1][i4_count] =
+                ps_subpel_refine_ctxt->i2_ref_idx[0][i4_count];
+
+            ps_subpel_refine_ctxt->i2_tot_cost[0][i4_count] = i4_tot_cost;
+            ps_subpel_refine_ctxt->i2_mv_cost[0][i4_count] = i4_mv_cost;
+            ps_subpel_refine_ctxt->i2_mv_x[0][i4_count] = ps_result_prms->i2_mv_x;
+            ps_subpel_refine_ctxt->i2_mv_y[0][i4_count] = ps_result_prms->i2_mv_y;
+            ps_subpel_refine_ctxt->i2_ref_idx[0][i4_count] = ps_result_prms->i1_ref_idx;
+        }
+        else if(
+            (ps_result_prms->i2_mv_x == ps_best_node[0].s_mv.i2_mvx) &&
+            (ps_result_prms->i2_mv_y == ps_best_node[0].s_mv.i2_mvy) &&
+            (ps_best_node[0].i1_ref_idx == ps_result_prms->i1_ref_idx))
+        {
+            /* Same mv and ref idx as the best result: no update */
+        }
+        else
+        {
+            /* Falls between best and second best: replaces second best */
+            ps_subpel_refine_ctxt->i2_tot_cost[1][i4_count] = i4_tot_cost;
+            ps_subpel_refine_ctxt->i2_mv_cost[1][i4_count] = i4_mv_cost;
+            ps_subpel_refine_ctxt->i2_mv_x[1][i4_count] = ps_result_prms->i2_mv_x;
+            ps_subpel_refine_ctxt->i2_mv_y[1][i4_count] = ps_result_prms->i2_mv_y;
+            ps_subpel_refine_ctxt->i2_ref_idx[1][i4_count] = ps_result_prms->i1_ref_idx;
+        }
+    }
+}
+
+/**
+******************************************************************************
+* @brief Gives a result fxn ptr for a index [x] where x is as:
+* 0 : single pt, no partial updates, 1 best result
+* 1 : single pt, no partial updates, N best results
+* 2 : single pt, partial updates, 1 best result
+* 3 : single pt, partial updates, N best results
+* 4 : grid , no partial updates, 1 best result
+* 5 : grid , no partial updates, N best results
+* 6 : grid , partial updates, 1 best result
+* 7 : grid , partial updates, N best results
+* The index is formed in hme_get_result_fxn() as
+* (is_grid << 2) + (is_pu << 1) + (num_results > 1)
+******************************************************************************
+*/
+
+static PF_RESULT_FXN_T g_pf_result_fxn[8] = { UPD_RES_PT_NPU_BEST1, UPD_RES_PT_NPU_BESTN,
+ UPD_RES_PT_PU_BEST1, UPD_RES_PT_PU_BESTN,
+ UPD_RES_GRID_NPU_BEST1, UPD_RES_GRID_NPU_BESTN,
+ UPD_RES_GRID_PU_BEST1, UPD_RES_GRID_PU_BESTN };
+
+/**
+********************************************************************************
+* @fn hme_get_result_fxn(i4_grid_mask, i4_part_mask, i4_num_results)
+*
+* @brief Picks the result update function from g_pf_result_fxn[]. The table
+*        index is built from three yes/no decisions: grid or single point
+*        (weight 4), more than one part or one part (weight 2), more than
+*        one result or one result (weight 1).
+*
+* @param[in] i4_grid_mask : Mask of active grid pts; any value other than 1
+*            selects the grid variants
+*
+* @param[in] i4_part_mask : Mask of active parts; more than one bit set
+*            selects the multi part variants
+*
+* @param[in] i4_num_results: Number of active results; more than 1 selects
+*            the N best variants
+*
+* @return Pointer to the appropriate result update function
+********************************************************************************
+*/
+PF_RESULT_FXN_T hme_get_result_fxn(S32 i4_grid_mask, S32 i4_part_mask, S32 i4_num_results)
+{
+    S32 i4_tbl_idx = 0;
+
+    /* Anything but the lone bit 0 counts as a grid */
+    if(1 != i4_grid_mask)
+    {
+        i4_tbl_idx += 4;
+    }
+
+    /* x & (x - 1) clears the lowest set bit: non zero => 2 or more bits set */
+    if(0 != (i4_part_mask & (i4_part_mask - 1)))
+    {
+        i4_tbl_idx += 2;
+    }
+
+    if(i4_num_results > 1)
+    {
+        i4_tbl_idx += 1;
+    }
+
+    return (g_pf_result_fxn[i4_tbl_idx]);
+}
+
+/**
+********************************************************************************
+* @fn hme_calc_sad_and_2_best_results
+*
+* @brief For every search candidate: computes NUM_4X4 4x4 SADs between the
+*        input and the reference at the candidate mv, combines them into the
+*        per partition SAD grid, adds a lambda weighted mv cost and keeps
+*        the best and second best result of every valid partition in the
+*        fullpel refine context.
+*
+* @param[in] ps_search_prms : candidates (ps_search_nodes,
+*            i4_num_search_nodes), cu / ref offsets and ps_fullpel_refine_ctxt
+*
+* @param[in] ps_wt_inp_prms : input planes per reference (apu1_wt_inp)
+*
+* @param[in,out] ps_err_prms : strides and SAD grid; pu1_inp and pu1_ref are
+*                overwritten for every evaluated candidate
+*
+* @param[in] ps_result_prms : search results (pred ctxt) and i1_ref_idx
+*
+* @param[in] ppu1_ref : reference plane pointers, indexed by ref idx
+*
+* @param[in] i4_ref_stride : stride used to locate the candidate in the ref
+*
+* @return none; the refine context reached through ps_search_prms is updated
+*
+* @note Two defects of the earlier implementation are corrected here:
+*       1. A candidate with mvx == INTRA_MV was skipped with `continue`
+*          before the candidate pointer was advanced, so the same node was
+*          re-tested for all remaining iterations and every later candidate
+*          was dropped. The pointer now advances in the loop header.
+*       2. The closing fix-up loop addressed the refine context with the
+*          part id, whereas the update loop uses the part id only when more
+*          than 8 parts are valid and the running count otherwise. Both
+*          loops now use the same index.
+*       NOTE(review): if an optimised (SIMD) counterpart exists it needs the
+*       same changes to stay bit exact.
+********************************************************************************
+*/
+void hme_calc_sad_and_2_best_results(
+    hme_search_prms_t *ps_search_prms,
+    wgt_pred_ctxt_t *ps_wt_inp_prms,
+    err_prms_t *ps_err_prms,
+    result_upd_prms_t *ps_result_prms,
+    U08 **ppu1_ref,
+    S32 i4_ref_stride)
+{
+    S32 i4_candt;
+    S32 i4_inp_off;
+    S32 i4_ref_offset;
+    S32 i4_num_nodes;
+
+    S32 *pi4_sad_grid = ps_err_prms->pi4_sad_grid;
+    S32 cur_buf_stride = ps_err_prms->i4_inp_stride;
+    WORD32 ref_buf_stride = ps_err_prms->i4_ref_stride;
+    /* Stride of one row of 4x4 blocks (4 pixel rows) */
+    WORD32 cur_buf_stride_ls2 = (cur_buf_stride << 2);
+    WORD32 ref_buf_stride_ls2 = (ref_buf_stride << 2);
+
+    mv_refine_ctxt_t *ps_mv_refine_ctxt;
+    search_node_t *ps_search_node;
+
+    ps_mv_refine_ctxt = ps_search_prms->ps_fullpel_refine_ctxt;
+    i4_num_nodes = ps_search_prms->i4_num_search_nodes;
+    i4_inp_off = ps_search_prms->i4_cu_x_off;
+    i4_inp_off += ps_search_prms->i4_cu_y_off * cur_buf_stride;
+    i4_ref_offset = (i4_ref_stride * ps_search_prms->i4_y_off) + ps_search_prms->i4_x_off;
+    ps_search_node = ps_search_prms->ps_search_nodes;
+
+    /* The candidate pointer advances in the loop header so that the */
+    /* `continue` taken for INTRA_MV candidates moves on to the next one */
+    for(i4_candt = 0; i4_candt < i4_num_nodes; i4_candt++, ps_search_node++)
+    {
+        /**********************************************************************/
+        /* COMPUTE THE SAD AND UPDATE THE SAD GRID                            */
+        /**********************************************************************/
+        {
+            WORD32 b, c, d;
+            UWORD8 *pu1_cur_ptr;
+            UWORD8 *pu1_ref_ptr;
+            UWORD16 au2_4x4_sad[NUM_4X4];
+
+            /* Candidates flagged as intra carry no usable mv */
+            if(ps_search_node->s_mv.i2_mvx == INTRA_MV)
+            {
+                continue;
+            }
+
+            ps_err_prms->pu1_inp =
+                ps_wt_inp_prms->apu1_wt_inp[ps_search_node->i1_ref_idx] + i4_inp_off;
+            ps_err_prms->pu1_ref = ppu1_ref[ps_search_node->i1_ref_idx] + i4_ref_offset;
+            ps_err_prms->pu1_ref += ps_search_node->s_mv.i2_mvx;
+            ps_err_prms->pu1_ref += (ps_search_node->s_mv.i2_mvy * i4_ref_stride);
+
+            pu1_cur_ptr = ps_err_prms->pu1_inp;
+            pu1_ref_ptr = &ps_err_prms->pu1_ref[0];
+
+            /* Loop to compute the SAD's */
+            {
+                memset(&au2_4x4_sad[0], 0, NUM_4X4 * sizeof(UWORD16));
+                for(b = 0; b < NUM_4X4; b++)
+                {
+                    /* Block b sits in column (b % 4) and row (b >> 2) */
+                    WORD32 t1 = (b % 4) * NUM_PIXELS_IN_ROW + (b >> 2) * cur_buf_stride_ls2;
+                    WORD32 t2 = (b % 4) * NUM_PIXELS_IN_ROW + (b >> 2) * ref_buf_stride_ls2;
+
+                    for(c = 0; c < NUM_ROWS_IN_4X4; c++)
+                    {
+                        WORD32 z_cur = (cur_buf_stride)*c + t1;
+                        WORD32 z_ref = (ref_buf_stride)*c + t2;
+                        for(d = 0; d < NUM_PIXELS_IN_ROW; d++)
+                        {
+                            au2_4x4_sad[b] += (UWORD16)ABS((
+                                ((S32)pu1_ref_ptr[(z_ref + d)]) - ((S32)pu1_cur_ptr[(z_cur + d)])));
+                        }
+                    }
+                }
+
+                /* Quadrants first, then everything derived from them */
+                pi4_sad_grid[PART_ID_NxN_TL] =
+                    (au2_4x4_sad[0] + au2_4x4_sad[1] + au2_4x4_sad[4] + au2_4x4_sad[5]);
+                pi4_sad_grid[PART_ID_NxN_TR] =
+                    (au2_4x4_sad[2] + au2_4x4_sad[3] + au2_4x4_sad[6] + au2_4x4_sad[7]);
+                pi4_sad_grid[PART_ID_NxN_BL] =
+                    (au2_4x4_sad[8] + au2_4x4_sad[9] + au2_4x4_sad[12] + au2_4x4_sad[13]);
+                pi4_sad_grid[PART_ID_NxN_BR] =
+                    (au2_4x4_sad[10] + au2_4x4_sad[11] + au2_4x4_sad[14] + au2_4x4_sad[15]);
+                pi4_sad_grid[PART_ID_Nx2N_L] =
+                    pi4_sad_grid[PART_ID_NxN_TL] + pi4_sad_grid[PART_ID_NxN_BL];
+                pi4_sad_grid[PART_ID_Nx2N_R] =
+                    pi4_sad_grid[PART_ID_NxN_TR] + pi4_sad_grid[PART_ID_NxN_BR];
+                pi4_sad_grid[PART_ID_2NxN_T] =
+                    pi4_sad_grid[PART_ID_NxN_TR] + pi4_sad_grid[PART_ID_NxN_TL];
+                pi4_sad_grid[PART_ID_2NxN_B] =
+                    pi4_sad_grid[PART_ID_NxN_BR] + pi4_sad_grid[PART_ID_NxN_BL];
+                pi4_sad_grid[PART_ID_nLx2N_L] =
+                    (au2_4x4_sad[8] + au2_4x4_sad[0] + au2_4x4_sad[12] + au2_4x4_sad[4]);
+                pi4_sad_grid[PART_ID_nRx2N_R] =
+                    (au2_4x4_sad[3] + au2_4x4_sad[7] + au2_4x4_sad[15] + au2_4x4_sad[11]);
+                pi4_sad_grid[PART_ID_2NxnU_T] =
+                    (au2_4x4_sad[1] + au2_4x4_sad[0] + au2_4x4_sad[2] + au2_4x4_sad[3]);
+                pi4_sad_grid[PART_ID_2NxnD_B] =
+                    (au2_4x4_sad[15] + au2_4x4_sad[14] + au2_4x4_sad[12] + au2_4x4_sad[13]);
+                pi4_sad_grid[PART_ID_2Nx2N] =
+                    pi4_sad_grid[PART_ID_2NxN_T] + pi4_sad_grid[PART_ID_2NxN_B];
+                /* The larger half of each asymmetric split is the remainder */
+                pi4_sad_grid[PART_ID_2NxnU_B] =
+                    pi4_sad_grid[PART_ID_2Nx2N] - pi4_sad_grid[PART_ID_2NxnU_T];
+                pi4_sad_grid[PART_ID_2NxnD_T] =
+                    pi4_sad_grid[PART_ID_2Nx2N] - pi4_sad_grid[PART_ID_2NxnD_B];
+                pi4_sad_grid[PART_ID_nRx2N_L] =
+                    pi4_sad_grid[PART_ID_2Nx2N] - pi4_sad_grid[PART_ID_nRx2N_R];
+                pi4_sad_grid[PART_ID_nLx2N_R] =
+                    pi4_sad_grid[PART_ID_2Nx2N] - pi4_sad_grid[PART_ID_nLx2N_L];
+            }
+        }
+
+        {
+            S32 i4_count = 0, i4_sad, i4_mv_cost, i4_tot_cost;
+            S32 *pi4_valid_part_ids = &ps_mv_refine_ctxt->ai4_part_id[0];
+            S32 best_node_cost;
+            S32 second_best_node_cost;
+
+            /* mv cost of the candidate; computed once against the mvp node */
+            /* of PART_2Nx2N and shared by all partitions                   */
+            {
+                S16 mvdx1, mvdy1;
+                S32 i4_search_idx = (S32)ps_result_prms->i1_ref_idx;
+                search_results_t *ps_search_results = ps_result_prms->ps_search_results;
+                S32 pred_lx = i4_search_idx;
+
+                pred_ctxt_t *ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
+                pred_candt_nodes_t *ps_pred_nodes = &ps_pred_ctxt->as_pred_nodes[PART_2Nx2N];
+                search_node_t *ps_pred_node_a = ps_pred_nodes->ps_mvp_node;
+
+                S32 inp_shift = 2;
+                S32 pred_shift = ps_pred_node_a->u1_subpel_done ? 0 : 2;
+                S32 lambda_q_shift = ps_pred_ctxt->lambda_q_shift;
+                S32 lambda = ps_pred_ctxt->lambda;
+                S32 rnd = 1 << (lambda_q_shift - 1);
+                S32 mv_p_x = ps_pred_node_a->s_mv.i2_mvx;
+                S32 mv_p_y = ps_pred_node_a->s_mv.i2_mvy;
+                S32 ref_bits =
+                    ps_pred_ctxt
+                        ->ppu1_ref_bits_tlu[ps_pred_ctxt->pred_lx][ps_search_node->i1_ref_idx];
+
+                COMPUTE_DIFF_MV(
+                    mvdx1, mvdy1, ps_search_node, mv_p_x, mv_p_y, inp_shift, pred_shift);
+
+                mvdx1 = ABS(mvdx1);
+                mvdy1 = ABS(mvdy1);
+
+                i4_mv_cost = hme_get_range(mvdx1) + hme_get_range(mvdy1) + (mvdx1 > 0) +
+                             (mvdy1 > 0) + ref_bits + 2;
+
+                i4_mv_cost *= lambda;
+                i4_mv_cost += rnd;
+                i4_mv_cost >>= lambda_q_shift;
+
+                i4_mv_cost = CLIP_U16(i4_mv_cost);
+            }
+
+            /*For each valid partition, update the refine_prm structure to reflect the best and second
+            best candidates for that partition*/
+
+            for(i4_count = 0; i4_count < ps_mv_refine_ctxt->i4_num_valid_parts; i4_count++)
+            {
+                S32 update_required = 0;
+                S32 part_id = pi4_valid_part_ids[i4_count];
+                /* Results are stored by part id only when more than 8 parts */
+                /* are valid, otherwise by position in the valid part list   */
+                S32 index = (ps_mv_refine_ctxt->i4_num_valid_parts > 8) ? part_id : i4_count;
+
+                /*Calculate total cost*/
+                i4_sad = CLIP3(pi4_sad_grid[part_id], 0, 0x7fff);
+                i4_tot_cost = CLIP_S16(i4_sad + i4_mv_cost);
+
+                /*****************************************************************/
+                /* We do not labor through the results if the total cost worse   */
+                /* than the last of the results.                                 */
+                /*****************************************************************/
+                best_node_cost = CLIP_S16(ps_mv_refine_ctxt->i2_tot_cost[0][index]);
+                second_best_node_cost = CLIP_S16(ps_mv_refine_ctxt->i2_tot_cost[1][index]);
+
+                if(i4_tot_cost < second_best_node_cost)
+                {
+                    /* 2: replace second best, 1: new best, 0: drop (tie with best) */
+                    update_required = 2;
+
+                    if(i4_tot_cost < best_node_cost)
+                    {
+                        update_required = 1;
+                    }
+                    else if(i4_tot_cost == best_node_cost)
+                    {
+                        update_required = 0;
+                    }
+
+                    if(update_required == 2)
+                    {
+                        ps_mv_refine_ctxt->i2_tot_cost[1][index] = i4_tot_cost;
+                        ps_mv_refine_ctxt->i2_mv_cost[1][index] = i4_mv_cost;
+                        ps_mv_refine_ctxt->i2_mv_x[1][index] = ps_search_node->s_mv.i2_mvx;
+                        ps_mv_refine_ctxt->i2_mv_y[1][index] = ps_search_node->s_mv.i2_mvy;
+                        ps_mv_refine_ctxt->i2_ref_idx[1][index] = ps_search_node->i1_ref_idx;
+                    }
+                    else if(update_required == 1)
+                    {
+                        /* Demote the current best to second best ... */
+                        ps_mv_refine_ctxt->i2_tot_cost[1][index] =
+                            ps_mv_refine_ctxt->i2_tot_cost[0][index];
+                        ps_mv_refine_ctxt->i2_mv_cost[1][index] =
+                            ps_mv_refine_ctxt->i2_mv_cost[0][index];
+                        ps_mv_refine_ctxt->i2_mv_x[1][index] = ps_mv_refine_ctxt->i2_mv_x[0][index];
+                        ps_mv_refine_ctxt->i2_mv_y[1][index] = ps_mv_refine_ctxt->i2_mv_y[0][index];
+                        ps_mv_refine_ctxt->i2_ref_idx[1][index] =
+                            ps_mv_refine_ctxt->i2_ref_idx[0][index];
+
+                        /* ... and install the candidate as the new best */
+                        ps_mv_refine_ctxt->i2_tot_cost[0][index] = i4_tot_cost;
+                        ps_mv_refine_ctxt->i2_mv_cost[0][index] = i4_mv_cost;
+                        ps_mv_refine_ctxt->i2_mv_x[0][index] = ps_search_node->s_mv.i2_mvx;
+                        ps_mv_refine_ctxt->i2_mv_y[0][index] = ps_search_node->s_mv.i2_mvy;
+                        ps_mv_refine_ctxt->i2_ref_idx[0][index] = ps_search_node->i1_ref_idx;
+                    }
+                }
+            }
+        }
+    }
+
+    /* Entries whose total cost is still at MAX_SIGNED_16BIT_VAL get the ref */
+    /* idx of the first search candidate                                      */
+    {
+        WORD32 i4_i;
+        search_node_t *ps_first_node = ps_search_prms->ps_search_nodes;
+
+        for(i4_i = 0; i4_i < ps_mv_refine_ctxt->i4_num_valid_parts; i4_i++)
+        {
+            WORD32 part_id = ps_mv_refine_ctxt->ai4_part_id[i4_i];
+            /* Same slot selection as in the update loop above */
+            WORD32 index = (ps_mv_refine_ctxt->i4_num_valid_parts > 8) ? part_id : i4_i;
+
+            if(ps_mv_refine_ctxt->i2_tot_cost[0][index] >= MAX_SIGNED_16BIT_VAL)
+            {
+                ASSERT(ps_mv_refine_ctxt->i2_mv_cost[0][index] == MAX_SIGNED_16BIT_VAL);
+                ASSERT(ps_mv_refine_ctxt->i2_mv_x[0][index] == 0);
+                ASSERT(ps_mv_refine_ctxt->i2_mv_y[0][index] == 0);
+
+                ps_mv_refine_ctxt->i2_ref_idx[0][index] = ps_first_node->i1_ref_idx;
+            }
+            if(ps_mv_refine_ctxt->i2_tot_cost[1][index] >= MAX_SIGNED_16BIT_VAL)
+            {
+                ASSERT(ps_mv_refine_ctxt->i2_mv_cost[1][index] == MAX_SIGNED_16BIT_VAL);
+                ASSERT(ps_mv_refine_ctxt->i2_mv_x[1][index] == 0);
+                ASSERT(ps_mv_refine_ctxt->i2_mv_y[1][index] == 0);
+
+                ps_mv_refine_ctxt->i2_ref_idx[1][index] = ps_first_node->i1_ref_idx;
+            }
+        }
+    }
+}
+
+void hme_calc_sad_and_2_best_results_subpel(
+ err_prms_t *ps_err_prms, result_upd_prms_t *ps_result_prms)
+{ /* Scores one candidate: fills the partition SAD grid, then merges it into the best / second-best results of ps_subpel_refine_ctxt */
+ S32 i4_candt;
+ S32 i4_num_nodes;
+ /* SAD grid and strides are read from ps_err_prms; the *_ls2 values are the stride times four (stride << 2) */
+ S32 *pi4_sad_grid = ps_err_prms->pi4_sad_grid;
+ S32 cur_buf_stride = ps_err_prms->i4_inp_stride;
+ WORD32 ref_buf_stride = ps_err_prms->i4_ref_stride;
+ WORD32 cur_buf_stride_ls2 = (cur_buf_stride << 2);
+ WORD32 ref_buf_stride_ls2 = (ref_buf_stride << 2);
+
+ mv_refine_ctxt_t *ps_subpel_refine_ctxt;
+ ps_subpel_refine_ctxt = ps_result_prms->ps_subpel_refine_ctxt;
+ i4_num_nodes = 1; /* fixed to one candidate, so the loop below runs exactly once */
+
+ /* Run through each of the candidates in a loop */
+ for(i4_candt = 0; i4_candt < i4_num_nodes; i4_candt++)
+ {
+ /**********************************************************************/
+ /* Compute the 4x4 SADs inline and fill the partition SAD grid */
+ /**********************************************************************/
+ {
+ WORD32 b, c, d;
+ UWORD8 *pu1_cur_ptr;
+ UWORD8 *pu1_ref_ptr;
+ UWORD16 au2_4x4_sad[NUM_4X4];
+
+ pu1_cur_ptr = ps_err_prms->pu1_inp;
+ pu1_ref_ptr = &ps_err_prms->pu1_ref[0];
+
+ /* Accumulate the SAD of every 4x4 sub-block; au2_4x4_sad[] is cleared first */
+ {
+ memset(&au2_4x4_sad[0], 0, NUM_4X4 * sizeof(UWORD16));
+ for(b = 0; b < NUM_4X4; b++)
+ {
+ WORD32 t1 = (b % 4) * NUM_PIXELS_IN_ROW + (b >> 2) * cur_buf_stride_ls2;
+ WORD32 t2 = (b % 4) * NUM_PIXELS_IN_ROW + (b >> 2) * ref_buf_stride_ls2;
+ /* t1 / t2: offset of sub-block b in cur / ref - column (b % 4), row band (b >> 2) */
+ for(c = 0; c < NUM_ROWS_IN_4X4; c++)
+ {
+ WORD32 z_cur = (cur_buf_stride)*c + t1;
+ WORD32 z_ref = (ref_buf_stride)*c + t2;
+ for(d = 0; d < NUM_PIXELS_IN_ROW; d++)
+ {
+ au2_4x4_sad[b] += (UWORD16)ABS((
+ ((S32)pu1_ref_ptr[(z_ref + d)]) - ((S32)pu1_cur_ptr[(z_cur + d)])));
+ }
+ }
+ }
+ /* Partition SADs are sums of sub-block SADs; the last four are 2Nx2N minus the complementary partition */
+ pi4_sad_grid[PART_ID_NxN_TL] =
+ (au2_4x4_sad[0] + au2_4x4_sad[1] + au2_4x4_sad[4] + au2_4x4_sad[5]);
+ pi4_sad_grid[PART_ID_NxN_TR] =
+ (au2_4x4_sad[2] + au2_4x4_sad[3] + au2_4x4_sad[6] + au2_4x4_sad[7]);
+ pi4_sad_grid[PART_ID_NxN_BL] =
+ (au2_4x4_sad[8] + au2_4x4_sad[9] + au2_4x4_sad[12] + au2_4x4_sad[13]);
+ pi4_sad_grid[PART_ID_NxN_BR] =
+ (au2_4x4_sad[10] + au2_4x4_sad[11] + au2_4x4_sad[14] + au2_4x4_sad[15]);
+ pi4_sad_grid[PART_ID_Nx2N_L] =
+ pi4_sad_grid[PART_ID_NxN_TL] + pi4_sad_grid[PART_ID_NxN_BL];
+ pi4_sad_grid[PART_ID_Nx2N_R] =
+ pi4_sad_grid[PART_ID_NxN_TR] + pi4_sad_grid[PART_ID_NxN_BR];
+ pi4_sad_grid[PART_ID_2NxN_T] =
+ pi4_sad_grid[PART_ID_NxN_TR] + pi4_sad_grid[PART_ID_NxN_TL];
+ pi4_sad_grid[PART_ID_2NxN_B] =
+ pi4_sad_grid[PART_ID_NxN_BR] + pi4_sad_grid[PART_ID_NxN_BL];
+ pi4_sad_grid[PART_ID_nLx2N_L] =
+ (au2_4x4_sad[8] + au2_4x4_sad[0] + au2_4x4_sad[12] + au2_4x4_sad[4]);
+ pi4_sad_grid[PART_ID_nRx2N_R] =
+ (au2_4x4_sad[3] + au2_4x4_sad[7] + au2_4x4_sad[15] + au2_4x4_sad[11]);
+ pi4_sad_grid[PART_ID_2NxnU_T] =
+ (au2_4x4_sad[1] + au2_4x4_sad[0] + au2_4x4_sad[2] + au2_4x4_sad[3]);
+ pi4_sad_grid[PART_ID_2NxnD_B] =
+ (au2_4x4_sad[15] + au2_4x4_sad[14] + au2_4x4_sad[12] + au2_4x4_sad[13]);
+ pi4_sad_grid[PART_ID_2Nx2N] =
+ pi4_sad_grid[PART_ID_2NxN_T] + pi4_sad_grid[PART_ID_2NxN_B];
+ pi4_sad_grid[PART_ID_2NxnU_B] =
+ pi4_sad_grid[PART_ID_2Nx2N] - pi4_sad_grid[PART_ID_2NxnU_T];
+ pi4_sad_grid[PART_ID_2NxnD_T] =
+ pi4_sad_grid[PART_ID_2Nx2N] - pi4_sad_grid[PART_ID_2NxnD_B];
+ pi4_sad_grid[PART_ID_nRx2N_L] =
+ pi4_sad_grid[PART_ID_2Nx2N] - pi4_sad_grid[PART_ID_nRx2N_R];
+ pi4_sad_grid[PART_ID_nLx2N_R] =
+ pi4_sad_grid[PART_ID_2Nx2N] - pi4_sad_grid[PART_ID_nLx2N_L];
+ }
+ }
+ /**********************************************************************/
+ /* Update the best and second-best results of every valid partition */
+ /**********************************************************************/
+ {
+ S32 i4_count = 0, i4_sad, i4_mv_cost, i4_tot_cost;
+ S32 *pi4_valid_part_ids = &ps_subpel_refine_ctxt->ai4_part_id[0];
+ S32 best_node_cost;
+ S32 second_best_node_cost;
+
+ /* For each valid partition, update the refine context so that it holds the best and second
+ best candidates for that partition */
+
+ for(i4_count = 0; i4_count < ps_subpel_refine_ctxt->i4_num_valid_parts; i4_count++)
+ {
+ S32 update_required = 0;
+ S32 part_id = pi4_valid_part_ids[i4_count];
+ S32 index = (ps_subpel_refine_ctxt->i4_num_valid_parts > 8) ? part_id : i4_count;
+ /* index: result slot - part_id when more than 8 partitions are valid, else the loop position */
+ /* Use a pre-computed cost instead of freshly evaluating subpel cost */
+ i4_mv_cost = ps_subpel_refine_ctxt->i2_mv_cost[0][index];
+
+ /* Total cost: SAD clipped to [0, 0x7fff] plus the MV cost, passed through CLIP_S16 */
+ i4_sad = CLIP3(pi4_sad_grid[part_id], 0, 0x7fff);
+ i4_tot_cost = CLIP_S16(i4_sad + i4_mv_cost);
+
+ /*****************************************************************/
+ /* Skip the update entirely when the total cost is not lower */
+ /* than the current second-best cost. */
+ /*****************************************************************/
+ best_node_cost = CLIP_S16(ps_subpel_refine_ctxt->i2_tot_cost[0][index]);
+ second_best_node_cost = CLIP_S16(ps_subpel_refine_ctxt->i2_tot_cost[1][index]);
+
+ if(i4_tot_cost < second_best_node_cost)
+ {
+ update_required = 2; /* default: replace only the second-best entry */
+
+ /*************************************************************/
+ /* Identify where the current result is to be placed: it */
+ /* becomes the best (1), the second best (2) or is dropped (0) */
+ /*************************************************************/
+ if(i4_tot_cost < best_node_cost)
+ {
+ update_required = 1; /* new best: the old best is demoted to second best below */
+ }
+ else if(i4_tot_cost == ps_subpel_refine_ctxt->i2_tot_cost[0][index])
+ { /* NOTE(review): tests the stored cost, not best_node_cost (its CLIP_S16 copy) - presumably equivalent; confirm */
+ update_required = 0; /* tie with the best: both stored results are kept */
+ }
+ if(update_required == 2)
+ {
+ ps_subpel_refine_ctxt->i2_tot_cost[1][index] = i4_tot_cost;
+ ps_subpel_refine_ctxt->i2_mv_cost[1][index] = i4_mv_cost;
+ ps_subpel_refine_ctxt->i2_mv_x[1][index] = ps_result_prms->i2_mv_x;
+ ps_subpel_refine_ctxt->i2_mv_y[1][index] = ps_result_prms->i2_mv_y;
+ ps_subpel_refine_ctxt->i2_ref_idx[1][index] = ps_result_prms->i1_ref_idx;
+ }
+ else if(update_required == 1)
+ {
+ ps_subpel_refine_ctxt->i2_tot_cost[1][index] =
+ ps_subpel_refine_ctxt->i2_tot_cost[0][index];
+ ps_subpel_refine_ctxt->i2_mv_cost[1][index] =
+ ps_subpel_refine_ctxt->i2_mv_cost[0][index];
+ ps_subpel_refine_ctxt->i2_mv_x[1][index] =
+ ps_subpel_refine_ctxt->i2_mv_x[0][index];
+ ps_subpel_refine_ctxt->i2_mv_y[1][index] =
+ ps_subpel_refine_ctxt->i2_mv_y[0][index];
+ ps_subpel_refine_ctxt->i2_ref_idx[1][index] =
+ ps_subpel_refine_ctxt->i2_ref_idx[0][index];
+ /* ... then store the candidate as the new best */
+ ps_subpel_refine_ctxt->i2_tot_cost[0][index] = i4_tot_cost;
+ ps_subpel_refine_ctxt->i2_mv_cost[0][index] = i4_mv_cost;
+ ps_subpel_refine_ctxt->i2_mv_x[0][index] = ps_result_prms->i2_mv_x;
+ ps_subpel_refine_ctxt->i2_mv_y[0][index] = ps_result_prms->i2_mv_y;
+ ps_subpel_refine_ctxt->i2_ref_idx[0][index] = ps_result_prms->i1_ref_idx;
+ }
+ }
+ }
+ }
+ }
+ /* Any result slot whose total cost is still >= MAX_SIGNED_16BIT_VAL gets its full-pel SATD set to that value */
+ {
+ WORD32 i4_count = 0;
+ for(i4_count = 0; i4_count < TOT_NUM_PARTS; i4_count++)
+ {
+ WORD32 j;
+ for(j = 0; j < 2; j++)
+ {
+ if(ps_subpel_refine_ctxt->i2_tot_cost[j][i4_count] >= MAX_SIGNED_16BIT_VAL)
+ {
+ ps_subpel_refine_ctxt->ai2_fullpel_satd[j][i4_count] = MAX_SIGNED_16BIT_VAL;
+ }
+ }
+ }
+ }
+}
+
+void hme_calc_stim_injected_sad_and_2_best_results(
+ hme_search_prms_t *ps_search_prms,
+ wgt_pred_ctxt_t *ps_wt_inp_prms,
+ err_prms_t *ps_err_prms,
+ result_upd_prms_t *ps_result_prms,
+ U08 **ppu1_ref,
+ S32 i4_ref_stride)
+{ /* Per search node: builds the partition SAD grid, derives a stim-injected cost and keeps the two best results per valid partition */
+ mv_refine_ctxt_t *ps_mv_refine_ctxt;
+ search_node_t *ps_search_node;
+
+ S32 i4_candt;
+ S32 i4_count;
+ S32 i4_inp_off;
+ S32 i4_ref_offset;
+ S32 i4_num_nodes;
+ ULWORD64 *au8_final_src_sigmaX, *au8_final_src_sigmaXSquared, au8_final_ref_sigmaX[17],
+ au8_final_ref_sigmaXSquared[17];
+ UWORD32 au4_4x4_ref_sigmaX[NUM_4X4], au4_4x4_ref_sigmaXSquared[NUM_4X4];
+ S32 *pi4_valid_part_ids;
+ /* SAD grid and strides are read from ps_err_prms; the *_ls2 values are the stride times four (stride << 2) */
+ S32 *pi4_sad_grid = ps_err_prms->pi4_sad_grid;
+ S32 cur_buf_stride = ps_err_prms->i4_inp_stride;
+ WORD32 ref_buf_stride = ps_err_prms->i4_ref_stride;
+ WORD32 cur_buf_stride_ls2 = (cur_buf_stride << 2);
+ WORD32 ref_buf_stride_ls2 = (ref_buf_stride << 2);
+ /* Input offset uses the CU offsets and the input stride; reference offset uses i4_x_off / i4_y_off and i4_ref_stride */
+ ps_mv_refine_ctxt = ps_search_prms->ps_fullpel_refine_ctxt;
+ i4_num_nodes = ps_search_prms->i4_num_search_nodes;
+ i4_inp_off = ps_search_prms->i4_cu_x_off;
+ i4_inp_off += ps_search_prms->i4_cu_y_off * cur_buf_stride;
+ i4_ref_offset = (i4_ref_stride * ps_search_prms->i4_y_off) + ps_search_prms->i4_x_off;
+ ps_search_node = ps_search_prms->ps_search_nodes;
+ pi4_valid_part_ids = &ps_mv_refine_ctxt->ai4_part_id[0];
+
+ /* Set local pointer to point to partition level sigma values calculated in hme_refine */
+ au8_final_src_sigmaX = ps_search_prms->pu8_part_src_sigmaX;
+ au8_final_src_sigmaXSquared = ps_search_prms->pu8_part_src_sigmaXSquared;
+ /* Evaluate the search nodes one after another */
+ for(i4_candt = 0; i4_candt < i4_num_nodes; i4_candt++)
+ {
+ {
+ WORD32 b, c, d;
+ UWORD8 *pu1_cur_ptr;
+ UWORD8 *pu1_ref_ptr;
+ UWORD16 au2_4x4_sad[NUM_4X4];
+ /* NOTE(review): the continue below bypasses the ps_search_node++ at the loop end, so later iterations re-test this node - confirm */
+ if(ps_search_node->s_mv.i2_mvx == INTRA_MV)
+ {
+ continue;
+ }
+ /* Point ps_err_prms at the input plane of this node's reference and at the reference displaced by the node's MV */
+ ps_err_prms->pu1_inp =
+ ps_wt_inp_prms->apu1_wt_inp[ps_search_node->i1_ref_idx] + i4_inp_off;
+ ps_err_prms->pu1_ref = ppu1_ref[ps_search_node->i1_ref_idx] + i4_ref_offset;
+ ps_err_prms->pu1_ref += ps_search_node->s_mv.i2_mvx;
+ ps_err_prms->pu1_ref += (ps_search_node->s_mv.i2_mvy * i4_ref_stride);
+
+ pu1_cur_ptr = ps_err_prms->pu1_inp;
+ pu1_ref_ptr = &ps_err_prms->pu1_ref[0];
+
+ /* Accumulate the SAD of every 4x4 sub-block; au2_4x4_sad[] is cleared first */
+ {
+ memset(&au2_4x4_sad[0], 0, NUM_4X4 * sizeof(UWORD16));
+ for(b = 0; b < NUM_4X4; b++)
+ {
+ WORD32 t1 = (b % 4) * NUM_PIXELS_IN_ROW + (b >> 2) * cur_buf_stride_ls2;
+ WORD32 t2 = (b % 4) * NUM_PIXELS_IN_ROW + (b >> 2) * ref_buf_stride_ls2;
+ /* t1 / t2: offset of sub-block b in cur / ref - column (b % 4), row band (b >> 2) */
+ for(c = 0; c < NUM_ROWS_IN_4X4; c++)
+ {
+ WORD32 z_cur = (cur_buf_stride)*c + t1;
+ WORD32 z_ref = (ref_buf_stride)*c + t2;
+ for(d = 0; d < NUM_PIXELS_IN_ROW; d++)
+ {
+ au2_4x4_sad[b] += (UWORD16)ABS((
+ ((S32)pu1_ref_ptr[(z_ref + d)]) - ((S32)pu1_cur_ptr[(z_cur + d)])));
+ }
+ }
+ }
+
+ /* Compute sigmaX and sigmaX_Squared at 4x4 level for ref from ref_ptr */
+ hme_compute_sigmaX_and_sigmaXSquared(
+ pu1_ref_ptr,
+ ref_buf_stride,
+ au4_4x4_ref_sigmaX,
+ au4_4x4_ref_sigmaXSquared,
+ 4,
+ 4,
+ 16,
+ 16,
+ 1,
+ 4);
+ /* Partition SADs are sums of sub-block SADs; the last four are 2Nx2N minus the complementary partition */
+ pi4_sad_grid[PART_ID_NxN_TL] =
+ (au2_4x4_sad[0] + au2_4x4_sad[1] + au2_4x4_sad[4] + au2_4x4_sad[5]);
+ pi4_sad_grid[PART_ID_NxN_TR] =
+ (au2_4x4_sad[2] + au2_4x4_sad[3] + au2_4x4_sad[6] + au2_4x4_sad[7]);
+ pi4_sad_grid[PART_ID_NxN_BL] =
+ (au2_4x4_sad[8] + au2_4x4_sad[9] + au2_4x4_sad[12] + au2_4x4_sad[13]);
+ pi4_sad_grid[PART_ID_NxN_BR] =
+ (au2_4x4_sad[10] + au2_4x4_sad[11] + au2_4x4_sad[14] + au2_4x4_sad[15]);
+ pi4_sad_grid[PART_ID_Nx2N_L] =
+ pi4_sad_grid[PART_ID_NxN_TL] + pi4_sad_grid[PART_ID_NxN_BL];
+ pi4_sad_grid[PART_ID_Nx2N_R] =
+ pi4_sad_grid[PART_ID_NxN_TR] + pi4_sad_grid[PART_ID_NxN_BR];
+ pi4_sad_grid[PART_ID_2NxN_T] =
+ pi4_sad_grid[PART_ID_NxN_TR] + pi4_sad_grid[PART_ID_NxN_TL];
+ pi4_sad_grid[PART_ID_2NxN_B] =
+ pi4_sad_grid[PART_ID_NxN_BR] + pi4_sad_grid[PART_ID_NxN_BL];
+ pi4_sad_grid[PART_ID_nLx2N_L] =
+ (au2_4x4_sad[8] + au2_4x4_sad[0] + au2_4x4_sad[12] + au2_4x4_sad[4]);
+ pi4_sad_grid[PART_ID_nRx2N_R] =
+ (au2_4x4_sad[3] + au2_4x4_sad[7] + au2_4x4_sad[15] + au2_4x4_sad[11]);
+ pi4_sad_grid[PART_ID_2NxnU_T] =
+ (au2_4x4_sad[1] + au2_4x4_sad[0] + au2_4x4_sad[2] + au2_4x4_sad[3]);
+ pi4_sad_grid[PART_ID_2NxnD_B] =
+ (au2_4x4_sad[15] + au2_4x4_sad[14] + au2_4x4_sad[12] + au2_4x4_sad[13]);
+ pi4_sad_grid[PART_ID_2Nx2N] =
+ pi4_sad_grid[PART_ID_2NxN_T] + pi4_sad_grid[PART_ID_2NxN_B];
+ pi4_sad_grid[PART_ID_2NxnU_B] =
+ pi4_sad_grid[PART_ID_2Nx2N] - pi4_sad_grid[PART_ID_2NxnU_T];
+ pi4_sad_grid[PART_ID_2NxnD_T] =
+ pi4_sad_grid[PART_ID_2Nx2N] - pi4_sad_grid[PART_ID_2NxnD_B];
+ pi4_sad_grid[PART_ID_nRx2N_L] =
+ pi4_sad_grid[PART_ID_2Nx2N] - pi4_sad_grid[PART_ID_nRx2N_R];
+ pi4_sad_grid[PART_ID_nLx2N_R] =
+ pi4_sad_grid[PART_ID_2Nx2N] - pi4_sad_grid[PART_ID_nLx2N_L];
+ }
+ }
+ /* Cost stage: one MV cost for the node, then a stim-injected cost and result update per valid partition */
+ {
+ S32 i4_sad, i4_mv_cost, i4_tot_cost;
+ S32 best_node_cost;
+ S32 second_best_node_cost;
+ ULWORD64 u8_temp_var, u8_temp_var1;
+ ULWORD64 u8_ref_X_Square, u8_pure_dist, u8_src_var, u8_ref_var;
+
+ { /* MV cost of this node, measured against the MVP node of the 2Nx2N partition */
+ S16 mvdx1, mvdy1;
+ S32 i4_search_idx = (S32)ps_result_prms->i1_ref_idx;
+ search_results_t *ps_search_results = ps_result_prms->ps_search_results;
+ S32 pred_lx = i4_search_idx;
+
+ pred_ctxt_t *ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
+ pred_candt_nodes_t *ps_pred_nodes = &ps_pred_ctxt->as_pred_nodes[PART_2Nx2N];
+ search_node_t *ps_pred_node_a = ps_pred_nodes->ps_mvp_node;
+
+ S32 inp_shift = 2;
+ S32 pred_shift = ps_pred_node_a->u1_subpel_done ? 0 : 2;
+ S32 lambda_q_shift = ps_pred_ctxt->lambda_q_shift;
+ S32 lambda = ps_pred_ctxt->lambda;
+ S32 rnd = 1 << (lambda_q_shift - 1);
+ S32 mv_p_x = ps_pred_node_a->s_mv.i2_mvx;
+ S32 mv_p_y = ps_pred_node_a->s_mv.i2_mvy;
+ S32 ref_bits =
+ ps_pred_ctxt
+ ->ppu1_ref_bits_tlu[ps_pred_ctxt->pred_lx][ps_search_node->i1_ref_idx];
+
+ COMPUTE_DIFF_MV(
+ mvdx1, mvdy1, ps_search_node, mv_p_x, mv_p_y, inp_shift, pred_shift);
+
+ mvdx1 = ABS(mvdx1);
+ mvdy1 = ABS(mvdy1);
+ /* Bit estimate: range of both |mvd| components, one extra bit per non-zero component, ref_bits and a constant 2 */
+ i4_mv_cost = hme_get_range(mvdx1) + hme_get_range(mvdy1) + (mvdx1 > 0) +
+ (mvdy1 > 0) + ref_bits + 2;
+ /* Scale by lambda, add the rounding term and shift down by lambda_q_shift */
+ i4_mv_cost *= lambda;
+ i4_mv_cost += rnd;
+ i4_mv_cost >>= lambda_q_shift;
+
+ i4_mv_cost = CLIP_U16(i4_mv_cost);
+ }
+
+ for(i4_count = 0; i4_count < ps_mv_refine_ctxt->i4_num_valid_parts; i4_count++)
+ {
+ S32 i4_stim_injected_sad;
+ S32 i4_stim_injected_cost;
+ S32 i4_noise_term;
+ unsigned long u4_shift_val;
+ S32 i4_bits_req;
+
+ S32 update_required = 0;
+ S32 part_id = pi4_valid_part_ids[i4_count];
+ S32 index = (ps_mv_refine_ctxt->i4_num_valid_parts > 8) ? part_id : i4_count;
+ /* index: result slot - part_id when more than 8 partitions are valid, else the loop position */
+ WORD32 i4_q_level = STIM_Q_FORMAT + ALPHA_Q_FORMAT;
+
+ S32 i4_inv_wt = ps_wt_inp_prms->a_inv_wpred_wt[ps_search_node->i1_ref_idx];
+ /* The noise term is only evaluated when the alpha multiplier is non-zero; otherwise it is 0 */
+ if(ps_search_prms->i4_alpha_stim_multiplier)
+ {
+ /* Compute ref sigmaX and sigmaX_Squared values for valid partitions from previously computed ref 4x4 level values */
+ hme_compute_final_sigma_of_pu_from_base_blocks(
+ au4_4x4_ref_sigmaX,
+ au4_4x4_ref_sigmaXSquared,
+ au8_final_ref_sigmaX,
+ au8_final_ref_sigmaXSquared,
+ 16,
+ 4,
+ part_id,
+ 4);
+ /* ref_var = sigmaXSquared - sigmaX * sigmaX for this partition */
+ u8_ref_X_Square =
+ (au8_final_ref_sigmaX[part_id] * au8_final_ref_sigmaX[part_id]);
+ u8_ref_var = (au8_final_ref_sigmaXSquared[part_id] - u8_ref_X_Square);
+
+ /* Multiply un-normalized src_var with inv_wt if its not same as default wt */
+ /* and shift the resulting src_var if its more than 27 bits to avoid overflow */
+ /* The amount by which it is shifted is passed on to u4_shift_val and applied equally on ref_var */
+ u4_shift_val = ihevce_calc_stim_injected_variance(
+ au8_final_src_sigmaX,
+ au8_final_src_sigmaXSquared,
+ &u8_src_var,
+ i4_inv_wt,
+ ps_wt_inp_prms->ai4_shift_val[ps_search_node->i1_ref_idx],
+ ps_wt_inp_prms->wpred_log_wdc,
+ part_id);
+
+ u8_ref_var = u8_ref_var >> u4_shift_val;
+
+ /* Do the same check on ref_var to avoid overflow and apply similar shift on src_var */
+ GETRANGE64(i4_bits_req, u8_ref_var);
+
+ if(i4_bits_req > 27)
+ {
+ u8_ref_var = u8_ref_var >> (i4_bits_req - 27);
+ u8_src_var = u8_src_var >> (i4_bits_req - 27);
+ }
+ /* 2*src_var*ref_var / (src_var^2 + ref_var^2) scaled by 1 << STIM_Q_FORMAT and rounded; equal variances give 1 << STIM_Q_FORMAT */
+ if(u8_src_var == u8_ref_var)
+ {
+ u8_temp_var = (1 << STIM_Q_FORMAT);
+ }
+ else
+ {
+ u8_temp_var = (2 * u8_src_var * u8_ref_var);
+ u8_temp_var = (u8_temp_var * (1 << STIM_Q_FORMAT));
+ u8_temp_var1 = (u8_src_var * u8_src_var) + (u8_ref_var * u8_ref_var);
+ u8_temp_var = (u8_temp_var + (u8_temp_var1 / 2));
+ u8_temp_var = (u8_temp_var / u8_temp_var1);
+ }
+
+ i4_noise_term = (UWORD32)u8_temp_var;
+
+ ASSERT(i4_noise_term >= 0);
+ /* Weight the ratio by the alpha multiplier */
+ i4_noise_term *= ps_search_prms->i4_alpha_stim_multiplier;
+ }
+ else
+ {
+ i4_noise_term = 0;
+ }
+ u8_pure_dist = pi4_sad_grid[part_id]; /* stim-injected SAD = SAD * ((1 << q_level) - noise_term), rounded, >> q_level */
+ u8_pure_dist *= ((1 << (i4_q_level)) - (i4_noise_term));
+ u8_pure_dist += (1 << ((i4_q_level)-1));
+ i4_stim_injected_sad = (UWORD32)(u8_pure_dist >> (i4_q_level));
+ /* Both the plain and the stim-injected SAD are clipped to [0, 0x7fff] before the MV cost is added */
+ i4_sad = CLIP3(pi4_sad_grid[part_id], 0, 0x7fff);
+ i4_tot_cost = CLIP_S16(i4_sad + i4_mv_cost);
+ i4_stim_injected_sad = CLIP3(i4_stim_injected_sad, 0, 0x7fff);
+ i4_stim_injected_cost = CLIP_S16(i4_stim_injected_sad + i4_mv_cost);
+ /* Ranking below uses the stim-injected cost; the plain total cost is stored alongside it */
+ best_node_cost = CLIP_S16(ps_mv_refine_ctxt->i2_stim_injected_cost[0][index]);
+ second_best_node_cost =
+ CLIP_S16(ps_mv_refine_ctxt->i2_stim_injected_cost[1][index]);
+
+ if(i4_stim_injected_cost < second_best_node_cost)
+ {
+ update_required = 2; /* default: replace only the second-best entry */
+
+ if(i4_stim_injected_cost < best_node_cost)
+ {
+ update_required = 1; /* new best: the old best is demoted to second best below */
+ }
+ else if(i4_stim_injected_cost == best_node_cost)
+ {
+ update_required = 0; /* tie with the best: both stored results are kept */
+ }
+
+ if(update_required == 2)
+ {
+ ps_mv_refine_ctxt->i2_tot_cost[1][index] = i4_tot_cost;
+ ps_mv_refine_ctxt->i2_stim_injected_cost[1][index] = i4_stim_injected_cost;
+ ps_mv_refine_ctxt->i2_mv_cost[1][index] = i4_mv_cost;
+ ps_mv_refine_ctxt->i2_mv_x[1][index] = ps_search_node->s_mv.i2_mvx;
+ ps_mv_refine_ctxt->i2_mv_y[1][index] = ps_search_node->s_mv.i2_mvy;
+ ps_mv_refine_ctxt->i2_ref_idx[1][index] = ps_search_node->i1_ref_idx;
+ }
+ else if(update_required == 1)
+ {
+ ps_mv_refine_ctxt->i2_tot_cost[1][index] =
+ ps_mv_refine_ctxt->i2_tot_cost[0][index];
+ ps_mv_refine_ctxt->i2_stim_injected_cost[1][index] =
+ ps_mv_refine_ctxt->i2_stim_injected_cost[0][index];
+ ps_mv_refine_ctxt->i2_mv_cost[1][index] =
+ ps_mv_refine_ctxt->i2_mv_cost[0][index];
+ ps_mv_refine_ctxt->i2_mv_x[1][index] = ps_mv_refine_ctxt->i2_mv_x[0][index];
+ ps_mv_refine_ctxt->i2_mv_y[1][index] = ps_mv_refine_ctxt->i2_mv_y[0][index];
+ ps_mv_refine_ctxt->i2_ref_idx[1][index] =
+ ps_mv_refine_ctxt->i2_ref_idx[0][index];
+ /* ... then store the candidate as the new best */
+ ps_mv_refine_ctxt->i2_tot_cost[0][index] = i4_tot_cost;
+ ps_mv_refine_ctxt->i2_stim_injected_cost[0][index] = i4_stim_injected_cost;
+ ps_mv_refine_ctxt->i2_mv_cost[0][index] = i4_mv_cost;
+ ps_mv_refine_ctxt->i2_mv_x[0][index] = ps_search_node->s_mv.i2_mvx;
+ ps_mv_refine_ctxt->i2_mv_y[0][index] = ps_search_node->s_mv.i2_mvy;
+ ps_mv_refine_ctxt->i2_ref_idx[0][index] = ps_search_node->i1_ref_idx;
+ }
+ }
+ }
+ }
+
+ ps_search_node++;
+ }
+ /* Fix-up: slots whose stim-injected cost is still >= MAX_SIGNED_16BIT_VAL take the ref idx of the first search node */
+ {
+ WORD32 i4_i;
+ WORD32 part_id;
+ search_node_t *ps_search_node = ps_search_prms->ps_search_nodes;
+ for(i4_i = 0; i4_i < ps_mv_refine_ctxt->i4_num_valid_parts; i4_i++)
+ { /* NOTE(review): slots are addressed by part_id here, but by 'index' in the update loop above - confirm */
+ part_id = ps_mv_refine_ctxt->ai4_part_id[i4_i];
+ if(ps_mv_refine_ctxt->i2_stim_injected_cost[0][part_id] >= MAX_SIGNED_16BIT_VAL)
+ {
+ ASSERT(ps_mv_refine_ctxt->i2_mv_cost[0][part_id] == MAX_SIGNED_16BIT_VAL);
+ ASSERT(ps_mv_refine_ctxt->i2_mv_x[0][part_id] == 0);
+ ASSERT(ps_mv_refine_ctxt->i2_mv_y[0][part_id] == 0);
+
+ ps_mv_refine_ctxt->i2_ref_idx[0][part_id] = ps_search_node->i1_ref_idx;
+ }
+ if(ps_mv_refine_ctxt->i2_stim_injected_cost[1][part_id] >= MAX_SIGNED_16BIT_VAL)
+ {
+ ASSERT(ps_mv_refine_ctxt->i2_mv_cost[1][part_id] == MAX_SIGNED_16BIT_VAL);
+ ASSERT(ps_mv_refine_ctxt->i2_mv_x[1][part_id] == 0);
+ ASSERT(ps_mv_refine_ctxt->i2_mv_y[1][part_id] == 0);
+
+ ps_mv_refine_ctxt->i2_ref_idx[1][part_id] = ps_search_node->i1_ref_idx;
+ }
+ }
+ }
+}
+
+/**
+********************************************************************************
+* @fn hme_calc_sad_and_1_best_result
+*
+* @brief For each search node: computes the NUM_4X4 sub-block SADs of the
+*        block, derives the SAD of all 17 partitions, adds the node's MV cost
+*        and keeps, per valid partition, only the single lowest-cost result in
+*        ps_search_prms->ps_fullpel_refine_ctxt
+*
+* @param[in] ps_search_prms : search nodes, CU / search-window offsets and the
+*                             refine context that receives the results
+*
+* @param[in] ps_wt_inp_prms : per-reference input planes (apu1_wt_inp)
+*
+* @param[in,out] ps_err_prms : strides and SAD grid; pu1_inp and pu1_ref are
+*                              overwritten for every evaluated node
+*
+* @param[in] ps_result_prms : i1_ref_idx selects the prediction context taken
+*                             from ps_search_results
+*
+* @param[in] ppu1_ref : reference plane base pointers, indexed by ref idx
+*
+* @param[in] i4_ref_stride : stride used when applying the MV to the reference
+*
+* @return None
+********************************************************************************
+*/
+void hme_calc_sad_and_1_best_result(
+ hme_search_prms_t *ps_search_prms,
+ wgt_pred_ctxt_t *ps_wt_inp_prms,
+ err_prms_t *ps_err_prms,
+ result_upd_prms_t *ps_result_prms,
+ U08 **ppu1_ref,
+ S32 i4_ref_stride)
+{
+ S32 i4_candt;
+ S32 i4_inp_off;
+ S32 i4_ref_offset;
+ S32 i4_num_nodes;
+ /* SAD grid and strides are read from ps_err_prms; the *_ls2 values are the stride times four (stride << 2) */
+ S32 *pi4_sad_grid = ps_err_prms->pi4_sad_grid;
+ S32 cur_buf_stride = ps_err_prms->i4_inp_stride;
+ WORD32 ref_buf_stride = ps_err_prms->i4_ref_stride;
+ WORD32 cur_buf_stride_ls2 = (cur_buf_stride << 2);
+ WORD32 ref_buf_stride_ls2 = (ref_buf_stride << 2);
+
+ mv_refine_ctxt_t *ps_mv_refine_ctxt;
+ search_node_t *ps_search_node;
+
+ ps_mv_refine_ctxt = ps_search_prms->ps_fullpel_refine_ctxt;
+ i4_num_nodes = ps_search_prms->i4_num_search_nodes;
+ i4_inp_off = ps_search_prms->i4_cu_x_off;
+ i4_inp_off += ps_search_prms->i4_cu_y_off * cur_buf_stride;
+ i4_ref_offset = (i4_ref_stride * ps_search_prms->i4_y_off) + ps_search_prms->i4_x_off;
+ ps_search_node = ps_search_prms->ps_search_nodes;
+
+ for(i4_candt = 0; i4_candt < i4_num_nodes; i4_candt++)
+ {
+ /**********************************************************************/
+ /* Compute the 4x4 SADs inline and fill the partition SAD grid */
+ /**********************************************************************/
+ {
+ WORD32 b, c, d;
+ UWORD8 *pu1_cur_ptr;
+ UWORD8 *pu1_ref_ptr;
+ UWORD16 au2_4x4_sad[NUM_4X4];
+
+ if(ps_search_node->s_mv.i2_mvx == INTRA_MV)
+ {
+ /* NOTE(review): this bypasses the ps_search_node++ at the loop end, so the */
+ /* remaining iterations re-test this same node - confirm this is intended */
+ continue;
+ }
+ /* Point ps_err_prms at the input plane of this node's reference and at the reference displaced by the node's MV */
+ ps_err_prms->pu1_inp =
+ ps_wt_inp_prms->apu1_wt_inp[ps_search_node->i1_ref_idx] + i4_inp_off;
+ ps_err_prms->pu1_ref = ppu1_ref[ps_search_node->i1_ref_idx] + i4_ref_offset;
+ ps_err_prms->pu1_ref += ps_search_node->s_mv.i2_mvx;
+ ps_err_prms->pu1_ref += (ps_search_node->s_mv.i2_mvy * i4_ref_stride);
+
+ pu1_cur_ptr = ps_err_prms->pu1_inp;
+ pu1_ref_ptr = &ps_err_prms->pu1_ref[0];
+
+ /* Accumulate the SAD of every 4x4 sub-block; au2_4x4_sad[] is cleared first */
+ {
+ memset(&au2_4x4_sad[0], 0, NUM_4X4 * sizeof(UWORD16));
+ for(b = 0; b < NUM_4X4; b++)
+ {
+ WORD32 t1 = (b % 4) * NUM_PIXELS_IN_ROW + (b >> 2) * cur_buf_stride_ls2;
+ WORD32 t2 = (b % 4) * NUM_PIXELS_IN_ROW + (b >> 2) * ref_buf_stride_ls2;
+ /* t1 / t2: offset of sub-block b in cur / ref - column (b % 4), row band (b >> 2) */
+ for(c = 0; c < NUM_ROWS_IN_4X4; c++)
+ {
+ WORD32 z_cur = (cur_buf_stride)*c + t1;
+ WORD32 z_ref = (ref_buf_stride)*c + t2;
+ for(d = 0; d < NUM_PIXELS_IN_ROW; d++)
+ {
+ au2_4x4_sad[b] += (UWORD16)ABS((
+ ((S32)pu1_ref_ptr[(z_ref + d)]) - ((S32)pu1_cur_ptr[(z_cur + d)])));
+ }
+ }
+ }
+ /* Partition SADs are sums of sub-block SADs; the last four are 2Nx2N minus the complementary partition */
+ pi4_sad_grid[PART_ID_NxN_TL] =
+ (au2_4x4_sad[0] + au2_4x4_sad[1] + au2_4x4_sad[4] + au2_4x4_sad[5]);
+ pi4_sad_grid[PART_ID_NxN_TR] =
+ (au2_4x4_sad[2] + au2_4x4_sad[3] + au2_4x4_sad[6] + au2_4x4_sad[7]);
+ pi4_sad_grid[PART_ID_NxN_BL] =
+ (au2_4x4_sad[8] + au2_4x4_sad[9] + au2_4x4_sad[12] + au2_4x4_sad[13]);
+ pi4_sad_grid[PART_ID_NxN_BR] =
+ (au2_4x4_sad[10] + au2_4x4_sad[11] + au2_4x4_sad[14] + au2_4x4_sad[15]);
+ pi4_sad_grid[PART_ID_Nx2N_L] =
+ pi4_sad_grid[PART_ID_NxN_TL] + pi4_sad_grid[PART_ID_NxN_BL];
+ pi4_sad_grid[PART_ID_Nx2N_R] =
+ pi4_sad_grid[PART_ID_NxN_TR] + pi4_sad_grid[PART_ID_NxN_BR];
+ pi4_sad_grid[PART_ID_2NxN_T] =
+ pi4_sad_grid[PART_ID_NxN_TR] + pi4_sad_grid[PART_ID_NxN_TL];
+ pi4_sad_grid[PART_ID_2NxN_B] =
+ pi4_sad_grid[PART_ID_NxN_BR] + pi4_sad_grid[PART_ID_NxN_BL];
+ pi4_sad_grid[PART_ID_nLx2N_L] =
+ (au2_4x4_sad[8] + au2_4x4_sad[0] + au2_4x4_sad[12] + au2_4x4_sad[4]);
+ pi4_sad_grid[PART_ID_nRx2N_R] =
+ (au2_4x4_sad[3] + au2_4x4_sad[7] + au2_4x4_sad[15] + au2_4x4_sad[11]);
+ pi4_sad_grid[PART_ID_2NxnU_T] =
+ (au2_4x4_sad[1] + au2_4x4_sad[0] + au2_4x4_sad[2] + au2_4x4_sad[3]);
+ pi4_sad_grid[PART_ID_2NxnD_B] =
+ (au2_4x4_sad[15] + au2_4x4_sad[14] + au2_4x4_sad[12] + au2_4x4_sad[13]);
+ pi4_sad_grid[PART_ID_2Nx2N] =
+ pi4_sad_grid[PART_ID_2NxN_T] + pi4_sad_grid[PART_ID_2NxN_B];
+ pi4_sad_grid[PART_ID_2NxnU_B] =
+ pi4_sad_grid[PART_ID_2Nx2N] - pi4_sad_grid[PART_ID_2NxnU_T];
+ pi4_sad_grid[PART_ID_2NxnD_T] =
+ pi4_sad_grid[PART_ID_2Nx2N] - pi4_sad_grid[PART_ID_2NxnD_B];
+ pi4_sad_grid[PART_ID_nRx2N_L] =
+ pi4_sad_grid[PART_ID_2Nx2N] - pi4_sad_grid[PART_ID_nRx2N_R];
+ pi4_sad_grid[PART_ID_nLx2N_R] =
+ pi4_sad_grid[PART_ID_2Nx2N] - pi4_sad_grid[PART_ID_nLx2N_L];
+ }
+ }
+
+ {
+ S32 i4_count = 0, i4_sad, i4_mv_cost, i4_tot_cost;
+ S32 *pi4_valid_part_ids = &ps_mv_refine_ctxt->ai4_part_id[0];
+ S32 best_node_cost;
+
+ { /* MV cost of this node, measured against the MVP node of the 2Nx2N partition */
+ S16 mvdx1, mvdy1;
+ S32 i4_search_idx = (S32)ps_result_prms->i1_ref_idx;
+ search_results_t *ps_search_results = ps_result_prms->ps_search_results;
+ S32 pred_lx = i4_search_idx;
+
+ pred_ctxt_t *ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
+ pred_candt_nodes_t *ps_pred_nodes = &ps_pred_ctxt->as_pred_nodes[PART_2Nx2N];
+ search_node_t *ps_pred_node_a = ps_pred_nodes->ps_mvp_node;
+
+ S32 inp_shift = 2;
+ S32 pred_shift = ps_pred_node_a->u1_subpel_done ? 0 : 2;
+ S32 lambda_q_shift = ps_pred_ctxt->lambda_q_shift;
+ S32 lambda = ps_pred_ctxt->lambda;
+ S32 rnd = 1 << (lambda_q_shift - 1);
+ S32 mv_p_x = ps_pred_node_a->s_mv.i2_mvx;
+ S32 mv_p_y = ps_pred_node_a->s_mv.i2_mvy;
+ S32 ref_bits =
+ ps_pred_ctxt
+ ->ppu1_ref_bits_tlu[ps_pred_ctxt->pred_lx][ps_search_node->i1_ref_idx];
+
+ COMPUTE_DIFF_MV(
+ mvdx1, mvdy1, ps_search_node, mv_p_x, mv_p_y, inp_shift, pred_shift);
+
+ mvdx1 = ABS(mvdx1);
+ mvdy1 = ABS(mvdy1);
+ /* Bit estimate: range of both |mvd| components, one extra bit per non-zero component, ref_bits and a constant 2 */
+ i4_mv_cost = hme_get_range(mvdx1) + hme_get_range(mvdy1) + (mvdx1 > 0) +
+ (mvdy1 > 0) + ref_bits + 2;
+ /* Scale by lambda, add the rounding term and shift down by lambda_q_shift */
+ i4_mv_cost *= lambda;
+ i4_mv_cost += rnd;
+ i4_mv_cost >>= lambda_q_shift;
+
+ i4_mv_cost = CLIP_U16(i4_mv_cost);
+ }
+
+ /* For each valid partition, update the refine context so that it holds the best
+ candidate seen so far for that partition */
+
+ for(i4_count = 0; i4_count < ps_mv_refine_ctxt->i4_num_valid_parts; i4_count++)
+ {
+ S32 part_id = pi4_valid_part_ids[i4_count];
+ S32 index = (ps_mv_refine_ctxt->i4_num_valid_parts > 8) ? part_id : i4_count;
+ /* index: result slot - part_id when more than 8 partitions are valid, else the loop position */
+ /* Total cost: SAD clipped to [0, 0x7fff] plus the MV cost, passed through CLIP_S16 */
+ i4_sad = CLIP3(pi4_sad_grid[part_id], 0, 0x7fff);
+ i4_tot_cost = CLIP_S16(i4_sad + i4_mv_cost);
+
+ /*****************************************************************/
+ /* Only one result per partition is kept: replace it when the */
+ /* candidate is strictly cheaper. Ties keep the earlier result. */
+ /*****************************************************************/
+ best_node_cost = CLIP_S16(ps_mv_refine_ctxt->i2_tot_cost[0][index]);
+
+ if(i4_tot_cost < best_node_cost)
+ {
+ ps_mv_refine_ctxt->i2_tot_cost[0][index] = i4_tot_cost;
+ ps_mv_refine_ctxt->i2_mv_cost[0][index] = i4_mv_cost;
+ ps_mv_refine_ctxt->i2_mv_x[0][index] = ps_search_node->s_mv.i2_mvx;
+ ps_mv_refine_ctxt->i2_mv_y[0][index] = ps_search_node->s_mv.i2_mvy;
+ ps_mv_refine_ctxt->i2_ref_idx[0][index] = ps_search_node->i1_ref_idx;
+ }
+ }
+ }
+ ps_search_node++;
+ }
+ /* Fix-up: slots whose total cost is still >= MAX_SIGNED_16BIT_VAL take the ref idx of the first search node */
+ {
+ WORD32 i4_i;
+ WORD32 part_id;
+ search_node_t *ps_search_node = ps_search_prms->ps_search_nodes;
+ for(i4_i = 0; i4_i < ps_mv_refine_ctxt->i4_num_valid_parts; i4_i++)
+ { /* NOTE(review): slots are addressed by part_id here, but by 'index' in the update loop above - confirm */
+ part_id = ps_mv_refine_ctxt->ai4_part_id[i4_i];
+ if(ps_mv_refine_ctxt->i2_tot_cost[0][part_id] >= MAX_SIGNED_16BIT_VAL)
+ {
+ ASSERT(ps_mv_refine_ctxt->i2_mv_cost[0][part_id] == MAX_SIGNED_16BIT_VAL);
+ ASSERT(ps_mv_refine_ctxt->i2_mv_x[0][part_id] == 0);
+ ASSERT(ps_mv_refine_ctxt->i2_mv_y[0][part_id] == 0);
+
+ ps_mv_refine_ctxt->i2_ref_idx[0][part_id] = ps_search_node->i1_ref_idx;
+ }
+ }
+ }
+}
+
+void hme_calc_stim_injected_sad_and_1_best_result(
+ hme_search_prms_t *ps_search_prms,
+ wgt_pred_ctxt_t *ps_wt_inp_prms,
+ err_prms_t *ps_err_prms,
+ result_upd_prms_t *ps_result_prms,
+ U08 **ppu1_ref,
+ S32 i4_ref_stride)
+{
+ mv_refine_ctxt_t *ps_mv_refine_ctxt;
+ search_node_t *ps_search_node;
+
+ S32 i4_candt;
+ S32 i4_count;
+ S32 i4_inp_off;
+ S32 i4_ref_offset;
+ S32 i4_num_nodes;
+ ULWORD64 *au8_final_src_sigmaX, *au8_final_src_sigmaXSquared, au8_final_ref_sigmaX[17],
+ au8_final_ref_sigmaXSquared[17];
+ UWORD32 au4_4x4_ref_sigmaX[NUM_4X4], au4_4x4_ref_sigmaXSquared[NUM_4X4];
+ S32 *pi4_valid_part_ids;
+
+ S32 *pi4_sad_grid = ps_err_prms->pi4_sad_grid;
+ S32 cur_buf_stride = ps_err_prms->i4_inp_stride;
+ WORD32 ref_buf_stride = ps_err_prms->i4_ref_stride;
+ WORD32 cur_buf_stride_ls2 = (cur_buf_stride << 2);
+ WORD32 ref_buf_stride_ls2 = (ref_buf_stride << 2);
+
+ ps_mv_refine_ctxt = ps_search_prms->ps_fullpel_refine_ctxt;
+ i4_num_nodes = ps_search_prms->i4_num_search_nodes;
+ i4_inp_off = ps_search_prms->i4_cu_x_off;
+ i4_inp_off += ps_search_prms->i4_cu_y_off * cur_buf_stride;
+ i4_ref_offset = (i4_ref_stride * ps_search_prms->i4_y_off) + ps_search_prms->i4_x_off;
+ ps_search_node = ps_search_prms->ps_search_nodes;
+ pi4_valid_part_ids = &ps_mv_refine_ctxt->ai4_part_id[0];
+
+ /* Set local pointer to point to partition level sigma values calculated in hme_refine */
+ au8_final_src_sigmaX = ps_search_prms->pu8_part_src_sigmaX;
+ au8_final_src_sigmaXSquared = ps_search_prms->pu8_part_src_sigmaXSquared;
+
+ for(i4_candt = 0; i4_candt < i4_num_nodes; i4_candt++)
+ {
+ {
+ WORD32 b, c, d;
+ UWORD8 *pu1_cur_ptr;
+ UWORD8 *pu1_ref_ptr;
+ UWORD16 au2_4x4_sad[NUM_4X4];
+
+ if(ps_search_node->s_mv.i2_mvx == INTRA_MV)
+ {
+ continue;
+ }
+
+ ps_err_prms->pu1_inp =
+ ps_wt_inp_prms->apu1_wt_inp[ps_search_node->i1_ref_idx] + i4_inp_off;
+ ps_err_prms->pu1_ref = ppu1_ref[ps_search_node->i1_ref_idx] + i4_ref_offset;
+ ps_err_prms->pu1_ref += ps_search_node->s_mv.i2_mvx;
+ ps_err_prms->pu1_ref += (ps_search_node->s_mv.i2_mvy * i4_ref_stride);
+
+ pu1_cur_ptr = ps_err_prms->pu1_inp;
+ pu1_ref_ptr = &ps_err_prms->pu1_ref[0];
+
+ /* Loop to compute the SAD's */
+ {
+ memset(&au2_4x4_sad[0], 0, NUM_4X4 * sizeof(UWORD16));
+ for(b = 0; b < NUM_4X4; b++)
+ {
+ WORD32 t1 = (b % 4) * NUM_PIXELS_IN_ROW + (b >> 2) * cur_buf_stride_ls2;
+ WORD32 t2 = (b % 4) * NUM_PIXELS_IN_ROW + (b >> 2) * ref_buf_stride_ls2;
+
+ for(c = 0; c < NUM_ROWS_IN_4X4; c++)
+ {
+ WORD32 z_cur = (cur_buf_stride)*c + t1;
+ WORD32 z_ref = (ref_buf_stride)*c + t2;
+ for(d = 0; d < NUM_PIXELS_IN_ROW; d++)
+ {
+ au2_4x4_sad[b] += (UWORD16)ABS((
+ ((S32)pu1_ref_ptr[(z_ref + d)]) - ((S32)pu1_cur_ptr[(z_cur + d)])));
+ }
+ }
+ }
+
+ /* Compute sigmaX and sigmaX_Squared at 4x4 level for ref from ref_ptr */
+ hme_compute_sigmaX_and_sigmaXSquared(
+ pu1_ref_ptr,
+ ref_buf_stride,
+ au4_4x4_ref_sigmaX,
+ au4_4x4_ref_sigmaXSquared,
+ 4,
+ 4,
+ 16,
+ 16,
+ 1,
+ 4);
+
+ pi4_sad_grid[PART_ID_NxN_TL] =
+ (au2_4x4_sad[0] + au2_4x4_sad[1] + au2_4x4_sad[4] + au2_4x4_sad[5]);
+ pi4_sad_grid[PART_ID_NxN_TR] =
+ (au2_4x4_sad[2] + au2_4x4_sad[3] + au2_4x4_sad[6] + au2_4x4_sad[7]);
+ pi4_sad_grid[PART_ID_NxN_BL] =
+ (au2_4x4_sad[8] + au2_4x4_sad[9] + au2_4x4_sad[12] + au2_4x4_sad[13]);
+ pi4_sad_grid[PART_ID_NxN_BR] =
+ (au2_4x4_sad[10] + au2_4x4_sad[11] + au2_4x4_sad[14] + au2_4x4_sad[15]);
+ pi4_sad_grid[PART_ID_Nx2N_L] =
+ pi4_sad_grid[PART_ID_NxN_TL] + pi4_sad_grid[PART_ID_NxN_BL];
+ pi4_sad_grid[PART_ID_Nx2N_R] =
+ pi4_sad_grid[PART_ID_NxN_TR] + pi4_sad_grid[PART_ID_NxN_BR];
+ pi4_sad_grid[PART_ID_2NxN_T] =
+ pi4_sad_grid[PART_ID_NxN_TR] + pi4_sad_grid[PART_ID_NxN_TL];
+ pi4_sad_grid[PART_ID_2NxN_B] =
+ pi4_sad_grid[PART_ID_NxN_BR] + pi4_sad_grid[PART_ID_NxN_BL];
+ pi4_sad_grid[PART_ID_nLx2N_L] =
+ (au2_4x4_sad[8] + au2_4x4_sad[0] + au2_4x4_sad[12] + au2_4x4_sad[4]);
+ pi4_sad_grid[PART_ID_nRx2N_R] =
+ (au2_4x4_sad[3] + au2_4x4_sad[7] + au2_4x4_sad[15] + au2_4x4_sad[11]);
+ pi4_sad_grid[PART_ID_2NxnU_T] =
+ (au2_4x4_sad[1] + au2_4x4_sad[0] + au2_4x4_sad[2] + au2_4x4_sad[3]);
+ pi4_sad_grid[PART_ID_2NxnD_B] =
+ (au2_4x4_sad[15] + au2_4x4_sad[14] + au2_4x4_sad[12] + au2_4x4_sad[13]);
+ pi4_sad_grid[PART_ID_2Nx2N] =
+ pi4_sad_grid[PART_ID_2NxN_T] + pi4_sad_grid[PART_ID_2NxN_B];
+ pi4_sad_grid[PART_ID_2NxnU_B] =
+ pi4_sad_grid[PART_ID_2Nx2N] - pi4_sad_grid[PART_ID_2NxnU_T];
+ pi4_sad_grid[PART_ID_2NxnD_T] =
+ pi4_sad_grid[PART_ID_2Nx2N] - pi4_sad_grid[PART_ID_2NxnD_B];
+ pi4_sad_grid[PART_ID_nRx2N_L] =
+ pi4_sad_grid[PART_ID_2Nx2N] - pi4_sad_grid[PART_ID_nRx2N_R];
+ pi4_sad_grid[PART_ID_nLx2N_R] =
+ pi4_sad_grid[PART_ID_2Nx2N] - pi4_sad_grid[PART_ID_nLx2N_L];
+ }
+ }
+
+ {
+ S32 i4_sad, i4_mv_cost, i4_tot_cost;
+ S32 best_node_cost;
+ S32 second_best_node_cost;
+ ULWORD64 u8_temp_var, u8_temp_var1;
+ ULWORD64 u8_ref_X_Square, u8_pure_dist, u8_src_var, u8_ref_var;
+
+ {
+ S16 mvdx1, mvdy1;
+ S32 i4_search_idx = (S32)ps_result_prms->i1_ref_idx;
+ search_results_t *ps_search_results = ps_result_prms->ps_search_results;
+ S32 pred_lx = i4_search_idx;
+
+ pred_ctxt_t *ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
+ pred_candt_nodes_t *ps_pred_nodes = &ps_pred_ctxt->as_pred_nodes[PART_2Nx2N];
+ search_node_t *ps_pred_node_a = ps_pred_nodes->ps_mvp_node;
+
+ S32 inp_shift = 2;
+ S32 pred_shift = ps_pred_node_a->u1_subpel_done ? 0 : 2;
+ S32 lambda_q_shift = ps_pred_ctxt->lambda_q_shift;
+ S32 lambda = ps_pred_ctxt->lambda;
+ S32 rnd = 1 << (lambda_q_shift - 1);
+ S32 mv_p_x = ps_pred_node_a->s_mv.i2_mvx;
+ S32 mv_p_y = ps_pred_node_a->s_mv.i2_mvy;
+ S32 ref_bits =
+ ps_pred_ctxt
+ ->ppu1_ref_bits_tlu[ps_pred_ctxt->pred_lx][ps_search_node->i1_ref_idx];
+
+ COMPUTE_DIFF_MV(
+ mvdx1, mvdy1, ps_search_node, mv_p_x, mv_p_y, inp_shift, pred_shift);
+
+ mvdx1 = ABS(mvdx1);
+ mvdy1 = ABS(mvdy1);
+
+ i4_mv_cost = hme_get_range(mvdx1) + hme_get_range(mvdy1) + (mvdx1 > 0) +
+ (mvdy1 > 0) + ref_bits + 2;
+
+ i4_mv_cost *= lambda;
+ i4_mv_cost += rnd;
+ i4_mv_cost >>= lambda_q_shift;
+
+ i4_mv_cost = CLIP_U16(i4_mv_cost);
+ }
+
+ for(i4_count = 0; i4_count < ps_mv_refine_ctxt->i4_num_valid_parts; i4_count++)
+ {
+ S32 i4_stim_injected_sad;
+ S32 i4_stim_injected_cost;
+ S32 i4_noise_term;
+ unsigned long u4_shift_val;
+ S32 i4_bits_req;
+
+ S32 update_required = 0;
+ S32 part_id = pi4_valid_part_ids[i4_count];
+ S32 index = (ps_mv_refine_ctxt->i4_num_valid_parts > 8) ? part_id : i4_count;
+
+ WORD32 i4_q_level = STIM_Q_FORMAT + ALPHA_Q_FORMAT;
+
+ S32 i4_inv_wt = ps_wt_inp_prms->a_inv_wpred_wt[ps_search_node->i1_ref_idx];
+
+ if(ps_search_prms->i4_alpha_stim_multiplier)
+ {
+ /* Compute ref sigmaX and sigmaX_Squared values for valid partitions from previously computed ref 4x4 level values */
+ hme_compute_final_sigma_of_pu_from_base_blocks(
+ au4_4x4_ref_sigmaX,
+ au4_4x4_ref_sigmaXSquared,
+ au8_final_ref_sigmaX,
+ au8_final_ref_sigmaXSquared,
+ 16,
+ 4,
+ part_id,
+ 4);
+
+ u8_ref_X_Square =
+ (au8_final_ref_sigmaX[part_id] * au8_final_ref_sigmaX[part_id]);
+ u8_ref_var = (au8_final_ref_sigmaXSquared[part_id] - u8_ref_X_Square);
+
+ /* Multiply un-normalized src_var with inv_wt if its not same as default wt */
+ /* and shift the resulting src_var if its more than 27 bits to avoid overflow */
+ /* The amount by which it is shifted is passed on to u4_shift_val and applied equally on ref_var */
+ u4_shift_val = ihevce_calc_stim_injected_variance(
+ au8_final_src_sigmaX,
+ au8_final_src_sigmaXSquared,
+ &u8_src_var,
+ i4_inv_wt,
+ ps_wt_inp_prms->ai4_shift_val[ps_search_node->i1_ref_idx],
+ ps_wt_inp_prms->wpred_log_wdc,
+ part_id);
+
+ u8_ref_var = u8_ref_var >> u4_shift_val;
+
+ /* Do the same check on ref_var to avoid overflow and apply similar shift on src_var */
+ GETRANGE64(i4_bits_req, u8_ref_var);
+
+ if(i4_bits_req > 27)
+ {
+ u8_ref_var = u8_ref_var >> (i4_bits_req - 27);
+ u8_src_var = u8_src_var >> (i4_bits_req - 27);
+ }
+
+ if(u8_src_var == u8_ref_var)
+ {
+ u8_temp_var = (1 << STIM_Q_FORMAT);
+ }
+ else
+ {
+ u8_temp_var = (2 * u8_src_var * u8_ref_var);
+ u8_temp_var = (u8_temp_var * (1 << STIM_Q_FORMAT));
+ u8_temp_var1 = (u8_src_var * u8_src_var) + (u8_ref_var * u8_ref_var);
+ u8_temp_var = (u8_temp_var + (u8_temp_var1 / 2));
+ u8_temp_var = (u8_temp_var / u8_temp_var1);
+ }
+
+ i4_noise_term = (UWORD32)u8_temp_var;
+
+ ASSERT(i4_noise_term >= 0);
+
+ i4_noise_term *= ps_search_prms->i4_alpha_stim_multiplier;
+ }
+ else
+ {
+ i4_noise_term = 0;
+ }
+ u8_pure_dist = pi4_sad_grid[part_id];
+ u8_pure_dist *= ((1 << (i4_q_level)) - (i4_noise_term));
+ u8_pure_dist += (1 << ((i4_q_level)-1));
+ i4_stim_injected_sad = (UWORD32)(u8_pure_dist >> (i4_q_level));
+
+ i4_sad = CLIP3(pi4_sad_grid[part_id], 0, 0x7fff);
+ i4_tot_cost = CLIP_S16(i4_sad + i4_mv_cost);
+ i4_stim_injected_sad = CLIP3(i4_stim_injected_sad, 0, 0x7fff);
+ i4_stim_injected_cost = CLIP_S16(i4_stim_injected_sad + i4_mv_cost);
+
+ best_node_cost = CLIP_S16(ps_mv_refine_ctxt->i2_stim_injected_cost[0][index]);
+ second_best_node_cost = SHRT_MAX;
+
+ if(i4_stim_injected_cost < second_best_node_cost)
+ {
+ update_required = 0;
+
+ if(i4_stim_injected_cost < best_node_cost)
+ {
+ update_required = 1;
+ }
+ else if(i4_stim_injected_cost == best_node_cost)
+ {
+ update_required = 0;
+ }
+
+ if(update_required == 2)
+ {
+ ps_mv_refine_ctxt->i2_tot_cost[1][index] = i4_tot_cost;
+ ps_mv_refine_ctxt->i2_stim_injected_cost[1][index] = i4_stim_injected_cost;
+ ps_mv_refine_ctxt->i2_mv_cost[1][index] = i4_mv_cost;
+ ps_mv_refine_ctxt->i2_mv_x[1][index] = ps_search_node->s_mv.i2_mvx;
+ ps_mv_refine_ctxt->i2_mv_y[1][index] = ps_search_node->s_mv.i2_mvy;
+ ps_mv_refine_ctxt->i2_ref_idx[1][index] = ps_search_node->i1_ref_idx;
+ }
+ else if(update_required == 1)
+ {
+ ps_mv_refine_ctxt->i2_tot_cost[0][index] = i4_tot_cost;
+ ps_mv_refine_ctxt->i2_stim_injected_cost[0][index] = i4_stim_injected_cost;
+ ps_mv_refine_ctxt->i2_mv_cost[0][index] = i4_mv_cost;
+ ps_mv_refine_ctxt->i2_mv_x[0][index] = ps_search_node->s_mv.i2_mvx;
+ ps_mv_refine_ctxt->i2_mv_y[0][index] = ps_search_node->s_mv.i2_mvy;
+ ps_mv_refine_ctxt->i2_ref_idx[0][index] = ps_search_node->i1_ref_idx;
+ }
+ }
+ }
+ }
+
+ ps_search_node++;
+ }
+
+ {
+ WORD32 i4_i;
+ WORD32 part_id;
+ search_node_t *ps_search_node = ps_search_prms->ps_search_nodes;
+ for(i4_i = 0; i4_i < ps_mv_refine_ctxt->i4_num_valid_parts; i4_i++)
+ {
+ part_id = ps_mv_refine_ctxt->ai4_part_id[i4_i];
+ if(ps_mv_refine_ctxt->i2_stim_injected_cost[0][part_id] >= MAX_SIGNED_16BIT_VAL)
+ {
+ ASSERT(ps_mv_refine_ctxt->i2_mv_cost[0][part_id] == MAX_SIGNED_16BIT_VAL);
+ ASSERT(ps_mv_refine_ctxt->i2_mv_x[0][part_id] == 0);
+ ASSERT(ps_mv_refine_ctxt->i2_mv_y[0][part_id] == 0);
+
+ ps_mv_refine_ctxt->i2_ref_idx[0][part_id] = ps_search_node->i1_ref_idx;
+ }
+ }
+ }
+}
+
+void hme_calc_sad_and_1_best_result_subpel(
+ err_prms_t *ps_err_prms, result_upd_prms_t *ps_result_prms)
+{
+ S32 i4_candt;
+ S32 i4_num_nodes;
+ /* Purpose: build the partition SAD grid of one candidate and store it as best result where it is cheaper */
+ S32 *pi4_sad_grid = ps_err_prms->pi4_sad_grid;
+
+ S32 cur_buf_stride = ps_err_prms->i4_inp_stride;
+ WORD32 ref_buf_stride = ps_err_prms->i4_ref_stride;
+ WORD32 cur_buf_stride_ls2 = (cur_buf_stride << 2); /* four input rows: step between rows of 4x4 blocks */
+ WORD32 ref_buf_stride_ls2 = (ref_buf_stride << 2); /* same step for the reference buffer */
+
+ mv_refine_ctxt_t *ps_subpel_refine_ctxt;
+ ps_subpel_refine_ctxt = ps_result_prms->ps_subpel_refine_ctxt;
+ i4_num_nodes = 1; /* exactly one candidate is evaluated per call */
+
+ /* Loop over the candidates (a single iteration, since i4_num_nodes is 1) */
+ for(i4_candt = 0; i4_candt < i4_num_nodes; i4_candt++)
+ {
+ /**********************************************************************/
+ /* COMPUTE THE SAD OF EVERY 4x4 BLOCK AND FILL THE PARTITION SAD GRID */
+ /**********************************************************************/
+ {
+ WORD32 b, c, d;
+ UWORD8 *pu1_cur_ptr;
+ UWORD8 *pu1_ref_ptr;
+ UWORD16 au2_4x4_sad[NUM_4X4];
+
+ pu1_cur_ptr = ps_err_prms->pu1_inp;
+ pu1_ref_ptr = &ps_err_prms->pu1_ref[0];
+
+ /* Loop to compute the SAD's */
+ {
+ memset(&au2_4x4_sad[0], 0, NUM_4X4 * sizeof(UWORD16));
+ for(b = 0; b < NUM_4X4; b++)
+ {
+ WORD32 t1 = (b % 4) * NUM_PIXELS_IN_ROW + (b >> 2) * cur_buf_stride_ls2; /* top-left of block b in the input */
+ WORD32 t2 = (b % 4) * NUM_PIXELS_IN_ROW + (b >> 2) * ref_buf_stride_ls2; /* top-left of block b in the reference */
+
+ for(c = 0; c < NUM_ROWS_IN_4X4; c++)
+ {
+ WORD32 z_cur = (cur_buf_stride)*c + t1;
+ WORD32 z_ref = (ref_buf_stride)*c + t2;
+ for(d = 0; d < NUM_PIXELS_IN_ROW; d++)
+ {
+ au2_4x4_sad[b] += (UWORD16)ABS((
+ ((S32)pu1_ref_ptr[(z_ref + d)]) - ((S32)pu1_cur_ptr[(z_cur + d)])));
+ }
+ }
+ }
+ /* The 4x4 SADs are indexed in raster order, four per row; sum them up per partition */
+ pi4_sad_grid[PART_ID_NxN_TL] =
+ (au2_4x4_sad[0] + au2_4x4_sad[1] + au2_4x4_sad[4] + au2_4x4_sad[5]);
+ pi4_sad_grid[PART_ID_NxN_TR] =
+ (au2_4x4_sad[2] + au2_4x4_sad[3] + au2_4x4_sad[6] + au2_4x4_sad[7]);
+ pi4_sad_grid[PART_ID_NxN_BL] =
+ (au2_4x4_sad[8] + au2_4x4_sad[9] + au2_4x4_sad[12] + au2_4x4_sad[13]);
+ pi4_sad_grid[PART_ID_NxN_BR] =
+ (au2_4x4_sad[10] + au2_4x4_sad[11] + au2_4x4_sad[14] + au2_4x4_sad[15]);
+ pi4_sad_grid[PART_ID_Nx2N_L] =
+ pi4_sad_grid[PART_ID_NxN_TL] + pi4_sad_grid[PART_ID_NxN_BL];
+ pi4_sad_grid[PART_ID_Nx2N_R] =
+ pi4_sad_grid[PART_ID_NxN_TR] + pi4_sad_grid[PART_ID_NxN_BR];
+ pi4_sad_grid[PART_ID_2NxN_T] =
+ pi4_sad_grid[PART_ID_NxN_TR] + pi4_sad_grid[PART_ID_NxN_TL];
+ pi4_sad_grid[PART_ID_2NxN_B] =
+ pi4_sad_grid[PART_ID_NxN_BR] + pi4_sad_grid[PART_ID_NxN_BL];
+ pi4_sad_grid[PART_ID_nLx2N_L] =
+ (au2_4x4_sad[8] + au2_4x4_sad[0] + au2_4x4_sad[12] + au2_4x4_sad[4]);
+ pi4_sad_grid[PART_ID_nRx2N_R] =
+ (au2_4x4_sad[3] + au2_4x4_sad[7] + au2_4x4_sad[15] + au2_4x4_sad[11]);
+ pi4_sad_grid[PART_ID_2NxnU_T] =
+ (au2_4x4_sad[1] + au2_4x4_sad[0] + au2_4x4_sad[2] + au2_4x4_sad[3]);
+ pi4_sad_grid[PART_ID_2NxnD_B] =
+ (au2_4x4_sad[15] + au2_4x4_sad[14] + au2_4x4_sad[12] + au2_4x4_sad[13]);
+ pi4_sad_grid[PART_ID_2Nx2N] =
+ pi4_sad_grid[PART_ID_2NxN_T] + pi4_sad_grid[PART_ID_2NxN_B];
+ pi4_sad_grid[PART_ID_2NxnU_B] = /* the remaining four parts are 2Nx2N minus their complement */
+ pi4_sad_grid[PART_ID_2Nx2N] - pi4_sad_grid[PART_ID_2NxnU_T];
+ pi4_sad_grid[PART_ID_2NxnD_T] =
+ pi4_sad_grid[PART_ID_2Nx2N] - pi4_sad_grid[PART_ID_2NxnD_B];
+ pi4_sad_grid[PART_ID_nRx2N_L] =
+ pi4_sad_grid[PART_ID_2Nx2N] - pi4_sad_grid[PART_ID_nRx2N_R];
+ pi4_sad_grid[PART_ID_nLx2N_R] =
+ pi4_sad_grid[PART_ID_2Nx2N] - pi4_sad_grid[PART_ID_nLx2N_L];
+ }
+ }
+ /**********************************************************************/
+ /* UPDATE THE BEST RESULT OF EVERY VALID PARTITION */
+ /**********************************************************************/
+ {
+ S32 i4_count = 0, i4_sad, i4_mv_cost, i4_tot_cost;
+ S32 *pi4_valid_part_ids = &ps_subpel_refine_ctxt->ai4_part_id[0];
+ S32 best_node_cost;
+ S32 second_best_node_cost;
+
+ /*For each valid partition, update the refine context when the candidate beats the stored
+ best result; the second best slot [1] is never written because update_required is never 2*/
+
+ for(i4_count = 0; i4_count < ps_subpel_refine_ctxt->i4_num_valid_parts; i4_count++)
+ {
+ S32 update_required = 0;
+ S32 part_id = pi4_valid_part_ids[i4_count];
+ S32 index = (ps_subpel_refine_ctxt->i4_num_valid_parts > 8) ? part_id : i4_count; /* by part id when more than 8 parts are valid, else by position */
+
+ /* Use a pre-computed cost instead of freshly evaluating subpel cost */
+ i4_mv_cost = ps_subpel_refine_ctxt->i2_mv_cost[0][index];
+
+ /*Calculate total cost*/
+ i4_sad = CLIP3(pi4_sad_grid[part_id], 0, 0x7fff);
+ i4_tot_cost = CLIP_S16(i4_sad + i4_mv_cost);
+
+ /*****************************************************************/
+ /* We do not labor through the results if the total cost is */
+ /* worse than the last of the results. */
+ /*****************************************************************/
+ best_node_cost = CLIP_S16(ps_subpel_refine_ctxt->i2_tot_cost[0][index]);
+ second_best_node_cost = SHRT_MAX; /* fixed threshold: a cost equal to SHRT_MAX never updates the results */
+
+ if(i4_tot_cost < second_best_node_cost)
+ {
+ update_required = 0;
+
+ /*************************************************************/
+ /* Identify where the current result is to be placed: it */
+ /* replaces the best result only when it is strictly cheaper */
+ /*************************************************************/
+ if(i4_tot_cost < best_node_cost)
+ {
+ update_required = 1;
+ }
+ else if(i4_tot_cost == ps_subpel_refine_ctxt->i2_tot_cost[0][index])
+ {
+ update_required = 0; /* tie: the stored result is kept */
+ }
+ if(update_required == 2) /* not reachable: update_required is only ever 0 or 1 */
+ {
+ ps_subpel_refine_ctxt->i2_tot_cost[1][index] = i4_tot_cost;
+ ps_subpel_refine_ctxt->i2_mv_cost[1][index] = i4_mv_cost;
+ ps_subpel_refine_ctxt->i2_mv_x[1][index] = ps_result_prms->i2_mv_x;
+ ps_subpel_refine_ctxt->i2_mv_y[1][index] = ps_result_prms->i2_mv_y;
+ ps_subpel_refine_ctxt->i2_ref_idx[1][index] = ps_result_prms->i1_ref_idx;
+ }
+ else if(update_required == 1) /* candidate becomes the new best result of this partition */
+ {
+ ps_subpel_refine_ctxt->i2_tot_cost[0][index] = i4_tot_cost;
+ ps_subpel_refine_ctxt->i2_mv_cost[0][index] = i4_mv_cost;
+ ps_subpel_refine_ctxt->i2_mv_x[0][index] = ps_result_prms->i2_mv_x;
+ ps_subpel_refine_ctxt->i2_mv_y[0][index] = ps_result_prms->i2_mv_y;
+ ps_subpel_refine_ctxt->i2_ref_idx[0][index] = ps_result_prms->i1_ref_idx;
+ }
+ }
+ }
+ }
+ }
+ /* Post-pass over all TOT_NUM_PARTS entries of slot 0, independent of the valid part list */
+ {
+ WORD32 i4_count = 0;
+ for(i4_count = 0; i4_count < TOT_NUM_PARTS; i4_count++)
+ {
+ if(ps_subpel_refine_ctxt->i2_tot_cost[0][i4_count] >= MAX_SIGNED_16BIT_VAL)
+ {
+ ps_subpel_refine_ctxt->ai2_fullpel_satd[0][i4_count] = MAX_SIGNED_16BIT_VAL; /* cost at the maximum: set the full-pel SATD to the maximum as well */
+ }
+ }
+ }
+}
+
+/**
+********************************************************************************
+* @fn hme_calc_pt_sad_and_result_explicit(hme_search_prms_t *ps_search_prms,
+*                                         wgt_pred_ctxt_t *ps_wt_inp_prms,
+*                                         err_prms_t *ps_err_prms,
+*                                         result_upd_prms_t *ps_result_prms,
+*                                         U08 **ppu1_ref,
+*                                         S32 i4_ref_stride)
+*
+* @brief Runs through the provided candidates in order; for each candidate that
+*        is not marked INTRA_MV, computes the point SAD and updates the results
+*
+* @param[in]     ps_search_prms : part mask, block size, candidate nodes, offsets
+* @param[in]     ps_wt_inp_prms : per reference input buffers (apu1_wt_inp)
+* @param[in,out] ps_err_prms    : set up per candidate, passed to the SAD function
+* @param[in,out] ps_result_prms : set up per candidate, passed to the result update
+* @param[in]     ppu1_ref       : reference buffers, indexed by reference index
+* @param[in]     i4_ref_stride  : stride used to address the reference buffers
+*
+* @return None
+********************************************************************************
+*/
+
+void hme_calc_pt_sad_and_result_explicit(
+    hme_search_prms_t *ps_search_prms,
+    wgt_pred_ctxt_t *ps_wt_inp_prms,
+    err_prms_t *ps_err_prms,
+    result_upd_prms_t *ps_result_prms,
+    U08 **ppu1_ref,
+    S32 i4_ref_stride)
+{
+    WORD32 i4_grid_mask, i4_part_mask, i4_candt, i4_num_nodes;
+    WORD32 i4_inp_stride, i4_inp_off, i4_ref_offset;
+
+    search_node_t *ps_search_node;
+    BLK_SIZE_T e_blk_size;
+    PF_SAD_FXN_T pf_sad_fxn;
+    PF_RESULT_FXN_T pf_hme_result_fxn;
+
+    i4_grid_mask = 0x1; /* Point SAD */
+
+    /* Get the parameters required */
+    i4_part_mask = ps_search_prms->i4_part_mask;
+    e_blk_size = ps_search_prms->e_blk_size;
+    /* Candidates to be evaluated, walked in array order */
+    i4_num_nodes = ps_search_prms->i4_num_search_nodes;
+    ps_search_node = ps_search_prms->ps_search_nodes;
+
+    i4_inp_stride = ps_search_prms->i4_inp_stride;
+    /* Move to the location of the search blk in inp buffer */
+    i4_inp_off = ps_search_prms->i4_cu_x_off;
+    i4_inp_off += ps_search_prms->i4_cu_y_off * i4_inp_stride;
+    i4_ref_offset = (i4_ref_stride * ps_search_prms->i4_y_off) + ps_search_prms->i4_x_off;
+
+    pf_sad_fxn = hme_get_sad_fxn(e_blk_size, i4_grid_mask, i4_part_mask);
+    /**********************************************************************/
+    /* we have a sparsely populated SAD grid of size 9x17.                */
+    /* the id of the results in the grid is shown                         */
+    /*     5   2   6                                                      */
+    /*     1   0   3                                                      */
+    /*     7   4   8                                                      */
+    /* The motivation for choosing a grid like this is that               */
+    /* in case of no refinement, the central location is                  */
+    /* the first entry in the grid                                        */
+    /* Also for diamond, the 4 entries get considered first               */
+    /* This is consistent with the diamond notation used in               */
+    /* subpel refinement. To Check                                        */
+    /* Update the results for the given search candt                      */
+    /* returns the cost of the 2Nx2N partition                            */
+    /**********************************************************************/
+
+    /* Get the modified update result fun. with CLIP16 of cost to match */
+    /* with SIMD */
+    pf_hme_result_fxn = hme_update_results_grid_pu_bestn_no_encode;
+
+    /* The node pointer is advanced in the loop header, so that skipping a  */
+    /* candidate with 'continue' still moves on to the next candidate       */
+    for(i4_candt = 0; i4_candt < i4_num_nodes; i4_candt++, ps_search_node++)
+    {
+        if(ps_search_node->s_mv.i2_mvx == INTRA_MV)
+            continue;
+
+        /* initialize minimum cost for this candidate. As we search around */
+        /* this candidate, this is used to check early exit, when in any   */
+        /* given iteration, the center pt of the grid is lowest value      */
+        ps_result_prms->i4_min_cost = MAX_32BIT_VAL;
+
+        ps_err_prms->pu1_inp = ps_wt_inp_prms->apu1_wt_inp[ps_search_node->i1_ref_idx] + i4_inp_off;
+        ps_err_prms->i4_grid_mask = i4_grid_mask;
+
+        ps_err_prms->pu1_ref = ppu1_ref[ps_search_node->i1_ref_idx] + i4_ref_offset;
+        ps_err_prms->pu1_ref += ps_search_node->s_mv.i2_mvx;
+        ps_err_prms->pu1_ref += (ps_search_node->s_mv.i2_mvy * i4_ref_stride);
+
+        /**********************************************************************/
+        /* CALL THE FUNCTION THAT COMPUTES THE SAD AND UPDATES THE SAD GRID   */
+        /**********************************************************************/
+        pf_sad_fxn(ps_err_prms);
+
+        /**********************************************************************/
+        /* CALL THE FUNCTION THAT UPDATES THE BEST RESULTS                    */
+        /**********************************************************************/
+        ps_result_prms->i4_grid_mask = i4_grid_mask;
+        ps_result_prms->ps_search_node_base = ps_search_node;
+        pf_hme_result_fxn(ps_result_prms);
+    }
+}
+
+/**
+********************************************************************************
+* @fn hme_set_mvp_node(search_results_t *ps_search_results,
+* search_node_t *ps_candt_prj_coloc,
+* U08 u1_pred_lx, U08 u1_default_ref_id)
+*
+* @brief Set node used for motion vector predictor computation
+* Two predictor nodes (A and B) are compared to ps_candt_prj_coloc and
+* the closest (smaller |mvdx| + |mvdy|) is set as MVP for all partitions
+*
+* @param[in,out] ps_search_results : holds as_pred_ctxt[], whose pred nodes are updated
+* @param[in] ps_candt_prj_coloc : candidate that the two predictor nodes are compared to
+* @param[in] u1_pred_lx : index into as_pred_ctxt[] and ppu1_ref_bits_tlu[]
+* @param[in] u1_default_ref_id : predictors with another ref idx go through
+* SCALE_FOR_POC_DELTA before the comparison
+*
+* @return None
+********************************************************************************
+*/
+void hme_set_mvp_node(
+ search_results_t *ps_search_results,
+ search_node_t *ps_candt_prj_coloc,
+ U08 u1_pred_lx,
+ U08 u1_default_ref_id)
+{
+ S32 i;
+ pred_ctxt_t *ps_pred_ctxt = &ps_search_results->as_pred_ctxt[u1_pred_lx];
+ pred_candt_nodes_t *ps_pred_nodes = ps_pred_ctxt->as_pred_nodes; /* neighbour nodes below are read from entry 0 */
+ search_node_t *ps_pred_node_a = NULL, *ps_pred_node_b = NULL;
+
+ S32 inp_shift = 2;
+ S32 pred_shift;
+ S32 ref_bits;
+ S32 mv_p_x, mv_p_y;
+ S16 mvdx1, mvdx2, mvdy1, mvdy2;
+
+ ref_bits = ps_pred_ctxt->ppu1_ref_bits_tlu[u1_pred_lx][u1_default_ref_id]; /* looked up but not used further in this function */
+
+ /*************************************************************************/
+ /* Node A is the left neighbour when it is available. Otherwise A stays */
+ /* NULL here and is resolved to the colocated node further below */
+ /*************************************************************************/
+ if(ps_pred_nodes->ps_l->u1_is_avail)
+ {
+ ps_pred_node_a = ps_pred_nodes->ps_l;
+ }
+
+ if((!(ps_pred_ctxt->proj_used) && (ps_pred_nodes->ps_tr->u1_is_avail))) /* node B: top right, only when projection is not used */
+ {
+ ps_pred_node_b = ps_pred_nodes->ps_tr;
+ }
+ else
+ {
+ ps_pred_node_b = ps_pred_nodes->ps_coloc;
+ ps_pred_node_b->s_mv = ps_pred_node_b->ps_mv[0]; /* refresh s_mv from the first entry of ps_mv */
+ }
+
+ if(ps_pred_node_a == NULL)
+ {
+ ps_pred_node_a = ps_pred_nodes->ps_coloc;
+ ps_pred_node_a->s_mv = ps_pred_node_a->ps_mv[0];
+
+ if(ps_pred_node_b == ps_pred_nodes->ps_coloc) /* A took the colocated node: B falls back to the zero mv node */
+ {
+ ps_pred_node_b = ps_pred_nodes->ps_zeromv;
+ ps_pred_node_b->s_mv = ps_pred_node_b->ps_mv[0];
+ }
+ }
+
+ if(ps_pred_node_a->i1_ref_idx != u1_default_ref_id)
+ {
+ SCALE_FOR_POC_DELTA(
+ mv_p_x, mv_p_y, ps_pred_node_a, u1_default_ref_id, ps_pred_ctxt->pi2_ref_scf);
+ }
+ else
+ {
+ mv_p_x = ps_pred_node_a->s_mv.i2_mvx;
+ mv_p_y = ps_pred_node_a->s_mv.i2_mvy;
+ }
+ pred_shift = ps_pred_node_a->u1_subpel_done ? 0 : 2; /* presumably brings a full-pel predictor to the candidate's units - confirm against COMPUTE_MV_DIFFERENCE */
+ COMPUTE_MV_DIFFERENCE(mvdx1, mvdy1, ps_candt_prj_coloc, mv_p_x, mv_p_y, inp_shift, pred_shift);
+ mvdx1 = ABS(mvdx1);
+ mvdy1 = ABS(mvdy1);
+
+ if(ps_pred_node_b->i1_ref_idx != u1_default_ref_id) /* same steps as above, now for node B */
+ {
+ SCALE_FOR_POC_DELTA(
+ mv_p_x, mv_p_y, ps_pred_node_b, u1_default_ref_id, ps_pred_ctxt->pi2_ref_scf);
+ }
+ else
+ {
+ mv_p_x = ps_pred_node_b->s_mv.i2_mvx;
+ mv_p_y = ps_pred_node_b->s_mv.i2_mvy;
+ }
+ pred_shift = ps_pred_node_b->u1_subpel_done ? 0 : 2;
+ COMPUTE_MV_DIFFERENCE(mvdx2, mvdy2, ps_candt_prj_coloc, mv_p_x, mv_p_y, inp_shift, pred_shift);
+ mvdx2 = ABS(mvdx2);
+ mvdy2 = ABS(mvdy2);
+
+ if((mvdx1 + mvdy1) < (mvdx2 + mvdy2)) /* A wins only when strictly closer; a tie selects B */
+ {
+ for(i = 0; i < TOT_NUM_PARTS; i++)
+ {
+ ps_pred_nodes[i].ps_mvp_node = ps_pred_node_a;
+ }
+ }
+ else
+ {
+ for(i = 0; i < TOT_NUM_PARTS; i++)
+ {
+ ps_pred_nodes[i].ps_mvp_node = ps_pred_node_b;
+ }
+ }
+}
diff --git a/encoder/hme_err_compute.h b/encoder/hme_err_compute.h
new file mode 100644
index 0000000..aa6b94b
--- /dev/null
+++ b/encoder/hme_err_compute.h
@@ -0,0 +1,319 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file hme_err_compute.h
+*
+* \brief
+* contains prototypes for functions that compute error or best results or
+* return fxn ptrs for the same.
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _HME_ERR_COMPUTE_H_
+#define _HME_ERR_COMPUTE_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+#define NUM_4X4 16
+#define NUM_4X4_IN_8x8 4
+#define NUM_4X4_IN_16x16 16
+#define NUM_8X8_IN_16x16 4
+#define NUM_8X8_IN_32x32 16
+#define NUM_8X8_IN_64x64 64
+#define NUM_16X16_IN_64x64 16
+#define NUM_ROWS_IN_4X4 4
+#define NUM_PIXELS_IN_ROW 4
+#define NUM_CANDIDATES_IN_GRID 9
+
+// 0 => best + good;
+// 1 => 1st and 2nd best;
+// good => worse or equal to second best
+#define BESTEST 0
+
+#define COST(a, b, c, d, e) (a)
+
+/*****************************************************************************/
+/* Functions */
+/*****************************************************************************/
+void hme_evalsad_pt_npu_MxN_16bit(err_prms_t *ps_prms);
+
+#define compute_sad_16bit hme_evalsad_pt_npu_MxN_16bit
+
+/**
+********************************************************************************
+* @fn void hme_update_results_grid_pu_bestn(result_upd_prms_t *ps_result_prms);
+*
+* @brief Updates the best N results based on a grid SAD for enabled partitions
+*
+* @param[in,out] ps_result_prms : contains parameters pertaining to the results
+*
+* @return None
+********************************************************************************
+*/
+void hme_update_results_grid_pu_bestn(result_upd_prms_t *ps_result_prms);
+
+void hme_update_results_grid_pu_bestn_xtreme_speed(result_upd_prms_t *ps_result_prms);
+
+/**
+********************************************************************************
+* @fn hme_update_results_grid_pu_bestn_no_encode(result_upd_prms_t *ps_result_prms)
+*
+* @brief Updates results for the case where 1 best result is to be updated
+* for a given pt, for several parts
+* Note : The function is replicated for CLIPing the cost to 16bit to make
+* bit match with SIMD version
+*
+* @param[in] result_upd_prms_t : Contains the input parameters to this fxn
+*
+* @return The result_upd_prms_t structure is updated for all the active
+* parts in case the current candt has results for any given part
+* that is the best result for that part
+********************************************************************************
+*/
+void hme_update_results_grid_pu_bestn_no_encode(result_upd_prms_t *ps_result_prms);
+
+/**
+********************************************************************************
+* @fn hme_get_result_fxn(i4_grid_mask, i4_part_mask, i4_num_results)
+*
+* @brief Returns the result update function to be used for the given
+* combination of active grid points, active partitions and number of
+* active results
+*
+* @param[in] i4_grid_mask : Mask containing which of 9 grid pts active
+*
+* @param[in] i4_part_mask : Mask containing which of the 17 parts active
+*
+* @param[in] i4_num_results: Number of active results
+*
+* @return Pointer to the appropriate result update function
+* (type PF_RESULT_FXN_T)
+********************************************************************************
+*/
+PF_RESULT_FXN_T hme_get_result_fxn(S32 i4_grid_mask, S32 i4_part_mask, S32 i4_num_results);
+
+void compute_satd_16bit(err_prms_t *ps_prms);
+
+void compute_satd_8bit(err_prms_t *ps_prms);
+
+void compute_sad_16bit(err_prms_t *ps_prms);
+
+S32 compute_mv_cost(search_node_t *ps_search_node, pred_ctxt_t *ps_pred_ctxt, BLK_SIZE_T e_blk_size);
+
+void hme_init_pred_ctxt_no_encode(
+ pred_ctxt_t *ps_pred_ctxt,
+ search_results_t *ps_search_results,
+ search_node_t *ps_top_candts,
+ search_node_t *ps_left_candts,
+ search_node_t **pps_proj_coloc_candts,
+ search_node_t *ps_coloc_candts,
+ search_node_t *ps_zeromv_candt,
+ S32 pred_lx,
+ S32 lambda,
+ S32 lambda_q_shift,
+ U08 **ppu1_ref_bits_tlu,
+ S16 *pi2_ref_scf);
+
+void hme_init_pred_ctxt_encode(
+ pred_ctxt_t *ps_pred_ctxt,
+ search_results_t *ps_search_results,
+ search_node_t *ps_coloc_candts,
+ search_node_t *ps_zeromv_candt,
+ mv_grid_t *ps_mv_grid,
+ S32 pred_lx,
+ S32 lambda,
+ S32 lambda_q_shift,
+ U08 **ppu1_ref_bits_tlu,
+ S16 *pi2_ref_scf);
+
+/**
+********************************************************************************
+* @fn compute_mv_cost_coarse(search_node_t *ps_node,
+* pred_ctxt_t *ps_pred_ctxt,
+* PART_ID_T e_part_id)
+*
+* @brief MV cost for coarse explicit search in coarsest layer
+*
+* @param[in] ps_node: search node having mv and ref id for which to eval cost
+*
+* @param[in] ps_pred_ctxt : mv pred context
+*
+* @param[in] e_part_id : Partition id.
+*
+* @return Cost value
+
+********************************************************************************
+*/
+S32 compute_mv_cost_coarse(
+ search_node_t *ps_node, pred_ctxt_t *ps_pred_ctxt, PART_ID_T e_part_id, S32 inp_mv_pel);
+
+/**
+********************************************************************************
+* @fn compute_mv_cost_coarse_high_speed(search_node_t *ps_node,
+* pred_ctxt_t *ps_pred_ctxt,
+* PART_ID_T e_part_id, S32 inp_mv_pel)
+*
+* @brief MV cost for coarse explicit search in coarsest layer
+*
+* @param[in] ps_node: search node having mv and ref id for which to eval cost
+*
+* @param[in] ps_pred_ctxt : mv pred context
+*
+* @param[in] e_part_id : Partition id.
+*
+* @return Cost value
+
+********************************************************************************
+*/
+S32 compute_mv_cost_coarse_high_speed(
+ search_node_t *ps_node, pred_ctxt_t *ps_pred_ctxt, PART_ID_T e_part_id, S32 inp_mv_pel);
+
+/**
+********************************************************************************
+* @fn compute_mv_cost_refine(search_node_t *ps_node,
+* pred_ctxt_t *ps_pred_ctxt,
+* PART_ID_T e_part_id, S32 inp_mv_pel)
+*
+* @brief MV cost for refinement search (NOTE(review): the previous brief was a copy of compute_mv_cost_coarse - confirm)
+*
+* @param[in] ps_node: search node having mv and ref id for which to eval cost
+*
+* @param[in] ps_pred_ctxt : mv pred context
+*
+* @param[in] e_part_id : Partition id.
+*
+* @return Cost value
+
+********************************************************************************
+*/
+S32 compute_mv_cost_refine(
+ search_node_t *ps_node, pred_ctxt_t *ps_pred_ctxt, PART_ID_T e_part_id, S32 inp_mv_pel);
+
+/**
+********************************************************************************
+* @fn compute_mv_cost_explicit(search_node_t *ps_node,
+* pred_ctxt_t *ps_pred_ctxt,
+* PART_ID_T e_part_id)
+*
+* @brief MV cost for explicit search in layers not encoded
+*
+* @param[in] ps_node: search node having mv and ref id for which to eval cost
+*
+* @param[in] ps_pred_ctxt : mv pred context
+*
+* @param[in] e_part_id : Partition id.
+*
+* @return Cost value
+
+********************************************************************************
+*/
+S32 compute_mv_cost_explicit(
+ search_node_t *ps_node, pred_ctxt_t *ps_pred_ctxt, PART_ID_T e_part_id, S32 inp_mv_pel);
+
+S32 compute_mv_cost_implicit(
+ search_node_t *ps_node, pred_ctxt_t *ps_pred_ctxt, PART_ID_T e_part_id, S32 inp_mv_pel);
+
+S32 compute_mv_cost_implicit_high_speed(
+ search_node_t *ps_node, pred_ctxt_t *ps_pred_ctxt, PART_ID_T e_part_id, S32 inp_mv_pel);
+
+S32 compute_mv_cost_implicit_high_speed_modified(
+ search_node_t *ps_node, pred_ctxt_t *ps_pred_ctxt, PART_ID_T e_part_id, S32 inp_mv_pel);
+
+void hme_evalsad_grid_pu_16x16(err_prms_t *ps_prms);
+
+void hme_evalsatd_pt_pu_8x8(err_prms_t *ps_prms);
+
+WORD32 hme_evalsatd_pt_pu_8x8_tu_rec(
+ err_prms_t *ps_prms,
+ WORD32 lambda,
+ WORD32 lambda_q_shift,
+ WORD32 i4_frm_qstep,
+ me_func_selector_t *ps_func_selector);
+
+void hme_evalsatd_update_1_best_result_pt_pu_16x16(
+ err_prms_t *ps_prms, result_upd_prms_t *ps_result_prms);
+
+WORD32 hme_evalsatd_pt_pu_32x32_tu_rec(
+ err_prms_t *ps_prms,
+ WORD32 lambda,
+ WORD32 lambda_q_shift,
+ WORD32 i4_frm_qstep,
+ me_func_selector_t *ps_func_selector);
+
+void hme_evalsatd_pt_pu_32x32(err_prms_t *ps_prms);
+
+void hme_evalsatd_pt_pu_64x64(err_prms_t *ps_prms);
+
+WORD32 hme_evalsatd_pt_pu_64x64_tu_rec(
+ err_prms_t *ps_prms,
+ WORD32 lambda,
+ WORD32 lambda_q_shift,
+ WORD32 i4_frm_qstep,
+ me_func_selector_t *ps_func_selector);
+
+WORD32 hme_evalsatd_pt_pu_16x16_tu_rec(
+ err_prms_t *ps_prms,
+ WORD32 lambda,
+ WORD32 lambda_q_shift,
+ WORD32 i4_frm_qstep,
+ me_func_selector_t *ps_func_selector);
+
+void ihevce_had_32x32_r(
+ UWORD8 *pu1_src,
+ WORD32 src_strd,
+ UWORD8 *pu1_pred,
+ WORD32 pred_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd,
+ WORD32 **ppi4_hsad,
+ WORD32 **ppi4_tu_split,
+ WORD32 **ppi4_tu_early_cbf,
+ WORD32 pos_x_y_4x4,
+ WORD32 num_4x4_in_row,
+ WORD32 lambda,
+ WORD32 lambda_q_shift,
+ WORD32 i4_frm_qstep,
+ WORD32 i4_cur_depth,
+ WORD32 i4_max_depth,
+ WORD32 i4_max_tr_size,
+ WORD32 *pi4_tu_split_cost,
+ me_func_selector_t *ps_func_selector);
+
+void hme_update_results_pt_pu_best1_subpel_hs(
+ err_prms_t *ps_err_prms, result_upd_prms_t *ps_result_prms);
+
+void hme_set_mvp_node(
+ search_results_t *ps_search_results,
+ search_node_t *ps_candt_prj_coloc,
+ U08 u1_pred_lx,
+ U08 u1_default_ref_id);
+
+S32 hme_cmp_nodes(search_node_t *ps_best_node1, search_node_t *ps_best_node2);
+
+#endif /* #ifndef _HME_ERR_COMPUTE_H_ */
diff --git a/encoder/hme_fullpel.c b/encoder/hme_fullpel.c
new file mode 100644
index 0000000..076d50a
--- /dev/null
+++ b/encoder/hme_fullpel.c
@@ -0,0 +1,494 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file hme_fullpel.c
+*
+* @brief
+* Fullpel search and refinement
+*
+* @author
+* Ittiam
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+#include <limits.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_multi_thrd_funcs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_entropy_structs.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+#include "ihevce_enc_loop_structs.h"
+#include "ihevce_bs_compute_ctb.h"
+#include "ihevce_global_tables.h"
+#include "ihevce_dep_mngr_interface.h"
+#include "hme_datatype.h"
+#include "hme_interface.h"
+#include "hme_common_defs.h"
+#include "hme_defs.h"
+#include "ihevce_me_instr_set_router.h"
+#include "hme_globals.h"
+#include "hme_utils.h"
+#include "hme_coarse.h"
+#include "hme_refine.h"
+#include "hme_err_compute.h"
+#include "hme_common_utils.h"
+#include "hme_search_algo.h"
+#include "ihevce_stasino_helpers.h"
+
+/**
+********************************************************************************
+* @fn hme_fullpel_cand_sifter
+*
+* @brief Runs hme_pred_search() on the search parameters. When the CU is
+* flagged as noisy, the search is limited to the 2Nx2N and NxN
+* partition types; afterwards, if two results are kept per partition,
+* the pair stored for every valid partition is put in ascending order
+* of total cost, and the caller's partition mask and valid-partition
+* list are restored.
+*
+* @param[in,out] ps_search_prms : search parameters; i4_alpha_stim_multiplier
+* and u1_is_cu_noisy are stored into it, and the
+* fullpel refine context it points to is updated
+* @param[in] ps_layer_ctxt : forwarded unchanged to hme_pred_search()
+* @param[in] ps_wt_inp_prms : forwarded unchanged to hme_pred_search()
+* @param[in] i4_alpha_stim_multiplier : value stored into ps_search_prms
+* @param[in] u1_is_cu_noisy : non-zero selects the restricted-partition path
+* @param[in] ps_me_optimised_function_list : forwarded to hme_pred_search()
+*
+* @return None
+********************************************************************************
+*/
+void hme_fullpel_cand_sifter(
+    hme_search_prms_t *ps_search_prms,
+    layer_ctxt_t *ps_layer_ctxt,
+    wgt_pred_ctxt_t *ps_wt_inp_prms,
+    S32 i4_alpha_stim_multiplier,
+    U08 u1_is_cu_noisy,
+    ihevce_me_optimised_function_list_t *ps_me_optimised_function_list)
+{
+    fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt = ps_search_prms->ps_fullpel_refine_ctxt;
+
+    /* Only meaningful on the noisy path; initialised so that it never holds */
+    /* an indeterminate value */
+    S32 i4_saved_part_mask = 0;
+    S32 i4_part;
+    S16 i2_swap;
+
+    ps_search_prms->i4_alpha_stim_multiplier = i4_alpha_stim_multiplier;
+    ps_search_prms->u1_is_cu_noisy = u1_is_cu_noisy;
+
+    if(u1_is_cu_noisy)
+    {
+        /* Remember the caller's mask, then search only 2Nx2N and NxN */
+        i4_saved_part_mask = ps_search_prms->i4_part_mask;
+        ps_search_prms->i4_part_mask &= ((ENABLE_2Nx2N) | (ENABLE_NxN));
+
+        ps_fullpel_refine_ctxt->i4_num_valid_parts = hme_create_valid_part_ids(
+            ps_search_prms->i4_part_mask, &ps_fullpel_refine_ctxt->ai4_part_id[0]);
+    }
+
+    hme_pred_search(
+        ps_search_prms, ps_layer_ctxt, ps_wt_inp_prms, 0, ps_me_optimised_function_list);
+
+    if(!u1_is_cu_noisy)
+    {
+        return;
+    }
+
+    if(2 == ps_search_prms->ps_search_results->u1_num_results_per_part)
+    {
+        for(i4_part = 0; i4_part < ps_fullpel_refine_ctxt->i4_num_valid_parts; i4_part++)
+        {
+            /* Result 0 must be the cheaper of the two; swap every field of */
+            /* the pair when it is not */
+            if(ps_fullpel_refine_ctxt->i2_tot_cost[0][i4_part] >
+               ps_fullpel_refine_ctxt->i2_tot_cost[1][i4_part])
+            {
+                i2_swap = ps_fullpel_refine_ctxt->i2_tot_cost[0][i4_part];
+                ps_fullpel_refine_ctxt->i2_tot_cost[0][i4_part] =
+                    ps_fullpel_refine_ctxt->i2_tot_cost[1][i4_part];
+                ps_fullpel_refine_ctxt->i2_tot_cost[1][i4_part] = i2_swap;
+
+                i2_swap = ps_fullpel_refine_ctxt->i2_stim_injected_cost[0][i4_part];
+                ps_fullpel_refine_ctxt->i2_stim_injected_cost[0][i4_part] =
+                    ps_fullpel_refine_ctxt->i2_stim_injected_cost[1][i4_part];
+                ps_fullpel_refine_ctxt->i2_stim_injected_cost[1][i4_part] = i2_swap;
+
+                i2_swap = ps_fullpel_refine_ctxt->i2_mv_cost[0][i4_part];
+                ps_fullpel_refine_ctxt->i2_mv_cost[0][i4_part] =
+                    ps_fullpel_refine_ctxt->i2_mv_cost[1][i4_part];
+                ps_fullpel_refine_ctxt->i2_mv_cost[1][i4_part] = i2_swap;
+
+                i2_swap = ps_fullpel_refine_ctxt->i2_mv_x[0][i4_part];
+                ps_fullpel_refine_ctxt->i2_mv_x[0][i4_part] =
+                    ps_fullpel_refine_ctxt->i2_mv_x[1][i4_part];
+                ps_fullpel_refine_ctxt->i2_mv_x[1][i4_part] = i2_swap;
+
+                i2_swap = ps_fullpel_refine_ctxt->i2_mv_y[0][i4_part];
+                ps_fullpel_refine_ctxt->i2_mv_y[0][i4_part] =
+                    ps_fullpel_refine_ctxt->i2_mv_y[1][i4_part];
+                ps_fullpel_refine_ctxt->i2_mv_y[1][i4_part] = i2_swap;
+
+                i2_swap = ps_fullpel_refine_ctxt->i2_ref_idx[0][i4_part];
+                ps_fullpel_refine_ctxt->i2_ref_idx[0][i4_part] =
+                    ps_fullpel_refine_ctxt->i2_ref_idx[1][i4_part];
+                ps_fullpel_refine_ctxt->i2_ref_idx[1][i4_part] = i2_swap;
+            }
+        }
+    }
+
+    /* Hand the caller back its own partition mask and valid-partition list */
+    ps_search_prms->i4_part_mask = i4_saved_part_mask;
+
+    ps_fullpel_refine_ctxt->i4_num_valid_parts = hme_create_valid_part_ids(
+        ps_search_prms->i4_part_mask, &ps_fullpel_refine_ctxt->ai4_part_id[0]);
+}
+
+/**
+********************************************************************************
+* @fn hme_add_fpel_refine_candidates_to_search_cand_array
+*
+* @brief Adds the points of the rectangular grid
+* (gai1_mv_offsets_from_center_in_rect_grid) around one stored fullpel
+* result to the search candidate array.
+*
+* @param[in,out] ps_unique_search_nodes : candidate array being filled
+* @param[in] ps_fullpel_refine_ctxt : source of the grid centre (mv and ref idx)
+* @param[in,out] pi4_num_unique_nodes : number of entries in the candidate array
+* @param[in,out] pu4_unique_node_map : passed to INSERT_NEW_NODE
+* @param[in] i4_fpel_search_result_id : first index into the result arrays
+* @param[in] i4_fpel_search_result_array_index : second index into the result arrays
+* @param[in] i4_unique_node_map_center_x : passed to INSERT_NEW_NODE
+* @param[in] i4_unique_node_map_center_y : passed to INSERT_NEW_NODE
+* @param[in] i1_unique_node_map_ref_idx : hashing is requested for the offset
+* points only when the centre's ref idx equals this value
+* @param[in] u1_add_refine_grid_center_to_search_cand_array : non-zero also
+* adds the zero-offset (centre) point
+* @param[in] u1_do_not_check_for_duplicates : non-zero appends the points
+* directly instead of going through INSERT_NEW_NODE
+*
+* @return None
+********************************************************************************
+*/
+static void hme_add_fpel_refine_candidates_to_search_cand_array(
+    search_node_t *ps_unique_search_nodes,
+    fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt,
+    S32 *pi4_num_unique_nodes,
+    U32 *pu4_unique_node_map,
+    S32 i4_fpel_search_result_id,
+    S32 i4_fpel_search_result_array_index,
+    S32 i4_unique_node_map_center_x,
+    S32 i4_unique_node_map_center_y,
+    S08 i1_unique_node_map_ref_idx,
+    U08 u1_add_refine_grid_center_to_search_cand_array,
+    U08 u1_do_not_check_for_duplicates)
+{
+    search_node_t s_grid_pt_node;
+
+    U08 u1_hash_offset_pts = 0;
+    U08 u1_grid_pt;
+
+    S32 i4_center_mvx =
+        ps_fullpel_refine_ctxt->i2_mv_x[i4_fpel_search_result_id][i4_fpel_search_result_array_index];
+    S32 i4_center_mvy =
+        ps_fullpel_refine_ctxt->i2_mv_y[i4_fpel_search_result_id][i4_fpel_search_result_array_index];
+    S08 i1_center_ref_idx =
+        ps_fullpel_refine_ctxt
+            ->i2_ref_idx[i4_fpel_search_result_id][i4_fpel_search_result_array_index];
+
+    if(!u1_do_not_check_for_duplicates)
+    {
+        /* The ref idx is common to every grid point; the mv is set per point */
+        s_grid_pt_node.i1_ref_idx = i1_center_ref_idx;
+
+        u1_hash_offset_pts = (s_grid_pt_node.i1_ref_idx == i1_unique_node_map_ref_idx);
+    }
+
+    for(u1_grid_pt = 0; u1_grid_pt < NUM_POINTS_IN_RECTANGULAR_GRID; u1_grid_pt++)
+    {
+        S08 i1_delta_x = gai1_mv_offsets_from_center_in_rect_grid[u1_grid_pt][0];
+        S08 i1_delta_y = gai1_mv_offsets_from_center_in_rect_grid[u1_grid_pt][1];
+        U08 u1_is_center_pt = !(i1_delta_x || i1_delta_y);
+        U08 u1_hash_this_pt;
+
+        /* The centre point is added only on request */
+        if(u1_is_center_pt && !u1_add_refine_grid_center_to_search_cand_array)
+        {
+            continue;
+        }
+
+        if(u1_do_not_check_for_duplicates)
+        {
+            /* Plain append: only mv and ref idx of the new entry are written */
+            search_node_t *ps_new_node = &ps_unique_search_nodes[pi4_num_unique_nodes[0]];
+
+            ps_new_node->s_mv.i2_mvx = i4_center_mvx + i1_delta_x;
+            ps_new_node->s_mv.i2_mvy = i4_center_mvy + i1_delta_y;
+            ps_new_node->i1_ref_idx = i1_center_ref_idx;
+
+            pi4_num_unique_nodes[0]++;
+
+            continue;
+        }
+
+        /* Hashing is never requested for the centre point itself */
+        u1_hash_this_pt = u1_is_center_pt ? 0 : u1_hash_offset_pts;
+
+        s_grid_pt_node.s_mv.i2_mvx = i4_center_mvx + i1_delta_x;
+        s_grid_pt_node.s_mv.i2_mvy = i4_center_mvy + i1_delta_y;
+
+        INSERT_NEW_NODE(
+            ps_unique_search_nodes,
+            pi4_num_unique_nodes[0],
+            s_grid_pt_node,
+            1,
+            pu4_unique_node_map,
+            i4_unique_node_map_center_x,
+            i4_unique_node_map_center_y,
+            u1_hash_this_pt);
+    }
+}
+
+/**
+********************************************************************************
+* @fn hme_fullpel_refine
+*
+* @brief Builds refinement candidates (rectangular grid points) around the
+* fullpel results held in the fullpel refine context, writes them to
+* ps_search_prms->ps_search_nodes and, if at least one candidate was
+* produced, runs hme_pred_search() on them with u1_is_cu_noisy cleared.
+*
+* @param[in] ps_refine_prms : supplies u1_max_num_fpel_refine_centers and
+* pu1_num_best_results
+* @param[in,out] ps_search_prms : search parameters; its node array and
+* i4_num_search_nodes are overwritten
+* @param[in] ps_layer_ctxt : forwarded to hme_pred_search()
+* @param[in] ps_wt_inp_prms : forwarded to hme_pred_search()
+* @param[in,out] pu4_unique_node_map : forwarded to the candidate insertion
+* @param[in] u1_num_init_search_cands : >= 2 refines around the stored
+* results, == 1 refines around search node 0, otherwise nothing is done
+* @param[in] u1_8x8_blk_mask : the value 15 selects the limits taken from
+* ps_refine_prms
+* @param[in] i4_unique_node_map_center_x : forwarded to the candidate insertion
+* @param[in] i4_unique_node_map_center_y : forwarded to the candidate insertion
+* @param[in] i1_unique_node_map_ref_idx : forwarded to the candidate insertion
+* @param[in] e_quality_preset : ME_XTREME_SPEED_25 caps the number of centres
+* when u1_8x8_blk_mask is not 15
+* @param[in] ps_me_optimised_function_list : forwarded to hme_pred_search()
+*
+* @return None
+********************************************************************************
+*/
+void hme_fullpel_refine(
+    refine_prms_t *ps_refine_prms,
+    hme_search_prms_t *ps_search_prms,
+    layer_ctxt_t *ps_layer_ctxt,
+    wgt_pred_ctxt_t *ps_wt_inp_prms,
+    U32 *pu4_unique_node_map,
+    U08 u1_num_init_search_cands,
+    U08 u1_8x8_blk_mask,
+    S32 i4_unique_node_map_center_x,
+    S32 i4_unique_node_map_center_y,
+    S08 i1_unique_node_map_ref_idx,
+    ME_QUALITY_PRESETS_T e_quality_preset,
+    ihevce_me_optimised_function_list_t *ps_me_optimised_function_list)
+{
+    fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt = ps_search_prms->ps_fullpel_refine_ctxt;
+    search_node_t *ps_unique_search_nodes = ps_search_prms->ps_search_nodes;
+
+    S32 i4_num_unique_nodes = 0;
+    U08 u1_num_complete_grids = 0;
+    U08 u1_num_grids = 0;
+
+    if(u1_num_init_search_cands >= 2)
+    {
+        const U08 u1_is_blk_mask_15 = (15 == u1_8x8_blk_mask);
+
+        S32 i4_centers_left;
+        S32 i4_part_ctr;
+
+        /* Upper bound on the number of results used as refinement centres */
+        if(u1_is_blk_mask_15)
+        {
+            i4_centers_left = ps_refine_prms->u1_max_num_fpel_refine_centers;
+        }
+        else if(ME_XTREME_SPEED_25 == e_quality_preset)
+        {
+            i4_centers_left = MAX_NUM_CANDS_FOR_FPEL_REFINE_IN_XS25;
+        }
+        else
+        {
+            i4_centers_left = INT_MAX;
+        }
+
+        for(i4_part_ctr = 0; i4_part_ctr < ps_fullpel_refine_ctxt->i4_num_valid_parts;
+            i4_part_ctr++)
+        {
+            S32 i4_part_id = ps_fullpel_refine_ctxt->ai4_part_id[i4_part_ctr];
+            S32 i4_index = i4_part_ctr;
+            S32 i4_num_results;
+            S32 i4_result_id;
+
+            /* With more than 8 valid partitions the results are indexed by */
+            /* partition id instead of by position in the valid list */
+            if(ps_fullpel_refine_ctxt->i4_num_valid_parts > 8)
+            {
+                i4_index = i4_part_id;
+            }
+
+            if(u1_is_blk_mask_15)
+            {
+                i4_num_results =
+                    MIN(ps_search_prms->ps_search_results->u1_num_results_per_part,
+                        ps_refine_prms->pu1_num_best_results[i4_part_id]);
+            }
+            else
+            {
+                i4_num_results = ps_search_prms->ps_search_results->u1_num_results_per_part;
+            }
+
+            ASSERT(i4_num_results <= 2);
+
+            for(i4_result_id = 0; i4_result_id < i4_num_results; i4_result_id++)
+            {
+                U08 u1_is_usable_center =
+                    (ps_fullpel_refine_ctxt->i2_ref_idx[i4_result_id][i4_index] >= 0) &&
+                    (ps_fullpel_refine_ctxt->i2_mv_x[i4_result_id][i4_index] != INTRA_MV);
+
+                if(u1_is_usable_center)
+                {
+                    S32 i4_num_nodes_before = i4_num_unique_nodes;
+                    S32 i4_num_nodes_added;
+
+                    hme_add_fpel_refine_candidates_to_search_cand_array(
+                        ps_unique_search_nodes,
+                        ps_fullpel_refine_ctxt,
+                        &i4_num_unique_nodes,
+                        pu4_unique_node_map,
+                        i4_result_id,
+                        i4_index,
+                        i4_unique_node_map_center_x,
+                        i4_unique_node_map_center_y,
+                        i1_unique_node_map_ref_idx,
+                        0,
+                        0);
+
+                    i4_num_nodes_added = i4_num_unique_nodes - i4_num_nodes_before;
+
+                    /* A grid is complete when every non-centre point was added */
+                    if(i4_num_nodes_added >= (NUM_POINTS_IN_RECTANGULAR_GRID - 1))
+                    {
+                        u1_num_complete_grids++;
+                    }
+
+                    if(i4_num_nodes_added)
+                    {
+                        u1_num_grids++;
+                    }
+
+                    i4_centers_left--;
+                }
+
+                if(i4_centers_left <= 0)
+                {
+                    break;
+                }
+            }
+
+            if(i4_centers_left <= 0)
+            {
+                break;
+            }
+        }
+    }
+    else if((1 == u1_num_init_search_cands) && (ps_refine_prms->u1_max_num_fpel_refine_centers >= 1))
+    {
+        /* Single candidate: search node 0 becomes result [0][0] and is the */
+        /* only refinement centre */
+        ps_fullpel_refine_ctxt->i2_mv_x[0][0] = ps_unique_search_nodes[0].s_mv.i2_mvx;
+        ps_fullpel_refine_ctxt->i2_mv_y[0][0] = ps_unique_search_nodes[0].s_mv.i2_mvy;
+        ps_fullpel_refine_ctxt->i2_ref_idx[0][0] = ps_unique_search_nodes[0].i1_ref_idx;
+
+        if((ps_fullpel_refine_ctxt->i2_ref_idx[0][0] >= 0) &&
+           (ps_fullpel_refine_ctxt->i2_mv_x[0][0] != INTRA_MV))
+        {
+            hme_add_fpel_refine_candidates_to_search_cand_array(
+                ps_unique_search_nodes,
+                ps_fullpel_refine_ctxt,
+                &i4_num_unique_nodes,
+                pu4_unique_node_map,
+                0,
+                0,
+                i4_unique_node_map_center_x,
+                i4_unique_node_map_center_y,
+                i1_unique_node_map_ref_idx,
+                1,
+                1);
+
+            u1_num_complete_grids++;
+        }
+    }
+
+    /* Nothing to refine */
+    if(i4_num_unique_nodes <= 0)
+    {
+        return;
+    }
+
+    ps_search_prms->i4_num_search_nodes = i4_num_unique_nodes;
+    ps_search_prms->u1_is_cu_noisy = 0;
+
+    /* The fourth argument is set only when the candidates form exactly one */
+    /* grid and that grid is complete */
+    hme_pred_search(
+        ps_search_prms,
+        ps_layer_ctxt,
+        ps_wt_inp_prms,
+        (1 == u1_num_complete_grids) && (u1_num_grids == u1_num_complete_grids),
+        ps_me_optimised_function_list);
+}
+
+/**
+********************************************************************************
+* @fn hme_remove_duplicate_fpel_search_candidates
+*
+* @brief Walks the candidate list and hands each accepted candidate to
+* INSERT_UNIQUE_NODE, which fills ps_unique_search_nodes. The walk
+* stops when all i4_num_srch_cands entries were visited or when the
+* node budget is reached. The budget is i4_num_init_candts, halved
+* when bidir is disabled and more than one L0 reference is active.
+* When more than one refinement iteration is configured, candidates
+* are additionally filtered by reference index for the current
+* iteration.
+*
+* @return Number of nodes stored in ps_unique_search_nodes
+********************************************************************************
+*/
+S32 hme_remove_duplicate_fpel_search_candidates(
+    search_node_t *ps_unique_search_nodes,
+    search_candt_t *ps_search_candts,
+    U32 *pu4_unique_node_map,
+    S08 *pi1_pred_dir_to_ref_idx,
+    S32 i4_num_srch_cands,
+    S32 i4_num_init_candts,
+    S32 i4_refine_iter_ctr,
+    S32 i4_num_refinement_iterations,
+    S32 i4_num_act_ref_l0,
+    S08 i1_unique_node_map_ref_idx,
+    S32 i4_unique_node_map_center_x,
+    S32 i4_unique_node_map_center_y,
+    U08 u1_is_bidir_enabled,
+    ME_QUALITY_PRESETS_T e_quality_preset)
+{
+    S32 i4_cand_ctr;
+    S32 i4_num_unique_nodes = 0;
+    S32 i4_max_num_cands = i4_num_init_candts;
+
+    if((!u1_is_bidir_enabled) && (i4_num_act_ref_l0 > 1))
+    {
+        i4_max_num_cands >>= 1;
+    }
+
+    for(i4_cand_ctr = 0;
+        (i4_cand_ctr < i4_num_srch_cands) && (i4_num_unique_nodes < i4_max_num_cands);
+        i4_cand_ctr++)
+    {
+        search_node_t *ps_cur_cand = ps_search_candts[i4_cand_ctr].ps_search_node;
+
+        U08 u1_skip_cand = 0;
+        U08 u1_use_hashing;
+
+        if(i4_num_refinement_iterations > 1)
+        {
+#if !ENABLE_EXPLICIT_SEARCH_IN_P_IN_L0
+            /* Iteration 0 evaluates only the ref idx of pred dir 0; */
+            /* later iterations accept every candidate */
+            u1_skip_cand = (ps_cur_cand->i1_ref_idx != pi1_pred_dir_to_ref_idx[0]) &&
+                           (i4_refine_iter_ctr == 0);
+#else
+            if(ME_HIGH_QUALITY == e_quality_preset)
+            {
+                u1_skip_cand = (ps_cur_cand->i1_ref_idx != pi1_pred_dir_to_ref_idx[0]) &&
+                               (i4_refine_iter_ctr == 0);
+            }
+            else
+            {
+                /* Each iteration evaluates only its own ref idx */
+                u1_skip_cand =
+                    (ps_cur_cand->i1_ref_idx != pi1_pred_dir_to_ref_idx[i4_refine_iter_ctr]);
+            }
+#endif
+        }
+
+        if(u1_skip_cand)
+        {
+            continue;
+        }
+
+        u1_use_hashing = (ps_cur_cand->i1_ref_idx == i1_unique_node_map_ref_idx);
+
+        INSERT_UNIQUE_NODE(
+            ps_unique_search_nodes,
+            i4_num_unique_nodes,
+            ps_cur_cand[0],
+            pu4_unique_node_map,
+            i4_unique_node_map_center_x,
+            i4_unique_node_map_center_y,
+            u1_use_hashing);
+    }
+
+    return i4_num_unique_nodes;
+}
diff --git a/encoder/hme_fullpel.h b/encoder/hme_fullpel.h
new file mode 100644
index 0000000..3de1cda
--- /dev/null
+++ b/encoder/hme_fullpel.h
@@ -0,0 +1,81 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file hme_fullpel.h
+*
+* \brief
+* contains prototypes for fullpel functions
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _HME_FULLPEL_H_
+#define _HME_FULLPEL_H_
+
+/*****************************************************************************/
+/* Functions */
+/*****************************************************************************/
+
+/*
+ * Stores i4_alpha_stim_multiplier and u1_is_cu_noisy into ps_search_prms and
+ * runs hme_pred_search(). When u1_is_cu_noisy is set, the search is limited
+ * to the 2Nx2N and NxN partition types, the two results kept per partition
+ * are ordered by total cost, and the original partition mask is restored
+ * before returning.
+ */
+void hme_fullpel_cand_sifter(
+ hme_search_prms_t *ps_search_prms,
+ layer_ctxt_t *ps_layer_ctxt,
+ wgt_pred_ctxt_t *ps_wt_inp_prms,
+ S32 i4_alpha_stim_multiplier,
+ U08 u1_is_cu_noisy,
+ ihevce_me_optimised_function_list_t *ps_me_optimised_function_list);
+
+/*
+ * Fills ps_search_prms->ps_search_nodes with rectangular-grid candidates
+ * around the stored fullpel results and, if any candidate was produced, runs
+ * hme_pred_search() on them with u1_is_cu_noisy cleared.
+ */
+void hme_fullpel_refine(
+ refine_prms_t *ps_refine_prms,
+ hme_search_prms_t *ps_search_prms,
+ layer_ctxt_t *ps_layer_ctxt,
+ wgt_pred_ctxt_t *ps_wt_inp_prms,
+ U32 *pu4_unique_node_map,
+ U08 u1_num_init_search_cands,
+ U08 u1_8x8_blk_mask,
+ S32 i4_unique_node_map_center_x,
+ S32 i4_unique_node_map_center_y,
+ S08 i1_unique_node_map_ref_idx,
+ ME_QUALITY_PRESETS_T e_quality_preset,
+ ihevce_me_optimised_function_list_t *ps_me_optimised_function_list);
+
+/*
+ * Inserts candidates from ps_search_candts into ps_unique_search_nodes via
+ * INSERT_UNIQUE_NODE and returns the number of nodes stored. At most
+ * i4_num_init_candts nodes are stored (half of that when bidir is disabled
+ * and i4_num_act_ref_l0 > 1).
+ */
+S32 hme_remove_duplicate_fpel_search_candidates(
+ search_node_t *ps_unique_search_nodes,
+ search_candt_t *ps_search_candts,
+ U32 *pu4_unique_node_map,
+ S08 *pi1_pred_dir_to_ref_idx,
+ S32 i4_num_srch_cands,
+ S32 i4_num_init_candts,
+ S32 i4_refine_iter_ctr,
+ S32 i4_num_refinement_iterations,
+ S32 i4_num_act_ref_l0,
+ S08 i1_unique_node_map_ref_idx,
+ S32 i4_unique_node_map_center_x,
+ S32 i4_unique_node_map_center_y,
+ U08 u1_is_bidir_enabled,
+ ME_QUALITY_PRESETS_T e_quality_preset);
+
+#endif
diff --git a/encoder/hme_function_selector.c b/encoder/hme_function_selector.c
new file mode 100644
index 0000000..93c1635
--- /dev/null
+++ b/encoder/hme_function_selector.c
@@ -0,0 +1,151 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* hme_function_selector.c
+*
+* @brief
+* Contains functions to initialize function pointers used in hevc me
+*
+* @author
+* ittiam
+*
+* @par List of Functions:
+*
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_debug.h"
+#include "ihevc_deblk.h"
+#include "ihevc_defs.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_macros.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_sao.h"
+#include "ihevc_structs.h"
+#include "ihevc_weighted_pred.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevce_api.h"
+#include "ihevce_defs.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+#include "ihevce_global_tables.h"
+
+#include "hme_datatype.h"
+#include "hme_common_defs.h"
+#include "hme_common_utils.h"
+#include "hme_interface.h"
+#include "hme_defs.h"
+#include "hme_err_compute.h"
+#include "hme_globals.h"
+
+#include "ihevce_me_instr_set_router.h"
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+// clang-format off
+
+#ifdef ENABLE_NEON
+/* Declaration of the NEON routine that hme_init_function_ptr_neon() installs */
+/* in the three pf_evalsatd_update_1_best_result_pt_pu_16x16_* selector entries. */
+/* NOTE(review): FT_CALC_SATD_AND_RESULT is presumably a function type, which */
+/* makes this a function declaration - confirm where the typedef is defined. */
+FT_CALC_SATD_AND_RESULT hme_evalsatd_update_1_best_result_pt_pu_16x16_neon;
+#endif
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+
+#ifdef ENABLE_NEON
+/* Points every entry of the ME function selector at its NEON implementation. */
+/* pv_me_context is treated as a me_master_ctxt_t. */
+void hme_init_function_ptr_neon(void *pv_me_context)
+{
+    me_master_ctxt_t *ps_me_ctxt = (me_master_ctxt_t *)pv_me_context;
+
+    // clang-format off
+    ps_me_ctxt->s_func_selector.pf_had_8x8_using_4_4x4_r = ihevce_had_8x8_using_4_4x4_r_neon;
+    ps_me_ctxt->s_func_selector.pf_had_16x16_r = ihevce_had_16x16_r_neon;
+    ps_me_ctxt->s_func_selector.pf_compute_32x32HAD_using_16x16 = ihevce_compute_32x32HAD_using_16x16_neon;
+
+    /* One routine serves all three partition-count variants */
+    ps_me_ctxt->s_func_selector.pf_evalsatd_update_1_best_result_pt_pu_16x16_num_part_eq_1 = hme_evalsatd_update_1_best_result_pt_pu_16x16_neon;
+    ps_me_ctxt->s_func_selector.pf_evalsatd_update_1_best_result_pt_pu_16x16_num_part_lt_9 = hme_evalsatd_update_1_best_result_pt_pu_16x16_neon;
+    ps_me_ctxt->s_func_selector.pf_evalsatd_update_1_best_result_pt_pu_16x16_num_part_lt_17 = hme_evalsatd_update_1_best_result_pt_pu_16x16_neon;
+    // clang-format on
+}
+#endif
+
+/* Points every entry of the ME function selector at its generic C */
+/* implementation. pv_me_context is treated as a me_master_ctxt_t. */
+void hme_init_function_ptr_generic(void *pv_me_context)
+{
+    me_master_ctxt_t *ps_me_ctxt = (me_master_ctxt_t *)pv_me_context;
+
+    // clang-format off
+    ps_me_ctxt->s_func_selector.pf_had_8x8_using_4_4x4_r = ihevce_had_8x8_using_4_4x4_r;
+    ps_me_ctxt->s_func_selector.pf_had_16x16_r = ihevce_had_16x16_r;
+    ps_me_ctxt->s_func_selector.pf_compute_32x32HAD_using_16x16 = ihevce_compute_32x32HAD_using_16x16;
+
+    /* One routine serves all three partition-count variants */
+    ps_me_ctxt->s_func_selector.pf_evalsatd_update_1_best_result_pt_pu_16x16_num_part_eq_1 = hme_evalsatd_update_1_best_result_pt_pu_16x16;
+    ps_me_ctxt->s_func_selector.pf_evalsatd_update_1_best_result_pt_pu_16x16_num_part_lt_9 = hme_evalsatd_update_1_best_result_pt_pu_16x16;
+    ps_me_ctxt->s_func_selector.pf_evalsatd_update_1_best_result_pt_pu_16x16_num_part_lt_17 = hme_evalsatd_update_1_best_result_pt_pu_16x16;
+    // clang-format on
+}
+
+/* Fills the ME function selector for the requested architecture: the NEON */
+/* set for ARCH_ARM_A9Q / ARCH_ARM_V8_NEON when built with ENABLE_NEON, the */
+/* generic set in every other case. */
+void hme_init_function_ptr(void *pv_me_context, IV_ARCH_T e_processor_arch)
+{
+#ifdef ENABLE_NEON
+    if((ARCH_ARM_A9Q == e_processor_arch) || (ARCH_ARM_V8_NEON == e_processor_arch))
+    {
+        hme_init_function_ptr_neon(pv_me_context);
+        return;
+    }
+#else
+    /* Without NEON support the architecture does not influence the choice */
+    (void)e_processor_arch;
+#endif
+
+    hme_init_function_ptr_generic(pv_me_context);
+}
diff --git a/encoder/hme_function_selector.h b/encoder/hme_function_selector.h
new file mode 100644
index 0000000..c55f4b5
--- /dev/null
+++ b/encoder/hme_function_selector.h
@@ -0,0 +1,46 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file hme_function_selector.h
+*
+* \brief
+* function selector prototypes
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _HME_FUNCTION_SELECTOR_H_
+#define _HME_FUNCTION_SELECTOR_H_
+
+#include "ihevce_defs.h"
+
+/*****************************************************************************/
+/* Functions */
+/*****************************************************************************/
+/* Installs the ME function pointers for e_processor_arch into the function */
+/* selector of the ME context. pv_me_context is cast to me_master_ctxt_t by the */
+/* definition, so it must point to the ME master context. */
+void hme_init_function_ptr(void *pv_me_context, IV_ARCH_T e_processor_arch);
+
+#endif /* _HME_FUNCTION_SELECTOR_H_ */
diff --git a/encoder/hme_globals.c b/encoder/hme_globals.c
new file mode 100644
index 0000000..0096e0d
--- /dev/null
+++ b/encoder/hme_globals.c
@@ -0,0 +1,8883 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+******************************************************************************
+* @file hme_globals.c
+*
+* @brief
+* Contains all the global definitions used by HME
+*
+* @author
+* Ittiam
+*
+*
+* List of Functions
+*
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+#include <limits.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_multi_thrd_funcs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_entropy_structs.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+#include "ihevce_enc_loop_structs.h"
+#include "ihevce_bs_compute_ctb.h"
+#include "ihevce_global_tables.h"
+#include "ihevce_dep_mngr_interface.h"
+#include "hme_datatype.h"
+#include "hme_interface.h"
+#include "hme_common_defs.h"
+#include "hme_defs.h"
+#include "ihevce_me_instr_set_router.h"
+#include "hme_globals.h"
+#include "hme_utils.h"
+#include "hme_coarse.h"
+#include "hme_refine.h"
+#include "hme_err_compute.h"
+#include "hme_common_utils.h"
+#include "hme_search_algo.h"
+
+/*****************************************************************************/
+/* Globals */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+* @brief Converts an encode order to raster order x coord. Meant for 16x16
+* CU within 64x64 or within 32x32
+******************************************************************************
+*/
+U08 gau1_encode_to_raster_x[16] = { 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3 };
+
+/**
+******************************************************************************
+* @brief Converts an encode order to raster order y coord. Meant for 16x16
+* CU within 64x64 or within 32x32
+******************************************************************************
+*/
+U08 gau1_encode_to_raster_y[16] = { 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3 };
+
+/**
+******************************************************************************
+* @brief Given a CU id within the bigger CU (0..3), and the partition type
+* currently within the small CU, we can figure out candidate
+* partition types for bigger CU. E.g. IF CU id is 0, and is AMP of
+* nLx2N, candidate partitions for bigger CU are nLx2N and 2Nx2N. Indexed [cu id][small CU part type][candidate]; unused candidate slots hold PRT_INVALID.
+******************************************************************************
+*/
+PART_TYPE_T ge_part_type_to_merge_part[4][MAX_PART_TYPES][3] = {
+ /* CU 0: TL */
+ {
+ { PRT_2Nx2N, PRT_2NxN, PRT_Nx2N }, // small CU= 2Nx2N
+ { PRT_2NxnU, PRT_INVALID, PRT_INVALID }, //small CU = 2NxN
+ { PRT_nLx2N, PRT_INVALID, PRT_INVALID }, //small CU = Nx2N
+ { PRT_2Nx2N, PRT_INVALID, PRT_INVALID }, //small CU = NxN
+ { PRT_2Nx2N, PRT_2NxnU, PRT_INVALID }, //small CU = 2NxnU
+ { PRT_2NxN, PRT_2NxnU, PRT_INVALID }, //small CU = 2NxnD
+ { PRT_2Nx2N, PRT_nLx2N, PRT_INVALID }, //small CU = nLx2N
+ { PRT_Nx2N, PRT_nLx2N, PRT_INVALID }, //small CU = nRx2N
+
+ },
+ /* CU 1: TR */
+ {
+ { PRT_2Nx2N, PRT_2NxN, PRT_Nx2N }, // small CU= 2Nx2N
+ { PRT_2NxnU, PRT_INVALID, PRT_INVALID }, //small CU = 2NxN
+ { PRT_nRx2N, PRT_INVALID, PRT_INVALID }, //small CU = Nx2N
+ { PRT_2Nx2N, PRT_INVALID, PRT_INVALID }, //small CU = NxN
+ { PRT_2Nx2N, PRT_2NxnU, PRT_INVALID }, //small CU = 2NxnU
+ { PRT_2NxN, PRT_2NxnU, PRT_INVALID }, //small CU = 2NxnD
+ { PRT_Nx2N, PRT_nRx2N, PRT_INVALID }, //small CU = nLx2N
+ { PRT_2Nx2N, PRT_nRx2N, PRT_INVALID }, //small CU = nRx2N
+
+ },
+ /* CU 2: BL */
+ {
+ { PRT_2Nx2N, PRT_2NxN, PRT_Nx2N }, // small CU= 2Nx2N
+ { PRT_2NxnD, PRT_INVALID, PRT_INVALID }, //small CU = 2NxN
+ { PRT_nLx2N, PRT_INVALID, PRT_INVALID }, //small CU = Nx2N
+ { PRT_2Nx2N, PRT_INVALID, PRT_INVALID }, //small CU = NxN
+ { PRT_2NxN, PRT_2NxnD, PRT_INVALID }, //small CU = 2NxnU
+ { PRT_2Nx2N, PRT_2NxnD, PRT_INVALID }, //small CU = 2NxnD
+ { PRT_2Nx2N, PRT_nLx2N, PRT_INVALID }, //small CU = nLx2N
+ { PRT_Nx2N, PRT_nLx2N, PRT_INVALID }, //small CU = nRx2N (was PRT_2NxN; left-column CU, so candidates match the TL row)
+
+ },
+ /* CU 3: BR */
+ {
+ { PRT_2Nx2N, PRT_2NxN, PRT_Nx2N }, // small CU= 2Nx2N
+ { PRT_2NxnD, PRT_INVALID, PRT_INVALID }, //small CU = 2NxN
+ { PRT_nRx2N, PRT_INVALID, PRT_INVALID }, //small CU = Nx2N
+ { PRT_2Nx2N, PRT_INVALID, PRT_INVALID }, //small CU = NxN
+ { PRT_2NxN, PRT_2NxnD, PRT_INVALID }, //small CU = 2NxnU
+ { PRT_2Nx2N, PRT_2NxnD, PRT_INVALID }, //small CU = 2NxnD
+ { PRT_Nx2N, PRT_nRx2N, PRT_INVALID }, //small CU = nLx2N
+ { PRT_2Nx2N, PRT_nRx2N, PRT_INVALID }, //small CU = nRx2N
+
+ }
+};
+
+/**
+******************************************************************************
+* @brief A given partition type has 1,2 or 4 partitions, each corresponding
+* to a unique partition id PART_ID_T enum type. So, this global converts
+* partition type to a bitmask of corresponding partition ids.
+******************************************************************************
+*/
+S32 gai4_part_type_to_part_mask[MAX_PART_TYPES] = {
+ (ENABLE_2Nx2N), (ENABLE_2NxN), (ENABLE_Nx2N), (ENABLE_NxN),
+ (ENABLE_2NxnU), (ENABLE_2NxnD), (ENABLE_nLx2N), (ENABLE_nRx2N),
+};
+
+/**
+******************************************************************************
+* @brief Reads out the index of function pointer to a sad_compute function
+* of blk given a blk size enumeration
+******************************************************************************
+*/
+U08 gau1_blk_size_to_fp[NUM_BLK_SIZES] = {
+ 0, //BLK_4x4
+ 4, //BLK_4x8
+ 28, //BLK_8x4
+ 8, //BLK_8x8
+ 4, //BLK_4x16
+ 8, //BLK_8x16
+ 12, //BLK_12x16
+ 20, //BLK_16x4
+ 16, //BLK_16x8
+ 32, //BLK_16x12
+ 16, //BLK_16x16
+ 8, //BLK_8x32
+ 16, //BLK_16x32
+ 24, //BLK_24x32
+ 16, //BLK_32x8
+ 16, //BLK_32x16
+ 16, //BLK_32x24
+ 16, //BLK_32x32
+ 16, //BLK_16x64
+ 16, //BLK_32x64
+ 16, //BLK_48x64
+ 16, //BLK_64x16
+ 16, //BLK_64x32
+ 16, //BLK_64x48
+ 16 //BLK_64x64
+};
+
+/**
+******************************************************************************
+* @brief Reads out the width of blk given a blk size enumeration
+******************************************************************************
+*/
+U08 gau1_blk_size_to_wd[NUM_BLK_SIZES] = {
+ 4, //BLK_4x4
+ 4, //BLK_4x8
+ 8, //BLK_8x4
+ 8, //BLK_8x8
+ 4, //BLK_4x16
+ 8, //BLK_8x16
+ 12, //BLK_12x16
+ 16, //BLK_16x4
+ 16, //BLK_16x8
+ 16, //BLK_16x12
+ 16, //BLK_16x16
+ 8, //BLK_8x32
+ 16, //BLK_16x32
+ 24, //BLK_24x32
+ 32, //BLK_32x8
+ 32, //BLK_32x16
+ 32, //BLK_32x24
+ 32, //BLK_32x32
+ 16, //BLK_16x64
+ 32, //BLK_32x64
+ 48, //BLK_48x64
+ 64, //BLK_64x16
+ 64, //BLK_64x32
+ 64, //BLK_64x48
+ 64, //BLK_64x64
+};
+
+/**
+******************************************************************************
+* @brief Reads out the shift to be done for blk given a blk size enumeration. For widths 4/8/16/32/64 the entries are 3/4/5/6/7.
+******************************************************************************
+*/
+U08 gau1_blk_size_to_wd_shift[NUM_BLK_SIZES] = {
+ 3, //BLK_4x4
+ 3, //BLK_4x8
+ 4, //BLK_8x4
+ 4, //BLK_8x8
+ 3, //BLK_4x16
+ 4, //BLK_8x16
+ 12, //BLK_12x16 NOTE(review): equals the width entry rather than a shift like its neighbours - confirm intended
+ 5, //BLK_16x4
+ 5, //BLK_16x8
+ 5, //BLK_16x12
+ 5, //BLK_16x16
+ 4, //BLK_8x32
+ 5, //BLK_16x32
+ 24, //BLK_24x32 NOTE(review): equals the width entry rather than a shift like its neighbours - confirm intended
+ 6, //BLK_32x8
+ 6, //BLK_32x16
+ 6, //BLK_32x24
+ 6, //BLK_32x32
+ 5, //BLK_16x64
+ 6, //BLK_32x64
+ 48, //BLK_48x64 NOTE(review): equals the width entry rather than a shift like its neighbours - confirm intended
+ 7, //BLK_64x16
+ 7, //BLK_64x32
+ 7, //BLK_64x48
+ 7, //BLK_64x64
+};
+/**
+******************************************************************************
+* @brief Reads out the height of blk given a blk size enumeration
+******************************************************************************
+*/
+U08 gau1_blk_size_to_ht[NUM_BLK_SIZES] = {
+ 4, //BLK_4x4
+ 8, //BLK_4x8
+ 4, //BLK_8x4
+ 8, //BLK_8x8
+ 16, //BLK_4x16
+ 16, //BLK_8x16
+ 16, //BLK_12x16
+ 4, //BLK_16x4
+ 8, //BLK_16x8
+ 12, //BLK_16x12
+ 16, //BLK_16x16
+ 32, //BLK_8x32
+ 32, //BLK_16x32
+ 32, //BLK_24x32
+ 8, //BLK_32x8
+ 16, //BLK_32x16
+ 24, //BLK_32x24
+ 32, //BLK_32x32
+ 64, //BLK_16x64
+ 64, //BLK_32x64
+ 64, //BLK_48x64
+ 16, //BLK_64x16
+ 32, //BLK_64x32
+ 48, //BLK_64x48
+ 64, //BLK_64x64
+};
+
+/**
+******************************************************************************
+* @brief Given a minimum pt enum in a 3x3 grid, reads out the list of active
+* search pts in next iteration as a bit-mask, eliminating need to search
+* pts that have already been searched in this iteration.
+******************************************************************************
+*/
+S32 gai4_opt_grid_mask[NUM_GRID_PTS];
+
+/**
+******************************************************************************
+* @brief Given a minimum pt enum in a 3x3 grid, reads out the x offset of
+* the min pt relative to center assuming step size of 1
+******************************************************************************
+*/
+S08 gai1_grid_id_to_x[NUM_GRID_PTS];
+
+/**
+******************************************************************************
+* @brief Given a minimum pt enum in a 3x3 grid, reads out the y offset of
+* the min pt relative to center assuming step size of 1
+******************************************************************************
+*/
+S08 gai1_grid_id_to_y[NUM_GRID_PTS];
+
+/**
+******************************************************************************
+* @brief Lookup of the blk size enum, given a specific partition and cu size
+******************************************************************************
+*/
+BLK_SIZE_T ge_part_id_to_blk_size[NUM_CU_SIZES][TOT_NUM_PARTS];
+
+/**
+******************************************************************************
+* @brief For a given partition split, find number of partitions
+******************************************************************************
+*/
+U08 gau1_num_parts_in_part_type[MAX_PART_TYPES];
+
+/**
+******************************************************************************
+* @brief For a given partition split, returns the enumerations of specific
+* partitions in raster order. E.g. for PRT_2NxN, part id 0 is
+* PART_ID_2NxN_T and part id 1 is PART_ID_2NxN_B
+******************************************************************************
+*/
+PART_ID_T ge_part_type_to_part_id[MAX_PART_TYPES][MAX_NUM_PARTS];
+
+/**
+******************************************************************************
+* @brief For a given partition id, returns the rectangular position and size
+* of partition within cu relative to cu start.
+******************************************************************************
+*/
+part_attr_t gas_part_attr_in_cu[TOT_NUM_PARTS];
+
+/**
+******************************************************************************
+* @brief Gives the CU type enumeration given a blk size.
+******************************************************************************
+*/
+CU_SIZE_T ge_blk_size_to_cu_size[NUM_BLK_SIZES];
+
+/**
+******************************************************************************
+* @brief Given a minimum pt enum in a diamond grid, reads out the list
+* of active search pts in next iteration as a bit-mask, eliminating need
+* to search pts that have already been searched in this iteration.
+******************************************************************************
+*/
+
+S32 gai4_opt_grid_mask_diamond[5];
+
+/**
+******************************************************************************
+* @brief Given a minimum pt enum in a 9 point grid, reads out the list
+* of active search pts in next iteration as a bit-mask, eliminating need
+* to search pts that have already been searched in this iteration.
+******************************************************************************
+*/
+
+S32 gai4_opt_grid_mask_conventional[9];
+
+/**
+******************************************************************************
+* @brief Returns 1 if there are qpel points to the top and bottom of the
+* current point
+******************************************************************************
+*/
+S32 gai4_2pt_qpel_interpol_possible_vert[4][4] = {
+ { 1, 0, 1, 0 }, { 1, 0, 1, 0 }, { 1, 0, 1, 0 }, { 1, 0, 1, 0 }
+};
+
+/**
+******************************************************************************
+* @brief Returns 1 if there are qpel points to the left and right of the
+* current point
+******************************************************************************
+*/
+S32 gai4_2pt_qpel_interpol_possible_horz[4][4] = {
+ { 1, 1, 1, 1 }, { 0, 0, 0, 0 }, { 1, 1, 1, 1 }, { 0, 0, 0, 0 }
+};
+
+S32 gai4_select_qpel_function_vert[4][16] = { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { 3, 3, 3, 3, 1, 3, 1, 3, 3, 3, 3, 3, 1, 3, 1, 3 },
+ { 4, 4, 4, 4, 2, 4, 2, 4, 4, 4, 4, 4, 2, 4, 2, 4 },
+ { 5, 5, 5, 5, 7, 6, 7, 6, 5, 5, 5, 5, 7, 6, 7, 6 } };
+
+S32 gai4_select_qpel_function_horz[4][16] = { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
+ { 3, 1, 3, 1, 3, 3, 3, 3, 3, 1, 3, 1, 3, 3, 3, 3 },
+ { 4, 2, 4, 2, 4, 4, 4, 4, 4, 2, 4, 2, 4, 4, 4, 4 },
+ { 5, 7, 5, 7, 5, 6, 5, 6, 5, 7, 5, 7, 5, 6, 5, 6 } };
+
+U08 gau1_cu_id_raster_to_enc[4][4] = { /* [y][x] raster position -> encode order; inverse of gau1_encode_to_raster_x/y */
+ { 0, 1, 4, 5 }, { 2, 3, 6, 7 }, { 8, 9, 12, 13 }, { 10, 11, 14, 15 }
+};
+
+/**
+******************************************************************************
+* @brief Given a CU size, this array returns blk size enum
+******************************************************************************
+*/
+BLK_SIZE_T ge_cu_size_to_blk_size[NUM_CU_SIZES];
+
+/**
+******************************************************************************
+* @brief Given a part type, returns whether the part type is vertically
+* oriented.
+******************************************************************************
+*/
+U08 gau1_is_vert_part[MAX_PART_TYPES];
+
+/**
+******************************************************************************
+* @brief Given a partition, returns the number of best results to consider
+* for full pel refinement.
+******************************************************************************
+*/
+U08 gau1_num_best_results_PQ[TOT_NUM_PARTS];
+U08 gau1_num_best_results_HQ[TOT_NUM_PARTS];
+U08 gau1_num_best_results_MS[TOT_NUM_PARTS];
+U08 gau1_num_best_results_HS[TOT_NUM_PARTS];
+U08 gau1_num_best_results_XS[TOT_NUM_PARTS];
+U08 gau1_num_best_results_XS25[TOT_NUM_PARTS];
+
+/**
+******************************************************************************
+* @brief gau1_cu_tr_valid[y][x] returns the validity of a top rt candt for
+* CU with raster id x, y within CTB. Valid for 16x16 CUs and above
+******************************************************************************
+*/
+U08 gau1_cu_tr_valid[4][4] = { { 1, 1, 1, 1 }, { 1, 0, 1, 0 }, { 1, 1, 1, 0 }, { 1, 0, 1, 0 } };
+/**
+******************************************************************************
+* @brief gau1_cu_bl_valid[y][x] returns the validity of a bot lt candt for
+* CU with raster id x, y within CTB. Valid for 16x16 CUs and above
+******************************************************************************
+*/
+U08 gau1_cu_bl_valid[4][4] = { { 1, 0, 1, 0 }, { 1, 0, 0, 0 }, { 1, 0, 1, 0 }, { 0, 0, 0, 0 } };
+
+/**
+******************************************************************************
+* @brief Returns the validity of top rt candt for a given part id, will not
+* be valid if tr of a part pts to a non causal neighbour like 16x8B
+******************************************************************************
+*/
+U08 gau1_partid_tr_valid[TOT_NUM_PARTS];
+/**
+******************************************************************************
+* @brief Returns the validity of bottom left candt for given part id, will
+* not be valid, if bl of a part pts to a non causal neighbour like 8x16R
+******************************************************************************
+*/
+U08 gau1_partid_bl_valid[TOT_NUM_PARTS];
+
+/**
+******************************************************************************
+* @brief The ordinal position of a partition id within the CU, e.g. PART_ID_16x8_B is 2nd
+******************************************************************************
+*/
+U08 gau1_part_id_to_part_num[TOT_NUM_PARTS];
+
+/**
+******************************************************************************
+* @brief Returns partition type for a given partition id, e.g.
+* PART_ID_16x8_B returns PRT_TYPE_16x8
+******************************************************************************
+*/
+PART_TYPE_T ge_part_id_to_part_type[TOT_NUM_PARTS];
+
+/**
+******************************************************************************
+* @brief given raster id x, y of 8x8 blk in 64x64 CTB, return the enc order
+******************************************************************************
+*/
+U08 gau1_8x8_cu_id_raster_to_enc[8][8] = {
+ { 0, 1, 4, 5, 16, 17, 20, 21 }, { 2, 3, 6, 7, 18, 19, 22, 23 },
+ { 8, 9, 12, 13, 24, 25, 28, 29 }, { 10, 11, 14, 15, 26, 27, 30, 31 },
+ { 32, 33, 36, 37, 48, 49, 52, 53 }, { 34, 35, 38, 39, 50, 51, 54, 55 },
+ { 40, 41, 44, 45, 56, 57, 60, 61 }, { 42, 43, 46, 47, 58, 59, 62, 63 }
+};
+
+/**
+******************************************************************************
+* @brief Return the bits for a given partition id which gets added to the
+* cost. Although the bits are for a given partition type, we add off the
+* bits per partition while computing mv cost. For example, if the bits for
+* 2NxN part type is 3, we add 1.5 bits for 2NxN_T and 1.5 for 2NxN_B.
+* Hence this is stored in Q1 format
+******************************************************************************
+*/
+U08 gau1_bits_for_part_id_q1[TOT_NUM_PARTS];
+
+/**
+*******************************************************************************
+@brief number of bits per bin (floating-point constant used by HME_GET_CAB_BITS)
+*******************************************************************************
+*/
+#define HME_CABAC_BITS_PER_BIN 0.5
+
+/**
+*******************************************************************************
+@brief bin count to bit count conversion. HME_GET_CAB_BITS scales by HME_CABAC_BITS_PER_BIN, adds 0.5 and converts to U08; HME_GET_BITS_FROM_BINS is the integer form (multiply by 128, shift right by 8). Arguments are parenthesized so compound expressions such as a + b are scaled as a whole.
+*******************************************************************************
+*/
+#define HME_CAB_BITS_PER_BIN_Q8 128
+#define HME_GET_CAB_BITS(x) ((U08)(((x)*HME_CABAC_BITS_PER_BIN) + 0.5))
+#define HME_GET_BITS_FROM_BINS(x) (((x)*HME_CAB_BITS_PER_BIN_Q8) >> 8)
+
+/**
+******************************************************************************
+* @brief For a given partition split, num bits to encode the partition type
+* merge flags, mvp flags, split tu bits;
+* assuming one bin equal to 0.5 bit for now
+******************************************************************************
+*/
+U08 gau1_num_bits_for_part_type[MAX_PART_TYPES] = {
+ /* bits for part type + merge/mvp flags + split cu/tu; every entry is currently 0 */
+ 0, //HME_GET_CAB_BITS(0), /* PRT_2Nx2N */
+ 0, //HME_GET_CAB_BITS(0), /* PRT_2NxN */
+ 0, //HME_GET_CAB_BITS(0), /* PRT_Nx2N */
+ 0, //HME_GET_CAB_BITS(0), /* PRT_NxN */
+ 0, //HME_GET_CAB_BITS(0), /* PRT_2NxnU */
+ 0, //HME_GET_CAB_BITS(0), /* PRT_2NxnD */
+ 0, //HME_GET_CAB_BITS(0), /* PRT_nLx2N */
+ 0, //HME_GET_CAB_BITS(0) /* PRT_nRx2N */
+};
+
+/**
+*******************************************************************************
+* @brief Used exclusively in the Intrinsics version of the function
+* 'hme_combine_4x4_sads_and_compute_cost_high_speed' instead
+* of calling get_range(). Each row repeats a single value 8 times; the value steps through 1,2,4,6,8,10,12,14 at row indices 0,1,2,4,8,16,32,64.
+*******************************************************************************
+*/
+S16 gi2_mvy_range[MAX_MVY_SUPPORTED_IN_COARSE_LAYER + 1][8] = {
+ { 1, 1, 1, 1, 1, 1, 1, 1 }, //0
+ { 2, 2, 2, 2, 2, 2, 2, 2 }, //1
+ { 4, 4, 4, 4, 4, 4, 4, 4 }, //2
+ { 4, 4, 4, 4, 4, 4, 4, 4 }, { 6, 6, 6, 6, 6, 6, 6, 6 }, //4
+ { 6, 6, 6, 6, 6, 6, 6, 6 }, { 6, 6, 6, 6, 6, 6, 6, 6 },
+ { 6, 6, 6, 6, 6, 6, 6, 6 }, { 8, 8, 8, 8, 8, 8, 8, 8 }, //8
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 10, 10, 10, 10, 10, 10, 10, 10 }, //16
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 12, 12, 12, 12, 12, 12, 12, 12 }, //32
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 14, 14, 14, 14, 14, 14, 14, 14 } //64
+};
+
+/**
+*******************************************************************************
+* @brief Used exclusively in the Intrinsics version of the function
+* 'hme_combine_4x4_sads_and_compute_cost_high_speed' instead
+* of calling get_range()
+*******************************************************************************
+*/
+S16 gi2_mvx_range[MAX_MVX_SUPPORTED_IN_COARSE_LAYER * 2 + 1][8] = {
+ { 16, 14, 14, 14, 14, 14, 14, 14 }, //-128
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 },
+
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, //-124
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 },
+
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, //-120
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 },
+
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, //-116
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 },
+
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, //-112
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 },
+
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, //-108
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 },
+
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, //-104
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 },
+
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, //-100
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 },
+
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, //-96
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 },
+
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, //-92
+ { 14, 14, 14, 14, 14, 14, 14, 12 }, { 14, 14, 14, 14, 14, 14, 14, 12 },
+ { 14, 14, 14, 14, 14, 14, 14, 12 },
+
+ { 14, 14, 14, 14, 14, 14, 14, 12 }, //-88
+ { 14, 14, 14, 14, 14, 14, 12, 12 }, { 14, 14, 14, 14, 14, 14, 12, 12 },
+ { 14, 14, 14, 14, 14, 14, 12, 12 },
+
+ { 14, 14, 14, 14, 14, 14, 12, 12 }, //-84
+ { 14, 14, 14, 14, 14, 12, 12, 12 }, { 14, 14, 14, 14, 14, 12, 12, 12 },
+ { 14, 14, 14, 14, 14, 12, 12, 12 },
+
+ { 14, 14, 14, 14, 14, 12, 12, 12 }, //-80
+ { 14, 14, 14, 14, 12, 12, 12, 12 }, { 14, 14, 14, 14, 12, 12, 12, 12 },
+ { 14, 14, 14, 14, 12, 12, 12, 12 },
+
+ { 14, 14, 14, 14, 12, 12, 12, 12 }, //-76
+ { 14, 14, 14, 12, 12, 12, 12, 12 }, { 14, 14, 14, 12, 12, 12, 12, 12 },
+ { 14, 14, 14, 12, 12, 12, 12, 12 },
+
+ { 14, 14, 14, 12, 12, 12, 12, 12 }, //-72
+ { 14, 14, 12, 12, 12, 12, 12, 12 }, { 14, 14, 12, 12, 12, 12, 12, 12 },
+ { 14, 14, 12, 12, 12, 12, 12, 12 },
+
+ { 14, 14, 12, 12, 12, 12, 12, 12 }, //-68
+ { 14, 12, 12, 12, 12, 12, 12, 12 }, { 14, 12, 12, 12, 12, 12, 12, 12 },
+ { 14, 12, 12, 12, 12, 12, 12, 12 },
+
+ { 14, 12, 12, 12, 12, 12, 12, 12 }, //-64
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 }, //-60
+
+ { 12, 12, 12, 12, 12, 12, 12, 10 }, //-59
+ { 12, 12, 12, 12, 12, 12, 12, 10 }, { 12, 12, 12, 12, 12, 12, 12, 10 },
+ { 12, 12, 12, 12, 12, 12, 12, 10 },
+
+ { 12, 12, 12, 12, 12, 12, 10, 10 }, //-55
+ { 12, 12, 12, 12, 12, 12, 10, 10 }, { 12, 12, 12, 12, 12, 12, 10, 10 },
+ { 12, 12, 12, 12, 12, 12, 10, 10 },
+
+ { 12, 12, 12, 12, 12, 10, 10, 10 }, //-51
+ { 12, 12, 12, 12, 12, 10, 10, 10 }, { 12, 12, 12, 12, 12, 10, 10, 10 },
+ { 12, 12, 12, 12, 12, 10, 10, 10 },
+
+ { 12, 12, 12, 12, 10, 10, 10, 10 }, //-47
+ { 12, 12, 12, 12, 10, 10, 10, 10 }, { 12, 12, 12, 12, 10, 10, 10, 10 },
+ { 12, 12, 12, 12, 10, 10, 10, 10 },
+
+ { 12, 12, 12, 10, 10, 10, 10, 8 }, //-43
+ { 12, 12, 12, 10, 10, 10, 10, 8 }, { 12, 12, 12, 10, 10, 10, 10, 8 },
+ { 12, 12, 12, 10, 10, 10, 10, 8 },
+
+ { 12, 12, 10, 10, 10, 10, 8, 8 }, //-39
+ { 12, 12, 10, 10, 10, 10, 8, 8 }, { 12, 12, 10, 10, 10, 10, 8, 8 },
+ { 12, 12, 10, 10, 10, 10, 8, 8 },
+
+ { 12, 10, 10, 10, 10, 8, 8, 6 }, //-35
+ { 12, 10, 10, 10, 10, 8, 8, 6 }, { 12, 10, 10, 10, 10, 8, 8, 6 },
+ { 12, 10, 10, 10, 10, 8, 8, 6 },
+
+ { 10, 10, 10, 10, 8, 8, 6, 4 }, //-31
+ { 10, 10, 10, 10, 8, 8, 6, 4 }, { 10, 10, 10, 10, 8, 8, 6, 2 },
+ { 10, 10, 10, 10, 8, 8, 6, 1 },
+
+ { 10, 10, 10, 8, 8, 6, 4, 2 }, //-27
+ { 10, 10, 10, 8, 8, 6, 4, 4 }, { 10, 10, 10, 8, 8, 6, 2, 4 },
+ { 10, 10, 10, 8, 8, 6, 1, 6 },
+
+ { 10, 10, 8, 8, 6, 4, 2, 6 }, //-23
+ { 10, 10, 8, 8, 6, 4, 4, 6 }, { 10, 10, 8, 8, 6, 2, 4, 6 },
+ { 10, 10, 8, 8, 6, 1, 6, 8 }, //8@7
+
+ { 10, 8, 8, 6, 4, 2, 6, 8 }, //-19
+ { 10, 8, 8, 6, 4, 4, 6, 8 }, { 10, 8, 8, 6, 2, 4, 6, 8 },
+ { 10, 8, 8, 6, 1, 6, 8, 8 }, //12@7
+
+ { 8, 8, 6, 4, 2, 6, 8, 8 }, //-15
+ { 8, 8, 6, 4, 4, 6, 8, 8 }, { 8, 8, 6, 2, 4, 6, 8, 8 },
+ { 8, 8, 6, 1, 6, 8, 8, 10 }, //16@7
+
+ { 8, 6, 4, 2, 6, 8, 8, 10 }, //-11
+ { 8, 6, 4, 4, 6, 8, 8, 10 }, { 8, 6, 2, 4, 6, 8, 8, 10 },
+ { 8, 6, 1, 6, 8, 8, 10, 10 }, //20@7
+
+ { 6, 4, 2, 6, 8, 8, 10, 10 }, //-7
+ { 6, 4, 4, 6, 8, 8, 10, 10 }, { 6, 2, 4, 6, 8, 8, 10, 10 },
+ { 6, 1, 6, 8, 8, 10, 10, 10 }, //24@7
+
+ { 4, 2, 6, 8, 8, 10, 10, 10 }, //-3
+ { 4, 4, 6, 8, 8, 10, 10, 10 }, { 2, 4, 6, 8, 8, 10, 10, 10 },
+ { 1, 6, 8, 8, 10, 10, 10, 10 }, //28@7
+
+ { 2, 6, 8, 8, 10, 10, 10, 10 }, //1
+ { 4, 6, 8, 8, 10, 10, 10, 10 }, { 4, 6, 8, 8, 10, 10, 10, 10 },
+ { 6, 8, 8, 10, 10, 10, 10, 12 }, //32@7
+
+ { 6, 8, 8, 10, 10, 10, 10, 12 }, //5
+ { 6, 8, 8, 10, 10, 10, 10, 12 }, { 6, 8, 8, 10, 10, 10, 10, 12 },
+ { 8, 8, 10, 10, 10, 10, 12, 12 }, //36@7
+
+ { 8, 8, 10, 10, 10, 10, 12, 12 }, //9
+ { 8, 8, 10, 10, 10, 10, 12, 12 }, { 8, 8, 10, 10, 10, 10, 12, 12 },
+ { 8, 10, 10, 10, 10, 12, 12, 12 }, //40@7
+
+ { 8, 10, 10, 10, 10, 12, 12, 12 }, //13
+ { 8, 10, 10, 10, 10, 12, 12, 12 }, { 8, 10, 10, 10, 10, 12, 12, 12 },
+ { 10, 10, 10, 10, 12, 12, 12, 12 }, //44@7
+
+ { 10, 10, 10, 10, 12, 12, 12, 12 }, //17
+ { 10, 10, 10, 10, 12, 12, 12, 12 }, { 10, 10, 10, 10, 12, 12, 12, 12 },
+ { 10, 10, 10, 12, 12, 12, 12, 12 }, //48@7
+
+ { 10, 10, 10, 12, 12, 12, 12, 12 }, //21
+ { 10, 10, 10, 12, 12, 12, 12, 12 }, { 10, 10, 10, 12, 12, 12, 12, 12 },
+ { 10, 10, 12, 12, 12, 12, 12, 12 }, //52@7
+
+ { 10, 10, 12, 12, 12, 12, 12, 12 }, //25
+ { 10, 10, 12, 12, 12, 12, 12, 12 }, { 10, 10, 12, 12, 12, 12, 12, 12 },
+ { 10, 12, 12, 12, 12, 12, 12, 12 }, //56@7
+
+ { 10, 12, 12, 12, 12, 12, 12, 12 }, //29
+ { 10, 12, 12, 12, 12, 12, 12, 12 }, { 10, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, //60@7
+
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, //33
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 14 }, //64@7
+
+ { 12, 12, 12, 12, 12, 12, 12, 14 }, //37
+ { 12, 12, 12, 12, 12, 12, 12, 14 }, { 12, 12, 12, 12, 12, 12, 12, 14 },
+ { 12, 12, 12, 12, 12, 12, 14, 14 }, //64@6
+
+ { 12, 12, 12, 12, 12, 12, 14, 14 }, //41
+ { 12, 12, 12, 12, 12, 12, 14, 14 }, { 12, 12, 12, 12, 12, 12, 14, 14 },
+ { 12, 12, 12, 12, 12, 14, 14, 14 }, //64@5
+
+ { 12, 12, 12, 12, 12, 14, 14, 14 }, //45
+ { 12, 12, 12, 12, 12, 14, 14, 14 }, { 12, 12, 12, 12, 12, 14, 14, 14 },
+ { 12, 12, 12, 12, 14, 14, 14, 14 }, //64@4
+
+ { 12, 12, 12, 12, 14, 14, 14, 14 }, //49
+ { 12, 12, 12, 12, 14, 14, 14, 14 }, { 12, 12, 12, 12, 14, 14, 14, 14 },
+ { 12, 12, 12, 14, 14, 14, 14, 14 }, //64@3
+
+ { 12, 12, 12, 14, 14, 14, 14, 14 }, //53
+ { 12, 12, 12, 14, 14, 14, 14, 14 }, { 12, 12, 12, 14, 14, 14, 14, 14 },
+ { 12, 12, 14, 14, 14, 14, 14, 14 }, //64@2
+
+ { 12, 12, 14, 14, 14, 14, 14, 14 }, //57
+ { 12, 12, 14, 14, 14, 14, 14, 14 }, { 12, 12, 14, 14, 14, 14, 14, 14 },
+ { 12, 14, 14, 14, 14, 14, 14, 14 }, //64@1
+
+ { 12, 14, 14, 14, 14, 14, 14, 14 }, //61
+ { 12, 14, 14, 14, 14, 14, 14, 14 }, { 12, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, //92@7
+
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, //65
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, //96@7
+
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, //69
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, //100@7
+
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, //73
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, //104@7
+
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, //77
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, //108@7
+
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, //81
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, //112@7
+
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, //85
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, //116@7
+
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, //89
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, //120@7
+
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, //93
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, //124@7
+
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, //97
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 16 }, //128@7
+
+ { 14, 14, 14, 14, 14, 14, 14, 16 }, //101
+ { 14, 14, 14, 14, 14, 14, 14, 16 }, { 14, 14, 14, 14, 14, 14, 14, 16 },
+ { 14, 14, 14, 14, 14, 14, 16, 16 }, //132@7
+
+ { 14, 14, 14, 14, 14, 14, 16, 16 }, //105
+ { 14, 14, 14, 14, 14, 14, 16, 16 }, { 14, 14, 14, 14, 14, 14, 16, 16 },
+ { 14, 14, 14, 14, 14, 16, 16, 16 }, //136@7
+
+ { 14, 14, 14, 14, 14, 16, 16, 16 }, //109
+ { 14, 14, 14, 14, 14, 16, 16, 16 }, { 14, 14, 14, 14, 14, 16, 16, 16 },
+ { 14, 14, 14, 14, 16, 16, 16, 16 }, //140@7
+
+ { 14, 14, 14, 14, 16, 16, 16, 16 }, //113
+ { 14, 14, 14, 14, 16, 16, 16, 16 }, { 14, 14, 14, 14, 16, 16, 16, 16 },
+ { 14, 14, 14, 16, 16, 16, 16, 16 }, //144@7
+
+ { 14, 14, 14, 16, 16, 16, 16, 16 }, //117
+ { 14, 14, 14, 16, 16, 16, 16, 16 }, { 14, 14, 14, 16, 16, 16, 16, 16 },
+ { 14, 14, 16, 16, 16, 16, 16, 16 }, //148@7
+
+ { 14, 14, 16, 16, 16, 16, 16, 16 }, //121
+ { 14, 14, 16, 16, 16, 16, 16, 16 }, { 14, 14, 16, 16, 16, 16, 16, 16 },
+ { 14, 16, 16, 16, 16, 16, 16, 16 }, //152@7
+
+ { 14, 16, 16, 16, 16, 16, 16, 16 }, //125
+ { 14, 16, 16, 16, 16, 16, 16, 16 }, { 14, 16, 16, 16, 16, 16, 16, 16 },
+ { 16, 16, 16, 16, 16, 16, 16, 16 }, //156@7
+};
+
+/**
+******************************************************************************
+* @brief Lookup table giving the area, in number of pixels, of each
+* partition type assuming block size is 16x16. There is one entry per
+* partition, in the order named by the trailing comments (presumably the
+* PART_ID_* enum order - confirm against the enum definition). The whole
+* 2Nx2N block is 256 pixels, and the partitions that make up any one
+* split (two halves, or four quarters for NxN) add up to 256.
+******************************************************************************
+*/
+S32 gai4_partition_area[TOT_NUM_PARTS] = {
+ 256, //PART_ID_2Nx2N
+ 128, //PART_ID_2NxN_T
+ 128, //PART_ID_2NxN_B
+ 128, //PART_ID_Nx2N_L
+ 128, //PART_ID_Nx2N_R
+ 64, //PART_ID_NxN_TL
+ 64, //PART_ID_NxN_TR
+ 64, //PART_ID_NxN_BL
+ 64, //PART_ID_NxN_BR
+ 64, //PART_ID_2NxnU_T
+ 192, //PART_ID_2NxnU_B
+ 192, //PART_ID_2NxnD_T
+ 64, //PART_ID_2NxnD_B
+ 64, //PART_ID_nLx2N_L
+ 192, //PART_ID_nLx2N_R
+ 192, //PART_ID_nRx2N_L
+ 64 //PART_ID_nRx2N_R
+};
+
+/* Search candidate priorities for L1 and L2 ME (per the array name).
+ * There are 2 lists: one list for the PQ and HQ presets, the other list
+ * for all other presets. Each list holds one entry per search candidate
+ * type, in the order named by the trailing comments.
+ * NOTE(review): which list index belongs to which preset group is not
+ * visible here - confirm against the code that indexes this table.
+ * In each list the entries that are not UCHAR_MAX are the distinct
+ * values 0..17; UCHAR_MAX presumably marks a candidate type that is not
+ * used - TODO confirm. */
+const U08 gau1_search_cand_priority_in_l1_and_l2_me[2][NUM_SEARCH_CAND_TYPES] = {
+ { /* list 0 */
+ 17, //ZERO_MV
+ UCHAR_MAX, //ZERO_MV_ALTREF
+ 0, //SPATIAL_LEFT0
+ 1, //SPATIAL_TOP0
+ 2, //SPATIAL_TOP_RIGHT0
+ 3, //SPATIAL_TOP_LEFT0
+ UCHAR_MAX, //SPATIAL_LEFT1
+ UCHAR_MAX, //SPATIAL_TOP1
+ UCHAR_MAX, //SPATIAL_TOP_RIGHT1
+ UCHAR_MAX, //SPATIAL_TOP_LEFT1
+ 4, //PROJECTED_COLOC0
+ 5, //PROJECTED_COLOC1
+ UCHAR_MAX, //PROJECTED_COLOC2
+ UCHAR_MAX, //PROJECTED_COLOC3
+ UCHAR_MAX, //PROJECTED_COLOC4
+ UCHAR_MAX, //PROJECTED_COLOC5
+ UCHAR_MAX, //PROJECTED_COLOC6
+ UCHAR_MAX, //PROJECTED_COLOC7
+ UCHAR_MAX, //PROJECTED_COLOC_TR0
+ UCHAR_MAX, //PROJECTED_COLOC_TR1
+ UCHAR_MAX, //PROJECTED_COLOC_BL0
+ UCHAR_MAX, //PROJECTED_COLOC_BL1
+ UCHAR_MAX, //PROJECTED_COLOC_BR0
+ UCHAR_MAX, //PROJECTED_COLOC_BR1
+ UCHAR_MAX, //PROJECTED_TOP0
+ 14, //PROJECTED_TOP1
+ UCHAR_MAX, //PROJECTED_TOP_RIGHT0
+ 15, //PROJECTED_TOP_RIGHT1
+ UCHAR_MAX, //PROJECTED_TOP_LEFT0
+ 16, //PROJECTED_TOP_LEFT1
+ 6, //PROJECTED_RIGHT0
+ 10, //PROJECTED_RIGHT1
+ 7, //PROJECTED_BOTTOM0
+ 11, //PROJECTED_BOTTOM1
+ 8, //PROJECTED_BOTTOM_RIGHT0
+ 12, //PROJECTED_BOTTOM_RIGHT1
+ 9, //PROJECTED_BOTTOM_LEFT0
+ 13, //PROJECTED_BOTTOM_LEFT1
+ UCHAR_MAX, //COLOCATED_GLOBAL_MV0
+ UCHAR_MAX, //COLOCATED_GLOBAL_MV1
+ UCHAR_MAX, //PROJECTED_TOP2
+ UCHAR_MAX, //PROJECTED_TOP3
+ UCHAR_MAX, //PROJECTED_TOP_RIGHT2
+ UCHAR_MAX, //PROJECTED_TOP_RIGHT3
+ UCHAR_MAX, //PROJECTED_TOP_LEFT2
+ UCHAR_MAX, //PROJECTED_TOP_LEFT3
+ UCHAR_MAX, //PROJECTED_RIGHT2
+ UCHAR_MAX, //PROJECTED_RIGHT3
+ UCHAR_MAX, //PROJECTED_BOTTOM2
+ UCHAR_MAX, //PROJECTED_BOTTOM3
+ UCHAR_MAX, //PROJECTED_BOTTOM_RIGHT2
+ UCHAR_MAX, //PROJECTED_BOTTOM_RIGHT3
+ UCHAR_MAX, //PROJECTED_BOTTOM_LEFT2
+ UCHAR_MAX //PROJECTED_BOTTOM_LEFT3
+ },
+ { /* list 1 */
+ 10, //ZERO_MV
+ UCHAR_MAX, //ZERO_MV_ALTREF
+ 0, //SPATIAL_LEFT0
+ UCHAR_MAX, //SPATIAL_TOP0
+ UCHAR_MAX, //SPATIAL_TOP_RIGHT0
+ UCHAR_MAX, //SPATIAL_TOP_LEFT0
+ UCHAR_MAX, //SPATIAL_LEFT1
+ UCHAR_MAX, //SPATIAL_TOP1
+ UCHAR_MAX, //SPATIAL_TOP_RIGHT1
+ UCHAR_MAX, //SPATIAL_TOP_LEFT1
+ 1, //PROJECTED_COLOC0
+ 3, //PROJECTED_COLOC1
+ UCHAR_MAX, //PROJECTED_COLOC2
+ UCHAR_MAX, //PROJECTED_COLOC3
+ UCHAR_MAX, //PROJECTED_COLOC4
+ UCHAR_MAX, //PROJECTED_COLOC5
+ UCHAR_MAX, //PROJECTED_COLOC6
+ UCHAR_MAX, //PROJECTED_COLOC7
+ UCHAR_MAX, //PROJECTED_COLOC_TR0
+ UCHAR_MAX, //PROJECTED_COLOC_TR1
+ UCHAR_MAX, //PROJECTED_COLOC_BL0
+ UCHAR_MAX, //PROJECTED_COLOC_BL1
+ UCHAR_MAX, //PROJECTED_COLOC_BR0
+ UCHAR_MAX, //PROJECTED_COLOC_BR1
+ 2, //PROJECTED_TOP0
+ 11, //PROJECTED_TOP1
+ 4, //PROJECTED_TOP_RIGHT0
+ 12, //PROJECTED_TOP_RIGHT1
+ 5, //PROJECTED_TOP_LEFT0
+ 13, //PROJECTED_TOP_LEFT1
+ 6, //PROJECTED_RIGHT0
+ 14, //PROJECTED_RIGHT1
+ 7, //PROJECTED_BOTTOM0
+ 15, //PROJECTED_BOTTOM1
+ 8, //PROJECTED_BOTTOM_RIGHT0
+ 16, //PROJECTED_BOTTOM_RIGHT1
+ 9, //PROJECTED_BOTTOM_LEFT0
+ 17, //PROJECTED_BOTTOM_LEFT1
+ UCHAR_MAX, //COLOCATED_GLOBAL_MV0
+ UCHAR_MAX, //COLOCATED_GLOBAL_MV1
+ UCHAR_MAX, //PROJECTED_TOP2
+ UCHAR_MAX, //PROJECTED_TOP3
+ UCHAR_MAX, //PROJECTED_TOP_RIGHT2
+ UCHAR_MAX, //PROJECTED_TOP_RIGHT3
+ UCHAR_MAX, //PROJECTED_TOP_LEFT2
+ UCHAR_MAX, //PROJECTED_TOP_LEFT3
+ UCHAR_MAX, //PROJECTED_RIGHT2
+ UCHAR_MAX, //PROJECTED_RIGHT3
+ UCHAR_MAX, //PROJECTED_BOTTOM2
+ UCHAR_MAX, //PROJECTED_BOTTOM3
+ UCHAR_MAX, //PROJECTED_BOTTOM_RIGHT2
+ UCHAR_MAX, //PROJECTED_BOTTOM_RIGHT3
+ UCHAR_MAX, //PROJECTED_BOTTOM_LEFT2
+ UCHAR_MAX //PROJECTED_BOTTOM_LEFT3
+ }
+};
+
+/* 12 cases are - */
+/* case 0 - P picture, num_refs=1, 4x4 in L1ME = 0 */
+/* case 1 - P picture, num_refs=1, 4x4 in L1ME = 1 */
+/* case 2 - P picture, num_refs=2, 4x4 in L1ME = 0 */
+/* case 3 - P picture, num_refs=2, 4x4 in L1ME = 1 */
+/* case 4 - P picture, num_refs=3, 4x4 in L1ME = 0 */
+/* case 5 - P picture, num_refs=3, 4x4 in L1ME = 1 */
+/* case 6 - P picture, num_refs=4, 4x4 in L1ME = 0 */
+/* case 7 - P picture, num_refs=4, 4x4 in L1ME = 1 */
+/* case 8 - B picture, num_refs=1, 4x4 in L1ME = 0 */
+/* case 9 - B picture, num_refs=1, 4x4 in L1ME = 1 */
+/* case 10 - B picture, num_refs=2, 4x4 in L1ME = 0 */
+/* case 11 - B picture, num_refs=2, 4x4 in L1ME = 1 */
+const U08 gau1_search_cand_priority_in_l0_me[12][NUM_SEARCH_CAND_TYPES] = {
+ { /* case 0: entries other than UCHAR_MAX are the distinct priorities 0..10 */
+ 10, //ZERO_MV
+ UCHAR_MAX, //ZERO_MV_ALTREF
+ 0, //SPATIAL_LEFT0
+ UCHAR_MAX, //SPATIAL_TOP0
+ UCHAR_MAX, //SPATIAL_TOP_RIGHT0
+ UCHAR_MAX, //SPATIAL_TOP_LEFT0
+ UCHAR_MAX, //SPATIAL_LEFT1
+ UCHAR_MAX, //SPATIAL_TOP1
+ UCHAR_MAX, //SPATIAL_TOP_RIGHT1
+ UCHAR_MAX, //SPATIAL_TOP_LEFT1
+ 2, //PROJECTED_COLOC0
+ 3, //PROJECTED_COLOC1
+ UCHAR_MAX, //PROJECTED_COLOC2
+ UCHAR_MAX, //PROJECTED_COLOC3
+ UCHAR_MAX, //PROJECTED_COLOC4
+ UCHAR_MAX, //PROJECTED_COLOC5
+ UCHAR_MAX, //PROJECTED_COLOC6
+ UCHAR_MAX, //PROJECTED_COLOC7
+ UCHAR_MAX, //PROJECTED_COLOC_TR0
+ UCHAR_MAX, //PROJECTED_COLOC_TR1
+ UCHAR_MAX, //PROJECTED_COLOC_BL0
+ UCHAR_MAX, //PROJECTED_COLOC_BL1
+ UCHAR_MAX, //PROJECTED_COLOC_BR0
+ UCHAR_MAX, //PROJECTED_COLOC_BR1
+ 1, //PROJECTED_TOP0
+ UCHAR_MAX, //PROJECTED_TOP1
+ 4, //PROJECTED_TOP_RIGHT0
+ UCHAR_MAX, //PROJECTED_TOP_RIGHT1
+ 5, //PROJECTED_TOP_LEFT0
+ UCHAR_MAX, //PROJECTED_TOP_LEFT1
+ 6, //PROJECTED_RIGHT0
+ UCHAR_MAX, //PROJECTED_RIGHT1
+ 7, //PROJECTED_BOTTOM0
+ UCHAR_MAX, //PROJECTED_BOTTOM1
+ 8, //PROJECTED_BOTTOM_RIGHT0
+ UCHAR_MAX, //PROJECTED_BOTTOM_RIGHT1
+ 9, //PROJECTED_BOTTOM_LEFT0
+ UCHAR_MAX, //PROJECTED_BOTTOM_LEFT1
+ UCHAR_MAX, //COLOCATED_GLOBAL_MV0
+ UCHAR_MAX, //COLOCATED_GLOBAL_MV1
+ UCHAR_MAX, //PROJECTED_TOP2
+ UCHAR_MAX, //PROJECTED_TOP3
+ UCHAR_MAX, //PROJECTED_TOP_RIGHT2
+ UCHAR_MAX, //PROJECTED_TOP_RIGHT3
+ UCHAR_MAX, //PROJECTED_TOP_LEFT2
+ UCHAR_MAX, //PROJECTED_TOP_LEFT3
+ UCHAR_MAX, //PROJECTED_RIGHT2
+ UCHAR_MAX, //PROJECTED_RIGHT3
+ UCHAR_MAX, //PROJECTED_BOTTOM2
+ UCHAR_MAX, //PROJECTED_BOTTOM3
+ UCHAR_MAX, //PROJECTED_BOTTOM_RIGHT2
+ UCHAR_MAX, //PROJECTED_BOTTOM_RIGHT3
+ UCHAR_MAX, //PROJECTED_BOTTOM_LEFT2
+ UCHAR_MAX //PROJECTED_BOTTOM_LEFT3
+ },
+ { /* case 1: entries other than UCHAR_MAX are the distinct priorities 0..13 */
+ 13, //ZERO_MV
+ UCHAR_MAX, //ZERO_MV_ALTREF
+ 0, //SPATIAL_LEFT0
+ UCHAR_MAX, //SPATIAL_TOP0
+ UCHAR_MAX, //SPATIAL_TOP_RIGHT0
+ UCHAR_MAX, //SPATIAL_TOP_LEFT0
+ UCHAR_MAX, //SPATIAL_LEFT1
+ UCHAR_MAX, //SPATIAL_TOP1
+ UCHAR_MAX, //SPATIAL_TOP_RIGHT1
+ UCHAR_MAX, //SPATIAL_TOP_LEFT1
+ 2, //PROJECTED_COLOC0
+ 3, //PROJECTED_COLOC1
+ UCHAR_MAX, //PROJECTED_COLOC2
+ UCHAR_MAX, //PROJECTED_COLOC3
+ UCHAR_MAX, //PROJECTED_COLOC4
+ UCHAR_MAX, //PROJECTED_COLOC5
+ UCHAR_MAX, //PROJECTED_COLOC6
+ UCHAR_MAX, //PROJECTED_COLOC7
+ 6, //PROJECTED_COLOC_TR0
+ UCHAR_MAX, //PROJECTED_COLOC_TR1
+ 7, //PROJECTED_COLOC_BL0
+ UCHAR_MAX, //PROJECTED_COLOC_BL1
+ 8, //PROJECTED_COLOC_BR0
+ UCHAR_MAX, //PROJECTED_COLOC_BR1
+ 1, //PROJECTED_TOP0
+ UCHAR_MAX, //PROJECTED_TOP1
+ 4, //PROJECTED_TOP_RIGHT0
+ UCHAR_MAX, //PROJECTED_TOP_RIGHT1
+ 5, //PROJECTED_TOP_LEFT0
+ UCHAR_MAX, //PROJECTED_TOP_LEFT1
+ 9, //PROJECTED_RIGHT0
+ UCHAR_MAX, //PROJECTED_RIGHT1
+ 10, //PROJECTED_BOTTOM0
+ UCHAR_MAX, //PROJECTED_BOTTOM1
+ 11, //PROJECTED_BOTTOM_RIGHT0
+ UCHAR_MAX, //PROJECTED_BOTTOM_RIGHT1
+ 12, //PROJECTED_BOTTOM_LEFT0
+ UCHAR_MAX, //PROJECTED_BOTTOM_LEFT1
+ UCHAR_MAX, //COLOCATED_GLOBAL_MV0
+ UCHAR_MAX, //COLOCATED_GLOBAL_MV1
+ UCHAR_MAX, //PROJECTED_TOP2
+ UCHAR_MAX, //PROJECTED_TOP3
+ UCHAR_MAX, //PROJECTED_TOP_RIGHT2
+ UCHAR_MAX, //PROJECTED_TOP_RIGHT3
+ UCHAR_MAX, //PROJECTED_TOP_LEFT2
+ UCHAR_MAX, //PROJECTED_TOP_LEFT3
+ UCHAR_MAX, //PROJECTED_RIGHT2
+ UCHAR_MAX, //PROJECTED_RIGHT3
+ UCHAR_MAX, //PROJECTED_BOTTOM2
+ UCHAR_MAX, //PROJECTED_BOTTOM3
+ UCHAR_MAX, //PROJECTED_BOTTOM_RIGHT2
+ UCHAR_MAX, //PROJECTED_BOTTOM_RIGHT3
+ UCHAR_MAX, //PROJECTED_BOTTOM_LEFT2
+ UCHAR_MAX //PROJECTED_BOTTOM_LEFT3
+ },
+ { /* case 2: entries other than UCHAR_MAX are the distinct priorities 0..21 */
+ 20, //ZERO_MV
+ 21, //ZERO_MV_ALTREF
+ 0, //SPATIAL_LEFT0
+ UCHAR_MAX, //SPATIAL_TOP0
+ UCHAR_MAX, //SPATIAL_TOP_RIGHT0
+ UCHAR_MAX, //SPATIAL_TOP_LEFT0
+ 1, //SPATIAL_LEFT1
+ UCHAR_MAX, //SPATIAL_TOP1
+ UCHAR_MAX, //SPATIAL_TOP_RIGHT1
+ UCHAR_MAX, //SPATIAL_TOP_LEFT1
+ 4, //PROJECTED_COLOC0
+ 5, //PROJECTED_COLOC1
+ 6, //PROJECTED_COLOC2
+ 7, //PROJECTED_COLOC3
+ UCHAR_MAX, //PROJECTED_COLOC4
+ UCHAR_MAX, //PROJECTED_COLOC5
+ UCHAR_MAX, //PROJECTED_COLOC6
+ UCHAR_MAX, //PROJECTED_COLOC7
+ UCHAR_MAX, //PROJECTED_COLOC_TR0
+ UCHAR_MAX, //PROJECTED_COLOC_TR1
+ UCHAR_MAX, //PROJECTED_COLOC_BL0
+ UCHAR_MAX, //PROJECTED_COLOC_BL1
+ UCHAR_MAX, //PROJECTED_COLOC_BR0
+ UCHAR_MAX, //PROJECTED_COLOC_BR1
+ 2, //PROJECTED_TOP0
+ 3, //PROJECTED_TOP1
+ 8, //PROJECTED_TOP_RIGHT0
+ 9, //PROJECTED_TOP_RIGHT1
+ 10, //PROJECTED_TOP_LEFT0
+ 11, //PROJECTED_TOP_LEFT1
+ 12, //PROJECTED_RIGHT0
+ 13, //PROJECTED_RIGHT1
+ 14, //PROJECTED_BOTTOM0
+ 15, //PROJECTED_BOTTOM1
+ 16, //PROJECTED_BOTTOM_RIGHT0
+ 17, //PROJECTED_BOTTOM_RIGHT1
+ 18, //PROJECTED_BOTTOM_LEFT0
+ 19, //PROJECTED_BOTTOM_LEFT1
+ UCHAR_MAX, //COLOCATED_GLOBAL_MV0
+ UCHAR_MAX, //COLOCATED_GLOBAL_MV1
+ UCHAR_MAX, //PROJECTED_TOP2
+ UCHAR_MAX, //PROJECTED_TOP3
+ UCHAR_MAX, //PROJECTED_TOP_RIGHT2
+ UCHAR_MAX, //PROJECTED_TOP_RIGHT3
+ UCHAR_MAX, //PROJECTED_TOP_LEFT2
+ UCHAR_MAX, //PROJECTED_TOP_LEFT3
+ UCHAR_MAX, //PROJECTED_RIGHT2
+ UCHAR_MAX, //PROJECTED_RIGHT3
+ UCHAR_MAX, //PROJECTED_BOTTOM2
+ UCHAR_MAX, //PROJECTED_BOTTOM3
+ UCHAR_MAX, //PROJECTED_BOTTOM_RIGHT2
+ UCHAR_MAX, //PROJECTED_BOTTOM_RIGHT3
+ UCHAR_MAX, //PROJECTED_BOTTOM_LEFT2
+ UCHAR_MAX //PROJECTED_BOTTOM_LEFT3
+ },
+ { /* case 3: entries other than UCHAR_MAX are the distinct priorities 0..27 */
+ 26, //ZERO_MV
+ 27, //ZERO_MV_ALTREF
+ 0, //SPATIAL_LEFT0
+ UCHAR_MAX, //SPATIAL_TOP0
+ UCHAR_MAX, //SPATIAL_TOP_RIGHT0
+ UCHAR_MAX, //SPATIAL_TOP_LEFT0
+ 1, //SPATIAL_LEFT1
+ UCHAR_MAX, //SPATIAL_TOP1
+ UCHAR_MAX, //SPATIAL_TOP_RIGHT1
+ UCHAR_MAX, //SPATIAL_TOP_LEFT1
+ 4, //PROJECTED_COLOC0
+ 5, //PROJECTED_COLOC1
+ 6, //PROJECTED_COLOC2
+ 7, //PROJECTED_COLOC3
+ UCHAR_MAX, //PROJECTED_COLOC4
+ UCHAR_MAX, //PROJECTED_COLOC5
+ UCHAR_MAX, //PROJECTED_COLOC6
+ UCHAR_MAX, //PROJECTED_COLOC7
+ 12, //PROJECTED_COLOC_TR0
+ 15, //PROJECTED_COLOC_TR1
+ 13, //PROJECTED_COLOC_BL0
+ 16, //PROJECTED_COLOC_BL1
+ 14, //PROJECTED_COLOC_BR0
+ 17, //PROJECTED_COLOC_BR1
+ 2, //PROJECTED_TOP0
+ 3, //PROJECTED_TOP1
+ 8, //PROJECTED_TOP_RIGHT0
+ 9, //PROJECTED_TOP_RIGHT1
+ 10, //PROJECTED_TOP_LEFT0
+ 11, //PROJECTED_TOP_LEFT1
+ 18, //PROJECTED_RIGHT0
+ 19, //PROJECTED_RIGHT1
+ 20, //PROJECTED_BOTTOM0
+ 21, //PROJECTED_BOTTOM1
+ 22, //PROJECTED_BOTTOM_RIGHT0
+ 23, //PROJECTED_BOTTOM_RIGHT1
+ 24, //PROJECTED_BOTTOM_LEFT0
+ 25, //PROJECTED_BOTTOM_LEFT1
+ UCHAR_MAX, //COLOCATED_GLOBAL_MV0
+ UCHAR_MAX, //COLOCATED_GLOBAL_MV1
+ UCHAR_MAX, //PROJECTED_TOP2
+ UCHAR_MAX, //PROJECTED_TOP3
+ UCHAR_MAX, //PROJECTED_TOP_RIGHT2
+ UCHAR_MAX, //PROJECTED_TOP_RIGHT3
+ UCHAR_MAX, //PROJECTED_TOP_LEFT2
+ UCHAR_MAX, //PROJECTED_TOP_LEFT3
+ UCHAR_MAX, //PROJECTED_RIGHT2
+ UCHAR_MAX, //PROJECTED_RIGHT3
+ UCHAR_MAX, //PROJECTED_BOTTOM2
+ UCHAR_MAX, //PROJECTED_BOTTOM3
+ UCHAR_MAX, //PROJECTED_BOTTOM_RIGHT2
+ UCHAR_MAX, //PROJECTED_BOTTOM_RIGHT3
+ UCHAR_MAX, //PROJECTED_BOTTOM_LEFT2
+ UCHAR_MAX //PROJECTED_BOTTOM_LEFT3
+ },
+ { /* case 4: entries other than UCHAR_MAX are the distinct priorities 0..30 */
+ 22, //ZERO_MV
+ 23, //ZERO_MV_ALTREF
+ 0, //SPATIAL_LEFT0
+ UCHAR_MAX, //SPATIAL_TOP0
+ UCHAR_MAX, //SPATIAL_TOP_RIGHT0
+ UCHAR_MAX, //SPATIAL_TOP_LEFT0
+ 1, //SPATIAL_LEFT1
+ UCHAR_MAX, //SPATIAL_TOP1
+ UCHAR_MAX, //SPATIAL_TOP_RIGHT1
+ UCHAR_MAX, //SPATIAL_TOP_LEFT1
+ 4, //PROJECTED_COLOC0
+ 5, //PROJECTED_COLOC1
+ 6, //PROJECTED_COLOC2
+ 7, //PROJECTED_COLOC3
+ 8, //PROJECTED_COLOC4
+ 9, //PROJECTED_COLOC5
+ UCHAR_MAX, //PROJECTED_COLOC6
+ UCHAR_MAX, //PROJECTED_COLOC7
+ UCHAR_MAX, //PROJECTED_COLOC_TR0
+ UCHAR_MAX, //PROJECTED_COLOC_TR1
+ UCHAR_MAX, //PROJECTED_COLOC_BL0
+ UCHAR_MAX, //PROJECTED_COLOC_BL1
+ UCHAR_MAX, //PROJECTED_COLOC_BR0
+ UCHAR_MAX, //PROJECTED_COLOC_BR1
+ 2, //PROJECTED_TOP0
+ 3, //PROJECTED_TOP1
+ 10, //PROJECTED_TOP_RIGHT0
+ 11, //PROJECTED_TOP_RIGHT1
+ 12, //PROJECTED_TOP_LEFT0
+ 13, //PROJECTED_TOP_LEFT1
+ 14, //PROJECTED_RIGHT0
+ 15, //PROJECTED_RIGHT1
+ 16, //PROJECTED_BOTTOM0
+ 17, //PROJECTED_BOTTOM1
+ 18, //PROJECTED_BOTTOM_RIGHT0
+ 19, //PROJECTED_BOTTOM_RIGHT1
+ 20, //PROJECTED_BOTTOM_LEFT0
+ 21, //PROJECTED_BOTTOM_LEFT1
+ UCHAR_MAX, //COLOCATED_GLOBAL_MV0
+ UCHAR_MAX, //COLOCATED_GLOBAL_MV1
+ 24, //PROJECTED_TOP2
+ UCHAR_MAX, //PROJECTED_TOP3
+ 25, //PROJECTED_TOP_RIGHT2
+ UCHAR_MAX, //PROJECTED_TOP_RIGHT3
+ 26, //PROJECTED_TOP_LEFT2
+ UCHAR_MAX, //PROJECTED_TOP_LEFT3
+ 27, //PROJECTED_RIGHT2
+ UCHAR_MAX, //PROJECTED_RIGHT3
+ 28, //PROJECTED_BOTTOM2
+ UCHAR_MAX, //PROJECTED_BOTTOM3
+ 29, //PROJECTED_BOTTOM_RIGHT2
+ UCHAR_MAX, //PROJECTED_BOTTOM_RIGHT3
+ 30, //PROJECTED_BOTTOM_LEFT2
+ UCHAR_MAX //PROJECTED_BOTTOM_LEFT3
+ },
+ { /* case 5: entries other than UCHAR_MAX are the distinct priorities 0..36 */
+ 28, //ZERO_MV
+ 29, //ZERO_MV_ALTREF
+ 0, //SPATIAL_LEFT0
+ UCHAR_MAX, //SPATIAL_TOP0
+ UCHAR_MAX, //SPATIAL_TOP_RIGHT0
+ UCHAR_MAX, //SPATIAL_TOP_LEFT0
+ 1, //SPATIAL_LEFT1
+ UCHAR_MAX, //SPATIAL_TOP1
+ UCHAR_MAX, //SPATIAL_TOP_RIGHT1
+ UCHAR_MAX, //SPATIAL_TOP_LEFT1
+ 4, //PROJECTED_COLOC0
+ 5, //PROJECTED_COLOC1
+ 6, //PROJECTED_COLOC2
+ 7, //PROJECTED_COLOC3
+ 8, //PROJECTED_COLOC4
+ 9, //PROJECTED_COLOC5
+ UCHAR_MAX, //PROJECTED_COLOC6
+ UCHAR_MAX, //PROJECTED_COLOC7
+ 14, //PROJECTED_COLOC_TR0
+ 17, //PROJECTED_COLOC_TR1
+ 15, //PROJECTED_COLOC_BL0
+ 18, //PROJECTED_COLOC_BL1
+ 16, //PROJECTED_COLOC_BR0
+ 19, //PROJECTED_COLOC_BR1
+ 2, //PROJECTED_TOP0
+ 3, //PROJECTED_TOP1
+ 10, //PROJECTED_TOP_RIGHT0
+ 11, //PROJECTED_TOP_RIGHT1
+ 12, //PROJECTED_TOP_LEFT0
+ 13, //PROJECTED_TOP_LEFT1
+ 20, //PROJECTED_RIGHT0
+ 21, //PROJECTED_RIGHT1
+ 22, //PROJECTED_BOTTOM0
+ 23, //PROJECTED_BOTTOM1
+ 24, //PROJECTED_BOTTOM_RIGHT0
+ 25, //PROJECTED_BOTTOM_RIGHT1
+ 26, //PROJECTED_BOTTOM_LEFT0
+ 27, //PROJECTED_BOTTOM_LEFT1
+ UCHAR_MAX, //COLOCATED_GLOBAL_MV0
+ UCHAR_MAX, //COLOCATED_GLOBAL_MV1
+ 30, //PROJECTED_TOP2
+ UCHAR_MAX, //PROJECTED_TOP3
+ 31, //PROJECTED_TOP_RIGHT2
+ UCHAR_MAX, //PROJECTED_TOP_RIGHT3
+ 32, //PROJECTED_TOP_LEFT2
+ UCHAR_MAX, //PROJECTED_TOP_LEFT3
+ 33, //PROJECTED_RIGHT2
+ UCHAR_MAX, //PROJECTED_RIGHT3
+ 34, //PROJECTED_BOTTOM2
+ UCHAR_MAX, //PROJECTED_BOTTOM3
+ 35, //PROJECTED_BOTTOM_RIGHT2
+ UCHAR_MAX, //PROJECTED_BOTTOM_RIGHT3
+ 36, //PROJECTED_BOTTOM_LEFT2
+ UCHAR_MAX //PROJECTED_BOTTOM_LEFT3
+ },
+ { /* case 6: every entry other than the two COLOCATED_GLOBAL_MV ones and the six PROJECTED_COLOC_TR/BL/BR ones holds a distinct priority 0..39 */
+ 24, //ZERO_MV
+ 25, //ZERO_MV_ALTREF
+ 0, //SPATIAL_LEFT0
+ UCHAR_MAX, //SPATIAL_TOP0
+ UCHAR_MAX, //SPATIAL_TOP_RIGHT0
+ UCHAR_MAX, //SPATIAL_TOP_LEFT0
+ 1, //SPATIAL_LEFT1
+ UCHAR_MAX, //SPATIAL_TOP1
+ UCHAR_MAX, //SPATIAL_TOP_RIGHT1
+ UCHAR_MAX, //SPATIAL_TOP_LEFT1
+ 4, //PROJECTED_COLOC0
+ 5, //PROJECTED_COLOC1
+ 6, //PROJECTED_COLOC2
+ 7, //PROJECTED_COLOC3
+ 8, //PROJECTED_COLOC4
+ 9, //PROJECTED_COLOC5
+ 10, //PROJECTED_COLOC6
+ 11, //PROJECTED_COLOC7
+ UCHAR_MAX, //PROJECTED_COLOC_TR0
+ UCHAR_MAX, //PROJECTED_COLOC_TR1
+ UCHAR_MAX, //PROJECTED_COLOC_BL0
+ UCHAR_MAX, //PROJECTED_COLOC_BL1
+ UCHAR_MAX, //PROJECTED_COLOC_BR0
+ UCHAR_MAX, //PROJECTED_COLOC_BR1
+ 2, //PROJECTED_TOP0
+ 3, //PROJECTED_TOP1
+ 12, //PROJECTED_TOP_RIGHT0
+ 13, //PROJECTED_TOP_RIGHT1
+ 14, //PROJECTED_TOP_LEFT0
+ 15, //PROJECTED_TOP_LEFT1
+ 16, //PROJECTED_RIGHT0
+ 17, //PROJECTED_RIGHT1
+ 18, //PROJECTED_BOTTOM0
+ 19, //PROJECTED_BOTTOM1
+ 20, //PROJECTED_BOTTOM_RIGHT0
+ 21, //PROJECTED_BOTTOM_RIGHT1
+ 22, //PROJECTED_BOTTOM_LEFT0
+ 23, //PROJECTED_BOTTOM_LEFT1
+ UCHAR_MAX, //COLOCATED_GLOBAL_MV0
+ UCHAR_MAX, //COLOCATED_GLOBAL_MV1
+ 26, //PROJECTED_TOP2
+ 33, //PROJECTED_TOP3
+ 27, //PROJECTED_TOP_RIGHT2
+ 34, //PROJECTED_TOP_RIGHT3
+ 28, //PROJECTED_TOP_LEFT2
+ 35, //PROJECTED_TOP_LEFT3
+ 29, //PROJECTED_RIGHT2
+ 36, //PROJECTED_RIGHT3
+ 30, //PROJECTED_BOTTOM2
+ 37, //PROJECTED_BOTTOM3
+ 31, //PROJECTED_BOTTOM_RIGHT2
+ 38, //PROJECTED_BOTTOM_RIGHT3
+ 32, //PROJECTED_BOTTOM_LEFT2
+ 39 //PROJECTED_BOTTOM_LEFT3
+ },
+ { /* case 7: entries other than UCHAR_MAX are the distinct priorities 0..45 */
+ 30, //ZERO_MV
+ 31, //ZERO_MV_ALTREF
+ 0, //SPATIAL_LEFT0
+ UCHAR_MAX, //SPATIAL_TOP0
+ UCHAR_MAX, //SPATIAL_TOP_RIGHT0
+ UCHAR_MAX, //SPATIAL_TOP_LEFT0
+ 1, //SPATIAL_LEFT1
+ UCHAR_MAX, //SPATIAL_TOP1
+ UCHAR_MAX, //SPATIAL_TOP_RIGHT1
+ UCHAR_MAX, //SPATIAL_TOP_LEFT1
+ 4, //PROJECTED_COLOC0
+ 5, //PROJECTED_COLOC1
+ 6, //PROJECTED_COLOC2
+ 7, //PROJECTED_COLOC3
+ 8, //PROJECTED_COLOC4
+ 9, //PROJECTED_COLOC5
+ 10, //PROJECTED_COLOC6
+ 11, //PROJECTED_COLOC7
+ 16, //PROJECTED_COLOC_TR0
+ 19, //PROJECTED_COLOC_TR1
+ 17, //PROJECTED_COLOC_BL0
+ 20, //PROJECTED_COLOC_BL1
+ 18, //PROJECTED_COLOC_BR0
+ 21, //PROJECTED_COLOC_BR1
+ 2, //PROJECTED_TOP0
+ 3, //PROJECTED_TOP1
+ 12, //PROJECTED_TOP_RIGHT0
+ 13, //PROJECTED_TOP_RIGHT1
+ 14, //PROJECTED_TOP_LEFT0
+ 15, //PROJECTED_TOP_LEFT1
+ 22, //PROJECTED_RIGHT0
+ 23, //PROJECTED_RIGHT1
+ 24, //PROJECTED_BOTTOM0
+ 25, //PROJECTED_BOTTOM1
+ 26, //PROJECTED_BOTTOM_RIGHT0
+ 27, //PROJECTED_BOTTOM_RIGHT1
+ 28, //PROJECTED_BOTTOM_LEFT0
+ 29, //PROJECTED_BOTTOM_LEFT1
+ UCHAR_MAX, //COLOCATED_GLOBAL_MV0
+ UCHAR_MAX, //COLOCATED_GLOBAL_MV1
+ 32, //PROJECTED_TOP2
+ 39, //PROJECTED_TOP3
+ 33, //PROJECTED_TOP_RIGHT2
+ 40, //PROJECTED_TOP_RIGHT3
+ 34, //PROJECTED_TOP_LEFT2
+ 41, //PROJECTED_TOP_LEFT3
+ 35, //PROJECTED_RIGHT2
+ 42, //PROJECTED_RIGHT3
+ 36, //PROJECTED_BOTTOM2
+ 43, //PROJECTED_BOTTOM3
+ 37, //PROJECTED_BOTTOM_RIGHT2
+ 44, //PROJECTED_BOTTOM_RIGHT3
+ 38, //PROJECTED_BOTTOM_LEFT2
+ 45 //PROJECTED_BOTTOM_LEFT3
+ },
+ { /* case 8: entries other than UCHAR_MAX are the distinct priorities 0..17 */
+ 10, //ZERO_MV
+ UCHAR_MAX, //ZERO_MV_ALTREF
+ 0, //SPATIAL_LEFT0
+ UCHAR_MAX, //SPATIAL_TOP0
+ UCHAR_MAX, //SPATIAL_TOP_RIGHT0
+ UCHAR_MAX, //SPATIAL_TOP_LEFT0
+ UCHAR_MAX, //SPATIAL_LEFT1
+ UCHAR_MAX, //SPATIAL_TOP1
+ UCHAR_MAX, //SPATIAL_TOP_RIGHT1
+ UCHAR_MAX, //SPATIAL_TOP_LEFT1
+ 2, //PROJECTED_COLOC0
+ 3, //PROJECTED_COLOC1
+ UCHAR_MAX, //PROJECTED_COLOC2
+ UCHAR_MAX, //PROJECTED_COLOC3
+ UCHAR_MAX, //PROJECTED_COLOC4
+ UCHAR_MAX, //PROJECTED_COLOC5
+ UCHAR_MAX, //PROJECTED_COLOC6
+ UCHAR_MAX, //PROJECTED_COLOC7
+ UCHAR_MAX, //PROJECTED_COLOC_TR0
+ UCHAR_MAX, //PROJECTED_COLOC_TR1
+ UCHAR_MAX, //PROJECTED_COLOC_BL0
+ UCHAR_MAX, //PROJECTED_COLOC_BL1
+ UCHAR_MAX, //PROJECTED_COLOC_BR0
+ UCHAR_MAX, //PROJECTED_COLOC_BR1
+ 1, //PROJECTED_TOP0
+ 11, //PROJECTED_TOP1
+ 4, //PROJECTED_TOP_RIGHT0
+ 12, //PROJECTED_TOP_RIGHT1
+ 5, //PROJECTED_TOP_LEFT0
+ 13, //PROJECTED_TOP_LEFT1
+ 6, //PROJECTED_RIGHT0
+ 14, //PROJECTED_RIGHT1
+ 7, //PROJECTED_BOTTOM0
+ 15, //PROJECTED_BOTTOM1
+ 8, //PROJECTED_BOTTOM_RIGHT0
+ 16, //PROJECTED_BOTTOM_RIGHT1
+ 9, //PROJECTED_BOTTOM_LEFT0
+ 17, //PROJECTED_BOTTOM_LEFT1
+ UCHAR_MAX, //COLOCATED_GLOBAL_MV0
+ UCHAR_MAX, //COLOCATED_GLOBAL_MV1
+ UCHAR_MAX, //PROJECTED_TOP2
+ UCHAR_MAX, //PROJECTED_TOP3
+ UCHAR_MAX, //PROJECTED_TOP_RIGHT2
+ UCHAR_MAX, //PROJECTED_TOP_RIGHT3
+ UCHAR_MAX, //PROJECTED_TOP_LEFT2
+ UCHAR_MAX, //PROJECTED_TOP_LEFT3
+ UCHAR_MAX, //PROJECTED_RIGHT2
+ UCHAR_MAX, //PROJECTED_RIGHT3
+ UCHAR_MAX, //PROJECTED_BOTTOM2
+ UCHAR_MAX, //PROJECTED_BOTTOM3
+ UCHAR_MAX, //PROJECTED_BOTTOM_RIGHT2
+ UCHAR_MAX, //PROJECTED_BOTTOM_RIGHT3
+ UCHAR_MAX, //PROJECTED_BOTTOM_LEFT2
+ UCHAR_MAX //PROJECTED_BOTTOM_LEFT3
+ },
+ { /* case 9: entries other than UCHAR_MAX are the distinct priorities 0..20 */
+ 13, //ZERO_MV
+ UCHAR_MAX, //ZERO_MV_ALTREF
+ 0, //SPATIAL_LEFT0
+ UCHAR_MAX, //SPATIAL_TOP0
+ UCHAR_MAX, //SPATIAL_TOP_RIGHT0
+ UCHAR_MAX, //SPATIAL_TOP_LEFT0
+ UCHAR_MAX, //SPATIAL_LEFT1
+ UCHAR_MAX, //SPATIAL_TOP1
+ UCHAR_MAX, //SPATIAL_TOP_RIGHT1
+ UCHAR_MAX, //SPATIAL_TOP_LEFT1
+ 2, //PROJECTED_COLOC0
+ 3, //PROJECTED_COLOC1
+ UCHAR_MAX, //PROJECTED_COLOC2
+ UCHAR_MAX, //PROJECTED_COLOC3
+ UCHAR_MAX, //PROJECTED_COLOC4
+ UCHAR_MAX, //PROJECTED_COLOC5
+ UCHAR_MAX, //PROJECTED_COLOC6
+ UCHAR_MAX, //PROJECTED_COLOC7
+ 6, //PROJECTED_COLOC_TR0
+ UCHAR_MAX, //PROJECTED_COLOC_TR1
+ 7, //PROJECTED_COLOC_BL0
+ UCHAR_MAX, //PROJECTED_COLOC_BL1
+ 8, //PROJECTED_COLOC_BR0
+ UCHAR_MAX, //PROJECTED_COLOC_BR1
+ 1, //PROJECTED_TOP0
+ 14, //PROJECTED_TOP1
+ 4, //PROJECTED_TOP_RIGHT0
+ 15, //PROJECTED_TOP_RIGHT1
+ 5, //PROJECTED_TOP_LEFT0
+ 16, //PROJECTED_TOP_LEFT1
+ 9, //PROJECTED_RIGHT0
+ 17, //PROJECTED_RIGHT1
+ 10, //PROJECTED_BOTTOM0
+ 18, //PROJECTED_BOTTOM1
+ 11, //PROJECTED_BOTTOM_RIGHT0
+ 19, //PROJECTED_BOTTOM_RIGHT1
+ 12, //PROJECTED_BOTTOM_LEFT0
+ 20, //PROJECTED_BOTTOM_LEFT1
+ UCHAR_MAX, //COLOCATED_GLOBAL_MV0
+ UCHAR_MAX, //COLOCATED_GLOBAL_MV1
+ UCHAR_MAX, //PROJECTED_TOP2
+ UCHAR_MAX, //PROJECTED_TOP3
+ UCHAR_MAX, //PROJECTED_TOP_RIGHT2
+ UCHAR_MAX, //PROJECTED_TOP_RIGHT3
+ UCHAR_MAX, //PROJECTED_TOP_LEFT2
+ UCHAR_MAX, //PROJECTED_TOP_LEFT3
+ UCHAR_MAX, //PROJECTED_RIGHT2
+ UCHAR_MAX, //PROJECTED_RIGHT3
+ UCHAR_MAX, //PROJECTED_BOTTOM2
+ UCHAR_MAX, //PROJECTED_BOTTOM3
+ UCHAR_MAX, //PROJECTED_BOTTOM_RIGHT2
+ UCHAR_MAX, //PROJECTED_BOTTOM_RIGHT3
+ UCHAR_MAX, //PROJECTED_BOTTOM_LEFT2
+ UCHAR_MAX //PROJECTED_BOTTOM_LEFT3
+ },
+ { /* case 10: entries other than UCHAR_MAX are the distinct priorities 0..19 */
+ 10, //ZERO_MV
+ UCHAR_MAX, //ZERO_MV_ALTREF
+ 0, //SPATIAL_LEFT0
+ UCHAR_MAX, //SPATIAL_TOP0
+ UCHAR_MAX, //SPATIAL_TOP_RIGHT0
+ UCHAR_MAX, //SPATIAL_TOP_LEFT0
+ UCHAR_MAX, //SPATIAL_LEFT1
+ UCHAR_MAX, //SPATIAL_TOP1
+ UCHAR_MAX, //SPATIAL_TOP_RIGHT1
+ UCHAR_MAX, //SPATIAL_TOP_LEFT1
+ 2, //PROJECTED_COLOC0
+ 3, //PROJECTED_COLOC1
+ 18, //PROJECTED_COLOC2
+ 19, //PROJECTED_COLOC3
+ UCHAR_MAX, //PROJECTED_COLOC4
+ UCHAR_MAX, //PROJECTED_COLOC5
+ UCHAR_MAX, //PROJECTED_COLOC6
+ UCHAR_MAX, //PROJECTED_COLOC7
+ UCHAR_MAX, //PROJECTED_COLOC_TR0
+ UCHAR_MAX, //PROJECTED_COLOC_TR1
+ UCHAR_MAX, //PROJECTED_COLOC_BL0
+ UCHAR_MAX, //PROJECTED_COLOC_BL1
+ UCHAR_MAX, //PROJECTED_COLOC_BR0
+ UCHAR_MAX, //PROJECTED_COLOC_BR1
+ 1, //PROJECTED_TOP0
+ 11, //PROJECTED_TOP1
+ 4, //PROJECTED_TOP_RIGHT0
+ 12, //PROJECTED_TOP_RIGHT1
+ 5, //PROJECTED_TOP_LEFT0
+ 13, //PROJECTED_TOP_LEFT1
+ 6, //PROJECTED_RIGHT0
+ 14, //PROJECTED_RIGHT1
+ 7, //PROJECTED_BOTTOM0
+ 15, //PROJECTED_BOTTOM1
+ 8, //PROJECTED_BOTTOM_RIGHT0
+ 16, //PROJECTED_BOTTOM_RIGHT1
+ 9, //PROJECTED_BOTTOM_LEFT0
+ 17, //PROJECTED_BOTTOM_LEFT1
+ UCHAR_MAX, //COLOCATED_GLOBAL_MV0
+ UCHAR_MAX, //COLOCATED_GLOBAL_MV1
+ UCHAR_MAX, //PROJECTED_TOP2
+ UCHAR_MAX, //PROJECTED_TOP3
+ UCHAR_MAX, //PROJECTED_TOP_RIGHT2
+ UCHAR_MAX, //PROJECTED_TOP_RIGHT3
+ UCHAR_MAX, //PROJECTED_TOP_LEFT2
+ UCHAR_MAX, //PROJECTED_TOP_LEFT3
+ UCHAR_MAX, //PROJECTED_RIGHT2
+ UCHAR_MAX, //PROJECTED_RIGHT3
+ UCHAR_MAX, //PROJECTED_BOTTOM2
+ UCHAR_MAX, //PROJECTED_BOTTOM3
+ UCHAR_MAX, //PROJECTED_BOTTOM_RIGHT2
+ UCHAR_MAX, //PROJECTED_BOTTOM_RIGHT3
+ UCHAR_MAX, //PROJECTED_BOTTOM_LEFT2
+ UCHAR_MAX //PROJECTED_BOTTOM_LEFT3
+ },
+ { /* case 11: entries other than UCHAR_MAX are the distinct priorities 0..22 */
+ 13, //ZERO_MV
+ UCHAR_MAX, //ZERO_MV_ALTREF
+ 0, //SPATIAL_LEFT0
+ UCHAR_MAX, //SPATIAL_TOP0
+ UCHAR_MAX, //SPATIAL_TOP_RIGHT0
+ UCHAR_MAX, //SPATIAL_TOP_LEFT0
+ UCHAR_MAX, //SPATIAL_LEFT1
+ UCHAR_MAX, //SPATIAL_TOP1
+ UCHAR_MAX, //SPATIAL_TOP_RIGHT1
+ UCHAR_MAX, //SPATIAL_TOP_LEFT1
+ 2, //PROJECTED_COLOC0
+ 3, //PROJECTED_COLOC1
+ 21, //PROJECTED_COLOC2
+ 22, //PROJECTED_COLOC3
+ UCHAR_MAX, //PROJECTED_COLOC4
+ UCHAR_MAX, //PROJECTED_COLOC5
+ UCHAR_MAX, //PROJECTED_COLOC6
+ UCHAR_MAX, //PROJECTED_COLOC7
+ 6, //PROJECTED_COLOC_TR0
+ UCHAR_MAX, //PROJECTED_COLOC_TR1
+ 7, //PROJECTED_COLOC_BL0
+ UCHAR_MAX, //PROJECTED_COLOC_BL1
+ 8, //PROJECTED_COLOC_BR0
+ UCHAR_MAX, //PROJECTED_COLOC_BR1
+ 1, //PROJECTED_TOP0
+ 14, //PROJECTED_TOP1
+ 4, //PROJECTED_TOP_RIGHT0
+ 15, //PROJECTED_TOP_RIGHT1
+ 5, //PROJECTED_TOP_LEFT0
+ 16, //PROJECTED_TOP_LEFT1
+ 9, //PROJECTED_RIGHT0
+ 17, //PROJECTED_RIGHT1
+ 10, //PROJECTED_BOTTOM0
+ 18, //PROJECTED_BOTTOM1
+ 11, //PROJECTED_BOTTOM_RIGHT0
+ 19, //PROJECTED_BOTTOM_RIGHT1
+ 12, //PROJECTED_BOTTOM_LEFT0
+ 20, //PROJECTED_BOTTOM_LEFT1
+ UCHAR_MAX, //COLOCATED_GLOBAL_MV0
+ UCHAR_MAX, //COLOCATED_GLOBAL_MV1
+ UCHAR_MAX, //PROJECTED_TOP2
+ UCHAR_MAX, //PROJECTED_TOP3
+ UCHAR_MAX, //PROJECTED_TOP_RIGHT2
+ UCHAR_MAX, //PROJECTED_TOP_RIGHT3
+ UCHAR_MAX, //PROJECTED_TOP_LEFT2
+ UCHAR_MAX, //PROJECTED_TOP_LEFT3
+ UCHAR_MAX, //PROJECTED_RIGHT2
+ UCHAR_MAX, //PROJECTED_RIGHT3
+ UCHAR_MAX, //PROJECTED_BOTTOM2
+ UCHAR_MAX, //PROJECTED_BOTTOM3
+ UCHAR_MAX, //PROJECTED_BOTTOM_RIGHT2
+ UCHAR_MAX, //PROJECTED_BOTTOM_RIGHT3
+ UCHAR_MAX, //PROJECTED_BOTTOM_LEFT2
+ UCHAR_MAX //PROJECTED_BOTTOM_LEFT3
+ }
+};
+
+const SEARCH_CANDIDATE_TYPE_T
+ gae_search_cand_priority_to_search_cand_type_map_in_l0_me[12][NUM_SEARCH_CAND_TYPES] = {
+ { /* 0*/ SPATIAL_LEFT0,
+ /* 1*/ PROJECTED_TOP0,
+ /* 2*/ PROJECTED_COLOC0,
+ /* 3*/ PROJECTED_COLOC1,
+ /* 4*/ PROJECTED_TOP_RIGHT0,
+ /* 5*/ PROJECTED_TOP_LEFT0,
+ /* 6*/ PROJECTED_RIGHT0,
+ /* 7*/ PROJECTED_BOTTOM0,
+ /* 8*/ PROJECTED_BOTTOM_RIGHT0,
+ /* 9*/ PROJECTED_BOTTOM_LEFT0,
+ /*10*/ ZERO_MV,
+ /*11*/ ILLUSORY_CANDIDATE,
+ /*12*/ ILLUSORY_CANDIDATE,
+ /*13*/ ILLUSORY_CANDIDATE,
+ /*14*/ ILLUSORY_CANDIDATE,
+ /*15*/ ILLUSORY_CANDIDATE,
+ /*16*/ ILLUSORY_CANDIDATE,
+ /*17*/ ILLUSORY_CANDIDATE,
+ /*18*/ ILLUSORY_CANDIDATE,
+ /*19*/ ILLUSORY_CANDIDATE,
+ /*20*/ ILLUSORY_CANDIDATE,
+ /*21*/ ILLUSORY_CANDIDATE,
+ /*22*/ ILLUSORY_CANDIDATE,
+ /*23*/ ILLUSORY_CANDIDATE,
+ /*24*/ ILLUSORY_CANDIDATE,
+ /*25*/ ILLUSORY_CANDIDATE,
+ /*26*/ ILLUSORY_CANDIDATE,
+ /*27*/ ILLUSORY_CANDIDATE,
+ /*28*/ ILLUSORY_CANDIDATE,
+ /*29*/ ILLUSORY_CANDIDATE,
+ /*30*/ ILLUSORY_CANDIDATE,
+ /*31*/ ILLUSORY_CANDIDATE,
+ /*32*/ ILLUSORY_CANDIDATE,
+ /*33*/ ILLUSORY_CANDIDATE,
+ /*34*/ ILLUSORY_CANDIDATE,
+ /*35*/ ILLUSORY_CANDIDATE,
+ /*36*/ ILLUSORY_CANDIDATE,
+ /*37*/ ILLUSORY_CANDIDATE,
+ /*38*/ ILLUSORY_CANDIDATE,
+ /*39*/ ILLUSORY_CANDIDATE,
+ /*40*/ ILLUSORY_CANDIDATE,
+ /*41*/ ILLUSORY_CANDIDATE,
+ /*42*/ ILLUSORY_CANDIDATE,
+ /*43*/ ILLUSORY_CANDIDATE,
+ /*44*/ ILLUSORY_CANDIDATE,
+ /*45*/ ILLUSORY_CANDIDATE,
+ /*46*/ ILLUSORY_CANDIDATE,
+ /*47*/ ILLUSORY_CANDIDATE,
+ /*48*/ ILLUSORY_CANDIDATE,
+ /*49*/ ILLUSORY_CANDIDATE,
+ /*40*/ ILLUSORY_CANDIDATE,
+ /*51*/ ILLUSORY_CANDIDATE,
+ /*52*/ ILLUSORY_CANDIDATE,
+ /*53*/ ILLUSORY_CANDIDATE },
+ { /* 0*/ SPATIAL_LEFT0,
+ /* 1*/ PROJECTED_TOP0,
+ /* 2*/ PROJECTED_COLOC0,
+ /* 3*/ PROJECTED_COLOC1,
+ /* 4*/ PROJECTED_TOP_RIGHT0,
+ /* 5*/ PROJECTED_TOP_LEFT0,
+ /* 6*/ PROJECTED_COLOC_TR0,
+ /* 7*/ PROJECTED_COLOC_BL0,
+ /* 8*/ PROJECTED_COLOC_BR0,
+ /* 9*/ PROJECTED_RIGHT0,
+ /*10*/ PROJECTED_BOTTOM0,
+ /*11*/ PROJECTED_BOTTOM_RIGHT0,
+ /*12*/ PROJECTED_BOTTOM_LEFT0,
+ /*13*/ ZERO_MV,
+ /*14*/ ILLUSORY_CANDIDATE,
+ /*15*/ ILLUSORY_CANDIDATE,
+ /*16*/ ILLUSORY_CANDIDATE,
+ /*17*/ ILLUSORY_CANDIDATE,
+ /*18*/ ILLUSORY_CANDIDATE,
+ /*19*/ ILLUSORY_CANDIDATE,
+ /*20*/ ILLUSORY_CANDIDATE,
+ /*21*/ ILLUSORY_CANDIDATE,
+ /*22*/ ILLUSORY_CANDIDATE,
+ /*23*/ ILLUSORY_CANDIDATE,
+ /*24*/ ILLUSORY_CANDIDATE,
+ /*25*/ ILLUSORY_CANDIDATE,
+ /*26*/ ILLUSORY_CANDIDATE,
+ /*27*/ ILLUSORY_CANDIDATE,
+ /*28*/ ILLUSORY_CANDIDATE,
+ /*29*/ ILLUSORY_CANDIDATE,
+ /*30*/ ILLUSORY_CANDIDATE,
+ /*31*/ ILLUSORY_CANDIDATE,
+ /*32*/ ILLUSORY_CANDIDATE,
+ /*33*/ ILLUSORY_CANDIDATE,
+ /*34*/ ILLUSORY_CANDIDATE,
+ /*35*/ ILLUSORY_CANDIDATE,
+ /*36*/ ILLUSORY_CANDIDATE,
+ /*37*/ ILLUSORY_CANDIDATE,
+ /*38*/ ILLUSORY_CANDIDATE,
+ /*39*/ ILLUSORY_CANDIDATE,
+ /*40*/ ILLUSORY_CANDIDATE,
+ /*41*/ ILLUSORY_CANDIDATE,
+ /*42*/ ILLUSORY_CANDIDATE,
+ /*43*/ ILLUSORY_CANDIDATE,
+ /*44*/ ILLUSORY_CANDIDATE,
+ /*45*/ ILLUSORY_CANDIDATE,
+ /*46*/ ILLUSORY_CANDIDATE,
+ /*47*/ ILLUSORY_CANDIDATE,
+ /*48*/ ILLUSORY_CANDIDATE,
+ /*49*/ ILLUSORY_CANDIDATE,
+ /*40*/ ILLUSORY_CANDIDATE,
+ /*51*/ ILLUSORY_CANDIDATE,
+ /*52*/ ILLUSORY_CANDIDATE,
+ /*53*/ ILLUSORY_CANDIDATE },
+ { /* 0*/ SPATIAL_LEFT0,
+ /* 1*/ SPATIAL_LEFT1,
+ /* 2*/ PROJECTED_TOP0,
+ /* 3*/ PROJECTED_TOP1,
+ /* 4*/ PROJECTED_COLOC0,
+ /* 5*/ PROJECTED_COLOC1,
+ /* 6*/ PROJECTED_COLOC2,
+ /* 7*/ PROJECTED_COLOC3,
+ /* 8*/ PROJECTED_TOP_RIGHT0,
+ /* 9*/ PROJECTED_TOP_RIGHT1,
+ /*10*/ PROJECTED_TOP_LEFT0,
+ /*11*/ PROJECTED_TOP_LEFT1,
+ /*12*/ PROJECTED_RIGHT0,
+ /*13*/ PROJECTED_RIGHT1,
+ /*14*/ PROJECTED_BOTTOM0,
+ /*15*/ PROJECTED_BOTTOM1,
+ /*16*/ PROJECTED_BOTTOM_RIGHT0,
+ /*17*/ PROJECTED_BOTTOM_RIGHT1,
+ /*18*/ PROJECTED_BOTTOM_LEFT0,
+ /*19*/ PROJECTED_BOTTOM_LEFT1,
+ /*20*/ ZERO_MV,
+ /*21*/ ZERO_MV_ALTREF,
+ /*22*/ ILLUSORY_CANDIDATE,
+ /*23*/ ILLUSORY_CANDIDATE,
+ /*24*/ ILLUSORY_CANDIDATE,
+ /*25*/ ILLUSORY_CANDIDATE,
+ /*26*/ ILLUSORY_CANDIDATE,
+ /*27*/ ILLUSORY_CANDIDATE,
+ /*28*/ ILLUSORY_CANDIDATE,
+ /*29*/ ILLUSORY_CANDIDATE,
+ /*30*/ ILLUSORY_CANDIDATE,
+ /*31*/ ILLUSORY_CANDIDATE,
+ /*32*/ ILLUSORY_CANDIDATE,
+ /*33*/ ILLUSORY_CANDIDATE,
+ /*34*/ ILLUSORY_CANDIDATE,
+ /*35*/ ILLUSORY_CANDIDATE,
+ /*36*/ ILLUSORY_CANDIDATE,
+ /*37*/ ILLUSORY_CANDIDATE,
+ /*38*/ ILLUSORY_CANDIDATE,
+ /*39*/ ILLUSORY_CANDIDATE,
+ /*40*/ ILLUSORY_CANDIDATE,
+ /*41*/ ILLUSORY_CANDIDATE,
+ /*42*/ ILLUSORY_CANDIDATE,
+ /*43*/ ILLUSORY_CANDIDATE,
+ /*44*/ ILLUSORY_CANDIDATE,
+ /*45*/ ILLUSORY_CANDIDATE,
+ /*46*/ ILLUSORY_CANDIDATE,
+ /*47*/ ILLUSORY_CANDIDATE,
+ /*48*/ ILLUSORY_CANDIDATE,
+ /*49*/ ILLUSORY_CANDIDATE,
+ /*40*/ ILLUSORY_CANDIDATE,
+ /*51*/ ILLUSORY_CANDIDATE,
+ /*52*/ ILLUSORY_CANDIDATE,
+ /*53*/ ILLUSORY_CANDIDATE },
+ { /* 0*/ SPATIAL_LEFT0,
+ /* 1*/ SPATIAL_LEFT1,
+ /* 2*/ PROJECTED_TOP0,
+ /* 3*/ PROJECTED_TOP1,
+ /* 4*/ PROJECTED_COLOC0,
+ /* 5*/ PROJECTED_COLOC1,
+ /* 6*/ PROJECTED_COLOC2,
+ /* 7*/ PROJECTED_COLOC3,
+ /* 8*/ PROJECTED_TOP_RIGHT0,
+ /* 9*/ PROJECTED_TOP_RIGHT1,
+ /*10*/ PROJECTED_TOP_LEFT0,
+ /*11*/ PROJECTED_TOP_LEFT1,
+ /*12*/ PROJECTED_COLOC_TR0,
+ /*13*/ PROJECTED_COLOC_BL0,
+ /*14*/ PROJECTED_COLOC_BR0,
+ /*15*/ PROJECTED_COLOC_TR1,
+ /*16*/ PROJECTED_COLOC_BL1,
+ /*17*/ PROJECTED_COLOC_BR1,
+ /*18*/ PROJECTED_RIGHT0,
+ /*19*/ PROJECTED_RIGHT1,
+ /*20*/ PROJECTED_BOTTOM0,
+ /*21*/ PROJECTED_BOTTOM1,
+ /*22*/ PROJECTED_BOTTOM_RIGHT0,
+ /*23*/ PROJECTED_BOTTOM_RIGHT1,
+ /*24*/ PROJECTED_BOTTOM_LEFT0,
+ /*25*/ PROJECTED_BOTTOM_LEFT1,
+ /*26*/ ZERO_MV,
+ /*27*/ ZERO_MV_ALTREF,
+ /*28*/ ILLUSORY_CANDIDATE,
+ /*29*/ ILLUSORY_CANDIDATE,
+ /*30*/ ILLUSORY_CANDIDATE,
+ /*31*/ ILLUSORY_CANDIDATE,
+ /*32*/ ILLUSORY_CANDIDATE,
+ /*33*/ ILLUSORY_CANDIDATE,
+ /*34*/ ILLUSORY_CANDIDATE,
+ /*35*/ ILLUSORY_CANDIDATE,
+ /*36*/ ILLUSORY_CANDIDATE,
+ /*37*/ ILLUSORY_CANDIDATE,
+ /*38*/ ILLUSORY_CANDIDATE,
+ /*39*/ ILLUSORY_CANDIDATE,
+ /*40*/ ILLUSORY_CANDIDATE,
+ /*41*/ ILLUSORY_CANDIDATE,
+ /*42*/ ILLUSORY_CANDIDATE,
+ /*43*/ ILLUSORY_CANDIDATE,
+ /*44*/ ILLUSORY_CANDIDATE,
+ /*45*/ ILLUSORY_CANDIDATE,
+ /*46*/ ILLUSORY_CANDIDATE,
+ /*47*/ ILLUSORY_CANDIDATE,
+ /*48*/ ILLUSORY_CANDIDATE,
+ /*49*/ ILLUSORY_CANDIDATE,
+ /*40*/ ILLUSORY_CANDIDATE,
+ /*51*/ ILLUSORY_CANDIDATE,
+ /*52*/ ILLUSORY_CANDIDATE,
+ /*53*/ ILLUSORY_CANDIDATE },
+ { /* 0*/ SPATIAL_LEFT0,
+ /* 1*/ SPATIAL_LEFT1,
+ /* 2*/ PROJECTED_TOP0,
+ /* 3*/ PROJECTED_TOP1,
+ /* 4*/ PROJECTED_COLOC0,
+ /* 5*/ PROJECTED_COLOC1,
+ /* 6*/ PROJECTED_COLOC2,
+ /* 7*/ PROJECTED_COLOC3,
+ /* 8*/ PROJECTED_COLOC4,
+ /* 9*/ PROJECTED_COLOC5,
+ /*10*/ PROJECTED_TOP_RIGHT0,
+ /*11*/ PROJECTED_TOP_RIGHT1,
+ /*12*/ PROJECTED_TOP_LEFT0,
+ /*13*/ PROJECTED_TOP_LEFT1,
+ /*14*/ PROJECTED_RIGHT0,
+ /*15*/ PROJECTED_RIGHT1,
+ /*16*/ PROJECTED_BOTTOM0,
+ /*17*/ PROJECTED_BOTTOM1,
+ /*18*/ PROJECTED_BOTTOM_RIGHT0,
+ /*19*/ PROJECTED_BOTTOM_RIGHT1,
+ /*20*/ PROJECTED_BOTTOM_LEFT0,
+ /*21*/ PROJECTED_BOTTOM_LEFT1,
+ /*22*/ ZERO_MV,
+ /*23*/ ZERO_MV_ALTREF,
+ /*24*/ PROJECTED_TOP2,
+ /*25*/ PROJECTED_TOP_RIGHT2,
+ /*26*/ PROJECTED_TOP_LEFT2,
+ /*27*/ PROJECTED_RIGHT2,
+ /*28*/ PROJECTED_BOTTOM2,
+ /*29*/ PROJECTED_BOTTOM_RIGHT2,
+ /*30*/ PROJECTED_BOTTOM_LEFT2,
+ /*31*/ ILLUSORY_CANDIDATE,
+ /*32*/ ILLUSORY_CANDIDATE,
+ /*33*/ ILLUSORY_CANDIDATE,
+ /*34*/ ILLUSORY_CANDIDATE,
+ /*35*/ ILLUSORY_CANDIDATE,
+ /*36*/ ILLUSORY_CANDIDATE,
+ /*37*/ ILLUSORY_CANDIDATE,
+ /*38*/ ILLUSORY_CANDIDATE,
+ /*39*/ ILLUSORY_CANDIDATE,
+ /*40*/ ILLUSORY_CANDIDATE,
+ /*41*/ ILLUSORY_CANDIDATE,
+ /*42*/ ILLUSORY_CANDIDATE,
+ /*43*/ ILLUSORY_CANDIDATE,
+ /*44*/ ILLUSORY_CANDIDATE,
+ /*45*/ ILLUSORY_CANDIDATE,
+ /*46*/ ILLUSORY_CANDIDATE,
+ /*47*/ ILLUSORY_CANDIDATE,
+ /*48*/ ILLUSORY_CANDIDATE,
+ /*49*/ ILLUSORY_CANDIDATE,
+ /*40*/ ILLUSORY_CANDIDATE,
+ /*51*/ ILLUSORY_CANDIDATE,
+ /*52*/ ILLUSORY_CANDIDATE,
+ /*53*/ ILLUSORY_CANDIDATE },
+ { /* 0*/ SPATIAL_LEFT0,
+ /* 1*/ SPATIAL_LEFT1,
+ /* 2*/ PROJECTED_TOP0,
+ /* 3*/ PROJECTED_TOP1,
+ /* 4*/ PROJECTED_COLOC0,
+ /* 5*/ PROJECTED_COLOC1,
+ /* 6*/ PROJECTED_COLOC2,
+ /* 7*/ PROJECTED_COLOC3,
+ /* 8*/ PROJECTED_COLOC4,
+ /* 9*/ PROJECTED_COLOC5,
+ /*10*/ PROJECTED_TOP_RIGHT0,
+ /*11*/ PROJECTED_TOP_RIGHT1,
+ /*12*/ PROJECTED_TOP_LEFT0,
+ /*13*/ PROJECTED_TOP_LEFT1,
+ /*14*/ PROJECTED_COLOC_TR0,
+ /*15*/ PROJECTED_COLOC_BL0,
+ /*16*/ PROJECTED_COLOC_BR0,
+ /*17*/ PROJECTED_COLOC_TR1,
+ /*18*/ PROJECTED_COLOC_BL1,
+ /*19*/ PROJECTED_COLOC_BR1,
+ /*20*/ PROJECTED_RIGHT0,
+ /*21*/ PROJECTED_RIGHT1,
+ /*22*/ PROJECTED_BOTTOM0,
+ /*23*/ PROJECTED_BOTTOM1,
+ /*24*/ PROJECTED_BOTTOM_RIGHT0,
+ /*25*/ PROJECTED_BOTTOM_RIGHT1,
+ /*26*/ PROJECTED_BOTTOM_LEFT0,
+ /*27*/ PROJECTED_BOTTOM_LEFT1,
+ /*28*/ ZERO_MV,
+ /*29*/ ZERO_MV_ALTREF,
+ /*30*/ PROJECTED_TOP2,
+ /*31*/ PROJECTED_TOP_RIGHT2,
+ /*32*/ PROJECTED_TOP_LEFT2,
+ /*33*/ PROJECTED_RIGHT2,
+ /*34*/ PROJECTED_BOTTOM2,
+ /*35*/ PROJECTED_BOTTOM_RIGHT2,
+ /*36*/ PROJECTED_BOTTOM_LEFT2,
+ /*37*/ ILLUSORY_CANDIDATE,
+ /*38*/ ILLUSORY_CANDIDATE,
+ /*39*/ ILLUSORY_CANDIDATE,
+ /*40*/ ILLUSORY_CANDIDATE,
+ /*41*/ ILLUSORY_CANDIDATE,
+ /*42*/ ILLUSORY_CANDIDATE,
+ /*43*/ ILLUSORY_CANDIDATE,
+ /*44*/ ILLUSORY_CANDIDATE,
+ /*45*/ ILLUSORY_CANDIDATE,
+ /*46*/ ILLUSORY_CANDIDATE,
+ /*47*/ ILLUSORY_CANDIDATE,
+ /*48*/ ILLUSORY_CANDIDATE,
+ /*49*/ ILLUSORY_CANDIDATE,
+ /*40*/ ILLUSORY_CANDIDATE,
+ /*51*/ ILLUSORY_CANDIDATE,
+ /*52*/ ILLUSORY_CANDIDATE,
+ /*53*/ ILLUSORY_CANDIDATE },
+ { /* 0*/ SPATIAL_LEFT0,
+ /* 1*/ SPATIAL_LEFT1,
+ /* 2*/ PROJECTED_TOP0,
+ /* 3*/ PROJECTED_TOP1,
+ /* 4*/ PROJECTED_COLOC0,
+ /* 5*/ PROJECTED_COLOC1,
+ /* 6*/ PROJECTED_COLOC2,
+ /* 7*/ PROJECTED_COLOC3,
+ /* 8*/ PROJECTED_COLOC4,
+ /* 9*/ PROJECTED_COLOC5,
+ /*10*/ PROJECTED_COLOC6,
+ /*11*/ PROJECTED_COLOC7,
+ /*12*/ PROJECTED_TOP_RIGHT0,
+ /*13*/ PROJECTED_TOP_RIGHT1,
+ /*14*/ PROJECTED_TOP_LEFT0,
+ /*15*/ PROJECTED_TOP_LEFT1,
+ /*16*/ PROJECTED_RIGHT0,
+ /*17*/ PROJECTED_RIGHT1,
+ /*18*/ PROJECTED_BOTTOM0,
+ /*19*/ PROJECTED_BOTTOM1,
+ /*20*/ PROJECTED_BOTTOM_RIGHT0,
+ /*21*/ PROJECTED_BOTTOM_RIGHT1,
+ /*22*/ PROJECTED_BOTTOM_LEFT0,
+ /*23*/ PROJECTED_BOTTOM_LEFT1,
+ /*24*/ ZERO_MV,
+ /*25*/ ZERO_MV_ALTREF,
+ /*26*/ PROJECTED_TOP2,
+ /*27*/ PROJECTED_TOP_RIGHT2,
+ /*28*/ PROJECTED_TOP_LEFT2,
+ /*29*/ PROJECTED_RIGHT2,
+ /*30*/ PROJECTED_BOTTOM2,
+ /*31*/ PROJECTED_BOTTOM_RIGHT2,
+ /*32*/ PROJECTED_BOTTOM_LEFT2,
+ /*33*/ PROJECTED_TOP3,
+ /*34*/ PROJECTED_TOP_RIGHT3,
+ /*35*/ PROJECTED_TOP_LEFT3,
+ /*36*/ PROJECTED_RIGHT3,
+ /*37*/ PROJECTED_BOTTOM3,
+ /*38*/ PROJECTED_BOTTOM_RIGHT3,
+ /*39*/ PROJECTED_BOTTOM_LEFT3,
+ /*40*/ ILLUSORY_CANDIDATE,
+ /*41*/ ILLUSORY_CANDIDATE,
+ /*42*/ ILLUSORY_CANDIDATE,
+ /*43*/ ILLUSORY_CANDIDATE,
+ /*44*/ ILLUSORY_CANDIDATE,
+ /*45*/ ILLUSORY_CANDIDATE,
+ /*46*/ ILLUSORY_CANDIDATE,
+ /*47*/ ILLUSORY_CANDIDATE,
+ /*48*/ ILLUSORY_CANDIDATE,
+ /*49*/ ILLUSORY_CANDIDATE,
+ /*40*/ ILLUSORY_CANDIDATE,
+ /*51*/ ILLUSORY_CANDIDATE,
+ /*52*/ ILLUSORY_CANDIDATE,
+ /*53*/ ILLUSORY_CANDIDATE },
+ { /* 0*/ SPATIAL_LEFT0,
+ /* 1*/ SPATIAL_LEFT1,
+ /* 2*/ PROJECTED_TOP0,
+ /* 3*/ PROJECTED_TOP1,
+ /* 4*/ PROJECTED_COLOC0,
+ /* 5*/ PROJECTED_COLOC1,
+ /* 6*/ PROJECTED_COLOC2,
+ /* 7*/ PROJECTED_COLOC3,
+ /* 8*/ PROJECTED_COLOC4,
+ /* 9*/ PROJECTED_COLOC5,
+ /*10*/ PROJECTED_COLOC6,
+ /*11*/ PROJECTED_COLOC7,
+ /*12*/ PROJECTED_TOP_RIGHT0,
+ /*13*/ PROJECTED_TOP_RIGHT1,
+ /*14*/ PROJECTED_TOP_LEFT0,
+ /*15*/ PROJECTED_TOP_LEFT1,
+ /*16*/ PROJECTED_COLOC_TR0,
+ /*17*/ PROJECTED_COLOC_TR1,
+ /*18*/ PROJECTED_COLOC_BL0,
+ /*19*/ PROJECTED_COLOC_BL1,
+ /*20*/ PROJECTED_COLOC_BR0,
+ /*21*/ PROJECTED_COLOC_BR1,
+ /*22*/ PROJECTED_RIGHT0,
+ /*23*/ PROJECTED_RIGHT1,
+ /*24*/ PROJECTED_BOTTOM0,
+ /*25*/ PROJECTED_BOTTOM1,
+ /*26*/ PROJECTED_BOTTOM_RIGHT0,
+ /*27*/ PROJECTED_BOTTOM_RIGHT1,
+ /*28*/ PROJECTED_BOTTOM_LEFT0,
+ /*29*/ PROJECTED_BOTTOM_LEFT1,
+ /*30*/ ZERO_MV,
+ /*31*/ ZERO_MV_ALTREF,
+ /*32*/ PROJECTED_TOP2,
+ /*33*/ PROJECTED_TOP_RIGHT2,
+ /*34*/ PROJECTED_TOP_LEFT2,
+ /*35*/ PROJECTED_RIGHT2,
+ /*36*/ PROJECTED_BOTTOM2,
+ /*37*/ PROJECTED_BOTTOM_RIGHT2,
+ /*38*/ PROJECTED_BOTTOM_LEFT2,
+ /*39*/ PROJECTED_TOP3,
+ /*40*/ PROJECTED_TOP_RIGHT3,
+ /*41*/ PROJECTED_TOP_LEFT3,
+ /*42*/ PROJECTED_RIGHT3,
+ /*43*/ PROJECTED_BOTTOM3,
+ /*44*/ PROJECTED_BOTTOM_RIGHT3,
+ /*45*/ PROJECTED_BOTTOM_LEFT3,
+ /*46*/ ILLUSORY_CANDIDATE,
+ /*47*/ ILLUSORY_CANDIDATE,
+ /*48*/ ILLUSORY_CANDIDATE,
+ /*49*/ ILLUSORY_CANDIDATE,
+ /*40*/ ILLUSORY_CANDIDATE,
+ /*51*/ ILLUSORY_CANDIDATE,
+ /*52*/ ILLUSORY_CANDIDATE,
+ /*53*/ ILLUSORY_CANDIDATE },
+ { /* 0*/ SPATIAL_LEFT0,
+ /* 1*/ PROJECTED_TOP0,
+ /* 2*/ PROJECTED_COLOC0,
+ /* 3*/ PROJECTED_COLOC1,
+ /* 4*/ PROJECTED_TOP_RIGHT0,
+ /* 5*/ PROJECTED_TOP_LEFT0,
+ /* 6*/ PROJECTED_RIGHT0,
+ /* 7*/ PROJECTED_BOTTOM0,
+ /* 8*/ PROJECTED_BOTTOM_RIGHT0,
+ /* 9*/ PROJECTED_BOTTOM_LEFT0,
+ /*10*/ ZERO_MV,
+ /*11*/ PROJECTED_TOP1,
+ /*12*/ PROJECTED_TOP_RIGHT1,
+ /*13*/ PROJECTED_TOP_LEFT1,
+ /*14*/ PROJECTED_RIGHT1,
+ /*15*/ PROJECTED_BOTTOM1,
+ /*16*/ PROJECTED_BOTTOM_RIGHT1,
+ /*17*/ PROJECTED_BOTTOM_LEFT1,
+ /*18*/ ILLUSORY_CANDIDATE,
+ /*19*/ ILLUSORY_CANDIDATE,
+ /*20*/ ILLUSORY_CANDIDATE,
+ /*21*/ ILLUSORY_CANDIDATE,
+ /*22*/ ILLUSORY_CANDIDATE,
+ /*23*/ ILLUSORY_CANDIDATE,
+ /*24*/ ILLUSORY_CANDIDATE,
+ /*25*/ ILLUSORY_CANDIDATE,
+ /*26*/ ILLUSORY_CANDIDATE,
+ /*27*/ ILLUSORY_CANDIDATE,
+ /*28*/ ILLUSORY_CANDIDATE,
+ /*29*/ ILLUSORY_CANDIDATE,
+ /*30*/ ILLUSORY_CANDIDATE,
+ /*31*/ ILLUSORY_CANDIDATE,
+ /*32*/ ILLUSORY_CANDIDATE,
+ /*33*/ ILLUSORY_CANDIDATE,
+ /*34*/ ILLUSORY_CANDIDATE,
+ /*35*/ ILLUSORY_CANDIDATE,
+ /*36*/ ILLUSORY_CANDIDATE,
+ /*37*/ ILLUSORY_CANDIDATE,
+ /*38*/ ILLUSORY_CANDIDATE,
+ /*39*/ ILLUSORY_CANDIDATE,
+ /*40*/ ILLUSORY_CANDIDATE,
+ /*41*/ ILLUSORY_CANDIDATE,
+ /*42*/ ILLUSORY_CANDIDATE,
+ /*43*/ ILLUSORY_CANDIDATE,
+ /*44*/ ILLUSORY_CANDIDATE,
+ /*45*/ ILLUSORY_CANDIDATE,
+ /*46*/ ILLUSORY_CANDIDATE,
+ /*47*/ ILLUSORY_CANDIDATE,
+ /*48*/ ILLUSORY_CANDIDATE,
+ /*49*/ ILLUSORY_CANDIDATE,
+ /*40*/ ILLUSORY_CANDIDATE,
+ /*51*/ ILLUSORY_CANDIDATE,
+ /*52*/ ILLUSORY_CANDIDATE,
+ /*53*/ ILLUSORY_CANDIDATE },
+ { /* 0*/ SPATIAL_LEFT0,
+ /* 1*/ PROJECTED_TOP0,
+ /* 2*/ PROJECTED_COLOC0,
+ /* 3*/ PROJECTED_COLOC1,
+ /* 4*/ PROJECTED_TOP_RIGHT0,
+ /* 5*/ PROJECTED_TOP_LEFT0,
+ /* 6*/ PROJECTED_COLOC_TR0,
+ /* 7*/ PROJECTED_COLOC_BL0,
+ /* 8*/ PROJECTED_COLOC_BR0,
+ /* 9*/ PROJECTED_RIGHT0,
+ /*10*/ PROJECTED_BOTTOM0,
+ /*11*/ PROJECTED_BOTTOM_RIGHT0,
+ /*12*/ PROJECTED_BOTTOM_LEFT0,
+ /*13*/ ZERO_MV,
+ /*14*/ PROJECTED_TOP1,
+ /*15*/ PROJECTED_TOP_RIGHT1,
+ /*16*/ PROJECTED_TOP_LEFT1,
+ /*17*/ PROJECTED_RIGHT1,
+ /*18*/ PROJECTED_BOTTOM1,
+ /*19*/ PROJECTED_BOTTOM_RIGHT1,
+ /*20*/ PROJECTED_BOTTOM_LEFT1,
+ /*21*/ ILLUSORY_CANDIDATE,
+ /*22*/ ILLUSORY_CANDIDATE,
+ /*23*/ ILLUSORY_CANDIDATE,
+ /*24*/ ILLUSORY_CANDIDATE,
+ /*25*/ ILLUSORY_CANDIDATE,
+ /*26*/ ILLUSORY_CANDIDATE,
+ /*27*/ ILLUSORY_CANDIDATE,
+ /*28*/ ILLUSORY_CANDIDATE,
+ /*29*/ ILLUSORY_CANDIDATE,
+ /*30*/ ILLUSORY_CANDIDATE,
+ /*31*/ ILLUSORY_CANDIDATE,
+ /*32*/ ILLUSORY_CANDIDATE,
+ /*33*/ ILLUSORY_CANDIDATE,
+ /*34*/ ILLUSORY_CANDIDATE,
+ /*35*/ ILLUSORY_CANDIDATE,
+ /*36*/ ILLUSORY_CANDIDATE,
+ /*37*/ ILLUSORY_CANDIDATE,
+ /*38*/ ILLUSORY_CANDIDATE,
+ /*39*/ ILLUSORY_CANDIDATE,
+ /*40*/ ILLUSORY_CANDIDATE,
+ /*41*/ ILLUSORY_CANDIDATE,
+ /*42*/ ILLUSORY_CANDIDATE,
+ /*43*/ ILLUSORY_CANDIDATE,
+ /*44*/ ILLUSORY_CANDIDATE,
+ /*45*/ ILLUSORY_CANDIDATE,
+ /*46*/ ILLUSORY_CANDIDATE,
+ /*47*/ ILLUSORY_CANDIDATE,
+ /*48*/ ILLUSORY_CANDIDATE,
+ /*49*/ ILLUSORY_CANDIDATE,
+ /*40*/ ILLUSORY_CANDIDATE,
+ /*51*/ ILLUSORY_CANDIDATE,
+ /*52*/ ILLUSORY_CANDIDATE,
+ /*53*/ ILLUSORY_CANDIDATE },
+ { /* 0*/ SPATIAL_LEFT0,
+ /* 1*/ PROJECTED_TOP0,
+ /* 2*/ PROJECTED_COLOC0,
+ /* 3*/ PROJECTED_COLOC1,
+ /* 4*/ PROJECTED_TOP_RIGHT0,
+ /* 5*/ PROJECTED_TOP_LEFT0,
+ /* 6*/ PROJECTED_RIGHT0,
+ /* 7*/ PROJECTED_BOTTOM0,
+ /* 8*/ PROJECTED_BOTTOM_RIGHT0,
+ /* 9*/ PROJECTED_BOTTOM_LEFT0,
+ /*10*/ ZERO_MV,
+ /*11*/ PROJECTED_TOP1,
+ /*12*/ PROJECTED_TOP_RIGHT1,
+ /*13*/ PROJECTED_TOP_LEFT1,
+ /*14*/ PROJECTED_RIGHT1,
+ /*15*/ PROJECTED_BOTTOM1,
+ /*16*/ PROJECTED_BOTTOM_RIGHT1,
+ /*17*/ PROJECTED_BOTTOM_LEFT1,
+ /*18*/ PROJECTED_COLOC2,
+ /*19*/ PROJECTED_COLOC3,
+ /*20*/ ILLUSORY_CANDIDATE,
+ /*21*/ ILLUSORY_CANDIDATE,
+ /*22*/ ILLUSORY_CANDIDATE,
+ /*23*/ ILLUSORY_CANDIDATE,
+ /*24*/ ILLUSORY_CANDIDATE,
+ /*25*/ ILLUSORY_CANDIDATE,
+ /*26*/ ILLUSORY_CANDIDATE,
+ /*27*/ ILLUSORY_CANDIDATE,
+ /*28*/ ILLUSORY_CANDIDATE,
+ /*29*/ ILLUSORY_CANDIDATE,
+ /*30*/ ILLUSORY_CANDIDATE,
+ /*31*/ ILLUSORY_CANDIDATE,
+ /*32*/ ILLUSORY_CANDIDATE,
+ /*33*/ ILLUSORY_CANDIDATE,
+ /*34*/ ILLUSORY_CANDIDATE,
+ /*35*/ ILLUSORY_CANDIDATE,
+ /*36*/ ILLUSORY_CANDIDATE,
+ /*37*/ ILLUSORY_CANDIDATE,
+ /*38*/ ILLUSORY_CANDIDATE,
+ /*39*/ ILLUSORY_CANDIDATE,
+ /*40*/ ILLUSORY_CANDIDATE,
+ /*41*/ ILLUSORY_CANDIDATE,
+ /*42*/ ILLUSORY_CANDIDATE,
+ /*43*/ ILLUSORY_CANDIDATE,
+ /*44*/ ILLUSORY_CANDIDATE,
+ /*45*/ ILLUSORY_CANDIDATE,
+ /*46*/ ILLUSORY_CANDIDATE,
+ /*47*/ ILLUSORY_CANDIDATE,
+ /*48*/ ILLUSORY_CANDIDATE,
+ /*49*/ ILLUSORY_CANDIDATE,
+ /*40*/ ILLUSORY_CANDIDATE,
+ /*51*/ ILLUSORY_CANDIDATE,
+ /*52*/ ILLUSORY_CANDIDATE,
+ /*53*/ ILLUSORY_CANDIDATE },
+ { /* 0*/ SPATIAL_LEFT0,
+ /* 1*/ PROJECTED_TOP0,
+ /* 2*/ PROJECTED_COLOC0,
+ /* 3*/ PROJECTED_COLOC1,
+ /* 4*/ PROJECTED_TOP_RIGHT0,
+ /* 5*/ PROJECTED_TOP_LEFT0,
+ /* 6*/ PROJECTED_COLOC_TR0,
+ /* 7*/ PROJECTED_COLOC_BL0,
+ /* 8*/ PROJECTED_COLOC_BR0,
+ /* 9*/ PROJECTED_RIGHT0,
+ /*10*/ PROJECTED_BOTTOM0,
+ /*11*/ PROJECTED_BOTTOM_RIGHT0,
+ /*12*/ PROJECTED_BOTTOM_LEFT0,
+ /*13*/ ZERO_MV,
+ /*14*/ PROJECTED_TOP1,
+ /*15*/ PROJECTED_TOP_RIGHT1,
+ /*16*/ PROJECTED_TOP_LEFT1,
+ /*17*/ PROJECTED_RIGHT1,
+ /*18*/ PROJECTED_BOTTOM1,
+ /*19*/ PROJECTED_BOTTOM_RIGHT1,
+ /*20*/ PROJECTED_BOTTOM_LEFT1,
+ /*21*/ PROJECTED_COLOC2,
+ /*22*/ PROJECTED_COLOC3,
+ /*23*/ ILLUSORY_CANDIDATE,
+ /*24*/ ILLUSORY_CANDIDATE,
+ /*25*/ ILLUSORY_CANDIDATE,
+ /*26*/ ILLUSORY_CANDIDATE,
+ /*27*/ ILLUSORY_CANDIDATE,
+ /*28*/ ILLUSORY_CANDIDATE,
+ /*29*/ ILLUSORY_CANDIDATE,
+ /*30*/ ILLUSORY_CANDIDATE,
+ /*31*/ ILLUSORY_CANDIDATE,
+ /*32*/ ILLUSORY_CANDIDATE,
+ /*33*/ ILLUSORY_CANDIDATE,
+ /*34*/ ILLUSORY_CANDIDATE,
+ /*35*/ ILLUSORY_CANDIDATE,
+ /*36*/ ILLUSORY_CANDIDATE,
+ /*37*/ ILLUSORY_CANDIDATE,
+ /*38*/ ILLUSORY_CANDIDATE,
+ /*39*/ ILLUSORY_CANDIDATE,
+ /*40*/ ILLUSORY_CANDIDATE,
+ /*41*/ ILLUSORY_CANDIDATE,
+ /*42*/ ILLUSORY_CANDIDATE,
+ /*43*/ ILLUSORY_CANDIDATE,
+ /*44*/ ILLUSORY_CANDIDATE,
+ /*45*/ ILLUSORY_CANDIDATE,
+ /*46*/ ILLUSORY_CANDIDATE,
+ /*47*/ ILLUSORY_CANDIDATE,
+ /*48*/ ILLUSORY_CANDIDATE,
+ /*49*/ ILLUSORY_CANDIDATE,
+ /*40*/ ILLUSORY_CANDIDATE,
+ /*51*/ ILLUSORY_CANDIDATE,
+ /*52*/ ILLUSORY_CANDIDATE,
+ /*53*/ ILLUSORY_CANDIDATE }
+ };
+
+const U08 gau1_max_num_search_cands_in_l0_me[12] = { /* Search-candidate counts; going by the name, the maximum number of candidates searched in L0 ME (12 entries). */
+ 11, 14, 22, 28, 31, 37, 40, 46, 18, 21, 20, 23 /* The last ten values equal the number of non-ILLUSORY_CANDIDATE entries in the last ten rows of the candidate table defined just above; the first two presumably describe its earlier rows - TODO confirm. */
+};
+
+const SEARCH_CAND_LOCATIONS_T gae_search_cand_type_to_location_map[NUM_SEARCH_CAND_TYPES] = { /* Location associated with each search candidate type; the trailing comment on every entry names the candidate type that entry is written for. */
+ ILLUSORY_LOCATION, //ZERO_MV (no neighbour/colocated position; ILLUSORY_LOCATION is presumably a placeholder - confirm)
+ ILLUSORY_LOCATION, //ZERO_MV_ALTREF
+ LEFT, //SPATIAL_LEFT0
+ TOP, //SPATIAL_TOP0
+ TOPRIGHT, //SPATIAL_TOP_RIGHT0
+ TOPLEFT, //SPATIAL_TOP_LEFT0
+ LEFT, //SPATIAL_LEFT1
+ TOP, //SPATIAL_TOP1
+ TOPRIGHT, //SPATIAL_TOP_RIGHT1
+ TOPLEFT, //SPATIAL_TOP_LEFT1
+ COLOCATED, //PROJECTED_COLOC0
+ COLOCATED, //PROJECTED_COLOC1
+ COLOCATED, //PROJECTED_COLOC2
+ COLOCATED, //PROJECTED_COLOC3
+ COLOCATED, //PROJECTED_COLOC4
+ COLOCATED, //PROJECTED_COLOC5
+ COLOCATED, //PROJECTED_COLOC6
+ COLOCATED, //PROJECTED_COLOC7
+ COLOCATED_4x4_TR, //PROJECTED_COLOC_TR0
+ COLOCATED_4x4_TR, //PROJECTED_COLOC_TR1
+ COLOCATED_4x4_BL, //PROJECTED_COLOC_BL0
+ COLOCATED_4x4_BL, //PROJECTED_COLOC_BL1
+ COLOCATED_4x4_BR, //PROJECTED_COLOC_BR0
+ COLOCATED_4x4_BR, //PROJECTED_COLOC_BR1
+ TOP, //PROJECTED_TOP0
+ TOP, //PROJECTED_TOP1
+ TOPRIGHT, //PROJECTED_TOP_RIGHT0
+ TOPRIGHT, //PROJECTED_TOP_RIGHT1
+ TOPLEFT, //PROJECTED_TOP_LEFT0
+ TOPLEFT, //PROJECTED_TOP_LEFT1
+ RIGHT, //PROJECTED_RIGHT0
+ RIGHT, //PROJECTED_RIGHT1
+ BOTTOM, //PROJECTED_BOTTOM0
+ BOTTOM, //PROJECTED_BOTTOM1
+ BOTTOMRIGHT, //PROJECTED_BOTTOM_RIGHT0
+ BOTTOMRIGHT, //PROJECTED_BOTTOM_RIGHT1
+ BOTTOMLEFT, //PROJECTED_BOTTOM_LEFT0
+ BOTTOMLEFT, //PROJECTED_BOTTOM_LEFT1
+ ILLUSORY_LOCATION, //COLOCATED_GLOBAL_MV0
+ ILLUSORY_LOCATION, //COLOCATED_GLOBAL_MV1
+ TOP, //PROJECTED_TOP2
+ TOP, //PROJECTED_TOP3
+ TOPRIGHT, //PROJECTED_TOP_RIGHT2
+ TOPRIGHT, //PROJECTED_TOP_RIGHT3
+ TOPLEFT, //PROJECTED_TOP_LEFT2
+ TOPLEFT, //PROJECTED_TOP_LEFT3
+ RIGHT, //PROJECTED_RIGHT2
+ RIGHT, //PROJECTED_RIGHT3
+ BOTTOM, //PROJECTED_BOTTOM2
+ BOTTOM, //PROJECTED_BOTTOM3
+ BOTTOMRIGHT, //PROJECTED_BOTTOM_RIGHT2
+ BOTTOMRIGHT, //PROJECTED_BOTTOM_RIGHT3
+ BOTTOMLEFT, //PROJECTED_BOTTOM_LEFT2
+ BOTTOMLEFT //PROJECTED_BOTTOM_LEFT3
+}; /* NOTE(review): entries are positional, so their order must match the numeric order of the candidate-type enum, which is not visible here - confirm when adding or reordering types. */
+
+/* Class of each search candidate type: 0 => projected; 1 => spatial; 2 => others (in this table only ZERO_MV and ZERO_MV_ALTREF). Each entry's trailing comment names the candidate type it is written for. */
+const U08 gau1_search_cand_type_to_spatiality_map[NUM_SEARCH_CAND_TYPES] = {
+ 2, //ZERO_MV
+ 2, //ZERO_MV_ALTREF
+ 1, //SPATIAL_LEFT0
+ 1, //SPATIAL_TOP0
+ 1, //SPATIAL_TOP_RIGHT0
+ 1, //SPATIAL_TOP_LEFT0
+ 1, //SPATIAL_LEFT1
+ 1, //SPATIAL_TOP1
+ 1, //SPATIAL_TOP_RIGHT1
+ 1, //SPATIAL_TOP_LEFT1
+ 0, //PROJECTED_COLOC0
+ 0, //PROJECTED_COLOC1
+ 0, //PROJECTED_COLOC2
+ 0, //PROJECTED_COLOC3
+ 0, //PROJECTED_COLOC4
+ 0, //PROJECTED_COLOC5
+ 0, //PROJECTED_COLOC6
+ 0, //PROJECTED_COLOC7
+ 0, //PROJECTED_COLOC_TR0
+ 0, //PROJECTED_COLOC_TR1
+ 0, //PROJECTED_COLOC_BL0
+ 0, //PROJECTED_COLOC_BL1
+ 0, //PROJECTED_COLOC_BR0
+ 0, //PROJECTED_COLOC_BR1
+ 0, //PROJECTED_TOP0
+ 0, //PROJECTED_TOP1
+ 0, //PROJECTED_TOP_RIGHT0
+ 0, //PROJECTED_TOP_RIGHT1
+ 0, //PROJECTED_TOP_LEFT0
+ 0, //PROJECTED_TOP_LEFT1
+ 0, //PROJECTED_RIGHT0
+ 0, //PROJECTED_RIGHT1
+ 0, //PROJECTED_BOTTOM0
+ 0, //PROJECTED_BOTTOM1
+ 0, //PROJECTED_BOTTOM_RIGHT0
+ 0, //PROJECTED_BOTTOM_RIGHT1
+ 0, //PROJECTED_BOTTOM_LEFT0
+ 0, //PROJECTED_BOTTOM_LEFT1
+ 0, //COLOCATED_GLOBAL_MV0 - NOTE(review): both COLOCATED_GLOBAL_MV types are classed as projected (0) while their location-map entries are ILLUSORY_LOCATION; confirm intended
+ 0, //COLOCATED_GLOBAL_MV1
+ 0, //PROJECTED_TOP2
+ 0, //PROJECTED_TOP3
+ 0, //PROJECTED_TOP_RIGHT2
+ 0, //PROJECTED_TOP_RIGHT3
+ 0, //PROJECTED_TOP_LEFT2
+ 0, //PROJECTED_TOP_LEFT3
+ 0, //PROJECTED_RIGHT2
+ 0, //PROJECTED_RIGHT3
+ 0, //PROJECTED_BOTTOM2
+ 0, //PROJECTED_BOTTOM3
+ 0, //PROJECTED_BOTTOM_RIGHT2
+ 0, //PROJECTED_BOTTOM_RIGHT3
+ 0, //PROJECTED_BOTTOM_LEFT2
+ 0, //PROJECTED_BOTTOM_LEFT3
+};
+
+const S08 gai1_search_cand_type_to_result_id_map[NUM_SEARCH_CAND_TYPES] = { /* Result id for each search candidate type. In every entry the value equals the numeric suffix of the type named in its comment (ZERO_MV -> 0, ZERO_MV_ALTREF -> 1); presumably it selects which stored result at the mapped location is used - TODO confirm against callers. */
+ 0, //ZERO_MV
+ 1, //ZERO_MV_ALTREF
+ 0, //SPATIAL_LEFT0
+ 0, //SPATIAL_TOP0
+ 0, //SPATIAL_TOP_RIGHT0
+ 0, //SPATIAL_TOP_LEFT0
+ 1, //SPATIAL_LEFT1
+ 1, //SPATIAL_TOP1
+ 1, //SPATIAL_TOP_RIGHT1
+ 1, //SPATIAL_TOP_LEFT1
+ 0, //PROJECTED_COLOC0
+ 1, //PROJECTED_COLOC1
+ 2, //PROJECTED_COLOC2
+ 3, //PROJECTED_COLOC3
+ 4, //PROJECTED_COLOC4
+ 5, //PROJECTED_COLOC5
+ 6, //PROJECTED_COLOC6
+ 7, //PROJECTED_COLOC7
+ 0, //PROJECTED_COLOC_TR0
+ 1, //PROJECTED_COLOC_TR1
+ 0, //PROJECTED_COLOC_BL0
+ 1, //PROJECTED_COLOC_BL1
+ 0, //PROJECTED_COLOC_BR0
+ 1, //PROJECTED_COLOC_BR1
+ 0, //PROJECTED_TOP0
+ 1, //PROJECTED_TOP1
+ 0, //PROJECTED_TOP_RIGHT0
+ 1, //PROJECTED_TOP_RIGHT1
+ 0, //PROJECTED_TOP_LEFT0
+ 1, //PROJECTED_TOP_LEFT1
+ 0, //PROJECTED_RIGHT0
+ 1, //PROJECTED_RIGHT1
+ 0, //PROJECTED_BOTTOM0
+ 1, //PROJECTED_BOTTOM1
+ 0, //PROJECTED_BOTTOM_RIGHT0
+ 1, //PROJECTED_BOTTOM_RIGHT1
+ 0, //PROJECTED_BOTTOM_LEFT0
+ 1, //PROJECTED_BOTTOM_LEFT1
+ 0, //COLOCATED_GLOBAL_MV0
+ 1, //COLOCATED_GLOBAL_MV1
+ 2, //PROJECTED_TOP2
+ 3, //PROJECTED_TOP3
+ 2, //PROJECTED_TOP_RIGHT2
+ 3, //PROJECTED_TOP_RIGHT3
+ 2, //PROJECTED_TOP_LEFT2
+ 3, //PROJECTED_TOP_LEFT3
+ 2, //PROJECTED_RIGHT2
+ 3, //PROJECTED_RIGHT3
+ 2, //PROJECTED_BOTTOM2
+ 3, //PROJECTED_BOTTOM3
+ 2, //PROJECTED_BOTTOM_RIGHT2
+ 3, //PROJECTED_BOTTOM_RIGHT3
+ 2, //PROJECTED_BOTTOM_LEFT2
+ 3 //PROJECTED_BOTTOM_LEFT3
+};
+
+const S32 gai4_search_cand_location_to_x_offset_map[NUM_SEARCH_CAND_LOCATIONS] = { /* Horizontal offset for each search candidate location (12 entries, positional). Location names in the entry comments are inferred from the (x, y) offset pairs of this map and the y-offset map, assuming -x is left and -y is up - confirm against SEARCH_CAND_LOCATIONS_T. */
+ COLOCATED_BLOCK_OFFSET, /* [0] presumably COLOCATED */
+ COLOCATED_4X4_NEXT_BLOCK_OFFSET, /* [1] presumably COLOCATED_4x4_TR */
+ COLOCATED_BLOCK_OFFSET, /* [2] presumably COLOCATED_4x4_BL */
+ COLOCATED_4X4_NEXT_BLOCK_OFFSET, /* [3] presumably COLOCATED_4x4_BR */
+ -PREV_BLOCK_OFFSET_IN_L0_ME, /* [4] presumably LEFT */
+ -PREV_BLOCK_OFFSET_IN_L0_ME, /* [5] presumably TOPLEFT */
+ 0, /* [6] presumably TOP */
+ NEXT_BLOCK_OFFSET_IN_L0_ME, /* [7] presumably TOPRIGHT */
+ NEXT_BLOCK_OFFSET_IN_L0_ME, /* [8] presumably RIGHT */
+ NEXT_BLOCK_OFFSET_IN_L0_ME, /* [9] presumably BOTTOMRIGHT */
+ 0, /* [10] presumably BOTTOM */
+ -PREV_BLOCK_OFFSET_IN_L0_ME /* [11] presumably BOTTOMLEFT */
+};
+
+const S32 gai4_search_cand_location_to_y_offset_map[NUM_SEARCH_CAND_LOCATIONS] = { /* Vertical offset for each search candidate location (12 entries, positional). Location names in the entry comments are inferred from the (x, y) offset pairs of the x-offset map and this map, assuming -x is left and -y is up - confirm against SEARCH_CAND_LOCATIONS_T. */
+ COLOCATED_BLOCK_OFFSET, /* [0] presumably COLOCATED */
+ COLOCATED_BLOCK_OFFSET, /* [1] presumably COLOCATED_4x4_TR */
+ COLOCATED_4X4_NEXT_BLOCK_OFFSET, /* [2] presumably COLOCATED_4x4_BL */
+ COLOCATED_4X4_NEXT_BLOCK_OFFSET, /* [3] presumably COLOCATED_4x4_BR */
+ 0, /* [4] presumably LEFT */
+ -PREV_BLOCK_OFFSET_IN_L0_ME, /* [5] presumably TOPLEFT */
+ -PREV_BLOCK_OFFSET_IN_L0_ME, /* [6] presumably TOP */
+ -PREV_BLOCK_OFFSET_IN_L0_ME, /* [7] presumably TOPRIGHT */
+ 0, /* [8] presumably RIGHT */
+ NEXT_BLOCK_OFFSET_IN_L0_ME, /* [9] presumably BOTTOMRIGHT */
+ NEXT_BLOCK_OFFSET_IN_L0_ME, /* [10] presumably BOTTOM */
+ NEXT_BLOCK_OFFSET_IN_L0_ME /* [11] presumably BOTTOMLEFT */
+};
+
+/**
+*******************************************************************************
+* @brief Precomputed range table, used exclusively in the Intrinsics version of the
+* function 'hme_combine_4x4_sads_and_compute_cost_high_quality' instead of calling get_range().
+* The initializer has 257 rows of 8 values; entry [r][c] follows d = (r - 128) + 2 * c: 1 for d == 0, 2 for |d| == 1, 4 for |d| in 2..3, 6 for 4..7, 8 for 8..15, 10 for 16..31, 12 for 32..63, 14 for 64..127, 16 for |d| >= 128.
+*******************************************************************************
+*/
+S16 gi2_mvx_range_high_quality[MAX_MVX_SUPPORTED_IN_COARSE_LAYER * 2 + 1][8] = { /* NOTE(review): not const-qualified, unlike the neighbouring lookup tables - confirm whether any code writes to it. 257 initializer rows imply MAX_MVX_SUPPORTED_IN_COARSE_LAYER == 128 if the array is exactly filled - TODO confirm. */
+ { 16, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 12 },
+ { 14, 14, 14, 14, 14, 14, 14, 12 }, { 14, 14, 14, 14, 14, 14, 12, 12 },
+ { 14, 14, 14, 14, 14, 14, 12, 12 }, { 14, 14, 14, 14, 14, 12, 12, 12 },
+ { 14, 14, 14, 14, 14, 12, 12, 12 }, { 14, 14, 14, 14, 12, 12, 12, 12 },
+ { 14, 14, 14, 14, 12, 12, 12, 12 }, { 14, 14, 14, 12, 12, 12, 12, 12 },
+ { 14, 14, 14, 12, 12, 12, 12, 12 }, { 14, 14, 12, 12, 12, 12, 12, 12 },
+ { 14, 14, 12, 12, 12, 12, 12, 12 }, { 14, 12, 12, 12, 12, 12, 12, 12 },
+ { 14, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 10 },
+ { 12, 12, 12, 12, 12, 12, 12, 10 }, { 12, 12, 12, 12, 12, 12, 10, 10 },
+ { 12, 12, 12, 12, 12, 12, 10, 10 }, { 12, 12, 12, 12, 12, 10, 10, 10 },
+ { 12, 12, 12, 12, 12, 10, 10, 10 }, { 12, 12, 12, 12, 10, 10, 10, 10 },
+ { 12, 12, 12, 12, 10, 10, 10, 10 }, { 12, 12, 12, 10, 10, 10, 10, 10 },
+ { 12, 12, 12, 10, 10, 10, 10, 10 }, { 12, 12, 10, 10, 10, 10, 10, 10 },
+ { 12, 12, 10, 10, 10, 10, 10, 10 }, { 12, 10, 10, 10, 10, 10, 10, 10 },
+ { 12, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 8 },
+ { 10, 10, 10, 10, 10, 10, 10, 8 }, { 10, 10, 10, 10, 10, 10, 8, 8 },
+ { 10, 10, 10, 10, 10, 10, 8, 8 }, { 10, 10, 10, 10, 10, 8, 8, 8 },
+ { 10, 10, 10, 10, 10, 8, 8, 8 }, { 10, 10, 10, 10, 8, 8, 8, 8 },
+ { 10, 10, 10, 10, 8, 8, 8, 8 }, { 10, 10, 10, 8, 8, 8, 8, 6 },
+ { 10, 10, 10, 8, 8, 8, 8, 6 }, { 10, 10, 8, 8, 8, 8, 6, 6 },
+ { 10, 10, 8, 8, 8, 8, 6, 6 }, { 10, 8, 8, 8, 8, 6, 6, 4 },
+ { 10, 8, 8, 8, 8, 6, 6, 4 }, { 8, 8, 8, 8, 6, 6, 4, 2 },
+ { 8, 8, 8, 8, 6, 6, 4, 1 }, { 8, 8, 8, 6, 6, 4, 2, 2 },
+ { 8, 8, 8, 6, 6, 4, 1, 4 }, { 8, 8, 6, 6, 4, 2, 2, 4 },
+ { 8, 8, 6, 6, 4, 1, 4, 6 }, { 8, 6, 6, 4, 2, 2, 4, 6 },
+ { 8, 6, 6, 4, 1, 4, 6, 6 }, { 6, 6, 4, 2, 2, 4, 6, 6 },
+ { 6, 6, 4, 1, 4, 6, 6, 8 }, { 6, 4, 2, 2, 4, 6, 6, 8 },
+ { 6, 4, 1, 4, 6, 6, 8, 8 }, { 4, 2, 2, 4, 6, 6, 8, 8 },
+ { 4, 1, 4, 6, 6, 8, 8, 8 }, { 2, 2, 4, 6, 6, 8, 8, 8 },
+ { 1, 4, 6, 6, 8, 8, 8, 8 }, { 2, 4, 6, 6, 8, 8, 8, 8 },
+ { 4, 6, 6, 8, 8, 8, 8, 10 }, { 4, 6, 6, 8, 8, 8, 8, 10 },
+ { 6, 6, 8, 8, 8, 8, 10, 10 }, { 6, 6, 8, 8, 8, 8, 10, 10 },
+ { 6, 8, 8, 8, 8, 10, 10, 10 }, { 6, 8, 8, 8, 8, 10, 10, 10 },
+ { 8, 8, 8, 8, 10, 10, 10, 10 }, { 8, 8, 8, 8, 10, 10, 10, 10 },
+ { 8, 8, 8, 10, 10, 10, 10, 10 }, { 8, 8, 8, 10, 10, 10, 10, 10 },
+ { 8, 8, 10, 10, 10, 10, 10, 10 }, { 8, 8, 10, 10, 10, 10, 10, 10 },
+ { 8, 10, 10, 10, 10, 10, 10, 10 }, { 8, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 12 }, { 10, 10, 10, 10, 10, 10, 10, 12 },
+ { 10, 10, 10, 10, 10, 10, 12, 12 }, { 10, 10, 10, 10, 10, 10, 12, 12 },
+ { 10, 10, 10, 10, 10, 12, 12, 12 }, { 10, 10, 10, 10, 10, 12, 12, 12 },
+ { 10, 10, 10, 10, 12, 12, 12, 12 }, { 10, 10, 10, 10, 12, 12, 12, 12 },
+ { 10, 10, 10, 12, 12, 12, 12, 12 }, { 10, 10, 10, 12, 12, 12, 12, 12 },
+ { 10, 10, 12, 12, 12, 12, 12, 12 }, { 10, 10, 12, 12, 12, 12, 12, 12 },
+ { 10, 12, 12, 12, 12, 12, 12, 12 }, { 10, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 14 }, { 12, 12, 12, 12, 12, 12, 12, 14 },
+ { 12, 12, 12, 12, 12, 12, 14, 14 }, { 12, 12, 12, 12, 12, 12, 14, 14 },
+ { 12, 12, 12, 12, 12, 14, 14, 14 }, { 12, 12, 12, 12, 12, 14, 14, 14 },
+ { 12, 12, 12, 12, 14, 14, 14, 14 }, { 12, 12, 12, 12, 14, 14, 14, 14 },
+ { 12, 12, 12, 14, 14, 14, 14, 14 }, { 12, 12, 12, 14, 14, 14, 14, 14 },
+ { 12, 12, 14, 14, 14, 14, 14, 14 }, { 12, 12, 14, 14, 14, 14, 14, 14 },
+ { 12, 14, 14, 14, 14, 14, 14, 14 }, { 12, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 16 }, { 14, 14, 14, 14, 14, 14, 14, 16 },
+ { 14, 14, 14, 14, 14, 14, 16, 16 }, { 14, 14, 14, 14, 14, 14, 16, 16 },
+ { 14, 14, 14, 14, 14, 16, 16, 16 }, { 14, 14, 14, 14, 14, 16, 16, 16 },
+ { 14, 14, 14, 14, 16, 16, 16, 16 }, { 14, 14, 14, 14, 16, 16, 16, 16 },
+ { 14, 14, 14, 16, 16, 16, 16, 16 }, { 14, 14, 14, 16, 16, 16, 16, 16 },
+ { 14, 14, 16, 16, 16, 16, 16, 16 }, { 14, 14, 16, 16, 16, 16, 16, 16 },
+ { 14, 16, 16, 16, 16, 16, 16, 16 }, { 14, 16, 16, 16, 16, 16, 16, 16 },
+ { 16, 16, 16, 16, 16, 16, 16, 16 }
+};
+
+const S16 gai2_mvx_range_mapping[8193][8] = {
+ { 15, 14, 14, 15, 14, 15, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 13, 14, 13, 14, 14, 13 },
+ { 14, 14, 13, 14, 13, 14, 14, 13 }, { 14, 14, 13, 14, 13, 14, 14, 13 },
+ { 14, 14, 13, 14, 13, 14, 14, 13 }, { 14, 13, 13, 14, 13, 14, 13, 13 },
+ { 14, 13, 13, 14, 13, 14, 13, 13 }, { 14, 13, 13, 14, 13, 14, 13, 13 },
+ { 14, 13, 13, 14, 13, 14, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 12, 13, 12, 13, 13, 12 },
+ { 13, 13, 12, 13, 12, 13, 13, 12 }, { 13, 13, 12, 13, 12, 13, 13, 12 },
+ { 13, 13, 12, 13, 12, 13, 13, 12 }, { 13, 12, 12, 13, 12, 13, 12, 12 },
+ { 13, 12, 12, 13, 12, 13, 12, 12 }, { 13, 12, 12, 13, 12, 13, 12, 12 },
+ { 13, 12, 12, 13, 12, 13, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 11, 12, 11, 12, 12, 11 },
+ { 12, 12, 11, 12, 11, 12, 12, 11 }, { 12, 12, 11, 12, 11, 12, 12, 11 },
+ { 12, 12, 11, 12, 11, 12, 12, 11 }, { 12, 11, 11, 12, 11, 12, 11, 11 },
+ { 12, 11, 11, 12, 11, 12, 11, 11 }, { 12, 11, 11, 12, 11, 12, 11, 11 },
+ { 12, 11, 11, 12, 11, 12, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 10, 11, 10, 11, 11, 10 },
+ { 11, 11, 10, 11, 10, 11, 11, 10 }, { 11, 11, 10, 11, 10, 11, 11, 10 },
+ { 11, 11, 10, 11, 10, 11, 11, 10 }, { 11, 10, 10, 11, 10, 11, 10, 10 },
+ { 11, 10, 10, 11, 10, 11, 10, 10 }, { 11, 10, 10, 11, 10, 11, 10, 10 },
+ { 11, 10, 10, 11, 10, 11, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 9, 10, 9, 10, 10, 9 },
+ { 10, 10, 9, 10, 9, 10, 10, 9 }, { 10, 10, 9, 10, 9, 10, 10, 9 },
+ { 10, 10, 9, 10, 9, 10, 10, 9 }, { 10, 9, 9, 10, 9, 10, 9, 9 },
+ { 10, 9, 9, 10, 9, 10, 9, 9 }, { 10, 9, 9, 10, 9, 10, 9, 9 },
+ { 10, 9, 9, 10, 9, 10, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 8, 9, 8, 9, 9, 8 },
+ { 9, 9, 8, 9, 8, 9, 9, 8 }, { 9, 9, 8, 9, 8, 9, 9, 8 },
+ { 9, 9, 8, 9, 8, 9, 9, 8 }, { 9, 8, 8, 9, 8, 9, 8, 8 },
+ { 9, 8, 8, 9, 8, 9, 8, 8 }, { 9, 8, 8, 9, 8, 9, 8, 8 },
+ { 9, 8, 8, 9, 8, 9, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 7, 8, 7, 8, 8, 7 },
+ { 8, 8, 7, 8, 7, 8, 8, 7 }, { 8, 8, 7, 8, 7, 8, 8, 7 },
+ { 8, 8, 7, 8, 7, 8, 8, 7 }, { 8, 7, 7, 8, 7, 8, 7, 7 },
+ { 8, 7, 7, 8, 7, 8, 7, 7 }, { 8, 7, 7, 8, 7, 8, 7, 7 },
+ { 8, 7, 7, 8, 7, 8, 7, 7 }, { 7, 7, 7, 7, 7, 7, 7, 7 },
+ { 7, 7, 7, 7, 7, 7, 7, 7 }, { 7, 7, 7, 7, 7, 7, 7, 7 },
+ { 7, 7, 7, 7, 7, 7, 7, 7 }, { 7, 7, 7, 7, 7, 7, 7, 7 },
+ { 7, 7, 7, 7, 7, 7, 7, 7 }, { 7, 7, 7, 7, 7, 7, 7, 7 },
+ { 7, 7, 7, 7, 7, 7, 7, 7 }, { 7, 7, 6, 7, 6, 7, 7, 6 },
+ { 7, 7, 6, 7, 6, 7, 7, 6 }, { 7, 7, 6, 7, 6, 7, 7, 6 },
+ { 7, 7, 6, 7, 6, 7, 7, 6 }, { 7, 6, 6, 7, 6, 7, 6, 6 },
+ { 7, 6, 6, 7, 6, 7, 6, 6 }, { 7, 6, 6, 7, 6, 7, 6, 6 },
+ { 7, 6, 6, 7, 6, 7, 6, 6 }, { 6, 6, 5, 6, 5, 6, 6, 5 },
+ { 6, 6, 5, 6, 5, 6, 6, 5 }, { 6, 6, 5, 6, 5, 6, 6, 5 },
+ { 6, 6, 5, 6, 5, 6, 6, 5 }, { 6, 5, 4, 6, 4, 6, 5, 4 },
+ { 6, 5, 4, 6, 4, 6, 5, 4 }, { 6, 5, 3, 6, 3, 6, 5, 3 },
+ { 6, 5, 2, 6, 2, 6, 5, 2 }, { 5, 4, 3, 5, 3, 5, 4, 3 },
+ { 5, 4, 4, 5, 4, 5, 4, 4 }, { 5, 3, 4, 5, 4, 5, 3, 4 },
+ { 5, 2, 5, 5, 5, 5, 2, 5 }, { 4, 3, 5, 4, 5, 4, 3, 5 },
+ { 4, 4, 5, 4, 5, 4, 4, 5 }, { 3, 4, 5, 3, 5, 3, 4, 5 },
+ { 2, 5, 6, 2, 6, 2, 5, 6 }, { 3, 5, 6, 3, 6, 3, 5, 6 },
+ { 4, 5, 6, 4, 6, 4, 5, 6 }, { 4, 5, 6, 4, 6, 4, 5, 6 },
+ { 5, 6, 6, 5, 6, 5, 6, 6 }, { 5, 6, 6, 5, 6, 5, 6, 6 },
+ { 5, 6, 6, 5, 6, 5, 6, 6 }, { 5, 6, 6, 5, 6, 5, 6, 6 },
+ { 6, 6, 7, 6, 7, 6, 6, 7 }, { 6, 6, 7, 6, 7, 6, 6, 7 },
+ { 6, 6, 7, 6, 7, 6, 6, 7 }, { 6, 6, 7, 6, 7, 6, 6, 7 },
+ { 6, 7, 7, 6, 7, 6, 7, 7 }, { 6, 7, 7, 6, 7, 6, 7, 7 },
+ { 6, 7, 7, 6, 7, 6, 7, 7 }, { 6, 7, 7, 6, 7, 6, 7, 7 },
+ { 7, 7, 7, 7, 7, 7, 7, 7 }, { 7, 7, 7, 7, 7, 7, 7, 7 },
+ { 7, 7, 7, 7, 7, 7, 7, 7 }, { 7, 7, 7, 7, 7, 7, 7, 7 },
+ { 7, 7, 7, 7, 7, 7, 7, 7 }, { 7, 7, 7, 7, 7, 7, 7, 7 },
+ { 7, 7, 7, 7, 7, 7, 7, 7 }, { 7, 7, 7, 7, 7, 7, 7, 7 },
+ { 7, 7, 8, 7, 8, 7, 7, 8 }, { 7, 7, 8, 7, 8, 7, 7, 8 },
+ { 7, 7, 8, 7, 8, 7, 7, 8 }, { 7, 7, 8, 7, 8, 7, 7, 8 },
+ { 7, 8, 8, 7, 8, 7, 8, 8 }, { 7, 8, 8, 7, 8, 7, 8, 8 },
+ { 7, 8, 8, 7, 8, 7, 8, 8 }, { 7, 8, 8, 7, 8, 7, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 9, 8, 9, 8, 8, 9 }, { 8, 8, 9, 8, 9, 8, 8, 9 },
+ { 8, 8, 9, 8, 9, 8, 8, 9 }, { 8, 8, 9, 8, 9, 8, 8, 9 },
+ { 8, 9, 9, 8, 9, 8, 9, 9 }, { 8, 9, 9, 8, 9, 8, 9, 9 },
+ { 8, 9, 9, 8, 9, 8, 9, 9 }, { 8, 9, 9, 8, 9, 8, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 10, 9, 10, 9, 9, 10 }, { 9, 9, 10, 9, 10, 9, 9, 10 },
+ { 9, 9, 10, 9, 10, 9, 9, 10 }, { 9, 9, 10, 9, 10, 9, 9, 10 },
+ { 9, 10, 10, 9, 10, 9, 10, 10 }, { 9, 10, 10, 9, 10, 9, 10, 10 },
+ { 9, 10, 10, 9, 10, 9, 10, 10 }, { 9, 10, 10, 9, 10, 9, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 11, 10, 11, 10, 10, 11 }, { 10, 10, 11, 10, 11, 10, 10, 11 },
+ { 10, 10, 11, 10, 11, 10, 10, 11 }, { 10, 10, 11, 10, 11, 10, 10, 11 },
+ { 10, 11, 11, 10, 11, 10, 11, 11 }, { 10, 11, 11, 10, 11, 10, 11, 11 },
+ { 10, 11, 11, 10, 11, 10, 11, 11 }, { 10, 11, 11, 10, 11, 10, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 12, 11, 12, 11, 11, 12 }, { 11, 11, 12, 11, 12, 11, 11, 12 },
+ { 11, 11, 12, 11, 12, 11, 11, 12 }, { 11, 11, 12, 11, 12, 11, 11, 12 },
+ { 11, 12, 12, 11, 12, 11, 12, 12 }, { 11, 12, 12, 11, 12, 11, 12, 12 },
+ { 11, 12, 12, 11, 12, 11, 12, 12 }, { 11, 12, 12, 11, 12, 11, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 13, 12, 13, 12, 12, 13 }, { 12, 12, 13, 12, 13, 12, 12, 13 },
+ { 12, 12, 13, 12, 13, 12, 12, 13 }, { 12, 12, 13, 12, 13, 12, 12, 13 },
+ { 12, 13, 13, 12, 13, 12, 13, 13 }, { 12, 13, 13, 12, 13, 12, 13, 13 },
+ { 12, 13, 13, 12, 13, 12, 13, 13 }, { 12, 13, 13, 12, 13, 12, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 14, 13, 14, 13, 13, 14 }, { 13, 13, 14, 13, 14, 13, 13, 14 },
+ { 13, 13, 14, 13, 14, 13, 13, 14 }, { 13, 13, 14, 13, 14, 13, 13, 14 },
+ { 13, 14, 14, 13, 14, 13, 14, 14 }, { 13, 14, 14, 13, 14, 13, 14, 14 },
+ { 13, 14, 14, 13, 14, 13, 14, 14 }, { 13, 14, 14, 13, 14, 13, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }, { 14, 14, 14, 14, 14, 14, 14, 14 },
+ { 14, 14, 15, 14, 15, 14, 14, 15 }, { 14, 14, 15, 14, 15, 14, 14, 15 },
+ { 14, 14, 15, 14, 15, 14, 14, 15 }, { 14, 14, 15, 14, 15, 14, 14, 15 },
+ { 14, 15, 15, 14, 15, 14, 15, 15 }, { 14, 15, 15, 14, 15, 14, 15, 15 },
+ { 14, 15, 15, 14, 15, 14, 15, 15 }, { 14, 15, 15, 14, 15, 14, 15, 15 },
+ { 15, 15, 15, 15, 15, 15, 15, 15 }
+};
+
+const S16 gai2_mvy_range_mapping[4097][8] = {
+ { 14, 14, 14, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 12, 12, 12 },
+ { 13, 13, 13, 13, 13, 12, 12, 12 }, { 13, 13, 13, 13, 13, 12, 12, 12 },
+ { 13, 13, 13, 13, 13, 12, 12, 12 }, { 13, 13, 13, 12, 12, 12, 12, 12 },
+ { 13, 13, 13, 12, 12, 12, 12, 12 }, { 13, 13, 13, 12, 12, 12, 12, 12 },
+ { 13, 13, 13, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 11, 11, 11 },
+ { 12, 12, 12, 12, 12, 11, 11, 11 }, { 12, 12, 12, 12, 12, 11, 11, 11 },
+ { 12, 12, 12, 12, 12, 11, 11, 11 }, { 12, 12, 12, 11, 11, 11, 11, 11 },
+ { 12, 12, 12, 11, 11, 11, 11, 11 }, { 12, 12, 12, 11, 11, 11, 11, 11 },
+ { 12, 12, 12, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 10, 10, 10 },
+ { 11, 11, 11, 11, 11, 10, 10, 10 }, { 11, 11, 11, 11, 11, 10, 10, 10 },
+ { 11, 11, 11, 11, 11, 10, 10, 10 }, { 11, 11, 11, 10, 10, 10, 10, 10 },
+ { 11, 11, 11, 10, 10, 10, 10, 10 }, { 11, 11, 11, 10, 10, 10, 10, 10 },
+ { 11, 11, 11, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 9, 9, 9 },
+ { 10, 10, 10, 10, 10, 9, 9, 9 }, { 10, 10, 10, 10, 10, 9, 9, 9 },
+ { 10, 10, 10, 10, 10, 9, 9, 9 }, { 10, 10, 10, 9, 9, 9, 9, 9 },
+ { 10, 10, 10, 9, 9, 9, 9, 9 }, { 10, 10, 10, 9, 9, 9, 9, 9 },
+ { 10, 10, 10, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 8, 8, 8 },
+ { 9, 9, 9, 9, 9, 8, 8, 8 }, { 9, 9, 9, 9, 9, 8, 8, 8 },
+ { 9, 9, 9, 9, 9, 8, 8, 8 }, { 9, 9, 9, 8, 8, 8, 8, 8 },
+ { 9, 9, 9, 8, 8, 8, 8, 8 }, { 9, 9, 9, 8, 8, 8, 8, 8 },
+ { 9, 9, 9, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 7, 7, 7 },
+ { 8, 8, 8, 8, 8, 7, 7, 7 }, { 8, 8, 8, 8, 8, 7, 7, 7 },
+ { 8, 8, 8, 8, 8, 7, 7, 7 }, { 8, 8, 8, 7, 7, 7, 7, 7 },
+ { 8, 8, 8, 7, 7, 7, 7, 7 }, { 8, 8, 8, 7, 7, 7, 7, 7 },
+ { 8, 8, 8, 7, 7, 7, 7, 7 }, { 7, 7, 7, 7, 7, 7, 7, 7 },
+ { 7, 7, 7, 7, 7, 7, 7, 7 }, { 7, 7, 7, 7, 7, 7, 7, 7 },
+ { 7, 7, 7, 7, 7, 7, 7, 7 }, { 7, 7, 7, 7, 7, 7, 7, 7 },
+ { 7, 7, 7, 7, 7, 7, 7, 7 }, { 7, 7, 7, 7, 7, 7, 7, 7 },
+ { 7, 7, 7, 7, 7, 7, 7, 7 }, { 7, 7, 7, 7, 7, 6, 6, 6 },
+ { 7, 7, 7, 7, 7, 6, 6, 6 }, { 7, 7, 7, 7, 7, 6, 6, 6 },
+ { 7, 7, 7, 7, 7, 6, 6, 6 }, { 7, 7, 7, 6, 6, 6, 6, 6 },
+ { 7, 7, 7, 6, 6, 6, 6, 6 }, { 7, 7, 7, 6, 6, 6, 6, 6 },
+ { 7, 7, 7, 6, 6, 6, 6, 6 }, { 6, 6, 6, 6, 6, 5, 5, 5 },
+ { 6, 6, 6, 6, 6, 5, 5, 5 }, { 6, 6, 6, 6, 6, 5, 5, 5 },
+ { 6, 6, 6, 6, 6, 5, 5, 5 }, { 6, 6, 6, 5, 5, 4, 4, 4 },
+ { 6, 6, 6, 5, 5, 4, 4, 4 }, { 6, 6, 6, 5, 5, 3, 3, 3 },
+ { 6, 6, 6, 5, 5, 2, 2, 2 }, { 5, 5, 5, 4, 4, 3, 3, 3 },
+ { 5, 5, 5, 4, 4, 4, 4, 4 }, { 5, 5, 5, 3, 3, 4, 4, 4 },
+ { 5, 5, 5, 2, 2, 5, 5, 5 }, { 4, 4, 4, 3, 3, 5, 5, 5 },
+ { 4, 4, 4, 4, 4, 5, 5, 5 }, { 3, 3, 3, 4, 4, 5, 5, 5 },
+ { 2, 2, 2, 5, 5, 6, 6, 6 }, { 3, 3, 3, 5, 5, 6, 6, 6 },
+ { 4, 4, 4, 5, 5, 6, 6, 6 }, { 4, 4, 4, 5, 5, 6, 6, 6 },
+ { 5, 5, 5, 6, 6, 6, 6, 6 }, { 5, 5, 5, 6, 6, 6, 6, 6 },
+ { 5, 5, 5, 6, 6, 6, 6, 6 }, { 5, 5, 5, 6, 6, 6, 6, 6 },
+ { 6, 6, 6, 6, 6, 7, 7, 7 }, { 6, 6, 6, 6, 6, 7, 7, 7 },
+ { 6, 6, 6, 6, 6, 7, 7, 7 }, { 6, 6, 6, 6, 6, 7, 7, 7 },
+ { 6, 6, 6, 7, 7, 7, 7, 7 }, { 6, 6, 6, 7, 7, 7, 7, 7 },
+ { 6, 6, 6, 7, 7, 7, 7, 7 }, { 6, 6, 6, 7, 7, 7, 7, 7 },
+ { 7, 7, 7, 7, 7, 7, 7, 7 }, { 7, 7, 7, 7, 7, 7, 7, 7 },
+ { 7, 7, 7, 7, 7, 7, 7, 7 }, { 7, 7, 7, 7, 7, 7, 7, 7 },
+ { 7, 7, 7, 7, 7, 7, 7, 7 }, { 7, 7, 7, 7, 7, 7, 7, 7 },
+ { 7, 7, 7, 7, 7, 7, 7, 7 }, { 7, 7, 7, 7, 7, 7, 7, 7 },
+ { 7, 7, 7, 7, 7, 8, 8, 8 }, { 7, 7, 7, 7, 7, 8, 8, 8 },
+ { 7, 7, 7, 7, 7, 8, 8, 8 }, { 7, 7, 7, 7, 7, 8, 8, 8 },
+ { 7, 7, 7, 8, 8, 8, 8, 8 }, { 7, 7, 7, 8, 8, 8, 8, 8 },
+ { 7, 7, 7, 8, 8, 8, 8, 8 }, { 7, 7, 7, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 8, 8, 8 }, { 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 8, 8, 8, 8, 8, 9, 9, 9 }, { 8, 8, 8, 8, 8, 9, 9, 9 },
+ { 8, 8, 8, 8, 8, 9, 9, 9 }, { 8, 8, 8, 8, 8, 9, 9, 9 },
+ { 8, 8, 8, 9, 9, 9, 9, 9 }, { 8, 8, 8, 9, 9, 9, 9, 9 },
+ { 8, 8, 8, 9, 9, 9, 9, 9 }, { 8, 8, 8, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 9, 9, 9 }, { 9, 9, 9, 9, 9, 9, 9, 9 },
+ { 9, 9, 9, 9, 9, 10, 10, 10 }, { 9, 9, 9, 9, 9, 10, 10, 10 },
+ { 9, 9, 9, 9, 9, 10, 10, 10 }, { 9, 9, 9, 9, 9, 10, 10, 10 },
+ { 9, 9, 9, 10, 10, 10, 10, 10 }, { 9, 9, 9, 10, 10, 10, 10, 10 },
+ { 9, 9, 9, 10, 10, 10, 10, 10 }, { 9, 9, 9, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 10, 10, 10 }, { 10, 10, 10, 10, 10, 10, 10, 10 },
+ { 10, 10, 10, 10, 10, 11, 11, 11 }, { 10, 10, 10, 10, 10, 11, 11, 11 },
+ { 10, 10, 10, 10, 10, 11, 11, 11 }, { 10, 10, 10, 10, 10, 11, 11, 11 },
+ { 10, 10, 10, 11, 11, 11, 11, 11 }, { 10, 10, 10, 11, 11, 11, 11, 11 },
+ { 10, 10, 10, 11, 11, 11, 11, 11 }, { 10, 10, 10, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 11, 11, 11 }, { 11, 11, 11, 11, 11, 11, 11, 11 },
+ { 11, 11, 11, 11, 11, 12, 12, 12 }, { 11, 11, 11, 11, 11, 12, 12, 12 },
+ { 11, 11, 11, 11, 11, 12, 12, 12 }, { 11, 11, 11, 11, 11, 12, 12, 12 },
+ { 11, 11, 11, 12, 12, 12, 12, 12 }, { 11, 11, 11, 12, 12, 12, 12, 12 },
+ { 11, 11, 11, 12, 12, 12, 12, 12 }, { 11, 11, 11, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 12, 12, 12 }, { 12, 12, 12, 12, 12, 12, 12, 12 },
+ { 12, 12, 12, 12, 12, 13, 13, 13 }, { 12, 12, 12, 12, 12, 13, 13, 13 },
+ { 12, 12, 12, 12, 12, 13, 13, 13 }, { 12, 12, 12, 12, 12, 13, 13, 13 },
+ { 12, 12, 12, 13, 13, 13, 13, 13 }, { 12, 12, 12, 13, 13, 13, 13, 13 },
+ { 12, 12, 12, 13, 13, 13, 13, 13 }, { 12, 12, 12, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 13, 13, 13 }, { 13, 13, 13, 13, 13, 13, 13, 13 },
+ { 13, 13, 13, 13, 13, 14, 14, 14 }, { 13, 13, 13, 13, 13, 14, 14, 14 },
+ { 13, 13, 13, 13, 13, 14, 14, 14 }, { 13, 13, 13, 13, 13, 14, 14, 14 },
+ { 13, 13, 13, 14, 14, 14, 14, 14 }, { 13, 13, 13, 14, 14, 14, 14, 14 },
+ { 13, 13, 13, 14, 14, 14, 14, 14 }, { 13, 13, 13, 14, 14, 14, 14, 14 },
+ { 14, 14, 14, 14, 14, 14, 14, 14 }
+};
+
+/* 8x8 table whose row i holds 0xFFFF at column i and zero in every other
+ * column (i.e. only the diagonal entries are non-zero).
+ * NOTE(review): 0xFFFF does not fit in a signed 16-bit type; if S16 is signed
+ * these entries presumably end up stored as -1 (all bits set) - confirm. */
+const S16 gai2_set_best_cost_max[8][8] = {
+    { 0xFFFF, 0, 0, 0, 0, 0, 0, 0 }, { 0, 0xFFFF, 0, 0, 0, 0, 0, 0 },
+    { 0, 0, 0xFFFF, 0, 0, 0, 0, 0 }, { 0, 0, 0, 0xFFFF, 0, 0, 0, 0 },
+    { 0, 0, 0, 0, 0xFFFF, 0, 0, 0 }, { 0, 0, 0, 0, 0, 0xFFFF, 0, 0 },
+    { 0, 0, 0, 0, 0, 0, 0xFFFF, 0 }, { 0, 0, 0, 0, 0, 0, 0, 0xFFFF },
+};
+
+/* Eight pairs with both components in {0, 1, 2}; the pair { 1, 1 } is absent. */
+const S08 gai1_mv_adjust[8][2] = {
+    { 0, 0 }, { 1, 0 }, { 2, 0 }, { 0, 1 }, { 2, 1 }, { 0, 2 }, { 1, 2 }, { 2, 2 }
+};
+
+/* Nine pairs: the eight non-zero pairs over {-1, 0, 1} in row order, then { 0, 0 } last. */
+const S08 gai1_mv_offsets_from_center_in_rect_grid[NUM_POINTS_IN_RECTANGULAR_GRID][2] = {
+    { -1, -1 }, { 0, -1 }, { 1, -1 }, { -1, 0 }, { 1, 0 }, { -1, 1 }, { 0, 1 }, { 1, 1 }, { 0, 0 } };
diff --git a/encoder/hme_globals.h b/encoder/hme_globals.h
new file mode 100644
index 0000000..3df38a5
--- /dev/null
+++ b/encoder/hme_globals.h
@@ -0,0 +1,385 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file hme_globals.h
+*
+* \brief
+* Contains all the global declarations used by HME
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _HME_GLOBALS_H_ /* include guard; closed by the #endif at the end of this header */
+#define _HME_GLOBALS_H_ /* without this define the guard never takes effect on re-inclusion */
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Globals */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ * @brief Converts an encode order to raster order x coord. Meant for 16x16
+ * CU within 64x64 or within 32x32
+******************************************************************************
+ */
+extern U08 gau1_encode_to_raster_x[16];
+
+/**
+******************************************************************************
+ * @brief Converts an encode order to raster order y coord. Meant for 16x16
+ * CU within 64x64 or within 32x32
+******************************************************************************
+ */
+extern U08 gau1_encode_to_raster_y[16];
+
+/**
+******************************************************************************
+ * @brief Given a CU id within the bigger CU (0..3), and the partition type
+ * currently within the small CU, we can figure out candidate
+ * partition types for bigger CU. E.g. IF CU id is 0, and is AMP of
+ * nLx2N, candidate partitions for bigger CU are nLx2N and 2Nx2N
+******************************************************************************
+ */
+extern PART_TYPE_T ge_part_type_to_merge_part[4][MAX_PART_TYPES][3];
+
+/**
+******************************************************************************
+ * @brief A given partition type has 1,2 or 4 partitions, each corresponding
+ * to a unique partition id PART_ID_T enum type. So, this global converts
+ * partition type to a bitmask of corresponding partition ids.
+******************************************************************************
+ */
+extern S32 gai4_part_type_to_part_mask[MAX_PART_TYPES];
+
+/**
+******************************************************************************
+ * @brief Reads out the index of function pointer to a sad_compute function
+ * of blk given a blk size enumeration
+******************************************************************************
+ */
+extern U08 gau1_blk_size_to_fp[NUM_BLK_SIZES];
+
+/**
+******************************************************************************
+ * @brief Reads out the width of blk given a blk size enumeration
+******************************************************************************
+ */
+extern U08 gau1_blk_size_to_wd[NUM_BLK_SIZES];
+
+extern U08 gau1_blk_size_to_wd_shift[NUM_BLK_SIZES];
+
+/**
+******************************************************************************
+ * @brief Reads out the height of blk given a blk size enumeration
+******************************************************************************
+ */
+extern U08 gau1_blk_size_to_ht[NUM_BLK_SIZES];
+
+/**
+******************************************************************************
+ * @brief Given a minimum pt enum in a 3x3 grid, reads out the list of active
+ * search pts in next iteration as a bit-mask, eliminating need to search
+ * pts that have already been searched in this iteration.
+******************************************************************************
+ */
+extern S32 gai4_opt_grid_mask[NUM_GRID_PTS];
+
+/**
+******************************************************************************
+ * @brief Given a minimum pt enum in a 3x3 grid, reads out the x offset of
+ * the min pt relative to center assuming step size of 1
+******************************************************************************
+ */
+extern S08 gai1_grid_id_to_x[NUM_GRID_PTS];
+
+/**
+******************************************************************************
+ * @brief Given a minimum pt enum in a 3x3 grid, reads out the y offset of
+ * the min pt relative to center assuming step size of 1
+******************************************************************************
+*/
+extern S08 gai1_grid_id_to_y[NUM_GRID_PTS];
+
+/**
+******************************************************************************
+ * @brief Lookup of the blk size enum, given a specific partition and cu size
+******************************************************************************
+*/
+extern BLK_SIZE_T ge_part_id_to_blk_size[NUM_CU_SIZES][TOT_NUM_PARTS];
+
+/**
+******************************************************************************
+ * @brief For a given partition split, find number of partitions
+******************************************************************************
+*/
+extern U08 gau1_num_parts_in_part_type[MAX_PART_TYPES];
+
+/**
+******************************************************************************
+ * @brief For a given partition split, returns the enumerations of specific
+ * partitions in raster order. E.g. for PART_2NxN, part id 0 is
+ * PART_ID_2NxN_T and part id 1 is PART_ID_2NxN_B
+******************************************************************************
+*/
+extern PART_ID_T ge_part_type_to_part_id[MAX_PART_TYPES][MAX_NUM_PARTS];
+
+/**
+******************************************************************************
+ * @brief For a given partition id, returns the rectangular position and size
+ * of partition within cu relative to cu start.
+******************************************************************************
+*/
+extern part_attr_t gas_part_attr_in_cu[TOT_NUM_PARTS];
+
+/**
+******************************************************************************
+ * @brief Gives the CU type enumeration given a blk size.
+******************************************************************************
+*/
+extern CU_SIZE_T ge_blk_size_to_cu_size[NUM_BLK_SIZES];
+
+/**
+******************************************************************************
+
+ * @brief Given a minimum pt enum in a diamond grid, reads out the list
+ * of active search pts in next iteration as a bit-mask, eliminating need
+ * to search pts that have already been searched in this iteration.
+******************************************************************************
+ */
+extern S32 gai4_opt_grid_mask_diamond[5];
+
+/**
+******************************************************************************
+ * @brief Given a minimum pt enum in a 9 point grid, reads out the list
+ * of active search pts in next iteration as a bit-mask, eliminating need
+ * to search pts that have already been searched in this iteration.
+******************************************************************************
+ */
+
+extern S32 gai4_opt_grid_mask_conventional[9];
+
+/**
+******************************************************************************
+ * @brief Given a raster coord x, y, this array returns the CU id in encoding
+ * order. Indexed as [y][x]
+******************************************************************************
+ */
+extern U08 gau1_cu_id_raster_to_enc[4][4];
+/**
+******************************************************************************
+ * @brief Given a CU size, this array returns blk size enum
+******************************************************************************
+ */
+extern BLK_SIZE_T ge_cu_size_to_blk_size[NUM_CU_SIZES];
+
+/**
+******************************************************************************
+ * @brief Given a part type, returns whether the part type is vertically
+ * oriented.
+******************************************************************************
+ */
+extern U08 gau1_is_vert_part[MAX_PART_TYPES];
+
+/**
+******************************************************************************
+ * @brief Given a partition, returns the number of best results to consider
+ * for full pel refinement. NOTE(review): the PQ/HQ/MS/HS/XS/XS25 suffixes presumably name quality presets - confirm
+******************************************************************************
+ */
+extern U08 gau1_num_best_results_PQ[TOT_NUM_PARTS];
+extern U08 gau1_num_best_results_HQ[TOT_NUM_PARTS];
+extern U08 gau1_num_best_results_MS[TOT_NUM_PARTS];
+extern U08 gau1_num_best_results_HS[TOT_NUM_PARTS];
+extern U08 gau1_num_best_results_XS[TOT_NUM_PARTS];
+extern U08 gau1_num_best_results_XS25[TOT_NUM_PARTS];
+
+/**
+******************************************************************************
+ * @brief gau1_cu_tr_valid[y][x] returns the validity of a top rt candt for
+ * CU with raster id x, y within CTB. Valid for 16x16 CUs and above
+******************************************************************************
+ */
+extern U08 gau1_cu_tr_valid[4][4];
+/**
+******************************************************************************
+ * @brief gau1_cu_bl_valid[y][x] returns the validity of a bot lt candt for
+ * CU with raster id x, y within CTB. Valid for 16x16 CUs and above
+******************************************************************************
+ */
+extern U08 gau1_cu_bl_valid[4][4];
+
+/**
+******************************************************************************
+ * @brief Returns the validity of top rt candt for a given part id, will not
+ * be valid if tr of a part pts to a non causal neighbour like 16x8B
+******************************************************************************
+ */
+extern U08 gau1_partid_tr_valid[TOT_NUM_PARTS];
+/**
+******************************************************************************
+ * @brief Returns the validity of bottom left candt for given part id, will
+ * not be valid, if bl of a part pts to a non causal neighbour like 8x16R
+******************************************************************************
+ */
+extern U08 gau1_partid_bl_valid[TOT_NUM_PARTS];
+
+/**
+******************************************************************************
+ * @brief The number of partition id in the CU, e.g. PART_ID_16x8_B is 2nd
+******************************************************************************
+ */
+extern U08 gau1_part_id_to_part_num[TOT_NUM_PARTS];
+
+/**
+******************************************************************************
+ * @brief Returns partition type for a given partition id, e.g.
+ * PART_ID_16x8_B returns PRT_TYPE_16x8
+******************************************************************************
+ */
+extern PART_TYPE_T ge_part_id_to_part_type[TOT_NUM_PARTS];
+
+/**
+******************************************************************************
+ * @brief given raster id x, y of 8x8 blk in 64x64 CTB, return the enc order
+******************************************************************************
+ */
+extern U08 gau1_8x8_cu_id_raster_to_enc[8][8];
+
+/**
+******************************************************************************
+ * @brief Return the bits for a given partition id which gets added to the
+ * cost. Although the bits are for a given partition type, we add off the
+ * bits per partition while computing mv cost. For example, if the bits for
+ * 2NxN part type is 3, we add 1.5 bits for 2NxN_T and 1.5 for 2NxN_B.
+ * Hence this is stored in Q1 format
+******************************************************************************
+*/
+extern U08 gau1_bits_for_part_id_q1[TOT_NUM_PARTS];
+
+/**
+******************************************************************************
+ * @brief Returns 1 if there are qpel points to the top and bottom of the
+ * current point
+******************************************************************************
+*/
+extern S32 gai4_2pt_qpel_interpol_possible_vert[4][4];
+
+/**
+******************************************************************************
+ * @brief Returns 1 if there are qpel points to the left and right of the
+ * current point
+******************************************************************************
+*/
+extern S32 gai4_2pt_qpel_interpol_possible_horz[4][4];
+
+/**
+******************************************************************************
+ * @brief For a given partition split, num bits to encode the partition type
+ * and split cu,tu bits; assuming one bin equal to one bit for now
+******************************************************************************
+*/
+extern U08 gau1_num_bits_for_part_type[MAX_PART_TYPES];
+
+/**
+******************************************************************************
+ * @brief Used exclusively in the Intrinsics version of the function
+ * 'hme_combine_4x4_sads_and_compute_cost_high_speed' instead
+ * of calling get_range()
+******************************************************************************
+ */
+extern S16 gi2_mvy_range[MAX_MVY_SUPPORTED_IN_COARSE_LAYER + 1][8];
+
+/**
+******************************************************************************
+ * @brief Used exclusively in the Intrinsics version of the function
+ * 'hme_combine_4x4_sads_and_compute_cost_high_speed' instead
+ * of calling get_range()
+******************************************************************************
+ */
+extern S16 gi2_mvx_range[MAX_MVX_SUPPORTED_IN_COARSE_LAYER * 2 + 1][8];
+
+extern S32 gai4_select_qpel_function_vert[4][16];
+
+extern S32 gai4_select_qpel_function_horz[4][16];
+
+extern S32 gai4_partition_area[TOT_NUM_PARTS];
+
+extern const U08 gau1_search_cand_priority_in_l1_and_l2_me[2][NUM_SEARCH_CAND_TYPES];
+
+/* 12 cases are - NOTE(review): cases 6 and 7 repeat num_refs=3 from cases 4 and 5; presumably num_refs=4 was meant - confirm */
+/* case 0 - P picture, num_refs=1, 4x4 in L1ME = 0 */
+/* case 1 - P picture, num_refs=1, 4x4 in L1ME = 1 */
+/* case 2 - P picture, num_refs=2, 4x4 in L1ME = 0 */
+/* case 3 - P picture, num_refs=2, 4x4 in L1ME = 1 */
+/* case 4 - P picture, num_refs=3, 4x4 in L1ME = 0 */
+/* case 5 - P picture, num_refs=3, 4x4 in L1ME = 1 */
+/* case 6 - P picture, num_refs=3, 4x4 in L1ME = 0 */
+/* case 7 - P picture, num_refs=3, 4x4 in L1ME = 1 */
+/* case 8 - B picture, num_refs=1, 4x4 in L1ME = 0 */
+/* case 9 - B picture, num_refs=1, 4x4 in L1ME = 1 */
+/* case 10 - B picture, num_refs=2, 4x4 in L1ME = 0 */
+/* case 11 - B picture, num_refs=2, 4x4 in L1ME = 1 */
+extern const U08 gau1_search_cand_priority_in_l0_me[12][NUM_SEARCH_CAND_TYPES];
+
+extern const SEARCH_CANDIDATE_TYPE_T
+ gae_search_cand_priority_to_search_cand_type_map_in_l0_me[12][NUM_SEARCH_CAND_TYPES];
+
+extern const U08 gau1_max_num_search_cands_in_l0_me[12];
+
+extern const SEARCH_CAND_LOCATIONS_T gae_search_cand_type_to_location_map[NUM_SEARCH_CAND_TYPES];
+
+extern const S08 gai1_search_cand_type_to_result_id_map[NUM_SEARCH_CAND_TYPES];
+
+extern const U08 gau1_search_cand_type_to_spatiality_map[NUM_SEARCH_CAND_TYPES];
+
+extern const S32 gai4_search_cand_location_to_x_offset_map[NUM_SEARCH_CAND_LOCATIONS];
+
+extern const S32 gai4_search_cand_location_to_y_offset_map[NUM_SEARCH_CAND_LOCATIONS];
+
+/**
+******************************************************************************
+ * @brief Used exclusively in the Intrinsics version of the function
+ * 'hme_combine_4x4_sads_and_compute_cost_high_quality' instead
+ * of calling get_range()
+******************************************************************************
+ */
+extern S16 gi2_mvx_range_high_quality[MAX_MVX_SUPPORTED_IN_COARSE_LAYER * 2 + 1][8];
+
+extern const S16 gai2_mvx_range_mapping[8193][8];
+
+extern const S16 gai2_mvy_range_mapping[4097][8];
+/* 8x8 table; its definition holds 0xFFFF on the diagonal and 0x00 everywhere else */
+extern const S16 gai2_set_best_cost_max[8][8];
+/* 8 pairs; its definition lists every pair from {0, 1, 2} x {0, 1, 2} except { 1, 1 } */
+extern const S08 gai1_mv_adjust[8][2];
+/* its definition lists the eight unit offsets around the centre, followed by { 0, 0 } */
+extern const S08 gai1_mv_offsets_from_center_in_rect_grid[NUM_POINTS_IN_RECTANGULAR_GRID][2];
+
+#endif /* #ifndef _HME_GLOBALS_H_*/
diff --git a/encoder/hme_interface.c b/encoder/hme_interface.c
new file mode 100644
index 0000000..af95868
--- /dev/null
+++ b/encoder/hme_interface.c
@@ -0,0 +1,4755 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+#include <limits.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_multi_thrd_funcs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_entropy_structs.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+#include "ihevce_enc_loop_structs.h"
+#include "ihevce_bs_compute_ctb.h"
+#include "ihevce_global_tables.h"
+#include "ihevce_dep_mngr_interface.h"
+#include "hme_datatype.h"
+#include "hme_interface.h"
+#include "hme_common_defs.h"
+#include "hme_defs.h"
+#include "ihevce_me_instr_set_router.h"
+#include "hme_globals.h"
+#include "hme_utils.h"
+#include "hme_coarse.h"
+#include "hme_refine.h"
+#include "hme_err_compute.h"
+#include "hme_common_utils.h"
+#include "hme_search_algo.h"
+#include "ihevce_profile.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+void hme_init_globals()
+{
+ GRID_PT_T id;
+ S32 i, j;
+ /*************************************************************************/
+ /* Initialize the lookup table for x offset, y offset, optimized mask */
+ /* based on grid id. The design is as follows: */
+ /* */
+ /* a b c d */
+ /* TL T TR e */
+ /* L C R f */
+ /* BL B BR */
+ /* */
+ /* IF a non corner pt, like T is the new minima, then we need to */
+ /* evaluate only 3 new pts, in this case, a, b, c. So the optimal */
+ /* grid mask would reflect this. If a corner pt like TR is the new */
+ /* minima, then we need to evaluate 5 new pts, in this case, b, c, d, */
+ /* e and f. So the grid mask will have 5 pts enabled. */
+ /*************************************************************************/
+
+ id = PT_C;
+ gai4_opt_grid_mask[id] = GRID_ALL_PTS_VALID ^ (BIT_EN(PT_C));
+ gai1_grid_id_to_x[id] = 0;
+ gai1_grid_id_to_y[id] = 0;
+ gai4_opt_grid_mask_diamond[id] = GRID_DIAMOND_ENABLE_ALL ^ (BIT_EN(PT_C));
+ gai4_opt_grid_mask_conventional[id] = GRID_ALL_PTS_VALID ^ (BIT_EN(PT_C));
+
+ id = PT_L;
+ gai4_opt_grid_mask[id] = BIT_EN(PT_TL) | BIT_EN(PT_L) | BIT_EN(PT_BL);
+ gai1_grid_id_to_x[id] = -1;
+ gai1_grid_id_to_y[id] = 0;
+ gai4_opt_grid_mask_diamond[id] = BIT_EN(PT_T) | BIT_EN(PT_L) | BIT_EN(PT_B);
+ gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_T) | BIT_EN(PT_L) | BIT_EN(PT_B);
+
+ id = PT_R;
+ gai4_opt_grid_mask[id] = BIT_EN(PT_TR) | BIT_EN(PT_R) | BIT_EN(PT_BR);
+ gai1_grid_id_to_x[id] = 1;
+ gai1_grid_id_to_y[id] = 0;
+ gai4_opt_grid_mask_diamond[id] = BIT_EN(PT_T) | BIT_EN(PT_R) | BIT_EN(PT_B);
+ gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_T) | BIT_EN(PT_R) | BIT_EN(PT_B);
+
+ id = PT_T;
+ gai4_opt_grid_mask[id] = BIT_EN(PT_TL) | BIT_EN(PT_T) | BIT_EN(PT_TR);
+ gai1_grid_id_to_x[id] = 0;
+ gai1_grid_id_to_y[id] = -1;
+ gai4_opt_grid_mask_diamond[id] = BIT_EN(PT_R) | BIT_EN(PT_L) | BIT_EN(PT_T);
+ gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_R) | BIT_EN(PT_L) | BIT_EN(PT_T);
+
+ id = PT_B;
+ gai4_opt_grid_mask[id] = BIT_EN(PT_BL) | BIT_EN(PT_B) | BIT_EN(PT_BR);
+ gai1_grid_id_to_x[id] = 0;
+ gai1_grid_id_to_y[id] = 1;
+ gai4_opt_grid_mask_diamond[id] = BIT_EN(PT_B) | BIT_EN(PT_L) | BIT_EN(PT_R);
+ gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_B) | BIT_EN(PT_L) | BIT_EN(PT_R);
+
+ id = PT_TL;
+ gai4_opt_grid_mask[id] = gai4_opt_grid_mask[PT_L] | gai4_opt_grid_mask[PT_T];
+ gai1_grid_id_to_x[id] = -1;
+ gai1_grid_id_to_y[id] = -1;
+ gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_T) | BIT_EN(PT_L);
+
+ id = PT_TR;
+ gai4_opt_grid_mask[id] = gai4_opt_grid_mask[PT_R] | gai4_opt_grid_mask[PT_T];
+ gai1_grid_id_to_x[id] = 1;
+ gai1_grid_id_to_y[id] = -1;
+ gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_T) | BIT_EN(PT_R);
+
+ id = PT_BL;
+ gai4_opt_grid_mask[id] = gai4_opt_grid_mask[PT_L] | gai4_opt_grid_mask[PT_B];
+ gai1_grid_id_to_x[id] = -1;
+ gai1_grid_id_to_y[id] = 1;
+ gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_L) | BIT_EN(PT_B);
+
+ id = PT_BR;
+ gai4_opt_grid_mask[id] = gai4_opt_grid_mask[PT_R] | gai4_opt_grid_mask[PT_B];
+ gai1_grid_id_to_x[id] = 1;
+ gai1_grid_id_to_y[id] = 1;
+ gai4_opt_grid_mask_conventional[id] = BIT_EN(PT_R) | BIT_EN(PT_B);
+
+ ge_part_id_to_blk_size[CU_8x8][PART_ID_2Nx2N] = BLK_8x8;
+ ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxN_T] = BLK_8x4;
+ ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxN_B] = BLK_8x4;
+ ge_part_id_to_blk_size[CU_8x8][PART_ID_Nx2N_L] = BLK_4x8;
+ ge_part_id_to_blk_size[CU_8x8][PART_ID_Nx2N_R] = BLK_4x8;
+ ge_part_id_to_blk_size[CU_8x8][PART_ID_NxN_TL] = BLK_4x4;
+ ge_part_id_to_blk_size[CU_8x8][PART_ID_NxN_TR] = BLK_4x4;
+ ge_part_id_to_blk_size[CU_8x8][PART_ID_NxN_BL] = BLK_4x4;
+ ge_part_id_to_blk_size[CU_8x8][PART_ID_NxN_BR] = BLK_4x4;
+ ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxnU_T] = BLK_INVALID;
+ ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxnU_B] = BLK_INVALID;
+ ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxnD_T] = BLK_INVALID;
+ ge_part_id_to_blk_size[CU_8x8][PART_ID_2NxnD_B] = BLK_INVALID;
+ ge_part_id_to_blk_size[CU_8x8][PART_ID_nLx2N_L] = BLK_INVALID;
+ ge_part_id_to_blk_size[CU_8x8][PART_ID_nLx2N_R] = BLK_INVALID;
+ ge_part_id_to_blk_size[CU_8x8][PART_ID_nRx2N_L] = BLK_INVALID;
+ ge_part_id_to_blk_size[CU_8x8][PART_ID_nRx2N_R] = BLK_INVALID;
+
+ ge_part_id_to_blk_size[CU_16x16][PART_ID_2Nx2N] = BLK_16x16;
+ ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxN_T] = BLK_16x8;
+ ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxN_B] = BLK_16x8;
+ ge_part_id_to_blk_size[CU_16x16][PART_ID_Nx2N_L] = BLK_8x16;
+ ge_part_id_to_blk_size[CU_16x16][PART_ID_Nx2N_R] = BLK_8x16;
+ ge_part_id_to_blk_size[CU_16x16][PART_ID_NxN_TL] = BLK_8x8;
+ ge_part_id_to_blk_size[CU_16x16][PART_ID_NxN_TR] = BLK_8x8;
+ ge_part_id_to_blk_size[CU_16x16][PART_ID_NxN_BL] = BLK_8x8;
+ ge_part_id_to_blk_size[CU_16x16][PART_ID_NxN_BR] = BLK_8x8;
+ ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxnU_T] = BLK_16x4;
+ ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxnU_B] = BLK_16x12;
+ ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxnD_T] = BLK_16x12;
+ ge_part_id_to_blk_size[CU_16x16][PART_ID_2NxnD_B] = BLK_16x4;
+ ge_part_id_to_blk_size[CU_16x16][PART_ID_nLx2N_L] = BLK_4x16;
+ ge_part_id_to_blk_size[CU_16x16][PART_ID_nLx2N_R] = BLK_12x16;
+ ge_part_id_to_blk_size[CU_16x16][PART_ID_nRx2N_L] = BLK_12x16;
+ ge_part_id_to_blk_size[CU_16x16][PART_ID_nRx2N_R] = BLK_4x16;
+
+ ge_part_id_to_blk_size[CU_32x32][PART_ID_2Nx2N] = BLK_32x32;
+ ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxN_T] = BLK_32x16;
+ ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxN_B] = BLK_32x16;
+ ge_part_id_to_blk_size[CU_32x32][PART_ID_Nx2N_L] = BLK_16x32;
+ ge_part_id_to_blk_size[CU_32x32][PART_ID_Nx2N_R] = BLK_16x32;
+ ge_part_id_to_blk_size[CU_32x32][PART_ID_NxN_TL] = BLK_16x16;
+ ge_part_id_to_blk_size[CU_32x32][PART_ID_NxN_TR] = BLK_16x16;
+ ge_part_id_to_blk_size[CU_32x32][PART_ID_NxN_BL] = BLK_16x16;
+ ge_part_id_to_blk_size[CU_32x32][PART_ID_NxN_BR] = BLK_16x16;
+ ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxnU_T] = BLK_32x8;
+ ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxnU_B] = BLK_32x24;
+ ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxnD_T] = BLK_32x24;
+ ge_part_id_to_blk_size[CU_32x32][PART_ID_2NxnD_B] = BLK_32x8;
+ ge_part_id_to_blk_size[CU_32x32][PART_ID_nLx2N_L] = BLK_8x32;
+ ge_part_id_to_blk_size[CU_32x32][PART_ID_nLx2N_R] = BLK_24x32;
+ ge_part_id_to_blk_size[CU_32x32][PART_ID_nRx2N_L] = BLK_24x32;
+ ge_part_id_to_blk_size[CU_32x32][PART_ID_nRx2N_R] = BLK_8x32;
+
+ ge_part_id_to_blk_size[CU_64x64][PART_ID_2Nx2N] = BLK_64x64;
+ ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxN_T] = BLK_64x32;
+ ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxN_B] = BLK_64x32;
+ ge_part_id_to_blk_size[CU_64x64][PART_ID_Nx2N_L] = BLK_32x64;
+ ge_part_id_to_blk_size[CU_64x64][PART_ID_Nx2N_R] = BLK_32x64;
+ ge_part_id_to_blk_size[CU_64x64][PART_ID_NxN_TL] = BLK_32x32;
+ ge_part_id_to_blk_size[CU_64x64][PART_ID_NxN_TR] = BLK_32x32;
+ ge_part_id_to_blk_size[CU_64x64][PART_ID_NxN_BL] = BLK_32x32;
+ ge_part_id_to_blk_size[CU_64x64][PART_ID_NxN_BR] = BLK_32x32;
+ ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxnU_T] = BLK_64x16;
+ ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxnU_B] = BLK_64x48;
+ ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxnD_T] = BLK_64x48;
+ ge_part_id_to_blk_size[CU_64x64][PART_ID_2NxnD_B] = BLK_64x16;
+ ge_part_id_to_blk_size[CU_64x64][PART_ID_nLx2N_L] = BLK_16x64;
+ ge_part_id_to_blk_size[CU_64x64][PART_ID_nLx2N_R] = BLK_48x64;
+ ge_part_id_to_blk_size[CU_64x64][PART_ID_nRx2N_L] = BLK_48x64;
+ ge_part_id_to_blk_size[CU_64x64][PART_ID_nRx2N_R] = BLK_16x64;
+
+ gau1_num_parts_in_part_type[PRT_2Nx2N] = 1;
+ gau1_num_parts_in_part_type[PRT_2NxN] = 2;
+ gau1_num_parts_in_part_type[PRT_Nx2N] = 2;
+ gau1_num_parts_in_part_type[PRT_NxN] = 4;
+ gau1_num_parts_in_part_type[PRT_2NxnU] = 2;
+ gau1_num_parts_in_part_type[PRT_2NxnD] = 2;
+ gau1_num_parts_in_part_type[PRT_nLx2N] = 2;
+ gau1_num_parts_in_part_type[PRT_nRx2N] = 2;
+
+ for(i = 0; i < MAX_PART_TYPES; i++)
+ for(j = 0; j < MAX_NUM_PARTS; j++)
+ ge_part_type_to_part_id[i][j] = PART_ID_INVALID;
+
+ /* 2Nx2N only one partition */
+ ge_part_type_to_part_id[PRT_2Nx2N][0] = PART_ID_2Nx2N;
+
+ /* 2NxN 2 partitions */
+ ge_part_type_to_part_id[PRT_2NxN][0] = PART_ID_2NxN_T;
+ ge_part_type_to_part_id[PRT_2NxN][1] = PART_ID_2NxN_B;
+
+ /* Nx2N 2 partitions */
+ ge_part_type_to_part_id[PRT_Nx2N][0] = PART_ID_Nx2N_L;
+ ge_part_type_to_part_id[PRT_Nx2N][1] = PART_ID_Nx2N_R;
+
+ /* NxN 4 partitions */
+ ge_part_type_to_part_id[PRT_NxN][0] = PART_ID_NxN_TL;
+ ge_part_type_to_part_id[PRT_NxN][1] = PART_ID_NxN_TR;
+ ge_part_type_to_part_id[PRT_NxN][2] = PART_ID_NxN_BL;
+ ge_part_type_to_part_id[PRT_NxN][3] = PART_ID_NxN_BR;
+
+ /* AMP 2Nx (N/2 + 3N/2) 2 partitions */
+ ge_part_type_to_part_id[PRT_2NxnU][0] = PART_ID_2NxnU_T;
+ ge_part_type_to_part_id[PRT_2NxnU][1] = PART_ID_2NxnU_B;
+
+ /* AMP 2Nx (3N/2 + N/2) 2 partitions */
+ ge_part_type_to_part_id[PRT_2NxnD][0] = PART_ID_2NxnD_T;
+ ge_part_type_to_part_id[PRT_2NxnD][1] = PART_ID_2NxnD_B;
+
+ /* AMP (N/2 + 3N/2) x 2N 2 partitions */
+ ge_part_type_to_part_id[PRT_nLx2N][0] = PART_ID_nLx2N_L;
+ ge_part_type_to_part_id[PRT_nLx2N][1] = PART_ID_nLx2N_R;
+
+ /* AMP (3N/2 + N/2) x 2N 2 partitions */
+ ge_part_type_to_part_id[PRT_nRx2N][0] = PART_ID_nRx2N_L;
+ ge_part_type_to_part_id[PRT_nRx2N][1] = PART_ID_nRx2N_R;
+
+ /*************************************************************************/
+ /* initialize attributes for each partition id within the cu. */
+ /*************************************************************************/
+ {
+ part_attr_t *ps_part_attr;
+
+ ps_part_attr = &gas_part_attr_in_cu[PART_ID_2Nx2N];
+ ps_part_attr->u1_x_start = 0;
+ ps_part_attr->u1_y_start = 0;
+ ps_part_attr->u1_x_count = 8;
+ ps_part_attr->u1_y_count = 8;
+
+ ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxN_T];
+ ps_part_attr->u1_x_start = 0;
+ ps_part_attr->u1_y_start = 0;
+ ps_part_attr->u1_x_count = 8;
+ ps_part_attr->u1_y_count = 4;
+
+ ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxN_B];
+ ps_part_attr->u1_x_start = 0;
+ ps_part_attr->u1_y_start = 4;
+ ps_part_attr->u1_x_count = 8;
+ ps_part_attr->u1_y_count = 4;
+
+ ps_part_attr = &gas_part_attr_in_cu[PART_ID_Nx2N_L];
+ ps_part_attr->u1_x_start = 0;
+ ps_part_attr->u1_y_start = 0;
+ ps_part_attr->u1_x_count = 4;
+ ps_part_attr->u1_y_count = 8;
+
+ ps_part_attr = &gas_part_attr_in_cu[PART_ID_Nx2N_R];
+ ps_part_attr->u1_x_start = 4;
+ ps_part_attr->u1_y_start = 0;
+ ps_part_attr->u1_x_count = 4;
+ ps_part_attr->u1_y_count = 8;
+
+ ps_part_attr = &gas_part_attr_in_cu[PART_ID_NxN_TL];
+ ps_part_attr->u1_x_start = 0;
+ ps_part_attr->u1_y_start = 0;
+ ps_part_attr->u1_x_count = 4;
+ ps_part_attr->u1_y_count = 4;
+
+ ps_part_attr = &gas_part_attr_in_cu[PART_ID_NxN_TR];
+ ps_part_attr->u1_x_start = 4;
+ ps_part_attr->u1_y_start = 0;
+ ps_part_attr->u1_x_count = 4;
+ ps_part_attr->u1_y_count = 4;
+
+ ps_part_attr = &gas_part_attr_in_cu[PART_ID_NxN_BL];
+ ps_part_attr->u1_x_start = 0;
+ ps_part_attr->u1_y_start = 4;
+ ps_part_attr->u1_x_count = 4;
+ ps_part_attr->u1_y_count = 4;
+
+ ps_part_attr = &gas_part_attr_in_cu[PART_ID_NxN_BR];
+ ps_part_attr->u1_x_start = 4;
+ ps_part_attr->u1_y_start = 4;
+ ps_part_attr->u1_x_count = 4;
+ ps_part_attr->u1_y_count = 4;
+
+ ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxnU_T];
+ ps_part_attr->u1_x_start = 0;
+ ps_part_attr->u1_y_start = 0;
+ ps_part_attr->u1_x_count = 8;
+ ps_part_attr->u1_y_count = 2;
+
+ ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxnU_B];
+ ps_part_attr->u1_x_start = 0;
+ ps_part_attr->u1_y_start = 2;
+ ps_part_attr->u1_x_count = 8;
+ ps_part_attr->u1_y_count = 6;
+
+ ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxnD_T];
+ ps_part_attr->u1_x_start = 0;
+ ps_part_attr->u1_y_start = 0;
+ ps_part_attr->u1_x_count = 8;
+ ps_part_attr->u1_y_count = 6;
+
+ ps_part_attr = &gas_part_attr_in_cu[PART_ID_2NxnD_B];
+ ps_part_attr->u1_x_start = 0;
+ ps_part_attr->u1_y_start = 6;
+ ps_part_attr->u1_x_count = 8;
+ ps_part_attr->u1_y_count = 2;
+
+ ps_part_attr = &gas_part_attr_in_cu[PART_ID_nLx2N_L];
+ ps_part_attr->u1_x_start = 0;
+ ps_part_attr->u1_y_start = 0;
+ ps_part_attr->u1_x_count = 2;
+ ps_part_attr->u1_y_count = 8;
+
+ ps_part_attr = &gas_part_attr_in_cu[PART_ID_nLx2N_R];
+ ps_part_attr->u1_x_start = 2;
+ ps_part_attr->u1_y_start = 0;
+ ps_part_attr->u1_x_count = 6;
+ ps_part_attr->u1_y_count = 8;
+
+ ps_part_attr = &gas_part_attr_in_cu[PART_ID_nRx2N_L];
+ ps_part_attr->u1_x_start = 0;
+ ps_part_attr->u1_y_start = 0;
+ ps_part_attr->u1_x_count = 6;
+ ps_part_attr->u1_y_count = 8;
+
+ ps_part_attr = &gas_part_attr_in_cu[PART_ID_nRx2N_R];
+ ps_part_attr->u1_x_start = 6;
+ ps_part_attr->u1_y_start = 0;
+ ps_part_attr->u1_x_count = 2;
+ ps_part_attr->u1_y_count = 8;
+ }
+ for(i = 0; i < NUM_BLK_SIZES; i++)
+ ge_blk_size_to_cu_size[i] = CU_INVALID;
+
+ ge_blk_size_to_cu_size[BLK_8x8] = CU_8x8;
+ ge_blk_size_to_cu_size[BLK_16x16] = CU_16x16;
+ ge_blk_size_to_cu_size[BLK_32x32] = CU_32x32;
+ ge_blk_size_to_cu_size[BLK_64x64] = CU_64x64;
+
+ /* This is the reverse, given cU size, get blk size */
+ ge_cu_size_to_blk_size[CU_8x8] = BLK_8x8;
+ ge_cu_size_to_blk_size[CU_16x16] = BLK_16x16;
+ ge_cu_size_to_blk_size[CU_32x32] = BLK_32x32;
+ ge_cu_size_to_blk_size[CU_64x64] = BLK_64x64;
+
+ gau1_is_vert_part[PRT_2Nx2N] = 0;
+ gau1_is_vert_part[PRT_2NxN] = 0;
+ gau1_is_vert_part[PRT_Nx2N] = 1;
+ gau1_is_vert_part[PRT_NxN] = 1;
+ gau1_is_vert_part[PRT_2NxnU] = 0;
+ gau1_is_vert_part[PRT_2NxnD] = 0;
+ gau1_is_vert_part[PRT_nLx2N] = 1;
+ gau1_is_vert_part[PRT_nRx2N] = 1;
+
+ /* Initialise the number of best results for the full-pel refinement */
+ gau1_num_best_results_PQ[PART_ID_2Nx2N] = 2;
+ gau1_num_best_results_PQ[PART_ID_2NxN_T] = 0;
+ gau1_num_best_results_PQ[PART_ID_2NxN_B] = 0;
+ gau1_num_best_results_PQ[PART_ID_Nx2N_L] = 0;
+ gau1_num_best_results_PQ[PART_ID_Nx2N_R] = 0;
+ gau1_num_best_results_PQ[PART_ID_NxN_TL] = 1;
+ gau1_num_best_results_PQ[PART_ID_NxN_TR] = 1;
+ gau1_num_best_results_PQ[PART_ID_NxN_BL] = 1;
+ gau1_num_best_results_PQ[PART_ID_NxN_BR] = 1;
+ gau1_num_best_results_PQ[PART_ID_2NxnU_T] = 1;
+ gau1_num_best_results_PQ[PART_ID_2NxnU_B] = 0;
+ gau1_num_best_results_PQ[PART_ID_2NxnD_T] = 0;
+ gau1_num_best_results_PQ[PART_ID_2NxnD_B] = 1;
+ gau1_num_best_results_PQ[PART_ID_nLx2N_L] = 1;
+ gau1_num_best_results_PQ[PART_ID_nLx2N_R] = 0;
+ gau1_num_best_results_PQ[PART_ID_nRx2N_L] = 0;
+ gau1_num_best_results_PQ[PART_ID_nRx2N_R] = 1;
+
+ gau1_num_best_results_HQ[PART_ID_2Nx2N] = 2;
+ gau1_num_best_results_HQ[PART_ID_2NxN_T] = 0;
+ gau1_num_best_results_HQ[PART_ID_2NxN_B] = 0;
+ gau1_num_best_results_HQ[PART_ID_Nx2N_L] = 0;
+ gau1_num_best_results_HQ[PART_ID_Nx2N_R] = 0;
+ gau1_num_best_results_HQ[PART_ID_NxN_TL] = 1;
+ gau1_num_best_results_HQ[PART_ID_NxN_TR] = 1;
+ gau1_num_best_results_HQ[PART_ID_NxN_BL] = 1;
+ gau1_num_best_results_HQ[PART_ID_NxN_BR] = 1;
+ gau1_num_best_results_HQ[PART_ID_2NxnU_T] = 1;
+ gau1_num_best_results_HQ[PART_ID_2NxnU_B] = 0;
+ gau1_num_best_results_HQ[PART_ID_2NxnD_T] = 0;
+ gau1_num_best_results_HQ[PART_ID_2NxnD_B] = 1;
+ gau1_num_best_results_HQ[PART_ID_nLx2N_L] = 1;
+ gau1_num_best_results_HQ[PART_ID_nLx2N_R] = 0;
+ gau1_num_best_results_HQ[PART_ID_nRx2N_L] = 0;
+ gau1_num_best_results_HQ[PART_ID_nRx2N_R] = 1;
+
+ gau1_num_best_results_MS[PART_ID_2Nx2N] = 2;
+ gau1_num_best_results_MS[PART_ID_2NxN_T] = 0;
+ gau1_num_best_results_MS[PART_ID_2NxN_B] = 0;
+ gau1_num_best_results_MS[PART_ID_Nx2N_L] = 0;
+ gau1_num_best_results_MS[PART_ID_Nx2N_R] = 0;
+ gau1_num_best_results_MS[PART_ID_NxN_TL] = 1;
+ gau1_num_best_results_MS[PART_ID_NxN_TR] = 1;
+ gau1_num_best_results_MS[PART_ID_NxN_BL] = 1;
+ gau1_num_best_results_MS[PART_ID_NxN_BR] = 1;
+ gau1_num_best_results_MS[PART_ID_2NxnU_T] = 1;
+ gau1_num_best_results_MS[PART_ID_2NxnU_B] = 0;
+ gau1_num_best_results_MS[PART_ID_2NxnD_T] = 0;
+ gau1_num_best_results_MS[PART_ID_2NxnD_B] = 1;
+ gau1_num_best_results_MS[PART_ID_nLx2N_L] = 1;
+ gau1_num_best_results_MS[PART_ID_nLx2N_R] = 0;
+ gau1_num_best_results_MS[PART_ID_nRx2N_L] = 0;
+ gau1_num_best_results_MS[PART_ID_nRx2N_R] = 1;
+
+ gau1_num_best_results_HS[PART_ID_2Nx2N] = 2;
+ gau1_num_best_results_HS[PART_ID_2NxN_T] = 0;
+ gau1_num_best_results_HS[PART_ID_2NxN_B] = 0;
+ gau1_num_best_results_HS[PART_ID_Nx2N_L] = 0;
+ gau1_num_best_results_HS[PART_ID_Nx2N_R] = 0;
+ gau1_num_best_results_HS[PART_ID_NxN_TL] = 0;
+ gau1_num_best_results_HS[PART_ID_NxN_TR] = 0;
+ gau1_num_best_results_HS[PART_ID_NxN_BL] = 0;
+ gau1_num_best_results_HS[PART_ID_NxN_BR] = 0;
+ gau1_num_best_results_HS[PART_ID_2NxnU_T] = 0;
+ gau1_num_best_results_HS[PART_ID_2NxnU_B] = 0;
+ gau1_num_best_results_HS[PART_ID_2NxnD_T] = 0;
+ gau1_num_best_results_HS[PART_ID_2NxnD_B] = 0;
+ gau1_num_best_results_HS[PART_ID_nLx2N_L] = 0;
+ gau1_num_best_results_HS[PART_ID_nLx2N_R] = 0;
+ gau1_num_best_results_HS[PART_ID_nRx2N_L] = 0;
+ gau1_num_best_results_HS[PART_ID_nRx2N_R] = 0;
+
+ gau1_num_best_results_XS[PART_ID_2Nx2N] = 2;
+ gau1_num_best_results_XS[PART_ID_2NxN_T] = 0;
+ gau1_num_best_results_XS[PART_ID_2NxN_B] = 0;
+ gau1_num_best_results_XS[PART_ID_Nx2N_L] = 0;
+ gau1_num_best_results_XS[PART_ID_Nx2N_R] = 0;
+ gau1_num_best_results_XS[PART_ID_NxN_TL] = 0;
+ gau1_num_best_results_XS[PART_ID_NxN_TR] = 0;
+ gau1_num_best_results_XS[PART_ID_NxN_BL] = 0;
+ gau1_num_best_results_XS[PART_ID_NxN_BR] = 0;
+ gau1_num_best_results_XS[PART_ID_2NxnU_T] = 0;
+ gau1_num_best_results_XS[PART_ID_2NxnU_B] = 0;
+ gau1_num_best_results_XS[PART_ID_2NxnD_T] = 0;
+ gau1_num_best_results_XS[PART_ID_2NxnD_B] = 0;
+ gau1_num_best_results_XS[PART_ID_nLx2N_L] = 0;
+ gau1_num_best_results_XS[PART_ID_nLx2N_R] = 0;
+ gau1_num_best_results_XS[PART_ID_nRx2N_L] = 0;
+ gau1_num_best_results_XS[PART_ID_nRx2N_R] = 0;
+
+ gau1_num_best_results_XS25[PART_ID_2Nx2N] = MAX_NUM_CANDS_FOR_FPEL_REFINE_IN_XS25;
+ gau1_num_best_results_XS25[PART_ID_2NxN_T] = 0;
+ gau1_num_best_results_XS25[PART_ID_2NxN_B] = 0;
+ gau1_num_best_results_XS25[PART_ID_Nx2N_L] = 0;
+ gau1_num_best_results_XS25[PART_ID_Nx2N_R] = 0;
+ gau1_num_best_results_XS25[PART_ID_NxN_TL] = 0;
+ gau1_num_best_results_XS25[PART_ID_NxN_TR] = 0;
+ gau1_num_best_results_XS25[PART_ID_NxN_BL] = 0;
+ gau1_num_best_results_XS25[PART_ID_NxN_BR] = 0;
+ gau1_num_best_results_XS25[PART_ID_2NxnU_T] = 0;
+ gau1_num_best_results_XS25[PART_ID_2NxnU_B] = 0;
+ gau1_num_best_results_XS25[PART_ID_2NxnD_T] = 0;
+ gau1_num_best_results_XS25[PART_ID_2NxnD_B] = 0;
+ gau1_num_best_results_XS25[PART_ID_nLx2N_L] = 0;
+ gau1_num_best_results_XS25[PART_ID_nLx2N_R] = 0;
+ gau1_num_best_results_XS25[PART_ID_nRx2N_L] = 0;
+ gau1_num_best_results_XS25[PART_ID_nRx2N_R] = 0;
+
+ /* Top right validity for each part id */
+ gau1_partid_tr_valid[PART_ID_2Nx2N] = 1;
+ gau1_partid_tr_valid[PART_ID_2NxN_T] = 1;
+ gau1_partid_tr_valid[PART_ID_2NxN_B] = 0;
+ gau1_partid_tr_valid[PART_ID_Nx2N_L] = 1;
+ gau1_partid_tr_valid[PART_ID_Nx2N_R] = 1;
+ gau1_partid_tr_valid[PART_ID_NxN_TL] = 1;
+ gau1_partid_tr_valid[PART_ID_NxN_TR] = 1;
+ gau1_partid_tr_valid[PART_ID_NxN_BL] = 1;
+ gau1_partid_tr_valid[PART_ID_NxN_BR] = 0;
+ gau1_partid_tr_valid[PART_ID_2NxnU_T] = 1;
+ gau1_partid_tr_valid[PART_ID_2NxnU_B] = 0;
+ gau1_partid_tr_valid[PART_ID_2NxnD_T] = 1;
+ gau1_partid_tr_valid[PART_ID_2NxnD_B] = 0;
+ gau1_partid_tr_valid[PART_ID_nLx2N_L] = 1;
+ gau1_partid_tr_valid[PART_ID_nLx2N_R] = 1;
+ gau1_partid_tr_valid[PART_ID_nRx2N_L] = 1;
+ gau1_partid_tr_valid[PART_ID_nRx2N_R] = 1;
+
+ /* Bot Left validity for each part id */
+ gau1_partid_bl_valid[PART_ID_2Nx2N] = 1;
+ gau1_partid_bl_valid[PART_ID_2NxN_T] = 1;
+ gau1_partid_bl_valid[PART_ID_2NxN_B] = 1;
+ gau1_partid_bl_valid[PART_ID_Nx2N_L] = 1;
+ gau1_partid_bl_valid[PART_ID_Nx2N_R] = 0;
+ gau1_partid_bl_valid[PART_ID_NxN_TL] = 1;
+ gau1_partid_bl_valid[PART_ID_NxN_TR] = 0;
+ gau1_partid_bl_valid[PART_ID_NxN_BL] = 1;
+ gau1_partid_bl_valid[PART_ID_NxN_BR] = 0;
+ gau1_partid_bl_valid[PART_ID_2NxnU_T] = 1;
+ gau1_partid_bl_valid[PART_ID_2NxnU_B] = 1;
+ gau1_partid_bl_valid[PART_ID_2NxnD_T] = 1;
+ gau1_partid_bl_valid[PART_ID_2NxnD_B] = 1;
+ gau1_partid_bl_valid[PART_ID_nLx2N_L] = 1;
+ gau1_partid_bl_valid[PART_ID_nLx2N_R] = 0;
+ gau1_partid_bl_valid[PART_ID_nRx2N_L] = 1;
+ gau1_partid_bl_valid[PART_ID_nRx2N_R] = 0;
+
+ /*Part id to part num of this partition id in the CU */
+ gau1_part_id_to_part_num[PART_ID_2Nx2N] = 0;
+ gau1_part_id_to_part_num[PART_ID_2NxN_T] = 0;
+ gau1_part_id_to_part_num[PART_ID_2NxN_B] = 1;
+ gau1_part_id_to_part_num[PART_ID_Nx2N_L] = 0;
+ gau1_part_id_to_part_num[PART_ID_Nx2N_R] = 1;
+ gau1_part_id_to_part_num[PART_ID_NxN_TL] = 0;
+ gau1_part_id_to_part_num[PART_ID_NxN_TR] = 1;
+ gau1_part_id_to_part_num[PART_ID_NxN_BL] = 2;
+ gau1_part_id_to_part_num[PART_ID_NxN_BR] = 3;
+ gau1_part_id_to_part_num[PART_ID_2NxnU_T] = 0;
+ gau1_part_id_to_part_num[PART_ID_2NxnU_B] = 1;
+ gau1_part_id_to_part_num[PART_ID_2NxnD_T] = 0;
+ gau1_part_id_to_part_num[PART_ID_2NxnD_B] = 1;
+ gau1_part_id_to_part_num[PART_ID_nLx2N_L] = 0;
+ gau1_part_id_to_part_num[PART_ID_nLx2N_R] = 1;
+ gau1_part_id_to_part_num[PART_ID_nRx2N_L] = 0;
+ gau1_part_id_to_part_num[PART_ID_nRx2N_R] = 1;
+
+ /*Which partition type does this partition id belong to */
+ ge_part_id_to_part_type[PART_ID_2Nx2N] = PRT_2Nx2N;
+ ge_part_id_to_part_type[PART_ID_2NxN_T] = PRT_2NxN;
+ ge_part_id_to_part_type[PART_ID_2NxN_B] = PRT_2NxN;
+ ge_part_id_to_part_type[PART_ID_Nx2N_L] = PRT_Nx2N;
+ ge_part_id_to_part_type[PART_ID_Nx2N_R] = PRT_Nx2N;
+ ge_part_id_to_part_type[PART_ID_NxN_TL] = PRT_NxN;
+ ge_part_id_to_part_type[PART_ID_NxN_TR] = PRT_NxN;
+ ge_part_id_to_part_type[PART_ID_NxN_BL] = PRT_NxN;
+ ge_part_id_to_part_type[PART_ID_NxN_BR] = PRT_NxN;
+ ge_part_id_to_part_type[PART_ID_2NxnU_T] = PRT_2NxnU;
+ ge_part_id_to_part_type[PART_ID_2NxnU_B] = PRT_2NxnU;
+ ge_part_id_to_part_type[PART_ID_2NxnD_T] = PRT_2NxnD;
+ ge_part_id_to_part_type[PART_ID_2NxnD_B] = PRT_2NxnD;
+ ge_part_id_to_part_type[PART_ID_nLx2N_L] = PRT_nLx2N;
+ ge_part_id_to_part_type[PART_ID_nLx2N_R] = PRT_nLx2N;
+ ge_part_id_to_part_type[PART_ID_nRx2N_L] = PRT_nRx2N;
+ ge_part_id_to_part_type[PART_ID_nRx2N_R] = PRT_nRx2N;
+
+ /*************************************************************************/
+ /* Set up the bits to be taken up for the part type. This is equally */
+ /* divided up between the various partitions in the part-type. */
+ /* For NxN @ CU 16x16, we assume it as CU 8x8, so consider it as */
+ /* partition 2Nx2N. */
+ /*************************************************************************/
+ /* 1 bit for 2Nx2N partition */
+ gau1_bits_for_part_id_q1[PART_ID_2Nx2N] = 2;
+
+ /* 3 bits for symmetric part types, so 1.5 bits per partition */
+ gau1_bits_for_part_id_q1[PART_ID_2NxN_T] = 3;
+ gau1_bits_for_part_id_q1[PART_ID_2NxN_B] = 3;
+ gau1_bits_for_part_id_q1[PART_ID_Nx2N_L] = 3;
+ gau1_bits_for_part_id_q1[PART_ID_Nx2N_R] = 3;
+
+ /* 1 bit for NxN partitions, assuming these to be 2Nx2N CUs of lower level */
+ gau1_bits_for_part_id_q1[PART_ID_NxN_TL] = 2;
+ gau1_bits_for_part_id_q1[PART_ID_NxN_TR] = 2;
+ gau1_bits_for_part_id_q1[PART_ID_NxN_BL] = 2;
+ gau1_bits_for_part_id_q1[PART_ID_NxN_BR] = 2;
+
+ /* 4 bits for AMP so 2 bits per partition */
+ gau1_bits_for_part_id_q1[PART_ID_2NxnU_T] = 4;
+ gau1_bits_for_part_id_q1[PART_ID_2NxnU_B] = 4;
+ gau1_bits_for_part_id_q1[PART_ID_2NxnD_T] = 4;
+ gau1_bits_for_part_id_q1[PART_ID_2NxnD_B] = 4;
+ gau1_bits_for_part_id_q1[PART_ID_nLx2N_L] = 4;
+ gau1_bits_for_part_id_q1[PART_ID_nLx2N_R] = 4;
+ gau1_bits_for_part_id_q1[PART_ID_nRx2N_L] = 4;
+ gau1_bits_for_part_id_q1[PART_ID_nRx2N_R] = 4;
+}
+
+/**
+********************************************************************************
+* @fn hme_enc_num_alloc()
+*
+* @brief Returns the number of memtabs required by the HME encode module
+* @param[in] i4_num_me_frm_pllel : presumably the number of ME frames run in parallel
+* @return MAX_HME_ENC_TOT_MEMTABS if i4_num_me_frm_pllel > 1, else MIN_HME_ENC_TOT_MEMTABS
+********************************************************************************
+*/
+S32 hme_enc_num_alloc(WORD32 i4_num_me_frm_pllel)
+{
+ if(i4_num_me_frm_pllel > 1) /* only the "more than one" distinction matters here */
+ {
+ return ((S32)MAX_HME_ENC_TOT_MEMTABS);
+ }
+ else
+ {
+ return ((S32)MIN_HME_ENC_TOT_MEMTABS);
+ }
+}
+
+/**
+********************************************************************************
+* @fn hme_coarse_num_alloc()
+*
+* @brief Returns the number of memtabs required by the HME coarse module
+*
+* @return HME_COARSE_TOT_MEMTABS, cast to S32
+********************************************************************************
+*/
+S32 hme_coarse_num_alloc()
+{
+ return ((S32)HME_COARSE_TOT_MEMTABS);
+}
+
+/**
+********************************************************************************
+* @fn hme_coarse_dep_mngr_num_alloc()
+*
+* @brief Returns the number of memtabs required by the Dep Mngr for the HME module
+*
+* @return (MAX_NUM_HME_LAYERS - 1) * ihevce_dmgr_get_num_mem_recs(), cast to WORD32
+********************************************************************************
+*/
+WORD32 hme_coarse_dep_mngr_num_alloc()
+{
+ return ((WORD32)((MAX_NUM_HME_LAYERS - 1) * ihevce_dmgr_get_num_mem_recs()));
+}
+
+S32 hme_validate_init_prms(hme_init_prms_t *ps_prms)
+{ /* Sanity-checks the HME init params: returns 0 when they are acceptable, -1 otherwise */
+ S32 n_layers = ps_prms->num_simulcast_layers;
+
+ /* One slot must stay free: the final layer has to be a non-encode coarse layer */
+ if(n_layers > (MAX_NUM_LAYERS - 1))
+ return (-1);
+
+ if(n_layers < 1) /* also guards the [n_layers - 1] indexing below */
+ return (-1);
+
+ /* Width of the coarsest encode layer has to be >= 2 * MIN_WD_COARSE, where */
+ /* MIN_WD_COARSE is the min width allowed in any layer. Ditto for height. */
+ if(ps_prms->a_wd[n_layers - 1] < 2 * (MIN_WD_COARSE))
+ return (-1);
+ if(ps_prms->a_ht[n_layers - 1] < 2 * (MIN_HT_COARSE))
+ return (-1);
+ if(ps_prms->max_num_ref > MAX_NUM_REF) /* reference count must lie in [0, MAX_NUM_REF] */
+ return (-1);
+ if(ps_prms->max_num_ref < 0)
+ return (-1);
+
+ return (0);
+}
+void hme_set_layer_res_attrs(
+ layer_ctxt_t *ps_layer, S32 wd, S32 ht, S32 disp_wd, S32 disp_ht, U08 u1_enc)
+{ /* Stores the layer dimensions in ps_layer; when u1_enc is 0 it also sets up the input buffer geometry */
+ ps_layer->i4_wd = wd;
+ ps_layer->i4_ht = ht;
+ ps_layer->i4_disp_wd = disp_wd;
+ ps_layer->i4_disp_ht = disp_ht;
+ if(0 == u1_enc) /* input stride, offset, padding and pointer are set only when u1_enc is 0 */
+ {
+ ps_layer->i4_inp_stride = wd + 32 + 4; /* presumably 16 px of padding on each side plus 4 spare - confirm */
+ ps_layer->i4_inp_offset = (ps_layer->i4_inp_stride * 16) + 16; /* 16 rows down and 16 columns in from the base */
+ ps_layer->i4_pad_x_inp = 16;
+ ps_layer->i4_pad_y_inp = 16;
+ ps_layer->pu1_inp = ps_layer->pu1_inp_base + ps_layer->i4_inp_offset; /* requires pu1_inp_base to be set already */
+ }
+}
+
+/**
+********************************************************************************
+* @fn hme_coarse_get_layer1_mv_bank_ref_idx_size()
+* @brief Computes the MV bank and ref idx buffer sizes (in bytes) of Layer 1
+* (penultimate) from a_wd[1], a_ht[1], n_tot_layers and max_num_ref
+* @param[out] pi4_mv_bank_size, pi4_ref_idx_size : receive the computed sizes
+* @return none
+********************************************************************************
+*/
+void hme_coarse_get_layer1_mv_bank_ref_idx_size(
+ S32 n_tot_layers,
+ S32 *a_wd,
+ S32 *a_ht,
+ S32 max_num_ref,
+ S32 *pi4_mv_bank_size,
+ S32 *pi4_ref_idx_size)
+{
+ S32 num_blks, num_mvs_per_blk, num_ref;
+ S32 num_cols, num_rows, num_mvs_per_row;
+ S32 is_explicit_store = 1;
+ S32 wd, ht, num_layers_explicit_search;
+ S32 num_results, use_4x4;
+ wd = a_wd[1]; /* only the layer 1 dimensions are read from the arrays */
+ ht = a_ht[1];
+
+ /* Assuming about 4 layers for 1080p, we do explicit search across all ref */
+ /* frames in all but the final layer. In the final layer, it could be 1/2 */
+ //ps_hme_init_prms->num_layers_explicit_search = 3;
+ num_layers_explicit_search = 3;
+
+ if(num_layers_explicit_search <= 0) /* never true here: the value was just hard-coded to 3 */
+ num_layers_explicit_search = n_tot_layers - 1;
+
+ num_layers_explicit_search = MIN(num_layers_explicit_search, n_tot_layers - 1);
+
+ /* Possibly implicit search for lower (finer) layers */
+ if(n_tot_layers - 1 > num_layers_explicit_search)
+ is_explicit_store = 0;
+
+ /* When layer 1 is itself the coarsest layer (n_tot_layers == 2) it stores more results */
+ if(1 == (n_tot_layers - 1))
+ {
+ /* we store 4 results in coarsest layer per blk. 8x4L, 8x4R, 4x8T, 4x8B */
+ //ps_hme_init_prms->max_num_results_coarse = 4;
+ //vijay : with new algo in coarsest layer this has to be revisited
+ num_results = 4;
+ }
+ else
+ {
+ /* Every refinement layer stores a max of 2 results per partition */
+ //ps_hme_init_prms->max_num_results = 2;
+ num_results = 2;
+ }
+ use_4x4 = hme_get_mv_blk_size(1, 1, n_tot_layers, 0); /* enable_4x4 = 1, layer_id = 1, is_enc = 0 */
+
+ num_cols = use_4x4 ? ((wd >> 2) + 2) : ((wd >> 3) + 2); /* blocks per row (4 or 8 pixels wide) plus 2 */
+ num_rows = use_4x4 ? ((ht >> 2) + 2) : ((ht >> 3) + 2); /* blocks per column (4 or 8 pixels high) plus 2 */
+
+ if(is_explicit_store)
+ num_ref = max_num_ref;
+ else
+ num_ref = 2; /* two references are budgeted when results are not stored per explicit ref */
+
+ num_blks = num_cols * num_rows;
+ num_mvs_per_blk = num_ref * num_results;
+ num_mvs_per_row = num_mvs_per_blk * num_cols; /* computed but not used afterwards in this function */
+
+ /* store the sizes */
+ *pi4_mv_bank_size = num_blks * num_mvs_per_blk * sizeof(hme_mv_t);
+ *pi4_ref_idx_size = num_blks * num_mvs_per_blk * sizeof(S08);
+
+ return;
+}
+/**
+********************************************************************************
+* @fn hme_alloc_init_layer_mv_bank()
+* @brief Two-pass helper for a layer's MV bank and ref idx buffers: with
+* mem_avail == 0 it fills size, align and attr in ps_memtab[0..1]; otherwise it
+* hands back the memtab pointers via pps_mv_base and pi1_ref_idx_base
+* @return Number of memtabs consumed (always 2)
+********************************************************************************
+*/
+S32 hme_alloc_init_layer_mv_bank(
+ hme_memtab_t *ps_memtab,
+ S32 max_num_results,
+ S32 max_num_ref,
+ S32 use_4x4,
+ S32 mem_avail,
+ S32 u1_enc,
+ S32 wd,
+ S32 ht,
+ S32 is_explicit_store,
+ hme_mv_t **pps_mv_base,
+ S08 **pi1_ref_idx_base,
+ S32 *pi4_num_mvs_per_row)
+{
+ S32 count = 0;
+ S32 size;
+ S32 num_blks, num_mvs_per_blk;
+ S32 num_ref;
+ S32 num_cols, num_rows, num_mvs_per_row;
+
+ if(is_explicit_store)
+ num_ref = max_num_ref;
+ else
+ num_ref = 2; /* two references are budgeted when is_explicit_store is 0 */
+
+ /* MV bank allocation takes the following into consideration: */
+ /* number of results per reference x max num references is the amount */
+ /* buffered up per blk. Number of blks in pic depends on the blk size, */
+ /* which could be either 4x4 or 8x8. */
+ num_cols = use_4x4 ? ((wd >> 2) + 2) : ((wd >> 3) + 2);
+ num_rows = use_4x4 ? ((ht >> 2) + 2) : ((ht >> 3) + 2);
+
+ if(u1_enc) /* for u1_enc != 0 the counts above are overridden and use_4x4 is ignored */
+ {
+ /* TODO: CTB64x64 is assumed. FIX according to actual CTB */
+ WORD32 num_ctb_cols = ((wd + 63) >> 6);
+ WORD32 num_ctb_rows = ((ht + 63) >> 6);
+
+ num_cols = (num_ctb_cols << 3) + 2; /* 8 blocks per 64-pixel CTB, plus 2 */
+ num_rows = (num_ctb_rows << 3) + 2;
+ }
+ num_blks = num_cols * num_rows;
+ num_mvs_per_blk = num_ref * max_num_results;
+ num_mvs_per_row = num_mvs_per_blk * num_cols;
+
+ size = num_blks * num_mvs_per_blk * sizeof(hme_mv_t);
+ if(mem_avail)
+ {
+ /* store this for run time verifications */
+ *pi4_num_mvs_per_row = num_mvs_per_row;
+ ASSERT(ps_memtab[count].size == size); /* must match the size requested in the mem_avail == 0 pass */
+ *pps_mv_base = (hme_mv_t *)ps_memtab[count].pu1_mem;
+ }
+ else
+ {
+ ps_memtab[count].size = size;
+ ps_memtab[count].align = 4;
+ ps_memtab[count].e_mem_attr = HME_PERSISTENT_MEM;
+ }
+
+ count++;
+ /* Ref idx takes the same route as mvbase: one S08 entry per stored MV */
+
+ size = num_blks * num_mvs_per_blk * sizeof(S08);
+ if(mem_avail)
+ {
+ ASSERT(ps_memtab[count].size == size);
+ *pi1_ref_idx_base = (S08 *)ps_memtab[count].pu1_mem;
+ }
+ else
+ {
+ ps_memtab[count].size = size;
+ ps_memtab[count].align = 4;
+ ps_memtab[count].e_mem_attr = HME_PERSISTENT_MEM;
+ }
+ count++;
+
+ return (count);
+}
+/**
+********************************************************************************
+* @fn hme_alloc_init_layer()
+* @brief Two-pass helper for one layer context: with mem_avail == 0 it fills
+* size, align and attr in ps_memtab[]; otherwise it wires the memtab memory into
+* the layer context, returns it via pps_layer and sets its resolution attrs
+* @return Number of memtabs consumed (3 when u1_enc is 0, else 2)
+********************************************************************************
+*/
+S32 hme_alloc_init_layer(
+ hme_memtab_t *ps_memtab,
+ S32 max_num_results,
+ S32 max_num_ref,
+ S32 use_4x4,
+ S32 mem_avail,
+ S32 u1_enc,
+ S32 wd,
+ S32 ht,
+ S32 disp_wd,
+ S32 disp_ht,
+ S32 segment_layer,
+ S32 is_explicit_store,
+ layer_ctxt_t **pps_layer)
+{
+ S32 count = 0;
+ layer_ctxt_t *ps_layer = NULL;
+ S32 size;
+ S32 num_ref;
+
+ ARG_NOT_USED(segment_layer);
+
+ if(is_explicit_store) /* num_ref is assigned here but not read again in this function */
+ num_ref = max_num_ref;
+ else
+ num_ref = 2;
+
+ /* We do not store 4x4 results for encoding layers (use_4x4 is not read again below) */
+ if(u1_enc)
+ use_4x4 = 0;
+
+ size = sizeof(layer_ctxt_t);
+ if(mem_avail)
+ {
+ ASSERT(ps_memtab[count].size == size);
+ ps_layer = (layer_ctxt_t *)ps_memtab[count].pu1_mem;
+ *pps_layer = ps_layer; /* *pps_layer is written only in the mem_avail pass */
+ }
+ else
+ {
+ ps_memtab[count].size = size;
+ ps_memtab[count].align = 8;
+ ps_memtab[count].e_mem_attr = HME_PERSISTENT_MEM;
+ }
+
+ count++;
+
+ /* Input luma buffer allocated only for non encode case */
+ if(0 == u1_enc)
+ {
+ /* Allocate input with padding of 16 pixels (plus 4 extra in each dimension) */
+ size = (wd + 32 + 4) * (ht + 32 + 4);
+ if(mem_avail)
+ {
+ ASSERT(ps_memtab[count].size == size);
+ ps_layer->pu1_inp_base = ps_memtab[count].pu1_mem;
+ }
+ else
+ {
+ ps_memtab[count].size = size;
+ ps_memtab[count].align = 16;
+ ps_memtab[count].e_mem_attr = HME_PERSISTENT_MEM;
+ }
+ count++;
+ }
+
+ /* Allocate memory for just the layer mvbank structure. */
+ /* TODO : see if this can be removed by moving it to layer_ctxt */
+ size = sizeof(layer_mv_t);
+
+ if(mem_avail)
+ {
+ ASSERT(ps_memtab[count].size == size);
+ ps_layer->ps_layer_mvbank = (layer_mv_t *)ps_memtab[count].pu1_mem;
+ }
+ else
+ {
+ ps_memtab[count].size = size;
+ ps_memtab[count].align = 8;
+ ps_memtab[count].e_mem_attr = HME_PERSISTENT_MEM;
+ }
+
+ count++;
+
+ if(mem_avail) /* resolution attrs are set only once the layer context memory exists */
+ {
+ hme_set_layer_res_attrs(ps_layer, wd, ht, disp_wd, disp_ht, u1_enc);
+ }
+
+ return (count);
+}
+
+S32 hme_alloc_init_search_nodes(
+ search_results_t *ps_search_results,
+ hme_memtab_t *ps_memtabs,
+ S32 mem_avail,
+ S32 max_num_ref,
+ S32 max_num_results)
+{ /* Requests (mem_avail == 0) or distributes (mem_avail != 0) one memtab of search nodes; always returns 1 */
+ S32 size = max_num_results * sizeof(search_node_t) * max_num_ref * TOT_NUM_PARTS;
+ S32 j, k;
+ search_node_t *ps_search_node;
+
+ if(mem_avail == 0) /* request pass: only describe the memory that is needed */
+ {
+ ps_memtabs->size = size;
+ ps_memtabs->align = 4;
+ ps_memtabs->e_mem_attr = HME_SCRATCH_OVLY_MEM;
+ return (1);
+ }
+
+ ps_search_node = (search_node_t *)ps_memtabs->pu1_mem;
+ ASSERT(ps_memtabs->size == size); /* must match the size requested in the mem_avail == 0 pass */
+ /****************************************************************************/
+ /* For each CU, we search and store N best results, per partition, per ref */
+ /* So the one memtab is split into max_num_ref * TOT_NUM_PARTS node arrays */
+ /****************************************************************************/
+ for(j = 0; j < max_num_ref; j++)
+ {
+ for(k = 0; k < TOT_NUM_PARTS; k++)
+ {
+ ps_search_results->aps_part_results[j][k] = ps_search_node;
+ ps_search_node += max_num_results; /* each (ref, part) pair owns max_num_results consecutive nodes */
+ }
+ }
+ return (1);
+}
+
+S32 hme_derive_num_layers(S32 n_enc_layers, S32 *p_wd, S32 *p_ht, S32 *p_disp_wd, S32 *p_disp_ht)
+{
+ S32 i;
+ /* Derives the layers beyond the encode layers and returns the total layer count. */
+ /* We keep downscaling by 2 until MAX_NUM_LAYERS is reached, or the previous */
+ /* layer's width or ht is below twice the min allowed at the coarsest layer */
+ ASSERT(n_enc_layers < MAX_NUM_LAYERS);
+ ASSERT(n_enc_layers > 0);
+ ASSERT(p_wd[0] <= HME_MAX_WIDTH);
+ ASSERT(p_ht[0] <= HME_MAX_HEIGHT);
+
+ p_disp_wd[0] = p_wd[0]; /* layer 0 display size equals its coded size */
+ p_disp_ht[0] = p_ht[0];
+ /*************************************************************************/
+ /* Verify that for simulcast, lower layer to higher layer ratio is between */
+ /* 2 (dyadic) and 1.33. Typically it should be 1.5. */
+ /* TODO : for interlace, we may choose to have additional downscaling for*/
+ /* width alone in coarsest layer to next layer. */
+ /*************************************************************************/
+ for(i = 1; i < n_enc_layers; i++) /* checks only: nothing is written for encode layers 1..n_enc_layers-1 */
+ {
+ S32 wd1, wd2, ht1, ht2;
+ wd1 = FLOOR16(p_wd[i - 1] >> 1); /* lower bound: half the previous width */
+ wd2 = CEIL16((p_wd[i - 1] * 3) >> 2); /* upper bound: three quarters of the previous width */
+ ASSERT(p_wd[i] >= wd1);
+ ASSERT(p_wd[i] <= wd2);
+ ht1 = FLOOR16(p_ht[i - 1] >> 1);
+ ht2 = CEIL16((p_ht[i - 1] * 3) >> 2);
+ ASSERT(p_ht[i] >= ht1);
+ ASSERT(p_ht[i] <= ht2);
+ }
+ ASSERT(p_wd[n_enc_layers - 1] >= 2 * MIN_WD_COARSE);
+ ASSERT(p_ht[n_enc_layers - 1] >= 2 * MIN_HT_COARSE);
+
+ for(i = n_enc_layers; i < MAX_NUM_LAYERS; i++)
+ {
+ if((p_wd[i - 1] < 2 * MIN_WD_COARSE) || (p_ht[i - 1] < 2 * MIN_HT_COARSE))
+ {
+ return (i); /* previous layer is too small to halve again */
+ }
+ /* Use CEIL16 to facilitate 16x16 searches in future, or to do */
+ /* segmentation study in future */
+ p_wd[i] = CEIL16(p_wd[i - 1] >> 1);
+ p_ht[i] = CEIL16(p_ht[i - 1] >> 1);
+
+ p_disp_wd[i] = p_disp_wd[i - 1] >> 1; /* NOTE(review): for n_enc_layers > 1, p_disp_wd[n_enc_layers - 1] is not written here */
+ p_disp_ht[i] = p_disp_ht[i - 1] >> 1; /* NOTE(review): same for p_disp_ht; the caller would have to set it - confirm */
+ }
+ return (i); /* MAX_NUM_LAYERS when the loop ran to completion */
+}
+
+/**
+********************************************************************************
+* @fn hme_get_mv_blk_size()
+*
+* @brief Decides whether a layer stores its results on 4x4 blks or on 8x8 blks.
+* Later checks in the body override earlier ones.
+* @param[in] enable_4x4 : input param from application to enable 4x4
+*
+* @param[in] layer_id : id of current layer (0 finest)
+*
+* @param[in] num_layers : total num layers
+*
+* @param[in] is_enc : Whether encoding enabled for layer
+*
+* @return non-zero for 4x4 blks, 0 for 8x8 (enable_4x4 or USE_4x4_IN_L1 may be returned as is)
+********************************************************************************
+*/
+S32 hme_get_mv_blk_size(S32 enable_4x4, S32 layer_id, S32 num_layers, S32 is_enc)
+{
+ S32 use_4x4 = enable_4x4; /* default: follow the application's setting */
+
+ if((layer_id <= 1) && (num_layers >= 4)) /* layers 0 and 1 of a 4+ layer setup use the compile-time choice */
+ use_4x4 = USE_4x4_IN_L1;
+ if(layer_id == num_layers - 1) /* the last (coarsest) layer uses 4x4 */
+ use_4x4 = 1;
+ if(is_enc) /* highest precedence: encode layers never use 4x4 */
+ use_4x4 = 0;
+
+ return (use_4x4);
+}
+
+/**
+********************************************************************************
+* @fn hme_enc_alloc_init_mem()
+*
+* @brief Requests/ assign memory based on mem avail
+*
+* @param[in] ps_memtabs : memtab array
+*
+* @param[in] ps_prms : init prms
+*
+* @param[in] pv_ctxt : ME ctxt
+*
+* @param[in] mem_avail : request/assign flag
+*
+* @return 1 for 4x4 blks, 0 for 8x8
+********************************************************************************
+*/
+S32 hme_enc_alloc_init_mem(
+ hme_memtab_t *ps_memtabs,
+ hme_init_prms_t *ps_prms,
+ void *pv_ctxt,
+ S32 mem_avail,
+ S32 i4_num_me_frm_pllel)
+{
+ me_master_ctxt_t *ps_master_ctxt = (me_master_ctxt_t *)pv_ctxt;
+ me_ctxt_t *ps_ctxt;
+ S32 count = 0, size, i, j, use_4x4;
+ S32 n_tot_layers, n_enc_layers;
+ S32 num_layers_explicit_search;
+ S32 a_wd[MAX_NUM_LAYERS], a_ht[MAX_NUM_LAYERS];
+ S32 a_disp_wd[MAX_NUM_LAYERS], a_disp_ht[MAX_NUM_LAYERS];
+ S32 num_results;
+ S32 num_thrds;
+ S32 ctb_wd = 1 << ps_prms->log_ctb_size;
+
+ /* MV bank changes */
+ hme_mv_t *aps_mv_bank[((DEFAULT_MAX_REFERENCE_PICS << 1) * MAX_NUM_ME_PARALLEL) + 1] = { NULL };
+ S32 i4_num_mvs_per_row = 0;
+ S08 *api1_ref_idx[((DEFAULT_MAX_REFERENCE_PICS << 1) * MAX_NUM_ME_PARALLEL) + 1] = { NULL };
+
+ n_enc_layers = ps_prms->num_simulcast_layers;
+
+ /* Memtab 0: handle */
+ size = sizeof(me_master_ctxt_t);
+ if(mem_avail)
+ {
+ /* store the number of processing threads */
+ ps_master_ctxt->i4_num_proc_thrds = ps_prms->i4_num_proc_thrds;
+ }
+ else
+ {
+ ps_memtabs[count].size = size;
+ ps_memtabs[count].align = 8;
+ ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
+ }
+
+ count++;
+
+ /* Memtab 1: ME threads ctxt */
+ size = ps_prms->i4_num_proc_thrds * sizeof(me_ctxt_t);
+ if(mem_avail)
+ {
+ me_ctxt_t *ps_me_tmp_ctxt = (me_ctxt_t *)ps_memtabs[count].pu1_mem;
+
+ /* store the individual thread ctxt pointers */
+ for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
+ {
+ ps_master_ctxt->aps_me_ctxt[num_thrds] = ps_me_tmp_ctxt++;
+ }
+ }
+ else
+ {
+ ps_memtabs[count].size = size;
+ ps_memtabs[count].align = 8;
+ ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
+ }
+
+ count++;
+
+ /* Memtab 2: ME frame ctxts */
+ size = sizeof(me_frm_ctxt_t) * MAX_NUM_ME_PARALLEL * ps_prms->i4_num_proc_thrds;
+ if(mem_avail)
+ {
+ me_frm_ctxt_t *ps_me_frm_tmp_ctxt = (me_frm_ctxt_t *)ps_memtabs[count].pu1_mem;
+
+ for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
+ {
+ /* store the individual thread ctxt pointers */
+ for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
+ {
+ ps_master_ctxt->aps_me_ctxt[num_thrds]->aps_me_frm_prms[i] = ps_me_frm_tmp_ctxt;
+
+ ps_me_frm_tmp_ctxt++;
+ }
+ }
+ }
+ else
+ {
+ ps_memtabs[count].size = size;
+ ps_memtabs[count].align = 8;
+ ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
+ }
+
+ count++;
+
+ memcpy(a_wd, ps_prms->a_wd, sizeof(S32) * ps_prms->num_simulcast_layers);
+ memcpy(a_ht, ps_prms->a_ht, sizeof(S32) * ps_prms->num_simulcast_layers);
+ /*************************************************************************/
+ /* Derive the number of HME layers, including both encoded and non encode*/
+ /* This function also derives the width and ht of each layer. */
+ /*************************************************************************/
+ n_tot_layers = hme_derive_num_layers(n_enc_layers, a_wd, a_ht, a_disp_wd, a_disp_ht);
+ num_layers_explicit_search = ps_prms->num_layers_explicit_search;
+ if(num_layers_explicit_search <= 0)
+ num_layers_explicit_search = n_tot_layers - 1;
+
+ num_layers_explicit_search = MIN(num_layers_explicit_search, n_tot_layers - 1);
+
+ if(mem_avail)
+ {
+ for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
+ {
+ me_frm_ctxt_t *ps_frm_ctxt;
+ ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
+
+ for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
+ {
+ ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i];
+
+ memset(ps_frm_ctxt->u1_encode, 0, n_tot_layers);
+ memset(ps_frm_ctxt->u1_encode, 1, n_enc_layers);
+
+ /* only one encode layer is used */
+ ps_frm_ctxt->num_layers = 1;
+
+ ps_frm_ctxt->i4_wd = a_wd[0];
+ ps_frm_ctxt->i4_ht = a_ht[0];
+ /*
+ memcpy(ps_ctxt->a_wd, a_wd, sizeof(S32)*n_tot_layers);
+ memcpy(ps_ctxt->a_ht, a_ht, sizeof(S32)*n_tot_layers);
+*/
+ ps_frm_ctxt->num_layers_explicit_search = num_layers_explicit_search;
+ ps_frm_ctxt->max_num_results = ps_prms->max_num_results;
+ ps_frm_ctxt->max_num_results_coarse = ps_prms->max_num_results_coarse;
+ ps_frm_ctxt->max_num_ref = ps_prms->max_num_ref;
+ }
+ }
+ }
+
+ /* Memtabs : Layers MV bank for encode layer */
+ /* Each ref_descr in master ctxt will have a separate layer ctxt */
+
+ for(i = 0; i < (ps_prms->max_num_ref * i4_num_me_frm_pllel) + 1; i++)
+ {
+ for(j = 0; j < 1; j++)
+ {
+ S32 is_explicit_store = 1;
+ S32 wd, ht;
+ U08 u1_enc = 1;
+ wd = a_wd[j];
+ ht = a_ht[j];
+
+ /* Possibly implicit search for lower (finer) layers */
+ if(n_tot_layers - j > num_layers_explicit_search)
+ is_explicit_store = 0;
+
+ /* Even if explicit search, we store only 2 results (L0 and L1) */
+ /* in finest layer */
+ if(j == 0)
+ {
+ is_explicit_store = 0;
+ }
+
+ /* coarsest layer always uses 4x4 blks to store results */
+ if(j == n_tot_layers - 1)
+ {
+ num_results = ps_prms->max_num_results_coarse;
+ }
+ else
+ {
+ num_results = ps_prms->max_num_results;
+ if(j == 0)
+ num_results = 1;
+ }
+ use_4x4 = hme_get_mv_blk_size(ps_prms->use_4x4, j, n_tot_layers, u1_enc);
+
+ count += hme_alloc_init_layer_mv_bank(
+ &ps_memtabs[count],
+ num_results,
+ ps_prms->max_num_ref,
+ use_4x4,
+ mem_avail,
+ u1_enc,
+ wd,
+ ht,
+ is_explicit_store,
+ &aps_mv_bank[i],
+ &api1_ref_idx[i],
+ &i4_num_mvs_per_row);
+ }
+ }
+
+ /* Memtabs : Layers * num-ref + 1 */
+ for(i = 0; i < (ps_prms->max_num_ref * i4_num_me_frm_pllel) + 1; i++)
+ {
+ /* layer memory allocated only for encode layer */
+ for(j = 0; j < 1; j++)
+ {
+ layer_ctxt_t *ps_layer;
+ S32 is_explicit_store = 1;
+ S32 segment_this_layer = (j == 0) ? 1 : ps_prms->segment_higher_layers;
+ S32 wd, ht;
+ U08 u1_enc = 1;
+ wd = a_wd[j];
+ ht = a_ht[j];
+
+ /* Possibly implicit search for lower (finer) layers */
+ if(n_tot_layers - j > num_layers_explicit_search)
+ is_explicit_store = 0;
+
+ /* Even if explicit search, we store only 2 results (L0 and L1) */
+ /* in finest layer */
+ if(j == 0)
+ {
+ is_explicit_store = 0;
+ }
+
+ /* coarsest layer always uses 4x4 blks to store results */
+ if(j == n_tot_layers - 1)
+ {
+ num_results = ps_prms->max_num_results_coarse;
+ }
+ else
+ {
+ num_results = ps_prms->max_num_results;
+ if(j == 0)
+ num_results = 1;
+ }
+ use_4x4 = hme_get_mv_blk_size(ps_prms->use_4x4, j, n_tot_layers, u1_enc);
+
+ count += hme_alloc_init_layer(
+ &ps_memtabs[count],
+ num_results,
+ ps_prms->max_num_ref,
+ use_4x4,
+ mem_avail,
+ u1_enc,
+ wd,
+ ht,
+ a_disp_wd[j],
+ a_disp_ht[j],
+ segment_this_layer,
+ is_explicit_store,
+ &ps_layer);
+ if(mem_avail)
+ {
+ /* same ps_layer memory pointer is stored in all the threads */
+ for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
+ {
+ ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
+ ps_ctxt->as_ref_descr[i].aps_layers[j] = ps_layer;
+ }
+
+ /* store the MV bank pointers */
+ ps_layer->ps_layer_mvbank->max_num_mvs_per_row = i4_num_mvs_per_row;
+ ps_layer->ps_layer_mvbank->ps_mv_base = aps_mv_bank[i];
+ ps_layer->ps_layer_mvbank->pi1_ref_idx_base = api1_ref_idx[i];
+ }
+ }
+ }
+
+ /* Memtabs : Buf Mgr for predictor bufs and working mem */
+ /* TODO : Parameterise this appropriately */
+ size = MAX_WKG_MEM_SIZE_PER_THREAD * ps_prms->i4_num_proc_thrds * i4_num_me_frm_pllel;
+
+ if(mem_avail)
+ {
+ U08 *pu1_mem = ps_memtabs[count].pu1_mem;
+
+ ASSERT(ps_memtabs[count].size == size);
+
+ for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
+ {
+ me_frm_ctxt_t *ps_frm_ctxt;
+ ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
+
+ for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
+ {
+ ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i];
+
+ hme_init_wkg_mem(&ps_frm_ctxt->s_buf_mgr, pu1_mem, MAX_WKG_MEM_SIZE_PER_THREAD);
+
+ if(i4_num_me_frm_pllel != 1)
+ {
+ /* update the memory buffer pointer */
+ pu1_mem += MAX_WKG_MEM_SIZE_PER_THREAD;
+ }
+ }
+ if(i4_num_me_frm_pllel == 1)
+ {
+ pu1_mem += MAX_WKG_MEM_SIZE_PER_THREAD;
+ }
+ }
+ }
+ else
+ {
+ ps_memtabs[count].size = size;
+ ps_memtabs[count].align = 4;
+ ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
+ }
+ count++;
+
+ /*************************************************************************/
+ /* Memtab : We need 64x64 buffer to store the entire CTB input for bidir */
+ /* refinement. This memtab stores 2I - P0, I is input and P0 is L0 pred */
+ /*************************************************************************/
+ size = sizeof(S16) * CTB_BLK_SIZE * CTB_BLK_SIZE * ps_prms->i4_num_proc_thrds *
+ i4_num_me_frm_pllel;
+
+ if(mem_avail)
+ {
+ S16 *pi2_mem = (S16 *)ps_memtabs[count].pu1_mem;
+
+ ASSERT(ps_memtabs[count].size == size);
+
+ for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
+ {
+ me_frm_ctxt_t *ps_frm_ctxt;
+ ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
+
+ for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
+ {
+ ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i];
+
+ ps_frm_ctxt->pi2_inp_bck = pi2_mem;
+ /** If no me frames running in parallel update the other aps_me_frm_prms indices with same memory **/
+ if(i4_num_me_frm_pllel != 1)
+ {
+ pi2_mem += (CTB_BLK_SIZE * CTB_BLK_SIZE);
+ }
+ }
+ if(i4_num_me_frm_pllel == 1)
+ {
+ pi2_mem += (CTB_BLK_SIZE * CTB_BLK_SIZE);
+ }
+ }
+ }
+ else
+ {
+ ps_memtabs[count].size = size;
+ ps_memtabs[count].align = 16;
+ ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
+ }
+
+ count++;
+
+ /* Allocate a memtab for each histogram. As many as num ref and number of threads */
+ /* Loop across for each ME_FRM in PARALLEL */
+ for(j = 0; j < MAX_NUM_ME_PARALLEL; j++)
+ {
+ for(i = 0; i < ps_prms->max_num_ref; i++)
+ {
+ size = ps_prms->i4_num_proc_thrds * sizeof(mv_hist_t);
+ if(mem_avail)
+ {
+ mv_hist_t *ps_mv_hist = (mv_hist_t *)ps_memtabs[count].pu1_mem;
+
+ ASSERT(size == ps_memtabs[count].size);
+
+ /* divide the memory accross the threads */
+ for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
+ {
+ ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
+
+ ps_ctxt->aps_me_frm_prms[j]->aps_mv_hist[i] = ps_mv_hist;
+ ps_mv_hist++;
+ }
+ }
+ else
+ {
+ ps_memtabs[count].size = size;
+ ps_memtabs[count].align = 8;
+ ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
+ }
+ count++;
+ }
+ if((i4_num_me_frm_pllel == 1) && (j != (MAX_NUM_ME_PARALLEL - 1)))
+ {
+ /** If no me frames running in parallel update the other aps_me_frm_prms indices with same memory **/
+ /** bring the count back to earlier value if there are no me frames in parallel. don't decrement for last loop **/
+ count -= ps_prms->max_num_ref;
+ }
+ }
+
+ /* Memtabs : Search nodes for 16x16 CUs, 32x32 and 64x64 CUs */
+ for(j = 0; j < MAX_NUM_ME_PARALLEL; j++)
+ {
+ S32 count_cpy = count;
+ for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
+ {
+ if(mem_avail)
+ {
+ ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
+ }
+
+ for(i = 0; i < 21; i++)
+ {
+ search_results_t *ps_search_results = NULL;
+ if(mem_avail)
+ {
+ if(i < 16)
+ {
+ ps_search_results =
+ &ps_ctxt->aps_me_frm_prms[j]->as_search_results_16x16[i];
+ }
+ else if(i < 20)
+ {
+ ps_search_results =
+ &ps_ctxt->aps_me_frm_prms[j]->as_search_results_32x32[i - 16];
+ ps_search_results->ps_cu_results =
+ &ps_ctxt->aps_me_frm_prms[j]->as_cu32x32_results[i - 16];
+ }
+ else if(i == 20)
+ {
+ ps_search_results = &ps_ctxt->aps_me_frm_prms[j]->s_search_results_64x64;
+ ps_search_results->ps_cu_results =
+ &ps_ctxt->aps_me_frm_prms[j]->s_cu64x64_results;
+ }
+ else
+ {
+ /* 8x8 search results are not required in LO ME */
+ ASSERT(0);
+ }
+ }
+ count += hme_alloc_init_search_nodes(
+ ps_search_results, &ps_memtabs[count], mem_avail, 2, ps_prms->max_num_results);
+ }
+ }
+
+ if((i4_num_me_frm_pllel == 1) && (j != (MAX_NUM_ME_PARALLEL - 1)))
+ {
+ count = count_cpy;
+ }
+ }
+
+ /* Weighted inputs, one for each ref + one non weighted */
+ for(j = 0; j < MAX_NUM_ME_PARALLEL; j++)
+ {
+ size = (ps_prms->max_num_ref + 1) * ctb_wd * ctb_wd * ps_prms->i4_num_proc_thrds;
+ if(mem_avail)
+ {
+ U08 *pu1_mem;
+ ASSERT(ps_memtabs[count].size == size);
+ pu1_mem = ps_memtabs[count].pu1_mem;
+
+ for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
+ {
+ ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
+
+ for(i = 0; i < ps_prms->max_num_ref + 1; i++)
+ {
+ ps_ctxt->aps_me_frm_prms[j]->s_wt_pred.apu1_wt_inp_buf_array[i] = pu1_mem;
+ pu1_mem += (ctb_wd * ctb_wd);
+ }
+ }
+ }
+ else
+ {
+ ps_memtabs[count].size = size;
+ ps_memtabs[count].align = 16;
+ ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
+ }
+ if((i4_num_me_frm_pllel != 1) || (j == (MAX_NUM_ME_PARALLEL - 1)))
+ {
+ count++;
+ }
+ }
+
+ /* if memory is allocated the intislaise the frm prms ptr to each thrd */
+ if(mem_avail)
+ {
+ for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
+ {
+ me_frm_ctxt_t *ps_frm_ctxt;
+ ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
+
+ for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
+ {
+ ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i];
+
+ ps_frm_ctxt->ps_hme_frm_prms = &ps_master_ctxt->as_frm_prms[i];
+ ps_frm_ctxt->ps_hme_ref_map = &ps_master_ctxt->as_ref_map[i];
+ }
+ }
+ }
+
+ /* Memory allocation for use in Clustering */
+ if(ps_prms->s_me_coding_tools.e_me_quality_presets == ME_PRISTINE_QUALITY)
+ {
+ for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
+ {
+ size = 16 * sizeof(cluster_16x16_blk_t) + 4 * sizeof(cluster_32x32_blk_t) +
+ sizeof(cluster_64x64_blk_t) + sizeof(ctb_cluster_info_t);
+ size *= ps_prms->i4_num_proc_thrds;
+
+ if(mem_avail)
+ {
+ U08 *pu1_mem;
+
+ ASSERT(ps_memtabs[count].size == size);
+ pu1_mem = ps_memtabs[count].pu1_mem;
+
+ for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
+ {
+ ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
+
+ ps_ctxt->aps_me_frm_prms[i]->ps_blk_16x16 = (cluster_16x16_blk_t *)pu1_mem;
+ pu1_mem += (16 * sizeof(cluster_16x16_blk_t));
+
+ ps_ctxt->aps_me_frm_prms[i]->ps_blk_32x32 = (cluster_32x32_blk_t *)pu1_mem;
+ pu1_mem += (4 * sizeof(cluster_32x32_blk_t));
+
+ ps_ctxt->aps_me_frm_prms[i]->ps_blk_64x64 = (cluster_64x64_blk_t *)pu1_mem;
+ pu1_mem += (sizeof(cluster_64x64_blk_t));
+
+ ps_ctxt->aps_me_frm_prms[i]->ps_ctb_cluster_info =
+ (ctb_cluster_info_t *)pu1_mem;
+ pu1_mem += (sizeof(ctb_cluster_info_t));
+ }
+ }
+ else
+ {
+ ps_memtabs[count].size = size;
+ ps_memtabs[count].align = 16;
+ ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
+ }
+
+ if((i4_num_me_frm_pllel != 1) || (i == (MAX_NUM_ME_PARALLEL - 1)))
+ {
+ count++;
+ }
+ }
+ }
+ else if(mem_avail)
+ {
+ for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
+ {
+ for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
+ {
+ ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
+
+ ps_ctxt->aps_me_frm_prms[i]->ps_blk_16x16 = NULL;
+
+ ps_ctxt->aps_me_frm_prms[i]->ps_blk_32x32 = NULL;
+
+ ps_ctxt->aps_me_frm_prms[i]->ps_blk_64x64 = NULL;
+
+ ps_ctxt->aps_me_frm_prms[i]->ps_ctb_cluster_info = NULL;
+ }
+ }
+ }
+
+ for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
+ {
+ size = sizeof(fullpel_refine_ctxt_t);
+ size *= ps_prms->i4_num_proc_thrds;
+
+ if(mem_avail)
+ {
+ U08 *pu1_mem;
+
+ ASSERT(ps_memtabs[count].size == size);
+ pu1_mem = ps_memtabs[count].pu1_mem;
+
+ for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
+ {
+ ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
+
+ ps_ctxt->aps_me_frm_prms[i]->ps_fullpel_refine_ctxt =
+ (fullpel_refine_ctxt_t *)pu1_mem;
+ pu1_mem += (sizeof(fullpel_refine_ctxt_t));
+ }
+ }
+ else
+ {
+ ps_memtabs[count].size = size;
+ ps_memtabs[count].align = 16;
+ ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
+ }
+
+ if((i4_num_me_frm_pllel != 1) || (i == (MAX_NUM_ME_PARALLEL - 1)))
+ {
+ count++;
+ }
+ }
+
+ /* Memory for ihevce_me_optimised_function_list_t struct */
+ if(mem_avail)
+ {
+ ps_master_ctxt->pv_me_optimised_function_list = (void *)ps_memtabs[count++].pu1_mem;
+ }
+ else
+ {
+ ps_memtabs[count].size = sizeof(ihevce_me_optimised_function_list_t);
+ ps_memtabs[count].align = 16;
+ ps_memtabs[count++].e_mem_attr = HME_SCRATCH_OVLY_MEM;
+ }
+
+ ASSERT(count < hme_enc_num_alloc(i4_num_me_frm_pllel));
+ return (count);
+}
+
+/**
+********************************************************************************
+* @fn hme_coarse_alloc_init_mem()
+*
+* @brief Requests (mem_avail == 0) or assigns (mem_avail != 0) coarse ME memory.
+* Both modes step through the memtabs in the same order using 'count'.
+* @param[in,out] ps_memtabs : memtab array; size/align/attr are written when
+* requesting, pu1_mem is read when assigning
+* @param[in] ps_prms : init prms
+*
+* @param[in,out] pv_ctxt : coarse ME master ctxt (coarse_me_master_ctxt_t);
+* only dereferenced in this function when mem_avail is set
+* @param[in] mem_avail : request/assign flag
+*
+* @return number of memtabs
+********************************************************************************
+*/
+S32 hme_coarse_alloc_init_mem(
+ hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms, void *pv_ctxt, S32 mem_avail)
+{
+ coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_ctxt;
+ coarse_me_ctxt_t *ps_ctxt;
+ S32 count = 0, size, i, j, use_4x4, wd;
+ S32 n_tot_layers;
+ S32 num_layers_explicit_search;
+ S32 a_wd[MAX_NUM_LAYERS], a_ht[MAX_NUM_LAYERS];
+ S32 a_disp_wd[MAX_NUM_LAYERS], a_disp_ht[MAX_NUM_LAYERS];
+ S32 num_results;
+ S32 num_thrds;
+ //S32 ctb_wd = 1 << ps_prms->log_ctb_size;
+ S32 sad_4x4_block_size, sad_4x4_block_stride, search_step, num_rows;
+ S32 layer1_blk_width = 8; // 8x8 search
+ S32 blk_shift;
+
+ /* MV bank changes */
+ hme_mv_t *aps_mv_bank[MAX_NUM_LAYERS] = { NULL };
+ S32 ai4_num_mvs_per_row[MAX_NUM_LAYERS] = { 0 };
+ S08 *api1_ref_idx[MAX_NUM_LAYERS] = { NULL };
+
+ /* Memtab 0: handle (master ctxt); its pu1_mem is not read in this function */
+ size = sizeof(coarse_me_master_ctxt_t);
+ if(mem_avail)
+ {
+ /* store the number of processing threads */
+ ps_master_ctxt->i4_num_proc_thrds = ps_prms->i4_num_proc_thrds;
+ }
+ else
+ {
+ ps_memtabs[count].size = size;
+ ps_memtabs[count].align = 8;
+ ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
+ }
+
+ count++;
+
+ /* Memtab 1: ME threads ctxt, one coarse_me_ctxt_t per processing thread */
+ size = ps_prms->i4_num_proc_thrds * sizeof(coarse_me_ctxt_t);
+ if(mem_avail)
+ {
+ coarse_me_ctxt_t *ps_me_tmp_ctxt = (coarse_me_ctxt_t *)ps_memtabs[count].pu1_mem;
+
+ /* store the individual thread ctxt pointers */
+ for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
+ {
+ ps_master_ctxt->aps_me_ctxt[num_thrds] = ps_me_tmp_ctxt++;
+ }
+ }
+ else
+ {
+ ps_memtabs[count].size = size;
+ ps_memtabs[count].align = 8;
+ ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
+ }
+
+ count++;
+
+ memcpy(a_wd, ps_prms->a_wd, sizeof(S32) * ps_prms->num_simulcast_layers);
+ memcpy(a_ht, ps_prms->a_ht, sizeof(S32) * ps_prms->num_simulcast_layers);
+ /*************************************************************************/
+ /* Derive the number of HME layers, including both encoded and non encode*/
+ /* This function also derives the width and ht of each layer. */
+ /*************************************************************************/
+ n_tot_layers = hme_derive_num_layers(1, a_wd, a_ht, a_disp_wd, a_disp_ht);
+
+ num_layers_explicit_search = ps_prms->num_layers_explicit_search;
+
+ if(num_layers_explicit_search <= 0)
+ num_layers_explicit_search = n_tot_layers - 1;
+
+ num_layers_explicit_search = MIN(num_layers_explicit_search, n_tot_layers - 1);
+
+ if(mem_avail)
+ {
+ for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
+ {
+ ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
+ memset(ps_ctxt->u1_encode, 0, n_tot_layers);
+
+ /* encode layer should be excluded during processing */
+ ps_ctxt->num_layers = n_tot_layers;
+
+ memcpy(ps_ctxt->a_wd, a_wd, sizeof(S32) * n_tot_layers);
+ memcpy(ps_ctxt->a_ht, a_ht, sizeof(S32) * n_tot_layers);
+
+ ps_ctxt->num_layers_explicit_search = num_layers_explicit_search;
+ ps_ctxt->max_num_results = ps_prms->max_num_results;
+ ps_ctxt->max_num_results_coarse = ps_prms->max_num_results_coarse;
+ ps_ctxt->max_num_ref = ps_prms->max_num_ref;
+ }
+ }
+
+ /* Memtabs : Layers MV bank for total layers - 2 */
+ /* for penultimate layer (j == 1) MV bank will be initialised at every frame level */
+ for(j = 1; j < n_tot_layers; j++)
+ {
+ S32 is_explicit_store = 1;
+ S32 wd, ht;
+ U08 u1_enc = 0;
+ wd = a_wd[j];
+ ht = a_ht[j];
+
+ /* Possibly implicit search for lower (finer) layers */
+ if(n_tot_layers - j > num_layers_explicit_search)
+ is_explicit_store = 0;
+
+ /* Even if explicit search, we store only 2 results (L0 and L1) */
+ /* in finest layer; j starts at 1 in this loop, so j == 0 never occurs here */
+ if(j == 0)
+ {
+ is_explicit_store = 0;
+ }
+
+ /* coarsest layer always uses 4x4 blks to store results */
+ if(j == n_tot_layers - 1)
+ {
+ num_results = ps_prms->max_num_results_coarse;
+ }
+ else
+ {
+ num_results = ps_prms->max_num_results;
+ if(j == 0)
+ num_results = 1;
+ }
+ use_4x4 = hme_get_mv_blk_size(ps_prms->use_4x4, j, n_tot_layers, u1_enc);
+
+ /* for penultimate (j == 1) only num_mvs_per_row is computed; no memtab is used */
+ if(j == 1)
+ {
+ S32 num_blks, num_mvs_per_blk, num_ref;
+ S32 num_cols, num_rows, num_mvs_per_row;
+
+ num_cols = use_4x4 ? ((wd >> 2) + 2) : ((wd >> 3) + 2);
+ num_rows = use_4x4 ? ((ht >> 2) + 2) : ((ht >> 3) + 2);
+
+ if(is_explicit_store)
+ num_ref = ps_prms->max_num_ref;
+ else
+ num_ref = 2;
+
+ num_blks = num_cols * num_rows;
+ num_mvs_per_blk = num_ref * num_results;
+ num_mvs_per_row = num_mvs_per_blk * num_cols;
+
+ ai4_num_mvs_per_row[j] = num_mvs_per_row;
+ aps_mv_bank[j] = NULL;
+ api1_ref_idx[j] = NULL;
+ }
+ else
+ {
+ count += hme_alloc_init_layer_mv_bank(
+ &ps_memtabs[count],
+ num_results,
+ ps_prms->max_num_ref,
+ use_4x4,
+ mem_avail,
+ u1_enc,
+ wd,
+ ht,
+ is_explicit_store,
+ &aps_mv_bank[j],
+ &api1_ref_idx[j],
+ &ai4_num_mvs_per_row[j]);
+ }
+ }
+
+ /* Memtabs : Layers * (num-ref + 1 + NUM_BUFS_DECOMP_HME) */
+ for(i = 0; i < ps_prms->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; i++)
+ {
+ /* for all layer except encode layer */
+ for(j = 1; j < n_tot_layers; j++)
+ {
+ layer_ctxt_t *ps_layer;
+ S32 is_explicit_store = 1;
+ S32 segment_this_layer = (j == 0) ? 1 : ps_prms->segment_higher_layers;
+ S32 wd, ht;
+ U08 u1_enc = 0;
+ wd = a_wd[j];
+ ht = a_ht[j];
+
+ /* Possibly implicit search for lower (finer) layers */
+ if(n_tot_layers - j > num_layers_explicit_search)
+ is_explicit_store = 0;
+
+ /* Even if explicit search, we store only 2 results (L0 and L1) */
+ /* in finest layer; j starts at 1 in this loop, so j == 0 never occurs here */
+ if(j == 0)
+ {
+ is_explicit_store = 0;
+ }
+
+ /* coarsest layer always uses 4x4 blks to store results */
+ if(j == n_tot_layers - 1)
+ {
+ num_results = ps_prms->max_num_results_coarse;
+ }
+ else
+ {
+ num_results = ps_prms->max_num_results;
+ if(j == 0)
+ num_results = 1;
+ }
+ use_4x4 = hme_get_mv_blk_size(ps_prms->use_4x4, j, n_tot_layers, u1_enc);
+
+ count += hme_alloc_init_layer(
+ &ps_memtabs[count],
+ num_results,
+ ps_prms->max_num_ref,
+ use_4x4,
+ mem_avail,
+ u1_enc,
+ wd,
+ ht,
+ a_disp_wd[j],
+ a_disp_ht[j],
+ segment_this_layer,
+ is_explicit_store,
+ &ps_layer);
+ if(mem_avail)
+ {
+ /* same ps_layer memory pointer is stored in all the threads */
+ for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
+ {
+ ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
+ ps_ctxt->as_ref_descr[i].aps_layers[j] = ps_layer;
+ }
+
+ /* store the MV bank pointers */
+ ps_layer->ps_layer_mvbank->max_num_mvs_per_row = ai4_num_mvs_per_row[j];
+ ps_layer->ps_layer_mvbank->ps_mv_base = aps_mv_bank[j];
+ ps_layer->ps_layer_mvbank->pi1_ref_idx_base = api1_ref_idx[j];
+ }
+ }
+ }
+
+ /* Memtabs : Prev Row search node at coarsest layer */
+ wd = a_wd[n_tot_layers - 1];
+
+ /* Memtabs for storing 4x4 SADs for n rows (n = num threads + 1). One memtab per ref, shared by all threads */
+ num_rows = ps_prms->i4_num_proc_thrds + 1;
+ if(ps_prms->s_me_coding_tools.e_me_quality_presets < ME_MEDIUM_SPEED)
+ search_step = HME_COARSE_STEP_SIZE_HIGH_QUALITY;
+ else
+ search_step = HME_COARSE_STEP_SIZE_HIGH_SPEED;
+
+ /*shift factor*/
+ blk_shift = 2; /*4x4*/
+ search_step >>= 1;
+
+ sad_4x4_block_size = ((2 * MAX_MVX_SUPPORTED_IN_COARSE_LAYER) >> search_step) *
+ ((2 * MAX_MVY_SUPPORTED_IN_COARSE_LAYER) >> search_step);
+ sad_4x4_block_stride = ((wd >> blk_shift) + 1) * sad_4x4_block_size;
+
+ size = num_rows * sad_4x4_block_stride * sizeof(S16);
+ for(i = 0; i < ps_prms->max_num_ref; i++)
+ {
+ if(mem_avail)
+ {
+ ASSERT(size == ps_memtabs[count].size);
+
+ /* same row memory pointer is stored in all the threads */
+ for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
+ {
+ ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
+ ps_ctxt->api2_sads_4x4_n_rows[i] = (S16 *)ps_memtabs[count].pu1_mem;
+ }
+ }
+ else
+ {
+ ps_memtabs[count].size = size;
+ ps_memtabs[count].align = 4;
+ ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
+ }
+ count++;
+ }
+
+ /* Memtabs for storing best search nodes 8x4 for n rows. Row is allocated for worst case (2*min_wd_coarse/4). One memtab per ref, shared by all threads */
+ size = num_rows * ((wd >> blk_shift) + 1) * sizeof(search_node_t);
+ for(i = 0; i < ps_prms->max_num_ref; i++)
+ {
+ if(mem_avail)
+ {
+ ASSERT(size == ps_memtabs[count].size);
+
+ /* same row memory pointer is stored in all the threads */
+ for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
+ {
+ ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
+ ps_ctxt->aps_best_search_nodes_8x4_n_rows[i] =
+ (search_node_t *)ps_memtabs[count].pu1_mem;
+ }
+ }
+ else
+ {
+ ps_memtabs[count].size = size;
+ ps_memtabs[count].align = 4;
+ ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
+ }
+ count++;
+ }
+ /* Memtabs for storing best search nodes 4x8 for n rows. Row is allocated for worst case (2*min_wd_coarse/4). One memtab per ref, shared by all threads */
+ size = num_rows * ((wd >> blk_shift) + 1) * sizeof(search_node_t);
+ for(i = 0; i < ps_prms->max_num_ref; i++)
+ {
+ if(mem_avail)
+ {
+ ASSERT(size == ps_memtabs[count].size);
+
+ /* same row memory pointer is stored in all the threads */
+ for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
+ {
+ ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
+ ps_ctxt->aps_best_search_nodes_4x8_n_rows[i] =
+ (search_node_t *)ps_memtabs[count].pu1_mem;
+ }
+ }
+ else
+ {
+ ps_memtabs[count].size = size;
+ ps_memtabs[count].align = 4;
+ ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
+ }
+ count++;
+ }
+
+ /* Memtabs for the histograms. One memtab per ref, holding one mv_hist_t per thread */
+ for(i = 0; i < ps_prms->max_num_ref; i++)
+ {
+ size = ps_prms->i4_num_proc_thrds * sizeof(mv_hist_t);
+ if(mem_avail)
+ {
+ mv_hist_t *ps_mv_hist = (mv_hist_t *)ps_memtabs[count].pu1_mem;
+
+ ASSERT(size == ps_memtabs[count].size);
+
+ /* divide the memory across the threads */
+ for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
+ {
+ ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
+ ps_ctxt->aps_mv_hist[i] = ps_mv_hist;
+ ps_mv_hist++;
+ }
+ }
+ else
+ {
+ ps_memtabs[count].size = size;
+ ps_memtabs[count].align = 8;
+ ps_memtabs[count].e_mem_attr = HME_PERSISTENT_MEM;
+ }
+ count++;
+ }
+
+ /* Memtabs : Search nodes for 8x8 blks */
+ for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
+ {
+ search_results_t *ps_search_results = NULL;
+
+ if(mem_avail)
+ {
+ ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
+ }
+
+ if(mem_avail)
+ {
+ ps_search_results = &ps_ctxt->s_search_results_8x8;
+ }
+ count += hme_alloc_init_search_nodes(
+ ps_search_results,
+ &ps_memtabs[count],
+ mem_avail,
+ ps_prms->max_num_ref,
+ ps_prms->max_num_results);
+ }
+
+ /* Weighted inputs : (max_num_ref + 1) buffers of 8x8 bytes for every thread */
+ size = (ps_prms->max_num_ref + 1) * layer1_blk_width * layer1_blk_width *
+ ps_prms->i4_num_proc_thrds;
+ if(mem_avail)
+ {
+ U08 *pu1_mem;
+ ASSERT(ps_memtabs[count].size == size);
+ pu1_mem = ps_memtabs[count].pu1_mem;
+
+ for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
+ {
+ ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
+
+ for(i = 0; i < ps_prms->max_num_ref + 1; i++)
+ {
+ ps_ctxt->s_wt_pred.apu1_wt_inp_buf_array[i] = pu1_mem;
+ pu1_mem += (layer1_blk_width * layer1_blk_width);
+ }
+ }
+ }
+ else
+ {
+ ps_memtabs[count].size = size;
+ ps_memtabs[count].align = 16;
+ ps_memtabs[count].e_mem_attr = HME_SCRATCH_OVLY_MEM;
+ }
+ count++;
+
+ /* if memory is allocated then initialise the frm prms ptr of each thrd */
+ if(mem_avail)
+ {
+ for(num_thrds = 0; num_thrds < ps_prms->i4_num_proc_thrds; num_thrds++)
+ {
+ ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
+
+ ps_ctxt->ps_hme_frm_prms = &ps_master_ctxt->s_frm_prms;
+ ps_ctxt->ps_hme_ref_map = &ps_master_ctxt->s_ref_map;
+ }
+ }
+
+ /* Memory for ihevce_me_optimised_function_list_t struct */
+ if(mem_avail)
+ {
+ ps_master_ctxt->pv_me_optimised_function_list = (void *)ps_memtabs[count++].pu1_mem;
+ }
+ else
+ {
+ ps_memtabs[count].size = sizeof(ihevce_me_optimised_function_list_t);
+ ps_memtabs[count].align = 16;
+ ps_memtabs[count++].e_mem_attr = HME_SCRATCH_OVLY_MEM;
+ }
+
+ //ASSERT(count < hme_enc_num_alloc());
+ ASSERT(count < hme_coarse_num_alloc());
+ return (count);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_coarse_me_get_lyr_prms_dep_mngr \endif
+*
+* \brief Returns to the caller key attributes relevant for dependency manager,
+* ie, the number of vertical units in each layer
+*
+* \par Description:
+* This function requires the precondition that the ht of every layer is
+* already known : pai4_ht[0 .. num_layers - 1] is read, so num_layers >= 1.
+* Layer 0 is treated as the encode layer and its vertical unit is the CTB
+* (64 rows); every other layer uses the size from hme_get_blk_size().
+* From ME's perspective, a vertical unit is one which is smallest min size
+* vertically (and spans the entire row horizontally). The coarsest layer
+* (index num_layers - 1) gets one unit more than ceil(ht / unit ht).
+*
+* \param[in] num_layers : Number of ME Layers (1 .. MAX_NUM_LAYERS)
+* \param[in] pai4_ht : Array storing ht at each layer
+* \param[in] pai4_wd : Array storing wd at each layer (not read here)
+* \param[out] pai4_num_vert_units_in_lyr : Array of size N (num layers), each
+* entry has num vertical units in that particular layer
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_coarse_me_get_lyr_prms_dep_mngr(
+ WORD32 num_layers, WORD32 *pai4_ht, WORD32 *pai4_wd, WORD32 *pai4_num_vert_units_in_lyr)
+{
+ /* Height of current and next layers */
+ WORD32 ht_c, ht_n;
+ /* Blk ht at a given layer and next layer*/
+ WORD32 unit_ht_c, unit_ht_n, blk_ht_c, blk_ht_n;
+ /* Number of vertical units in current and next layer */
+ WORD32 num_vert_c, num_vert_n;
+
+ WORD32 ctb_size = 64, num_enc_layers = 1, use_4x4 = 1, i;
+ UWORD8 au1_encode[MAX_NUM_LAYERS];
+
+ memset(au1_encode, 0, num_layers);
+ memset(au1_encode, 1, num_enc_layers);
+
+ /* Only ht_c is needed here; ht_n is set in the loop, so pai4_ht[num_layers - 2] is not read up front */
+ ht_c = pai4_ht[num_layers - 1];
+
+ /* compute blk ht and unit ht for c and n */
+ if(au1_encode[num_layers - 1])
+ {
+ blk_ht_c = 16;
+ unit_ht_c = ctb_size;
+ }
+ else
+ {
+ blk_ht_c = hme_get_blk_size(use_4x4, num_layers - 1, num_layers, 0);
+ unit_ht_c = blk_ht_c;
+ }
+
+ num_vert_c = (ht_c + unit_ht_c - 1) / unit_ht_c;
+ /* For new design in Coarsest HME layer we need */
+ /* one additional row extra at the end of frame */
+ /* hence num_vert_c is incremented by 1 */
+ num_vert_c++;
+
+ /*************************************************************************/
+ /* Run through each layer, set the number of vertical units */
+ /*************************************************************************/
+ for(i = num_layers - 1; i > 0; i--)
+ {
+ pai4_num_vert_units_in_lyr[i] = num_vert_c;
+
+ /* "n" is computed for first time */
+ ht_n = pai4_ht[i - 1];
+ blk_ht_n = hme_get_blk_size(use_4x4, i - 1, num_layers, 0);
+ unit_ht_n = blk_ht_n;
+ if(au1_encode[i - 1])
+ unit_ht_n = ctb_size;
+
+ num_vert_n = (ht_n + unit_ht_n - 1) / unit_ht_n;
+
+ /* Compute the blk size and vert unit size in each layer */
+ /* "c" denotes curr layer, and "n" denotes the layer to which result */
+ /* is projected to */
+ ht_c = ht_n;
+ blk_ht_c = blk_ht_n;
+ unit_ht_c = unit_ht_n;
+ num_vert_c = num_vert_n;
+ }
+
+ /* LAYER 0 OR ENCODE LAYER UPDATE : NO OUTPUT DEPS */
+ /* set the number of vertical units */
+ pai4_num_vert_units_in_lyr[0] = num_vert_c;
+}
+
+/**
+********************************************************************************
+* @fn hme_coarse_dep_mngr_alloc_mem()
+*
+* @brief Fills the memory requests of the HME Dep Mngr, one set per HME layer
+*
+* \param[in,out] ps_mem_tab : pointer to memory descriptors table
+* \param[in] ps_init_prms : Create time static parameters
+* \param[in] i4_mem_space : memspace in which memory request should be done
+* \param[in] i4_num_proc_thrds, i4_resolution_id : thread count, as_tgt_params index
+* @return number of memtabs
+********************************************************************************
+*/
+WORD32 hme_coarse_dep_mngr_alloc_mem(
+ iv_mem_rec_t *ps_mem_tab,
+ ihevce_static_cfg_params_t *ps_init_prms,
+ WORD32 i4_mem_space,
+ WORD32 i4_num_proc_thrds,
+ WORD32 i4_resolution_id)
+{
+ WORD32 ai4_lyr_wd[MAX_NUM_HME_LAYERS], ai4_lyr_ht[MAX_NUM_HME_LAYERS];
+ WORD32 ai4_disp_wd[MAX_NUM_HME_LAYERS], ai4_disp_ht[MAX_NUM_HME_LAYERS];
+ WORD32 ai4_vert_units[MAX_NUM_HME_LAYERS];
+ WORD32 n_tot_layers, i4_lyr, i4_min_cu, n_dep_tabs = 0;
+
+ /* min CU size = 1 << i4_min_log2_cu_size from the config params */
+ i4_min_cu = 1 << ps_init_prms->s_config_prms.i4_min_log2_cu_size;
+
+ /* Layer 0 dims : target width / height plus the SET_CTB_ALIGN() term */
+ ai4_lyr_wd[0] =
+ ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width +
+ SET_CTB_ALIGN(
+ ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, i4_min_cu);
+ ai4_lyr_ht[0] =
+ ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height +
+ SET_CTB_ALIGN(
+ ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, i4_min_cu);
+
+ /* Total layer count, with 1 encode layer; at least 3 layers are expected */
+ n_tot_layers = hme_derive_num_layers(1, ai4_lyr_wd, ai4_lyr_ht, ai4_disp_wd, ai4_disp_ht);
+ ASSERT(n_tot_layers >= 3);
+
+ /* Vertical unit count of every layer, as needed by the dep. mngr */
+ ihevce_coarse_me_get_lyr_prms_dep_mngr(
+ n_tot_layers, ai4_lyr_ht, ai4_lyr_wd, ai4_vert_units);
+
+ /* One group of mem recs per HME layer; L0 (index 0) is skipped */
+ i4_lyr = 1;
+ while(i4_lyr < n_tot_layers)
+ {
+ n_dep_tabs += ihevce_dmgr_get_mem_recs(
+ &ps_mem_tab[n_dep_tabs],
+ DEP_MNGR_ROW_ROW_SYNC,
+ ai4_vert_units[i4_lyr],
+ 1, /* Number of Col Tiles : Not supported in PreEnc */
+ i4_num_proc_thrds,
+ i4_mem_space);
+ i4_lyr++;
+ }
+
+ ASSERT(n_dep_tabs <= hme_coarse_dep_mngr_num_alloc());
+
+ return (n_dep_tabs);
+}
+
+/**
+********************************************************************************
+* @fn hme_coarse_dep_mngr_init()
+*
+* @brief Calls ihevce_dmgr_init() once per HME layer (L0 excluded) and stores
+* each returned value in apv_dep_mngr_hme_sync[] of the master ctxt
+* \param[in,out] ps_mem_tab : pointer to memory descriptors table
+* \param[in] ps_init_prms : Create time static parameters
+* \param[in,out] pv_ctxt : Coarse ME master ctxt (coarse_me_master_ctxt_t)
+* \param[in] pv_osal_handle : Osal handle
+* \param[in] i4_num_proc_thrds, i4_resolution_id : thread count, as_tgt_params index
+* @return number of memtabs
+********************************************************************************
+*/
+WORD32 hme_coarse_dep_mngr_init(
+ iv_mem_rec_t *ps_mem_tab,
+ ihevce_static_cfg_params_t *ps_init_prms,
+ void *pv_ctxt,
+ void *pv_osal_handle,
+ WORD32 i4_num_proc_thrds,
+ WORD32 i4_resolution_id)
+{
+ coarse_me_master_ctxt_t *ps_master = (coarse_me_master_ctxt_t *)pv_ctxt;
+ WORD32 ai4_lyr_wd[MAX_NUM_HME_LAYERS], ai4_lyr_ht[MAX_NUM_HME_LAYERS];
+ WORD32 ai4_disp_wd[MAX_NUM_HME_LAYERS], ai4_disp_ht[MAX_NUM_HME_LAYERS];
+ WORD32 ai4_vert_units[MAX_NUM_HME_LAYERS];
+ WORD32 n_tot_layers, i4_lyr, i4_min_cu, i4_tabs_used = 0;
+
+ /* min CU size = 1 << i4_min_log2_cu_size from the config params */
+ i4_min_cu = 1 << ps_init_prms->s_config_prms.i4_min_log2_cu_size;
+
+ /* Layer 0 dims : target width / height plus the SET_CTB_ALIGN() term */
+ ai4_lyr_wd[0] =
+ ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width +
+ SET_CTB_ALIGN(
+ ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, i4_min_cu);
+ ai4_lyr_ht[0] =
+ ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height +
+ SET_CTB_ALIGN(
+ ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, i4_min_cu);
+
+ /* Total layer count, with 1 encode layer; at least 3 layers are expected */
+ n_tot_layers = hme_derive_num_layers(1, ai4_lyr_wd, ai4_lyr_ht, ai4_disp_wd, ai4_disp_ht);
+ ASSERT(n_tot_layers >= 3);
+
+ /* Vertical unit count of every layer, as needed by the dep. mngr */
+ ihevce_coarse_me_get_lyr_prms_dep_mngr(
+ n_tot_layers, ai4_lyr_ht, ai4_lyr_wd, ai4_vert_units);
+
+ /* One Dep Mngr per HME layer; L0 (index 0) is handled separately */
+ for(i4_lyr = 1; i4_lyr < n_tot_layers; i4_lyr++)
+ {
+ WORD32 i4_blks_per_row, i4_blks_in_pic, i4_blk_shift;
+ WORD32 i4_is_coarsest = (i4_lyr == (n_tot_layers - 1));
+
+ /* shift of 2 for the coarsest layer, 3 for the refine layers */
+ i4_blk_shift = i4_is_coarsest ? 2 : 3;
+
+ GET_NUM_BLKS_IN_PIC(
+ ai4_lyr_wd[i4_lyr], ai4_lyr_ht[i4_lyr], i4_blk_shift, i4_blks_per_row, i4_blks_in_pic);
+
+ /* Coarsest layer : 1 extra block, since a row's last block needs an East block */
+ if(i4_is_coarsest)
+ {
+ i4_blks_per_row += 1;
+ }
+
+ /* Slot i4_lyr - 1 : the array only holds HME layers, L0 is separate */
+ ps_master->apv_dep_mngr_hme_sync[i4_lyr - 1] = ihevce_dmgr_init(
+ &ps_mem_tab[i4_tabs_used],
+ pv_osal_handle,
+ DEP_MNGR_ROW_ROW_SYNC,
+ ai4_vert_units[i4_lyr],
+ i4_blks_per_row,
+ 1, /* Number of Col Tiles : Not supported in PreEnc */
+ i4_num_proc_thrds,
+ 1 /*Sem disabled*/
+ );
+
+ i4_tabs_used += ihevce_dmgr_get_num_mem_recs();
+ }
+
+ return i4_tabs_used;
+}
+
+/**
+********************************************************************************
+* @fn hme_coarse_dep_mngr_reg_sem()
+*
+* @brief Registers the semaphore handles with the Dep Mngr of every HME layer
+*
+* \param[in] pv_ctxt : pointer to Coarse ME master ctxt
+* \param[in] ppv_sem_hdls : Array of semaphore handles
+* \param[in] i4_num_proc_thrds : Number of processing threads
+*
+* @return none
+********************************************************************************
+*/
+void hme_coarse_dep_mngr_reg_sem(void *pv_ctxt, void **ppv_sem_hdls, WORD32 i4_num_proc_thrds)
+{
+ coarse_me_master_ctxt_t *ps_master = (coarse_me_master_ctxt_t *)pv_ctxt;
+ /* the layer count is taken from the ctxt of thread 0 */
+ coarse_me_ctxt_t *ps_thrd0_ctxt = ps_master->aps_me_ctxt[0];
+ WORD32 i4_slot = 0;
+
+ /* Slot k belongs to HME layer k + 1; L0 has no entry in this array */
+ while((i4_slot + 1) < ps_thrd0_ctxt->num_layers)
+ {
+ ihevce_dmgr_reg_sem_hdls(
+ ps_master->apv_dep_mngr_hme_sync[i4_slot], ppv_sem_hdls, i4_num_proc_thrds);
+ i4_slot++;
+ }
+}
+
+/**
+********************************************************************************
+* @fn hme_coarse_dep_mngr_delete()
+*
+* @brief Calls ihevce_dmgr_del() on the Dep Mngr of every HME layer (not L0)
+* Note : Only destroys the resources allocated in the module like
+* semaphore, etc. Memory free is done separately using memtabs
+*
+* \param[in] pv_me_ctxt : pointer to Coarse ME master ctxt
+* \param[in] ps_init_prms : Create time static parameters
+* \param[in] i4_resolution_id : index into as_tgt_params of ps_init_prms
+* @return none
+********************************************************************************
+*/
+void hme_coarse_dep_mngr_delete(
+ void *pv_me_ctxt, ihevce_static_cfg_params_t *ps_init_prms, WORD32 i4_resolution_id)
+{
+ coarse_me_master_ctxt_t *ps_master = (coarse_me_master_ctxt_t *)pv_me_ctxt;
+ WORD32 ai4_lyr_wd[MAX_NUM_HME_LAYERS], ai4_lyr_ht[MAX_NUM_HME_LAYERS];
+ WORD32 ai4_disp_wd[MAX_NUM_HME_LAYERS], ai4_disp_ht[MAX_NUM_HME_LAYERS];
+ WORD32 n_tot_layers, i4_slot, i4_min_cu;
+
+ /* min CU size = 1 << i4_min_log2_cu_size from the config params */
+ i4_min_cu = 1 << ps_init_prms->s_config_prms.i4_min_log2_cu_size;
+
+ /* The layer count is re-derived from the layer 0 dims : target */
+ /* width / height plus the SET_CTB_ALIGN() term */
+ ai4_lyr_wd[0] =
+ ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width +
+ SET_CTB_ALIGN(
+ ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, i4_min_cu);
+ ai4_lyr_ht[0] =
+ ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height +
+ SET_CTB_ALIGN(
+ ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, i4_min_cu);
+
+ n_tot_layers = hme_derive_num_layers(1, ai4_lyr_wd, ai4_lyr_ht, ai4_disp_wd, ai4_disp_ht);
+ ASSERT(n_tot_layers >= 3);
+
+ /* Slot k belongs to HME layer k + 1; L0 has no entry in this array */
+ i4_slot = 0;
+ while((i4_slot + 1) < n_tot_layers)
+ {
+ ihevce_dmgr_del(ps_master->apv_dep_mngr_hme_sync[i4_slot]);
+ i4_slot++;
+ }
+}
+
+/**
+*******************************************************************************
+* @fn S32 hme_enc_alloc(hme_memtab_t *ps_memtabs,
+*                       hme_init_prms_t *ps_prms,
+*                       WORD32 i4_num_me_frm_pllel)
+*
+* @brief Fills up memtabs with memory information details required by HME
+*
+* @param[out] ps_memtabs : Pointer to an array of memtabs where module fills
+* up its requirements of memory
+*
+* @param[in] ps_prms : Input parameters to module crucial in calculating reqd
+* amt of memory
+*
+* @param[in] i4_num_me_frm_pllel : passed on to hme_enc_alloc_init_mem() and
+* hme_enc_num_alloc()
+*
+* @return Value of hme_enc_num_alloc(), or -1 if ps_prms fails validation
+*******************************************************************************
+*/
+S32 hme_enc_alloc(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms, WORD32 i4_num_me_frm_pllel)
+{
+    S32 num_filled, num_total;
+
+    /* Reject invalid init params before touching the memtabs */
+    if(hme_validate_init_prms(ps_prms) == -1)
+    {
+        return (-1);
+    }
+
+    num_filled = hme_enc_alloc_init_mem(ps_memtabs, ps_prms, NULL, 0, i4_num_me_frm_pllel);
+    num_total = hme_enc_num_alloc(i4_num_me_frm_pllel);
+
+    /* Entries not filled above get a dummy 4 byte persistent request */
+    while(num_filled < num_total)
+    {
+        hme_memtab_t *ps_unused = &ps_memtabs[num_filled];
+
+        ps_unused->size = 4;
+        ps_unused->align = 4;
+        ps_unused->e_mem_attr = HME_PERSISTENT_MEM;
+        num_filled++;
+    }
+
+    return (num_total);
+}
+
+/**
+*******************************************************************************
+* @fn S32 hme_coarse_alloc(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms)
+*
+* @brief Fills up memtabs with memory information details required by Coarse HME
+*
+* @param[out] ps_memtabs : Pointer to an array of memtabs where module fills
+* up its requirements of memory
+*
+* @param[in] ps_prms : Input parameters to module crucial in calculating reqd
+* amt of memory
+*
+* @return Value of hme_coarse_num_alloc(), or -1 if ps_prms fails validation
+*******************************************************************************
+*/
+S32 hme_coarse_alloc(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms)
+{
+    S32 num_filled, num_total;
+
+    /* Reject invalid init params before touching the memtabs */
+    if(hme_validate_init_prms(ps_prms) == -1)
+    {
+        return (-1);
+    }
+
+    num_filled = hme_coarse_alloc_init_mem(ps_memtabs, ps_prms, NULL, 0);
+    num_total = hme_coarse_num_alloc();
+
+    /* Entries not filled above get a dummy 4 byte persistent request */
+    while(num_filled < num_total)
+    {
+        hme_memtab_t *ps_unused = &ps_memtabs[num_filled];
+
+        ps_unused->size = 4;
+        ps_unused->align = 4;
+        ps_unused->e_mem_attr = HME_PERSISTENT_MEM;
+        num_filled++;
+    }
+
+    return (num_total);
+}
+
+/**
+*******************************************************************************
+* @fn hme_coarse_dep_mngr_alloc
+*
+* @brief Fills up mem records with the memory requirements of the Coarse HME
+* dependency managers
+*
+* \param[in,out] ps_mem_tab : pointer to memory descriptors table
+* \param[in] ps_init_prms : Create time static parameters
+* \param[in] i4_mem_space : memspace in which memory request should be done
+* \param[in] i4_num_proc_thrds : Number of processing threads
+* \param[in] i4_resolution_id : passed on to hme_coarse_dep_mngr_alloc_mem()
+*
+* @return Value of hme_coarse_dep_mngr_num_alloc()
+*******************************************************************************
+*/
+WORD32 hme_coarse_dep_mngr_alloc(
+    iv_mem_rec_t *ps_mem_tab,
+    ihevce_static_cfg_params_t *ps_init_prms,
+    WORD32 i4_mem_space,
+    WORD32 i4_num_proc_thrds,
+    WORD32 i4_resolution_id)
+{
+    S32 num_total;
+    S32 num_filled = hme_coarse_dep_mngr_alloc_mem(
+        ps_mem_tab, ps_init_prms, i4_mem_space, i4_num_proc_thrds, i4_resolution_id);
+
+    num_total = hme_coarse_dep_mngr_num_alloc();
+
+    /* Records not filled above get a dummy 4 byte request in the same memspace */
+    while(num_filled < num_total)
+    {
+        iv_mem_rec_t *ps_unused = &ps_mem_tab[num_filled];
+
+        ps_unused->i4_mem_size = 4;
+        ps_unused->i4_mem_alignment = 4;
+        ps_unused->e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
+        num_filled++;
+    }
+
+    return (num_total);
+}
+
+/**
+********************************************************************************
+* @fn hme_coarse_init_ctxt()
+*
+* @brief Initialises the Coarse ME context of every processing thread and marks
+* all layer descriptors (layers 1 onwards) as unfilled
+*
+* @param[in,out] ps_master_ctxt : Coarse ME master ctxt holding the thread ctxts
+*
+* @param[in] ps_prms : init prms, copied into every thread ctxt
+*
+* @return none
+********************************************************************************
+*/
+void hme_coarse_init_ctxt(coarse_me_master_ctxt_t *ps_master_ctxt, hme_init_prms_t *ps_prms)
+{
+    coarse_me_ctxt_t *ps_ctxt;
+    S32 thrd_id, descr_id, lyr_id, row;
+
+    /* Per thread initialisation */
+    for(thrd_id = 0; thrd_id < ps_master_ctxt->i4_num_proc_thrds; thrd_id++)
+    {
+        S32 num_rows_coarse;
+
+        ps_ctxt = ps_master_ctxt->aps_me_ctxt[thrd_id];
+
+        /* Keep a private copy of the init prms in the thread ctxt */
+        ps_ctxt->s_init_prms = *ps_prms;
+
+        /* No previous picture yet */
+        ps_ctxt->i4_prev_poc = -1;
+        ps_ctxt->num_b_frms = ps_prms->num_b_frms;
+
+        ps_ctxt->apu1_ref_bits_tlu_lc[0] = &ps_ctxt->au1_ref_bits_tlu_lc[0][0];
+        ps_ctxt->apu1_ref_bits_tlu_lc[1] = &ps_ctxt->au1_ref_bits_tlu_lc[1][0];
+
+        /* Row lookup table : row r maps to buffer (r % number of row bufs) */
+        ps_ctxt->i4_num_row_bufs = ps_prms->i4_num_proc_thrds + 1;
+        num_rows_coarse = ps_ctxt->i4_num_row_bufs;
+        for(row = 0; row < ((HEVCE_MAX_HEIGHT >> 1) >> 2); row++)
+        {
+            ps_ctxt->ai4_row_index[row] = (row % num_rows_coarse);
+        }
+    }
+
+    /* since same layer desc pointer is stored in all the threads ctxt */
+    /* layer init is done only using 0th thread ctxt */
+    ps_ctxt = ps_master_ctxt->aps_me_ctxt[0];
+
+    /* poc of -1 marks a layer descriptor as unfilled */
+    for(descr_id = 0; descr_id < ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; descr_id++)
+    {
+        /* layer 0 is not touched here */
+        for(lyr_id = 1; lyr_id < ps_ctxt->num_layers; lyr_id++)
+        {
+            layer_ctxt_t *ps_layer = ps_ctxt->as_ref_descr[descr_id].aps_layers[lyr_id];
+
+            ps_layer->i4_poc = -1;
+            ps_layer->ppu1_list_inp = &ps_ctxt->apu1_list_inp[lyr_id][0];
+            memset(
+                ps_layer->s_global_mv, 0, sizeof(hme_mv_t) * ps_ctxt->max_num_ref * NUM_GMV_LOBES);
+        }
+    }
+}
+
+/**
+********************************************************************************
+* @fn hme_enc_init_ctxt()
+*
+* @brief Initialises the encode layer ME context of every processing thread
+* (all MAX_NUM_ME_PARALLEL frame contexts) and marks the layer 0 descriptors
+* as free / unfilled
+*
+* @param[in,out] ps_master_ctxt : ME master ctxt holding the thread ctxts
+*
+* @param[in] ps_prms : init prms, copied into every thread ctxt
+*
+* @param[in] ps_rc_quant_ctxt : pointer stored in every frame ctxt
+*
+* @return none
+********************************************************************************
+*/
+void hme_enc_init_ctxt(
+    me_master_ctxt_t *ps_master_ctxt, hme_init_prms_t *ps_prms, rc_quant_t *ps_rc_quant_ctxt)
+{
+    S32 i, j, num_thrds;
+    me_ctxt_t *ps_ctxt;
+    me_frm_ctxt_t *ps_frm_ctxt;
+
+    /* initialise the parameters in context of all threads */
+    for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
+    {
+        ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
+        /* Store Tile params base into ME context */
+        ps_ctxt->pv_tile_params_base = ps_master_ctxt->pv_tile_params_base;
+
+        /* Copy the init prms to context. The copy lives in the thread ctxt, */
+        /* not in the frame ctxt, so it is done once per thread              */
+        ps_ctxt->s_init_prms = *ps_prms;
+
+        for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
+        {
+            ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i];
+
+            /* Initialize some other variables in ctxt */
+            ps_frm_ctxt->i4_prev_poc = INVALID_POC;
+
+            ps_frm_ctxt->log_ctb_size = ps_prms->log_ctb_size;
+
+            ps_frm_ctxt->num_b_frms = ps_prms->num_b_frms;
+
+            ps_frm_ctxt->i4_is_prev_frame_reference = 0;
+
+            ps_frm_ctxt->ps_rc_quant_ctxt = ps_rc_quant_ctxt;
+
+            /* Initialize mv grids for L0 and L1 used in final refinement layer */
+            hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid[0]);
+            hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid[1]);
+            hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid_fpel[0]);
+            hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid_fpel[1]);
+            hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid_qpel[0]);
+            hme_init_mv_grid(&ps_frm_ctxt->as_mv_grid_qpel[1]);
+
+            ps_frm_ctxt->apu1_ref_bits_tlu_lc[0] = &ps_frm_ctxt->au1_ref_bits_tlu_lc[0][0];
+            ps_frm_ctxt->apu1_ref_bits_tlu_lc[1] = &ps_frm_ctxt->au1_ref_bits_tlu_lc[1][0];
+        }
+    }
+
+    /* since same layer desc pointer is stored in all the threads ctxt */
+    /* layer init is done only using 0th thread ctxt */
+    ps_ctxt = ps_master_ctxt->aps_me_ctxt[0];
+
+    ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[0];
+
+    /* Initialize all layer descriptors to INVALID_POC, meaning unfilled */
+    for(i = 0; i < (ps_frm_ctxt->max_num_ref * ps_master_ctxt->i4_num_me_frm_pllel) + 1; i++)
+    {
+        /* only encode layer is processed */
+        for(j = 0; j < 1; j++)
+        {
+            layer_ctxt_t *ps_layer;
+            ps_layer = ps_ctxt->as_ref_descr[i].aps_layers[j];
+            ps_layer->i4_poc = INVALID_POC;
+            ps_layer->i4_is_free = 1;
+            ps_layer->ppu1_list_inp = &ps_frm_ctxt->apu1_list_inp[j][0];
+            ps_layer->ppu1_list_rec_fxfy = &ps_frm_ctxt->apu1_list_rec_fxfy[j][0];
+            ps_layer->ppu1_list_rec_hxfy = &ps_frm_ctxt->apu1_list_rec_hxfy[j][0];
+            ps_layer->ppu1_list_rec_fxhy = &ps_frm_ctxt->apu1_list_rec_fxhy[j][0];
+            ps_layer->ppu1_list_rec_hxhy = &ps_frm_ctxt->apu1_list_rec_hxhy[j][0];
+            ps_layer->ppv_dep_mngr_recon = &ps_frm_ctxt->apv_list_dep_mngr[j][0];
+
+            memset(
+                ps_layer->s_global_mv,
+                0,
+                sizeof(hme_mv_t) * ps_frm_ctxt->max_num_ref * NUM_GMV_LOBES);
+        }
+    }
+}
+
+/**
+*******************************************************************************
+* @fn S32 hme_enc_init(void *pv_ctxt,
+*                      hme_memtab_t *ps_memtabs,
+*                      hme_init_prms_t *ps_prms,
+*                      rc_quant_t *ps_rc_quant_ctxt,
+*                      WORD32 i4_num_me_frm_pllel)
+*
+* @brief Initialises the Encode Layer HME ctxt
+*
+* @param[in,out] pv_ctxt : ME master ctxt (me_master_ctxt_t) to initialise
+*
+* @param[in] ps_memtabs : Pointer to an array of memtabs; copied into the
+* context for returning during free
+*
+* @param[in] ps_prms : Input parameters to module
+*
+* @param[in] ps_rc_quant_ctxt : forwarded to hme_enc_init_ctxt()
+*
+* @param[in] i4_num_me_frm_pllel : forwarded to hme_enc_num_alloc() and
+* hme_enc_alloc_init_mem()
+*
+* @return 0 on success; -1 if ps_prms fails validation or if the memtab count
+* from hme_enc_alloc_init_mem() exceeds hme_enc_num_alloc()
+*******************************************************************************
+*/
+S32 hme_enc_init(
+    void *pv_ctxt,
+    hme_memtab_t *ps_memtabs,
+    hme_init_prms_t *ps_prms,
+    rc_quant_t *ps_rc_quant_ctxt,
+    WORD32 i4_num_me_frm_pllel)
+{
+    me_master_ctxt_t *ps_master = (me_master_ctxt_t *)pv_ctxt;
+    S32 num_used;
+    S32 num_total = hme_enc_num_alloc(i4_num_me_frm_pllel);
+
+    /* Validation of init params */
+    if(hme_validate_init_prms(ps_prms) == -1)
+    {
+        return (-1);
+    }
+
+    num_used = hme_enc_alloc_init_mem(ps_memtabs, ps_prms, pv_ctxt, 1, i4_num_me_frm_pllel);
+    if(num_used > num_total)
+    {
+        return (-1);
+    }
+
+    /* hme_init_globals() is not called here : done as part of coarse me */
+
+    /* Copy the memtabs into the context for returning during free */
+    memcpy(ps_master->as_memtabs, ps_memtabs, sizeof(hme_memtab_t) * num_total);
+
+    /* initialize the context and related buffers */
+    hme_enc_init_ctxt(ps_master, ps_prms, ps_rc_quant_ctxt);
+
+    return (0);
+}
+
+/**
+*******************************************************************************
+* @fn S32 hme_coarse_init(void *pv_ctxt,
+*                         hme_memtab_t *ps_memtabs,
+*                         hme_init_prms_t *ps_prms)
+*
+* @brief Initialises the Coarse HME ctxt
+*
+* @param[in,out] pv_ctxt : Coarse ME master ctxt (coarse_me_master_ctxt_t)
+*
+* @param[in] ps_memtabs : Pointer to an array of memtabs; copied into the
+* context for returning during free
+*
+* @param[in] ps_prms : Input parameters to module
+*
+* @return 0 on success; -1 if ps_prms fails validation or if the memtab count
+* from hme_coarse_alloc_init_mem() exceeds hme_coarse_num_alloc()
+*******************************************************************************
+*/
+S32 hme_coarse_init(void *pv_ctxt, hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms)
+{
+    coarse_me_master_ctxt_t *ps_master = (coarse_me_master_ctxt_t *)pv_ctxt;
+    S32 num_used;
+    S32 num_total = hme_coarse_num_alloc();
+
+    /* Validation of init params */
+    if(hme_validate_init_prms(ps_prms) == -1)
+    {
+        return (-1);
+    }
+
+    num_used = hme_coarse_alloc_init_mem(ps_memtabs, ps_prms, pv_ctxt, 1);
+    if(num_used > num_total)
+    {
+        return (-1);
+    }
+
+    /* Initialize all enumerations based globals */
+    hme_init_globals();
+
+    /* Copy the memtabs into the context for returning during free */
+    memcpy(ps_master->as_memtabs, ps_memtabs, sizeof(hme_memtab_t) * num_total);
+
+    /* initialize the context and related buffers */
+    hme_coarse_init_ctxt(ps_master, ps_prms);
+
+    return (0);
+}
+
+/**
+*******************************************************************************
+* @fn void hme_set_resolution(void *pv_me_ctxt,
+*                             S32 n_enc_layers,
+*                             S32 *p_wd,
+*                             S32 *p_ht,
+*                             S32 me_frm_id)
+*
+* @brief Sets up the layers based on resolution information.
+*
+* @param[in, out] pv_me_ctxt : ME thread ctxt (me_ctxt_t), updated with the
+* resolution info
+*
+* @param[in] n_enc_layers : Number of layers encoded
+*
+* @param[in] p_wd : Pointer to an array having widths for each encode layer
+*
+* @param[in] p_ht : Pointer to an array having heights for each encode layer
+*
+* @param[in] me_frm_id : index of the frame ctxt to update
+*
+* @return void
+*******************************************************************************
+*/
+
+void hme_set_resolution(void *pv_me_ctxt, S32 n_enc_layers, S32 *p_wd, S32 *p_ht, S32 me_frm_id)
+{
+    me_ctxt_t *ps_thrd_ctxt = (me_ctxt_t *)pv_me_ctxt;
+    me_frm_ctxt_t *ps_frm_ctxt = ps_thrd_ctxt->aps_me_frm_prms[me_frm_id];
+    S32 ai4_wd[MAX_NUM_LAYERS], ai4_ht[MAX_NUM_LAYERS];
+    S32 ai4_disp_wd[MAX_NUM_LAYERS], ai4_disp_ht[MAX_NUM_LAYERS];
+    S32 tot_layers, explicit_layers, descr_id;
+
+    /* Work on local copies; the remaining layers are filled in below */
+    memcpy(ai4_wd, p_wd, n_enc_layers * sizeof(S32));
+    memcpy(ai4_ht, p_ht, n_enc_layers * sizeof(S32));
+
+    /*************************************************************************/
+    /* Derive the number of HME layers, including both encoded and non encode*/
+    /* This function also derives the width and ht of each layer.            */
+    /*************************************************************************/
+    tot_layers = hme_derive_num_layers(n_enc_layers, ai4_wd, ai4_ht, ai4_disp_wd, ai4_disp_ht);
+
+    /* A non-positive setting means "all but one"; also clamp to that limit */
+    explicit_layers = ps_thrd_ctxt->s_init_prms.num_layers_explicit_search;
+    if(explicit_layers <= 0)
+    {
+        explicit_layers = tot_layers - 1;
+    }
+    explicit_layers = MIN(explicit_layers, tot_layers - 1);
+    ps_frm_ctxt->num_layers_explicit_search = explicit_layers;
+
+    /* Flag the first n_enc_layers layers as encode layers, the rest as not */
+    memset(ps_frm_ctxt->u1_encode, 0, tot_layers);
+    memset(ps_frm_ctxt->u1_encode, 1, n_enc_layers);
+
+    ps_frm_ctxt->num_layers = tot_layers;
+    ps_frm_ctxt->i4_wd = ai4_wd[0];
+    ps_frm_ctxt->i4_ht = ai4_ht[0];
+
+    /* Only layer 0 (the encode layer) of each descriptor is set up here */
+    for(descr_id = 0; descr_id < ps_frm_ctxt->max_num_ref + 1; descr_id++)
+    {
+        U08 u1_enc = ps_frm_ctxt->u1_encode[0];
+        layer_ctxt_t *ps_layer = ps_thrd_ctxt->as_ref_descr[descr_id].aps_layers[0];
+
+        hme_set_layer_res_attrs(
+            ps_layer, ai4_wd[0], ai4_ht[0], ai4_disp_wd[0], ai4_disp_ht[0], u1_enc);
+    }
+}
+
+/**
+*******************************************************************************
+* @fn void hme_coarse_set_resolution(void *pv_me_ctxt,
+*                                    S32 n_enc_layers,
+*                                    S32 *p_wd,
+*                                    S32 *p_ht)
+*
+* @brief Sets up the layers based on resolution information.
+*
+* @param[in, out] pv_me_ctxt : Coarse ME thread ctxt (coarse_me_ctxt_t),
+* updated with the resolution info
+*
+* @param[in] n_enc_layers : Number of layers encoded
+*
+* @param[in] p_wd : Pointer to an array having widths for each encode layer
+*
+* @param[in] p_ht : Pointer to an array having heights for each encode layer
+*
+* @return void
+*******************************************************************************
+*/
+
+void hme_coarse_set_resolution(void *pv_me_ctxt, S32 n_enc_layers, S32 *p_wd, S32 *p_ht)
+{
+    coarse_me_ctxt_t *ps_ctxt = (coarse_me_ctxt_t *)pv_me_ctxt;
+    S32 ai4_wd[MAX_NUM_LAYERS], ai4_ht[MAX_NUM_LAYERS];
+    S32 ai4_disp_wd[MAX_NUM_LAYERS], ai4_disp_ht[MAX_NUM_LAYERS];
+    S32 tot_layers, explicit_layers, descr_id, lyr_id;
+
+    /* Work on local copies; the remaining layers are filled in below */
+    memcpy(ai4_wd, p_wd, n_enc_layers * sizeof(S32));
+    memcpy(ai4_ht, p_ht, n_enc_layers * sizeof(S32));
+
+    /*************************************************************************/
+    /* Derive the number of HME layers, including both encoded and non encode*/
+    /* This function also derives the width and ht of each layer.            */
+    /*************************************************************************/
+    tot_layers = hme_derive_num_layers(n_enc_layers, ai4_wd, ai4_ht, ai4_disp_wd, ai4_disp_ht);
+
+    /* A non-positive setting means "all but one"; also clamp to that limit */
+    explicit_layers = ps_ctxt->s_init_prms.num_layers_explicit_search;
+    if(explicit_layers <= 0)
+    {
+        explicit_layers = tot_layers - 1;
+    }
+    explicit_layers = MIN(explicit_layers, tot_layers - 1);
+    ps_ctxt->num_layers_explicit_search = explicit_layers;
+
+    /* Flag the first n_enc_layers layers as encode layers, the rest as not */
+    memset(ps_ctxt->u1_encode, 0, tot_layers);
+    memset(ps_ctxt->u1_encode, 1, n_enc_layers);
+
+    ps_ctxt->num_layers = tot_layers;
+
+    /* Keep the derived dimensions of every layer in the ctxt */
+    memcpy(ps_ctxt->a_wd, ai4_wd, sizeof(S32) * tot_layers);
+    memcpy(ps_ctxt->a_ht, ai4_ht, sizeof(S32) * tot_layers);
+
+    /* Layer 0 is skipped : only layers 1 onwards are set up here */
+    for(descr_id = 0; descr_id < ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; descr_id++)
+    {
+        for(lyr_id = 1; lyr_id < tot_layers; lyr_id++)
+        {
+            U08 u1_enc = ps_ctxt->u1_encode[lyr_id];
+            layer_ctxt_t *ps_layer = ps_ctxt->as_ref_descr[descr_id].aps_layers[lyr_id];
+
+            hme_set_layer_res_attrs(
+                ps_layer,
+                ai4_wd[lyr_id],
+                ai4_ht[lyr_id],
+                ai4_disp_wd[lyr_id],
+                ai4_disp_ht[lyr_id],
+                u1_enc);
+        }
+    }
+}
+
+/**
+*******************************************************************************
+* @fn hme_find_descr_idx
+*
+* @brief Searches the layer descriptors for the one whose layer 0 matches both
+* the given poc and the given idr gop number
+*
+* @return Index of the first matching descriptor; -1 (after ASSERT(0)) if
+* none matches
+*******************************************************************************
+*/
+S32 hme_find_descr_idx(me_ctxt_t *ps_ctxt, S32 i4_poc, S32 i4_idr_gop_num, S32 i4_num_me_frm_pllel)
+{
+    S32 descr_id = 0;
+
+    while(descr_id < (ps_ctxt->aps_me_frm_prms[0]->max_num_ref * i4_num_me_frm_pllel) + 1)
+    {
+        layer_ctxt_t *ps_layer0 = ps_ctxt->as_ref_descr[descr_id].aps_layers[0];
+
+        if((ps_layer0->i4_poc == i4_poc) && (ps_layer0->i4_idr_gop_num == i4_idr_gop_num))
+        {
+            return descr_id;
+        }
+        descr_id++;
+    }
+
+    /* Should not come here */
+    ASSERT(0);
+    return (-1);
+}
+
+/**
+*******************************************************************************
+* @fn hme_coarse_find_descr_idx
+*
+* @brief Searches the coarse layer descriptors for the one whose layer 1 has
+* the given poc
+*
+* @return Index of the first matching descriptor; -1 (after ASSERT(0)) if
+* none matches
+*******************************************************************************
+*/
+S32 hme_coarse_find_descr_idx(coarse_me_ctxt_t *ps_ctxt, S32 i4_poc)
+{
+    S32 descr_id = 0;
+
+    while(descr_id < ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME)
+    {
+        layer_ctxt_t *ps_layer1 = ps_ctxt->as_ref_descr[descr_id].aps_layers[1];
+
+        if(ps_layer1->i4_poc == i4_poc)
+        {
+            return descr_id;
+        }
+        descr_id++;
+    }
+
+    /* Should not come here */
+    ASSERT(0);
+    return (-1);
+}
+
+/**
+*******************************************************************************
+* @fn hme_find_free_descr_idx
+*
+* @brief Finds the first layer descriptor whose layer 0 is marked free and
+* claims it by clearing its i4_is_free flag
+*
+* @return Index of the claimed descriptor; -1 (after ASSERT(0)) if none is free
+*******************************************************************************
+*/
+S32 hme_find_free_descr_idx(me_ctxt_t *ps_ctxt, S32 i4_num_me_frm_pllel)
+{
+    S32 descr_id = 0;
+
+    while(descr_id < (ps_ctxt->aps_me_frm_prms[0]->max_num_ref * i4_num_me_frm_pllel) + 1)
+    {
+        layer_ctxt_t *ps_layer0 = ps_ctxt->as_ref_descr[descr_id].aps_layers[0];
+
+        if(1 == ps_layer0->i4_is_free)
+        {
+            /* Mark as in use before handing it out */
+            ps_layer0->i4_is_free = 0;
+            return descr_id;
+        }
+        descr_id++;
+    }
+
+    /* Should not come here */
+    ASSERT(0);
+    return (-1);
+}
+
+/**
+*******************************************************************************
+* @fn hme_coarse_find_free_descr_idx
+*
+* @brief Finds the first coarse layer descriptor whose layer 1 poc is -1
+* (unfilled). The descriptor is not modified.
+*
+* @return Index of that descriptor; -1 (after ASSERT(0)) if none is unfilled
+*******************************************************************************
+*/
+S32 hme_coarse_find_free_descr_idx(void *pv_ctxt)
+{
+    coarse_me_ctxt_t *ps_ctxt = (coarse_me_ctxt_t *)pv_ctxt;
+    S32 descr_id = 0;
+
+    while(descr_id < ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME)
+    {
+        layer_ctxt_t *ps_layer1 = ps_ctxt->as_ref_descr[descr_id].aps_layers[1];
+
+        if(-1 == ps_layer1->i4_poc)
+        {
+            return descr_id;
+        }
+        descr_id++;
+    }
+
+    /* Should not come here */
+    ASSERT(0);
+    return (-1);
+}
+
+/**
+*******************************************************************************
+* @fn hme_discard_frm
+*
+* @brief Releases the layer descriptors of the pictures listed in
+* p_pocs_to_remove so that they can be reused for a fresh picture
+*
+* @param[in,out] pv_me_ctxt : ME thread ctxt (me_ctxt_t)
+*
+* @param[in] p_pocs_to_remove : list of pocs terminated by INVALID_POC; the
+* ASSERT in the loop expects at most one entry
+*
+* @param[in] i4_idr_gop_num : idr gop number the pocs belong to
+*
+* @param[in] i4_num_me_frm_pllel : forwarded to hme_find_descr_idx()
+*
+* @return none
+*******************************************************************************
+*/
+void hme_discard_frm(
+    void *pv_me_ctxt, S32 *p_pocs_to_remove, S32 i4_idr_gop_num, S32 i4_num_me_frm_pllel)
+{
+    me_ctxt_t *ps_ctxt = (me_ctxt_t *)pv_me_ctxt;
+    S32 count = 0, idx, i;
+    layers_descr_t *ps_descr;
+
+    /* Search for the id of the layer descriptor that has this poc */
+    while(p_pocs_to_remove[count] != INVALID_POC)
+    {
+        ASSERT(count == 0);
+        idx = hme_find_descr_idx(
+            ps_ctxt, p_pocs_to_remove[count], i4_idr_gop_num, i4_num_me_frm_pllel);
+
+        /* hme_find_descr_idx() returns -1 when nothing matches; never use */
+        /* that value as an array index                                    */
+        if(idx >= 0)
+        {
+            ps_descr = &ps_ctxt->as_ref_descr[idx];
+            /*****************************************************************/
+            /* Setting i4_is_free = 1 invalidates this layer ctxt (only the  */
+            /* encode layer is handled). It can now be used for a fresh pic. */
+            /*****************************************************************/
+            for(i = 0; i < 1; i++)
+            {
+                ps_descr->aps_layers[i]->i4_is_free = 1;
+            }
+        }
+        count++;
+    }
+}
+
+/**
+*******************************************************************************
+* @fn hme_coarse_discard_frm
+*
+* @brief Invalidates the coarse layer descriptors of the pictures listed in
+* p_pocs_to_remove so that they can be reused for a fresh picture
+*
+* @param[in,out] pv_me_ctxt : Coarse ME thread ctxt (coarse_me_ctxt_t)
+*
+* @param[in] p_pocs_to_remove : list of pocs terminated by -1
+*
+* @return none
+*******************************************************************************
+*/
+void hme_coarse_discard_frm(void *pv_me_ctxt, S32 *p_pocs_to_remove)
+{
+    coarse_me_ctxt_t *ps_ctxt = (coarse_me_ctxt_t *)pv_me_ctxt;
+    S32 count = 0, idx, i;
+    layers_descr_t *ps_descr;
+
+    /* Search for the id of the layer descriptor that has this poc */
+    while(p_pocs_to_remove[count] != -1)
+    {
+        idx = hme_coarse_find_descr_idx(ps_ctxt, p_pocs_to_remove[count]);
+
+        /* hme_coarse_find_descr_idx() returns -1 when nothing matches; */
+        /* never use that value as an array index                       */
+        if(idx >= 0)
+        {
+            ps_descr = &ps_ctxt->as_ref_descr[idx];
+            /*****************************************************************/
+            /* Setting poc = -1 in layers 1 onwards invalidates this layer   */
+            /* ctxt. Now this can be used for a fresh picture.               */
+            /*****************************************************************/
+            for(i = 1; i < ps_ctxt->num_layers; i++)
+            {
+                ps_descr->aps_layers[i]->i4_poc = -1;
+            }
+        }
+        count++;
+    }
+}
+
+/**
+*******************************************************************************
+* @fn hme_update_layer_desc
+*
+* @brief Copies poc, gop number, recon plane pointers and recon geometry of a
+* reference picture into a layer descriptor, for layers start_lyr_id up to
+* (but excluding) num_layers. The recon stride and padding are also copied
+* into the matching layers of the current picture descriptor.
+*
+* @param[out] ps_layers_desc : descriptor to fill for the reference picture
+* @param[in] ps_ref_desc : reference picture information
+* @param[in] start_lyr_id : first layer to update
+* @param[in] num_layers : one past the last layer to update
+* @param[in,out] ps_curr_desc : descriptor of the current picture
+*
+* @return none
+*******************************************************************************
+*/
+void hme_update_layer_desc(
+    layers_descr_t *ps_layers_desc,
+    hme_ref_desc_t *ps_ref_desc,
+    S32 start_lyr_id,
+    S32 num_layers,
+    layers_descr_t *ps_curr_desc)
+{
+    S32 lyr_id = start_lyr_id;
+
+    while(lyr_id < num_layers)
+    {
+        layer_ctxt_t *ps_ref_layer = ps_layers_desc->aps_layers[lyr_id];
+        layer_ctxt_t *ps_cur_layer = ps_curr_desc->aps_layers[lyr_id];
+
+        /* Picture identity */
+        ps_ref_layer->i4_poc = ps_ref_desc->i4_poc;
+        ps_ref_layer->i4_idr_gop_num = ps_ref_desc->i4_GOP_num;
+
+        /* Recon planes of the reference pic at this layer */
+        ps_ref_layer->pu1_rec_fxfy = ps_ref_desc->as_ref_info[lyr_id].pu1_rec_fxfy;
+        ps_ref_layer->pu1_rec_hxfy = ps_ref_desc->as_ref_info[lyr_id].pu1_rec_hxfy;
+        ps_ref_layer->pu1_rec_fxhy = ps_ref_desc->as_ref_info[lyr_id].pu1_rec_fxhy;
+        ps_ref_layer->pu1_rec_hxhy = ps_ref_desc->as_ref_info[lyr_id].pu1_rec_hxhy;
+
+        /*********************************************************************/
+        /* Stride, offset and padding of the recon. These are assumed to be  */
+        /* the same across pics, so stride and padding are mirrored into the */
+        /* current pic too, although it has no valid recon pointers yet.     */
+        /*********************************************************************/
+        ps_ref_layer->i4_rec_offset = ps_ref_desc->as_ref_info[lyr_id].luma_offset;
+
+        ps_cur_layer->i4_rec_stride = ps_ref_layer->i4_rec_stride =
+            ps_ref_desc->as_ref_info[lyr_id].luma_stride;
+        ps_cur_layer->i4_pad_x_rec = ps_ref_layer->i4_pad_x_rec =
+            ps_ref_desc->as_ref_info[lyr_id].u1_pad_x;
+        ps_cur_layer->i4_pad_y_rec = ps_ref_layer->i4_pad_y_rec =
+            ps_ref_desc->as_ref_info[lyr_id].u1_pad_y;
+
+        lyr_id++;
+    }
+}
+
+/**
+*******************************************************************************
+* @fn hme_add_inp
+*
+* @brief Claims a free layer descriptor for a new input picture, points every
+* thread's frame ctxt (for me_frm_id) at it, and fills the encode layer of
+* the descriptor from the input description
+*
+* @param[in,out] pv_me_ctxt : ME master ctxt (me_master_ctxt_t)
+* @param[in] ps_inp_desc : poc, gop number, reference flag and layer buffers
+* of the input picture
+* @param[in] me_frm_id : index of the frame ctxt used for this picture
+* @param[in] i4_thrd_id : thread whose ctxt is used to find the free
+* descriptor and to read the encode flags
+*
+* @return none
+*******************************************************************************
+*/
+void hme_add_inp(void *pv_me_ctxt, hme_inp_desc_t *ps_inp_desc, S32 me_frm_id, S32 i4_thrd_id)
+{
+    layers_descr_t *ps_desc;
+    layer_ctxt_t *ps_layer_ctxt;
+    me_master_ctxt_t *ps_master_ctxt = (me_master_ctxt_t *)pv_me_ctxt;
+    me_ctxt_t *ps_thrd_ctxt;
+    me_frm_ctxt_t *ps_ctxt;
+
+    hme_inp_buf_attr_t *ps_attr;
+    S32 idx, i, i4_prev_poc;
+    S32 num_thrds, prev_me_frm_id;
+
+    /* Read the picture attributes once, up front. They used to be assigned  */
+    /* only inside the per-thread loop below, which left them uninitialised  */
+    /* for the layer loop whenever that loop ran zero times.                 */
+    S32 i4_poc = ps_inp_desc->i4_poc;
+    S32 i4_idr_gop_num = ps_inp_desc->i4_idr_gop_num;
+    S32 i4_is_reference = ps_inp_desc->i4_is_reference;
+
+    /* The free idx and the encode flags come from the ctxt of i4_thrd_id */
+    /* (existing comments state the same layer desc pointer is stored in  */
+    /* all thread ctxts)                                                  */
+    ps_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[i4_thrd_id];
+
+    ps_ctxt = ps_thrd_ctxt->aps_me_frm_prms[me_frm_id];
+
+    /* Deriving the previous poc from previous frames context */
+    if(me_frm_id == 0)
+        prev_me_frm_id = (MAX_NUM_ME_PARALLEL - 1);
+    else
+        prev_me_frm_id = me_frm_id - 1;
+
+    i4_prev_poc = ps_thrd_ctxt->aps_me_frm_prms[prev_me_frm_id]->i4_curr_poc;
+
+    /* Obtain an empty layer descriptor */
+    idx = hme_find_free_descr_idx(ps_thrd_ctxt, ps_master_ctxt->i4_num_me_frm_pllel);
+    ps_desc = &ps_thrd_ctxt->as_ref_descr[idx];
+
+    /* initialise the parameters for all the threads */
+    for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
+    {
+        me_frm_ctxt_t *ps_tmp_frm_ctxt;
+
+        ps_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
+        ps_tmp_frm_ctxt = ps_thrd_ctxt->aps_me_frm_prms[me_frm_id];
+
+        ps_tmp_frm_ctxt->ps_curr_descr = &ps_thrd_ctxt->as_ref_descr[idx];
+
+        /* Update poc id of previously encoded frm and curr frm */
+        ps_tmp_frm_ctxt->i4_prev_poc = i4_prev_poc;
+        ps_tmp_frm_ctxt->i4_curr_poc = i4_poc;
+    }
+
+    /* only encode layer */
+    for(i = 0; i < 1; i++)
+    {
+        ps_layer_ctxt = ps_desc->aps_layers[i];
+        ps_attr = &ps_inp_desc->s_layer_desc[i];
+
+        ps_layer_ctxt->i4_poc = i4_poc;
+        ps_layer_ctxt->i4_idr_gop_num = i4_idr_gop_num;
+        ps_layer_ctxt->i4_is_reference = i4_is_reference;
+        ps_layer_ctxt->i4_non_ref_free = 0;
+
+        /* If this layer is encoded, copy input attributes */
+        if(ps_ctxt->u1_encode[i])
+        {
+            ps_layer_ctxt->pu1_inp = ps_attr->pu1_y;
+            ps_layer_ctxt->i4_inp_stride = ps_attr->luma_stride;
+            ps_layer_ctxt->i4_pad_x_inp = 0;
+            ps_layer_ctxt->i4_pad_y_inp = 0;
+        }
+        else
+        {
+            /* If not encoded, then ME owns the buffer. */
+            S32 wd, dst_stride;
+
+            /* Layer 0 is expected to always be an encode layer */
+            ASSERT(i != 0);
+
+            wd = ps_ctxt->i4_wd;
+
+            /* destination has padding on either side of 16 */
+            dst_stride = CEIL16((wd >> 1)) + 32 + 4;
+            ps_layer_ctxt->i4_inp_stride = dst_stride;
+        }
+    }
+
+    return;
+}
+
+/**
+*******************************************************************************
+* @fn hme_coarse_add_inp
+*
+* @brief Makes descriptor i4_curr_idx the current descriptor of every coarse
+* ME thread ctxt and fills its non-encode layers (1 onwards) from the input
+* description
+*
+* @param[in,out] pv_me_ctxt : Coarse ME master ctxt (coarse_me_master_ctxt_t)
+* @param[in] ps_inp_desc : poc and layer buffers of the input picture
+* @param[in] i4_curr_idx : index of the layer descriptor to use
+*
+* @return none
+*******************************************************************************
+*/
+void hme_coarse_add_inp(void *pv_me_ctxt, hme_inp_desc_t *ps_inp_desc, WORD32 i4_curr_idx)
+{
+    layers_descr_t *ps_desc;
+    layer_ctxt_t *ps_layer_ctxt;
+    coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt;
+    coarse_me_ctxt_t *ps_ctxt;
+    hme_inp_buf_attr_t *ps_attr;
+    S32 i;
+    S32 num_thrds;
+
+    /* Read the poc once, up front. It used to be assigned only inside the */
+    /* per-thread loop below, which left it uninitialised for the layer    */
+    /* loop whenever that loop ran zero times.                             */
+    S32 i4_poc = ps_inp_desc->i4_poc;
+
+    /* since same layer desc pointer is stored in all thread ctxt */
+    /* the descriptor is obtained using 0th thread ctxt pointer */
+    ps_ctxt = ps_master_ctxt->aps_me_ctxt[0];
+
+    ps_desc = &ps_ctxt->as_ref_descr[i4_curr_idx];
+
+    /* initialise the parameters for all the threads */
+    for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
+    {
+        ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
+        ps_ctxt->ps_curr_descr = &ps_ctxt->as_ref_descr[i4_curr_idx];
+
+        /* Update poc id of previously encoded frm and curr frm */
+        ps_ctxt->i4_prev_poc = ps_ctxt->i4_curr_poc;
+        ps_ctxt->i4_curr_poc = i4_poc;
+    }
+
+    /* since same layer desc pointer is stored in all thread ctxt */
+    /* following processing is done using 0th thread ctxt pointer */
+    ps_ctxt = ps_master_ctxt->aps_me_ctxt[0];
+
+    /* only non encode layer */
+    for(i = 1; i < ps_ctxt->num_layers; i++)
+    {
+        ps_layer_ctxt = ps_desc->aps_layers[i];
+        ps_attr = &ps_inp_desc->s_layer_desc[i];
+
+        ps_layer_ctxt->i4_poc = i4_poc;
+        /* If this layer is encoded, copy input attributes */
+        if(ps_ctxt->u1_encode[i])
+        {
+            ps_layer_ctxt->pu1_inp = ps_attr->pu1_y;
+            ps_layer_ctxt->i4_inp_stride = ps_attr->luma_stride;
+            ps_layer_ctxt->i4_pad_x_inp = 0;
+            ps_layer_ctxt->i4_pad_y_inp = 0;
+        }
+        else
+        {
+            /* If not encoded, then ME owns the buffer. */
+            /* decomp of lower layers happens on a separate pass */
+            /* Coarse Me should export the pointers to the caller */
+            S32 wd, dst_stride;
+
+            ASSERT(i != 0);
+
+            /* Stride is derived from the width of the layer above */
+            wd = ps_ctxt->a_wd[i - 1];
+
+            /* destination has padding on either side of 16 */
+            dst_stride = CEIL16((wd >> 1)) + 32 + 4;
+            ps_layer_ctxt->i4_inp_stride = dst_stride;
+        }
+    }
+}
+
+/**
+*******************************************************************************
+* @fn hme_determine_num_results_per_part
+*
+* @brief Returns the number of results kept per partition for a layer : 1 when
+* the compile time restriction for that layer (L0 / L1 / L2) is enabled and
+* the quality preset is one of the presets restricted for it, otherwise
+* MAX_RESULTS_PER_PART. The L2 restriction applies only when there are more
+* than 3 layers.
+*******************************************************************************
+*/
+static __inline U08 hme_determine_num_results_per_part(
+    U08 u1_layer_id, U08 u1_num_layers, ME_QUALITY_PRESETS_T e_quality_preset)
+{
+    U08 u1_restrict_to_one = 0;
+
+    if((u1_layer_id == 0) && !!RESTRICT_NUM_PARTITION_LEVEL_L0ME_RESULTS_TO_1)
+    {
+        /* L0 : every listed preset is restricted */
+        u1_restrict_to_one =
+            (ME_XTREME_SPEED_25 == e_quality_preset) || (ME_XTREME_SPEED == e_quality_preset) ||
+            (ME_HIGH_SPEED == e_quality_preset) || (ME_MEDIUM_SPEED == e_quality_preset) ||
+            (ME_HIGH_QUALITY == e_quality_preset) || (ME_PRISTINE_QUALITY == e_quality_preset);
+    }
+    else if((u1_layer_id == 1) && !!RESTRICT_NUM_PARTITION_LEVEL_L1ME_RESULTS_TO_1)
+    {
+        /* L1 : XS25 and the two quality presets */
+        u1_restrict_to_one =
+            (ME_XTREME_SPEED_25 == e_quality_preset) || (ME_HIGH_QUALITY == e_quality_preset) ||
+            (ME_PRISTINE_QUALITY == e_quality_preset);
+    }
+    else if((u1_layer_id == 2) && (u1_num_layers > 3) && !!RESTRICT_NUM_PARTITION_LEVEL_L2ME_RESULTS_TO_1)
+    {
+        /* L2 : the four speed presets */
+        u1_restrict_to_one =
+            (ME_XTREME_SPEED_25 == e_quality_preset) || (ME_XTREME_SPEED == e_quality_preset) ||
+            (ME_HIGH_SPEED == e_quality_preset) || (ME_MEDIUM_SPEED == e_quality_preset);
+    }
+
+    if(u1_restrict_to_one)
+    {
+        return 1;
+    }
+
+    return MAX_RESULTS_PER_PART;
+}
+
+/**
+*******************************************************************************
+* @fn hme_max_search_cands_per_search_cand_loc_populator
+*
+* @brief For layer 0 only, fills pu1_num_fpel_search_cands with the maximum
+* number of fpel search candidates per search candidate location. Other
+* layers leave the array untouched.
+*
+* @param[in] ps_frm_prms : frame params (active ref counts of L0 and L1)
+* @param[out] pu1_num_fpel_search_cands : NUM_SEARCH_CAND_LOCATIONS entries
+* @param[in] u1_layer_id : layer being configured
+* @param[in] e_quality_preset : ME quality preset
+*******************************************************************************
+*/
+static __inline void hme_max_search_cands_per_search_cand_loc_populator(
+    hme_frm_prms_t *ps_frm_prms,
+    U08 *pu1_num_fpel_search_cands,
+    U08 u1_layer_id,
+    ME_QUALITY_PRESETS_T e_quality_preset)
+{
+    S32 loc;
+
+    /* Nothing to populate for layers other than 0 */
+    if(0 != u1_layer_id)
+    {
+        return;
+    }
+
+    for(loc = 0; loc < NUM_SEARCH_CAND_LOCATIONS; loc++)
+    {
+        switch(e_quality_preset)
+        {
+#if RESTRICT_NUM_SEARCH_CANDS_PER_SEARCH_CAND_LOC
+        /* Speed presets : a single candidate per location */
+        case ME_XTREME_SPEED_25:
+        case ME_XTREME_SPEED:
+        case ME_HIGH_SPEED:
+        case ME_MEDIUM_SPEED:
+            pu1_num_fpel_search_cands[loc] = 1;
+            break;
+#endif
+        /* Otherwise : larger active ref count, doubled for COLOCATED, min 2 */
+        default:
+            pu1_num_fpel_search_cands[loc] =
+                MAX(2,
+                    MAX(ps_frm_prms->u1_num_active_ref_l0, ps_frm_prms->u1_num_active_ref_l1) *
+                        ((COLOCATED == (SEARCH_CAND_LOCATIONS_T)loc) + 1));
+            break;
+        }
+    }
+}
+
+/**
+*******************************************************************************
+* @fn hme_determine_max_2nx2n_tu_recur_cands
+*
+* @brief Returns the maximum number of 2Nx2N TU recursion candidates : 1 for
+* layer 0 with a speed preset when RESTRICT_NUM_2NX2N_TU_RECUR_CANDS is
+* enabled, otherwise 2
+*******************************************************************************
+*/
+static __inline U08
+    hme_determine_max_2nx2n_tu_recur_cands(U08 u1_layer_id, ME_QUALITY_PRESETS_T e_quality_preset)
+{
+    if((u1_layer_id == 0) && !!RESTRICT_NUM_2NX2N_TU_RECUR_CANDS)
+    {
+        if((ME_XTREME_SPEED_25 == e_quality_preset) || (ME_XTREME_SPEED == e_quality_preset) ||
+           (ME_HIGH_SPEED == e_quality_preset) || (ME_MEDIUM_SPEED == e_quality_preset))
+        {
+            return 1;
+        }
+    }
+
+    return 2;
+}
+
+/**
+*******************************************************************************
+* @fn hme_determine_max_num_fpel_refine_centers
+*
+* @brief For layer 0, returns the sum over all TOT_NUM_PARTS partitions of the
+* per-partition best-result counts of the given quality preset. Returns 0
+* for other layers and for presets without a table.
+*
+* Note : the sum is accumulated in a U08
+*******************************************************************************
+*/
+static __inline U08
+    hme_determine_max_num_fpel_refine_centers(U08 u1_layer_id, ME_QUALITY_PRESETS_T e_quality_preset)
+{
+    const U08 *pu1_num_best_results = NULL;
+    U08 u1_num_centers = 0;
+    U08 u1_part;
+
+    if(0 != u1_layer_id)
+    {
+        return u1_num_centers;
+    }
+
+    /* Pick the per-partition table of the preset */
+    switch(e_quality_preset)
+    {
+    case ME_XTREME_SPEED_25:
+        pu1_num_best_results = gau1_num_best_results_XS25;
+        break;
+    case ME_XTREME_SPEED:
+        pu1_num_best_results = gau1_num_best_results_XS;
+        break;
+    case ME_HIGH_SPEED:
+        pu1_num_best_results = gau1_num_best_results_HS;
+        break;
+    case ME_MEDIUM_SPEED:
+        pu1_num_best_results = gau1_num_best_results_MS;
+        break;
+    case ME_HIGH_QUALITY:
+        pu1_num_best_results = gau1_num_best_results_HQ;
+        break;
+    case ME_PRISTINE_QUALITY:
+        pu1_num_best_results = gau1_num_best_results_PQ;
+        break;
+    default:
+        break;
+    }
+
+    if(NULL != pu1_num_best_results)
+    {
+        for(u1_part = 0; u1_part < TOT_NUM_PARTS; u1_part++)
+        {
+            u1_num_centers += pu1_num_best_results[u1_part];
+        }
+    }
+
+    return u1_num_centers;
+}
+
+/**
+*******************************************************************************
+* @fn hme_determine_max_num_subpel_refine_centers
+*
+* @brief For layer 0, returns the 2Nx2N subpel candidate count plus four times
+* the NxN subpel candidate count (truncated to U08); 0 for other layers
+*******************************************************************************
+*/
+static __inline U08 hme_determine_max_num_subpel_refine_centers(
+    U08 u1_layer_id, U08 u1_max_2Nx2N_subpel_cands, U08 u1_max_NxN_subpel_cands)
+{
+    if(0 != u1_layer_id)
+    {
+        return 0;
+    }
+
+    return (U08)(u1_max_2Nx2N_subpel_cands + 4 * u1_max_NxN_subpel_cands);
+}
+
+/**
+********************************************************************************
+*  @fn     hme_set_refine_prms
+*
+*  @brief  Fills a refine_prms_t for one HME layer from the frame level
+*          parameters, the ME quality preset and, when enabled, the command
+*          line overrides carried in ps_me_coding_tools
+*
+*  @param[out] pv_refine_prms : refine_prms_t to populate (passed as void *)
+*
+*  @param[in]  u1_encode : non-zero selects the encode layer settings, zero the
+*                          non-encode layer settings
+*
+*  @param[in]  num_ref : number of references; bounds i4_num_ref_fpel
+*
+*  @param[in]  layer_id : id of the layer being configured
+*
+*  @param[in]  num_layers : total number of layers
+*
+*  @param[in]  num_layers_explicit_search : number of layers that use explicit
+*                                           reference search
+*
+*  @param[in]  use_4x4 : forwarded to hme_get_mv_blk_size()
+*
+*  @param[in]  ps_frm_prms : frame level HME parameters (lambdas, bidir flag)
+*
+*  @param[in]  ppd_intra_costs : per layer intra cost buffers; may be NULL
+*
+*  @param[in]  ps_me_coding_tools : quality preset and command line overrides
+*
+*  @return None
+********************************************************************************
+*/
+void hme_set_refine_prms(
+    void *pv_refine_prms,
+    U08 u1_encode,
+    S32 num_ref,
+    S32 layer_id,
+    S32 num_layers,
+    S32 num_layers_explicit_search,
+    S32 use_4x4,
+    hme_frm_prms_t *ps_frm_prms,
+    double **ppd_intra_costs,
+    me_coding_params_t *ps_me_coding_tools)
+{
+    refine_prms_t *ps_refine_prms = (refine_prms_t *)pv_refine_prms;
+
+    ps_refine_prms->i4_encode = u1_encode;
+    ps_refine_prms->bidir_enabled = ps_frm_prms->bidir_enabled;
+    ps_refine_prms->i4_layer_id = layer_id;
+    /*************************************************************************/
+    /* Refinement layers have two lambdas, one for closed loop, another for  */
+    /* open loop. Non encode layers use only open loop lambda.               */
+    /* Both lambdas are then scaled down by ME_LAMBDA_DISCOUNT percent.      */
+    /*************************************************************************/
+    ps_refine_prms->lambda_inp = ps_frm_prms->i4_ol_sad_lambda_qf;
+    ps_refine_prms->lambda_recon = ps_frm_prms->i4_cl_sad_lambda_qf;
+    ps_refine_prms->lambda_q_shift = ps_frm_prms->lambda_q_shift;
+    ps_refine_prms->lambda_inp =
+        ((float)ps_refine_prms->lambda_inp) * (100.0f - ME_LAMBDA_DISCOUNT) / 100.0f;
+    ps_refine_prms->lambda_recon =
+        ((float)ps_refine_prms->lambda_recon) * (100.0f - ME_LAMBDA_DISCOUNT) / 100.0f;
+
+    /* Always leave pd_intra_costs in a defined state: the structure handed  */
+    /* in may be an uninitialised automatic variable                         */
+    if((u1_encode) && (NULL != ppd_intra_costs))
+    {
+        ps_refine_prms->pd_intra_costs = ppd_intra_costs[layer_id];
+    }
+    else
+    {
+        ps_refine_prms->pd_intra_costs = NULL;
+    }
+
+    /* Explicit or implicit depends on number of layers having explicit search */
+    if((layer_id == 0) || (num_layers - layer_id > num_layers_explicit_search))
+    {
+        ps_refine_prms->explicit_ref = 0;
+        ps_refine_prms->i4_num_ref_fpel = MIN(2, num_ref);
+    }
+    else
+    {
+        ps_refine_prms->explicit_ref = 1;
+        ps_refine_prms->i4_num_ref_fpel = num_ref;
+    }
+
+    /* Default complexity; every recognised preset below overrides it */
+    ps_refine_prms->e_search_complexity = SEARCH_CX_HIGH;
+
+    ps_refine_prms->i4_num_steps_hpel_refine = ps_me_coding_tools->i4_num_steps_hpel_refine;
+    ps_refine_prms->i4_num_steps_qpel_refine = ps_me_coding_tools->i4_num_steps_qpel_refine;
+
+    if(u1_encode)
+    {
+        ps_refine_prms->i4_num_mvbank_results = 1;
+        ps_refine_prms->i4_use_rec_in_fpel = 1;
+        ps_refine_prms->i4_num_steps_fpel_refine = 1;
+
+        if(ps_me_coding_tools->e_me_quality_presets == ME_PRISTINE_QUALITY)
+        {
+            ps_refine_prms->i4_num_fpel_results = 4;
+            ps_refine_prms->i4_num_32x32_merge_results = 4;
+            ps_refine_prms->i4_num_64x64_merge_results = 4;
+            ps_refine_prms->i4_num_steps_post_refine_fpel = 3;
+            ps_refine_prms->i4_use_satd_subpel = 1;
+            ps_refine_prms->u1_max_subpel_candts_2Nx2N = 2;
+            ps_refine_prms->u1_max_subpel_candts_NxN = 1;
+            ps_refine_prms->u1_subpel_candt_threshold = 1;
+            ps_refine_prms->e_search_complexity = SEARCH_CX_MED;
+            ps_refine_prms->pu1_num_best_results = gau1_num_best_results_PQ;
+            ps_refine_prms->limit_active_partitions = 0;
+        }
+        else if(ps_me_coding_tools->e_me_quality_presets == ME_HIGH_QUALITY)
+        {
+            ps_refine_prms->i4_num_fpel_results = 4;
+            ps_refine_prms->i4_num_32x32_merge_results = 4;
+            ps_refine_prms->i4_num_64x64_merge_results = 4;
+            ps_refine_prms->i4_num_steps_post_refine_fpel = 3;
+            ps_refine_prms->i4_use_satd_subpel = 1;
+            ps_refine_prms->u1_max_subpel_candts_2Nx2N = 2;
+            ps_refine_prms->u1_max_subpel_candts_NxN = 1;
+            ps_refine_prms->u1_subpel_candt_threshold = 2;
+            ps_refine_prms->e_search_complexity = SEARCH_CX_MED;
+            ps_refine_prms->pu1_num_best_results = gau1_num_best_results_HQ;
+            ps_refine_prms->limit_active_partitions = 0;
+        }
+        else if(ps_me_coding_tools->e_me_quality_presets == ME_MEDIUM_SPEED)
+        {
+            ps_refine_prms->i4_num_fpel_results = 1;
+            ps_refine_prms->i4_num_32x32_merge_results = 2;
+            ps_refine_prms->i4_num_64x64_merge_results = 2;
+            ps_refine_prms->i4_num_steps_post_refine_fpel = 0;
+            ps_refine_prms->i4_use_satd_subpel = 1;
+            ps_refine_prms->u1_max_subpel_candts_2Nx2N = 2;
+            ps_refine_prms->u1_max_subpel_candts_NxN = 1;
+            ps_refine_prms->u1_subpel_candt_threshold = 3;
+            ps_refine_prms->e_search_complexity = SEARCH_CX_MED;
+            ps_refine_prms->pu1_num_best_results = gau1_num_best_results_MS;
+            ps_refine_prms->limit_active_partitions = 1;
+        }
+        else if(ps_me_coding_tools->e_me_quality_presets == ME_HIGH_SPEED)
+        {
+            ps_refine_prms->i4_num_fpel_results = 1;
+            ps_refine_prms->i4_num_32x32_merge_results = 2;
+            ps_refine_prms->i4_num_64x64_merge_results = 2;
+            ps_refine_prms->i4_num_steps_post_refine_fpel = 0;
+            ps_refine_prms->u1_max_subpel_candts_2Nx2N = 1;
+            ps_refine_prms->u1_max_subpel_candts_NxN = 1;
+            ps_refine_prms->i4_use_satd_subpel = 0;
+            ps_refine_prms->u1_subpel_candt_threshold = 0;
+            ps_refine_prms->e_search_complexity = SEARCH_CX_MED;
+            ps_refine_prms->pu1_num_best_results = gau1_num_best_results_HS;
+            ps_refine_prms->limit_active_partitions = 1;
+        }
+        else if(ps_me_coding_tools->e_me_quality_presets == ME_XTREME_SPEED)
+        {
+            ps_refine_prms->i4_num_fpel_results = 1;
+            ps_refine_prms->i4_num_32x32_merge_results = 2;
+            ps_refine_prms->i4_num_64x64_merge_results = 2;
+            ps_refine_prms->i4_num_steps_post_refine_fpel = 0;
+            ps_refine_prms->i4_use_satd_subpel = 0;
+            ps_refine_prms->u1_max_subpel_candts_2Nx2N = 1;
+            ps_refine_prms->u1_max_subpel_candts_NxN = 0;
+            ps_refine_prms->u1_subpel_candt_threshold = 0;
+            ps_refine_prms->e_search_complexity = SEARCH_CX_MED;
+            ps_refine_prms->pu1_num_best_results = gau1_num_best_results_XS;
+            ps_refine_prms->limit_active_partitions = 1;
+        }
+        else if(ps_me_coding_tools->e_me_quality_presets == ME_XTREME_SPEED_25)
+        {
+            ps_refine_prms->i4_num_fpel_results = 1;
+            ps_refine_prms->i4_num_32x32_merge_results = 2;
+            ps_refine_prms->i4_num_64x64_merge_results = 2;
+            ps_refine_prms->i4_num_steps_post_refine_fpel = 0;
+            ps_refine_prms->i4_use_satd_subpel = 0;
+            ps_refine_prms->u1_max_subpel_candts_2Nx2N = 1;
+            ps_refine_prms->u1_max_subpel_candts_NxN = 0;
+            ps_refine_prms->u1_subpel_candt_threshold = 0;
+            ps_refine_prms->e_search_complexity = SEARCH_CX_LOW;
+            ps_refine_prms->pu1_num_best_results = gau1_num_best_results_XS25;
+            ps_refine_prms->limit_active_partitions = 1;
+        }
+    }
+    else
+    {
+        ps_refine_prms->i4_num_fpel_results = 2;
+        ps_refine_prms->i4_use_rec_in_fpel = 0;
+        ps_refine_prms->i4_num_steps_fpel_refine = 1;
+        /* Non encode layers do no subpel refinement: override the values    */
+        /* copied from ps_me_coding_tools above                              */
+        ps_refine_prms->i4_num_steps_hpel_refine = 0;
+        ps_refine_prms->i4_num_steps_qpel_refine = 0;
+
+        if(ps_me_coding_tools->e_me_quality_presets == ME_HIGH_SPEED)
+        {
+            ps_refine_prms->i4_num_steps_post_refine_fpel = 0;
+            ps_refine_prms->i4_use_satd_subpel = 1;
+            ps_refine_prms->e_search_complexity = SEARCH_CX_LOW;
+            ps_refine_prms->pu1_num_best_results = gau1_num_best_results_HS;
+        }
+        else if(ps_me_coding_tools->e_me_quality_presets == ME_XTREME_SPEED)
+        {
+            ps_refine_prms->i4_num_steps_post_refine_fpel = 0;
+            ps_refine_prms->i4_use_satd_subpel = 0;
+            ps_refine_prms->e_search_complexity = SEARCH_CX_LOW;
+            ps_refine_prms->pu1_num_best_results = gau1_num_best_results_XS;
+        }
+        else if(ps_me_coding_tools->e_me_quality_presets == ME_XTREME_SPEED_25)
+        {
+            ps_refine_prms->i4_num_steps_post_refine_fpel = 0;
+            ps_refine_prms->i4_use_satd_subpel = 0;
+            ps_refine_prms->e_search_complexity = SEARCH_CX_LOW;
+            ps_refine_prms->pu1_num_best_results = gau1_num_best_results_XS25;
+        }
+        else if(ps_me_coding_tools->e_me_quality_presets == ME_PRISTINE_QUALITY)
+        {
+            ps_refine_prms->i4_num_steps_post_refine_fpel = 2;
+            ps_refine_prms->i4_use_satd_subpel = 1;
+            ps_refine_prms->e_search_complexity = SEARCH_CX_MED;
+            ps_refine_prms->pu1_num_best_results = gau1_num_best_results_PQ;
+        }
+        else if(ps_me_coding_tools->e_me_quality_presets == ME_HIGH_QUALITY)
+        {
+            ps_refine_prms->i4_num_steps_post_refine_fpel = 2;
+            ps_refine_prms->i4_use_satd_subpel = 1;
+            ps_refine_prms->e_search_complexity = SEARCH_CX_MED;
+            ps_refine_prms->pu1_num_best_results = gau1_num_best_results_HQ;
+        }
+        else if(ps_me_coding_tools->e_me_quality_presets == ME_MEDIUM_SPEED)
+        {
+            ps_refine_prms->i4_num_steps_post_refine_fpel = 0;
+            ps_refine_prms->i4_use_satd_subpel = 1;
+            ps_refine_prms->e_search_complexity = SEARCH_CX_LOW;
+            ps_refine_prms->pu1_num_best_results = gau1_num_best_results_MS;
+        }
+
+        /* Following fields unused in the non-encode layers */
+        /* But setting the same to default values */
+        ps_refine_prms->i4_num_32x32_merge_results = 4;
+        ps_refine_prms->i4_num_64x64_merge_results = 4;
+
+        /* The subpel candidate limits are read further below when deriving  */
+        /* u1_max_num_subpel_refine_centers, so give them defined values too */
+        ps_refine_prms->u1_max_subpel_candts_2Nx2N = 0;
+        ps_refine_prms->u1_max_subpel_candts_NxN = 0;
+
+        if(!ps_frm_prms->bidir_enabled)
+        {
+            ps_refine_prms->limit_active_partitions = 0;
+        }
+        else
+        {
+            ps_refine_prms->limit_active_partitions = 1;
+        }
+    }
+
+    ps_refine_prms->i4_enable_4x4_part =
+        hme_get_mv_blk_size(use_4x4, layer_id, num_layers, u1_encode);
+
+    if(!ps_me_coding_tools->u1_l0_me_controlled_via_cmd_line)
+    {
+        /* Candidate counts are derived from the quality preset */
+        ps_refine_prms->i4_num_results_per_part = hme_determine_num_results_per_part(
+            layer_id, num_layers, ps_me_coding_tools->e_me_quality_presets);
+
+        hme_max_search_cands_per_search_cand_loc_populator(
+            ps_frm_prms,
+            ps_refine_prms->au1_num_fpel_search_cands,
+            layer_id,
+            ps_me_coding_tools->e_me_quality_presets);
+
+        ps_refine_prms->u1_max_2nx2n_tu_recur_cands = hme_determine_max_2nx2n_tu_recur_cands(
+            layer_id, ps_me_coding_tools->e_me_quality_presets);
+
+        ps_refine_prms->u1_max_num_fpel_refine_centers = hme_determine_max_num_fpel_refine_centers(
+            layer_id, ps_me_coding_tools->e_me_quality_presets);
+
+        ps_refine_prms->u1_max_num_subpel_refine_centers =
+            hme_determine_max_num_subpel_refine_centers(
+                layer_id,
+                ps_refine_prms->u1_max_subpel_candts_2Nx2N,
+                ps_refine_prms->u1_max_subpel_candts_NxN);
+    }
+    else
+    {
+        /* Candidate counts are taken from the command line overrides */
+        if(0 == layer_id)
+        {
+            ps_refine_prms->i4_num_results_per_part =
+                ps_me_coding_tools->u1_num_results_per_part_in_l0me;
+        }
+        else if(1 == layer_id)
+        {
+            ps_refine_prms->i4_num_results_per_part =
+                ps_me_coding_tools->u1_num_results_per_part_in_l1me;
+        }
+        else if((2 == layer_id) && (num_layers > 3))
+        {
+            ps_refine_prms->i4_num_results_per_part =
+                ps_me_coding_tools->u1_num_results_per_part_in_l2me;
+        }
+        else
+        {
+            ps_refine_prms->i4_num_results_per_part = hme_determine_num_results_per_part(
+                layer_id, num_layers, ps_me_coding_tools->e_me_quality_presets);
+        }
+
+        /* Every entry of the byte array gets the same candidate count */
+        memset(
+            ps_refine_prms->au1_num_fpel_search_cands,
+            ps_me_coding_tools->u1_max_num_coloc_cands,
+            sizeof(ps_refine_prms->au1_num_fpel_search_cands));
+
+        ps_refine_prms->u1_max_2nx2n_tu_recur_cands =
+            ps_me_coding_tools->u1_max_2nx2n_tu_recur_cands;
+
+        ps_refine_prms->u1_max_num_fpel_refine_centers =
+            ps_me_coding_tools->u1_max_num_fpel_refine_centers;
+
+        ps_refine_prms->u1_max_num_subpel_refine_centers =
+            ps_me_coding_tools->u1_max_num_subpel_refine_centers;
+    }
+
+    if(layer_id != 0)
+    {
+        ps_refine_prms->i4_num_mvbank_results = ps_refine_prms->i4_num_results_per_part;
+    }
+
+    /* 4 * lambda (rounded), assumes lambda_q_shift >= 2 */
+    ps_refine_prms->sdi_threshold =
+        (ps_refine_prms->lambda_recon + (1 << (ps_frm_prms->lambda_q_shift - 1))) >>
+        (ps_frm_prms->lambda_q_shift - 2);
+
+    ps_refine_prms->u1_use_lambda_derived_from_min_8x8_act_in_ctb =
+        MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON && ps_frm_prms->u1_is_cu_qp_delta_enabled;
+}
+
+/**
+********************************************************************************
+*  @fn     hme_set_ctb_boundary_attrs
+*
+*  @brief  Fills the attributes of a (possibly partial) CTB that contains
+*          num_8x8_horz x num_8x8_vert 8x8 blks: the list of valid 16x16 blks,
+*          their 8x8 validity masks and the 32x32 / 64x64 merge flags
+*
+*  @param[out] ps_attrs : attributes structure to be populated
+*
+*  @param[in]  num_8x8_horz : number of 8x8 blk columns present in the CTB
+*
+*  @param[in]  num_8x8_vert : number of 8x8 blk rows present in the CTB
+*
+*  @return None
+********************************************************************************
+*/
+void hme_set_ctb_boundary_attrs(ctb_boundary_attrs_t *ps_attrs, S32 num_8x8_horz, S32 num_8x8_vert)
+{
+    /* A 16x16 column / row counts as soon as one of its 8x8 blks is present */
+    const S32 cols_16x16 = (num_8x8_horz + 1) >> 1;
+    const S32 rows_16x16 = (num_8x8_vert + 1) >> 1;
+    blk_ctb_attrs_t *ps_next_attrs = &ps_attrs->as_blk_attrs[0];
+    S32 valid_16x16_bits = 0;
+    S32 horz_merge_bits, vert_merge_bits;
+    S32 enc_idx;
+
+    ps_attrs->u1_num_blks_in_ctb = (U08)(cols_16x16 * rows_16x16);
+
+    /*************************************************************************/
+    /* Visit the 16 blk positions in encode order:                           */
+    /*       0   1   4   5                                                   */
+    /*       2   3   6   7                                                   */
+    /*       8   9  12  13                                                   */
+    /*      10  11  14  15                                                   */
+    /* Positions outside the picture are skipped, so the valid blks are      */
+    /* packed contiguously in as_blk_attrs. E.g. with 2 columns and 4 rows   */
+    /* of 16x16 blks, positions 8-11 land in entries 4-7 and the valid flag  */
+    /* has bits 0-3 and 8-11 set.                                            */
+    /*************************************************************************/
+    for(enc_idx = 0; enc_idx < 16; enc_idx++)
+    {
+        const S32 col = gau1_encode_to_raster_x[enc_idx];
+        const S32 row = gau1_encode_to_raster_y[enc_idx];
+
+        if((col < cols_16x16) && (row < rows_16x16))
+        {
+            U08 u1_mask_8x8 = 0xF;
+
+            /* Right half of the 16x16 is outside: drop 8x8 blks 1 and 3 */
+            if(((col << 1) + 2) > num_8x8_horz)
+            {
+                u1_mask_8x8 &= 0x5;
+            }
+            /* Bottom half of the 16x16 is outside: drop 8x8 blks 2 and 3 */
+            if(((row << 1) + 2) > num_8x8_vert)
+            {
+                u1_mask_8x8 &= 0x3;
+            }
+
+            valid_16x16_bits |= (1 << enc_idx);
+
+            ps_next_attrs->u1_blk_id_in_full_ctb = enc_idx;
+            ps_next_attrs->u1_blk_x = col;
+            ps_next_attrs->u1_blk_y = row;
+            ps_next_attrs->u1_blk_8x8_mask = u1_mask_8x8;
+            ps_next_attrs++;
+        }
+    }
+
+    ps_attrs->cu_16x16_valid_flag = valid_16x16_bits;
+
+    /* A 32x32 merge needs the 32x32 to be complete both horizontally and    */
+    /* vertically, hence the AND of the two per-direction patterns           */
+    horz_merge_bits = (num_8x8_horz < 4) ? 0x0 : ((num_8x8_horz < 8) ? 0x5 : 0xF);
+    vert_merge_bits = (num_8x8_vert < 4) ? 0x0 : ((num_8x8_vert < 8) ? 0x3 : 0xF);
+
+    ps_attrs->u1_merge_to_32x32_flag = (U08)(horz_merge_bits & vert_merge_bits);
+
+    /* 64x64 merge is attempted only when all four 32x32 blks are complete */
+    ps_attrs->u1_merge_to_64x64_flag = (ps_attrs->u1_merge_to_32x32_flag != 0xF) ? 0 : 1;
+}
+
+/**
+********************************************************************************
+*  @fn     hme_set_ctb_attrs
+*
+*  @brief  Sets up the CTB attribute entries for a picture of size wd x ht:
+*          the centre entry always, and the right / bottom / bottom-right
+*          boundary entries when the picture is not a multiple of 64 in the
+*          corresponding direction
+*
+*  @param[out] ps_attrs : array of attributes indexed by the CTB_* position ids
+*
+*  @param[in]  wd : picture width
+*
+*  @param[in]  ht : picture height
+*
+*  @return None
+********************************************************************************
+*/
+void hme_set_ctb_attrs(ctb_boundary_attrs_t *ps_attrs, S32 wd, S32 ht)
+{
+    /* Part of the last CTB column / row that lies inside the picture */
+    const S32 rem_wd = wd & 63;
+    const S32 rem_ht = ht & 63;
+    const S32 num_8x8_in_rem_wd = rem_wd >> 3;
+    const S32 num_8x8_in_rem_ht = rem_ht >> 3;
+
+    if(rem_wd != 0)
+    {
+        hme_set_ctb_boundary_attrs(&ps_attrs[CTB_RT_PIC_BOUNDARY], num_8x8_in_rem_wd, 8);
+    }
+    if(rem_ht != 0)
+    {
+        hme_set_ctb_boundary_attrs(&ps_attrs[CTB_BOT_PIC_BOUNDARY], 8, num_8x8_in_rem_ht);
+    }
+    if((rem_wd != 0) && (rem_ht != 0))
+    {
+        hme_set_ctb_boundary_attrs(
+            &ps_attrs[CTB_BOT_RT_PIC_BOUNDARY], num_8x8_in_rem_wd, num_8x8_in_rem_ht);
+    }
+
+    /* Complete CTB: 8 x 8 blks of size 8x8 */
+    hme_set_ctb_boundary_attrs(&ps_attrs[CTB_CENTRE], 8, 8);
+}
+
+/**
+********************************************************************************
+*  @fn     hme_scale_for_ref_idx(S32 curr_poc, S32 poc_from, S32 poc_to)
+*
+*  @brief  When we have an mv with ref id "poc_to" for which a predictor is to
+*          be computed, and the predictor has ref id "poc_from", this function
+*          returns the scale factor in Q8 for such a purpose
+*
+*  @param[in] curr_poc : input picture poc
+*
+*  @param[in] poc_from : POC of the pic, pointed to by ref id to be scaled
+*
+*  @param[in] poc_to : POC of the pic, pointed to by ref id to be scaled to
+*
+*  @return Scale factor in Q8 format, limited to [-4096, 4095]. When curr_poc
+*          equals poc_from no ratio exists and 256 (unity in Q8) is returned
+********************************************************************************
+*/
+S16 hme_scale_for_ref_idx(S32 curr_poc, S32 poc_from, S32 poc_to)
+{
+    S32 td, tx, tb;
+    S16 i2_scf;
+    /*************************************************************************/
+    /* Approximate scale factor: 256 * num / denom                           */
+    /* num = curr_poc - poc_to, denom = curr_poc - poc_from                  */
+    /* Exact implementation as per standard.                                 */
+    /*************************************************************************/
+
+    tb = HME_CLIP((curr_poc - poc_to), -128, 127);
+    td = HME_CLIP((curr_poc - poc_from), -128, 127);
+
+    /* Zero temporal distance would divide by zero below; fall back to an    */
+    /* unscaled predictor instead                                            */
+    if(0 == td)
+    {
+        return 256;
+    }
+
+    tx = (16384 + (ABS(td) >> 1)) / td;
+    i2_scf = HME_CLIP((((tb * tx) + 32) >> 6), -4096, 4095);
+
+    return (i2_scf);
+}
+
+/**
+********************************************************************************
+* @fn hme_process_frm_init
+*
+* @brief HME frame level initialisation for layer 0 (called only for the
+* encode layer). Records the frame params, maps every reference in
+* ps_ref_map to its layer descriptor, builds the past / future ref
+* lists, the weighted prediction tables, the ref idx bit cost look up,
+* the POC based mv scale factors, the per-ref plane pointer lists and
+* the layer 0 mv range.
+*
+* @param[in] pv_me_ctxt : ME ctxt pointer (cast to me_ctxt_t *)
+*
+* @param[in] ps_ref_map : Reference map prms pointer
+*
+* @param[in] ps_frm_prms :Pointer to frame params
+*
+* @param[in] i4_me_frm_id : index into aps_me_frm_prms selecting the frame
+* ctxt to initialise
+*
+* @param[in] i4_num_me_frm_pllel : forwarded to hme_find_descr_idx()
+*
+* @return None
+********************************************************************************
+*/
+void hme_process_frm_init(
+ void *pv_me_ctxt,
+ hme_ref_map_t *ps_ref_map,
+ hme_frm_prms_t *ps_frm_prms,
+ WORD32 i4_me_frm_id,
+ WORD32 i4_num_me_frm_pllel)
+{
+ me_ctxt_t *ps_thrd_ctxt = (me_ctxt_t *)pv_me_ctxt;
+ me_frm_ctxt_t *ps_ctxt = (me_frm_ctxt_t *)ps_thrd_ctxt->aps_me_frm_prms[i4_me_frm_id];
+
+ S32 i, j, desc_idx;
+ S16 i2_max_x = 0, i2_max_y = 0;
+
+ /* Set the Qp of current frm passed by caller. Required for intra cost */
+ ps_ctxt->frm_qstep = ps_frm_prms->qstep;
+ ps_ctxt->qstep_ls8 = ps_frm_prms->qstep_ls8;
+
+ /* Keep a private copy of all frame params (this includes bidir_enabled) */
+ ps_ctxt->s_frm_prms = *ps_frm_prms;
+
+ /*************************************************************************/
+ /* Set up the ref pic parameters across all layers. For this, we do the */
+ /* following: the application has given us a ref pic list, we go index */
+ /* by index and pick up the picture. A picture can be uniquely mapped */
+ /* to a POC. So we search all layer descriptor array to find the POC */
+ /* Once found, we update all attributes in this descriptor. */
+ /* During this update process we also create an index of descriptor id */
+ /* to ref id mapping. It is important to find the same POC in the layers */
+ /* descr structure since it holds the pyramid inputs for non encode layers*/
+ /* Apart from this, we also update array containing the index of the descr*/
+ /* During processing for ease of access, each layer has a pointer to array*/
+ /* of pointers containing fxfy, fxhy, hxfy, hxhy and inputs for each ref */
+ /* we update this too. */
+ /*************************************************************************/
+ ps_ctxt->num_ref_past = 0;
+ ps_ctxt->num_ref_future = 0;
+ for(i = 0; i < ps_ref_map->i4_num_ref; i++)
+ {
+ S32 ref_id_lc, idx;
+ hme_ref_desc_t *ps_ref_desc;
+
+ ps_ref_desc = &ps_ref_map->as_ref_desc[i];
+ ref_id_lc = ps_ref_desc->i1_ref_id_lc;
+ /* Obtain the id of descriptor that contains this POC */
+ idx = hme_find_descr_idx(
+ ps_thrd_ctxt, ps_ref_desc->i4_poc, ps_ref_desc->i4_GOP_num, i4_num_me_frm_pllel);
+
+ /* Update this descr with the reference attributes. The literal 0, 1
+ * arguments replace the former ps_ctxt->num_layers (see the trailing
+ * comment); presumably they restrict the update to layer 0 - confirm
+ * against hme_update_layer_desc() */
+ hme_update_layer_desc(
+ &ps_thrd_ctxt->as_ref_descr[idx],
+ ps_ref_desc,
+ 0,
+ 1, //ps_ctxt->num_layers,
+ ps_ctxt->ps_curr_descr);
+
+ /* Point the layer 0 list pointers of the current descr at the per-frame
+ * arrays of ps_ctxt. These arrays are filled further below, so the two
+ * views alias the same storage */
+ ps_ctxt->ps_curr_descr->aps_layers[0]->ppu1_list_inp = &ps_ctxt->apu1_list_inp[0][0];
+ ps_ctxt->ps_curr_descr->aps_layers[0]->ppu1_list_rec_fxfy =
+ &ps_ctxt->apu1_list_rec_fxfy[0][0];
+ ps_ctxt->ps_curr_descr->aps_layers[0]->ppu1_list_rec_hxfy =
+ &ps_ctxt->apu1_list_rec_hxfy[0][0];
+ ps_ctxt->ps_curr_descr->aps_layers[0]->ppu1_list_rec_fxhy =
+ &ps_ctxt->apu1_list_rec_fxhy[0][0];
+ ps_ctxt->ps_curr_descr->aps_layers[0]->ppu1_list_rec_hxhy =
+ &ps_ctxt->apu1_list_rec_hxhy[0][0];
+ ps_ctxt->ps_curr_descr->aps_layers[0]->ppv_dep_mngr_recon =
+ &ps_ctxt->apv_list_dep_mngr[0][0];
+
+ /* Update the array having ref id lc to descr id mapping */
+ ps_ctxt->a_ref_to_descr_id[ps_ref_desc->i1_ref_id_lc] = idx;
+
+ /* From ref id lc we need to work out the POC, So update this array */
+ ps_ctxt->ai4_ref_idx_to_poc_lc[ref_id_lc] = ps_ref_desc->i4_poc;
+
+ /* When computing costs in L0 and L1 directions, we need the */
+ /* respective ref id L0 and L1, so update this mapping */
+ ps_ctxt->a_ref_idx_lc_to_l0[ref_id_lc] = ps_ref_desc->i1_ref_id_l0;
+ ps_ctxt->a_ref_idx_lc_to_l1[ref_id_lc] = ps_ref_desc->i1_ref_id_l1;
+ /* A ref is "past" when its POC is smaller than the current POC; when the
+ * current POC is 0 every ref is classified as past */
+ if((ps_ctxt->i4_curr_poc > ps_ref_desc->i4_poc) || ps_ctxt->i4_curr_poc == 0)
+ {
+ ps_ctxt->au1_is_past[ref_id_lc] = 1;
+ ps_ctxt->ai1_past_list[ps_ctxt->num_ref_past] = ref_id_lc;
+ ps_ctxt->num_ref_past++;
+ }
+ else
+ {
+ ps_ctxt->au1_is_past[ref_id_lc] = 0;
+ ps_ctxt->ai1_future_list[ps_ctxt->num_ref_future] = ref_id_lc;
+ ps_ctxt->num_ref_future++;
+ }
+
+ if(1 == ps_ctxt->i4_wt_pred_enable_flag)
+ {
+ /* copy the weight and offsets from current ref desc */
+ ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_lc] = ps_ref_desc->i2_weight;
+
+ /* inv weight is stored in Q15 format: round((1 << 15) / weight).
+ * NOTE(review): divides by i2_weight, assumes it is non-zero - confirm */
+ ps_ctxt->s_wt_pred.a_inv_wpred_wt[ref_id_lc] =
+ ((1 << 15) + (ps_ref_desc->i2_weight >> 1)) / ps_ref_desc->i2_weight;
+ ps_ctxt->s_wt_pred.a_wpred_off[ref_id_lc] = ps_ref_desc->i2_offset;
+ }
+ else
+ {
+ /* store default wt and offset*/
+ ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_lc] = WGHT_DEFAULT;
+
+ /* inv weight is stored in Q15 format */
+ ps_ctxt->s_wt_pred.a_inv_wpred_wt[ref_id_lc] =
+ ((1 << 15) + (WGHT_DEFAULT >> 1)) / WGHT_DEFAULT;
+
+ ps_ctxt->s_wt_pred.a_wpred_off[ref_id_lc] = 0;
+ }
+ }
+
+ /* Terminate both lists with -1 */
+ ps_ctxt->ai1_future_list[ps_ctxt->num_ref_future] = -1;
+ ps_ctxt->ai1_past_list[ps_ctxt->num_ref_past] = -1;
+
+ /*************************************************************************/
+ /* Preparation of the TLU for bits for reference indices. */
+ /* Special case is that of numref = 2. (TEV) */
+ /* Other cases uses UEV */
+ /*************************************************************************/
+ for(i = 0; i < MAX_NUM_REF; i++)
+ {
+ ps_ctxt->au1_ref_bits_tlu_lc[0][i] = 0;
+ ps_ctxt->au1_ref_bits_tlu_lc[1][i] = 0;
+ }
+
+ if(ps_ref_map->i4_num_ref == 2)
+ {
+ ps_ctxt->au1_ref_bits_tlu_lc[0][0] = 1;
+ ps_ctxt->au1_ref_bits_tlu_lc[1][0] = 1;
+ ps_ctxt->au1_ref_bits_tlu_lc[0][1] = 1;
+ ps_ctxt->au1_ref_bits_tlu_lc[1][1] = 1;
+ }
+ else if(ps_ref_map->i4_num_ref > 2)
+ {
+ for(i = 0; i < ps_ref_map->i4_num_ref; i++)
+ {
+ S32 l0, l1;
+ l0 = ps_ctxt->a_ref_idx_lc_to_l0[i];
+ l1 = ps_ctxt->a_ref_idx_lc_to_l1[i];
+ ps_ctxt->au1_ref_bits_tlu_lc[0][i] = gau1_ref_bits[l0];
+ ps_ctxt->au1_ref_bits_tlu_lc[1][i] = gau1_ref_bits[l1];
+ }
+ }
+
+ /*************************************************************************/
+ /* Preparation of the scaling factors for reference indices. The scale */
+ /* factor depends on distance of the two ref indices from current input */
+ /* in terms of poc delta. Entry [j + i * MAX_NUM_REF] scales from ref j */
+ /* to ref i. */
+ /*************************************************************************/
+ for(i = 0; i < ps_ref_map->i4_num_ref; i++)
+ {
+ for(j = 0; j < ps_ref_map->i4_num_ref; j++)
+ {
+ S16 i2_scf_q8;
+ S32 poc_from, poc_to;
+
+ poc_from = ps_ctxt->ai4_ref_idx_to_poc_lc[j];
+ poc_to = ps_ctxt->ai4_ref_idx_to_poc_lc[i];
+
+ i2_scf_q8 = hme_scale_for_ref_idx(ps_ctxt->i4_curr_poc, poc_from, poc_to);
+ ps_ctxt->ai2_ref_scf[j + i * MAX_NUM_REF] = i2_scf_q8;
+ }
+ }
+
+ /*************************************************************************/
+ /* We store simplified look ups for 4 hpel planes and inp y plane for */
+ /* every layer and for every ref id in the layer. So update these lookups*/
+ /*************************************************************************/
+ for(i = 0; i < 1; i++) /* single iteration: only layer 0 is handled here */
+ {
+ U08 **ppu1_rec_fxfy, **ppu1_rec_hxfy, **ppu1_rec_fxhy, **ppu1_rec_hxhy;
+ U08 **ppu1_inp;
+ void **ppvlist_dep_mngr;
+ layer_ctxt_t *ps_layer_ctxt = ps_ctxt->ps_curr_descr->aps_layers[i];
+
+ ppvlist_dep_mngr = &ps_ctxt->apv_list_dep_mngr[i][0];
+ ppu1_rec_fxfy = &ps_ctxt->apu1_list_rec_fxfy[i][0];
+ ppu1_rec_hxfy = &ps_ctxt->apu1_list_rec_hxfy[i][0];
+ ppu1_rec_fxhy = &ps_ctxt->apu1_list_rec_fxhy[i][0];
+ ppu1_rec_hxhy = &ps_ctxt->apu1_list_rec_hxhy[i][0];
+ ppu1_inp = &ps_ctxt->apu1_list_inp[i][0];
+ for(j = 0; j < ps_ref_map->i4_num_ref; j++)
+ {
+ hme_ref_desc_t *ps_ref_desc;
+ hme_ref_buf_info_t *ps_buf_info;
+ layer_ctxt_t *ps_layer;
+ S32 ref_id_lc;
+
+ ps_ref_desc = &ps_ref_map->as_ref_desc[j];
+ ps_buf_info = &ps_ref_desc->as_ref_info[i];
+ ref_id_lc = ps_ref_desc->i1_ref_id_lc;
+
+ desc_idx = ps_ctxt->a_ref_to_descr_id[ref_id_lc];
+ ps_layer = ps_thrd_ctxt->as_ref_descr[desc_idx].aps_layers[i]; /* not read afterwards in this function */
+
+ /* Entries are stored at the position j of the ref in ps_ref_map */
+ ppu1_inp[j] = ps_buf_info->pu1_ref_src;
+ ppu1_rec_fxfy[j] = ps_buf_info->pu1_rec_fxfy;
+ ppu1_rec_hxfy[j] = ps_buf_info->pu1_rec_hxfy;
+ ppu1_rec_fxhy[j] = ps_buf_info->pu1_rec_fxhy;
+ ppu1_rec_hxhy[j] = ps_buf_info->pu1_rec_hxhy;
+ ppvlist_dep_mngr[j] = ps_buf_info->pv_dep_mngr;
+
+ /* Update the curr descriptors reference pointers here */
+ ps_layer_ctxt->ppu1_list_inp[j] = ps_buf_info->pu1_ref_src;
+ ps_layer_ctxt->ppu1_list_rec_fxfy[j] = ps_buf_info->pu1_rec_fxfy;
+ ps_layer_ctxt->ppu1_list_rec_hxfy[j] = ps_buf_info->pu1_rec_hxfy;
+ ps_layer_ctxt->ppu1_list_rec_fxhy[j] = ps_buf_info->pu1_rec_fxhy;
+ ps_layer_ctxt->ppu1_list_rec_hxhy[j] = ps_buf_info->pu1_rec_hxhy;
+ }
+ }
+ /*************************************************************************/
+ /* The mv range for each layer is computed. For dyadic layers it will */
+ /* keep shrinking by 2, for non dyadic it will shrink by ratio of wd and */
+ /* ht. In general formula used is scale by ratio of wd for x and ht for y*/
+ /*************************************************************************/
+ for(i = 0; i < 1; i++) /* single iteration: i is always 0 */
+ {
+ layer_ctxt_t *ps_layer_ctxt;
+ if(i == 0)
+ {
+ i2_max_x = ps_frm_prms->i2_mv_range_x;
+ i2_max_y = ps_frm_prms->i2_mv_range_y;
+ }
+ else /* not reachable with the loop bound above */
+ {
+ i2_max_x = (S16)FLOOR8(((i2_max_x * ps_ctxt->i4_wd) / ps_ctxt->i4_wd));
+ i2_max_y = (S16)FLOOR8(((i2_max_y * ps_ctxt->i4_ht) / ps_ctxt->i4_ht));
+ }
+ ps_layer_ctxt = ps_ctxt->ps_curr_descr->aps_layers[i];
+ ps_layer_ctxt->i2_max_mv_x = i2_max_x;
+ ps_layer_ctxt->i2_max_mv_y = i2_max_y;
+
+ /*********************************************************************/
+ /* Every layer maintains a reference id lc to POC mapping. This is */
+ /* because the mapping is unique for every frm. Also, in next frm, */
+ /* we require colocated mvs which means scaling according to temporal*/
+ /* distance. Hence this mapping needs to be maintained in every */
+ /* layer ctxt. Unused entries are first filled with all-ones bytes. */
+ /*********************************************************************/
+ memset(ps_layer_ctxt->ai4_ref_id_to_poc_lc, -1, sizeof(S32) * ps_ctxt->max_num_ref);
+ if(ps_ref_map->i4_num_ref)
+ {
+ memcpy(
+ ps_layer_ctxt->ai4_ref_id_to_poc_lc,
+ ps_ctxt->ai4_ref_idx_to_poc_lc,
+ ps_ref_map->i4_num_ref * sizeof(S32));
+ }
+ }
+
+ return;
+}
+
+/**
+********************************************************************************
+* @fn hme_coarse_process_frm_init
+*
+* @brief HME frame level initialisation for the coarse ME ctxt. Records the
+* frame params, maps every reference in ps_ref_map to its layer
+* descriptor, builds the past / future ref lists, the weighted
+* prediction tables, the ref idx bit cost look up, the POC based mv
+* scale factors, and for layers 1 .. num_layers - 1 the input plane
+* pointer lists and the mv range.
+*
+* @param[in] pv_me_ctxt : ME ctxt pointer (cast to coarse_me_ctxt_t *)
+*
+* @param[in] ps_ref_map : Reference map prms pointer
+*
+* @param[in] ps_frm_prms :Pointer to frame params
+*
+* @return None
+********************************************************************************
+*/
+void hme_coarse_process_frm_init(
+ void *pv_me_ctxt, hme_ref_map_t *ps_ref_map, hme_frm_prms_t *ps_frm_prms)
+{
+ coarse_me_ctxt_t *ps_ctxt = (coarse_me_ctxt_t *)pv_me_ctxt;
+ S32 i, j, desc_idx;
+ S16 i2_max_x = 0, i2_max_y = 0;
+
+ /* Set the Qp of current frm passed by caller. Required for intra cost */
+ ps_ctxt->frm_qstep = ps_frm_prms->qstep;
+
+ /* Keep a private copy of all frame params (this includes bidir_enabled) */
+ ps_ctxt->s_frm_prms = *ps_frm_prms;
+
+ /*************************************************************************/
+ /* Set up the ref pic parameters across all layers. For this, we do the */
+ /* following: the application has given us a ref pic list, we go index */
+ /* by index and pick up the picture. A picture can be uniquely mapped */
+ /* to a POC. So we search all layer descriptor array to find the POC */
+ /* Once found, we update all attributes in this descriptor. */
+ /* During this update process we also create an index of descriptor id */
+ /* to ref id mapping. It is important to find the same POC in the layers */
+ /* descr structure since it holds the pyramid inputs for non encode layers*/
+ /* Apart from this, we also update array containing the index of the descr*/
+ /* During processing for ease of access, each layer has a pointer to array*/
+ /* of pointers containing fxfy, fxhy, hxfy, hxhy and inputs for each ref */
+ /* we update this too. */
+ /*************************************************************************/
+ ps_ctxt->num_ref_past = 0;
+ ps_ctxt->num_ref_future = 0;
+ for(i = 0; i < ps_ref_map->i4_num_ref; i++)
+ {
+ S32 ref_id_lc, idx;
+ hme_ref_desc_t *ps_ref_desc;
+
+ ps_ref_desc = &ps_ref_map->as_ref_desc[i];
+ ref_id_lc = ps_ref_desc->i1_ref_id_lc;
+ /* Obtain the id of descriptor that contains this POC */
+ idx = hme_coarse_find_descr_idx(ps_ctxt, ps_ref_desc->i4_poc);
+
+ /* Update the layers in this descr with the reference attributes. The
+ * arguments 1 and num_layers - 1 presumably select the non encode layers
+ * - confirm against hme_update_layer_desc() */
+ hme_update_layer_desc(
+ &ps_ctxt->as_ref_descr[idx],
+ ps_ref_desc,
+ 1,
+ ps_ctxt->num_layers - 1,
+ ps_ctxt->ps_curr_descr);
+
+ /* Update the array having ref id lc to descr id mapping */
+ ps_ctxt->a_ref_to_descr_id[ps_ref_desc->i1_ref_id_lc] = idx;
+
+ /* From ref id lc we need to work out the POC, So update this array */
+ ps_ctxt->ai4_ref_idx_to_poc_lc[ref_id_lc] = ps_ref_desc->i4_poc;
+
+ /* From ref id lc we need to work out the display num, So update this array */
+ ps_ctxt->ai4_ref_idx_to_disp_num[ref_id_lc] = ps_ref_desc->i4_display_num;
+
+ /* When computing costs in L0 and L1 directions, we need the */
+ /* respective ref id L0 and L1, so update this mapping */
+ ps_ctxt->a_ref_idx_lc_to_l0[ref_id_lc] = ps_ref_desc->i1_ref_id_l0;
+ ps_ctxt->a_ref_idx_lc_to_l1[ref_id_lc] = ps_ref_desc->i1_ref_id_l1;
+ /* A ref is "past" when its POC is smaller than the current POC; when the
+ * current POC is 0 every ref is classified as past */
+ if((ps_ctxt->i4_curr_poc > ps_ref_desc->i4_poc) || ps_ctxt->i4_curr_poc == 0)
+ {
+ ps_ctxt->au1_is_past[ref_id_lc] = 1;
+ ps_ctxt->ai1_past_list[ps_ctxt->num_ref_past] = ref_id_lc;
+ ps_ctxt->num_ref_past++;
+ }
+ else
+ {
+ ps_ctxt->au1_is_past[ref_id_lc] = 0;
+ ps_ctxt->ai1_future_list[ps_ctxt->num_ref_future] = ref_id_lc;
+ ps_ctxt->num_ref_future++;
+ }
+ if(1 == ps_ctxt->i4_wt_pred_enable_flag)
+ {
+ /* copy the weight and offsets from current ref desc */
+ ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_lc] = ps_ref_desc->i2_weight;
+
+ /* inv weight is stored in Q15 format: round((1 << 15) / weight).
+ * NOTE(review): divides by i2_weight, assumes it is non-zero - confirm */
+ ps_ctxt->s_wt_pred.a_inv_wpred_wt[ref_id_lc] =
+ ((1 << 15) + (ps_ref_desc->i2_weight >> 1)) / ps_ref_desc->i2_weight;
+
+ ps_ctxt->s_wt_pred.a_wpred_off[ref_id_lc] = ps_ref_desc->i2_offset;
+ }
+ else
+ {
+ /* store default wt and offset*/
+ ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_lc] = WGHT_DEFAULT;
+
+ /* inv weight is stored in Q15 format */
+ ps_ctxt->s_wt_pred.a_inv_wpred_wt[ref_id_lc] =
+ ((1 << 15) + (WGHT_DEFAULT >> 1)) / WGHT_DEFAULT;
+
+ ps_ctxt->s_wt_pred.a_wpred_off[ref_id_lc] = 0;
+ }
+ }
+
+ /* Terminate both lists with -1 */
+ ps_ctxt->ai1_future_list[ps_ctxt->num_ref_future] = -1;
+ ps_ctxt->ai1_past_list[ps_ctxt->num_ref_past] = -1;
+
+ /*************************************************************************/
+ /* Preparation of the TLU for bits for reference indices. */
+ /* Special case is that of numref = 2. (TEV) */
+ /* Other cases uses UEV */
+ /*************************************************************************/
+ for(i = 0; i < MAX_NUM_REF; i++)
+ {
+ ps_ctxt->au1_ref_bits_tlu_lc[0][i] = 0;
+ ps_ctxt->au1_ref_bits_tlu_lc[1][i] = 0;
+ }
+
+ if(ps_ref_map->i4_num_ref == 2)
+ {
+ ps_ctxt->au1_ref_bits_tlu_lc[0][0] = 1;
+ ps_ctxt->au1_ref_bits_tlu_lc[1][0] = 1;
+ ps_ctxt->au1_ref_bits_tlu_lc[0][1] = 1;
+ ps_ctxt->au1_ref_bits_tlu_lc[1][1] = 1;
+ }
+ else if(ps_ref_map->i4_num_ref > 2)
+ {
+ for(i = 0; i < ps_ref_map->i4_num_ref; i++)
+ {
+ S32 l0, l1;
+ l0 = ps_ctxt->a_ref_idx_lc_to_l0[i];
+ l1 = ps_ctxt->a_ref_idx_lc_to_l1[i];
+ ps_ctxt->au1_ref_bits_tlu_lc[0][i] = gau1_ref_bits[l0];
+ ps_ctxt->au1_ref_bits_tlu_lc[1][i] = gau1_ref_bits[l1];
+ }
+ }
+
+ /*************************************************************************/
+ /* Preparation of the scaling factors for reference indices. The scale */
+ /* factor depends on distance of the two ref indices from current input */
+ /* in terms of poc delta. Entry [j + i * MAX_NUM_REF] scales from ref j */
+ /* to ref i. */
+ /*************************************************************************/
+ for(i = 0; i < ps_ref_map->i4_num_ref; i++)
+ {
+ for(j = 0; j < ps_ref_map->i4_num_ref; j++)
+ {
+ S16 i2_scf_q8;
+ S32 poc_from, poc_to;
+
+ poc_from = ps_ctxt->ai4_ref_idx_to_poc_lc[j];
+ poc_to = ps_ctxt->ai4_ref_idx_to_poc_lc[i];
+
+ i2_scf_q8 = hme_scale_for_ref_idx(ps_ctxt->i4_curr_poc, poc_from, poc_to);
+ ps_ctxt->ai2_ref_scf[j + i * MAX_NUM_REF] = i2_scf_q8;
+ }
+ }
+
+ /*************************************************************************/
+ /* We store simplified look ups for inp y plane for */
+ /* every layer and for every ref id in the layer. */
+ /*************************************************************************/
+ for(i = 1; i < ps_ctxt->num_layers; i++) /* layer 0 is not handled here */
+ {
+ U08 **ppu1_inp;
+
+ ppu1_inp = &ps_ctxt->apu1_list_inp[i][0];
+ for(j = 0; j < ps_ref_map->i4_num_ref; j++)
+ {
+ hme_ref_desc_t *ps_ref_desc;
+ hme_ref_buf_info_t *ps_buf_info;
+ layer_ctxt_t *ps_layer;
+ S32 ref_id_lc;
+
+ ps_ref_desc = &ps_ref_map->as_ref_desc[j];
+ ps_buf_info = &ps_ref_desc->as_ref_info[i]; /* not read afterwards in this function */
+ ref_id_lc = ps_ref_desc->i1_ref_id_lc;
+
+ desc_idx = ps_ctxt->a_ref_to_descr_id[ref_id_lc];
+ ps_layer = ps_ctxt->as_ref_descr[desc_idx].aps_layers[i];
+
+ /* The input plane comes from the layer ctxt of the ref's descriptor */
+ ppu1_inp[j] = ps_layer->pu1_inp;
+ }
+ }
+ /*************************************************************************/
+ /* The mv range for each layer is computed. For dyadic layers it will */
+ /* keep shrinking by 2, for non dyadic it will shrink by ratio of wd and */
+ /* ht. In general formula used is scale by ratio of wd for x and ht for y*/
+ /*************************************************************************/
+
+ /* set to layer 0 search range params */
+ i2_max_x = ps_frm_prms->i2_mv_range_x;
+ i2_max_y = ps_frm_prms->i2_mv_range_y;
+
+ for(i = 1; i < ps_ctxt->num_layers; i++)
+ {
+ layer_ctxt_t *ps_layer_ctxt;
+
+ {
+ /* Range of layer i is derived from the range of layer i - 1 */
+ i2_max_x = (S16)FLOOR8(((i2_max_x * ps_ctxt->a_wd[i]) / ps_ctxt->a_wd[i - 1]));
+ i2_max_y = (S16)FLOOR8(((i2_max_y * ps_ctxt->a_ht[i]) / ps_ctxt->a_ht[i - 1]));
+ }
+ ps_layer_ctxt = ps_ctxt->ps_curr_descr->aps_layers[i];
+ ps_layer_ctxt->i2_max_mv_x = i2_max_x;
+ ps_layer_ctxt->i2_max_mv_y = i2_max_y;
+
+ /*********************************************************************/
+ /* Every layer maintains a reference id lc to POC mapping. This is */
+ /* because the mapping is unique for every frm. Also, in next frm, */
+ /* we require colocated mvs which means scaling according to temporal*/
+ /* distance. Hence this mapping needs to be maintained in every */
+ /* layer ctxt. Unused entries are first filled with all-ones bytes. */
+ /*********************************************************************/
+ memset(ps_layer_ctxt->ai4_ref_id_to_poc_lc, -1, sizeof(S32) * ps_ctxt->max_num_ref);
+ if(ps_ref_map->i4_num_ref)
+ {
+ memcpy(
+ ps_layer_ctxt->ai4_ref_id_to_poc_lc,
+ ps_ctxt->ai4_ref_idx_to_poc_lc,
+ ps_ref_map->i4_num_ref * sizeof(S32));
+ memcpy(
+ ps_layer_ctxt->ai4_ref_id_to_disp_num,
+ ps_ctxt->ai4_ref_idx_to_disp_num,
+ ps_ref_map->i4_num_ref * sizeof(S32));
+ }
+ }
+
+ return;
+}
+
+/**
+********************************************************************************
+* @fn hme_process_frm
+*
+* @brief HME frame level processing of the encode layer (layer id 0): sets the
+*        CTB boundary attrs and refine prms, calls hme_refine and finally
+*        updates i4_is_prev_frame_reference of the frame ctxt
+*
+* @param[in] pv_me_ctxt : ME ctxt pointer (me_ctxt_t)
+* @param[in] ps_l0_ipe_input : L0 IPE input, handed over to hme_refine
+* @param[in] ps_ref_map : Reference map prms pointer
+* @param[in] ppd_intra_costs : pointer to array of intra cost buffers for each layer
+* @param[in] ps_frm_prms : pointer to Frame level parameters of HME
+* @param[in] pf_ext_update_fxn : function pointer to update CTB results
+* @param[in] pv_coarse_layer : layer ctxt (layer_ctxt_t) handed over to hme_refine
+* @param[in] pv_multi_thrd_ctxt : Multi thread related ctxt (multi_thrd_ctxt_t)
+* @param[in] i4_frame_parallelism_level : if non zero, prev frame ref status is set to 0
+* @param[in] thrd_id : thread id, handed over to hme_refine
+* @param[in] i4_me_frm_id : index used to pick the ME frame ctxt and the input prms
+*
+* @return void
+********************************************************************************
+*/
+
+void hme_process_frm(
+    void *pv_me_ctxt,
+    pre_enc_L0_ipe_encloop_ctxt_t *ps_l0_ipe_input,
+    hme_ref_map_t *ps_ref_map,
+    double **ppd_intra_costs,
+    hme_frm_prms_t *ps_frm_prms,
+    PF_EXT_UPDATE_FXN_T pf_ext_update_fxn,
+    void *pv_coarse_layer,
+    void *pv_multi_thrd_ctxt,
+    S32 i4_frame_parallelism_level,
+    S32 thrd_id,
+    S32 i4_me_frm_id)
+{
+    /* Thread level ME ctxt and the frame level ctxt picked by i4_me_frm_id */
+    me_ctxt_t *ps_me_ctxt = (me_ctxt_t *)pv_me_ctxt;
+    me_frm_ctxt_t *ps_frm_ctxt = ps_me_ctxt->aps_me_frm_prms[i4_me_frm_id];
+
+    multi_thrd_ctxt_t *ps_mt_ctxt = (multi_thrd_ctxt_t *)pv_multi_thrd_ctxt;
+    layer_ctxt_t *ps_coarse_lyr = (layer_ctxt_t *)pv_coarse_layer;
+    refine_prms_t s_l0_refine_prms;
+
+    /* Set the CTB attributes depending on corner/rt edge/bot edge/center */
+    hme_set_ctb_attrs(ps_frm_ctxt->as_ctb_bound_attrs, ps_frm_ctxt->i4_wd, ps_frm_ctxt->i4_ht);
+
+    /* Refine prms of the encode layer: layer id is fixed to 0 */
+    hme_set_refine_prms(
+        &s_l0_refine_prms,
+        ps_frm_ctxt->u1_encode[0],
+        ps_ref_map->i4_num_ref,
+        0,
+        ps_frm_ctxt->num_layers,
+        ps_frm_ctxt->num_layers_explicit_search,
+        ps_me_ctxt->s_init_prms.use_4x4,
+        ps_frm_prms,
+        ppd_intra_costs,
+        &ps_me_ctxt->s_init_prms.s_me_coding_tools);
+
+    /*************************************************************************/
+    /* Final L0 layer ME call, run with the encode layer job type            */
+    /*************************************************************************/
+    hme_refine(
+        ps_me_ctxt,
+        &s_l0_refine_prms,
+        pf_ext_update_fxn,
+        ps_coarse_lyr,
+        ps_mt_ctxt,
+        ME_JOB_ENC_LYR,
+        thrd_id,
+        i4_me_frm_id,
+        ps_l0_ipe_input);
+
+    /*************************************************************************/
+    /* Set current ref pic status which will be used as prev frame ref pic.  */
+    /* Without frame parallelism it is read from s_lap_out of the current    */
+    /* input; otherwise it is reset to 0.                                    */
+    /*************************************************************************/
+    if(0 == i4_frame_parallelism_level)
+    {
+        ps_frm_ctxt->i4_is_prev_frame_reference =
+            ps_mt_ctxt->aps_cur_inp_me_prms[i4_me_frm_id]
+                ->ps_curr_inp->s_lap_out.i4_is_ref_pic;
+    }
+    else
+    {
+        ps_frm_ctxt->i4_is_prev_frame_reference = 0;
+    }
+
+    return;
+}
+
+/**
+********************************************************************************
+* @fn hme_coarse_process_frm
+*
+* @brief HME frame level processing of the non encode layers: coarsest layer
+*        search followed by refinement of the layers down to layer 1
+*
+* @param[in] pv_me_ctxt : Coarse ME ctxt pointer (coarse_me_ctxt_t)
+* @param[in] ps_ref_map : Reference map prms pointer
+* @param[in] ps_frm_prms : pointer to Frame level parameters of HME
+* @param[in] pv_multi_thrd_ctxt : Multi thread related ctxt (multi_thrd_ctxt_t)
+* @param[in] i4_ping_pong : handed over to hme_coarsest / hme_refine_no_encode
+* @param[in] ppv_dep_mngr_hme_sync : handed over to hme_coarsest / hme_refine_no_encode
+*
+* @return void
+********************************************************************************
+*/
+
+void hme_coarse_process_frm(
+    void *pv_me_ctxt,
+    hme_ref_map_t *ps_ref_map,
+    hme_frm_prms_t *ps_frm_prms,
+    void *pv_multi_thrd_ctxt,
+    WORD32 i4_ping_pong,
+    void **ppv_dep_mngr_hme_sync)
+{
+    coarse_me_ctxt_t *ps_ctxt = (coarse_me_ctxt_t *)pv_me_ctxt;
+    multi_thrd_ctxt_t *ps_mt_ctxt = (multi_thrd_ctxt_t *)pv_multi_thrd_ctxt;
+    coarse_prms_t s_coarse_prms;
+    refine_prms_t s_refine_prms;
+    S32 layer_id;
+    S32 lyr_job_type;
+    S32 log_start_step;
+    S32 is_below_medium_speed;
+    S16 i2_max;
+
+    /* Quality preset decides both the start step and the full search step. */
+    /* It is compared against ME_MEDIUM_SPEED once and the result reused.   */
+    is_below_medium_speed =
+        (ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets < ME_MEDIUM_SPEED);
+
+    /*************************************************************************/
+    /* Fire processing of all layers, starting with coarsest layer.          */
+    /*************************************************************************/
+    layer_id = ps_ctxt->num_layers - 1;
+    s_coarse_prms.i4_layer_id = layer_id;
+
+    /* Larger of the x and y max mv of the coarsest layer */
+    i2_max = ps_ctxt->ps_curr_descr->aps_layers[layer_id]->i2_max_mv_x;
+    i2_max = MAX(i2_max, ps_ctxt->ps_curr_descr->aps_layers[layer_id]->i2_max_mv_y);
+
+    /* Based on Preset, set the starting step size for Refinement */
+    log_start_step = is_below_medium_speed ? 0 : 1;
+    s_coarse_prms.i4_start_step = 1 << log_start_step;
+    s_coarse_prms.i4_max_iters = i2_max >> log_start_step;
+
+    s_coarse_prms.i4_num_ref = ps_ref_map->i4_num_ref;
+    s_coarse_prms.num_results = ps_ctxt->max_num_results_coarse;
+
+    /* Full search is always switched on here; its step is set to 2 or 4 */
+    s_coarse_prms.do_full_search = 1;
+    if(s_coarse_prms.do_full_search)
+    {
+        s_coarse_prms.full_search_step = is_below_medium_speed
+                                             ? HME_COARSE_STEP_SIZE_HIGH_QUALITY
+                                             : HME_COARSE_STEP_SIZE_HIGH_SPEED;
+    }
+
+    /* Coarse layer uses only 1 lambda, i.e. the one for open loop ME, */
+    /* scaled by (100 - ME_LAMBDA_DISCOUNT) / 100                      */
+    s_coarse_prms.lambda_q_shift = ps_frm_prms->lambda_q_shift;
+    s_coarse_prms.lambda = ps_frm_prms->i4_ol_sad_lambda_qf;
+    s_coarse_prms.lambda = ((float)s_coarse_prms.lambda * (100.0 - ME_LAMBDA_DISCOUNT) / 100.0);
+
+    /* Coarsest layer search */
+    hme_coarsest(ps_ctxt, &s_coarse_prms, ps_mt_ctxt, i4_ping_pong, ppv_dep_mngr_hme_sync);
+
+    /*************************************************************************/
+    /* This loop will run for all refine layers (non- encode layers), i.e.   */
+    /* from the layer next to the coarsest one down to layer 1. The job type */
+    /* starts one past the coarsest layer job type and goes up by one for    */
+    /* every layer processed.                                                */
+    /*************************************************************************/
+    lyr_job_type = ps_mt_ctxt->i4_me_coarsest_lyr_type + 1;
+    for(layer_id--; layer_id > 0; layer_id--, lyr_job_type++)
+    {
+        /* No intra cost buffers are supplied for these layers (NULL) */
+        hme_set_refine_prms(
+            &s_refine_prms,
+            ps_ctxt->u1_encode[layer_id],
+            ps_ref_map->i4_num_ref,
+            layer_id,
+            ps_ctxt->num_layers,
+            ps_ctxt->num_layers_explicit_search,
+            ps_ctxt->s_init_prms.use_4x4,
+            ps_frm_prms,
+            NULL,
+            &ps_ctxt->s_init_prms.s_me_coding_tools);
+
+        /* Refinement of the current layer */
+        hme_refine_no_encode(
+            ps_ctxt,
+            &s_refine_prms,
+            ps_mt_ctxt,
+            lyr_job_type,
+            i4_ping_pong,
+            ppv_dep_mngr_hme_sync);
+    }
+}
+/**
+********************************************************************************
+* @fn hme_fill_neighbour_mvs
+*
+* @brief HME neighbour MV population function (currently an empty stub)
+*
+* @param[in] pps_mv_grid : MV grid array pointer (not used at present)
+* @param[in] i4_ctb_x : CTB pos X (not used at present)
+* @param[in] i4_ctb_y : CTB pos Y (not used at present)
+* @param[in] i4_num_ref : not used at present
+* @param[in] pv_ctxt : not used at present
+*
+* @remarks : Needs to be populated for proper implementation of cost fxn
+*
+* @return void
+********************************************************************************
+*/
+void hme_fill_neighbour_mvs(
+ mv_grid_t **pps_mv_grid, S32 i4_ctb_x, S32 i4_ctb_y, S32 i4_num_ref, void *pv_ctxt)
+{
+ /* TODO : Needs to be populated for proper implementation of cost fxn */
+ ARG_NOT_USED(pps_mv_grid);
+ ARG_NOT_USED(i4_ctb_x);
+ ARG_NOT_USED(i4_ctb_y);
+ ARG_NOT_USED(i4_num_ref);
+ ARG_NOT_USED(pv_ctxt);
+}
+
+/**
+*******************************************************************************
+* @fn WORD32 hme_get_active_pocs_list(void *pv_me_ctxt,
+*                                     S32 i4_num_me_frm_pllel)
+*
+* @brief Tells whether all the ref descriptors of the ME ctxt are in use
+*
+* @param[in] pv_me_ctxt : handle to ME context
+*
+* @param[in] i4_num_me_frm_pllel : scales max_num_ref; the number of descriptors
+*            scanned is (max_num_ref * i4_num_me_frm_pllel) + 1
+*
+* @return 1 if every scanned descriptor is not free and has a valid POC, else 0
+*******************************************************************************
+*/
+WORD32 hme_get_active_pocs_list(void *pv_me_ctxt, S32 i4_num_me_frm_pllel)
+{
+    me_ctxt_t *ps_ctxt = (me_ctxt_t *)pv_me_ctxt;
+
+    /* Number of ref descriptors to scan; it does not change inside the */
+    /* loop, so it is computed only once                                */
+    S32 num_descr = (ps_ctxt->aps_me_frm_prms[0]->max_num_ref * i4_num_me_frm_pllel) + 1;
+    S32 i, count = 0;
+
+    for(i = 0; i < num_descr; i++)
+    {
+        S32 poc = ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_poc;
+        S32 i4_is_free = ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_is_free;
+
+        /* A descriptor is in use when it is not free and holds a valid POC */
+        if((i4_is_free == 0) && (poc != INVALID_POC))
+        {
+            count++;
+        }
+    }
+
+    /* 1 only when every scanned descriptor is in use */
+    return (count == num_descr) ? 1 : 0;
+}
+
+/**
+*******************************************************************************
+* @fn void hme_coarse_get_active_pocs_list(void *pv_me_ctxt,
+*                                       S32 *p_pocs_buffered_in_me)
+*
+* @brief Collects the POCs held by the ref descriptors of the coarse ME ctxt
+*
+* @param[in] pv_me_ctxt : handle to coarse ME context
+*
+* @param[out] p_pocs_buffered_in_me : array filled with every POC other than -1
+*             read from layer 1 of the descriptors; the list is ended by -1
+*
+* @return void
+*******************************************************************************
+*/
+void hme_coarse_get_active_pocs_list(void *pv_me_ctxt, S32 *p_pocs_buffered_in_me)
+{
+    coarse_me_ctxt_t *ps_coarse_ctxt = (coarse_me_ctxt_t *)pv_me_ctxt;
+    S32 *pi4_next_out = p_pocs_buffered_in_me;
+    S32 desc_id;
+
+    for(desc_id = 0; desc_id < ps_coarse_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME; desc_id++)
+    {
+        S32 i4_poc = ps_coarse_ctxt->as_ref_descr[desc_id].aps_layers[1]->i4_poc;
+
+        /* Entries whose POC is -1 are left out of the list */
+        if(-1 != i4_poc)
+            *pi4_next_out++ = i4_poc;
+    }
+    /* -1 marks the end of the list */
+    *pi4_next_out = -1;
+}
+
+S32 hme_get_blk_size(S32 use_4x4, S32 layer_id, S32 n_layers, S32 encode)
+{
+    /* Coarsest layer uses 4x4 blks; this check wins over the ones below */
+    S32 is_coarsest = (layer_id == (n_layers - 1));
+    /* Lowermost layer / encode layer uses 16x16 */
+    S32 is_l0_or_encode = ((0 == layer_id) || (0 != encode));
+
+    /* Intermediate non encode layers use 8. Note: use_4x4 is not read by */
+    /* this function                                                      */
+    return is_coarsest ? 4 : (is_l0_or_encode ? 16 : 8);
+}
diff --git a/encoder/hme_interface.h b/encoder/hme_interface.h
new file mode 100644
index 0000000..33c98fa
--- /dev/null
+++ b/encoder/hme_interface.h
@@ -0,0 +1,1035 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file hme_interface.h
+*
+* \brief
+* Interfaces exported by ME to the world outside of ME
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _HME_INTERFACE_H_
+#define _HME_INTERFACE_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ * @brief Maximum number of layers allowed
+******************************************************************************
+ */
+#define MAX_NUM_LAYERS 4
+
+/**
+******************************************************************************
+ * @brief layer max dimensions
+******************************************************************************
+ */
+#define HME_MAX_WIDTH 1920
+#define HME_MAX_HEIGHT 1088
+
+/**
+******************************************************************************
+ * @brief layer min dimensions
+******************************************************************************
+ */
+#define MIN_WD_COARSE 16
+#define MIN_HT_COARSE 16
+
+/**
+******************************************************************************
+ * @brief HME COARSE LAYER STEP SIZE
+******************************************************************************
+ */
+
+#define HME_COARSE_STEP_SIZE_HIGH_SPEED 4
+#define HME_COARSE_STEP_SIZE_HIGH_QUALITY 2
+
+/**
+******************************************************************************
+ * @brief Memtabs required by layer ctxt: each layer ctxt requires 1
+ * memtab for itself, 1 for mv bank, 1 for ref idx bank, 1
+ * for input buffer and 1 for storing segmentation info in
+ * worst case
+******************************************************************************
+ */
+#define HME_MEMTABS_COARSE_LAYER_CTXT (5 * (MAX_NUM_LAYERS - 1) * (MAX_NUM_REF + 1))
+
+/**
+******************************************************************************
+ * @brief Total number of memtabs required by HME. At least 22 memtabs
+ * for different search results structure, 2*MAX_NUM_REF memtabs
+ * for search nodes maintaining coarse layer results in prev
+ * row, and for histograms. Memtabs reqd for layer, me ctxt,
+ * ctb node mgr and buf mgr plus some 8 for safety.
+ * If multi threaded then some memtabs will be more
+******************************************************************************
+ */
+#define HME_COARSE_TOT_MEMTABS \
+ (22 + HME_MEMTABS_COARSE_LAYER_CTXT + (3 * MAX_NUM_REF) + 8 * MAX_NUM_FRM_PROC_THRDS_PRE_ENC + \
+ 1)
+
+/**
+******************************************************************************
+ * @brief Memtabs required by layer ctxt (enc): each layer ctxt requires 1
+ * memtab for itself, 1 for mv bank, 1 for ref idx bank, 1
+ * for input buffer and 1 for storing segmentation info in
+ * worst case
+******************************************************************************
+ */
+#define MIN_HME_MEMTABS_ENC_LAYER_CTXT (5 * 1 * (MAX_NUM_REF + 1))
+
+#define MAX_HME_MEMTABS_ENC_LAYER_CTXT (5 * 1 * (MAX_NUM_REF + 1 + MAX_NUM_ME_PARALLEL))
+
+/**
+******************************************************************************
+ * @brief Total number of memtabs required by HME. At least 22 memtabs
+ * for different search results structure, 2*MAX_NUM_REF memtabs
+ * for search nodes maintaining coarse layer results in prev
+ * row, and for histograms. Memtabs reqd for layer, me ctxt,
+ * ctb node mgr and buf mgr plus some 8 for safety.
+ * If multi threaded then some memtabs will be more
+******************************************************************************
+ */
+
+#define MIN_HME_ENC_TOT_MEMTABS \
+ (22 + MIN_HME_MEMTABS_ENC_LAYER_CTXT + (3 * MAX_NUM_REF) + 28 * MAX_NUM_FRM_PROC_THRDS_ENC + \
+ 2 /* Clustering */ + 1 /*traqo*/ + 1 /* ME Optimised Function List */)
+
+#define MAX_HME_ENC_TOT_MEMTABS \
+ ((22 * MAX_NUM_ME_PARALLEL) + MAX_HME_MEMTABS_ENC_LAYER_CTXT + \
+ (3 * MAX_NUM_REF * MAX_NUM_ME_PARALLEL) + \
+ 28 * MAX_NUM_FRM_PROC_THRDS_ENC * MAX_NUM_ME_PARALLEL + 2 /* Clustering */ + 1 /*traqo*/ + \
+ 1 /* ME Optimised Function List */)
+
+/*****************************************************************************/
+/* Enumerations */
+/*****************************************************************************/
+/**
+******************************************************************************
+ * @enum HME_MEM_ATTRS_T
+ * @brief Contains type of memory: scratch, scratch ovly, persistent
+******************************************************************************
+ */
+typedef enum
+{
+ HME_SCRATCH_MEM,
+ HME_SCRATCH_OVLY_MEM,
+ HME_PERSISTENT_MEM
+} HME_MEM_ATTRS_T;
+
+/**
+******************************************************************************
+ * @enum ME_QUALITY_PRESETS_T
+ * @brief Describes the source for values in me_quality_params_t struct
+******************************************************************************
+ */
+typedef enum
+{
+ ME_PRISTINE_QUALITY = 0,
+ ME_HIGH_QUALITY = 2,
+ ME_MEDIUM_SPEED,
+ ME_HIGH_SPEED,
+ ME_XTREME_SPEED,
+ ME_XTREME_SPEED_25,
+ ME_USER_DEFINED
+} ME_QUALITY_PRESETS_T;
+
+/*****************************************************************************/
+/* Structures */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ * @struct hme_ref_buf_info_t
+ * @brief Contains all required information of a ref picture
+ * Valid for a given layer.
+******************************************************************************
+ */
+typedef struct
+{
+ /** Amt of padding in X direction both sides. */
+ U08 u1_pad_x;
+
+ /** Amt of padding in Y direction both sides */
+ U08 u1_pad_y;
+
+ /** Recon stride, in pixels */
+ S32 luma_stride;
+
+ /** Offset w.r.t. actual start of the buffer */
+ S32 luma_offset;
+
+ /** Src ptrs of the reference pictures*/
+ U08 *pu1_ref_src;
+
+ /** Reference ptrs for fpel plane, needed for this layer closed loop ME */
+ U08 *pu1_rec_fxfy;
+
+ /** Reference ptrs for hxfy plane (x = k+0.5, y = m) */
+ U08 *pu1_rec_hxfy;
+
+ /** Reference ptrs for fxhy plane (x = k, y = m + 0.5) */
+ U08 *pu1_rec_fxhy;
+
+ /** Reference ptrs for hxhy plane (x = k + 0.5, y = m + 0.5) */
+ U08 *pu1_rec_hxhy;
+
+ /** Reference ptr for u plane */
+ U08 *pu1_rec_u;
+
+ /** Reference ptr for v plane */
+ U08 *pu1_rec_v;
+
+ /** chroma plane stride in pixels */
+ S32 chroma_stride;
+
+ S32 chroma_offset;
+
+ /** Pointer to dependency manager of recon buffer */
+ void *pv_dep_mngr;
+
+} hme_ref_buf_info_t;
+
+/**
+******************************************************************************
+ * @struct interp_prms_t
+ * @brief All parameters for the interpolation function
+******************************************************************************
+ */
+typedef struct
+{
+ /** Array of ptr of 4 planes in order fxfy, hxfy, fxhy, hxhy */
+ U08 **ppu1_ref;
+
+ /**
+ * Array of pointers for ping-pong buffers, used to store interp out
+ * Output during a call goes to any one of these buffers
+ */
+ U08 *apu1_interp_out[5];
+
+ /**
+ * Working memory to store 16 bit intermediate output. This has to be
+ * of size i4_blk_wd * (i4_blk_ht + 7) * 2
+ */
+ U08 *pu1_wkg_mem;
+
+ /** Stride of all 4 planes of ref buffers */
+ S32 i4_ref_stride;
+
+ /** Width of interpolated output blk desired */
+ S32 i4_blk_wd;
+
+ /** Ht of interpolated output blk desired */
+ S32 i4_blk_ht;
+
+ /**
+ * Stride of interpolated output buffers,
+ * applicable for both ping and pong
+ */
+ S32 i4_out_stride;
+
+ /** Final output pointer, which may be one of ping-pong or hpel planes */
+ U08 *pu1_final_out;
+
+ /** Stride of the final output buffer */
+ S32 i4_final_out_stride;
+
+} interp_prms_t;
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+typedef void (*PF_EXT_UPDATE_FXN_T)(void *, void *, S32, S32);
+
+//typedef void (*PF_GET_INTRA_CU_AND_COST)(void *, S32, S32, S32 *, S32*, double *, S32);
+
+typedef void (*PF_INTERP_FXN_T)(interp_prms_t *ps_prms, S32 i4_mv_x, S32 i4_mv_y, S32 interp_buf_id);
+
+typedef void (*PF_SCALE_FXN_T)(
+ U08 *pu1_src, S32 src_stride, U08 *pu1_dst, S32 dst_stride, S32 wd, S32 ht, U08 *pu1_wkg_mem);
+
+/**
+******************************************************************************
+ * @struct hme_ref_desc_t
+ * @brief Contains all reqd information for ref pics across all layers
+ * but for a given POC/ref id
+******************************************************************************
+ */
+typedef struct
+{
+ /**
+ * Reference id in LC list. This is a unified list containing both fwd
+ * and backward direction references. Having a unified list just does
+ * a unique mapping of frames to ref id and eases out addressing in the
+ * ME search.
+ */
+ S08 i1_ref_id_lc;
+
+ /**
+ * Reference id in L0 list. Priority is given to temporally fwd dirn
+ * unless of a scene change like case
+ */
+ S08 i1_ref_id_l0;
+
+ /**
+ * Reference id in L1 list. Priority to backward dirn unless scene change
+ * like case
+ */
+ S08 i1_ref_id_l1;
+
+ /** Whether this ref is temporally forward w.r.t. current pic */
+ U08 u1_is_fwd;
+
+ /** POC of this ref pic. */
+ S32 i4_poc;
+
+ /** display_num of this ref pic. */
+ S32 i4_display_num;
+ /**
+ * Lambda to be used for S + lambda*bits style cost computations when
+ * using this ref pic. This is a function of ref dist and hence diff
+ * ref has diff lambda
+ */
+ S32 lambda;
+
+ /** Ref buffer info for all layers */
+ hme_ref_buf_info_t as_ref_info[MAX_NUM_LAYERS];
+
+ /** Weights and offset of reference picture
+ * used for weighted pred analysis
+ */
+ S16 i2_weight;
+
+ S16 i2_offset;
+
+ /*
+ * IDR GOP number
+ */
+
+ WORD32 i4_GOP_num;
+
+} hme_ref_desc_t;
+
+/**
+******************************************************************************
+ * @struct hme_ref_map_t
+ * @brief Complete ref information across all layers and POCs
+ * Information valid for a given inp frame with a given POC.
+******************************************************************************
+ */
+typedef struct
+{
+ /** Number of active ref pictures in LC list */
+ S32 i4_num_ref;
+
+ /** Recon Pic buffer pointers for L0 list */
+ recon_pic_buf_t **pps_rec_list_l0;
+
+ /** Recon Pic buffer pointers for L1 list */
+ recon_pic_buf_t **pps_rec_list_l1;
+
+ /** Reference descriptors for all ref pics */
+ hme_ref_desc_t as_ref_desc[MAX_NUM_REF];
+
+} hme_ref_map_t;
+
+/**
+ ******************************************************************************
+ * @struct me_coding_params_t
+ * @param e_me_quality_presets : Quality preset value
+ * @brief ME Parameters that affect quality depending on their state
+ ******************************************************************************
+*/
+typedef struct
+{
+ ME_QUALITY_PRESETS_T e_me_quality_presets;
+
+ S32 i4_num_steps_hpel_refine;
+
+ S32 i4_num_steps_qpel_refine;
+
+ U08 u1_l0_me_controlled_via_cmd_line;
+
+ U08 u1_num_results_per_part_in_l0me;
+
+ U08 u1_num_results_per_part_in_l1me;
+
+ U08 u1_num_results_per_part_in_l2me;
+
+ U08 u1_max_num_coloc_cands;
+
+ U08 u1_max_2nx2n_tu_recur_cands;
+
+ U08 u1_max_num_fpel_refine_centers;
+
+ U08 u1_max_num_subpel_refine_centers;
+} me_coding_params_t;
+
+/**
+ ******************************************************************************
+ * @struct hme_init_prms_t
+ * @brief Initialization parameters used during HME instance creation
+ ******************************************************************************
+*/
+typedef struct
+{
+ /** Widths of various simulcast layers,
+ * starting with biggest resolution
+ */
+ S32 a_wd[MAX_NUM_LAYERS];
+
+ /** Heights of various simulcast layers,
+ * starting with biggest resolution
+ */
+ S32 a_ht[MAX_NUM_LAYERS];
+
+ /** Maximum number of reference frames that a frame ever has to search */
+ S32 max_num_ref;
+
+ /** Number of results to be stored in the coarsest layer */
+ S32 max_num_results_coarse;
+
+ /**
+ * Number of layers for which explicit ME is to be done
+ * 0 or MAX_NUM_LAYERS: encoder will do explicit ME for all layers
+ * anything in between, explicit ME done for that many layers
+ */
+ S32 num_layers_explicit_search;
+
+ /** Number of simulcast layers to be encoded */
+ S32 num_simulcast_layers;
+
+ /** Maximum number of results per reference per partition */
+ S32 max_num_results;
+
+ /**
+ * If enabled, all layers store segmentation info at 16x16 lvl
+ * If not enabled, then only finest layer stores this info
+ */
+ S32 segment_higher_layers;
+
+ /**
+ * If enabled, the non encode layers use 8x8 blks with 4x4 partial
+ * sads also being evaluated, which is more powerful but computationally
+ * less efficient
+ */
+ S32 use_4x4;
+
+ /**
+ * Number of B frames allowed between P frames
+ */
+ S32 num_b_frms;
+
+ /** CTB Size as passed by encoder */
+ S32 log_ctb_size;
+
+ /** number of threads created run time */
+ S32 i4_num_proc_thrds;
+
+ /* This struct contains fields corresponding to quality knobs for ME */
+ me_coding_params_t s_me_coding_tools;
+
+ S32 max_vert_search_range;
+
+ S32 max_horz_search_range;
+
+ S32 is_interlaced;
+
+ U08 u1_max_tr_depth;
+
+ U08 u1_is_stasino_enabled;
+
+ IV_ARCH_T e_arch_type;
+} hme_init_prms_t;
+
+/**
+ ******************************************************************************
+ * @struct hme_frm_prms_t
+ * @brief Frame level prms for HME execution
+ ******************************************************************************
+*/
+typedef struct
+{
+ /** Range of the Motion vector in fpel units at finest layer x dirn */
+ S16 i2_mv_range_x;
+
+ /** range of motion vector in fpel units at finest layer y dirn */
+ S16 i2_mv_range_y;
+
+ /** Context for computing the cost function */
+ void *pv_mv_cost_ctxt;
+
+ /** Interpolation function pointers */
+ PF_INTERP_FXN_T pf_interp_fxn;
+
+ U08 is_i_pic;
+
+ S32 bidir_enabled;
+
+ S32 i4_temporal_layer_id;
+
+ /**
+ * Lambda values in Q format. 4 values exist: Closed loop SATD/SAD
+ * and open loop SATD/SAD
+ */
+ S32 i4_cl_sad_lambda_qf;
+ S32 i4_cl_satd_lambda_qf;
+ S32 i4_ol_sad_lambda_qf;
+ S32 i4_ol_satd_lambda_qf;
+
+ /** Shift for lambda QFormat */
+ S32 lambda_q_shift;
+
+ S32 qstep;
+ S32 qstep_ls8;
+ S32 i4_frame_qp;
+ S32 is_pic_second_field;
+
+ /**
+ * Number of active references in l0
+ */
+ U08 u1_num_active_ref_l0;
+
+ /**
+ * Number of active references in l1
+ */
+ U08 u1_num_active_ref_l1;
+
+ /* Flag that specifies whether CU level QP */
+ /* modulation is enabled */
+ U08 u1_is_cu_qp_delta_enabled;
+
+} hme_frm_prms_t;
+
+/**
+ ******************************************************************************
+ * @struct hme_memtab_t
+ * @brief Structure to return memory requirements for one buffer.
+ ******************************************************************************
+*/
+typedef struct
+{
+ /** Base of the memtab. Filled by application */
+ U08 *pu1_mem;
+
+ /** Required size of the memtab. Filled by module */
+ S32 size;
+
+ /** Alignment required */
+ S32 align;
+
+ /** type of memory */
+ HME_MEM_ATTRS_T e_mem_attr;
+
+} hme_memtab_t;
+
+/**
+ ******************************************************************************
+ * @struct hme_inp_buf_attr_t
+ * @brief Attributes of input buffer and planes
+ ******************************************************************************
+*/
+typedef struct
+{
+ /** Luma ptr 0, 0 position */
+ U08 *pu1_y;
+
+ /** Cb component or U component, 0, 0 position */
+ U08 *pu1_u;
+
+ /** Cr component or V component, 0, 0 position */
+ U08 *pu1_v;
+
+ /** Stride of luma component in pixels */
+ S32 luma_stride;
+
+ /** Stride of chroma component in pixels */
+ S32 chroma_stride;
+} hme_inp_buf_attr_t;
+
+/**
+ ******************************************************************************
+ * @struct hme_inp_desc_t
+ * @brief Descriptor of a complete input frames (all simulcast layers incl)
+ ******************************************************************************
+*/
+typedef struct
+{
+ /** input attributes for all simulcast layers */
+ hme_inp_buf_attr_t s_layer_desc[MAX_NUM_LAYERS];
+
+ /** POC of the current input frame */
+ S32 i4_poc;
+
+ /** idr GOP number*/
+ S32 i4_idr_gop_num;
+
+ /** is reference picture */
+ S32 i4_is_reference;
+
+} hme_inp_desc_t;
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+/**
+********************************************************************************
+* @fn hme_enc_num_alloc()
+*
+* @brief returns number of memtabs that is required by hme module
+*
+* @return Number of memtabs required
+********************************************************************************
+*/
+S32 hme_enc_num_alloc(WORD32 i4_num_me_frm_pllel);
+
+/**
+********************************************************************************
+* @fn S32 hme_coarse_num_alloc(void)
+*
+* @brief returns number of memtabs that is required by hme module.
+*        Takes no arguments; declared with (void) so that it is a prototype
+* @return Number of memtabs required
+********************************************************************************
+*/
+S32 hme_coarse_num_alloc(void);
+
+/**
+********************************************************************************
+* @fn WORD32 hme_coarse_dep_mngr_num_alloc(void)
+*
+* @brief returns number of memtabs that is required by Dep Mngr for hme module.
+*        Takes no arguments; declared with (void) so that it is a prototype
+* @return Number of memtabs required
+********************************************************************************
+*/
+WORD32 hme_coarse_dep_mngr_num_alloc(void);
+
+/**
+********************************************************************************
+* @fn S32 hme_coarse_alloc(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms)
+*
+* @brief Fills up memtabs with memory information details required by HME
+*
+* @param[out] ps_memtabs : Pointer to an array of memtabs where module fills
+* up its requirements of memory
+*
+* @param[in] ps_prms : Input parameters to module crucial in calculating reqd
+* amt of memory
+*
+* @return Number of memtabs required
+*******************************************************************************
+*/
+S32 hme_coarse_alloc(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms);
+
+/**
+*******************************************************************************
+* @fn hme_coarse_dep_mngr_alloc
+*
+* @brief Fills up memtabs with memory information details required by Coarse HME
+*
+* \param[in,out] ps_mem_tab : pointer to memory descriptors table
+* \param[in] ps_init_prms : Create time static parameters
+* \param[in] i4_mem_space : memspace in which memory request should be done
+*
+* @return Number of memtabs required
+*******************************************************************************
+*/
+WORD32 hme_coarse_dep_mngr_alloc(
+ iv_mem_rec_t *ps_mem_tab,
+ ihevce_static_cfg_params_t *ps_init_prms,
+ WORD32 i4_mem_space,
+ WORD32 i4_num_proc_thrds,
+ WORD32 i4_resolution_id);
+
+/**
+********************************************************************************
+* @fn S32 hme_enc_alloc(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms, WORD32 i4_num_me_frm_pllel)
+*
+* @brief Fills up memtabs with memory information details required by HME
+*
+* @param[out] ps_memtabs : Pointer to an array of memtabs where module fills
+* up its requirements of memory
+*
+* @param[in] ps_prms : Input parameters to module crucial in calculating reqd
+* amt of memory
+*
+* @return Number of memtabs required
+*******************************************************************************
+*/
+S32 hme_enc_alloc(hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms, WORD32 i4_num_me_frm_pllel);
+
+/**
+********************************************************************************
+* @fn S32 hme_enc_init(void *pv_ctxt, hme_memtab_t *ps_memtabs,
+* hme_init_prms_t *ps_prms, rc_quant_t *ps_rc_quant_ctxt,
+* WORD32 i4_num_me_frm_pllel);
+*
+* @brief Initialization (one time) of HME
+*
+* @param[in,out] pv_ctxt : Pointer to context of HME
+*
+* @param[in] ps_memtabs : updated memtabs by application (allocated memory)
+*
+* @param[in] ps_prms : Initialization parameters
+*
+* @return 0 : success, -1 : failure
+*******************************************************************************
+*/
+S32 hme_enc_init(
+ void *pv_ctxt,
+ hme_memtab_t *ps_memtabs,
+ hme_init_prms_t *ps_prms,
+ rc_quant_t *ps_rc_quant_ctxt,
+ WORD32 i4_num_me_frm_pllel);
+
+/**
+********************************************************************************
+* @fn S32 hme_coarse_init(void *pv_ctxt,
+* hme_memtab_t *ps_memtabs,
+* hme_init_prms_t *ps_prms);
+*
+* @brief Initialization (one time) of HME
+*
+* @param[in,out] pv_ctxt : Pointer to context of HME
+*
+* @param[in] ps_memtabs : updated memtabs by application (allocated memory)
+*
+* @param[in] ps_prms : Initialization parameters
+*
+* @return 0 : success, -1 : failure
+*******************************************************************************
+*/
+S32 hme_coarse_init(void *pv_ctxt, hme_memtab_t *ps_memtabs, hme_init_prms_t *ps_prms);
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_coarse_me_get_lyr_prms_dep_mngr \endif
+*
+* \brief Returns to the caller key attributes relevant for dependency manager,
+* ie, the number of vertical units in each layer
+*
+* \par Description:
+* This function requires the precondition that the width and ht of encode
+* layer is known.
+* The number of layers, number of vertical units in each layer, and for
+* each vertical unit in each layer, its dependency on previous layer's units
+* From ME's perspective, a vertical unit is one which is smallest min size
+* vertically (and spans the entire row horizontally). This is CTB for encode
+* layer, and 8x8 / 4x4 for non encode layers.
+*
+* \param[in] num_layers : Number of ME Layers
+* \param[in] pai4_ht : Array storing ht at each layer
+* \param[in] pai4_wd : Array storing wd at each layer
+* \param[out] pai4_num_vert_units_in_lyr : Array of size N (num layers), each
+* entry has num vertical units in that particular layer
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_coarse_me_get_lyr_prms_dep_mngr(
+ WORD32 num_layers, WORD32 *pai4_ht, WORD32 *pai4_wd, WORD32 *pai4_num_vert_units_in_lyr);
+
+/**
+********************************************************************************
+* @fn hme_coarse_dep_mngr_alloc_mem()
+*
+* @brief Requests/ assign memory for HME Dep Mngr
+*
+* \param[in,out] ps_mem_tab : pointer to memory descriptors table
+* \param[in] ps_init_prms : Create time static parameters
+* \param[in] i4_mem_space : memspace in which memory request should be done
+*
+* @return number of memtabs
+********************************************************************************
+*/
+WORD32 hme_coarse_dep_mngr_alloc_mem(
+ iv_mem_rec_t *ps_mem_tab,
+ ihevce_static_cfg_params_t *ps_init_prms,
+ WORD32 i4_mem_space,
+ WORD32 i4_num_proc_thrds,
+ WORD32 i4_resolution_id);
+
+/**
+********************************************************************************
+* @fn hme_coarse_dep_mngr_init()
+*
+* @brief Assign memory for HME Dep Mngr
+*
+* \param[in,out] ps_mem_tab : pointer to memory descriptors table
+* \param[in] ps_init_prms : Create time static parameters
+* @param[in] pv_ctxt : ME ctxt
+* \param[in] pv_osal_handle : Osal handle
+*
+* @return number of memtabs
+********************************************************************************
+*/
+WORD32 hme_coarse_dep_mngr_init(
+ iv_mem_rec_t *ps_mem_tab,
+ ihevce_static_cfg_params_t *ps_init_prms,
+ void *pv_ctxt,
+ void *pv_osal_handle,
+ WORD32 i4_num_proc_thrds,
+ WORD32 i4_resolution_id);
+
+/**
+********************************************************************************
+* @fn hme_coarse_dep_mngr_reg_sem()
+*
+* @brief Assign semaphores for HME Dep Mngr
+*
+* \param[in] pv_ctxt : pointer to Coarse ME ctxt
+* \param[in] ppv_sem_hdls : Array of semaphore handles
+* \param[in] i4_num_proc_thrds : Number of processing threads
+*
+* @return none
+********************************************************************************
+*/
+void hme_coarse_dep_mngr_reg_sem(void *pv_ctxt, void **ppv_sem_hdls, WORD32 i4_num_proc_thrds);
+
+/**
+********************************************************************************
+* @fn hme_coarse_dep_mngr_delete()
+*
+* Destroy Coarse ME Dep Mngr module
+* Note : Only Destroys the resources allocated in the module like
+* semaphore,etc. Memory free is done Separately using memtabs
+*
+* \param[in] pv_me_ctxt : pointer to Coarse ME ctxt
+* \param[in] ps_init_prms : Create time static parameters
+*
+* @return none
+********************************************************************************
+*/
+void hme_coarse_dep_mngr_delete(
+ void *pv_me_ctxt, ihevce_static_cfg_params_t *ps_init_prms, WORD32 i4_resolution_id);
+
+void hme_coarse_get_layer1_mv_bank_ref_idx_size(
+ S32 n_tot_layers,
+ S32 *a_wd,
+ S32 *a_ht,
+ S32 max_num_ref,
+ S32 *pi4_mv_bank_size,
+ S32 *pi4_ref_idx_size);
+
+/**
+********************************************************************************
+* @fn S32 hme_add_inp(void *pv_ctxt,
+* hme_inp_desc_t *ps_inp_desc);
+*
+* @brief Updates the HME context with details of the input buffers and POC.
+* Layers that are not encoded are processed further in terms of
+* pyramid generation.
+*
+* @param[in,out] pv_ctxt : Pointer to context of HME
+*
+* @param[in] ps_inp_desc : Input descriptor containing information of all
+* simulcast layers of input.
+*
+* @return void
+*******************************************************************************
+*/
+void hme_add_inp(void *pv_ctxt, hme_inp_desc_t *ps_inp_desc, S32 me_frm_id, WORD32 thrd_id);
+
+void hme_coarse_add_inp(void *pv_me_ctxt, hme_inp_desc_t *ps_inp_desc, WORD32 i4_curr_idx);
+
+/**
+********************************************************************************
+* @fn hme_process_frm_init
+*
+* @brief HME frame level initialisation processing function
+*
+* @param[in] pv_me_ctxt : ME ctxt pointer
+*
+* @param[in] ps_ref_map : Reference map prms pointer
+*
+* @param[in] ps_frm_prms :Pointer to frame params
+*
+* @return void
+********************************************************************************
+*/
+
+void hme_process_frm_init(
+ void *pv_me_ctxt,
+ hme_ref_map_t *ps_ref_map,
+ hme_frm_prms_t *ps_frm_prms,
+ WORD32 me_frm_id,
+ WORD32 i4_num_me_frm_pllel);
+
+void hme_coarse_process_frm_init(
+ void *pv_me_ctxt, hme_ref_map_t *ps_ref_map, hme_frm_prms_t *ps_frm_prms);
+
+/**
+********************************************************************************
+* @fn void hme_process_frm(void *pv_ctxt,
+* hme_ref_map_t *ps_ref_map,
+* U16 **ppu2_intra_cost,
+* hme_frm_prms_t *ps_frm_prms);
+*
+* @brief Processes all the layers of the input, and updates the MV Banks.
+* Note that this function is not to be called if processing of a single
+* layer is desired.
+*
+* @param[in,out] pv_ctxt : Pointer to context of HME
+*
+* @param[in] ps_ref_map : Map structure that has for current input, lists of
+* ref pics (POC) mapping to LC, L0 and L1, and buffer ptrs as well
+* Information for all simulcast layers present.
+*
+* @param[in] ppu2_intra_cost : array of Pointer to intra cost evaluated at an
+* 8x8 level, stored in raster order. At each layer, the
+* corresponding ptr points to raster ordered array of wdxht/64,
+* wd and ht are layer width and ht respectively. Also, note that
+* ppu2_intra_cost[0] points to biggest resolution layer,
+* and from there on in decreasing order of size.
+*
+* @param[in] ps_frm_prms : input frame parameters (excluding ref info) that
+* control the search complexity. Refer to hme_frm_prms_t for more
+* info regards the same.
+*
+* @return void
+*******************************************************************************
+*/
+
+void hme_process_frm(
+ void *pv_me_ctxt,
+ pre_enc_L0_ipe_encloop_ctxt_t *ps_l0_ipe_input,
+ hme_ref_map_t *ps_ref_map,
+ double **ppd_intra_costs,
+ hme_frm_prms_t *ps_frm_prms,
+ PF_EXT_UPDATE_FXN_T pf_ext_update_fxn,
+ //PF_GET_INTRA_CU_AND_COST pf_get_intra_cu_and_cost,
+ void *pv_coarse_layer,
+ void *pv_multi_thrd_ctxt,
+ WORD32 i4_frame_parallelism_level,
+ S32 thrd_id,
+ S32 i4_me_frm_id);
+
+void hme_coarse_process_frm(
+ void *pv_me_ctxt,
+ hme_ref_map_t *ps_ref_map,
+ hme_frm_prms_t *ps_frm_prms,
+ void *pv_multi_thrd_ctxt,
+ WORD32 i4_ping_pong,
+ void **ppv_dep_mngr_hme_sync);
+
+void hme_discard_frm(
+ void *pv_ctxt, S32 *p_pocs_to_remove, S32 i4_idr_gop_num, S32 i4_num_me_frm_pllel);
+
+void hme_coarse_discard_frm(void *pv_me_ctxt, S32 *p_pocs_to_remove);
+
+/**
+*******************************************************************************
+* @fn S32 hme_set_resolution(void *pv_me_ctxt,
+* S32 n_enc_layers,
+* S32 *p_wd,
+* S32 *p_ht
+*
+* @brief Sets up the layers based on resolution information.
+*
+* @param[in, out] pv_me_ctxt : ME handle, updated with the resolution info
+*
+* @param[in] n_enc_layers : Number of layers encoded
+*
+* @param[in] p_wd : Pointer to an array having widths for each encode layer
+*
+* @param[in] p_ht : Pointer to an array having heights for each encode layer
+*
+* @return void
+*******************************************************************************
+*/
+
+void hme_set_resolution(void *pv_me_ctxt, S32 n_enc_layers, S32 *p_wd, S32 *p_ht, S32 me_frm_id);
+
+void hme_coarse_set_resolution(void *pv_me_ctxt, S32 n_enc_layers, S32 *p_wd, S32 *p_ht);
+
+/**
+*******************************************************************************
+* @fn WORD32 hme_get_active_pocs_list(void *pv_me_ctxt)
+*
+* @brief Returns the list of active POCs in ME ctxt
+*
+* @param[in] pv_me_ctxt : handle to ME context
+*
+* @param[out] p_pocs_buffered_in_me : pointer to an array which this fxn
+* populates with pocs active
+*
+* @return void
+*******************************************************************************
+*/
+WORD32 hme_get_active_pocs_list(void *pv_me_ctxt, S32 i4_num_me_frm_pllel);
+
+void hme_coarse_get_active_pocs_list(void *pv_me_ctxt, S32 *p_pocs_buffered_in_me);
+
+S32 hme_get_blk_size(S32 use_4x4, S32 layer_id, S32 n_layers, S32 encode);
+
+/**
+********************************************************************************
+* @fn hme_get_mv_blk_size()
+*
+* @brief returns whether blk uses 4x4 size or something else.
+*
+* @param[in] enable_4x4 : input param from application to enable 4x4
+*
+* @param[in] layer_id : id of current layer (0 finest)
+*
+* @param[in] num_layers : total num layers
+*
+* @param[in] is_enc : Whether encoding enabled for layer
+*
+* @return 1 for 4x4 blks, 0 for 8x8
+********************************************************************************
+*/
+S32 hme_get_mv_blk_size(S32 enable_4x4, S32 layer_id, S32 num_layers, S32 is_enc);
+
+void hme_set_refine_prms(
+ void *pv_refine_prms,
+ U08 u1_encode,
+ S32 num_ref,
+ S32 layer_id,
+ S32 num_layers,
+ S32 num_layers_explicit_search,
+ S32 use_4x4,
+ hme_frm_prms_t *ps_frm_prms,
+ double **ppd_intra_costs,
+ me_coding_params_t *ps_me_coding_tools);
+
+S32 hme_coarse_find_free_descr_idx(void *pv_ctxt);
+
+S32 hme_derive_num_layers(S32 n_enc_layers, S32 *p_wd, S32 *p_ht, S32 *p_disp_wd, S32 *p_disp_ht);
+
+#endif /* #ifndef _HME_INTERFACE_H_ */
diff --git a/encoder/hme_refine.c b/encoder/hme_refine.c
new file mode 100644
index 0000000..c8e6b38
--- /dev/null
+++ b/encoder/hme_refine.c
@@ -0,0 +1,10667 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+******************************************************************************
+* @file hme_refine.c
+*
+* @brief
+* Contains the implementation of the refinement layer searches and related
+* functionality like CU merge.
+*
+* @author
+* Ittiam
+*
+*
+* List of Functions
+*
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+#include <limits.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_multi_thrd_funcs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_entropy_structs.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+#include "ihevce_enc_loop_structs.h"
+#include "ihevce_bs_compute_ctb.h"
+#include "ihevce_global_tables.h"
+#include "ihevce_dep_mngr_interface.h"
+#include "hme_datatype.h"
+#include "hme_interface.h"
+#include "hme_common_defs.h"
+#include "hme_defs.h"
+#include "ihevce_me_instr_set_router.h"
+#include "hme_globals.h"
+#include "hme_utils.h"
+#include "hme_coarse.h"
+#include "hme_fullpel.h"
+#include "hme_subpel.h"
+#include "hme_refine.h"
+#include "hme_err_compute.h"
+#include "hme_common_utils.h"
+#include "hme_search_algo.h"
+#include "ihevce_stasino_helpers.h"
+#include "ihevce_common_utils.h"
+
+/*****************************************************************************/
+/* Globals */
+/*****************************************************************************/
+
+/* brief: mapping buffer to convert raster scan indices into z-scan order in a ctb */
+UWORD8 gau1_raster_scan_to_ctb[4][4] = {
+ { 0, 4, 16, 20 }, { 8, 12, 24, 28 }, { 32, 36, 48, 52 }, { 40, 44, 56, 60 }
+};
+
+/*****************************************************************************/
+/* Extern Function declaration */
+/*****************************************************************************/
+extern ctb_boundary_attrs_t *
+ get_ctb_attrs(S32 ctb_start_x, S32 ctb_start_y, S32 pic_wd, S32 pic_ht, me_frm_ctxt_t *ps_ctxt);
+
+typedef void (*PF_HME_PROJECT_COLOC_CANDT_FXN)(
+ search_node_t *ps_search_node,
+ layer_ctxt_t *ps_curr_layer,
+ layer_ctxt_t *ps_coarse_layer,
+ S32 i4_pos_x,
+ S32 i4_pos_y,
+ S08 i1_ref_id,
+ S32 i4_result_id);
+
+typedef void (*PF_HME_PROJECT_COLOC_CANDT_L0_ME_FXN)(
+ search_node_t *ps_search_node,
+ layer_ctxt_t *ps_curr_layer,
+ layer_ctxt_t *ps_coarse_layer,
+ S32 i4_pos_x,
+ S32 i4_pos_y,
+ S32 i4_num_act_ref_l0,
+ U08 u1_pred_dir,
+ U08 u1_default_ref_id,
+ S32 i4_result_id);
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+********************************************************************************
+* @fn ihevce_no_wt_copy()
+*
+* @brief Copies a temp_stride x temp_stride block, displaced by the PU's MV,
+*        from a buffer of ps_curr_layer->ppu1_list_inp into pu1_temp_pred
+*
+* @param[in] ps_ctxt : coarse ME ctxt (not referenced by this function)
+* @param[in] ps_curr_layer : layer ctxt supplying ppu1_list_inp and i4_inp_stride
+* @param[in] ps_pu : PU predicted from L0 or L1; gives the ref idx and the MV
+* @param[out] pu1_temp_pred : destination, written with a pitch of temp_stride
+* @param[in] temp_stride : destination pitch, also the copied width and height
+* @param[in] blk_x : horizontal block index, multiplied by 8 to get the column
+* @param[in] blk_y : vertical block index, multiplied by 8 to get the row
+*
+* @return none
+*
+* @remarks Each pel is passed through CLIP_U8 although it is read from a
+*          UWORD8 buffer
+********************************************************************************
+*/
+void ihevce_no_wt_copy(
+    coarse_me_ctxt_t *ps_ctxt,
+    layer_ctxt_t *ps_curr_layer,
+    pu_t *ps_pu,
+    UWORD8 *pu1_temp_pred,
+    WORD32 temp_stride,
+    WORD32 blk_x,
+    WORD32 blk_y)
+{
+    UWORD8 *pu1_src;
+    WORD32 i4_src_stride = ps_curr_layer->i4_inp_stride;
+    WORD32 i4_mv_x, i4_mv_y;
+    WORD32 i4_y, i4_x, i4_pel;
+    WORD8 i1_ref_idx;
+
+    ASSERT((ps_pu->b2_pred_mode == PRED_L0) || (ps_pu->b2_pred_mode == PRED_L1));
+
+    /* Only the ref idx and the MV differ between L0 and L1 prediction */
+    if(PRED_L0 == ps_pu->b2_pred_mode)
+    {
+        i1_ref_idx = ps_pu->mv.i1_l0_ref_idx;
+        i4_mv_x = ps_pu->mv.s_l0_mv.i2_mvx;
+        i4_mv_y = ps_pu->mv.s_l0_mv.i2_mvy;
+    }
+    else
+    {
+        i1_ref_idx = ps_pu->mv.i1_l1_ref_idx;
+        i4_mv_x = ps_pu->mv.s_l1_mv.i2_mvx;
+        i4_mv_y = ps_pu->mv.s_l1_mv.i2_mvy;
+    }
+
+    /* First source pel: block origin displaced by the MV */
+    pu1_src = ps_curr_layer->ppu1_list_inp[i1_ref_idx];
+    pu1_src += (((blk_y << 3) + i4_mv_y) * i4_src_stride) + ((blk_x << 3) + i4_mv_x);
+
+    for(i4_y = 0; i4_y < temp_stride; i4_y++)
+    {
+        for(i4_x = 0; i4_x < temp_stride; i4_x++)
+        {
+            i4_pel = pu1_src[i4_x];
+            pu1_temp_pred[i4_x] = CLIP_U8(i4_pel);
+        }
+
+        pu1_src += i4_src_stride;
+        pu1_temp_pred += temp_stride;
+    }
+}
+
+/* Appends the MVs of each cluster whose pu1_refid_to_pred_dir_list[ref_id], negated, */
+/* equals u1_pred_dir. Every MV is clipped to the range of its ref id and dropped if */
+/* the list already holds it. Returns the number of candidates in ps_merge_cand. */
+static WORD32 hme_add_clustered_mvs_as_merge_cands(
+    cluster_data_t *ps_cluster_base,
+    search_node_t *ps_merge_cand,
+    range_prms_t **pps_range_prms,
+    U08 *pu1_refid_to_pred_dir_list,
+    WORD32 i4_num_clusters,
+    U08 u1_pred_dir)
+{
+    WORD32 i, j, k;
+    WORD32 i4_num_cands_added = 0;
+
+    for(i = 0; i < i4_num_clusters; i++)
+    {
+        cluster_data_t *ps_data = &ps_cluster_base[i];
+        WORD32 i4_num_mvs_in_cluster = ps_data->num_mvs;
+
+        if(u1_pred_dir != !pu1_refid_to_pred_dir_list[ps_data->ref_id])
+        {
+            continue;
+        }
+
+        for(j = 0; j < i4_num_mvs_in_cluster; j++)
+        {
+            search_node_t *ps_new = &ps_merge_cand[i4_num_cands_added];
+
+            ps_new->s_mv.i2_mvx = ps_data->as_mv[j].mvx;
+            ps_new->s_mv.i2_mvy = ps_data->as_mv[j].mvy;
+            ps_new->i1_ref_idx = ps_data->ref_id;
+            CLIP_MV_WITHIN_RANGE(
+                ps_new->s_mv.i2_mvx, ps_new->s_mv.i2_mvy, pps_range_prms[ps_data->ref_id], 0, 0, 0);
+
+            /* Duplicates are detected on the clipped MV, which is what gets stored */
+            for(k = 0; k < i4_num_cands_added; k++)
+            {
+                if((ps_merge_cand[k].s_mv.i2_mvx == ps_new->s_mv.i2_mvx) &&
+                   (ps_merge_cand[k].s_mv.i2_mvy == ps_new->s_mv.i2_mvy) &&
+                   (ps_merge_cand[k].i1_ref_idx == ps_new->i1_ref_idx))
+                {
+                    break;
+                }
+            }
+
+            if(k == i4_num_cands_added)
+            {
+                i4_num_cands_added++;
+            }
+        }
+    }
+
+    return i4_num_cands_added;
+}
+
+/* Writes the MV of ps_pu for direction u1_pred_dir into the next free slot of */
+/* ps_merge_cand and clips it to the range of its ref idx. Returns the count */
+/* incremented by one, or unchanged when the PU does not use that direction, its */
+/* ref idx maps to -1, or the clipped MV is already in the list. */
+static WORD32 hme_append_pu_mv_as_merge_cand(
+    pu_t *ps_pu,
+    search_node_t *ps_merge_cand,
+    range_prms_t **pps_range_prms,
+    S08 *pi1_past_list,
+    S08 *pi1_future_list,
+    S32 i4_num_cands_added,
+    U08 u1_pred_dir)
+{
+    search_node_t *ps_new = &ps_merge_cand[i4_num_cands_added];
+    mv_t *ps_mv;
+    S08 i1_ref_idx;
+    WORD32 k;
+
+    /* Pred mode 2 qualifies for either direction, any other mode must match */
+    if((2 != ps_pu->b2_pred_mode) && (u1_pred_dir != ps_pu->b2_pred_mode))
+    {
+        return i4_num_cands_added;
+    }
+
+    if(u1_pred_dir)
+    {
+        ps_mv = &ps_pu->mv.s_l1_mv;
+        i1_ref_idx = pi1_future_list[ps_pu->mv.i1_l1_ref_idx];
+    }
+    else
+    {
+        ps_mv = &ps_pu->mv.s_l0_mv;
+        i1_ref_idx = pi1_past_list[ps_pu->mv.i1_l0_ref_idx];
+    }
+
+    /* A ref idx of -1 must never be used to index pps_range_prms */
+    if(-1 == i1_ref_idx)
+    {
+        return i4_num_cands_added;
+    }
+
+    ps_new->s_mv.i2_mvx = ps_mv->i2_mvx;
+    ps_new->s_mv.i2_mvy = ps_mv->i2_mvy;
+    ps_new->i1_ref_idx = i1_ref_idx;
+
+    CLIP_MV_WITHIN_RANGE(
+        ps_new->s_mv.i2_mvx, ps_new->s_mv.i2_mvy, pps_range_prms[i1_ref_idx], 0, 0, 0);
+
+    /* Duplicates are detected on the clipped MV, which is what gets stored */
+    for(k = 0; k < i4_num_cands_added; k++)
+    {
+        if((ps_merge_cand[k].s_mv.i2_mvx == ps_new->s_mv.i2_mvx) &&
+           (ps_merge_cand[k].s_mv.i2_mvy == ps_new->s_mv.i2_mvy) &&
+           (ps_merge_cand[k].i1_ref_idx == ps_new->i1_ref_idx))
+        {
+            return i4_num_cands_added;
+        }
+    }
+
+    return i4_num_cands_added + 1;
+}
+
+/**
+********************************************************************************
+* @fn hme_add_me_best_as_merge_cands()
+*
+* @brief Appends, for one prediction direction, the MVs of the best ME results
+*        of the 4 child blocks to the merge candidate list. For a child that is
+*        split, its 4 entries in ps_8x8cu_results are used instead
+*
+* @param[in] pps_child_data_array : search results of the 4 child blocks
+* @param[in] ps_8x8cu_results : results read for split children, 4 per child
+* @param[in,out] ps_merge_cand : candidate list being appended to
+* @param[in] pps_range_prms : MV range per stored ref idx, used for clipping
+* @param[in] pu1_refid_to_pred_dir_list : not referenced by this function
+* @param[in] pi1_past_list : maps a PU's L0 ref idx to the stored ref idx
+* @param[in] pi1_future_list : maps a PU's L1 ref idx to the stored ref idx
+* @param[in] e_blk_size : only referenced by the sanity checks
+* @param[in] e_quality_preset : selects the maximum size of the list
+* @param[in] i4_num_cands_added : number of candidates already in the list
+* @param[in] u1_pred_dir : 0 selects the L0 MV of a PU, non-zero the L1 MV
+*
+* @return number of candidates in ps_merge_cand after the additions
+********************************************************************************
+*/
+static WORD32 hme_add_me_best_as_merge_cands(
+    search_results_t **pps_child_data_array,
+    inter_cu_results_t *ps_8x8cu_results,
+    search_node_t *ps_merge_cand,
+    range_prms_t **pps_range_prms,
+    U08 *pu1_refid_to_pred_dir_list,
+    S08 *pi1_past_list,
+    S08 *pi1_future_list,
+    BLK_SIZE_T e_blk_size,
+    ME_QUALITY_PRESETS_T e_quality_preset,
+    S32 i4_num_cands_added,
+    U08 u1_pred_dir)
+{
+    WORD32 i, j;
+    WORD32 i4_result_id;
+
+    /* Presets not listed in the switch keep this value, so that the limit is */
+    /* never read uninitialised */
+    WORD32 i4_max_cands_to_add = MAX_MERGE_CANDTS;
+
+    ASSERT(!pps_child_data_array[0]->u1_split_flag || (BLK_64x64 != e_blk_size));
+    ASSERT(!pps_child_data_array[1]->u1_split_flag || (BLK_64x64 != e_blk_size));
+    ASSERT(!pps_child_data_array[2]->u1_split_flag || (BLK_64x64 != e_blk_size));
+    ASSERT(!pps_child_data_array[3]->u1_split_flag || (BLK_64x64 != e_blk_size));
+
+    switch(e_quality_preset)
+    {
+    case ME_PRISTINE_QUALITY:
+    {
+        i4_max_cands_to_add = MAX_MERGE_CANDTS;
+
+        break;
+    }
+    case ME_HIGH_QUALITY:
+    {
+        /* All 4 children are split and each grandchild contributes an MV */
+        /* and 2 best results per grandchild */
+        i4_max_cands_to_add = 4 * 4 * 2;
+
+        break;
+    }
+    case ME_MEDIUM_SPEED:
+    {
+        i4_max_cands_to_add = 4 * 2 * 2;
+
+        break;
+    }
+    case ME_HIGH_SPEED:
+    case ME_XTREME_SPEED:
+    case ME_XTREME_SPEED_25:
+    {
+        i4_max_cands_to_add = 4 * 2 * 1;
+
+        break;
+    }
+    default:
+        break;
+    }
+
+    /* A candidate is written before the limit is tested, so stop here if full */
+    if(i4_max_cands_to_add <= i4_num_cands_added)
+    {
+        return i4_num_cands_added;
+    }
+
+    for(i4_result_id = 0; i4_result_id < 4; i4_result_id++)
+    {
+        for(i = 0; i < 4; i++)
+        {
+            if(!pps_child_data_array[i]->u1_split_flag)
+            {
+                inter_cu_results_t *ps_child_data = pps_child_data_array[i]->ps_cu_results;
+                part_type_results_t *ps_data = &ps_child_data->ps_best_results[i4_result_id];
+
+                if(ps_child_data->u1_num_best_results <= i4_result_id)
+                {
+                    continue;
+                }
+
+                if(ps_data->as_pu_results->pu.b1_intra_flag)
+                {
+                    continue;
+                }
+
+                /* PU 0 always, PU 1 as well unless the partition type is 2Nx2N */
+                for(j = 0; j <= (ps_data->u1_part_type != PRT_2Nx2N); j++)
+                {
+                    i4_num_cands_added = hme_append_pu_mv_as_merge_cand(
+                        &ps_data->as_pu_results[j].pu, ps_merge_cand, pps_range_prms,
+                        pi1_past_list, pi1_future_list, i4_num_cands_added, u1_pred_dir);
+
+                    if(i4_max_cands_to_add <= i4_num_cands_added)
+                    {
+                        return i4_num_cands_added;
+                    }
+                }
+            }
+            else
+            {
+                inter_cu_results_t *ps_grandchild_data = &ps_8x8cu_results[i << 2];
+
+                for(j = 0; j < 4; j++)
+                {
+                    /* NOTE(review): entry 0 is read for every i4_result_id; confirm */
+                    /* that ps_best_results[i4_result_id] is not what was intended */
+                    part_type_results_t *ps_data = ps_grandchild_data[j].ps_best_results;
+
+                    ASSERT(ps_data->u1_part_type == PRT_2Nx2N);
+
+                    if(ps_grandchild_data[j].u1_num_best_results <= i4_result_id)
+                    {
+                        continue;
+                    }
+
+                    if(ps_data->as_pu_results->pu.b1_intra_flag)
+                    {
+                        continue;
+                    }
+
+                    i4_num_cands_added = hme_append_pu_mv_as_merge_cand(
+                        &ps_data->as_pu_results[0].pu, ps_merge_cand, pps_range_prms,
+                        pi1_past_list, pi1_future_list, i4_num_cands_added, u1_pred_dir);
+
+                    if(i4_max_cands_to_add <= i4_num_cands_added)
+                    {
+                        return i4_num_cands_added;
+                    }
+                }
+            }
+        }
+    }
+
+    return i4_num_cands_added;
+}
+
+/**
+********************************************************************************
+* @fn hme_add_cands_for_merge_eval()
+*
+* @brief Builds the merge candidate list for one prediction direction. For
+*        ME_PRISTINE_QUALITY the clustered MVs of the block are added first;
+*        for every preset the best ME results of the children are then added
+*
+* @param[in] u1_blk_id : index into ps_32x32_blk, read only for BLK_32x32
+*
+* @return number of candidates in ps_merge_cand
+********************************************************************************
+*/
+WORD32 hme_add_cands_for_merge_eval(
+    ctb_cluster_info_t *ps_cluster_info,
+    search_results_t **pps_child_data_array,
+    inter_cu_results_t *ps_8x8cu_results,
+    range_prms_t **pps_range_prms,
+    search_node_t *ps_merge_cand,
+    U08 *pu1_refid_to_pred_dir_list,
+    S08 *pi1_past_list,
+    S08 *pi1_future_list,
+    ME_QUALITY_PRESETS_T e_quality_preset,
+    BLK_SIZE_T e_blk_size,
+    U08 u1_pred_dir,
+    U08 u1_blk_id)
+{
+    WORD32 i4_cand_count = 0;
+
+    if(ME_PRISTINE_QUALITY == e_quality_preset)
+    {
+        cluster_data_t *ps_clusters;
+        WORD32 i4_cluster_count;
+
+        /* Any size other than 32x32 reads the single 64x64 cluster set */
+        if(BLK_32x32 != e_blk_size)
+        {
+            ps_clusters = ps_cluster_info->ps_64x64_blk->as_cluster_data;
+            i4_cluster_count = ps_cluster_info->ps_64x64_blk->num_clusters;
+        }
+        else
+        {
+            ps_clusters = ps_cluster_info->ps_32x32_blk[u1_blk_id].as_cluster_data;
+            i4_cluster_count = ps_cluster_info->ps_32x32_blk[u1_blk_id].num_clusters;
+        }
+
+        i4_cand_count = hme_add_clustered_mvs_as_merge_cands(
+            ps_clusters, ps_merge_cand, pps_range_prms, pu1_refid_to_pred_dir_list,
+            i4_cluster_count, u1_pred_dir);
+    }
+
+    /* The ME results are appended after whatever the clusters contributed */
+    return hme_add_me_best_as_merge_cands(
+        pps_child_data_array, ps_8x8cu_results, ps_merge_cand, pps_range_prms,
+        pu1_refid_to_pred_dir_list, pi1_past_list, pi1_future_list,
+        e_blk_size, e_quality_preset, i4_cand_count, u1_pred_dir);
+}
+
+/**
+********************************************************************************
+* @fn void hme_pick_refine_merge_candts(hme_merge_prms_t *ps_merge_prms,
+* S08 i1_ref_idx,
+* S32 i4_best_part_type,
+* S32 i4_is_vert)
+*
+* @brief Given a target partition orientation in the merged CU, and the
+* partition type of most likely partition this fxn picks up
+* candidates from the 4 constituent CUs and does refinement search
+* to identify best results for the merge CU across active partitions
+*
+* @param[in,out] ps_merge_prms : Parameters sent from higher layers. Out of
+* these params, the search result structure is also derived and
+* updated during the search
+*
+* @param[in] i1_ref_idx : ID of the buffer within the search results to update.
+* Will be 0 if all refidx collapsed to one buf, else it'll be 0/1
+*
+* @param[in] i4_best_part_type : partition type of potential partition in the
+* merged CU, -1 if the merge process has not yet been able to
+* determine this.
+*
+* @param[in] i4_is_vert : Whether target partition of merged CU is vertical
+* orientation or horizontal orientation.
+*
+* @return Number of merge candidates
+********************************************************************************
+*/
+WORD32 hme_pick_eval_merge_candts(
+ hme_merge_prms_t *ps_merge_prms,
+ hme_subpel_prms_t *ps_subpel_prms,
+ S32 i4_search_idx,
+ S32 i4_best_part_type,
+ S32 i4_is_vert,
+ wgt_pred_ctxt_t *ps_wt_inp_prms,
+ S32 i4_frm_qstep,
+ ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list,
+ ihevce_me_optimised_function_list_t *ps_me_optimised_function_list)
+{
+ S32 x_off, y_off;
+ search_node_t *ps_search_node;
+ S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
+ S32 i4_num_valid_parts;
+ pred_ctxt_t *ps_pred_ctxt;
+
+ search_node_t as_merge_unique_node[MAX_MERGE_CANDTS];
+ S32 num_unique_nodes_cu_merge = 0;
+
+ search_results_t *ps_search_results = ps_merge_prms->ps_results_merge;
+ CU_SIZE_T e_cu_size = ps_search_results->e_cu_size;
+ S32 i4_part_mask = ps_search_results->i4_part_mask;
+
+ search_results_t *aps_child_results[4];
+ layer_ctxt_t *ps_curr_layer = ps_merge_prms->ps_layer_ctxt;
+
+ S32 i4_ref_stride, i, j;
+ result_upd_prms_t s_result_prms;
+
+ BLK_SIZE_T e_blk_size = ge_cu_size_to_blk_size[e_cu_size];
+ S32 i4_offset;
+
+ /*************************************************************************/
+ /* Function pointer for SAD/SATD, array and prms structure to pass to */
+ /* This function */
+ /*************************************************************************/
+ PF_SAD_FXN_T pf_err_compute;
+ S32 ai4_sad_grid[9][17];
+ err_prms_t s_err_prms;
+
+ /*************************************************************************/
+ /* Allowed MV RANGE */
+ /*************************************************************************/
+ range_prms_t **pps_range_prms = ps_merge_prms->aps_mv_range;
+ PF_INTERP_FXN_T pf_qpel_interp;
+ PF_MV_COST_FXN pf_mv_cost_compute;
+ WORD32 pred_lx;
+ U08 *apu1_hpel_ref[4];
+
+ interp_prms_t s_interp_prms;
+ S32 i4_interp_buf_id;
+
+ S32 i4_ctb_x_off = ps_merge_prms->i4_ctb_x_off;
+ S32 i4_ctb_y_off = ps_merge_prms->i4_ctb_y_off;
+
+ /* Sanity checks */
+ ASSERT((e_blk_size == BLK_64x64) || (e_blk_size == BLK_32x32));
+
+ s_err_prms.ps_cmn_utils_optimised_function_list = ps_cmn_utils_optimised_function_list;
+
+ /* Initialize all the ptrs to child CUs for merge decision */
+ aps_child_results[0] = ps_merge_prms->ps_results_tl;
+ aps_child_results[1] = ps_merge_prms->ps_results_tr;
+ aps_child_results[2] = ps_merge_prms->ps_results_bl;
+ aps_child_results[3] = ps_merge_prms->ps_results_br;
+
+ num_unique_nodes_cu_merge = 0;
+
+ pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
+
+ if(ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset)
+ {
+ num_unique_nodes_cu_merge = hme_add_cands_for_merge_eval(
+ ps_merge_prms->ps_cluster_info,
+ aps_child_results,
+ ps_merge_prms->ps_8x8_cu_results,
+ pps_range_prms,
+ as_merge_unique_node,
+ ps_search_results->pu1_is_past,
+ ps_merge_prms->pi1_past_list,
+ ps_merge_prms->pi1_future_list,
+ ps_merge_prms->e_quality_preset,
+ e_blk_size,
+ i4_search_idx,
+ (ps_merge_prms->ps_results_merge->u1_x_off >> 5) +
+ (ps_merge_prms->ps_results_merge->u1_y_off >> 4));
+ }
+ else
+ {
+ /*************************************************************************/
+ /* Populate the list of unique search nodes in the child CUs for merge */
+ /* evaluation */
+ /*************************************************************************/
+ for(i = 0; i < 4; i++)
+ {
+ search_node_t s_search_node;
+
+ PART_TYPE_T e_part_type;
+ PART_ID_T e_part_id;
+
+ WORD32 part_num;
+
+ search_results_t *ps_child = aps_child_results[i];
+
+ if(ps_child->ps_cu_results->u1_num_best_results)
+ {
+ if(!((ps_child->ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag) &&
+ (1 == ps_child->ps_cu_results->u1_num_best_results)))
+ {
+ e_part_type =
+ (PART_TYPE_T)ps_child->ps_cu_results->ps_best_results[0].u1_part_type;
+
+ ASSERT(num_unique_nodes_cu_merge < MAX_MERGE_CANDTS);
+
+ /* Insert mvs of NxN partitions. */
+ for(part_num = 0; part_num < gau1_num_parts_in_part_type[((S32)e_part_type)];
+ part_num++)
+ {
+ e_part_id = ge_part_type_to_part_id[e_part_type][part_num];
+
+ if(ps_child->aps_part_results[i4_search_idx][e_part_id]->i1_ref_idx != -1)
+ {
+ s_search_node = *ps_child->aps_part_results[i4_search_idx][e_part_id];
+ if(s_search_node.s_mv.i2_mvx != INTRA_MV)
+ {
+ CLIP_MV_WITHIN_RANGE(
+ s_search_node.s_mv.i2_mvx,
+ s_search_node.s_mv.i2_mvy,
+ pps_range_prms[s_search_node.i1_ref_idx],
+ 0,
+ 0,
+ 0);
+
+ INSERT_NEW_NODE_NOMAP(
+ as_merge_unique_node,
+ num_unique_nodes_cu_merge,
+ s_search_node,
+ 1);
+ }
+ }
+ }
+ }
+ }
+ else if(!((ps_merge_prms->ps_results_grandchild[(i << 2)]
+ .ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag) &&
+ (1 == ps_merge_prms->ps_results_grandchild[(i << 2)]
+ .ps_cu_results->u1_num_best_results)))
+ {
+ search_results_t *ps_results_root = &ps_merge_prms->ps_results_grandchild[(i << 2)];
+
+ for(j = 0; j < 4; j++)
+ {
+ e_part_type = (PART_TYPE_T)ps_results_root[j]
+ .ps_cu_results->ps_best_results[0]
+ .u1_part_type;
+
+ ASSERT(num_unique_nodes_cu_merge < MAX_MERGE_CANDTS);
+
+ /* Insert mvs of NxN partitions. */
+ for(part_num = 0; part_num < gau1_num_parts_in_part_type[((S32)e_part_type)];
+ part_num++)
+ {
+ e_part_id = ge_part_type_to_part_id[e_part_type][part_num];
+
+ if((ps_results_root[j]
+ .aps_part_results[i4_search_idx][e_part_id]
+ ->i1_ref_idx != -1) &&
+ (!ps_child->ps_cu_results->ps_best_results->as_pu_results->pu
+ .b1_intra_flag))
+ {
+ s_search_node =
+ *ps_results_root[j].aps_part_results[i4_search_idx][e_part_id];
+ if(s_search_node.s_mv.i2_mvx != INTRA_MV)
+ {
+ CLIP_MV_WITHIN_RANGE(
+ s_search_node.s_mv.i2_mvx,
+ s_search_node.s_mv.i2_mvy,
+ pps_range_prms[s_search_node.i1_ref_idx],
+ 0,
+ 0,
+ 0);
+
+ INSERT_NEW_NODE_NOMAP(
+ as_merge_unique_node,
+ num_unique_nodes_cu_merge,
+ s_search_node,
+ 1);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if(0 == num_unique_nodes_cu_merge)
+ {
+ return 0;
+ }
+
+ /*************************************************************************/
+ /* Appropriate Err compute fxn, depends on SAD/SATD, blk size and remains*/
+ /* fixed through this subpel refinement for this partition. */
+ /* Note, we do not enable grid sads since one pt is evaluated per node */
+ /* Hence, part mask is also nearly dont care and we use 2Nx2N enabled. */
+ /*************************************************************************/
+ i4_part_mask = ps_search_results->i4_part_mask;
+
+ /* Need to add the corresponding SAD functions for EXTREME SPEED : Lokesh */
+ if(ps_subpel_prms->i4_use_satd)
+ {
+ if(BLK_32x32 == e_blk_size)
+ {
+ pf_err_compute = hme_evalsatd_pt_pu_32x32;
+ }
+ else
+ {
+ pf_err_compute = hme_evalsatd_pt_pu_64x64;
+ }
+ }
+ else
+ {
+ pf_err_compute = (PF_SAD_FXN_T)hme_evalsad_grid_pu_MxM;
+ }
+
+ i4_ref_stride = ps_curr_layer->i4_rec_stride;
+
+ x_off = ps_merge_prms->ps_results_tl->u1_x_off;
+ y_off = ps_merge_prms->ps_results_tl->u1_y_off;
+ i4_offset = x_off + i4_ctb_x_off + ((y_off + i4_ctb_y_off) * i4_ref_stride);
+
+ /*************************************************************************/
+ /* This array stores the ids of the partitions whose */
+ /* SADs are updated. Since the partitions whose SADs are updated may not */
+ /* be in contiguous order, we supply another level of indirection. */
+ /*************************************************************************/
+ i4_num_valid_parts = hme_create_valid_part_ids(i4_part_mask, ai4_valid_part_ids);
+
+ /* Initialize result params used for partition update */
+ s_result_prms.pf_mv_cost_compute = NULL;
+ s_result_prms.ps_search_results = ps_search_results;
+ s_result_prms.pi4_valid_part_ids = ai4_valid_part_ids;
+ s_result_prms.i1_ref_idx = i4_search_idx;
+ s_result_prms.i4_part_mask = i4_part_mask;
+ s_result_prms.pi4_sad_grid = &ai4_sad_grid[0][0];
+ s_result_prms.i4_grid_mask = 1;
+
+ /* One time Initialization of error params used for SAD/SATD compute */
+ s_err_prms.i4_inp_stride = ps_subpel_prms->i4_inp_stride;
+ s_err_prms.i4_ref_stride = i4_ref_stride;
+ s_err_prms.i4_part_mask = (ENABLE_2Nx2N);
+ s_err_prms.i4_grid_mask = 1;
+ s_err_prms.pi4_sad_grid = &ai4_sad_grid[0][0];
+ s_err_prms.i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
+ s_err_prms.i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
+ s_err_prms.i4_step = 1;
+
+ /*************************************************************************/
+ /* One time preparation of non changing interpolation params. */
+ /*************************************************************************/
+ s_interp_prms.i4_ref_stride = i4_ref_stride;
+ s_interp_prms.i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
+ s_interp_prms.i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
+ s_interp_prms.apu1_interp_out[0] = ps_subpel_prms->pu1_wkg_mem;
+ s_interp_prms.i4_out_stride = gau1_blk_size_to_wd[e_blk_size];
+ i4_interp_buf_id = 0;
+
+ pf_qpel_interp = ps_subpel_prms->pf_qpel_interp;
+
+ /***************************************************************************/
+ /* Compute SATD/SAD for all unique nodes of children CUs to get best merge */
+ /* results */
+ /***************************************************************************/
+ for(i = 0; i < num_unique_nodes_cu_merge; i++)
+ {
+ WORD8 i1_ref_idx;
+ ps_search_node = &as_merge_unique_node[i];
+
+ /*********************************************************************/
+ /* Compute the base pointer for input, interpolated buffers */
+ /* The base pointers point as follows: */
+ /* fx fy : 0, 0 :: fx, hy : 0, 0.5, hx, fy: 0.5, 0, hx, fy: 0.5, 0.5 */
+ /* To these, we need to add the offset of the current node */
+ /*********************************************************************/
+ i1_ref_idx = ps_search_node->i1_ref_idx;
+ apu1_hpel_ref[0] = ps_curr_layer->ppu1_list_rec_fxfy[i1_ref_idx] + i4_offset;
+ apu1_hpel_ref[1] = ps_curr_layer->ppu1_list_rec_hxfy[i1_ref_idx] + i4_offset;
+ apu1_hpel_ref[2] = ps_curr_layer->ppu1_list_rec_fxhy[i1_ref_idx] + i4_offset;
+ apu1_hpel_ref[3] = ps_curr_layer->ppu1_list_rec_hxhy[i1_ref_idx] + i4_offset;
+
+ s_interp_prms.ppu1_ref = &apu1_hpel_ref[0];
+
+ pf_qpel_interp(
+ &s_interp_prms,
+ ps_search_node->s_mv.i2_mvx,
+ ps_search_node->s_mv.i2_mvy,
+ i4_interp_buf_id);
+
+ pred_lx = i4_search_idx;
+ ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
+
+ s_result_prms.u1_pred_lx = pred_lx;
+ s_result_prms.ps_search_node_base = ps_search_node;
+ s_err_prms.pu1_inp =
+ ps_wt_inp_prms->apu1_wt_inp[i1_ref_idx] + x_off + y_off * ps_subpel_prms->i4_inp_stride;
+ s_err_prms.pu1_ref = s_interp_prms.pu1_final_out;
+ s_err_prms.i4_ref_stride = s_interp_prms.i4_final_out_stride;
+
+ /* Carry out the SAD/SATD. This call also does the TU RECURSION.
+ Here the tu recursion logic is restricted with the size of the PU*/
+ pf_err_compute(&s_err_prms);
+
+ if(ps_subpel_prms->u1_is_cu_noisy &&
+ ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier)
+ {
+ ps_me_optimised_function_list->pf_compute_stim_injected_distortion_for_all_parts(
+ s_err_prms.pu1_ref,
+ s_err_prms.i4_ref_stride,
+ ai4_valid_part_ids,
+ ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaX,
+ ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaXSquared,
+ s_err_prms.pi4_sad_grid,
+ ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier,
+ ps_wt_inp_prms->a_inv_wpred_wt[i1_ref_idx],
+ ps_wt_inp_prms->ai4_shift_val[i1_ref_idx],
+ i4_num_valid_parts,
+ ps_wt_inp_prms->wpred_log_wdc,
+ (BLK_32x32 == e_blk_size) ? 32 : 64);
+ }
+
+ /* Update the mv's */
+ s_result_prms.i2_mv_x = ps_search_node->s_mv.i2_mvx;
+ s_result_prms.i2_mv_y = ps_search_node->s_mv.i2_mvy;
+
+ /* Update best results */
+ hme_update_results_pt_pu_best1_subpel_hs(&s_err_prms, &s_result_prms);
+ }
+
+ /************************************************************************/
+ /* Update mv cost and total cost for each valid partition in the CU */
+ /************************************************************************/
+ for(i = 0; i < TOT_NUM_PARTS; i++)
+ {
+ if(i4_part_mask & (1 << i))
+ {
+ WORD32 j;
+ WORD32 i4_mv_cost;
+
+ ps_search_node = ps_search_results->aps_part_results[i4_search_idx][i];
+
+ for(j = 0;
+ j < MIN(ps_search_results->u1_num_results_per_part, num_unique_nodes_cu_merge);
+ j++)
+ {
+ if(ps_search_node->i1_ref_idx != -1)
+ {
+ pred_lx = i4_search_idx;
+ ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
+
+ /* Prediction context should now deal with qpel units */
+ HME_SET_MVPRED_RES(ps_pred_ctxt, MV_RES_QPEL);
+
+ ps_search_node->u1_subpel_done = 1;
+ ps_search_node->u1_is_avail = 1;
+
+ i4_mv_cost =
+ pf_mv_cost_compute(ps_search_node, ps_pred_ctxt, (PART_ID_T)i, MV_RES_QPEL);
+
+ ps_search_node->i4_tot_cost = i4_mv_cost + ps_search_node->i4_sad;
+ ps_search_node->i4_mv_cost = i4_mv_cost;
+
+ ps_search_node++;
+ }
+ }
+ }
+ }
+
+ return num_unique_nodes_cu_merge;
+}
+
+#define CU_MERGE_MAX_INTRA_PARTS 4 /* hme_try_merge_high_speed compares its intra-unit count (out of 16) against this threshold */
+
+/**
+********************************************************************************
+* @fn hme_try_merge_high_speed
+*
+* @brief Attempts to merge 4 NxN candts to a 2Nx2N candt, either as a single
+* entity or with partitions, for the high speed presets
+*
+* @param[in] ps_thrd_ctxt : thread context; supplies the common-utils and ME
+* optimised function lists
+*
+* @param[in,out] ps_ctxt : frame level ME context; frm_qstep, frame params,
+* weighted pred context, 8x8 CU results and the 4x4 source sigma arrays are
+* read, and its working memory manager is reset before populating PUs
+*
+* @param[in] ps_cur_ipe_ctb : IPE analysis of the current CTB; its split flags
+* and best 32x32 / 64x64 intra costs are read
+*
+* @param[in] ps_subpel_prms : u1_is_cu_noisy is read here; otherwise forwarded
+* to hme_pick_eval_merge_candts and hme_populate_pus
+*
+* @param[in,out] ps_merge_prms : Params for CU merge. The children results
+* (tl/tr/bl/br and the 8x8 CU results) are read; ps_results_merge,
+* au1_pred_dir_searched, i4_num_pred_dir_actual and fields of
+* ps_inter_ctb_prms are written
+*
+* @param[in,out] ps_pu_results : forwarded to hme_populate_pus and
+* hme_decide_part_types
+*
+* @param[in,out] ps_pu_result : forwarded to hme_populate_pus
+*
+* @return CU_MERGE_RESULT_T type result of merge (CU_MERGED/CU_SPLIT)
+********************************************************************************
+*/
+CU_MERGE_RESULT_T hme_try_merge_high_speed(
+ me_ctxt_t *ps_thrd_ctxt,
+ me_frm_ctxt_t *ps_ctxt,
+ ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
+ hme_subpel_prms_t *ps_subpel_prms,
+ hme_merge_prms_t *ps_merge_prms,
+ inter_pu_results_t *ps_pu_results,
+ pu_result_t *ps_pu_result)
+{
+ search_results_t *ps_results_tl, *ps_results_tr;
+ search_results_t *ps_results_bl, *ps_results_br;
+
+ S32 i;
+ S32 i4_search_idx;
+ S32 i4_cost_parent;
+ S32 intra_cu_size;
+ ULWORD64 au8_final_src_sigmaX[17], au8_final_src_sigmaXSquared[17];
+
+ search_results_t *ps_results_merge = ps_merge_prms->ps_results_merge;
+ wgt_pred_ctxt_t *ps_wt_inp_prms = &ps_ctxt->s_wt_pred;
+
+ S32 i4_part_mask = ENABLE_ALL_PARTS - ENABLE_NxN; /* NxN is excluded from the merged CU's partition mask */
+ S32 is_vert = 0, i4_best_part_type = -1;
+ S32 i4_intra_parts = 0; /* Keeps track of intra percentage before merge */
+ S32 i4_cost_children = 0;
+ S32 i4_frm_qstep = ps_ctxt->frm_qstep;
+ S32 i4_num_merge_cands_evaluated = 0;
+ U08 u1_x_off = ps_results_merge->u1_x_off;
+ U08 u1_y_off = ps_results_merge->u1_y_off;
+ S32 i4_32x32_id = (u1_y_off >> 4) + (u1_x_off >> 5); /* for offsets of 0 or 32: y adds 0 or 2, x adds 0 or 1 */
+
+ ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list =
+ ps_thrd_ctxt->ps_cmn_utils_optimised_function_list;
+ ihevce_me_optimised_function_list_t *ps_me_optimised_function_list =
+ ((ihevce_me_optimised_function_list_t *)ps_thrd_ctxt->pv_me_optimised_function_list);
+ ps_results_tl = ps_merge_prms->ps_results_tl;
+ ps_results_tr = ps_merge_prms->ps_results_tr;
+ ps_results_bl = ps_merge_prms->ps_results_bl;
+ ps_results_br = ps_merge_prms->ps_results_br;
+
+ if(ps_merge_prms->e_quality_preset == ME_XTREME_SPEED)
+ {
+ i4_part_mask &= ~ENABLE_AMP;
+ }
+
+ if(ps_merge_prms->e_quality_preset == ME_XTREME_SPEED_25)
+ {
+ i4_part_mask &= ~ENABLE_AMP;
+
+ i4_part_mask &= ~ENABLE_SMP;
+ }
+
+ ps_merge_prms->i4_num_pred_dir_actual = 0;
+
+ /*************************************************************************/
+ /* The logic for High speed CU merge goes as follows: */
+ /* */
+ /* 1. Early exit with CU_SPLIT if sum of best partitions of children CUs */
+ /* exceed 7 */
+ /* 2. Early exit with CU_MERGE if mvs of best partitions of children CUs */
+ /* are identical */
+ /* 3. Find the all unique mvs of best partitions of children CUs and */
+ /* evaluate partial SATDs (all 17 partitions) for each unique mv. If */
+ /* best parent cost is lower than sum of the best children costs */
+ /* return CU_MERGE after seeding the best results else return CU_SPLIT*/
+ /* */
+ /*************************************************************************/
+
+ /* Count the number of best partitions in child CUs and accumulate the */
+ /* children's best total cost; early exit if the partition count is > 7 */
+ if((ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY) ||
+ (CU_32x32 == ps_results_merge->e_cu_size))
+ {
+ S32 num_parts_in_32x32 = 0;
+ WORD32 i4_part_type;
+
+ if(ps_results_tl->u1_split_flag)
+ {
+ num_parts_in_32x32 += 4;
+
+#define COST_INTERCHANGE 0
+ i4_cost_children = ps_merge_prms->ps_8x8_cu_results[0].ps_best_results->i4_tot_cost +
+ ps_merge_prms->ps_8x8_cu_results[1].ps_best_results->i4_tot_cost +
+ ps_merge_prms->ps_8x8_cu_results[2].ps_best_results->i4_tot_cost +
+ ps_merge_prms->ps_8x8_cu_results[3].ps_best_results->i4_tot_cost;
+ }
+ else
+ {
+ i4_part_type = ps_results_tl->ps_cu_results->ps_best_results[0].u1_part_type;
+ num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
+ i4_cost_children = ps_results_tl->ps_cu_results->ps_best_results[0].i4_tot_cost;
+ }
+
+ if(ps_results_tr->u1_split_flag)
+ {
+ num_parts_in_32x32 += 4;
+
+ i4_cost_children += ps_merge_prms->ps_8x8_cu_results[4].ps_best_results->i4_tot_cost +
+ ps_merge_prms->ps_8x8_cu_results[5].ps_best_results->i4_tot_cost +
+ ps_merge_prms->ps_8x8_cu_results[6].ps_best_results->i4_tot_cost +
+ ps_merge_prms->ps_8x8_cu_results[7].ps_best_results->i4_tot_cost;
+ }
+ else
+ {
+ i4_part_type = ps_results_tr->ps_cu_results->ps_best_results[0].u1_part_type;
+ num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
+ i4_cost_children += ps_results_tr->ps_cu_results->ps_best_results[0].i4_tot_cost;
+ }
+
+ if(ps_results_bl->u1_split_flag)
+ {
+ num_parts_in_32x32 += 4;
+
+ i4_cost_children += ps_merge_prms->ps_8x8_cu_results[8].ps_best_results->i4_tot_cost +
+ ps_merge_prms->ps_8x8_cu_results[9].ps_best_results->i4_tot_cost +
+ ps_merge_prms->ps_8x8_cu_results[10].ps_best_results->i4_tot_cost +
+ ps_merge_prms->ps_8x8_cu_results[11].ps_best_results->i4_tot_cost;
+ }
+ else
+ {
+ i4_part_type = ps_results_bl->ps_cu_results->ps_best_results[0].u1_part_type;
+ num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
+ i4_cost_children += ps_results_bl->ps_cu_results->ps_best_results[0].i4_tot_cost;
+ }
+
+ if(ps_results_br->u1_split_flag)
+ {
+ num_parts_in_32x32 += 4;
+
+ i4_cost_children += ps_merge_prms->ps_8x8_cu_results[12].ps_best_results->i4_tot_cost +
+ ps_merge_prms->ps_8x8_cu_results[13].ps_best_results->i4_tot_cost +
+ ps_merge_prms->ps_8x8_cu_results[14].ps_best_results->i4_tot_cost +
+ ps_merge_prms->ps_8x8_cu_results[15].ps_best_results->i4_tot_cost;
+ }
+ else
+ {
+ i4_part_type = ps_results_br->ps_cu_results->ps_best_results[0].u1_part_type;
+ num_parts_in_32x32 += gau1_num_parts_in_part_type[i4_part_type];
+ i4_cost_children += ps_results_br->ps_cu_results->ps_best_results[0].i4_tot_cost;
+ }
+
+ if((num_parts_in_32x32 > 7) && (ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY))
+ {
+ return CU_SPLIT;
+ }
+
+ if((num_parts_in_32x32 > MAX_NUM_CONSTITUENT_MVS_TO_ENABLE_32MERGE_IN_XS25) &&
+ (ps_merge_prms->e_quality_preset == ME_XTREME_SPEED_25))
+ {
+ return CU_SPLIT;
+ }
+ }
+
+ /* Accumulate intra percentage before merge for early CU_SPLIT decision */
+ /* Note : Each intra part represent a NxN unit of the children CUs */
+ /* This is essentially 1/16th of the CUsize under consideration for merge */
+ if(ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset)
+ {
+ if(CU_64x64 == ps_results_merge->e_cu_size)
+ {
+ i4_intra_parts =
+ (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->u1_inter_eval_enable)
+ ? 16
+ : ps_merge_prms->ps_cluster_info->ps_cu_tree_root->u1_intra_eval_enable;
+ }
+ else
+ {
+ switch((ps_results_merge->u1_x_off >> 5) + ((ps_results_merge->u1_y_off >> 4)))
+ {
+ case 0:
+ {
+ i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_tl
+ ->u1_inter_eval_enable)
+ ? 16
+ : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
+ ->ps_child_node_tl->u1_intra_eval_enable);
+
+ break;
+ }
+ case 1:
+ {
+ i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_tr
+ ->u1_inter_eval_enable)
+ ? 16
+ : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
+ ->ps_child_node_tr->u1_intra_eval_enable);
+
+ break;
+ }
+ case 2:
+ {
+ i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_bl
+ ->u1_inter_eval_enable)
+ ? 16
+ : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
+ ->ps_child_node_bl->u1_intra_eval_enable);
+
+ break;
+ }
+ case 3:
+ {
+ i4_intra_parts = (!ps_merge_prms->ps_cluster_info->ps_cu_tree_root->ps_child_node_br
+ ->u1_inter_eval_enable)
+ ? 16
+ : (ps_merge_prms->ps_cluster_info->ps_cu_tree_root
+ ->ps_child_node_br->u1_intra_eval_enable);
+
+ break;
+ }
+ }
+ }
+ }
+ else
+ {
+ for(i = 0; i < 4; i++)
+ {
+ search_results_t *ps_results =
+ (i == 0) ? ps_results_tl
+ : ((i == 1) ? ps_results_tr : ((i == 2) ? ps_results_bl : ps_results_br));
+
+ part_type_results_t *ps_best_res = &ps_results->ps_cu_results->ps_best_results[0];
+
+ if(ps_results->u1_split_flag)
+ {
+ U08 u1_x_off = ps_results->u1_x_off;
+ U08 u1_y_off = ps_results->u1_y_off;
+ U08 u1_8x8_zscan_id = gau1_ctb_raster_to_zscan[(u1_x_off >> 2) + (u1_y_off << 2)] >>
+ 2;
+
+ /* Special case to handle 8x8 CUs when 16x16 is split: each of the */
+ /* four 8x8 CUs whose best result is intra counts as one intra part */
+ ASSERT(ps_results->e_cu_size == CU_16x16);
+
+ ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id].ps_best_results[0];
+
+ if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
+ i4_intra_parts += 1;
+
+ ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 1].ps_best_results[0];
+
+ if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
+ i4_intra_parts += 1;
+
+ ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 2].ps_best_results[0];
+
+ if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
+ i4_intra_parts += 1;
+
+ ps_best_res = &ps_ctxt->as_cu8x8_results[u1_8x8_zscan_id + 3].ps_best_results[0];
+
+ if(ps_best_res->as_pu_results[0].pu.b1_intra_flag)
+ i4_intra_parts += 1;
+ }
+ else if(ps_best_res[0].as_pu_results[0].pu.b1_intra_flag)
+ {
+ i4_intra_parts += 4;
+ }
+ }
+ }
+
+ /* Determine the max intra CU size indicated by IPE */
+ intra_cu_size = CU_64x64;
+ if(ps_cur_ipe_ctb->u1_split_flag)
+ {
+ intra_cu_size = CU_32x32;
+ if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag)
+ {
+ intra_cu_size = CU_16x16;
+ }
+ }
+ /* Intra dominated children: merge vs split is decided from the IPE flags, preset and noise flag alone; no inter merge candidate is evaluated */
+ if(((i4_intra_parts > CU_MERGE_MAX_INTRA_PARTS) &&
+ (intra_cu_size < ps_results_merge->e_cu_size) &&
+ (ME_PRISTINE_QUALITY != ps_merge_prms->e_quality_preset)) ||
+ (i4_intra_parts == 16))
+ {
+ S32 i4_merge_outcome;
+
+ i4_merge_outcome = (CU_32x32 == ps_results_merge->e_cu_size)
+ ? (!ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag &&
+ ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_valid_cu)
+ : (!ps_cur_ipe_ctb->u1_split_flag);
+
+ i4_merge_outcome = i4_merge_outcome ||
+ (ME_PRISTINE_QUALITY == ps_merge_prms->e_quality_preset);
+
+ i4_merge_outcome = i4_merge_outcome &&
+ !(ps_subpel_prms->u1_is_cu_noisy && DISABLE_INTRA_WHEN_NOISY);
+
+ if(i4_merge_outcome)
+ {
+ inter_cu_results_t *ps_cu_results = ps_results_merge->ps_cu_results;
+ part_type_results_t *ps_best_result = ps_cu_results->ps_best_results;
+ pu_t *ps_pu = &ps_best_result->as_pu_results->pu;
+
+ ps_cu_results->u1_num_best_results = 1;
+ ps_cu_results->u1_cu_size = ps_results_merge->e_cu_size;
+ ps_cu_results->u1_x_off = u1_x_off;
+ ps_cu_results->u1_y_off = u1_y_off;
+
+ ps_best_result->u1_part_type = PRT_2Nx2N;
+ ps_best_result->ai4_tu_split_flag[0] = 0;
+ ps_best_result->ai4_tu_split_flag[1] = 0;
+ ps_best_result->ai4_tu_split_flag[2] = 0;
+ ps_best_result->ai4_tu_split_flag[3] = 0;
+ ps_best_result->i4_tot_cost =
+ (CU_64x64 == ps_results_merge->e_cu_size)
+ ? ps_cur_ipe_ctb->i4_best64x64_intra_cost
+ : ps_cur_ipe_ctb->ai4_best32x32_intra_cost[i4_32x32_id];
+
+ ps_pu->b1_intra_flag = 1;
+ ps_pu->b4_pos_x = u1_x_off >> 2;
+ ps_pu->b4_pos_y = u1_y_off >> 2;
+ ps_pu->b4_wd = (1 << (ps_results_merge->e_cu_size + 1)) - 1;
+ ps_pu->b4_ht = ps_pu->b4_wd;
+ ps_pu->mv.i1_l0_ref_idx = -1;
+ ps_pu->mv.i1_l1_ref_idx = -1;
+ ps_pu->mv.s_l0_mv.i2_mvx = INTRA_MV;
+ ps_pu->mv.s_l0_mv.i2_mvy = INTRA_MV;
+ ps_pu->mv.s_l1_mv.i2_mvx = INTRA_MV;
+ ps_pu->mv.s_l1_mv.i2_mvy = INTRA_MV;
+
+ return CU_MERGED;
+ }
+ else
+ {
+ return CU_SPLIT;
+ }
+ }
+ /* Any intra part among the children restricts the merged CU to the 2Nx2N partition */
+ if(i4_intra_parts)
+ {
+ i4_part_mask = ENABLE_2Nx2N;
+ }
+
+ ps_results_merge->u1_num_active_ref = (ps_ctxt->s_frm_prms.bidir_enabled) ? 2 : 1; /* in effect only for the reset call below */
+
+ hme_reset_search_results(ps_results_merge, i4_part_mask, MV_RES_QPEL);
+
+ ps_results_merge->u1_num_active_ref = ps_merge_prms->i4_num_ref; /* replaces the value set before the reset */
+ ps_merge_prms->i4_num_pred_dir_actual = 0;
+
+ if(ps_subpel_prms->u1_is_cu_noisy && ps_merge_prms->ps_inter_ctb_prms->i4_alpha_stim_multiplier)
+ {
+ S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
+ S32 i4_num_valid_parts;
+ S32 i4_sigma_array_offset;
+
+ i4_num_valid_parts = hme_create_valid_part_ids(i4_part_mask, ai4_valid_part_ids);
+
+ /*********************************************************************************************************************************************/
+ /* i4_sigma_array_offset : takes care of pointing to the appropriate 4x4 block's sigmaX and sigmaX-squared value in a CTB out of 256 values */
+ /* Logic is x/4 + ((y/4) x 16) : every 4 pixel increase in x equals one 4x4 block increment, every 4 pixel increase in y equals 16 4x4 block */
+ /* increment as there will be 256 4x4 blocks in a CTB */
+ /*********************************************************************************************************************************************/
+ i4_sigma_array_offset = (ps_merge_prms->ps_results_merge->u1_x_off / 4) +
+ (ps_merge_prms->ps_results_merge->u1_y_off * 4);
+
+ for(i = 0; i < i4_num_valid_parts; i++)
+ {
+ S32 i4_part_id = ai4_valid_part_ids[i];
+
+ hme_compute_final_sigma_of_pu_from_base_blocks(
+ ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_array_offset,
+ ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_array_offset,
+ au8_final_src_sigmaX,
+ au8_final_src_sigmaXSquared,
+ (CU_32x32 == ps_results_merge->e_cu_size) ? 32 : 64,
+ 4,
+ i4_part_id,
+ 16);
+ }
+ /* NOTE(review): the two pointers stored below refer to arrays local to this function and are not cleared before returning; confirm nothing dereferences them afterwards */
+ ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaX = au8_final_src_sigmaX;
+ ps_merge_prms->ps_inter_ctb_prms->pu8_part_src_sigmaXSquared = au8_final_src_sigmaXSquared;
+ }
+
+ /*************************************************************************/
+ /* Loop through all ref idx and pick the merge candts and refine based */
+ /* on the active partitions. At this stage num ref will be 1 or 2 */
+ /*************************************************************************/
+ for(i4_search_idx = 0; i4_search_idx < ps_merge_prms->i4_num_ref; i4_search_idx++)
+ {
+ S32 i4_cands;
+ U08 u1_pred_dir = 0;
+ /* Pred dir equals the search idx when two refs are searched or bidir is disabled; otherwise it is the list that still has active refs */
+ if((2 == ps_merge_prms->i4_num_ref) || (!ps_ctxt->s_frm_prms.bidir_enabled))
+ {
+ u1_pred_dir = i4_search_idx;
+ }
+ else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l0 == 0)
+ {
+ u1_pred_dir = 1;
+ }
+ else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l1 == 0)
+ {
+ u1_pred_dir = 0;
+ }
+ else
+ {
+ ASSERT(0);
+ }
+
+ /* call the function to pick and evaluate the merge candts, given */
+ /* a ref id and a part mask. */
+ i4_cands = hme_pick_eval_merge_candts(
+ ps_merge_prms,
+ ps_subpel_prms,
+ u1_pred_dir,
+ i4_best_part_type,
+ is_vert,
+ ps_wt_inp_prms,
+ i4_frm_qstep,
+ ps_cmn_utils_optimised_function_list,
+ ps_me_optimised_function_list);
+
+ if(i4_cands)
+ {
+ ps_merge_prms->au1_pred_dir_searched[ps_merge_prms->i4_num_pred_dir_actual] =
+ u1_pred_dir;
+ ps_merge_prms->i4_num_pred_dir_actual++;
+ }
+
+ i4_num_merge_cands_evaluated += i4_cands;
+ }
+
+ /* Call the decide_part_types function here */
+ /* Populate the new PU struct with the results post subpel refinement*/
+ if(i4_num_merge_cands_evaluated)
+ {
+ inter_cu_results_t *ps_cu_results = ps_results_merge->ps_cu_results;
+
+ hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr);
+
+ ps_merge_prms->ps_inter_ctb_prms->i4_ctb_x_off = ps_merge_prms->i4_ctb_x_off;
+ ps_merge_prms->ps_inter_ctb_prms->i4_ctb_y_off = ps_merge_prms->i4_ctb_y_off;
+
+ hme_populate_pus(
+ ps_thrd_ctxt,
+ ps_ctxt,
+ ps_subpel_prms,
+ ps_results_merge,
+ ps_cu_results,
+ ps_pu_results,
+ ps_pu_result,
+ ps_merge_prms->ps_inter_ctb_prms,
+ &ps_ctxt->s_wt_pred,
+ ps_merge_prms->ps_layer_ctxt,
+ ps_merge_prms->au1_pred_dir_searched,
+ ps_merge_prms->i4_num_pred_dir_actual);
+
+ ps_cu_results->i4_inp_offset = (ps_cu_results->u1_x_off) + (ps_cu_results->u1_y_off * 64); /* x + 64 * y; presumably 64 is the CTB width - confirm */
+
+ hme_decide_part_types(
+ ps_cu_results,
+ ps_pu_results,
+ ps_merge_prms->ps_inter_ctb_prms,
+ ps_ctxt,
+ ps_cmn_utils_optimised_function_list,
+ ps_me_optimised_function_list
+
+ );
+
+ /*****************************************************************/
+ /* INSERT INTRA RESULTS AT 32x32/64x64 LEVEL. */
+ /*****************************************************************/
+#if DISABLE_INTRA_IN_BPICS
+ if(1 != ((ME_XTREME_SPEED_25 == ps_merge_prms->e_quality_preset) &&
+ (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE)))
+#endif
+ {
+ if(!(DISABLE_INTRA_WHEN_NOISY && ps_merge_prms->ps_inter_ctb_prms->u1_is_cu_noisy))
+ {
+ hme_insert_intra_nodes_post_bipred(
+ ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep);
+ }
+ }
+ }
+ else
+ {
+ return CU_SPLIT;
+ }
+
+ /* We check the best result of ref idx 0 and compare for parent vs child */
+ if((ps_merge_prms->e_quality_preset != ME_PRISTINE_QUALITY) ||
+ (CU_32x32 == ps_results_merge->e_cu_size))
+ {
+ i4_cost_parent = ps_results_merge->ps_cu_results->ps_best_results[0].i4_tot_cost;
+ /*********************************************************************/
+ /* Add the cost of signaling the CU tree bits. */
+ /* Assuming parent is not split, then we signal 1 bit for this parent*/
+ /* CU. If split, then 1 bit for parent CU + 4 bits for each child CU */
+ /* So, 4*lambda is extra for children cost. :Lokesh */
+ /*********************************************************************/
+ {
+ pred_ctxt_t *ps_pred_ctxt = &ps_results_merge->as_pred_ctxt[0];
+
+ i4_cost_children += ((4 * ps_pred_ctxt->lambda) >> (ps_pred_ctxt->lambda_q_shift));
+ }
+
+ if(i4_cost_parent < i4_cost_children)
+ {
+ return CU_MERGED;
+ }
+
+ return CU_SPLIT;
+ }
+ else
+ {
+ return CU_MERGED; /* pristine preset with a CU size other than 32x32: no parent vs children cost check */
+ }
+}
+
+#define COPY_SEARCH_RESULT(ps_mv, pi1_ref_idx, ps_search_node, shift) \
+ { \
+ (ps_mv)->i2_mv_x = (ps_search_node)->s_mv.i2_mvx >> (shift); \
+ (ps_mv)->i2_mv_y = (ps_search_node)->s_mv.i2_mvy >> (shift); \
+ *(pi1_ref_idx) = (ps_search_node)->i1_ref_idx; \
+ }
+
+/**
+********************************************************************************
+* @fn hme_update_mv_bank_noencode(search_results_t *ps_search_results,
+*                                 layer_mv_t *ps_layer_mv,
+*                                 S32 i4_search_blk_x,
+*                                 S32 i4_search_blk_y,
+*                                 mvbank_update_prms_t *ps_prms)
+*
+* @brief Writes the results of the block just searched into the layer mv bank,
+* for the case where no further encoding is to be done.
+*
+* Two layouts are handled:
+*  - mv bank blk size equals search blk size: the 2Nx2N results of every ref
+*    are copied one to one.
+*  - 8x8 search feeding a 4x4 mv bank: each of the four 4x4 bank entries gets
+*    its NxN partition results, with the 8x8 (2Nx2N) best result inserted as
+*    the second entry when more than one mv per ref is stored.
+*
+* @param[in] ps_search_results : contains results for the block just searched
+*
+* @param[in,out] ps_layer_mv : has pointers to the mv bank and ref idx buffers
+*
+* @param[in] i4_search_blk_x : col num of blk being searched
+*
+* @param[in] i4_search_blk_y : row num of blk being searched
+*
+* @param[in] ps_prms : parameters which govern how the update is done
+*
+* @return None
+********************************************************************************
+*/
+
+void hme_update_mv_bank_noencode(
+    search_results_t *ps_search_results,
+    layer_mv_t *ps_layer_mv,
+    S32 i4_search_blk_x,
+    S32 i4_search_blk_y,
+    mvbank_update_prms_t *ps_prms)
+{
+    /* NxN partition ids in quadrant order: TL, TR, BL, BR */
+    const S32 ai4_quad_part_id[4] = {
+        PART_ID_NxN_TL, PART_ID_NxN_TR, PART_ID_NxN_BL, PART_ID_NxN_BR
+    };
+
+    hme_mv_t *ps_bank_mv;
+    S08 *pi1_bank_ref;
+    hme_mv_t *aps_dst_mv[4];
+    S08 *api1_dst_ref[4];
+    search_node_t *aps_src_4x4[4];
+    S32 i4_bank_col, i4_bank_row, i4_bank_offset;
+    S32 i4_ref, i4_res, i4_quad;
+
+    /* Locate this block's first entry in the mv bank and the ref idx buf */
+    i4_bank_col = i4_search_blk_x << ps_prms->i4_shift;
+    i4_bank_row = i4_search_blk_y << ps_prms->i4_shift;
+    i4_bank_offset = i4_bank_col + i4_bank_row * ps_layer_mv->i4_num_blks_per_row;
+    i4_bank_offset *= ps_layer_mv->i4_num_mvs_per_blk;
+
+    ps_bank_mv = ps_layer_mv->ps_mv + i4_bank_offset;
+    pi1_bank_ref = ps_layer_mv->pi1_ref_idx + i4_bank_offset;
+
+    /*************************************************************************/
+    /* Same blk size for search and storage: straight 1-1 copy of the 2Nx2N  */
+    /* results of every reference.                                           */
+    /*************************************************************************/
+    if(ps_layer_mv->e_blk_size == ps_prms->e_search_blk_size)
+    {
+        for(i4_ref = 0; i4_ref < (S32)ps_prms->i4_num_ref; i4_ref++)
+        {
+            search_node_t *ps_src = ps_search_results->aps_part_results[i4_ref][PART_ID_2Nx2N];
+
+            for(i4_res = 0; i4_res < ps_layer_mv->i4_num_mvs_per_ref; i4_res++)
+            {
+                COPY_SEARCH_RESULT(ps_bank_mv, pi1_bank_ref, &ps_src[i4_res], 0);
+                ps_bank_mv++;
+                pi1_bank_ref++;
+            }
+        }
+        return;
+    }
+
+    /*************************************************************************/
+    /* Otherwise the search blk is 8x8 and the bank stores 4x4 results, which */
+    /* needs the NxN partitions to have been enabled in the search.          */
+    /*************************************************************************/
+    ASSERT(ps_layer_mv->e_blk_size == BLK_4x4);
+    ASSERT(ps_prms->e_search_blk_size == BLK_8x8);
+    ASSERT((ps_search_results->i4_part_mask & (ENABLE_NxN)) == (ENABLE_NxN));
+
+    /* Every 4x4 blk stores its own 4x4 results plus one 8x8 result */
+    ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_results_per_part + 1);
+
+    /* Destination of each quadrant: TR is one blk after TL, the bottom */
+    /* pair lies one row of mvs below the top pair                      */
+    aps_dst_mv[0] = ps_bank_mv;
+    aps_dst_mv[1] = aps_dst_mv[0] + ps_layer_mv->i4_num_mvs_per_blk;
+    aps_dst_mv[2] = aps_dst_mv[0] + (ps_layer_mv->i4_num_mvs_per_row);
+    aps_dst_mv[3] = aps_dst_mv[2] + (ps_layer_mv->i4_num_mvs_per_blk);
+    api1_dst_ref[0] = pi1_bank_ref;
+    api1_dst_ref[1] = api1_dst_ref[0] + ps_layer_mv->i4_num_mvs_per_blk;
+    api1_dst_ref[2] = api1_dst_ref[0] + (ps_layer_mv->i4_num_mvs_per_row);
+    api1_dst_ref[3] = api1_dst_ref[2] + (ps_layer_mv->i4_num_mvs_per_blk);
+
+    for(i4_ref = 0; i4_ref < (S32)ps_search_results->u1_num_active_ref; i4_ref++)
+    {
+        search_node_t *ps_src_8x8 = ps_search_results->aps_part_results[i4_ref][PART_ID_2Nx2N];
+
+        for(i4_quad = 0; i4_quad < 4; i4_quad++)
+        {
+            aps_src_4x4[i4_quad] =
+                ps_search_results->aps_part_results[i4_ref][ai4_quad_part_id[i4_quad]];
+        }
+
+        /* First entry of every quadrant: its best 4x4 result */
+        for(i4_quad = 0; i4_quad < 4; i4_quad++)
+        {
+            COPY_SEARCH_RESULT(aps_dst_mv[i4_quad], api1_dst_ref[i4_quad], aps_src_4x4[i4_quad], 0);
+            aps_dst_mv[i4_quad]++;
+            api1_dst_ref[i4_quad]++;
+            aps_src_4x4[i4_quad]++;
+        }
+
+        /* Second entry of every quadrant: the shared 8x8 best result */
+        if(ps_layer_mv->i4_num_mvs_per_ref > 1)
+        {
+            for(i4_quad = 0; i4_quad < 4; i4_quad++)
+            {
+                COPY_SEARCH_RESULT(aps_dst_mv[i4_quad], api1_dst_ref[i4_quad], ps_src_8x8, 0);
+                aps_dst_mv[i4_quad]++;
+                api1_dst_ref[i4_quad]++;
+            }
+        }
+
+        /* Remaining entries: the next 4x4 results of each quadrant */
+        for(i4_res = 2; i4_res < ps_layer_mv->i4_num_mvs_per_ref; i4_res++)
+        {
+            for(i4_quad = 0; i4_quad < 4; i4_quad++)
+            {
+                COPY_SEARCH_RESULT(
+                    aps_dst_mv[i4_quad], api1_dst_ref[i4_quad], aps_src_4x4[i4_quad], 0);
+                aps_dst_mv[i4_quad]++;
+                api1_dst_ref[i4_quad]++;
+                aps_src_4x4[i4_quad]++;
+            }
+        }
+    }
+}
+
+void hme_update_mv_bank_encode(
+ search_results_t *ps_search_results,
+ layer_mv_t *ps_layer_mv,
+ S32 i4_search_blk_x,
+ S32 i4_search_blk_y,
+ mvbank_update_prms_t *ps_prms,
+ U08 *pu1_pred_dir_searched,
+ S32 i4_num_act_ref_l0)
+{
+ hme_mv_t *ps_mv;
+ hme_mv_t *ps_mv1, *ps_mv2, *ps_mv3, *ps_mv4;
+ S08 *pi1_ref_idx, *pi1_ref_idx1, *pi1_ref_idx2, *pi1_ref_idx3, *pi1_ref_idx4;
+ S32 i4_blk_x, i4_blk_y, i4_offset;
+ S32 j, i, num_parts;
+ search_node_t *ps_search_node_tl, *ps_search_node_tr;
+ search_node_t *ps_search_node_bl, *ps_search_node_br;
+ search_node_t s_zero_mv;
+ WORD32 i4_part_type = ps_search_results->ps_cu_results->ps_best_results[0].u1_part_type;
+
+ i4_blk_x = i4_search_blk_x << ps_prms->i4_shift;
+ i4_blk_y = i4_search_blk_y << ps_prms->i4_shift;
+ i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row;
+
+ i4_offset *= ps_layer_mv->i4_num_mvs_per_blk;
+
+ /* Identify the correct offset in the mvbank and the reference id buf */
+ ps_mv = ps_layer_mv->ps_mv + i4_offset;
+ pi1_ref_idx = ps_layer_mv->pi1_ref_idx + i4_offset;
+
+ ASSERT(ps_layer_mv->e_blk_size == BLK_8x8);
+ ASSERT(ps_prms->e_search_blk_size == BLK_16x16);
+
+ /*************************************************************************/
+ /* For every 4x4 blk we store corresponding 4x4 results and 1 8x8 result */
+ /* hence the below check. */
+ /*************************************************************************/
+ ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_best_results);
+
+ ps_mv1 = ps_mv;
+ ps_mv2 = ps_mv1 + ps_layer_mv->i4_num_mvs_per_blk;
+ ps_mv3 = ps_mv1 + (ps_layer_mv->i4_num_mvs_per_row);
+ ps_mv4 = ps_mv3 + (ps_layer_mv->i4_num_mvs_per_blk);
+ pi1_ref_idx1 = pi1_ref_idx;
+ pi1_ref_idx2 = pi1_ref_idx1 + ps_layer_mv->i4_num_mvs_per_blk;
+ pi1_ref_idx3 = pi1_ref_idx1 + (ps_layer_mv->i4_num_mvs_per_row);
+ pi1_ref_idx4 = pi1_ref_idx3 + (ps_layer_mv->i4_num_mvs_per_blk);
+
+ /* Initialize zero mv: default mv used for intra mvs */
+ s_zero_mv.s_mv.i2_mvx = 0;
+ s_zero_mv.s_mv.i2_mvy = 0;
+ s_zero_mv.i1_ref_idx = 0;
+
+ if((ps_search_results->e_cu_size == CU_16x16) && (ps_search_results->u1_split_flag) &&
+ (ps_search_results->i4_part_mask & ENABLE_NxN))
+ {
+ i4_part_type = PRT_NxN;
+ }
+
+ for(i = 0; i < ps_prms->i4_num_ref; i++)
+ {
+ for(j = 0; j < ps_layer_mv->i4_num_mvs_per_ref; j++)
+ {
+ WORD32 i4_part_id = ge_part_type_to_part_id[i4_part_type][0];
+
+ num_parts = gau1_num_parts_in_part_type[i4_part_type];
+
+ ps_search_node_tl =
+ ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id];
+
+ if(num_parts == 1)
+ {
+ ps_search_node_tr = ps_search_node_tl;
+ ps_search_node_bl = ps_search_node_tl;
+ ps_search_node_br = ps_search_node_tl;
+ }
+ else if(num_parts == 2)
+ {
+ /* For vertically oriented partitions, tl, bl pt to same result */
+ /* For horizontally oriented partition, tl, tr pt to same result */
+ /* This means for AMP, 2 of the 8x8 blks in mv bank have ambiguous */
+ /* result, e.g. for 4x16L. Here left 2 8x8 have the 4x16L partition */
+ /* and right 2 8x8 have 12x16R partition */
+ if(gau1_is_vert_part[i4_part_type])
+ {
+ ps_search_node_tr =
+ ps_search_results
+ ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1];
+ ps_search_node_bl = ps_search_node_tl;
+ }
+ else
+ {
+ ps_search_node_tr = ps_search_node_tl;
+ ps_search_node_bl =
+ ps_search_results
+ ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1];
+ }
+ ps_search_node_br =
+ ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1];
+ }
+ else
+ {
+ /* 4 unique results */
+ ps_search_node_tr =
+ ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1];
+ ps_search_node_bl =
+ ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 2];
+ ps_search_node_br =
+ ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 3];
+ }
+
+ if(ps_search_node_tl->s_mv.i2_mvx == INTRA_MV)
+ ps_search_node_tl++;
+ if(ps_search_node_tr->s_mv.i2_mvx == INTRA_MV)
+ ps_search_node_tr++;
+ if(ps_search_node_bl->s_mv.i2_mvx == INTRA_MV)
+ ps_search_node_bl++;
+ if(ps_search_node_br->s_mv.i2_mvx == INTRA_MV)
+ ps_search_node_br++;
+
+ COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_tl, 0);
+ ps_mv1++;
+ pi1_ref_idx1++;
+ COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_tr, 0);
+ ps_mv2++;
+ pi1_ref_idx2++;
+ COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_bl, 0);
+ ps_mv3++;
+ pi1_ref_idx3++;
+ COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_br, 0);
+ ps_mv4++;
+ pi1_ref_idx4++;
+
+ if(ps_prms->i4_num_results_to_store > 1)
+ {
+ ps_search_node_tl =
+ &ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id][1];
+
+ if(num_parts == 1)
+ {
+ ps_search_node_tr = ps_search_node_tl;
+ ps_search_node_bl = ps_search_node_tl;
+ ps_search_node_br = ps_search_node_tl;
+ }
+ else if(num_parts == 2)
+ {
+ /* For vertically oriented partitions, tl, bl pt to same result */
+ /* For horizontally oriented partition, tl, tr pt to same result */
+ /* This means for AMP, 2 of the 8x8 blks in mv bank have ambiguous */
+ /* result, e.g. for 4x16L. Here left 2 8x8 have the 4x16L partition */
+ /* and right 2 8x8 have 12x16R partition */
+ if(gau1_is_vert_part[i4_part_type])
+ {
+ ps_search_node_tr =
+ &ps_search_results
+ ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1];
+ ps_search_node_bl = ps_search_node_tl;
+ }
+ else
+ {
+ ps_search_node_tr = ps_search_node_tl;
+ ps_search_node_bl =
+ &ps_search_results
+ ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1];
+ }
+ ps_search_node_br =
+ &ps_search_results
+ ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1];
+ }
+ else
+ {
+ /* 4 unique results */
+ ps_search_node_tr =
+ &ps_search_results
+ ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 1][1];
+ ps_search_node_bl =
+ &ps_search_results
+ ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 2][1];
+ ps_search_node_br =
+ &ps_search_results
+ ->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id + 3][1];
+ }
+
+ if(ps_search_node_tl->s_mv.i2_mvx == INTRA_MV)
+ ps_search_node_tl++;
+ if(ps_search_node_tr->s_mv.i2_mvx == INTRA_MV)
+ ps_search_node_tr++;
+ if(ps_search_node_bl->s_mv.i2_mvx == INTRA_MV)
+ ps_search_node_bl++;
+ if(ps_search_node_br->s_mv.i2_mvx == INTRA_MV)
+ ps_search_node_br++;
+
+ COPY_SEARCH_RESULT(ps_mv1, pi1_ref_idx1, ps_search_node_tl, 0);
+ ps_mv1++;
+ pi1_ref_idx1++;
+ COPY_SEARCH_RESULT(ps_mv2, pi1_ref_idx2, ps_search_node_tr, 0);
+ ps_mv2++;
+ pi1_ref_idx2++;
+ COPY_SEARCH_RESULT(ps_mv3, pi1_ref_idx3, ps_search_node_bl, 0);
+ ps_mv3++;
+ pi1_ref_idx3++;
+ COPY_SEARCH_RESULT(ps_mv4, pi1_ref_idx4, ps_search_node_br, 0);
+ ps_mv4++;
+ pi1_ref_idx4++;
+ }
+ }
+ }
+}
+
+/**
+********************************************************************************
+* @fn hme_update_mv_bank_in_l1_me(search_results_t *ps_search_results,
+* layer_mv_t *ps_layer_mv,
+* S32 i4_search_blk_x,
+* S32 i4_search_blk_y,
+* mvbank_update_prms_t *ps_prms)
+*
+* @brief Writes the search results of one block into the mv bank, ordered by
+* the sorted-insert helper, with the L0 results ahead of the L1 results
+* @param[in] ps_search_results: contains results for the block just searched
+*
+* @param[in,out] ps_layer_mv : Has pointer to mv bank amongst other things
+*
+* @param[in] i4_search_blk_x : col num of blk being searched
+*
+* @param[in] i4_search_blk_y : row num of blk being searched
+*
+* @param[in] ps_prms : contains parameters which govern how the update is done
+*
+* @return None
+********************************************************************************
+*/
+
+void hme_update_mv_bank_in_l1_me(
+ search_results_t *ps_search_results,
+ layer_mv_t *ps_layer_mv,
+ S32 i4_search_blk_x,
+ S32 i4_search_blk_y,
+ mvbank_update_prms_t *ps_prms)
+{
+ hme_mv_t *ps_mv;
+ hme_mv_t *ps_mv1, *ps_mv2, *ps_mv3, *ps_mv4;
+ S08 *pi1_ref_idx, *pi1_ref_idx1, *pi1_ref_idx2, *pi1_ref_idx3, *pi1_ref_idx4;
+ S32 i4_blk_x, i4_blk_y, i4_offset;
+ S32 i4_j, i4_ref_id;
+ search_node_t *ps_search_node;
+ search_node_t *ps_search_node_8x8, *ps_search_node_4x4;
+
+ i4_blk_x = i4_search_blk_x << ps_prms->i4_shift;
+ i4_blk_y = i4_search_blk_y << ps_prms->i4_shift;
+ i4_offset = i4_blk_x + i4_blk_y * ps_layer_mv->i4_num_blks_per_row;
+
+ i4_offset *= ps_layer_mv->i4_num_mvs_per_blk;
+
+ /* Identify the correct offset in the mvbank and the reference id buf */
+ ps_mv = ps_layer_mv->ps_mv + i4_offset;
+ pi1_ref_idx = ps_layer_mv->pi1_ref_idx + i4_offset;
+
+ /*************************************************************************/
+ /* Supposing we store the mvs in the same blk size as we searched (e.g. */
+ /* we searched 8x8 blks and store results for 8x8 blks), then we can */
+ /* do a straightforward single update of results. This will have a 1-1 */
+ /* correspondence. */
+ /*************************************************************************/
+ if(ps_layer_mv->e_blk_size == ps_prms->e_search_blk_size)
+ {
+ search_node_t *aps_result_nodes_sorted[2][MAX_NUM_REF * 2];
+ /* Row 0 gathers the nodes destined for L0, row 1 those destined for L1 */
+ hme_mv_t *ps_mv_l0_root = ps_mv;
+ hme_mv_t *ps_mv_l1_root =
+ ps_mv + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
+
+ U32 u4_num_l0_results_updated = 0;
+ U32 u4_num_l1_results_updated = 0;
+
+ S08 *pi1_ref_idx_l0_root = pi1_ref_idx;
+ S08 *pi1_ref_idx_l1_root =
+ pi1_ref_idx_l0_root + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
+
+ for(i4_ref_id = 0; i4_ref_id < (S32)ps_prms->i4_num_ref; i4_ref_id++)
+ {
+ U32 *pu4_num_results_updated;
+ search_node_t **pps_result_nodes;
+ /* A ref flagged in pu1_is_past feeds the L0 list, any other ref the L1 list */
+ U08 u1_pred_dir_of_cur_ref = !ps_search_results->pu1_is_past[i4_ref_id];
+
+ if(u1_pred_dir_of_cur_ref)
+ {
+ pu4_num_results_updated = &u4_num_l1_results_updated;
+ pps_result_nodes = &aps_result_nodes_sorted[1][0];
+ }
+ else
+ {
+ pu4_num_results_updated = &u4_num_l0_results_updated;
+ pps_result_nodes = &aps_result_nodes_sorted[0][0];
+ }
+
+ ps_search_node = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
+
+ for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
+ {
+ hme_add_new_node_to_a_sorted_array(
+ &ps_search_node[i4_j], pps_result_nodes, NULL, *pu4_num_results_updated, 0);
+
+ ASSERT(ps_search_node[i4_j].i1_ref_idx == i4_ref_id);
+ (*pu4_num_results_updated)++;
+ }
+ }
+
+ for(i4_j = 0; i4_j < (S32)u4_num_l0_results_updated; i4_j++)
+ {
+ COPY_SEARCH_RESULT(
+ &ps_mv_l0_root[i4_j],
+ &pi1_ref_idx_l0_root[i4_j],
+ aps_result_nodes_sorted[0][i4_j],
+ 0);
+ }
+
+ for(i4_j = 0; i4_j < (S32)u4_num_l1_results_updated; i4_j++)
+ {
+ COPY_SEARCH_RESULT(
+ &ps_mv_l1_root[i4_j],
+ &pi1_ref_idx_l1_root[i4_j],
+ aps_result_nodes_sorted[1][i4_j],
+ 0);
+ }
+
+ return;
+ }
+
+ /*************************************************************************/
+ /* Case where search blk size is 8x8, but we update 4x4 results. In this */
+ /* case, we need to have NxN partitions enabled in search. */
+ /* Further, we update on a 1-1 basis the 4x4 blk mvs from the respective */
+ /* NxN partition. We also update the 8x8 result into each of the 4x4 bank*/
+ /*************************************************************************/
+ ASSERT(ps_layer_mv->e_blk_size == BLK_4x4);
+ ASSERT(ps_prms->e_search_blk_size == BLK_8x8);
+ ASSERT((ps_search_results->i4_part_mask & (ENABLE_NxN)) == (ENABLE_NxN));
+
+ /*************************************************************************/
+ /* For every 4x4 blk we store corresponding 4x4 results and 1 8x8 result */
+ /* hence the below check. */
+ /*************************************************************************/
+ ASSERT(ps_layer_mv->i4_num_mvs_per_ref <= ps_search_results->u1_num_results_per_part + 1);
+
+ ps_mv1 = ps_mv;
+ ps_mv2 = ps_mv1 + ps_layer_mv->i4_num_mvs_per_blk;
+ ps_mv3 = ps_mv1 + (ps_layer_mv->i4_num_mvs_per_row);
+ ps_mv4 = ps_mv3 + (ps_layer_mv->i4_num_mvs_per_blk);
+ pi1_ref_idx1 = pi1_ref_idx;
+ pi1_ref_idx2 = pi1_ref_idx1 + ps_layer_mv->i4_num_mvs_per_blk;
+ pi1_ref_idx3 = pi1_ref_idx1 + (ps_layer_mv->i4_num_mvs_per_row);
+ pi1_ref_idx4 = pi1_ref_idx3 + (ps_layer_mv->i4_num_mvs_per_blk);
+
+ {
+ search_node_t *aps_result_nodes_sorted[2][MAX_NUM_REF * 4];
+ U08 au1_cost_shifts_for_sorted_node[2][MAX_NUM_REF * 4];
+ /* In both arrays row 0 serves the L0 list and row 1 the L1 list */
+ S32 i;
+
+ hme_mv_t *ps_mv1_l0_root = ps_mv1;
+ hme_mv_t *ps_mv1_l1_root =
+ ps_mv1 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
+ hme_mv_t *ps_mv2_l0_root = ps_mv2;
+ hme_mv_t *ps_mv2_l1_root =
+ ps_mv2 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
+ hme_mv_t *ps_mv3_l0_root = ps_mv3;
+ hme_mv_t *ps_mv3_l1_root =
+ ps_mv3 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
+ hme_mv_t *ps_mv4_l0_root = ps_mv4;
+ hme_mv_t *ps_mv4_l1_root =
+ ps_mv4 + (ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
+
+ U32 u4_num_l0_results_updated = 0;
+ U32 u4_num_l1_results_updated = 0;
+
+ S08 *pi1_ref_idx1_l0_root = pi1_ref_idx1;
+ S08 *pi1_ref_idx1_l1_root = pi1_ref_idx1_l0_root + (ps_prms->i4_num_active_ref_l0 *
+ ps_layer_mv->i4_num_mvs_per_ref);
+ S08 *pi1_ref_idx2_l0_root = pi1_ref_idx2;
+ S08 *pi1_ref_idx2_l1_root = pi1_ref_idx2_l0_root + (ps_prms->i4_num_active_ref_l0 *
+ ps_layer_mv->i4_num_mvs_per_ref);
+ S08 *pi1_ref_idx3_l0_root = pi1_ref_idx3;
+ S08 *pi1_ref_idx3_l1_root = pi1_ref_idx3_l0_root + (ps_prms->i4_num_active_ref_l0 *
+ ps_layer_mv->i4_num_mvs_per_ref);
+ S08 *pi1_ref_idx4_l0_root = pi1_ref_idx4;
+ S08 *pi1_ref_idx4_l1_root = pi1_ref_idx4_l0_root + (ps_prms->i4_num_active_ref_l0 *
+ ps_layer_mv->i4_num_mvs_per_ref);
+
+ for(i = 0; i < 4; i++)
+ {
+ hme_mv_t *ps_mv_l0_root;
+ hme_mv_t *ps_mv_l1_root;
+
+ S08 *pi1_ref_idx_l0_root;
+ S08 *pi1_ref_idx_l1_root;
+ /* NOTE(review): counters/lists are not reset per 4x4 blk, only clamped below - confirm */
+ for(i4_ref_id = 0; i4_ref_id < ps_search_results->u1_num_active_ref; i4_ref_id++)
+ {
+ U32 *pu4_num_results_updated;
+ search_node_t **pps_result_nodes;
+ U08 *pu1_cost_shifts_for_sorted_node;
+
+ U08 u1_pred_dir_of_cur_ref = !ps_search_results->pu1_is_past[i4_ref_id];
+
+ if(u1_pred_dir_of_cur_ref)
+ {
+ pu4_num_results_updated = &u4_num_l1_results_updated;
+ pps_result_nodes = &aps_result_nodes_sorted[1][0];
+ pu1_cost_shifts_for_sorted_node = &au1_cost_shifts_for_sorted_node[1][0];
+ }
+ else
+ { /* L0 uses its own cost-shift row, parallel to its own node row */
+ pu4_num_results_updated = &u4_num_l0_results_updated;
+ pps_result_nodes = &aps_result_nodes_sorted[0][0];
+ pu1_cost_shifts_for_sorted_node = &au1_cost_shifts_for_sorted_node[0][0];
+ }
+
+ ps_search_node_8x8 = ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
+
+ ps_search_node_4x4 =
+ ps_search_results->aps_part_results[i4_ref_id][PART_ID_NxN_TL + i];
+
+ for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
+ {
+ hme_add_new_node_to_a_sorted_array(
+ &ps_search_node_4x4[i4_j],
+ pps_result_nodes,
+ pu1_cost_shifts_for_sorted_node,
+ *pu4_num_results_updated,
+ 0);
+
+ (*pu4_num_results_updated)++;
+
+ hme_add_new_node_to_a_sorted_array(
+ &ps_search_node_8x8[i4_j],
+ pps_result_nodes,
+ pu1_cost_shifts_for_sorted_node,
+ *pu4_num_results_updated,
+ 2); /* presumably scales the 8x8 cost to the 4x4 area - TODO confirm */
+
+ (*pu4_num_results_updated)++;
+ }
+ }
+
+ switch(i)
+ {
+ case 0:
+ {
+ ps_mv_l0_root = ps_mv1_l0_root;
+ ps_mv_l1_root = ps_mv1_l1_root;
+
+ pi1_ref_idx_l0_root = pi1_ref_idx1_l0_root;
+ pi1_ref_idx_l1_root = pi1_ref_idx1_l1_root;
+
+ break;
+ }
+ case 1:
+ {
+ ps_mv_l0_root = ps_mv2_l0_root;
+ ps_mv_l1_root = ps_mv2_l1_root;
+
+ pi1_ref_idx_l0_root = pi1_ref_idx2_l0_root;
+ pi1_ref_idx_l1_root = pi1_ref_idx2_l1_root;
+
+ break;
+ }
+ case 2:
+ {
+ ps_mv_l0_root = ps_mv3_l0_root;
+ ps_mv_l1_root = ps_mv3_l1_root;
+
+ pi1_ref_idx_l0_root = pi1_ref_idx3_l0_root;
+ pi1_ref_idx_l1_root = pi1_ref_idx3_l1_root;
+
+ break;
+ }
+ case 3:
+ {
+ ps_mv_l0_root = ps_mv4_l0_root;
+ ps_mv_l1_root = ps_mv4_l1_root;
+
+ pi1_ref_idx_l0_root = pi1_ref_idx4_l0_root;
+ pi1_ref_idx_l1_root = pi1_ref_idx4_l1_root;
+
+ break;
+ }
+ }
+ /* Store no more results than the bank holds for each direction */
+ u4_num_l0_results_updated =
+ MIN((S32)u4_num_l0_results_updated,
+ ps_prms->i4_num_active_ref_l0 * ps_layer_mv->i4_num_mvs_per_ref);
+
+ u4_num_l1_results_updated =
+ MIN((S32)u4_num_l1_results_updated,
+ ps_prms->i4_num_active_ref_l1 * ps_layer_mv->i4_num_mvs_per_ref);
+
+ for(i4_j = 0; i4_j < (S32)u4_num_l0_results_updated; i4_j++)
+ {
+ COPY_SEARCH_RESULT(
+ &ps_mv_l0_root[i4_j],
+ &pi1_ref_idx_l0_root[i4_j],
+ aps_result_nodes_sorted[0][i4_j],
+ 0);
+ }
+
+ for(i4_j = 0; i4_j < (S32)u4_num_l1_results_updated; i4_j++)
+ {
+ COPY_SEARCH_RESULT(
+ &ps_mv_l1_root[i4_j],
+ &pi1_ref_idx_l1_root[i4_j],
+ aps_result_nodes_sorted[1][i4_j],
+ 0);
+ }
+ }
+ }
+}
+
+/**
+******************************************************************************
+* @brief Scales a motion vector component projected from a different layer of
+* the same picture (so no ref id related delta poc scaling is required)
+******************************************************************************
+*/
+/* mvcomp_p and dim_p are each expanded twice: pass side-effect free values. */
+#define SCALE_MV_COMP_RES(mvcomp_p, dim_c, dim_p) \
+ ((((mvcomp_p) * (dim_c)) + ((SIGN((mvcomp_p)) * (dim_p)) >> 1)) / (dim_p))
+/**
+********************************************************************************
+* @fn hme_project_coloc_candt(search_node_t *ps_search_node,
+* layer_ctxt_t *ps_curr_layer,
+* layer_ctxt_t *ps_coarse_layer,
+* S32 i4_pos_x,
+* S32 i4_pos_y,
+* S08 i1_ref_id,
+* S32 i4_result_id)
+*
+* @brief Fetches, from the coarser layer's mv bank, the candidate stored for
+* the block colocated with (x, y) and rescales it to the current layer
+*
+* @param[out] ps_search_node : receives the projected mv and ref idx
+*
+* @param[in] ps_curr_layer : context of the layer being searched
+*
+* @param[in] ps_coarse_layer : context of the coarser layer owning the mv bank
+*
+* @param[in] i4_pos_x : x position (w.r.t. curr layer) needing a candidate
+*
+* @param[in] i4_pos_y : y position (w.r.t. curr layer) needing a candidate
+*
+* @param[in] i1_ref_id : reference id whose bank entries are read
+*
+* @param[in] i4_result_id : index of the stored result for that reference
+* (0 : best result, 1 : next best)
+*
+* @return None
+********************************************************************************
+*/
+
+void hme_project_coloc_candt(
+ search_node_t *ps_search_node,
+ layer_ctxt_t *ps_curr_layer,
+ layer_ctxt_t *ps_coarse_layer,
+ S32 i4_pos_x,
+ S32 i4_pos_y,
+ S08 i1_ref_id,
+ S32 i4_result_id)
+{
+ /* Picture dimensions of the current layer and of the coarser layer */
+ S32 i4_cur_wd = ps_curr_layer->i4_wd;
+ S32 i4_cur_ht = ps_curr_layer->i4_ht;
+ S32 i4_crs_wd = ps_coarse_layer->i4_wd;
+ S32 i4_crs_ht = ps_coarse_layer->i4_ht;
+
+ /* Mv bank of the coarser layer and the width looked up for its blk size */
+ layer_mv_t *ps_bank = ps_coarse_layer->ps_layer_mvbank;
+ S32 i4_bank_blk_wd = (S32)gau1_blk_size_to_wd[ps_bank->e_blk_size];
+
+ S32 i4_col, i4_row, i4_blk_offset, i4_ref_offset;
+ hme_mv_t *ps_bank_mv;
+ S08 *pi1_bank_ref_idx;
+
+ /* Safety check to avoid uninitialized access across temporal layers */
+ i4_pos_x = CLIP3(i4_pos_x, 0, (i4_cur_wd - i4_bank_blk_wd));
+ i4_pos_y = CLIP3(i4_pos_y, 0, (i4_cur_ht - i4_bank_blk_wd));
+
+ /* Map the clipped position to a block column / row of the coarser layer */
+ i4_col = (i4_pos_x * i4_crs_wd) / (i4_cur_wd * i4_bank_blk_wd);
+ i4_row = (i4_pos_y * i4_crs_ht) / (i4_cur_ht * i4_bank_blk_wd);
+
+ /* Offset of that block in the bank, then of the requested ref within it */
+ i4_blk_offset = (i4_col * ps_bank->i4_num_mvs_per_blk) +
+ (ps_bank->i4_num_mvs_per_row * i4_row);
+ i4_ref_offset = i1_ref_id * ps_bank->i4_num_mvs_per_ref;
+
+ ps_bank_mv = ps_bank->ps_mv + i4_blk_offset + i4_ref_offset;
+ pi1_bank_ref_idx = ps_bank->pi1_ref_idx + i4_blk_offset + i4_ref_offset;
+
+ /* Rescale the stored mv by the ratio of the layer dimensions */
+ ps_search_node->s_mv.i2_mvx =
+ SCALE_MV_COMP_RES(ps_bank_mv[i4_result_id].i2_mv_x, i4_cur_wd, i4_crs_wd);
+ ps_search_node->s_mv.i2_mvy =
+ SCALE_MV_COMP_RES(ps_bank_mv[i4_result_id].i2_mv_y, i4_cur_ht, i4_crs_ht);
+ ps_search_node->i1_ref_idx = pi1_bank_ref_idx[i4_result_id];
+ ps_search_node->u1_subpel_done = 0;
+ /* Negative ref idx or an intra mv: fall back to zero mv on i1_ref_id */
+ if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV))
+ {
+ ps_search_node->s_mv.i2_mvy = 0;
+ ps_search_node->s_mv.i2_mvx = 0;
+ ps_search_node->i1_ref_idx = i1_ref_id;
+ }
+}
+
+/**
+********************************************************************************
+* @fn hme_project_coloc_candt_dyadic(search_node_t *ps_search_node,
+* layer_ctxt_t *ps_curr_layer,
+* layer_ctxt_t *ps_coarse_layer,
+* S32 i4_pos_x,
+* S32 i4_pos_y,
+* S08 i1_ref_id,
+* S32 i4_result_id)
+*
+* @brief Fetches, from the coarser layer's mv bank, the candidate stored for
+* the colocated block and doubles it (layer ratios are dyadic)
+*
+* @param[out] ps_search_node : receives the projected mv and ref idx
+*
+* @param[in] ps_curr_layer : context of the layer being searched
+*
+* @param[in] ps_coarse_layer : context of the coarser layer owning the mv bank
+*
+* @param[in] i4_pos_x : x position (w.r.t. curr layer) needing a candidate
+*
+* @param[in] i4_pos_y : y position (w.r.t. curr layer) needing a candidate
+*
+* @param[in] i1_ref_id : reference id whose bank entries are read
+*
+* @param[in] i4_result_id : index of the stored result for that reference
+* (0 : best result, 1 : next best)
+*
+* @return None
+********************************************************************************
+*/
+
+void hme_project_coloc_candt_dyadic(
+ search_node_t *ps_search_node,
+ layer_ctxt_t *ps_curr_layer,
+ layer_ctxt_t *ps_coarse_layer,
+ S32 i4_pos_x,
+ S32 i4_pos_y,
+ S08 i1_ref_id,
+ S32 i4_result_id)
+{
+ /* Dimensions of the current layer, used only to clip the position */
+ S32 i4_cur_wd = ps_curr_layer->i4_wd;
+ S32 i4_cur_ht = ps_curr_layer->i4_ht;
+
+ /* Coarser layer's mv bank and the shift looked up for its block size */
+ layer_mv_t *ps_bank = ps_coarse_layer->ps_layer_mvbank;
+ S32 blksize_p = (S32)gau1_blk_size_to_wd_shift[ps_bank->e_blk_size];
+
+ S32 i4_col, i4_row, i4_blk_offset, i4_ref_offset;
+ hme_mv_t *ps_bank_mv;
+ S08 *pi1_bank_ref_idx;
+
+ /* ASSERT for valid sizes */
+ ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5));
+
+ /* Safety check to avoid uninitialized access across temporal layers */
+ /* NOTE(review): the upper bound subtracts the shift count rather than a */
+ /* block width in pixels - confirm this is the intended limit */
+ i4_pos_x = CLIP3(i4_pos_x, 0, (i4_cur_wd - blksize_p));
+ i4_pos_y = CLIP3(i4_pos_y, 0, (i4_cur_ht - blksize_p));
+
+ /* Block column / row of the clipped position in the coarser layer's bank */
+ /* TODO: convert these to scale factors at pic level */
+ i4_col = i4_pos_x >> blksize_p;
+ i4_row = i4_pos_y >> blksize_p;
+
+ /* Offset of that block in the bank, then of the requested ref within it */
+ i4_blk_offset = (i4_col * ps_bank->i4_num_mvs_per_blk) +
+ (ps_bank->i4_num_mvs_per_row * i4_row);
+ i4_ref_offset = i1_ref_id * ps_bank->i4_num_mvs_per_ref;
+
+ ps_bank_mv = ps_bank->ps_mv + i4_blk_offset + i4_ref_offset;
+ pi1_bank_ref_idx = ps_bank->pi1_ref_idx + i4_blk_offset + i4_ref_offset;
+
+ /* Dyadic layers: the stored mv is doubled */
+ /* NOTE(review): the INTRA_MV test below sees the doubled x value - confirm */
+ ps_search_node->s_mv.i2_mvx = ps_bank_mv[i4_result_id].i2_mv_x << 1;
+ ps_search_node->s_mv.i2_mvy = ps_bank_mv[i4_result_id].i2_mv_y << 1;
+ ps_search_node->i1_ref_idx = pi1_bank_ref_idx[i4_result_id];
+
+ /* Negative ref idx or an intra mv: fall back to zero mv on i1_ref_id */
+ if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV))
+ {
+ ps_search_node->s_mv.i2_mvy = 0;
+ ps_search_node->s_mv.i2_mvx = 0;
+ ps_search_node->i1_ref_idx = i1_ref_id;
+ }
+}
+
+/* Dyadic projection of a colocated coarse-layer candidate for a given pred */
+/* dir: for u1_pred_dir == 1 the entries read start i4_num_act_ref_l0 refs */
+/* into the block; u1_default_ref_id is the ref idx of the zero-mv fallback. */
+void hme_project_coloc_candt_dyadic_implicit(
+ search_node_t *ps_search_node,
+ layer_ctxt_t *ps_curr_layer,
+ layer_ctxt_t *ps_coarse_layer,
+ S32 i4_pos_x,
+ S32 i4_pos_y,
+ S32 i4_num_act_ref_l0,
+ U08 u1_pred_dir,
+ U08 u1_default_ref_id,
+ S32 i4_result_id)
+{
+ S32 i4_cur_wd = ps_curr_layer->i4_wd;
+ S32 i4_cur_ht = ps_curr_layer->i4_ht;
+ layer_mv_t *ps_bank = ps_coarse_layer->ps_layer_mvbank;
+ S32 blksize_p = (S32)gau1_blk_size_to_wd_shift[ps_bank->e_blk_size];
+ S32 i4_col, i4_row, i4_blk_offset;
+ hme_mv_t *ps_bank_mv;
+ S08 *pi1_bank_ref_idx;
+
+ ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5));
+
+ /* Safety check to avoid uninitialized access across temporal layers */
+ i4_pos_x = CLIP3(i4_pos_x, 0, (i4_cur_wd - blksize_p));
+ i4_pos_y = CLIP3(i4_pos_y, 0, (i4_cur_ht - blksize_p));
+
+ /* Block column / row of the clipped position in the coarser layer's bank */
+ /* TODO: convert these to scale factors at pic level */
+ i4_col = i4_pos_x >> blksize_p;
+ i4_row = i4_pos_y >> blksize_p;
+
+ /* Start of the results stored for that block */
+ i4_blk_offset = (i4_col * ps_bank->i4_num_mvs_per_blk) +
+ (ps_bank->i4_num_mvs_per_row * i4_row);
+ ps_bank_mv = ps_bank->ps_mv + i4_blk_offset;
+ pi1_bank_ref_idx = ps_bank->pi1_ref_idx + i4_blk_offset;
+
+ /* Pred dir 1 skips the entries of the i4_num_act_ref_l0 leading refs */
+ if(u1_pred_dir == 1)
+ {
+ S32 i4_l1_offset = i4_num_act_ref_l0 * ps_bank->i4_num_mvs_per_ref;
+ ps_bank_mv += i4_l1_offset;
+ pi1_bank_ref_idx += i4_l1_offset;
+ }
+
+ /* Dyadic layers: the stored mv is doubled */
+ ps_search_node->s_mv.i2_mvx = ps_bank_mv[i4_result_id].i2_mv_x << 1;
+ ps_search_node->s_mv.i2_mvy = ps_bank_mv[i4_result_id].i2_mv_y << 1;
+ ps_search_node->i1_ref_idx = pi1_bank_ref_idx[i4_result_id];
+
+ /* Negative ref idx or an intra mv: zero mv on the default reference */
+ if((ps_search_node->i1_ref_idx < 0) || (ps_search_node->s_mv.i2_mvx == INTRA_MV))
+ {
+ ps_search_node->s_mv.i2_mvy = 0;
+ ps_search_node->s_mv.i2_mvx = 0;
+ ps_search_node->i1_ref_idx = u1_default_ref_id;
+ }
+}
+/* Copies the four range limits of prm2 (a struct) into prm1, each << shift. */
+#define SCALE_RANGE_PRMS(prm1, prm2, shift) \
+ { \
+ prm1.i2_min_x = prm2.i2_min_x << shift; \
+ prm1.i2_max_x = prm2.i2_max_x << shift; \
+ prm1.i2_min_y = prm2.i2_min_y << shift; \
+ prm1.i2_max_y = prm2.i2_max_y << shift; \
+ }
+/* Same operation as SCALE_RANGE_PRMS, for prm1 / prm2 given as pointers. */
+#define SCALE_RANGE_PRMS_POINTERS(prm1, prm2, shift) \
+ { \
+ prm1->i2_min_x = prm2->i2_min_x << shift; \
+ prm1->i2_max_x = prm2->i2_max_x << shift; \
+ prm1->i2_min_y = prm2->i2_min_y << shift; \
+ prm1->i2_max_y = prm2->i2_max_y << shift; \
+ }
+
+/**
+********************************************************************************
+* @fn void hme_refine_frm_init(layer_ctxt_t *ps_curr_layer,
+* refine_prms_t *ps_refine_prms,
+* layer_ctxt_t *ps_coarse_layer)
+* @brief Frame level init of a refinement layer in ME: sets up its mv bank
+*
+* @param[in,out] ps_curr_layer : layer whose mv bank is initialised
+* @param[in] ps_refine_prms : refinement layer prms
+* @param[in] ps_coarse_layer : coarser layer; bounds the number of refs used
+*
+* @return None
+********************************************************************************
+*/
+void hme_refine_frm_init(
+ layer_ctxt_t *ps_curr_layer, refine_prms_t *ps_refine_prms, layer_ctxt_t *ps_coarse_layer)
+{
+ /* Number of references held in the coarser layer's mv bank */
+ S32 i4_coarse_num_ref = ps_coarse_layer->ps_layer_mvbank->i4_num_ref;
+
+ /* Explicit search keeps every coarse-layer ref, otherwise 2 are used; */
+ /* either way never more than the coarser layer has (clamped below) */
+ S32 i4_bank_num_ref = ps_refine_prms->explicit_ref ? i4_coarse_num_ref : 2;
+
+ /* Results are banked per 8x8 blk unless 4x4 partitions are enabled */
+ BLK_SIZE_T e_bank_blk_size = BLK_8x8;
+
+ if(ps_refine_prms->i4_enable_4x4_part)
+ {
+ e_bank_blk_size = BLK_4x4;
+ }
+
+ if(i4_bank_num_ref > i4_coarse_num_ref)
+ {
+ i4_bank_num_ref = i4_coarse_num_ref;
+ }
+
+ /* The final argument is 0 for every layer id above 0 and 1 otherwise; */
+ /* its meaning is defined by hme_init_mv_bank */
+ hme_init_mv_bank(
+ ps_curr_layer,
+ e_bank_blk_size,
+ i4_bank_num_ref,
+ ps_refine_prms->i4_num_mvbank_results,
+ ps_refine_prms->i4_layer_id > 0 ? 0 : 1);
+}
+
+#if 1 //ENABLE_CU_RECURSION || TEST_AND_EVALUATE_CU_RECURSION
+/**
+********************************************************************************
+* @fn void hme_init_clusters_16x16
+* (
+* cluster_16x16_blk_t *ps_cluster_blk_16x16, S32 bidir_enabled
+* )
+*
+* @brief Resets the cluster bookkeeping of a 16x16 block before clustering
+*
+* @param[in/out] ps_cluster_blk_16x16: cluster data of the 16x16 block to reset
+* @param[in] bidir_enabled: non-zero selects the "_B" centroid distance limit
+*
+* @return None
+********************************************************************************
+*/
+static __inline void
+ hme_init_clusters_16x16(cluster_16x16_blk_t *ps_cluster_blk_16x16, S32 bidir_enabled)
+{
+ S32 i4_cluster, i4_ref;
+
+ /* Block level counters */
+ ps_cluster_blk_16x16->best_inter_cost = 0;
+ ps_cluster_blk_16x16->intra_mv_area = 0;
+ ps_cluster_blk_16x16->num_clusters = 0;
+ /* Every cluster slot starts out invalid, with no area accumulated */
+ for(i4_cluster = 0; i4_cluster < MAX_NUM_CLUSTERS_16x16; i4_cluster++)
+ {
+ ps_cluster_blk_16x16->as_cluster_data[i4_cluster].uni_mv_pixel_area = 0;
+ ps_cluster_blk_16x16->as_cluster_data[i4_cluster].bi_mv_pixel_area = 0;
+ ps_cluster_blk_16x16->as_cluster_data[i4_cluster].is_valid_cluster = 0;
+ ps_cluster_blk_16x16->as_cluster_data[i4_cluster].max_dist_from_centroid =
+ bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_16x16_B : MAX_DISTANCE_FROM_CENTROID_16x16;
+ }
+ /* No clusters recorded against any reference yet */
+ for(i4_ref = 0; i4_ref < MAX_NUM_REF; i4_ref++)
+ {
+ ps_cluster_blk_16x16->au1_num_clusters[i4_ref] = 0;
+ }
+}
+
+/**
+********************************************************************************
+* @fn void hme_init_clusters_32x32
+* (
+* cluster_32x32_blk_t *ps_cluster_blk_32x32, S32 bidir_enabled
+* )
+*
+* @brief Resets the cluster bookkeeping of a 32x32 block before clustering
+*
+* @param[in/out] ps_cluster_blk_32x32: cluster data of the 32x32 block to reset
+* @param[in] bidir_enabled: non-zero selects the "_B" centroid distance limit
+*
+* @return None
+********************************************************************************
+*/
+static __inline void
+ hme_init_clusters_32x32(cluster_32x32_blk_t *ps_cluster_blk_32x32, S32 bidir_enabled)
+{
+ S32 i4_cluster, i4_ref;
+ /* Block level state: counters cleared, best refs marked as not chosen (-1) */
+ ps_cluster_blk_32x32->num_clusters_with_weak_sdi_density = 0;
+ ps_cluster_blk_32x32->best_inter_cost = 0;
+ ps_cluster_blk_32x32->best_uni_ref = -1;
+ ps_cluster_blk_32x32->best_alt_ref = -1;
+ ps_cluster_blk_32x32->intra_mv_area = 0;
+ ps_cluster_blk_32x32->num_clusters = 0;
+ /* Every cluster slot starts out invalid, with no area accumulated */
+ for(i4_cluster = 0; i4_cluster < MAX_NUM_CLUSTERS_32x32; i4_cluster++)
+ {
+ ps_cluster_blk_32x32->as_cluster_data[i4_cluster].uni_mv_pixel_area = 0;
+ ps_cluster_blk_32x32->as_cluster_data[i4_cluster].bi_mv_pixel_area = 0;
+ ps_cluster_blk_32x32->as_cluster_data[i4_cluster].is_valid_cluster = 0;
+ ps_cluster_blk_32x32->as_cluster_data[i4_cluster].max_dist_from_centroid =
+ bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_32x32_B : MAX_DISTANCE_FROM_CENTROID_32x32;
+ }
+ /* No clusters recorded against any reference yet */
+ for(i4_ref = 0; i4_ref < MAX_NUM_REF; i4_ref++)
+ {
+ ps_cluster_blk_32x32->au1_num_clusters[i4_ref] = 0;
+ }
+}
+
+/**
+********************************************************************************
+* @fn void hme_init_clusters_64x64
+* (
+* cluster_64x64_blk_t *ps_cluster_blk_64x64, S32 bidir_enabled
+* )
+*
+* @brief Resets the cluster bookkeeping of a 64x64 block before clustering
+*
+* @param[in/out] ps_cluster_blk_64x64: cluster data of the 64x64 block to reset
+* @param[in] bidir_enabled: non-zero selects the "_B" centroid distance limit
+*
+* @return None
+********************************************************************************
+*/
+static __inline void
+ hme_init_clusters_64x64(cluster_64x64_blk_t *ps_cluster_blk_64x64, S32 bidir_enabled)
+{
+ S32 i4_cluster, i4_ref;
+ /* Block level state: counters cleared, best refs marked as not chosen (-1) */
+ ps_cluster_blk_64x64->best_inter_cost = 0;
+ ps_cluster_blk_64x64->best_uni_ref = -1;
+ ps_cluster_blk_64x64->best_alt_ref = -1;
+ ps_cluster_blk_64x64->intra_mv_area = 0;
+ ps_cluster_blk_64x64->num_clusters = 0;
+ /* Every cluster slot starts out invalid, with no area accumulated */
+ for(i4_cluster = 0; i4_cluster < MAX_NUM_CLUSTERS_64x64; i4_cluster++)
+ {
+ ps_cluster_blk_64x64->as_cluster_data[i4_cluster].uni_mv_pixel_area = 0;
+ ps_cluster_blk_64x64->as_cluster_data[i4_cluster].bi_mv_pixel_area = 0;
+ ps_cluster_blk_64x64->as_cluster_data[i4_cluster].is_valid_cluster = 0;
+ ps_cluster_blk_64x64->as_cluster_data[i4_cluster].max_dist_from_centroid =
+ bidir_enabled ? MAX_DISTANCE_FROM_CENTROID_64x64_B : MAX_DISTANCE_FROM_CENTROID_64x64;
+ }
+ /* No clusters recorded against any reference yet */
+ for(i4_ref = 0; i4_ref < MAX_NUM_REF; i4_ref++)
+ {
+ ps_cluster_blk_64x64->au1_num_clusters[i4_ref] = 0;
+ }
+}
+
+/**
+********************************************************************************
+* @fn void hme_sort_and_assign_top_ref_ids_areawise
+* (
+* ctb_cluster_info_t *ps_ctb_cluster_info, S32 bidir_enabled,
+* S32 block_width, CU_POS_T e_cu_pos)
+*
+* @brief Finds best_uni_ref (ref id with the largest summed uni-pred area) and
+* best_alt_ref (chosen from the summed bi-pred areas) for one block
+* @param[in/out] ps_ctb_cluster_info: structure that points to ctb data
+*
+* @param[in] bidir_enabled: flag that indicates whether or not bi-pred is
+* enabled; when 0 the alternate ref passed on is -1
+*
+* @param[in] block_width: 32 selects a 32x32 block, any other value the 64x64
+*
+* @param[in] e_cu_pos: position of the 32x32 block within the CTB
+*
+* @return None; returns early, before any update, for an invalid or empty block
+********************************************************************************
+*/
+void hme_sort_and_assign_top_ref_ids_areawise(
+ ctb_cluster_info_t *ps_ctb_cluster_info, S32 bidir_enabled, S32 block_width, CU_POS_T e_cu_pos)
+{
+ cluster_32x32_blk_t *ps_32x32 = NULL;
+ cluster_64x64_blk_t *ps_64x64 = NULL;
+ cluster_data_t *ps_data;
+
+ S32 j, k;
+ /* Working tables, all indexed by reference id */
+ S32 ai4_uni_area[MAX_NUM_REF];
+ S32 ai4_bi_area[MAX_NUM_REF];
+ S32 ai4_ref_id_found[MAX_NUM_REF];
+ S32 ai4_ref_id[MAX_NUM_REF];
+
+ S32 best_uni_ref = -1, best_alt_ref = -1;
+ S32 num_clusters;
+ S32 num_ref = 0;
+ S32 num_clusters_evaluated = 0;
+ S32 is_cur_blk_valid;
+
+ if(32 == block_width)
+ {
+ /* Valid when the bit for this position is set in blk_32x32_mask */
+ is_cur_blk_valid = (ps_ctb_cluster_info->blk_32x32_mask & (1 << e_cu_pos)) || 0;
+ ps_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[e_cu_pos];
+ num_clusters = ps_32x32->num_clusters;
+ ps_data = &ps_32x32->as_cluster_data[0];
+ }
+ else
+ {
+ /* The 64x64 block is valid only when blk_32x32_mask equals 0xf */
+ is_cur_blk_valid = (ps_ctb_cluster_info->blk_32x32_mask == 0xf);
+ ps_64x64 = ps_ctb_cluster_info->ps_64x64_blk;
+ num_clusters = ps_64x64->num_clusters;
+ ps_data = &ps_64x64->as_cluster_data[0];
+ }
+
+#if !ENABLE_4CTB_EVALUATION
+ if((num_clusters > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK))
+ {
+ return;
+ }
+#endif
+ if(num_clusters == 0)
+ {
+ return;
+ }
+ else if(!is_cur_blk_valid)
+ {
+ return;
+ }
+ /* Per-ref accumulators start at 0; every ref id table entry starts at -1 */
+ memset(ai4_uni_area, 0, sizeof(S32) * MAX_NUM_REF);
+ memset(ai4_bi_area, 0, sizeof(S32) * MAX_NUM_REF);
+ memset(ai4_ref_id_found, 0, sizeof(S32) * MAX_NUM_REF);
+ memset(ai4_ref_id, -1, sizeof(S32) * MAX_NUM_REF);
+ /* Sum the areas of the valid clusters per ref id; stops after num_clusters valid ones */
+ for(j = 0; num_clusters_evaluated < num_clusters; j++, ps_data++)
+ {
+ S32 ref_id;
+
+ if(!ps_data->is_valid_cluster)
+ {
+ continue;
+ }
+
+ ref_id = ps_data->ref_id;
+
+ num_clusters_evaluated++;
+
+ ai4_uni_area[ref_id] += ps_data->uni_mv_pixel_area;
+ ai4_bi_area[ref_id] += ps_data->bi_mv_pixel_area;
+
+ if(!ai4_ref_id_found[ref_id])
+ {
+ ai4_ref_id[ref_id] = ref_id;
+ ai4_ref_id_found[ref_id] = 1;
+ num_ref++;
+ }
+ }
+ /* Largest uni area is brought to slot 0 (SWAP_HME presumably exchanges its operands) */
+ {
+ S32 ai4_ref_id_temp[MAX_NUM_REF];
+ /* Ids are reordered in a scratch copy so ai4_ref_id stays intact for the bi pass */
+ memcpy(ai4_ref_id_temp, ai4_ref_id, sizeof(S32) * MAX_NUM_REF);
+
+ for(k = 1; k < MAX_NUM_REF; k++)
+ {
+ if(ai4_uni_area[k] > ai4_uni_area[0])
+ {
+ SWAP_HME(ai4_uni_area[k], ai4_uni_area[0], S32);
+ SWAP_HME(ai4_ref_id_temp[k], ai4_ref_id_temp[0], S32);
+ }
+ }
+
+ best_uni_ref = ai4_ref_id_temp[0];
+ }
+ /* Same selection on the bi areas, this time reordering ai4_ref_id itself */
+ if(bidir_enabled)
+ {
+ for(k = 1; k < MAX_NUM_REF; k++)
+ {
+ if(ai4_bi_area[k] > ai4_bi_area[0])
+ {
+ SWAP_HME(ai4_bi_area[k], ai4_bi_area[0], S32);
+ SWAP_HME(ai4_ref_id[k], ai4_ref_id[0], S32);
+ }
+ }
+ /* Largest bi area is 0: pass on -1 as the alternate ref and finish */
+ if(!ai4_bi_area[0])
+ {
+ best_alt_ref = -1;
+
+ if(32 == block_width)
+ {
+ SET_VALUES_FOR_TOP_REF_IDS(ps_32x32, best_uni_ref, best_alt_ref, num_ref);
+ }
+ else
+ {
+ SET_VALUES_FOR_TOP_REF_IDS(ps_64x64, best_uni_ref, best_alt_ref, num_ref);
+ }
+
+ return;
+ }
+ /* Top bi ref equals the uni ref: take the ref with the next largest bi area */
+ if(best_uni_ref == ai4_ref_id[0])
+ {
+ for(k = 2; k < MAX_NUM_REF; k++)
+ {
+ if(ai4_bi_area[k] > ai4_bi_area[1])
+ {
+ SWAP_HME(ai4_bi_area[k], ai4_bi_area[1], S32);
+ SWAP_HME(ai4_ref_id[k], ai4_ref_id[1], S32);
+ }
+ }
+
+ best_alt_ref = ai4_ref_id[1];
+ }
+ else
+ {
+ best_alt_ref = ai4_ref_id[0];
+ }
+ }
+ /* Hand the chosen refs and the count of distinct refs to the block struct */
+ if(32 == block_width)
+ {
+ SET_VALUES_FOR_TOP_REF_IDS(ps_32x32, best_uni_ref, best_alt_ref, num_ref);
+ }
+ else
+ {
+ SET_VALUES_FOR_TOP_REF_IDS(ps_64x64, best_uni_ref, best_alt_ref, num_ref);
+ }
+}
+
+/**
+********************************************************************************
+* @fn void hme_find_top_ref_ids
+* (
+* ctb_cluster_info_t *ps_ctb_cluster_info
+* )
+*
+* @brief Finds best_uni_ref and best_alt_ref
+*
+* @param[in/out] ps_ctb_cluster_info: structure that points to ctb data
+*
+* @return None
+********************************************************************************
+*/
+void hme_find_top_ref_ids(
+ ctb_cluster_info_t *ps_ctb_cluster_info, S32 bidir_enabled, S32 block_width)
+{
+ S32 i;
+
+ if(32 == block_width)
+ {
+ for(i = 0; i < 4; i++)
+ {
+ hme_sort_and_assign_top_ref_ids_areawise(
+ ps_ctb_cluster_info, bidir_enabled, block_width, (CU_POS_T)i);
+ }
+ }
+ else if(64 == block_width)
+ {
+ hme_sort_and_assign_top_ref_ids_areawise(
+ ps_ctb_cluster_info, bidir_enabled, block_width, POS_NA);
+ }
+}
+
+/**
+********************************************************************************
+* @fn void hme_boot_out_outlier
+* (
+* ctb_cluster_info_t *ps_ctb_cluster_info
+* )
+*
+* @brief Removes outlier clusters before CU tree population
+*
+* @param[in/out] ps_ctb_cluster_info: structure that points to ctb data
+*
+* @return None
+********************************************************************************
+*/
+void hme_boot_out_outlier(ctb_cluster_info_t *ps_ctb_cluster_info, S32 blk_width)
+{
+ cluster_32x32_blk_t *ps_32x32;
+
+ S32 i;
+
+ cluster_64x64_blk_t *ps_64x64 = &ps_ctb_cluster_info->ps_64x64_blk[0];
+
+ S32 sdi_threshold = ps_ctb_cluster_info->sdi_threshold;
+
+ if(32 == blk_width)
+ {
+ /* 32x32 clusters */
+ for(i = 0; i < 4; i++)
+ {
+ ps_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i];
+
+ if(ps_32x32->num_clusters > MAX_NUM_CLUSTERS_IN_ONE_REF_IDX)
+ {
+ BUMP_OUTLIER_CLUSTERS(ps_32x32, sdi_threshold);
+ }
+ }
+ }
+ else if(64 == blk_width)
+ {
+ /* 64x64 clusters */
+ if(ps_64x64->num_clusters > MAX_NUM_CLUSTERS_IN_ONE_REF_IDX)
+ {
+ BUMP_OUTLIER_CLUSTERS(ps_64x64, sdi_threshold);
+ }
+ }
+}
+
+/**
+********************************************************************************
+* @fn void hme_update_cluster_attributes
+* (
+* cluster_data_t *ps_cluster_data,
+* S32 mvx,
+* S32 mvy,
+* PART_ID_T e_part_id
+* )
+*
+* @brief Implementation fo the clustering algorithm
+*
+* @param[in/out] ps_cluster_data: pointer to cluster_data_t struct
+*
+* @param[in] mvx : x co-ordinate of the motion vector
+*
+* @param[in] mvy : y co-ordinate of the motion vector
+*
+* @param[in] ref_idx : ref_id of the motion vector
+*
+* @param[in] e_part_id : partition id of the motion vector
+*
+* @return None
+********************************************************************************
+*/
+static __inline void hme_update_cluster_attributes(
+ cluster_data_t *ps_cluster_data,
+ S32 mvx,
+ S32 mvy,
+ S32 mvdx,
+ S32 mvdy,
+ S32 ref_id,
+ S32 sdi,
+ U08 is_part_of_bi,
+ PART_ID_T e_part_id)
+{
+ LWORD64 i8_mvx_sum_q8;
+ LWORD64 i8_mvy_sum_q8;
+
+ S32 centroid_posx_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8;
+ S32 centroid_posy_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8;
+
+ if((mvdx > 0) && (ps_cluster_data->min_x > mvx))
+ {
+ ps_cluster_data->min_x = mvx;
+ }
+ else if((mvdx < 0) && (ps_cluster_data->max_x < mvx))
+ {
+ ps_cluster_data->max_x = mvx;
+ }
+
+ if((mvdy > 0) && (ps_cluster_data->min_y > mvy))
+ {
+ ps_cluster_data->min_y = mvy;
+ }
+ else if((mvdy < 0) && (ps_cluster_data->max_y < mvy))
+ {
+ ps_cluster_data->max_y = mvy;
+ }
+
+ {
+ S32 num_mvs = ps_cluster_data->num_mvs;
+
+ ps_cluster_data->as_mv[num_mvs].pixel_count = gai4_partition_area[e_part_id];
+ ps_cluster_data->as_mv[num_mvs].mvx = mvx;
+ ps_cluster_data->as_mv[num_mvs].mvy = mvy;
+
+ /***************************/
+ ps_cluster_data->as_mv[num_mvs].is_uni = !is_part_of_bi;
+ ps_cluster_data->as_mv[num_mvs].sdi = sdi;
+ /**************************/
+ }
+
+ /* Updation of centroid */
+ {
+ i8_mvx_sum_q8 = (LWORD64)centroid_posx_q8 * ps_cluster_data->num_mvs + (mvx << 8);
+ i8_mvy_sum_q8 = (LWORD64)centroid_posy_q8 * ps_cluster_data->num_mvs + (mvy << 8);
+
+ ps_cluster_data->num_mvs++;
+
+ ps_cluster_data->s_centroid.i4_pos_x_q8 =
+ (WORD32)((i8_mvx_sum_q8) / ps_cluster_data->num_mvs);
+ ps_cluster_data->s_centroid.i4_pos_y_q8 =
+ (WORD32)((i8_mvy_sum_q8) / ps_cluster_data->num_mvs);
+ }
+
+ ps_cluster_data->area_in_pixels += gai4_partition_area[e_part_id];
+
+ if(is_part_of_bi)
+ {
+ ps_cluster_data->bi_mv_pixel_area += gai4_partition_area[e_part_id];
+ }
+ else
+ {
+ ps_cluster_data->uni_mv_pixel_area += gai4_partition_area[e_part_id];
+ }
+}
+
+/**
+********************************************************************************
+* @fn void hme_try_cluster_merge
+* (
+* cluster_data_t *ps_cluster_data,
+* S32 *pi4_num_clusters,
+* S32 idx_of_updated_cluster
+* )
+*
+* @brief Implementation fo the clustering algorithm
+*
+* @param[in/out] ps_cluster_data: pointer to cluster_data_t struct
+*
+* @param[in/out] pi4_num_clusters : pointer to number of clusters
+*
+* @param[in] idx_of_updated_cluster : index of the cluster most recently
+* updated
+*
+* @return Nothing
+********************************************************************************
+*/
+void hme_try_cluster_merge(
+ cluster_data_t *ps_cluster_data, U08 *pu1_num_clusters, S32 idx_of_updated_cluster)
+{
+ centroid_t *ps_centroid;
+
+ S32 cur_pos_x_q8;
+ S32 cur_pos_y_q8;
+ S32 i;
+ S32 max_dist_from_centroid;
+ S32 mvd;
+ S32 mvdx_q8;
+ S32 mvdx;
+ S32 mvdy_q8;
+ S32 mvdy;
+ S32 num_clusters, num_clusters_evaluated;
+ S32 other_pos_x_q8;
+ S32 other_pos_y_q8;
+
+ cluster_data_t *ps_root = ps_cluster_data;
+ cluster_data_t *ps_cur_cluster = &ps_cluster_data[idx_of_updated_cluster];
+ centroid_t *ps_cur_centroid = &ps_cur_cluster->s_centroid;
+
+ /* Merge is superfluous if num_clusters is 1 */
+ if(*pu1_num_clusters == 1)
+ {
+ return;
+ }
+
+ cur_pos_x_q8 = ps_cur_centroid->i4_pos_x_q8;
+ cur_pos_y_q8 = ps_cur_centroid->i4_pos_y_q8;
+
+ max_dist_from_centroid = ps_cur_cluster->max_dist_from_centroid;
+
+ num_clusters = *pu1_num_clusters;
+ num_clusters_evaluated = 0;
+
+ for(i = 0; num_clusters_evaluated < num_clusters; i++, ps_cluster_data++)
+ {
+ if(!ps_cluster_data->is_valid_cluster)
+ {
+ continue;
+ }
+ if((ps_cluster_data->ref_id != ps_cur_cluster->ref_id) || (i == idx_of_updated_cluster))
+ {
+ num_clusters_evaluated++;
+ continue;
+ }
+
+ ps_centroid = &ps_cluster_data->s_centroid;
+
+ other_pos_x_q8 = ps_centroid->i4_pos_x_q8;
+ other_pos_y_q8 = ps_centroid->i4_pos_y_q8;
+
+ mvdx_q8 = (cur_pos_x_q8 - other_pos_x_q8);
+ mvdy_q8 = (cur_pos_y_q8 - other_pos_y_q8);
+ mvdx = (mvdx_q8 + (1 << 7)) >> 8;
+ mvdy = (mvdy_q8 + (1 << 7)) >> 8;
+
+ mvd = ABS(mvdx) + ABS(mvdy);
+
+ if(mvd <= (max_dist_from_centroid >> 1))
+ {
+ /* 0 => no updates */
+ /* 1 => min updated */
+ /* 2 => max updated */
+ S32 minmax_x_update_id;
+ S32 minmax_y_update_id;
+
+ LWORD64 i8_mv_x_sum_self = (LWORD64)cur_pos_x_q8 * ps_cur_cluster->num_mvs;
+ LWORD64 i8_mv_y_sum_self = (LWORD64)cur_pos_y_q8 * ps_cur_cluster->num_mvs;
+ LWORD64 i8_mv_x_sum_cousin = (LWORD64)other_pos_x_q8 * ps_cluster_data->num_mvs;
+ LWORD64 i8_mv_y_sum_cousin = (LWORD64)other_pos_y_q8 * ps_cluster_data->num_mvs;
+
+ (*pu1_num_clusters)--;
+
+ ps_cluster_data->is_valid_cluster = 0;
+
+ memcpy(
+ &ps_cur_cluster->as_mv[ps_cur_cluster->num_mvs],
+ ps_cluster_data->as_mv,
+ sizeof(mv_data_t) * ps_cluster_data->num_mvs);
+
+ ps_cur_cluster->num_mvs += ps_cluster_data->num_mvs;
+ ps_cur_cluster->area_in_pixels += ps_cluster_data->area_in_pixels;
+ ps_cur_cluster->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
+ ps_cur_cluster->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
+ i8_mv_x_sum_self += i8_mv_x_sum_cousin;
+ i8_mv_y_sum_self += i8_mv_y_sum_cousin;
+
+ ps_cur_centroid->i4_pos_x_q8 = (WORD32)(i8_mv_x_sum_self / ps_cur_cluster->num_mvs);
+ ps_cur_centroid->i4_pos_y_q8 = (WORD32)(i8_mv_y_sum_self / ps_cur_cluster->num_mvs);
+
+ minmax_x_update_id = (ps_cur_cluster->min_x < ps_cluster_data->min_x)
+ ? ((ps_cur_cluster->max_x > ps_cluster_data->max_x) ? 0 : 2)
+ : 1;
+ minmax_y_update_id = (ps_cur_cluster->min_y < ps_cluster_data->min_y)
+ ? ((ps_cur_cluster->max_y > ps_cluster_data->max_y) ? 0 : 2)
+ : 1;
+
+ /* Updation of centroid spread */
+ switch(minmax_x_update_id + (minmax_y_update_id << 2))
+ {
+ case 1:
+ {
+ S32 mvd, mvd_q8;
+
+ ps_cur_cluster->min_x = ps_cluster_data->min_x;
+
+ mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8);
+ mvd = (mvd_q8 + (1 << 7)) >> 8;
+
+ if(mvd > (max_dist_from_centroid))
+ {
+ ps_cluster_data->max_dist_from_centroid = mvd;
+ }
+ break;
+ }
+ case 2:
+ {
+ S32 mvd, mvd_q8;
+
+ ps_cur_cluster->max_x = ps_cluster_data->max_x;
+
+ mvd_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8;
+ mvd = (mvd_q8 + (1 << 7)) >> 8;
+
+ if(mvd > (max_dist_from_centroid))
+ {
+ ps_cluster_data->max_dist_from_centroid = mvd;
+ }
+ break;
+ }
+ case 4:
+ {
+ S32 mvd, mvd_q8;
+
+ ps_cur_cluster->min_y = ps_cluster_data->min_y;
+
+ mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8);
+ mvd = (mvd_q8 + (1 << 7)) >> 8;
+
+ if(mvd > (max_dist_from_centroid))
+ {
+ ps_cluster_data->max_dist_from_centroid = mvd;
+ }
+ break;
+ }
+ case 5:
+ {
+ S32 mvd;
+ S32 mvdx, mvdx_q8;
+ S32 mvdy, mvdy_q8;
+
+ mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8);
+ mvdy = (mvdy_q8 + (1 << 7)) >> 8;
+
+ mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8);
+ mvdx = (mvdx_q8 + (1 << 7)) >> 8;
+
+ mvd = (mvdx > mvdy) ? mvdx : mvdy;
+
+ ps_cur_cluster->min_x = ps_cluster_data->min_x;
+ ps_cur_cluster->min_y = ps_cluster_data->min_y;
+
+ if(mvd > max_dist_from_centroid)
+ {
+ ps_cluster_data->max_dist_from_centroid = mvd;
+ }
+ break;
+ }
+ case 6:
+ {
+ S32 mvd;
+ S32 mvdx, mvdx_q8;
+ S32 mvdy, mvdy_q8;
+
+ mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster->min_y << 8);
+ mvdy = (mvdy_q8 + (1 << 7)) >> 8;
+
+ mvdx_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8;
+ mvdx = (mvdx_q8 + (1 << 7)) >> 8;
+
+ mvd = (mvdx > mvdy) ? mvdx : mvdy;
+
+ ps_cur_cluster->max_x = ps_cluster_data->max_x;
+ ps_cur_cluster->min_y = ps_cluster_data->min_y;
+
+ if(mvd > max_dist_from_centroid)
+ {
+ ps_cluster_data->max_dist_from_centroid = mvd;
+ }
+ break;
+ }
+ case 8:
+ {
+ S32 mvd, mvd_q8;
+
+ ps_cur_cluster->max_y = ps_cluster_data->max_y;
+
+ mvd_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8;
+ mvd = (mvd_q8 + (1 << 7)) >> 8;
+
+ if(mvd > (max_dist_from_centroid))
+ {
+ ps_cluster_data->max_dist_from_centroid = mvd;
+ }
+ break;
+ }
+ case 9:
+ {
+ S32 mvd;
+ S32 mvdx, mvdx_q8;
+ S32 mvdy, mvdy_q8;
+
+ mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster->min_x << 8);
+ mvdx = (mvdx_q8 + (1 << 7)) >> 8;
+
+ mvdy_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8;
+ mvdy = (mvdy_q8 + (1 << 7)) >> 8;
+
+ mvd = (mvdx > mvdy) ? mvdx : mvdy;
+
+ ps_cur_cluster->min_x = ps_cluster_data->min_x;
+ ps_cur_cluster->max_y = ps_cluster_data->max_y;
+
+ if(mvd > max_dist_from_centroid)
+ {
+ ps_cluster_data->max_dist_from_centroid = mvd;
+ }
+ break;
+ }
+ case 10:
+ {
+ S32 mvd;
+ S32 mvdx, mvdx_q8;
+ S32 mvdy, mvdy_q8;
+
+ mvdx_q8 = (ps_cur_cluster->max_x << 8) - ps_centroid->i4_pos_x_q8;
+ mvdx = (mvdx_q8 + (1 << 7)) >> 8;
+
+ mvdy_q8 = (ps_cur_cluster->max_y << 8) - ps_centroid->i4_pos_y_q8;
+ mvdy = (mvdy_q8 + (1 << 7)) >> 8;
+
+ mvd = (mvdx > mvdy) ? mvdx : mvdy;
+
+ ps_cur_cluster->max_x = ps_cluster_data->max_x;
+ ps_cur_cluster->max_y = ps_cluster_data->max_y;
+
+ if(mvd > ps_cluster_data->max_dist_from_centroid)
+ {
+ ps_cluster_data->max_dist_from_centroid = mvd;
+ }
+ break;
+ }
+ default:
+ {
+ break;
+ }
+ }
+
+ hme_try_cluster_merge(ps_root, pu1_num_clusters, idx_of_updated_cluster);
+
+ return;
+ }
+
+ num_clusters_evaluated++;
+ }
+}
+
+/**
+********************************************************************************
+* @fn void hme_find_and_update_clusters
+* (
+* cluster_data_t *ps_cluster_data,
+* S32 *pi4_num_clusters,
+* S32 mvx,
+* S32 mvy,
+* S32 ref_idx,
+* PART_ID_T e_part_id
+* )
+*
+* @brief Implementation fo the clustering algorithm
+*
+* @param[in/out] ps_cluster_data: pointer to cluster_data_t struct
+*
+* @param[in/out] pi4_num_clusters : pointer to number of clusters
+*
+* @param[in] mvx : x co-ordinate of the motion vector
+*
+* @param[in] mvy : y co-ordinate of the motion vector
+*
+* @param[in] ref_idx : ref_id of the motion vector
+*
+* @param[in] e_part_id : partition id of the motion vector
+*
+* @return None
+********************************************************************************
+*/
+void hme_find_and_update_clusters(
+ cluster_data_t *ps_cluster_data,
+ U08 *pu1_num_clusters,
+ S16 i2_mv_x,
+ S16 i2_mv_y,
+ U08 i1_ref_idx,
+ S32 i4_sdi,
+ PART_ID_T e_part_id,
+ U08 is_part_of_bi)
+{
+ S32 i;
+ S32 min_mvd_cluster_id = -1;
+ S32 mvd, mvd_limit, mvdx, mvdy;
+ S32 min_mvdx, min_mvdy;
+
+ S32 min_mvd = MAX_32BIT_VAL;
+ S32 num_clusters = *pu1_num_clusters;
+
+ S32 mvx = i2_mv_x;
+ S32 mvy = i2_mv_y;
+ S32 ref_idx = i1_ref_idx;
+ S32 sdi = i4_sdi;
+ S32 new_cluster_idx = MAX_NUM_CLUSTERS_16x16;
+
+ if(num_clusters == 0)
+ {
+ cluster_data_t *ps_data = &ps_cluster_data[num_clusters];
+
+ ps_data->num_mvs = 1;
+ ps_data->s_centroid.i4_pos_x_q8 = mvx << 8;
+ ps_data->s_centroid.i4_pos_y_q8 = mvy << 8;
+ ps_data->ref_id = ref_idx;
+ ps_data->area_in_pixels = gai4_partition_area[e_part_id];
+ ps_data->as_mv[0].pixel_count = gai4_partition_area[e_part_id];
+ ps_data->as_mv[0].mvx = mvx;
+ ps_data->as_mv[0].mvy = mvy;
+
+ /***************************/
+ ps_data->as_mv[0].is_uni = !is_part_of_bi;
+ ps_data->as_mv[0].sdi = sdi;
+ if(is_part_of_bi)
+ {
+ ps_data->bi_mv_pixel_area += ps_data->area_in_pixels;
+ }
+ else
+ {
+ ps_data->uni_mv_pixel_area += ps_data->area_in_pixels;
+ }
+ /**************************/
+ ps_data->max_x = mvx;
+ ps_data->min_x = mvx;
+ ps_data->max_y = mvy;
+ ps_data->min_y = mvy;
+
+ ps_data->is_valid_cluster = 1;
+
+ *pu1_num_clusters = 1;
+ }
+ else
+ {
+ S32 num_clusters_evaluated = 0;
+
+ for(i = 0; num_clusters_evaluated < num_clusters; i++)
+ {
+ cluster_data_t *ps_data = &ps_cluster_data[i];
+
+ centroid_t *ps_centroid;
+
+ S32 mvx_q8;
+ S32 mvy_q8;
+ S32 posx_q8;
+ S32 posy_q8;
+ S32 mvdx_q8;
+ S32 mvdy_q8;
+
+ /* In anticipation of a possible merging of clusters */
+ if(ps_data->is_valid_cluster == 0)
+ {
+ new_cluster_idx = i;
+ continue;
+ }
+
+ if(ref_idx != ps_data->ref_id)
+ {
+ num_clusters_evaluated++;
+ continue;
+ }
+
+ ps_centroid = &ps_data->s_centroid;
+ posx_q8 = ps_centroid->i4_pos_x_q8;
+ posy_q8 = ps_centroid->i4_pos_y_q8;
+
+ mvx_q8 = mvx << 8;
+ mvy_q8 = mvy << 8;
+
+ mvdx_q8 = posx_q8 - mvx_q8;
+ mvdy_q8 = posy_q8 - mvy_q8;
+
+ mvdx = (((mvdx_q8 + (1 << 7)) >> 8));
+ mvdy = (((mvdy_q8 + (1 << 7)) >> 8));
+
+ mvd = ABS(mvdx) + ABS(mvdy);
+
+ if(mvd < min_mvd)
+ {
+ min_mvd = mvd;
+ min_mvdx = mvdx;
+ min_mvdy = mvdy;
+ min_mvd_cluster_id = i;
+ }
+
+ num_clusters_evaluated++;
+ }
+
+ mvd_limit = (min_mvd_cluster_id == -1)
+ ? ps_cluster_data[0].max_dist_from_centroid
+ : ps_cluster_data[min_mvd_cluster_id].max_dist_from_centroid;
+
+ /* This condition implies that min_mvd has been updated */
+ if(min_mvd <= mvd_limit)
+ {
+ hme_update_cluster_attributes(
+ &ps_cluster_data[min_mvd_cluster_id],
+ mvx,
+ mvy,
+ min_mvdx,
+ min_mvdy,
+ ref_idx,
+ sdi,
+ is_part_of_bi,
+ e_part_id);
+
+ if(PRT_NxN == ge_part_id_to_part_type[e_part_id])
+ {
+ hme_try_cluster_merge(ps_cluster_data, pu1_num_clusters, min_mvd_cluster_id);
+ }
+ }
+ else
+ {
+ cluster_data_t *ps_data = (new_cluster_idx == MAX_NUM_CLUSTERS_16x16)
+ ? &ps_cluster_data[num_clusters]
+ : &ps_cluster_data[new_cluster_idx];
+
+ ps_data->num_mvs = 1;
+ ps_data->s_centroid.i4_pos_x_q8 = mvx << 8;
+ ps_data->s_centroid.i4_pos_y_q8 = mvy << 8;
+ ps_data->ref_id = ref_idx;
+ ps_data->area_in_pixels = gai4_partition_area[e_part_id];
+ ps_data->as_mv[0].pixel_count = gai4_partition_area[e_part_id];
+ ps_data->as_mv[0].mvx = mvx;
+ ps_data->as_mv[0].mvy = mvy;
+
+ /***************************/
+ ps_data->as_mv[0].is_uni = !is_part_of_bi;
+ ps_data->as_mv[0].sdi = sdi;
+ if(is_part_of_bi)
+ {
+ ps_data->bi_mv_pixel_area += ps_data->area_in_pixels;
+ }
+ else
+ {
+ ps_data->uni_mv_pixel_area += ps_data->area_in_pixels;
+ }
+ /**************************/
+ ps_data->max_x = mvx;
+ ps_data->min_x = mvx;
+ ps_data->max_y = mvy;
+ ps_data->min_y = mvy;
+
+ ps_data->is_valid_cluster = 1;
+
+ num_clusters++;
+ *pu1_num_clusters = num_clusters;
+ }
+ }
+}
+
+/**
+********************************************************************************
+* @fn void hme_update_32x32_cluster_attributes
+* (
+* cluster_32x32_blk_t *ps_blk_32x32,
+* cluster_data_t *ps_cluster_data
+* )
+*
+* @brief Updates attributes for 32x32 clusters based on the attributes of
+* the constituent 16x16 clusters
+*
+* @param[out] ps_blk_32x32: structure containing 32x32 block results
+*
+* @param[in] ps_cluster_data : structure containing 16x16 block results
+*
+* @return None
+********************************************************************************
+*/
+void hme_update_32x32_cluster_attributes(
+ cluster_32x32_blk_t *ps_blk_32x32, cluster_data_t *ps_cluster_data)
+{
+ cluster_data_t *ps_cur_cluster_32;
+
+ S32 i;
+ S32 mvd_limit;
+
+ S32 num_clusters = ps_blk_32x32->num_clusters;
+
+ if(0 == num_clusters)
+ {
+ ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[0];
+
+ ps_blk_32x32->num_clusters++;
+ ps_blk_32x32->au1_num_clusters[ps_cluster_data->ref_id]++;
+
+ ps_cur_cluster_32->is_valid_cluster = 1;
+
+ ps_cur_cluster_32->area_in_pixels = ps_cluster_data->area_in_pixels;
+ ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
+ ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
+
+ memcpy(
+ ps_cur_cluster_32->as_mv,
+ ps_cluster_data->as_mv,
+ sizeof(mv_data_t) * ps_cluster_data->num_mvs);
+
+ ps_cur_cluster_32->num_mvs = ps_cluster_data->num_mvs;
+
+ ps_cur_cluster_32->ref_id = ps_cluster_data->ref_id;
+
+ ps_cur_cluster_32->max_x = ps_cluster_data->max_x;
+ ps_cur_cluster_32->max_y = ps_cluster_data->max_y;
+ ps_cur_cluster_32->min_x = ps_cluster_data->min_x;
+ ps_cur_cluster_32->min_y = ps_cluster_data->min_y;
+
+ ps_cur_cluster_32->s_centroid = ps_cluster_data->s_centroid;
+ }
+ else
+ {
+ centroid_t *ps_centroid;
+
+ S32 cur_posx_q8, cur_posy_q8;
+ S32 min_mvd_cluster_id = -1;
+ S32 mvd;
+ S32 mvdx;
+ S32 mvdy;
+ S32 mvdx_min;
+ S32 mvdy_min;
+ S32 mvdx_q8;
+ S32 mvdy_q8;
+
+ S32 num_clusters_evaluated = 0;
+
+ S32 mvd_min = MAX_32BIT_VAL;
+
+ S32 mvx_inp_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8;
+ S32 mvy_inp_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8;
+
+ for(i = 0; num_clusters_evaluated < num_clusters; i++)
+ {
+ ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[i];
+
+ if(ps_cur_cluster_32->ref_id != ps_cluster_data->ref_id)
+ {
+ num_clusters_evaluated++;
+ continue;
+ }
+ if(!ps_cluster_data->is_valid_cluster)
+ {
+ continue;
+ }
+
+ num_clusters_evaluated++;
+
+ ps_centroid = &ps_cur_cluster_32->s_centroid;
+
+ cur_posx_q8 = ps_centroid->i4_pos_x_q8;
+ cur_posy_q8 = ps_centroid->i4_pos_y_q8;
+
+ mvdx_q8 = cur_posx_q8 - mvx_inp_q8;
+ mvdy_q8 = cur_posy_q8 - mvy_inp_q8;
+
+ mvdx = (mvdx_q8 + (1 << 7)) >> 8;
+ mvdy = (mvdy_q8 + (1 << 7)) >> 8;
+
+ mvd = ABS(mvdx) + ABS(mvdy);
+
+ if(mvd < mvd_min)
+ {
+ mvd_min = mvd;
+ mvdx_min = mvdx;
+ mvdy_min = mvdy;
+ min_mvd_cluster_id = i;
+ }
+ }
+
+ ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[0];
+
+ mvd_limit = (min_mvd_cluster_id == -1)
+ ? ps_cur_cluster_32[0].max_dist_from_centroid
+ : ps_cur_cluster_32[min_mvd_cluster_id].max_dist_from_centroid;
+
+ if(mvd_min <= mvd_limit)
+ {
+ LWORD64 i8_updated_posx;
+ LWORD64 i8_updated_posy;
+ WORD32 minmax_updated_x = 0;
+ WORD32 minmax_updated_y = 0;
+
+ ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[min_mvd_cluster_id];
+
+ ps_centroid = &ps_cur_cluster_32->s_centroid;
+
+ ps_cur_cluster_32->is_valid_cluster = 1;
+
+ ps_cur_cluster_32->area_in_pixels += ps_cluster_data->area_in_pixels;
+ ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
+ ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
+
+ memcpy(
+ &ps_cur_cluster_32->as_mv[ps_cur_cluster_32->num_mvs],
+ ps_cluster_data->as_mv,
+ sizeof(mv_data_t) * ps_cluster_data->num_mvs);
+
+ if((mvdx_min > 0) && ((ps_cur_cluster_32->min_x << 8) > mvx_inp_q8))
+ {
+ ps_cur_cluster_32->min_x = (mvx_inp_q8 + ((1 << 7))) >> 8;
+ minmax_updated_x = 1;
+ }
+ else if((mvdx_min < 0) && ((ps_cur_cluster_32->max_x << 8) < mvx_inp_q8))
+ {
+ ps_cur_cluster_32->max_x = (mvx_inp_q8 + (1 << 7)) >> 8;
+ minmax_updated_x = 2;
+ }
+
+ if((mvdy_min > 0) && ((ps_cur_cluster_32->min_y << 8) > mvy_inp_q8))
+ {
+ ps_cur_cluster_32->min_y = (mvy_inp_q8 + (1 << 7)) >> 8;
+ minmax_updated_y = 1;
+ }
+ else if((mvdy_min < 0) && ((ps_cur_cluster_32->max_y << 8) < mvy_inp_q8))
+ {
+ ps_cur_cluster_32->max_y = (mvy_inp_q8 + (1 << 7)) >> 8;
+ minmax_updated_y = 2;
+ }
+
+ switch((minmax_updated_y << 2) + minmax_updated_x)
+ {
+ case 1:
+ {
+ S32 mvd, mvd_q8;
+
+ mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8);
+ mvd = (mvd_q8 + (1 << 7)) >> 8;
+
+ if(mvd > (mvd_limit))
+ {
+ ps_cur_cluster_32->max_dist_from_centroid = mvd;
+ }
+ break;
+ }
+ case 2:
+ {
+ S32 mvd, mvd_q8;
+
+ mvd_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8;
+ mvd = (mvd_q8 + (1 << 7)) >> 8;
+
+ if(mvd > (mvd_limit))
+ {
+ ps_cur_cluster_32->max_dist_from_centroid = mvd;
+ }
+ break;
+ }
+ case 4:
+ {
+ S32 mvd, mvd_q8;
+
+ mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8);
+ mvd = (mvd_q8 + (1 << 7)) >> 8;
+
+ if(mvd > (mvd_limit))
+ {
+ ps_cur_cluster_32->max_dist_from_centroid = mvd;
+ }
+ break;
+ }
+ case 5:
+ {
+ S32 mvd;
+ S32 mvdx, mvdx_q8;
+ S32 mvdy, mvdy_q8;
+
+ mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8);
+ mvdy = (mvdy_q8 + (1 << 7)) >> 8;
+
+ mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8);
+ mvdx = (mvdx_q8 + (1 << 7)) >> 8;
+
+ mvd = (mvdx > mvdy) ? mvdx : mvdy;
+
+ if(mvd > mvd_limit)
+ {
+ ps_cur_cluster_32->max_dist_from_centroid = mvd;
+ }
+ break;
+ }
+ case 6:
+ {
+ S32 mvd;
+ S32 mvdx, mvdx_q8;
+ S32 mvdy, mvdy_q8;
+
+ mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_32->min_y << 8);
+ mvdy = (mvdy_q8 + (1 << 7)) >> 8;
+
+ mvdx_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8;
+ mvdx = (mvdx_q8 + (1 << 7)) >> 8;
+
+ mvd = (mvdx > mvdy) ? mvdx : mvdy;
+
+ if(mvd > mvd_limit)
+ {
+ ps_cur_cluster_32->max_dist_from_centroid = mvd;
+ }
+ break;
+ }
+ case 8:
+ {
+ S32 mvd, mvd_q8;
+
+ mvd_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8;
+ mvd = (mvd_q8 + (1 << 7)) >> 8;
+
+ if(mvd > (mvd_limit))
+ {
+ ps_cur_cluster_32->max_dist_from_centroid = mvd;
+ }
+ break;
+ }
+ case 9:
+ {
+ S32 mvd;
+ S32 mvdx, mvdx_q8;
+ S32 mvdy, mvdy_q8;
+
+ mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_32->min_x << 8);
+ mvdx = (mvdx_q8 + (1 << 7)) >> 8;
+
+ mvdy_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8;
+ mvdy = (mvdy_q8 + (1 << 7)) >> 8;
+
+ mvd = (mvdx > mvdy) ? mvdx : mvdy;
+
+ if(mvd > mvd_limit)
+ {
+ ps_cur_cluster_32->max_dist_from_centroid = mvd;
+ }
+ break;
+ }
+ case 10:
+ {
+ S32 mvd;
+ S32 mvdx, mvdx_q8;
+ S32 mvdy, mvdy_q8;
+
+ mvdx_q8 = (ps_cur_cluster_32->max_x << 8) - ps_centroid->i4_pos_x_q8;
+ mvdx = (mvdx_q8 + (1 << 7)) >> 8;
+
+ mvdy_q8 = (ps_cur_cluster_32->max_y << 8) - ps_centroid->i4_pos_y_q8;
+ mvdy = (mvdy_q8 + (1 << 7)) >> 8;
+
+ mvd = (mvdx > mvdy) ? mvdx : mvdy;
+
+ if(mvd > ps_cur_cluster_32->max_dist_from_centroid)
+ {
+ ps_cur_cluster_32->max_dist_from_centroid = mvd;
+ }
+ break;
+ }
+ default:
+ {
+ break;
+ }
+ }
+
+ i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cur_cluster_32->num_mvs) +
+ ((LWORD64)mvx_inp_q8 * ps_cluster_data->num_mvs);
+ i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cur_cluster_32->num_mvs) +
+ ((LWORD64)mvy_inp_q8 * ps_cluster_data->num_mvs);
+
+ ps_cur_cluster_32->num_mvs += ps_cluster_data->num_mvs;
+
+ ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cur_cluster_32->num_mvs);
+ ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cur_cluster_32->num_mvs);
+ }
+ else if(num_clusters < MAX_NUM_CLUSTERS_32x32)
+ {
+ ps_cur_cluster_32 = &ps_blk_32x32->as_cluster_data[num_clusters];
+
+ ps_blk_32x32->num_clusters++;
+ ps_blk_32x32->au1_num_clusters[ps_cluster_data->ref_id]++;
+
+ ps_cur_cluster_32->is_valid_cluster = 1;
+
+ ps_cur_cluster_32->area_in_pixels = ps_cluster_data->area_in_pixels;
+ ps_cur_cluster_32->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
+ ps_cur_cluster_32->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
+
+ memcpy(
+ ps_cur_cluster_32->as_mv,
+ ps_cluster_data->as_mv,
+ sizeof(mv_data_t) * ps_cluster_data->num_mvs);
+
+ ps_cur_cluster_32->num_mvs = ps_cluster_data->num_mvs;
+
+ ps_cur_cluster_32->ref_id = ps_cluster_data->ref_id;
+
+ ps_cur_cluster_32->max_x = ps_cluster_data->max_x;
+ ps_cur_cluster_32->max_y = ps_cluster_data->max_y;
+ ps_cur_cluster_32->min_x = ps_cluster_data->min_x;
+ ps_cur_cluster_32->min_y = ps_cluster_data->min_y;
+
+ ps_cur_cluster_32->s_centroid = ps_cluster_data->s_centroid;
+ }
+ }
+}
+
+/**
+********************************************************************************
+* @fn void hme_update_64x64_cluster_attributes
+* (
+ * cluster_64x64_blk_t *ps_blk_64x64,
+* cluster_data_t *ps_cluster_data
+* )
+*
+* @brief Updates attributes for 64x64 clusters based on the attributes of
+* the constituent 16x16 clusters
+*
+* @param[out] ps_blk_64x64: structure containing 64x64 block results
+*
+* @param[in] ps_cluster_data : structure containing 32x32 block results
+*
+* @return None
+********************************************************************************
+*/
+void hme_update_64x64_cluster_attributes(
+ cluster_64x64_blk_t *ps_blk_64x64, cluster_data_t *ps_cluster_data)
+{
+ cluster_data_t *ps_cur_cluster_64;
+
+ S32 i;
+ S32 mvd_limit;
+
+ S32 num_clusters = ps_blk_64x64->num_clusters;
+
+ if(0 == num_clusters)
+ {
+ ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[0];
+
+ ps_blk_64x64->num_clusters++;
+ ps_blk_64x64->au1_num_clusters[ps_cluster_data->ref_id]++;
+
+ ps_cur_cluster_64->is_valid_cluster = 1;
+
+ ps_cur_cluster_64->area_in_pixels = ps_cluster_data->area_in_pixels;
+ ps_cur_cluster_64->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
+ ps_cur_cluster_64->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
+
+ memcpy(
+ ps_cur_cluster_64->as_mv,
+ ps_cluster_data->as_mv,
+ sizeof(mv_data_t) * ps_cluster_data->num_mvs);
+
+ ps_cur_cluster_64->num_mvs = ps_cluster_data->num_mvs;
+
+ ps_cur_cluster_64->ref_id = ps_cluster_data->ref_id;
+
+ ps_cur_cluster_64->max_x = ps_cluster_data->max_x;
+ ps_cur_cluster_64->max_y = ps_cluster_data->max_y;
+ ps_cur_cluster_64->min_x = ps_cluster_data->min_x;
+ ps_cur_cluster_64->min_y = ps_cluster_data->min_y;
+
+ ps_cur_cluster_64->s_centroid = ps_cluster_data->s_centroid;
+ }
+ else
+ {
+ centroid_t *ps_centroid;
+
+ S32 cur_posx_q8, cur_posy_q8;
+ S32 min_mvd_cluster_id = -1;
+ S32 mvd;
+ S32 mvdx;
+ S32 mvdy;
+ S32 mvdx_min;
+ S32 mvdy_min;
+ S32 mvdx_q8;
+ S32 mvdy_q8;
+
+ S32 num_clusters_evaluated = 0;
+
+ S32 mvd_min = MAX_32BIT_VAL;
+
+ S32 mvx_inp_q8 = ps_cluster_data->s_centroid.i4_pos_x_q8;
+ S32 mvy_inp_q8 = ps_cluster_data->s_centroid.i4_pos_y_q8;
+
+ for(i = 0; num_clusters_evaluated < num_clusters; i++)
+ {
+ ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[i];
+
+ if(ps_cur_cluster_64->ref_id != ps_cluster_data->ref_id)
+ {
+ num_clusters_evaluated++;
+ continue;
+ }
+
+ if(!ps_cur_cluster_64->is_valid_cluster)
+ {
+ continue;
+ }
+
+ num_clusters_evaluated++;
+
+ ps_centroid = &ps_cur_cluster_64->s_centroid;
+
+ cur_posx_q8 = ps_centroid->i4_pos_x_q8;
+ cur_posy_q8 = ps_centroid->i4_pos_y_q8;
+
+ mvdx_q8 = cur_posx_q8 - mvx_inp_q8;
+ mvdy_q8 = cur_posy_q8 - mvy_inp_q8;
+
+ mvdx = (mvdx_q8 + (1 << 7)) >> 8;
+ mvdy = (mvdy_q8 + (1 << 7)) >> 8;
+
+ mvd = ABS(mvdx) + ABS(mvdy);
+
+ if(mvd < mvd_min)
+ {
+ mvd_min = mvd;
+ mvdx_min = mvdx;
+ mvdy_min = mvdy;
+ min_mvd_cluster_id = i;
+ }
+ }
+
+ ps_cur_cluster_64 = ps_blk_64x64->as_cluster_data;
+
+ mvd_limit = (min_mvd_cluster_id == -1)
+ ? ps_cur_cluster_64[0].max_dist_from_centroid
+ : ps_cur_cluster_64[min_mvd_cluster_id].max_dist_from_centroid;
+
+ if(mvd_min <= mvd_limit)
+ {
+ LWORD64 i8_updated_posx;
+ LWORD64 i8_updated_posy;
+ WORD32 minmax_updated_x = 0;
+ WORD32 minmax_updated_y = 0;
+
+ ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[min_mvd_cluster_id];
+
+ ps_centroid = &ps_cur_cluster_64->s_centroid;
+
+ ps_cur_cluster_64->is_valid_cluster = 1;
+
+ ps_cur_cluster_64->area_in_pixels += ps_cluster_data->area_in_pixels;
+ ps_cur_cluster_64->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
+ ps_cur_cluster_64->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
+
+ memcpy(
+ &ps_cur_cluster_64->as_mv[ps_cur_cluster_64->num_mvs],
+ ps_cluster_data->as_mv,
+ sizeof(mv_data_t) * ps_cluster_data->num_mvs);
+
+ if((mvdx_min > 0) && ((ps_cur_cluster_64->min_x << 8) > mvx_inp_q8))
+ {
+ ps_cur_cluster_64->min_x = (mvx_inp_q8 + (1 << 7)) >> 8;
+ minmax_updated_x = 1;
+ }
+ else if((mvdx_min < 0) && ((ps_cur_cluster_64->max_x << 8) < mvx_inp_q8))
+ {
+ ps_cur_cluster_64->max_x = (mvx_inp_q8 + (1 << 7)) >> 8;
+ minmax_updated_x = 2;
+ }
+
+ if((mvdy_min > 0) && ((ps_cur_cluster_64->min_y << 8) > mvy_inp_q8))
+ {
+ ps_cur_cluster_64->min_y = (mvy_inp_q8 + (1 << 7)) >> 8;
+ minmax_updated_y = 1;
+ }
+ else if((mvdy_min < 0) && ((ps_cur_cluster_64->max_y << 8) < mvy_inp_q8))
+ {
+ ps_cur_cluster_64->max_y = (mvy_inp_q8 + (1 << 7)) >> 8;
+ minmax_updated_y = 2;
+ }
+
+ switch((minmax_updated_y << 2) + minmax_updated_x)
+ {
+ case 1:
+ {
+ S32 mvd, mvd_q8;
+
+ mvd_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_64->min_x << 8);
+ mvd = (mvd_q8 + (1 << 7)) >> 8;
+
+ if(mvd > (mvd_limit))
+ {
+ ps_cur_cluster_64->max_dist_from_centroid = mvd;
+ }
+ break;
+ }
+ case 2:
+ {
+ S32 mvd, mvd_q8;
+
+ mvd_q8 = (ps_cur_cluster_64->max_x << 8) - ps_centroid->i4_pos_x_q8;
+ mvd = (mvd_q8 + (1 << 7)) >> 8;
+
+ if(mvd > (mvd_limit))
+ {
+ ps_cur_cluster_64->max_dist_from_centroid = mvd;
+ }
+ break;
+ }
+ case 4:
+ {
+ S32 mvd, mvd_q8;
+
+ mvd_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_64->min_y << 8);
+ mvd = (mvd_q8 + (1 << 7)) >> 8;
+
+ if(mvd > (mvd_limit))
+ {
+ ps_cur_cluster_64->max_dist_from_centroid = mvd;
+ }
+ break;
+ }
+ case 5:
+ {
+ S32 mvd;
+ S32 mvdx, mvdx_q8;
+ S32 mvdy, mvdy_q8;
+
+ mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_64->min_y << 8);
+ mvdy = (mvdy_q8 + (1 << 7)) >> 8;
+
+ mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_64->min_x << 8);
+ mvdx = (mvdx_q8 + (1 << 7)) >> 8;
+
+ mvd = (mvdx > mvdy) ? mvdx : mvdy;
+
+ if(mvd > mvd_limit)
+ {
+ ps_cur_cluster_64->max_dist_from_centroid = mvd;
+ }
+ break;
+ }
+ case 6:
+ {
+ S32 mvd;
+ S32 mvdx, mvdx_q8;
+ S32 mvdy, mvdy_q8;
+
+ mvdy_q8 = ps_centroid->i4_pos_y_q8 - (ps_cur_cluster_64->min_y << 8);
+ mvdy = (mvdy_q8 + (1 << 7)) >> 8;
+
+ mvdx_q8 = (ps_cur_cluster_64->max_x << 8) - ps_centroid->i4_pos_x_q8;
+ mvdx = (mvdx_q8 + (1 << 7)) >> 8;
+
+ mvd = (mvdx > mvdy) ? mvdx : mvdy;
+
+ if(mvd > mvd_limit)
+ {
+ ps_cur_cluster_64->max_dist_from_centroid = mvd;
+ }
+ break;
+ }
+ case 8:
+ {
+ S32 mvd, mvd_q8;
+
+ mvd_q8 = (ps_cur_cluster_64->max_y << 8) - ps_centroid->i4_pos_y_q8;
+ mvd = (mvd_q8 + (1 << 7)) >> 8;
+
+ if(mvd > (mvd_limit))
+ {
+ ps_cur_cluster_64->max_dist_from_centroid = mvd;
+ }
+ break;
+ }
+ case 9:
+ {
+ S32 mvd;
+ S32 mvdx, mvdx_q8;
+ S32 mvdy, mvdy_q8;
+
+ mvdx_q8 = ps_centroid->i4_pos_x_q8 - (ps_cur_cluster_64->min_x << 8);
+ mvdx = (mvdx_q8 + (1 << 7)) >> 8;
+
+ mvdy_q8 = (ps_cur_cluster_64->max_y << 8) - ps_centroid->i4_pos_y_q8;
+ mvdy = (mvdy_q8 + (1 << 7)) >> 8;
+
+ mvd = (mvdx > mvdy) ? mvdx : mvdy;
+
+ if(mvd > mvd_limit)
+ {
+ ps_cur_cluster_64->max_dist_from_centroid = mvd;
+ }
+ break;
+ }
+ case 10:
+ {
+ S32 mvd;
+ S32 mvdx, mvdx_q8;
+ S32 mvdy, mvdy_q8;
+
+ mvdx_q8 = (ps_cur_cluster_64->max_x << 8) - ps_centroid->i4_pos_x_q8;
+ mvdx = (mvdx_q8 + (1 << 7)) >> 8;
+
+ mvdy_q8 = (ps_cur_cluster_64->max_y << 8) - ps_centroid->i4_pos_y_q8;
+ mvdy = (mvdy_q8 + (1 << 7)) >> 8;
+
+ mvd = (mvdx > mvdy) ? mvdx : mvdy;
+
+ if(mvd > ps_cur_cluster_64->max_dist_from_centroid)
+ {
+ ps_cur_cluster_64->max_dist_from_centroid = mvd;
+ }
+ break;
+ }
+ default:
+ {
+ break;
+ }
+ }
+
+ i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cur_cluster_64->num_mvs) +
+ ((LWORD64)mvx_inp_q8 * ps_cluster_data->num_mvs);
+ i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cur_cluster_64->num_mvs) +
+ ((LWORD64)mvy_inp_q8 * ps_cluster_data->num_mvs);
+
+ ps_cur_cluster_64->num_mvs += ps_cluster_data->num_mvs;
+
+ ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cur_cluster_64->num_mvs);
+ ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cur_cluster_64->num_mvs);
+ }
+ else if(num_clusters < MAX_NUM_CLUSTERS_64x64)
+ {
+ ps_cur_cluster_64 = &ps_blk_64x64->as_cluster_data[num_clusters];
+
+ ps_blk_64x64->num_clusters++;
+ ps_blk_64x64->au1_num_clusters[ps_cluster_data->ref_id]++;
+
+ ps_cur_cluster_64->is_valid_cluster = 1;
+
+ ps_cur_cluster_64->area_in_pixels = ps_cluster_data->area_in_pixels;
+ ps_cur_cluster_64->bi_mv_pixel_area += ps_cluster_data->bi_mv_pixel_area;
+ ps_cur_cluster_64->uni_mv_pixel_area += ps_cluster_data->uni_mv_pixel_area;
+
+ memcpy(
+ &ps_cur_cluster_64->as_mv[0],
+ ps_cluster_data->as_mv,
+ sizeof(mv_data_t) * ps_cluster_data->num_mvs);
+
+ ps_cur_cluster_64->num_mvs = ps_cluster_data->num_mvs;
+
+ ps_cur_cluster_64->ref_id = ps_cluster_data->ref_id;
+
+ ps_cur_cluster_64->max_x = ps_cluster_data->max_x;
+ ps_cur_cluster_64->max_y = ps_cluster_data->max_y;
+ ps_cur_cluster_64->min_x = ps_cluster_data->min_x;
+ ps_cur_cluster_64->min_y = ps_cluster_data->min_y;
+
+ ps_cur_cluster_64->s_centroid = ps_cluster_data->s_centroid;
+ }
+ }
+}
+
+/**
+********************************************************************************
+* @fn void hme_update_32x32_clusters
+* (
+* cluster_32x32_blk_t *ps_blk_32x32,
+* cluster_16x16_blk_t *ps_blk_16x16
+* )
+*
+* @brief Adds intra_mv_area and best_inter_cost of four 16x16 blocks to the
+* 32x32 block and passes each of their valid clusters to
+* hme_update_32x32_cluster_attributes
+*
+* @param[in,out] ps_blk_32x32: 32x32 block results, accumulated in place
+* @param[in] ps_blk_16x16 : array of 16x16 block results; entries 0..3 are read
+*
+* @return None
+********************************************************************************
+*/
+static __inline void
+ hme_update_32x32_clusters(cluster_32x32_blk_t *ps_blk_32x32, cluster_16x16_blk_t *ps_blk_16x16)
+{
+ cluster_16x16_blk_t *ps_blk_16x16_cur;
+ cluster_data_t *ps_cur_cluster;
+
+ S32 i, j;
+ S32 num_clusters_cur_16x16_blk;
+
+ for(i = 0; i < 4; i++) /* one iteration per 16x16 block */
+ {
+ S32 num_clusters_evaluated = 0; /* valid clusters consumed from this block so far */
+
+ ps_blk_16x16_cur = &ps_blk_16x16[i];
+
+ num_clusters_cur_16x16_blk = ps_blk_16x16_cur->num_clusters;
+
+ ps_blk_32x32->intra_mv_area += ps_blk_16x16_cur->intra_mv_area;
+
+ ps_blk_32x32->best_inter_cost += ps_blk_16x16_cur->best_inter_cost;
+
+ for(j = 0; num_clusters_evaluated < num_clusters_cur_16x16_blk; j++) /* ends on the count of valid clusters; NOTE(review): j itself is not bounded - confirm num_clusters always matches the valid slots */
+ {
+ ps_cur_cluster = &ps_blk_16x16_cur->as_cluster_data[j];
+
+ if(!ps_cur_cluster->is_valid_cluster)
+ {
+ continue; /* invalid slots are skipped without being counted */
+ }
+
+ hme_update_32x32_cluster_attributes(ps_blk_32x32, ps_cur_cluster);
+
+ num_clusters_evaluated++;
+ }
+ }
+}
+
+/**
+********************************************************************************
+* @fn void hme_update_64x64_clusters
+* (
+* cluster_64x64_blk_t *ps_blk_64x64,
+* cluster_32x32_blk_t *ps_blk_32x32
+* )
+*
+* @brief Adds intra_mv_area and best_inter_cost of four 32x32 blocks to the
+* 64x64 block and passes each of their valid clusters to
+* hme_update_64x64_cluster_attributes
+*
+* @param[in,out] ps_blk_64x64: 64x64 block results, accumulated in place
+* @param[in] ps_blk_32x32 : array of 32x32 block results; entries 0..3 are read
+*
+* @return None
+********************************************************************************
+*/
+static __inline void
+ hme_update_64x64_clusters(cluster_64x64_blk_t *ps_blk_64x64, cluster_32x32_blk_t *ps_blk_32x32)
+{
+ cluster_32x32_blk_t *ps_blk_32x32_cur;
+ cluster_data_t *ps_cur_cluster;
+
+ S32 i, j;
+ S32 num_clusters_cur_32x32_blk;
+
+ for(i = 0; i < 4; i++) /* one iteration per 32x32 block */
+ {
+ S32 num_clusters_evaluated = 0; /* valid clusters consumed from this block so far */
+
+ ps_blk_32x32_cur = &ps_blk_32x32[i];
+
+ num_clusters_cur_32x32_blk = ps_blk_32x32_cur->num_clusters;
+
+ ps_blk_64x64->intra_mv_area += ps_blk_32x32_cur->intra_mv_area;
+ ps_blk_64x64->best_inter_cost += ps_blk_32x32_cur->best_inter_cost;
+
+ for(j = 0; num_clusters_evaluated < num_clusters_cur_32x32_blk; j++) /* ends on the count of valid clusters; NOTE(review): j itself is not bounded - confirm num_clusters always matches the valid slots */
+ {
+ ps_cur_cluster = &ps_blk_32x32_cur->as_cluster_data[j];
+
+ if(!ps_cur_cluster->is_valid_cluster)
+ {
+ continue; /* invalid slots are skipped without being counted */
+ }
+
+ hme_update_64x64_cluster_attributes(ps_blk_64x64, ps_cur_cluster);
+
+ num_clusters_evaluated++;
+ }
+ }
+}
+
+/**
+********************************************************************************
+* @fn S32 hme_try_merge_clusters_blksize_gt_16
+* (
+* cluster_data_t *ps_cluster_data,
+* S32 num_clusters
+* )
+*
+* @brief Merging clusters from blocks of size 32x32 and greater
+*
+* @param[in,out] ps_cluster_data: cluster array; entry 0 absorbs merged entries
+*
+* @param[in] num_clusters : number of clusters to evaluate, entry 0 included
+*
+* @return Number of clusters merged away (0 if none), recursive calls included
+********************************************************************************
+*/
+S32 hme_try_merge_clusters_blksize_gt_16(cluster_data_t *ps_cluster_data, S32 num_clusters)
+{
+ centroid_t *ps_cur_centroid;
+ cluster_data_t *ps_cur_cluster;
+
+ S32 i, mvd;
+ S32 mvdx, mvdy, mvdx_q8, mvdy_q8;
+
+ centroid_t *ps_centroid = &ps_cluster_data->s_centroid;
+
+ S32 mvd_limit = ps_cluster_data->max_dist_from_centroid; /* snapshot taken at entry; not refreshed after a merge */
+ S32 ref_id = ps_cluster_data->ref_id;
+
+ S32 node0_posx_q8 = ps_centroid->i4_pos_x_q8; /* also a snapshot: later centroid updates do not change it */
+ S32 node0_posy_q8 = ps_centroid->i4_pos_y_q8;
+ S32 num_clusters_evaluated = 1; /* entry 0 is the merge target and counts as evaluated */
+ S32 ret_value = 0; /* merges performed by this invocation */
+
+ if(1 >= num_clusters)
+ {
+ return ret_value;
+ }
+
+ for(i = 1; num_clusters_evaluated < num_clusters; i++) /* i = candidate index; loop ends on the evaluated count, not on i */
+ {
+ S32 cur_posx_q8;
+ S32 cur_posy_q8;
+
+ ps_cur_cluster = &ps_cluster_data[i];
+
+ if((ref_id != ps_cur_cluster->ref_id)) /* NOTE(review): tested before is_valid_cluster, so an invalid entry with another ref_id is still counted - confirm intended */
+ {
+ num_clusters_evaluated++;
+ continue;
+ }
+
+ if((!ps_cur_cluster->is_valid_cluster))
+ {
+ continue; /* invalid entries with the same ref_id are skipped uncounted */
+ }
+
+ num_clusters_evaluated++;
+
+ ps_cur_centroid = &ps_cur_cluster->s_centroid;
+
+ cur_posx_q8 = ps_cur_centroid->i4_pos_x_q8;
+ cur_posy_q8 = ps_cur_centroid->i4_pos_y_q8;
+
+ mvdx_q8 = cur_posx_q8 - node0_posx_q8;
+ mvdy_q8 = cur_posy_q8 - node0_posy_q8;
+
+ mvdx = (mvdx_q8 + (1 << 7)) >> 8; /* Q8 to integer with rounding */
+ mvdy = (mvdy_q8 + (1 << 7)) >> 8;
+
+ mvd = ABS(mvdx) + ABS(mvdy); /* sum of absolute x and y centroid differences */
+
+ if(mvd <= (mvd_limit >> 1)) /* merge only when within half of mvd_limit */
+ {
+ LWORD64 i8_updated_posx;
+ LWORD64 i8_updated_posy;
+ WORD32 minmax_updated_x = 0;
+ WORD32 minmax_updated_y = 0;
+
+ ps_cur_cluster->is_valid_cluster = 0; /* candidate is retired; its data moves into entry 0 */
+
+ ps_cluster_data->area_in_pixels += ps_cur_cluster->area_in_pixels;
+ ps_cluster_data->bi_mv_pixel_area += ps_cur_cluster->bi_mv_pixel_area;
+ ps_cluster_data->uni_mv_pixel_area += ps_cur_cluster->uni_mv_pixel_area;
+
+ memcpy( /* appends the candidate's MVs after entry 0's; NOTE(review): no capacity check on as_mv is visible here */
+ &ps_cluster_data->as_mv[ps_cluster_data->num_mvs],
+ ps_cur_cluster->as_mv,
+ sizeof(mv_data_t) * ps_cur_cluster->num_mvs);
+
+ if(mvdx > 0)
+ {
+ ps_cluster_data->min_x = (cur_posx_q8 + (1 << 7)) >> 8;
+ minmax_updated_x = 1;
+ }
+ else
+ {
+ ps_cluster_data->max_x = (cur_posx_q8 + (1 << 7)) >> 8;
+ minmax_updated_x = 2;
+ }
+
+ if(mvdy > 0)
+ {
+ ps_cluster_data->min_y = (cur_posy_q8 + (1 << 7)) >> 8;
+ minmax_updated_y = 1;
+ }
+ else
+ {
+ ps_cluster_data->max_y = (cur_posy_q8 + (1 << 7)) >> 8;
+ minmax_updated_y = 2;
+ }
+
+ switch((minmax_updated_y << 2) + minmax_updated_x) /* both flags are always 1 or 2 here, so only 5, 6, 9 and 10 can occur */
+ {
+ case 1:
+ {
+ S32 mvd, mvd_q8;
+
+ mvd_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
+ mvd = (mvd_q8 + (1 << 7)) >> 8;
+
+ if(mvd > (mvd_limit))
+ {
+ ps_cluster_data->max_dist_from_centroid = mvd;
+ }
+ break;
+ }
+ case 2:
+ {
+ S32 mvd, mvd_q8;
+
+ mvd_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
+ mvd = (mvd_q8 + (1 << 7)) >> 8;
+
+ if(mvd > (mvd_limit))
+ {
+ ps_cluster_data->max_dist_from_centroid = mvd;
+ }
+ break;
+ }
+ case 4:
+ {
+ S32 mvd, mvd_q8;
+
+ mvd_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
+ mvd = (mvd_q8 + (1 << 7)) >> 8;
+
+ if(mvd > (mvd_limit))
+ {
+ ps_cluster_data->max_dist_from_centroid = mvd;
+ }
+ break;
+ }
+ case 5: /* min_x and min_y were both replaced */
+ {
+ S32 mvd;
+ S32 mvdx, mvdx_q8;
+ S32 mvdy, mvdy_q8;
+
+ mvdy_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
+ mvdy = (mvdy_q8 + (1 << 7)) >> 8;
+
+ mvdx_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
+ mvdx = (mvdx_q8 + (1 << 7)) >> 8;
+
+ mvd = (mvdx > mvdy) ? mvdx : mvdy;
+
+ if(mvd > mvd_limit)
+ {
+ ps_cluster_data->max_dist_from_centroid = mvd;
+ }
+ break;
+ }
+ case 6: /* max_x and min_y were replaced */
+ {
+ S32 mvd;
+ S32 mvdx, mvdx_q8;
+ S32 mvdy, mvdy_q8;
+
+ mvdy_q8 = ps_cur_centroid->i4_pos_y_q8 - (ps_cluster_data->min_y << 8);
+ mvdy = (mvdy_q8 + (1 << 7)) >> 8;
+
+ mvdx_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
+ mvdx = (mvdx_q8 + (1 << 7)) >> 8;
+
+ mvd = (mvdx > mvdy) ? mvdx : mvdy;
+
+ if(mvd > mvd_limit)
+ {
+ ps_cluster_data->max_dist_from_centroid = mvd;
+ }
+ break;
+ }
+ case 8:
+ {
+ S32 mvd, mvd_q8;
+
+ mvd_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
+ mvd = (mvd_q8 + (1 << 7)) >> 8;
+
+ if(mvd > (mvd_limit))
+ {
+ ps_cluster_data->max_dist_from_centroid = mvd;
+ }
+ break;
+ }
+ case 9: /* min_x and max_y were replaced */
+ {
+ S32 mvd;
+ S32 mvdx, mvdx_q8;
+ S32 mvdy, mvdy_q8;
+
+ mvdx_q8 = ps_cur_centroid->i4_pos_x_q8 - (ps_cluster_data->min_x << 8);
+ mvdx = (mvdx_q8 + (1 << 7)) >> 8;
+
+ mvdy_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
+ mvdy = (mvdy_q8 + (1 << 7)) >> 8;
+
+ mvd = (mvdx > mvdy) ? mvdx : mvdy;
+
+ if(mvd > mvd_limit)
+ {
+ ps_cluster_data->max_dist_from_centroid = mvd;
+ }
+ break;
+ }
+ case 10: /* max_x and max_y were replaced */
+ {
+ S32 mvd;
+ S32 mvdx, mvdx_q8;
+ S32 mvdy, mvdy_q8;
+
+ mvdx_q8 = (ps_cluster_data->max_x << 8) - ps_cur_centroid->i4_pos_x_q8;
+ mvdx = (mvdx_q8 + (1 << 7)) >> 8;
+
+ mvdy_q8 = (ps_cluster_data->max_y << 8) - ps_cur_centroid->i4_pos_y_q8;
+ mvdy = (mvdy_q8 + (1 << 7)) >> 8;
+
+ mvd = (mvdx > mvdy) ? mvdx : mvdy;
+
+ if(mvd > ps_cluster_data->max_dist_from_centroid) /* NOTE(review): the other cases compare with mvd_limit; this one reads the live field - confirm intended */
+ {
+ ps_cluster_data->max_dist_from_centroid = mvd;
+ }
+ break;
+ }
+ default:
+ {
+ break;
+ }
+ }
+
+ i8_updated_posx = ((LWORD64)ps_centroid->i4_pos_x_q8 * ps_cluster_data->num_mvs) + /* num_mvs-weighted sum of both centroids, in 64 bit */
+ ((LWORD64)cur_posx_q8 * ps_cur_cluster->num_mvs);
+ i8_updated_posy = ((LWORD64)ps_centroid->i4_pos_y_q8 * ps_cluster_data->num_mvs) +
+ ((LWORD64)cur_posy_q8 * ps_cur_cluster->num_mvs);
+
+ ps_cluster_data->num_mvs += ps_cur_cluster->num_mvs;
+
+ ps_centroid->i4_pos_x_q8 = (WORD32)(i8_updated_posx / ps_cluster_data->num_mvs); /* divide by the combined count to get the new centroid */
+ ps_centroid->i4_pos_y_q8 = (WORD32)(i8_updated_posy / ps_cluster_data->num_mvs);
+
+ if(MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK >= num_clusters) /* count already within the limit: shrink it and rescan from index 1 */
+ {
+ num_clusters--;
+ num_clusters_evaluated = 1;
+ i = 0; /* becomes 1 after the loop's i++ */
+ ret_value++;
+ }
+ else
+ {
+ ret_value++;
+
+ return ret_value; /* otherwise stop after this single merge */
+ }
+ }
+ }
+
+ if(ret_value) /* merges happened: look for the first still-valid entry after entry 0 */
+ {
+ for(i = 1; i < (num_clusters + ret_value); i++)
+ {
+ if(ps_cluster_data[i].is_valid_cluster)
+ {
+ break;
+ }
+ }
+ if(i == (num_clusters + ret_value)) /* none left, so there is nothing further to merge */
+ {
+ return ret_value;
+ }
+ }
+ else
+ {
+ i = 1;
+ }
+
+ return (hme_try_merge_clusters_blksize_gt_16(&ps_cluster_data[i], num_clusters - 1)) +
+ ret_value; /* repeat with entry i as the new merge target and add its merge count */
+}
+
+/**
+********************************************************************************
+* @fn S32 hme_determine_validity_32x32(ps_ctb_cluster_info,
+* pi4_children_nodes_required, blk_validity_wrt_pic_bndry,
+* parent_blk_validity_wrt_pic_bndry)
+*
+* @brief Determines whether current 32x32 block needs to be evaluated in enc_loop
+* while recursing through the CU tree or not
+* @param[in] ps_ctb_cluster_info: supplies the 32x32 and 64x64 cluster blocks
+* @param[out] pi4_children_nodes_required: written (0 or 1) on every return path
+* @param[in] blk_validity_wrt_pic_bndry: if 0, returns 0 at once
+* @param[in] parent_blk_validity_wrt_pic_bndry: if 0, returns 1 at once
+* @return 1 or 0 validity flag for the block, as decided by the checks below
+********************************************************************************
+*/
+__inline S32 hme_determine_validity_32x32(
+ ctb_cluster_info_t *ps_ctb_cluster_info,
+ S32 *pi4_children_nodes_required,
+ S32 blk_validity_wrt_pic_bndry,
+ S32 parent_blk_validity_wrt_pic_bndry)
+{
+ cluster_data_t *ps_data;
+
+ cluster_32x32_blk_t *ps_32x32_blk = ps_ctb_cluster_info->ps_32x32_blk;
+ cluster_64x64_blk_t *ps_64x64_blk = ps_ctb_cluster_info->ps_64x64_blk;
+
+ S32 num_clusters = ps_32x32_blk->num_clusters;
+ S32 num_clusters_parent = ps_64x64_blk->num_clusters;
+
+ if(!blk_validity_wrt_pic_bndry)
+ {
+ *pi4_children_nodes_required = 1;
+ return 0;
+ }
+
+ if(!parent_blk_validity_wrt_pic_bndry)
+ {
+ *pi4_children_nodes_required = 1;
+ return 1;
+ }
+
+ if(num_clusters > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK) /* too many clusters in this block */
+ {
+ *pi4_children_nodes_required = 1;
+ return 0;
+ }
+
+ if(num_clusters_parent > MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK)
+ {
+ *pi4_children_nodes_required = 1;
+
+ return 1;
+ }
+ else if(num_clusters_parent < MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK)
+ {
+ *pi4_children_nodes_required = 0;
+
+ return 1;
+ }
+ else /* parent count equals MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK exactly */
+ {
+ if(num_clusters < MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)
+ {
+ *pi4_children_nodes_required = 0;
+ return 1;
+ }
+ else /* own count equals MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK: decide on the smallest cluster area */
+ {
+ S32 i;
+
+ S32 area_of_parent = gai4_partition_area[PART_ID_2Nx2N] << 4;
+ S32 min_area = MAX_32BIT_VAL;
+ S32 num_clusters_evaluated = 0;
+
+ for(i = 0; num_clusters_evaluated < num_clusters; i++) /* visits valid clusters only; ends on their count */
+ {
+ ps_data = &ps_32x32_blk->as_cluster_data[i];
+
+ if(!ps_data->is_valid_cluster)
+ {
+ continue;
+ }
+
+ num_clusters_evaluated++;
+
+ if(ps_data->area_in_pixels < min_area)
+ {
+ min_area = ps_data->area_in_pixels;
+ }
+ }
+
+ if((min_area << 4) < area_of_parent) /* smallest cluster is under 1/16 of area_of_parent */
+ {
+ *pi4_children_nodes_required = 1;
+ return 0;
+ }
+ else
+ {
+ *pi4_children_nodes_required = 0;
+ return 1;
+ }
+ }
+ }
+}
+
+/**
+********************************************************************************
+* @fn S32 hme_determine_validity_16x16(ps_ctb_cluster_info,
+* pi4_children_nodes_required, blk_validity_wrt_pic_bndry,
+* parent_blk_validity_wrt_pic_bndry)
+*
+* @brief Determines whether current 16x16 block needs to be evaluated in enc_loop
+* while recursing through the CU tree or not
+* @param[in] ps_ctb_cluster_info: supplies the 16x16, 32x32 and 64x64 cluster blocks
+* @param[out] pi4_children_nodes_required: written (0 or 1) on every return path
+* @param[in] blk_validity_wrt_pic_bndry: if 0, returns 0 at once
+* @param[in] parent_blk_validity_wrt_pic_bndry: if 0, returns 1 at once
+* @return 1 or 0 validity flag for the block, as decided by the checks below
+********************************************************************************
+*/
+__inline S32 hme_determine_validity_16x16(
+ ctb_cluster_info_t *ps_ctb_cluster_info,
+ S32 *pi4_children_nodes_required,
+ S32 blk_validity_wrt_pic_bndry,
+ S32 parent_blk_validity_wrt_pic_bndry)
+{
+ cluster_data_t *ps_data;
+
+ cluster_16x16_blk_t *ps_16x16_blk = ps_ctb_cluster_info->ps_16x16_blk;
+ cluster_32x32_blk_t *ps_32x32_blk = ps_ctb_cluster_info->ps_32x32_blk;
+ cluster_64x64_blk_t *ps_64x64_blk = ps_ctb_cluster_info->ps_64x64_blk;
+
+ S32 num_clusters = ps_16x16_blk->num_clusters;
+ S32 num_clusters_parent = ps_32x32_blk->num_clusters;
+ S32 num_clusters_grandparent = ps_64x64_blk->num_clusters;
+
+ if(!blk_validity_wrt_pic_bndry)
+ {
+ *pi4_children_nodes_required = 1;
+ return 0;
+ }
+
+ if(!parent_blk_validity_wrt_pic_bndry)
+ {
+ *pi4_children_nodes_required = 1;
+ return 1;
+ }
+
+ if((num_clusters_parent > MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK) && /* both ancestors exceed their cluster limits */
+ (num_clusters_grandparent > MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK))
+ {
+ *pi4_children_nodes_required = 1;
+ return 1;
+ }
+
+ /* Implies nc_64 <= 3 when num_clusters_parent > 3 & */
+ /* implies nc_64 > 3 when num_clusters_parent < 3 & */
+ if(num_clusters_parent != MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK)
+ {
+ if(num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK)
+ {
+ *pi4_children_nodes_required = 0;
+
+ return 1;
+ }
+ else
+ {
+ *pi4_children_nodes_required = 1;
+
+ return 0;
+ }
+ }
+ /* Implies nc_64 >= 3 */
+ else
+ {
+ if(num_clusters < MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK)
+ {
+ *pi4_children_nodes_required = 0;
+ return 1;
+ }
+ else if(num_clusters > MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK)
+ {
+ *pi4_children_nodes_required = 1;
+ return 0;
+ }
+ else /* own count equals MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK: decide on the smallest cluster area */
+ {
+ S32 i;
+
+ S32 area_of_parent = gai4_partition_area[PART_ID_2Nx2N] << 2;
+ S32 min_area = MAX_32BIT_VAL;
+ S32 num_clusters_evaluated = 0;
+
+ for(i = 0; num_clusters_evaluated < num_clusters; i++) /* visits valid clusters only; ends on their count */
+ {
+ ps_data = &ps_16x16_blk->as_cluster_data[i];
+
+ if(!ps_data->is_valid_cluster)
+ {
+ continue;
+ }
+
+ num_clusters_evaluated++;
+
+ if(ps_data->area_in_pixels < min_area)
+ {
+ min_area = ps_data->area_in_pixels;
+ }
+ }
+
+ if((min_area << 4) < area_of_parent) /* smallest cluster is under 1/16 of area_of_parent */
+ {
+ *pi4_children_nodes_required = 1;
+ return 0;
+ }
+ else
+ {
+ *pi4_children_nodes_required = 0;
+ return 1;
+ }
+ }
+ }
+}
+
+/**
+********************************************************************************
+* @fn void hme_build_cu_tree
+* (
+* ctb_cluster_info_t *ps_ctb_cluster_info,
+* cur_ctb_cu_tree_t *ps_cu_tree,
+* S32 tree_depth,
+* CU_POS_T e_grandparent_blk_pos,
+* CU_POS_T e_parent_blk_pos,
+* CU_POS_T e_cur_blk_pos
+* )
+*
+* @brief Thin wrapper that initialises the CU tree by calling
+* ihevce_cu_tree_init; this function does not recurse
+* itself
+*
+* @param[in] ps_ctb_cluster_info: supplies ps_cu_tree_root and the address of
+* nodes_created_in_cu_tree, both passed on to
+* ihevce_cu_tree_init
+*
+* @param[out] ps_cu_tree : represents CU tree used in CU recursion
+*
+* @param[in] tree_depth : specifies depth of the CU tree
+*
+* @param[in] e_grandparent_blk_pos, e_parent_blk_pos, e_cur_blk_pos:
+* block positions, forwarded unchanged
+*
+* @return Nothing
+********************************************************************************
+*/
+void hme_build_cu_tree(
+ ctb_cluster_info_t *ps_ctb_cluster_info,
+ cur_ctb_cu_tree_t *ps_cu_tree,
+ S32 tree_depth,
+ CU_POS_T e_grandparent_blk_pos,
+ CU_POS_T e_parent_blk_pos,
+ CU_POS_T e_cur_blk_pos)
+{
+ ihevce_cu_tree_init( /* all arguments are handed over as received */
+ ps_cu_tree,
+ ps_ctb_cluster_info->ps_cu_tree_root,
+ &ps_ctb_cluster_info->nodes_created_in_cu_tree,
+ tree_depth,
+ e_grandparent_blk_pos,
+ e_parent_blk_pos,
+ e_cur_blk_pos);
+}
+
+/**
+********************************************************************************
+* @fn S32 hme_sdi_based_cluster_spread_eligibility
+* (cluster_32x32_blk_t *ps_blk_32x32, S32 sdi_threshold)
+*
+* @brief Determines whether the spread of high SDI MV's around each cluster
+* center is below a pre-determined threshold
+*
+* @param[in] ps_blk_32x32: 32x32 block whose first num_clusters entries of
+* as_cluster_data are examined
+*
+* @param[in] sdi_threshold: MVs with sdi >= this value count as high SDI
+*
+* @return 1 if the spread is constrained for every examined cluster,
+* else 0
+********************************************************************************
+*/
+__inline S32
+ hme_sdi_based_cluster_spread_eligibility(cluster_32x32_blk_t *ps_blk_32x32, S32 sdi_threshold)
+{
+ S32 cumulative_mv_distance;
+ S32 i, j;
+ S32 num_high_sdi_mvs;
+
+ S32 num_clusters = ps_blk_32x32->num_clusters;
+
+ for(i = 0; i < num_clusters; i++) /* NOTE(review): is_valid_cluster is not checked here, unlike the sibling loops - confirm no invalid slots can sit among the first num_clusters entries */
+ {
+ cluster_data_t *ps_data = &ps_blk_32x32->as_cluster_data[i];
+
+ num_high_sdi_mvs = 0; /* both tallies restart for every cluster */
+ cumulative_mv_distance = 0;
+
+ for(j = 0; j < ps_data->num_mvs; j++)
+ {
+ mv_data_t *ps_mv = &ps_data->as_mv[j];
+
+ if(ps_mv->sdi >= sdi_threshold)
+ {
+ num_high_sdi_mvs++;
+
+ COMPUTE_MVD(ps_mv, ps_data, cumulative_mv_distance); /* macro defined elsewhere; presumably adds this MV's distance from the cluster centroid - TODO confirm */
+ }
+ }
+
+ if(cumulative_mv_distance > ((ps_data->max_dist_from_centroid >> 1) * num_high_sdi_mvs)) /* total distance exceeds half of max_dist_from_centroid per high-SDI MV */
+ {
+ return 0;
+ }
+ }
+
+ return 1;
+}
+
+/**
+********************************************************************************
+* @fn void hme_populate_cu_tree
+* (
+* ctb_cluster_info_t *ps_ctb_cluster_info,
+* cur_ctb_cu_tree_t *ps_cu_tree,
+* S32 tree_depth,
+* ME_QUALITY_PRESETS_T e_quality_preset,
+* CU_POS_T e_grandparent_blk_pos,
+* CU_POS_T e_parent_blk_pos,
+* CU_POS_T e_cur_blk_pos
+* )
+*
+* @brief Recursive function for CU tree population based on output of
+* clustering algorithm
+*
+* @param[in] ps_ctb_cluster_info: structure containing pointers to clusters
+* corresponding to all block sizes from 64x64
+* to 16x16; also supplies the IPE analysis
+* (ps_cur_ipe_ctb) and the block masks
+*
+* @param[out] ps_cu_tree : node to fill; a NULL node is ignored
+*
+* @param[in] tree_depth : depth of ps_cu_tree; the switch handles 0 (64x64),
+* 1 (32x32), 2 (16x16) and 3
+*
+* @param[in] e_quality_preset : presets >= ME_HIGH_QUALITY take a shortcut
+* at every depth
+*
+* @param[in] e_grandparent_blk_pos: position of the grandparent block
+* @param[in] e_parent_blk_pos: position of parent block wrt its parent, if
+* applicable
+*
+* @param[in] e_cur_blk_pos: position of current block wrt parent
+*
+* @return None
+********************************************************************************
+*/
+void hme_populate_cu_tree(
+ ctb_cluster_info_t *ps_ctb_cluster_info,
+ cur_ctb_cu_tree_t *ps_cu_tree,
+ S32 tree_depth,
+ ME_QUALITY_PRESETS_T e_quality_preset,
+ CU_POS_T e_grandparent_blk_pos,
+ CU_POS_T e_parent_blk_pos,
+ CU_POS_T e_cur_blk_pos)
+{
+ S32 area_of_cur_blk;
+ S32 area_limit_for_me_decision_precedence;
+ S32 children_nodes_required;
+ S32 intra_mv_area;
+ S32 intra_eval_enable;
+ S32 inter_eval_enable;
+ S32 ipe_decision_precedence;
+ S32 node_validity;
+ S32 num_clusters;
+
+ ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb = ps_ctb_cluster_info->ps_cur_ipe_ctb;
+
+ if(NULL == ps_cu_tree)
+ {
+ return;
+ }
+
+ switch(tree_depth) /* NOTE(review): no default case; any depth outside 0..3 leaves node_validity and the eval flags unset before they are stored below */
+ {
+ case 0:
+ {
+ /* 64x64 block */
+ S32 blk_32x32_mask = ps_ctb_cluster_info->blk_32x32_mask;
+
+ cluster_64x64_blk_t *ps_blk_64x64 = ps_ctb_cluster_info->ps_64x64_blk;
+
+ area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N] << 4;
+ area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100;
+ children_nodes_required = 0;
+ intra_mv_area = ps_blk_64x64->intra_mv_area;
+
+ ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence);
+
+ intra_eval_enable = ipe_decision_precedence;
+ inter_eval_enable = !!ps_blk_64x64->num_clusters;
+
+#if 1 //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
+ if(e_quality_preset >= ME_HIGH_QUALITY)
+ {
+ inter_eval_enable = 1;
+ node_validity = (blk_32x32_mask == 0xf);
+#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
+ ps_cu_tree->u1_inter_eval_enable = !(intra_mv_area == area_of_cur_blk); /* NOTE(review): overwritten by the unconditional store after the switch - confirm intent */
+#endif
+ break;
+ }
+#endif
+
+#if ENABLE_4CTB_EVALUATION
+ node_validity = (blk_32x32_mask == 0xf);
+
+ break;
+#else
+ {
+ S32 i;
+
+ num_clusters = ps_blk_64x64->num_clusters;
+
+ node_validity = (ipe_decision_precedence)
+ ? (!ps_cur_ipe_ctb->u1_split_flag)
+ : (num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK);
+
+ for(i = 0; i < MAX_NUM_REF; i++) /* every reference index must stay within its own cluster limit */
+ {
+ node_validity = node_validity && (ps_blk_64x64->au1_num_clusters[i] <=
+ MAX_NUM_CLUSTERS_IN_ONE_REF_IDX);
+ }
+
+ node_validity = node_validity && (blk_32x32_mask == 0xf);
+ }
+ break;
+#endif
+ }
+ case 1:
+ {
+ /* 32x32 block */
+ S32 is_percent_intra_area_gt_threshold;
+
+ cluster_32x32_blk_t *ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[e_cur_blk_pos];
+
+ S32 blk_32x32_mask = ps_ctb_cluster_info->blk_32x32_mask;
+
+#if !ENABLE_4CTB_EVALUATION
+ S32 best_inter_cost = ps_blk_32x32->best_inter_cost;
+ S32 best_intra_cost =
+ ((ps_ctb_cluster_info->ps_cur_ipe_ctb->ai4_best32x32_intra_cost[e_cur_blk_pos] +
+ ps_ctb_cluster_info->i4_frame_qstep * ps_ctb_cluster_info->i4_frame_qstep_multiplier *
+ 4) < 0)
+ ? MAX_32BIT_VAL
+ : (ps_ctb_cluster_info->ps_cur_ipe_ctb->ai4_best32x32_intra_cost[e_cur_blk_pos] +
+ ps_ctb_cluster_info->i4_frame_qstep *
+ ps_ctb_cluster_info->i4_frame_qstep_multiplier * 4);
+ S32 best_cost = (best_inter_cost > best_intra_cost) ? best_intra_cost : best_inter_cost;
+ S32 cost_differential = (best_inter_cost - best_cost);
+#endif
+
+ area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N] << 2;
+ area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100;
+ intra_mv_area = ps_blk_32x32->intra_mv_area;
+ is_percent_intra_area_gt_threshold =
+ (intra_mv_area > area_limit_for_me_decision_precedence); /* computed but not read again in this function */
+ ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence);
+
+ intra_eval_enable = ipe_decision_precedence;
+ inter_eval_enable = !!ps_blk_32x32->num_clusters;
+ children_nodes_required = 1;
+
+#if 1 //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
+ if(e_quality_preset >= ME_HIGH_QUALITY)
+ {
+ inter_eval_enable = 1;
+ node_validity = (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
+#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
+ ps_cu_tree->u1_inter_eval_enable = !(intra_mv_area == area_of_cur_blk); /* NOTE(review): overwritten by the unconditional store after the switch - confirm intent */
+#endif
+ break;
+ }
+#endif
+
+#if ENABLE_4CTB_EVALUATION
+ node_validity = (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
+
+ break;
+#else
+ {
+ S32 i;
+ num_clusters = ps_blk_32x32->num_clusters;
+
+ if(ipe_decision_precedence)
+ {
+ node_validity = (ps_cur_ipe_ctb->as_intra32_analyse[e_cur_blk_pos].b1_merge_flag);
+ node_validity = node_validity && (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
+ }
+ else
+ {
+ node_validity =
+ ((ALL_INTER_COST_DIFF_THR * best_cost) >= (100 * cost_differential)) &&
+ (num_clusters <= MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK) &&
+ (((blk_32x32_mask) & (1 << e_cur_blk_pos)) || 0);
+
+ for(i = 0; (i < MAX_NUM_REF) && (node_validity); i++) /* stops at the first reference index over its cluster limit */
+ {
+ node_validity = node_validity && (ps_blk_32x32->au1_num_clusters[i] <=
+ MAX_NUM_CLUSTERS_IN_ONE_REF_IDX);
+ }
+
+ if(node_validity)
+ {
+ node_validity = node_validity &&
+ hme_sdi_based_cluster_spread_eligibility(
+ ps_blk_32x32, ps_ctb_cluster_info->sdi_threshold);
+ }
+ }
+ }
+
+ break;
+#endif
+ }
+ case 2: /* 16x16 block, indexed by parent and current position */
+ {
+ cluster_16x16_blk_t *ps_blk_16x16 =
+ &ps_ctb_cluster_info->ps_16x16_blk[e_cur_blk_pos + (e_parent_blk_pos << 2)];
+
+ S32 blk_8x8_mask =
+ ps_ctb_cluster_info->pi4_blk_8x8_mask[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos];
+
+ area_of_cur_blk = gai4_partition_area[PART_ID_2Nx2N];
+ area_limit_for_me_decision_precedence = (area_of_cur_blk * MAX_INTRA_PERCENTAGE) / 100;
+ children_nodes_required = 1;
+ intra_mv_area = ps_blk_16x16->intra_mv_area;
+ ipe_decision_precedence = (intra_mv_area >= area_limit_for_me_decision_precedence);
+ num_clusters = ps_blk_16x16->num_clusters;
+
+ intra_eval_enable = ipe_decision_precedence;
+ inter_eval_enable = 1;
+
+#if 1 //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
+ if(e_quality_preset >= ME_HIGH_QUALITY)
+ {
+ node_validity =
+ !ps_ctb_cluster_info
+ ->au1_is_16x16_blk_split[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos];
+ children_nodes_required = !node_validity; /* children are visited only when this node is not valid */
+ break;
+ }
+#endif
+
+#if ENABLE_4CTB_EVALUATION
+ node_validity = (blk_8x8_mask == 0xf);
+
+#if ENABLE_CU_TREE_CULLING
+ {
+ cur_ctb_cu_tree_t *ps_32x32_root;
+
+ switch(e_parent_blk_pos)
+ {
+ case POS_TL:
+ {
+ ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
+
+ break;
+ }
+ case POS_TR:
+ {
+ ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
+
+ break;
+ }
+ case POS_BL:
+ {
+ ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
+
+ break;
+ }
+ case POS_BR:
+ {
+ ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
+
+ break;
+ }
+ }
+
+ if(ps_32x32_root->is_node_valid)
+ {
+ node_validity =
+ node_validity &&
+ !ps_ctb_cluster_info
+ ->au1_is_16x16_blk_split[(S32)(e_parent_blk_pos << 2) + e_cur_blk_pos];
+ children_nodes_required = !node_validity;
+ }
+ }
+#endif
+
+ break;
+#else
+
+ if(ipe_decision_precedence)
+ {
+ S32 merge_flag_16 = (ps_cur_ipe_ctb->as_intra32_analyse[e_parent_blk_pos]
+ .as_intra16_analyse[e_cur_blk_pos]
+ .b1_merge_flag);
+ S32 valid_flag = (blk_8x8_mask == 0xf);
+
+ node_validity = merge_flag_16 && valid_flag;
+ }
+ else
+ {
+ node_validity = (blk_8x8_mask == 0xf);
+ }
+
+ break;
+#endif
+ }
+ case 3: /* child of a 16x16 block; validity comes from one bit of that block's 8x8 mask */
+ {
+ S32 blk_8x8_mask =
+ ps_ctb_cluster_info
+ ->pi4_blk_8x8_mask[(S32)(e_grandparent_blk_pos << 2) + e_parent_blk_pos];
+ S32 merge_flag_16 = (ps_cur_ipe_ctb->as_intra32_analyse[e_grandparent_blk_pos]
+ .as_intra16_analyse[e_parent_blk_pos]
+ .b1_merge_flag);
+ S32 merge_flag_32 =
+ (ps_cur_ipe_ctb->as_intra32_analyse[e_grandparent_blk_pos].b1_merge_flag);
+
+ intra_eval_enable = !merge_flag_16 || !merge_flag_32;
+ inter_eval_enable = 1;
+ children_nodes_required = 0; /* deepest level handled: recursion stops here */
+
+#if 1 //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
+ if(e_quality_preset >= ME_HIGH_QUALITY)
+ {
+ node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0);
+ break;
+ }
+#endif
+
+#if ENABLE_4CTB_EVALUATION
+ node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0);
+
+ break;
+#else
+ {
+ cur_ctb_cu_tree_t *ps_32x32_root;
+ cur_ctb_cu_tree_t *ps_16x16_root;
+ cluster_32x32_blk_t *ps_32x32_blk;
+
+ switch(e_grandparent_blk_pos) /* NOTE(review): no default; ps_32x32_root stays unset for any other position value */
+ {
+ case POS_TL:
+ {
+ ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
+
+ break;
+ }
+ case POS_TR:
+ {
+ ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
+
+ break;
+ }
+ case POS_BL:
+ {
+ ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
+
+ break;
+ }
+ case POS_BR:
+ {
+ ps_32x32_root = ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
+
+ break;
+ }
+ }
+
+ switch(e_parent_blk_pos) /* NOTE(review): no default; ps_16x16_root stays unset for any other position value */
+ {
+ case POS_TL:
+ {
+ ps_16x16_root = ps_32x32_root->ps_child_node_tl;
+
+ break;
+ }
+ case POS_TR:
+ {
+ ps_16x16_root = ps_32x32_root->ps_child_node_tr;
+
+ break;
+ }
+ case POS_BL:
+ {
+ ps_16x16_root = ps_32x32_root->ps_child_node_bl;
+
+ break;
+ }
+ case POS_BR:
+ {
+ ps_16x16_root = ps_32x32_root->ps_child_node_br;
+
+ break;
+ }
+ }
+
+ ps_32x32_blk = &ps_ctb_cluster_info->ps_32x32_blk[e_grandparent_blk_pos];
+
+ node_validity = ((blk_8x8_mask & (1 << e_cur_blk_pos)) || 0) &&
+ ((!ps_32x32_root->is_node_valid) ||
+ (ps_32x32_blk->num_clusters_with_weak_sdi_density > 0) ||
+ (!ps_16x16_root->is_node_valid));
+
+ break;
+ }
+#endif
+ }
+ }
+
+ /* Fill the current cu_tree node */
+ ps_cu_tree->is_node_valid = node_validity;
+ ps_cu_tree->u1_intra_eval_enable = intra_eval_enable;
+ ps_cu_tree->u1_inter_eval_enable = inter_eval_enable;
+
+ if(children_nodes_required)
+ {
+ tree_depth++; /* children sit one level deeper; parent and current positions shift up one generation in the calls below */
+
+ hme_populate_cu_tree(
+ ps_ctb_cluster_info,
+ ps_cu_tree->ps_child_node_tl,
+ tree_depth,
+ e_quality_preset,
+ e_parent_blk_pos,
+ e_cur_blk_pos,
+ POS_TL);
+
+ hme_populate_cu_tree(
+ ps_ctb_cluster_info,
+ ps_cu_tree->ps_child_node_tr,
+ tree_depth,
+ e_quality_preset,
+ e_parent_blk_pos,
+ e_cur_blk_pos,
+ POS_TR);
+
+ hme_populate_cu_tree(
+ ps_ctb_cluster_info,
+ ps_cu_tree->ps_child_node_bl,
+ tree_depth,
+ e_quality_preset,
+ e_parent_blk_pos,
+ e_cur_blk_pos,
+ POS_BL);
+
+ hme_populate_cu_tree(
+ ps_ctb_cluster_info,
+ ps_cu_tree->ps_child_node_br,
+ tree_depth,
+ e_quality_preset,
+ e_parent_blk_pos,
+ e_cur_blk_pos,
+ POS_BR);
+ }
+}
+
+/**
+********************************************************************************
+* @fn void hme_analyse_mv_clustering(ps_search_results, ps_16x16_cu_results,
+* ps_8x8_cu_results, ps_ctb_cluster_info, pi1_future_list, pi1_past_list,
+* bidir_enabled, e_quality_preset)
+*
+* @brief Implementation for the clustering algorithm: accumulates costs and MV
+* clusters per 16x16, 32x32 and 64x64 blk and populates the CU tree
+*
+* @param[in] ps_search_results : 16x16 results; u1_split_flag picks 8x8 data
+* @param[in] ps_16x16_cu_results, ps_8x8_cu_results : CU results whose
+* ps_best_results[0] and [1] are read for unsplit / split 16x16 blks
+* @param[in,out] ps_ctb_cluster_info : cluster blks, blk masks and CU tree
+* @param[in] pi1_future_list, pi1_past_list : indexed by the PU's L1 / L0 ref
+* idx to get the ref idx used for clustering
+* @param[in] bidir_enabled : forwarded to cluster init and top-ref selection
+* @param[in] e_quality_preset : when >= ME_HIGH_QUALITY and
+* USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS is 0, only costs and intra area are
+* accumulated and MV clustering is skipped
+* @return None
+********************************************************************************
+*/
+void hme_analyse_mv_clustering(
+ search_results_t *ps_search_results,
+ inter_cu_results_t *ps_16x16_cu_results,
+ inter_cu_results_t *ps_8x8_cu_results,
+ ctb_cluster_info_t *ps_ctb_cluster_info,
+ S08 *pi1_future_list,
+ S08 *pi1_past_list,
+ S32 bidir_enabled,
+ ME_QUALITY_PRESETS_T e_quality_preset)
+{
+ cluster_16x16_blk_t *ps_blk_16x16;
+ cluster_32x32_blk_t *ps_blk_32x32;
+ cluster_64x64_blk_t *ps_blk_64x64;
+
+ part_type_results_t *ps_best_result;
+ pu_result_t *aps_part_result[MAX_NUM_PARTS];
+ pu_result_t *aps_inferior_parts[MAX_NUM_PARTS];
+
+ PART_ID_T e_part_id;
+ PART_TYPE_T e_part_type;
+
+ S32 enable_64x64_merge;
+ S32 i, j, k;
+ S32 mvx, mvy;
+ S32 num_parts;
+ S32 ref_idx;
+ S32 ai4_pred_mode[MAX_NUM_PARTS];
+
+ S32 num_32x32_merges = 0;
+
+ /* Step 1: initialise the 16x16, 32x32 and 64x64 cluster blks and the CU tree */
+ /* Step 2: per 16x16 blk with mask 0xf, add up costs and cluster the MVs */
+ /* Step 3: update the 32x32 blks, then populate the CU tree from depth 1 */
+ /* Step 4: derive enable_64x64_merge and, if set, update the 64x64 blk */
+ /* Step 5: if still enabled, populate the CU tree from its root (depth 0) */
+
+ ps_blk_64x64 = ps_ctb_cluster_info->ps_64x64_blk;
+
+ /* Initialise data in each of the clusters */
+ for(i = 0; i < 16; i++)
+ {
+ ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i];
+
+#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
+ if(e_quality_preset < ME_HIGH_QUALITY)
+ {
+ hme_init_clusters_16x16(ps_blk_16x16, bidir_enabled);
+ }
+ else
+ {
+ ps_blk_16x16->best_inter_cost = 0;
+ ps_blk_16x16->intra_mv_area = 0;
+ }
+#else
+ hme_init_clusters_16x16(ps_blk_16x16, bidir_enabled);
+#endif
+ }
+
+ for(i = 0; i < 4; i++)
+ {
+ ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i];
+
+#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
+ if(e_quality_preset < ME_HIGH_QUALITY)
+ {
+ hme_init_clusters_32x32(ps_blk_32x32, bidir_enabled);
+ }
+ else
+ {
+ ps_blk_32x32->best_inter_cost = 0;
+ ps_blk_32x32->intra_mv_area = 0;
+ }
+#else
+ hme_init_clusters_32x32(ps_blk_32x32, bidir_enabled);
+#endif
+ }
+
+#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
+ if(e_quality_preset < ME_HIGH_QUALITY)
+ {
+ hme_init_clusters_64x64(ps_blk_64x64, bidir_enabled);
+ }
+ else
+ {
+ ps_blk_64x64->best_inter_cost = 0;
+ ps_blk_64x64->intra_mv_area = 0;
+ }
+#else
+ hme_init_clusters_64x64(ps_blk_64x64, bidir_enabled);
+#endif
+
+ /* Initialise data for all nodes in the CU tree */
+ hme_build_cu_tree(
+ ps_ctb_cluster_info, ps_ctb_cluster_info->ps_cu_tree_root, 0, POS_NA, POS_NA, POS_NA);
+
+ if(e_quality_preset >= ME_HIGH_QUALITY)
+ {
+ memset(ps_ctb_cluster_info->au1_is_16x16_blk_split, 1, 16 * sizeof(U08));
+ }
+
+#if ENABLE_UNIFORM_CU_SIZE_16x16 || ENABLE_UNIFORM_CU_SIZE_8x8
+ return;
+#endif
+
+ for(i = 0; i < 16; i++)
+ {
+ S32 blk_8x8_mask;
+ S32 is_16x16_blk_valid;
+ S32 num_clusters_updated;
+ S32 num_clusters;
+
+ blk_8x8_mask = ps_ctb_cluster_info->pi4_blk_8x8_mask[i];
+
+ ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i];
+
+ is_16x16_blk_valid = (blk_8x8_mask == 0xf); /* all four mask bits set */
+
+ if(is_16x16_blk_valid)
+ {
+ /* Use 8x8 data when 16x16 CU is split */
+ if(ps_search_results[i].u1_split_flag)
+ {
+ S32 blk_8x8_idx = i << 2;
+
+ num_parts = 4;
+ e_part_type = PRT_NxN;
+
+ for(j = 0; j < num_parts; j++, blk_8x8_idx++)
+ {
+ /* Only 2Nx2N partition supported for 8x8 block */
+ ASSERT(
+ ps_8x8_cu_results[blk_8x8_idx].ps_best_results[0].u1_part_type ==
+ ((PART_TYPE_T)PRT_2Nx2N));
+
+ aps_part_result[j] =
+ &ps_8x8_cu_results[blk_8x8_idx].ps_best_results[0].as_pu_results[0];
+ aps_inferior_parts[j] =
+ &ps_8x8_cu_results[blk_8x8_idx].ps_best_results[1].as_pu_results[0];
+ ai4_pred_mode[j] = (aps_part_result[j]->pu.b2_pred_mode);
+ }
+ }
+ else
+ {
+ ps_best_result = &ps_16x16_cu_results[i].ps_best_results[0];
+
+ e_part_type = (PART_TYPE_T)ps_best_result->u1_part_type;
+ num_parts = gau1_num_parts_in_part_type[e_part_type];
+
+ for(j = 0; j < num_parts; j++)
+ {
+ aps_part_result[j] = &ps_best_result->as_pu_results[j];
+ aps_inferior_parts[j] = &ps_best_result[1].as_pu_results[j];
+ ai4_pred_mode[j] = (aps_part_result[j]->pu.b2_pred_mode);
+ }
+
+ ps_ctb_cluster_info->au1_is_16x16_blk_split[i] = 0;
+ }
+
+ for(j = 0; j < num_parts; j++)
+ {
+ pu_result_t *ps_part_result = aps_part_result[j];
+
+ S32 num_mvs = ((ai4_pred_mode[j] > 1) + 1); /* pred mode 2 gives two MVs, else one */
+
+ e_part_id = ge_part_type_to_part_id[e_part_type][j];
+
+ /* Skip clustering if best mode is intra; inter cost then comes from result [1] */
+ if((ps_part_result->pu.b1_intra_flag))
+ {
+ ps_blk_16x16->intra_mv_area += gai4_partition_area[e_part_id];
+ ps_blk_16x16->best_inter_cost += aps_inferior_parts[j]->i4_tot_cost;
+ continue;
+ }
+ else
+ {
+ ps_blk_16x16->best_inter_cost += ps_part_result->i4_tot_cost;
+ }
+
+#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
+ if(e_quality_preset >= ME_HIGH_QUALITY)
+ {
+ continue;
+ }
+#endif
+
+ for(k = 0; k < num_mvs; k++)
+ {
+ mv_t *ps_mv;
+
+ pu_mv_t *ps_pu_mv = &ps_part_result->pu.mv;
+
+ S32 is_l0_mv = ((ai4_pred_mode[j] == 2) && !k) || (ai4_pred_mode[j] == 0);
+
+ ps_mv = (is_l0_mv) ? (&ps_pu_mv->s_l0_mv) : (&ps_pu_mv->s_l1_mv);
+
+ mvx = ps_mv->i2_mvx;
+ mvy = ps_mv->i2_mvy;
+
+ ref_idx = (is_l0_mv) ? pi1_past_list[ps_pu_mv->i1_l0_ref_idx]
+ : pi1_future_list[ps_pu_mv->i1_l1_ref_idx];
+
+ num_clusters = ps_blk_16x16->num_clusters;
+
+ hme_find_and_update_clusters(
+ ps_blk_16x16->as_cluster_data,
+ &(ps_blk_16x16->num_clusters),
+ mvx,
+ mvy,
+ ref_idx,
+ ps_part_result->i4_sdi,
+ e_part_id,
+ (ai4_pred_mode[j] == 2));
+
+ num_clusters_updated = (ps_blk_16x16->num_clusters);
+
+ ps_blk_16x16->au1_num_clusters[ref_idx] +=
+ (num_clusters_updated - num_clusters);
+ }
+ }
+ }
+ }
+
+ /* Search for 32x32 clusters in every 32x32 blk flagged in blk_32x32_mask */
+ for(i = 0; i < 4; i++)
+ {
+ S32 num_clusters_merged;
+
+ S32 is_32x32_blk_valid = (ps_ctb_cluster_info->blk_32x32_mask & (1 << i)) || 0; /* 0 or 1 */
+
+ if(is_32x32_blk_valid)
+ {
+ ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[i];
+ ps_blk_16x16 = &ps_ctb_cluster_info->ps_16x16_blk[i << 2];
+
+#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
+ if(e_quality_preset >= ME_HIGH_QUALITY)
+ {
+ for(j = 0; j < 4; j++, ps_blk_16x16++)
+ {
+ ps_blk_32x32->intra_mv_area += ps_blk_16x16->intra_mv_area;
+
+ ps_blk_32x32->best_inter_cost += ps_blk_16x16->best_inter_cost;
+ }
+ continue;
+ }
+#endif
+
+ hme_update_32x32_clusters(ps_blk_32x32, ps_blk_16x16);
+
+ if((ps_blk_32x32->num_clusters >= MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK))
+ {
+ num_clusters_merged = hme_try_merge_clusters_blksize_gt_16(
+ ps_blk_32x32->as_cluster_data, (ps_blk_32x32->num_clusters));
+
+ if(num_clusters_merged)
+ {
+ ps_blk_32x32->num_clusters -= num_clusters_merged;
+
+ UPDATE_CLUSTER_METADATA_POST_MERGE(ps_blk_32x32);
+ }
+ }
+ }
+ }
+
+#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
+ /* Eliminate outlier 32x32 clusters */
+ if(e_quality_preset < ME_HIGH_QUALITY)
+#endif
+ {
+ hme_boot_out_outlier(ps_ctb_cluster_info, 32);
+
+ /* Find best_uni_ref and best_alt_ref */
+ hme_find_top_ref_ids(ps_ctb_cluster_info, bidir_enabled, 32);
+ }
+
+ /* Populate the CU tree for depths 1 and higher and count the valid depth-1 nodes */
+ {
+ cur_ctb_cu_tree_t *ps_tree_root = ps_ctb_cluster_info->ps_cu_tree_root;
+ cur_ctb_cu_tree_t *ps_tl = ps_tree_root->ps_child_node_tl;
+ cur_ctb_cu_tree_t *ps_tr = ps_tree_root->ps_child_node_tr;
+ cur_ctb_cu_tree_t *ps_bl = ps_tree_root->ps_child_node_bl;
+ cur_ctb_cu_tree_t *ps_br = ps_tree_root->ps_child_node_br;
+
+ hme_populate_cu_tree(
+ ps_ctb_cluster_info, ps_tl, 1, e_quality_preset, POS_NA, POS_NA, POS_TL);
+
+ num_32x32_merges += (ps_tl->is_node_valid == 1);
+
+ hme_populate_cu_tree(
+ ps_ctb_cluster_info, ps_tr, 1, e_quality_preset, POS_NA, POS_NA, POS_TR);
+
+ num_32x32_merges += (ps_tr->is_node_valid == 1);
+
+ hme_populate_cu_tree(
+ ps_ctb_cluster_info, ps_bl, 1, e_quality_preset, POS_NA, POS_NA, POS_BL);
+
+ num_32x32_merges += (ps_bl->is_node_valid == 1);
+
+ hme_populate_cu_tree(
+ ps_ctb_cluster_info, ps_br, 1, e_quality_preset, POS_NA, POS_NA, POS_BR);
+
+ num_32x32_merges += (ps_br->is_node_valid == 1);
+ }
+
+#if !ENABLE_4CTB_EVALUATION
+ if(e_quality_preset < ME_HIGH_QUALITY)
+ {
+ enable_64x64_merge = (num_32x32_merges >= 3);
+ }
+#else
+ if(e_quality_preset < ME_HIGH_QUALITY)
+ {
+ enable_64x64_merge = 1;
+ }
+#endif
+
+#if 1 //!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
+ if(e_quality_preset >= ME_HIGH_QUALITY)
+ {
+ enable_64x64_merge = 1;
+ }
+#else
+ if(e_quality_preset >= ME_HIGH_QUALITY)
+ {
+ enable_64x64_merge = (num_32x32_merges >= 3);
+ }
+#endif
+
+ if(enable_64x64_merge)
+ {
+ S32 num_clusters_merged;
+
+ ps_blk_32x32 = &ps_ctb_cluster_info->ps_32x32_blk[0];
+
+#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
+ if(e_quality_preset >= ME_HIGH_QUALITY)
+ {
+ for(j = 0; j < 4; j++, ps_blk_32x32++)
+ {
+ ps_blk_64x64->intra_mv_area += ps_blk_32x32->intra_mv_area;
+
+ ps_blk_64x64->best_inter_cost += ps_blk_32x32->best_inter_cost;
+ }
+ }
+ else
+#endif
+ {
+ hme_update_64x64_clusters(ps_blk_64x64, ps_blk_32x32);
+
+ if((ps_blk_64x64->num_clusters >= MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK))
+ {
+ num_clusters_merged = hme_try_merge_clusters_blksize_gt_16(
+ ps_blk_64x64->as_cluster_data, (ps_blk_64x64->num_clusters));
+
+ if(num_clusters_merged)
+ {
+ ps_blk_64x64->num_clusters -= num_clusters_merged;
+
+ UPDATE_CLUSTER_METADATA_POST_MERGE(ps_blk_64x64);
+ }
+ }
+ }
+
+#if !ENABLE_4CTB_EVALUATION
+ if(e_quality_preset < ME_HIGH_QUALITY)
+ {
+ S32 best_inter_cost = ps_blk_64x64->best_inter_cost;
+ S32 best_intra_cost =
+ ((ps_ctb_cluster_info->ps_cur_ipe_ctb->i4_best64x64_intra_cost +
+ ps_ctb_cluster_info->i4_frame_qstep *
+ ps_ctb_cluster_info->i4_frame_qstep_multiplier * 16) < 0) /* NOTE(review): looks like an overflow check that relies on signed wrap-around - confirm */
+ ? MAX_32BIT_VAL
+ : (ps_ctb_cluster_info->ps_cur_ipe_ctb->i4_best64x64_intra_cost +
+ ps_ctb_cluster_info->i4_frame_qstep *
+ ps_ctb_cluster_info->i4_frame_qstep_multiplier * 16);
+ S32 best_cost = (best_inter_cost > best_intra_cost) ? best_intra_cost : best_inter_cost;
+ S32 cost_differential = (best_inter_cost - best_cost); /* 0 when inter is not costlier */
+
+ enable_64x64_merge =
+ ((ALL_INTER_COST_DIFF_THR * best_cost) >= (100 * cost_differential)); /* differential within THR percent of best cost */
+ }
+#endif
+ }
+
+ if(enable_64x64_merge)
+ {
+#if !USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS
+ if(e_quality_preset < ME_HIGH_QUALITY)
+#endif
+ {
+ hme_boot_out_outlier(ps_ctb_cluster_info, 64);
+
+ hme_find_top_ref_ids(ps_ctb_cluster_info, bidir_enabled, 64);
+ }
+
+ hme_populate_cu_tree(
+ ps_ctb_cluster_info,
+ ps_ctb_cluster_info->ps_cu_tree_root,
+ 0,
+ e_quality_preset,
+ POS_NA,
+ POS_NA,
+ POS_NA);
+ }
+}
+#endif
+
+/**
+********************************************************************************
+* @fn void hme_merge_prms_init(hme_merge_prms_t *ps_prms, ...)
+*
+* @brief Fills the merge params for one 32x32 merge (four 16x16 children) or
+* for the 64x64 merge (four 32x32 children)
+*
+* @param[out] ps_prms : merge params to be initialised
+* @param[in] i4_32x32_id : index of the 32x32 unit; read only for BLK_32x32
+* @param[in] e_blk_size : BLK_32x32 selects the 32x32 setup, any other value
+* selects the 64x64 setup
+*
+* @return None
+********************************************************************************
+*/
+static __inline void hme_merge_prms_init(
+    hme_merge_prms_t *ps_prms,
+    layer_ctxt_t *ps_curr_layer,
+    refine_prms_t *ps_refine_prms,
+    me_frm_ctxt_t *ps_me_ctxt,
+    range_prms_t *ps_range_prms_rec,
+    range_prms_t *ps_range_prms_inp,
+    mv_grid_t **pps_mv_grid,
+    inter_ctb_prms_t *ps_inter_ctb_prms,
+    S32 i4_num_pred_dir,
+    S32 i4_32x32_id,
+    BLK_SIZE_T e_blk_size,
+    ME_QUALITY_PRESETS_T e_me_quality_presets)
+{
+    WORD32 ref_ctr;
+    S32 i4_use_rec = ps_refine_prms->i4_use_rec_in_fpel;
+    /* Per-ref MV ranges come from the recon set or the input set */
+    range_prms_t *ps_range_prms = i4_use_rec ? ps_range_prms_rec : ps_range_prms_inp;
+
+    /* Handles and settings common to both merge sizes */
+    ps_prms->ps_layer_ctxt = ps_curr_layer;
+    ps_prms->ps_inter_ctb_prms = ps_inter_ctb_prms;
+    ps_prms->ps_cluster_info = ps_me_ctxt->ps_ctb_cluster_info;
+    ps_prms->pps_mv_grid = pps_mv_grid;
+    ps_prms->pi1_past_list = ps_me_ctxt->ai1_past_list;
+    ps_prms->pi1_future_list = ps_me_ctxt->ai1_future_list;
+    ps_prms->log_ctb_size = ps_me_ctxt->log_ctb_size;
+    ps_prms->e_quality_preset = e_me_quality_presets;
+    ps_prms->pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
+
+    /* Currently not enabling segmentation info from prev layers */
+    ps_prms->i4_seg_info_avail = 0;
+    ps_prms->i4_part_mask = 0;
+
+    /* Number of reference pics in which to do merge */
+    ps_prms->i4_num_ref = i4_num_pred_dir;
+
+    ps_prms->i4_use_rec = i4_use_rec;
+    for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
+    {
+        ps_prms->aps_mv_range[ref_ctr] = &ps_range_prms[ref_ctr];
+    }
+
+    if(BLK_32x32 == e_blk_size)
+    {
+        /* Children: four consecutive 16x16 results starting at 4 * i4_32x32_id */
+        search_results_t *ps_child = &ps_me_ctxt->as_search_results_16x16[i4_32x32_id << 2];
+
+        ps_prms->ps_results_tl = ps_child;
+        ps_prms->ps_results_tr = ps_child + 1;
+        ps_prms->ps_results_bl = ps_child + 2;
+        ps_prms->ps_results_br = ps_child + 3;
+        ps_prms->ps_results_grandchild = NULL;
+        /* Merge results stored here */
+        ps_prms->ps_results_merge = &ps_me_ctxt->as_search_results_32x32[i4_32x32_id];
+
+        /* Could be fewer than the 16x16 results generated; kept the same for now */
+        ps_prms->i4_num_inp_results = ps_refine_prms->i4_num_fpel_results;
+        ps_prms->ps_8x8_cu_results = &ps_me_ctxt->as_cu8x8_results[i4_32x32_id << 4];
+    }
+    else
+    {
+        /* Children: the four 32x32 results; 16x16 results act as grandchildren */
+        search_results_t *ps_child = &ps_me_ctxt->as_search_results_32x32[0];
+
+        ps_prms->ps_results_tl = ps_child;
+        ps_prms->ps_results_tr = ps_child + 1;
+        ps_prms->ps_results_bl = ps_child + 2;
+        ps_prms->ps_results_br = ps_child + 3;
+        ps_prms->ps_results_grandchild = ps_me_ctxt->as_search_results_16x16;
+        /* Merge results stored here */
+        ps_prms->ps_results_merge = &ps_me_ctxt->s_search_results_64x64;
+
+        ps_prms->i4_num_inp_results = ps_refine_prms->i4_num_32x32_merge_results;
+        ps_prms->ps_8x8_cu_results = &ps_me_ctxt->as_cu8x8_results[0];
+    }
+}
+
+/**
+********************************************************************************
+* @fn void hme_refine(me_ctxt_t *ps_thrd_ctxt, refine_prms_t *ps_refine_prms,
+* ...)
+*
+* @brief Top level entry point for refinement ME
+*
+* @param[in,out] ps_thrd_ctxt : ME handle
+* @param[in] ps_refine_prms : refinement layer prms
+* @param[in] ps_coarse_layer : coarser layer ctxt whose mv bank is read
+* @param[in] me_frm_id : modulo MAX_NUM_ME_PARALLEL selects the ME frame ctxt
+* @return None
+********************************************************************************
+*/
+void hme_refine(
+ me_ctxt_t *ps_thrd_ctxt,
+ refine_prms_t *ps_refine_prms,
+ PF_EXT_UPDATE_FXN_T pf_ext_update_fxn,
+ layer_ctxt_t *ps_coarse_layer,
+ multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
+ S32 lyr_job_type,
+ S32 thrd_id,
+ S32 me_frm_id,
+ pre_enc_L0_ipe_encloop_ctxt_t *ps_l0_ipe_input)
+{
+ inter_ctb_prms_t s_common_frm_prms;
+
+ BLK_SIZE_T e_search_blk_size, e_result_blk_size;
+ WORD32 i4_me_frm_id = me_frm_id % MAX_NUM_ME_PARALLEL;
+ me_frm_ctxt_t *ps_ctxt = ps_thrd_ctxt->aps_me_frm_prms[i4_me_frm_id];
+ ME_QUALITY_PRESETS_T e_me_quality_presets =
+ ps_thrd_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets;
+
+ WORD32 num_rows_proc = 0;
+ WORD32 num_act_ref_pics;
+ WORD16 i2_prev_enc_frm_max_mv_y;
+ WORD32 i4_idx_dvsr_p = ps_multi_thrd_ctxt->i4_idx_dvsr_p;
+
+ /*************************************************************************/
+ /* Complexity of search: Low to High */
+ /*************************************************************************/
+ SEARCH_COMPLEXITY_T e_search_complexity;
+
+ /*************************************************************************/
+ /* to store the PU results which are passed to the decide_part_types */
+ /* as input prms. Multiplied by 4 as the max number of Ref in a List is 4*/
+ /*************************************************************************/
+
+ pu_result_t as_pu_results[2][TOT_NUM_PARTS][MAX_NUM_RESULTS_PER_PART_LIST];
+ inter_pu_results_t as_inter_pu_results[4];
+ inter_pu_results_t *ps_pu_results = as_inter_pu_results;
+
+ /*************************************************************************/
+ /* Config parameter structures for varius ME submodules */
+ /*************************************************************************/
+ hme_merge_prms_t s_merge_prms_32x32_tl, s_merge_prms_32x32_tr;
+ hme_merge_prms_t s_merge_prms_32x32_bl, s_merge_prms_32x32_br;
+ hme_merge_prms_t s_merge_prms_64x64;
+ hme_search_prms_t s_search_prms_blk;
+ mvbank_update_prms_t s_mv_update_prms;
+ hme_ctb_prms_t s_ctb_prms;
+ hme_subpel_prms_t s_subpel_prms;
+ fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt = ps_ctxt->ps_fullpel_refine_ctxt;
+ ctb_cluster_info_t *ps_ctb_cluster_info;
+ fpel_srch_cand_init_data_t s_srch_cand_init_data;
+
+ /* 4 bits (LSBs) of this variable control merge of 4 32x32 CUs in CTB */
+ S32 en_merge_32x32;
+ /* 5 lsb's specify whether or not merge algorithm is required */
+ /* to be executed or not. Relevant only in PQ. Ought to be */
+ /* used in conjunction with en_merge_32x32 and */
+ /* ps_ctb_bound_attrs->u1_merge_to_64x64_flag. This is */
+ /* required when all children are deemed to be intras */
+ S32 en_merge_execution;
+
+ /*************************************************************************/
+ /* All types of search candidates for predictor based search. */
+ /*************************************************************************/
+ S32 num_init_candts = 0;
+ S32 i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
+ S32 i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
+ search_candt_t *ps_search_candts, as_search_candts[MAX_INIT_CANDTS];
+ search_node_t as_top_neighbours[4], as_left_neighbours[3];
+
+ pf_get_wt_inp fp_get_wt_inp;
+
+ search_node_t as_unique_search_nodes[MAX_INIT_CANDTS * 9];
+ U32 au4_unique_node_map[MAP_X_MAX * 2];
+
+ /* Controls the boundary attributes of CTB, whether it has 64x64 or not */
+ ctb_boundary_attrs_t *ps_ctb_bound_attrs;
+
+ /*************************************************************************/
+ /* points ot the search results for the blk level search (8x8/16x16) */
+ /*************************************************************************/
+ search_results_t *ps_search_results;
+
+ /*************************************************************************/
+ /* Coordinates */
+ /*************************************************************************/
+ S32 blk_x, blk_y, i4_ctb_x, i4_ctb_y, tile_col_idx, blk_id_in_ctb;
+ S32 pos_x, pos_y;
+ S32 blk_id_in_full_ctb;
+
+ /*************************************************************************/
+ /* Related to dimensions of block being searched and pic dimensions */
+ /*************************************************************************/
+ S32 blk_4x4_to_16x16;
+ S32 blk_wd, blk_ht, blk_size_shift;
+ S32 i4_pic_wd, i4_pic_ht, num_blks_in_this_ctb;
+ S32 num_results_prev_layer;
+
+ /*************************************************************************/
+ /* Size of a basic unit for this layer. For non encode layers, we search */
+ /* in block sizes of 8x8. For encode layers, though we search 16x16s the */
+ /* basic unit size is the ctb size. */
+ /*************************************************************************/
+ S32 unit_size;
+
+ /*************************************************************************/
+ /* Local variable storing results of any 4 CU merge to bigger CU */
+ /*************************************************************************/
+ CU_MERGE_RESULT_T e_merge_result;
+
+ /*************************************************************************/
+ /* This mv grid stores results during and after fpel search, during */
+ /* merge, subpel and bidirect refinements stages. 2 instances of this are*/
+ /* meant for the 2 directions of search (l0 and l1). */
+ /*************************************************************************/
+ mv_grid_t *aps_mv_grid[2];
+
+ /*************************************************************************/
+ /* Pointers to context in current and coarser layers */
+ /*************************************************************************/
+ layer_ctxt_t *ps_curr_layer, *ps_prev_layer;
+
+ /*************************************************************************/
+ /* to store mv range per blk, and picture limit, allowed search range */
+ /* range prms in hpel and qpel units as well */
+ /*************************************************************************/
+ range_prms_t as_range_prms_inp[MAX_NUM_REF], as_range_prms_rec[MAX_NUM_REF];
+ range_prms_t s_pic_limit_inp, s_pic_limit_rec, as_mv_limit[MAX_NUM_REF];
+ range_prms_t as_range_prms_hpel[MAX_NUM_REF], as_range_prms_qpel[MAX_NUM_REF];
+
+ /*************************************************************************/
+ /* These variables are used to track number of references at different */
+ /* stages of ME. */
+ /*************************************************************************/
+ S32 i4_num_pred_dir;
+ S32 i4_num_ref_each_dir, i, i4_num_ref_prev_layer;
+ S32 lambda_recon = ps_refine_prms->lambda_recon;
+
+ /* Counts successful merge to 32x32 every CTB (0-4) */
+ S32 merge_count_32x32;
+
+ S32 ai4_id_coloc[14], ai4_id_Z[2];
+ U08 au1_search_candidate_list_index[2];
+ S32 ai4_num_coloc_cands[2];
+ U08 u1_pred_dir, u1_pred_dir_ctr;
+
+ /*************************************************************************/
+ /* Input pointer and stride */
+ /*************************************************************************/
+ U08 *pu1_inp;
+ S32 i4_inp_stride;
+ S32 end_of_frame;
+ S32 num_sync_units_in_row, num_sync_units_in_tile;
+
+ /*************************************************************************/
+ /* Indicates whether the all 4 8x8 blks are valid in the 16x16 blk in the*/
+ /* encode layer. If not 15, then 1 or more 8x8 blks not valid. Means that*/
+ /* we need to stop merges and force 8x8 CUs for that 16x16 blk */
+ /*************************************************************************/
+ S32 blk_8x8_mask;
+ S32 ai4_blk_8x8_mask[16];
+ U08 au1_is_64x64Blk_noisy[1];
+ U08 au1_is_32x32Blk_noisy[4];
+ U08 au1_is_16x16Blk_noisy[16];
+
+ ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list =
+ ps_thrd_ctxt->ps_cmn_utils_optimised_function_list;
+ ihevce_me_optimised_function_list_t *ps_me_optimised_function_list =
+ ((ihevce_me_optimised_function_list_t *)ps_thrd_ctxt->pv_me_optimised_function_list);
+
+ ASSERT(ps_refine_prms->i4_layer_id < ps_ctxt->num_layers - 1);
+
+ /*************************************************************************/
+ /* Pointers to current and coarse layer are needed for projection */
+ /* Pointer to prev layer are needed for other candts like coloc */
+ /*************************************************************************/
+ ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id];
+
+ ps_prev_layer = hme_get_past_layer_ctxt(
+ ps_thrd_ctxt, ps_ctxt, ps_refine_prms->i4_layer_id, ps_multi_thrd_ctxt->i4_num_me_frm_pllel);
+
+ num_results_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_mvs_per_ref;
+
+ /* Function pointer is selected based on the C vc X86 macro */
+
+ fp_get_wt_inp = ps_me_optimised_function_list->pf_get_wt_inp_ctb;
+
+ i4_inp_stride = ps_curr_layer->i4_inp_stride;
+ i4_pic_wd = ps_curr_layer->i4_wd;
+ i4_pic_ht = ps_curr_layer->i4_ht;
+ e_search_complexity = ps_refine_prms->e_search_complexity;
+ end_of_frame = 0;
+
+ /* This points to all the initial candts */
+ ps_search_candts = &as_search_candts[0];
+
+ /* mv grid being huge strucutre is part of context */
+ aps_mv_grid[0] = &ps_ctxt->as_mv_grid[0];
+ aps_mv_grid[1] = &ps_ctxt->as_mv_grid[1];
+
+ /*************************************************************************/
+ /* If the current layer is encoded (since it may be multicast or final */
+ /* layer (finest)), then we use 16x16 blk size with some selected parts */
+ /* If the current layer is not encoded, then we use 8x8 blk size, with */
+ /* enable or disable of 4x4 partitions depending on the input prms */
+ /*************************************************************************/
+ e_search_blk_size = BLK_16x16;
+ blk_wd = blk_ht = 16;
+ blk_size_shift = 4;
+ e_result_blk_size = BLK_8x8;
+ s_mv_update_prms.i4_shift = 1;
+
+ if(ps_coarse_layer->ps_layer_mvbank->e_blk_size == BLK_4x4)
+ {
+ blk_4x4_to_16x16 = 1;
+ }
+ else
+ {
+ blk_4x4_to_16x16 = 0;
+ }
+
+ unit_size = 1 << ps_ctxt->log_ctb_size;
+ s_search_prms_blk.i4_inp_stride = unit_size;
+
+ /* This is required to properly update the layer mv bank */
+ s_mv_update_prms.e_search_blk_size = e_search_blk_size;
+ s_search_prms_blk.e_blk_size = e_search_blk_size;
+
+ /*************************************************************************/
+ /* If current layer is explicit, then the number of ref frames are to */
+ /* be same as previous layer. Else it will be 2 */
+ /*************************************************************************/
+ i4_num_ref_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_ref;
+ i4_num_pred_dir =
+ (ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 > 0) && (i4_num_act_ref_l1 > 0)) +
+ 1;
+
+#if USE_MODIFIED == 1
+ s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified;
+#else
+ s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
+#endif
+
+ i4_num_pred_dir = MIN(i4_num_pred_dir, i4_num_ref_prev_layer);
+ if(i4_num_ref_prev_layer <= 2)
+ {
+ i4_num_ref_each_dir = 1;
+ }
+ else
+ {
+ i4_num_ref_each_dir = i4_num_ref_prev_layer >> 1;
+ }
+
+ s_mv_update_prms.i4_num_ref = i4_num_pred_dir;
+ s_mv_update_prms.i4_num_results_to_store =
+ MIN((ps_ctxt->s_frm_prms.bidir_enabled) ? ps_curr_layer->ps_layer_mvbank->i4_num_mvs_per_ref
+ : (i4_num_act_ref_l0 > 1) + 1,
+ ps_refine_prms->i4_num_results_per_part);
+
+ /*************************************************************************/
+ /* Initialization of merge params for 16x16 to 32x32 merge. */
+ /* There are 4 32x32 units in a CTB, so 4 param structures initialized */
+ /*************************************************************************/
+ {
+ hme_merge_prms_t *aps_merge_prms[4];
+ aps_merge_prms[0] = &s_merge_prms_32x32_tl;
+ aps_merge_prms[1] = &s_merge_prms_32x32_tr;
+ aps_merge_prms[2] = &s_merge_prms_32x32_bl;
+ aps_merge_prms[3] = &s_merge_prms_32x32_br;
+ for(i = 0; i < 4; i++)
+ {
+ hme_merge_prms_init(
+ aps_merge_prms[i],
+ ps_curr_layer,
+ ps_refine_prms,
+ ps_ctxt,
+ as_range_prms_rec,
+ as_range_prms_inp,
+ &aps_mv_grid[0],
+ &s_common_frm_prms,
+ i4_num_pred_dir,
+ i,
+ BLK_32x32,
+ e_me_quality_presets);
+ }
+ }
+
+ /*************************************************************************/
+ /* Initialization of merge params for 32x32 to 64x64 merge. */
+ /* There are 4 32x32 units in a CTB, so only 1 64x64 CU can be in CTB */
+ /*************************************************************************/
+ {
+ hme_merge_prms_init(
+ &s_merge_prms_64x64,
+ ps_curr_layer,
+ ps_refine_prms,
+ ps_ctxt,
+ as_range_prms_rec,
+ as_range_prms_inp,
+ &aps_mv_grid[0],
+ &s_common_frm_prms,
+ i4_num_pred_dir,
+ 0,
+ BLK_64x64,
+ e_me_quality_presets);
+ }
+
+ /* Pointers to cu_results are initialised here */
+ {
+ WORD32 i;
+
+ ps_ctxt->s_search_results_64x64.ps_cu_results = &ps_ctxt->s_cu64x64_results;
+
+ for(i = 0; i < 4; i++)
+ {
+ ps_ctxt->as_search_results_32x32[i].ps_cu_results = &ps_ctxt->as_cu32x32_results[i];
+ }
+
+ for(i = 0; i < 16; i++)
+ {
+ ps_ctxt->as_search_results_16x16[i].ps_cu_results = &ps_ctxt->as_cu16x16_results[i];
+ }
+ }
+
+ /*************************************************************************/
+ /* SUBPEL Params initialized here */
+ /*************************************************************************/
+ {
+ s_subpel_prms.ps_search_results_16x16 = &ps_ctxt->as_search_results_16x16[0];
+ s_subpel_prms.ps_search_results_32x32 = &ps_ctxt->as_search_results_32x32[0];
+ s_subpel_prms.ps_search_results_64x64 = &ps_ctxt->s_search_results_64x64;
+
+ s_subpel_prms.i4_num_16x16_candts = ps_refine_prms->i4_num_fpel_results;
+ s_subpel_prms.i4_num_32x32_candts = ps_refine_prms->i4_num_32x32_merge_results;
+ s_subpel_prms.i4_num_64x64_candts = ps_refine_prms->i4_num_64x64_merge_results;
+
+ s_subpel_prms.i4_num_steps_hpel_refine = ps_refine_prms->i4_num_steps_hpel_refine;
+ s_subpel_prms.i4_num_steps_qpel_refine = ps_refine_prms->i4_num_steps_qpel_refine;
+
+ s_subpel_prms.i4_use_satd = ps_refine_prms->i4_use_satd_subpel;
+
+ s_subpel_prms.i4_inp_stride = unit_size;
+
+ s_subpel_prms.u1_max_subpel_candts_2Nx2N = ps_refine_prms->u1_max_subpel_candts_2Nx2N;
+ s_subpel_prms.u1_max_subpel_candts_NxN = ps_refine_prms->u1_max_subpel_candts_NxN;
+ s_subpel_prms.u1_subpel_candt_threshold = ps_refine_prms->u1_subpel_candt_threshold;
+
+ s_subpel_prms.pf_qpel_interp = ps_me_optimised_function_list->pf_qpel_interp_avg_generic;
+
+ {
+ WORD32 ref_ctr;
+ for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
+ {
+ s_subpel_prms.aps_mv_range_hpel[ref_ctr] = &as_range_prms_hpel[ref_ctr];
+ s_subpel_prms.aps_mv_range_qpel[ref_ctr] = &as_range_prms_qpel[ref_ctr];
+ }
+ }
+ s_subpel_prms.pi2_inp_bck = ps_ctxt->pi2_inp_bck;
+
+#if USE_MODIFIED == 0
+ s_subpel_prms.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
+#else
+ s_subpel_prms.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified;
+#endif
+ s_subpel_prms.e_me_quality_presets = e_me_quality_presets;
+
+ /* BI Refinement done only if this field is 1 */
+ s_subpel_prms.bidir_enabled = ps_refine_prms->bidir_enabled;
+
+ s_subpel_prms.u1_num_ref = ps_ctxt->num_ref_future + ps_ctxt->num_ref_past;
+
+ s_subpel_prms.i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
+ s_subpel_prms.i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
+ s_subpel_prms.u1_max_num_subpel_refine_centers =
+ ps_refine_prms->u1_max_num_subpel_refine_centers;
+ }
+
+ /* inter_ctb_prms_t struct initialisation */
+ {
+ inter_ctb_prms_t *ps_inter_ctb_prms = &s_common_frm_prms;
+ hme_subpel_prms_t *ps_subpel_prms = &s_subpel_prms;
+
+ ps_inter_ctb_prms->pps_rec_list_l0 = ps_ctxt->ps_hme_ref_map->pps_rec_list_l0;
+ ps_inter_ctb_prms->pps_rec_list_l1 = ps_ctxt->ps_hme_ref_map->pps_rec_list_l1;
+ ps_inter_ctb_prms->wpred_log_wdc = ps_ctxt->s_wt_pred.wpred_log_wdc;
+ ps_inter_ctb_prms->u1_max_tr_depth = ps_thrd_ctxt->s_init_prms.u1_max_tr_depth;
+ ps_inter_ctb_prms->i1_quality_preset = e_me_quality_presets;
+ ps_inter_ctb_prms->i4_bidir_enabled = ps_subpel_prms->bidir_enabled;
+ ps_inter_ctb_prms->i4_inp_stride = ps_subpel_prms->i4_inp_stride;
+ ps_inter_ctb_prms->u1_num_ref = ps_subpel_prms->u1_num_ref;
+ ps_inter_ctb_prms->u1_use_satd = ps_subpel_prms->i4_use_satd;
+ ps_inter_ctb_prms->i4_rec_stride = ps_curr_layer->i4_rec_stride;
+ ps_inter_ctb_prms->u1_num_active_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
+ ps_inter_ctb_prms->u1_num_active_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
+ ps_inter_ctb_prms->i4_lamda = lambda_recon;
+ ps_inter_ctb_prms->u1_lamda_qshift = ps_refine_prms->lambda_q_shift;
+ ps_inter_ctb_prms->i4_qstep_ls8 = ps_ctxt->ps_hme_frm_prms->qstep_ls8;
+ ps_inter_ctb_prms->pi4_inv_wt = ps_ctxt->s_wt_pred.a_inv_wpred_wt;
+ ps_inter_ctb_prms->pi1_past_list = ps_ctxt->ai1_past_list;
+ ps_inter_ctb_prms->pi1_future_list = ps_ctxt->ai1_future_list;
+ ps_inter_ctb_prms->pu4_src_variance = s_search_prms_blk.au4_src_variance;
+ ps_inter_ctb_prms->u1_max_2nx2n_tu_recur_cands =
+ ps_refine_prms->u1_max_2nx2n_tu_recur_cands;
+ }
+
+ for(i = 0; i < MAX_INIT_CANDTS; i++)
+ {
+ ps_search_candts[i].ps_search_node = &ps_ctxt->s_init_search_node[i];
+ ps_search_candts[i].ps_search_node->ps_mv = &ps_ctxt->as_search_cand_mv[i];
+
+ INIT_SEARCH_NODE(ps_search_candts[i].ps_search_node, 0);
+ }
+ num_act_ref_pics =
+ ps_ctxt->s_frm_prms.u1_num_active_ref_l0 + ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
+
+ if(num_act_ref_pics)
+ {
+ hme_search_cand_data_init(
+ ai4_id_Z,
+ ai4_id_coloc,
+ ai4_num_coloc_cands,
+ au1_search_candidate_list_index,
+ i4_num_act_ref_l0,
+ i4_num_act_ref_l1,
+ ps_ctxt->s_frm_prms.bidir_enabled,
+ blk_4x4_to_16x16);
+ }
+
+ if(!ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 > 1))
+ {
+ ps_search_candts[ai4_id_Z[0]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[0];
+ ps_search_candts[ai4_id_Z[1]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[1];
+ }
+ else if(!ps_ctxt->s_frm_prms.bidir_enabled && (i4_num_act_ref_l0 == 1))
+ {
+ ps_search_candts[ai4_id_Z[0]].ps_search_node->i1_ref_idx = ps_ctxt->ai1_past_list[0];
+ }
+
+ for(i = 0; i < 3; i++)
+ {
+ search_node_t *ps_search_node;
+ ps_search_node = &as_left_neighbours[i];
+ INIT_SEARCH_NODE(ps_search_node, 0);
+ ps_search_node = &as_top_neighbours[i];
+ INIT_SEARCH_NODE(ps_search_node, 0);
+ }
+
+ INIT_SEARCH_NODE(&as_top_neighbours[3], 0);
+ as_left_neighbours[2].u1_is_avail = 0;
+
+ /*************************************************************************/
+ /* Initialize all the search results structure here. We update all the */
+ /* search results to default values, and configure things like blk sizes */
+ /*************************************************************************/
+ if(num_act_ref_pics)
+ {
+ S32 i4_x, i4_y;
+ /* 16x16 results */
+ for(i = 0; i < 16; i++)
+ {
+ search_results_t *ps_search_results;
+ S32 pred_lx;
+ ps_search_results = &ps_ctxt->as_search_results_16x16[i];
+ i4_x = (S32)gau1_encode_to_raster_x[i];
+ i4_y = (S32)gau1_encode_to_raster_y[i];
+ i4_x <<= 4;
+ i4_y <<= 4;
+
+ hme_init_search_results(
+ ps_search_results,
+ i4_num_pred_dir,
+ ps_refine_prms->i4_num_fpel_results,
+ ps_refine_prms->i4_num_results_per_part,
+ e_search_blk_size,
+ i4_x,
+ i4_y,
+ &ps_ctxt->au1_is_past[0]);
+
+ for(pred_lx = 0; pred_lx < 2; pred_lx++)
+ {
+ pred_ctxt_t *ps_pred_ctxt;
+
+ ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
+
+ hme_init_pred_ctxt_encode(
+ ps_pred_ctxt,
+ ps_search_results,
+ ps_search_candts[ai4_id_coloc[0]].ps_search_node,
+ ps_search_candts[ai4_id_Z[0]].ps_search_node,
+ aps_mv_grid[pred_lx],
+ pred_lx,
+ lambda_recon,
+ ps_refine_prms->lambda_q_shift,
+ &ps_ctxt->apu1_ref_bits_tlu_lc[0],
+ &ps_ctxt->ai2_ref_scf[0]);
+ }
+ }
+
+ for(i = 0; i < 4; i++)
+ {
+ search_results_t *ps_search_results;
+ S32 pred_lx;
+ ps_search_results = &ps_ctxt->as_search_results_32x32[i];
+
+ i4_x = (S32)gau1_encode_to_raster_x[i];
+ i4_y = (S32)gau1_encode_to_raster_y[i];
+ i4_x <<= 5;
+ i4_y <<= 5;
+
+ hme_init_search_results(
+ ps_search_results,
+ i4_num_pred_dir,
+ ps_refine_prms->i4_num_32x32_merge_results,
+ ps_refine_prms->i4_num_results_per_part,
+ BLK_32x32,
+ i4_x,
+ i4_y,
+ &ps_ctxt->au1_is_past[0]);
+
+ for(pred_lx = 0; pred_lx < 2; pred_lx++)
+ {
+ pred_ctxt_t *ps_pred_ctxt;
+
+ ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
+
+ hme_init_pred_ctxt_encode(
+ ps_pred_ctxt,
+ ps_search_results,
+ ps_search_candts[ai4_id_coloc[0]].ps_search_node,
+ ps_search_candts[ai4_id_Z[0]].ps_search_node,
+ aps_mv_grid[pred_lx],
+ pred_lx,
+ lambda_recon,
+ ps_refine_prms->lambda_q_shift,
+ &ps_ctxt->apu1_ref_bits_tlu_lc[0],
+ &ps_ctxt->ai2_ref_scf[0]);
+ }
+ }
+
+ {
+ search_results_t *ps_search_results;
+ S32 pred_lx;
+ ps_search_results = &ps_ctxt->s_search_results_64x64;
+
+ hme_init_search_results(
+ ps_search_results,
+ i4_num_pred_dir,
+ ps_refine_prms->i4_num_64x64_merge_results,
+ ps_refine_prms->i4_num_results_per_part,
+ BLK_64x64,
+ 0,
+ 0,
+ &ps_ctxt->au1_is_past[0]);
+
+ for(pred_lx = 0; pred_lx < 2; pred_lx++)
+ {
+ pred_ctxt_t *ps_pred_ctxt;
+
+ ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
+
+ hme_init_pred_ctxt_encode(
+ ps_pred_ctxt,
+ ps_search_results,
+ ps_search_candts[ai4_id_coloc[0]].ps_search_node,
+ ps_search_candts[ai4_id_Z[0]].ps_search_node,
+ aps_mv_grid[pred_lx],
+ pred_lx,
+ lambda_recon,
+ ps_refine_prms->lambda_q_shift,
+ &ps_ctxt->apu1_ref_bits_tlu_lc[0],
+ &ps_ctxt->ai2_ref_scf[0]);
+ }
+ }
+ }
+
+ /* Initialise the structure used in clustering */
+ if(ME_PRISTINE_QUALITY == e_me_quality_presets)
+ {
+ ps_ctb_cluster_info = ps_ctxt->ps_ctb_cluster_info;
+
+ ps_ctb_cluster_info->ps_16x16_blk = ps_ctxt->ps_blk_16x16;
+ ps_ctb_cluster_info->ps_32x32_blk = ps_ctxt->ps_blk_32x32;
+ ps_ctb_cluster_info->ps_64x64_blk = ps_ctxt->ps_blk_64x64;
+ ps_ctb_cluster_info->pi4_blk_8x8_mask = ai4_blk_8x8_mask;
+ ps_ctb_cluster_info->sdi_threshold = ps_refine_prms->sdi_threshold;
+ ps_ctb_cluster_info->i4_frame_qstep = ps_ctxt->frm_qstep;
+ ps_ctb_cluster_info->i4_frame_qstep_multiplier = 16;
+ }
+
+ /*********************************************************************/
+ /* Initialize the dyn. search range params. for each reference index */
+ /* in current layer ctxt */
+ /*********************************************************************/
+
+ /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
+ if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
+ {
+ WORD32 ref_ctr;
+ /* set no. of act ref in L0 for further use at frame level */
+ ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i4_num_act_ref_in_l0 =
+ ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
+
+ for(ref_ctr = 0; ref_ctr < ps_ctxt->s_frm_prms.u1_num_active_ref_l0; ref_ctr++)
+ {
+ INIT_DYN_SEARCH_PRMS(
+ &ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].as_dyn_range_prms[ref_ctr],
+ ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr]);
+ }
+ }
+ /*************************************************************************/
+ /* Now that the candidates have been ordered, to choose the right number */
+ /* of initial candidates. */
+ /*************************************************************************/
+ if(blk_4x4_to_16x16)
+ {
+ if(i4_num_ref_prev_layer > 2)
+ {
+ if(e_search_complexity == SEARCH_CX_LOW)
+ num_init_candts = 7 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
+ else if(e_search_complexity == SEARCH_CX_MED)
+ num_init_candts = 14 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
+ else if(e_search_complexity == SEARCH_CX_HIGH)
+ num_init_candts = 21 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
+ else
+ ASSERT(0);
+ }
+ else if(i4_num_ref_prev_layer == 2)
+ {
+ if(e_search_complexity == SEARCH_CX_LOW)
+ num_init_candts = 5 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
+ else if(e_search_complexity == SEARCH_CX_MED)
+ num_init_candts = 12 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
+ else if(e_search_complexity == SEARCH_CX_HIGH)
+ num_init_candts = 19 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
+ else
+ ASSERT(0);
+ }
+ else
+ {
+ if(e_search_complexity == SEARCH_CX_LOW)
+ num_init_candts = 5;
+ else if(e_search_complexity == SEARCH_CX_MED)
+ num_init_candts = 12;
+ else if(e_search_complexity == SEARCH_CX_HIGH)
+ num_init_candts = 19;
+ else
+ ASSERT(0);
+ }
+ }
+ else
+ {
+ if(i4_num_ref_prev_layer > 2)
+ {
+ if(e_search_complexity == SEARCH_CX_LOW)
+ num_init_candts = 7 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
+ else if(e_search_complexity == SEARCH_CX_MED)
+ num_init_candts = 13 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
+ else if(e_search_complexity == SEARCH_CX_HIGH)
+ num_init_candts = 18 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
+ else
+ ASSERT(0);
+ }
+ else if(i4_num_ref_prev_layer == 2)
+ {
+ if(e_search_complexity == SEARCH_CX_LOW)
+ num_init_candts = 5 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
+ else if(e_search_complexity == SEARCH_CX_MED)
+ num_init_candts = 11 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
+ else if(e_search_complexity == SEARCH_CX_HIGH)
+ num_init_candts = 16 * (!ps_ctxt->s_frm_prms.bidir_enabled + 1);
+ else
+ ASSERT(0);
+ }
+ else
+ {
+ if(e_search_complexity == SEARCH_CX_LOW)
+ num_init_candts = 5;
+ else if(e_search_complexity == SEARCH_CX_MED)
+ num_init_candts = 11;
+ else if(e_search_complexity == SEARCH_CX_HIGH)
+ num_init_candts = 16;
+ else
+ ASSERT(0);
+ }
+ }
+
+ /*************************************************************************/
+ /* The following search parameters are fixed throughout the search across*/
+ /* all blks. So these are configured outside processing loop */
+ /*************************************************************************/
+ s_search_prms_blk.i4_num_init_candts = num_init_candts;
+ s_search_prms_blk.i4_start_step = 1;
+ s_search_prms_blk.i4_use_satd = 0;
+ s_search_prms_blk.i4_num_steps_post_refine = ps_refine_prms->i4_num_steps_post_refine_fpel;
+ /* we use recon only for encoded layers, otherwise it is not available */
+ s_search_prms_blk.i4_use_rec = ps_refine_prms->i4_encode & ps_refine_prms->i4_use_rec_in_fpel;
+
+ s_search_prms_blk.ps_search_candts = ps_search_candts;
+ if(s_search_prms_blk.i4_use_rec)
+ {
+ WORD32 ref_ctr;
+ for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
+ s_search_prms_blk.aps_mv_range[ref_ctr] = &as_range_prms_rec[ref_ctr];
+ }
+ else
+ {
+ WORD32 ref_ctr;
+ for(ref_ctr = 0; ref_ctr < MAX_NUM_REF; ref_ctr++)
+ s_search_prms_blk.aps_mv_range[ref_ctr] = &as_range_prms_inp[ref_ctr];
+ }
+
+ /*************************************************************************/
+ /* Initialize coordinates. Meaning as follows */
+ /* blk_x : x coordinate of the 16x16 blk, in terms of number of blks */
+ /* blk_y : same as above, y coord. */
+ /* num_blks_in_this_ctb : number of blks in this given ctb that starts */
+ /* at i4_ctb_x, i4_ctb_y. This may not be 16 at picture boundaries. */
+ /* i4_ctb_x, i4_ctb_y: pixel coordinate of the ctb relative to top left */
+ /* corner of the picture. Always multiple of 64. */
+ /* blk_id_in_ctb : encode order id of the blk in the ctb. */
+ /*************************************************************************/
+ blk_y = 0;
+ blk_id_in_ctb = 0;
+ i4_ctb_y = 0;
+
+ /*************************************************************************/
+ /* Picture limit on all 4 sides. This will be used to set mv limits for */
+ /* every block given its coordinate. Note this assumes that the min amt */
+ /* of padding to right of pic is equal to the blk size. If we go all the */
+ /* way upto 64x64, then the min padding on right side of picture should */
+ /* be 64, and also on bottom side of picture. */
+ /*************************************************************************/
+ SET_PIC_LIMIT(
+ s_pic_limit_inp,
+ ps_curr_layer->i4_pad_x_rec,
+ ps_curr_layer->i4_pad_y_rec,
+ ps_curr_layer->i4_wd,
+ ps_curr_layer->i4_ht,
+ s_search_prms_blk.i4_num_steps_post_refine);
+
+ SET_PIC_LIMIT(
+ s_pic_limit_rec,
+ ps_curr_layer->i4_pad_x_rec,
+ ps_curr_layer->i4_pad_y_rec,
+ ps_curr_layer->i4_wd,
+ ps_curr_layer->i4_ht,
+ s_search_prms_blk.i4_num_steps_post_refine);
+
+ /*************************************************************************/
+ /* set the MV limit per ref. pic. */
+ /* - P pic. : Based on the config params. */
+ /* - B/b pic: Based on the Max/Min MV from prev. P and config. param. */
+ /*************************************************************************/
+ hme_set_mv_limit_using_dvsr_data(
+ ps_ctxt, ps_curr_layer, as_mv_limit, &i2_prev_enc_frm_max_mv_y, num_act_ref_pics);
+ s_srch_cand_init_data.pu1_num_fpel_search_cands = ps_refine_prms->au1_num_fpel_search_cands;
+ s_srch_cand_init_data.i4_num_act_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
+ s_srch_cand_init_data.i4_num_act_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
+ s_srch_cand_init_data.ps_coarse_layer = ps_coarse_layer;
+ s_srch_cand_init_data.ps_curr_layer = ps_curr_layer;
+ s_srch_cand_init_data.i4_max_num_init_cands = num_init_candts;
+ s_srch_cand_init_data.ps_search_cands = ps_search_candts;
+ s_srch_cand_init_data.u1_num_results_in_mvbank = s_mv_update_prms.i4_num_results_to_store;
+ s_srch_cand_init_data.pi4_ref_id_lc_to_l0_map = ps_ctxt->a_ref_idx_lc_to_l0;
+ s_srch_cand_init_data.pi4_ref_id_lc_to_l1_map = ps_ctxt->a_ref_idx_lc_to_l1;
+ s_srch_cand_init_data.e_search_blk_size = e_search_blk_size;
+
+ while(0 == end_of_frame)
+ {
+ job_queue_t *ps_job;
+ frm_ctb_ctxt_t *ps_frm_ctb_prms;
+ ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb;
+
+ WORD32 i4_max_mv_x_in_ctb;
+ WORD32 i4_max_mv_y_in_ctb;
+ void *pv_dep_mngr_encloop_dep_me;
+ WORD32 offset_val, check_dep_pos, set_dep_pos;
+ WORD32 left_ctb_in_diff_tile, i4_first_ctb_x = 0;
+
+ pv_dep_mngr_encloop_dep_me = ps_ctxt->pv_dep_mngr_encloop_dep_me;
+
+ ps_frm_ctb_prms = (frm_ctb_ctxt_t *)ps_thrd_ctxt->pv_ext_frm_prms;
+
+ /* Get the current row from the job queue */
+ ps_job = (job_queue_t *)ihevce_enc_grp_get_next_job(
+ ps_multi_thrd_ctxt, lyr_job_type, 1, me_frm_id);
+
+ /* If all rows are done, set the end of process flag to 1, */
+ /* and the current row to -1 */
+ if(NULL == ps_job)
+ {
+ blk_y = -1;
+ i4_ctb_y = -1;
+ tile_col_idx = -1;
+ end_of_frame = 1;
+
+ continue;
+ }
+
+ /* set the output dependency after picking up the row */
+ ihevce_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, me_frm_id);
+
+ /* Obtain the current row's details from the job */
+ {
+ ihevce_tile_params_t *ps_col_tile_params;
+
+ i4_ctb_y = ps_job->s_job_info.s_me_job_info.i4_vert_unit_row_no;
+ /* Obtain the current column tile index from the job */
+ tile_col_idx = ps_job->s_job_info.s_me_job_info.i4_tile_col_idx;
+
+ /* in encode layer block are 16x16 and CTB is 64 x 64 */
+ /* note if ctb is 32x32 then this calc needs to be changed */
+ num_sync_units_in_row = (i4_pic_wd + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
+ ps_ctxt->log_ctb_size;
+
+ /* The tile parameter for the col. idx. Use only the properties
+ which is same for all the bottom tiles like width, start_x, etc.
+ Don't use height, start_y, etc. */
+ ps_col_tile_params =
+ ((ihevce_tile_params_t *)ps_thrd_ctxt->pv_tile_params_base + tile_col_idx);
+ /* in encode layer block are 16x16 and CTB is 64 x 64 */
+ /* note if ctb is 32x32 then this calc needs to be changed */
+ num_sync_units_in_tile =
+ (ps_col_tile_params->i4_curr_tile_width + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
+ ps_ctxt->log_ctb_size;
+
+ i4_first_ctb_x = ps_col_tile_params->i4_first_ctb_x;
+ i4_ctb_x = i4_first_ctb_x;
+
+ if(!num_act_ref_pics)
+ {
+ for(i4_ctb_x = i4_first_ctb_x;
+ i4_ctb_x < (ps_col_tile_params->i4_first_ctb_x + num_sync_units_in_tile);
+ i4_ctb_x++)
+ {
+ S32 blk_i = 0, blk_j = 0;
+ /* set the dependency for the corresponding row in enc loop */
+ ihevce_dmgr_set_row_row_sync(
+ pv_dep_mngr_encloop_dep_me,
+ (i4_ctb_x + 1),
+ i4_ctb_y,
+ tile_col_idx /* Col Tile No. */);
+ }
+
+ continue;
+ }
+
+ /* increment the number of rows proc */
+ num_rows_proc++;
+
+ /* Set Variables for Dep. Checking and Setting */
+ set_dep_pos = i4_ctb_y + 1;
+ if(i4_ctb_y > 0)
+ {
+ offset_val = 2;
+ check_dep_pos = i4_ctb_y - 1;
+ }
+ else
+ {
+ /* First row should run without waiting */
+ offset_val = -1;
+ check_dep_pos = 0;
+ }
+
+ /* row ctb out pointer */
+ ps_ctxt->ps_ctb_analyse_curr_row =
+ ps_ctxt->ps_ctb_analyse_base + i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz;
+
+ /* Row level CU Tree buffer */
+ ps_ctxt->ps_cu_tree_curr_row =
+ ps_ctxt->ps_cu_tree_base +
+ i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz * MAX_NUM_NODES_CU_TREE;
+
+ ps_ctxt->ps_me_ctb_data_curr_row =
+ ps_ctxt->ps_me_ctb_data_base + i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz;
+ }
+
+ /* This flag says the CTB under processing is at the start of tile in horz dir.*/
+ left_ctb_in_diff_tile = 1;
+
+ /* To make sure no 64-bit overflow happens when inv_wt is multiplied with un-normalized src_var, */
+ /* the shift value will be passed onto the functions wherever inv_wt is used so that inv_wt is appropriately shifted and multiplied */
+ {
+ S32 i4_ref_id, i4_bits_req;
+
+ for(i4_ref_id = 0; i4_ref_id < (ps_ctxt->s_frm_prms.u1_num_active_ref_l0 +
+ ps_ctxt->s_frm_prms.u1_num_active_ref_l1);
+ i4_ref_id++)
+ {
+ GETRANGE(i4_bits_req, ps_ctxt->s_wt_pred.a_inv_wpred_wt[i4_ref_id]);
+
+ if(i4_bits_req > 12)
+ {
+ ps_ctxt->s_wt_pred.ai4_shift_val[i4_ref_id] = (i4_bits_req - 12);
+ }
+ else
+ {
+ ps_ctxt->s_wt_pred.ai4_shift_val[i4_ref_id] = 0;
+ }
+ }
+
+ s_common_frm_prms.pi4_inv_wt_shift_val = ps_ctxt->s_wt_pred.ai4_shift_val;
+ }
+
+ /* if non-encode layer then i4_ctb_x will be same as blk_x */
+ /* loop over all the units in a row */
+ for(i4_ctb_x = i4_first_ctb_x; i4_ctb_x < (i4_first_ctb_x + num_sync_units_in_tile);
+ i4_ctb_x++)
+ {
+ ihevce_ctb_noise_params *ps_ctb_noise_params =
+ &ps_ctxt->ps_ctb_analyse_curr_row[i4_ctb_x].s_ctb_noise_params;
+
+ s_common_frm_prms.i4_ctb_x_off = i4_ctb_x << 6;
+ s_common_frm_prms.i4_ctb_y_off = i4_ctb_y << 6;
+
+ ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y = i4_ctb_y << 6;
+ ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_x = i4_ctb_x << 6;
+ /* Initialize ptr to current IPE CTB */
+ ps_cur_ipe_ctb = ps_ctxt->ps_ipe_l0_ctb_frm_base + i4_ctb_x +
+ i4_ctb_y * ps_frm_ctb_prms->i4_num_ctbs_horz;
+ {
+ ps_ctb_bound_attrs =
+ get_ctb_attrs(i4_ctb_x << 6, i4_ctb_y << 6, i4_pic_wd, i4_pic_ht, ps_ctxt);
+
+ en_merge_32x32 = ps_ctb_bound_attrs->u1_merge_to_32x32_flag;
+ num_blks_in_this_ctb = ps_ctb_bound_attrs->u1_num_blks_in_ctb;
+ }
+
+ /* Block to initialise pointers to part_type_results_t */
+ /* in each size-specific inter_cu_results_t */
+ {
+ WORD32 i;
+
+ for(i = 0; i < 64; i++)
+ {
+ ps_ctxt->as_cu8x8_results[i].ps_best_results =
+ ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x]
+ .as_8x8_block_data[i]
+ .as_best_results;
+ ps_ctxt->as_cu8x8_results[i].u1_num_best_results = 0;
+ }
+
+ for(i = 0; i < 16; i++)
+ {
+ ps_ctxt->as_cu16x16_results[i].ps_best_results =
+ ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x].as_block_data[i].as_best_results;
+ ps_ctxt->as_cu16x16_results[i].u1_num_best_results = 0;
+ }
+
+ for(i = 0; i < 4; i++)
+ {
+ ps_ctxt->as_cu32x32_results[i].ps_best_results =
+ ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x]
+ .as_32x32_block_data[i]
+ .as_best_results;
+ ps_ctxt->as_cu32x32_results[i].u1_num_best_results = 0;
+ }
+
+ ps_ctxt->s_cu64x64_results.ps_best_results =
+ ps_ctxt->ps_me_ctb_data_curr_row[i4_ctb_x].s_64x64_block_data.as_best_results;
+ ps_ctxt->s_cu64x64_results.u1_num_best_results = 0;
+ }
+
+ if(ME_PRISTINE_QUALITY == e_me_quality_presets)
+ {
+ ps_ctb_cluster_info->blk_32x32_mask = en_merge_32x32;
+ ps_ctb_cluster_info->ps_cur_ipe_ctb = ps_cur_ipe_ctb;
+ ps_ctb_cluster_info->ps_cu_tree_root =
+ ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE);
+ ps_ctb_cluster_info->nodes_created_in_cu_tree = 1;
+ }
+
+ if(ME_PRISTINE_QUALITY != e_me_quality_presets)
+ {
+ S32 i4_nodes_created_in_cu_tree = 1;
+
+ ihevce_cu_tree_init(
+ (ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE)),
+ (ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE)),
+ &i4_nodes_created_in_cu_tree,
+ 0,
+ POS_NA,
+ POS_NA,
+ POS_NA);
+ }
+
+ memset(ai4_blk_8x8_mask, 0, 16 * sizeof(S32));
+
+ if(ps_refine_prms->u1_use_lambda_derived_from_min_8x8_act_in_ctb)
+ {
+ S32 j;
+
+ ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb;
+
+ ps_cur_ipe_ctb =
+ ps_ctxt->ps_ipe_l0_ctb_frm_base + i4_ctb_x + i4_ctb_y * num_sync_units_in_row;
+ lambda_recon =
+ hme_recompute_lambda_from_min_8x8_act_in_ctb(ps_ctxt, ps_cur_ipe_ctb);
+
+ lambda_recon = ((float)lambda_recon * (100.0f - ME_LAMBDA_DISCOUNT) / 100.0f);
+
+ for(i = 0; i < 4; i++)
+ {
+ ps_search_results = &ps_ctxt->as_search_results_32x32[i];
+
+ for(j = 0; j < 2; j++)
+ {
+ ps_search_results->as_pred_ctxt[j].lambda = lambda_recon;
+ }
+ }
+ ps_search_results = &ps_ctxt->s_search_results_64x64;
+
+ for(j = 0; j < 2; j++)
+ {
+ ps_search_results->as_pred_ctxt[j].lambda = lambda_recon;
+ }
+
+ s_common_frm_prms.i4_lamda = lambda_recon;
+ }
+ else
+ {
+ lambda_recon = ps_refine_prms->lambda_recon;
+ }
+
+ /*********************************************************************/
+ /* replicate the inp buffer at blk or ctb level for each ref id, */
+ /* Instead of searching with wk * ref(k), we search with Ik = I / wk */
+ /* thereby avoiding a bloat up of memory. If we did all references */
+ /* weighted pred, we will end up with a duplicate copy of each ref */
+ /* at each layer, since we need to preserve the original reference. */
+ /* ToDo: Need to observe performance with this mechanism and compare */
+ /* with case where ref is weighted. */
+ /*********************************************************************/
+ fp_get_wt_inp(
+ ps_curr_layer,
+ &ps_ctxt->s_wt_pred,
+ unit_size,
+ s_common_frm_prms.i4_ctb_x_off,
+ s_common_frm_prms.i4_ctb_y_off,
+ unit_size,
+ ps_ctxt->num_ref_future + ps_ctxt->num_ref_past,
+ ps_ctxt->i4_wt_pred_enable_flag);
+
+ if(ps_thrd_ctxt->s_init_prms.u1_is_stasino_enabled)
+ {
+#if TEMPORAL_NOISE_DETECT
+ {
+ WORD32 had_block_size = 16;
+ WORD32 ctb_width = ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64)
+ ? 64
+ : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off;
+ WORD32 ctb_height = ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64)
+ ? 64
+ : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off;
+ WORD32 num_pred_dir = i4_num_pred_dir;
+ WORD32 i4_x_off = s_common_frm_prms.i4_ctb_x_off;
+ WORD32 i4_y_off = s_common_frm_prms.i4_ctb_y_off;
+
+ WORD32 i;
+ WORD32 noise_detected;
+ WORD32 ctb_size;
+ WORD32 num_comp_had_blocks;
+ WORD32 noisy_block_cnt;
+ WORD32 index_8x8_block;
+ WORD32 num_8x8_in_ctb_row;
+
+ WORD32 ht_offset;
+ WORD32 wd_offset;
+ WORD32 block_ht;
+ WORD32 block_wd;
+
+ WORD32 num_horz_blocks;
+ WORD32 num_vert_blocks;
+
+ WORD32 mean;
+ UWORD32 variance_8x8;
+
+ WORD32 hh_energy_percent;
+
+ /* variables to hold the constant values. The variable values held are decided by the HAD block size */
+ WORD32 min_noisy_block_cnt;
+ WORD32 min_coeffs_above_avg;
+ WORD32 min_coeff_avg_energy;
+
+ /* to store the mean and variance of each 8*8 block and find the variance of any higher block sizes later on. block */
+ WORD32 i4_cu_x_off, i4_cu_y_off;
+ WORD32 is_noisy;
+
+ /* initialise the variables holding the constants */
+ if(had_block_size == 8)
+ {
+ min_noisy_block_cnt = MIN_NOISY_BLOCKS_CNT_8x8; //6;//
+ min_coeffs_above_avg = MIN_NUM_COEFFS_ABOVE_AVG_8x8;
+ min_coeff_avg_energy = MIN_COEFF_AVG_ENERGY_8x8;
+ }
+ else
+ {
+ min_noisy_block_cnt = MIN_NOISY_BLOCKS_CNT_16x16; //7;//
+ min_coeffs_above_avg = MIN_NUM_COEFFS_ABOVE_AVG_16x16;
+ min_coeff_avg_energy = MIN_COEFF_AVG_ENERGY_16x16;
+ }
+
+ /* initialize the variables */
+ noise_detected = 0;
+ noisy_block_cnt = 0;
+ hh_energy_percent = 0;
+ variance_8x8 = 0;
+ block_ht = ctb_height;
+ block_wd = ctb_width;
+
+ mean = 0;
+
+ ctb_size = block_ht * block_wd; //ctb_width * ctb_height;
+ num_comp_had_blocks = ctb_size / (had_block_size * had_block_size);
+
+ num_horz_blocks = block_wd / had_block_size; //ctb_width / had_block_size;
+ num_vert_blocks = block_ht / had_block_size; //ctb_height / had_block_size;
+
+ ht_offset = -had_block_size;
+ wd_offset = -had_block_size;
+
+ num_8x8_in_ctb_row = block_wd / 8; // number of 8x8 in this ctb
+ for(i = 0; i < num_comp_had_blocks; i++)
+ {
+ if(i % num_horz_blocks == 0)
+ {
+ wd_offset = -had_block_size;
+ ht_offset += had_block_size;
+ }
+ wd_offset += had_block_size;
+
+ /* CU level offsets */
+ i4_cu_x_off = i4_x_off + (i % 4) * 16; //+ (i % 4) * 16
+ i4_cu_y_off = i4_y_off + (i / 4) * 16;
+
+ /* if 50 % or more of the CU is noisy then the return value is 1 */
+ is_noisy = ihevce_determine_cu_noise_based_on_8x8Blk_data(
+ ps_ctb_noise_params->au1_is_8x8Blk_noisy,
+ (i % 4) * 16,
+ (i / 4) * 16,
+ 16);
+
+ /* only if the CU is noisy then check the temporal noise detect call is made on the CU */
+ if(is_noisy)
+ {
+ index_8x8_block = (i / num_horz_blocks) * 2 * num_8x8_in_ctb_row +
+ (i % num_horz_blocks) * 2;
+ noisy_block_cnt += ihevce_16x16block_temporal_noise_detect(
+ 16,
+ ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64)
+ ? 64
+ : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off,
+ ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64)
+ ? 64
+ : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off,
+ ps_ctb_noise_params,
+ &s_srch_cand_init_data,
+ &s_search_prms_blk,
+ ps_ctxt,
+ num_pred_dir,
+ i4_num_act_ref_l0,
+ i4_num_act_ref_l1,
+ i4_cu_x_off,
+ i4_cu_y_off,
+ &ps_ctxt->s_wt_pred,
+ unit_size,
+ index_8x8_block,
+ num_horz_blocks,
+ /*num_8x8_in_ctb_row*/ 8, // this should be a variable extra
+ i);
+ } /* if 16x16 is noisy */
+ } /* loop over for all 16x16*/
+
+ if(noisy_block_cnt >= min_noisy_block_cnt)
+ {
+ noise_detected = 1;
+ }
+
+ /* write back the noise presence detected for the current CTB to the structure */
+ ps_ctb_noise_params->i4_noise_present = noise_detected;
+ }
+#endif
+
+#if EVERYWHERE_NOISY && USE_NOISE_TERM_IN_L0_ME
+ if(ps_thrd_ctxt->s_init_prms.u1_is_stasino_enabled &&
+ ps_ctb_noise_params->i4_noise_present)
+ {
+ memset(
+ ps_ctb_noise_params->au1_is_8x8Blk_noisy,
+ 1,
+ sizeof(ps_ctb_noise_params->au1_is_8x8Blk_noisy));
+ }
+#endif
+
+ for(i = 0; i < 16; i++)
+ {
+ au1_is_16x16Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data(
+ ps_ctb_noise_params->au1_is_8x8Blk_noisy, (i % 4) * 16, (i / 4) * 16, 16);
+ }
+
+ for(i = 0; i < 4; i++)
+ {
+ au1_is_32x32Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data(
+ ps_ctb_noise_params->au1_is_8x8Blk_noisy, (i % 2) * 32, (i / 2) * 32, 32);
+ }
+
+ for(i = 0; i < 1; i++)
+ {
+ au1_is_64x64Blk_noisy[i] = ihevce_determine_cu_noise_based_on_8x8Blk_data(
+ ps_ctb_noise_params->au1_is_8x8Blk_noisy, 0, 0, 64);
+ }
+
+ if(ps_ctxt->s_frm_prms.bidir_enabled &&
+ (ps_ctxt->s_frm_prms.i4_temporal_layer_id <=
+ MAX_LAYER_ID_OF_B_PICS_WITHOUT_NOISE_DETECTION))
+ {
+ ps_ctb_noise_params->i4_noise_present = 0;
+ memset(
+ ps_ctb_noise_params->au1_is_8x8Blk_noisy,
+ 0,
+ sizeof(ps_ctb_noise_params->au1_is_8x8Blk_noisy));
+ }
+
+#if ME_LAMBDA_DISCOUNT_WHEN_NOISY
+ for(i = 0; i < 4; i++)
+ {
+ S32 j;
+ S32 lambda;
+
+ if(au1_is_32x32Blk_noisy[i])
+ {
+ lambda = lambda_recon;
+ lambda =
+ ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
+
+ ps_search_results = &ps_ctxt->as_search_results_32x32[i];
+
+ for(j = 0; j < 2; j++)
+ {
+ ps_search_results->as_pred_ctxt[j].lambda = lambda;
+ }
+ }
+ }
+
+ {
+ S32 j;
+ S32 lambda;
+
+ if(au1_is_64x64Blk_noisy[0])
+ {
+ lambda = lambda_recon;
+ lambda =
+ ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
+
+ ps_search_results = &ps_ctxt->s_search_results_64x64;
+
+ for(j = 0; j < 2; j++)
+ {
+ ps_search_results->as_pred_ctxt[j].lambda = lambda;
+ }
+ }
+ }
+#endif
+ if(au1_is_64x64Blk_noisy[0])
+ {
+ U08 *pu1_inp = ps_curr_layer->pu1_inp + (s_common_frm_prms.i4_ctb_x_off +
+ (s_common_frm_prms.i4_ctb_y_off *
+ ps_curr_layer->i4_inp_stride));
+
+ hme_compute_sigmaX_and_sigmaXSquared(
+ pu1_inp,
+ ps_curr_layer->i4_inp_stride,
+ ps_ctxt->au4_4x4_src_sigmaX,
+ ps_ctxt->au4_4x4_src_sigmaXSquared,
+ 4,
+ 4,
+ 64,
+ 64,
+ 1,
+ 16);
+ }
+ else
+ {
+ for(i = 0; i < 4; i++)
+ {
+ if(au1_is_32x32Blk_noisy[i])
+ {
+ U08 *pu1_inp =
+ ps_curr_layer->pu1_inp +
+ (s_common_frm_prms.i4_ctb_x_off +
+ (s_common_frm_prms.i4_ctb_y_off * ps_curr_layer->i4_inp_stride));
+
+ U08 u1_cu_size = 32;
+ WORD32 i4_inp_buf_offset =
+ (((i / 2) * (u1_cu_size * ps_curr_layer->i4_inp_stride)) +
+ ((i % 2) * u1_cu_size));
+
+ U16 u2_sigma_arr_start_index_of_3rd_32x32_blk_in_ctb = 128;
+ U16 u2_sigma_arr_start_index_of_2nd_32x32_blk_in_ctb = 8;
+ S32 i4_sigma_arr_offset =
+ (((i / 2) * u2_sigma_arr_start_index_of_3rd_32x32_blk_in_ctb) +
+ ((i % 2) * u2_sigma_arr_start_index_of_2nd_32x32_blk_in_ctb));
+
+ hme_compute_sigmaX_and_sigmaXSquared(
+ pu1_inp + i4_inp_buf_offset,
+ ps_curr_layer->i4_inp_stride,
+ ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_arr_offset,
+ ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_arr_offset,
+ 4,
+ 4,
+ 32,
+ 32,
+ 1,
+ 16);
+ }
+ else
+ {
+ S32 j;
+
+ U08 u1_16x16_blk_start_index_in_3rd_32x32_blk_of_ctb = 8;
+ U08 u1_16x16_blk_start_index_in_2nd_32x32_blk_of_ctb = 2;
+ S32 i4_16x16_blk_start_index_in_i_th_32x32_blk =
+ (((i / 2) * u1_16x16_blk_start_index_in_3rd_32x32_blk_of_ctb) +
+ ((i % 2) * u1_16x16_blk_start_index_in_2nd_32x32_blk_of_ctb));
+
+ for(j = 0; j < 4; j++)
+ {
+ U08 u1_3rd_16x16_blk_index_in_32x32_blk = 4;
+ U08 u1_2nd_16x16_blk_index_in_32x32_blk = 1;
+ S32 i4_16x16_blk_index_in_ctb =
+ i4_16x16_blk_start_index_in_i_th_32x32_blk +
+ ((j % 2) * u1_2nd_16x16_blk_index_in_32x32_blk) +
+ ((j / 2) * u1_3rd_16x16_blk_index_in_32x32_blk);
+
+ //S32 k = (((i / 2) * 8) + ((i % 2) * 2)) + ((j % 2) * 1) + ((j / 2) * 4);
+
+ if(au1_is_16x16Blk_noisy[i4_16x16_blk_index_in_ctb])
+ {
+ U08 *pu1_inp =
+ ps_curr_layer->pu1_inp + (s_common_frm_prms.i4_ctb_x_off +
+ (s_common_frm_prms.i4_ctb_y_off *
+ ps_curr_layer->i4_inp_stride));
+
+ U08 u1_cu_size = 16;
+ WORD32 i4_inp_buf_offset =
+ (((i4_16x16_blk_index_in_ctb % 4) * u1_cu_size) +
+ ((i4_16x16_blk_index_in_ctb / 4) *
+ (u1_cu_size * ps_curr_layer->i4_inp_stride)));
+
+ U16 u2_sigma_arr_start_index_of_3rd_16x16_blk_in_32x32_blk = 64;
+ U16 u2_sigma_arr_start_index_of_2nd_16x16_blk_in_32x32_blk = 4;
+ S32 i4_sigma_arr_offset =
+ (((i4_16x16_blk_index_in_ctb % 4) *
+ u2_sigma_arr_start_index_of_2nd_16x16_blk_in_32x32_blk) +
+ ((i4_16x16_blk_index_in_ctb / 4) *
+ u2_sigma_arr_start_index_of_3rd_16x16_blk_in_32x32_blk));
+
+ hme_compute_sigmaX_and_sigmaXSquared(
+ pu1_inp + i4_inp_buf_offset,
+ ps_curr_layer->i4_inp_stride,
+ (ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_arr_offset),
+ (ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_arr_offset),
+ 4,
+ 4,
+ 16,
+ 16,
+ 1,
+ 16);
+ }
+ }
+ }
+ }
+ }
+ }
+ else
+ {
+ memset(au1_is_16x16Blk_noisy, 0, sizeof(au1_is_16x16Blk_noisy));
+
+ memset(au1_is_32x32Blk_noisy, 0, sizeof(au1_is_32x32Blk_noisy));
+
+ memset(au1_is_64x64Blk_noisy, 0, sizeof(au1_is_64x64Blk_noisy));
+ }
+
+ for(blk_id_in_ctb = 0; blk_id_in_ctb < num_blks_in_this_ctb; blk_id_in_ctb++)
+ {
+ S32 ref_ctr;
+ U08 au1_pred_dir_searched[2];
+ U08 u1_is_cu_noisy;
+ ULWORD64 au8_final_src_sigmaX[17], au8_final_src_sigmaXSquared[17];
+
+ {
+ blk_x = (i4_ctb_x << 2) +
+ (ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_x);
+ blk_y = (i4_ctb_y << 2) +
+ (ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_y);
+
+ blk_id_in_full_ctb =
+ ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_id_in_full_ctb;
+ blk_8x8_mask = ps_ctb_bound_attrs->as_blk_attrs[blk_id_in_ctb].u1_blk_8x8_mask;
+ ai4_blk_8x8_mask[blk_id_in_full_ctb] = blk_8x8_mask;
+ s_search_prms_blk.i4_cu_x_off = (blk_x << blk_size_shift) - (i4_ctb_x << 6);
+ s_search_prms_blk.i4_cu_y_off = (blk_y << blk_size_shift) - (i4_ctb_y << 6);
+ }
+
+ /* get the current input blk point */
+ pos_x = blk_x << blk_size_shift;
+ pos_y = blk_y << blk_size_shift;
+ pu1_inp = ps_curr_layer->pu1_inp + pos_x + (pos_y * i4_inp_stride);
+
+ /*********************************************************************/
+ /* For every blk in the picture, the search range needs to be derived*/
+ /* Any blk can have any mv, but practical search constraints are */
+ /* imposed by the picture boundary and amt of padding. */
+ /*********************************************************************/
+ /* MV limit is different based on ref. PIC */
+ for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
+ {
+ if(!s_search_prms_blk.i4_use_rec)
+ {
+ hme_derive_search_range(
+ &as_range_prms_inp[ref_ctr],
+ &s_pic_limit_inp,
+ &as_mv_limit[ref_ctr],
+ pos_x,
+ pos_y,
+ blk_wd,
+ blk_ht);
+ }
+ else
+ {
+ hme_derive_search_range(
+ &as_range_prms_rec[ref_ctr],
+ &s_pic_limit_rec,
+ &as_mv_limit[ref_ctr],
+ pos_x,
+ pos_y,
+ blk_wd,
+ blk_ht);
+ }
+ }
+ s_search_prms_blk.i4_x_off = blk_x << blk_size_shift;
+ s_search_prms_blk.i4_y_off = blk_y << blk_size_shift;
+ /* Select search results from a suitable search result in the context */
+ {
+ ps_search_results = &ps_ctxt->as_search_results_16x16[blk_id_in_full_ctb];
+
+ if(ps_refine_prms->u1_use_lambda_derived_from_min_8x8_act_in_ctb)
+ {
+ S32 i;
+
+ for(i = 0; i < 2; i++)
+ {
+ ps_search_results->as_pred_ctxt[i].lambda = lambda_recon;
+ }
+ }
+ }
+
+ u1_is_cu_noisy = au1_is_16x16Blk_noisy
+ [(s_search_prms_blk.i4_cu_x_off >> 4) + (s_search_prms_blk.i4_cu_y_off >> 2)];
+
+ s_subpel_prms.u1_is_cu_noisy = u1_is_cu_noisy;
+
+#if ME_LAMBDA_DISCOUNT_WHEN_NOISY
+ if(u1_is_cu_noisy)
+ {
+ S32 j;
+ S32 lambda;
+
+ lambda = lambda_recon;
+ lambda = ((float)lambda * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
+
+ for(j = 0; j < 2; j++)
+ {
+ ps_search_results->as_pred_ctxt[j].lambda = lambda;
+ }
+ }
+ else
+ {
+ S32 j;
+ S32 lambda;
+
+ lambda = lambda_recon;
+
+ for(j = 0; j < 2; j++)
+ {
+ ps_search_results->as_pred_ctxt[j].lambda = lambda;
+ }
+ }
+#endif
+
+ s_search_prms_blk.ps_search_results = ps_search_results;
+
+ s_search_prms_blk.i4_part_mask = hme_part_mask_populator(
+ pu1_inp,
+ i4_inp_stride,
+ ps_refine_prms->limit_active_partitions,
+ ps_ctxt->ps_hme_frm_prms->bidir_enabled,
+ ps_ctxt->u1_is_curFrame_a_refFrame,
+ blk_8x8_mask,
+ e_me_quality_presets);
+
+ if(ME_PRISTINE_QUALITY == e_me_quality_presets)
+ {
+ ps_ctb_cluster_info->ai4_part_mask[blk_id_in_full_ctb] =
+ s_search_prms_blk.i4_part_mask;
+ }
+
+ /* RESET ALL SEARCH RESULTS FOR THE NEW BLK */
+ {
+ /* Setting u1_num_active_refs to 2 */
+ /* for the sole purpose of the */
+ /* function called below */
+ ps_search_results->u1_num_active_ref = (ps_refine_prms->bidir_enabled) ? 2 : 1;
+
+ hme_reset_search_results(
+ ps_search_results, s_search_prms_blk.i4_part_mask, MV_RES_FPEL);
+
+ ps_search_results->u1_num_active_ref = i4_num_pred_dir;
+ }
+
+ if(0 == blk_id_in_ctb)
+ {
+ UWORD8 u1_ctr;
+ for(u1_ctr = 0; u1_ctr < (ps_ctxt->s_frm_prms.u1_num_active_ref_l0 +
+ ps_ctxt->s_frm_prms.u1_num_active_ref_l1);
+ u1_ctr++)
+ {
+ WORD32 i4_max_dep_ctb_y;
+ WORD32 i4_max_dep_ctb_x;
+
+ /* Set max mv in ctb units */
+ i4_max_mv_x_in_ctb =
+ (ps_curr_layer->i2_max_mv_x + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
+ ps_ctxt->log_ctb_size;
+
+ i4_max_mv_y_in_ctb =
+ (as_mv_limit[u1_ctr].i2_max_y + ((1 << ps_ctxt->log_ctb_size) - 1)) >>
+ ps_ctxt->log_ctb_size;
+ /********************************************************************/
+ /* Set max ctb_x and ctb_y dependency on reference picture */
+ /* Note +1 is due to delayed deblock, SAO, subpel plan dependency */
+ /********************************************************************/
+ i4_max_dep_ctb_x = CLIP3(
+ (i4_ctb_x + i4_max_mv_x_in_ctb + 1),
+ 0,
+ ps_frm_ctb_prms->i4_num_ctbs_horz - 1);
+ i4_max_dep_ctb_y = CLIP3(
+ (i4_ctb_y + i4_max_mv_y_in_ctb + 1),
+ 0,
+ ps_frm_ctb_prms->i4_num_ctbs_vert - 1);
+
+ ihevce_dmgr_map_chk_sync(
+ ps_curr_layer->ppv_dep_mngr_recon[u1_ctr],
+ ps_ctxt->thrd_id,
+ i4_ctb_x,
+ i4_ctb_y,
+ i4_max_mv_x_in_ctb,
+ i4_max_mv_y_in_ctb);
+ }
+ }
+
+ /* Loop across different Ref IDx */
+ for(u1_pred_dir_ctr = 0; u1_pred_dir_ctr < i4_num_pred_dir; u1_pred_dir_ctr++)
+ {
+ S32 resultid;
+ S08 u1_default_ref_id;
+ S32 i4_num_srch_cands = 0;
+ S32 i4_num_refinement_iterations;
+ S32 i4_refine_iter_ctr;
+
+ if((i4_num_pred_dir == 2) || (!ps_ctxt->s_frm_prms.bidir_enabled) ||
+ (ps_ctxt->s_frm_prms.u1_num_active_ref_l1 == 0))
+ {
+ u1_pred_dir = u1_pred_dir_ctr;
+ }
+ else if(ps_ctxt->s_frm_prms.u1_num_active_ref_l0 == 0)
+ {
+ u1_pred_dir = 1;
+ }
+
+ u1_default_ref_id = (u1_pred_dir == 0) ? ps_ctxt->ai1_past_list[0]
+ : ps_ctxt->ai1_future_list[0];
+ au1_pred_dir_searched[u1_pred_dir_ctr] = u1_pred_dir;
+
+ i4_num_srch_cands = 0;
+ resultid = 0;
+
+ /* START OF NEW CTB MEANS FILL UP NEIGHBOURS IN 18x18 GRID */
+ if(0 == blk_id_in_ctb)
+ {
+ /*****************************************************************/
+ /* Initialize the mv grid with results of neighbours for the next*/
+ /* ctb. */
+ /*****************************************************************/
+ hme_fill_ctb_neighbour_mvs(
+ ps_curr_layer,
+ blk_x,
+ blk_y,
+ aps_mv_grid[u1_pred_dir],
+ u1_pred_dir_ctr,
+ u1_default_ref_id,
+ ps_ctxt->s_frm_prms.u1_num_active_ref_l0);
+ }
+
+ s_search_prms_blk.i1_ref_idx = u1_pred_dir;
+
+ {
+ if((blk_id_in_full_ctb % 4) == 0)
+ {
+ ps_ctxt->as_search_results_32x32[blk_id_in_full_ctb >> 2]
+ .as_pred_ctxt[u1_pred_dir]
+ .proj_used = (blk_id_in_full_ctb == 8) ? 0 : 1;
+ }
+
+ if(blk_id_in_full_ctb == 0)
+ {
+ ps_ctxt->s_search_results_64x64.as_pred_ctxt[u1_pred_dir].proj_used = 1;
+ }
+
+ ps_search_results->as_pred_ctxt[u1_pred_dir].proj_used =
+ !gau1_encode_to_raster_y[blk_id_in_full_ctb];
+ }
+
+ {
+ S32 x = gau1_encode_to_raster_x[blk_id_in_full_ctb];
+ S32 y = gau1_encode_to_raster_y[blk_id_in_full_ctb];
+ U08 u1_is_blk_at_ctb_boundary = !y;
+
+ s_srch_cand_init_data.u1_is_left_available =
+ !(left_ctb_in_diff_tile && !s_search_prms_blk.i4_cu_x_off);
+
+ if(u1_is_blk_at_ctb_boundary)
+ {
+ s_srch_cand_init_data.u1_is_topRight_available = 0;
+ s_srch_cand_init_data.u1_is_topLeft_available = 0;
+ s_srch_cand_init_data.u1_is_top_available = 0;
+ }
+ else
+ {
+ s_srch_cand_init_data.u1_is_topRight_available =
+ gau1_cu_tr_valid[y][x] && ((pos_x + blk_wd) < i4_pic_wd);
+ s_srch_cand_init_data.u1_is_top_available = 1;
+ s_srch_cand_init_data.u1_is_topLeft_available =
+ s_srch_cand_init_data.u1_is_left_available;
+ }
+ }
+
+ s_srch_cand_init_data.i1_default_ref_id = u1_default_ref_id;
+ s_srch_cand_init_data.i1_alt_default_ref_id = ps_ctxt->ai1_past_list[1];
+ s_srch_cand_init_data.i4_pos_x = pos_x;
+ s_srch_cand_init_data.i4_pos_y = pos_y;
+ s_srch_cand_init_data.u1_pred_dir = u1_pred_dir;
+ s_srch_cand_init_data.u1_pred_dir_ctr = u1_pred_dir_ctr;
+ s_srch_cand_init_data.u1_search_candidate_list_index =
+ au1_search_candidate_list_index[u1_pred_dir];
+
+ i4_num_srch_cands = hme_populate_search_candidates(&s_srch_cand_init_data);
+
+ /* Note this block also clips the MV range for all candidates */
+ {
+ S08 i1_check_for_mult_refs;
+
+ i1_check_for_mult_refs = u1_pred_dir ? (ps_ctxt->num_ref_future > 1)
+ : (ps_ctxt->num_ref_past > 1);
+
+ ps_me_optimised_function_list->pf_mv_clipper(
+ &s_search_prms_blk,
+ i4_num_srch_cands,
+ i1_check_for_mult_refs,
+ ps_refine_prms->i4_num_steps_fpel_refine,
+ ps_refine_prms->i4_num_steps_hpel_refine,
+ ps_refine_prms->i4_num_steps_qpel_refine);
+ }
+
+#if ENABLE_EXPLICIT_SEARCH_IN_P_IN_L0
+ i4_num_refinement_iterations =
+ ((!ps_ctxt->s_frm_prms.bidir_enabled) && (i4_num_act_ref_l0 > 1))
+ ? ((e_me_quality_presets == ME_HIGH_QUALITY) ? 2 : i4_num_act_ref_l0)
+ : 1;
+#else
+ i4_num_refinement_iterations =
+ ((!ps_ctxt->s_frm_prms.bidir_enabled) && (i4_num_act_ref_l0 > 1)) ? 2 : 1;
+#endif
+
+#if ENABLE_EXPLICIT_SEARCH_IN_PQ
+ if(e_me_quality_presets == ME_PRISTINE_QUALITY)
+ {
+ i4_num_refinement_iterations = (u1_pred_dir == 0) ? i4_num_act_ref_l0
+ : i4_num_act_ref_l1;
+ }
+#endif
+
+ for(i4_refine_iter_ctr = 0; i4_refine_iter_ctr < i4_num_refinement_iterations;
+ i4_refine_iter_ctr++)
+ {
+ S32 center_x;
+ S32 center_y;
+ S32 center_ref_idx;
+
+ S08 *pi1_pred_dir_to_ref_idx =
+ (u1_pred_dir == 0) ? ps_ctxt->ai1_past_list : ps_ctxt->ai1_future_list;
+
+ {
+ WORD32 i4_i;
+
+ for(i4_i = 0; i4_i < TOT_NUM_PARTS; i4_i++)
+ {
+ ps_fullpel_refine_ctxt->i2_tot_cost[0][i4_i] = MAX_SIGNED_16BIT_VAL;
+ ps_fullpel_refine_ctxt->i2_mv_cost[0][i4_i] = MAX_SIGNED_16BIT_VAL;
+ ps_fullpel_refine_ctxt->i2_stim_injected_cost[0][i4_i] =
+ MAX_SIGNED_16BIT_VAL;
+ ps_fullpel_refine_ctxt->i2_mv_x[0][i4_i] = 0;
+ ps_fullpel_refine_ctxt->i2_mv_y[0][i4_i] = 0;
+ ps_fullpel_refine_ctxt->i2_ref_idx[0][i4_i] = u1_default_ref_id;
+
+ if(ps_refine_prms->i4_num_results_per_part == 2)
+ {
+ ps_fullpel_refine_ctxt->i2_tot_cost[1][i4_i] =
+ MAX_SIGNED_16BIT_VAL;
+ ps_fullpel_refine_ctxt->i2_mv_cost[1][i4_i] =
+ MAX_SIGNED_16BIT_VAL;
+ ps_fullpel_refine_ctxt->i2_stim_injected_cost[1][i4_i] =
+ MAX_SIGNED_16BIT_VAL;
+ ps_fullpel_refine_ctxt->i2_mv_x[1][i4_i] = 0;
+ ps_fullpel_refine_ctxt->i2_mv_y[1][i4_i] = 0;
+ ps_fullpel_refine_ctxt->i2_ref_idx[1][i4_i] = u1_default_ref_id;
+ }
+ }
+
+ s_search_prms_blk.ps_fullpel_refine_ctxt = ps_fullpel_refine_ctxt;
+ s_subpel_prms.ps_subpel_refine_ctxt = ps_fullpel_refine_ctxt;
+ }
+
+ {
+ search_node_t *ps_coloc_node;
+
+ S32 i = 0;
+
+ if(i4_num_refinement_iterations > 1)
+ {
+ for(i = 0; i < ai4_num_coloc_cands[u1_pred_dir]; i++)
+ {
+ ps_coloc_node =
+ s_search_prms_blk.ps_search_candts[ai4_id_coloc[i]]
+ .ps_search_node;
+
+ if(pi1_pred_dir_to_ref_idx[i4_refine_iter_ctr] ==
+ ps_coloc_node->i1_ref_idx)
+ {
+ break;
+ }
+ }
+
+ if(i == ai4_num_coloc_cands[u1_pred_dir])
+ {
+ i = 0;
+ }
+ }
+ else
+ {
+ ps_coloc_node = s_search_prms_blk.ps_search_candts[ai4_id_coloc[0]]
+ .ps_search_node;
+ }
+
+ hme_set_mvp_node(
+ ps_search_results,
+ ps_coloc_node,
+ u1_pred_dir,
+ (i4_num_refinement_iterations > 1)
+ ? pi1_pred_dir_to_ref_idx[i4_refine_iter_ctr]
+ : u1_default_ref_id);
+
+ center_x = ps_coloc_node->ps_mv->i2_mvx;
+ center_y = ps_coloc_node->ps_mv->i2_mvy;
+ center_ref_idx = ps_coloc_node->i1_ref_idx;
+ }
+
+ /* Full-Pel search */
+ {
+ S32 num_unique_nodes;
+
+ memset(au4_unique_node_map, 0, sizeof(au4_unique_node_map));
+
+ num_unique_nodes = hme_remove_duplicate_fpel_search_candidates(
+ as_unique_search_nodes,
+ s_search_prms_blk.ps_search_candts,
+ au4_unique_node_map,
+ pi1_pred_dir_to_ref_idx,
+ i4_num_srch_cands,
+ s_search_prms_blk.i4_num_init_candts,
+ i4_refine_iter_ctr,
+ i4_num_refinement_iterations,
+ i4_num_act_ref_l0,
+ center_ref_idx,
+ center_x,
+ center_y,
+ ps_ctxt->s_frm_prms.bidir_enabled,
+ e_me_quality_presets);
+
+ /*************************************************************************/
+ /* This array stores the ids of the partitions whose */
+ /* SADs are updated. Since the partitions whose SADs are updated may not */
+ /* be in contiguous order, we supply another level of indirection. */
+ /*************************************************************************/
+ ps_fullpel_refine_ctxt->i4_num_valid_parts = hme_create_valid_part_ids(
+ s_search_prms_blk.i4_part_mask,
+ &ps_fullpel_refine_ctxt->ai4_part_id[0]);
+
+ if(!i4_refine_iter_ctr && !u1_pred_dir_ctr && u1_is_cu_noisy)
+ {
+ S32 i;
+ /*i4_sigma_array_offset : takes care of pointing to the appropriate 4x4 block's sigmaX and sigmaX-squared value in a CTB out of 256 values*/
+ S32 i4_sigma_array_offset = (s_search_prms_blk.i4_cu_x_off / 4) +
+ (s_search_prms_blk.i4_cu_y_off * 4);
+
+ for(i = 0; i < ps_fullpel_refine_ctxt->i4_num_valid_parts; i++)
+ {
+ S32 i4_part_id = ps_fullpel_refine_ctxt->ai4_part_id[i];
+
+ hme_compute_final_sigma_of_pu_from_base_blocks(
+ ps_ctxt->au4_4x4_src_sigmaX + i4_sigma_array_offset,
+ ps_ctxt->au4_4x4_src_sigmaXSquared + i4_sigma_array_offset,
+ au8_final_src_sigmaX,
+ au8_final_src_sigmaXSquared,
+ 16,
+ 4,
+ i4_part_id,
+ 16);
+ }
+
+ s_common_frm_prms.pu8_part_src_sigmaX = au8_final_src_sigmaX;
+ s_common_frm_prms.pu8_part_src_sigmaXSquared =
+ au8_final_src_sigmaXSquared;
+
+ s_search_prms_blk.pu8_part_src_sigmaX = au8_final_src_sigmaX;
+ s_search_prms_blk.pu8_part_src_sigmaXSquared =
+ au8_final_src_sigmaXSquared;
+ }
+
+ if(0 == num_unique_nodes)
+ {
+ continue;
+ }
+
+ if(num_unique_nodes >= 2)
+ {
+ s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
+ s_search_prms_blk.i4_num_search_nodes = num_unique_nodes;
+ if(ps_ctxt->i4_pic_type != IV_P_FRAME)
+ {
+ if(ps_ctxt->i4_temporal_layer == 1)
+ {
+ hme_fullpel_cand_sifter(
+ &s_search_prms_blk,
+ ps_curr_layer,
+ &ps_ctxt->s_wt_pred,
+ ALPHA_FOR_NOISE_TERM_IN_ME,
+ u1_is_cu_noisy,
+ ps_me_optimised_function_list);
+ }
+ else
+ {
+ hme_fullpel_cand_sifter(
+ &s_search_prms_blk,
+ ps_curr_layer,
+ &ps_ctxt->s_wt_pred,
+ ALPHA_FOR_NOISE_TERM_IN_ME,
+ u1_is_cu_noisy,
+ ps_me_optimised_function_list);
+ }
+ }
+ else
+ {
+ hme_fullpel_cand_sifter(
+ &s_search_prms_blk,
+ ps_curr_layer,
+ &ps_ctxt->s_wt_pred,
+ ALPHA_FOR_NOISE_TERM_IN_ME_P,
+ u1_is_cu_noisy,
+ ps_me_optimised_function_list);
+ }
+ }
+
+ s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
+
+ hme_fullpel_refine(
+ ps_refine_prms,
+ &s_search_prms_blk,
+ ps_curr_layer,
+ &ps_ctxt->s_wt_pred,
+ au4_unique_node_map,
+ num_unique_nodes,
+ blk_8x8_mask,
+ center_x,
+ center_y,
+ center_ref_idx,
+ e_me_quality_presets,
+ ps_me_optimised_function_list);
+ }
+
+ /* Sub-Pel search */
+ {
+ hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr);
+
+ s_subpel_prms.pu1_wkg_mem = (U08 *)hme_get_wkg_mem(
+ &ps_ctxt->s_buf_mgr,
+ INTERP_INTERMED_BUF_SIZE + INTERP_OUT_BUF_SIZE);
+ /* MV limit is different based on ref. PIC */
+ for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
+ {
+ SCALE_RANGE_PRMS(
+ as_range_prms_hpel[ref_ctr], as_range_prms_rec[ref_ctr], 1);
+ SCALE_RANGE_PRMS(
+ as_range_prms_qpel[ref_ctr], as_range_prms_rec[ref_ctr], 2);
+ }
+ s_subpel_prms.i4_ctb_x_off = i4_ctb_x << 6;
+ s_subpel_prms.i4_ctb_y_off = i4_ctb_y << 6;
+
+ hme_subpel_refine_cu_hs(
+ &s_subpel_prms,
+ ps_curr_layer,
+ ps_search_results,
+ u1_pred_dir,
+ &ps_ctxt->s_wt_pred,
+ blk_8x8_mask,
+ ps_ctxt->ps_func_selector,
+ ps_cmn_utils_optimised_function_list,
+ ps_me_optimised_function_list);
+ }
+ }
+ }
+ /* Populate the new PU struct with the results post subpel refinement*/
+ {
+ inter_cu_results_t *ps_cu_results;
+ WORD32 best_inter_cost, intra_cost, posx, posy;
+
+ UWORD8 intra_8x8_enabled = 0;
+
+ /* cost of 16x16 cu parent */
+ WORD32 parent_cost = MAX_32BIT_VAL;
+
+ /* cost of 8x8 cu children */
+ /*********************************************************************/
+ /* Assuming parent is not split, then we signal 1 bit for this parent*/
+ /* CU. If split, then 1 bit for parent CU + 4 bits for each child CU */
+ /* So, 4*lambda is extra for children cost. */
+ /*********************************************************************/
+ WORD32 child_cost = 0;
+
+ ps_cu_results = ps_search_results->ps_cu_results;
+
+ /* Initialize the pu_results pointers to the first struct in the stack array */
+ ps_pu_results = as_inter_pu_results;
+
+ hme_reset_wkg_mem(&ps_ctxt->s_buf_mgr);
+
+ hme_populate_pus(
+ ps_thrd_ctxt,
+ ps_ctxt,
+ &s_subpel_prms,
+ ps_search_results,
+ ps_cu_results,
+ ps_pu_results,
+ &(as_pu_results[0][0][0]),
+ &s_common_frm_prms,
+ &ps_ctxt->s_wt_pred,
+ ps_curr_layer,
+ au1_pred_dir_searched,
+ i4_num_pred_dir);
+
+ ps_cu_results->i4_inp_offset =
+ (ps_cu_results->u1_x_off) + (ps_cu_results->u1_y_off * 64);
+
+ hme_decide_part_types(
+ ps_cu_results,
+ ps_pu_results,
+ &s_common_frm_prms,
+ ps_ctxt,
+ ps_cmn_utils_optimised_function_list,
+ ps_me_optimised_function_list
+
+ );
+
+ /* UPDATE the MIN and MAX MVs for Dynamical Search Range for each ref. pic. */
+ /* Only for P pic. For P, both flags are 0; for I and B they are mutually exclusive */
+ if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
+ {
+ WORD32 res_ctr;
+
+ for(res_ctr = 0; res_ctr < ps_cu_results->u1_num_best_results; res_ctr++)
+ {
+ WORD32 num_part = 2, part_ctr;
+ part_type_results_t *ps_best_results =
+ &ps_cu_results->ps_best_results[res_ctr];
+
+ if(PRT_2Nx2N == ps_best_results->u1_part_type)
+ num_part = 1;
+
+ for(part_ctr = 0; part_ctr < num_part; part_ctr++)
+ {
+ pu_result_t *ps_pu_results =
+ &ps_best_results->as_pu_results[part_ctr];
+
+ ASSERT(PRED_L0 == ps_pu_results->pu.b2_pred_mode);
+
+ hme_update_dynamic_search_params(
+ &ps_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p]
+ .as_dyn_range_prms[ps_pu_results->pu.mv.i1_l0_ref_idx],
+ ps_pu_results->pu.mv.s_l0_mv.i2_mvy);
+
+ /* Sanity Check */
+ ASSERT(
+ ps_pu_results->pu.mv.i1_l0_ref_idx <
+ ps_ctxt->s_frm_prms.u1_num_active_ref_l0);
+
+ /* No L1 for P Pic. */
+ ASSERT(PRED_L1 != ps_pu_results->pu.b2_pred_mode);
+ /* No BI for P Pic. */
+ ASSERT(PRED_BI != ps_pu_results->pu.b2_pred_mode);
+ }
+ }
+ }
+
+ /*****************************************************************/
+ /* INSERT INTRA RESULTS AT 16x16 LEVEL. */
+ /*****************************************************************/
+
+#if DISABLE_INTRA_IN_BPICS
+ if(1 != ((ME_XTREME_SPEED_25 == e_me_quality_presets) &&
+ (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE)))
+#endif
+ {
+ if(!(DISABLE_INTRA_WHEN_NOISY && s_common_frm_prms.u1_is_cu_noisy))
+ {
+ hme_insert_intra_nodes_post_bipred(
+ ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep);
+ }
+ }
+
+#if DISABLE_INTRA_IN_BPICS
+ if((ME_XTREME_SPEED_25 == e_me_quality_presets) &&
+ (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE))
+ {
+ intra_8x8_enabled = 0;
+ }
+ else
+#endif
+ {
+ /*TRAQO intra flag updation*/
+ if(1 == ps_cu_results->ps_best_results->as_pu_results[0].pu.b1_intra_flag)
+ {
+ best_inter_cost =
+ ps_cu_results->ps_best_results->as_pu_results[1].i4_tot_cost;
+ intra_cost =
+ ps_cu_results->ps_best_results->as_pu_results[0].i4_tot_cost;
+ /*@16x16 level*/
+ posx = (ps_cu_results->ps_best_results->as_pu_results[1].pu.b4_pos_x
+ << 2) >>
+ 4;
+ posy = (ps_cu_results->ps_best_results->as_pu_results[1].pu.b4_pos_y
+ << 2) >>
+ 4;
+ }
+ else
+ {
+ best_inter_cost =
+ ps_cu_results->ps_best_results->as_pu_results[0].i4_tot_cost;
+ posx = (ps_cu_results->ps_best_results->as_pu_results[0].pu.b4_pos_x
+ << 2) >>
+ 3;
+ posy = (ps_cu_results->ps_best_results->as_pu_results[0].pu.b4_pos_y
+ << 2) >>
+ 3;
+ }
+
+ /* Disable intra16/32/64 flags based on split flags recommended by IPE */
+ if(ps_cur_ipe_ctb->u1_split_flag)
+ {
+ /* Id of the 32x32 block, 16x16 block in a CTB */
+ WORD32 i4_32x32_id =
+ (ps_cu_results->u1_y_off >> 5) * 2 + (ps_cu_results->u1_x_off >> 5);
+ WORD32 i4_16x16_id = ((ps_cu_results->u1_y_off >> 4) & 0x1) * 2 +
+ ((ps_cu_results->u1_x_off >> 4) & 0x1);
+
+ if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag)
+ {
+ if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
+ .as_intra16_analyse[i4_16x16_id]
+ .b1_split_flag)
+ {
+ intra_8x8_enabled =
+ ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
+ .as_intra16_analyse[i4_16x16_id]
+ .as_intra8_analyse[0]
+ .b1_valid_cu;
+ intra_8x8_enabled &=
+ ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
+ .as_intra16_analyse[i4_16x16_id]
+ .as_intra8_analyse[1]
+ .b1_valid_cu;
+ intra_8x8_enabled &=
+ ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
+ .as_intra16_analyse[i4_16x16_id]
+ .as_intra8_analyse[2]
+ .b1_valid_cu;
+ intra_8x8_enabled &=
+ ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
+ .as_intra16_analyse[i4_16x16_id]
+ .as_intra8_analyse[3]
+ .b1_valid_cu;
+ }
+ }
+ }
+ }
+
+ if(blk_8x8_mask == 0xf)
+ {
+ parent_cost =
+ ps_search_results->ps_cu_results->ps_best_results[0].i4_tot_cost;
+ ps_search_results->u1_split_flag = 0;
+ }
+ else
+ {
+ ps_search_results->u1_split_flag = 1;
+ }
+
+ ps_cu_results = &ps_ctxt->as_cu8x8_results[blk_id_in_full_ctb << 2];
+
+ if(s_common_frm_prms.u1_is_cu_noisy)
+ {
+ intra_8x8_enabled = 0;
+ }
+
+ /* Evaluate 8x8 if NxN part id is enabled */
+ if((ps_search_results->i4_part_mask & ENABLE_NxN) || intra_8x8_enabled)
+ {
+ /* Populates the PU's for the 4 8x8's in one call */
+ hme_populate_pus_8x8_cu(
+ ps_thrd_ctxt,
+ ps_ctxt,
+ &s_subpel_prms,
+ ps_search_results,
+ ps_cu_results,
+ ps_pu_results,
+ &(as_pu_results[0][0][0]),
+ &s_common_frm_prms,
+ au1_pred_dir_searched,
+ i4_num_pred_dir,
+ blk_8x8_mask);
+
+ /* Re-initialize the pu_results pointers to the first struct in the stack array */
+ ps_pu_results = as_inter_pu_results;
+
+ for(i = 0; i < 4; i++)
+ {
+ if((blk_8x8_mask & (1 << i)))
+ {
+ if(ps_cu_results->i4_part_mask)
+ {
+ hme_decide_part_types(
+ ps_cu_results,
+ ps_pu_results,
+ &s_common_frm_prms,
+ ps_ctxt,
+ ps_cmn_utils_optimised_function_list,
+ ps_me_optimised_function_list
+
+ );
+ }
+ /*****************************************************************/
+ /* INSERT INTRA RESULTS AT 8x8 LEVEL. */
+ /*****************************************************************/
+#if DISABLE_INTRA_IN_BPICS
+ if(1 != ((ME_XTREME_SPEED_25 == e_me_quality_presets) &&
+ (ps_ctxt->s_frm_prms.i4_temporal_layer_id >
+ TEMPORAL_LAYER_DISABLE)))
+#endif
+ {
+ if(!(DISABLE_INTRA_WHEN_NOISY &&
+ s_common_frm_prms.u1_is_cu_noisy))
+ {
+ hme_insert_intra_nodes_post_bipred(
+ ps_cu_results, ps_cur_ipe_ctb, ps_ctxt->frm_qstep);
+ }
+ }
+
+ child_cost += ps_cu_results->ps_best_results[0].i4_tot_cost;
+ }
+
+ ps_cu_results++;
+ ps_pu_results++;
+ }
+
+ /* Compare 16x16 vs 8x8 cost */
+ if(child_cost < parent_cost)
+ {
+ ps_search_results->best_cu_cost = child_cost;
+ ps_search_results->u1_split_flag = 1;
+ }
+ }
+ }
+
+ hme_update_mv_bank_encode(
+ ps_search_results,
+ ps_curr_layer->ps_layer_mvbank,
+ blk_x,
+ blk_y,
+ &s_mv_update_prms,
+ au1_pred_dir_searched,
+ i4_num_act_ref_l0);
+
+ /*********************************************************************/
+ /* Map the best results to an MV Grid. This is a 18x18 grid that is */
+ /* useful for doing things like predictor for cost calculation or */
+ /* also for merge calculations if need be. */
+ /*********************************************************************/
+ hme_map_mvs_to_grid(
+ &aps_mv_grid[0], ps_search_results, au1_pred_dir_searched, i4_num_pred_dir);
+ }
+
+ /* Set the CU tree nodes appropriately */
+ if(e_me_quality_presets != ME_PRISTINE_QUALITY)
+ {
+ WORD32 i, j;
+
+ for(i = 0; i < 16; i++)
+ {
+ cur_ctb_cu_tree_t *ps_tree_node =
+ ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE);
+ search_results_t *ps_results = &ps_ctxt->as_search_results_16x16[i];
+
+ switch(i >> 2)
+ {
+ case 0:
+ {
+ ps_tree_node = ps_tree_node->ps_child_node_tl;
+
+ break;
+ }
+ case 1:
+ {
+ ps_tree_node = ps_tree_node->ps_child_node_tr;
+
+ break;
+ }
+ case 2:
+ {
+ ps_tree_node = ps_tree_node->ps_child_node_bl;
+
+ break;
+ }
+ case 3:
+ {
+ ps_tree_node = ps_tree_node->ps_child_node_br;
+
+ break;
+ }
+ }
+
+ switch(i % 4)
+ {
+ case 0:
+ {
+ ps_tree_node = ps_tree_node->ps_child_node_tl;
+
+ break;
+ }
+ case 1:
+ {
+ ps_tree_node = ps_tree_node->ps_child_node_tr;
+
+ break;
+ }
+ case 2:
+ {
+ ps_tree_node = ps_tree_node->ps_child_node_bl;
+
+ break;
+ }
+ case 3:
+ {
+ ps_tree_node = ps_tree_node->ps_child_node_br;
+
+ break;
+ }
+ }
+
+ if(ai4_blk_8x8_mask[i] == 15)
+ {
+ if(!ps_results->u1_split_flag)
+ {
+ ps_tree_node->is_node_valid = 1;
+ NULLIFY_THE_CHILDREN_NODES(ps_tree_node);
+ }
+ else
+ {
+ ps_tree_node->is_node_valid = 0;
+ ENABLE_THE_CHILDREN_NODES(ps_tree_node);
+ }
+ }
+ else
+ {
+ cur_ctb_cu_tree_t *ps_tree_child;
+
+ ps_tree_node->is_node_valid = 0;
+
+ for(j = 0; j < 4; j++)
+ {
+ switch(j)
+ {
+ case 0:
+ {
+ ps_tree_child = ps_tree_node->ps_child_node_tl;
+
+ break;
+ }
+ case 1:
+ {
+ ps_tree_child = ps_tree_node->ps_child_node_tr;
+
+ break;
+ }
+ case 2:
+ {
+ ps_tree_child = ps_tree_node->ps_child_node_bl;
+
+ break;
+ }
+ case 3:
+ {
+ ps_tree_child = ps_tree_node->ps_child_node_br;
+
+ break;
+ }
+ }
+
+ ps_tree_child->is_node_valid = !!(ai4_blk_8x8_mask[i] & (1 << j));
+ }
+ }
+ }
+ }
+
+ if(ME_PRISTINE_QUALITY == e_me_quality_presets)
+ {
+ cur_ctb_cu_tree_t *ps_tree = ps_ctb_cluster_info->ps_cu_tree_root;
+
+ hme_analyse_mv_clustering(
+ ps_ctxt->as_search_results_16x16,
+ ps_ctxt->as_cu16x16_results,
+ ps_ctxt->as_cu8x8_results,
+ ps_ctxt->ps_ctb_cluster_info,
+ ps_ctxt->ai1_future_list,
+ ps_ctxt->ai1_past_list,
+ ps_ctxt->s_frm_prms.bidir_enabled,
+ e_me_quality_presets);
+
+#if DISABLE_BLK_MERGE_WHEN_NOISY
+ ps_tree->ps_child_node_tl->is_node_valid = !au1_is_32x32Blk_noisy[0];
+ ps_tree->ps_child_node_tr->is_node_valid = !au1_is_32x32Blk_noisy[1];
+ ps_tree->ps_child_node_bl->is_node_valid = !au1_is_32x32Blk_noisy[2];
+ ps_tree->ps_child_node_br->is_node_valid = !au1_is_32x32Blk_noisy[3];
+ ps_tree->ps_child_node_tl->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[0];
+ ps_tree->ps_child_node_tr->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[1];
+ ps_tree->ps_child_node_bl->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[2];
+ ps_tree->ps_child_node_br->u1_inter_eval_enable = !au1_is_32x32Blk_noisy[3];
+ ps_tree->is_node_valid = !au1_is_64x64Blk_noisy[0];
+ ps_tree->u1_inter_eval_enable = !au1_is_64x64Blk_noisy[0];
+#endif
+
+ en_merge_32x32 = (ps_tree->ps_child_node_tl->is_node_valid << 0) |
+ (ps_tree->ps_child_node_tr->is_node_valid << 1) |
+ (ps_tree->ps_child_node_bl->is_node_valid << 2) |
+ (ps_tree->ps_child_node_br->is_node_valid << 3);
+
+ en_merge_execution = (ps_tree->ps_child_node_tl->u1_inter_eval_enable << 0) |
+ (ps_tree->ps_child_node_tr->u1_inter_eval_enable << 1) |
+ (ps_tree->ps_child_node_bl->u1_inter_eval_enable << 2) |
+ (ps_tree->ps_child_node_br->u1_inter_eval_enable << 3) |
+ (ps_tree->u1_inter_eval_enable << 4);
+ }
+ else
+ {
+ en_merge_execution = 0x1f;
+
+#if DISABLE_BLK_MERGE_WHEN_NOISY
+ en_merge_32x32 = ((!au1_is_32x32Blk_noisy[0] << 0) & (en_merge_32x32 & 1)) |
+ ((!au1_is_32x32Blk_noisy[1] << 1) & (en_merge_32x32 & 2)) |
+ ((!au1_is_32x32Blk_noisy[2] << 2) & (en_merge_32x32 & 4)) |
+ ((!au1_is_32x32Blk_noisy[3] << 3) & (en_merge_32x32 & 8));
+#endif
+ }
+
+ /* Re-initialize the pu_results pointers to the first struct in the stack array */
+ ps_pu_results = as_inter_pu_results;
+
+ {
+ WORD32 ref_ctr;
+
+ s_ctb_prms.i4_ctb_x = i4_ctb_x << 6;
+ s_ctb_prms.i4_ctb_y = i4_ctb_y << 6;
+
+ /* MV limit is different based on ref. PIC */
+ for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
+ {
+ SCALE_RANGE_PRMS(as_range_prms_hpel[ref_ctr], as_range_prms_rec[ref_ctr], 1);
+ SCALE_RANGE_PRMS(as_range_prms_qpel[ref_ctr], as_range_prms_rec[ref_ctr], 2);
+ }
+
+ e_merge_result = CU_SPLIT;
+ merge_count_32x32 = 0;
+
+ if((en_merge_32x32 & 1) && (en_merge_execution & 1))
+ {
+ range_prms_t *ps_pic_limit;
+ if(s_merge_prms_32x32_tl.i4_use_rec == 1)
+ {
+ ps_pic_limit = &s_pic_limit_rec;
+ }
+ else
+ {
+ ps_pic_limit = &s_pic_limit_inp;
+ }
+ /* MV limit is different based on ref. PIC */
+ for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
+ {
+ hme_derive_search_range(
+ s_merge_prms_32x32_tl.aps_mv_range[ref_ctr],
+ ps_pic_limit,
+ &as_mv_limit[ref_ctr],
+ i4_ctb_x << 6,
+ i4_ctb_y << 6,
+ 32,
+ 32);
+
+ SCALE_RANGE_PRMS_POINTERS(
+ s_merge_prms_32x32_tl.aps_mv_range[ref_ctr],
+ s_merge_prms_32x32_tl.aps_mv_range[ref_ctr],
+ 2);
+ }
+ s_merge_prms_32x32_tl.i4_ctb_x_off = i4_ctb_x << 6;
+ s_merge_prms_32x32_tl.i4_ctb_y_off = i4_ctb_y << 6;
+ s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[0];
+
+ e_merge_result = hme_try_merge_high_speed(
+ ps_thrd_ctxt,
+ ps_ctxt,
+ ps_cur_ipe_ctb,
+ &s_subpel_prms,
+ &s_merge_prms_32x32_tl,
+ ps_pu_results,
+ &as_pu_results[0][0][0]);
+
+ if(e_merge_result == CU_MERGED)
+ {
+ inter_cu_results_t *ps_cu_results =
+ s_merge_prms_32x32_tl.ps_results_merge->ps_cu_results;
+
+ if(!((ps_cu_results->u1_num_best_results == 1) &&
+ (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
+ {
+ hme_map_mvs_to_grid(
+ &aps_mv_grid[0],
+ s_merge_prms_32x32_tl.ps_results_merge,
+ s_merge_prms_32x32_tl.au1_pred_dir_searched,
+ s_merge_prms_32x32_tl.i4_num_pred_dir_actual);
+ }
+
+ if(ME_PRISTINE_QUALITY != e_me_quality_presets)
+ {
+ ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
+ .ps_child_node_tl->is_node_valid = 1;
+ NULLIFY_THE_CHILDREN_NODES(
+ ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
+ .ps_child_node_tl);
+ }
+
+ merge_count_32x32++;
+ e_merge_result = CU_SPLIT;
+ }
+ else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
+ {
+#if ENABLE_CU_TREE_CULLING
+ cur_ctb_cu_tree_t *ps_tree =
+ ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
+
+ ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
+ en_merge_execution = (en_merge_execution & (~(1 << 4)));
+ ENABLE_THE_CHILDREN_NODES(ps_tree);
+ ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
+ ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
+ ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
+ ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
+#endif
+ }
+ }
+ else if((en_merge_32x32 & 1) && (!(en_merge_execution & 1)))
+ {
+#if ENABLE_CU_TREE_CULLING
+ cur_ctb_cu_tree_t *ps_tree =
+ ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tl;
+
+ ENABLE_THE_CHILDREN_NODES(ps_tree);
+ ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
+ ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
+ ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
+ ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
+#endif
+
+ if(au1_is_32x32Blk_noisy[0] && DISABLE_INTRA_WHEN_NOISY)
+ {
+ ps_tree->is_node_valid = 0;
+ ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
+ en_merge_execution = (en_merge_execution & (~(1 << 4)));
+ }
+ }
+
+ if((en_merge_32x32 & 2) && (en_merge_execution & 2))
+ {
+ range_prms_t *ps_pic_limit;
+ if(s_merge_prms_32x32_tr.i4_use_rec == 1)
+ {
+ ps_pic_limit = &s_pic_limit_rec;
+ }
+ else
+ {
+ ps_pic_limit = &s_pic_limit_inp;
+ }
+ /* MV limit is different based on ref. PIC */
+ for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
+ {
+ hme_derive_search_range(
+ s_merge_prms_32x32_tr.aps_mv_range[ref_ctr],
+ ps_pic_limit,
+ &as_mv_limit[ref_ctr],
+ (i4_ctb_x << 6) + 32,
+ i4_ctb_y << 6,
+ 32,
+ 32);
+ SCALE_RANGE_PRMS_POINTERS(
+ s_merge_prms_32x32_tr.aps_mv_range[ref_ctr],
+ s_merge_prms_32x32_tr.aps_mv_range[ref_ctr],
+ 2);
+ }
+ s_merge_prms_32x32_tr.i4_ctb_x_off = i4_ctb_x << 6;
+ s_merge_prms_32x32_tr.i4_ctb_y_off = i4_ctb_y << 6;
+ s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[1];
+
+ e_merge_result = hme_try_merge_high_speed(
+ ps_thrd_ctxt,
+ ps_ctxt,
+ ps_cur_ipe_ctb,
+ &s_subpel_prms,
+ &s_merge_prms_32x32_tr,
+ ps_pu_results,
+ &as_pu_results[0][0][0]);
+
+ if(e_merge_result == CU_MERGED)
+ {
+ inter_cu_results_t *ps_cu_results =
+ s_merge_prms_32x32_tr.ps_results_merge->ps_cu_results;
+
+ if(!((ps_cu_results->u1_num_best_results == 1) &&
+ (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
+ {
+ hme_map_mvs_to_grid(
+ &aps_mv_grid[0],
+ s_merge_prms_32x32_tr.ps_results_merge,
+ s_merge_prms_32x32_tr.au1_pred_dir_searched,
+ s_merge_prms_32x32_tr.i4_num_pred_dir_actual);
+ }
+
+ if(ME_PRISTINE_QUALITY != e_me_quality_presets)
+ {
+ ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
+ .ps_child_node_tr->is_node_valid = 1;
+ NULLIFY_THE_CHILDREN_NODES(
+ ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
+ .ps_child_node_tr);
+ }
+
+ merge_count_32x32++;
+ e_merge_result = CU_SPLIT;
+ }
+ else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
+ {
+#if ENABLE_CU_TREE_CULLING
+ cur_ctb_cu_tree_t *ps_tree =
+ ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
+
+ ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
+ en_merge_execution = (en_merge_execution & (~(1 << 4)));
+ ENABLE_THE_CHILDREN_NODES(ps_tree);
+ ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
+ ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
+ ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
+ ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
+#endif
+ }
+ }
+ else if((en_merge_32x32 & 2) && (!(en_merge_execution & 2)))
+ {
+#if ENABLE_CU_TREE_CULLING
+ cur_ctb_cu_tree_t *ps_tree =
+ ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_tr;
+
+ ENABLE_THE_CHILDREN_NODES(ps_tree);
+ ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
+ ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
+ ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
+ ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
+#endif
+
+ if(au1_is_32x32Blk_noisy[1] && DISABLE_INTRA_WHEN_NOISY)
+ {
+ ps_tree->is_node_valid = 0;
+ ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
+ en_merge_execution = (en_merge_execution & (~(1 << 4)));
+ }
+ }
+
+ if((en_merge_32x32 & 4) && (en_merge_execution & 4))
+ {
+ range_prms_t *ps_pic_limit;
+ if(s_merge_prms_32x32_bl.i4_use_rec == 1)
+ {
+ ps_pic_limit = &s_pic_limit_rec;
+ }
+ else
+ {
+ ps_pic_limit = &s_pic_limit_inp;
+ }
+ /* MV limit is different based on ref. PIC */
+ for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
+ {
+ hme_derive_search_range(
+ s_merge_prms_32x32_bl.aps_mv_range[ref_ctr],
+ ps_pic_limit,
+ &as_mv_limit[ref_ctr],
+ i4_ctb_x << 6,
+ (i4_ctb_y << 6) + 32,
+ 32,
+ 32);
+ SCALE_RANGE_PRMS_POINTERS(
+ s_merge_prms_32x32_bl.aps_mv_range[ref_ctr],
+ s_merge_prms_32x32_bl.aps_mv_range[ref_ctr],
+ 2);
+ }
+ s_merge_prms_32x32_bl.i4_ctb_x_off = i4_ctb_x << 6;
+ s_merge_prms_32x32_bl.i4_ctb_y_off = i4_ctb_y << 6;
+ s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[2];
+
+ e_merge_result = hme_try_merge_high_speed(
+ ps_thrd_ctxt,
+ ps_ctxt,
+ ps_cur_ipe_ctb,
+ &s_subpel_prms,
+ &s_merge_prms_32x32_bl,
+ ps_pu_results,
+ &as_pu_results[0][0][0]);
+
+ if(e_merge_result == CU_MERGED)
+ {
+ inter_cu_results_t *ps_cu_results =
+ s_merge_prms_32x32_bl.ps_results_merge->ps_cu_results;
+
+ if(!((ps_cu_results->u1_num_best_results == 1) &&
+ (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
+ {
+ hme_map_mvs_to_grid(
+ &aps_mv_grid[0],
+ s_merge_prms_32x32_bl.ps_results_merge,
+ s_merge_prms_32x32_bl.au1_pred_dir_searched,
+ s_merge_prms_32x32_bl.i4_num_pred_dir_actual);
+ }
+
+ if(ME_PRISTINE_QUALITY != e_me_quality_presets)
+ {
+ ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
+ .ps_child_node_bl->is_node_valid = 1;
+ NULLIFY_THE_CHILDREN_NODES(
+ ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
+ .ps_child_node_bl);
+ }
+
+ merge_count_32x32++;
+ e_merge_result = CU_SPLIT;
+ }
+ else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
+ {
+#if ENABLE_CU_TREE_CULLING
+ cur_ctb_cu_tree_t *ps_tree =
+ ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
+
+ ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
+ en_merge_execution = (en_merge_execution & (~(1 << 4)));
+ ENABLE_THE_CHILDREN_NODES(ps_tree);
+ ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
+ ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
+ ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
+ ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
+#endif
+ }
+ }
+ else if((en_merge_32x32 & 4) && (!(en_merge_execution & 4)))
+ {
+#if ENABLE_CU_TREE_CULLING
+ cur_ctb_cu_tree_t *ps_tree =
+ ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_bl;
+
+ ENABLE_THE_CHILDREN_NODES(ps_tree);
+ ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
+ ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
+ ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
+ ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
+#endif
+
+ if(au1_is_32x32Blk_noisy[2] && DISABLE_INTRA_WHEN_NOISY)
+ {
+ ps_tree->is_node_valid = 0;
+ ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
+ en_merge_execution = (en_merge_execution & (~(1 << 4)));
+ }
+ }
+
+ if((en_merge_32x32 & 8) && (en_merge_execution & 8))
+ {
+ range_prms_t *ps_pic_limit;
+ if(s_merge_prms_32x32_br.i4_use_rec == 1)
+ {
+ ps_pic_limit = &s_pic_limit_rec;
+ }
+ else
+ {
+ ps_pic_limit = &s_pic_limit_inp;
+ }
+ /* MV limit is different based on ref. PIC */
+ for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
+ {
+ hme_derive_search_range(
+ s_merge_prms_32x32_br.aps_mv_range[ref_ctr],
+ ps_pic_limit,
+ &as_mv_limit[ref_ctr],
+ (i4_ctb_x << 6) + 32,
+ (i4_ctb_y << 6) + 32,
+ 32,
+ 32);
+
+ SCALE_RANGE_PRMS_POINTERS(
+ s_merge_prms_32x32_br.aps_mv_range[ref_ctr],
+ s_merge_prms_32x32_br.aps_mv_range[ref_ctr],
+ 2);
+ }
+ s_merge_prms_32x32_br.i4_ctb_x_off = i4_ctb_x << 6;
+ s_merge_prms_32x32_br.i4_ctb_y_off = i4_ctb_y << 6;
+ s_subpel_prms.u1_is_cu_noisy = au1_is_32x32Blk_noisy[3];
+
+ e_merge_result = hme_try_merge_high_speed(
+ ps_thrd_ctxt,
+ ps_ctxt,
+ ps_cur_ipe_ctb,
+ &s_subpel_prms,
+ &s_merge_prms_32x32_br,
+ ps_pu_results,
+ &as_pu_results[0][0][0]);
+
+ if(e_merge_result == CU_MERGED)
+ {
+ /*inter_cu_results_t *ps_cu_results = s_merge_prms_32x32_br.ps_results_merge->ps_cu_results;
+
+ if(!((ps_cu_results->u1_num_best_results == 1) &&
+ (ps_cu_results->ps_best_results->as_pu_results->pu.b1_intra_flag)))
+ {
+ hme_map_mvs_to_grid
+ (
+ &aps_mv_grid[0],
+ s_merge_prms_32x32_br.ps_results_merge,
+ s_merge_prms_32x32_br.au1_pred_dir_searched,
+ s_merge_prms_32x32_br.i4_num_pred_dir_actual
+ );
+ }*/
+
+ if(ME_PRISTINE_QUALITY != e_me_quality_presets)
+ {
+ ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
+ .ps_child_node_br->is_node_valid = 1;
+ NULLIFY_THE_CHILDREN_NODES(
+ ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
+ .ps_child_node_br);
+ }
+
+ merge_count_32x32++;
+ e_merge_result = CU_SPLIT;
+ }
+ else if(ME_PRISTINE_QUALITY == e_me_quality_presets)
+ {
+#if ENABLE_CU_TREE_CULLING
+ cur_ctb_cu_tree_t *ps_tree =
+ ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
+
+ ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
+ en_merge_execution = (en_merge_execution & (~(1 << 4)));
+ ENABLE_THE_CHILDREN_NODES(ps_tree);
+ ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
+ ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
+ ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
+ ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
+#endif
+ }
+ }
+ else if((en_merge_32x32 & 8) && (!(en_merge_execution & 8)))
+ {
+#if ENABLE_CU_TREE_CULLING
+ cur_ctb_cu_tree_t *ps_tree =
+ ps_ctb_cluster_info->ps_cu_tree_root->ps_child_node_br;
+
+ ENABLE_THE_CHILDREN_NODES(ps_tree);
+ ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tl);
+ ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_tr);
+ ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_bl);
+ ENABLE_THE_CHILDREN_NODES(ps_tree->ps_child_node_br);
+#endif
+
+ if(au1_is_32x32Blk_noisy[3] && DISABLE_INTRA_WHEN_NOISY)
+ {
+ ps_tree->is_node_valid = 0;
+ ps_ctb_cluster_info->ps_cu_tree_root->is_node_valid = 0;
+ en_merge_execution = (en_merge_execution & (~(1 << 4)));
+ }
+ }
+
+ /* Try merging all 32x32 to 64x64 candts */
+ if(((en_merge_32x32 & 0xf) == 0xf) &&
+ (((merge_count_32x32 == 4) && (e_me_quality_presets != ME_PRISTINE_QUALITY)) ||
+ ((en_merge_execution & 16) && (e_me_quality_presets == ME_PRISTINE_QUALITY))))
+ if((((e_me_quality_presets == ME_XTREME_SPEED_25) &&
+ !DISABLE_64X64_BLOCK_MERGE_IN_ME_IN_XS25) ||
+ (e_me_quality_presets != ME_XTREME_SPEED_25)))
+ {
+ range_prms_t *ps_pic_limit;
+ if(s_merge_prms_64x64.i4_use_rec == 1)
+ {
+ ps_pic_limit = &s_pic_limit_rec;
+ }
+ else
+ {
+ ps_pic_limit = &s_pic_limit_inp;
+ }
+ /* MV limit is different based on ref. PIC */
+ for(ref_ctr = 0; ref_ctr < num_act_ref_pics; ref_ctr++)
+ {
+ hme_derive_search_range(
+ s_merge_prms_64x64.aps_mv_range[ref_ctr],
+ ps_pic_limit,
+ &as_mv_limit[ref_ctr],
+ i4_ctb_x << 6,
+ i4_ctb_y << 6,
+ 64,
+ 64);
+
+ SCALE_RANGE_PRMS_POINTERS(
+ s_merge_prms_64x64.aps_mv_range[ref_ctr],
+ s_merge_prms_64x64.aps_mv_range[ref_ctr],
+ 2);
+ }
+ s_merge_prms_64x64.i4_ctb_x_off = i4_ctb_x << 6;
+ s_merge_prms_64x64.i4_ctb_y_off = i4_ctb_y << 6;
+ s_subpel_prms.u1_is_cu_noisy = au1_is_64x64Blk_noisy[0];
+
+ e_merge_result = hme_try_merge_high_speed(
+ ps_thrd_ctxt,
+ ps_ctxt,
+ ps_cur_ipe_ctb,
+ &s_subpel_prms,
+ &s_merge_prms_64x64,
+ ps_pu_results,
+ &as_pu_results[0][0][0]);
+
+ if((e_merge_result == CU_MERGED) &&
+ (ME_PRISTINE_QUALITY != e_me_quality_presets))
+ {
+ ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
+ .is_node_valid = 1;
+ NULLIFY_THE_CHILDREN_NODES(
+ ps_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE));
+ }
+ else if(
+ (e_merge_result == CU_SPLIT) &&
+ (ME_PRISTINE_QUALITY == e_me_quality_presets))
+ {
+ ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]
+ .is_node_valid = 0;
+ }
+ }
+
+ /*****************************************************************/
+ /* UPDATION OF RESULT TO EXTERNAL STRUCTURES */
+ /*****************************************************************/
+ pf_ext_update_fxn((void *)ps_thrd_ctxt, (void *)ps_ctxt, i4_ctb_x, i4_ctb_y);
+
+ {
+#ifdef _DEBUG
+ S32 wd = ((i4_pic_wd - s_common_frm_prms.i4_ctb_x_off) >= 64)
+ ? 64
+ : i4_pic_wd - s_common_frm_prms.i4_ctb_x_off;
+ S32 ht = ((i4_pic_ht - s_common_frm_prms.i4_ctb_y_off) >= 64)
+ ? 64
+ : i4_pic_ht - s_common_frm_prms.i4_ctb_y_off;
+ ASSERT(
+ (wd * ht) ==
+ ihevce_compute_area_of_valid_cus_in_ctb(
+ &ps_ctxt->ps_cu_tree_curr_row[(i4_ctb_x * MAX_NUM_NODES_CU_TREE)]));
+#endif
+ }
+ }
+
+ /* set the dependency for the corresponding row in enc loop */
+ ihevce_dmgr_set_row_row_sync(
+ pv_dep_mngr_encloop_dep_me,
+ (i4_ctb_x + 1),
+ i4_ctb_y,
+ tile_col_idx /* Col Tile No. */);
+
+ left_ctb_in_diff_tile = 0;
+ }
+ }
+}
+
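+/**
+********************************************************************************
+* @fn void hme_refine_no_encode(coarse_me_ctxt_t *ps_ctxt, refine_prms_t *ps_refine_prms,
+* multi_thrd_ctxt_t *ps_multi_thrd_ctxt, S32 lyr_job_type, WORD32 i4_ping_pong, void **ppv_dep_mngr_hme_sync)
+* @brief Top level entry point for refinement ME
+* @param[in,out] ps_ctxt : ME handle
+* @param[in] ps_refine_prms : refinement layer params
+* @param[in,out] ps_multi_thrd_ctxt : multi-thread context; supplies the pre-enc job queue and current input
+* @param[in] lyr_job_type : job type passed to the pre-enc job queue when fetching the next row
+* @param[in] i4_ping_pong : ping-pong index selecting the current pre-enc input and job set
+* @param[in] ppv_dep_mngr_hme_sync : array of HME dependency manager handles, indexed by (layer id - 1)
+* @return None
+********************************************************************************
+*/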
+void hme_refine_no_encode(
+ coarse_me_ctxt_t *ps_ctxt,
+ refine_prms_t *ps_refine_prms,
+ multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
+ S32 lyr_job_type,
+ WORD32 i4_ping_pong,
+ void **ppv_dep_mngr_hme_sync)
+{
+ BLK_SIZE_T e_search_blk_size, e_result_blk_size;
+ ME_QUALITY_PRESETS_T e_me_quality_presets =
+ ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets;
+
+ /*************************************************************************/
+ /* Complexity of search: Low to High */
+ /*************************************************************************/
+ SEARCH_COMPLEXITY_T e_search_complexity;
+
+ /*************************************************************************/
+ /* Config parameter structures for varius ME submodules */
+ /*************************************************************************/
+ hme_search_prms_t s_search_prms_blk;
+ mvbank_update_prms_t s_mv_update_prms;
+
+ /*************************************************************************/
+ /* All types of search candidates for predictor based search. */
+ /*************************************************************************/
+ S32 num_init_candts = 0;
+ search_candt_t *ps_search_candts, as_search_candts[MAX_INIT_CANDTS];
+ search_node_t as_top_neighbours[4], as_left_neighbours[3];
+ search_node_t *ps_candt_zeromv, *ps_candt_tl, *ps_candt_tr;
+ search_node_t *ps_candt_l, *ps_candt_t;
+ search_node_t *ps_candt_prj_br[2], *ps_candt_prj_b[2], *ps_candt_prj_r[2];
+ search_node_t *ps_candt_prj_bl[2];
+ search_node_t *ps_candt_prj_tr[2], *ps_candt_prj_t[2], *ps_candt_prj_tl[2];
+ search_node_t *ps_candt_prj_coloc[2];
+
+ pf_get_wt_inp fp_get_wt_inp;
+
+ search_node_t as_unique_search_nodes[MAX_INIT_CANDTS * 9];
+ U32 au4_unique_node_map[MAP_X_MAX * 2];
+
+ /*EIID */
+ WORD32 i4_num_inter_wins = 0; //debug code to find stat of
+ WORD32 i4_num_comparisions = 0; //debug code
+ WORD32 i4_threshold_multiplier;
+ WORD32 i4_threshold_divider;
+ WORD32 i4_temporal_layer =
+ ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_temporal_lyr_id;
+
+ /*************************************************************************/
+ /* points ot the search results for the blk level search (8x8/16x16) */
+ /*************************************************************************/
+ search_results_t *ps_search_results;
+
+ /*************************************************************************/
+ /* Coordinates */
+ /*************************************************************************/
+ S32 blk_x, i4_ctb_x, blk_id_in_ctb;
+ //S32 i4_ctb_y;
+ S32 pos_x, pos_y;
+ S32 blk_id_in_full_ctb;
+ S32 i4_num_srch_cands;
+
+ S32 blk_y;
+
+ /*************************************************************************/
+ /* Related to dimensions of block being searched and pic dimensions */
+ /*************************************************************************/
+ S32 blk_wd, blk_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic;
+ S32 i4_pic_wd, i4_pic_ht, num_blks_in_this_ctb;
+ S32 num_results_prev_layer;
+
+ /*************************************************************************/
+ /* Size of a basic unit for this layer. For non encode layers, we search */
+ /* in block sizes of 8x8. For encode layers, though we search 16x16s the */
+ /* basic unit size is the ctb size. */
+ /*************************************************************************/
+ S32 unit_size;
+
+ /*************************************************************************/
+ /* Pointers to context in current and coarser layers */
+ /*************************************************************************/
+ layer_ctxt_t *ps_curr_layer, *ps_coarse_layer;
+
+ /*************************************************************************/
+ /* to store mv range per blk, and picture limit, allowed search range */
+ /* range prms in hpel and qpel units as well */
+ /*************************************************************************/
+ range_prms_t s_range_prms_inp, s_range_prms_rec;
+ range_prms_t s_pic_limit_inp, s_pic_limit_rec, as_mv_limit[MAX_NUM_REF];
+ /*************************************************************************/
+ /* These variables are used to track number of references at different */
+ /* stages of ME. */
+ /*************************************************************************/
+ S32 i4_num_ref_fpel, i4_num_ref_before_merge;
+ S32 i4_num_ref_each_dir, i, i4_num_ref_prev_layer;
+ S32 lambda_inp = ps_refine_prms->lambda_inp;
+
+ /*************************************************************************/
+ /* When a layer is implicit, it means that it searches on 1 or 2 ref idx */
+ /* Explicit means it searches on all active ref idx. */
+ /*************************************************************************/
+ S32 curr_layer_implicit, prev_layer_implicit;
+
+ /*************************************************************************/
+ /* Variables for loop counts */
+ /*************************************************************************/
+ S32 id;
+ S08 i1_ref_idx;
+
+ /*************************************************************************/
+ /* Input pointer and stride */
+ /*************************************************************************/
+ U08 *pu1_inp;
+ S32 i4_inp_stride;
+
+ S32 end_of_frame;
+
+ S32 num_sync_units_in_row;
+
+ PF_HME_PROJECT_COLOC_CANDT_FXN pf_hme_project_coloc_candt;
+ ASSERT(ps_refine_prms->i4_layer_id < ps_ctxt->num_layers - 1);
+
+ /*************************************************************************/
+ /* Pointers to current and coarse layer are needed for projection */
+ /* Pointer to prev layer are needed for other candts like coloc */
+ /*************************************************************************/
+ ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id];
+
+ ps_coarse_layer = ps_ctxt->ps_curr_descr->aps_layers[ps_refine_prms->i4_layer_id + 1];
+
+ num_results_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_mvs_per_ref;
+
+ /* Function pointer is selected based on the C vc X86 macro */
+
+ fp_get_wt_inp = ((ihevce_me_optimised_function_list_t *)ps_ctxt->pv_me_optimised_function_list)
+ ->pf_get_wt_inp_8x8;
+
+ i4_inp_stride = ps_curr_layer->i4_inp_stride;
+ i4_pic_wd = ps_curr_layer->i4_wd;
+ i4_pic_ht = ps_curr_layer->i4_ht;
+ e_search_complexity = ps_refine_prms->e_search_complexity;
+
+ end_of_frame = 0;
+
+ /* If the previous layer is non-encode layer, then use dyadic projection */
+ if(0 == ps_ctxt->u1_encode[ps_refine_prms->i4_layer_id + 1])
+ pf_hme_project_coloc_candt = hme_project_coloc_candt_dyadic;
+ else
+ pf_hme_project_coloc_candt = hme_project_coloc_candt;
+
+ /* This points to all the initial candts */
+ ps_search_candts = &as_search_candts[0];
+
+ {
+ e_search_blk_size = BLK_8x8;
+ blk_wd = blk_ht = 8;
+ blk_size_shift = 3;
+ s_mv_update_prms.i4_shift = 0;
+ /*********************************************************************/
+ /* In case we do not encode this layer, we search 8x8 with or without*/
+ /* enable 4x4 SAD. */
+ /*********************************************************************/
+ {
+ S32 i4_mask = (ENABLE_2Nx2N);
+
+ e_result_blk_size = BLK_8x8;
+ if(ps_refine_prms->i4_enable_4x4_part)
+ {
+ i4_mask |= (ENABLE_NxN);
+ e_result_blk_size = BLK_4x4;
+ s_mv_update_prms.i4_shift = 1;
+ }
+
+ s_search_prms_blk.i4_part_mask = i4_mask;
+ }
+
+ unit_size = blk_wd;
+ s_search_prms_blk.i4_inp_stride = unit_size;
+ }
+
+ /* This is required to properly update the layer mv bank */
+ s_mv_update_prms.e_search_blk_size = e_search_blk_size;
+ s_search_prms_blk.e_blk_size = e_search_blk_size;
+
+ /*************************************************************************/
+ /* If current layer is explicit, then the number of ref frames are to */
+ /* be same as previous layer. Else it will be 2 */
+ /*************************************************************************/
+ i4_num_ref_prev_layer = ps_coarse_layer->ps_layer_mvbank->i4_num_ref;
+ if(ps_refine_prms->explicit_ref)
+ {
+ curr_layer_implicit = 0;
+ i4_num_ref_fpel = i4_num_ref_prev_layer;
+ /* 100578 : Using same mv cost fun. for all presets. */
+ s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_refine;
+ }
+ else
+ {
+ i4_num_ref_fpel = 2;
+ curr_layer_implicit = 1;
+ {
+ if(ME_MEDIUM_SPEED > e_me_quality_presets)
+ {
+ s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit;
+ }
+ else
+ {
+#if USE_MODIFIED == 1
+ s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed_modified;
+#else
+ s_search_prms_blk.pf_mv_cost_compute = compute_mv_cost_implicit_high_speed;
+#endif
+ }
+ }
+ }
+
+ i4_num_ref_fpel = MIN(i4_num_ref_fpel, i4_num_ref_prev_layer);
+ if(ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_pic_type ==
+ IV_IDR_FRAME ||
+ ps_multi_thrd_ctxt->aps_curr_inp_pre_enc[i4_ping_pong]->s_lap_out.i4_pic_type == IV_I_FRAME)
+ {
+ i4_num_ref_fpel = 1;
+ }
+ if(i4_num_ref_prev_layer <= 2)
+ {
+ prev_layer_implicit = 1;
+ curr_layer_implicit = 1;
+ i4_num_ref_each_dir = 1;
+ }
+ else
+ {
+ /* It is assumed that we have equal number of references in each dir */
+ //ASSERT(!(i4_num_ref_prev_layer & 1));
+ prev_layer_implicit = 0;
+ i4_num_ref_each_dir = i4_num_ref_prev_layer >> 1;
+ }
+ s_mv_update_prms.i4_num_ref = i4_num_ref_fpel;
+ s_mv_update_prms.i4_num_active_ref_l0 = ps_ctxt->s_frm_prms.u1_num_active_ref_l0;
+ s_mv_update_prms.i4_num_active_ref_l1 = ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
+
+ /* this can be kept to 1 or 2 */
+ i4_num_ref_before_merge = 2;
+ i4_num_ref_before_merge = MIN(i4_num_ref_before_merge, i4_num_ref_fpel);
+
+ /* Set up place holders to hold the search nodes of each initial candt */
+ for(i = 0; i < MAX_INIT_CANDTS; i++)
+ {
+ ps_search_candts[i].ps_search_node = &ps_ctxt->s_init_search_node[i];
+ INIT_SEARCH_NODE(ps_search_candts[i].ps_search_node, 0);
+ }
+
+ /* redundant, but doing it here since it is used in pred ctxt init */
+ ps_candt_zeromv = ps_search_candts[0].ps_search_node;
+ for(i = 0; i < 3; i++)
+ {
+ search_node_t *ps_search_node;
+ ps_search_node = &as_left_neighbours[i];
+ INIT_SEARCH_NODE(ps_search_node, 0);
+ ps_search_node = &as_top_neighbours[i];
+ INIT_SEARCH_NODE(ps_search_node, 0);
+ }
+
+ INIT_SEARCH_NODE(&as_top_neighbours[3], 0);
+ /* bottom left node always not available for the blk being searched */
+ as_left_neighbours[2].u1_is_avail = 0;
+ /*************************************************************************/
+ /* Initialize all the search results structure here. We update all the */
+ /* search results to default values, and configure things like blk sizes */
+ /*************************************************************************/
+ if(ps_refine_prms->i4_encode == 0)
+ {
+ S32 pred_lx;
+ search_results_t *ps_search_results;
+
+ ps_search_results = &ps_ctxt->s_search_results_8x8;
+ hme_init_search_results(
+ ps_search_results,
+ i4_num_ref_fpel,
+ ps_refine_prms->i4_num_fpel_results,
+ ps_refine_prms->i4_num_results_per_part,
+ e_search_blk_size,
+ 0,
+ 0,
+ &ps_ctxt->au1_is_past[0]);
+ for(pred_lx = 0; pred_lx < 2; pred_lx++)
+ {
+ hme_init_pred_ctxt_no_encode(
+ &ps_search_results->as_pred_ctxt[pred_lx],
+ ps_search_results,
+ &as_top_neighbours[0],
+ &as_left_neighbours[0],
+ &ps_candt_prj_coloc[0],
+ ps_candt_zeromv,
+ ps_candt_zeromv,
+ pred_lx,
+ lambda_inp,
+ ps_refine_prms->lambda_q_shift,
+ &ps_ctxt->apu1_ref_bits_tlu_lc[0],
+ &ps_ctxt->ai2_ref_scf[0]);
+ }
+ }
+
+ /*********************************************************************/
+ /* Initialize the dyn. search range params. for each reference index */
+ /* in current layer ctxt */
+ /*********************************************************************/
+ /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
+ if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
+ {
+ WORD32 ref_ctr;
+
+ for(ref_ctr = 0; ref_ctr < s_mv_update_prms.i4_num_ref; ref_ctr++)
+ {
+ INIT_DYN_SEARCH_PRMS(
+ &ps_ctxt->s_coarse_dyn_range_prms
+ .as_dyn_range_prms[ps_refine_prms->i4_layer_id][ref_ctr],
+ ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr]);
+ }
+ }
+
+ /* Next set up initial candidates according to a given set of rules. */
+ /* The number of initial candidates affects the quality of ME in the */
+ /* case of motion with multiple degrees of freedom. In case of simple */
+ /* translational motion, a current and a few causal and non causal */
+ /* candts would suffice. More candidates help to cover more complex */
+ /* cases like partitions, rotation/zoom, occlusion in/out, fine motion */
+ /* where multiple ref helps etc. */
+ /* The candidate choice also depends on the following parameters. */
+ /* e_search_complexity: SRCH_CX_LOW, SRCH_CX_MED, SRCH_CX_HIGH */
+ /* Whether we encode or not, and the type of search across reference */
+ /* i.e. the previous layer may have been explicit/implicit and curr */
+ /* layer may be explicit/implicit */
+
+ /* 0, 0, L, T, projected coloc best always presnt by default */
+ id = hme_decide_search_candidate_priority_in_l1_and_l2_me(ZERO_MV, e_me_quality_presets);
+ ps_candt_zeromv = ps_search_candts[id].ps_search_node;
+ ps_search_candts[id].u1_num_steps_refine = 0;
+ ps_candt_zeromv->s_mv.i2_mvx = 0;
+ ps_candt_zeromv->s_mv.i2_mvy = 0;
+
+ id = hme_decide_search_candidate_priority_in_l1_and_l2_me(SPATIAL_LEFT0, e_me_quality_presets);
+ ps_candt_l = ps_search_candts[id].ps_search_node;
+ ps_search_candts[id].u1_num_steps_refine = 0;
+
+ /* Even in ME_HIGH_SPEED mode, in layer 0, blocks */
+ /* not at the CTB boundary use the causal T and */
+ /* not the projected T, although the candidate is */
+ /* still pointed to by ps_candt_prj_t[0] */
+ if(ME_MEDIUM_SPEED <= e_me_quality_presets)
+ {
+ /* Using Projected top to eliminate sync */
+ id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
+ PROJECTED_TOP0, e_me_quality_presets);
+ ps_candt_prj_t[0] = ps_search_candts[id].ps_search_node;
+ ps_search_candts[id].u1_num_steps_refine = 1;
+ }
+ else
+ {
+ id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
+ SPATIAL_TOP0, e_me_quality_presets);
+ ps_candt_t = ps_search_candts[id].ps_search_node;
+ ps_search_candts[id].u1_num_steps_refine = 0;
+ }
+
+ id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
+ PROJECTED_COLOC0, e_me_quality_presets);
+ ps_candt_prj_coloc[0] = ps_search_candts[id].ps_search_node;
+ ps_search_candts[id].u1_num_steps_refine = 1;
+
+ id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
+ PROJECTED_COLOC1, e_me_quality_presets);
+ ps_candt_prj_coloc[1] = ps_search_candts[id].ps_search_node;
+ ps_search_candts[id].u1_num_steps_refine = 1;
+
+ if(ME_MEDIUM_SPEED <= e_me_quality_presets)
+ {
+ id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
+ PROJECTED_TOP_RIGHT0, e_me_quality_presets);
+ ps_candt_prj_tr[0] = ps_search_candts[id].ps_search_node;
+ ps_search_candts[id].u1_num_steps_refine = 1;
+
+ id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
+ PROJECTED_TOP_LEFT0, e_me_quality_presets);
+ ps_candt_prj_tl[0] = ps_search_candts[id].ps_search_node;
+ ps_search_candts[id].u1_num_steps_refine = 1;
+ }
+ else
+ {
+ id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
+ SPATIAL_TOP_RIGHT0, e_me_quality_presets);
+ ps_candt_tr = ps_search_candts[id].ps_search_node;
+ ps_search_candts[id].u1_num_steps_refine = 0;
+
+ id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
+ SPATIAL_TOP_LEFT0, e_me_quality_presets);
+ ps_candt_tl = ps_search_candts[id].ps_search_node;
+ ps_search_candts[id].u1_num_steps_refine = 0;
+ }
+
+ id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
+ PROJECTED_RIGHT0, e_me_quality_presets);
+ ps_candt_prj_r[0] = ps_search_candts[id].ps_search_node;
+ ps_search_candts[id].u1_num_steps_refine = 1;
+
+ id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
+ PROJECTED_BOTTOM0, e_me_quality_presets);
+ ps_candt_prj_b[0] = ps_search_candts[id].ps_search_node;
+ ps_search_candts[id].u1_num_steps_refine = 1;
+
+ id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
+ PROJECTED_BOTTOM_RIGHT0, e_me_quality_presets);
+ ps_candt_prj_br[0] = ps_search_candts[id].ps_search_node;
+ ps_search_candts[id].u1_num_steps_refine = 1;
+
+ id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
+ PROJECTED_BOTTOM_LEFT0, e_me_quality_presets);
+ ps_candt_prj_bl[0] = ps_search_candts[id].ps_search_node;
+ ps_search_candts[id].u1_num_steps_refine = 1;
+
+ id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
+ PROJECTED_RIGHT1, e_me_quality_presets);
+ ps_candt_prj_r[1] = ps_search_candts[id].ps_search_node;
+ ps_search_candts[id].u1_num_steps_refine = 1;
+
+ id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
+ PROJECTED_BOTTOM1, e_me_quality_presets);
+ ps_candt_prj_b[1] = ps_search_candts[id].ps_search_node;
+ ps_search_candts[id].u1_num_steps_refine = 1;
+
+ id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
+ PROJECTED_BOTTOM_RIGHT1, e_me_quality_presets);
+ ps_candt_prj_br[1] = ps_search_candts[id].ps_search_node;
+ ps_search_candts[id].u1_num_steps_refine = 1;
+
+ id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
+ PROJECTED_BOTTOM_LEFT1, e_me_quality_presets);
+ ps_candt_prj_bl[1] = ps_search_candts[id].ps_search_node;
+ ps_search_candts[id].u1_num_steps_refine = 1;
+
+ id = hme_decide_search_candidate_priority_in_l1_and_l2_me(PROJECTED_TOP1, e_me_quality_presets);
+ ps_candt_prj_t[1] = ps_search_candts[id].ps_search_node;
+ ps_search_candts[id].u1_num_steps_refine = 1;
+
+ id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
+ PROJECTED_TOP_RIGHT1, e_me_quality_presets);
+ ps_candt_prj_tr[1] = ps_search_candts[id].ps_search_node;
+ ps_search_candts[id].u1_num_steps_refine = 1;
+
+ id = hme_decide_search_candidate_priority_in_l1_and_l2_me(
+ PROJECTED_TOP_LEFT1, e_me_quality_presets);
+ ps_candt_prj_tl[1] = ps_search_candts[id].ps_search_node;
+ ps_search_candts[id].u1_num_steps_refine = 1;
+
+ /*************************************************************************/
+ /* Now that the candidates have been ordered, to choose the right number */
+ /* of initial candidates. */
+ /*************************************************************************/
+ if(curr_layer_implicit && !prev_layer_implicit)
+ {
+ if(e_search_complexity == SEARCH_CX_LOW)
+ num_init_candts = 7;
+ else if(e_search_complexity == SEARCH_CX_MED)
+ num_init_candts = 13;
+ else if(e_search_complexity == SEARCH_CX_HIGH)
+ num_init_candts = 18;
+ else
+ ASSERT(0);
+ }
+ else
+ {
+ if(e_search_complexity == SEARCH_CX_LOW)
+ num_init_candts = 5;
+ else if(e_search_complexity == SEARCH_CX_MED)
+ num_init_candts = 11;
+ else if(e_search_complexity == SEARCH_CX_HIGH)
+ num_init_candts = 16;
+ else
+ ASSERT(0);
+ }
+
+ if(ME_XTREME_SPEED_25 == e_me_quality_presets)
+ {
+ num_init_candts = NUM_INIT_SEARCH_CANDS_IN_L1_AND_L2_ME_IN_XS25;
+ }
+
+ /*************************************************************************/
+ /* The following search parameters are fixed throughout the search across*/
+ /* all blks. So these are configured outside processing loop */
+ /*************************************************************************/
+ s_search_prms_blk.i4_num_init_candts = num_init_candts;
+ s_search_prms_blk.i4_start_step = 1;
+ s_search_prms_blk.i4_use_satd = 0;
+ s_search_prms_blk.i4_num_steps_post_refine = ps_refine_prms->i4_num_steps_post_refine_fpel;
+ /* we use recon only for encoded layers, otherwise it is not available */
+ s_search_prms_blk.i4_use_rec = ps_refine_prms->i4_encode & ps_refine_prms->i4_use_rec_in_fpel;
+
+ s_search_prms_blk.ps_search_candts = ps_search_candts;
+ /* We use the same mv_range for all ref. pic. So assign to member 0 */
+ if(s_search_prms_blk.i4_use_rec)
+ s_search_prms_blk.aps_mv_range[0] = &s_range_prms_rec;
+ else
+ s_search_prms_blk.aps_mv_range[0] = &s_range_prms_inp;
+ /*************************************************************************/
+ /* Initialize coordinates. Meaning as follows */
+ /* blk_x : x coordinate of the 16x16 blk, in terms of number of blks */
+ /* blk_y : same as above, y coord. */
+ /* num_blks_in_this_ctb : number of blks in this given ctb that starts */
+ /* at i4_ctb_x, i4_ctb_y. This may not be 16 at picture boundaries. */
+ /* i4_ctb_x, i4_ctb_y: pixel coordinate of the ctb realtive to top left */
+ /* corner of the picture. Always multiple of 64. */
+ /* blk_id_in_ctb : encode order id of the blk in the ctb. */
+ /*************************************************************************/
+ blk_y = 0;
+ blk_id_in_ctb = 0;
+
+ GET_NUM_BLKS_IN_PIC(i4_pic_wd, i4_pic_ht, blk_size_shift, num_blks_in_row, num_blks_in_pic);
+
+ /* Get the number of sync units in a row based on encode/non enocde layer */
+ num_sync_units_in_row = num_blks_in_row;
+
+ /*************************************************************************/
+ /* Picture limit on all 4 sides. This will be used to set mv limits for */
+ /* every block given its coordinate. Note thsi assumes that the min amt */
+ /* of padding to right of pic is equal to the blk size. If we go all the */
+ /* way upto 64x64, then the min padding on right size of picture should */
+ /* be 64, and also on bottom side of picture. */
+ /*************************************************************************/
+ SET_PIC_LIMIT(
+ s_pic_limit_inp,
+ ps_curr_layer->i4_pad_x_inp,
+ ps_curr_layer->i4_pad_y_inp,
+ ps_curr_layer->i4_wd,
+ ps_curr_layer->i4_ht,
+ s_search_prms_blk.i4_num_steps_post_refine);
+
+ SET_PIC_LIMIT(
+ s_pic_limit_rec,
+ ps_curr_layer->i4_pad_x_rec,
+ ps_curr_layer->i4_pad_y_rec,
+ ps_curr_layer->i4_wd,
+ ps_curr_layer->i4_ht,
+ s_search_prms_blk.i4_num_steps_post_refine);
+
+ /*************************************************************************/
+ /* set the MV limit per ref. pic. */
+ /* - P pic. : Based on the config params. */
+ /* - B/b pic: Based on the Max/Min MV from prev. P and config. param. */
+ /*************************************************************************/
+ {
+ WORD32 ref_ctr;
+ /* Only for B/b pic. */
+ if(1 == ps_ctxt->s_frm_prms.bidir_enabled)
+ {
+ WORD16 i2_mv_y_per_poc, i2_max_mv_y;
+ WORD32 cur_poc, ref_poc, abs_poc_diff;
+
+ cur_poc = ps_ctxt->i4_curr_poc;
+
+ /* Get abs MAX for symmetric search */
+ i2_mv_y_per_poc = MAX(
+ ps_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[ps_refine_prms->i4_layer_id],
+ (ABS(ps_ctxt->s_coarse_dyn_range_prms
+ .i2_dyn_min_y_per_poc[ps_refine_prms->i4_layer_id])));
+
+ for(ref_ctr = 0; ref_ctr < i4_num_ref_fpel; ref_ctr++)
+ {
+ ref_poc = ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr];
+ abs_poc_diff = ABS((cur_poc - ref_poc));
+ /* Get the cur. max MV based on POC distance */
+ i2_max_mv_y = i2_mv_y_per_poc * abs_poc_diff;
+ i2_max_mv_y = MIN(i2_max_mv_y, ps_curr_layer->i2_max_mv_y);
+
+ as_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x;
+ as_mv_limit[ref_ctr].i2_min_y = -i2_max_mv_y;
+ as_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x;
+ as_mv_limit[ref_ctr].i2_max_y = i2_max_mv_y;
+ }
+ }
+ else
+ {
+ /* Set the Config. File Params for P pic. */
+ for(ref_ctr = 0; ref_ctr < i4_num_ref_fpel; ref_ctr++)
+ {
+ as_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x;
+ as_mv_limit[ref_ctr].i2_min_y = -ps_curr_layer->i2_max_mv_y;
+ as_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x;
+ as_mv_limit[ref_ctr].i2_max_y = ps_curr_layer->i2_max_mv_y;
+ }
+ }
+ }
+
+ /* EIID: Calculate threshold based on quality preset and/or temporal layers */
+ if(e_me_quality_presets == ME_MEDIUM_SPEED)
+ {
+ i4_threshold_multiplier = 1;
+ i4_threshold_divider = 4;
+ }
+ else if(e_me_quality_presets == ME_HIGH_SPEED)
+ {
+ i4_threshold_multiplier = 1;
+ i4_threshold_divider = 2;
+ }
+ else if((e_me_quality_presets == ME_XTREME_SPEED) || (e_me_quality_presets == ME_XTREME_SPEED_25))
+ {
+#if OLD_XTREME_SPEED
+ /* Hard coding the temporal ID value to 1, if it is older xtreme speed */
+ i4_temporal_layer = 1;
+#endif
+ if(i4_temporal_layer == 0)
+ {
+ i4_threshold_multiplier = 3;
+ i4_threshold_divider = 4;
+ }
+ else if(i4_temporal_layer == 1)
+ {
+ i4_threshold_multiplier = 3;
+ i4_threshold_divider = 4;
+ }
+ else if(i4_temporal_layer == 2)
+ {
+ i4_threshold_multiplier = 1;
+ i4_threshold_divider = 1;
+ }
+ else
+ {
+ i4_threshold_multiplier = 5;
+ i4_threshold_divider = 4;
+ }
+ }
+ else if(e_me_quality_presets == ME_HIGH_QUALITY)
+ {
+ i4_threshold_multiplier = 1;
+ i4_threshold_divider = 1;
+ }
+
+ /*************************************************************************/
+ /*************************************************************************/
+ /*************************************************************************/
+ /* START OF THE CORE LOOP */
+ /* If Encode is 0, then we just loop over each blk */
+ /*************************************************************************/
+ /*************************************************************************/
+ /*************************************************************************/
+ while(0 == end_of_frame)
+ {
+ job_queue_t *ps_job;
+ ihevce_ed_blk_t *ps_ed_blk_ctxt_curr_row; //EIID
+ WORD32 i4_ctb_row_ctr; //counter to calculate CTB row counter. It's (row_ctr /4)
+ WORD32 i4_num_ctbs_in_row = (num_blks_in_row + 3) / 4; //calculations verified for L1 only
+ //+3 to get ceil values when divided by 4
+ WORD32 i4_num_4x4_blocks_in_ctb_at_l1 =
+ 8 * 8; //considering CTB size 32x32 at L1. hardcoded for now
+ //if there is variable for ctb size use that and this variable can be derived
+ WORD32 offset_val, check_dep_pos, set_dep_pos;
+ void *pv_hme_dep_mngr;
+ ihevce_ed_ctb_l1_t *ps_ed_ctb_l1_row;
+
+ /* Get the current layer HME Dep Mngr */
+ /* Note : Use layer_id - 1 in HME layers */
+
+ pv_hme_dep_mngr = ppv_dep_mngr_hme_sync[ps_refine_prms->i4_layer_id - 1];
+
+ /* Get the current row from the job queue */
+ ps_job = (job_queue_t *)ihevce_pre_enc_grp_get_next_job(
+ ps_multi_thrd_ctxt, lyr_job_type, 1, i4_ping_pong);
+
+ /* If all rows are done, set the end of process flag to 1, */
+ /* and the current row to -1 */
+ if(NULL == ps_job)
+ {
+ blk_y = -1;
+ end_of_frame = 1;
+
+ continue;
+ }
+
+ if(1 == ps_ctxt->s_frm_prms.is_i_pic)
+ {
+ /* set the output dependency of current row */
+ ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong);
+ continue;
+ }
+
+ blk_y = ps_job->s_job_info.s_me_job_info.i4_vert_unit_row_no;
+ blk_x = 0;
+ i4_ctb_x = 0;
+
+ /* wait for Corresponding Pre intra Job to be completed */
+ if(1 == ps_refine_prms->i4_layer_id)
+ {
+ volatile UWORD32 i4_l1_done;
+ volatile UWORD32 *pi4_l1_done;
+ pi4_l1_done = (volatile UWORD32 *)&ps_multi_thrd_ctxt
+ ->aai4_l1_pre_intra_done[i4_ping_pong][blk_y >> 2];
+ i4_l1_done = *pi4_l1_done;
+ while(!i4_l1_done)
+ {
+ i4_l1_done = *pi4_l1_done;
+ }
+ }
+ /* Set Variables for Dep. Checking and Setting */
+ set_dep_pos = blk_y + 1;
+ if(blk_y > 0)
+ {
+ offset_val = 2;
+ check_dep_pos = blk_y - 1;
+ }
+ else
+ {
+ /* First row should run without waiting */
+ offset_val = -1;
+ check_dep_pos = 0;
+ }
+
+ /* EIID: calculate ed_blk_ctxt pointer for current row */
+ /* valid only for layer-1. not verified and used for other layers */
+ i4_ctb_row_ctr = blk_y / 4;
+ ps_ed_blk_ctxt_curr_row =
+ ps_ctxt->ps_ed_blk + (i4_ctb_row_ctr * i4_num_ctbs_in_row *
+ i4_num_4x4_blocks_in_ctb_at_l1); //valid for L1 only
+ ps_ed_ctb_l1_row = ps_ctxt->ps_ed_ctb_l1 + (i4_ctb_row_ctr * i4_num_ctbs_in_row);
+
+ /* if non-encode layer then i4_ctb_x will be same as blk_x */
+ /* loop over all the units in a row */
+ for(; i4_ctb_x < num_sync_units_in_row; i4_ctb_x++)
+ {
+ ihevce_ed_blk_t *ps_ed_blk_ctxt_curr_ctb; //EIDD
+ ihevce_ed_ctb_l1_t *ps_ed_ctb_l1_curr;
+ WORD32 i4_ctb_blk_ctr = i4_ctb_x / 4;
+
+ /* Wait till top row block is processed */
+ /* Currently checking till top right block*/
+
+ /* Disabled since all candidates, except for */
+ /* L and C, are projected from the coarser layer, */
+ /* only in ME_HIGH_SPEED mode */
+ if((ME_MEDIUM_SPEED > e_me_quality_presets))
+ {
+ if(i4_ctb_x < (num_sync_units_in_row - 1))
+ {
+ ihevce_dmgr_chk_row_row_sync(
+ pv_hme_dep_mngr,
+ i4_ctb_x,
+ offset_val,
+ check_dep_pos,
+ 0, /* Col Tile No. : Not supported in PreEnc*/
+ ps_ctxt->thrd_id);
+ }
+ }
+
+ {
+ /* for non encoder layer only one block is processed */
+ num_blks_in_this_ctb = 1;
+ }
+
+ /* EIID: derive ed_ctxt ptr for current CTB */
+ ps_ed_blk_ctxt_curr_ctb =
+ ps_ed_blk_ctxt_curr_row +
+ (i4_ctb_blk_ctr *
+ i4_num_4x4_blocks_in_ctb_at_l1); //currently valid for l1 layer only
+ ps_ed_ctb_l1_curr = ps_ed_ctb_l1_row + i4_ctb_blk_ctr;
+
+ /* loop over all the blocks in the CTB; the count will always be 1 */
+ for(blk_id_in_ctb = 0; blk_id_in_ctb < num_blks_in_this_ctb; blk_id_in_ctb++)
+ {
+ {
+ /* non encode layer */
+ blk_x = i4_ctb_x;
+ blk_id_in_full_ctb = 0;
+ s_search_prms_blk.i4_cu_x_off = s_search_prms_blk.i4_cu_y_off = 0;
+ }
+
+ /* get the current input blk point */
+ pos_x = blk_x << blk_size_shift;
+ pos_y = blk_y << blk_size_shift;
+ pu1_inp = ps_curr_layer->pu1_inp + pos_x + (pos_y * i4_inp_stride);
+
+ /*********************************************************************/
+ /* replicate the inp buffer at blk or ctb level for each ref id, */
+ /* Instead of searching with wk * ref(k), we search with Ik = I / wk */
+ /* thereby avoiding a bloat up of memory. If we did all references */
+ /* weighted pred, we will end up with a duplicate copy of each ref */
+ /* at each layer, since we need to preserve the original reference. */
+ /* ToDo: Need to observe performance with this mechanism and compare */
+ /* with case where ref is weighted. */
+ /*********************************************************************/
+ if(blk_id_in_ctb == 0)
+ {
+ fp_get_wt_inp(
+ ps_curr_layer,
+ &ps_ctxt->s_wt_pred,
+ unit_size,
+ pos_x,
+ pos_y,
+ unit_size,
+ ps_ctxt->num_ref_future + ps_ctxt->num_ref_past,
+ ps_ctxt->i4_wt_pred_enable_flag);
+ }
+
+ s_search_prms_blk.i4_x_off = blk_x << blk_size_shift;
+ s_search_prms_blk.i4_y_off = blk_y << blk_size_shift;
+ /* Select search results from a suitable search result in the context */
+ {
+ ps_search_results = &ps_ctxt->s_search_results_8x8;
+ }
+
+ s_search_prms_blk.ps_search_results = ps_search_results;
+
+ /* RESET ALL SEARCH RESULTS FOR THE NEW BLK */
+ hme_reset_search_results(
+ ps_search_results, s_search_prms_blk.i4_part_mask, MV_RES_FPEL);
+
+ /* Loop across different Ref IDx */
+ for(i1_ref_idx = 0; i1_ref_idx < i4_num_ref_fpel; i1_ref_idx++)
+ {
+ S32 next_blk_offset = (e_search_blk_size == BLK_16x16) ? 22 : 12;
+ S32 prev_blk_offset = 6;
+ S32 resultid;
+
+ /*********************************************************************/
+ /* For every blk in the picture, the search range needs to be derived*/
+ /* Any blk can have any mv, but practical search constraints are */
+ /* imposed by the picture boundary and amt of padding. */
+ /*********************************************************************/
+ /* MV limit is different based on ref. PIC */
+ hme_derive_search_range(
+ &s_range_prms_inp,
+ &s_pic_limit_inp,
+ &as_mv_limit[i1_ref_idx],
+ pos_x,
+ pos_y,
+ blk_wd,
+ blk_ht);
+ hme_derive_search_range(
+ &s_range_prms_rec,
+ &s_pic_limit_rec,
+ &as_mv_limit[i1_ref_idx],
+ pos_x,
+ pos_y,
+ blk_wd,
+ blk_ht);
+
+ s_search_prms_blk.i1_ref_idx = i1_ref_idx;
+ ps_candt_zeromv->i1_ref_idx = i1_ref_idx;
+
+ i4_num_srch_cands = 1;
+
+ if(1 != ps_refine_prms->i4_layer_id)
+ {
+ S32 x, y;
+ x = gau1_encode_to_raster_x[blk_id_in_full_ctb];
+ y = gau1_encode_to_raster_y[blk_id_in_full_ctb];
+
+ if(ME_MEDIUM_SPEED > e_me_quality_presets)
+ {
+ hme_get_spatial_candt(
+ ps_curr_layer,
+ e_search_blk_size,
+ blk_x,
+ blk_y,
+ i1_ref_idx,
+ &as_top_neighbours[0],
+ &as_left_neighbours[0],
+ 0,
+ ((ps_refine_prms->i4_encode) ? gau1_cu_tr_valid[y][x] : 1),
+ 0,
+ ps_refine_prms->i4_encode);
+
+ *ps_candt_tr = as_top_neighbours[3];
+ *ps_candt_t = as_top_neighbours[1];
+ *ps_candt_tl = as_top_neighbours[0];
+ i4_num_srch_cands += 3;
+ }
+ else
+ {
+ layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
+ S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
+ S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size];
+ search_node_t *ps_search_node;
+ S32 i4_offset, blk_x_temp = blk_x, blk_y_temp = blk_y;
+ hme_mv_t *ps_mv, *ps_mv_base;
+ S08 *pi1_ref_idx, *pi1_ref_idx_base;
+ S32 jump = 1, mvs_in_blk, mvs_in_row;
+ S32 shift = (ps_refine_prms->i4_encode ? 2 : 0);
+
+ if(i4_blk_size1 != i4_blk_size2)
+ {
+ blk_x_temp <<= 1;
+ blk_y_temp <<= 1;
+ jump = 2;
+ if((i4_blk_size1 << 2) == i4_blk_size2)
+ {
+ blk_x_temp <<= 1;
+ blk_y_temp <<= 1;
+ jump = 4;
+ }
+ }
+
+ mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
+ mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;
+
+ /* Adjust the blk coord to point to top left locn */
+ blk_x_temp -= 1;
+ blk_y_temp -= 1;
+
+ /* Pick up the mvs from the location */
+ i4_offset = (blk_x_temp * ps_layer_mvbank->i4_num_mvs_per_blk);
+ i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y_temp);
+
+ ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
+ pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
+
+ ps_mv += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref);
+ pi1_ref_idx += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref);
+
+ ps_mv_base = ps_mv;
+ pi1_ref_idx_base = pi1_ref_idx;
+
+ ps_search_node = &as_left_neighbours[0];
+ ps_mv = ps_mv_base + mvs_in_row;
+ pi1_ref_idx = pi1_ref_idx_base + mvs_in_row;
+ COPY_MV_TO_SEARCH_NODE(
+ ps_search_node, ps_mv, pi1_ref_idx, i1_ref_idx, shift);
+
+ i4_num_srch_cands++;
+ }
+ }
+ else
+ {
+ S32 x, y;
+ x = gau1_encode_to_raster_x[blk_id_in_full_ctb];
+ y = gau1_encode_to_raster_y[blk_id_in_full_ctb];
+
+ if(ME_MEDIUM_SPEED > e_me_quality_presets)
+ {
+ hme_get_spatial_candt_in_l1_me(
+ ps_curr_layer,
+ e_search_blk_size,
+ blk_x,
+ blk_y,
+ i1_ref_idx,
+ !ps_search_results->pu1_is_past[i1_ref_idx],
+ &as_top_neighbours[0],
+ &as_left_neighbours[0],
+ 0,
+ ((ps_refine_prms->i4_encode) ? gau1_cu_tr_valid[y][x] : 1),
+ 0,
+ ps_ctxt->s_frm_prms.u1_num_active_ref_l0,
+ ps_ctxt->s_frm_prms.u1_num_active_ref_l1);
+
+ *ps_candt_tr = as_top_neighbours[3];
+ *ps_candt_t = as_top_neighbours[1];
+ *ps_candt_tl = as_top_neighbours[0];
+
+ i4_num_srch_cands += 3;
+ }
+ else
+ {
+ layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
+ S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
+ S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size];
+ S32 i4_mv_pos_in_implicit_array;
+ search_node_t *ps_search_node;
+ S32 i4_offset, blk_x_temp = blk_x, blk_y_temp = blk_y;
+ hme_mv_t *ps_mv, *ps_mv_base;
+ S08 *pi1_ref_idx, *pi1_ref_idx_base;
+ S32 jump = 1, mvs_in_blk, mvs_in_row;
+ S32 shift = (ps_refine_prms->i4_encode ? 2 : 0);
+ U08 u1_pred_dir = !ps_search_results->pu1_is_past[i1_ref_idx];
+ S32 i4_num_results_in_given_dir =
+ ((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref *
+ ps_ctxt->s_frm_prms.u1_num_active_ref_l1)
+ : (ps_layer_mvbank->i4_num_mvs_per_ref *
+ ps_ctxt->s_frm_prms.u1_num_active_ref_l0));
+
+ if(i4_blk_size1 != i4_blk_size2)
+ {
+ blk_x_temp <<= 1;
+ blk_y_temp <<= 1;
+ jump = 2;
+ if((i4_blk_size1 << 2) == i4_blk_size2)
+ {
+ blk_x_temp <<= 1;
+ blk_y_temp <<= 1;
+ jump = 4;
+ }
+ }
+
+ mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
+ mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;
+
+ /* Adjust the blk coord to point to top left locn */
+ blk_x_temp -= 1;
+ blk_y_temp -= 1;
+
+ /* Pick up the mvs from the location */
+ i4_offset = (blk_x_temp * ps_layer_mvbank->i4_num_mvs_per_blk);
+ i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y_temp);
+
+ i4_offset +=
+ ((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref *
+ ps_ctxt->s_frm_prms.u1_num_active_ref_l0)
+ : 0);
+
+ ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
+ pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
+
+ ps_mv_base = ps_mv;
+ pi1_ref_idx_base = pi1_ref_idx;
+
+ {
+ /* ps_mv and pi1_ref_idx now point to the top left locn */
+ ps_search_node = &as_left_neighbours[0];
+ ps_mv = ps_mv_base + mvs_in_row;
+ pi1_ref_idx = pi1_ref_idx_base + mvs_in_row;
+
+ i4_mv_pos_in_implicit_array =
+ hme_find_pos_of_implicitly_stored_ref_id(
+ pi1_ref_idx, i1_ref_idx, 0, i4_num_results_in_given_dir);
+
+ if(-1 != i4_mv_pos_in_implicit_array)
+ {
+ COPY_MV_TO_SEARCH_NODE(
+ ps_search_node,
+ &ps_mv[i4_mv_pos_in_implicit_array],
+ &pi1_ref_idx[i4_mv_pos_in_implicit_array],
+ i1_ref_idx,
+ shift);
+ }
+ else
+ {
+ ps_search_node->u1_is_avail = 0;
+ ps_search_node->s_mv.i2_mvx = 0;
+ ps_search_node->s_mv.i2_mvy = 0;
+ ps_search_node->i1_ref_idx = i1_ref_idx;
+ }
+
+ i4_num_srch_cands++;
+ }
+ }
+ }
+
+ *ps_candt_l = as_left_neighbours[0];
+
+ /* when 16x16 is searched in an encode layer, and the prev layer */
+ /* stores results for 4x4 blks, we project 5 candts corresponding */
+ /* to (2,2), (2,14), (14,2), (14,14) and 2nd best of (2,2) */
+ /* However in other cases, only 2,2 best and 2nd best reqd */
+ resultid = 0;
+ pf_hme_project_coloc_candt(
+ ps_candt_prj_coloc[0],
+ ps_curr_layer,
+ ps_coarse_layer,
+ pos_x + 2,
+ pos_y + 2,
+ i1_ref_idx,
+ resultid);
+
+ i4_num_srch_cands++;
+
+ resultid = 1;
+ if(num_results_prev_layer > 1)
+ {
+ pf_hme_project_coloc_candt(
+ ps_candt_prj_coloc[1],
+ ps_curr_layer,
+ ps_coarse_layer,
+ pos_x + 2,
+ pos_y + 2,
+ i1_ref_idx,
+ resultid);
+
+ i4_num_srch_cands++;
+ }
+
+ resultid = 0;
+
+ if(ME_MEDIUM_SPEED <= e_me_quality_presets)
+ {
+ pf_hme_project_coloc_candt(
+ ps_candt_prj_t[0],
+ ps_curr_layer,
+ ps_coarse_layer,
+ pos_x,
+ pos_y - prev_blk_offset,
+ i1_ref_idx,
+ resultid);
+
+ i4_num_srch_cands++;
+ }
+
+ {
+ pf_hme_project_coloc_candt(
+ ps_candt_prj_br[0],
+ ps_curr_layer,
+ ps_coarse_layer,
+ pos_x + next_blk_offset,
+ pos_y + next_blk_offset,
+ i1_ref_idx,
+ resultid);
+ pf_hme_project_coloc_candt(
+ ps_candt_prj_bl[0],
+ ps_curr_layer,
+ ps_coarse_layer,
+ pos_x - prev_blk_offset,
+ pos_y + next_blk_offset,
+ i1_ref_idx,
+ resultid);
+ pf_hme_project_coloc_candt(
+ ps_candt_prj_r[0],
+ ps_curr_layer,
+ ps_coarse_layer,
+ pos_x + next_blk_offset,
+ pos_y,
+ i1_ref_idx,
+ resultid);
+ pf_hme_project_coloc_candt(
+ ps_candt_prj_b[0],
+ ps_curr_layer,
+ ps_coarse_layer,
+ pos_x,
+ pos_y + next_blk_offset,
+ i1_ref_idx,
+ resultid);
+
+ i4_num_srch_cands += 4;
+
+ if(ME_MEDIUM_SPEED <= e_me_quality_presets)
+ {
+ pf_hme_project_coloc_candt(
+ ps_candt_prj_tr[0],
+ ps_curr_layer,
+ ps_coarse_layer,
+ pos_x + next_blk_offset,
+ pos_y - prev_blk_offset,
+ i1_ref_idx,
+ resultid);
+ pf_hme_project_coloc_candt(
+ ps_candt_prj_tl[0],
+ ps_curr_layer,
+ ps_coarse_layer,
+ pos_x - prev_blk_offset,
+ pos_y - prev_blk_offset,
+ i1_ref_idx,
+ resultid);
+
+ i4_num_srch_cands += 2;
+ }
+ }
+ if((num_results_prev_layer > 1) && (e_search_complexity >= SEARCH_CX_MED))
+ {
+ resultid = 1;
+ pf_hme_project_coloc_candt(
+ ps_candt_prj_br[1],
+ ps_curr_layer,
+ ps_coarse_layer,
+ pos_x + next_blk_offset,
+ pos_y + next_blk_offset,
+ i1_ref_idx,
+ resultid);
+ pf_hme_project_coloc_candt(
+ ps_candt_prj_bl[1],
+ ps_curr_layer,
+ ps_coarse_layer,
+ pos_x - prev_blk_offset,
+ pos_y + next_blk_offset,
+ i1_ref_idx,
+ resultid);
+ pf_hme_project_coloc_candt(
+ ps_candt_prj_r[1],
+ ps_curr_layer,
+ ps_coarse_layer,
+ pos_x + next_blk_offset,
+ pos_y,
+ i1_ref_idx,
+ resultid);
+ pf_hme_project_coloc_candt(
+ ps_candt_prj_b[1],
+ ps_curr_layer,
+ ps_coarse_layer,
+ pos_x,
+ pos_y + next_blk_offset,
+ i1_ref_idx,
+ resultid);
+
+ i4_num_srch_cands += 4;
+
+ pf_hme_project_coloc_candt(
+ ps_candt_prj_tr[1],
+ ps_curr_layer,
+ ps_coarse_layer,
+ pos_x + next_blk_offset,
+ pos_y - prev_blk_offset,
+ i1_ref_idx,
+ resultid);
+ pf_hme_project_coloc_candt(
+ ps_candt_prj_tl[1],
+ ps_curr_layer,
+ ps_coarse_layer,
+ pos_x - prev_blk_offset,
+ pos_y - prev_blk_offset,
+ i1_ref_idx,
+ resultid);
+ pf_hme_project_coloc_candt(
+ ps_candt_prj_t[1],
+ ps_curr_layer,
+ ps_coarse_layer,
+ pos_x,
+ pos_y - prev_blk_offset,
+ i1_ref_idx,
+ resultid);
+
+ i4_num_srch_cands += 3;
+ }
+
+ /* Note this block also clips the MV range for all candidates */
+#ifdef _DEBUG
+ {
+ S32 candt;
+ range_prms_t *ps_range_prms;
+
+ S32 num_ref_valid = ps_ctxt->num_ref_future + ps_ctxt->num_ref_past;
+ for(candt = 0; candt < i4_num_srch_cands; candt++)
+ {
+ search_node_t *ps_search_node;
+
+ ps_search_node =
+ s_search_prms_blk.ps_search_candts[candt].ps_search_node;
+
+ ps_range_prms = s_search_prms_blk.aps_mv_range[0];
+
+ if((ps_search_node->i1_ref_idx >= num_ref_valid) ||
+ (ps_search_node->i1_ref_idx < 0))
+ {
+ ASSERT(0);
+ }
+ }
+ }
+#endif
+
+ {
+ S32 srch_cand;
+ S32 num_unique_nodes = 0;
+ S32 num_nodes_searched = 0;
+ S32 num_best_cand = 0;
+ S08 i1_grid_enable = 0;
+ search_node_t as_best_two_proj_node[TOT_NUM_PARTS * 2];
+ /* has list of valid partition to search terminated by -1 */
+ S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
+ S32 center_x;
+ S32 center_y;
+
+ /* indicates if the centre point of grid needs to be explicitly added for search */
+ S32 add_centre = 0;
+
+ memset(au4_unique_node_map, 0, sizeof(au4_unique_node_map));
+ center_x = ps_candt_prj_coloc[0]->s_mv.i2_mvx;
+ center_y = ps_candt_prj_coloc[0]->s_mv.i2_mvy;
+
+ for(srch_cand = 0;
+ (srch_cand < i4_num_srch_cands) &&
+ (num_unique_nodes <= s_search_prms_blk.i4_num_init_candts);
+ srch_cand++)
+ {
+ search_node_t s_search_node_temp =
+ s_search_prms_blk.ps_search_candts[srch_cand].ps_search_node[0];
+
+ s_search_node_temp.i1_ref_idx = i1_ref_idx; //TEMP FIX;
+
+ /* Clip the motion vectors as well here since after clipping
+ two candidates can become same and they will be removed during deduplication */
+ CLIP_MV_WITHIN_RANGE(
+ s_search_node_temp.s_mv.i2_mvx,
+ s_search_node_temp.s_mv.i2_mvy,
+ s_search_prms_blk.aps_mv_range[0],
+ ps_refine_prms->i4_num_steps_fpel_refine,
+ ps_refine_prms->i4_num_steps_hpel_refine,
+ ps_refine_prms->i4_num_steps_qpel_refine);
+
+ /* PT_C */
+ INSERT_NEW_NODE(
+ as_unique_search_nodes,
+ num_unique_nodes,
+ s_search_node_temp,
+ 0,
+ au4_unique_node_map,
+ center_x,
+ center_y,
+ 1);
+
+ num_nodes_searched += 1;
+ }
+ num_unique_nodes =
+ MIN(num_unique_nodes, s_search_prms_blk.i4_num_init_candts);
+
+ /* If the number of candidates projected/number of candidates to be refined is 2 or more,
+ then filter out and choose the best two here */
+ if(num_unique_nodes >= 2)
+ {
+ S32 num_results;
+ S32 cnt;
+ S32 *pi4_valid_part_ids;
+ s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
+ s_search_prms_blk.i4_num_search_nodes = num_unique_nodes;
+ pi4_valid_part_ids = &ai4_valid_part_ids[0];
+
+ /* pi4_valid_part_ids is updated inside */
+ hme_pred_search_no_encode(
+ &s_search_prms_blk,
+ ps_curr_layer,
+ &ps_ctxt->s_wt_pred,
+ pi4_valid_part_ids,
+ 1,
+ e_me_quality_presets,
+ i1_grid_enable,
+ (ihevce_me_optimised_function_list_t *)
+ ps_ctxt->pv_me_optimised_function_list
+
+ );
+
+ num_best_cand = 0;
+ cnt = 0;
+ num_results = ps_search_results->u1_num_results_per_part;
+
+ while((id = pi4_valid_part_ids[cnt++]) >= 0)
+ {
+ num_results =
+ MIN(ps_refine_prms->pu1_num_best_results[id], num_results);
+
+ for(i = 0; i < num_results; i++)
+ {
+ search_node_t s_search_node_temp;
+ s_search_node_temp =
+ *(ps_search_results->aps_part_results[i1_ref_idx][id] + i);
+ if(s_search_node_temp.i1_ref_idx >= 0)
+ {
+ INSERT_NEW_NODE_NOMAP(
+ as_best_two_proj_node,
+ num_best_cand,
+ s_search_node_temp,
+ 0);
+ }
+ }
+ }
+ }
+ else
+ {
+ add_centre = 1;
+ num_best_cand = num_unique_nodes;
+ as_best_two_proj_node[0] = as_unique_search_nodes[0];
+ }
+
+ num_unique_nodes = 0;
+ num_nodes_searched = 0;
+
+ if(1 == num_best_cand)
+ {
+ search_node_t s_search_node_temp = as_best_two_proj_node[0];
+ S16 i2_mv_x = s_search_node_temp.s_mv.i2_mvx;
+ S16 i2_mv_y = s_search_node_temp.s_mv.i2_mvy;
+ S08 i1_ref_idx = s_search_node_temp.i1_ref_idx;
+
+ i1_grid_enable = 1;
+
+ as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1;
+ as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1;
+ as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
+
+ as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x;
+ as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1;
+ as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
+
+ as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1;
+ as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y - 1;
+ as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
+
+ as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1;
+ as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y;
+ as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
+
+ as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1;
+ as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y;
+ as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
+
+ as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x - 1;
+ as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1;
+ as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
+
+ as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x;
+ as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1;
+ as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
+
+ as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x + 1;
+ as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y + 1;
+ as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
+
+ if(add_centre)
+ {
+ as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvx = i2_mv_x;
+ as_unique_search_nodes[num_unique_nodes].s_mv.i2_mvy = i2_mv_y;
+ as_unique_search_nodes[num_unique_nodes++].i1_ref_idx = i1_ref_idx;
+ }
+ }
+ else
+ {
+ /* For the candidates where refinement was required, choose the best two */
+ for(srch_cand = 0; srch_cand < num_best_cand; srch_cand++)
+ {
+ search_node_t s_search_node_temp = as_best_two_proj_node[srch_cand];
+ WORD32 mv_x = s_search_node_temp.s_mv.i2_mvx;
+ WORD32 mv_y = s_search_node_temp.s_mv.i2_mvy;
+
+ /* Because there may not be two best unique candidates (because of clipping),
+ second best candidate can be uninitialized, ignore that */
+ if(s_search_node_temp.s_mv.i2_mvx == INTRA_MV ||
+ s_search_node_temp.i1_ref_idx < 0)
+ {
+ num_nodes_searched++;
+ continue;
+ }
+
+ /* PT_C */
+ /* Since the center point has already been evaluated and best results are persistent,
+ it will not be evaluated again */
+ if(add_centre) /* centre point added explicitly again if search results is not updated */
+ {
+ INSERT_NEW_NODE(
+ as_unique_search_nodes,
+ num_unique_nodes,
+ s_search_node_temp,
+ 0,
+ au4_unique_node_map,
+ center_x,
+ center_y,
+ 1);
+ }
+
+ /* PT_L */
+ s_search_node_temp.s_mv.i2_mvx = mv_x - 1;
+ s_search_node_temp.s_mv.i2_mvy = mv_y;
+ INSERT_NEW_NODE(
+ as_unique_search_nodes,
+ num_unique_nodes,
+ s_search_node_temp,
+ 0,
+ au4_unique_node_map,
+ center_x,
+ center_y,
+ 1);
+
+ /* PT_T */
+ s_search_node_temp.s_mv.i2_mvx = mv_x;
+ s_search_node_temp.s_mv.i2_mvy = mv_y - 1;
+ INSERT_NEW_NODE(
+ as_unique_search_nodes,
+ num_unique_nodes,
+ s_search_node_temp,
+ 0,
+ au4_unique_node_map,
+ center_x,
+ center_y,
+ 1);
+
+ /* PT_R */
+ s_search_node_temp.s_mv.i2_mvx = mv_x + 1;
+ s_search_node_temp.s_mv.i2_mvy = mv_y;
+ INSERT_NEW_NODE(
+ as_unique_search_nodes,
+ num_unique_nodes,
+ s_search_node_temp,
+ 0,
+ au4_unique_node_map,
+ center_x,
+ center_y,
+ 1);
+
+ /* PT_B */
+ s_search_node_temp.s_mv.i2_mvx = mv_x;
+ s_search_node_temp.s_mv.i2_mvy = mv_y + 1;
+ INSERT_NEW_NODE(
+ as_unique_search_nodes,
+ num_unique_nodes,
+ s_search_node_temp,
+ 0,
+ au4_unique_node_map,
+ center_x,
+ center_y,
+ 1);
+
+ /* PT_TL */
+ s_search_node_temp.s_mv.i2_mvx = mv_x - 1;
+ s_search_node_temp.s_mv.i2_mvy = mv_y - 1;
+ INSERT_NEW_NODE(
+ as_unique_search_nodes,
+ num_unique_nodes,
+ s_search_node_temp,
+ 0,
+ au4_unique_node_map,
+ center_x,
+ center_y,
+ 1);
+
+ /* PT_TR */
+ s_search_node_temp.s_mv.i2_mvx = mv_x + 1;
+ s_search_node_temp.s_mv.i2_mvy = mv_y - 1;
+ INSERT_NEW_NODE(
+ as_unique_search_nodes,
+ num_unique_nodes,
+ s_search_node_temp,
+ 0,
+ au4_unique_node_map,
+ center_x,
+ center_y,
+ 1);
+
+ /* PT_BL */
+ s_search_node_temp.s_mv.i2_mvx = mv_x - 1;
+ s_search_node_temp.s_mv.i2_mvy = mv_y + 1;
+ INSERT_NEW_NODE(
+ as_unique_search_nodes,
+ num_unique_nodes,
+ s_search_node_temp,
+ 0,
+ au4_unique_node_map,
+ center_x,
+ center_y,
+ 1);
+
+ /* PT_BR */
+ s_search_node_temp.s_mv.i2_mvx = mv_x + 1;
+ s_search_node_temp.s_mv.i2_mvy = mv_y + 1;
+ INSERT_NEW_NODE(
+ as_unique_search_nodes,
+ num_unique_nodes,
+ s_search_node_temp,
+ 0,
+ au4_unique_node_map,
+ center_x,
+ center_y,
+ 1);
+ }
+ }
+
+ s_search_prms_blk.ps_search_nodes = &as_unique_search_nodes[0];
+ s_search_prms_blk.i4_num_search_nodes = num_unique_nodes;
+
+ /*****************************************************************/
+ /* Call the search algorithm, this includes: */
+ /* Pre-Search-Refinement (for coarse candts) */
+ /* Search on each candidate */
+ /* Post Search Refinement on winners/other new candidates */
+ /*****************************************************************/
+
+ hme_pred_search_no_encode(
+ &s_search_prms_blk,
+ ps_curr_layer,
+ &ps_ctxt->s_wt_pred,
+ ai4_valid_part_ids,
+ 0,
+ e_me_quality_presets,
+ i1_grid_enable,
+ (ihevce_me_optimised_function_list_t *)
+ ps_ctxt->pv_me_optimised_function_list);
+
+ i1_grid_enable = 0;
+ }
+ }
+
+ /* for non encode layer update MV and end processing for block */
+ {
+ WORD32 i4_ref_id, min_cost = 0x7fffffff, min_sad = 0;
+ search_node_t *ps_search_node;
+ /* now update the reqd results back to the layer mv bank. */
+ if(1 == ps_refine_prms->i4_layer_id)
+ {
+ hme_update_mv_bank_in_l1_me(
+ ps_search_results,
+ ps_curr_layer->ps_layer_mvbank,
+ blk_x,
+ blk_y,
+ &s_mv_update_prms);
+ }
+ else
+ {
+ hme_update_mv_bank_noencode(
+ ps_search_results,
+ ps_curr_layer->ps_layer_mvbank,
+ blk_x,
+ blk_y,
+ &s_mv_update_prms);
+ }
+
+ /* UPDATE the MIN and MAX MVs for Dynamical Search Range for each ref. pic. */
+ /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
+ if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
+ {
+ WORD32 i4_j;
+ layer_mv_t *ps_layer_mv = ps_curr_layer->ps_layer_mvbank;
+
+ //if (ps_layer_mv->e_blk_size == s_mv_update_prms.e_search_blk_size)
+ /* Not considering this for Dyn. Search Update */
+ {
+ for(i4_ref_id = 0; i4_ref_id < (S32)s_mv_update_prms.i4_num_ref;
+ i4_ref_id++)
+ {
+ ps_search_node =
+ ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
+
+ for(i4_j = 0; i4_j < ps_layer_mv->i4_num_mvs_per_ref; i4_j++)
+ {
+ hme_update_dynamic_search_params(
+ &ps_ctxt->s_coarse_dyn_range_prms
+ .as_dyn_range_prms[ps_refine_prms->i4_layer_id]
+ [i4_ref_id],
+ ps_search_node->s_mv.i2_mvy);
+
+ ps_search_node++;
+ }
+ }
+ }
+ }
+
+ if(1 == ps_refine_prms->i4_layer_id)
+ {
+ WORD32 wt_pred_val, log_wt_pred_val;
+ WORD32 ref_id_of_nearest_poc = 0;
+ WORD32 max_val = 0x7fffffff;
+ WORD32 max_l0_val = 0x7fffffff;
+ WORD32 max_l1_val = 0x7fffffff;
+ WORD32 cur_val;
+ WORD32 i4_local_weighted_sad, i4_local_cost_weighted_pred;
+
+ WORD32 bestl0_sad = 0x7fffffff;
+ WORD32 bestl1_sad = 0x7fffffff;
+ search_node_t *ps_best_l0_blk = NULL, *ps_best_l1_blk = NULL;
+
+ for(i4_ref_id = 0; i4_ref_id < (S32)s_mv_update_prms.i4_num_ref;
+ i4_ref_id++)
+ {
+ wt_pred_val = ps_ctxt->s_wt_pred.a_wpred_wt[i4_ref_id];
+ log_wt_pred_val = ps_ctxt->s_wt_pred.wpred_log_wdc;
+
+ ps_search_node =
+ ps_search_results->aps_part_results[i4_ref_id][PART_ID_2Nx2N];
+
+ i4_local_weighted_sad = ((ps_search_node->i4_sad * wt_pred_val) +
+ ((1 << log_wt_pred_val) >> 1)) >>
+ log_wt_pred_val;
+
+ i4_local_cost_weighted_pred =
+ i4_local_weighted_sad +
+ (ps_search_node->i4_tot_cost - ps_search_node->i4_sad);
+ //the loop is redundant as the results are already sorted based on total cost
+ //for (i4_j = 0; i4_j < ps_curr_layer->ps_layer_mvbank->i4_num_mvs_per_ref; i4_j++)
+ {
+ if(i4_local_cost_weighted_pred < min_cost)
+ {
+ min_cost = i4_local_cost_weighted_pred;
+ min_sad = i4_local_weighted_sad;
+ }
+ }
+
+ /* For P frame, calculate the nearest poc which is either P or I frame*/
+ if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
+ {
+ if(-1 != ps_coarse_layer->ai4_ref_id_to_poc_lc[i4_ref_id])
+ {
+ cur_val =
+ ABS(ps_ctxt->i4_curr_poc -
+ ps_coarse_layer->ai4_ref_id_to_poc_lc[i4_ref_id]);
+ if(cur_val < max_val)
+ {
+ max_val = cur_val;
+ ref_id_of_nearest_poc = i4_ref_id;
+ }
+ }
+ }
+ }
+ /*Store me cost wrt. to past frame only for P frame */
+ if(ps_ctxt->s_frm_prms.is_i_pic == ps_ctxt->s_frm_prms.bidir_enabled)
+ {
+ if(-1 != ps_coarse_layer->ai4_ref_id_to_poc_lc[ref_id_of_nearest_poc])
+ {
+ WORD16 i2_mvx, i2_mvy;
+
+ WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
+ WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
+ WORD32 z_scan_idx =
+ gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
+ WORD32 wt, log_wt;
+
+ /*ASSERT((ps_ctxt->i4_curr_poc - ps_coarse_layer->ai4_ref_id_to_poc_lc[ref_id_of_nearest_poc])
+ <= (1 + ps_ctxt->num_b_frms));*/
+
+ /*obtain mvx and mvy */
+ i2_mvx =
+ ps_search_results
+ ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
+ ->s_mv.i2_mvx;
+ i2_mvy =
+ ps_search_results
+ ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
+ ->s_mv.i2_mvy;
+
+ /*register the min cost for l1 me in blk context */
+ wt = ps_ctxt->s_wt_pred.a_wpred_wt[ref_id_of_nearest_poc];
+ log_wt = ps_ctxt->s_wt_pred.wpred_log_wdc;
+
+ /*register the min cost for l1 me in blk context */
+ ps_ed_ctb_l1_curr->i4_sad_me_for_ref[z_scan_idx >> 2] =
+ ((ps_search_results
+ ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
+ ->i4_sad *
+ wt) +
+ ((1 << log_wt) >> 1)) >>
+ log_wt;
+ ps_ed_ctb_l1_curr->i4_sad_cost_me_for_ref[z_scan_idx >> 2] =
+ ps_ed_ctb_l1_curr->i4_sad_me_for_ref[z_scan_idx >> 2] +
+ (ps_search_results
+ ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
+ ->i4_tot_cost -
+ ps_search_results
+ ->aps_part_results[ref_id_of_nearest_poc][PART_ID_2Nx2N]
+ ->i4_sad);
+ /*for complexity change detection*/
+ ps_ctxt->i4_num_blks++;
+ if(ps_ed_ctb_l1_curr->i4_sad_cost_me_for_ref[z_scan_idx >> 2] >
+ (8 /*blk width*/ * 8 /*blk height*/ * (1 + ps_ctxt->num_b_frms)))
+ {
+ ps_ctxt->i4_num_blks_high_sad++;
+ }
+ }
+ }
+ }
+
+ /* EIID: Early inter intra decisions */
+ /* tap L1 level SAD for inter intra decisions */
+ if((e_me_quality_presets >= ME_MEDIUM_SPEED) &&
+ (!ps_ctxt->s_frm_prms
+ .is_i_pic)) //for high-quality preset->disable early decisions
+ {
+ if(1 == ps_refine_prms->i4_layer_id)
+ {
+ WORD32 i4_min_sad_cost_8x8_block = min_cost;
+ ihevce_ed_blk_t *ps_curr_ed_blk_ctxt;
+ WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
+ WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
+ WORD32 z_scan_idx =
+ gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
+ ps_curr_ed_blk_ctxt = ps_ed_blk_ctxt_curr_ctb + z_scan_idx;
+
+ /*register the min cost for l1 me in blk context */
+ ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] =
+ i4_min_sad_cost_8x8_block;
+ i4_num_comparisions++;
+
+ /* take early inter-intra decision here */
+ ps_curr_ed_blk_ctxt->intra_or_inter = 3; /*init saying eval both */
+#if DISABLE_INTRA_IN_BPICS
+ if((e_me_quality_presets == ME_XTREME_SPEED_25) &&
+ (ps_ctxt->s_frm_prms.i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE))
+ {
+ ps_curr_ed_blk_ctxt->intra_or_inter =
+ 2; /*eval only inter if inter cost is less */
+ i4_num_inter_wins++;
+ }
+ else
+#endif
+ {
+ if(ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] <
+ ((ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2] *
+ i4_threshold_multiplier) /
+ i4_threshold_divider))
+ {
+ ps_curr_ed_blk_ctxt->intra_or_inter =
+ 2; /*eval only inter if inter cost is less */
+ i4_num_inter_wins++;
+ }
+ }
+
+ //{
+ // DBG_PRINTF ("(blk x, blk y):(%d, %d)\t me:(ctb_x, ctb_y):(%d, %d)\t intra_SAD_COST: %d\tInter_SAD_COST: %d\n",
+ // blk_x,blk_y,
+ // i4_ctb_blk_ctr, i4_ctb_row_ctr,
+ // ps_curr_ed_blk_ctxt->i4_best_sad_8x8_l1_ipe,
+ // i4_min_sad_cost_8x8_block
+ // );
+ //}
+
+ } //end of layer-1
+ } //end of if (e_me_quality_presets >= ME_MEDIUM_SPEED)
+ else
+ {
+ if(1 == ps_refine_prms->i4_layer_id)
+ {
+ WORD32 i4_min_sad_cost_8x8_block = min_cost;
+ WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
+ WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
+ WORD32 z_scan_idx =
+ gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
+
+ /*register the min cost for l1 me in blk context */
+ ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_me[z_scan_idx >> 2] =
+ i4_min_sad_cost_8x8_block;
+ }
+ }
+ if(1 == ps_refine_prms->i4_layer_id)
+ {
+ WORD32 i4_diff_col_ctr = blk_x - (i4_ctb_blk_ctr * 4);
+ WORD32 i4_diff_row_ctr = blk_y - (i4_ctb_row_ctr * 4);
+ WORD32 z_scan_idx =
+ gau1_raster_scan_to_ctb[i4_diff_row_ctr][i4_diff_col_ctr];
+
+ ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me_for_decide[z_scan_idx >> 2] =
+ min_sad;
+
+ if(min_cost <
+ ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2])
+ {
+ ps_ctxt->i4_L1_hme_best_cost += min_cost;
+ ps_ctxt->i4_L1_hme_sad += min_sad;
+ ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me[z_scan_idx >> 2] = min_sad;
+ }
+ else
+ {
+ ps_ctxt->i4_L1_hme_best_cost +=
+ ps_ed_ctb_l1_curr->i4_best_sad_cost_8x8_l1_ipe[z_scan_idx >> 2];
+ ps_ctxt->i4_L1_hme_sad +=
+ ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_ipe[z_scan_idx >> 2];
+ ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_me[z_scan_idx >> 2] =
+ ps_ed_ctb_l1_curr->i4_best_sad_8x8_l1_ipe[z_scan_idx >> 2];
+ }
+ }
+ }
+ }
+
+ /* Update the number of blocks processed in the current row */
+ if((ME_MEDIUM_SPEED > e_me_quality_presets))
+ {
+ ihevce_dmgr_set_row_row_sync(
+ pv_hme_dep_mngr,
+ (i4_ctb_x + 1),
+ blk_y,
+ 0 /* Col Tile No. : Not supported in PreEnc*/);
+ }
+ }
+
+ /* set the output dependency after completion of row */
+ ihevce_pre_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_ping_pong);
+ }
+}
diff --git a/encoder/hme_refine.h b/encoder/hme_refine.h
new file mode 100644
index 0000000..2ef62bb
--- /dev/null
+++ b/encoder/hme_refine.h
@@ -0,0 +1,66 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file hme_refine.h
+*
+* \brief
+* Prototypes for coarse layer refine functions
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _HME_REFINE_H_
+#define _HME_REFINE_H_
+
+/*****************************************************************************/
+/* Functions */
+/*****************************************************************************/
+/* Refines one layer (NOTE(review): role inferred from the name and file header; confirm against the definition). */
+void hme_refine(
+ me_ctxt_t *ps_ctxt,
+ refine_prms_t *ps_refine_prms,
+ PF_EXT_UPDATE_FXN_T pf_ext_update_fxn,
+ //PF_GET_INTRA_CU_AND_COST pf_get_intra_cu_and_cost,
+ layer_ctxt_t *ps_coarse_layer,
+ multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
+ S32 lyr_job_type,
+ S32 thrd_id,
+ S32 me_frm_id,
+ pre_enc_L0_ipe_encloop_ctxt_t *ps_l0_ipe_input);
+/* No-encode variant; takes a coarse_me_ctxt_t rather than a me_ctxt_t and a ping pong index. */
+void hme_refine_no_encode(
+ coarse_me_ctxt_t *ps_ctxt,
+ refine_prms_t *ps_refine_prms,
+ multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
+ S32 lyr_job_type,
+ WORD32 i4_ping_pong,
+ void **ppv_dep_mngr_hme_sync);
+/* Per-frame init for refinement (NOTE(review): inferred from the name; confirm against the definition). */
+void hme_refine_frm_init(
+ layer_ctxt_t *ps_curr_layer, refine_prms_t *ps_refine_prms, layer_ctxt_t *ps_coarse_layer);
+
+#endif /* #ifndef _HME_REFINE_H_ */
diff --git a/encoder/hme_search_algo.c b/encoder/hme_search_algo.c
new file mode 100644
index 0000000..82778f0
--- /dev/null
+++ b/encoder/hme_search_algo.c
@@ -0,0 +1,798 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+******************************************************************************
+* @file hme_search_algo.c
+*
+* @brief
+* Contains various search algorithms to be used by coarse/refinement layers
+*
+* @author
+* Ittiam
+*
+* List of Functions
+* hme_compute_grid_results()
+* hme_pred_search_square_stepn()
+* hme_pred_search()
+* hme_pred_search_no_encode()
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+#include <limits.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_multi_thrd_funcs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_entropy_structs.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+#include "ihevce_enc_loop_structs.h"
+#include "ihevce_bs_compute_ctb.h"
+#include "ihevce_global_tables.h"
+#include "ihevce_dep_mngr_interface.h"
+#include "hme_datatype.h"
+#include "hme_interface.h"
+#include "hme_common_defs.h"
+#include "hme_defs.h"
+#include "ihevce_me_instr_set_router.h"
+#include "hme_globals.h"
+#include "hme_utils.h"
+#include "hme_coarse.h"
+#include "hme_fullpel.h"
+#include "hme_subpel.h"
+#include "hme_refine.h"
+#include "hme_err_compute.h"
+#include "hme_common_utils.h"
+#include "hme_search_algo.h"
+#include "ihevce_stasino_helpers.h"
+#include "ihevce_common_utils.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+********************************************************************************
+*  @fn     void hme_compute_grid_results(err_prms_t *ps_err_prms,
+*                                        result_upd_prms_t *ps_result_prms,
+*                                        BLK_SIZE_T e_blk_size)
+*
+*  @brief  Computes the SADs for the grid described by ps_err_prms and then
+*          updates the search results from them. The SAD function is picked
+*          by hme_get_sad_fxn() from the blk size, grid mask and part mask;
+*          the update function is picked by hme_get_result_fxn() from the
+*          grid mask, part mask and number of results kept per partition.
+*
+*  @param[in] ps_err_prms : Parameters handed to the SAD function; its grid
+*                           mask and part mask also drive both selections
+*  @param[in,out] ps_result_prms : Parameters handed to the result update
+*                                  function (the caller reads i4_min_id back)
+*  @param[in] e_blk_size : Block size of the blk being searched for
+*
+*  @return none
+********************************************************************************
+*/
+void hme_compute_grid_results(
+    err_prms_t *ps_err_prms, result_upd_prms_t *ps_result_prms, BLK_SIZE_T e_blk_size)
+{
+    /* Number of best results kept per partition selects the update routine */
+    S32 i4_num_results = (S32)ps_result_prms->ps_search_results->u1_num_results_per_part;
+
+    PF_SAD_FXN_T pf_sad_fxn =
+        hme_get_sad_fxn(e_blk_size, ps_err_prms->i4_grid_mask, ps_err_prms->i4_part_mask);
+    PF_RESULT_FXN_T pf_hme_result_fxn =
+        hme_get_result_fxn(ps_err_prms->i4_grid_mask, ps_err_prms->i4_part_mask, i4_num_results);
+
+    /* Order matters: the caller in this file points both parameter sets at */
+    /* the same SAD grid, which has to be filled before it is consumed      */
+    pf_sad_fxn(ps_err_prms);
+    pf_hme_result_fxn(ps_result_prms);
+}
+
+/**
+********************************************************************************
+* @fn void hme_pred_search_square_stepn(hme_search_prms_t *ps_search_prms,
+* layer_ctxt_t *ps_layer_ctxt)
+*
+* @brief Implements predictive search, with square grid refinement. Each
+* initial candidate is clipped to the mv range; a grid of step
+* i4_start_step around it is evaluated and re-centred on the best pt
+* for at most i4_max_iters iterations or till the centre is best. The
+* step is then halved repeatedly, searching the non-centre pts. Candts
+* with u1_num_steps_refine == 0 get only a centre pt evaluation.
+* @param[in,out] ps_search_prms: All the params to this function
+* @param[in] ps_layer_ctxt: Context for the layer; supplies ref planes and stride
+* @param[in] ps_wt_inp_prms: Supplies the input plane for each ref idx
+* @param[in] e_me_quality_preset: ME_XTREME_SPEED_25 selects the xtreme speed path
+* @param[in] ps_me_optimised_function_list: Supplies the pt and grid SAD functions
+* @return None
+********************************************************************************
+*/
+void hme_pred_search_square_stepn(
+ hme_search_prms_t *ps_search_prms,
+ layer_ctxt_t *ps_layer_ctxt,
+ wgt_pred_ctxt_t *ps_wt_inp_prms,
+ ME_QUALITY_PRESETS_T e_me_quality_preset,
+ ihevce_me_optimised_function_list_t *ps_me_optimised_function_list
+
+)
+{
+ /* Stores the SAD for all parts at each pt in the grid */
+ S32 ai4_sad_grid[9][TOT_NUM_PARTS];
+
+ S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
+
+ /* Attributes of input candidates */
+ search_candt_t *ps_search_candts;
+ search_node_t s_search_node;
+
+ /* Number of candidates to search */
+ S32 i4_num_candts, max_num_iters, i4_num_results;
+
+ /* Input and reference attributes */
+ S32 i4_inp_stride, i4_ref_stride, i4_ref_offset;
+
+ /* The reference is actually an array of ptrs since there are several */
+ /* reference ids. So an array gets passed from the calling function */
+ U08 **ppu1_ref;
+
+ /* Holds the search results at the end of this fxn */
+ search_results_t *ps_search_results;
+
+ /* These control number of parts and number of pts in grid to search */
+ S32 i4_part_mask, i4_grid_mask;
+
+ /* Blk width, blk height and blk size are derived from input params */
+ BLK_SIZE_T e_blk_size;
+ CU_SIZE_T e_cu_size;
+ S32 i4_blk_wd, i4_blk_ht, i4_step, i4_candt, i4_iter;
+ S32 i4_inp_off;
+ S32 i4_min_id;
+ /* Points to the range limits for mv */
+ range_prms_t *ps_range_prms;
+
+ /*************************************************************************/
+ /* These functions pointers for calculating Err and the result update */
+ /* Each carries its own parameters structure, which is generated on the */
+ /* fly in this function */
+ /*************************************************************************/
+ err_prms_t s_err_prms;
+ result_upd_prms_t s_result_prms;
+
+ max_num_iters = ps_search_prms->i4_max_iters;
+ /* Member 0 of aps_mv_range is used for all ref. idx. here, see in coarsest */
+ ps_range_prms = ps_search_prms->aps_mv_range[0];
+ i4_inp_stride = ps_search_prms->i4_inp_stride;
+ /* Move to the location of the search blk in inp buffer */
+ i4_inp_off = ps_search_prms->i4_cu_x_off;
+ i4_inp_off += (ps_search_prms->i4_cu_y_off * i4_inp_stride);
+
+ ps_search_results = ps_search_prms->ps_search_results;
+
+ /*************************************************************************/
+ /* Depending on flag i4_use_rec, we use either input of previously */
+ /* encoded pictures or we use recon of previously encoded pictures. */
+ /*************************************************************************/
+ if(ps_search_prms->i4_use_rec == 1)
+ {
+ i4_ref_stride = ps_layer_ctxt->i4_rec_stride;
+ ppu1_ref = ps_layer_ctxt->ppu1_list_rec_fxfy;
+ }
+ else
+ {
+ i4_ref_stride = ps_layer_ctxt->i4_inp_stride;
+ ppu1_ref = ps_layer_ctxt->ppu1_list_inp;
+ }
+ i4_ref_offset = (i4_ref_stride * ps_search_prms->i4_y_off) + ps_search_prms->i4_x_off;
+
+ /*************************************************************************/
+ /* Obtain the blk size of the search blk. Assumed here that the search */
+ /* is done on a CU size, rather than any arbitrary blk size. */
+ /*************************************************************************/
+ ps_search_results = ps_search_prms->ps_search_results;
+ e_blk_size = ps_search_prms->e_blk_size;
+ i4_blk_wd = (S32)gau1_blk_size_to_wd[e_blk_size];
+ i4_blk_ht = (S32)gau1_blk_size_to_ht[e_blk_size];
+ e_cu_size = ps_search_results->e_cu_size;
+ i4_num_results = (S32)ps_search_results->u1_num_results_per_part;
+
+ ps_search_candts = ps_search_prms->ps_search_candts;
+ i4_num_candts = ps_search_prms->i4_num_init_candts;
+ i4_part_mask = ps_search_prms->i4_part_mask;
+
+ /*************************************************************************/
+ /* This array stores the ids of the partitions whose */
+ /* SADs are updated. Since the partitions whose SADs are updated may not */
+ /* be in contiguous order, we supply another level of indirection. */
+ /*************************************************************************/
+ hme_create_valid_part_ids(i4_part_mask, ai4_valid_part_ids);
+
+ /* Update the parameters used to pass to SAD */
+ /* input ptr, strides, SAD Grid, part mask, blk width and ht */
+ /* The above are fixed ptrs, only pu1_ref and grid mask are */
+ /* varying params which are updated just before calling fxn */
+ s_err_prms.i4_inp_stride = i4_inp_stride;
+ s_err_prms.i4_ref_stride = i4_ref_stride;
+ s_err_prms.i4_part_mask = i4_part_mask;
+ s_err_prms.pi4_sad_grid = &ai4_sad_grid[0][0];
+ s_err_prms.i4_blk_wd = i4_blk_wd;
+ s_err_prms.i4_blk_ht = i4_blk_ht;
+ s_err_prms.pi4_valid_part_ids = ai4_valid_part_ids;
+
+ s_result_prms.pf_mv_cost_compute = ps_search_prms->pf_mv_cost_compute;
+ s_result_prms.ps_search_results = ps_search_results;
+ s_result_prms.pi4_valid_part_ids = ai4_valid_part_ids;
+ s_result_prms.i1_ref_idx = ps_search_prms->i1_ref_idx;
+ s_result_prms.i4_part_mask = ps_search_prms->i4_part_mask;
+ s_result_prms.ps_search_node_base = &s_search_node;
+ s_result_prms.pi4_sad_grid = &ai4_sad_grid[0][0];
+
+ /* Run through each of the candts in a loop */
+ for(i4_candt = 0; i4_candt < i4_num_candts; i4_candt++)
+ {
+ S32 i4_num_refine;
+
+ i4_step = ps_search_prms->i4_start_step;
+
+ s_search_node = *(ps_search_candts->ps_search_node);
+
+ /* initialize minimum cost for this candidate. As we search around */
+ /* this candidate, this is used to check early exit, when in any */
+ /* given iteration, the center pt of the grid is lowest value */
+ s_result_prms.i4_min_cost = MAX_32BIT_VAL;
+
+ /* If we need to do refinements, then we need to evaluate */
+ /* neighbouring pts. Before doing so, we have to do */
+ /* basic range checks against max allowed mvs */
+ i4_num_refine = ps_search_candts->u1_num_steps_refine;
+
+ CLIP_MV_WITHIN_RANGE(
+ s_search_node.s_mv.i2_mvx, s_search_node.s_mv.i2_mvy, ps_range_prms, 0, 0, 0);
+
+ /* The first time, we search all 8 pts around init candt plus the init candt */
+ i4_grid_mask = 0x1ff;
+ s_err_prms.pu1_inp = ps_wt_inp_prms->apu1_wt_inp[s_search_node.i1_ref_idx] + i4_inp_off;
+
+ for(i4_iter = 0; i4_iter < max_num_iters; i4_iter++)
+ {
+ i4_grid_mask &= hme_clamp_grid_by_mvrange(&s_search_node, i4_step, ps_range_prms);
+
+ s_err_prms.i4_grid_mask = i4_grid_mask;
+ s_err_prms.pu1_ref = ppu1_ref[s_search_node.i1_ref_idx] + i4_ref_offset;
+ s_err_prms.pu1_ref +=
+ (s_search_node.s_mv.i2_mvx +
+ (s_search_node.s_mv.i2_mvy * s_err_prms.i4_ref_stride));
+
+ s_result_prms.i4_step = i4_step;
+ s_err_prms.i4_step = i4_step;
+ s_result_prms.i4_grid_mask = i4_grid_mask;
+
+ /* For Top,TopLeft and Left cand., get only center point SAD */
+ /* and do early exit */
+ if(0 == i4_num_refine)
+ {
+ s_err_prms.i4_grid_mask = 0x1;
+ s_result_prms.i4_grid_mask = 0x1;
+
+ /* sad pt fun. populates sad to 0th location, whereas update */
+ /* fun. takes it based on part. id */
+ s_err_prms.pi4_sad_grid =
+ s_result_prms.pi4_sad_grid + (1 * s_result_prms.pi4_valid_part_ids[0]);
+
+ ps_me_optimised_function_list->pf_evalsad_pt_npu_mxn_8bit(&s_err_prms);
+
+ s_err_prms.pi4_sad_grid = s_result_prms.pi4_sad_grid;
+
+ if(ME_XTREME_SPEED_25 == e_me_quality_preset)
+ hme_update_results_grid_pu_bestn_xtreme_speed(&s_result_prms);
+ else
+ hme_update_results_grid_pu_bestn(&s_result_prms);
+
+ i4_min_id = (S32)PT_C; /* Center Point */
+ i4_step = 0; /* No further refinement */
+ s_result_prms.i4_step = i4_step;
+ s_err_prms.i4_step = i4_step;
+ }
+ else
+ {
+ if(ME_XTREME_SPEED_25 == e_me_quality_preset)
+ {
+ err_prms_t *ps_err_prms = &s_err_prms;
+ ASSERT(ps_err_prms->i4_grid_mask != 1);
+ ASSERT((ps_err_prms->i4_part_mask == 4) || (ps_err_prms->i4_part_mask == 16));
+
+ /*****************************************************************/
+ /* In this case, there are no partial updates. The blk can be */
+ /* of any type and need not be a CU. The only thing that matters */
+ /* here is the width of the blk, 4/8/(>=16) */
+ /*****************************************************************/
+ ps_me_optimised_function_list->pf_evalsad_grid_npu_MxN(&s_err_prms);
+
+ hme_update_results_grid_pu_bestn_xtreme_speed(&s_result_prms);
+ }
+ else
+ {
+ /* Obtain SAD for all 9 pts in grid*/
+ hme_compute_grid_results(&s_err_prms, &s_result_prms, e_blk_size);
+ }
+
+ /* Best pt of this grid; the centre being best leads to the early exit below */
+ i4_min_id = s_result_prms.i4_min_id;
+ }
+
+ i4_grid_mask = gai4_opt_grid_mask[i4_min_id];
+
+ s_search_node.s_mv.i2_mvx += (i4_step * gai1_grid_id_to_x[i4_min_id]);
+ s_search_node.s_mv.i2_mvy += (i4_step * gai1_grid_id_to_y[i4_min_id]);
+ if(i4_min_id == (S32)PT_C)
+ break;
+ }
+
+ /* Next keep reducing stepsize by factor of 2 */
+ i4_step >>= 1;
+ while(i4_step)
+ {
+ i4_grid_mask = 0x1fe &
+ hme_clamp_grid_by_mvrange(&s_search_node, i4_step, ps_range_prms);
+ /* 0x1fe leaves out bit 0, i.e. the centre pt (cf. the 0x1 centre only mask above) */
+
+ s_err_prms.i4_grid_mask = i4_grid_mask;
+ s_result_prms.i4_grid_mask = i4_grid_mask;
+ s_err_prms.i4_step = i4_step;
+ s_result_prms.i4_step = i4_step;
+ s_err_prms.pu1_ref = ppu1_ref[s_search_node.i1_ref_idx] + i4_ref_offset;
+ s_err_prms.pu1_ref +=
+ (s_search_node.s_mv.i2_mvx +
+ (s_search_node.s_mv.i2_mvy * s_err_prms.i4_ref_stride));
+ if(ME_XTREME_SPEED_25 == e_me_quality_preset)
+ {
+ err_prms_t *ps_err_prms = &s_err_prms;
+ ASSERT(ps_err_prms->i4_grid_mask != 1);
+ ASSERT((ps_err_prms->i4_part_mask == 4) || (ps_err_prms->i4_part_mask == 16));
+
+ /*****************************************************************/
+ /* In this case, there are no partial updates. The blk can be */
+ /* of any type and need not be a CU. The only thing that matters */
+ /* here is the width of the blk, 4/8/(>=16) */
+ /*****************************************************************/
+ ps_me_optimised_function_list->pf_evalsad_grid_npu_MxN(&s_err_prms);
+
+ hme_update_results_grid_pu_bestn_xtreme_speed(&s_result_prms);
+ }
+ else
+ {
+ hme_compute_grid_results(&s_err_prms, &s_result_prms, e_blk_size);
+ }
+
+ i4_min_id = s_result_prms.i4_min_id;
+
+ s_search_node.s_mv.i2_mvx += (i4_step * gai1_grid_id_to_x[i4_min_id]);
+ s_search_node.s_mv.i2_mvy += (i4_step * gai1_grid_id_to_y[i4_min_id]);
+
+ i4_step >>= 1;
+ }
+
+ ps_search_candts++;
+ }
+}
+
+/**
+********************************************************************************
+* @fn hme_pred_search_square_step1(hme_search_prms_t *ps_search_prms,
+* layer_ctxt_t *ps_layer_ctxt)
+* @note No function named hme_pred_search_square_step1 is defined in this file.
+* @brief Implements predictive search with square grid refinement. In this
+* case, the square grid is of step 1 always. since this is considered
+* to be more of a refinement search
+*
+* @param[in,out] ps_search_prms: All the params to this function
+*
+* @param[in] ps_layer_ctxt: All info about this layer
+*
+* @return None
+********************************************************************************
+*/
+/**
+********************************************************************************
+*  @fn     void hme_pred_search(hme_search_prms_t *ps_search_prms,
+*                               layer_ctxt_t *ps_layer_ctxt,
+*                               wgt_pred_ctxt_t *ps_wt_inp_prms,
+*                               S08 i1_grid_flag,
+*                               ihevce_me_optimised_function_list_t
+*                                   *ps_me_optimised_function_list)
+*
+*  @brief  Implements predictive search on the candidate nodes carried in
+*          ps_search_prms. This function itself only prepares the SAD
+*          (err_prms_t) and result update (result_upd_prms_t) parameters for
+*          a step 1 search and hands them, with the reference planes, to the
+*          routine picked by hme_get_calc_sad_and_result_fxn().
+*
+*          NOTE(review): as per the original header the candidates are the
+*          unique nodes left after expanding each step 1 square grid to nine
+*          pts and removing duplicates; that deduplication is not done here.
+*
+*  @param[in,out] ps_search_prms : All the params to this function; its
+*                                  ps_search_results is handed on for update
+*
+*  @param[in] ps_layer_ctxt : All info about this layer; supplies the
+*                             reference planes and their stride
+*
+*  @param[in] ps_wt_inp_prms : Weighted input context, forwarded as is to
+*                              the selected routine
+*
+*  @param[in] i1_grid_flag : Forwarded to hme_get_calc_sad_and_result_fxn()
+*                            to select the routine
+*
+*  @param[in] ps_me_optimised_function_list : Not referenced by this function
+*
+*  @return None
+*
+*  @note   pi4_valid_part_ids is not set in either parameter structure by
+*          this function.
+********************************************************************************
+*/
+void hme_pred_search(
+    hme_search_prms_t *ps_search_prms,
+    layer_ctxt_t *ps_layer_ctxt,
+    wgt_pred_ctxt_t *ps_wt_inp_prms,
+    S08 i1_grid_flag,
+    ihevce_me_optimised_function_list_t *ps_me_optimised_function_list
+
+)
+{
+    /* Stores the SAD for all parts at each pt in the grid */
+    S32 ai4_sad_grid[9 * TOT_NUM_PARTS];
+
+    /* Search results to be updated and the fullpel refinement context */
+    search_results_t *ps_search_results = ps_search_prms->ps_search_results;
+    fullpel_refine_ctxt_t *ps_fullpel_refine_ctxt = ps_search_prms->ps_fullpel_refine_ctxt;
+
+    /* Input and reference attributes */
+    S32 i4_inp_stride = ps_search_prms->i4_inp_stride;
+    S32 i4_ref_stride;
+
+    /* The reference is actually an array of ptrs since there are several */
+    /* reference ids. So an array gets passed from the calling function   */
+    U08 **ppu1_ref;
+
+    /* This controls the number of parts to search */
+    S32 i4_part_mask = ps_search_prms->i4_part_mask;
+
+    /* Blk width and blk height are derived from the blk size */
+    BLK_SIZE_T e_blk_size = ps_search_prms->e_blk_size;
+    S32 i4_blk_wd, i4_blk_ht;
+
+    /*************************************************************************/
+    /* Function pointer for calculating Err and updating the results, along  */
+    /* with the two parameter structures, which are generated on the fly in  */
+    /* this function                                                         */
+    /*************************************************************************/
+    PF_CALC_SAD_AND_RESULT pf_calc_sad_and_result;
+    err_prms_t s_err_prms;
+    result_upd_prms_t s_result_prms;
+
+    /*************************************************************************/
+    /* Depending on flag i4_use_rec, we use either input of previously       */
+    /* encoded pictures or we use recon of previously encoded pictures.      */
+    /*************************************************************************/
+    if(ps_search_prms->i4_use_rec == 1)
+    {
+        i4_ref_stride = ps_layer_ctxt->i4_rec_stride;
+        ppu1_ref = ps_layer_ctxt->ppu1_list_rec_fxfy;
+    }
+    else
+    {
+        /* Input planes go with the input stride, not the recon stride, */
+        /* same as in the other search functions of this file           */
+        i4_ref_stride = ps_layer_ctxt->i4_inp_stride;
+        ppu1_ref = ps_layer_ctxt->ppu1_list_inp;
+    }
+
+    /* Obtain the blk size of the search blk. Assumed here that the search */
+    /* is done on a CU size, rather than any arbitrary blk size.           */
+    i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
+    i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
+
+    /* Update the parameters used to pass to SAD:                          */
+    /* strides, SAD Grid, part mask, blk width and ht, step and number of  */
+    /* partitions. The input / reference ptrs and the grid mask are not    */
+    /* set here; presumably the selected routine fills them in             */
+    s_err_prms.i4_inp_stride = i4_inp_stride;
+    s_err_prms.i4_ref_stride = i4_ref_stride;
+    s_err_prms.i4_part_mask = i4_part_mask;
+    s_err_prms.pi4_sad_grid = &ai4_sad_grid[0];
+    s_err_prms.i4_blk_wd = i4_blk_wd;
+    s_err_prms.i4_blk_ht = i4_blk_ht;
+    s_err_prms.i4_step = 1;
+    s_err_prms.i4_num_partitions = ps_fullpel_refine_ctxt->i4_num_valid_parts;
+
+    /* Parameters for the result update; it shares the SAD grid set above */
+    s_result_prms.pf_mv_cost_compute = ps_search_prms->pf_mv_cost_compute;
+    s_result_prms.ps_search_results = ps_search_results;
+    s_result_prms.i1_ref_idx = (S08)ps_search_prms->i1_ref_idx;
+    s_result_prms.pi4_sad_grid = ai4_sad_grid;
+    s_result_prms.i4_part_mask = i4_part_mask;
+    s_result_prms.i4_step = 1;
+
+    /* Pick the combined SAD + result update routine for this configuration */
+    pf_calc_sad_and_result = hme_get_calc_sad_and_result_fxn(
+        i1_grid_flag,
+        ps_search_prms->u1_is_cu_noisy,
+        i4_part_mask,
+        ps_fullpel_refine_ctxt->i4_num_valid_parts,
+        ps_search_results->u1_num_results_per_part);
+
+    pf_calc_sad_and_result(
+        ps_search_prms, ps_wt_inp_prms, &s_err_prms, &s_result_prms, ppu1_ref, i4_ref_stride);
+}
+
+/**
+********************************************************************************
+*  @fn     hme_get_calc_sad_and_result_explicit_fxn()
+*
+*  @brief  Picks the explicit "pt SAD and best result(s)" routine from the
+*          optimised function list. A part mask of 1 selects the "8x8"
+*          routine, or its "for_grid" flavour when i1_grid_enable is non
+*          zero; any other part mask selects the "8x8_4x4" routine. Each
+*          comes in a 1 and a 2 results per part version.
+*          i4_num_partitions is only ASSERTed (1 and 5 respectively).
+*
+*  @return Selected routine, or NULL if u1_num_results_per_part is neither
+*          1 nor 2 (no ASSERT is evaluated in that case)
+*
+********************************************************************************
+*/
+static __inline FT_CALC_SAD_AND_RESULT *hme_get_calc_sad_and_result_explicit_fxn(
+    ihevce_me_optimised_function_list_t *ps_me_optimised_function_list,
+    S32 i4_part_mask,
+    S32 i4_num_partitions,
+    S08 i1_grid_enable,
+    U08 u1_num_results_per_part)
+{
+    ihevce_me_optimised_function_list_t *ps_fxns = ps_me_optimised_function_list;
+    FT_CALC_SAD_AND_RESULT *pf_selected = NULL;
+    S32 i4_keep_two = (2 == u1_num_results_per_part);
+
+    /* Only 1 or 2 results per part have dedicated routines */
+    if(!i4_keep_two && (1 != u1_num_results_per_part))
+        return pf_selected;
+
+    if(i4_part_mask != 1)
+    {
+        ASSERT(i4_num_partitions == 5);
+
+        if(i4_keep_two)
+            pf_selected = ps_fxns->pf_calc_pt_sad_and_2_best_results_explicit_8x8_4x4;
+        else
+            pf_selected = ps_fxns->pf_calc_pt_sad_and_1_best_result_explicit_8x8_4x4;
+    }
+    else
+    {
+        ASSERT(i4_num_partitions == 1);
+
+        if(i1_grid_enable == 0)
+        {
+            if(i4_keep_two)
+                pf_selected = ps_fxns->pf_calc_pt_sad_and_2_best_results_explicit_8x8;
+            else
+                pf_selected = ps_fxns->pf_calc_pt_sad_and_1_best_result_explicit_8x8;
+        }
+        else
+        {
+            if(i4_keep_two)
+                pf_selected = ps_fxns->pf_calc_pt_sad_and_2_best_results_explicit_8x8_for_grid;
+            else
+                pf_selected = ps_fxns->pf_calc_pt_sad_and_1_best_result_explicit_8x8_for_grid;
+        }
+    }
+
+    return pf_selected;
+}
+
+/**
+********************************************************************************
+*  @fn     void hme_pred_search_no_encode(hme_search_prms_t *ps_search_prms,
+*                                         layer_ctxt_t *ps_layer_ctxt,
+*                                         wgt_pred_ctxt_t *ps_wt_inp_prms,
+*                                         S32 *pi4_valid_part_ids,
+*                                         S32 disable_refine,
+*                                         ME_QUALITY_PRESETS_T e_me_quality_preset,
+*                                         S08 i1_grid_enable,
+*                                         ihevce_me_optimised_function_list_t
+*                                             *ps_me_optimised_function_list)
+*
+*  @brief  Implements predictive search on the candidate nodes carried in
+*          ps_search_prms. This function itself builds the list of valid
+*          partition ids, prepares the SAD (err_prms_t) and result update
+*          (result_upd_prms_t) parameters for a step 1 search and hands them,
+*          with the reference planes, to the explicit routine picked from
+*          ps_me_optimised_function_list.
+*
+*  @param[in,out] ps_search_prms : All the params to this function; its
+*                                  ps_search_results is handed on for update
+*  @param[in] ps_layer_ctxt : All info about this layer; supplies the
+*                             reference planes and their stride
+*  @param[in] ps_wt_inp_prms : Forwarded as is to the selected routine
+*  @param[out] pi4_valid_part_ids : Filled by hme_create_valid_part_ids()
+*                                   from the part mask
+*  @param[in] disable_refine : Not referenced by this function
+*  @param[in] e_me_quality_preset : Not referenced by this function
+*  @param[in] i1_grid_enable : Non zero selects the "for_grid" routine when
+*                              the part mask is 1
+*  @param[in] ps_me_optimised_function_list : Table the routine is picked from
+*
+*  @return None
+*
+*  @note   NOTE(review): as per the original header the candidates are the
+*          unique nodes left after expanding each step 1 square grid to nine
+*          pts and removing duplicates; that deduplication is not done here.
+********************************************************************************
+*/
+void hme_pred_search_no_encode(
+    hme_search_prms_t *ps_search_prms,
+    layer_ctxt_t *ps_layer_ctxt,
+    wgt_pred_ctxt_t *ps_wt_inp_prms,
+    S32 *pi4_valid_part_ids,
+    S32 disable_refine,
+    ME_QUALITY_PRESETS_T e_me_quality_preset,
+    S08 i1_grid_enable,
+    ihevce_me_optimised_function_list_t *ps_me_optimised_function_list)
+{
+    /* Stores the SAD for all parts at each pt in the grid */
+    S32 ai4_sad_grid[9 * TOT_NUM_PARTS];
+
+    /* Search results to be updated */
+    search_results_t *ps_search_results = ps_search_prms->ps_search_results;
+
+    /* Input and reference attributes */
+    S32 i4_inp_stride = ps_search_prms->i4_inp_stride;
+    S32 i4_ref_stride;
+
+    /* The reference is actually an array of ptrs since there are several */
+    /* reference ids. So an array gets passed from the calling function   */
+    U08 **ppu1_ref;
+
+    /* These control the parts to search and how many of them there are */
+    S32 i4_part_mask = ps_search_prms->i4_part_mask;
+    S32 i4_num_partitions;
+
+    /* Blk width and blk height are derived from the blk size */
+    BLK_SIZE_T e_blk_size = ps_search_prms->e_blk_size;
+    S32 i4_blk_wd, i4_blk_ht;
+
+    /*************************************************************************/
+    /* Function pointer for calculating Err and updating the results, along  */
+    /* with the two parameter structures, which are generated on the fly in  */
+    /* this function                                                         */
+    /*************************************************************************/
+    PF_CALC_SAD_AND_RESULT pf_calc_sad_and_result;
+    err_prms_t s_err_prms;
+    result_upd_prms_t s_result_prms;
+
+    /*************************************************************************/
+    /* Depending on flag i4_use_rec, we use either input of previously       */
+    /* encoded pictures or we use recon of previously encoded pictures.      */
+    /*************************************************************************/
+    if(ps_search_prms->i4_use_rec == 1)
+    {
+        i4_ref_stride = ps_layer_ctxt->i4_rec_stride;
+        ppu1_ref = ps_layer_ctxt->ppu1_list_rec_fxfy;
+    }
+    else
+    {
+        i4_ref_stride = ps_layer_ctxt->i4_inp_stride;
+        ppu1_ref = ps_layer_ctxt->ppu1_list_inp;
+    }
+
+    /* Obtain the blk size of the search blk. Assumed here that the search */
+    /* is done on a CU size, rather than any arbitrary blk size.           */
+    i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
+    i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
+
+    /*************************************************************************/
+    /* This array stores the ids of the partitions whose                     */
+    /* SADs are updated. Since the partitions whose SADs are updated may not */
+    /* be in contiguous order, we supply another level of indirection.       */
+    /*************************************************************************/
+    i4_num_partitions = hme_create_valid_part_ids(i4_part_mask, pi4_valid_part_ids);
+
+    /* Update the parameters used to pass to SAD:                          */
+    /* strides, SAD Grid, part mask, blk width and ht, step, valid part    */
+    /* ids and their number. The input / reference ptrs and the grid mask  */
+    /* are not set here; presumably the selected routine fills them in     */
+    s_err_prms.i4_inp_stride = i4_inp_stride;
+    s_err_prms.i4_ref_stride = i4_ref_stride;
+    s_err_prms.i4_part_mask = i4_part_mask;
+    s_err_prms.pi4_sad_grid = &ai4_sad_grid[0];
+    s_err_prms.i4_blk_wd = i4_blk_wd;
+    s_err_prms.i4_blk_ht = i4_blk_ht;
+    s_err_prms.i4_step = 1;
+    s_err_prms.pi4_valid_part_ids = pi4_valid_part_ids;
+    s_err_prms.i4_num_partitions = i4_num_partitions;
+
+    /* Parameters for the result update; it shares the SAD grid set above */
+    s_result_prms.pf_mv_cost_compute = ps_search_prms->pf_mv_cost_compute;
+    s_result_prms.ps_search_results = ps_search_results;
+    s_result_prms.pi4_valid_part_ids = pi4_valid_part_ids;
+    s_result_prms.i1_ref_idx = (S08)ps_search_prms->i1_ref_idx;
+    s_result_prms.pi4_sad_grid = ai4_sad_grid;
+    s_result_prms.i4_part_mask = i4_part_mask;
+    s_result_prms.i4_step = 1;
+
+    /* Pick the combined SAD + result update routine for this configuration */
+    pf_calc_sad_and_result = hme_get_calc_sad_and_result_explicit_fxn(
+        ps_me_optimised_function_list,
+        i4_part_mask,
+        i4_num_partitions,
+        i1_grid_enable,
+        ps_search_results->u1_num_results_per_part);
+
+    /* The selector returns NULL when the number of results per part is */
+    /* neither 1 nor 2; flag that here instead of calling through NULL  */
+    ASSERT(NULL != pf_calc_sad_and_result);
+
+    pf_calc_sad_and_result(
+        ps_search_prms, ps_wt_inp_prms, &s_err_prms, &s_result_prms, ppu1_ref, i4_ref_stride);
+}
diff --git a/encoder/hme_search_algo.h b/encoder/hme_search_algo.h
new file mode 100644
index 0000000..eb06b7e
--- /dev/null
+++ b/encoder/hme_search_algo.h
@@ -0,0 +1,164 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file hme_search_algo.h
+*
+* \brief
+* contains prototypes for search algorithms called by coarse/refinement
+* layers.
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _HME_SEARCH_ALGO_H_
+#define _HME_SEARCH_ALGO_H_
+
+/*****************************************************************************/
+/* Functions */
+/*****************************************************************************/
+/**
+********************************************************************************
+* @fn void hme_pred_search_square_stepn(...) - full parameter list in the prototype below
+*
+* @brief Implements predictive search, with square grid refinement. In this
+* case, we start with a bigger step size, like 4, refining up to a
+* variable number of pts, till we hit end of search range or hit a
+* minimum. Then we refine using smaller steps. The bigger step sizes,
+* like 4 or 2, do not use optimized SAD functions; they evaluate
+* SAD for each individual pt.
+*
+* @param[in,out] ps_search_prms: All the params to this function
+* @param[in] ps_layer_ctxt: Context for the layer
+* @param ps_wt_inp_prms: not described in this header (TODO: document)
+* @param e_me_quality_preset: not described in this header (TODO: document)
+* @param ps_me_optimised_function_list: not described in this header (TODO: document)
+* @return None
+********************************************************************************
+*/
+void hme_pred_search_square_stepn(
+ hme_search_prms_t *ps_search_prms,
+ layer_ctxt_t *ps_layer_ctxt,
+ wgt_pred_ctxt_t *ps_wt_inp_prms,
+ ME_QUALITY_PRESETS_T e_me_quality_preset,
+ ihevce_me_optimised_function_list_t *ps_me_optimised_function_list);
+
+/**
+********************************************************************************
+* @fn void hme_do_fullsearch(hme_search_prms_t *ps_search_prms,
+* layer_ctxt_t *ps_layer_ctxt,
+* hme_mv_t *ps_best_mv,
+* pred_ctxt_t *ps_pred_ctxt,
+* PF_MV_COST_FXN pf_mv_cost_compute,
+* wgt_pred_ctxt_t *ps_wt_inp_prms,
+* ME_QUALITY_PRESETS_T e_me_quality_preset,
+* range_prms_t *ps_range_prms)
+*
+* @brief Does a full search on entire srch window with a given step size
+*
+* @param[in] ps_search_prms : Search prms structure containing info like
+* blk dimensions, search range etc
+* @param[in] ps_layer_ctxt: All info about this layer
+* @param[out] ps_best_mv : type hme_mv_t contains best mv x and y
+* @param[in] ps_pred_ctxt : Prediction ctxt for cost computation
+* @param[in] pf_mv_cost_compute : mv cost computation function
+* @param ps_wt_inp_prms, e_me_quality_preset, ps_range_prms : not described
+* in this header; see the definition (TODO: document)
+* @return void
+********************************************************************************
+*/
+void hme_do_fullsearch(
+ hme_search_prms_t *ps_search_prms,
+ layer_ctxt_t *ps_layer_ctxt,
+ hme_mv_t *ps_best_mv,
+ pred_ctxt_t *ps_pred_ctxt,
+ PF_MV_COST_FXN pf_mv_cost_compute,
+ wgt_pred_ctxt_t *ps_wt_inp_prms,
+ ME_QUALITY_PRESETS_T e_me_quality_preset,
+ range_prms_t *ps_range_prms);
+
+/**
+********************************************************************************
+* @fn void hme_pred_search(hme_search_prms_t *ps_search_prms,
+* layer_ctxt_t *ps_layer_ctxt,
+* wgt_pred_ctxt_t *ps_wt_inp_prms,
+* S08 i1_grid_flag,
+* ihevce_me_optimised_function_list_t *ps_me_optimised_function_list)
+*
+* @brief Implements predictive search after removing duplicate candidates
+* from initial list. Each square grid (of step 1) is expanded
+* to nine search pts before the deduplication process. One point
+* cost is then evaluated for each unique node after the deduplication
+* process.
+*
+* @param[in,out] ps_search_prms: All the params to this function
+* @param[in] ps_layer_ctxt: All info about this layer
+* @param ps_wt_inp_prms, i1_grid_flag, ps_me_optimised_function_list: not
+* described in this header; see the definition (TODO: document)
+*
+* @return None
+********************************************************************************
+*/
+void hme_pred_search(
+ hme_search_prms_t *ps_search_prms,
+ layer_ctxt_t *ps_layer_ctxt,
+ wgt_pred_ctxt_t *ps_wt_inp_prms,
+ S08 i1_grid_flag,
+ ihevce_me_optimised_function_list_t *ps_me_optimised_function_list);
+
+/**
+********************************************************************************
+* @fn void hme_pred_search_no_encode(hme_search_prms_t *ps_search_prms,
+* layer_ctxt_t *ps_layer_ctxt,
+* wgt_pred_ctxt_t *ps_wt_inp_prms,
+* S32 *pi4_valid_part_ids,
+* S32 disable_refine,
+* ME_QUALITY_PRESETS_T e_me_quality_preset,
+* S08 i1_grid_enable,
+* ihevce_me_optimised_function_list_t *ps_me_optimised_function_list)
+*
+* @brief Implements predictive search after removing duplicate candidates
+* from initial list. Each square grid (of step 1) is expanded
+* to nine search pts before the deduplication process. One point
+* cost is then evaluated for each unique node after the deduplication
+* process.
+* @param[in,out] ps_search_prms: All the params to this function
+* @param[in] ps_layer_ctxt: All info about this layer
+* @param (remaining six params): not described in this header (TODO: document)
+* @return None
+********************************************************************************
+*/
+void hme_pred_search_no_encode(
+ hme_search_prms_t *ps_search_prms,
+ layer_ctxt_t *ps_layer_ctxt,
+ wgt_pred_ctxt_t *ps_wt_inp_prms,
+ S32 *pi4_valid_part_ids,
+ S32 disable_refine,
+ ME_QUALITY_PRESETS_T e_me_quality_preset,
+ S08 i1_grid_enable,
+ ihevce_me_optimised_function_list_t *ps_me_optimised_function_list);
+
+#endif /* #ifndef _HME_SEARCH_ALGO_H_*/
diff --git a/encoder/hme_subpel.c b/encoder/hme_subpel.c
new file mode 100644
index 0000000..6d853b3
--- /dev/null
+++ b/encoder/hme_subpel.c
@@ -0,0 +1,4429 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file hme_subpel.c
+*
+* @brief
+* Subpel refinement modules for ME algo
+*
+* @author
+* Ittiam
+*
+*
+* List of Functions
+* hme_qpel_interp_avg()
+* hme_subpel_refine_ctblist_bck()
+* hme_subpel_refine_ctblist_fwd()
+* hme_refine_bidirect()
+* hme_subpel_refinement()
+* hme_subpel_refine_ctb_fwd()
+* hme_subpel_refine_ctb_bck()
+* hme_create_bck_inp()
+* hme_subpel_refine_search_node()
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+#include <limits.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_multi_thrd_funcs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_entropy_structs.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+#include "ihevce_enc_loop_structs.h"
+#include "ihevce_bs_compute_ctb.h"
+#include "ihevce_global_tables.h"
+#include "ihevce_dep_mngr_interface.h"
+#include "hme_datatype.h"
+#include "hme_interface.h"
+#include "hme_common_defs.h"
+#include "hme_defs.h"
+#include "ihevce_me_instr_set_router.h"
+#include "hme_globals.h"
+#include "hme_utils.h"
+#include "hme_coarse.h"
+#include "hme_fullpel.h"
+#include "hme_subpel.h"
+#include "hme_refine.h"
+#include "hme_err_compute.h"
+#include "hme_common_utils.h"
+#include "hme_search_algo.h"
+#include "ihevce_stasino_helpers.h"
+#include "ihevce_common_utils.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+/**
+********************************************************************************
+* @fn void hme_qpel_interp_avg(interp_prms_t *ps_prms, S32 i4_mv_x,
+*                              S32 i4_mv_y, S32 i4_buf_id)
+*
+* @brief Resolves the prediction block addressed by a QPEL motion vector.
+*
+*        One full pel neighbourhood, in quarter pel steps:
+*            A j E k B
+*            l m n o p
+*            F q G r H
+*            s t u v w
+*            C x I y D
+*        A, B, C, D are full pel pts at offsets (0,0), (1,0), (0,1), (1,1) in
+*        the fxfy buf (id 0). E and I are hxfy pts (buf id 1) at offsets
+*        (0,0), (0,1). F and H are fxhy pts (buf id 2) at offsets (0,0),
+*        (1,0). G is the hxhy pt (buf id 3) at offset (0,0). Offsets are
+*        w.r.t. the motion displaced pt (i4_mv_x >> 2, i4_mv_y >> 2) of the
+*        respective buf. Example: v has a fractional comp of (3,3) and is
+*        the avg of H and I, so its table entry holds buf id 2 with offset
+*        (1,0) and buf id 1 with offset (0,1).
+*
+*        The entry is looked up in gas_qpel_inp_buf_cfg using the fractional
+*        mv. When both of its sources name the same buf, the result points
+*        straight into that buf; otherwise the two sources are averaged into
+*        apu1_interp_out[i4_buf_id].
+*
+* @param[in,out] ps_prms : ref bufs, strides and blk size on input; on output
+*                pu1_final_out and i4_final_out_stride describe the result
+*
+* @param[in] i4_mv_x : x component of motion vector in QPEL units
+*
+* @param[in] i4_mv_y : y component of motion vector in QPEL units
+*
+* @param[in] i4_buf_id : index of the apu1_interp_out buf used for averaging
+*
+* @return None
+********************************************************************************
+*/
+void hme_qpel_interp_avg(interp_prms_t *ps_prms, S32 i4_mv_x, S32 i4_mv_y, S32 i4_buf_id)
+{
+    const S32 i4_stride = ps_prms->i4_ref_stride;
+    /* Integer part of the mv as an offset into a ref buf */
+    const S32 i4_fpel_offset = (i4_mv_x >> 2) + (i4_mv_y >> 2) * i4_stride;
+    /* Fractional part of the mv selects the source descriptor */
+    qpel_input_buf_cfg_t *ps_cfg = &gas_qpel_inp_buf_cfg[i4_mv_y & 3][i4_mv_x & 3];
+    U08 *pu1_first, *pu1_second, *pu1_avg;
+
+    pu1_first = ps_prms->ppu1_ref[ps_cfg->i1_buf_id1];
+    pu1_first += ps_cfg->i1_buf_xoff1 + i4_fpel_offset;
+    pu1_first += ps_cfg->i1_buf_yoff1 * i4_stride;
+
+    if(ps_cfg->i1_buf_id1 == ps_cfg->i1_buf_id2)
+    {
+        /* fxfy/hxfy/fxhy/hxhy pt: nothing to average, use the ref buf as is */
+        ps_prms->pu1_final_out = pu1_first;
+        ps_prms->i4_final_out_stride = i4_stride;
+        return;
+    }
+
+    /* Remaining pts are the average of two distinct sources */
+    pu1_second = ps_prms->ppu1_ref[ps_cfg->i1_buf_id2];
+    pu1_second += ps_cfg->i1_buf_xoff2 + i4_fpel_offset;
+    pu1_second += ps_cfg->i1_buf_yoff2 * i4_stride;
+
+    pu1_avg = ps_prms->apu1_interp_out[i4_buf_id];
+    hevc_avg_2d(pu1_first, pu1_second, i4_stride, i4_stride, ps_prms->i4_blk_wd,
+        ps_prms->i4_blk_ht, pu1_avg, ps_prms->i4_out_stride);
+
+    ps_prms->pu1_final_out = pu1_avg;
+    ps_prms->i4_final_out_stride = ps_prms->i4_out_stride;
+}
+
+/* Vertical pair without reuse: one call for mv_y + 1 (tag 3), one for mv_y - 1 (tag 1) */
+static __inline void hme_qpel_interp_avg_2pt_vert_no_reuse(
+    interp_prms_t *ps_prms, S32 i4_mv_x, S32 i4_mv_y,
+    U08 **ppu1_final, S32 *pi4_final_stride,
+    FT_QPEL_INTERP_AVG_1PT *pf_qpel_interp_avg_1pt)
+{
+    const S32 i4_mv_y_plus1 = i4_mv_y + 1;
+    const S32 i4_mv_y_minus1 = i4_mv_y - 1;
+
+    pf_qpel_interp_avg_1pt(ps_prms, i4_mv_x, i4_mv_y_plus1, 3, ppu1_final, pi4_final_stride);
+    pf_qpel_interp_avg_1pt(ps_prms, i4_mv_x, i4_mv_y_minus1, 1, ppu1_final, pi4_final_stride);
+}
+
+/* Horizontal pair without reuse: one call for mv_x + 1 (tag 2), one for mv_x - 1 (tag 0) */
+static __inline void hme_qpel_interp_avg_2pt_horz_no_reuse(
+    interp_prms_t *ps_prms, S32 i4_mv_x, S32 i4_mv_y,
+    U08 **ppu1_final, S32 *pi4_final_stride,
+    FT_QPEL_INTERP_AVG_1PT *pf_qpel_interp_avg_1pt)
+{
+    const S32 i4_mv_x_plus1 = i4_mv_x + 1;
+    const S32 i4_mv_x_minus1 = i4_mv_x - 1;
+
+    pf_qpel_interp_avg_1pt(ps_prms, i4_mv_x_plus1, i4_mv_y, 2, ppu1_final, pi4_final_stride);
+    pf_qpel_interp_avg_1pt(ps_prms, i4_mv_x_minus1, i4_mv_y, 0, ppu1_final, pi4_final_stride);
+}
+
+/********************************************************************************
+* @fn hme_qpel_interp_comprehensive
+*
+* @brief Sets up predictions for the pts one QPEL step to the left, top, right
+* and bottom of (i4_mv_x, i4_mv_y), as selected by i4_grid_mask. Output
+* slots 0, 1, 2, 3 correspond to mv (x-1,y), (x,y-1), (x+1,y), (x,y+1).
+* hpel/fpel pts are addressed directly in the ref bufs; true qpel pts
+* are delegated to the routines in ps_me_optimised_function_list.
+*
+* @param[in,out] ps_prms: Both input buffer ptrs and location of output
+* @param[out] ppu1_final : storage for final buffer pointers
+* @param[out] pi4_final_stride : storage for final buffer strides
+* @param[in] i4_mv_x : x component of motion vector in QPEL units
+* @param[in] i4_mv_y : y component of motion vector in QPEL units
+* @param[in] i4_grid_mask : mask which determines qpels to be computed
+* @param[in] ps_me_optimised_function_list : table of qpel averaging routines
+*
+* @return None
+********************************************************************************
+*/
+static __inline void hme_qpel_interp_comprehensive(
+ interp_prms_t *ps_prms,
+ U08 **ppu1_final,
+ S32 *pi4_final_stride,
+ S32 i4_mv_x,
+ S32 i4_mv_y,
+ S32 i4_grid_mask,
+ ihevce_me_optimised_function_list_t *ps_me_optimised_function_list)
+{
+ S32 pt_select_for_TB, pt_select_for_LR;
+ S32 dx, dy, dydx;
+ S32 vert_func_selector, horz_func_selector;
+
+ S32 i4_ref_stride = ps_prms->i4_ref_stride;
+ /* 2-bit selectors: bit 0 = B (resp. R) requested, bit 1 = T (resp. L) requested */
+ pt_select_for_TB =
+ ((i4_grid_mask & (1 << PT_B)) >> PT_B) + ((i4_grid_mask & (1 << PT_T)) >> (PT_T - 1));
+
+ pt_select_for_LR =
+ ((i4_grid_mask & (1 << PT_R)) >> PT_R) + ((i4_grid_mask & (1 << PT_L)) >> (PT_L - 1));
+ /* Fractional mv folded into a single table index: dydx = dx + 4 * dy */
+ dx = (i4_mv_x & 3);
+ dy = (i4_mv_y & 3);
+ dydx = (dx + (dy << 2));
+
+ vert_func_selector = gai4_select_qpel_function_vert[pt_select_for_TB][dydx];
+ horz_func_selector = gai4_select_qpel_function_horz[pt_select_for_LR][dydx];
+
+ /* case descriptions (selector read from gai4_select_qpel_function_vert) */
+ /* Let T = (gridmask & T) & B = (gridmask & B) */
+ /* & hp = requested pt is an hpel or an fpel */
+ /* & r = reuse possible */
+ /* 0 => (!T) && (!B), nothing to do */
+ /* 1 => (!T) && (B) && hp */
+ /* 2 => (T) && (!B) && hp */
+ /* 3 => (!T) && (B) && !hp */
+ /* 4 => (T) && (!B) && !hp */
+ /* 5 => (T) && (B) && !hp && r */
+ /* 6 => (T) && (B) && !hp && !r */
+ /* 7 => (T) && (B) && hp */
+
+ switch(vert_func_selector)
+ {
+ case 0:
+ {
+ break;
+ }
+ case 1:
+ {
+ S32 i4_mv_x_frac, i4_mv_y_frac, i4_offset;
+ qpel_input_buf_cfg_t *ps_inp_cfg;
+ S32 i4_mvyp1 = (i4_mv_y + 1);
+
+ i4_mv_x_frac = dx;
+ i4_mv_y_frac = i4_mvyp1 & 3;
+
+ i4_offset = (i4_mv_x >> 2) + (i4_mvyp1 >> 2) * i4_ref_stride;
+
+ /* Bottom pt (mv_y + 1) -> slot 3; only source 1 of its descriptor is read */
+ ps_inp_cfg = &gas_qpel_inp_buf_cfg[i4_mv_y_frac][i4_mv_x_frac];
+
+ ppu1_final[3] = ps_prms->ppu1_ref[ps_inp_cfg->i1_buf_id1];
+ ppu1_final[3] += ps_inp_cfg->i1_buf_xoff1 + i4_offset;
+ ppu1_final[3] += (ps_inp_cfg->i1_buf_yoff1 * i4_ref_stride);
+ pi4_final_stride[3] = i4_ref_stride;
+
+ break;
+ }
+ case 2:
+ {
+ S32 i4_mv_x_frac, i4_mv_y_frac, i4_offset;
+ qpel_input_buf_cfg_t *ps_inp_cfg;
+ S32 i4_mvym1 = (i4_mv_y - 1);
+
+ i4_mv_x_frac = dx;
+ i4_mv_y_frac = i4_mvym1 & 3;
+
+ i4_offset = (i4_mv_x >> 2) + (i4_mvym1 >> 2) * i4_ref_stride;
+
+ /* Top pt (mv_y - 1) -> slot 1; only source 1 of its descriptor is read */
+ ps_inp_cfg = &gas_qpel_inp_buf_cfg[i4_mv_y_frac][i4_mv_x_frac];
+
+ ppu1_final[1] = ps_prms->ppu1_ref[ps_inp_cfg->i1_buf_id1];
+ ppu1_final[1] += ps_inp_cfg->i1_buf_xoff1 + i4_offset;
+ ppu1_final[1] += (ps_inp_cfg->i1_buf_yoff1 * i4_ref_stride);
+ pi4_final_stride[1] = i4_ref_stride;
+
+ break;
+ }
+ case 3:
+ {
+ ps_me_optimised_function_list->pf_qpel_interp_avg_1pt(
+ ps_prms, i4_mv_x, i4_mv_y + 1, 3, ppu1_final, pi4_final_stride);
+
+ break;
+ }
+ case 4:
+ {
+ ps_me_optimised_function_list->pf_qpel_interp_avg_1pt(
+ ps_prms, i4_mv_x, i4_mv_y - 1, 1, ppu1_final, pi4_final_stride);
+
+ break;
+ }
+ case 5:
+ {
+ ps_me_optimised_function_list->pf_qpel_interp_avg_2pt_vert_with_reuse(
+ ps_prms, i4_mv_x, i4_mv_y, ppu1_final, pi4_final_stride);
+ break;
+ }
+ case 6:
+ {
+ hme_qpel_interp_avg_2pt_vert_no_reuse(
+ ps_prms,
+ i4_mv_x,
+ i4_mv_y,
+ ppu1_final,
+ pi4_final_stride,
+ ps_me_optimised_function_list->pf_qpel_interp_avg_1pt);
+ break;
+ }
+ case 7:
+ {
+ S32 i4_mv_x_frac, i4_mv_y_frac, i4_offset;
+ qpel_input_buf_cfg_t *ps_inp_cfg;
+
+ S32 i4_mvyp1 = (i4_mv_y + 1);
+ S32 i4_mvym1 = (i4_mv_y - 1);
+
+ i4_mv_x_frac = dx;
+ i4_mv_y_frac = i4_mvyp1 & 3;
+
+ i4_offset = (i4_mv_x >> 2) + (i4_mvyp1 >> 2) * i4_ref_stride;
+
+ /* Bottom pt (mv_y + 1) -> slot 3; only source 1 of its descriptor is read */
+ ps_inp_cfg = &gas_qpel_inp_buf_cfg[i4_mv_y_frac][i4_mv_x_frac];
+
+ ppu1_final[3] = ps_prms->ppu1_ref[ps_inp_cfg->i1_buf_id1];
+ ppu1_final[3] += ps_inp_cfg->i1_buf_xoff1 + i4_offset;
+ ppu1_final[3] += (ps_inp_cfg->i1_buf_yoff1 * i4_ref_stride);
+ pi4_final_stride[3] = i4_ref_stride;
+
+ i4_mv_y_frac = i4_mvym1 & 3;
+
+ i4_offset = (i4_mv_x >> 2) + (i4_mvym1 >> 2) * i4_ref_stride;
+
+ /* Top pt (mv_y - 1) -> slot 1, addressed the same way */
+ ps_inp_cfg = &gas_qpel_inp_buf_cfg[i4_mv_y_frac][i4_mv_x_frac];
+
+ ppu1_final[1] = ps_prms->ppu1_ref[ps_inp_cfg->i1_buf_id1];
+ ppu1_final[1] += ps_inp_cfg->i1_buf_xoff1 + i4_offset;
+ ppu1_final[1] += (ps_inp_cfg->i1_buf_yoff1 * i4_ref_stride);
+ pi4_final_stride[1] = i4_ref_stride;
+
+ break;
+ }
+ }
+
+ /* case descriptions (selector read from gai4_select_qpel_function_horz) */
+ /* Let L = (gridmask & L) & R = (gridmask & R) */
+ /* & hp = requested pt is an hpel or an fpel */
+ /* & r = reuse possible */
+ /* 0 => (!L) && (!R), nothing to do */
+ /* 1 => (!L) && (R) && hp */
+ /* 2 => (L) && (!R) && hp */
+ /* 3 => (!L) && (R) && !hp */
+ /* 4 => (L) && (!R) && !hp */
+ /* 5 => (L) && (R) && !hp && r */
+ /* 6 => (L) && (R) && !hp && !r */
+ /* 7 => (L) && (R) && hp */
+
+ switch(horz_func_selector)
+ {
+ case 0:
+ {
+ break;
+ }
+ case 1:
+ {
+ S32 i4_mv_x_frac, i4_mv_y_frac, i4_offset;
+ qpel_input_buf_cfg_t *ps_inp_cfg;
+ S32 i4_mvxp1 = (i4_mv_x + 1);
+
+ i4_mv_x_frac = i4_mvxp1 & 3;
+ i4_mv_y_frac = dy;
+
+ i4_offset = (i4_mvxp1 >> 2) + (i4_mv_y >> 2) * i4_ref_stride;
+
+ /* Right pt (mv_x + 1) -> slot 2; only source 1 of its descriptor is read */
+ ps_inp_cfg = &gas_qpel_inp_buf_cfg[i4_mv_y_frac][i4_mv_x_frac];
+
+ ppu1_final[2] = ps_prms->ppu1_ref[ps_inp_cfg->i1_buf_id1];
+ ppu1_final[2] += ps_inp_cfg->i1_buf_xoff1 + i4_offset;
+ ppu1_final[2] += (ps_inp_cfg->i1_buf_yoff1 * i4_ref_stride);
+ pi4_final_stride[2] = i4_ref_stride;
+
+ break;
+ }
+ case 2:
+ {
+ S32 i4_mv_x_frac, i4_mv_y_frac, i4_offset;
+ qpel_input_buf_cfg_t *ps_inp_cfg;
+ S32 i4_mvxm1 = (i4_mv_x - 1);
+
+ i4_mv_x_frac = i4_mvxm1 & 3;
+ i4_mv_y_frac = dy;
+
+ i4_offset = (i4_mvxm1 >> 2) + (i4_mv_y >> 2) * i4_ref_stride;
+
+ /* Left pt (mv_x - 1) -> slot 0; only source 1 of its descriptor is read */
+ ps_inp_cfg = &gas_qpel_inp_buf_cfg[i4_mv_y_frac][i4_mv_x_frac];
+
+ ppu1_final[0] = ps_prms->ppu1_ref[ps_inp_cfg->i1_buf_id1];
+ ppu1_final[0] += ps_inp_cfg->i1_buf_xoff1 + i4_offset;
+ ppu1_final[0] += (ps_inp_cfg->i1_buf_yoff1 * i4_ref_stride);
+ pi4_final_stride[0] = i4_ref_stride;
+
+ break;
+ }
+ case 3:
+ {
+ ps_me_optimised_function_list->pf_qpel_interp_avg_1pt(
+ ps_prms, i4_mv_x + 1, i4_mv_y, 2, ppu1_final, pi4_final_stride);
+
+ break;
+ }
+ case 4:
+ {
+ ps_me_optimised_function_list->pf_qpel_interp_avg_1pt(
+ ps_prms, i4_mv_x - 1, i4_mv_y, 0, ppu1_final, pi4_final_stride);
+
+ break;
+ }
+ case 5:
+ {
+ ps_me_optimised_function_list->pf_qpel_interp_avg_2pt_horz_with_reuse(
+ ps_prms, i4_mv_x, i4_mv_y, ppu1_final, pi4_final_stride);
+ break;
+ }
+ case 6:
+ {
+ hme_qpel_interp_avg_2pt_horz_no_reuse(
+ ps_prms,
+ i4_mv_x,
+ i4_mv_y,
+ ppu1_final,
+ pi4_final_stride,
+ ps_me_optimised_function_list->pf_qpel_interp_avg_1pt);
+ break;
+ }
+ case 7:
+ {
+ S32 i4_mv_x_frac, i4_mv_y_frac, i4_offset;
+ qpel_input_buf_cfg_t *ps_inp_cfg;
+
+ S32 i4_mvxp1 = (i4_mv_x + 1);
+ S32 i4_mvxm1 = (i4_mv_x - 1);
+
+ i4_mv_x_frac = i4_mvxp1 & 3;
+ i4_mv_y_frac = dy;
+
+ i4_offset = (i4_mvxp1 >> 2) + (i4_mv_y >> 2) * i4_ref_stride;
+
+ /* Right pt (mv_x + 1) -> slot 2; only source 1 of its descriptor is read */
+ ps_inp_cfg = &gas_qpel_inp_buf_cfg[i4_mv_y_frac][i4_mv_x_frac];
+
+ ppu1_final[2] = ps_prms->ppu1_ref[ps_inp_cfg->i1_buf_id1];
+ ppu1_final[2] += ps_inp_cfg->i1_buf_xoff1 + i4_offset;
+ ppu1_final[2] += (ps_inp_cfg->i1_buf_yoff1 * i4_ref_stride);
+ pi4_final_stride[2] = i4_ref_stride;
+
+ i4_mv_x_frac = i4_mvxm1 & 3;
+
+ i4_offset = (i4_mvxm1 >> 2) + (i4_mv_y >> 2) * i4_ref_stride;
+
+ /* Left pt (mv_x - 1) -> slot 0, addressed the same way */
+ ps_inp_cfg = &gas_qpel_inp_buf_cfg[i4_mv_y_frac][i4_mv_x_frac];
+
+ ppu1_final[0] = ps_prms->ppu1_ref[ps_inp_cfg->i1_buf_id1];
+ ppu1_final[0] += ps_inp_cfg->i1_buf_xoff1 + i4_offset;
+ ppu1_final[0] += (ps_inp_cfg->i1_buf_yoff1 * i4_ref_stride);
+ pi4_final_stride[0] = i4_ref_stride;
+
+ break;
+ }
+ }
+}
+
+/**
+********************************************************************************
+* @fn S32 hme_compute_pred_and_evaluate_bi(hme_subpel_prms_t *ps_prms,
+* search_results_t *ps_search_results,
+* layer_ctxt_t *ps_curr_layer,
+* U08 **ppu1_pred)
+*
+*
+* @brief Evaluates the best bipred cost as avg(P0, P1) where P0 and P1 are
+* best L0 and L1 bufs respectively for the entire CU
+*
+* @param[in] ps_prms: subpel prms input to this function
+*
+* @param[in] ps_curr_layer: points to the current layer ctxt
+*
+* @return The best BI cost of best uni cost, whichever better
+********************************************************************************
+*/
+void hme_compute_pred_and_evaluate_bi(
+ inter_cu_results_t *ps_cu_results,
+ inter_pu_results_t *ps_pu_results,
+ inter_ctb_prms_t *ps_inter_ctb_prms,
+ part_type_results_t *ps_part_type_result,
+ ULWORD64 *pu8_winning_pred_sigmaXSquare,
+ ULWORD64 *pu8_winning_pred_sigmaX,
+ ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list,
+ ihevce_me_optimised_function_list_t *ps_me_optimised_function_list)
+{
+ /* Idx0 - Uni winner */
+ /* Idx1 - Uni runner-up */
+ /* Idx2 - Bi winner */
+ hme_pred_buf_info_t as_pred_buf_data[3][NUM_INTER_PU_PARTS];
+ err_prms_t s_err_prms;
+ interp_prms_t s_interp_prms;
+
+ PF_SAD_FXN_T pf_err_compute;
+
+ S32 i, j;
+ S32 x_off, y_off, x_pic, y_pic;
+ S32 i4_sad_grid;
+ U08 e_cu_size;
+ S32 i4_part_type;
+ U08 u1_cu_size;
+ S32 shift;
+ S32 x_part, y_part, num_parts;
+ S32 inp_stride, ref_stride;
+ U08 au1_pred_buf_array_indixes[3];
+ S32 cur_iter_best_cost;
+ S32 uni_cost, bi_cost, best_cost, tot_cost;
+ /* Idx0 - Uni winner */
+ /* Idx1 - Bi winner */
+ ULWORD64 au8_sigmaX[2][NUM_INTER_PU_PARTS];
+ ULWORD64 au8_sigmaXSquared[2][NUM_INTER_PU_PARTS];
+#if USE_NOISE_TERM_DURING_BICAND_SEARCH
+ S32 i4_noise_term;
+#endif
+
+ interp_prms_t *ps_interp_prms = &s_interp_prms;
+
+ S32 best_cand_in_opp_dir_idx = 0;
+ S32 is_best_cand_an_intra = 0;
+ U08 u1_is_cu_noisy = ps_inter_ctb_prms->u1_is_cu_noisy;
+#if USE_NOISE_TERM_DURING_BICAND_SEARCH
+ const S32 i4_default_src_wt = ((1 << 15) + (WGHT_DEFAULT >> 1)) / WGHT_DEFAULT;
+#endif
+ tot_cost = 0;
+
+ /* Start of the CU w.r.t. CTB */
+ x_off = ps_cu_results->u1_x_off;
+ y_off = ps_cu_results->u1_y_off;
+
+ inp_stride = ps_inter_ctb_prms->i4_inp_stride;
+ ref_stride = ps_inter_ctb_prms->i4_rec_stride;
+
+ ps_interp_prms->i4_ref_stride = ref_stride;
+
+ /* Start of the CU w.r.t. Pic 0,0 */
+ x_pic = x_off + ps_inter_ctb_prms->i4_ctb_x_off;
+ y_pic = y_off + ps_inter_ctb_prms->i4_ctb_y_off;
+
+ u1_cu_size = ps_cu_results->u1_cu_size;
+ e_cu_size = u1_cu_size;
+ shift = (S32)e_cu_size;
+ i4_part_type = ps_part_type_result->u1_part_type;
+ num_parts = gau1_num_parts_in_part_type[i4_part_type];
+
+ for(i = 0; i < 3; i++)
+ {
+ hme_init_pred_buf_info(
+ &as_pred_buf_data[i],
+ &ps_inter_ctb_prms->s_pred_buf_mngr,
+ (ps_part_type_result->as_pu_results->pu.b4_wd + 1) << 2,
+ (ps_part_type_result->as_pu_results->pu.b4_ht + 1) << 2,
+ (PART_TYPE_T)i4_part_type);
+
+ au1_pred_buf_array_indixes[i] = as_pred_buf_data[i][0].u1_pred_buf_array_id;
+ }
+
+ for(j = 0; j < num_parts; j++)
+ {
+ UWORD8 *apu1_hpel_ref[2][4];
+ PART_ID_T e_part_id;
+ BLK_SIZE_T e_blk_size;
+ WORD8 i1_ref_idx;
+ UWORD8 pred_dir;
+ WORD32 ref_offset, inp_offset, wd, ht;
+ pu_result_t *ps_pu_node1, *ps_pu_node2, *ps_pu_result;
+ mv_t *aps_mv[2];
+ UWORD8 num_active_ref_opp;
+ UWORD8 num_results_per_part;
+ WORD32 luma_weight_ref1, luma_offset_ref1;
+ WORD32 luma_weight_ref2, luma_offset_ref2;
+ WORD32 pu_node2_found = 0;
+
+ e_part_id = ge_part_type_to_part_id[i4_part_type][j];
+ e_blk_size = ge_part_id_to_blk_size[e_cu_size][e_part_id];
+
+ x_part = gas_part_attr_in_cu[e_part_id].u1_x_start << shift;
+ y_part = gas_part_attr_in_cu[e_part_id].u1_y_start << shift;
+
+ ref_offset = (x_part + x_pic) + (y_pic + y_part) * ref_stride;
+ inp_offset = (x_part + y_part * inp_stride) + ps_cu_results->i4_inp_offset;
+
+ pred_dir = ps_part_type_result->as_pu_results[j].pu.b2_pred_mode;
+
+ ps_pu_node1 = &(ps_part_type_result->as_pu_results[j]);
+
+ if(PRED_L0 == pred_dir)
+ {
+ i1_ref_idx = ps_pu_node1->pu.mv.i1_l0_ref_idx;
+ aps_mv[0] = &(ps_pu_node1->pu.mv.s_l0_mv);
+
+ num_active_ref_opp =
+ ps_inter_ctb_prms->u1_num_active_ref_l1 * (ps_inter_ctb_prms->i4_bidir_enabled);
+ num_results_per_part = ps_pu_results->u1_num_results_per_part_l0[e_part_id];
+
+ ps_pu_result = ps_pu_results->aps_pu_results[PRED_L0][e_part_id];
+
+ ASSERT(i1_ref_idx >= 0);
+
+ apu1_hpel_ref[0][0] =
+ (UWORD8 *)(ps_inter_ctb_prms->pps_rec_list_l0[i1_ref_idx]->s_yuv_buf_desc.pv_y_buf) +
+ ref_offset;
+ apu1_hpel_ref[0][1] =
+ ps_inter_ctb_prms->pps_rec_list_l0[i1_ref_idx]->apu1_y_sub_pel_planes[0] +
+ ref_offset;
+ apu1_hpel_ref[0][2] =
+ ps_inter_ctb_prms->pps_rec_list_l0[i1_ref_idx]->apu1_y_sub_pel_planes[1] +
+ ref_offset;
+ apu1_hpel_ref[0][3] =
+ ps_inter_ctb_prms->pps_rec_list_l0[i1_ref_idx]->apu1_y_sub_pel_planes[2] +
+ ref_offset;
+
+ luma_weight_ref1 = (WORD32)ps_inter_ctb_prms->pps_rec_list_l0[i1_ref_idx]
+ ->s_weight_offset.i2_luma_weight;
+ luma_offset_ref1 = (WORD32)ps_inter_ctb_prms->pps_rec_list_l0[i1_ref_idx]
+ ->s_weight_offset.i2_luma_offset;
+ }
+ else
+ {
+ i1_ref_idx = ps_pu_node1->pu.mv.i1_l1_ref_idx;
+ aps_mv[0] = &(ps_pu_node1->pu.mv.s_l1_mv);
+
+ ASSERT(i1_ref_idx >= 0);
+
+ num_active_ref_opp =
+ ps_inter_ctb_prms->u1_num_active_ref_l0 * (ps_inter_ctb_prms->i4_bidir_enabled);
+ num_results_per_part = ps_pu_results->u1_num_results_per_part_l1[e_part_id];
+
+ ps_pu_result = ps_pu_results->aps_pu_results[PRED_L1][e_part_id];
+
+ apu1_hpel_ref[0][0] =
+ (UWORD8 *)(ps_inter_ctb_prms->pps_rec_list_l1[i1_ref_idx]->s_yuv_buf_desc.pv_y_buf) +
+ ref_offset;
+ apu1_hpel_ref[0][1] =
+ ps_inter_ctb_prms->pps_rec_list_l1[i1_ref_idx]->apu1_y_sub_pel_planes[0] +
+ ref_offset;
+ apu1_hpel_ref[0][2] =
+ ps_inter_ctb_prms->pps_rec_list_l1[i1_ref_idx]->apu1_y_sub_pel_planes[1] +
+ ref_offset;
+ apu1_hpel_ref[0][3] =
+ ps_inter_ctb_prms->pps_rec_list_l1[i1_ref_idx]->apu1_y_sub_pel_planes[2] +
+ ref_offset;
+
+ luma_weight_ref1 = (WORD32)ps_inter_ctb_prms->pps_rec_list_l1[i1_ref_idx]
+ ->s_weight_offset.i2_luma_weight;
+ luma_offset_ref1 = (WORD32)ps_inter_ctb_prms->pps_rec_list_l1[i1_ref_idx]
+ ->s_weight_offset.i2_luma_offset;
+ }
+
+ if(aps_mv[0]->i2_mvx == INTRA_MV)
+ {
+ uni_cost = ps_pu_node1->i4_tot_cost;
+ cur_iter_best_cost = ps_pu_node1->i4_tot_cost;
+ best_cost = MIN(uni_cost, cur_iter_best_cost);
+ tot_cost += best_cost;
+ continue;
+ }
+
+ ps_interp_prms->i4_blk_wd = wd = gau1_blk_size_to_wd[e_blk_size];
+ ps_interp_prms->i4_blk_ht = ht = gau1_blk_size_to_ht[e_blk_size];
+ ps_interp_prms->i4_out_stride = MAX_CU_SIZE;
+
+ if(num_active_ref_opp)
+ {
+ if(PRED_L0 == pred_dir)
+ {
+ if(ps_pu_results->u1_num_results_per_part_l1[e_part_id])
+ {
+ ps_pu_node2 = ps_pu_results->aps_pu_results[1][e_part_id];
+ pu_node2_found = 1;
+ }
+ }
+ else
+ {
+ if(ps_pu_results->u1_num_results_per_part_l0[e_part_id])
+ {
+ ps_pu_node2 = ps_pu_results->aps_pu_results[0][e_part_id];
+ pu_node2_found = 1;
+ }
+ }
+ }
+
+ if(!pu_node2_found)
+ {
+ bi_cost = INT_MAX >> 1;
+
+ s_interp_prms.apu1_interp_out[0] = as_pred_buf_data[0][j].pu1_pred;
+ ps_interp_prms->ppu1_ref = &apu1_hpel_ref[0][0];
+
+ ps_me_optimised_function_list->pf_qpel_interp_avg_generic(
+ ps_interp_prms, aps_mv[0]->i2_mvx, aps_mv[0]->i2_mvy, 0);
+
+ if(ps_interp_prms->pu1_final_out != s_interp_prms.apu1_interp_out[0])
+ {
+ as_pred_buf_data[0][j].u1_pred_buf_array_id = UCHAR_MAX;
+ as_pred_buf_data[0][j].pu1_pred = ps_interp_prms->pu1_final_out;
+ as_pred_buf_data[0][j].i4_pred_stride = ps_interp_prms->i4_final_out_stride;
+ }
+
+ if(u1_is_cu_noisy && ps_inter_ctb_prms->i4_alpha_stim_multiplier)
+ {
+ hme_compute_sigmaX_and_sigmaXSquared(
+ as_pred_buf_data[0][j].pu1_pred,
+ as_pred_buf_data[0][j].i4_pred_stride,
+ &au8_sigmaX[0][j],
+ &au8_sigmaXSquared[0][j],
+ ps_interp_prms->i4_blk_wd,
+ ps_interp_prms->i4_blk_ht,
+ ps_interp_prms->i4_blk_wd,
+ ps_interp_prms->i4_blk_ht,
+ 0,
+ 1);
+ }
+ }
+ else
+ {
+ i = 0;
+ bi_cost = MAX_32BIT_VAL;
+ is_best_cand_an_intra = 0;
+ best_cand_in_opp_dir_idx = 0;
+
+ pred_dir = ps_pu_node2[i].pu.b2_pred_mode;
+
+ if(PRED_L0 == pred_dir)
+ {
+ i1_ref_idx = ps_pu_node2[i].pu.mv.i1_l0_ref_idx;
+ aps_mv[1] = &(ps_pu_node2[i].pu.mv.s_l0_mv);
+
+ ASSERT(i1_ref_idx >= 0);
+
+ apu1_hpel_ref[1][0] =
+ (UWORD8 *)(ps_inter_ctb_prms->pps_rec_list_l0[i1_ref_idx]
+ ->s_yuv_buf_desc.pv_y_buf) +
+ ref_offset; //>ppu1_list_rec_fxfy[0][i1_ref_idx] + ref_offset;
+ apu1_hpel_ref[1][1] =
+ ps_inter_ctb_prms->pps_rec_list_l0[i1_ref_idx]->apu1_y_sub_pel_planes[0] +
+ ref_offset;
+ apu1_hpel_ref[1][2] =
+ ps_inter_ctb_prms->pps_rec_list_l0[i1_ref_idx]->apu1_y_sub_pel_planes[1] +
+ ref_offset;
+ apu1_hpel_ref[1][3] =
+ ps_inter_ctb_prms->pps_rec_list_l0[i1_ref_idx]->apu1_y_sub_pel_planes[2] +
+ ref_offset;
+
+ luma_weight_ref2 = (WORD32)ps_inter_ctb_prms->pps_rec_list_l0[i1_ref_idx]
+ ->s_weight_offset.i2_luma_weight;
+ luma_offset_ref2 = (WORD32)ps_inter_ctb_prms->pps_rec_list_l0[i1_ref_idx]
+ ->s_weight_offset.i2_luma_offset;
+ }
+ else
+ {
+ i1_ref_idx = ps_pu_node2[i].pu.mv.i1_l1_ref_idx;
+ aps_mv[1] = &(ps_pu_node2[i].pu.mv.s_l1_mv);
+
+ ASSERT(i1_ref_idx >= 0);
+
+ apu1_hpel_ref[1][0] =
+ (UWORD8 *)(ps_inter_ctb_prms->pps_rec_list_l1[i1_ref_idx]
+ ->s_yuv_buf_desc.pv_y_buf) +
+ ref_offset; //>ppu1_list_rec_fxfy[0][i1_ref_idx] + ref_offset;
+ apu1_hpel_ref[1][1] =
+ ps_inter_ctb_prms->pps_rec_list_l1[i1_ref_idx]->apu1_y_sub_pel_planes[0] +
+ ref_offset;
+ apu1_hpel_ref[1][2] =
+ ps_inter_ctb_prms->pps_rec_list_l1[i1_ref_idx]->apu1_y_sub_pel_planes[1] +
+ ref_offset;
+ apu1_hpel_ref[1][3] =
+ ps_inter_ctb_prms->pps_rec_list_l1[i1_ref_idx]->apu1_y_sub_pel_planes[2] +
+ ref_offset;
+
+ luma_weight_ref2 = (WORD32)ps_inter_ctb_prms->pps_rec_list_l1[i1_ref_idx]
+ ->s_weight_offset.i2_luma_weight;
+ luma_offset_ref2 = (WORD32)ps_inter_ctb_prms->pps_rec_list_l1[i1_ref_idx]
+ ->s_weight_offset.i2_luma_offset;
+ }
+
+ if(aps_mv[1]->i2_mvx == INTRA_MV)
+ {
+ uni_cost = ps_pu_node1->i4_tot_cost;
+ cur_iter_best_cost = ps_pu_node2[i].i4_tot_cost;
+
+ if(cur_iter_best_cost < bi_cost)
+ {
+ bi_cost = cur_iter_best_cost;
+ best_cand_in_opp_dir_idx = i;
+ is_best_cand_an_intra = 1;
+ }
+
+ best_cost = MIN(uni_cost, bi_cost);
+ tot_cost += best_cost;
+ continue;
+ }
+
+ s_interp_prms.apu1_interp_out[0] = as_pred_buf_data[0][j].pu1_pred;
+ ps_interp_prms->ppu1_ref = &apu1_hpel_ref[0][0];
+
+ ps_me_optimised_function_list->pf_qpel_interp_avg_generic(
+ ps_interp_prms, aps_mv[0]->i2_mvx, aps_mv[0]->i2_mvy, 0);
+
+ if(ps_interp_prms->pu1_final_out != s_interp_prms.apu1_interp_out[0])
+ {
+ as_pred_buf_data[0][j].u1_pred_buf_array_id = UCHAR_MAX;
+ as_pred_buf_data[0][j].pu1_pred = ps_interp_prms->pu1_final_out;
+ as_pred_buf_data[0][j].i4_pred_stride = ps_interp_prms->i4_final_out_stride;
+ }
+
+ if(u1_is_cu_noisy && ps_inter_ctb_prms->i4_alpha_stim_multiplier)
+ {
+ hme_compute_sigmaX_and_sigmaXSquared(
+ as_pred_buf_data[0][j].pu1_pred,
+ as_pred_buf_data[0][j].i4_pred_stride,
+ &au8_sigmaX[0][j],
+ &au8_sigmaXSquared[0][j],
+ ps_interp_prms->i4_blk_wd,
+ ps_interp_prms->i4_blk_ht,
+ ps_interp_prms->i4_blk_wd,
+ ps_interp_prms->i4_blk_ht,
+ 0,
+ 1);
+ }
+
+ s_interp_prms.apu1_interp_out[0] = as_pred_buf_data[1][j].pu1_pred;
+ ps_interp_prms->ppu1_ref = &apu1_hpel_ref[1][0];
+
+ ps_me_optimised_function_list->pf_qpel_interp_avg_generic(
+ ps_interp_prms, aps_mv[1]->i2_mvx, aps_mv[1]->i2_mvy, 0);
+
+ if(ps_interp_prms->pu1_final_out != s_interp_prms.apu1_interp_out[0])
+ {
+ as_pred_buf_data[1][j].u1_pred_buf_array_id = UCHAR_MAX;
+ as_pred_buf_data[1][j].pu1_pred = ps_interp_prms->pu1_final_out;
+ as_pred_buf_data[1][j].i4_pred_stride = ps_interp_prms->i4_final_out_stride;
+ }
+
+ ps_cmn_utils_optimised_function_list->pf_wt_avg_2d(
+ as_pred_buf_data[0][j].pu1_pred,
+ as_pred_buf_data[1][j].pu1_pred,
+ as_pred_buf_data[0][j].i4_pred_stride,
+ as_pred_buf_data[1][j].i4_pred_stride,
+ wd,
+ ht,
+ as_pred_buf_data[2][j].pu1_pred,
+ as_pred_buf_data[2][j].i4_pred_stride,
+ luma_weight_ref1,
+ luma_weight_ref2,
+ luma_offset_ref1,
+ luma_offset_ref2,
+ ps_inter_ctb_prms->wpred_log_wdc);
+
+ if(u1_is_cu_noisy && ps_inter_ctb_prms->i4_alpha_stim_multiplier)
+ {
+ hme_compute_sigmaX_and_sigmaXSquared(
+ as_pred_buf_data[2][j].pu1_pred,
+ as_pred_buf_data[2][j].i4_pred_stride,
+ &au8_sigmaX[1][j],
+ &au8_sigmaXSquared[1][j],
+ ps_interp_prms->i4_blk_wd,
+ ps_interp_prms->i4_blk_ht,
+ ps_interp_prms->i4_blk_wd,
+ ps_interp_prms->i4_blk_ht,
+ 0,
+ 1);
+ }
+
+ s_err_prms.pu1_inp = (U08 *)ps_inter_ctb_prms->pu1_non_wt_inp + inp_offset;
+ s_err_prms.i4_inp_stride = inp_stride;
+ s_err_prms.i4_ref_stride = as_pred_buf_data[2][j].i4_pred_stride;
+ s_err_prms.i4_part_mask = (ENABLE_2Nx2N);
+ s_err_prms.i4_grid_mask = 1;
+ s_err_prms.pi4_sad_grid = &i4_sad_grid;
+ s_err_prms.i4_blk_wd = wd;
+ s_err_prms.i4_blk_ht = ht;
+ s_err_prms.pu1_ref = as_pred_buf_data[2][j].pu1_pred;
+ s_err_prms.ps_cmn_utils_optimised_function_list = ps_cmn_utils_optimised_function_list;
+
+ if(ps_inter_ctb_prms->u1_use_satd)
+ {
+ pf_err_compute = compute_satd_8bit;
+ }
+ else
+ {
+ pf_err_compute = ps_me_optimised_function_list->pf_evalsad_pt_npu_mxn_8bit;
+ }
+
+ pf_err_compute(&s_err_prms);
+
+#if USE_NOISE_TERM_DURING_BICAND_SEARCH
+ if(u1_is_cu_noisy && ps_inter_ctb_prms->i4_alpha_stim_multiplier)
+ {
+ unsigned long u4_shift_val;
+ ULWORD64 u8_src_variance, u8_pred_variance, u8_pred_sigmaSquareX;
+ ULWORD64 u8_temp_var, u8_temp_var1;
+ S32 i4_bits_req;
+
+ S32 i4_q_level = STIM_Q_FORMAT + ALPHA_Q_FORMAT;
+
+ u8_pred_sigmaSquareX = (au8_sigmaX[1][j] * au8_sigmaX[1][j]);
+ u8_pred_variance = au8_sigmaXSquared[1][j] - u8_pred_sigmaSquareX;
+
+ if(e_cu_size == CU_8x8)
+ {
+ PART_ID_T e_part_id =
+ (PART_ID_T)((PART_ID_NxN_TL) + (x_off & 1) + ((y_off & 1) << 1));
+
+ u4_shift_val = ihevce_calc_stim_injected_variance(
+ ps_inter_ctb_prms->pu8_part_src_sigmaX,
+ ps_inter_ctb_prms->pu8_part_src_sigmaXSquared,
+ &u8_src_variance,
+ i4_default_src_wt,
+ 0,
+ ps_inter_ctb_prms->wpred_log_wdc,
+ e_part_id);
+ }
+ else
+ {
+ u4_shift_val = ihevce_calc_stim_injected_variance(
+ ps_inter_ctb_prms->pu8_part_src_sigmaX,
+ ps_inter_ctb_prms->pu8_part_src_sigmaXSquared,
+ &u8_src_variance,
+ i4_default_src_wt,
+ 0,
+ ps_inter_ctb_prms->wpred_log_wdc,
+ e_part_id);
+ }
+
+ u8_pred_variance = u8_pred_variance >> u4_shift_val;
+
+ GETRANGE64(i4_bits_req, u8_pred_variance);
+
+ if(i4_bits_req > 27)
+ {
+ u8_pred_variance = u8_pred_variance >> (i4_bits_req - 27);
+ u8_src_variance = u8_src_variance >> (i4_bits_req - 27);
+ }
+
+ if(u8_src_variance == u8_pred_variance)
+ {
+ u8_temp_var = (1 << STIM_Q_FORMAT);
+ }
+ else
+ {
+ u8_temp_var = (2 * u8_src_variance * u8_pred_variance);
+ u8_temp_var = (u8_temp_var * (1 << STIM_Q_FORMAT));
+ u8_temp_var1 =
+ (u8_src_variance * u8_src_variance) + (u8_pred_variance * u8_pred_variance);
+ u8_temp_var = (u8_temp_var + (u8_temp_var1 / 2));
+ u8_temp_var = (u8_temp_var / u8_temp_var1);
+ }
+
+ i4_noise_term = (UWORD32)u8_temp_var;
+
+ i4_noise_term *= ps_inter_ctb_prms->i4_alpha_stim_multiplier;
+
+ ASSERT(i4_noise_term >= 0);
+
+ u8_temp_var = i4_sad_grid;
+ u8_temp_var *= ((1 << (i4_q_level)) - (i4_noise_term));
+ u8_temp_var += (1 << ((i4_q_level)-1));
+ i4_sad_grid = (UWORD32)(u8_temp_var >> (i4_q_level));
+ }
+#endif
+
+ cur_iter_best_cost = i4_sad_grid;
+ cur_iter_best_cost += ps_pu_node1->i4_mv_cost;
+ cur_iter_best_cost += ps_pu_node2[i].i4_mv_cost;
+
+ if(cur_iter_best_cost < bi_cost)
+ {
+ bi_cost = cur_iter_best_cost;
+ best_cand_in_opp_dir_idx = i;
+ is_best_cand_an_intra = 0;
+ }
+ }
+
+ uni_cost = ps_pu_node1->i4_tot_cost;
+
+#if USE_NOISE_TERM_DURING_BICAND_SEARCH
+ if(u1_is_cu_noisy && ps_inter_ctb_prms->i4_alpha_stim_multiplier)
+ {
+ unsigned long u4_shift_val;
+ ULWORD64 u8_src_variance, u8_pred_variance, u8_pred_sigmaSquareX;
+ ULWORD64 u8_temp_var, u8_temp_var1;
+ S32 i4_bits_req;
+
+ S32 i4_q_level = STIM_Q_FORMAT + ALPHA_Q_FORMAT;
+
+ S08 i1_ref_idx =
+ (PRED_L0 == ps_pu_node1->pu.b2_pred_mode)
+ ? ps_inter_ctb_prms->pi1_past_list[ps_pu_node1->pu.mv.i1_l0_ref_idx]
+ : ps_inter_ctb_prms->pi1_future_list[ps_pu_node1->pu.mv.i1_l1_ref_idx];
+ S32 i4_sad = ps_pu_node1->i4_tot_cost - ps_pu_node1->i4_mv_cost;
+
+ u8_pred_sigmaSquareX = (au8_sigmaX[0][j] * au8_sigmaX[0][j]);
+ u8_pred_variance = au8_sigmaXSquared[0][j] - u8_pred_sigmaSquareX;
+
+ if(e_cu_size == CU_8x8)
+ {
+ PART_ID_T e_part_id =
+ (PART_ID_T)((PART_ID_NxN_TL) + (x_off & 1) + ((y_off & 1) << 1));
+
+ u4_shift_val = ihevce_calc_stim_injected_variance(
+ ps_inter_ctb_prms->pu8_part_src_sigmaX,
+ ps_inter_ctb_prms->pu8_part_src_sigmaXSquared,
+ &u8_src_variance,
+ ps_inter_ctb_prms->pi4_inv_wt[i1_ref_idx],
+ ps_inter_ctb_prms->pi4_inv_wt_shift_val[i1_ref_idx],
+ ps_inter_ctb_prms->wpred_log_wdc,
+ e_part_id);
+ }
+ else
+ {
+ u4_shift_val = ihevce_calc_stim_injected_variance(
+ ps_inter_ctb_prms->pu8_part_src_sigmaX,
+ ps_inter_ctb_prms->pu8_part_src_sigmaXSquared,
+ &u8_src_variance,
+ ps_inter_ctb_prms->pi4_inv_wt[i1_ref_idx],
+ ps_inter_ctb_prms->pi4_inv_wt_shift_val[i1_ref_idx],
+ ps_inter_ctb_prms->wpred_log_wdc,
+ e_part_id);
+ }
+
+ u8_pred_variance = u8_pred_variance >> (u4_shift_val);
+
+ GETRANGE64(i4_bits_req, u8_pred_variance);
+
+ if(i4_bits_req > 27)
+ {
+ u8_pred_variance = u8_pred_variance >> (i4_bits_req - 27);
+ u8_src_variance = u8_src_variance >> (i4_bits_req - 27);
+ }
+
+ if(u8_src_variance == u8_pred_variance)
+ {
+ u8_temp_var = (1 << STIM_Q_FORMAT);
+ }
+ else
+ {
+ u8_temp_var = (2 * u8_src_variance * u8_pred_variance);
+ u8_temp_var = (u8_temp_var * (1 << STIM_Q_FORMAT));
+ u8_temp_var1 =
+ (u8_src_variance * u8_src_variance) + (u8_pred_variance * u8_pred_variance);
+ u8_temp_var = (u8_temp_var + (u8_temp_var1 / 2));
+ u8_temp_var = (u8_temp_var / u8_temp_var1);
+ }
+
+ i4_noise_term = (UWORD32)u8_temp_var;
+
+ i4_noise_term *= ps_inter_ctb_prms->i4_alpha_stim_multiplier;
+
+ ASSERT(i4_noise_term >= 0);
+
+ u8_temp_var = i4_sad;
+ u8_temp_var *= ((1 << (i4_q_level)) - (i4_noise_term));
+ u8_temp_var += (1 << ((i4_q_level)-1));
+ i4_sad = (UWORD32)(u8_temp_var >> (i4_q_level));
+
+ uni_cost = i4_sad + ps_pu_node1->i4_mv_cost;
+
+ pu8_winning_pred_sigmaX[j] = au8_sigmaX[0][j];
+ pu8_winning_pred_sigmaXSquare[j] = au8_sigmaXSquared[0][j];
+ }
+#endif
+
+ if((bi_cost < uni_cost) && (!is_best_cand_an_intra))
+ {
+ if(u1_is_cu_noisy && ps_inter_ctb_prms->i4_alpha_stim_multiplier)
+ {
+ pu8_winning_pred_sigmaX[j] = au8_sigmaX[1][j];
+ pu8_winning_pred_sigmaXSquare[j] = au8_sigmaXSquared[1][j];
+ }
+
+ if(PRED_L0 == ps_pu_node1->pu.b2_pred_mode)
+ {
+ ps_pu_node1->pu.b2_pred_mode = PRED_BI;
+
+ if(PRED_L0 == ps_pu_node2[best_cand_in_opp_dir_idx].pu.b2_pred_mode)
+ {
+ ps_pu_node1->pu.mv.i1_l1_ref_idx =
+ ps_pu_node2[best_cand_in_opp_dir_idx].pu.mv.i1_l0_ref_idx;
+ ps_pu_node1->pu.mv.s_l1_mv.i2_mvx =
+ ps_pu_node2[best_cand_in_opp_dir_idx].pu.mv.s_l0_mv.i2_mvx;
+ ps_pu_node1->pu.mv.s_l1_mv.i2_mvy =
+ ps_pu_node2[best_cand_in_opp_dir_idx].pu.mv.s_l0_mv.i2_mvy;
+ }
+ else
+ {
+ ps_pu_node1->pu.mv.i1_l1_ref_idx =
+ ps_pu_node2[best_cand_in_opp_dir_idx].pu.mv.i1_l1_ref_idx;
+ ps_pu_node1->pu.mv.s_l1_mv.i2_mvx =
+ ps_pu_node2[best_cand_in_opp_dir_idx].pu.mv.s_l1_mv.i2_mvx;
+ ps_pu_node1->pu.mv.s_l1_mv.i2_mvy =
+ ps_pu_node2[best_cand_in_opp_dir_idx].pu.mv.s_l1_mv.i2_mvy;
+ }
+ }
+ else
+ {
+ ps_pu_node1->pu.b2_pred_mode = PRED_BI;
+
+ if(PRED_L0 == ps_pu_node2[best_cand_in_opp_dir_idx].pu.b2_pred_mode)
+ {
+ ps_pu_node1->pu.mv.i1_l0_ref_idx =
+ ps_pu_node2[best_cand_in_opp_dir_idx].pu.mv.i1_l0_ref_idx;
+ ps_pu_node1->pu.mv.s_l0_mv.i2_mvx =
+ ps_pu_node2[best_cand_in_opp_dir_idx].pu.mv.s_l0_mv.i2_mvx;
+ ps_pu_node1->pu.mv.s_l0_mv.i2_mvy =
+ ps_pu_node2[best_cand_in_opp_dir_idx].pu.mv.s_l0_mv.i2_mvy;
+ }
+ else
+ {
+ ps_pu_node1->pu.mv.i1_l0_ref_idx =
+ ps_pu_node2[best_cand_in_opp_dir_idx].pu.mv.i1_l1_ref_idx;
+ ps_pu_node1->pu.mv.s_l0_mv.i2_mvx =
+ ps_pu_node2[best_cand_in_opp_dir_idx].pu.mv.s_l1_mv.i2_mvx;
+ ps_pu_node1->pu.mv.s_l0_mv.i2_mvy =
+ ps_pu_node2[best_cand_in_opp_dir_idx].pu.mv.s_l1_mv.i2_mvy;
+ }
+ }
+
+ ps_part_type_result->as_pu_results[j].i4_tot_cost = bi_cost;
+ }
+
+ best_cost = MIN(uni_cost, bi_cost);
+ tot_cost += best_cost;
+ }
+
+ hme_debrief_bipred_eval(
+ ps_part_type_result,
+ as_pred_buf_data,
+ &ps_inter_ctb_prms->s_pred_buf_mngr,
+ au1_pred_buf_array_indixes,
+ ps_cmn_utils_optimised_function_list);
+
+ ps_part_type_result->i4_tot_cost = tot_cost;
+}
+/* Evaluates one 8x8 block with ps_func_selector->pf_had_8x8_using_4_4x4_r, stores the 2Nx2N SATD, TU split flag and early CBF flag through ps_prms, and returns the packed result >> 2; lambda and lambda_q_shift are not referenced here. */
+WORD32 hme_evalsatd_pt_pu_8x8_tu_rec(
+ err_prms_t *ps_prms,
+ WORD32 lambda,
+ WORD32 lambda_q_shift,
+ WORD32 i4_frm_qstep,
+ me_func_selector_t *ps_func_selector)
+{
+ S32 ai4_satd_4x4[4]; /* num 4x4s in a 8x8 */
+ S32 i4_satd_8x8;
+ S16 *pi2_had_out;
+ S32 i4_tu_split_flag = 0;
+ S32 i4_tu_early_cbf = 0;
+
+ S32 i4_early_cbf = 1;
+ // S32 i4_i, i4_k;
+ S32 i4_total_satd_cost = 0;
+ S32 best_cost_tu_split;
+
+ /* Pointer tables, one slot per HAD level (HAD_4x4 .. HAD_32x32), handed to the HAD routine */
+ S32 *api4_satd_pu[HAD_32x32 + 1];
+ S32 *api4_tu_split[HAD_32x32 + 1];
+ S32 *api4_tu_early_cbf[HAD_32x32 + 1];
+
+ S32 *pi4_sad_grid = ps_prms->pi4_sad_grid;
+ S32 *pi4_tu_split = ps_prms->pi4_tu_split_flags;
+ S32 *pi4_early_cbf = ps_prms->pi4_tu_early_cbf;
+
+ U08 *pu1_inp = ps_prms->pu1_inp;
+ U08 *pu1_ref = ps_prms->pu1_ref;
+
+ S32 inp_stride = ps_prms->i4_inp_stride;
+ S32 ref_stride = ps_prms->i4_ref_stride;
+
+ /* Initialize tu_split_cost to "0" */
+ ps_prms->i4_tu_split_cost = 0;
+ pi2_had_out = (S16 *)ps_prms->pu1_wkg_mem; /* caller-supplied working memory, passed to the HAD routine */
+
+ api4_satd_pu[HAD_4x4] = &ai4_satd_4x4[0];
+ api4_satd_pu[HAD_8x8] = &i4_satd_8x8;
+ api4_satd_pu[HAD_16x16] = NULL;
+ api4_satd_pu[HAD_32x32] = NULL; /* 16x16 and 32x32 slots are left NULL for an 8x8 block */
+
+ api4_tu_split[HAD_4x4] = NULL;
+ api4_tu_split[HAD_8x8] = &i4_tu_split_flag;
+ api4_tu_split[HAD_16x16] = NULL;
+ api4_tu_split[HAD_32x32] = NULL; /* only the 8x8 slot is populated */
+
+ api4_tu_early_cbf[HAD_4x4] = NULL;
+ api4_tu_early_cbf[HAD_8x8] = &i4_tu_early_cbf;
+ api4_tu_early_cbf[HAD_16x16] = NULL;
+ api4_tu_early_cbf[HAD_32x32] = NULL; /* only the 8x8 slot is populated */
+
+ /* Call the 8x8 HAD routine selected in ps_func_selector with the pointer tables set up above */
+
+ /* Return value packs the cost (bits 2 and up), the TU split flag (bit 1) and the early CBF flag (bit 0) */
+ best_cost_tu_split = ps_func_selector->pf_had_8x8_using_4_4x4_r(
+ pu1_inp,
+ inp_stride,
+ pu1_ref,
+ ref_stride,
+ pi2_had_out,
+ 8,
+ api4_satd_pu,
+ api4_tu_split,
+ api4_tu_early_cbf,
+ 0,
+ 2,
+ 0,
+ 0,
+ i4_frm_qstep,
+ 0,
+ ps_prms->u1_max_tr_depth,
+ ps_prms->u1_max_tr_size,
+ &(ps_prms->i4_tu_split_cost),
+ NULL);
+
+ /* For SATD computation following TU size are assumed for a 8x8 CU */
+ /* 8 for 2Nx2N, 4 for Nx2N,2NxN */
+
+ i4_total_satd_cost = best_cost_tu_split >> 2;
+
+ /* Second last bit has the TU split flag */
+ i4_tu_split_flag = (best_cost_tu_split & 0x3) >> 1;
+
+ /* Last bit corresponds to the Early CBF flag */
+ i4_early_cbf = (best_cost_tu_split & 0x1);
+
+ /* Publish the 8x8 SATD and both flags in the PART_ID_2Nx2N slot of the caller's arrays */
+ pi4_sad_grid[PART_ID_2Nx2N] = i4_satd_8x8;
+ pi4_tu_split[PART_ID_2Nx2N] = i4_tu_split_flag;
+ pi4_early_cbf[PART_ID_2Nx2N] = i4_early_cbf;
+
+ return i4_total_satd_cost;
+}
+//#endif
+/**
+********************************************************************************
+* @fn void hme_evalsatd_update_2_best_results_pt_pu_16x16
+*
+* @brief Computes the SATD of every partition of a 16x16 CU from 8x8 and 4x4
+* Hadamard SATDs and updates the best and second best result of each valid part
+*
+* @param[inout] ps_prms: error prms containing current and ref ptr, strides,
+* pointer to sad grid of each partitions
+* @param[inout] ps_result_prms: candidate mv / ref idx and the subpel refine ctxt to update
+* @return None
+********************************************************************************
+*/
+
+void hme_evalsatd_update_2_best_results_pt_pu_16x16(
+ err_prms_t *ps_prms, result_upd_prms_t *ps_result_prms)
+{
+ S32 ai4_satd_4x4[16]; /* num 4x4s in a 16x16 */
+ S32 ai4_satd_8x8[4]; /* num 8x8s in a 16x16 */
+ S32 i4_satd_16x16; /* 16x16 satd cost */
+ S32 i;
+ S16 ai2_8x8_had[256];
+ S16 *pi2_y0;
+ U08 *pu1_src, *pu1_pred;
+ S32 pos_x_y_4x4_0, pos_x_y_4x4 = 0;
+ S32 *ppi4_hsad;
+
+ /* Initialize array of ptrs to hold partial SATDs at all levels of 16x16 */
+ S32 *api4_satd_pu[HAD_32x32 + 1];
+ S32 *pi4_sad_grid = ps_prms->pi4_sad_grid;
+
+ U08 *pu1_inp = ps_prms->pu1_inp;
+ U08 *pu1_ref = ps_prms->pu1_ref;
+
+ S32 inp_stride = ps_prms->i4_inp_stride;
+ S32 ref_stride = ps_prms->i4_ref_stride;
+
+ api4_satd_pu[HAD_4x4] = &ai4_satd_4x4[0];
+ api4_satd_pu[HAD_8x8] = &ai4_satd_8x8[0];
+ api4_satd_pu[HAD_16x16] = &i4_satd_16x16;
+ api4_satd_pu[HAD_32x32] = NULL; /* 32x32 not used for 16x16 subpel refine */
+
+ ppi4_hsad = api4_satd_pu[HAD_16x16]; /* not read again in this function */
+
+ /* Call ihevce_had_8x8_using_4_4x4 once per 8x8 quadrant, in the order TL, TR, BL, BR */
+ for(i = 0; i < 4; i++)
+ {
+ pu1_src = pu1_inp + (i & 0x01) * 8 + (i >> 1) * inp_stride * 8;
+ pu1_pred = pu1_ref + (i & 0x01) * 8 + (i >> 1) * ref_stride * 8;
+ pi2_y0 = ai2_8x8_had + (i & 0x01) * 8 + (i >> 1) * 16 * 8;
+ pos_x_y_4x4_0 = pos_x_y_4x4 + (i & 0x01) * 2 + (i >> 1) * (2 << 16); /* horizontal offset in the low 16 bits, vertical offset in the high 16 bits; presumably in 4x4 units - confirm against the callee */
+
+ ihevce_had_8x8_using_4_4x4(
+ pu1_src, inp_stride, pu1_pred, ref_stride, pi2_y0, 16, api4_satd_pu, pos_x_y_4x4_0, 4);
+ }
+
+ /* For SATD computation following TU size are assumed for a 16x16 CU */
+ /* 16 for 2Nx2N, 8 for NxN/Nx2N,2NxN and mix of 4 and 8 for AMPs */
+
+ /* Update 8x8 SATDs */
+ /* Modified to cost calculation using only 4x4 SATD */
+ /* ai4_satd_8x8[] is never assigned in this function; presumably filled by the HAD calls above through api4_satd_pu - confirm */
+ // ai4_satd_8x8[0] = ai4_satd_4x4[0] + ai4_satd_4x4[1] + ai4_satd_4x4[4] + ai4_satd_4x4[5];
+ // ai4_satd_8x8[1] = ai4_satd_4x4[2] + ai4_satd_4x4[3] + ai4_satd_4x4[6] + ai4_satd_4x4[7];
+ // ai4_satd_8x8[2] = ai4_satd_4x4[8] + ai4_satd_4x4[9] + ai4_satd_4x4[12] + ai4_satd_4x4[13];
+ // ai4_satd_8x8[3] = ai4_satd_4x4[10] + ai4_satd_4x4[11] + ai4_satd_4x4[14] + ai4_satd_4x4[15];
+
+ /* Update 16x16 SATD as the sum of the four 8x8 SATDs (i4_satd_16x16 is not read) */
+ pi4_sad_grid[PART_ID_2Nx2N] =
+ ai4_satd_8x8[0] + ai4_satd_8x8[1] + ai4_satd_8x8[2] + ai4_satd_8x8[3];
+
+ pi4_sad_grid[PART_ID_NxN_TL] = ai4_satd_8x8[0];
+ pi4_sad_grid[PART_ID_NxN_TR] = ai4_satd_8x8[1];
+ pi4_sad_grid[PART_ID_NxN_BL] = ai4_satd_8x8[2];
+ pi4_sad_grid[PART_ID_NxN_BR] = ai4_satd_8x8[3];
+
+ /* Update 8x16 / 16x8 SATDs */
+ pi4_sad_grid[PART_ID_Nx2N_L] = ai4_satd_8x8[0] + ai4_satd_8x8[2];
+ pi4_sad_grid[PART_ID_Nx2N_R] = ai4_satd_8x8[1] + ai4_satd_8x8[3];
+ pi4_sad_grid[PART_ID_2NxN_T] = ai4_satd_8x8[0] + ai4_satd_8x8[1];
+ pi4_sad_grid[PART_ID_2NxN_B] = ai4_satd_8x8[2] + ai4_satd_8x8[3];
+
+ /* Update AMP SATDs 16x12,16x4, 12x16,4x16; ai4_satd_4x4[] is indexed here as a raster grid with 4 entries per row */
+ pi4_sad_grid[PART_ID_nLx2N_L] =
+ ai4_satd_4x4[0] + ai4_satd_4x4[4] + ai4_satd_4x4[8] + ai4_satd_4x4[12];
+
+ pi4_sad_grid[PART_ID_nLx2N_R] = ai4_satd_4x4[1] + ai4_satd_4x4[5] + ai4_satd_4x4[9] +
+ ai4_satd_4x4[13] + pi4_sad_grid[PART_ID_Nx2N_R];
+
+ pi4_sad_grid[PART_ID_nRx2N_L] = ai4_satd_4x4[2] + ai4_satd_4x4[6] + ai4_satd_4x4[10] +
+ ai4_satd_4x4[14] + pi4_sad_grid[PART_ID_Nx2N_L];
+
+ pi4_sad_grid[PART_ID_nRx2N_R] =
+ ai4_satd_4x4[3] + ai4_satd_4x4[7] + ai4_satd_4x4[11] + ai4_satd_4x4[15];
+
+ pi4_sad_grid[PART_ID_2NxnU_T] =
+ ai4_satd_4x4[0] + ai4_satd_4x4[1] + ai4_satd_4x4[2] + ai4_satd_4x4[3];
+
+ pi4_sad_grid[PART_ID_2NxnU_B] = ai4_satd_4x4[4] + ai4_satd_4x4[5] + ai4_satd_4x4[6] +
+ ai4_satd_4x4[7] + pi4_sad_grid[PART_ID_2NxN_B];
+
+ pi4_sad_grid[PART_ID_2NxnD_T] = ai4_satd_4x4[8] + ai4_satd_4x4[9] + ai4_satd_4x4[10] +
+ ai4_satd_4x4[11] + pi4_sad_grid[PART_ID_2NxN_T];
+
+ pi4_sad_grid[PART_ID_2NxnD_B] =
+ ai4_satd_4x4[12] + ai4_satd_4x4[13] + ai4_satd_4x4[14] + ai4_satd_4x4[15];
+
+ /* Fold the new SATDs into the stored best / second best results */
+ {
+ S32 i4_count = 0, i4_sad, i4_mv_cost, i4_tot_cost;
+ mv_refine_ctxt_t *ps_subpel_refine_ctxt = ps_result_prms->ps_subpel_refine_ctxt;
+ S32 *pi4_valid_part_ids = &ps_subpel_refine_ctxt->ai4_part_id[0];
+ S32 best_node_cost;
+ S32 second_best_node_cost;
+
+ /*For each valid partition, update the refine_prm structure to reflect the best and second
+ best candidates for that partition*/
+
+ for(i4_count = 0; i4_count < ps_subpel_refine_ctxt->i4_num_valid_parts; i4_count++)
+ {
+ S32 update_required = 0;
+ S32 part_id = pi4_valid_part_ids[i4_count];
+ S32 index = (ps_subpel_refine_ctxt->i4_num_valid_parts > 8) ? part_id : i4_count; /* result slot: part_id when more than 8 parts are valid, else position in the valid list */
+
+ /* Use a pre-computed cost instead of freshly evaluating subpel cost */
+ i4_mv_cost = ps_subpel_refine_ctxt->i2_mv_cost[0][index];
+
+ /*Calculate total cost*/
+ i4_sad = CLIP3(pi4_sad_grid[part_id], 0, 0x7fff);
+ i4_tot_cost = CLIP_S16(i4_sad + i4_mv_cost);
+
+ /*****************************************************************/
+ /* We do not labor through the results if the total cost worse */
+ /* than the last of the results. */
+ /*****************************************************************/
+ best_node_cost = CLIP_S16(ps_subpel_refine_ctxt->i2_tot_cost[0][index]);
+ second_best_node_cost = CLIP_S16(ps_subpel_refine_ctxt->i2_tot_cost[1][index]);
+
+ if(i4_tot_cost < second_best_node_cost)
+ {
+ update_required = 2; /* default: replace the second best entry only */
+
+ /*************************************************************/
+ /* Identify where the current result is to be placed, i.e.   */
+ /* find the node whose cost is just higher than the new one  */
+ /*************************************************************/
+ if(i4_tot_cost < best_node_cost)
+ {
+ update_required = 1; /* new best: the old best is demoted to second best below */
+ }
+ else if(i4_tot_cost == ps_subpel_refine_ctxt->i2_tot_cost[0][index])
+ {
+ update_required = 0; /* same cost as the stored best: leave both entries untouched */
+ }
+ if(update_required == 2)
+ {
+ ps_subpel_refine_ctxt->i2_tot_cost[1][index] = i4_tot_cost;
+ ps_subpel_refine_ctxt->i2_mv_cost[1][index] = i4_mv_cost;
+ ps_subpel_refine_ctxt->i2_mv_x[1][index] = ps_result_prms->i2_mv_x;
+ ps_subpel_refine_ctxt->i2_mv_y[1][index] = ps_result_prms->i2_mv_y;
+ ps_subpel_refine_ctxt->i2_ref_idx[1][index] = ps_result_prms->i1_ref_idx;
+ }
+ else if(update_required == 1)
+ {
+ ps_subpel_refine_ctxt->i2_tot_cost[1][index] =
+ ps_subpel_refine_ctxt->i2_tot_cost[0][index];
+ ps_subpel_refine_ctxt->i2_mv_cost[1][index] =
+ ps_subpel_refine_ctxt->i2_mv_cost[0][index];
+ ps_subpel_refine_ctxt->i2_mv_x[1][index] =
+ ps_subpel_refine_ctxt->i2_mv_x[0][index];
+ ps_subpel_refine_ctxt->i2_mv_y[1][index] =
+ ps_subpel_refine_ctxt->i2_mv_y[0][index];
+ ps_subpel_refine_ctxt->i2_ref_idx[1][index] =
+ ps_subpel_refine_ctxt->i2_ref_idx[0][index];
+
+ ps_subpel_refine_ctxt->i2_tot_cost[0][index] = i4_tot_cost;
+ ps_subpel_refine_ctxt->i2_mv_cost[0][index] = i4_mv_cost;
+ ps_subpel_refine_ctxt->i2_mv_x[0][index] = ps_result_prms->i2_mv_x;
+ ps_subpel_refine_ctxt->i2_mv_y[0][index] = ps_result_prms->i2_mv_y;
+ ps_subpel_refine_ctxt->i2_ref_idx[0][index] = ps_result_prms->i1_ref_idx;
+ }
+ }
+ }
+ }
+}
+/* Computes the SATD of every partition of a 16x16 block into ps_prms->pi4_sad_grid and, for each valid partition, overwrites result entry [0] of ps_result_prms->ps_subpel_refine_ctxt when the new total cost is strictly lower. */
+//#if COMPUTE_16x16_R == C
+void hme_evalsatd_update_1_best_result_pt_pu_16x16(
+ err_prms_t *ps_prms, result_upd_prms_t *ps_result_prms)
+{
+ S32 ai4_satd_4x4[16]; /* num 4x4s in a 16x16 */
+ S32 ai4_satd_8x8[4]; /* num 8x8s in a 16x16 */
+ S32 i4_satd_16x16; /* 16x16 satd cost */
+ S32 i;
+ S16 ai2_8x8_had[256];
+ S16 *pi2_y0;
+ U08 *pu1_src, *pu1_pred;
+ S32 pos_x_y_4x4_0, pos_x_y_4x4 = 0;
+ S32 *ppi4_hsad;
+
+ /* Initialize array of ptrs to hold partial SATDs at all levels of 16x16 */
+ S32 *api4_satd_pu[HAD_32x32 + 1];
+ S32 *pi4_sad_grid = ps_prms->pi4_sad_grid;
+
+ U08 *pu1_inp = ps_prms->pu1_inp;
+ U08 *pu1_ref = ps_prms->pu1_ref;
+
+ S32 inp_stride = ps_prms->i4_inp_stride;
+ S32 ref_stride = ps_prms->i4_ref_stride;
+
+ api4_satd_pu[HAD_4x4] = &ai4_satd_4x4[0];
+ api4_satd_pu[HAD_8x8] = &ai4_satd_8x8[0];
+ api4_satd_pu[HAD_16x16] = &i4_satd_16x16;
+ api4_satd_pu[HAD_32x32] = NULL; /* 32x32 not used for 16x16 subpel refine */
+
+ ppi4_hsad = api4_satd_pu[HAD_16x16]; /* not read again in this function */
+
+ /* Call ihevce_had_8x8_using_4_4x4 once per 8x8 quadrant, in the order TL, TR, BL, BR */
+ for(i = 0; i < 4; i++)
+ {
+ pu1_src = pu1_inp + (i & 0x01) * 8 + (i >> 1) * inp_stride * 8;
+ pu1_pred = pu1_ref + (i & 0x01) * 8 + (i >> 1) * ref_stride * 8;
+ pi2_y0 = ai2_8x8_had + (i & 0x01) * 8 + (i >> 1) * 16 * 8;
+ pos_x_y_4x4_0 = pos_x_y_4x4 + (i & 0x01) * 2 + (i >> 1) * (2 << 16); /* horizontal offset in the low 16 bits, vertical offset in the high 16 bits; presumably in 4x4 units - confirm against the callee */
+
+ ihevce_had_8x8_using_4_4x4(
+ pu1_src, inp_stride, pu1_pred, ref_stride, pi2_y0, 16, api4_satd_pu, pos_x_y_4x4_0, 4);
+ }
+
+ /* For SATD computation following TU size are assumed for a 16x16 CU */
+ /* 16 for 2Nx2N, 8 for NxN/Nx2N,2NxN and mix of 4 and 8 for AMPs */
+
+ /* Update 8x8 SATDs */
+ /* Modified to cost calculation using only 4x4 SATD */
+ /* ai4_satd_8x8[] is never assigned in this function; presumably filled by the HAD calls above through api4_satd_pu - confirm */
+ // ai4_satd_8x8[0] = ai4_satd_4x4[0] + ai4_satd_4x4[1] + ai4_satd_4x4[4] + ai4_satd_4x4[5];
+ // ai4_satd_8x8[1] = ai4_satd_4x4[2] + ai4_satd_4x4[3] + ai4_satd_4x4[6] + ai4_satd_4x4[7];
+ // ai4_satd_8x8[2] = ai4_satd_4x4[8] + ai4_satd_4x4[9] + ai4_satd_4x4[12] + ai4_satd_4x4[13];
+ // ai4_satd_8x8[3] = ai4_satd_4x4[10] + ai4_satd_4x4[11] + ai4_satd_4x4[14] + ai4_satd_4x4[15];
+
+ /* Update 16x16 SATD as the sum of the four 8x8 SATDs (i4_satd_16x16 is not read) */
+ pi4_sad_grid[PART_ID_2Nx2N] =
+ ai4_satd_8x8[0] + ai4_satd_8x8[1] + ai4_satd_8x8[2] + ai4_satd_8x8[3];
+
+ pi4_sad_grid[PART_ID_NxN_TL] = ai4_satd_8x8[0];
+ pi4_sad_grid[PART_ID_NxN_TR] = ai4_satd_8x8[1];
+ pi4_sad_grid[PART_ID_NxN_BL] = ai4_satd_8x8[2];
+ pi4_sad_grid[PART_ID_NxN_BR] = ai4_satd_8x8[3];
+
+ /* Update 8x16 / 16x8 SATDs */
+ pi4_sad_grid[PART_ID_Nx2N_L] = ai4_satd_8x8[0] + ai4_satd_8x8[2];
+ pi4_sad_grid[PART_ID_Nx2N_R] = ai4_satd_8x8[1] + ai4_satd_8x8[3];
+ pi4_sad_grid[PART_ID_2NxN_T] = ai4_satd_8x8[0] + ai4_satd_8x8[1];
+ pi4_sad_grid[PART_ID_2NxN_B] = ai4_satd_8x8[2] + ai4_satd_8x8[3];
+
+ /* Update AMP SATDs 16x12,16x4, 12x16,4x16. NOTE(review): these 4x4 indices (0,2,8,10 ...) differ from hme_evalsatd_update_2_best_results_pt_pu_16x16 (0,4,8,12 ...) although the HAD calls are identical - confirm the layout of ai4_satd_4x4[] */
+ pi4_sad_grid[PART_ID_nLx2N_L] =
+ ai4_satd_4x4[0] + ai4_satd_4x4[2] + ai4_satd_4x4[8] + ai4_satd_4x4[10];
+ pi4_sad_grid[PART_ID_nRx2N_R] =
+ ai4_satd_4x4[5] + ai4_satd_4x4[7] + ai4_satd_4x4[13] + ai4_satd_4x4[15];
+ pi4_sad_grid[PART_ID_2NxnU_T] =
+ ai4_satd_4x4[0] + ai4_satd_4x4[1] + ai4_satd_4x4[4] + ai4_satd_4x4[5];
+ pi4_sad_grid[PART_ID_2NxnD_B] =
+ ai4_satd_4x4[10] + ai4_satd_4x4[11] + ai4_satd_4x4[14] + ai4_satd_4x4[15];
+ /* The complementary AMP halves are the 2Nx2N total minus the half computed above */
+ pi4_sad_grid[PART_ID_nLx2N_R] = pi4_sad_grid[PART_ID_2Nx2N] - pi4_sad_grid[PART_ID_nLx2N_L];
+ pi4_sad_grid[PART_ID_nRx2N_L] = pi4_sad_grid[PART_ID_2Nx2N] - pi4_sad_grid[PART_ID_nRx2N_R];
+ pi4_sad_grid[PART_ID_2NxnU_B] = pi4_sad_grid[PART_ID_2Nx2N] - pi4_sad_grid[PART_ID_2NxnU_T];
+ pi4_sad_grid[PART_ID_2NxnD_T] = pi4_sad_grid[PART_ID_2Nx2N] - pi4_sad_grid[PART_ID_2NxnD_B];
+
+ /* Fold the new SATDs into the stored best result */
+ {
+ S32 i4_count = 0, i4_sad, i4_mv_cost, i4_tot_cost;
+ mv_refine_ctxt_t *ps_subpel_refine_ctxt = ps_result_prms->ps_subpel_refine_ctxt;
+ S32 *pi4_valid_part_ids = &ps_subpel_refine_ctxt->ai4_part_id[0];
+ S32 best_node_cost;
+ S32 second_best_node_cost;
+
+ /*For each valid partition, update the refine_prm structure to reflect the best
+ candidate for that partition (entry [1] is never written by this function)*/
+
+ for(i4_count = 0; i4_count < ps_subpel_refine_ctxt->i4_num_valid_parts; i4_count++)
+ {
+ S32 update_required = 0;
+ S32 part_id = pi4_valid_part_ids[i4_count];
+ S32 index = (ps_subpel_refine_ctxt->i4_num_valid_parts > 8) ? part_id : i4_count; /* result slot: part_id when more than 8 parts are valid, else position in the valid list */
+
+ /* Use a pre-computed cost instead of freshly evaluating subpel cost */
+ i4_mv_cost = ps_subpel_refine_ctxt->i2_mv_cost[0][index];
+
+ /*Calculate total cost*/
+ i4_sad = CLIP3(pi4_sad_grid[part_id], 0, 0x7fff);
+ i4_tot_cost = CLIP_S16(i4_sad + i4_mv_cost);
+
+ /*****************************************************************/
+ /* We do not labor through the results if the total cost worse */
+ /* than the last of the results. */
+ /*****************************************************************/
+ best_node_cost = CLIP_S16(ps_subpel_refine_ctxt->i2_tot_cost[0][index]);
+ second_best_node_cost = SHRT_MAX; /* second best is not tracked; the test below only rejects i4_tot_cost >= SHRT_MAX */
+
+ if(i4_tot_cost < second_best_node_cost)
+ {
+ update_required = 0;
+
+ /*************************************************************/
+ /* Identify where the current result is to be placed, i.e.   */
+ /* find the node whose cost is just higher than the new one  */
+ /*************************************************************/
+ if(i4_tot_cost < best_node_cost)
+ {
+ update_required = 1; /* strictly better than the stored best: replace it */
+ }
+ else if(i4_tot_cost == ps_subpel_refine_ctxt->i2_tot_cost[0][index])
+ {
+ update_required = 0; /* same cost as the stored best: keep the stored entry */
+ }
+ if(update_required == 2) /* never true here: update_required is only ever 0 or 1 in this function */
+ {
+ ps_subpel_refine_ctxt->i2_tot_cost[1][index] = i4_tot_cost;
+ ps_subpel_refine_ctxt->i2_mv_cost[1][index] = i4_mv_cost;
+ ps_subpel_refine_ctxt->i2_mv_x[1][index] = ps_result_prms->i2_mv_x;
+ ps_subpel_refine_ctxt->i2_mv_y[1][index] = ps_result_prms->i2_mv_y;
+ ps_subpel_refine_ctxt->i2_ref_idx[1][index] = ps_result_prms->i1_ref_idx;
+ }
+ else if(update_required == 1)
+ {
+ ps_subpel_refine_ctxt->i2_tot_cost[0][index] = i4_tot_cost;
+ ps_subpel_refine_ctxt->i2_mv_cost[0][index] = i4_mv_cost;
+ ps_subpel_refine_ctxt->i2_mv_x[0][index] = ps_result_prms->i2_mv_x;
+ ps_subpel_refine_ctxt->i2_mv_y[0][index] = ps_result_prms->i2_mv_y;
+ ps_subpel_refine_ctxt->i2_ref_idx[0][index] = ps_result_prms->i1_ref_idx;
+ }
+ }
+ }
+ }
+}
+/* Runs ps_func_selector->pf_had_16x16_r on one 16x16 block, writes the 16x16 TU split flag and early CBF flag to element [0] of ps_prms->pi4_tu_split_flags / pi4_tu_early_cbf, and returns i4_satd_16x16. */
+WORD32 hme_evalsatd_pt_pu_16x16_tu_rec(
+ err_prms_t *ps_prms,
+ WORD32 lambda,
+ WORD32 lambda_q_shift,
+ WORD32 i4_frm_qstep,
+ me_func_selector_t *ps_func_selector)
+{
+ S32 ai4_satd_4x4[16]; /* num 4x4s in a 16x16 */
+ S32 ai4_satd_8x8[4]; /* num 8x8s in a 16x16 */
+ S32 ai4_tu_split_8x8[16];
+ S32 i4_satd_16x16; /* 16x16 satd cost */
+
+ S32 ai4_tu_early_cbf_8x8[16];
+
+ //S16 ai2_had_out[256];
+ S16 *pi2_had_out;
+ S32 tu_split_flag = 0;
+ S32 early_cbf_flag = 0;
+ S32 total_satd_cost = 0;
+
+ /* Pointer tables, one slot per HAD level (HAD_4x4 .. HAD_32x32), handed to the HAD routine */
+ S32 *api4_satd_pu[HAD_32x32 + 1];
+ S32 *api4_tu_split[HAD_32x32 + 1];
+ S32 *api4_tu_early_cbf[HAD_32x32 + 1];
+
+ U08 *pu1_inp = ps_prms->pu1_inp;
+ U08 *pu1_ref = ps_prms->pu1_ref;
+
+ S32 inp_stride = ps_prms->i4_inp_stride;
+ S32 ref_stride = ps_prms->i4_ref_stride;
+
+ /* Initialize tu_split_cost to "0" */
+ ps_prms->i4_tu_split_cost = 0;
+
+ pi2_had_out = (S16 *)ps_prms->pu1_wkg_mem; /* caller-supplied working memory, passed to the HAD routine */
+
+ api4_satd_pu[HAD_4x4] = &ai4_satd_4x4[0];
+ api4_satd_pu[HAD_8x8] = &ai4_satd_8x8[0];
+ api4_satd_pu[HAD_16x16] = &i4_satd_16x16;
+ api4_satd_pu[HAD_32x32] = NULL; /* 32x32 not used for 16x16 subpel refine */
+
+ api4_tu_split[HAD_4x4] = NULL;
+ api4_tu_split[HAD_8x8] = &ai4_tu_split_8x8[0];
+ api4_tu_split[HAD_16x16] = &tu_split_flag;
+ api4_tu_split[HAD_32x32] = NULL; /* 32x32 not used for 16x16 subpel refine */
+
+ api4_tu_early_cbf[HAD_4x4] = NULL;
+ api4_tu_early_cbf[HAD_8x8] = &ai4_tu_early_cbf_8x8[0];
+ api4_tu_early_cbf[HAD_16x16] = &early_cbf_flag;
+ api4_tu_early_cbf[HAD_32x32] = NULL; /* 32x32 not used for 16x16 subpel refine */
+
+ /* Call recursive 16x16 HAD module; updates satds for 4x4, 8x8 and 16x16 (its return value is ignored) */
+ ps_func_selector->pf_had_16x16_r(
+ pu1_inp,
+ inp_stride,
+ pu1_ref,
+ ref_stride,
+ pi2_had_out,
+ 16,
+ api4_satd_pu,
+ api4_tu_split,
+ api4_tu_early_cbf,
+ 0,
+ 4,
+ lambda,
+ lambda_q_shift,
+ i4_frm_qstep,
+ 0,
+ ps_prms->u1_max_tr_depth,
+ ps_prms->u1_max_tr_size,
+ &(ps_prms->i4_tu_split_cost),
+ NULL);
+
+ total_satd_cost = i4_satd_16x16; /* presumably written by the HAD call through api4_satd_pu[HAD_16x16] - confirm */
+
+ ps_prms->pi4_tu_split_flags[0] = tu_split_flag;
+
+ ps_prms->pi4_tu_early_cbf[0] = early_cbf_flag;
+
+ return total_satd_cost;
+}
+
+/**
+********************************************************************************
+* @fn void hme_evalsatd_pt_pu_32x32
+*
+* @brief Evaluates the SATD of all partitions of a 32x32 CU by summing the
+* 8x8 Hadamard SATDs of its sixteen 8x8 blocks
+*
+* @param[inout] ps_prms: error prms containing current and ref ptr, strides,
+* pointer to sad grid of each partitions
+*
+* @return None
+********************************************************************************
+*/
+void hme_evalsatd_pt_pu_32x32(err_prms_t *ps_prms)
+{
+ //S32 ai4_satd_4x4[64]; /* num 4x4s in a 32x32 */
+ S32 ai4_satd_8x8[16]; /* num 8x8s in a 32x32 */
+ S32 ai4_satd_16x16[4]; /* num 16x16 in a 32x32 */
+ S32 i4_satd_32x32;
+ // S16 ai2_had_out[32*32];
+ U08 *pu1_src;
+ U08 *pu1_pred;
+ S32 i;
+
+ /* Array of ptrs to the partial SATDs per HAD level; filled below but not passed to any call in this function */
+ S32 *api4_satd_pu[HAD_32x32 + 1];
+ S32 *pi4_sad_grid = ps_prms->pi4_sad_grid;
+
+ U08 *pu1_inp = ps_prms->pu1_inp;
+ U08 *pu1_ref = ps_prms->pu1_ref;
+
+ S32 inp_stride = ps_prms->i4_inp_stride;
+ S32 ref_stride = ps_prms->i4_ref_stride;
+
+ //api4_satd_pu[HAD_4x4] = &ai4_satd_4x4[0];
+ api4_satd_pu[HAD_8x8] = &ai4_satd_8x8[0];
+ api4_satd_pu[HAD_16x16] = &ai4_satd_16x16[0];
+ api4_satd_pu[HAD_32x32] = &i4_satd_32x32;
+
+ /* 32x32 SATD is calculated as the sum of the 16 8x8 SATDs in the block (raster order, 4 per row) */
+ for(i = 0; i < 16; i++)
+ {
+ pu1_src = pu1_inp + ((i & 0x3) << 3) + ((i >> 2) * inp_stride * 8);
+
+ pu1_pred = pu1_ref + ((i & 0x3) << 3) + ((i >> 2) * ref_stride * 8);
+
+ ai4_satd_8x8[i] = ps_prms->ps_cmn_utils_optimised_function_list->pf_HAD_8x8_8bit(
+ pu1_src, inp_stride, pu1_pred, ref_stride, NULL, 1);
+ }
+
+ /* Modified to cost calculation using only 8x8 SATD for 32x32*/
+ ai4_satd_16x16[0] = ai4_satd_8x8[0] + ai4_satd_8x8[1] + ai4_satd_8x8[4] + ai4_satd_8x8[5];
+ ai4_satd_16x16[1] = ai4_satd_8x8[2] + ai4_satd_8x8[3] + ai4_satd_8x8[6] + ai4_satd_8x8[7];
+ ai4_satd_16x16[2] = ai4_satd_8x8[8] + ai4_satd_8x8[9] + ai4_satd_8x8[12] + ai4_satd_8x8[13];
+ ai4_satd_16x16[3] = ai4_satd_8x8[10] + ai4_satd_8x8[11] + ai4_satd_8x8[14] + ai4_satd_8x8[15];
+
+ /* Update 32x32 SATD */
+ pi4_sad_grid[PART_ID_2Nx2N] =
+ ai4_satd_16x16[0] + ai4_satd_16x16[1] + ai4_satd_16x16[2] + ai4_satd_16x16[3];
+
+ /* Update 16x16 SATDs */
+ pi4_sad_grid[PART_ID_NxN_TL] = ai4_satd_16x16[0];
+ pi4_sad_grid[PART_ID_NxN_TR] = ai4_satd_16x16[1];
+ pi4_sad_grid[PART_ID_NxN_BL] = ai4_satd_16x16[2];
+ pi4_sad_grid[PART_ID_NxN_BR] = ai4_satd_16x16[3];
+
+ /* Update 16x32 / 32x16 SATDs */
+ pi4_sad_grid[PART_ID_Nx2N_L] = ai4_satd_16x16[0] + ai4_satd_16x16[2];
+ pi4_sad_grid[PART_ID_Nx2N_R] = ai4_satd_16x16[1] + ai4_satd_16x16[3];
+ pi4_sad_grid[PART_ID_2NxN_T] = ai4_satd_16x16[0] + ai4_satd_16x16[1];
+ pi4_sad_grid[PART_ID_2NxN_B] = ai4_satd_16x16[2] + ai4_satd_16x16[3];
+
+ /* Update AMP SATDs 32x24,32x8, 24x32,8x32: one row/column of 8x8 SATDs, plus the adjacent half for the larger part */
+ pi4_sad_grid[PART_ID_nLx2N_L] =
+ ai4_satd_8x8[0] + ai4_satd_8x8[4] + ai4_satd_8x8[8] + ai4_satd_8x8[12];
+
+ pi4_sad_grid[PART_ID_nLx2N_R] = ai4_satd_8x8[1] + ai4_satd_8x8[5] + ai4_satd_8x8[9] +
+ ai4_satd_8x8[13] + pi4_sad_grid[PART_ID_Nx2N_R];
+
+ pi4_sad_grid[PART_ID_nRx2N_L] = ai4_satd_8x8[2] + ai4_satd_8x8[6] + ai4_satd_8x8[10] +
+ ai4_satd_8x8[14] + pi4_sad_grid[PART_ID_Nx2N_L];
+
+ pi4_sad_grid[PART_ID_nRx2N_R] =
+ ai4_satd_8x8[3] + ai4_satd_8x8[7] + ai4_satd_8x8[11] + ai4_satd_8x8[15];
+
+ pi4_sad_grid[PART_ID_2NxnU_T] =
+ ai4_satd_8x8[0] + ai4_satd_8x8[1] + ai4_satd_8x8[2] + ai4_satd_8x8[3];
+
+ pi4_sad_grid[PART_ID_2NxnU_B] = ai4_satd_8x8[4] + ai4_satd_8x8[5] + ai4_satd_8x8[6] +
+ ai4_satd_8x8[7] + pi4_sad_grid[PART_ID_2NxN_B];
+
+ pi4_sad_grid[PART_ID_2NxnD_T] = ai4_satd_8x8[8] + ai4_satd_8x8[9] + ai4_satd_8x8[10] +
+ ai4_satd_8x8[11] + pi4_sad_grid[PART_ID_2NxN_T];
+
+ pi4_sad_grid[PART_ID_2NxnD_B] =
+ ai4_satd_8x8[12] + ai4_satd_8x8[13] + ai4_satd_8x8[14] + ai4_satd_8x8[15];
+}
+/* Runs the recursive 32x32 HAD on one 32x32 block, stores SATD / TU-split flag / early-CBF flag at PART_ID_2Nx2N and returns that SATD. */
+WORD32 hme_evalsatd_pt_pu_32x32_tu_rec(
+ err_prms_t *ps_prms, /* in/out: input and reference pointers/strides, working memory and the output arrays */
+ WORD32 lambda, /* passed through unchanged to ihevce_had_32x32_r */
+ WORD32 lambda_q_shift, /* passed through unchanged to ihevce_had_32x32_r */
+ WORD32 i4_frm_qstep, /* passed through unchanged to ihevce_had_32x32_r */
+ me_func_selector_t *ps_func_selector) /* passed through unchanged to ihevce_had_32x32_r */
+{
+ S32 ai4_satd_4x4[64]; /* num 4x4s in a 32x32 */
+ S32 ai4_satd_8x8[16]; /* num 8x8s in a 32x32 */
+ S32 ai4_tu_split_8x8[16];
+ S32 ai4_satd_16x16[4]; /* num 16x16 in a 32x32 */
+ S32 ai4_tu_split_16x16[4];
+ S32 i4_satd_32x32; /* not initialised here; presumably written by ihevce_had_32x32_r via api4_satd_pu - TODO confirm */
+
+ S32 ai4_tu_early_cbf_8x8[16];
+ S32 ai4_tu_early_cbf_16x16[4];
+ S32 early_cbf_flag; /* not initialised here; presumably written by ihevce_had_32x32_r via api4_tu_early_cbf - TODO confirm */
+
+ S16 *pi2_had_out; /* set below to ps_prms->pu1_wkg_mem, reinterpreted as S16 */
+
+ /* Per-level pointer tables (indexed by HAD_4x4 .. HAD_32x32) handed to the recursive HAD */
+ S32 *api4_satd_pu[HAD_32x32 + 1];
+ S32 *api4_tu_split[HAD_32x32 + 1];
+ S32 *api4_tu_early_cbf[HAD_32x32 + 1];
+
+ S32 *pi4_sad_grid = ps_prms->pi4_sad_grid;
+ S32 *pi4_tu_split_flag = ps_prms->pi4_tu_split_flags;
+ S32 *pi4_tu_early_cbf = ps_prms->pi4_tu_early_cbf;
+
+ S32 tu_split_flag = 0;
+ S32 total_satd_cost = 0;
+
+ U08 *pu1_inp = ps_prms->pu1_inp;
+ U08 *pu1_ref = ps_prms->pu1_ref;
+
+ S32 inp_stride = ps_prms->i4_inp_stride;
+ S32 ref_stride = ps_prms->i4_ref_stride;
+
+ /* Initialize tu_split_cost to "0" */
+ ps_prms->i4_tu_split_cost = 0;
+
+ pi2_had_out = (S16 *)ps_prms->pu1_wkg_mem;
+
+ api4_satd_pu[HAD_4x4] = &ai4_satd_4x4[0];
+ api4_satd_pu[HAD_8x8] = &ai4_satd_8x8[0];
+ api4_satd_pu[HAD_16x16] = &ai4_satd_16x16[0];
+ api4_satd_pu[HAD_32x32] = &i4_satd_32x32;
+
+ api4_tu_split[HAD_4x4] = NULL; /* no 4x4-level TU-split storage is supplied */
+ api4_tu_split[HAD_8x8] = &ai4_tu_split_8x8[0];
+ api4_tu_split[HAD_16x16] = &ai4_tu_split_16x16[0];
+ api4_tu_split[HAD_32x32] = &tu_split_flag;
+
+ api4_tu_early_cbf[HAD_4x4] = NULL; /* no 4x4-level early-CBF storage is supplied */
+ api4_tu_early_cbf[HAD_8x8] = &ai4_tu_early_cbf_8x8[0];
+ api4_tu_early_cbf[HAD_16x16] = &ai4_tu_early_cbf_16x16[0];
+ api4_tu_early_cbf[HAD_32x32] = &early_cbf_flag;
+
+ /* Call recursive 32x32 HAD module; updates satds for 4x4, 8x8, 16x16 and 32x32 */
+ ihevce_had_32x32_r(
+ pu1_inp,
+ inp_stride,
+ pu1_ref,
+ ref_stride,
+ pi2_had_out,
+ 32,
+ api4_satd_pu,
+ api4_tu_split,
+ api4_tu_early_cbf,
+ 0,
+ 8,
+ lambda,
+ lambda_q_shift,
+ i4_frm_qstep,
+ 0,
+ ps_prms->u1_max_tr_depth,
+ ps_prms->u1_max_tr_size,
+ &(ps_prms->i4_tu_split_cost),
+ ps_func_selector);
+
+ total_satd_cost = i4_satd_32x32;
+
+ /*The structure of the TU_SPLIT flag for the current 32x32 is as follows
+ TL_16x16 - 5bits (4 for child and LSBit for 16x16 split)
+ TR_16x16 - 5bits (4 for child and LSBit for 16x16 split)
+ BL_16x16 - 5bits (4 for child and LSBit for 16x16 split)
+ BR_16x16 - 5bits (4 for child and LSBit for 16x16 split)
+ 32x32_split - 1bit (LSBit)
+
+ TU_SPLIT : (TL_16x16)_(TR_16x16)_(BL_16x16)_(BR_16x16)_32x32_split (21bits)*/
+ /* Only the PART_ID_2Nx2N entry of each output array is written by this function */
+ pi4_sad_grid[PART_ID_2Nx2N] = total_satd_cost;
+ pi4_tu_split_flag[PART_ID_2Nx2N] = tu_split_flag;
+ pi4_tu_early_cbf[PART_ID_2Nx2N] = early_cbf_flag;
+
+ return total_satd_cost;
+}
+
+/**
+********************************************************************************
+* @fn void hme_evalsatd_pt_pu_64x64(err_prms_t *ps_prms)
+*
+* @brief Fills the SAD grid with the SATD of each of the 17 partitions of a
+* 64x64 CU (2Nx2N, NxN, Nx2N, 2NxN and the eight AMP sub-partitions)
+*
+* Note : No 64x64, 32x32 or 16x16 Hadamard transform is performed here; every
+* SATD is the sum of the 8x8 Hadamard SATDs it covers. (Original note:
+* TU size of 64 is not supported in HEVC)
+*
+* @param[inout] ps_prms: error prms containing current and ref ptr, strides,
+* pointer to sad grid of each partitions, and the function list supplying pf_HAD_8x8_8bit
+*
+* @return None
+********************************************************************************
+*/
+
+void hme_evalsatd_pt_pu_64x64(err_prms_t *ps_prms)
+{
+ //S32 ai4_satd_4x4[4][64]; /* num 4x4s in a 32x32 * num 32x32 in 64x64 */
+ S32 ai4_satd_8x8[4][16]; /* [32x32 quadrant][8x8 within the quadrant, raster order, 4 per row] */
+ S32 ai4_satd_16x16[4][4]; /* [32x32 quadrant][16x16 within the quadrant: 0=TL 1=TR 2=BL 3=BR] */
+ S32 ai4_satd_32x32[4]; /* num 32x32 in 64x64: 0=TL 1=TR 2=BL 3=BR */
+ // S16 ai2_had_out[32*32];
+ S32 i, j;
+
+ // S32 ai4_tu_split_8x8[4][16];
+ // S32 ai4_tu_split_16x16[4][4];
+ // S32 ai4_tu_split_32x32[4];
+
+ /* Per-level SATD pointer table; it is filled in the loop below but never read in this function */
+ S32 *api4_satd_pu[HAD_32x32 + 1];
+ // S32 *api4_tu_split[HAD_32x32 + 1];
+
+ S32 *pi4_sad_grid = ps_prms->pi4_sad_grid;
+
+ U08 *pu1_inp = ps_prms->pu1_inp;
+ U08 *pu1_ref = ps_prms->pu1_ref;
+ U08 *pu1_src;
+ U08 *pu1_pred;
+
+ S32 inp_stride = ps_prms->i4_inp_stride;
+ S32 ref_stride = ps_prms->i4_ref_stride;
+
+ for(i = 0; i < 4; i++)
+ {
+ S32 blkx = (i & 0x1); /* quadrant column (0 = left, 1 = right) */
+ S32 blky = (i >> 1); /* quadrant row (0 = top, 1 = bottom) */
+ U08 *pu1_pi0, *pu1_pi1; /* top-left sample of quadrant i in the input / reference */
+
+ //api4_satd_pu[HAD_4x4] = &ai4_satd_4x4[i][0];
+ api4_satd_pu[HAD_8x8] = &ai4_satd_8x8[i][0];
+ api4_satd_pu[HAD_16x16] = &ai4_satd_16x16[i][0];
+ api4_satd_pu[HAD_32x32] = &ai4_satd_32x32[i];
+
+ pu1_pi0 = pu1_inp + (blkx * 32) + (blky * 32 * inp_stride);
+ pu1_pi1 = pu1_ref + (blkx * 32) + (blky * 32 * ref_stride);
+
+ /* Compute the sixteen 8x8 SATDs of this 32x32 quadrant (raster order, 4 per row) */
+ for(j = 0; j < 16; j++)
+ {
+ pu1_src = pu1_pi0 + ((j & 0x3) << 3) + ((j >> 2) * inp_stride * 8);
+
+ pu1_pred = pu1_pi1 + ((j & 0x3) << 3) + ((j >> 2) * ref_stride * 8);
+
+ ai4_satd_8x8[i][j] = ps_prms->ps_cmn_utils_optimised_function_list->pf_HAD_8x8_8bit(
+ pu1_src, inp_stride, pu1_pred, ref_stride, NULL, 1);
+ }
+
+ /* Each 16x16 SATD of the quadrant is the sum of its four 8x8 SATDs */
+ ai4_satd_16x16[i][0] =
+ ai4_satd_8x8[i][0] + ai4_satd_8x8[i][1] + ai4_satd_8x8[i][4] + ai4_satd_8x8[i][5];
+ ai4_satd_16x16[i][1] =
+ ai4_satd_8x8[i][2] + ai4_satd_8x8[i][3] + ai4_satd_8x8[i][6] + ai4_satd_8x8[i][7];
+ ai4_satd_16x16[i][2] =
+ ai4_satd_8x8[i][8] + ai4_satd_8x8[i][9] + ai4_satd_8x8[i][12] + ai4_satd_8x8[i][13];
+ ai4_satd_16x16[i][3] =
+ ai4_satd_8x8[i][10] + ai4_satd_8x8[i][11] + ai4_satd_8x8[i][14] + ai4_satd_8x8[i][15];
+ }
+
+ /* Each 32x32 SATD is the sum of the four 16x16 SATDs of its quadrant */
+
+ ai4_satd_32x32[0] =
+ ai4_satd_16x16[0][0] + ai4_satd_16x16[0][1] + ai4_satd_16x16[0][2] + ai4_satd_16x16[0][3];
+ ai4_satd_32x32[1] =
+ ai4_satd_16x16[1][0] + ai4_satd_16x16[1][1] + ai4_satd_16x16[1][2] + ai4_satd_16x16[1][3];
+ ai4_satd_32x32[2] =
+ ai4_satd_16x16[2][0] + ai4_satd_16x16[2][1] + ai4_satd_16x16[2][2] + ai4_satd_16x16[2][3];
+ ai4_satd_32x32[3] =
+ ai4_satd_16x16[3][0] + ai4_satd_16x16[3][1] + ai4_satd_16x16[3][2] + ai4_satd_16x16[3][3];
+
+ /* Update 64x64 SATDs */
+ pi4_sad_grid[PART_ID_2Nx2N] =
+ ai4_satd_32x32[0] + ai4_satd_32x32[1] + ai4_satd_32x32[2] + ai4_satd_32x32[3];
+
+ /* Update 32x32 SATDs */
+ pi4_sad_grid[PART_ID_NxN_TL] = ai4_satd_32x32[0];
+ pi4_sad_grid[PART_ID_NxN_TR] = ai4_satd_32x32[1];
+ pi4_sad_grid[PART_ID_NxN_BL] = ai4_satd_32x32[2];
+ pi4_sad_grid[PART_ID_NxN_BR] = ai4_satd_32x32[3];
+
+ /* Update 32x64 / 64x32 SATDs */
+ pi4_sad_grid[PART_ID_Nx2N_L] = ai4_satd_32x32[0] + ai4_satd_32x32[2];
+ pi4_sad_grid[PART_ID_Nx2N_R] = ai4_satd_32x32[1] + ai4_satd_32x32[3];
+ pi4_sad_grid[PART_ID_2NxN_T] = ai4_satd_32x32[0] + ai4_satd_32x32[1];
+ pi4_sad_grid[PART_ID_2NxN_B] = ai4_satd_32x32[2] + ai4_satd_32x32[3];
+
+ /* Update AMP SATDs 64x48,64x16, 48x64,16x64; nLx2N_L is the leftmost 16 columns */
+ pi4_sad_grid[PART_ID_nLx2N_L] =
+ ai4_satd_16x16[0][0] + ai4_satd_16x16[0][2] + ai4_satd_16x16[2][0] + ai4_satd_16x16[2][2];
+ /* nLx2N_R: second 16-column strip plus the whole right half (reads the Nx2N_R entry written above) */
+ pi4_sad_grid[PART_ID_nLx2N_R] = ai4_satd_16x16[0][1] + ai4_satd_16x16[0][3] +
+ ai4_satd_16x16[2][1] + ai4_satd_16x16[2][3] +
+ pi4_sad_grid[PART_ID_Nx2N_R];
+ /* nRx2N_L: third 16-column strip plus the whole left half (reads the Nx2N_L entry written above) */
+ pi4_sad_grid[PART_ID_nRx2N_L] = ai4_satd_16x16[1][0] + ai4_satd_16x16[1][2] +
+ ai4_satd_16x16[3][0] + ai4_satd_16x16[3][2] +
+ pi4_sad_grid[PART_ID_Nx2N_L];
+ /* nRx2N_R: rightmost 16 columns */
+ pi4_sad_grid[PART_ID_nRx2N_R] =
+ ai4_satd_16x16[1][1] + ai4_satd_16x16[1][3] + ai4_satd_16x16[3][1] + ai4_satd_16x16[3][3];
+ /* 2NxnU_T: topmost 16 rows */
+ pi4_sad_grid[PART_ID_2NxnU_T] =
+ ai4_satd_16x16[0][0] + ai4_satd_16x16[0][1] + ai4_satd_16x16[1][0] + ai4_satd_16x16[1][1];
+ /* 2NxnU_B: second 16-row strip plus the whole bottom half (reads the 2NxN_B entry written above) */
+ pi4_sad_grid[PART_ID_2NxnU_B] = ai4_satd_16x16[0][2] + ai4_satd_16x16[0][3] +
+ ai4_satd_16x16[1][2] + ai4_satd_16x16[1][3] +
+ pi4_sad_grid[PART_ID_2NxN_B];
+ /* 2NxnD_T: third 16-row strip plus the whole top half (reads the 2NxN_T entry written above) */
+ pi4_sad_grid[PART_ID_2NxnD_T] = ai4_satd_16x16[2][0] + ai4_satd_16x16[2][1] +
+ ai4_satd_16x16[3][0] + ai4_satd_16x16[3][1] +
+ pi4_sad_grid[PART_ID_2NxN_T];
+ /* 2NxnD_B: bottommost 16 rows */
+ pi4_sad_grid[PART_ID_2NxnD_B] =
+ ai4_satd_16x16[2][2] + ai4_satd_16x16[2][3] + ai4_satd_16x16[3][2] + ai4_satd_16x16[3][3];
+}
+/* Runs the recursive 32x32 HAD on each of the four 32x32 quadrants of a 64x64 block, writes the summed SATD to PART_ID_2Nx2N and returns it. */
+WORD32 hme_evalsatd_pt_pu_64x64_tu_rec(
+ err_prms_t *ps_prms, /* in/out: input and reference pointers/strides, working memory and the output arrays */
+ WORD32 lambda, /* passed through unchanged to ihevce_had_32x32_r */
+ WORD32 lambda_q_shift, /* passed through unchanged to ihevce_had_32x32_r */
+ WORD32 i4_frm_qstep, /* passed through unchanged to ihevce_had_32x32_r */
+ me_func_selector_t *ps_func_selector) /* passed through unchanged to ihevce_had_32x32_r */
+{
+ S32 ai4_satd_4x4[64]; /* num 4x4s in one 32x32; the same buffer is handed to every quadrant's HAD call */
+ S32 ai4_satd_8x8[16]; /* num 8x8s in one 32x32; the same buffer is handed to every quadrant's HAD call */
+ S32 ai4_satd_16x16[4]; /* num 16x16 in one 32x32; the same buffer is handed to every quadrant's HAD call */
+ S32 ai4_satd_32x32[4]; /* num 32x32 in 64x64 */
+
+ S32 ai4_tu_split_8x8[16];
+ S32 ai4_tu_split_16x16[4];
+
+ S32 ai4_tu_early_cbf_8x8[16];
+ S32 ai4_tu_early_cbf_16x16[4];
+
+ S16 *pi2_had_out; /* set below to ps_prms->pu1_wkg_mem, reinterpreted as S16 */
+ S32 i;
+
+ /* Per-level pointer tables (indexed by HAD_4x4 .. HAD_32x32) handed to the recursive HAD */
+ S32 *api4_satd_pu[HAD_32x32 + 1];
+ S32 *api4_tu_split[HAD_32x32 + 1];
+ S32 *api4_tu_early_cbf[HAD_32x32 + 1];
+
+ S32 *pi4_sad_grid = ps_prms->pi4_sad_grid;
+
+ S32 tu_split_flag = 0; /* reset on every iteration below but never read in this function */
+ S32 total_satd_cost = 0;
+
+ U08 *pu1_inp = ps_prms->pu1_inp;
+ U08 *pu1_ref = ps_prms->pu1_ref;
+
+ S32 inp_stride = ps_prms->i4_inp_stride;
+ S32 ref_stride = ps_prms->i4_ref_stride;
+
+ /* Initialize tu_split_cost to "0" */
+ ps_prms->i4_tu_split_cost = 0;
+
+ pi2_had_out = (S16 *)ps_prms->pu1_wkg_mem;
+
+ for(i = 0; i < 4; i++)
+ {
+ S32 blkx = (i & 0x1); /* quadrant column (0 = left, 1 = right) */
+ S32 blky = (i >> 1); /* quadrant row (0 = top, 1 = bottom) */
+ U08 *pu1_pi0, *pu1_pi1; /* top-left sample of quadrant i in the input / reference */
+ tu_split_flag = 0;
+
+ api4_satd_pu[HAD_4x4] = &ai4_satd_4x4[0];
+ api4_satd_pu[HAD_8x8] = &ai4_satd_8x8[0];
+ api4_satd_pu[HAD_16x16] = &ai4_satd_16x16[0];
+ api4_satd_pu[HAD_32x32] = &ai4_satd_32x32[i];
+
+ api4_tu_split[HAD_4x4] = NULL;
+ api4_tu_split[HAD_8x8] = &ai4_tu_split_8x8[0];
+ api4_tu_split[HAD_16x16] = &ai4_tu_split_16x16[0];
+ api4_tu_split[HAD_32x32] = &ps_prms->pi4_tu_split_flags[i]; /* 32x32-level slot is entry i of the caller's array */
+
+ api4_tu_early_cbf[HAD_4x4] = NULL;
+ api4_tu_early_cbf[HAD_8x8] = &ai4_tu_early_cbf_8x8[0];
+ api4_tu_early_cbf[HAD_16x16] = &ai4_tu_early_cbf_16x16[0];
+ api4_tu_early_cbf[HAD_32x32] = &ps_prms->pi4_tu_early_cbf[i]; /* 32x32-level slot is entry i of the caller's array */
+
+ pu1_pi0 = pu1_inp + (blkx * 32) + (blky * 32 * inp_stride);
+ pu1_pi1 = pu1_ref + (blkx * 32) + (blky * 32 * ref_stride);
+
+ /* Call recursive 32x32 HAD module; updates satds for 4x4, 8x8, 16x16 and 32x32 */
+ ihevce_had_32x32_r(
+ pu1_pi0,
+ inp_stride,
+ pu1_pi1,
+ ref_stride,
+ pi2_had_out,
+ 32,
+ api4_satd_pu,
+ api4_tu_split,
+ api4_tu_early_cbf,
+ 0,
+ 8,
+ lambda,
+ lambda_q_shift,
+ i4_frm_qstep,
+ 1,
+ ps_prms->u1_max_tr_depth,
+ ps_prms->u1_max_tr_size,
+ &(ps_prms->i4_tu_split_cost),
+ ps_func_selector);
+ }
+
+ total_satd_cost = ai4_satd_32x32[0] + ai4_satd_32x32[1] + ai4_satd_32x32[2] + ai4_satd_32x32[3];
+
+ /* Update 64x64 SATDs (same four-term sum as total_satd_cost above) */
+ pi4_sad_grid[PART_ID_2Nx2N] =
+ ai4_satd_32x32[0] + ai4_satd_32x32[1] + ai4_satd_32x32[2] + ai4_satd_32x32[3];
+
+ return total_satd_cost;
+}
+
+/**
+********************************************************************************
+* @fn void hme_subpel_refine_search_node(search_node_t *ps_search_node,
+* hme_subpel_prms_t *ps_prms,
+* layer_ctxt_t *ps_curr_layer,
+* BLK_SIZE_T e_blk_size,
+* S32 x_off,
+* S32 y_off)
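+* @note This header documents the search-node refinement routine; the static helper that immediately follows it is a separate function-pointer selector.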
+* @brief Refines a given partition within a CU
+*
+* @param[in,out] ps_search_node: supplies starting mv and also ref id.
+* updated with the accurate subpel mv
+*
+* @param[in] ps_prms: subpel prms input to this function
+*
+* @param[in] ps_curr_layer : layer context
+*
+* @param[in] e_blk_size : Block size enumeration
+*
+* @param[in] x_off : x offset of the partition w.r.t. pic start
+*
+* @param[in] y_off : y offset of the partition w.r.t. pic start
+*
+* @return None
+********************************************************************************
+*/
+/* Picks the subpel SAD/SATD compute-and-update routine from the result count (1 or 2), the metric (SATD vs SAD) and the partition count. */
+static __inline PF_SAD_RESULT_FXN_T hme_get_calc_sad_and_result_subpel_fxn(
+ me_func_selector_t *ps_func_selector, /* supplies the SATD (pf_evalsatd_*) routines */
+ ihevce_me_optimised_function_list_t *ps_me_optimised_function_list, /* supplies the SAD (pf_calc_sad_*) routines */
+ S32 i4_part_mask, /* only consulted on the SAD path, for the square-parts case */
+ U08 u1_use_satd, /* non-zero selects the SATD routines, zero the SAD routines */
+ U08 u1_num_parts, /* partition count; buckets are ==1, 2..8, and everything else */
+ U08 u1_num_results) /* asserted to be 1 or 2; any value other than 1 takes the 2-result branch */
+{
+ PF_SAD_RESULT_FXN_T pf_err_compute;
+
+ ASSERT((1 == u1_num_results) || (2 == u1_num_results));
+ /* Routines that maintain a single best result */
+ if(1 == u1_num_results)
+ {
+ if(u1_use_satd)
+ {
+ if(u1_num_parts == 1)
+ {
+ pf_err_compute =
+ ps_func_selector->pf_evalsatd_update_1_best_result_pt_pu_16x16_num_part_eq_1;
+ }
+ else if((u1_num_parts > 1) && (u1_num_parts <= 8))
+ {
+ pf_err_compute =
+ ps_func_selector->pf_evalsatd_update_1_best_result_pt_pu_16x16_num_part_lt_9;
+ }
+ else /* every remaining count, including 0, uses the lt_17 routine */
+ {
+ pf_err_compute =
+ ps_func_selector->pf_evalsatd_update_1_best_result_pt_pu_16x16_num_part_lt_17;
+ }
+ }
+ else
+ {
+ if(u1_num_parts == 1)
+ {
+ pf_err_compute = ps_me_optimised_function_list
+ ->pf_calc_sad_and_1_best_result_subpel_num_part_eq_1;
+ }
+ else if(((i4_part_mask & ENABLE_SQUARE_PARTS) != 0) && (u1_num_parts == 5)) /* any ENABLE_SQUARE_PARTS bit set and exactly 5 parts; SAD path only */
+ {
+ pf_err_compute =
+ ps_me_optimised_function_list->pf_calc_sad_and_1_best_result_subpel_square_parts;
+ }
+ else if((u1_num_parts > 1) && (u1_num_parts <= 8))
+ {
+ pf_err_compute = ps_me_optimised_function_list
+ ->pf_calc_sad_and_1_best_result_subpel_num_part_lt_9;
+ }
+ else /* every remaining count, including 0, uses the lt_17 routine */
+ {
+ pf_err_compute = ps_me_optimised_function_list
+ ->pf_calc_sad_and_1_best_result_subpel_num_part_lt_17;
+ }
+ }
+ }
+ else /* routines that maintain two best results; same bucketing as above */
+ {
+ if(u1_use_satd)
+ {
+ if(u1_num_parts == 1)
+ {
+ pf_err_compute =
+ ps_func_selector->pf_evalsatd_update_2_best_results_pt_pu_16x16_num_part_eq_1;
+ }
+ else if((u1_num_parts > 1) && (u1_num_parts <= 8))
+ {
+ pf_err_compute =
+ ps_func_selector->pf_evalsatd_update_2_best_results_pt_pu_16x16_num_part_lt_9;
+ }
+ else
+ {
+ pf_err_compute =
+ ps_func_selector->pf_evalsatd_update_2_best_results_pt_pu_16x16_num_part_lt_17;
+ }
+ }
+ else
+ {
+ if(u1_num_parts == 1)
+ {
+ pf_err_compute = ps_me_optimised_function_list
+ ->pf_calc_sad_and_2_best_results_subpel_num_part_eq_1;
+ }
+ else if(((i4_part_mask & ENABLE_SQUARE_PARTS) != 0) && (u1_num_parts == 5)) /* any ENABLE_SQUARE_PARTS bit set and exactly 5 parts; SAD path only */
+ {
+ pf_err_compute = ps_me_optimised_function_list
+ ->pf_calc_sad_and_2_best_results_subpel_square_parts;
+ }
+ else if((u1_num_parts > 1) && (u1_num_parts <= 8))
+ {
+ pf_err_compute = ps_me_optimised_function_list
+ ->pf_calc_sad_and_2_best_results_subpel_num_part_lt_9;
+ }
+ else
+ {
+ pf_err_compute = ps_me_optimised_function_list
+ ->pf_calc_sad_and_2_best_results_subpel_num_part_lt_17;
+ }
+ }
+ }
+ /* Every path above assigns pf_err_compute before it is returned */
+ return pf_err_compute;
+}
+
+#if DIAMOND_GRID == 1
+S32 hme_subpel_refine_search_node_high_speed(
+ search_node_t *ps_search_node,
+ hme_subpel_prms_t *ps_prms,
+ layer_ctxt_t *ps_curr_layer,
+ BLK_SIZE_T e_blk_size,
+ S32 x_off,
+ S32 y_off,
+ search_results_t *ps_search_results,
+ S32 pred_lx,
+ S32 i4_part_mask,
+ S32 *pi4_valid_part_ids,
+ S32 search_idx,
+ subpel_dedup_enabler_t *ps_dedup_enabler,
+ me_func_selector_t *ps_func_selector,
+ ihevce_me_optimised_function_list_t *ps_me_optimised_function_list)
+{
+ S32 i4_num_hpel_refine, i4_num_qpel_refine;
+ S32 i4_offset, i4_grid_mask;
+ S08 i1_ref_idx;
+ S32 i4_blk_wd, i4_blk_ht;
+ S32 i4_ref_stride, i4_i;
+ pred_ctxt_t *ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
+ result_upd_prms_t s_result_prms;
+ search_node_t s_temp_search_node;
+
+ /*************************************************************************/
+ /* Tracks current MV with the fractional component. */
+ /*************************************************************************/
+ S32 i4_mv_x, i4_mv_y;
+ S32 i4_frac_x, i4_frac_y;
+
+ /*************************************************************************/
+ /* Function pointer for SAD/SATD, array and prms structure to pass to */
+ /* This function */
+ /*************************************************************************/
+ PF_SAD_RESULT_FXN_T pf_err_compute;
+
+ S32 ai4_sad_grid[17], i4_tot_cost;
+ err_prms_t s_err_prms;
+
+ /*************************************************************************/
+ /* Allowed MV RANGE */
+ /*************************************************************************/
+ range_prms_t *ps_range_prms;
+
+ /*************************************************************************/
+ /* stores min id in grid with associated min cost. */
+ /*************************************************************************/
+ S32 i4_min_cost, i4_min_sad;
+ GRID_PT_T e_min_id;
+
+ PF_INTERP_FXN_T pf_qpel_interp;
+ /*************************************************************************/
+ /* For hpel and qpel we move in diamonds and hence each point in the */
+ /* diamond will belong to a completely different plane. To simplify the */
+ /* look up of the ref ptr, we declare a 2x2 array of ref ptrs for the */
+ /* hpel planes which are interpolated during recon. */
+ /*************************************************************************/
+ U08 *apu1_hpel_ref[4], *pu1_ref;
+
+ interp_prms_t s_interp_prms;
+
+ /*************************************************************************/
+ /* Maintains the minimum id of interpolated buffers, and the pointer that*/
+ /* points to the corresponding predicted buf with its stride. */
+ /* Note that the pointer cannot be derived just from the id, since the */
+ /* pointer may also point to the hpel buffer (in case we request interp */
+ /* of a hpel pt, which already exists in the recon hpel planes) */
+ /*************************************************************************/
+ U08 *pu1_final_out;
+ S32 i4_final_out_stride;
+ S32 part_id;
+ S32 check_for_duplicate = 0;
+
+ subpel_refine_ctxt_t *ps_subpel_refine_ctxt = ps_prms->ps_subpel_refine_ctxt;
+
+ S32 mvx_qpel;
+ S32 mvy_qpel;
+
+ pf_err_compute = hme_get_calc_sad_and_result_subpel_fxn(
+ ps_func_selector,
+ ps_me_optimised_function_list,
+ i4_part_mask,
+ ps_prms->i4_use_satd,
+ ps_subpel_refine_ctxt->i4_num_valid_parts,
+ ps_search_results->u1_num_results_per_part);
+
+ i4_num_hpel_refine = ps_prms->i4_num_steps_hpel_refine;
+ i4_num_qpel_refine = ps_prms->i4_num_steps_qpel_refine;
+
+ /* Prediction context should now deal with qpel units */
+ HME_SET_MVPRED_RES(ps_pred_ctxt, MV_RES_QPEL);
+
+ /* Buffer allocation for subpel */
+ /* Current design is that there may be many partitions and different mvs */
+ /* that attempt subpel refinement. While there is possibility of overlap, the */
+ /* hashing to detect and avoid overlap may be very complex. So, currently, */
+ /* the only thing done is to store the eventual predicted buffer with every */
+ /* ctb node that holds the result of the best subpel search */
+
+ /* Compute the base pointer for input, interpolated buffers */
+ /* The base pointers point as follows: */
+ /* fx, fy : 0, 0 :: fx, hy : 0, 0.5 :: hx, fy : 0.5, 0 :: hx, hy : 0.5, 0.5 */
+ /* To these, we need to add the offset of the current node */
+ i4_ref_stride = ps_curr_layer->i4_rec_stride;
+ i4_offset = x_off + (y_off * i4_ref_stride);
+ i1_ref_idx = ps_search_node->i1_ref_idx;
+
+ apu1_hpel_ref[0] = ps_curr_layer->ppu1_list_rec_fxfy[i1_ref_idx] + i4_offset;
+ apu1_hpel_ref[1] = ps_curr_layer->ppu1_list_rec_hxfy[i1_ref_idx] + i4_offset;
+ apu1_hpel_ref[2] = ps_curr_layer->ppu1_list_rec_fxhy[i1_ref_idx] + i4_offset;
+ apu1_hpel_ref[3] = ps_curr_layer->ppu1_list_rec_hxhy[i1_ref_idx] + i4_offset;
+
+ /* Initialize result params used for partition update */
+ s_result_prms.pf_mv_cost_compute = NULL;
+ s_result_prms.ps_search_results = ps_search_results;
+ s_result_prms.pi4_valid_part_ids = pi4_valid_part_ids;
+ s_result_prms.i1_ref_idx = ps_search_node->i1_ref_idx;
+ s_result_prms.u1_pred_lx = search_idx;
+ s_result_prms.i4_part_mask = i4_part_mask;
+ s_result_prms.ps_search_node_base = ps_search_node;
+ s_result_prms.pi4_sad_grid = &ai4_sad_grid[0];
+ s_result_prms.i4_grid_mask = 1;
+ s_result_prms.ps_search_node = &s_temp_search_node;
+ s_temp_search_node.i1_ref_idx = ps_search_node->i1_ref_idx;
+
+ /* convert to hpel units */
+ i4_mv_x = ps_search_node->s_mv.i2_mvx >> 1;
+ i4_mv_y = ps_search_node->s_mv.i2_mvy >> 1;
+
+ /* for first pt, we compute at all locations in the grid, 4 + 1 centre */
+ ps_range_prms = ps_prms->aps_mv_range_qpel[i1_ref_idx];
+ i4_grid_mask = (GRID_DIAMOND_ENABLE_ALL);
+ i4_grid_mask &= hme_clamp_grid_by_mvrange(ps_search_node, 2, ps_range_prms);
+
+ i4_min_cost = MAX_32BIT_VAL;
+ i4_min_sad = MAX_32BIT_VAL;
+
+ /*************************************************************************/
+ /* Prepare the input params to SAD/SATD function. Note that input is */
+ /* passed from the calling function since it may be I (normal subpel */
+ /* refinement) or 2I - P0 in case of bidirect subpel refinement. */
+ /* Both cases are handled here. */
+ /*************************************************************************/
+ s_err_prms.pu1_inp = (U08 *)ps_prms->pv_inp;
+ s_err_prms.i4_inp_stride = ps_prms->i4_inp_stride;
+ s_err_prms.i4_ref_stride = i4_ref_stride;
+ s_err_prms.i4_part_mask = (ENABLE_2Nx2N);
+ s_err_prms.i4_grid_mask = 1;
+ s_err_prms.pi4_sad_grid = &ai4_sad_grid[0];
+ s_err_prms.i4_blk_wd = i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
+ s_err_prms.i4_blk_ht = i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
+
+ s_result_prms.ps_subpel_refine_ctxt = ps_subpel_refine_ctxt;
+
+ part_id = ps_search_node->u1_part_id;
+ for(i4_i = 0; i4_i < i4_num_hpel_refine; i4_i++)
+ {
+ e_min_id = PT_C;
+
+ mvx_qpel = i4_mv_x << 1;
+ mvy_qpel = i4_mv_y << 1;
+
+ /* Central pt */
+ if(i4_grid_mask & BIT_EN(PT_C))
+ {
+ //ps_search_node->i2_mv_x = (S16)i4_mv_x;
+ //ps_search_node->i2_mv_x = (S16)i4_mv_y;
+ /* central pt is i4_mv_x, i4_mv_y */
+ CHECK_FOR_DUPES_AND_INSERT_UNIQUE_NODES(
+ ps_dedup_enabler, 1, mvx_qpel, mvy_qpel, check_for_duplicate);
+
+ i4_frac_x = i4_mv_x & 1;
+ i4_frac_y = i4_mv_y & 1;
+ pu1_ref = apu1_hpel_ref[i4_frac_y * 2 + i4_frac_x];
+ s_err_prms.pu1_ref = pu1_ref + (i4_mv_x >> 1) + ((i4_mv_y >> 1) * i4_ref_stride);
+
+ /* Update the mv's with the current candt motion vectors */
+ s_result_prms.i2_mv_x = mvx_qpel;
+ s_result_prms.i2_mv_y = mvy_qpel;
+ s_temp_search_node.s_mv.i2_mvx = mvx_qpel;
+ s_temp_search_node.s_mv.i2_mvy = mvy_qpel;
+
+ pf_err_compute(&s_err_prms, &s_result_prms);
+
+ i4_tot_cost = s_err_prms.pi4_sad_grid[part_id];
+ if(i4_tot_cost < i4_min_cost)
+ {
+ i4_min_cost = i4_tot_cost;
+ i4_min_sad = s_err_prms.pi4_sad_grid[part_id];
+ e_min_id = PT_C;
+ pu1_final_out = s_err_prms.pu1_ref;
+ }
+ }
+
+ /* left pt */
+ if(i4_grid_mask & BIT_EN(PT_L))
+ {
+ CHECK_FOR_DUPES_AND_INSERT_UNIQUE_NODES(
+ ps_dedup_enabler, 1, mvx_qpel - 2, mvy_qpel, check_for_duplicate);
+
+ if(!check_for_duplicate)
+ {
+ /* search node mv is stored in qpel units */
+ ps_search_node->s_mv.i2_mvx = (S16)((i4_mv_x - 1) << 1);
+ ps_search_node->s_mv.i2_mvy = (S16)(i4_mv_y << 1);
+ /* central pt is i4_mv_x - 1, i4_mv_y */
+ i4_frac_x = (i4_mv_x - 1) & 1; // same as (x-1)&1
+ i4_frac_y = i4_mv_y & 1;
+ pu1_ref = apu1_hpel_ref[i4_frac_y * 2 + i4_frac_x];
+ s_err_prms.pu1_ref =
+ pu1_ref + ((i4_mv_x - 1) >> 1) + ((i4_mv_y >> 1) * i4_ref_stride);
+
+ /* Update the mv's with the current candt motion vectors */
+ s_result_prms.i2_mv_x = mvx_qpel - 2;
+ s_result_prms.i2_mv_y = mvy_qpel;
+ s_temp_search_node.s_mv.i2_mvx = mvx_qpel - 2;
+ s_temp_search_node.s_mv.i2_mvy = mvy_qpel;
+
+ pf_err_compute(&s_err_prms, &s_result_prms);
+ //hme_update_results_pt_pu_best1_subpel_hs(&s_result_prms);
+ i4_tot_cost = s_err_prms.pi4_sad_grid[part_id];
+ if(i4_tot_cost < i4_min_cost)
+ {
+ i4_min_cost = i4_tot_cost;
+ i4_min_sad = s_err_prms.pi4_sad_grid[part_id];
+ e_min_id = PT_L;
+ pu1_final_out = s_err_prms.pu1_ref;
+ }
+ }
+ }
+ /* top pt */
+ if(i4_grid_mask & BIT_EN(PT_T))
+ {
+ CHECK_FOR_DUPES_AND_INSERT_UNIQUE_NODES(
+ ps_dedup_enabler, 1, mvx_qpel, mvy_qpel - 2, check_for_duplicate);
+
+ if(!check_for_duplicate)
+ {
+ /* search node mv is stored in qpel units */
+ ps_search_node->s_mv.i2_mvx = (S16)(i4_mv_x << 1);
+ ps_search_node->s_mv.i2_mvy = (S16)((i4_mv_y - 1) << 1);
+ /* top pt is i4_mv_x, i4_mv_y - 1 */
+ i4_frac_x = i4_mv_x & 1;
+ i4_frac_y = (i4_mv_y - 1) & 1;
+ pu1_ref = apu1_hpel_ref[i4_frac_y * 2 + i4_frac_x];
+ s_err_prms.pu1_ref =
+ pu1_ref + (i4_mv_x >> 1) + (((i4_mv_y - 1) >> 1) * i4_ref_stride);
+
+ /* Update the mv's with the current candt motion vectors */
+ s_result_prms.i2_mv_x = mvx_qpel;
+ s_result_prms.i2_mv_y = mvy_qpel - 2;
+ s_temp_search_node.s_mv.i2_mvx = mvx_qpel;
+ s_temp_search_node.s_mv.i2_mvy = mvy_qpel - 2;
+
+ pf_err_compute(&s_err_prms, &s_result_prms);
+ //hme_update_results_pt_pu_best1_subpel_hs(&s_result_prms);
+ i4_tot_cost = s_err_prms.pi4_sad_grid[part_id];
+ if(i4_tot_cost < i4_min_cost)
+ {
+ i4_min_cost = i4_tot_cost;
+ i4_min_sad = s_err_prms.pi4_sad_grid[part_id];
+ e_min_id = PT_T;
+ pu1_final_out = s_err_prms.pu1_ref;
+ }
+ }
+ }
+ /* right pt */
+ if(i4_grid_mask & BIT_EN(PT_R))
+ {
+ CHECK_FOR_DUPES_AND_INSERT_UNIQUE_NODES(
+ ps_dedup_enabler, num_unique_nodes, mvx_qpel + 2, mvy_qpel, check_for_duplicate);
+ if(!check_for_duplicate)
+ {
+ /* search node mv is stored in qpel units */
+ ps_search_node->s_mv.i2_mvx = (S16)((i4_mv_x + 1) << 1);
+ ps_search_node->s_mv.i2_mvy = (S16)(i4_mv_y << 1);
+ /* right pt is i4_mv_x + 1, i4_mv_y */
+ i4_frac_x = (i4_mv_x + 1) & 1;
+ i4_frac_y = i4_mv_y & 1;
+
+ pu1_ref = apu1_hpel_ref[i4_frac_y * 2 + i4_frac_x];
+ s_err_prms.pu1_ref =
+ pu1_ref + ((i4_mv_x + 1) >> 1) + ((i4_mv_y >> 1) * i4_ref_stride);
+
+ /* Update the mv's with the current candt motion vectors */
+ s_result_prms.i2_mv_x = mvx_qpel + 2;
+ s_result_prms.i2_mv_y = mvy_qpel;
+ s_temp_search_node.s_mv.i2_mvx = mvx_qpel + 2;
+ s_temp_search_node.s_mv.i2_mvy = mvy_qpel;
+
+ pf_err_compute(&s_err_prms, &s_result_prms);
+ //hme_update_results_pt_pu_best1_subpel_hs(&s_result_prms);
+ i4_tot_cost = s_err_prms.pi4_sad_grid[part_id];
+ if(i4_tot_cost < i4_min_cost)
+ {
+ i4_min_cost = i4_tot_cost;
+ i4_min_sad = s_err_prms.pi4_sad_grid[part_id];
+ e_min_id = PT_R;
+ pu1_final_out = s_err_prms.pu1_ref;
+ }
+ }
+ }
+ /* bottom pt */
+ if(i4_grid_mask & BIT_EN(PT_B))
+ {
+ CHECK_FOR_DUPES_AND_INSERT_UNIQUE_NODES(
+ ps_dedup_enabler, num_unique_nodes, mvx_qpel, mvy_qpel + 2, check_for_duplicate);
+ if(!check_for_duplicate)
+ {
+ /* search node mv is stored in qpel units */
+ ps_search_node->s_mv.i2_mvx = ((S16)i4_mv_x << 1);
+ ps_search_node->s_mv.i2_mvy = ((S16)(i4_mv_y + 1) << 1);
+ i4_frac_x = i4_mv_x & 1;
+ i4_frac_y = (i4_mv_y + 1) & 1;
+ pu1_ref = apu1_hpel_ref[i4_frac_y * 2 + i4_frac_x];
+ s_err_prms.pu1_ref =
+ pu1_ref + (i4_mv_x >> 1) + (((i4_mv_y + 1) >> 1) * i4_ref_stride);
+
+ /* Update the mv's with the current candt motion vectors */
+ s_result_prms.i2_mv_x = mvx_qpel;
+ s_result_prms.i2_mv_y = mvy_qpel + 2;
+ s_temp_search_node.s_mv.i2_mvx = mvx_qpel;
+ s_temp_search_node.s_mv.i2_mvy = mvy_qpel + 2;
+
+ pf_err_compute(&s_err_prms, &s_result_prms);
+ //hme_update_results_pt_pu_best1_subpel_hs(&s_result_prms);
+ i4_tot_cost = s_err_prms.pi4_sad_grid[part_id];
+ if(i4_tot_cost < i4_min_cost)
+ {
+ i4_min_cost = i4_tot_cost;
+ i4_min_sad = s_err_prms.pi4_sad_grid[part_id];
+ e_min_id = PT_B;
+ pu1_final_out = s_err_prms.pu1_ref;
+ }
+ }
+ }
+ /* Early exit in case of central point */
+ if(e_min_id == PT_C)
+ break;
+
+ /*********************************************************************/
+ /* Depending on the best result location, we may be able to skip */
+ /* atleast two pts, centre pt and one more pt. E.g. if right pt is */
+ /* the best result, the next iteration need not do centre, left pts */
+ /*********************************************************************/
+ i4_grid_mask = gai4_opt_grid_mask_diamond[e_min_id];
+ i4_mv_x += gai1_grid_id_to_x[e_min_id];
+ i4_mv_y += gai1_grid_id_to_y[e_min_id];
+ ps_search_node->s_mv.i2_mvx = (S16)i4_mv_x;
+ ps_search_node->s_mv.i2_mvy = (S16)i4_mv_y;
+ i4_grid_mask &= hme_clamp_grid_by_mvrange(ps_search_node, 2, ps_range_prms);
+ }
+
+ /* Convert to QPEL units */
+ i4_mv_x <<= 1;
+ i4_mv_y <<= 1;
+
+ ps_search_node->s_mv.i2_mvx = (S16)i4_mv_x;
+ ps_search_node->s_mv.i2_mvy = (S16)i4_mv_y;
+
+ /* Exact interpolation or averaging chosen here */
+ pf_qpel_interp = ps_prms->pf_qpel_interp;
+
+ /* Next QPEL ME */
+ /* In this case, we have option of doing exact QPEL interpolation or avg */
+ /*************************************************************************/
+ /* x */
+ /* A b C d */
+ /* e f g h */
+ /* I j K l */
+ /* m n o p */
+ /* Q r S t */
+ /* */
+ /* Approximate QPEL logic */
+ /* b = avg(A,C) f = avg(I,C), g= avg(C,K) j=avg(I,K) */
+ /* for any given pt, we can get all the information required about */
+ /* the surrounding 4 pts. For example, given point C (0.5, 0) */
+ /* surrounding pts info: */
+ /* b : qpel offset: 1, 0, generated by averaging. buffer1: fpel buf */
+ /* buffer 2: hxfy, offsets for both are 0, 0 */
+ /* similarly for other pts the info can be gotten */
+ /*************************************************************************/
+ i4_grid_mask = GRID_DIAMOND_ENABLE_ALL ^ (BIT_EN(PT_C));
+ i4_grid_mask &= hme_clamp_grid_by_mvrange(ps_search_node, 1, ps_range_prms);
+
+ /*************************************************************************/
+ /* One time preparation of non changing interpolation params. These */
+ /* include a set of ping pong result buf ptrs, input buf ptrs and some */
+ /* working memory (not used though in case of averaging). */
+ /*************************************************************************/
+ s_interp_prms.ppu1_ref = &apu1_hpel_ref[0];
+ s_interp_prms.i4_ref_stride = i4_ref_stride;
+ s_interp_prms.i4_blk_wd = i4_blk_wd;
+ s_interp_prms.i4_blk_ht = i4_blk_ht;
+
+ i4_final_out_stride = i4_ref_stride;
+
+ {
+ U08 *pu1_mem;
+ /*********************************************************************/
+ /* Allocation of working memory for interpolated buffers. We maintain*/
+ /* an intermediate working buffer, and 2 ping pong interpolated out */
+ /* buffers, purpose of ping pong explained later below */
+ /*********************************************************************/
+ pu1_mem = ps_prms->pu1_wkg_mem;
+ s_interp_prms.pu1_wkg_mem = pu1_mem;
+
+ //pu1_mem += (INTERP_INTERMED_BUF_SIZE);
+ s_interp_prms.apu1_interp_out[0] = pu1_mem;
+
+ pu1_mem += (INTERP_OUT_BUF_SIZE);
+ s_interp_prms.apu1_interp_out[1] = pu1_mem;
+
+ pu1_mem += (INTERP_OUT_BUF_SIZE);
+ s_interp_prms.apu1_interp_out[2] = pu1_mem;
+
+ pu1_mem += (INTERP_OUT_BUF_SIZE);
+ s_interp_prms.apu1_interp_out[3] = pu1_mem;
+
+ pu1_mem += (INTERP_OUT_BUF_SIZE);
+ s_interp_prms.apu1_interp_out[4] = pu1_mem;
+
+ /*********************************************************************/
+ /* Stride of interpolated output is just a function of blk width of */
+ /* this partition and hence remains constant for this partition */
+ /*********************************************************************/
+ s_interp_prms.i4_out_stride = (i4_blk_wd);
+ }
+
+ {
+ UWORD8 *apu1_final[4];
+ WORD32 ai4_ref_stride[4];
+ /*************************************************************************/
+ /* Ping pong design for interpolated buffers. We use a min id, which */
+ /* tracks the id of the ppu1_interp_out that stores the best result. */
+ /* When new interp to be done, it uses 1 - best result id to do interp   */
+ /* min id is toggled when any new result becomes the best result. */
+ /*************************************************************************/
+
+ for(i4_i = 0; i4_i < i4_num_qpel_refine; i4_i++)
+ {
+ e_min_id = PT_C;
+
+ mvx_qpel = i4_mv_x;
+ mvy_qpel = i4_mv_y;
+ hme_qpel_interp_comprehensive(
+ &s_interp_prms,
+ apu1_final,
+ ai4_ref_stride,
+ i4_mv_x,
+ i4_mv_y,
+ i4_grid_mask,
+ ps_me_optimised_function_list);
+ if(i4_grid_mask & BIT_EN(PT_L))
+ {
+ CHECK_FOR_DUPES_AND_INSERT_UNIQUE_NODES(
+ ps_dedup_enabler,
+ num_unique_nodes,
+ mvx_qpel - 1,
+ mvy_qpel - 0,
+ check_for_duplicate);
+
+ if(!check_for_duplicate)
+ {
+ ps_search_node->s_mv.i2_mvx = (S16)i4_mv_x - 1;
+ ps_search_node->s_mv.i2_mvy = (S16)i4_mv_y;
+
+ s_err_prms.pu1_ref = apu1_final[0];
+ s_err_prms.i4_ref_stride = ai4_ref_stride[0];
+
+ /* Update the mv's with the current candt motion vectors */
+ s_result_prms.i2_mv_x = mvx_qpel - 1;
+ s_result_prms.i2_mv_y = mvy_qpel;
+ s_temp_search_node.s_mv.i2_mvx = mvx_qpel - 1;
+ s_temp_search_node.s_mv.i2_mvy = mvy_qpel;
+
+ pf_err_compute(&s_err_prms, &s_result_prms);
+ //hme_update_results_pt_pu_best1_subpel_hs(&s_result_prms);
+
+ i4_tot_cost = s_err_prms.pi4_sad_grid[part_id];
+ if(i4_tot_cost < i4_min_cost)
+ {
+ e_min_id = PT_L;
+ i4_min_cost = i4_tot_cost;
+ i4_min_sad = s_err_prms.pi4_sad_grid[part_id];
+ }
+ }
+ }
+ if(i4_grid_mask & BIT_EN(PT_T))
+ {
+ CHECK_FOR_DUPES_AND_INSERT_UNIQUE_NODES(
+ ps_dedup_enabler,
+ num_unique_nodes,
+ mvx_qpel - 0,
+ mvy_qpel - 1,
+ check_for_duplicate);
+
+ if(!check_for_duplicate)
+ {
+ ps_search_node->s_mv.i2_mvx = (S16)i4_mv_x;
+ ps_search_node->s_mv.i2_mvy = (S16)i4_mv_y - 1;
+
+ s_err_prms.pu1_ref = apu1_final[1];
+ s_err_prms.i4_ref_stride = ai4_ref_stride[1];
+
+ /* Update the mv's with the current candt motion vectors */
+ s_result_prms.i2_mv_x = mvx_qpel;
+ s_result_prms.i2_mv_y = mvy_qpel - 1;
+
+ s_temp_search_node.s_mv.i2_mvx = mvx_qpel;
+ s_temp_search_node.s_mv.i2_mvy = mvy_qpel - 1;
+
+ pf_err_compute(&s_err_prms, &s_result_prms);
+
+ //hme_update_results_pt_pu_best1_subpel_hs(&s_result_prms);
+ i4_tot_cost = s_err_prms.pi4_sad_grid[part_id];
+ if(i4_tot_cost < i4_min_cost)
+ {
+ e_min_id = PT_T;
+ i4_min_cost = i4_tot_cost;
+ i4_min_sad = s_err_prms.pi4_sad_grid[part_id];
+ }
+ }
+ }
+ if(i4_grid_mask & BIT_EN(PT_R))
+ {
+ CHECK_FOR_DUPES_AND_INSERT_UNIQUE_NODES(
+ ps_dedup_enabler, num_unique_nodes, mvx_qpel + 1, mvy_qpel, check_for_duplicate);
+
+ if(!check_for_duplicate)
+ {
+ ps_search_node->s_mv.i2_mvx = (S16)i4_mv_x + 1;
+ ps_search_node->s_mv.i2_mvy = (S16)i4_mv_y;
+
+ s_err_prms.pu1_ref = apu1_final[2];
+ s_err_prms.i4_ref_stride = ai4_ref_stride[2];
+
+ /* Update the mv's with the current candt motion vectors */
+ s_result_prms.i2_mv_x = mvx_qpel + 1;
+ s_result_prms.i2_mv_y = mvy_qpel;
+
+ s_temp_search_node.s_mv.i2_mvx = mvx_qpel + 1;
+ s_temp_search_node.s_mv.i2_mvy = mvy_qpel;
+
+ pf_err_compute(&s_err_prms, &s_result_prms);
+
+ //hme_update_results_pt_pu_best1_subpel_hs(&s_result_prms);
+
+ i4_tot_cost = s_err_prms.pi4_sad_grid[part_id];
+ if(i4_tot_cost < i4_min_cost)
+ {
+ e_min_id = PT_R;
+ i4_min_cost = i4_tot_cost;
+ i4_min_sad = s_err_prms.pi4_sad_grid[part_id];
+ }
+ }
+ }
+ /* i4_mv_x and i4_mv_y will always be the centre pt */
+ /* for qpel we start with least hpel, and hence compute of center pt never reqd */
+ if(i4_grid_mask & BIT_EN(PT_B))
+ {
+ CHECK_FOR_DUPES_AND_INSERT_UNIQUE_NODES(
+ ps_dedup_enabler, num_unique_nodes, mvx_qpel, mvy_qpel + 1, check_for_duplicate);
+
+ if(!check_for_duplicate)
+ {
+ ps_search_node->s_mv.i2_mvx = (S16)i4_mv_x;
+ ps_search_node->s_mv.i2_mvy = (S16)i4_mv_y + 1;
+
+ s_err_prms.pu1_ref = apu1_final[3];
+ s_err_prms.i4_ref_stride = ai4_ref_stride[3];
+
+ /* Update the mv's with the current candt motion vectors */
+ s_result_prms.i2_mv_x = mvx_qpel;
+ s_result_prms.i2_mv_y = mvy_qpel + 1;
+
+ s_temp_search_node.s_mv.i2_mvx = mvx_qpel;
+ s_temp_search_node.s_mv.i2_mvy = mvy_qpel + 1;
+
+ pf_err_compute(&s_err_prms, &s_result_prms);
+
+ //hme_update_results_pt_pu_best1_subpel_hs(&s_result_prms);
+ i4_tot_cost = s_err_prms.pi4_sad_grid[part_id];
+ if(i4_tot_cost < i4_min_cost)
+ {
+ e_min_id = PT_B;
+ i4_min_cost = i4_tot_cost;
+ i4_min_sad = s_err_prms.pi4_sad_grid[part_id];
+ }
+ }
+ }
+
+ /* New QPEL mv x and y */
+ if(e_min_id == PT_C)
+ break;
+ i4_grid_mask = gai4_opt_grid_mask_diamond[e_min_id];
+ i4_mv_x += gai1_grid_id_to_x[e_min_id];
+ i4_mv_y += gai1_grid_id_to_y[e_min_id];
+ ps_search_node->s_mv.i2_mvx = (S16)i4_mv_x;
+ ps_search_node->s_mv.i2_mvy = (S16)i4_mv_y;
+ i4_grid_mask &= hme_clamp_grid_by_mvrange(ps_search_node, 1, ps_range_prms);
+ }
+ }
+
+ /* update modified motion vectors and cost at end of subpel */
+ ps_search_node->s_mv.i2_mvx = (S16)i4_mv_x;
+ ps_search_node->s_mv.i2_mvy = (S16)i4_mv_y;
+ ps_search_node->i4_tot_cost = i4_min_cost;
+ ps_search_node->i4_sad = i4_min_sad;
+
+ /********************************************************************************/
+ /* TODO: Restoring back Sad lambda from Hadamard lambda */
+ /* Need to pass the had/satd lambda in more cleaner way for subpel cost compute */
+ /********************************************************************************/
+ //ps_pred_ctxt->lambda >>= 1;
+
+ return (i4_min_cost);
+}
+#elif DIAMOND_GRID == 0
+S32 hme_subpel_refine_search_node_high_speed(
+ search_node_t *ps_search_node,
+ hme_subpel_prms_t *ps_prms,
+ layer_ctxt_t *ps_curr_layer,
+ BLK_SIZE_T e_blk_size,
+ S32 x_off,
+ S32 y_off,
+ search_results_t *ps_search_results,
+ S32 pred_lx,
+ S32 i4_part_mask,
+ S32 *pi4_valid_part_ids,
+ S32 search_idx,
+ subpel_dedup_enabler_t *ps_dedup_enabler,
+ me_func_selector_t *ps_func_selector)
+{
+ S32 i4_num_hpel_refine, i4_num_qpel_refine;
+ S32 i4_offset, i4_grid_mask;
+ S08 i1_ref_idx;
+ S32 i4_blk_wd, i4_blk_ht;
+ S32 i4_ref_stride, i4_i;
+ pred_ctxt_t *ps_pred_ctxt = &ps_search_results->as_pred_ctxt[pred_lx];
+ result_upd_prms_t s_result_prms;
+
+ /*************************************************************************/
+ /* Tracks current MV with the fractional component. */
+ /*************************************************************************/
+ S32 i4_mv_x, i4_mv_y;
+ S32 i4_frac_x, i4_frac_y;
+
+ /*************************************************************************/
+ /* Function pointer for SAD/SATD, array and prms structure to pass to */
+ /* This function */
+ /*************************************************************************/
+ PF_SAD_FXN_T pf_err_compute;
+ S32 ai4_sad_grid[9][17], i4_tot_cost;
+ err_prms_t s_err_prms;
+
+ /*************************************************************************/
+ /* Allowed MV RANGE */
+ /*************************************************************************/
+ range_prms_t *ps_range_prms;
+
+ /*************************************************************************/
+ /* stores min id in grid with associated min cost. */
+ /*************************************************************************/
+ S32 i4_min_cost, i4_min_sad;
+ GRID_PT_T e_min_id;
+
+ PF_INTERP_FXN_T pf_qpel_interp;
+ /*************************************************************************/
+ /* For hpel and qpel we move in diamonds and hence each point in the */
+ /* diamond will belong to a completely different plane. To simplify the */
+ /* look up of the ref ptr, we declare a 2x2 array of ref ptrs for the */
+ /* hpel planes which are interpolated during recon. */
+ /*************************************************************************/
+ U08 *apu1_hpel_ref[4], *pu1_ref;
+
+ interp_prms_t s_interp_prms;
+
+ /*************************************************************************/
+ /* Maintains the minimum id of interpolated buffers, and the pointer that*/
+ /* points to the corresponding predicted buf with its stride. */
+ /* Note that the pointer cannot be derived just from the id, since the */
+ /* pointer may also point to the hpel buffer (in case we request interp */
+ /* of a hpel pt, which already exists in the recon hpel planes) */
+ /*************************************************************************/
+ U08 *pu1_final_out;
+ S32 i4_final_out_stride;
+ S32 part_id;
+ S32 check_for_duplicate = 0;
+
+ S32 mvx_qpel;
+ S32 mvy_qpel;
+
+ /*************************************************************************/
+ /* Appropriate Err compute fxn, depends on SAD/SATD, blk size and remains*/
+ /* fixed through this subpel refinement for this partition. */
+ /* Note, we do not enable grid sads since each pt is different buffers. */
+ /* Hence, part mask is also nearly dont care and we use 2Nx2N enabled. */
+ /*************************************************************************/
+ if(ps_prms->i4_use_satd)
+ {
+ pf_err_compute = hme_evalsatd_update_1_best_result_pt_pu_16x16;
+ }
+ else
+ {
+ pf_err_compute = hme_evalsad_grid_pu_16x16; /* hme_evalsad_pt_pu_16x16; */
+ }
+
+ i4_num_hpel_refine = ps_prms->i4_num_steps_hpel_refine;
+ i4_num_qpel_refine = ps_prms->i4_num_steps_qpel_refine;
+
+ /* Prediction context should now deal with qpel units */
+ HME_SET_MVPRED_RES(ps_pred_ctxt, MV_RES_QPEL);
+
+ /* Buffer allocation for subpel */
+ /* Current design is that there may be many partitions and different mvs */
+ /* that attempt subpel refinement. While there is possibility of overlap, the */
+ /* hashing to detect and avoid overlap may be very complex. So, currently, */
+ /* the only thing done is to store the eventual predicted buffer with every */
+ /* ctb node that holds the result of the best subpel search */
+
+ /* Compute the base pointer for input, interpolated buffers */
+ /* The base pointers point as follows: */
+ /* fx fy : 0, 0 :: fx, hy : 0, 0.5, hx, fy: 0.5, 0, hx, hy: 0.5, 0.5 */
+ /* To these, we need to add the offset of the current node */
+ i4_ref_stride = ps_curr_layer->i4_rec_stride;
+ i4_offset = x_off + (y_off * i4_ref_stride);
+ i1_ref_idx = ps_search_node->i1_ref_idx;
+
+ apu1_hpel_ref[0] = ps_curr_layer->ppu1_list_rec_fxfy[i1_ref_idx] + i4_offset;
+ apu1_hpel_ref[1] = ps_curr_layer->ppu1_list_rec_hxfy[i1_ref_idx] + i4_offset;
+ apu1_hpel_ref[2] = ps_curr_layer->ppu1_list_rec_fxhy[i1_ref_idx] + i4_offset;
+ apu1_hpel_ref[3] = ps_curr_layer->ppu1_list_rec_hxhy[i1_ref_idx] + i4_offset;
+
+ /* Initialize result params used for partition update */
+ s_result_prms.pf_mv_cost_compute = NULL;
+ s_result_prms.ps_search_results = ps_search_results;
+ s_result_prms.pi4_valid_part_ids = pi4_valid_part_ids;
+ s_result_prms.i1_ref_idx = search_idx;
+ s_result_prms.i4_part_mask = i4_part_mask;
+ s_result_prms.ps_search_node_base = ps_search_node;
+ s_result_prms.pi4_sad_grid = &ai4_sad_grid[0][0];
+ s_result_prms.i4_grid_mask = 1;
+
+ /* convert to hpel units */
+ i4_mv_x = ps_search_node->s_mv.i2_mvx >> 1;
+ i4_mv_y = ps_search_node->s_mv.i2_mvy >> 1;
+
+ /* for first pt, we compute at all locations in the grid, 4 + 1 centre */
+ ps_range_prms = ps_prms->ps_mv_range_qpel;
+ i4_grid_mask = (GRID_ALL_PTS_VALID);
+ i4_grid_mask &= hme_clamp_grid_by_mvrange(ps_search_node, 2, ps_range_prms);
+
+ i4_min_cost = MAX_32BIT_VAL;
+ i4_min_sad = MAX_32BIT_VAL;
+
+ /*************************************************************************/
+ /* Prepare the input params to SAD/SATD function. Note that input is */
+ /* passed from the calling function since it may be I (normal subpel */
+ /* refinement) or 2I - P0 in case of bidirect subpel refinement. */
+ /* Both cases are handled here. */
+ /*************************************************************************/
+ s_err_prms.pu1_inp = (U08 *)ps_prms->pv_inp;
+ s_err_prms.i4_inp_stride = ps_prms->i4_inp_stride;
+ s_err_prms.i4_ref_stride = i4_ref_stride;
+ s_err_prms.i4_part_mask = (ENABLE_2Nx2N);
+ s_err_prms.i4_grid_mask = 1;
+ s_err_prms.pi4_sad_grid = &ai4_sad_grid[0][0];
+ s_err_prms.i4_blk_wd = i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
+ s_err_prms.i4_blk_ht = i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
+
+ /* TODO: Currently doubling lambda for Hadamard Sad instead of 1.9*sadlambda */
+ //ps_pred_ctxt->lambda <<= 1;
+ part_id = ps_search_node->u1_part_id;
+ for(i4_i = 0; i4_i < i4_num_hpel_refine; i4_i++)
+ {
+ e_min_id = PT_C;
+
+ mvx_qpel = i4_mv_x << 1;
+ mvy_qpel = i4_mv_y << 1;
+
+ /* Central pt */
+ if(i4_grid_mask & BIT_EN(PT_C))
+ {
+ //ps_search_node->i2_mv_x = (S16)i4_mv_x;
+ //ps_search_node->i2_mv_x = (S16)i4_mv_y;
+ /* central pt is i4_mv_x, i4_mv_y */
+ CHECK_FOR_DUPES_AND_INSERT_UNIQUE_NODES(
+ ps_dedup_enabler, 1, mvx_qpel, mvy_qpel, check_for_duplicate);
+
+ i4_frac_x = i4_mv_x & 1;
+ i4_frac_y = i4_mv_y & 1;
+ pu1_ref = apu1_hpel_ref[i4_frac_y * 2 + i4_frac_x];
+ s_err_prms.pu1_ref = pu1_ref + (i4_mv_x >> 1) + ((i4_mv_y >> 1) * i4_ref_stride);
+ pf_err_compute(&s_err_prms);
+ /* Update the mv's with the current candt motion vectors */
+ s_result_prms.i2_mv_x = mvx_qpel;
+ s_result_prms.i2_mv_y = mvy_qpel;
+ hme_update_results_pt_pu_best1_subpel_hs(&s_result_prms);
+ i4_tot_cost = s_err_prms.pi4_sad_grid[part_id];
+ if(i4_tot_cost < i4_min_cost)
+ {
+ i4_min_cost = i4_tot_cost;
+ i4_min_sad = s_err_prms.pi4_sad_grid[part_id];
+ e_min_id = PT_C;
+ pu1_final_out = s_err_prms.pu1_ref;
+ }
+ }
+
+ /* left pt */
+ if(i4_grid_mask & BIT_EN(PT_L))
+ {
+ CHECK_FOR_DUPES_AND_INSERT_UNIQUE_NODES(
+ ps_dedup_enabler, 1, mvx_qpel - 2, mvy_qpel, check_for_duplicate);
+
+ if(!check_for_duplicate)
+ {
+ /* search node mv is stored in qpel units */
+ ps_search_node->s_mv.i2_mvx = (S16)((i4_mv_x - 1) << 1);
+ ps_search_node->s_mv.i2_mvy = (S16)(i4_mv_y << 1);
+ /* central pt is i4_mv_x - 1, i4_mv_y */
+ i4_frac_x = (i4_mv_x - 1) & 1; // same as (x-1)&1
+ i4_frac_y = i4_mv_y & 1;
+ pu1_ref = apu1_hpel_ref[i4_frac_y * 2 + i4_frac_x];
+ s_err_prms.pu1_ref =
+ pu1_ref + ((i4_mv_x - 1) >> 1) + ((i4_mv_y >> 1) * i4_ref_stride);
+
+ pf_err_compute(&s_err_prms);
+ /* Update the mv's with the current candt motion vectors */
+ s_result_prms.i2_mv_x = mvx_qpel;
+ s_result_prms.i2_mv_y = mvy_qpel;
+ hme_update_results_pt_pu_best1_subpel_hs(&s_result_prms);
+
+ i4_tot_cost = s_err_prms.pi4_sad_grid[part_id];
+
+ if(i4_tot_cost < i4_min_cost)
+ {
+ i4_min_cost = i4_tot_cost;
+ i4_min_sad = s_err_prms.pi4_sad_grid[part_id];
+ e_min_id = PT_L;
+ pu1_final_out = s_err_prms.pu1_ref;
+ }
+ }
+ }
+ /* top pt */
+ if(i4_grid_mask & BIT_EN(PT_T))
+ {
+ CHECK_FOR_DUPES_AND_INSERT_UNIQUE_NODES(
+ ps_dedup_enabler, 1, mvx_qpel, mvy_qpel - 2, check_for_duplicate);
+
+ if(!check_for_duplicate)
+ {
+ /* search node mv is stored in qpel units */
+ ps_search_node->s_mv.i2_mvx = (S16)(i4_mv_x << 1);
+ ps_search_node->s_mv.i2_mvy = (S16)((i4_mv_y - 1) << 1);
+ /* top pt is i4_mv_x, i4_mv_y - 1 */
+ i4_frac_x = i4_mv_x & 1;
+ i4_frac_y = (i4_mv_y - 1) & 1;
+ pu1_ref = apu1_hpel_ref[i4_frac_y * 2 + i4_frac_x];
+ s_err_prms.pu1_ref =
+ pu1_ref + (i4_mv_x >> 1) + (((i4_mv_y - 1) >> 1) * i4_ref_stride);
+ pf_err_compute(&s_err_prms);
+ /* Update the mv's with the current candt motion vectors */
+ s_result_prms.i2_mv_x = mvx_qpel;
+ s_result_prms.i2_mv_y = mvy_qpel - 2;
+ hme_update_results_pt_pu_best1_subpel_hs(&s_result_prms);
+
+ i4_tot_cost = s_err_prms.pi4_sad_grid[part_id];
+
+ if(i4_tot_cost < i4_min_cost)
+ {
+ i4_min_cost = i4_tot_cost;
+ i4_min_sad = s_err_prms.pi4_sad_grid[part_id];
+ e_min_id = PT_T;
+ pu1_final_out = s_err_prms.pu1_ref;
+ }
+ }
+ }
+ /* right pt */
+ if(i4_grid_mask & BIT_EN(PT_R))
+ {
+ CHECK_FOR_DUPES_AND_INSERT_UNIQUE_NODES(
+ ps_dedup_enabler, 1, mvx_qpel + 2, mvy_qpel, check_for_duplicate);
+
+ if(!check_for_duplicate)
+ {
+ /* search node mv is stored in qpel units */
+ ps_search_node->s_mv.i2_mvx = (S16)((i4_mv_x + 1) << 1);
+ ps_search_node->s_mv.i2_mvy = (S16)(i4_mv_y << 1);
+ /* right pt is i4_mv_x + 1, i4_mv_y */
+ i4_frac_x = (i4_mv_x + 1) & 1;
+ i4_frac_y = i4_mv_y & 1;
+
+ pu1_ref = apu1_hpel_ref[i4_frac_y * 2 + i4_frac_x];
+ s_err_prms.pu1_ref =
+ pu1_ref + ((i4_mv_x + 1) >> 1) + ((i4_mv_y >> 1) * i4_ref_stride);
+ pf_err_compute(&s_err_prms);
+ /* Update the mv's with the current candt motion vectors */
+ s_result_prms.i2_mv_x = mvx_qpel + 2;
+ s_result_prms.i2_mv_y = mvy_qpel;
+ hme_update_results_pt_pu_best1_subpel_hs(&s_result_prms);
+
+ i4_tot_cost = s_err_prms.pi4_sad_grid[part_id];
+
+ if(i4_tot_cost < i4_min_cost)
+ {
+ i4_min_cost = i4_tot_cost;
+ i4_min_sad = s_err_prms.pi4_sad_grid[part_id];
+ e_min_id = PT_R;
+ pu1_final_out = s_err_prms.pu1_ref;
+ }
+ }
+ }
+ /* bottom pt */
+ if(i4_grid_mask & BIT_EN(PT_B))
+ {
+ CHECK_FOR_DUPES_AND_INSERT_UNIQUE_NODES(
+ ps_dedup_enabler, 1, mvx_qpel, mvy_qpel + 2, check_for_duplicate);
+
+ if(!check_for_duplicate)
+ {
+ /* search node mv is stored in qpel units */
+ ps_search_node->s_mv.i2_mvx = ((S16)i4_mv_x << 1);
+ ps_search_node->s_mv.i2_mvy = ((S16)(i4_mv_y + 1) << 1);
+ i4_frac_x = i4_mv_x & 1;
+ i4_frac_y = (i4_mv_y + 1) & 1;
+ pu1_ref = apu1_hpel_ref[i4_frac_y * 2 + i4_frac_x];
+ s_err_prms.pu1_ref =
+ pu1_ref + (i4_mv_x >> 1) + (((i4_mv_y + 1) >> 1) * i4_ref_stride);
+
+ pf_err_compute(&s_err_prms);
+ /* Update the mv's with the current candt motion vectors */
+ s_result_prms.i2_mv_x = mvx_qpel;
+ s_result_prms.i2_mv_y = mvy_qpel + 2;
+ hme_update_results_pt_pu_best1_subpel_hs(&s_result_prms);
+
+ i4_tot_cost = s_err_prms.pi4_sad_grid[part_id];
+
+ if(i4_tot_cost < i4_min_cost)
+ {
+ i4_min_cost = i4_tot_cost;
+ i4_min_sad = s_err_prms.pi4_sad_grid[part_id];
+ e_min_id = PT_B;
+ pu1_final_out = s_err_prms.pu1_ref;
+ }
+ }
+ }
+ if(e_min_id == PT_C)
+ {
+ if(!i4_i)
+ {
+ /* TL pt */
+ if(i4_grid_mask & BIT_EN(PT_TL))
+ {
+ S32 mvx_minus_1 = (i4_mv_x - 1);
+ S32 mvy_minus_1 = (i4_mv_y - 1);
+
+ CHECK_FOR_DUPES_AND_INSERT_UNIQUE_NODES(
+ ps_dedup_enabler, 1, mvx_qpel - 2, mvy_qpel - 2, check_for_duplicate);
+
+ if(!check_for_duplicate)
+ {
+ /* search node mv is stored in qpel units */
+ ps_search_node->s_mv.i2_mvx = ((S16)mvx_minus_1 << 1);
+ ps_search_node->s_mv.i2_mvy = ((S16)mvy_minus_1 << 1);
+ i4_frac_x = mvx_minus_1 & 1;
+ i4_frac_y = mvy_minus_1 & 1;
+ pu1_ref = apu1_hpel_ref[i4_frac_y * 2 + i4_frac_x];
+ s_err_prms.pu1_ref =
+ pu1_ref + (mvx_minus_1 >> 1) + ((mvy_minus_1 >> 1) * i4_ref_stride);
+
+ pf_err_compute(&s_err_prms);
+ /* Update the mv's with the current candt motion vectors */
+ s_result_prms.i2_mv_x = mvx_qpel - 2;
+ s_result_prms.i2_mv_y = mvy_qpel - 2;
+ hme_update_results_pt_pu_best1_subpel_hs(&s_result_prms);
+
+ i4_tot_cost = s_err_prms.pi4_sad_grid[part_id];
+
+ if(i4_tot_cost < i4_min_cost)
+ {
+ i4_min_cost = i4_tot_cost;
+ i4_min_sad = s_err_prms.pi4_sad_grid[part_id];
+ e_min_id = PT_TL;
+ pu1_final_out = s_err_prms.pu1_ref;
+ }
+ }
+ }
+ /* TR pt */
+ if(i4_grid_mask & BIT_EN(PT_TR))
+ {
+ S32 mvx_plus_1 = (i4_mv_x + 1);
+ S32 mvy_minus_1 = (i4_mv_y - 1);
+
+ CHECK_FOR_DUPES_AND_INSERT_UNIQUE_NODES(
+ ps_dedup_enabler, 1, mvx_qpel + 2, mvy_qpel - 2, check_for_duplicate);
+
+ if(!check_for_duplicate)
+ {
+ /* search node mv is stored in qpel units */
+ ps_search_node->s_mv.i2_mvx = ((S16)mvx_plus_1 << 1);
+ ps_search_node->s_mv.i2_mvy = ((S16)mvy_minus_1 << 1);
+ i4_frac_x = mvx_plus_1 & 1;
+ i4_frac_y = mvy_minus_1 & 1;
+ pu1_ref = apu1_hpel_ref[i4_frac_y * 2 + i4_frac_x];
+ s_err_prms.pu1_ref =
+ pu1_ref + (mvx_plus_1 >> 1) + ((mvy_minus_1 >> 1) * i4_ref_stride);
+
+ pf_err_compute(&s_err_prms);
+ /* Update the mv's with the current candt motion vectors */
+ s_result_prms.i2_mv_x = mvx_qpel + 2;
+ s_result_prms.i2_mv_y = mvy_qpel - 2;
+ hme_update_results_pt_pu_best1_subpel_hs(&s_result_prms);
+
+ i4_tot_cost = s_err_prms.pi4_sad_grid[part_id];
+
+ if(i4_tot_cost < i4_min_cost)
+ {
+ i4_min_cost = i4_tot_cost;
+ i4_min_sad = s_err_prms.pi4_sad_grid[part_id];
+ e_min_id = PT_TR;
+ pu1_final_out = s_err_prms.pu1_ref;
+ }
+ }
+ }
+ /* BL pt */
+ if(i4_grid_mask & BIT_EN(PT_BL))
+ {
+ S32 mvx_minus_1 = (i4_mv_x - 1);
+ S32 mvy_plus_1 = (i4_mv_y + 1);
+
+ CHECK_FOR_DUPES_AND_INSERT_UNIQUE_NODES(
+ ps_dedup_enabler, 1, mvx_qpel - 2, mvy_qpel + 2, check_for_duplicate);
+
+ if(!check_for_duplicate)
+ {
+ /* search node mv is stored in qpel units */
+ ps_search_node->s_mv.i2_mvx = ((S16)mvx_minus_1 << 1);
+ ps_search_node->s_mv.i2_mvy = ((S16)mvy_plus_1 << 1);
+ i4_frac_x = mvx_minus_1 & 1;
+ i4_frac_y = mvy_plus_1 & 1;
+ pu1_ref = apu1_hpel_ref[i4_frac_y * 2 + i4_frac_x];
+ s_err_prms.pu1_ref =
+ pu1_ref + (mvx_minus_1 >> 1) + ((mvy_plus_1 >> 1) * i4_ref_stride);
+
+ pf_err_compute(&s_err_prms);
+ /* Update the mv's with the current candt motion vectors */
+ s_result_prms.i2_mv_x = mvx_qpel - 2;
+ s_result_prms.i2_mv_y = mvy_qpel + 2;
+ hme_update_results_pt_pu_best1_subpel_hs(&s_result_prms);
+
+ i4_tot_cost = s_err_prms.pi4_sad_grid[part_id];
+
+ if(i4_tot_cost < i4_min_cost)
+ {
+ i4_min_cost = i4_tot_cost;
+ i4_min_sad = s_err_prms.pi4_sad_grid[part_id];
+ e_min_id = PT_BL;
+ pu1_final_out = s_err_prms.pu1_ref;
+ }
+ }
+ }
+ /* BR pt */
+ if(i4_grid_mask & BIT_EN(PT_BR))
+ {
+ S32 mvx_plus_1 = (i4_mv_x + 1);
+ S32 mvy_plus_1 = (i4_mv_y + 1);
+ CHECK_FOR_DUPES_AND_INSERT_UNIQUE_NODES(
+ ps_dedup_enabler, 1, mvx_qpel + 2, mvy_qpel + 2, check_for_duplicate);
+
+ if(!check_for_duplicate)
+ {
+ /* search node mv is stored in qpel units */
+ ps_search_node->s_mv.i2_mvx = ((S16)mvx_plus_1 << 1);
+ ps_search_node->s_mv.i2_mvy = ((S16)mvy_plus_1 << 1);
+ i4_frac_x = mvx_plus_1 & 1;
+ i4_frac_y = mvy_plus_1 & 1;
+ pu1_ref = apu1_hpel_ref[i4_frac_y * 2 + i4_frac_x];
+ s_err_prms.pu1_ref =
+ pu1_ref + (mvx_plus_1 >> 1) + ((mvy_plus_1 >> 1) * i4_ref_stride);
+
+ pf_err_compute(&s_err_prms);
+ /* Update the mv's with the current candt motion vectors */
+ s_result_prms.i2_mv_x = mvx_qpel + 2;
+ s_result_prms.i2_mv_y = mvy_qpel + 2;
+ hme_update_results_pt_pu_best1_subpel_hs(&s_result_prms);
+
+ i4_tot_cost = s_err_prms.pi4_sad_grid[part_id];
+
+ if(i4_tot_cost < i4_min_cost)
+ {
+ i4_min_cost = i4_tot_cost;
+ i4_min_sad = s_err_prms.pi4_sad_grid[part_id];
+ e_min_id = PT_BR;
+ pu1_final_out = s_err_prms.pu1_ref;
+ }
+ }
+ }
+ if(e_min_id == PT_C)
+ {
+ break;
+ }
+ }
+ else
+ {
+ break;
+ }
+ }
+
+ /*********************************************************************/
+ /* Depending on the best result location, we may be able to skip */
+ /* atleast two pts, centre pt and one more pt. E.g. if right pt is */
+ /* the best result, the next iteration need not do centre, left pts */
+ /*********************************************************************/
+ if(i4_i)
+ {
+ i4_grid_mask = gai4_opt_grid_mask_diamond[e_min_id];
+ }
+ else
+ {
+ i4_grid_mask = gai4_opt_grid_mask_conventional[e_min_id];
+ }
+ i4_mv_x += gai1_grid_id_to_x[e_min_id];
+ i4_mv_y += gai1_grid_id_to_y[e_min_id];
+ ps_search_node->s_mv.i2_mvx = (S16)(i4_mv_x << 1);
+ ps_search_node->s_mv.i2_mvy = (S16)(i4_mv_y << 1);
+ i4_grid_mask &= hme_clamp_grid_by_mvrange(ps_search_node, 2, ps_range_prms);
+ }
+
+ /* Convert to QPEL units */
+ i4_mv_x <<= 1;
+ i4_mv_y <<= 1;
+
+ ps_search_node->s_mv.i2_mvx = (S16)i4_mv_x;
+ ps_search_node->s_mv.i2_mvy = (S16)i4_mv_y;
+
+ /* Early exit if this partition is visiting same hpel mv again */
+ /* Assumption : Checking for early exit in best result of partition */
+ if((ps_search_results->aps_part_results[search_idx][part_id][0].i2_best_hpel_mv_x ==
+ ps_search_node->s_mv.i2_mvx) &&
+ (ps_search_results->aps_part_results[search_idx][part_id][0].i2_best_hpel_mv_y ==
+ ps_search_node->s_mv.i2_mvy))
+ {
+ return (ps_search_results->aps_part_results[search_idx][part_id][0].i4_tot_cost);
+ }
+ else
+ {
+ /* Store the best hpel mv for future early exit checks */
+ ps_search_results->aps_part_results[search_idx][part_id][0].i2_best_hpel_mv_x =
+ (S16)i4_mv_x;
+ ps_search_results->aps_part_results[search_idx][part_id][0].i2_best_hpel_mv_y =
+ (S16)i4_mv_y;
+ }
+
+ /* Early exit if this partition is visiting same hpel mv again */
+ /* Assumption : Checking for early exit in second best result of partition */
+ if((ps_search_results->aps_part_results[search_idx][part_id][1].i2_best_hpel_mv_x ==
+ ps_search_node->s_mv.i2_mvx) &&
+ (ps_search_results->aps_part_results[search_idx][part_id][1].i2_best_hpel_mv_y ==
+ ps_search_node->s_mv.i2_mvy))
+ {
+ return (ps_search_results->aps_part_results[search_idx][part_id][1].i4_tot_cost);
+ }
+ else
+ {
+ /* Store the best hpel mv for future early exit checks */
+ ps_search_results->aps_part_results[search_idx][part_id][1].i2_best_hpel_mv_x =
+ (S16)i4_mv_x;
+ ps_search_results->aps_part_results[search_idx][part_id][1].i2_best_hpel_mv_y =
+ (S16)i4_mv_y;
+ }
+
+ /* Exact interpolation or averaging chosen here */
+ pf_qpel_interp = ps_prms->pf_qpel_interp;
+
+ /* Next QPEL ME */
+ /* In this case, we have option of doing exact QPEL interpolation or avg */
+ /*************************************************************************/
+ /* x */
+ /* A b C d */
+ /* e f g h */
+ /* I j K l */
+ /* m n o p */
+ /* Q r S t */
+ /* */
+ /* Approximate QPEL logic */
+ /* b = avg(A,C) f = avg(I,C), g= avg(C,K) j=avg(I,K) */
+ /* for any given pt, we can get all the information required about */
+ /* the surrounding 4 pts. For example, given point C (0.5, 0) */
+ /* surrounding pts info: */
+ /* b : qpel offset: 1, 0, generated by averaging. buffer1: fpel buf */
+ /* buffer 2: hxfy, offsets for both are 0, 0 */
+ /* similarly for other pts the info can be gotten */
+ /*************************************************************************/
+ i4_grid_mask = GRID_ALL_PTS_VALID ^ (BIT_EN(PT_C));
+ i4_grid_mask &= hme_clamp_grid_by_mvrange(ps_search_node, 1, ps_range_prms);
+
+ /*************************************************************************/
+ /* One time preparation of non changing interpolation params. These */
+ /* include a set of ping pong result buf ptrs, input buf ptrs and some */
+ /* working memory (not used though in case of averaging). */
+ /*************************************************************************/
+ s_interp_prms.ppu1_ref = &apu1_hpel_ref[0];
+ s_interp_prms.i4_ref_stride = i4_ref_stride;
+ s_interp_prms.i4_blk_wd = i4_blk_wd;
+ s_interp_prms.i4_blk_ht = i4_blk_ht;
+
+ i4_final_out_stride = i4_ref_stride;
+
+ {
+ U08 *pu1_mem;
+ /*********************************************************************/
+ /* Allocation of working memory for interpolated buffers. We maintain*/
+ /* an intermediate working buffer, and 2 ping pong interpolated out */
+ /* buffers, purpose of ping pong explained later below */
+ /*********************************************************************/
+ pu1_mem = ps_prms->pu1_wkg_mem;
+ s_interp_prms.pu1_wkg_mem = pu1_mem;
+
+ //pu1_mem += (INTERP_INTERMED_BUF_SIZE);
+ s_interp_prms.apu1_interp_out[0] = pu1_mem;
+
+ pu1_mem += (INTERP_OUT_BUF_SIZE);
+ s_interp_prms.apu1_interp_out[1] = pu1_mem;
+
+ pu1_mem += (INTERP_OUT_BUF_SIZE);
+ s_interp_prms.apu1_interp_out[2] = pu1_mem;
+
+ pu1_mem += (INTERP_OUT_BUF_SIZE);
+ s_interp_prms.apu1_interp_out[3] = pu1_mem;
+
+ pu1_mem += (INTERP_OUT_BUF_SIZE);
+ s_interp_prms.apu1_interp_out[4] = pu1_mem;
+
+ /*********************************************************************/
+ /* Stride of interpolated output is just a function of blk width of */
+ /* this partition and hence remains constant for this partition */
+ /*********************************************************************/
+ s_interp_prms.i4_out_stride = (i4_blk_wd);
+ }
+
+ {
+ UWORD8 *apu1_final[4];
+ WORD32 ai4_ref_stride[4];
+ /*************************************************************************/
+ /* Ping pong design for interpolated buffers. We use a min id, which */
+ /* tracks the id of the ppu1_interp_out that stores the best result. */
+ /* When new interp to be done, it uses 1 - best result id to do interp   */
+ /* min id is toggled when any new result becomes the best result. */
+ /*************************************************************************/
+
+ for(i4_i = 0; i4_i < i4_num_qpel_refine; i4_i++)
+ {
+ e_min_id = PT_C;
+
+ hme_qpel_interp_comprehensive(
+ &s_interp_prms, apu1_final, ai4_ref_stride, i4_mv_x, i4_mv_y, i4_grid_mask);
+
+ mvx_qpel = i4_mv_x;
+ mvy_qpel = i4_mv_y;
+
+ if(i4_grid_mask & BIT_EN(PT_L))
+ {
+ CHECK_FOR_DUPES_AND_INSERT_UNIQUE_NODES(
+ ps_dedup_enabler, 1, mvx_qpel - 1, mvy_qpel - 0, check_for_duplicate);
+
+ if(!check_for_duplicate)
+ {
+ ps_search_node->s_mv.i2_mvx = (S16)i4_mv_x - 1;
+ ps_search_node->s_mv.i2_mvy = (S16)i4_mv_y;
+
+ s_err_prms.pu1_ref = apu1_final[0];
+ s_err_prms.i4_ref_stride = ai4_ref_stride[0];
+
+ pf_err_compute(&s_err_prms);
+ /* Update the mv's with the current candt motion vectors */
+ s_result_prms.i2_mv_x = mvx_qpel - 1;
+ s_result_prms.i2_mv_y = mvy_qpel;
+ hme_update_results_pt_pu_best1_subpel_hs(&s_result_prms);
+
+ i4_tot_cost = s_err_prms.pi4_sad_grid[part_id];
+ if(i4_tot_cost < i4_min_cost)
+ {
+ e_min_id = PT_L;
+ i4_min_cost = i4_tot_cost;
+ i4_min_sad = s_err_prms.pi4_sad_grid[part_id];
+ }
+ }
+ }
+ if(i4_grid_mask & BIT_EN(PT_T))
+ {
+ CHECK_FOR_DUPES_AND_INSERT_UNIQUE_NODES(
+ ps_dedup_enabler, 1, mvx_qpel - 0, mvy_qpel - 1, check_for_duplicate);
+
+ if(!check_for_duplicate)
+ {
+ ps_search_node->s_mv.i2_mvx = (S16)i4_mv_x;
+ ps_search_node->s_mv.i2_mvy = (S16)i4_mv_y - 1;
+
+ s_err_prms.pu1_ref = apu1_final[1];
+ s_err_prms.i4_ref_stride = ai4_ref_stride[1];
+
+ pf_err_compute(&s_err_prms);
+ /* Update the mv's with the current candt motion vectors */
+ s_result_prms.i2_mv_x = mvx_qpel;
+ s_result_prms.i2_mv_y = mvy_qpel - 1;
+ hme_update_results_pt_pu_best1_subpel_hs(&s_result_prms);
+ i4_tot_cost = s_err_prms.pi4_sad_grid[part_id];
+ if(i4_tot_cost < i4_min_cost)
+ {
+ e_min_id = PT_T;
+ i4_min_cost = i4_tot_cost;
+ i4_min_sad = s_err_prms.pi4_sad_grid[part_id];
+ }
+ }
+ }
+ if(i4_grid_mask & BIT_EN(PT_R))
+ {
+ CHECK_FOR_DUPES_AND_INSERT_UNIQUE_NODES(
+ ps_dedup_enabler, 1, mvx_qpel + 1, mvy_qpel, check_for_duplicate);
+
+ if(!check_for_duplicate)
+ {
+ ps_search_node->s_mv.i2_mvx = (S16)i4_mv_x + 1;
+ ps_search_node->s_mv.i2_mvy = (S16)i4_mv_y;
+
+ s_err_prms.pu1_ref = apu1_final[2];
+ s_err_prms.i4_ref_stride = ai4_ref_stride[2];
+
+ pf_err_compute(&s_err_prms);
+ /* Update the mv's with the current candt motion vectors */
+ s_result_prms.i2_mv_x = mvx_qpel + 1;
+ s_result_prms.i2_mv_y = mvy_qpel;
+ hme_update_results_pt_pu_best1_subpel_hs(&s_result_prms);
+
+ i4_tot_cost = s_err_prms.pi4_sad_grid[part_id];
+ if(i4_tot_cost < i4_min_cost)
+ {
+ e_min_id = PT_R;
+ i4_min_cost = i4_tot_cost;
+ i4_min_sad = s_err_prms.pi4_sad_grid[part_id];
+ }
+ }
+ }
+ /* i4_mv_x and i4_mv_y will always be the centre pt */
+ /* for qpel we start with least hpel, and hence compute of center pt never reqd */
+ if(i4_grid_mask & BIT_EN(PT_B))
+ {
+ CHECK_FOR_DUPES_AND_INSERT_UNIQUE_NODES(
+ ps_dedup_enabler, 1, mvx_qpel, mvy_qpel + 1, check_for_duplicate);
+
+ if(!check_for_duplicate)
+ {
+ ps_search_node->s_mv.i2_mvx = (S16)i4_mv_x;
+ ps_search_node->s_mv.i2_mvy = (S16)i4_mv_y + 1;
+
+ s_err_prms.pu1_ref = apu1_final[3];
+ s_err_prms.i4_ref_stride = ai4_ref_stride[3];
+
+ pf_err_compute(&s_err_prms);
+ /* Update the mv's with the current candt motion vectors */
+ s_result_prms.i2_mv_x = mvx_qpel;
+ s_result_prms.i2_mv_y = mvy_qpel + 1;
+ hme_update_results_pt_pu_best1_subpel_hs(&s_result_prms);
+ i4_tot_cost = s_err_prms.pi4_sad_grid[part_id];
+ if(i4_tot_cost < i4_min_cost)
+ {
+ e_min_id = PT_B;
+ i4_min_cost = i4_tot_cost;
+ i4_min_sad = s_err_prms.pi4_sad_grid[part_id];
+ }
+ }
+ }
+
+ if(e_min_id == PT_C)
+ {
+ if(!i4_i)
+ {
+ S32 i4_interp_buf_id = 0;
+
+ if(i4_grid_mask & BIT_EN(PT_TL))
+ {
+ CHECK_FOR_DUPES_AND_INSERT_UNIQUE_NODES(
+ ps_dedup_enabler, 1, mvx_qpel - 1, mvy_qpel - 1, check_for_duplicate);
+
+ if(!check_for_duplicate)
+ {
+ ps_search_node->s_mv.i2_mvx = (S16)i4_mv_x - 1;
+ ps_search_node->s_mv.i2_mvy = (S16)i4_mv_y - 1;
+
+ /* Carry out the interpolation */
+ pf_qpel_interp(
+ &s_interp_prms, i4_mv_x - 1, i4_mv_y - 1, i4_interp_buf_id);
+
+ s_err_prms.pu1_ref = s_interp_prms.pu1_final_out;
+ s_err_prms.i4_ref_stride = s_interp_prms.i4_final_out_stride;
+
+ pf_err_compute(&s_err_prms);
+ /* Update the mv's with the current candt motion vectors */
+ s_result_prms.i2_mv_x = mvx_qpel - 1;
+ s_result_prms.i2_mv_y = mvy_qpel - 1;
+ hme_update_results_pt_pu_best1_subpel_hs(&s_result_prms);
+
+ i4_tot_cost = s_err_prms.pi4_sad_grid[part_id];
+
+ if(i4_tot_cost < i4_min_cost)
+ {
+ e_min_id = PT_TL;
+ i4_min_cost = i4_tot_cost;
+ i4_min_sad = s_err_prms.pi4_sad_grid[part_id];
+ }
+ }
+ }
+ if(i4_grid_mask & BIT_EN(PT_TR))
+ {
+ CHECK_FOR_DUPES_AND_INSERT_UNIQUE_NODES(
+ ps_dedup_enabler, 1, mvx_qpel + 1, mvy_qpel - 1, check_for_duplicate);
+
+ if(!check_for_duplicate)
+ {
+ ps_search_node->s_mv.i2_mvx = (S16)i4_mv_x + 1;
+ ps_search_node->s_mv.i2_mvy = (S16)i4_mv_y - 1;
+
+ /* Carry out the interpolation */
+ pf_qpel_interp(
+ &s_interp_prms, i4_mv_x + 1, i4_mv_y - 1, i4_interp_buf_id);
+
+ s_err_prms.pu1_ref = s_interp_prms.pu1_final_out;
+ s_err_prms.i4_ref_stride = s_interp_prms.i4_final_out_stride;
+
+ pf_err_compute(&s_err_prms);
+ /* Update the mv's with the current candt motion vectors */
+ s_result_prms.i2_mv_x = mvx_qpel + 1;
+ s_result_prms.i2_mv_y = mvy_qpel - 1;
+ hme_update_results_pt_pu_best1_subpel_hs(&s_result_prms);
+
+ i4_tot_cost = s_err_prms.pi4_sad_grid[part_id];
+
+ if(i4_tot_cost < i4_min_cost)
+ {
+ e_min_id = PT_TR;
+ i4_min_cost = i4_tot_cost;
+ i4_min_sad = s_err_prms.pi4_sad_grid[part_id];
+ }
+ }
+ }
+ if(i4_grid_mask & BIT_EN(PT_BL))
+ {
+ CHECK_FOR_DUPES_AND_INSERT_UNIQUE_NODES(
+ ps_dedup_enabler, 1, mvx_qpel - 1, mvy_qpel + 1, check_for_duplicate);
+
+ if(!check_for_duplicate)
+ {
+ ps_search_node->s_mv.i2_mvx = (S16)i4_mv_x - 1;
+ ps_search_node->s_mv.i2_mvy = (S16)i4_mv_y + 1;
+
+ /* Carry out the interpolation */
+ pf_qpel_interp(
+ &s_interp_prms, i4_mv_x - 1, i4_mv_y + 1, i4_interp_buf_id);
+
+ s_err_prms.pu1_ref = s_interp_prms.pu1_final_out;
+ s_err_prms.i4_ref_stride = s_interp_prms.i4_final_out_stride;
+
+ pf_err_compute(&s_err_prms);
+ /* Update the mv's with the current candt motion vectors */
+ s_result_prms.i2_mv_x = mvx_qpel - 1;
+ s_result_prms.i2_mv_y = mvy_qpel + 1;
+ hme_update_results_pt_pu_best1_subpel_hs(&s_result_prms);
+
+ i4_tot_cost = s_err_prms.pi4_sad_grid[part_id];
+
+ if(i4_tot_cost < i4_min_cost)
+ {
+ e_min_id = PT_BL;
+ i4_min_cost = i4_tot_cost;
+ i4_min_sad = s_err_prms.pi4_sad_grid[part_id];
+ }
+ }
+ }
+ /* i4_mv_x and i4_mv_y will always be the centre pt */
+ /* for qpel we start with least hpel, and hence compute of center pt never reqd */
+ if(i4_grid_mask & BIT_EN(PT_BR))
+ {
+ CHECK_FOR_DUPES_AND_INSERT_UNIQUE_NODES(
+ ps_dedup_enabler, 1, mvx_qpel + 1, mvy_qpel + 1, check_for_duplicate);
+
+ if(!check_for_duplicate)
+ {
+ ps_search_node->s_mv.i2_mvx = (S16)i4_mv_x + 1;
+ ps_search_node->s_mv.i2_mvy = (S16)i4_mv_y + 1;
+
+ /* Carry out the interpolation */
+ pf_qpel_interp(
+ &s_interp_prms, i4_mv_x + 1, i4_mv_y + 1, i4_interp_buf_id);
+
+ s_err_prms.pu1_ref = s_interp_prms.pu1_final_out;
+ s_err_prms.i4_ref_stride = s_interp_prms.i4_final_out_stride;
+
+ pf_err_compute(&s_err_prms);
+ /* Update the mv's with the current candt motion vectors */
+ s_result_prms.i2_mv_x = mvx_qpel + 1;
+ s_result_prms.i2_mv_y = mvy_qpel + 1;
+ hme_update_results_pt_pu_best1_subpel_hs(&s_result_prms);
+
+ i4_tot_cost = s_err_prms.pi4_sad_grid[part_id];
+
+ if(i4_tot_cost < i4_min_cost)
+ {
+ e_min_id = PT_BR;
+ i4_min_cost = i4_tot_cost;
+ i4_min_sad = s_err_prms.pi4_sad_grid[part_id];
+ }
+ }
+ }
+ if(e_min_id == PT_C)
+ {
+ break;
+ }
+ }
+ else
+ {
+ break;
+ }
+ }
+
+ if(i4_i)
+ {
+ i4_grid_mask = gai4_opt_grid_mask_diamond[e_min_id];
+ }
+ else
+ {
+ i4_grid_mask = gai4_opt_grid_mask_conventional[e_min_id];
+ }
+ i4_mv_x += gai1_grid_id_to_x[e_min_id];
+ i4_mv_y += gai1_grid_id_to_y[e_min_id];
+ ps_search_node->s_mv.i2_mvx = (S16)i4_mv_x;
+ ps_search_node->s_mv.i2_mvy = (S16)i4_mv_y;
+ i4_grid_mask &= hme_clamp_grid_by_mvrange(ps_search_node, 1, ps_range_prms);
+ }
+ }
+
+ /* update modified motion vectors and cost at end of subpel */
+ ps_search_node->s_mv.i2_mvx = (S16)i4_mv_x;
+ ps_search_node->s_mv.i2_mvy = (S16)i4_mv_y;
+ ps_search_node->i4_tot_cost = i4_min_cost;
+ ps_search_node->i4_sad = i4_min_sad;
+
+ /********************************************************************************/
+ /* TODO: Restoring back Sad lambda from Hadamard lambda */
+ /* Need to pass the had/satd lambda in more cleaner way for subpel cost compute */
+ /********************************************************************************/
+ //ps_pred_ctxt->lambda >>= 1;
+
+ return (i4_min_cost);
+}
+#endif
+
+static void hme_subpel_refine_struct_to_search_results_struct_converter( /* Copies the results held in the subpel refine ctxt into the per-partition search result nodes */
+ subpel_refine_ctxt_t *ps_subpel_refine_ctxt, /* [in] source of the per-partition cost / mv / ref idx arrays */
+ search_results_t *ps_search_results, /* [in,out] aps_part_results nodes are updated in place */
+ U08 u1_pred_dir, /* [in] first index into aps_part_results */
+ ME_QUALITY_PRESETS_T e_quality_preset) /* [in] only consulted by the ASSERT below */
+{
+ U08 i;
+
+ U08 u1_num_results_per_part = ps_search_results->u1_num_results_per_part;
+ /* Visit every valid partition listed in the subpel refine ctxt */
+ for(i = 0; i < ps_subpel_refine_ctxt->i4_num_valid_parts; i++)
+ {
+ S32 index;
+ S32 i4_sad;
+
+ S32 part_id = ps_subpel_refine_ctxt->ai4_part_id[i];
+
+ search_node_t *ps_best_node = ps_search_results->aps_part_results[u1_pred_dir][part_id];
+ /* With more than 8 valid parts the ctxt arrays are indexed by part id, otherwise by loop position */
+ if(ps_subpel_refine_ctxt->i4_num_valid_parts > 8)
+ {
+ index = part_id;
+ }
+ else
+ {
+ index = i;
+ }
+ /* Node not yet marked as subpel done: copy the ctxt results unconditionally */
+ if(!ps_best_node->u1_subpel_done)
+ {
+ i4_sad = ps_subpel_refine_ctxt->i2_tot_cost[0][index] -
+ ps_subpel_refine_ctxt->i2_mv_cost[0][index];
+ ps_best_node[0].i4_sdi = 0;
+ ASSERT((e_quality_preset == ME_PRISTINE_QUALITY) ? (ps_best_node[0].i4_sdi >= 0) : 1); /* always holds here: i4_sdi was set to 0 on the previous line */
+ ps_best_node[0].i4_tot_cost = ps_subpel_refine_ctxt->i2_tot_cost[0][index];
+ /* When the total cost equals MAX_SIGNED_16BIT_VAL the SAD is forced to the same value */
+ if(ps_subpel_refine_ctxt->i2_tot_cost[0][index] == MAX_SIGNED_16BIT_VAL)
+ {
+ i4_sad = MAX_SIGNED_16BIT_VAL;
+ }
+
+ ps_best_node[0].i4_sad = i4_sad;
+ ps_best_node[0].i4_mv_cost = ps_subpel_refine_ctxt->i2_mv_cost[0][index];
+ ps_best_node[0].s_mv.i2_mvx = ps_subpel_refine_ctxt->i2_mv_x[0][index];
+ ps_best_node[0].s_mv.i2_mvy = ps_subpel_refine_ctxt->i2_mv_y[0][index];
+ ps_best_node[0].i1_ref_idx = (WORD8)ps_subpel_refine_ctxt->i2_ref_idx[0][index];
+ ps_best_node->u1_subpel_done = 1;
+ /* Second result is copied only when two results are kept per partition */
+ if(2 == u1_num_results_per_part)
+ {
+ i4_sad = ps_subpel_refine_ctxt->i2_tot_cost[1][index] -
+ ps_subpel_refine_ctxt->i2_mv_cost[1][index];
+ ps_best_node[1].i4_sdi = 0;
+ ps_best_node[1].i4_tot_cost = ps_subpel_refine_ctxt->i2_tot_cost[1][index];
+
+ if(ps_subpel_refine_ctxt->i2_tot_cost[1][index] == MAX_SIGNED_16BIT_VAL)
+ {
+ i4_sad = MAX_SIGNED_16BIT_VAL;
+ }
+
+ ps_best_node[1].i4_sad = i4_sad;
+ ps_best_node[1].i4_mv_cost = ps_subpel_refine_ctxt->i2_mv_cost[1][index];
+ ps_best_node[1].s_mv.i2_mvx = ps_subpel_refine_ctxt->i2_mv_x[1][index];
+ ps_best_node[1].s_mv.i2_mvy = ps_subpel_refine_ctxt->i2_mv_y[1][index];
+ ps_best_node[1].i1_ref_idx = (WORD8)ps_subpel_refine_ctxt->i2_ref_idx[1][index];
+ ps_best_node[1].u1_subpel_done = 1;
+ }
+ }
+ else if( /* node already done, two results kept: merge only if the new best beats the stored second result */
+ (2 == u1_num_results_per_part) &&
+ (ps_subpel_refine_ctxt->i2_tot_cost[0][index] < ps_best_node[1].i4_tot_cost))
+ {
+ if(ps_subpel_refine_ctxt->i2_tot_cost[1][index] < ps_best_node[0].i4_tot_cost)
+ { /* even the new second result is cheaper than the stored best: overwrite both nodes */
+ i4_sad = ps_subpel_refine_ctxt->i2_tot_cost[0][index] -
+ ps_subpel_refine_ctxt->i2_mv_cost[0][index];
+ ps_best_node[0].i4_sdi = 0;
+ ps_best_node[0].i4_tot_cost = ps_subpel_refine_ctxt->i2_tot_cost[0][index];
+
+ if(ps_subpel_refine_ctxt->i2_tot_cost[0][index] == MAX_SIGNED_16BIT_VAL)
+ {
+ i4_sad = MAX_SIGNED_16BIT_VAL;
+ }
+
+ ps_best_node[0].i4_sad = i4_sad;
+ ps_best_node[0].i4_mv_cost = ps_subpel_refine_ctxt->i2_mv_cost[0][index];
+ ps_best_node[0].s_mv.i2_mvx = ps_subpel_refine_ctxt->i2_mv_x[0][index];
+ ps_best_node[0].s_mv.i2_mvy = ps_subpel_refine_ctxt->i2_mv_y[0][index];
+ ps_best_node[0].i1_ref_idx = (S08)ps_subpel_refine_ctxt->i2_ref_idx[0][index];
+
+ i4_sad = ps_subpel_refine_ctxt->i2_tot_cost[1][index] -
+ ps_subpel_refine_ctxt->i2_mv_cost[1][index];
+ ps_best_node[1].i4_sdi = 0;
+ ps_best_node[1].i4_tot_cost = ps_subpel_refine_ctxt->i2_tot_cost[1][index];
+
+ if(ps_subpel_refine_ctxt->i2_tot_cost[1][index] == MAX_SIGNED_16BIT_VAL)
+ {
+ i4_sad = MAX_SIGNED_16BIT_VAL;
+ }
+
+ ps_best_node[1].i4_sad = i4_sad;
+ ps_best_node[1].i4_mv_cost = ps_subpel_refine_ctxt->i2_mv_cost[1][index];
+ ps_best_node[1].s_mv.i2_mvx = ps_subpel_refine_ctxt->i2_mv_x[1][index];
+ ps_best_node[1].s_mv.i2_mvy = ps_subpel_refine_ctxt->i2_mv_y[1][index];
+ ps_best_node[1].i1_ref_idx = (S08)ps_subpel_refine_ctxt->i2_ref_idx[1][index];
+ }
+ else if(ps_subpel_refine_ctxt->i2_tot_cost[1][index] > ps_best_node[0].i4_tot_cost)
+ { /* NOTE(review): when i2_tot_cost[1] equals the stored best cost neither this branch nor the one above runs and nothing is updated - confirm intended */
+ if(ps_subpel_refine_ctxt->i2_tot_cost[0][index] >= ps_best_node[0].i4_tot_cost)
+ { /* new best is not cheaper than the stored best: it only replaces the second node */
+ i4_sad = ps_subpel_refine_ctxt->i2_tot_cost[0][index] -
+ ps_subpel_refine_ctxt->i2_mv_cost[0][index];
+ ps_best_node[1].i4_sdi = 0;
+ ps_best_node[1].i4_tot_cost = ps_subpel_refine_ctxt->i2_tot_cost[0][index];
+
+ if(ps_subpel_refine_ctxt->i2_tot_cost[0][index] == MAX_SIGNED_16BIT_VAL)
+ {
+ i4_sad = MAX_SIGNED_16BIT_VAL;
+ }
+
+ ps_best_node[1].i4_sad = i4_sad;
+ ps_best_node[1].i4_mv_cost = ps_subpel_refine_ctxt->i2_mv_cost[0][index];
+ ps_best_node[1].s_mv.i2_mvx = ps_subpel_refine_ctxt->i2_mv_x[0][index];
+ ps_best_node[1].s_mv.i2_mvy = ps_subpel_refine_ctxt->i2_mv_y[0][index];
+ ps_best_node[1].i1_ref_idx = (S08)ps_subpel_refine_ctxt->i2_ref_idx[0][index];
+ }
+ else if(ps_subpel_refine_ctxt->i2_tot_cost[0][index] < ps_best_node[0].i4_tot_cost)
+ { /* new best is cheaper: move the stored best to node 1, then write the new best into node 0 */
+ memmove(&ps_best_node[1], &ps_best_node[0], sizeof(search_node_t));
+
+ i4_sad = ps_subpel_refine_ctxt->i2_tot_cost[0][index] -
+ ps_subpel_refine_ctxt->i2_mv_cost[0][index];
+ ps_best_node[0].i4_sdi = 0;
+ ps_best_node[0].i4_tot_cost = ps_subpel_refine_ctxt->i2_tot_cost[0][index];
+
+ if(ps_subpel_refine_ctxt->i2_tot_cost[0][index] == MAX_SIGNED_16BIT_VAL)
+ {
+ i4_sad = MAX_SIGNED_16BIT_VAL;
+ }
+
+ ps_best_node[0].i4_sad = i4_sad;
+ ps_best_node[0].i4_mv_cost = ps_subpel_refine_ctxt->i2_mv_cost[0][index];
+ ps_best_node[0].s_mv.i2_mvx = ps_subpel_refine_ctxt->i2_mv_x[0][index];
+ ps_best_node[0].s_mv.i2_mvy = ps_subpel_refine_ctxt->i2_mv_y[0][index];
+ ps_best_node[0].i1_ref_idx = (S08)ps_subpel_refine_ctxt->i2_ref_idx[0][index];
+ }
+ }
+ }
+ else if( /* node already done, single result kept: replace it only when the new result is cheaper */
+ (1 == u1_num_results_per_part) &&
+ (ps_subpel_refine_ctxt->i2_tot_cost[0][index] < ps_best_node[0].i4_tot_cost))
+ {
+ i4_sad = ps_subpel_refine_ctxt->i2_tot_cost[0][index] -
+ ps_subpel_refine_ctxt->i2_mv_cost[0][index];
+ ps_best_node[0].i4_sdi = 0;
+ ps_best_node[0].i4_tot_cost = ps_subpel_refine_ctxt->i2_tot_cost[0][index];
+
+ if(ps_subpel_refine_ctxt->i2_tot_cost[0][index] == MAX_SIGNED_16BIT_VAL)
+ {
+ i4_sad = MAX_SIGNED_16BIT_VAL;
+ }
+
+ ps_best_node[0].i4_sad = i4_sad;
+ ps_best_node[0].i4_mv_cost = ps_subpel_refine_ctxt->i2_mv_cost[0][index];
+ ps_best_node[0].s_mv.i2_mvx = ps_subpel_refine_ctxt->i2_mv_x[0][index];
+ ps_best_node[0].s_mv.i2_mvy = ps_subpel_refine_ctxt->i2_mv_y[0][index];
+ ps_best_node[0].i1_ref_idx = (S08)ps_subpel_refine_ctxt->i2_ref_idx[0][index];
+ }
+ }
+}
+
+/**
+********************************************************************************
+* @fn void hme_subpel_refine_cu_hs
+*
+* @brief Evaluates the best subpel mvs for active partitions of an MB in L0
+* layer for the high speed preset. Recursive hadamard SATD / SAD
+* and mv cost is used for 2NxN and NxN partitions with active partition
+* update
+*
+* @param[in,out] ps_prms: subpel prms; pv_inp and i4_inp_type are overwritten here
+* @param[in] ps_curr_layer: points to the current layer ctxt
+* @param[in,out] ps_search_results: points to the search results that get updated
+* with best results
+* @param[in] search_idx: ref id of the frame for which results get updated
+* @param[in] ps_wt_inp_prms: current frame input params
+* @param[in] blk_8x8_mask: 0xf caps the number of refine centers; any other
+* value forces at least one subpel candidate per partition
+* @param[in] ps_func_selector: forwarded to the per-node high speed refinement
+* @param[in] ps_cmn_utils_optimised_function_list: handed to the SATD computation
+* @param[in] ps_me_optimised_function_list: forwarded to the per-node refinement
+* @return None
+********************************************************************************
+*/
+void hme_subpel_refine_cu_hs(
+ hme_subpel_prms_t *ps_prms,
+ layer_ctxt_t *ps_curr_layer,
+ search_results_t *ps_search_results,
+ S32 search_idx,
+ wgt_pred_ctxt_t *ps_wt_inp_prms,
+ WORD32 blk_8x8_mask,
+ me_func_selector_t *ps_func_selector,
+ ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list,
+ ihevce_me_optimised_function_list_t *ps_me_optimised_function_list)
+{
+ /* Unique search node list for 2nx2n and nxn partitions */
+ search_node_t as_nodes_2nx2n[MAX_RESULTS_PER_PART * 5];
+ subpel_dedup_enabler_t as_subpel_dedup_enabler[MAX_NUM_REF];
+ search_node_t *ps_search_node;
+
+ S32 i, i4_part_mask, j;
+ S32 i4_sad_grid;
+ S32 max_subpel_cand;
+ WORD32 index;
+ S32 num_unique_nodes_2nx2n;
+ S32 part_id;
+ S32 x_off, y_off;
+ S32 i4_inp_off;
+
+ CU_SIZE_T e_cu_size;
+ BLK_SIZE_T e_blk_size;
+
+ subpel_refine_ctxt_t *ps_subpel_refine_ctxt = ps_prms->ps_subpel_refine_ctxt;
+
+ S32 i4_use_satd = ps_prms->i4_use_satd;
+ S32 i4_num_act_refs = ps_prms->i4_num_act_ref_l0 + ps_prms->i4_num_act_ref_l1;
+
+ ASSERT(ps_search_results->u1_num_results_per_part <= MAX_RESULTS_PER_PART);
+ /* Refinement path; the else branch further down only rescales the mvs to QPEL */
+ if(!DISABLE_SUBPEL_REFINEMENT_WHEN_SRC_IS_NOISY || !ps_prms->u1_is_cu_noisy)
+ {
+ e_cu_size = ps_search_results->e_cu_size;
+ i4_part_mask = ps_search_results->i4_part_mask;
+
+ ps_prms->i4_inp_type = sizeof(U08);
+
+ num_unique_nodes_2nx2n = 0;
+ /* MAX_NUM_REF in u1_ref_idx marks a dedup entry as not yet set up */
+ for(i = 0; i < i4_num_act_refs; i++)
+ {
+ as_subpel_dedup_enabler[i].u1_ref_idx = MAX_NUM_REF;
+ }
+
+ /************************************************************************/
+ /* */
+ /* Initialize SATD cost for each valid partition id one time before */
+ /* subpel refinement starts. This is because of the following reasons: */
+ /* 1. Full pel cost was done in SAD while subpel is in SATD mode */
+ /* 2. Partitions like AMP, Nx2N and 2NxN are refined on the fly while */
+ /* doing Diamond search for 2Nx2N and NxN. These partitions are */
+ /* not explicitly refined in high speed mode */
+ /* */
+ /************************************************************************/
+ for(i = 0; i < ps_subpel_refine_ctxt->i4_num_valid_parts; i++)
+ {
+ S32 enable_subpel = 0;
+ S32 part_type;
+
+ /* Derive the x and y offsets of this part id */
+ part_id = ps_subpel_refine_ctxt->ai4_part_id[i];
+ if(ps_subpel_refine_ctxt->i4_num_valid_parts > 8)
+ {
+ index = part_id;
+ }
+ else
+ {
+ index = i;
+ }
+
+ part_type = ge_part_id_to_part_type[part_id];
+ x_off = gas_part_attr_in_cu[part_id].u1_x_start << e_cu_size;
+ y_off = gas_part_attr_in_cu[part_id].u1_y_start << e_cu_size;
+ x_off += ps_search_results->u1_x_off;
+ y_off += ps_search_results->u1_y_off;
+ i4_inp_off = x_off + y_off * ps_prms->i4_inp_stride;
+ e_blk_size = ge_part_id_to_blk_size[e_cu_size][part_id];
+
+ x_off += ps_prms->i4_ctb_x_off;
+ y_off += ps_prms->i4_ctb_y_off;
+
+ max_subpel_cand = 0;
+
+ /* Choose the minimum number of candidates to be used for Sub pel refinement */
+ if(PART_ID_2Nx2N == part_type)
+ { /* NOTE(review): part_type is compared with a PART_ID_* constant here but with PRT_NxN below - confirm both enums agree for 2Nx2N */
+ max_subpel_cand =
+ MIN(ps_prms->u1_max_subpel_candts_2Nx2N,
+ ps_search_results->u1_num_results_per_part);
+ }
+ else if(PRT_NxN == part_type)
+ {
+ max_subpel_cand = MIN(
+ ps_prms->u1_max_subpel_candts_NxN, ps_search_results->u1_num_results_per_part);
+ }
+
+ /* If incomplete CTB (blk_8x8_mask != 15), num candidates is forced to min 1 */
+ if((0 == max_subpel_cand) && (blk_8x8_mask != 15))
+ {
+ max_subpel_cand = 1;
+ }
+
+ if((PART_ID_2Nx2N == part_type) || (PRT_NxN == part_type))
+ {
+ enable_subpel = 1;
+ }
+
+ /* Compute full pel SATD for each result per partition before subpel */
+ /* refinement starts. */
+ /* Also prepare unique candidate list for 2Nx2N and NxN partitions */
+ for(j = 0; j < ps_search_results->u1_num_results_per_part; j++)
+ {
+ err_prms_t s_err_prms;
+ S32 i4_satd = 0;
+ S32 i1_ref_idx;
+ U08 *pu1_ref_base;
+ S32 i4_ref_stride = ps_curr_layer->i4_rec_stride;
+ S32 i4_mv_x, i4_mv_y;
+
+ ps_search_node = ps_search_results->aps_part_results[search_idx][part_id] + j;
+ /* Intra result: nothing to refine, flag the node as done and move on */
+ if(ps_subpel_refine_ctxt->i2_mv_x[j][index] == INTRA_MV)
+ {
+ ps_search_node->u1_subpel_done = 1;
+ continue;
+ }
+
+ i1_ref_idx = ps_subpel_refine_ctxt->i2_ref_idx[j][index];
+ ps_prms->pv_inp = (void *)(ps_wt_inp_prms->apu1_wt_inp[i1_ref_idx] + i4_inp_off);
+ pu1_ref_base = ps_curr_layer->ppu1_list_rec_fxfy[i1_ref_idx];
+
+ i4_mv_x = ps_subpel_refine_ctxt->i2_mv_x[j][index];
+ i4_mv_y = ps_subpel_refine_ctxt->i2_mv_y[j][index];
+
+ if(i4_use_satd)
+ {
+ s_err_prms.pu1_inp = (U08 *)ps_prms->pv_inp;
+ s_err_prms.i4_inp_stride = ps_prms->i4_inp_stride;
+ s_err_prms.pu1_ref = pu1_ref_base + x_off + (y_off * i4_ref_stride) + i4_mv_x +
+ (i4_mv_y * i4_ref_stride);
+
+ s_err_prms.i4_ref_stride = i4_ref_stride;
+ s_err_prms.i4_part_mask = (ENABLE_2Nx2N);
+ s_err_prms.i4_grid_mask = 1;
+ s_err_prms.pi4_sad_grid = &i4_sad_grid;
+ s_err_prms.i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
+ s_err_prms.i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
+
+ s_err_prms.ps_cmn_utils_optimised_function_list =
+ ps_cmn_utils_optimised_function_list;
+
+ compute_satd_8bit(&s_err_prms);
+
+ i4_satd = s_err_prms.pi4_sad_grid[0];
+ /* Replace the full pel total cost with mv cost + SATD, clipped to 16 bit */
+ ps_subpel_refine_ctxt->i2_tot_cost[j][index] =
+ CLIP_S16(ps_subpel_refine_ctxt->i2_mv_cost[j][index] + i4_satd);
+ ps_subpel_refine_ctxt->ai2_fullpel_satd[j][index] = i4_satd;
+ }
+
+ /* Sub-pel candidate filtration (only for results after the first) */
+ if(j)
+ {
+ S16 i2_best_sad;
+ S32 i4_best_mvx;
+ S32 i4_best_mvy;
+
+ search_node_t *ps_node =
+ ps_search_results->aps_part_results[search_idx][part_id];
+
+ U08 u1_is_subpel_done = ps_node->u1_subpel_done;
+ S16 i2_curr_sad = ps_subpel_refine_ctxt->ai2_fullpel_satd[j][index];
+ S32 i4_curr_mvx = i4_mv_x * 4; /* FPEL -> QPEL; multiply, since left-shifting a negative mv is undefined */
+ S32 i4_curr_mvy = i4_mv_y * 4;
+
+ if(u1_is_subpel_done)
+ {
+ i2_best_sad = ps_node->i4_sad;
+
+ if(ps_node->i1_ref_idx == i1_ref_idx)
+ {
+ i4_best_mvx = ps_node->s_mv.i2_mvx;
+ i4_best_mvy = ps_node->s_mv.i2_mvy;
+ }
+ else if(i1_ref_idx == ps_subpel_refine_ctxt->i2_ref_idx[0][index])
+ {
+ i4_best_mvx = ps_subpel_refine_ctxt->i2_mv_x[0][index];
+ i4_best_mvy = ps_subpel_refine_ctxt->i2_mv_y[0][index];
+ }
+ else
+ {
+ i4_best_mvx = INTRA_MV;
+ i4_best_mvy = INTRA_MV;
+ }
+ }
+ else
+ {
+ i2_best_sad = ps_subpel_refine_ctxt->i2_tot_cost[0][index] -
+ ps_subpel_refine_ctxt->i2_mv_cost[0][index];
+
+ if(i1_ref_idx == ps_subpel_refine_ctxt->i2_ref_idx[0][index])
+ {
+ i4_best_mvx = ps_subpel_refine_ctxt->i2_mv_x[0][index];
+ i4_best_mvy = ps_subpel_refine_ctxt->i2_mv_y[0][index];
+ }
+ else
+ {
+ i4_best_mvx = INTRA_MV;
+ i4_best_mvy = INTRA_MV;
+ }
+ }
+ /* Relax the reference SAD by a fraction set through u1_subpel_candt_threshold */
+ i2_best_sad += (i2_best_sad >> ps_prms->u1_subpel_candt_threshold);
+ /* Drop the candidate if it lies within 1 unit of the reference mv in both axes, or its SAD is too high */
+ if(((ABS(i4_curr_mvx - i4_best_mvx) < 2) &&
+ (ABS(i4_curr_mvy - i4_best_mvy) < 2)) ||
+ (i2_curr_sad > i2_best_sad))
+ {
+ enable_subpel = 0;
+ }
+ }
+
+ ps_search_node->u1_part_id = part_id;
+
+ /* Convert mvs in part results from FPEL to QPEL units */
+ ps_subpel_refine_ctxt->i2_mv_x[j][index] *= 4;
+ ps_subpel_refine_ctxt->i2_mv_y[j][index] *= 4;
+
+ /* If the candidate number is more than the number of candts
+ set initially, do not add those candts for refinement */
+ if(j >= max_subpel_cand)
+ {
+ enable_subpel = 0;
+ }
+
+ if(enable_subpel)
+ {
+ if(num_unique_nodes_2nx2n == 0)
+ {
+ S32 i4_index = ps_subpel_refine_ctxt->i2_ref_idx[j][index];
+
+ as_subpel_dedup_enabler[i4_index].i2_mv_x =
+ ps_subpel_refine_ctxt->i2_mv_x[j][index];
+ as_subpel_dedup_enabler[i4_index].i2_mv_y =
+ ps_subpel_refine_ctxt->i2_mv_y[j][index];
+ as_subpel_dedup_enabler[i4_index].u1_ref_idx =
+ (U08)ps_subpel_refine_ctxt->i2_ref_idx[j][index];
+ memset(
+ as_subpel_dedup_enabler[i4_index].au4_node_map,
+ 0,
+ sizeof(U32) * 2 * MAP_X_MAX);
+ }
+ INSERT_NEW_NODE_NOMAP_ALTERNATE(
+ as_nodes_2nx2n, num_unique_nodes_2nx2n, ps_subpel_refine_ctxt, j, i);
+ }
+ }
+
+ /*********************************************************************************************/
+ /* If sad_1 < sad_2, then satd_1 need not be lesser than satd_2. Therefore, after conversion */
+ /* to satd, tot_cost_1 may not be lesser than tot_cost_2. So we need to sort the search nodes*/
+ /* for each partition again, based on the new costs */
+ /*********************************************************************************************/
+ /*********************************************************************************************/
+ /* Because right now, we store only the two best candidates for each partition, the sort will*/
+ /* converge to a simple swap. */
+ /* ASSUMPTION : We store only two best results per partition */
+ /*********************************************************************************************/
+ if(ps_search_results->u1_num_results_per_part == 2)
+ {
+ if(ps_subpel_refine_ctxt->i2_tot_cost[0][index] >
+ ps_subpel_refine_ctxt->i2_tot_cost[1][index])
+ {
+ SWAP(
+ ps_subpel_refine_ctxt->i2_tot_cost[0][index],
+ ps_subpel_refine_ctxt->i2_tot_cost[1][index]);
+
+ SWAP(
+ ps_subpel_refine_ctxt->i2_mv_cost[0][index],
+ ps_subpel_refine_ctxt->i2_mv_cost[1][index]);
+
+ SWAP(
+ ps_subpel_refine_ctxt->i2_mv_x[0][index],
+ ps_subpel_refine_ctxt->i2_mv_x[1][index]);
+
+ SWAP(
+ ps_subpel_refine_ctxt->i2_mv_y[0][index],
+ ps_subpel_refine_ctxt->i2_mv_y[1][index]);
+
+ SWAP(
+ ps_subpel_refine_ctxt->i2_ref_idx[0][index],
+ ps_subpel_refine_ctxt->i2_ref_idx[1][index]);
+
+ SWAP(
+ ps_subpel_refine_ctxt->ai2_fullpel_satd[0][index],
+ ps_subpel_refine_ctxt->ai2_fullpel_satd[1][index]);
+ }
+ }
+ }
+ /* Only when blk_8x8_mask is 0xf is the list capped to the configured number of refine centers */
+ if(blk_8x8_mask == 0xf)
+ {
+ num_unique_nodes_2nx2n =
+ MIN(num_unique_nodes_2nx2n, ps_prms->u1_max_num_subpel_refine_centers);
+ }
+ {
+ x_off = gas_part_attr_in_cu[0].u1_x_start << e_cu_size;
+ y_off = gas_part_attr_in_cu[0].u1_y_start << e_cu_size;
+ x_off += ps_search_results->u1_x_off;
+ y_off += ps_search_results->u1_y_off;
+ i4_inp_off = x_off + y_off * ps_prms->i4_inp_stride;
+ e_blk_size = ge_part_id_to_blk_size[e_cu_size][0];
+ /* Refine every unique node collected above, using the offsets and block size of part id 0 */
+ for(j = 0; j < num_unique_nodes_2nx2n; j++)
+ {
+ S32 pred_lx;
+ ps_search_node = &as_nodes_2nx2n[j];
+
+ if(ps_search_node->s_mv.i2_mvx == INTRA_MV)
+ {
+ continue;
+ }
+
+ {
+ S08 i1_ref_idx = ps_search_node->i1_ref_idx;
+ subpel_dedup_enabler_t *ps_dedup_enabler =
+ &(as_subpel_dedup_enabler[i1_ref_idx]);
+ /* First node seen for this ref idx: set up its dedup entry and clear the node map */
+ if(ps_dedup_enabler->u1_ref_idx == MAX_NUM_REF)
+ {
+ as_subpel_dedup_enabler[i1_ref_idx].i2_mv_x = ps_search_node->s_mv.i2_mvx;
+ as_subpel_dedup_enabler[i1_ref_idx].i2_mv_y = ps_search_node->s_mv.i2_mvy;
+ as_subpel_dedup_enabler[i1_ref_idx].u1_ref_idx = i1_ref_idx;
+ memset(
+ as_subpel_dedup_enabler[i1_ref_idx].au4_node_map,
+ 0,
+ sizeof(U32) * 2 * MAP_X_MAX);
+ }
+ }
+
+ pred_lx = search_idx;
+ ps_prms->pv_inp =
+ (void *)(ps_wt_inp_prms->apu1_wt_inp[ps_search_node->i1_ref_idx] + i4_inp_off);
+
+ hme_subpel_refine_search_node_high_speed(
+ ps_search_node,
+ ps_prms,
+ ps_curr_layer,
+ e_blk_size,
+ x_off + ps_prms->i4_ctb_x_off,
+ y_off + ps_prms->i4_ctb_y_off,
+ ps_search_results,
+ pred_lx,
+ i4_part_mask,
+ &ps_subpel_refine_ctxt->ai4_part_id[0],
+ search_idx,
+ &(as_subpel_dedup_enabler[ps_search_node->i1_ref_idx]),
+ ps_func_selector,
+ ps_me_optimised_function_list);
+ }
+ }
+ }
+ else
+ {
+ for(i = 0; i < ps_subpel_refine_ctxt->i4_num_valid_parts; i++)
+ {
+ S32 i4_index;
+
+ S32 i4_part_id = ps_subpel_refine_ctxt->ai4_part_id[i];
+
+ if(ps_subpel_refine_ctxt->i4_num_valid_parts > 8)
+ {
+ i4_index = i4_part_id;
+ }
+ else
+ {
+ i4_index = i;
+ }
+ /* No refinement on this path: only scale the mvs by 4, as the refinement path does for FPEL -> QPEL */
+ for(j = 0; j < ps_search_results->u1_num_results_per_part; j++)
+ {
+ ps_subpel_refine_ctxt->i2_mv_x[j][i4_index] *= 4;
+ ps_subpel_refine_ctxt->i2_mv_y[j][i4_index] *= 4;
+ }
+ }
+ }
+ /* Publish the (possibly refined) ctxt results into ps_search_results */
+ hme_subpel_refine_struct_to_search_results_struct_converter(
+ ps_subpel_refine_ctxt, ps_search_results, search_idx, ps_prms->e_me_quality_presets);
+}
diff --git a/encoder/hme_subpel.h b/encoder/hme_subpel.h
new file mode 100644
index 0000000..44fcc47
--- /dev/null
+++ b/encoder/hme_subpel.h
@@ -0,0 +1,56 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file hme_subpel.h
+*
+* \brief
+* Prototypes for subpel modules called from refinement layer
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _HME_SUBPEL_H_
+#define _HME_SUBPEL_H_
+
+#include "ihevce_me_instr_set_router.h"
+
+/*****************************************************************************/
+/* Functions */
+/*****************************************************************************/
+
+void hme_subpel_refine_cu_hs( /* Subpel refinement of the active partitions of one CU, high speed preset */
+ hme_subpel_prms_t *ps_prms, /* subpel prms input to this function */
+ layer_ctxt_t *ps_curr_layer, /* current layer ctxt */
+ search_results_t *ps_search_results, /* search results that get updated with best results */
+ S32 search_idx, /* ref id of the frame for which results get updated */
+ wgt_pred_ctxt_t *ps_wt_inp_prms, /* current frame input params */
+ WORD32 blk_8x8_mask, /* the definition tests this against 0xf (15) */
+ me_func_selector_t *ps_func_selector, /* forwarded to the per-node high speed refinement */
+ ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list, /* handed to the SATD computation */
+ ihevce_me_optimised_function_list_t *ps_me_optimised_function_list); /* forwarded to the per-node high speed refinement */
+
+#endif /* #ifndef _HME_SUBPEL_H_ */
diff --git a/encoder/hme_utils.c b/encoder/hme_utils.c
new file mode 100644
index 0000000..d3a06bf
--- /dev/null
+++ b/encoder/hme_utils.c
@@ -0,0 +1,5707 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+#include <limits.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_multi_thrd_funcs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_entropy_structs.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+#include "ihevce_enc_loop_structs.h"
+#include "ihevce_inter_pred.h"
+#include "ihevce_global_tables.h"
+#include "ihevce_dep_mngr_interface.h"
+#include "hme_datatype.h"
+#include "hme_interface.h"
+#include "hme_common_defs.h"
+#include "hme_defs.h"
+#include "ihevce_me_instr_set_router.h"
+#include "hme_globals.h"
+#include "hme_utils.h"
+#include "hme_coarse.h"
+#include "hme_fullpel.h"
+#include "hme_subpel.h"
+#include "hme_refine.h"
+#include "hme_err_compute.h"
+#include "hme_common_utils.h"
+#include "hme_search_algo.h"
+#include "ihevce_stasino_helpers.h"
+#include "ihevce_common_utils.h"
+
+/*****************************************************************************/
+/* Macros */
+/*****************************************************************************/
+#define UNI_SATD_SCALE 1
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+void ihevce_open_loop_pred_data(
+ me_frm_ctxt_t *ps_ctxt,
+ inter_pu_results_t *ps_pu_results,
+ U08 *pu1_src, /* not referenced in the body */
+ U08 *pu1_temp_pred, /* forwarded, together with stride, to ihevce_luma_inter_pred_pu */
+ S32 stride,
+ S32 src_strd, /* not referenced in the body */
+ UWORD8 e_part_id) /* indexes the per-partition result counts checked below */
+{ /* Builds one pu_t (L0, L1 or BI) from the best PU results and calls ihevce_luma_inter_pred_pu on it */
+ S32 best_sad_l0 = -1, best_sad_l1 = -1; /* -1 means no result was found for that list */
+ S32 sad_diff, status;
+ inter_pred_me_ctxt_t *ps_inter_pred_me_ctxt;
+ U08 enable_bi = 0;
+ pu_t s_pu; /* only the fields assigned below are written before use */
+
+ ps_inter_pred_me_ctxt = &ps_ctxt->s_mc_ctxt;
+ ps_ctxt->i4_count++;
+ /* L0: when results exist, SAD = total cost - mv cost, and s_pu is seeded from the L0 PU */
+ if(ps_pu_results->u1_num_results_per_part_l0[e_part_id])
+ {
+ pu_result_t *ps_best_l0_pu;
+ ps_best_l0_pu = ps_pu_results->aps_pu_results[0][PRT_2Nx2N]; /* NOTE(review): always the PRT_2Nx2N entry, not e_part_id - confirm */
+ best_sad_l0 = ps_best_l0_pu->i4_tot_cost - ps_best_l0_pu->i4_mv_cost;
+ s_pu.b2_pred_mode = PRED_L0;
+ s_pu.b4_ht = ps_best_l0_pu->pu.b4_ht;
+ s_pu.b4_wd = ps_best_l0_pu->pu.b4_wd;
+ s_pu.b4_pos_x = ps_best_l0_pu->pu.b4_pos_x;
+ s_pu.b4_pos_y = ps_best_l0_pu->pu.b4_pos_y;
+ s_pu.b1_intra_flag = 0;
+ s_pu.mv.s_l0_mv.i2_mvx = ps_best_l0_pu->pu.mv.s_l0_mv.i2_mvx;
+ s_pu.mv.s_l0_mv.i2_mvy = ps_best_l0_pu->pu.mv.s_l0_mv.i2_mvy;
+ s_pu.mv.i1_l0_ref_idx = ps_best_l0_pu->pu.mv.i1_l0_ref_idx;
+ }
+ /* L1: same as L0; size, position and pred mode written by the L0 branch are overwritten */
+ if(ps_pu_results->u1_num_results_per_part_l1[e_part_id])
+ {
+ pu_result_t *ps_best_l1_pu;
+ ps_best_l1_pu = ps_pu_results->aps_pu_results[1][PRT_2Nx2N]; /* NOTE(review): always the PRT_2Nx2N entry, not e_part_id - confirm */
+ best_sad_l1 = ps_best_l1_pu->i4_tot_cost - ps_best_l1_pu->i4_mv_cost;
+ s_pu.b2_pred_mode = PRED_L1;
+ s_pu.b4_ht = ps_best_l1_pu->pu.b4_ht;
+ s_pu.b4_wd = ps_best_l1_pu->pu.b4_wd;
+ s_pu.b4_pos_x = ps_best_l1_pu->pu.b4_pos_x;
+ s_pu.b4_pos_y = ps_best_l1_pu->pu.b4_pos_y;
+ s_pu.b1_intra_flag = 0;
+ s_pu.mv.s_l1_mv.i2_mvx = ps_best_l1_pu->pu.mv.s_l1_mv.i2_mvx;
+ s_pu.mv.s_l1_mv.i2_mvy = ps_best_l1_pu->pu.mv.s_l1_mv.i2_mvy;
+ s_pu.mv.i1_l1_ref_idx = ps_best_l1_pu->pu.mv.i1_l1_ref_idx;
+ }
+ ASSERT((best_sad_l0 != -1) || (best_sad_l1 != -1)); /* if this does not fire, s_pu would be used unset */
+ /* bi selection: only considered when both lists produced a result */
+ if((best_sad_l0 != -1) && (best_sad_l1 != -1))
+ {
+ sad_diff = abs(best_sad_l0 - best_sad_l1);
+ if((sad_diff < (best_sad_l0 * 0.15)) && (sad_diff < (best_sad_l1 * 0.15))) /* gap under 15% of both SADs */
+ {
+ enable_bi = 1;
+ s_pu.b2_pred_mode = PRED_BI;
+ }
+ if(!enable_bi)
+ {
+ if(best_sad_l0 < best_sad_l1) /* lower SAD wins; L1 is taken on a tie */
+ {
+ s_pu.b2_pred_mode = PRED_L0;
+ }
+ else
+ {
+ s_pu.b2_pred_mode = PRED_L1;
+ }
+ }
+ }
+ status = ihevce_luma_inter_pred_pu(ps_inter_pred_me_ctxt, &s_pu, pu1_temp_pred, stride, 1);
+ if(status == -1) /* a -1 status is only reported through ASSERT */
+ {
+ ASSERT(0);
+ }
+}
+
+/**
+********************************************************************************
+* @fn void *hme_get_wkg_mem(buf_mgr_t *ps_buf_mgr, S32 i4_size)
+*
+* @brief Carves the next i4_size bytes out of the working memory pool
+*
+* @param[in,out] ps_buf_mgr: Buffer manager for wkg memory (i4_used advances)
+*
+* @param[in] i4_size : number of bytes requested
+*
+* @return start of the block, NULL if the request does not fit in the pool
+********************************************************************************
+*/
+void *hme_get_wkg_mem(buf_mgr_t *ps_buf_mgr, S32 i4_size)
+{
+    void *pv_blk = NULL;
+
+    /* Serve the request only while used + size stays within the pool total */
+    if((ps_buf_mgr->i4_used + i4_size) <= ps_buf_mgr->i4_total)
+    {
+        pv_blk = (void *)(ps_buf_mgr->pu1_wkg_mem + ps_buf_mgr->i4_used);
+        ps_buf_mgr->i4_used += i4_size;
+    }
+    return pv_blk;
+}
+
+/**
+********************************************************************************
+* @fn hme_init_histogram(
+*
+* @brief Sets up an mv histogram for a given mv range: picks the x/y bin
+* shifts, derives the row/column/bin counts and zeroes the active bins
+*
+* @param[in,out] ps_hist : the histogram structure
+*
+* @param[in] i4_max_mv_x : Maximum mv allowed in x direction (fpel units)
+*
+* @param[in] i4_max_mv_y : Maximum mv allowed in y direction (fpel units)
+*
+* @return None
+********************************************************************************
+*/
+
+void hme_init_histogram(mv_hist_t *ps_hist, S32 i4_max_mv_x, S32 i4_max_mv_y)
+{
+    S32 i4_bin, i4_total_bins, i4_cols, i4_rows;
+    S32 i4_sh_x = 0, i4_sh_y = 0;
+    S32 i4_bits, i4_unshifted_bins;
+
+    /*************************************************************************/
+    /* Bin shifts: with no shift the histogram would need                    */
+    /* (2 * max_mv_x) * (2 * max_mv_y) bins. hme_get_range() is used to get  */
+    /* the bit range of that count. Any excess over LOG_MAX_NUM_BINS is      */
+    /* split between the two shifts; when the excess is odd, x takes the     */
+    /* extra bit. With no excess both shifts stay at zero.                   */
+    /*************************************************************************/
+    i4_unshifted_bins = i4_max_mv_x * i4_max_mv_y * 4;
+    i4_bits = (hme_get_range(i4_unshifted_bins - 1)) + 1;
+    if(i4_bits > LOG_MAX_NUM_BINS)
+    {
+        S32 i4_excess = i4_bits - LOG_MAX_NUM_BINS;
+        i4_sh_x = (i4_excess + 1) >> 1;
+        i4_sh_y = i4_excess >> 1;
+    }
+
+    /* The mv range is taken as -max_mv to +max_mv in each direction, so the */
+    /* active grid is (2 * max_mv_x) >> shift_x columns by                   */
+    /* (2 * max_mv_y) >> shift_y rows                                        */
+    i4_cols = (i4_max_mv_x << 1) >> i4_sh_x;
+    i4_rows = (i4_max_mv_y << 1) >> i4_sh_y;
+    i4_total_bins = i4_rows * i4_cols;
+
+    ASSERT(i4_total_bins <= MAX_NUM_BINS);
+
+    /* Geometry of the histogram */
+    ps_hist->i4_min_x = -i4_max_mv_x;
+    ps_hist->i4_min_y = -i4_max_mv_y;
+    ps_hist->i4_shift_x = i4_sh_x;
+    ps_hist->i4_shift_y = i4_sh_y;
+    ps_hist->i4_num_cols = i4_cols;
+    ps_hist->i4_num_rows = i4_rows;
+    ps_hist->i4_num_bins = i4_total_bins;
+
+    /* Fixed lobe sizes */
+    ps_hist->i4_lobe1_size = 5;
+    ps_hist->i4_lobe2_size = 3;
+
+    /* Clear only the bins that are active for this session */
+    i4_bin = 0;
+    while(i4_bin < i4_total_bins)
+    {
+        ps_hist->ai4_bin_count[i4_bin] = 0;
+        i4_bin++;
+    }
+}
+
+/**
+********************************************************************************
+* @fn hme_update_histogram(
+*
+* @brief Increments the histogram bin that a given mv falls into
+*
+* @param[in,out] ps_hist : the histogram structure
+*
+* @param[in] i4_mv_x : x component of the mv (fpel units)
+*
+* @param[in] i4_mv_y : y component of the mv (fpel units)
+*
+* @return None
+********************************************************************************
+*/
+void hme_update_histogram(mv_hist_t *ps_hist, S32 i4_mv_x, S32 i4_mv_y)
+{
+    /* Offset each component by the histogram origin, then quantise by the shift */
+    S32 i4_col = (i4_mv_x - ps_hist->i4_min_x) >> ps_hist->i4_shift_x;
+    S32 i4_row = (i4_mv_y - ps_hist->i4_min_y) >> ps_hist->i4_shift_y;
+
+    /* Row-major position of the bin */
+    S32 i4_bin = (i4_row * ps_hist->i4_num_cols) + i4_col;
+
+    /* Sanity Check (upper bound only) */
+    ASSERT(i4_bin < MAX_NUM_BINS);
+    ps_hist->ai4_bin_count[i4_bin] += 1;
+}
+
+/**
+********************************************************************************
+* @fn hme_get_global_mv(
+*
+* @brief Fetches the thick lobe global mv stored for a previous picture and
+* rescales it by (i4_delta_poc / delta poc of that picture), where delta
+* poc is the POC difference between a picture and its reference.
+*
+* @param[in] ps_prev_layer: layer ctxt of the previous picture; its delta poc
+* (poc minus poc of ref id 0) is used as a divisor, so it must be non zero
+*
+* @param[out] ps_mv: mv_t structure where the motion vector is returned
+* @param[in] i4_delta_poc: the delta poc for the current pic w.r.t. reference
+* @return None
+********************************************************************************
+*/
+void hme_get_global_mv(layer_ctxt_t *ps_prev_layer, hme_mv_t *ps_mv, S32 i4_delta_poc)
+{
+    /* POC distance between the previous picture and its ref id 0 */
+    S32 i4_prev_poc = ps_prev_layer->i4_poc;
+    S32 i4_prev_ref_poc = ps_prev_layer->ai4_ref_id_to_poc_lc[0];
+    S32 i4_prev_dist = i4_prev_poc - i4_prev_ref_poc;
+    S16 i2_gmv_x, i2_gmv_y;
+
+    /* Global mv recorded for the previous picture */
+    i2_gmv_x = ps_prev_layer->s_global_mv[0][GMV_THICK_LOBE].i2_mv_x;
+    i2_gmv_y = ps_prev_layer->s_global_mv[0][GMV_THICK_LOBE].i2_mv_y;
+
+    /* Scale by the ratio of the two POC distances and hand back the result */
+    ps_mv->i2_mv_x = (S16)((i2_gmv_x * i4_delta_poc) / i4_prev_dist);
+    ps_mv->i2_mv_y = (S16)((i2_gmv_y * i4_delta_poc) / i4_prev_dist);
+
+}
+
+/**
+********************************************************************************
+* @fn hme_calculate_global_mv(
+*
+* @brief Calculates global mv for a given histogram: slides a lobe sized
+* window over the bins and returns the mv of the window centre with max sum
+*
+* @param[in] ps_hist : the histogram structure
+*
+* @param[out] ps_mv : used to return the global mv
+*
+* @param[in] e_lobe_type : refer to GMV_MVTYPE_T
+* @return None
+********************************************************************************
+*/
+void hme_calculate_global_mv(mv_hist_t *ps_hist, hme_mv_t *ps_mv, GMV_MVTYPE_T e_lobe_type)
+{
+    S32 i4_offset, i4_lobe_size, i4_y, i4_x, *pi4_bin_count;
+    S32 i4_max_sum = -1;
+    S32 i4_max_x = 0, i4_max_y = 0;
+
+    if(e_lobe_type == GMV_THICK_LOBE)
+        i4_lobe_size = ps_hist->i4_lobe1_size;
+    else
+        i4_lobe_size = ps_hist->i4_lobe2_size;
+
+    i4_offset = i4_lobe_size >> 1;
+    for(i4_y = i4_offset; i4_y < ps_hist->i4_num_rows - i4_offset; i4_y++)
+    {
+        for(i4_x = i4_offset; i4_x < ps_hist->i4_num_cols - i4_offset; i4_x++)
+        {
+            S32 i4_bin_id, i4_sum;
+            /* Top-left bin of the window centred on (i4_x, i4_y). The step   */
+            /* back must follow the lobe size: a fixed 2 only suits a lobe    */
+            /* size of 5 and starts before the first bin for a lobe size of 3 */
+            i4_bin_id = (i4_x - i4_offset) + ((i4_y - i4_offset) * ps_hist->i4_num_cols);
+            pi4_bin_count = &ps_hist->ai4_bin_count[i4_bin_id];
+            i4_sum = hme_compute_2d_sum_unsigned(
+                (void *)pi4_bin_count, i4_lobe_size, i4_lobe_size,
+                ps_hist->i4_num_cols, sizeof(U32));
+
+            if(i4_sum > i4_max_sum)
+            {
+                i4_max_x = i4_x;
+                i4_max_y = i4_y;
+                i4_max_sum = i4_sum;
+            }
+        }
+    }
+
+    /* Map the winning bin centre back to mv units: undo the shift, add the origin */
+    ps_mv->i2_mv_y = (S16)((i4_max_y << ps_hist->i4_shift_y) + ps_hist->i4_min_y);
+    ps_mv->i2_mv_x = (S16)((i4_max_x << ps_hist->i4_shift_x) + ps_hist->i4_min_x);
+}
+
+/**
+********************************************************************************
+* @fn ctb_node_t *hme_get_ctb_node(ctb_mem_mgr_t *ps_mem_mgr)
+*
+* @brief Hands out the next fixed size (i4_size bytes) ctb node from the pool
+*
+* @param[in,out] ps_mem_mgr : memory manager holding all ctb nodes
+* @return NULL if no free nodes, else ptr to the new ctb node
+********************************************************************************
+*/
+ctb_node_t *hme_get_ctb_node(ctb_mem_mgr_t *ps_mem_mgr)
+{
+    ctb_node_t *ps_node = NULL;
+    if((ps_mem_mgr->i4_used + ps_mem_mgr->i4_size) <= ps_mem_mgr->i4_tot)
+    {
+        ps_node = (ctb_node_t *)(ps_mem_mgr->pu1_mem + ps_mem_mgr->i4_used);
+        ps_mem_mgr->i4_used += ps_mem_mgr->i4_size;
+    }
+    return (ps_node);
+}
+
+/**
+********************************************************************************
+* @fn hme_map_mvs_to_grid(mv_grid_t **pps_mv_grid,
+* search_results_t *ps_search_results,
+* U08 *pu1_pred_dir_searched, S32 i4_num_pred_dir)
+*
+* @brief For a given CU whose results are in ps_search_results, the 17x17
+* mv grid (one node per 4x4 blk) is updated for future use within the CTB
+*
+* @param[out] pps_mv_grid: The mv grids, indexed by pred direction
+* @param[in] ps_search_results : Search results data structure
+* @param[in] pu1_pred_dir_searched: pred directions that were searched; each
+* entry selects both the grid and the part results that are copied
+* @param[in] i4_num_pred_dir: number of entries in pu1_pred_dir_searched (<= 2)
+* @return None
+********************************************************************************
+*/
+void hme_map_mvs_to_grid(
+ mv_grid_t **pps_mv_grid,
+ search_results_t *ps_search_results,
+ U08 *pu1_pred_dir_searched,
+ S32 i4_num_pred_dir)
+{
+ S32 i4_cu_start_offset;
+ /*************************************************************************/
+ /* Start x, y offset of CU relative to CTB. To update the mv grid which */
+ /* stores 1 mv per 4x4, we convert pixel offset to 4x4 blk offset */
+ /*************************************************************************/
+ S32 i4_cu_offset_x = (S32)ps_search_results->u1_x_off >> 2;
+ S32 i4_cu_offset_y = (S32)ps_search_results->u1_y_off >> 2;
+
+ /* Controls the attribute of a given partition within CU */
+ /* , i.e. start locn, size */
+ part_attr_t *ps_part_attr;
+
+ S32 i4_part, i4_part_id, num_parts, i4_stride;
+ S16 i2_mv_x, i2_mv_y;
+ S08 i1_ref_idx;
+
+ /* Per partition, attributes w.r.t. CU start */
+ S32 x_start, y_start, x_end, y_end, i4_x, i4_y;
+ PART_TYPE_T e_part_type;
+
+ /* Points to exact mv structures within the grid to be updated */
+ search_node_t *ps_grid_node, *ps_grid_node_tmp;
+
+ /* points to exact mv grid (based on pred direction) to be updated */
+ mv_grid_t *ps_mv_grid;
+
+ search_node_t *ps_search_node;
+
+ S32 shift, i, mv_shift = 2;
+ /* Proportional to the size of CU, controls the number of 4x4 blks */
+ /* to be updated */
+ shift = ps_search_results->e_cu_size;
+ ASSERT(i4_num_pred_dir <= 2);
+
+ /* Partition type is taken from the first (index 0) best result of the CU */
+ e_part_type = (PART_TYPE_T)ps_search_results->ps_cu_results->ps_best_results[0].u1_part_type;
+
+ /* A split 16x16 CU with NxN enabled is always mapped as NxN */
+ if((ps_search_results->e_cu_size == CU_16x16) && (ps_search_results->u1_split_flag) &&
+ (ps_search_results->i4_part_mask & ENABLE_NxN))
+ {
+ e_part_type = PRT_NxN;
+ }
+
+ for(i = 0; i < i4_num_pred_dir; i++)
+ {
+ num_parts = gau1_num_parts_in_part_type[e_part_type];
+ ps_mv_grid = pps_mv_grid[pu1_pred_dir_searched[i]];
+ i4_stride = ps_mv_grid->i4_stride;
+
+ i4_cu_start_offset =
+ i4_cu_offset_x + i4_cu_offset_y * i4_stride + ps_mv_grid->i4_start_offset;
+
+ /* Move to the appropriate 2d locn of CU start within Grid */
+ ps_grid_node = &ps_mv_grid->as_node[i4_cu_start_offset];
+
+ for(i4_part = 0; i4_part < num_parts; i4_part++)
+ {
+ i4_part_id = ge_part_type_to_part_id[e_part_type][i4_part];
+
+ /* Pick the mvx and y and ref id corresponding to this partition */
+ ps_search_node =
+ ps_search_results->aps_part_results[pu1_pred_dir_searched[i]][i4_part_id];
+
+ i2_mv_x = ps_search_node->s_mv.i2_mvx;
+ i2_mv_y = ps_search_node->s_mv.i2_mvy;
+ i1_ref_idx = ps_search_node->i1_ref_idx;
+
+ /* Move to the appropriate location within the CU */
+ ps_part_attr = &gas_part_attr_in_cu[i4_part_id];
+ x_start = ps_part_attr->u1_x_start;
+ x_end = x_start + ps_part_attr->u1_x_count;
+ y_start = ps_part_attr->u1_y_start;
+ y_end = y_start + ps_part_attr->u1_y_count;
+
+ /* Convert attributes from 8x8 CU size to given CU size */
+ x_start = (x_start << shift) >> mv_shift;
+ x_end = (x_end << shift) >> mv_shift;
+ y_start = (y_start << shift) >> mv_shift;
+ y_end = (y_end << shift) >> mv_shift;
+
+ ps_grid_node_tmp = ps_grid_node + y_start * i4_stride;
+
+ /* Update all 4x4 blk mvs with the part mv */
+ /* For e.g. we update 4 units in case of NxN for 16x16 CU */
+ for(i4_y = y_start; i4_y < y_end; i4_y++)
+ {
+ for(i4_x = x_start; i4_x < x_end; i4_x++)
+ {
+ ps_grid_node_tmp[i4_x].s_mv.i2_mvx = i2_mv_x;
+ ps_grid_node_tmp[i4_x].s_mv.i2_mvy = i2_mv_y;
+ ps_grid_node_tmp[i4_x].i1_ref_idx = i1_ref_idx;
+ ps_grid_node_tmp[i4_x].u1_subpel_done = 1; /* every copied node is flagged as subpel done */
+ }
+ ps_grid_node_tmp += i4_stride;
+ }
+ }
+ }
+}
+
+/* Stores the two prediction buffer pointers and the stride in ps_parent and,   */
+/* when ps_parent has children (ps_tl != NULL), recurses into all four          */
+/* quadrants with pointers advanced to each quadrant and half the stride.       */
+void hme_set_ctb_pred_attr(ctb_node_t *ps_parent, U08 *pu1_pred0, U08 *pu1_pred1, S32 i4_stride)
+{
+    ps_parent->apu1_pred[0] = pu1_pred0;
+    ps_parent->apu1_pred[1] = pu1_pred1;
+    ps_parent->i4_pred_stride = i4_stride;
+    if(ps_parent->ps_tl != NULL)
+    {
+        /* Child width = x offset of the top-right child relative to the parent */
+        S32 blk_wd = (S32)ps_parent->ps_tr->u1_x_off - (S32)ps_parent->u1_x_off;
+        S32 i4_child_strd = i4_stride >> 1;
+        S32 i4_bl_off = blk_wd * i4_stride;
+        S32 i4_br_off = blk_wd * (1 + i4_stride);
+
+        hme_set_ctb_pred_attr(ps_parent->ps_tl, pu1_pred0, pu1_pred1, i4_child_strd);
+        hme_set_ctb_pred_attr(
+            ps_parent->ps_tr, pu1_pred0 + blk_wd, pu1_pred1 + blk_wd, i4_child_strd);
+        hme_set_ctb_pred_attr(
+            ps_parent->ps_bl, pu1_pred0 + i4_bl_off, pu1_pred1 + i4_bl_off, i4_child_strd);
+
+        /* Bottom-right quadrant goes to ps_br; ps_tr was already handled above */
+        /* and must not be visited again with the bottom-right pointers.        */
+        hme_set_ctb_pred_attr(
+            ps_parent->ps_br, pu1_pred0 + i4_br_off, pu1_pred1 + i4_br_off, i4_child_strd);
+    }
+}
+
+/**
+********************************************************************************
+* @fn hme_create_valid_part_ids(S32 i4_part_mask, S32 *pi4_valid_part_ids)
+*
+* @brief Expands the part mask to a list of valid part ids terminated by -1
+*
+* @param[in] i4_part_mask : bit mask of active partition ids
+*
+* @param[out] pi4_valid_part_ids : array, each entry has one valid part id,
+* in increasing order. Terminated by -1 to signal end, so it needs room
+* for one entry more than the number of bits set.
+* @return number of partitions (the -1 terminator is not counted)
+********************************************************************************
+*/
+S32 hme_create_valid_part_ids(S32 i4_part_mask, S32 *pi4_valid_part_ids)
+{
+    S32 *pi4_out = pi4_valid_part_ids;
+    S32 i4_part;
+
+    for(i4_part = 0; i4_part < TOT_NUM_PARTS; i4_part++)
+    {
+        if(0 == (i4_part_mask & (1 << i4_part)))
+            continue;
+        *pi4_out++ = i4_part;
+    }
+    /* Sentinel marking the end of the list */
+    *pi4_out = -1;
+    return (S32)(pi4_out - pi4_valid_part_ids);
+}
+
+/* Picks the boundary attribute set for a CTB from whether it crosses the right */
+/* and/or bottom picture edge. A fixed CTB size of 64 is used for the test.     */
+ctb_boundary_attrs_t *
+    get_ctb_attrs(S32 ctb_start_x, S32 ctb_start_y, S32 pic_wd, S32 pic_ht, me_frm_ctxt_t *ps_ctxt)
+{
+    /* The CTB is cropped in a direction when it extends past that picture edge */
+    S32 crosses_rt = (ctb_start_x + 64) > pic_wd;
+    S32 crosses_bot = (ctb_start_y + 64) > pic_ht;
+
+    if(crosses_rt && crosses_bot)
+    {
+        return (&ps_ctxt->as_ctb_bound_attrs[CTB_BOT_RT_PIC_BOUNDARY]);
+    }
+    if(crosses_rt)
+    {
+        return (&ps_ctxt->as_ctb_bound_attrs[CTB_RT_PIC_BOUNDARY]);
+    }
+    if(crosses_bot)
+    {
+        return (&ps_ctxt->as_ctb_bound_attrs[CTB_BOT_PIC_BOUNDARY]);
+    }
+
+    /* Fully inside the picture */
+    return (&ps_ctxt->as_ctb_bound_attrs[CTB_CENTRE]);
+}
+
+/**
+********************************************************************************
+* @fn hevc_avg_2d(U08 *pu1_src1,
+* U08 *pu1_src2,
+* S32 i4_src1_stride,
+* S32 i4_src2_stride,
+* S32 i4_blk_wd,
+* S32 i4_blk_ht,
+* U08 *pu1_dst,
+* S32 i4_dst_stride)
+*
+* @brief Rounded point wise average, (a + b + 1) >> 1, of two buffers
+* written into a third buffer
+*
+* @param[in] pu1_src1 : first source buffer
+*
+* @param[in] pu1_src2 : 2nd source buffer
+*
+* @param[in] i4_src1_stride : stride of source 1 buffer
+*
+* @param[in] i4_src2_stride : stride of source 2 buffer
+*
+* @param[in] i4_blk_wd : block width (samples averaged per row)
+*
+* @param[in] i4_blk_ht : block height (number of rows)
+*
+* @param[out] pu1_dst : destination buffer
+*
+* @param[in] i4_dst_stride : stride of the destination buffer
+*
+* @return void
+********************************************************************************
+*/
+void hevc_avg_2d(
+    U08 *pu1_src1,
+    U08 *pu1_src2,
+    S32 i4_src1_stride,
+    S32 i4_src2_stride,
+    S32 i4_blk_wd,
+    S32 i4_blk_ht,
+    U08 *pu1_dst,
+    S32 i4_dst_stride)
+{
+    S32 i4_row, i4_col;
+    U08 *pu1_a = pu1_src1, *pu1_b = pu1_src2, *pu1_out = pu1_dst;
+    for(i4_row = 0; i4_row < i4_blk_ht; i4_row++)
+    {
+        for(i4_col = 0; i4_col < i4_blk_wd; i4_col++)
+        {
+            S32 i4_sum = pu1_a[i4_col] + pu1_b[i4_col] + 1;
+            pu1_out[i4_col] = i4_sum >> 1;
+        }
+        pu1_a += i4_src1_stride;
+        pu1_b += i4_src2_stride;
+        pu1_out += i4_dst_stride;
+    }
+}
+/**
+********************************************************************************
+* @fn hme_pick_back_search_node(search_results_t *ps_search_results,
+* search_node_t *ps_search_node_fwd,
+* S32 i4_part_idx,
+* layer_ctxt_t *ps_curr_layer)
+*
+*
+* @brief returns the search node corresponding to a ref idx in same or
+* opp direction. Preference is given to opp direction, but if that
+* does not yield results, same direction is attempted.
+*
+* @param[in] ps_search_results: search results overall
+*
+* @param[in] ps_search_node_fwd: search node corresponding to "fwd" direction
+*
+* @param[in] i4_part_idx : partition id
+*
+* @param[in] ps_curr_layer : layer context for current layer.
+*
+* @return search node corresponding to the "other direction"; never NULL
+********************************************************************************
+*/
+//#define PICK_L1_REF_SAME_DIR
+search_node_t *hme_pick_back_search_node(
+ search_results_t *ps_search_results,
+ search_node_t *ps_search_node_fwd,
+ S32 i4_part_idx,
+ layer_ctxt_t *ps_curr_layer)
+{
+ S32 is_past_l0, is_past_l1, id, i, i4_poc;
+ S32 *pi4_ref_id_to_poc_lc = ps_curr_layer->ai4_ref_id_to_poc_lc;
+ //ref_attr_t *ps_ref_attr_lc;
+ S08 i1_ref_idx_fwd;
+ S16 i2_mv_x, i2_mv_y;
+ search_node_t *ps_search_node;
+
+ i1_ref_idx_fwd = ps_search_node_fwd->i1_ref_idx;
+ i2_mv_x = ps_search_node_fwd->s_mv.i2_mvx;
+ i2_mv_y = ps_search_node_fwd->s_mv.i2_mvy;
+ i4_poc = ps_curr_layer->i4_poc;
+
+ //ps_ref_attr_lc = &ps_curr_layer->as_ref_attr_lc[0];
+ /* If the ref id already picked up maps to a past pic, then we pick */
+ /* a result corresponding to future pic. If such a result is not */
+ /* to be found, then we pick a result corresponding to a past pic */
+ //is_past = ps_ref_attr_lc[i1_ref_idx_fwd].u1_is_past;
+ is_past_l0 = (i4_poc > pi4_ref_id_to_poc_lc[i1_ref_idx_fwd]) ? 1 : 0; /* past = ref POC below current POC */
+
+ ASSERT(ps_search_results->u1_num_active_ref <= 2);
+
+ /* pick the result list to look in: with 2 active refs the list index follows is_past_l0 */
+#ifdef PICK_L1_REF_SAME_DIR
+ if(ps_search_results->u1_num_active_ref == 2)
+ id = !is_past_l0;
+#else
+ if(ps_search_results->u1_num_active_ref == 2)
+ id = is_past_l0;
+#endif
+ else
+ id = 0; /* this else pairs with whichever if the preprocessor kept above */
+
+ ps_search_node = ps_search_results->aps_part_results[id][i4_part_idx];
+
+ for(i = 0; i < ps_search_results->u1_num_results_per_part; i++)
+ {
+ S08 i1_ref_test = ps_search_node[i].i1_ref_idx; /* NOTE(review): used as an array index unchecked; a reset node holds -1 */
+ is_past_l1 = (pi4_ref_id_to_poc_lc[i1_ref_test] < i4_poc) ? 1 : 0;
+ //if (ps_ref_attr_lc[ps_search_node[i].i1_ref_idx].u1_is_past != is_past)
+#ifdef PICK_L1_REF_SAME_DIR
+ if(is_past_l1 == is_past_l0)
+#else
+ if(is_past_l1 != is_past_l0)
+#endif
+ {
+ /* direction test passed (opposite to the fwd ref by default): take this result */
+ return (ps_search_node + i);
+ }
+ }
+
+ /* Unable to find best result in opp direction, so try same direction */
+ /* However we need to ensure that we do not pick up same result */
+ for(i = 0; i < ps_search_results->u1_num_results_per_part; i++)
+ {
+ if((ps_search_node->i1_ref_idx != i1_ref_idx_fwd) ||
+ (ps_search_node->s_mv.i2_mvx != i2_mv_x) || (ps_search_node->s_mv.i2_mvy != i2_mv_y))
+ {
+ return (ps_search_node); /* first entry that differs from fwd in ref idx or mv */
+ }
+ ps_search_node++;
+ }
+
+ /* Every entry equals the fwd node: fall back to the first entry of the list */
+ return (ps_search_results->aps_part_results[id][i4_part_idx]);
+
+ //return (NULL);
+}
+
+/**
+********************************************************************************
+* @fn hme_study_input_segmentation(U08 *pu1_inp, S32 i4_inp_stride,
+* S32 limit_active_partitions)
+*
+* @brief Examines input 16x16 for possible edges and orientations of those,
+* and returns a bit mask of partitions that should be searched for
+*
+* @param[in] pu1_inp : input buffer (a 16x16 area is read)
+* @param[in] i4_inp_stride: input stride
+* @param[in] limit_active_partitions: 0 returns ENABLE_ALL_PARTS without
+* reading the input; non zero enables the edge based pruning
+*
+* @return part mask (bit mask of active partitions to search)
+********************************************************************************
+*/
+S32 hme_study_input_segmentation(U08 *pu1_inp, S32 i4_inp_stride, S32 limit_active_partitions)
+{
+    S32 i4_rsum[16], i4_csum[16];
+    U08 *pu1_tmp, u1_tmp;
+    S32 i4_max_ridx, i4_max_cidx, i4_tmp;
+    S32 i, j, i4_ret;
+    S32 i4_max_rp[4], i4_max_cp[4];
+    S32 i4_seg_lutc[4] = { 0, ENABLE_nLx2N, ENABLE_Nx2N, ENABLE_nRx2N };
+    S32 i4_seg_lutr[4] = { 0, ENABLE_2NxnU, ENABLE_2NxN, ENABLE_2NxnD };
+#define EDGE_THR (15 * 16)
+#define HI_PASS(ptr, i) (2 * (ptr[i] - ptr[i - 1]) + (ptr[i + 1] - ptr[i - 2]))
+
+    if(0 == limit_active_partitions)
+    {
+        /*********************************************************************/
+        /* In this case, we do not optimize on active partitions and search  */
+        /* brute force. This way, 17 partitions would be enabled.            */
+        /*********************************************************************/
+        return (ENABLE_ALL_PARTS);
+    }
+
+    /*************************************************************************/
+    /* Control passes below in case we wish to optimize on active partitions.*/
+    /* This is based on input characteristics, check how an edge passes along*/
+    /* an input 16x16 area, if at all, and decide active partitions.         */
+    /*************************************************************************/
+
+    /* Initialize row and col sums */
+    for(i = 0; i < 16; i++)
+    {
+        i4_rsum[i] = 0;
+        i4_csum[i] = 0;
+    }
+    pu1_tmp = pu1_inp;
+    for(i = 0; i < 16; i++)
+    {
+        for(j = 0; j < 16; j++)
+        {
+            u1_tmp = *pu1_tmp++;
+            i4_rsum[i] += u1_tmp;
+            i4_csum[j] += u1_tmp;
+        }
+        pu1_tmp += (i4_inp_stride - 16);
+    }
+
+    /* 0 is dummy; 1 is 4; 2 is 8; 3 is 12 */
+    i4_max_rp[0] = 0;
+    i4_max_cp[0] = 0;
+    i4_max_rp[1] = 0;
+    i4_max_cp[1] = 0;
+    i4_max_rp[2] = 0;
+    i4_max_cp[2] = 0;
+    i4_max_rp[3] = 0;
+    i4_max_cp[3] = 0;
+
+    /* Get Max edge strength across (2,3) (3,4) (4,5) */
+    for(i = 3; i < 6; i++)
+    {
+        /* Run [-1 -2 2 1] filter through rsum/csum */
+        i4_tmp = HI_PASS(i4_rsum, i);
+        if(ABS(i4_tmp) > i4_max_rp[1])
+            i4_max_rp[1] = ABS(i4_tmp);
+        /* The magnitude is kept: a falling edge counts as much as a rising one */
+        i4_tmp = HI_PASS(i4_csum, i);
+        if(ABS(i4_tmp) > i4_max_cp[1])
+            i4_max_cp[1] = ABS(i4_tmp);
+    }
+
+    /* Get Max edge strength across (6,7) (7,8) (8,9) */
+    for(i = 7; i < 10; i++)
+    {
+        /* Run [-1 -2 2 1] filter through rsum/csum */
+        i4_tmp = HI_PASS(i4_rsum, i);
+        if(ABS(i4_tmp) > i4_max_rp[2])
+            i4_max_rp[2] = ABS(i4_tmp);
+
+        i4_tmp = HI_PASS(i4_csum, i);
+        if(ABS(i4_tmp) > i4_max_cp[2])
+            i4_max_cp[2] = ABS(i4_tmp);
+    }
+
+    /* Get Max edge strength across (10,11) (11,12) (12,13) */
+    for(i = 11; i < 14; i++)
+    {
+        /* Run [-1 -2 2 1] filter through rsum/csum */
+        i4_tmp = HI_PASS(i4_rsum, i);
+        if(ABS(i4_tmp) > i4_max_rp[3])
+            i4_max_rp[3] = ABS(i4_tmp);
+
+        i4_tmp = HI_PASS(i4_csum, i);
+        if(ABS(i4_tmp) > i4_max_cp[3])
+            i4_max_cp[3] = ABS(i4_tmp);
+    }
+
+    /* Find the maximum across the 3 and see whether the strength qualifies as edge */
+    i4_max_ridx = 1;
+    i4_max_cidx = 1;
+    for(i = 2; i <= 3; i++)
+    {
+        if(i4_max_rp[i] > i4_max_rp[i4_max_ridx])
+            i4_max_ridx = i;
+
+        if(i4_max_cp[i] > i4_max_cp[i4_max_cidx])
+            i4_max_cidx = i;
+    }
+
+    if(EDGE_THR > i4_max_rp[i4_max_ridx])
+    {
+        i4_max_ridx = 0;
+    }
+
+    if(EDGE_THR > i4_max_cp[i4_max_cidx])
+    {
+        i4_max_cidx = 0;
+    }
+
+    i4_ret = ENABLE_2Nx2N;
+
+    /* No edge in either direction: 2Nx2N alone is searched */
+    if(0 == (i4_max_ridx + i4_max_cidx))
+    {
+        /* neither the row nor the column maximum reached EDGE_THR */
+        return i4_ret;
+    }
+
+    if(i4_max_ridx && (i4_max_cidx == 0))
+    {
+        /* edge in the row sums only: add 2NxnU / 2NxN / 2NxnD by its position */
+        return ((i4_ret | i4_seg_lutr[i4_max_ridx]));
+    }
+
+    /* Edge in the column sums only, go with one of ?x2N */
+    if(i4_max_cidx && (i4_max_ridx == 0))
+    {
+        /* add nLx2N / Nx2N / nRx2N by its position */
+        return ((i4_ret | i4_seg_lutc[i4_max_cidx]));
+    }
+
+    /* If middle is dominant in both directions, go with NxN */
+    if((2 == i4_max_cidx) && (2 == i4_max_ridx))
+    {
+        /* both edges sit at the middle position */
+        return ((i4_ret | ENABLE_NxN));
+    }
+
+    /* Otherwise, conservatively, enable NxN and the 2 AMPs */
+    /* (the row and column entries picked by the two edge positions) */
+    return (i4_ret | ENABLE_NxN | i4_seg_lutr[i4_max_ridx] | i4_seg_lutc[i4_max_cidx]);
+}
+
+/**
+********************************************************************************
+* @fn hme_init_search_results(search_results_t *ps_search_results,
+* S32 i4_num_ref,
+* S32 i4_num_best_results,
+* S32 i4_num_results_per_part,
+* BLK_SIZE_T e_blk_size,
+* S32 i4_x_off,
+* S32 i4_y_off,
+* U08 *pu1_is_past)
+*
+* @brief Records the per CU attributes in the search results structure and
+* puts the split flag and best CU cost in their initial state
+*
+* @param[out] ps_search_results : search results structure to initialise
+* @param[in] i4_num_ref: corresponds to the number of ref ids searched
+* @param[in] i4_num_best_results: Number of best results for the CU to
+* be maintained in the result structure
+* @param[in] i4_num_results_per_part: Per active partition the number of best
+* results to be maintained
+* @param[in] e_blk_size: blk size of the CU for which this structure used
+* @param[in] i4_x_off: x offset of the top left of CU from CTB top left
+* @param[in] i4_y_off: y offset of the top left of CU from CTB top left
+* @param[in] pu1_is_past : points to an array that tells whether a given ref id
+* has prominence in L0 or in L1 list (past or future); only the pointer is
+* stored here
+*
+* @return void
+********************************************************************************
+*/
+void hme_init_search_results(
+    search_results_t *ps_search_results,
+    S32 i4_num_ref,
+    S32 i4_num_best_results,
+    S32 i4_num_results_per_part,
+    BLK_SIZE_T e_blk_size,
+    S32 i4_x_off,
+    S32 i4_y_off,
+    U08 *pu1_is_past)
+{
+    CU_SIZE_T e_cu_size = ge_blk_size_to_cu_size[e_blk_size];
+    /* The block size must map to a CU size */
+    ASSERT(e_cu_size != -1);
+    /* Reference bookkeeping and result counts (narrowed to U08 fields) */
+    ps_search_results->pu1_is_past = pu1_is_past;
+    ps_search_results->u1_num_active_ref = (U08)i4_num_ref;
+    ps_search_results->u1_num_best_results = (U08)i4_num_best_results;
+    ps_search_results->u1_num_results_per_part = (U08)i4_num_results_per_part;
+    /* Geometry of the CU within the CTB */
+    ps_search_results->e_cu_size = e_cu_size;
+    ps_search_results->u1_x_off = (U08)i4_x_off;
+    ps_search_results->u1_y_off = (U08)i4_y_off;
+
+    /* Initial state: not split, best cost at its maximum */
+    ps_search_results->best_cu_cost = MAX_32BIT_VAL;
+    ps_search_results->u1_split_flag = 0;
+}
+
+/**
+********************************************************************************
+* @fn hme_reset_search_results(search_results_t *ps_search_results,
+* S32 i4_part_mask, S32 mv_res)
+*
+* @brief Resets the best results to maximum values, so as to allow search
+* for the new CU's partitions. The existing results may be from an
+* older CU using same structure.
+*
+* @param[in,out] ps_search_results: search results structure
+* @param[in] i4_part_mask : bit mask of active partitions; only these are reset
+* @param[in] mv_res : mv resolution passed to HME_SET_MVPRED_RES for both
+* pred ctxts
+*
+* @return void
+********************************************************************************
+*/
+void hme_reset_search_results(search_results_t *ps_search_results, S32 i4_part_mask, S32 mv_res)
+{
+    S32 i4_num_ref = (S32)ps_search_results->u1_num_active_ref;
+    S08 i1_ref_idx;
+    S32 i4_part, i4_res;
+
+    /* Remember the mask for future use and clear the split flag for the new CU */
+    ps_search_results->i4_part_mask = i4_part_mask;
+    ps_search_results->u1_split_flag = 0;
+
+    HME_SET_MVPRED_RES((&ps_search_results->as_pred_ctxt[0]), mv_res);
+    HME_SET_MVPRED_RES((&ps_search_results->as_pred_ctxt[1]), mv_res);
+
+    for(i1_ref_idx = 0; i1_ref_idx < i4_num_ref; i1_ref_idx++)
+    {
+        for(i4_part = 0; i4_part < TOT_NUM_PARTS; i4_part++)
+        {
+            search_node_t *ps_node;
+
+            /* Partitions outside the mask keep whatever they hold */
+            if(0 == (i4_part_mask & (1 << i4_part)))
+                continue;
+
+            ps_node = ps_search_results->aps_part_results[i1_ref_idx][i4_part];
+
+            for(i4_res = 0; i4_res < ps_search_results->u1_num_results_per_part; i4_res++, ps_node++)
+            {
+                /* Zero mv, maximum cost and sad, no ref picked yet */
+                ps_node->s_mv.i2_mvx = 0;
+                ps_node->s_mv.i2_mvy = 0;
+                ps_node->i4_tot_cost = MAX_32BIT_VAL;
+                ps_node->i4_sad = MAX_32BIT_VAL;
+                ps_node->i4_sdi = 0;
+                ps_node->i4_mv_cost = 0;
+                ps_node->i1_ref_idx = -1;
+                ps_node->u1_subpel_done = 0;
+                ps_node->u1_is_avail = 1;
+            }
+        }
+    }
+}
+/**
+********************************************************************************
+*  @fn     S32 hme_clamp_grid_by_mvrange(search_node_t *ps_search_node,
+*                                        S32 i4_step,
+*                                        range_prms_t *ps_mvrange)
+*
+*  @brief  Given the centre pt of a grid and the grid step, returns the grid
+*          mask after removing the sides that fall outside the mv range.
+*          The right and bottom sides are removed when centre + step reaches
+*          or exceeds the max limit; the left and top sides are removed when
+*          centre - step is below the min limit.
+*
+*  @param[in] ps_search_node : the centre pt of the grid (its s_mv is used)
+*
+*  @param[in] i4_step : step size of grid
+*
+*  @param[in] ps_mvrange : structure containing the current mv range
+*
+*  @return GRID_ALL_PTS_VALID and-ed with the GRID_*_3_INVALID mask of every
+*          side found out of range
+********************************************************************************
+*/
+S32 hme_clamp_grid_by_mvrange(search_node_t *ps_search_node, S32 i4_step, range_prms_t *ps_mvrange)
+{
+    const S32 i4_centre_x = ps_search_node->s_mv.i2_mvx;
+    const S32 i4_centre_y = ps_search_node->s_mv.i2_mvy;
+    S32 i4_valid_pts = GRID_ALL_PTS_VALID;
+
+    /* Horizontal extent of the grid */
+    if((i4_centre_x - i4_step) < ps_mvrange->i2_min_x)
+        i4_valid_pts &= (GRID_LT_3_INVALID);
+    if((i4_centre_x + i4_step) >= ps_mvrange->i2_max_x)
+        i4_valid_pts &= (GRID_RT_3_INVALID);
+
+    /* Vertical extent of the grid */
+    if((i4_centre_y - i4_step) < ps_mvrange->i2_min_y)
+        i4_valid_pts &= (GRID_TOP_3_INVALID);
+    if((i4_centre_y + i4_step) >= ps_mvrange->i2_max_y)
+        i4_valid_pts &= (GRID_BOT_3_INVALID);
+
+    return i4_valid_pts;
+}
+
+/**
+********************************************************************************
+*  @fn     layer_ctxt_t *hme_get_past_layer_ctxt(me_ctxt_t *ps_ctxt,
+*                                                me_frm_ctxt_t *ps_frm_ctxt,
+*                                                S32 i4_layer_id,
+*                                                S32 i4_num_me_frm_pllel)
+*
+*  @brief  Searches the reference descriptors of the ME context for the one
+*          whose layer i4_layer_id carries the POC stored in
+*          ps_frm_ctxt->ai4_ref_idx_to_poc_lc[0], and returns that layer ctxt.
+*
+*  @param[in] ps_ctxt : ME context (owner of as_ref_descr)
+*
+*  @param[in] ps_frm_ctxt : frame context providing the POC to look for
+*
+*  @param[in] i4_layer_id : id of layer required
+*
+*  @param[in] i4_num_me_frm_pllel : multiplied with max_num_ref of
+*             aps_me_frm_prms[0] (plus one) to get the number of descriptors
+*             that are searched
+*
+*  @return layer ctxt of the given layer id with matching POC, NULL if no
+*          descriptor matches
+********************************************************************************
+*/
+layer_ctxt_t *hme_get_past_layer_ctxt(
+    me_ctxt_t *ps_ctxt, me_frm_ctxt_t *ps_frm_ctxt, S32 i4_layer_id, S32 i4_num_me_frm_pllel)
+{
+    const S32 i4_target_poc = ps_frm_ctxt->ai4_ref_idx_to_poc_lc[0];
+    S32 i4_descr = 0;
+
+    while(i4_descr < (ps_ctxt->aps_me_frm_prms[0]->max_num_ref * i4_num_me_frm_pllel) + 1)
+    {
+        layers_descr_t *ps_descr = &ps_ctxt->as_ref_descr[i4_descr];
+
+        /* First descriptor with matching POC wins */
+        if(ps_descr->aps_layers[i4_layer_id]->i4_poc == i4_target_poc)
+        {
+            return (ps_descr->aps_layers[i4_layer_id]);
+        }
+        i4_descr++;
+    }
+
+    return NULL;
+}
+
+/**
+********************************************************************************
+*  @fn     layer_ctxt_t *hme_coarse_get_past_layer_ctxt(coarse_me_ctxt_t *ps_ctxt,
+*                                                       S32 i4_layer_id)
+*
+*  @brief  Searches the reference descriptors of the coarse ME context for
+*          the one whose layer i4_layer_id carries the POC stored in
+*          ps_ctxt->ai4_ref_idx_to_poc_lc[0], and returns that layer ctxt.
+*
+*  @param[in] ps_ctxt : coarse ME context
+*
+*  @param[in] i4_layer_id : id of layer required
+*
+*  @return layer ctxt of the given layer id with matching POC, NULL if none
+*          of the (max_num_ref + 1 + NUM_BUFS_DECOMP_HME) descriptors matches
+********************************************************************************
+*/
+layer_ctxt_t *hme_coarse_get_past_layer_ctxt(coarse_me_ctxt_t *ps_ctxt, S32 i4_layer_id)
+{
+    const S32 i4_target_poc = ps_ctxt->ai4_ref_idx_to_poc_lc[0];
+    S32 i4_descr = 0;
+
+    while(i4_descr < ps_ctxt->max_num_ref + 1 + NUM_BUFS_DECOMP_HME)
+    {
+        layers_descr_t *ps_descr = &ps_ctxt->as_ref_descr[i4_descr];
+
+        /* First descriptor with matching POC wins */
+        if(ps_descr->aps_layers[i4_layer_id]->i4_poc == i4_target_poc)
+        {
+            return (ps_descr->aps_layers[i4_layer_id]);
+        }
+        i4_descr++;
+    }
+
+    return NULL;
+}
+
+/**
+********************************************************************************
+*  @fn     void hme_init_mv_bank(layer_ctxt_t *ps_layer_ctxt,
+*                                BLK_SIZE_T e_blk_size,
+*                                S32 i4_num_ref,
+*                                S32 i4_num_results_per_part,
+*                                U08 u1_enc)
+*
+*  @brief  Given a blk size to be used for this layer, this function
+*          initializes the mv bank to make it ready to store and return
+*          results: it records the bank geometry, positions ps_mv/pi1_ref_idx
+*          on the 0,0 blk (one padding row and one padding blk after the
+*          base) and initializes the padding entries.
+*
+*  @param[in, out] ps_layer_ctxt : pointer to layer ctxt (its i4_wd, i4_ht
+*             and ps_layer_mvbank are used)
+*
+*  @param[in] e_blk_size : resolution at which mvs are stored
+*
+*  @param[in] i4_num_ref : number of reference frames corresponding to which
+*             results are stored
+*
+*  @param[in] i4_num_results_per_part : number of results to be stored per
+*             ref idx
+*
+*  @param[in] u1_enc : when non zero, the number of blks per row and column
+*             is derived from 64x64 CTBs (8 blks per CTB in each direction)
+*             instead of from the blk dimensions
+*
+*  @return void
+********************************************************************************
+*/
+void hme_init_mv_bank(
+    layer_ctxt_t *ps_layer_ctxt,
+    BLK_SIZE_T e_blk_size,
+    S32 i4_num_ref,
+    S32 i4_num_results_per_part,
+    U08 u1_enc)
+{
+    layer_mv_t *ps_bank = ps_layer_ctxt->ps_layer_mvbank;
+    const S32 i4_mvs_per_blk = i4_num_ref * i4_num_results_per_part;
+    const S32 i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
+    const S32 i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
+    S32 i4_blks_per_row, i4_blk_rows, i4_mvs_per_row;
+    S32 i4_row, i4_k, i4_last_blk;
+
+    /* Record how results are organised inside one blk */
+    ps_bank->i4_num_mvs_per_ref = i4_num_results_per_part;
+    ps_bank->i4_num_ref = i4_num_ref;
+    ps_bank->i4_num_mvs_per_blk = i4_mvs_per_blk;
+    ps_bank->e_blk_size = e_blk_size;
+
+    /* Number of blks in each direction, rounding partial blks up */
+    i4_blks_per_row = (ps_layer_ctxt->i4_wd + (i4_blk_wd - 1)) / i4_blk_wd;
+    i4_blk_rows = (ps_layer_ctxt->i4_ht + (i4_blk_ht - 1)) / i4_blk_ht;
+
+    if(u1_enc)
+    {
+        /* TODO: CTB64x64 is assumed. FIX according to actual CTB */
+        WORD32 num_ctb_cols = ((ps_layer_ctxt->i4_wd + 63) >> 6);
+        WORD32 num_ctb_rows = ((ps_layer_ctxt->i4_ht + 63) >> 6);
+
+        /* 8 blks per CTB in each direction */
+        i4_blks_per_row = (num_ctb_cols << 3);
+        i4_blk_rows = (num_ctb_rows << 3);
+    }
+
+    /* Two extra blks per row (one is the left padding column) */
+    i4_blks_per_row += 2;
+    i4_mvs_per_row = i4_blks_per_row * i4_mvs_per_blk;
+
+    ps_bank->i4_num_blks_per_row = i4_blks_per_row;
+    ps_bank->i4_num_mvs_per_row = i4_mvs_per_row;
+
+    /* To ensure run time requirements fall within allocation time request */
+    ASSERT(ps_bank->i4_num_mvs_per_row <= ps_bank->max_num_mvs_per_row);
+
+    /* Skip one full row (top padding) and one blk (left padding) to reach */
+    /* the entry of the 0,0 blk                                            */
+    ps_bank->ps_mv = ps_bank->ps_mv_base + i4_mvs_per_row + i4_mvs_per_blk;
+    ps_bank->pi1_ref_idx = ps_bank->pi1_ref_idx_base + i4_mvs_per_row + i4_mvs_per_blk;
+
+    /* Top padding row: zero mvs, ref idx -1 */
+    memset(ps_bank->ps_mv_base, 0, i4_mvs_per_row * sizeof(hme_mv_t));
+    memset(ps_bank->pi1_ref_idx_base, -1, i4_mvs_per_row * sizeof(U08));
+
+    /* For every blk row below the top padding, the first and the last blk */
+    /* of the row get zero mvs and ref idx -1, since these are used as     */
+    /* candidates during searches.                                         */
+    i4_last_blk = i4_mvs_per_row - i4_mvs_per_blk;
+    for(i4_row = 1; i4_row <= i4_blk_rows; i4_row++)
+    {
+        hme_mv_t *ps_row_mv = ps_bank->ps_mv_base + (i4_row * i4_mvs_per_row);
+        S08 *pi1_row_ref = ps_bank->pi1_ref_idx_base + (i4_row * i4_mvs_per_row);
+
+        for(i4_k = 0; i4_k < i4_mvs_per_blk; i4_k++)
+        {
+            ps_row_mv[i4_k].i2_mv_x = 0;
+            ps_row_mv[i4_k].i2_mv_y = 0;
+            ps_row_mv[i4_last_blk + i4_k].i2_mv_x = 0;
+            ps_row_mv[i4_last_blk + i4_k].i2_mv_y = 0;
+            pi1_row_ref[i4_k] = -1;
+            pi1_row_ref[i4_last_blk + i4_k] = -1;
+        }
+    }
+}
+/**
+********************************************************************************
+*  @fn     void hme_fill_mvbank_intra(layer_ctxt_t *ps_layer_ctxt)
+*
+*  @brief  Marks entries of the layer mv bank as intra: both mv components
+*          are set to INTRA_MV and the ref idx to -1. Starting at the 0,0
+*          blk, the first blks_in_row entries of each of the blks_in_col
+*          rows are written.
+*
+*  @param[in, out] ps_layer_ctxt : layer ctxt; its mv bank must already
+*             carry e_blk_size, ps_mv, pi1_ref_idx, i4_num_mvs_per_row
+*
+*  @return void
+********************************************************************************
+*/
+void hme_fill_mvbank_intra(layer_ctxt_t *ps_layer_ctxt)
+{
+    layer_mv_t *ps_mv_bank;
+    hme_mv_t *ps_mv;
+    S08 *pi1_ref_id;
+    S32 blk_wd, blks_in_row, blks_in_col;
+    S32 i, j, blk_ht;
+    BLK_SIZE_T e_blk_size;
+
+    ps_mv_bank = ps_layer_ctxt->ps_layer_mvbank;
+
+    /*************************************************************************/
+    /* From the stored blk size derive blk width and height, and from these  */
+    /* the number of whole blks in every row and column of the layer.        */
+    /*************************************************************************/
+    e_blk_size = ps_mv_bank->e_blk_size;
+    blk_wd = gau1_blk_size_to_wd[e_blk_size];
+    /* Height must come from the height table (the width table was used    */
+    /* here before, which only gives the same answer for square blks)      */
+    blk_ht = gau1_blk_size_to_ht[e_blk_size];
+    blks_in_row = ps_layer_ctxt->i4_wd / blk_wd;
+    blks_in_col = ps_layer_ctxt->i4_ht / blk_ht;
+
+    /*************************************************************************/
+    /* ps_mv / pi1_ref_idx of the bank already point to the mv of 0,0 blk    */
+    /*************************************************************************/
+    ps_mv = ps_mv_bank->ps_mv;
+    pi1_ref_id = ps_mv_bank->pi1_ref_idx;
+
+    /* NOTE(review): only blks_in_row consecutive entries are written per   */
+    /* row; if i4_num_mvs_per_blk > 1 this does not cover every mv of every */
+    /* blk in the row - confirm against the callers.                        */
+    for(i = 0; i < blks_in_col; i++)
+    {
+        for(j = 0; j < blks_in_row; j++)
+        {
+            ps_mv[j].i2_mv_x = INTRA_MV;
+            ps_mv[j].i2_mv_y = INTRA_MV;
+            pi1_ref_id[j] = -1;
+        }
+        ps_mv += ps_mv_bank->i4_num_mvs_per_row;
+        pi1_ref_id += ps_mv_bank->i4_num_mvs_per_row;
+    }
+}
+
+/**
+********************************************************************************
+*  @fn     void hme_derive_search_range(range_prms_t *ps_range,
+*                                       range_prms_t *ps_pic_limit,
+*                                       range_prms_t *ps_mv_limit,
+*                                       S32 i4_x,
+*                                       S32 i4_y,
+*                                       S32 blk_wd,
+*                                       S32 blk_ht)
+*
+*  @brief  Given picture limits, blk position and dimensions, and mv search
+*          limits, obtains the valid mv range such that the blk stays within
+*          the picture limits. Each bound is the tighter of the bound implied
+*          by the picture limit and the corresponding mv limit.
+*
+*  @param[out] ps_range : updated with actual search range
+*
+*  @param[in] ps_pic_limit : picture boundaries
+*
+*  @param[in] ps_mv_limit : search range limits for the mvs
+*
+*  @param[in] i4_x : x coordinate of the blk (used as a 16 bit value)
+*
+*  @param[in] i4_y : y coordinate of the blk (used as a 16 bit value)
+*
+*  @param[in] blk_wd : blk width (used as a 16 bit value)
+*
+*  @param[in] blk_ht : blk height (used as a 16 bit value)
+*
+*  @return void
+********************************************************************************
+*/
+void hme_derive_search_range(
+    range_prms_t *ps_range,
+    range_prms_t *ps_pic_limit,
+    range_prms_t *ps_mv_limit,
+    S32 i4_x,
+    S32 i4_y,
+    S32 blk_wd,
+    S32 blk_ht)
+{
+    /* Largest displacement keeping the far edge of the blk inside the pic */
+    const S32 i4_pic_max_x = ps_pic_limit->i2_max_x - (S16)blk_wd - (S16)i4_x;
+    const S32 i4_pic_max_y = ps_pic_limit->i2_max_y - (S16)blk_ht - (S16)i4_y;
+
+    /* Smallest displacement keeping the near edge of the blk inside the pic */
+    const S32 i4_pic_min_x = ps_pic_limit->i2_min_x - (S16)i4_x;
+    const S32 i4_pic_min_y = ps_pic_limit->i2_min_y - (S16)i4_y;
+
+    ps_range->i2_max_x = MIN(i4_pic_max_x, ps_mv_limit->i2_max_x);
+    ps_range->i2_min_x = MAX(i4_pic_min_x, ps_mv_limit->i2_min_x);
+    ps_range->i2_max_y = MIN(i4_pic_max_y, ps_mv_limit->i2_max_y);
+    ps_range->i2_min_y = MAX(i4_pic_min_y, ps_mv_limit->i2_min_y);
+}
+
+/* Copies the bank entry at i4_offset from the given base pointers into the */
+/* search node via COPY_MV_TO_SEARCH_NODE.                                  */
+static void hme_spatial_candt_copy(
+    search_node_t *ps_node,
+    hme_mv_t *ps_mv_base,
+    S08 *pi1_ref_idx_base,
+    S32 i4_offset,
+    S08 i1_ref_idx,
+    S32 shift)
+{
+    hme_mv_t *ps_src_mv = ps_mv_base + i4_offset;
+    S08 *pi1_src_ref = pi1_ref_idx_base + i4_offset;
+
+    COPY_MV_TO_SEARCH_NODE(ps_node, ps_src_mv, pi1_src_ref, i1_ref_idx, shift);
+}
+
+/* Marks a search node as not available: zero mv, given ref idx. */
+static void hme_spatial_candt_set_unavail(search_node_t *ps_node, S08 i1_ref_idx)
+{
+    ps_node->s_mv.i2_mvx = 0;
+    ps_node->s_mv.i2_mvy = 0;
+    ps_node->i1_ref_idx = i1_ref_idx;
+    ps_node->u1_is_avail = 0;
+}
+
+/**
+********************************************************************************
+*  @fn     void hme_get_spatial_candt(layer_ctxt_t *ps_curr_layer,
+*                                     BLK_SIZE_T e_search_blk_size,
+*                                     S32 i4_blk_x,
+*                                     S32 i4_blk_y,
+*                                     S08 i1_ref_idx,
+*                                     search_node_t *ps_top_neighbours,
+*                                     search_node_t *ps_left_neighbours,
+*                                     S32 i4_result_id,
+*                                     S32 tr_avail,
+*                                     S32 bl_avail,
+*                                     S32 encode)
+*
+*  @brief  Obtains candts from the same mv bank as the current one; they are
+*          called spatial candts as they do not require scaling for temporal
+*          distances. Four top nodes (TL, T, T1, TR) and three left nodes
+*          (L, L1, BL) are filled.
+*
+*  @param[in] ps_curr_layer : layer ctxt, has the mv bank structure pointer
+*
+*  @param[in] e_search_blk_size : size of the blk being searched; when its
+*             width differs from the width of the bank's blk size, the blk
+*             coordinates are scaled by 2, or by 4 when it is 4 times wider
+*
+*  @param[in] i4_blk_x : x coordinate of the block, in search blk units
+*
+*  @param[in] i4_blk_y : y coordinate of the block, in search blk units
+*
+*  @param[in] i1_ref_idx : corresponds to ref idx from which to pick up mv
+*             results; also written into nodes marked unavailable
+*
+*  @param[out] ps_top_neighbours : 4 nodes, filled as TL, T, T1, TR
+*
+*  @param[out] ps_left_neighbours : 3 nodes, filled as L, L1, BL
+*
+*  @param[in] i4_result_id : if multiple results stored per ref idx, this
+*             pts to the id of the result
+*
+*  @param[in] tr_avail : top right availability of the block
+*
+*  @param[in] bl_avail : bottom left availability of the block
+*
+*  @param[in] encode : when non zero, a shift of 2 is passed to
+*             COPY_MV_TO_SEARCH_NODE, else 0
+*
+*  @return void
+********************************************************************************
+*/
+void hme_get_spatial_candt(
+    layer_ctxt_t *ps_curr_layer,
+    BLK_SIZE_T e_search_blk_size,
+    S32 i4_blk_x,
+    S32 i4_blk_y,
+    S08 i1_ref_idx,
+    search_node_t *ps_top_neighbours,
+    search_node_t *ps_left_neighbours,
+    S32 i4_result_id,
+    S32 tr_avail,
+    S32 bl_avail,
+    S32 encode)
+
+{
+    layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
+    const S32 i4_bank_blk_wd = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
+    const S32 i4_search_blk_wd = gau1_blk_size_to_wd[e_search_blk_size];
+    const S32 mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
+    const S32 mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;
+    const S32 shift = (encode ? 2 : 0);
+    S32 i4_log2_jump = 0;
+    S32 jump, i4_offset;
+    hme_mv_t *ps_mv_base;
+    S08 *pi1_ref_idx_base;
+
+    /* Search blk covers 1, 2 or 4 bank blks in each direction */
+    if(i4_bank_blk_wd != i4_search_blk_wd)
+    {
+        i4_log2_jump = ((i4_bank_blk_wd << 2) == i4_search_blk_wd) ? 2 : 1;
+    }
+    jump = 1 << i4_log2_jump;
+    i4_blk_x <<= i4_log2_jump;
+    i4_blk_y <<= i4_log2_jump;
+
+    /* Adjust the blk coord to point to top left locn */
+    i4_blk_x -= 1;
+    i4_blk_y -= 1;
+
+    /* Offset of the top left entry for this ref idx and result id */
+    i4_offset = (i4_blk_x * mvs_in_blk) + (mvs_in_row * i4_blk_y);
+    i4_offset += (i1_ref_idx * ps_layer_mvbank->i4_num_mvs_per_ref) + i4_result_id;
+
+    ps_mv_base = ps_layer_mvbank->ps_mv + i4_offset;
+    pi1_ref_idx_base = ps_layer_mvbank->pi1_ref_idx + i4_offset;
+
+    /* TL: the base location itself */
+    hme_spatial_candt_copy(
+        &ps_top_neighbours[0], ps_mv_base, pi1_ref_idx_base, 0, i1_ref_idx, shift);
+
+    /* T: one blk to the right of TL */
+    hme_spatial_candt_copy(
+        &ps_top_neighbours[1], ps_mv_base, pi1_ref_idx_base, mvs_in_blk, i1_ref_idx, shift);
+
+    /* T1 : relevant for 4x4 part searches or for partitions in 16x16 */
+    if(ps_layer_mvbank->i4_num_mvs_per_ref > 1)
+    {
+        hme_spatial_candt_copy(
+            &ps_top_neighbours[2],
+            ps_mv_base,
+            pi1_ref_idx_base,
+            mvs_in_blk + (mvs_in_blk * (jump >> 1)),
+            i1_ref_idx,
+            shift);
+    }
+    else
+    {
+        hme_spatial_candt_set_unavail(&ps_top_neighbours[2], i1_ref_idx);
+        ps_top_neighbours[2].u1_subpel_done = 0;
+    }
+
+    /* TR: this will be tr w.r.t. the blk being searched */
+    if(tr_avail == 0)
+    {
+        hme_spatial_candt_set_unavail(&ps_top_neighbours[3], i1_ref_idx);
+        ps_top_neighbours[3].u1_subpel_done = 0;
+    }
+    else
+    {
+        hme_spatial_candt_copy(
+            &ps_top_neighbours[3],
+            ps_mv_base,
+            pi1_ref_idx_base,
+            (mvs_in_blk * (1 + jump)),
+            i1_ref_idx,
+            shift);
+    }
+
+    /* L: one row below TL */
+    hme_spatial_candt_copy(
+        &ps_left_neighbours[0], ps_mv_base, pi1_ref_idx_base, mvs_in_row, i1_ref_idx, shift);
+
+    /* L1 */
+    if(ps_layer_mvbank->i4_num_mvs_per_ref > 1)
+    {
+        hme_spatial_candt_copy(
+            &ps_left_neighbours[1],
+            ps_mv_base,
+            pi1_ref_idx_base,
+            mvs_in_row + (mvs_in_row * (jump >> 1)),
+            i1_ref_idx,
+            shift);
+    }
+    else
+    {
+        hme_spatial_candt_set_unavail(&ps_left_neighbours[1], i1_ref_idx);
+        ps_left_neighbours[1].u1_subpel_done = 0;
+    }
+
+    /* BL */
+    if(bl_avail == 0)
+    {
+        /* NOTE(review): unlike the other unavailable cases, u1_subpel_done */
+        /* is not reset here - confirm whether that is intended.            */
+        hme_spatial_candt_set_unavail(&ps_left_neighbours[2], i1_ref_idx);
+    }
+    else
+    {
+        hme_spatial_candt_copy(
+            &ps_left_neighbours[2],
+            ps_mv_base,
+            pi1_ref_idx_base,
+            (mvs_in_row * (1 + jump)),
+            i1_ref_idx,
+            shift);
+    }
+}
+
+/* Marks a search node as not available: zero mv, given ref idx. */
+static void hme_l1_me_candt_set_unavail(search_node_t *ps_node, S08 i1_ref_idx)
+{
+    ps_node->u1_is_avail = 0;
+    ps_node->s_mv.i2_mvx = 0;
+    ps_node->s_mv.i2_mvy = 0;
+    ps_node->i1_ref_idx = i1_ref_idx;
+}
+
+/* Looks up, among the entries starting at ps_mv / pi1_ref_idx, the position */
+/* reported by hme_find_pos_of_implicitly_stored_ref_id() for the requested  */
+/* ref idx and result id. The entry at that position is copied to the node;  */
+/* when the lookup returns -1 the node is marked unavailable.                */
+static void hme_l1_me_candt_fetch(
+    search_node_t *ps_node,
+    hme_mv_t *ps_mv,
+    S08 *pi1_ref_idx,
+    S08 i1_ref_idx,
+    S32 i4_result_id,
+    S32 i4_num_results_in_given_dir)
+{
+    S32 shift = 0;
+    S32 i4_mv_pos_in_implicit_array = hme_find_pos_of_implicitly_stored_ref_id(
+        pi1_ref_idx, i1_ref_idx, i4_result_id, i4_num_results_in_given_dir);
+
+    if(-1 != i4_mv_pos_in_implicit_array)
+    {
+        COPY_MV_TO_SEARCH_NODE(
+            ps_node,
+            &ps_mv[i4_mv_pos_in_implicit_array],
+            &pi1_ref_idx[i4_mv_pos_in_implicit_array],
+            i1_ref_idx,
+            shift);
+    }
+    else
+    {
+        hme_l1_me_candt_set_unavail(ps_node, i1_ref_idx);
+    }
+}
+
+/**
+********************************************************************************
+*  @fn     void hme_get_spatial_candt_in_l1_me(layer_ctxt_t *ps_curr_layer,
+*                                              BLK_SIZE_T e_search_blk_size,
+*                                              S32 i4_blk_x,
+*                                              S32 i4_blk_y,
+*                                              S08 i1_ref_idx,
+*                                              U08 u1_pred_dir,
+*                                              search_node_t *ps_top_neighbours,
+*                                              search_node_t *ps_left_neighbours,
+*                                              S32 i4_result_id,
+*                                              S32 tr_avail,
+*                                              S32 bl_avail,
+*                                              S32 i4_num_act_ref_l0,
+*                                              S32 i4_num_act_ref_l1)
+*
+*  @brief  Fills four top nodes (TL, T, T1, TR) and three left nodes
+*          (L, L1, BL) from the mv bank of the current layer. For every
+*          neighbour blk the entry to use is located with
+*          hme_find_pos_of_implicitly_stored_ref_id(); if no position is
+*          found (-1) the node is marked unavailable with zero mv.
+*
+*  @param[in] ps_curr_layer : layer ctxt, has the mv bank structure pointer
+*
+*  @param[in] e_search_blk_size : size of the blk being searched; when its
+*             width differs from the width of the bank's blk size, the blk
+*             coordinates are scaled by 2, or by 4 when it is 4 times wider
+*
+*  @param[in] i4_blk_x : x coordinate of the block, in search blk units
+*
+*  @param[in] i4_blk_y : y coordinate of the block, in search blk units
+*
+*  @param[in] i1_ref_idx : ref idx handed to the lookup; also written into
+*             nodes marked unavailable
+*
+*  @param[in] u1_pred_dir : when 1, entries are read after skipping
+*             (i4_num_mvs_per_ref * i4_num_act_ref_l0) entries of each blk
+*             and the lookup spans (i4_num_mvs_per_ref * i4_num_act_ref_l1)
+*             entries; otherwise the blk start is used and the lookup spans
+*             (i4_num_mvs_per_ref * i4_num_act_ref_l0) entries
+*
+*  @param[out] ps_top_neighbours : 4 nodes, filled as TL, T, T1, TR
+*
+*  @param[out] ps_left_neighbours : 3 nodes, filled as L, L1, BL
+*
+*  @param[in] i4_result_id : result id handed to the lookup
+*
+*  @param[in] tr_avail : top right availability of the block
+*
+*  @param[in] bl_avail : bottom left availability of the block
+*
+*  @param[in] i4_num_act_ref_l0 : see u1_pred_dir
+*
+*  @param[in] i4_num_act_ref_l1 : see u1_pred_dir
+*
+*  @return void
+********************************************************************************
+*/
+void hme_get_spatial_candt_in_l1_me(
+    layer_ctxt_t *ps_curr_layer,
+    BLK_SIZE_T e_search_blk_size,
+    S32 i4_blk_x,
+    S32 i4_blk_y,
+    S08 i1_ref_idx,
+    U08 u1_pred_dir,
+    search_node_t *ps_top_neighbours,
+    search_node_t *ps_left_neighbours,
+    S32 i4_result_id,
+    S32 tr_avail,
+    S32 bl_avail,
+    S32 i4_num_act_ref_l0,
+    S32 i4_num_act_ref_l1)
+{
+    layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
+
+    S32 i4_blk_size1 = gau1_blk_size_to_wd[ps_layer_mvbank->e_blk_size];
+    S32 i4_blk_size2 = gau1_blk_size_to_wd[e_search_blk_size];
+    S32 mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
+    S32 mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;
+    S32 jump = 1;
+    S32 i4_offset;
+    S32 i4_num_results_in_given_dir =
+        ((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref * i4_num_act_ref_l1)
+                            : (ps_layer_mvbank->i4_num_mvs_per_ref * i4_num_act_ref_l0));
+    hme_mv_t *ps_mv_base;
+    S08 *pi1_ref_idx_base;
+
+    /* Search blk covers 1, 2 or 4 bank blks in each direction */
+    if(i4_blk_size1 != i4_blk_size2)
+    {
+        i4_blk_x <<= 1;
+        i4_blk_y <<= 1;
+        jump = 2;
+        if((i4_blk_size1 << 2) == i4_blk_size2)
+        {
+            i4_blk_x <<= 1;
+            i4_blk_y <<= 1;
+            jump = 4;
+        }
+    }
+
+    /* Adjust the blk coord to point to top left locn */
+    i4_blk_x -= 1;
+    i4_blk_y -= 1;
+
+    /* Offset of the top left blk, then of the group of the pred direction */
+    i4_offset = (i4_blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
+    i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * i4_blk_y);
+    i4_offset +=
+        ((u1_pred_dir == 1) ? (ps_layer_mvbank->i4_num_mvs_per_ref * i4_num_act_ref_l0) : 0);
+
+    ps_mv_base = ps_layer_mvbank->ps_mv + i4_offset;
+    pi1_ref_idx_base = ps_layer_mvbank->pi1_ref_idx + i4_offset;
+
+    /* TL: the base location itself */
+    hme_l1_me_candt_fetch(
+        &ps_top_neighbours[0],
+        ps_mv_base,
+        pi1_ref_idx_base,
+        i1_ref_idx,
+        i4_result_id,
+        i4_num_results_in_given_dir);
+
+    /* T: one blk to the right of TL */
+    hme_l1_me_candt_fetch(
+        &ps_top_neighbours[1],
+        ps_mv_base + mvs_in_blk,
+        pi1_ref_idx_base + mvs_in_blk,
+        i1_ref_idx,
+        i4_result_id,
+        i4_num_results_in_given_dir);
+
+    /* T1 : relevant for 4x4 part searches or for partitions in 16x16 */
+    if(ps_layer_mvbank->i4_num_mvs_per_ref > 1)
+    {
+        S32 i4_t1_off = mvs_in_blk + (mvs_in_blk * (jump >> 1));
+
+        hme_l1_me_candt_fetch(
+            &ps_top_neighbours[2],
+            ps_mv_base + i4_t1_off,
+            pi1_ref_idx_base + i4_t1_off,
+            i1_ref_idx,
+            i4_result_id,
+            i4_num_results_in_given_dir);
+    }
+    else
+    {
+        hme_l1_me_candt_set_unavail(&ps_top_neighbours[2], i1_ref_idx);
+    }
+
+    /* TR: this will be tr w.r.t. the blk being searched */
+    if(tr_avail == 0)
+    {
+        hme_l1_me_candt_set_unavail(&ps_top_neighbours[3], i1_ref_idx);
+        /* Only this unavailable case also resets the subpel flag */
+        ps_top_neighbours[3].u1_subpel_done = 0;
+    }
+    else
+    {
+        S32 i4_tr_off = (mvs_in_blk * (1 + jump));
+
+        hme_l1_me_candt_fetch(
+            &ps_top_neighbours[3],
+            ps_mv_base + i4_tr_off,
+            pi1_ref_idx_base + i4_tr_off,
+            i1_ref_idx,
+            i4_result_id,
+            i4_num_results_in_given_dir);
+    }
+
+    /* L: one row below TL */
+    hme_l1_me_candt_fetch(
+        &ps_left_neighbours[0],
+        ps_mv_base + mvs_in_row,
+        pi1_ref_idx_base + mvs_in_row,
+        i1_ref_idx,
+        i4_result_id,
+        i4_num_results_in_given_dir);
+
+    /* L1 */
+    if(ps_layer_mvbank->i4_num_mvs_per_ref > 1)
+    {
+        S32 i4_l1_off = mvs_in_row + (mvs_in_row * (jump >> 1));
+
+        hme_l1_me_candt_fetch(
+            &ps_left_neighbours[1],
+            ps_mv_base + i4_l1_off,
+            pi1_ref_idx_base + i4_l1_off,
+            i1_ref_idx,
+            i4_result_id,
+            i4_num_results_in_given_dir);
+    }
+    else
+    {
+        hme_l1_me_candt_set_unavail(&ps_left_neighbours[1], i1_ref_idx);
+    }
+
+    /* BL */
+    if(bl_avail == 0)
+    {
+        hme_l1_me_candt_set_unavail(&ps_left_neighbours[2], i1_ref_idx);
+    }
+    else
+    {
+        S32 i4_bl_off = (mvs_in_row * (1 + jump));
+
+        hme_l1_me_candt_fetch(
+            &ps_left_neighbours[2],
+            ps_mv_base + i4_bl_off,
+            pi1_ref_idx_base + i4_bl_off,
+            i1_ref_idx,
+            i4_result_id,
+            i4_num_results_in_given_dir);
+    }
+}
+
+/**
+********************************************************************************
+*  @fn     void hme_fill_ctb_neighbour_mvs(layer_ctxt_t *ps_curr_layer,
+*                                          S32 blk_x,
+*                                          S32 blk_y,
+*                                          mv_grid_t *ps_mv_grid,
+*                                          U08 u1_pred_dir_ctr,
+*                                          U08 u1_default_ref_id,
+*                                          S32 i4_num_act_ref_l0)
+*
+*  @brief  The 18x18 MV grid for a ctb is filled in first row and 1st col;
+*          this corresponds to neighbours (TL, T, TR, L, BL). 18 nodes of
+*          the first grid row and 16 nodes of the first grid column are
+*          copied from the layer mv bank; the node below them in that column
+*          is marked unavailable.
+*
+*  @param[in] ps_curr_layer : layer ctxt, has the mv bank structure pointer
+*
+*  @param[in] blk_x : x coordinate of the block; scaled by 4 when the bank
+*             stores 4x4 blks, by 2 otherwise
+*
+*  @param[in] blk_y : y coordinate of the block; scaled like blk_x
+*
+*  @param[in, out] ps_mv_grid : Grid (18x18 mvs at 4x4 level)
+*
+*  @param[in] u1_pred_dir_ctr : when 1, the bank entries are read one entry
+*             further
+*
+*  @param[in] u1_default_ref_id : ref id handed to COPY_MV_TO_SEARCH_NODE
+*
+*  @param[in] i4_num_act_ref_l0 : not used by this function
+*
+*  @return void
+********************************************************************************
+*/
+void hme_fill_ctb_neighbour_mvs(
+    layer_ctxt_t *ps_curr_layer,
+    S32 blk_x,
+    S32 blk_y,
+    mv_grid_t *ps_mv_grid,
+    U08 u1_pred_dir_ctr,
+    U08 u1_default_ref_id,
+    S32 i4_num_act_ref_l0)
+{
+    layer_mv_t *ps_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
+    const S32 mvs_in_blk = ps_layer_mvbank->i4_num_mvs_per_blk;
+    const S32 mvs_in_row = ps_layer_mvbank->i4_num_mvs_per_row;
+    search_node_t *ps_grid_node;
+    hme_mv_t *ps_mv, *ps_mv_base;
+    S08 *pi1_ref_idx, *pi1_ref_idx_base;
+    S32 i4_offset, i4_node, i4_step;
+    S32 jump;
+
+    /* Searching 16x16: bank mvs are either for 4x4 or for 8x8 blks. With  */
+    /* 8x8 blks jump stays 0, so that two grid nodes share one bank entry. */
+    if(ps_layer_mvbank->e_blk_size == BLK_4x4)
+    {
+        jump = 1;
+        blk_x <<= 2;
+        blk_y <<= 2;
+    }
+    else
+    {
+        jump = 0;
+        blk_x <<= 1;
+        blk_y <<= 1;
+    }
+    ASSERT(ps_layer_mvbank->e_blk_size != BLK_16x16);
+
+    /* Adjust the blk coord to point to top left locn */
+    blk_x -= 1;
+    blk_y -= 1;
+
+    /* Pick up the mvs from the location */
+    i4_offset = (blk_x * mvs_in_blk) + (mvs_in_row * blk_y);
+
+    /* NOTE(review): pred dir 1 moves by a single entry and the parameter  */
+    /* i4_num_act_ref_l0 is not used - confirm this matches how the bank   */
+    /* lays out the results of the two directions.                         */
+    i4_offset += (u1_pred_dir_ctr == 1);
+
+    ps_mv_base = ps_layer_mvbank->ps_mv + i4_offset;
+    pi1_ref_idx_base = ps_layer_mvbank->pi1_ref_idx + i4_offset;
+
+    /* Top row: the 0, 0 entry of the grid pts to top left for the ctb.    */
+    /* 18 mvs at 4x4 level: top left, 16 top mvs for ctb, 1 tr             */
+    ps_mv = ps_mv_base;
+    pi1_ref_idx = pi1_ref_idx_base;
+    ps_grid_node = &ps_mv_grid->as_node[0];
+    for(i4_node = 0; i4_node < 18; i4_node++)
+    {
+        COPY_MV_TO_SEARCH_NODE(ps_grid_node, ps_mv, pi1_ref_idx, u1_default_ref_id, 0);
+        ps_grid_node++;
+
+        /* After an odd node the source advances by jump blks, else by one */
+        i4_step = (i4_node & 1) ? jump : 1;
+        ps_mv += (mvs_in_blk * i4_step);
+        pi1_ref_idx += (mvs_in_blk * i4_step);
+    }
+
+    /* Left column: 16 left mvs, starting one bank row below the top left */
+    ps_mv = ps_mv_base + mvs_in_row;
+    pi1_ref_idx = pi1_ref_idx_base + mvs_in_row;
+    ps_grid_node = &ps_mv_grid->as_node[0] + (ps_mv_grid->i4_stride);
+    for(i4_node = 0; i4_node < 16; i4_node++)
+    {
+        COPY_MV_TO_SEARCH_NODE(ps_grid_node, ps_mv, pi1_ref_idx, u1_default_ref_id, 0);
+        ps_grid_node += ps_mv_grid->i4_stride;
+
+        /* After an even node the source advances by jump rows, else by one */
+        i4_step = (i4_node & 1) ? 1 : jump;
+        ps_mv += (mvs_in_row * i4_step);
+        pi1_ref_idx += (mvs_in_row * i4_step);
+    }
+
+    /* last one set to invalid as bottom left not yet encoded */
+    ps_grid_node->u1_is_avail = 0;
+}
+
+/* Resets the usage counter (i4_used) of the working memory manager to 0.   */
+/* The buffer pointer and the total size held by ps_buf_mgr are not changed. */
+void hme_reset_wkg_mem(buf_mgr_t *ps_buf_mgr)
+{
+ ps_buf_mgr->i4_used = 0;
+}
+/* Attaches the buffer pu1_mem with total size `size` to the working memory */
+/* manager and resets its usage counter.                                    */
+void hme_init_wkg_mem(buf_mgr_t *ps_buf_mgr, U08 *pu1_mem, S32 size)
+{
+    /* The reset only writes i4_used, so it is independent of the fields below */
+    hme_reset_wkg_mem(ps_buf_mgr);
+
+    ps_buf_mgr->i4_total = size;
+    ps_buf_mgr->pu1_wkg_mem = pu1_mem;
+}
+
+/* Sets up stride and start offset of the CTB mv grid and marks the 16x16 */
+/* nodes starting at the start offset as available.                       */
+void hme_init_mv_grid(mv_grid_t *ps_mv_grid)
+{
+    S32 i4_row, i4_col;
+
+    /*************************************************************************/
+    /* We have a 64x64 CTB in the worst case. For this, we have 16x16 4x4 MVs*/
+    /* Additionally, we have 1 neighbour on each side. This makes it a 18x18 */
+    /* MV Grid. The boundary of this Grid on all sides are neighbours and the*/
+    /* left and top edges of this grid is filled run time. The center portion*/
+    /* represents the actual CTB MVs (16x16) and is also filled run time.    */
+    /* However, the availability is always set as available (init time)      */
+    /*************************************************************************/
+    ps_mv_grid->i4_stride = NUM_COLUMNS_IN_CTB_GRID;
+    ps_mv_grid->i4_start_offset = ps_mv_grid->i4_stride + CTB_MV_GRID_PAD;
+
+    for(i4_row = 0; i4_row < 16; i4_row++)
+    {
+        /* First node of this row of the centre portion */
+        search_node_t *ps_row_node =
+            &ps_mv_grid->as_node[ps_mv_grid->i4_start_offset + (i4_row * ps_mv_grid->i4_stride)];
+
+        for(i4_col = 0; i4_col < 16; i4_col++)
+        {
+            ps_row_node[i4_col].u1_is_avail = 1;
+        }
+    }
+}
+/**
+********************************************************************************
+*  @fn     void hme_pad_left(U08 *pu1_dst, S32 stride, S32 pad_wd, S32 pad_ht)
+*
+*  @brief  Pads horizontally to left side. On each line, the pixel at
+*          pu1_dst is replicated into the pad_wd positions preceding it.
+*
+*  @param[in, out] pu1_dst : destination pointer. Points to the pixel to be
+*             repeated on the first line
+*
+*  @param[in] stride : stride of destination buffer
+*
+*  @param[in] pad_wd : Amt of horizontal padding to be done
+*
+*  @param[in] pad_ht : Number of lines for which horizontal padding to be done
+*
+*  @return void
+********************************************************************************
+*/
+void hme_pad_left(U08 *pu1_dst, S32 stride, S32 pad_wd, S32 pad_ht)
+{
+    U08 *pu1_line = pu1_dst;
+    S32 i4_line;
+
+    for(i4_line = 0; i4_line < pad_ht; i4_line++, pu1_line += stride)
+    {
+        /* Nothing is written for a non positive pad width */
+        if(pad_wd > 0)
+        {
+            memset(pu1_line - pad_wd, pu1_line[0], pad_wd);
+        }
+    }
+}
+/**
+********************************************************************************
+*  @fn     void hme_pad_right(U08 *pu1_dst, S32 stride, S32 pad_wd, S32 pad_ht)
+*
+*  @brief  Pads horizontally to rt side. On each line, the pixel at pu1_dst
+*          is replicated into the pad_wd positions following it.
+*
+*  @param[in, out] pu1_dst : destination pointer. Points to the pixel to be
+*             repeated on the first line
+*
+*  @param[in] stride : stride of destination buffer
+*
+*  @param[in] pad_wd : Amt of horizontal padding to be done
+*
+*  @param[in] pad_ht : Number of lines for which horizontal padding to be done
+*
+*  @return void
+********************************************************************************
+*/
+void hme_pad_right(U08 *pu1_dst, S32 stride, S32 pad_wd, S32 pad_ht)
+{
+    U08 *pu1_line = pu1_dst;
+    S32 i4_line;
+
+    for(i4_line = 0; i4_line < pad_ht; i4_line++, pu1_line += stride)
+    {
+        /* Nothing is written for a non positive pad width */
+        if(pad_wd > 0)
+        {
+            memset(pu1_line + 1, pu1_line[0], pad_wd);
+        }
+    }
+}
+/**
+********************************************************************************
+*  @fn     void hme_pad_top(U08 *pu1_dst, S32 stride, S32 pad_ht, S32 pad_wd)
+*
+*  @brief  Pads vertically on the top. The line at pu1_dst is copied to each
+*          of the pad_ht lines above it.
+*
+*  @param[in, out] pu1_dst : destination pointer. Points to the line to be
+*             repeated
+*
+*  @param[in] stride : stride of destination buffer
+*
+*  @param[in] pad_ht : Amt of vertical padding to be done
+*
+*  @param[in] pad_wd : Number of columns for which vertical padding to be done
+*
+*  @return void
+********************************************************************************
+*/
+void hme_pad_top(U08 *pu1_dst, S32 stride, S32 pad_ht, S32 pad_wd)
+{
+    U08 *pu1_pad_line = pu1_dst;
+    S32 i4_line;
+
+    for(i4_line = 0; i4_line < pad_ht; i4_line++)
+    {
+        /* Move one line up and replicate the source line there */
+        pu1_pad_line -= stride;
+        memcpy(pu1_pad_line, pu1_dst, pad_wd);
+    }
+}
+/**
+********************************************************************************
+*  @fn     void hme_pad_bot(U08 *pu1_dst, S32 stride, S32 pad_ht, S32 pad_wd)
+*
+*  @brief  Pads vertically on the bot. The line at pu1_dst is copied to each
+*          of the pad_ht lines below it.
+*
+*  @param[in, out] pu1_dst : destination pointer. Points to the line to be
+*             repeated
+*
+*  @param[in] stride : stride of destination buffer
+*
+*  @param[in] pad_ht : Amt of vertical padding to be done
+*
+*  @param[in] pad_wd : Number of columns for which vertical padding to be done
+*
+*  @return void
+********************************************************************************
+*/
+void hme_pad_bot(U08 *pu1_dst, S32 stride, S32 pad_ht, S32 pad_wd)
+{
+    U08 *pu1_pad_line = pu1_dst;
+    S32 i4_line;
+
+    for(i4_line = 0; i4_line < pad_ht; i4_line++)
+    {
+        /* Move one line down and replicate the source line there */
+        pu1_pad_line += stride;
+        memcpy(pu1_pad_line, pu1_dst, pad_wd);
+    }
+}
+
+/**
+********************************************************************************
+* @fn void hme_get_wt_inp(layer_ctxt_t *ps_curr_layer,
+* wgt_pred_ctxt_t *ps_wt_inp_prms,
+* S32 dst_stride, S32 pos_x, S32 pos_y,
+* S32 size, S32 num_ref, U08 u1_is_wt_pred_on)
+*
+* @brief Does weighting of the input in case the search needs to happen
+* with reference frames weighted
+*
+* @param[in] ps_curr_layer: layer ctxt; supplies the input plane (pu1_inp),
+* its stride (i4_inp_stride) and the picture size (i4_wd, i4_ht)
+*
+* @param[in,out] ps_wt_inp_prms : weighted pred ctxt. wpred_log_wdc,
+* a_wpred_wt, a_inv_wpred_wt, a_wpred_off and apu1_wt_inp_buf_array are
+* read; apu1_wt_inp[0 .. num_ref] is rewritten and the buffer those
+* entries point at is filled
+*
+* @param[in] dst_stride : stride used while writing the destination buffer
+*
+* @param[in] pos_x : x coordinate of the input blk in the picture
+*
+* @param[in] pos_y : y coordinate of the input blk in the picture
+*
+* @param[in] size : size of the input block
+*
+* @param[in] num_ref : Number of reference frames
+*
+* @param[in] u1_is_wt_pred_on : not read anywhere in this function
+*
+* @par Description
+* pos_x / pos_y are clamped to the last column / row of the picture and only
+* the part of the size x size block lying inside the picture is read. The
+* rest of the block is filled by repeating the last valid column to the
+* right and then the last valid row downwards.
+*
+* NOTE(review): every apu1_wt_inp[] entry is pointed at
+* apu1_wt_inp_buf_array[num_ref], so all passes of the ref loop write into
+* the same buffer and what remains is the output of the final pass
+* (ref == num_ref, the plain copy). Confirm that this aliasing is intended.
+*
+* @return void
+********************************************************************************
+*/
+void hme_get_wt_inp(
+ layer_ctxt_t *ps_curr_layer,
+ wgt_pred_ctxt_t *ps_wt_inp_prms,
+ S32 dst_stride,
+ S32 pos_x,
+ S32 pos_y,
+ S32 size,
+ S32 num_ref,
+ U08 u1_is_wt_pred_on)
+{
+ S32 ref, i, j;
+ U08 *pu1_src, *pu1_dst, *pu1_src_tmp;
+ S32 log_wdc = ps_wt_inp_prms->wpred_log_wdc;
+ S32 x_count, y_count;
+
+ /* Fixed source */
+ pu1_src = ps_curr_layer->pu1_inp;
+
+ /* Make sure the start positions of block are inside frame limits */
+ pos_x = MIN(pos_x, ps_curr_layer->i4_wd - 1);
+ pos_y = MIN(pos_y, ps_curr_layer->i4_ht - 1);
+
+ pu1_src += (pos_x + (pos_y * ps_curr_layer->i4_inp_stride));
+
+ /* In case we handle incomplete CTBs, we copy only as much as reqd */
+ /* from input buffers to prevent out of bound accesses. In this */
+ /* case, we do padding in x or y or both dirns */
+ x_count = MIN(size, (ps_curr_layer->i4_wd - pos_x));
+ y_count = MIN(size, (ps_curr_layer->i4_ht - pos_y));
+
+ for(i = 0; i < num_ref + 1; i++)
+ {
+ ps_wt_inp_prms->apu1_wt_inp[i] = ps_wt_inp_prms->apu1_wt_inp_buf_array[num_ref];
+ } /* all num_ref + 1 entries now alias the buffer at index num_ref */
+
+ /* Run thro all ref ids */
+ for(ref = 0; ref < num_ref + 1; ref++)
+ {
+ S32 wt, off;
+ S32 inv_wt;
+
+ pu1_src_tmp = pu1_src;
+
+ /* Each ref id may have different wt/offset. */
+ /* The destination is taken from apu1_wt_inp[ref], which the loop */
+ /* above pointed at one common buffer for every ref id */
+ pu1_dst = ps_wt_inp_prms->apu1_wt_inp[ref];
+
+ if(ref == num_ref)
+ {
+ /* last ref will be non weighted input */
+ for(i = 0; i < y_count; i++)
+ {
+ for(j = 0; j < x_count; j++)
+ {
+ pu1_dst[j] = pu1_src_tmp[j];
+ }
+ pu1_src_tmp += ps_curr_layer->i4_inp_stride;
+ pu1_dst += dst_stride;
+ }
+ }
+ else
+ {
+ /* Wt and off specific to this ref id */
+ wt = ps_wt_inp_prms->a_wpred_wt[ref]; /* only referenced by the commented-out macro call below */
+ inv_wt = ps_wt_inp_prms->a_inv_wpred_wt[ref];
+ off = ps_wt_inp_prms->a_wpred_off[ref];
+
+ /* Generate the in-picture part (y_count rows by x_count columns) */
+ /* of the size*size modified input samples */
+ for(i = 0; i < y_count; i++)
+ {
+ for(j = 0; j < x_count; j++)
+ {
+ S32 tmp;
+
+ /* Since we scale input, we use inverse transform of wt pred */
+ //tmp = HME_INV_WT_PRED(pu1_src_tmp[j], wt, off, log_wdc);
+ tmp = HME_INV_WT_PRED1(pu1_src_tmp[j], inv_wt, off, log_wdc);
+ pu1_dst[j] = (U08)(HME_CLIP(tmp, 0, 255));
+ }
+ pu1_src_tmp += ps_curr_layer->i4_inp_stride;
+ pu1_dst += dst_stride;
+ }
+ }
+
+ /* Check and do padding in right direction if need be */
+ pu1_dst = ps_wt_inp_prms->apu1_wt_inp[ref];
+ if(x_count != size)
+ {
+ hme_pad_right(pu1_dst + x_count - 1, dst_stride, size - x_count, y_count);
+ }
+
+ /* Check and do padding in bottom direction if need be. */
+ /* The full width (size) is copied, so the right padding */
+ /* done above is carried into the new rows as well */
+ if(y_count != size)
+ {
+ hme_pad_bot(pu1_dst + (y_count - 1) * dst_stride, dst_stride, size - y_count, size);
+ }
+ }
+}
+/**
+****************************************************************************************
+* @fn hme_pick_best_pu_cand(pu_result_t *ps_pu_results_dst,
+* pu_result_t *ps_pu_results_list0,
+* pu_result_t *ps_pu_results_list1,
+* UWORD8 u1_num_results_per_part_l0,
+* UWORD8 u1_num_results_per_part_l1,
+* UWORD8 u1_candidate_rank)
+*
+* @brief Pools the candidates of both lists, orders them on i4_tot_cost and
+* copies the candidate found at position u1_candidate_rank of that
+* ordering into ps_pu_results_dst[0]
+*
+* @param[in] - ps_pu_results_list0 : Candidates of list 0
+* - ps_pu_results_list1 : Candidates of list 1
+* - u1_num_results_per_part_l0 : Number of entries read from list 0
+* - u1_num_results_per_part_l1 : Number of entries read from list 1
+* - u1_candidate_rank : Position, in the sorted pool, of the
+* candidate to return (presumably 0 is the
+* cheapest - confirm against the sort macro)
+*
+* @param[out] - ps_pu_results_dst : Receives a copy of the selected candidate
+*
+* @note NOTE(review): u1_candidate_rank is not checked against the number of
+* pooled candidates, and the pooled count is not checked against the
+* capacity of the local arrays (MAX_NUM_RESULTS_PER_PART_LIST << 1).
+* With an out-of-range rank (for instance when both lists are empty)
+* an unwritten as_cand_pos_data entry is read. Callers must guarantee
+* the ranges.
+*
+****************************************************************************************
+*/
+void hme_pick_best_pu_cand(
+ pu_result_t *ps_pu_results_dst,
+ pu_result_t *ps_pu_results_list0,
+ pu_result_t *ps_pu_results_list1,
+ UWORD8 u1_num_results_per_part_l0,
+ UWORD8 u1_num_results_per_part_l1,
+ UWORD8 u1_candidate_rank)
+{
+ struct cand_pos_data
+ {
+ U08 u1_cand_list_id; /* 0: entry came from list 0, 1: from list 1 */
+
+ U08 u1_cand_id_in_cand_list; /* index of the entry inside its own list */
+ } as_cand_pos_data[MAX_NUM_RESULTS_PER_PART_LIST << 1];
+
+ S32 ai4_costs[MAX_NUM_RESULTS_PER_PART_LIST << 1];
+ U08 i, j;
+
+ for(i = 0; i < u1_num_results_per_part_l0; i++) /* list 0 fills the head of the pool */
+ {
+ ai4_costs[i] = ps_pu_results_list0[i].i4_tot_cost;
+ as_cand_pos_data[i].u1_cand_id_in_cand_list = i;
+ as_cand_pos_data[i].u1_cand_list_id = 0;
+ }
+
+ for(i = 0, j = u1_num_results_per_part_l0; i < u1_num_results_per_part_l1; i++, j++) /* list 1 is appended after list 0 */
+ {
+ ai4_costs[j] = ps_pu_results_list1[i].i4_tot_cost;
+ as_cand_pos_data[j].u1_cand_id_in_cand_list = i;
+ as_cand_pos_data[j].u1_cand_list_id = 1;
+ }
+
+ SORT_PRIMARY_INTTYPE_ARRAY_AND_REORDER_GENERIC_COMPANION_ARRAY(
+ ai4_costs,
+ as_cand_pos_data,
+ u1_num_results_per_part_l0 + u1_num_results_per_part_l1,
+ struct cand_pos_data);
+
+ if(as_cand_pos_data[u1_candidate_rank].u1_cand_list_id) /* selected entry belongs to list 1 */
+ {
+ ps_pu_results_dst[0] =
+ ps_pu_results_list1[as_cand_pos_data[u1_candidate_rank].u1_cand_id_in_cand_list];
+ }
+ else
+ {
+ ps_pu_results_dst[0] =
+ ps_pu_results_list0[as_cand_pos_data[u1_candidate_rank].u1_cand_id_in_cand_list];
+ }
+}
+
+/* Collects into ps_cand_container the candidates that are later evaluated
+ * by hme_decide_part_types, and returns the number of candidates written.
+ *
+ * ps_cand_container : output array, filled from index 0 upwards
+ * ps_pu_data        : per-partition results of list 0 and list 1
+ * ps_inter_ctb_prms : supplies u1_max_2nx2n_tu_recur_cands,
+ *                     i4_bidir_enabled and i1_quality_preset
+ * i4_part_mask      : bit mask of the enabled partition types
+ *
+ * Three categories are visited in order:
+ *  - 2Nx2N (when ENABLE_2Nx2N is set): one entry per picked rank
+ *  - SMP, part types PRT_2NxN .. PRT_Nx2N: at most one entry
+ *  - AMP, part types PRT_2NxnU .. PRT_nRx2N: at most one entry
+ * For SMP and AMP the kept entry is the enabled part type with the lowest
+ * sum of the costs of its partitions 0 and 1.
+ *
+ * NOTE(review): the capacity of ps_cand_container is not checked here.
+ */
+static S32 hme_tu_recur_cand_harvester(
+ part_type_results_t *ps_cand_container,
+ inter_pu_results_t *ps_pu_data,
+ inter_ctb_prms_t *ps_inter_ctb_prms,
+ S32 i4_part_mask)
+{
+ part_type_results_t s_cand_data; /* scratch entry for the part type under evaluation */
+
+ U08 i, j;
+ PART_ID_T e_part_id;
+
+ S32 i4_num_cands = 0;
+
+ /* 2Nx2N part_type decision part */
+ if(i4_part_mask & ENABLE_2Nx2N)
+ {
+ U08 u1_num_candt_to_pick;
+
+ e_part_id = ge_part_type_to_part_id[PRT_2Nx2N][0];
+
+ ASSERT(ps_inter_ctb_prms->u1_max_2nx2n_tu_recur_cands >= 1);
+
+ if(!ps_inter_ctb_prms->i4_bidir_enabled || (i4_part_mask == ENABLE_2Nx2N)) /* bidir off, or 2Nx2N is the only enabled type */
+ {
+ u1_num_candt_to_pick =
+ MIN(ps_inter_ctb_prms->u1_max_2nx2n_tu_recur_cands,
+ ps_pu_data->u1_num_results_per_part_l0[e_part_id] +
+ ps_pu_data->u1_num_results_per_part_l1[e_part_id]);
+ }
+ else
+ {
+ u1_num_candt_to_pick =
+ MIN(1,
+ ps_pu_data->u1_num_results_per_part_l0[e_part_id] +
+ ps_pu_data->u1_num_results_per_part_l1[e_part_id]);
+ }
+
+ if(ME_XTREME_SPEED_25 == ps_inter_ctb_prms->i1_quality_preset)
+ {
+ u1_num_candt_to_pick = MIN(u1_num_candt_to_pick, MAX_NUM_TU_RECUR_CANDS_IN_XS25);
+ }
+
+ for(i = 0; i < u1_num_candt_to_pick; i++)
+ {
+ /* Picks the candidate at rank i of the pooled list 0 + list 1 results */
+ hme_pick_best_pu_cand(
+ ps_cand_container[i4_num_cands].as_pu_results,
+ ps_pu_data->aps_pu_results[0][e_part_id],
+ ps_pu_data->aps_pu_results[1][e_part_id],
+ ps_pu_data->u1_num_results_per_part_l0[e_part_id],
+ ps_pu_data->u1_num_results_per_part_l1[e_part_id],
+ i);
+
+ /* Update the other params part_type and total_cost in part_type_results. */
+ /* NOTE(review): a part id is stored in the part type field here; presumably */
+ /* PRT_2Nx2N and its part id share the same value - confirm */
+ ps_cand_container[i4_num_cands].u1_part_type = e_part_id;
+ ps_cand_container[i4_num_cands].i4_tot_cost =
+ ps_cand_container[i4_num_cands].as_pu_results->i4_tot_cost;
+
+ i4_num_cands++;
+ }
+ }
+
+ /* SMP */
+ {
+ S32 i4_total_cost;
+
+ S32 num_part_types = PRT_Nx2N - PRT_2NxN + 1;
+ S32 start_part_type = PRT_2NxN;
+ S32 best_cost = MAX_32BIT_VAL;
+ S32 part_type_cnt = 0; /* number of enabled part types seen in this category */
+
+ for(j = 0; j < num_part_types; j++)
+ {
+ if(!(i4_part_mask & gai4_part_type_to_part_mask[j + start_part_type]))
+ {
+ continue;
+ }
+
+ for(i = 0; i < gau1_num_parts_in_part_type[j + start_part_type]; i++)
+ {
+ e_part_id = ge_part_type_to_part_id[j + start_part_type][i];
+
+ /* Pick the best candidate for the partition across lists */
+ hme_pick_best_pu_cand(
+ &s_cand_data.as_pu_results[i],
+ ps_pu_data->aps_pu_results[0][e_part_id],
+ ps_pu_data->aps_pu_results[1][e_part_id],
+ ps_pu_data->u1_num_results_per_part_l0[e_part_id],
+ ps_pu_data->u1_num_results_per_part_l1[e_part_id],
+ 0);
+ }
+
+ i4_total_cost =
+ s_cand_data.as_pu_results[0].i4_tot_cost + s_cand_data.as_pu_results[1].i4_tot_cost;
+
+ if(i4_total_cost < best_cost)
+ {
+ /* Keeps the cheapest part type of the sub-category in the current output slot */
+ best_cost = i4_total_cost;
+
+ ps_cand_container[i4_num_cands] = s_cand_data;
+
+ ps_cand_container[i4_num_cands].u1_part_type = j + start_part_type;
+ ps_cand_container[i4_num_cands].i4_tot_cost = i4_total_cost;
+ }
+
+ part_type_cnt++;
+ }
+
+ i4_num_cands = (part_type_cnt) ? (i4_num_cands + 1) : i4_num_cands; /* the slot is consumed only if some SMP type was enabled */
+ }
+
+ /* AMP */
+ {
+ S32 i4_total_cost;
+
+ S32 num_part_types = PRT_nRx2N - PRT_2NxnU + 1;
+ S32 start_part_type = PRT_2NxnU;
+ S32 best_cost = MAX_32BIT_VAL;
+ S32 part_type_cnt = 0; /* number of enabled part types seen in this category */
+
+ for(j = 0; j < num_part_types; j++)
+ {
+ if(!(i4_part_mask & gai4_part_type_to_part_mask[j + start_part_type]))
+ {
+ continue;
+ }
+
+ for(i = 0; i < gau1_num_parts_in_part_type[j + start_part_type]; i++)
+ {
+ e_part_id = ge_part_type_to_part_id[j + start_part_type][i];
+
+ /* Pick the best candidate for the partition across lists */
+ hme_pick_best_pu_cand(
+ &s_cand_data.as_pu_results[i],
+ ps_pu_data->aps_pu_results[0][e_part_id],
+ ps_pu_data->aps_pu_results[1][e_part_id],
+ ps_pu_data->u1_num_results_per_part_l0[e_part_id],
+ ps_pu_data->u1_num_results_per_part_l1[e_part_id],
+ 0);
+ }
+
+ i4_total_cost =
+ s_cand_data.as_pu_results[0].i4_tot_cost + s_cand_data.as_pu_results[1].i4_tot_cost;
+
+ if(i4_total_cost < best_cost)
+ {
+ /* Keeps the cheapest part type of the sub-category in the current output slot */
+ best_cost = i4_total_cost;
+
+ ps_cand_container[i4_num_cands] = s_cand_data;
+
+ ps_cand_container[i4_num_cands].u1_part_type = j + start_part_type;
+ ps_cand_container[i4_num_cands].i4_tot_cost = i4_total_cost;
+ }
+
+ part_type_cnt++;
+ }
+
+ i4_num_cands = (part_type_cnt) ? (i4_num_cands + 1) : i4_num_cands; /* the slot is consumed only if some AMP type was enabled */
+ }
+
+ return i4_num_cands;
+}
+
+/**
+*****************************************************************************
+* @fn hme_decide_part_types(inter_cu_results_t *ps_cu_results,
+* inter_pu_results_t *ps_pu_results,
+* inter_ctb_prms_t *ps_inter_ctb_prms,
+* me_frm_ctxt_t *ps_ctxt,
+* ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list,
+* ihevce_me_optimised_function_list_t *ps_me_optimised_function_list)
+*
+* @brief Does uni/bi evaluation across various partition types,
+* decides best inter partition types for the CU, compares
+* intra cost and decides the best K results for the CU
+*
+* This is called post subpel refinement for 16x16s, 8x8s and
+* for post merge evaluation for 32x32,64x64 CUs
+*
+* @param[in,out] ps_cu_results : CU level results
+* - In : part mask, cu size, input offset, x/y offsets
+* - Out: u1_num_best_results and ps_best_results[], ordered on cost
+*
+* @param[in] ps_pu_results : per partition results of list 0 and list 1
+*
+* @param[in,out] ps_inter_ctb_prms : CTB level params; its pred buffer
+* manager is used for scratch buffers and is updated
+*
+* @param[in] ps_ctxt : frame ctxt; only ps_func_selector is read here
+*
+* @param[in] ps_cmn_utils_optimised_function_list : supplies pf_copy_2d
+*
+* @param[in] ps_me_optimised_function_list : forwarded to
+* hme_compute_pred_and_evaluate_bi
+*
+*
+* @par Description
+* --------------------------------------------------------------------------------
+* Flow:
+* for each category (SMP,AMP,2Nx2N based on part mask)
+* {
+* for each part_type
+* {
+* for each part
+* pick best candidate from each list
+* combine uni part type
+* update best results for part type
+* }
+* pick the best part type for given category (for SMP & AMP)
+* }
+* ||
+* ||
+* \/
+* Bi-Pred evaluation:
+* for upto 4 best part types
+* {
+* for each part
+* {
+* compute fixed size had for all uni and remember coeffs
+* compute bisatd
+* uni vs bi and gives upto two results
+* also gives the pt level pred buffer
+* }
+* }
+* ||
+* ||
+* \/
+* select X candidates for tu recursion as per the Note below
+* tu_rec_on_part_type (reuse transform coeffs)
+* ||
+* ||
+* \/
+* insert intra nodes at appropriate result id
+* ||
+* ||
+* \/
+* populate y best results for rdo based on preset
+*
+* Note :
+* number of TU rec for P pics : 2 2nx2n + 1 smp + 1 amp for ms or 9 for hq
+* number of TU rec for B pics : 1 2nx2n + 1 smp + 1 amp for ms or 2 uni 2nx2n + 1 smp + 1 amp for ms or 9 for hq
+*
+* NOTE(review): no intra comparison or intra node insertion is visible in
+* this function body; the flow above presumably describes a wider pipeline.
+* --------------------------------------------------------------------------------
+*
+* @return None
+********************************************************************************
+*/
+void hme_decide_part_types(
+ inter_cu_results_t *ps_cu_results,
+ inter_pu_results_t *ps_pu_results,
+ inter_ctb_prms_t *ps_inter_ctb_prms,
+ me_frm_ctxt_t *ps_ctxt,
+ ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list,
+ ihevce_me_optimised_function_list_t *ps_me_optimised_function_list
+
+)
+{
+ S32 i, j;
+ S32 i4_part_mask;
+ ULWORD64 au8_pred_sigmaXSquare[NUM_BEST_ME_OUTPUTS][NUM_INTER_PU_PARTS];
+ ULWORD64 au8_pred_sigmaX[NUM_BEST_ME_OUTPUTS][NUM_INTER_PU_PARTS];
+ S32 i4_noise_term;
+ WORD32 e_part_id;
+
+ PF_SAD_FXN_TU_REC apf_err_compute[4];
+
+ part_type_results_t as_part_type_results[NUM_BEST_ME_OUTPUTS];
+ part_type_results_t *ps_part_type_results;
+
+ S32 num_best_cand = 0;
+ const S32 i4_default_src_wt = ((1 << 15) + (WGHT_DEFAULT >> 1)) / WGHT_DEFAULT; /* (1 << 15) / WGHT_DEFAULT, rounded to nearest */
+
+ i4_part_mask = ps_cu_results->i4_part_mask;
+
+ num_best_cand = hme_tu_recur_cand_harvester(
+ as_part_type_results, ps_pu_results, ps_inter_ctb_prms, i4_part_mask);
+
+ /* Partition ID of the 2Nx2N partition; used further down by the */
+ /* single-partition variance call for CUs other than 8x8 */
+ e_part_id = (UWORD8)ge_part_type_to_part_id[PRT_2Nx2N][0];
+
+ ps_part_type_results = as_part_type_results;
+ for(i = 0; i < num_best_cand; i++)
+ {
+ hme_compute_pred_and_evaluate_bi(
+ ps_cu_results,
+ ps_pu_results,
+ ps_inter_ctb_prms,
+ &(ps_part_type_results[i]),
+ au8_pred_sigmaXSquare[i],
+ au8_pred_sigmaX[i],
+ ps_cmn_utils_optimised_function_list,
+ ps_me_optimised_function_list
+
+ );
+ }
+ /* Perform TU_REC on the best candidates selected */
+ {
+ WORD32 i4_sad_grid;
+ WORD32 ai4_tu_split_flag[4];
+ WORD32 ai4_tu_early_cbf[4];
+
+ WORD32 best_cost[NUM_BEST_ME_OUTPUTS];
+ WORD32 ai4_final_idx[NUM_BEST_ME_OUTPUTS];
+ WORD16 i2_wght;
+ WORD32 i4_satd;
+
+ err_prms_t s_err_prms;
+ err_prms_t *ps_err_prms = &s_err_prms;
+
+ /* Default cost and final idx initialization */
+ for(i = 0; i < num_best_cand; i++)
+ {
+ best_cost[i] = MAX_32BIT_VAL;
+ ai4_final_idx[i] = -1;
+ }
+
+ /* Assign the satd function to the err_compute function pointer :
+ one TU recursion evaluator is registered for each CU size
+ (64x64, 32x32, 16x16 and 8x8) */
+ apf_err_compute[CU_64x64] = hme_evalsatd_pt_pu_64x64_tu_rec;
+ apf_err_compute[CU_32x32] = hme_evalsatd_pt_pu_32x32_tu_rec;
+ apf_err_compute[CU_16x16] = hme_evalsatd_pt_pu_16x16_tu_rec;
+ apf_err_compute[CU_8x8] = hme_evalsatd_pt_pu_8x8_tu_rec;
+
+ ps_err_prms->pi4_sad_grid = &i4_sad_grid;
+ ps_err_prms->pi4_tu_split_flags = ai4_tu_split_flag;
+ ps_err_prms->u1_max_tr_depth = ps_inter_ctb_prms->u1_max_tr_depth;
+ ps_err_prms->pi4_tu_early_cbf = ai4_tu_early_cbf;
+ ps_err_prms->i4_grid_mask = 1;
+ ps_err_prms->pu1_wkg_mem = ps_inter_ctb_prms->pu1_wkg_mem;
+ ps_err_prms->u1_max_tr_size = 32;
+
+ if(ps_inter_ctb_prms->u1_is_cu_noisy)
+ {
+ ps_err_prms->u1_max_tr_size = MAX_TU_SIZE_WHEN_NOISY;
+ }
+
+ /* TU_REC for the best candidates, as mentioned in NOTE above (except candidates that
+ are disabled by Part_mask) */
+ for(i = 0; i < num_best_cand; i++)
+ {
+ part_type_results_t *ps_best_results;
+ pu_result_t *ps_pu_result;
+ WORD32 part_type_cost;
+ WORD32 cand_idx;
+
+ WORD32 pred_dir;
+ S32 i4_inp_off;
+
+ S32 lambda;
+ U08 lambda_qshift;
+ U08 *apu1_inp[MAX_NUM_INTER_PARTS];
+ S16 ai2_wt[MAX_NUM_INTER_PARTS];
+ S32 ai4_inv_wt[MAX_NUM_INTER_PARTS];
+ S32 ai4_inv_wt_shift_val[MAX_NUM_INTER_PARTS];
+
+ WORD32 part_type = ps_part_type_results[i].u1_part_type;
+ WORD32 e_cu_size = ps_cu_results->u1_cu_size;
+ WORD32 e_blk_size = ge_cu_size_to_blk_size[e_cu_size];
+ U08 u1_num_parts = gau1_num_parts_in_part_type[part_type];
+ U08 u1_inp_buf_idx = UCHAR_MAX; /* UCHAR_MAX: no scratch input buffer taken for this candidate */
+
+ ps_err_prms->i4_part_mask = i4_part_mask;
+ ps_err_prms->i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
+ ps_err_prms->i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
+ ps_err_prms->pu1_ref = ps_part_type_results[i].pu1_pred;
+ ps_err_prms->i4_ref_stride = ps_part_type_results[i].i4_pred_stride;
+
+ /* Current offset for the present part type */
+ i4_inp_off = ps_cu_results->i4_inp_offset;
+
+ ps_best_results = &(ps_part_type_results[i]);
+
+ part_type_cost = 0;
+ lambda = ps_inter_ctb_prms->i4_lamda;
+ lambda_qshift = ps_inter_ctb_prms->u1_lamda_qshift;
+
+ for(j = 0; j < u1_num_parts; j++) /* per partition: choose input plane, weight and inverse weight; add up mv costs */
+ {
+ ps_pu_result = &(ps_best_results->as_pu_results[j]);
+
+ pred_dir = ps_pu_result->pu.b2_pred_mode;
+
+ if(PRED_L0 == pred_dir)
+ {
+ apu1_inp[j] =
+ ps_inter_ctb_prms->apu1_wt_inp[PRED_L0][ps_pu_result->pu.mv.i1_l0_ref_idx] +
+ i4_inp_off;
+ ai2_wt[j] =
+ ps_inter_ctb_prms->pps_rec_list_l0[ps_pu_result->pu.mv.i1_l0_ref_idx]
+ ->s_weight_offset.i2_luma_weight;
+ ai4_inv_wt[j] =
+ ps_inter_ctb_prms->pi4_inv_wt
+ [ps_inter_ctb_prms->pi1_past_list[ps_pu_result->pu.mv.i1_l0_ref_idx]];
+ ai4_inv_wt_shift_val[j] =
+ ps_inter_ctb_prms->pi4_inv_wt_shift_val
+ [ps_inter_ctb_prms->pi1_past_list[ps_pu_result->pu.mv.i1_l0_ref_idx]];
+ }
+ else if(PRED_L1 == pred_dir)
+ {
+ apu1_inp[j] =
+ ps_inter_ctb_prms->apu1_wt_inp[PRED_L1][ps_pu_result->pu.mv.i1_l1_ref_idx] +
+ i4_inp_off;
+ ai2_wt[j] =
+ ps_inter_ctb_prms->pps_rec_list_l1[ps_pu_result->pu.mv.i1_l1_ref_idx]
+ ->s_weight_offset.i2_luma_weight;
+ ai4_inv_wt[j] =
+ ps_inter_ctb_prms->pi4_inv_wt
+ [ps_inter_ctb_prms->pi1_future_list[ps_pu_result->pu.mv.i1_l1_ref_idx]];
+ ai4_inv_wt_shift_val[j] =
+ ps_inter_ctb_prms->pi4_inv_wt_shift_val
+ [ps_inter_ctb_prms->pi1_future_list[ps_pu_result->pu.mv.i1_l1_ref_idx]];
+ }
+ else if(PRED_BI == pred_dir) /* bi-pred is compared against the unweighted input with default weights */
+ {
+ apu1_inp[j] = ps_inter_ctb_prms->pu1_non_wt_inp + i4_inp_off;
+ ai2_wt[j] = 1 << ps_inter_ctb_prms->wpred_log_wdc;
+ ai4_inv_wt[j] = i4_default_src_wt;
+ ai4_inv_wt_shift_val[j] = 0;
+ }
+ else
+ {
+ ASSERT(0);
+ }
+
+ part_type_cost += ps_pu_result->i4_mv_cost;
+ }
+
+ if((u1_num_parts == 1) || (ai2_wt[0] == ai2_wt[1])) /* a single weight applies: input of partition 0 is used directly */
+ {
+ ps_err_prms->pu1_inp = apu1_inp[0];
+ ps_err_prms->i4_inp_stride = ps_inter_ctb_prms->i4_inp_stride;
+ i2_wght = ai2_wt[0];
+ }
+ else
+ {
+ if(1 != ihevce_get_free_pred_buf_indices(
+ &u1_inp_buf_idx,
+ &ps_inter_ctb_prms->s_pred_buf_mngr.u4_pred_buf_usage_indicator,
+ 1))
+ {
+ ASSERT(0);
+ }
+ else
+ {
+ U08 *pu1_dst =
+ ps_inter_ctb_prms->s_pred_buf_mngr.apu1_pred_bufs[u1_inp_buf_idx];
+ U08 *pu1_src = apu1_inp[0];
+ U08 u1_pu1_wd = (ps_part_type_results[i].as_pu_results[0].pu.b4_wd + 1) << 2; /* b4_wd / b4_ht hold (size / 4) - 1 */
+ U08 u1_pu1_ht = (ps_part_type_results[i].as_pu_results[0].pu.b4_ht + 1) << 2;
+ U08 u1_pu2_wd = (ps_part_type_results[i].as_pu_results[1].pu.b4_wd + 1) << 2;
+ U08 u1_pu2_ht = (ps_part_type_results[i].as_pu_results[1].pu.b4_ht + 1) << 2;
+
+ ps_cmn_utils_optimised_function_list->pf_copy_2d(
+ pu1_dst,
+ MAX_CU_SIZE,
+ pu1_src,
+ ps_inter_ctb_prms->i4_inp_stride,
+ u1_pu1_wd,
+ u1_pu1_ht); /* first partition, taken from its own input plane */
+
+ pu1_dst +=
+ (gai1_is_part_vertical[ge_part_type_to_part_id[part_type][0]]
+ ? u1_pu1_ht * MAX_CU_SIZE
+ : u1_pu1_wd);
+ pu1_src =
+ apu1_inp[1] + (gai1_is_part_vertical[ge_part_type_to_part_id[part_type][0]]
+ ? u1_pu1_ht * ps_inter_ctb_prms->i4_inp_stride
+ : u1_pu1_wd);
+
+ ps_cmn_utils_optimised_function_list->pf_copy_2d(
+ pu1_dst,
+ MAX_CU_SIZE,
+ pu1_src,
+ ps_inter_ctb_prms->i4_inp_stride,
+ u1_pu2_wd,
+ u1_pu2_ht); /* second partition, placed after the first one */
+
+ ps_err_prms->pu1_inp =
+ ps_inter_ctb_prms->s_pred_buf_mngr.apu1_pred_bufs[u1_inp_buf_idx];
+ ps_err_prms->i4_inp_stride = MAX_CU_SIZE;
+ i2_wght = ai2_wt[1]; /* weight of the second partition feeds the UNI_SATD_SCALE rescale below */
+ }
+ }
+
+#if !DISABLE_TU_RECURSION
+ i4_satd = apf_err_compute[e_cu_size](
+ ps_err_prms,
+ lambda,
+ lambda_qshift,
+ ps_inter_ctb_prms->i4_qstep_ls8,
+ ps_ctxt->ps_func_selector);
+#else
+ ps_err_prms->pi4_sad_grid = &i4_satd;
+
+ pf_err_compute(ps_err_prms); /* NOTE(review): pf_err_compute is not declared in this function, only apf_err_compute[] is */
+
+ if((part_type == PRT_2Nx2N) || (e_cu_size != CU_64x64)) /* NOTE(review): both branches below are identical */
+ {
+ ai4_tu_split_flag[0] = 1;
+ ai4_tu_split_flag[1] = 1;
+ ai4_tu_split_flag[2] = 1;
+ ai4_tu_split_flag[3] = 1;
+
+ ps_err_prms->i4_tu_split_cost = 0;
+ }
+ else
+ {
+ ai4_tu_split_flag[0] = 1;
+ ai4_tu_split_flag[1] = 1;
+ ai4_tu_split_flag[2] = 1;
+ ai4_tu_split_flag[3] = 1;
+
+ ps_err_prms->i4_tu_split_cost = 0;
+ }
+#endif
+
+#if UNI_SATD_SCALE
+ i4_satd = (i4_satd * i2_wght) >> ps_inter_ctb_prms->wpred_log_wdc;
+#endif
+
+ if(ps_inter_ctb_prms->u1_is_cu_noisy && ps_inter_ctb_prms->i4_alpha_stim_multiplier) /* noise-dependent scaling of the SATD */
+ {
+ ULWORD64 u8_temp_var, u8_temp_var1, u8_pred_sigmaSquaredX;
+ ULWORD64 u8_src_variance, u8_pred_variance;
+ unsigned long u4_shift_val;
+ S32 i4_bits_req;
+ S32 i4_q_level = STIM_Q_FORMAT + ALPHA_Q_FORMAT;
+
+ if(1 == u1_num_parts)
+ {
+ u8_pred_sigmaSquaredX = au8_pred_sigmaX[i][0] * au8_pred_sigmaX[i][0];
+ u8_pred_variance = au8_pred_sigmaXSquare[i][0] - u8_pred_sigmaSquaredX;
+
+ if(e_cu_size == CU_8x8)
+ {
+ PART_ID_T e_part_id = (PART_ID_T)(
+ (PART_ID_NxN_TL) + (ps_cu_results->u1_x_off & 1) +
+ ((ps_cu_results->u1_y_off & 1) << 1)); /* shadows the function-scope e_part_id inside this block only */
+
+ u4_shift_val = ihevce_calc_stim_injected_variance(
+ ps_inter_ctb_prms->pu8_part_src_sigmaX,
+ ps_inter_ctb_prms->pu8_part_src_sigmaXSquared,
+ &u8_src_variance,
+ ai4_inv_wt[0],
+ ai4_inv_wt_shift_val[0],
+ ps_inter_ctb_prms->wpred_log_wdc,
+ e_part_id);
+ }
+ else
+ {
+ u4_shift_val = ihevce_calc_stim_injected_variance(
+ ps_inter_ctb_prms->pu8_part_src_sigmaX,
+ ps_inter_ctb_prms->pu8_part_src_sigmaXSquared,
+ &u8_src_variance,
+ ai4_inv_wt[0],
+ ai4_inv_wt_shift_val[0],
+ ps_inter_ctb_prms->wpred_log_wdc,
+ e_part_id); /* function-scope e_part_id, i.e. the 2Nx2N part id */
+ }
+
+ u8_pred_variance = u8_pred_variance >> u4_shift_val;
+
+ GETRANGE64(i4_bits_req, u8_pred_variance);
+
+ if(i4_bits_req > 27) /* scale both variances down by the same amount before they are squared below */
+ {
+ u8_pred_variance = u8_pred_variance >> (i4_bits_req - 27);
+ u8_src_variance = u8_src_variance >> (i4_bits_req - 27);
+ }
+
+ if(u8_src_variance == u8_pred_variance) /* also covers both being zero, so the division below never sees a zero divisor */
+ {
+ u8_temp_var = (1 << STIM_Q_FORMAT);
+ }
+ else
+ {
+ u8_temp_var = (2 * u8_src_variance * u8_pred_variance);
+ u8_temp_var = (u8_temp_var * (1 << STIM_Q_FORMAT));
+ u8_temp_var1 = (u8_src_variance * u8_src_variance) +
+ (u8_pred_variance * u8_pred_variance);
+ u8_temp_var = (u8_temp_var + (u8_temp_var1 / 2)); /* + divisor / 2 : round to nearest */
+ u8_temp_var = (u8_temp_var / u8_temp_var1);
+ }
+
+ i4_noise_term = (UWORD32)u8_temp_var;
+
+ ASSERT(i4_noise_term >= 0);
+
+ i4_noise_term *= ps_inter_ctb_prms->i4_alpha_stim_multiplier;
+
+ u8_temp_var = i4_satd;
+ u8_temp_var *= ((1 << (i4_q_level)) - (i4_noise_term));
+ u8_temp_var += (1 << ((i4_q_level)-1));
+ i4_satd = (UWORD32)(u8_temp_var >> (i4_q_level));
+ }
+ else /*if(e_cu_size <= CU_16x16)*/
+ {
+ unsigned long temp_shift_val;
+ PART_ID_T ae_part_id[MAX_NUM_INTER_PARTS] = {
+ ge_part_type_to_part_id[part_type][0], ge_part_type_to_part_id[part_type][1]
+ };
+
+ u4_shift_val = ihevce_calc_variance_for_diff_weights(
+ ps_inter_ctb_prms->pu8_part_src_sigmaX,
+ ps_inter_ctb_prms->pu8_part_src_sigmaXSquared,
+ &u8_src_variance,
+ ai4_inv_wt,
+ ai4_inv_wt_shift_val,
+ ps_best_results->as_pu_results,
+ ps_inter_ctb_prms->wpred_log_wdc,
+ ae_part_id,
+ gau1_blk_size_to_wd[e_blk_size],
+ u1_num_parts,
+ 1);
+
+ temp_shift_val = u4_shift_val; /* shift reported for the source variance; applied to the pred variance below */
+
+ u4_shift_val = ihevce_calc_variance_for_diff_weights(
+ au8_pred_sigmaX[i],
+ au8_pred_sigmaXSquare[i],
+ &u8_pred_variance,
+ ai4_inv_wt,
+ ai4_inv_wt_shift_val,
+ ps_best_results->as_pu_results,
+ 0,
+ ae_part_id,
+ gau1_blk_size_to_wd[e_blk_size],
+ u1_num_parts,
+ 0);
+
+ u8_pred_variance = u8_pred_variance >> temp_shift_val;
+
+ GETRANGE64(i4_bits_req, u8_pred_variance);
+
+ if(i4_bits_req > 27) /* scale both variances down by the same amount before they are squared below */
+ {
+ u8_pred_variance = u8_pred_variance >> (i4_bits_req - 27);
+ u8_src_variance = u8_src_variance >> (i4_bits_req - 27);
+ }
+
+ if(u8_src_variance == u8_pred_variance) /* also covers both being zero, so the division below never sees a zero divisor */
+ {
+ u8_temp_var = (1 << STIM_Q_FORMAT);
+ }
+ else
+ {
+ u8_temp_var = (2 * u8_src_variance * u8_pred_variance);
+ u8_temp_var = (u8_temp_var * (1 << STIM_Q_FORMAT));
+ u8_temp_var1 = (u8_src_variance * u8_src_variance) +
+ (u8_pred_variance * u8_pred_variance);
+ u8_temp_var = (u8_temp_var + (u8_temp_var1 / 2)); /* + divisor / 2 : round to nearest */
+ u8_temp_var = (u8_temp_var / u8_temp_var1);
+ }
+
+ i4_noise_term = (UWORD32)u8_temp_var;
+
+ ASSERT(i4_noise_term >= 0);
+ ASSERT(i4_noise_term <= (1 << (STIM_Q_FORMAT + ALPHA_Q_FORMAT)));
+
+ i4_noise_term *= ps_inter_ctb_prms->i4_alpha_stim_multiplier;
+
+ u8_temp_var = i4_satd;
+ u8_temp_var *= ((1 << (i4_q_level)) - (i4_noise_term));
+ u8_temp_var += (1 << ((i4_q_level)-1));
+ i4_satd = (UWORD32)(u8_temp_var >> (i4_q_level));
+
+ ASSERT(i4_satd >= 0);
+ }
+ }
+
+ if(u1_inp_buf_idx != UCHAR_MAX) /* give back the scratch input buffer taken above, if any */
+ {
+ ihevce_set_pred_buf_as_free(
+ &ps_inter_ctb_prms->s_pred_buf_mngr.u4_pred_buf_usage_indicator,
+ u1_inp_buf_idx);
+ }
+
+ part_type_cost += i4_satd; /* total = sum of mv costs of the partitions + satd */
+
+ /*Update the best results with the new results */
+ ps_best_results->i4_tot_cost = part_type_cost;
+
+ ps_best_results->i4_tu_split_cost = ps_err_prms->i4_tu_split_cost; /* presumably written by the apf_err_compute callee when TU recursion is enabled - confirm */
+
+ ASSERT(ai4_tu_split_flag[0] >= 0);
+ if(e_cu_size == CU_64x64)
+ {
+ ps_best_results->ai4_tu_split_flag[0] = ai4_tu_split_flag[0];
+ ps_best_results->ai4_tu_split_flag[1] = ai4_tu_split_flag[1];
+ ps_best_results->ai4_tu_split_flag[2] = ai4_tu_split_flag[2];
+ ps_best_results->ai4_tu_split_flag[3] = ai4_tu_split_flag[3];
+
+ /* Update the TU early cbf flags into the best results structure */
+ ps_best_results->ai4_tu_early_cbf[0] = ai4_tu_early_cbf[0];
+ ps_best_results->ai4_tu_early_cbf[1] = ai4_tu_early_cbf[1];
+ ps_best_results->ai4_tu_early_cbf[2] = ai4_tu_early_cbf[2];
+ ps_best_results->ai4_tu_early_cbf[3] = ai4_tu_early_cbf[3];
+ }
+ else /* for the other CU sizes only entry 0 of each array is carried over */
+ {
+ ps_best_results->ai4_tu_split_flag[0] = ai4_tu_split_flag[0];
+ ps_best_results->ai4_tu_early_cbf[0] = ai4_tu_early_cbf[0];
+ }
+
+ if(part_type_cost < best_cost[num_best_cand - 1])
+ {
+ /* Push and sort current part type if it is one of the num_best_cand. */
+ /* Only the first i slots can be occupied; when no occupied slot is */
+ /* costlier the loop ends with cand_idx == i and the entry is appended */
+ for(cand_idx = 0; cand_idx < i; cand_idx++)
+ {
+ if(part_type_cost <= best_cost[cand_idx])
+ {
+ memmove(
+ &ai4_final_idx[cand_idx + 1],
+ &ai4_final_idx[cand_idx],
+ sizeof(WORD32) * (i - cand_idx));
+ memmove(
+ &best_cost[cand_idx + 1],
+ &best_cost[cand_idx],
+ sizeof(WORD32) * (i - cand_idx));
+ break;
+ }
+ }
+
+ ai4_final_idx[cand_idx] = i;
+ best_cost[cand_idx] = part_type_cost;
+ }
+ }
+
+ ps_cu_results->u1_num_best_results = num_best_cand;
+
+ for(i = 0; i < num_best_cand; i++)
+ {
+ ASSERT(ai4_final_idx[i] < num_best_cand);
+
+ if(ai4_final_idx[i] != -1) /* -1: slot was never filled by the insertion above */
+ {
+ memcpy(
+ &(ps_cu_results->ps_best_results[i]),
+ &(ps_part_type_results[ai4_final_idx[i]]),
+ sizeof(part_type_results_t));
+ }
+ }
+ }
+
+ for(i = 0; i < (MAX_NUM_PRED_BUFS_USED_FOR_PARTTYPE_DECISIONS)-2; i++) /* mark pred buffers 0 .. MAX_NUM_PRED_BUFS_USED_FOR_PARTTYPE_DECISIONS - 3 as free again */
+ {
+ ihevce_set_pred_buf_as_free(
+ &ps_inter_ctb_prms->s_pred_buf_mngr.u4_pred_buf_usage_indicator, i);
+ }
+}
+
+/**
+**************************************************************************************************
+* @fn hme_populate_pus(me_ctxt_t *ps_thrd_ctxt, me_frm_ctxt_t *ps_ctxt,
+* hme_subpel_prms_t *ps_subpel_prms,
+* search_results_t *ps_search_results,
+* inter_cu_results_t *ps_cu_results,
+* inter_pu_results_t *ps_pu_results,
+* pu_result_t *ps_pu_result,
+* inter_ctb_prms_t *ps_inter_ctb_prms,
+* wgt_pred_ctxt_t *ps_wt_prms, layer_ctxt_t *ps_curr_layer,
+* U08 *pu1_pred_dir_searched, WORD32 i4_num_active_ref)
+*
+* @brief Does the population of the inter_cu_results structure with the results after the
+* subpel refinement
+*
+* This is called post subpel refinement for 16x16s, 8x8s and
+* for post merge evaluation for 32x32,64x64 CUs
+*
+* @param[in] ps_thrd_ctxt : not referenced in this function
+* @param[in,out] ps_ctxt : frame ctxt; its buffer manager supplies the working
+* memory, and the lc to l0/l1 ref idx maps, frame params and
+* weights are read
+* @param[in] ps_subpel_prms : only u1_is_cu_noisy is read
+* @param[in] ps_search_results : Search results data structure (input)
+* @param[out] ps_cu_results : cu_results data structure; CU level fields are filled
+* @param[out] ps_pu_results : per partition result pointers and result counts
+* @param[out] ps_pu_result : Pointer to the memory for storing PU's
+* @param[in,out] ps_inter_ctb_prms : pred buffer manager, working memory pointer,
+* noise params, lambda and weighted input pointers are set
+* @param[in] ps_wt_prms : source of the weighted / unweighted input pointers
+* @param[in] ps_curr_layer : not referenced in this function
+* @param[in] pu1_pred_dir_searched : pred direction (0: L0, non zero: L1) per searched ref
+* @param[in] i4_num_active_ref : number of entries read from pu1_pred_dir_searched
+*
+* @note NOTE(review): the number of results stored per list and partition is
+* not checked against MAX_NUM_RESULTS_PER_PART_LIST.
+* i4_tot_cost of a PU is written only when UNI_SATD_SCALE is enabled.
+*
+****************************************************************************************************
+*/
+void hme_populate_pus(
+ me_ctxt_t *ps_thrd_ctxt,
+ me_frm_ctxt_t *ps_ctxt,
+ hme_subpel_prms_t *ps_subpel_prms,
+ search_results_t *ps_search_results,
+ inter_cu_results_t *ps_cu_results,
+ inter_pu_results_t *ps_pu_results,
+ pu_result_t *ps_pu_result,
+ inter_ctb_prms_t *ps_inter_ctb_prms,
+ wgt_pred_ctxt_t *ps_wt_prms,
+ layer_ctxt_t *ps_curr_layer,
+ U08 *pu1_pred_dir_searched,
+ WORD32 i4_num_active_ref)
+{
+ WORD32 i, j, k;
+ WORD32 i4_part_mask;
+ WORD32 i4_ref;
+ UWORD8 e_part_id;
+ pu_result_t *ps_curr_pu;
+ search_node_t *ps_search_node;
+ part_attr_t *ps_part_attr;
+ UWORD8 e_cu_size = ps_search_results->e_cu_size;
+ WORD32 num_results_per_part_l0 = 0;
+ WORD32 num_results_per_part_l1 = 0;
+ WORD32 i4_ref_id;
+ WORD32 i4_total_act_ref;
+
+ i4_part_mask = ps_search_results->i4_part_mask;
+
+ /* pred_buf_mngr init */
+ {
+ hme_get_wkg_mem(&ps_ctxt->s_buf_mgr, MAX_WKG_MEM_SIZE_PER_THREAD);
+
+ ps_inter_ctb_prms->s_pred_buf_mngr.u4_pred_buf_usage_indicator = UINT_MAX; /* all bits set; presumably a set bit means "in use" - confirm */
+
+ for(i = 0; i < MAX_NUM_PRED_BUFS_USED_FOR_PARTTYPE_DECISIONS - 2; i++)
+ {
+ ps_inter_ctb_prms->s_pred_buf_mngr.apu1_pred_bufs[i] =
+ ps_ctxt->s_buf_mgr.pu1_wkg_mem + i * INTERP_OUT_BUF_SIZE;
+ ps_inter_ctb_prms->s_pred_buf_mngr.u4_pred_buf_usage_indicator &= ~(1 << i); /* clear bit i for the buffer just set up */
+ }
+
+ ps_inter_ctb_prms->pu1_wkg_mem = ps_ctxt->s_buf_mgr.pu1_wkg_mem + i * INTERP_OUT_BUF_SIZE; /* general working memory starts right after the pred buffers */
+ }
+
+ ps_inter_ctb_prms->i4_alpha_stim_multiplier = ALPHA_FOR_NOISE_TERM_IN_ME;
+ ps_inter_ctb_prms->u1_is_cu_noisy = ps_subpel_prms->u1_is_cu_noisy;
+ ps_inter_ctb_prms->i4_lamda = ps_search_results->as_pred_ctxt[0].lambda;
+
+ /* Populate the CU level parameters */
+ ps_cu_results->u1_cu_size = ps_search_results->e_cu_size;
+ ps_cu_results->u1_num_best_results = ps_search_results->u1_num_best_results;
+ ps_cu_results->i4_part_mask = ps_search_results->i4_part_mask;
+ ps_cu_results->u1_x_off = ps_search_results->u1_x_off;
+ ps_cu_results->u1_y_off = ps_search_results->u1_y_off;
+
+ i4_total_act_ref =
+ ps_ctxt->s_frm_prms.u1_num_active_ref_l0 + ps_ctxt->s_frm_prms.u1_num_active_ref_l1;
+ /*Populate the partition results
+ Loop across all the enabled part types, their partitions, and within
+ each partition across all the active references that are enabled right now */
+ for(i = 0; i < MAX_PART_TYPES; i++)
+ {
+ if(!(i4_part_mask & gai4_part_type_to_part_mask[i]))
+ {
+ continue;
+ }
+
+ for(j = 0; j < gau1_num_parts_in_part_type[i]; j++)
+ {
+ /* Partition ID for the current PU */
+ e_part_id = (UWORD8)ge_part_type_to_part_id[i][j];
+ ps_part_attr = &gas_part_attr_in_cu[e_part_id];
+
+ num_results_per_part_l0 = 0;
+ num_results_per_part_l1 = 0;
+
+ ps_pu_results->aps_pu_results[0][e_part_id] =
+ ps_pu_result + (e_part_id * MAX_NUM_RESULTS_PER_PART_LIST); /* list 0 slots: first TOT_NUM_PARTS groups of the pool */
+ ps_pu_results->aps_pu_results[1][e_part_id] =
+ ps_pu_result + ((e_part_id + TOT_NUM_PARTS) * MAX_NUM_RESULTS_PER_PART_LIST); /* list 1 slots: the groups after those */
+
+ for(i4_ref = 0; i4_ref < i4_num_active_ref; i4_ref++)
+ {
+ U08 u1_pred_dir = pu1_pred_dir_searched[i4_ref];
+
+ for(k = 0; k < ps_search_results->u1_num_results_per_part; k++)
+ {
+ ps_search_node =
+ &ps_search_results->aps_part_results[u1_pred_dir][e_part_id][k];
+
+ /* If subpel is done then the node is a valid candidate else break the loop */
+ if(ps_search_node->u1_subpel_done)
+ {
+ i4_ref_id = ps_search_node->i1_ref_idx;
+
+ ASSERT(i4_ref_id >= 0);
+
+ /* Check whether current ref_id is past or future and assign the pointers to L0 or L1 list accordingly */
+ if(!u1_pred_dir)
+ {
+ ps_curr_pu = ps_pu_results->aps_pu_results[0][e_part_id] +
+ num_results_per_part_l0;
+
+ ASSERT(
+ ps_ctxt->a_ref_idx_lc_to_l0[i4_ref_id] <
+ ps_inter_ctb_prms->u1_num_active_ref_l0);
+
+ /* Always populate the ref_idx value in l0_ref_idx */
+ ps_curr_pu->pu.mv.i1_l0_ref_idx =
+ ps_ctxt->a_ref_idx_lc_to_l0[i4_ref_id];
+ ps_curr_pu->pu.mv.s_l0_mv = ps_search_node->s_mv;
+ ps_curr_pu->pu.mv.i1_l1_ref_idx = -1;
+ ps_curr_pu->pu.b2_pred_mode = PRED_L0;
+
+ ps_inter_ctb_prms->apu1_wt_inp[0][ps_curr_pu->pu.mv.i1_l0_ref_idx] =
+ ps_wt_prms->apu1_wt_inp[i4_ref_id];
+
+ num_results_per_part_l0++;
+ }
+ else
+ {
+ ps_curr_pu = ps_pu_results->aps_pu_results[1][e_part_id] +
+ num_results_per_part_l1;
+
+ ASSERT(
+ ps_ctxt->a_ref_idx_lc_to_l1[i4_ref_id] <
+ ps_inter_ctb_prms->u1_num_active_ref_l1);
+
+ /* populate the ref_idx value in l1_ref_idx */
+ ps_curr_pu->pu.mv.i1_l1_ref_idx =
+ ps_ctxt->a_ref_idx_lc_to_l1[i4_ref_id];
+ ps_curr_pu->pu.mv.s_l1_mv = ps_search_node->s_mv;
+ ps_curr_pu->pu.mv.i1_l0_ref_idx = -1;
+ ps_curr_pu->pu.b2_pred_mode = PRED_L1;
+
+ /* Copy the values from weighted params to common_frm_params */
+ ps_inter_ctb_prms->apu1_wt_inp[1][ps_curr_pu->pu.mv.i1_l1_ref_idx] =
+ ps_wt_prms->apu1_wt_inp[i4_ref_id];
+
+ num_results_per_part_l1++;
+ }
+ ps_curr_pu->i4_mv_cost = ps_search_node->i4_mv_cost;
+ ps_curr_pu->i4_sdi = ps_search_node->i4_sdi;
+
+#if UNI_SATD_SCALE
+ /*SATD is scaled by weight. Hence rescale the SATD */
+ ps_curr_pu->i4_tot_cost =
+ ((ps_search_node->i4_sad *
+ ps_ctxt->s_wt_pred.a_wpred_wt[ps_search_node->i1_ref_idx] +
+ (1 << (ps_inter_ctb_prms->wpred_log_wdc - 1))) >>
+ ps_inter_ctb_prms->wpred_log_wdc) +
+ ps_search_node->i4_mv_cost;
+#endif
+
+ /* Packed format of the width and height: (size in pixels / 4) - 1 */
+ ps_curr_pu->pu.b4_wd = ((ps_part_attr->u1_x_count << e_cu_size) >> 2) - 1;
+ ps_curr_pu->pu.b4_ht = ((ps_part_attr->u1_y_count << e_cu_size) >> 2) - 1;
+
+ ps_curr_pu->pu.b4_pos_x =
+ (((ps_part_attr->u1_x_start << e_cu_size) + ps_cu_results->u1_x_off) >>
+ 2);
+ ps_curr_pu->pu.b4_pos_y =
+ (((ps_part_attr->u1_y_start << e_cu_size) + ps_cu_results->u1_y_off) >>
+ 2);
+
+ ps_curr_pu->pu.b1_intra_flag = 0;
+
+ /* Unweighted input: taken from the entry that follows those of all active refs */
+ ps_inter_ctb_prms->pu1_non_wt_inp =
+ ps_wt_prms->apu1_wt_inp[i4_total_act_ref];
+
+ ps_search_node++; /* no lasting effect: the pointer is reassigned at the top of the k loop */
+ }
+ else
+ {
+ break;
+ }
+ }
+ }
+
+ ps_pu_results->u1_num_results_per_part_l0[e_part_id] = num_results_per_part_l0;
+ ps_pu_results->u1_num_results_per_part_l1[e_part_id] = num_results_per_part_l1;
+ }
+ }
+}
+
+/**
+*********************************************************************************************************
+* @fn hme_populate_pus_8x8_cu
+*
+* @brief Populates the inter_cu_results and inter_pu_results entries of up to four 8x8 CUs
+* (those selected by u1_blk_8x8_mask) from the NxN partition search results
+*
+* Per the original note, this is called post subpel refinement for 16x16s, 8x8s and
+* post merge evaluation for 32x32, 64x64 CUs
+*
+* The four 8x8 blocks are visited in a 2x2 raster order, at 8 pixel steps from the x/y offset
+* stored in ps_search_results. ps_cu_results and ps_pu_results are advanced by one entry per
+* block, whether or not the block is selected by the mask
+*
+* @param[in] ps_thrd_ctxt : not referenced in this function
+* @param[in] ps_ctxt : supplies the lc to L0 / L1 ref idx maps (and the wpred weights when
+* UNI_SATD_SCALE is enabled)
+* @param[in] ps_subpel_prms : not referenced in this function
+* @param[in] ps_search_results : Search results data structure (NxN partition nodes are read)
+* @param[in,out] ps_cu_results : cu_results entries, one per 8x8 block
+* @param[in,out] ps_pu_results : pu_results entries, one per 8x8 block
+* @param[in,out] ps_pu_result : Memory for storing the PUs; block i uses the slot starting at
+* i * MAX_NUM_RESULTS_PER_PART_LIST for L0 and at
+* (i + TOT_NUM_PARTS) * MAX_NUM_RESULTS_PER_PART_LIST for L1
+* @param[in] ps_inter_ctb_prms : used in the ASSERTs (and for wpred_log_wdc under UNI_SATD_SCALE)
+* @param[in] pu1_pred_dir_searched : per searched ref, 0 selects the L0 handling and non-zero the L1 handling
+* @param[in] i4_num_active_ref : number of entries of pu1_pred_dir_searched to process
+* @param[in] u1_blk_8x8_mask : bit i set means 8x8 block i is to be populated
+*
+* @return None
+*
+*********************************************************************************************************
+*/
+void hme_populate_pus_8x8_cu(
+ me_ctxt_t *ps_thrd_ctxt,
+ me_frm_ctxt_t *ps_ctxt,
+ hme_subpel_prms_t *ps_subpel_prms,
+ search_results_t *ps_search_results,
+ inter_cu_results_t *ps_cu_results,
+ inter_pu_results_t *ps_pu_results,
+ pu_result_t *ps_pu_result,
+ inter_ctb_prms_t *ps_inter_ctb_prms,
+ U08 *pu1_pred_dir_searched,
+ WORD32 i4_num_active_ref,
+ U08 u1_blk_8x8_mask)
+{
+ WORD32 i, k;
+ WORD32 i4_part_mask;
+ WORD32 i4_ref;
+ pu_result_t *ps_curr_pu;
+ search_node_t *ps_search_node;
+ WORD32 i4_ref_id;
+ WORD32 x_off, y_off;
+
+ /* Make part mask available as only 2Nx2N
+ Later support for 4x8 and 8x4 needs to be added */
+ i4_part_mask = ENABLE_2Nx2N;
+
+ x_off = ps_search_results->u1_x_off;
+ y_off = ps_search_results->u1_y_off;
+
+ for(i = 0; i < 4; i++)
+ {
+ if(u1_blk_8x8_mask & (1 << i))
+ {
+ UWORD8 u1_x_pos, u1_y_pos;
+
+ WORD32 num_results_per_part_l0 = 0;
+ WORD32 num_results_per_part_l1 = 0;
+
+ ps_cu_results->u1_cu_size = CU_8x8;
+ ps_cu_results->u1_num_best_results = ps_search_results->u1_num_best_results;
+ ps_cu_results->i4_part_mask = i4_part_mask;
+ ps_cu_results->u1_x_off = x_off + (i & 1) * 8; /* bit 0 of i selects the column */
+ ps_cu_results->u1_y_off = y_off + (i >> 1) * 8; /* bit 1 of i selects the row */
+ ps_cu_results->i4_inp_offset = ps_cu_results->u1_x_off + (ps_cu_results->u1_y_off * 64); /* NOTE(review): 64 looks like the input row stride - confirm */
+
+ ps_cu_results->ps_best_results[0].i4_tot_cost = MAX_32BIT_VAL;
+ ps_cu_results->ps_best_results[0].i4_tu_split_cost = 0;
+
+ u1_x_pos = ps_cu_results->u1_x_off >> 2; /* position in units of 4 pixels */
+ u1_y_pos = ps_cu_results->u1_y_off >> 2;
+
+ if(!(ps_search_results->i4_part_mask & ENABLE_NxN))
+ {
+ /* NxN was not searched: leave this 8x8 CU with no results and a max cost PU */
+ ps_curr_pu = &ps_cu_results->ps_best_results[0].as_pu_results[0];
+
+ ps_cu_results->i4_part_mask = 0;
+ ps_cu_results->u1_num_best_results = 0;
+
+ ps_curr_pu->i4_tot_cost = MAX_32BIT_VAL;
+
+ ps_curr_pu->pu.b4_wd = 1;
+ ps_curr_pu->pu.b4_ht = 1;
+ ps_curr_pu->pu.b4_pos_x = u1_x_pos;
+ ps_curr_pu->pu.b4_pos_y = u1_y_pos;
+ ps_cu_results->ps_best_results[0].i4_tu_split_cost = 0;
+
+ ps_cu_results++; /* advance here because the continue skips the increments at the loop end */
+ ps_pu_results++;
+
+ continue;
+ }
+
+ ps_pu_results->aps_pu_results[0][0] =
+ ps_pu_result + (i * MAX_NUM_RESULTS_PER_PART_LIST);
+ ps_pu_results->aps_pu_results[1][0] =
+ ps_pu_result + ((i + TOT_NUM_PARTS) * MAX_NUM_RESULTS_PER_PART_LIST);
+
+ for(i4_ref = 0; i4_ref < i4_num_active_ref; i4_ref++)
+ {
+ U08 u1_pred_dir = pu1_pred_dir_searched[i4_ref];
+
+ /* Select the NxN partition node for the current ref_idx in the search results*/
+ ps_search_node =
+ ps_search_results->aps_part_results[u1_pred_dir][PART_ID_NxN_TL + i];
+
+ for(k = 0; k < ps_search_results->u1_num_results_per_part; k++)
+ {
+ /* The node is taken as a valid candidate when it is marked available or subpel done, else break the loop */
+ if((ps_search_node->u1_is_avail) || (ps_search_node->u1_subpel_done))
+ {
+ i4_ref_id = ps_search_node->i1_ref_idx;
+
+ ASSERT(i4_ref_id >= 0);
+
+ if(!u1_pred_dir)
+ {
+ /* L0 result: append to list 0 and mark the L1 ref idx as unused (-1) */
+ ps_curr_pu =
+ ps_pu_results->aps_pu_results[0][0] + num_results_per_part_l0;
+
+ ASSERT(
+ ps_ctxt->a_ref_idx_lc_to_l0[i4_ref_id] <
+ ps_inter_ctb_prms->u1_num_active_ref_l0);
+
+ ps_curr_pu->pu.mv.i1_l0_ref_idx =
+ ps_ctxt->a_ref_idx_lc_to_l0[i4_ref_id];
+ ps_curr_pu->pu.mv.s_l0_mv = ps_search_node->s_mv;
+ ps_curr_pu->pu.mv.i1_l1_ref_idx = -1;
+ ps_curr_pu->pu.b2_pred_mode = PRED_L0;
+
+ num_results_per_part_l0++;
+ }
+ else
+ {
+ /* L1 result: append to list 1 and mark the L0 ref idx as unused (-1) */
+ ps_curr_pu =
+ ps_pu_results->aps_pu_results[1][0] + num_results_per_part_l1;
+
+ ASSERT(
+ ps_ctxt->a_ref_idx_lc_to_l1[i4_ref_id] <
+ ps_inter_ctb_prms->u1_num_active_ref_l1);
+
+ ps_curr_pu->pu.mv.i1_l1_ref_idx =
+ ps_ctxt->a_ref_idx_lc_to_l1[i4_ref_id];
+ ps_curr_pu->pu.mv.s_l1_mv = ps_search_node->s_mv;
+ ps_curr_pu->pu.mv.i1_l0_ref_idx = -1;
+ ps_curr_pu->pu.b2_pred_mode = PRED_L1;
+
+ num_results_per_part_l1++;
+ }
+ ps_curr_pu->i4_mv_cost = ps_search_node->i4_mv_cost;
+ ps_curr_pu->i4_sdi = ps_search_node->i4_sdi;
+
+#if UNI_SATD_SCALE
+ /*SATD is scaled by weight. Hence rescale the SATD */
+ ps_curr_pu->i4_tot_cost =
+ ((ps_search_node->i4_sad *
+ ps_ctxt->s_wt_pred.a_wpred_wt[ps_search_node->i1_ref_idx] +
+ (1 << (ps_inter_ctb_prms->wpred_log_wdc - 1))) >>
+ ps_inter_ctb_prms->wpred_log_wdc) +
+ ps_search_node->i4_mv_cost;
+#endif
+
+ ps_curr_pu->pu.b4_wd = 1; /* same packed wd/ht value as used for every PU written by this function */
+ ps_curr_pu->pu.b4_ht = 1;
+ ps_curr_pu->pu.b4_pos_x = u1_x_pos;
+ ps_curr_pu->pu.b4_pos_y = u1_y_pos;
+ ps_curr_pu->pu.b1_intra_flag = 0;
+
+ ps_search_node++; /* next result of the same partition and direction */
+ }
+ else
+ {
+ /* if NxN was not evaluated at 16x16 level, assign max cost to 8x8 CU
+ to remove 8x8's as possible candidates during evaluation.
+ The max cost is written at the next free slot of both lists; the
+ result counters are not incremented for it */
+
+ ps_curr_pu = ps_pu_results->aps_pu_results[0][0] + num_results_per_part_l0;
+
+ ps_curr_pu->i4_tot_cost = MAX_32BIT_VAL;
+
+ ps_curr_pu = ps_pu_results->aps_pu_results[1][0] + num_results_per_part_l1;
+
+ ps_curr_pu->i4_tot_cost = MAX_32BIT_VAL;
+
+ break;
+ }
+ }
+ }
+
+ /* Update the num_results per_part across lists L0 and L1 */
+ ps_pu_results->u1_num_results_per_part_l0[0] = num_results_per_part_l0;
+ ps_pu_results->u1_num_results_per_part_l1[0] = num_results_per_part_l1;
+ }
+ ps_cu_results++; /* reached for masked-out blocks too, so the output entries stay aligned with i */
+ ps_pu_results++;
+ }
+}
+
+/**
+********************************************************************************
+* @fn hme_insert_intra_nodes_post_bipred
+*
+* @brief Compares the intra 2Nx2N cost of the CU (populated by IPE) with its best
+* inter costs (populated after evaluating bi-pred) and inserts an intra
+* node into the best results list at the first position whose inter
+* cost is higher
+*
+* @param[in,out] ps_cu_results : Best results structure of the CU
+* @param[in] ps_cur_ipe_ctb : Intra results for the current CTB
+* @param[in] i4_frm_qstep : Current frame quantizer (qscale); it is forced to 0
+* when L0ME_IN_OPENLOOP_MODE is non-zero
+*
+* @return None
+********************************************************************************
+*/
+void hme_insert_intra_nodes_post_bipred(
+    inter_cu_results_t *ps_cu_results,
+    ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
+    WORD32 i4_frm_qstep)
+{
+    WORD32 i;
+    WORD32 num_results;
+    WORD32 cu_size = ps_cu_results->u1_cu_size;
+    UWORD8 u1_x_off = ps_cu_results->u1_x_off;
+    UWORD8 u1_y_off = ps_cu_results->u1_y_off;
+
+    /* Id of the 32x32 block, 16x16 block in a CTB */
+    WORD32 i4_32x32_id = (u1_y_off >> 5) * 2 + (u1_x_off >> 5);
+    WORD32 i4_16x16_id = ((u1_y_off >> 4) & 0x1) * 2 + ((u1_x_off >> 4) & 0x1);
+
+    /* Flags to indicate if intra64/intra32/intra16 cusize are invalid as per IPE decision */
+    WORD32 disable_intra64 = 0;
+    WORD32 disable_intra32 = 0;
+    WORD32 disable_intra16 = 0;
+
+    S32 i4_intra_2nx2n_cost;
+
+    /* ME final results for this CU (post seeding of best uni/bi pred results) */
+    part_type_results_t *ps_best_result;
+
+    i4_frm_qstep *= !L0ME_IN_OPENLOOP_MODE;
+
+    /* An 8x8 CU with no inter results still gets one slot, so that the intra */
+    /* node can be compared against (and replace) ps_best_results[0]          */
+    if((ps_cu_results->u1_num_best_results == 0) && (CU_8x8 == ps_cu_results->u1_cu_size))
+    {
+        ps_cu_results->u1_num_best_results = 1;
+    }
+
+    num_results = ps_cu_results->u1_num_best_results;
+
+    ps_best_result = &ps_cu_results->ps_best_results[0];
+
+    /* Disable intra16/32/64 flags based on split flags recommended by IPE */
+    if(ps_cur_ipe_ctb->u1_split_flag)
+    {
+        disable_intra64 = 1;
+        if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id].b1_split_flag)
+        {
+            disable_intra32 = 1;
+
+            if(ps_cur_ipe_ctb->as_intra32_analyse[i4_32x32_id]
+                   .as_intra16_analyse[i4_16x16_id]
+                   .b1_split_flag)
+            {
+                disable_intra16 = 1;
+            }
+        }
+    }
+
+    /* Derive the intra cost based on current cu size and offset */
+    switch(cu_size)
+    {
+    case CU_8x8:
+    {
+        /* u1_y_off is used unshifted as the row term of the index */
+        i4_intra_2nx2n_cost = ps_cur_ipe_ctb->ai4_best8x8_intra_cost[u1_y_off + (u1_x_off >> 3)];
+
+        /* Accounting for coding noise in the open loop IPE cost */
+        i4_intra_2nx2n_cost += ((i4_frm_qstep * 16) >> 2);
+
+        break;
+    }
+
+    case CU_16x16:
+    {
+        i4_intra_2nx2n_cost =
+            ps_cur_ipe_ctb->ai4_best16x16_intra_cost[(u1_y_off >> 4) * 4 + (u1_x_off >> 4)];
+
+        /* Accounting for coding noise in the open loop IPE cost */
+        i4_intra_2nx2n_cost += (i4_frm_qstep * 16);
+
+        if(disable_intra16)
+        {
+            /* Disable intra 2Nx2N (intra 16) as IPE suggested best mode as 8x8 */
+            i4_intra_2nx2n_cost = MAX_32BIT_VAL;
+        }
+        break;
+    }
+
+    case CU_32x32:
+    {
+        i4_intra_2nx2n_cost =
+            ps_cur_ipe_ctb->ai4_best32x32_intra_cost[(u1_y_off >> 5) * 2 + (u1_x_off >> 5)];
+
+        /* Accounting for coding noise in the open loop IPE cost */
+        i4_intra_2nx2n_cost += (i4_frm_qstep * 16 * 4);
+
+        if(disable_intra32)
+        {
+            /* Disable intra 2Nx2N (intra 32) as IPE suggested best mode as 16x16 or 8x8 */
+            i4_intra_2nx2n_cost = MAX_32BIT_VAL;
+        }
+        break;
+    }
+
+    case CU_64x64:
+    {
+        i4_intra_2nx2n_cost = ps_cur_ipe_ctb->i4_best64x64_intra_cost;
+
+        /* Accounting for coding noise in the open loop IPE cost */
+        i4_intra_2nx2n_cost += (i4_frm_qstep * 16 * 16);
+
+        if(disable_intra64)
+        {
+            /* Disable intra 2Nx2N (intra 64) as IPE suggested best mode as 32x32 /16x16 / 8x8 */
+            i4_intra_2nx2n_cost = MAX_32BIT_VAL;
+        }
+        break;
+    }
+
+    default:
+    {
+        ASSERT(0);
+
+        /* If ASSERT is compiled out, the cost would otherwise be read       */
+        /* uninitialised below. A max cost never compares lower than an      */
+        /* inter cost, so no intra node is inserted for an unknown CU size   */
+        i4_intra_2nx2n_cost = MAX_32BIT_VAL;
+        break;
+    }
+    }
+
+    /*****************************************************************/
+    /* Intra / Inter cost comparison for 2Nx2N : cu size 8/16/32/64  */
+    /* Identify where the current result is to be placed. Basically  */
+    /* find the node which has cost just higher than node under test */
+    /*****************************************************************/
+    for(i = 0; i < num_results; i++)
+    {
+        /* Subtract the tu_split_flag cost from total inter cost for fair comparison */
+        WORD32 inter_cost = ps_best_result[i].i4_tot_cost - ps_best_result[i].i4_tu_split_cost;
+
+        if(i4_intra_2nx2n_cost < inter_cost)
+        {
+            /* Shift the worse results down by one; the last result is dropped */
+            if(i < (num_results - 1))
+            {
+                memmove(
+                    ps_best_result + i + 1,
+                    ps_best_result + i,
+                    sizeof(ps_best_result[0]) * (num_results - 1 - i));
+            }
+
+            /* Insert the intra node result */
+            ps_best_result[i].u1_part_type = PRT_2Nx2N;
+            ps_best_result[i].i4_tot_cost = i4_intra_2nx2n_cost;
+            ps_best_result[i].ai4_tu_split_flag[0] = 0;
+            ps_best_result[i].ai4_tu_split_flag[1] = 0;
+            ps_best_result[i].ai4_tu_split_flag[2] = 0;
+            ps_best_result[i].ai4_tu_split_flag[3] = 0;
+
+            /* Populate intra flag, cost and default mvs, refidx for intra pu */
+            ps_best_result[i].as_pu_results[0].i4_tot_cost = i4_intra_2nx2n_cost;
+            ps_best_result[i].as_pu_results[0].i4_mv_cost = 0;
+            ps_best_result[i].as_pu_results[0].pu.b1_intra_flag = 1;
+            ps_best_result[i].as_pu_results[0].pu.mv.i1_l0_ref_idx = -1;
+            ps_best_result[i].as_pu_results[0].pu.mv.i1_l1_ref_idx = -1;
+            ps_best_result[i].as_pu_results[0].pu.mv.s_l0_mv.i2_mvx = INTRA_MV;
+            ps_best_result[i].as_pu_results[0].pu.mv.s_l0_mv.i2_mvy = INTRA_MV;
+            ps_best_result[i].as_pu_results[0].pu.mv.s_l1_mv.i2_mvx = INTRA_MV;
+            ps_best_result[i].as_pu_results[0].pu.mv.s_l1_mv.i2_mvy = INTRA_MV;
+
+            /* Only one intra node is inserted */
+            break;
+        }
+    }
+}
+
+/**
+********************************************************************************
+* @fn hme_recompute_lambda_from_min_8x8_act_in_ctb
+*
+* @brief Derives a lambda value from the frame QP. When i4_l0me_qp_mod is set
+* and MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON is enabled, the qscale is first
+* modulated by the 64x64 activity factor of the CTB
+*
+* @param[in] ps_ctxt : ME frame context (frame QP, quant tables, lambda modifiers)
+* @param[in] ps_cur_ipe_ctb : IPE analysis of the current CTB (activity factor)
+*
+* @return sqrt(lambda) scaled by (1 << LAMBDA_Q_SHIFT), truncated to WORD32
+********************************************************************************
+*/
+S32 hme_recompute_lambda_from_min_8x8_act_in_ctb(
+    me_frm_ctxt_t *ps_ctxt, ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb)
+{
+    frm_lambda_ctxt_t *ps_lambda_prms = &ps_ctxt->s_frm_lambda_ctxt;
+    rc_quant_t *ps_quant = ps_ctxt->ps_rc_quant_ctxt;
+    WORD32 i4_frm_qp = ps_ctxt->s_frm_prms.i4_frame_qp;
+    WORD32 i4_bidir = ps_ctxt->s_frm_prms.bidir_enabled;
+    WORD32 i4_qp;
+    double d_lambda;
+    double d_modifier;
+
+    /* Frame QP (plus table offset) to qscale */
+    i4_qp = ps_quant->pi4_qp_to_qscale[i4_frm_qp + ps_quant->i1_qp_offset];
+
+    if(ps_ctxt->i4_l0me_qp_mod)
+    {
+#if MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON
+#if LAMDA_BASED_ON_QUANT
+        WORD32 i4_activity = ps_cur_ipe_ctb->i4_64x64_act_factor[2][0];
+#else
+        WORD32 i4_activity = ps_cur_ipe_ctb->i4_64x64_act_factor[3][0];
+#endif
+        /* Rounded fixed point multiply by the activity factor */
+        i4_qp = (((i4_qp)*i4_activity) + (1 << (QP_LEVEL_MOD_ACT_FACTOR - 1))) >>
+                QP_LEVEL_MOD_ACT_FACTOR;
+
+#endif
+    }
+
+    /* Keep the qscale inside the table range before mapping it back to a QP */
+    i4_qp = (i4_qp > ps_quant->i2_max_qscale)
+                ? ps_quant->i2_max_qscale
+                : ((i4_qp < ps_quant->i2_min_qscale) ? ps_quant->i2_min_qscale : i4_qp);
+
+    i4_qp = ps_quant->pi4_qscale_to_qp[i4_qp];
+
+    /* Clamp the resulting QP as well */
+    i4_qp = (i4_qp > ps_quant->i2_max_qp)
+                ? ps_quant->i2_max_qp
+                : ((i4_qp < ps_quant->i2_min_qp) ? ps_quant->i2_min_qp : i4_qp);
+
+    d_lambda = pow(2.0, (((double)(i4_qp - 12)) / 3));
+
+    d_modifier = ps_lambda_prms->lambda_modifier;
+
+    if(i4_bidir)
+    {
+        d_modifier = d_modifier * CLIP3((((double)(i4_qp - 12)) / 6.0), 2.00, 4.00);
+    }
+
+    /* A constant modifier, when requested, overrides the value computed above */
+    if(ps_ctxt->i4_use_const_lamda_modifier)
+    {
+        if(ps_ctxt->s_frm_prms.is_i_pic)
+        {
+            d_modifier = ps_ctxt->f_i_pic_lamda_modifier;
+        }
+        else
+        {
+            d_modifier = CONST_LAMDA_MOD_VAL;
+        }
+    }
+
+    d_lambda *= d_modifier;
+
+    return ((WORD32)(sqrt(d_lambda) * (1 << LAMBDA_Q_SHIFT)));
+}
+
+/**
+********************************************************************************
+* @fn hme_update_dynamic_search_params
+*
+* @brief Widens the tracked vertical MV range so that it contains i2_mvy
+*
+* @param[in,out] ps_dyn_range_prms : Dynamic range params; i2_dyn_max_y and
+* i2_dyn_min_y are updated in place
+* @param[in] i2_mvy : Current MV y component
+*
+* @return None
+********************************************************************************
+*/
+void hme_update_dynamic_search_params(dyn_range_prms_t *ps_dyn_range_prms, WORD16 i2_mvy)
+{
+    /* Raise the upper bound when the MV lies above it */
+    if(ps_dyn_range_prms->i2_dyn_max_y < i2_mvy)
+    {
+        ps_dyn_range_prms->i2_dyn_max_y = i2_mvy;
+    }
+
+    /* Lower the lower bound when the MV lies below it (checked independently) */
+    if(ps_dyn_range_prms->i2_dyn_min_y > i2_mvy)
+    {
+        ps_dyn_range_prms->i2_dyn_min_y = i2_mvy;
+    }
+}
+
+/**
+********************************************************************************
+* @fn hme_add_new_node_to_a_sorted_array
+*
+* @brief Inserts a node pointer into an array of node pointers ordered by
+* ascending cost. A node is placed before the first entry whose cost is
+* strictly higher, or appended when there is none
+*
+* @param[in] ps_result_node : Node to insert
+* @param[in,out] pps_sorted_array : Sorted array holding u4_num_results_updated
+* entries; one more entry is written, so it must have room for it
+* @param[in,out] pu1_shifts : NULL, or per-entry shifts parallel to
+* pps_sorted_array. When given, costs are compared after a rounded right
+* shift by the entry's own shift, and u1_shift is stored for the new entry
+* @param[in] u4_num_results_updated : Number of entries currently in the array
+* @param[in] u1_shift : Shift of the new node (used only when pu1_shifts is given)
+*
+* @return None
+********************************************************************************
+*/
+void hme_add_new_node_to_a_sorted_array(
+    search_node_t *ps_result_node,
+    search_node_t **pps_sorted_array,
+    U08 *pu1_shifts,
+    U32 u4_num_results_updated,
+    U08 u1_shift)
+{
+    U32 u4_pos;
+    U32 u4_tail;
+
+    /* Step 1: locate the insertion position */
+    if(NULL == pu1_shifts)
+    {
+        S32 i4_new_cost = ps_result_node->i4_tot_cost;
+
+        for(u4_pos = 0; u4_pos < u4_num_results_updated; u4_pos++)
+        {
+            if(i4_new_cost < pps_sorted_array[u4_pos]->i4_tot_cost)
+            {
+                break;
+            }
+        }
+    }
+    else
+    {
+        S32 i4_new_cost =
+            (u1_shift == 0) ? ps_result_node->i4_tot_cost
+                            : (ps_result_node->i4_tot_cost + (1 << (u1_shift - 1))) >> u1_shift;
+
+        for(u4_pos = 0; u4_pos < u4_num_results_updated; u4_pos++)
+        {
+            S32 i4_entry_cost = (pu1_shifts[u4_pos] == 0)
+                                    ? pps_sorted_array[u4_pos]->i4_tot_cost
+                                    : (pps_sorted_array[u4_pos]->i4_tot_cost +
+                                       (1 << (pu1_shifts[u4_pos] - 1))) >>
+                                          pu1_shifts[u4_pos];
+
+            if(i4_new_cost < i4_entry_cost)
+            {
+                break;
+            }
+        }
+    }
+
+    /* Step 2: open a gap when the node does not go at the end */
+    u4_tail = u4_num_results_updated - u4_pos;
+
+    if(u4_tail != 0)
+    {
+        memmove(
+            &pps_sorted_array[u4_pos + 1],
+            &pps_sorted_array[u4_pos],
+            u4_tail * sizeof(search_node_t *));
+
+        if(NULL != pu1_shifts)
+        {
+            memmove(&pu1_shifts[u4_pos + 1], &pu1_shifts[u4_pos], u4_tail * sizeof(U08));
+        }
+    }
+
+    /* Step 3: store the new entry */
+    if(NULL != pu1_shifts)
+    {
+        pu1_shifts[u4_pos] = u1_shift;
+    }
+
+    pps_sorted_array[u4_pos] = ps_result_node;
+}
+
+/**
+********************************************************************************
+* @fn hme_find_pos_of_implicitly_stored_ref_id
+*
+* @brief Finds the position of the (i4_result_id + 1)-th entry of pi1_ref_idx
+* that equals i1_ref_idx
+*
+* @param[in] pi1_ref_idx : Array of ref indices to scan
+* @param[in] i1_ref_idx : Ref index to look for
+* @param[in] i4_result_id : Number of earlier matches to skip (0 = first match)
+* @param[in] i4_num_results : Number of entries in pi1_ref_idx
+*
+* @return Position of the requested match, or -1 when there are not enough matches
+********************************************************************************
+*/
+S32 hme_find_pos_of_implicitly_stored_ref_id(
+    S08 *pi1_ref_idx, S08 i1_ref_idx, S32 i4_result_id, S32 i4_num_results)
+{
+    S32 i4_pos;
+    S32 i4_matches_to_skip = i4_result_id;
+
+    for(i4_pos = 0; i4_pos < i4_num_results; i4_pos++)
+    {
+        if(pi1_ref_idx[i4_pos] != i1_ref_idx)
+        {
+            continue;
+        }
+
+        if(0 == i4_matches_to_skip)
+        {
+            return i4_pos;
+        }
+
+        i4_matches_to_skip--;
+    }
+
+    return -1;
+}
+
+/* Fills a search node from an MV and a ref idx. Both MV components go through  */
+/* SHL_NEG with i1_mv_magnitude_shift; the node is marked available and its     */
+/* subpel-done flag is cleared                                                  */
+static __inline void hme_search_node_populator(
+    search_node_t *ps_search_node, hme_mv_t *ps_mv, S08 i1_ref_idx, S08 i1_mv_magnitude_shift)
+{
+    /* Motion vector, rescaled by the requested magnitude shift */
+    ps_search_node->ps_mv->i2_mvx = SHL_NEG((WORD16)ps_mv->i2_mv_x, i1_mv_magnitude_shift);
+    ps_search_node->ps_mv->i2_mvy = SHL_NEG((WORD16)ps_mv->i2_mv_y, i1_mv_magnitude_shift);
+
+    /* Node state */
+    ps_search_node->u1_subpel_done = 0;
+    ps_search_node->u1_is_avail = 1;
+    ps_search_node->i1_ref_idx = i1_ref_idx;
+}
+/**
+********************************************************************************
+* @fn hme_populate_search_candidates
+*
+* @brief Fills the search nodes referenced by ps_ctxt->ps_search_cands with
+* initial candidates (MV and ref idx), one per entry of the priority
+* list selected by u1_search_candidate_list_index
+*
+* Each candidate is populated in one of three ways:
+* - zero MV (ZERO_MV or ZERO_MV_ALTREF), with the default or the alternate
+* default ref id
+* - spatial: read from the current layer mvbank at the LEFT, TOPLEFT, TOP
+* or TOPRIGHT neighbour with a magnitude shift of -2, or a zero MV with
+* the default ref id when that neighbour is flagged unavailable
+* - projected: read from the coarse layer mvbank with a magnitude shift of 1
+*
+* @param[in,out] ps_ctxt : Candidate init data; the search nodes referenced by
+* ps_search_cands are written
+*
+* @return Number of candidates populated, i.e. the smaller of the maximum for
+* the selected list and (i4_max_num_init_cands << 1)
+********************************************************************************
+*/
+S32 hme_populate_search_candidates(fpel_srch_cand_init_data_t *ps_ctxt)
+{
+ hme_mv_t *ps_mv;
+
+ S32 wd_c, ht_c, wd_p, ht_p;
+ S32 blksize_p, blksize_c;
+ S32 i;
+ S08 *pi1_ref_idx;
+ /* Cache for storing offsets */
+ S32 ai4_cand_offsets[NUM_SEARCH_CAND_LOCATIONS];
+
+ layer_ctxt_t *ps_curr_layer = ps_ctxt->ps_curr_layer;
+ layer_ctxt_t *ps_coarse_layer = ps_ctxt->ps_coarse_layer;
+ layer_mv_t *ps_coarse_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
+ layer_mv_t *ps_curr_layer_mvbank = ps_curr_layer->ps_layer_mvbank;
+ search_candt_t *ps_search_cands = ps_ctxt->ps_search_cands;
+ hme_mv_t s_zero_mv = { 0 };
+
+ S32 i4_pos_x = ps_ctxt->i4_pos_x;
+ S32 i4_pos_y = ps_ctxt->i4_pos_y;
+ S32 i4_num_act_ref_l0 = ps_ctxt->i4_num_act_ref_l0;
+ S32 i4_num_act_ref_l1 = ps_ctxt->i4_num_act_ref_l1;
+ U08 u1_pred_dir = ps_ctxt->u1_pred_dir;
+ U08 u1_pred_dir_ctr = ps_ctxt->u1_pred_dir_ctr;
+ U08 u1_num_results_in_curr_mvbank = ps_ctxt->u1_num_results_in_mvbank;
+ U08 u1_num_results_in_coarse_mvbank =
+ (u1_pred_dir == 0) ? (i4_num_act_ref_l0 * ps_coarse_layer_mvbank->i4_num_mvs_per_ref)
+ : (i4_num_act_ref_l1 * ps_coarse_layer_mvbank->i4_num_mvs_per_ref);
+ S32 i4_init_offset_projected =
+ (u1_pred_dir == 1) ? (i4_num_act_ref_l0 * ps_coarse_layer_mvbank->i4_num_mvs_per_ref) : 0; /* for direction 1, skip past the L0 entries of the coarse block */
+ S32 i4_init_offset_spatial =
+ (u1_pred_dir_ctr == 1)
+ ? (ps_curr_layer_mvbank->i4_num_mvs_per_ref * u1_num_results_in_curr_mvbank)
+ : 0;
+ U08 u1_search_candidate_list_index = ps_ctxt->u1_search_candidate_list_index;
+ U08 u1_max_num_search_cands =
+ gau1_max_num_search_cands_in_l0_me[u1_search_candidate_list_index];
+ S32 i4_num_srch_cands = MIN(u1_max_num_search_cands, ps_ctxt->i4_max_num_init_cands << 1);
+ U16 u2_is_offset_available = 0; /* bit e_search_cand_loc set means ai4_cand_offsets[loc] has been computed */
+ U08 u1_search_blk_to_spatial_mvbank_blk_size_factor = 1;
+
+ /* Width and ht of current and prev layers */
+ wd_c = ps_curr_layer->i4_wd;
+ ht_c = ps_curr_layer->i4_ht;
+ wd_p = ps_coarse_layer->i4_wd; /* wd_p and ht_p are not read again in this function */
+ ht_p = ps_coarse_layer->i4_ht;
+
+ blksize_p = gau1_blk_size_to_wd_shift[ps_coarse_layer_mvbank->e_blk_size];
+ blksize_c = gau1_blk_size_to_wd_shift[ps_curr_layer_mvbank->e_blk_size];
+
+ /* ASSERT for valid sizes */
+ ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5));
+
+ {
+ S32 x = i4_pos_x >> 4;
+ S32 y = i4_pos_y >> 4;
+
+ if(blksize_c != gau1_blk_size_to_wd_shift[ps_ctxt->e_search_blk_size])
+ {
+ /* The mvbank block size differs from the search block size: double the block coordinates */
+ x *= 2;
+ y *= 2;
+
+ u1_search_blk_to_spatial_mvbank_blk_size_factor = 2;
+ }
+
+ i4_init_offset_spatial += (x + y * ps_curr_layer_mvbank->i4_num_blks_per_row) *
+ ps_curr_layer_mvbank->i4_num_mvs_per_blk;
+ }
+
+ for(i = 0; i < i4_num_srch_cands; i++)
+ {
+ SEARCH_CANDIDATE_TYPE_T e_search_cand_type =
+ gae_search_cand_priority_to_search_cand_type_map_in_l0_me[u1_search_candidate_list_index]
+ [i];
+ SEARCH_CAND_LOCATIONS_T e_search_cand_loc =
+ gae_search_cand_type_to_location_map[e_search_cand_type];
+ S08 i1_result_id = MIN(
+ gai1_search_cand_type_to_result_id_map[e_search_cand_type],
+ (e_search_cand_loc < 0 ? 0
+ : ps_ctxt->pu1_num_fpel_search_cands[e_search_cand_loc] - 1));
+ U08 u1_is_spatial_cand = (1 == gau1_search_cand_type_to_spatiality_map[e_search_cand_type]);
+ U08 u1_is_proj_cand = (0 == gau1_search_cand_type_to_spatiality_map[e_search_cand_type]);
+ U08 u1_is_zeroMV_cand = (ZERO_MV == e_search_cand_type) ||
+ (ZERO_MV_ALTREF == e_search_cand_type);
+
+ /* When spatial candidates are available, use them, else use the projected candidates */
+ /* This is required since some blocks will never have certain spatial candidates, and in order */
+ /* to accommodate such instances in 'gae_search_cand_priority_to_search_cand_type_map_in_l0_me' list, */
+ /* all candidates apart from the 'LEFT' have been marked as projected */
+ if(((e_search_cand_loc == TOPLEFT) || (e_search_cand_loc == TOP) ||
+ (e_search_cand_loc == TOPRIGHT)) &&
+ (i1_result_id < u1_num_results_in_curr_mvbank) && u1_is_proj_cand)
+ {
+ if(e_search_cand_loc == TOPLEFT)
+ {
+ u1_is_spatial_cand = ps_ctxt->u1_is_topLeft_available ||
+ !ps_ctxt->u1_is_left_available;
+ }
+ else if(e_search_cand_loc == TOPRIGHT)
+ {
+ u1_is_spatial_cand = ps_ctxt->u1_is_topRight_available;
+ }
+ else
+ {
+ u1_is_spatial_cand = ps_ctxt->u1_is_top_available;
+ }
+
+ u1_is_proj_cand = !u1_is_spatial_cand;
+ }
+
+ switch(u1_is_zeroMV_cand + (u1_is_spatial_cand << 1) + (u1_is_proj_cand << 2)) /* 1 = zero MV, 2 = spatial, 4 = projected; any other combination asserts */
+ {
+ case 1:
+ {
+ hme_search_node_populator(
+ ps_search_cands[i].ps_search_node,
+ &s_zero_mv,
+ (ZERO_MV == e_search_cand_type) ? ps_ctxt->i1_default_ref_id
+ : ps_ctxt->i1_alt_default_ref_id,
+ 0);
+
+ break;
+ }
+ case 2:
+ {
+ S08 i1_mv_magnitude_shift = 0;
+
+ S32 i4_offset = i4_init_offset_spatial;
+
+ i1_result_id = MIN(i1_result_id, u1_num_results_in_curr_mvbank - 1);
+ i4_offset += i1_result_id;
+
+ switch(e_search_cand_loc)
+ {
+ case LEFT:
+ {
+ if(ps_ctxt->u1_is_left_available)
+ {
+ i1_mv_magnitude_shift = -2;
+
+ i4_offset -= ps_curr_layer_mvbank->i4_num_mvs_per_blk; /* one block to the left */
+
+ ps_mv = ps_curr_layer_mvbank->ps_mv + i4_offset;
+ pi1_ref_idx = ps_curr_layer_mvbank->pi1_ref_idx + i4_offset;
+ }
+ else
+ {
+ i1_mv_magnitude_shift = 0;
+
+ ps_mv = &s_zero_mv;
+ pi1_ref_idx = &ps_ctxt->i1_default_ref_id;
+ }
+
+ break;
+ }
+ case TOPLEFT:
+ {
+ if(ps_ctxt->u1_is_topLeft_available)
+ {
+ i1_mv_magnitude_shift = -2;
+
+ i4_offset -= ps_curr_layer_mvbank->i4_num_mvs_per_blk; /* one block to the left ... */
+ i4_offset -= ps_curr_layer_mvbank->i4_num_mvs_per_row; /* ... and one row up */
+
+ ps_mv = ps_curr_layer_mvbank->ps_mv + i4_offset;
+ pi1_ref_idx = ps_curr_layer_mvbank->pi1_ref_idx + i4_offset;
+ }
+ else
+ {
+ i1_mv_magnitude_shift = 0;
+
+ ps_mv = &s_zero_mv;
+ pi1_ref_idx = &ps_ctxt->i1_default_ref_id;
+ }
+
+ break;
+ }
+ case TOP:
+ {
+ if(ps_ctxt->u1_is_top_available)
+ {
+ i1_mv_magnitude_shift = -2;
+
+ i4_offset -= ps_curr_layer_mvbank->i4_num_mvs_per_row; /* one row up */
+
+ ps_mv = ps_curr_layer_mvbank->ps_mv + i4_offset;
+ pi1_ref_idx = ps_curr_layer_mvbank->pi1_ref_idx + i4_offset;
+ }
+ else
+ {
+ i1_mv_magnitude_shift = 0;
+
+ ps_mv = &s_zero_mv;
+ pi1_ref_idx = &ps_ctxt->i1_default_ref_id;
+ }
+
+ break;
+ }
+ case TOPRIGHT:
+ {
+ if(ps_ctxt->u1_is_topRight_available)
+ {
+ i1_mv_magnitude_shift = -2;
+
+ i4_offset += ps_curr_layer_mvbank->i4_num_mvs_per_blk *
+ u1_search_blk_to_spatial_mvbank_blk_size_factor; /* one search block (1 or 2 mvbank blocks) to the right */
+ i4_offset -= ps_curr_layer_mvbank->i4_num_mvs_per_row; /* and one row up */
+
+ ps_mv = ps_curr_layer_mvbank->ps_mv + i4_offset;
+ pi1_ref_idx = ps_curr_layer_mvbank->pi1_ref_idx + i4_offset;
+ }
+ else
+ {
+ i1_mv_magnitude_shift = 0;
+ ps_mv = &s_zero_mv;
+ pi1_ref_idx = &ps_ctxt->i1_default_ref_id;
+ }
+
+ break;
+ }
+ default:
+ {
+ /* A spatial candidate must be one of the four locations above. */
+ /* NOTE(review): ps_mv and pi1_ref_idx are not assigned on this path */
+ ASSERT(0);
+ }
+ }
+
+ hme_search_node_populator(
+ ps_search_cands[i].ps_search_node, ps_mv, pi1_ref_idx[0], i1_mv_magnitude_shift);
+
+ break;
+ }
+ case 4:
+ {
+ ASSERT(ILLUSORY_CANDIDATE != e_search_cand_type);
+ ASSERT(ILLUSORY_LOCATION != e_search_cand_loc);
+
+ i1_result_id = MIN(i1_result_id, u1_num_results_in_coarse_mvbank - 1);
+
+ if(!(u2_is_offset_available & (1 << e_search_cand_loc)))
+ {
+ /* First use of this location: compute its coarse layer offset and cache it */
+ S32 x, y;
+
+ x = i4_pos_x + gai4_search_cand_location_to_x_offset_map[e_search_cand_loc];
+ y = i4_pos_y + gai4_search_cand_location_to_y_offset_map[e_search_cand_loc];
+
+ /* Safety check to avoid uninitialized access across temporal layers */
+ x = CLIP3(x, 0, (wd_c - blksize_p)); /* NOTE(review): blksize_p holds the wd_shift value (3, 4 or 5 per the ASSERT above) and is used here as the clip margin - confirm intent */
+ y = CLIP3(y, 0, (ht_c - blksize_p));
+
+ /* Project the positions to prev layer */
+ x = x >> blksize_p;
+ y = y >> blksize_p;
+
+ ai4_cand_offsets[e_search_cand_loc] =
+ (x * ps_coarse_layer_mvbank->i4_num_mvs_per_blk);
+ ai4_cand_offsets[e_search_cand_loc] +=
+ (y * ps_coarse_layer_mvbank->i4_num_mvs_per_row);
+ ai4_cand_offsets[e_search_cand_loc] += i4_init_offset_projected;
+
+ u2_is_offset_available |= (1 << e_search_cand_loc);
+ }
+
+ ps_mv =
+ ps_coarse_layer_mvbank->ps_mv + ai4_cand_offsets[e_search_cand_loc] + i1_result_id;
+ pi1_ref_idx = ps_coarse_layer_mvbank->pi1_ref_idx +
+ ai4_cand_offsets[e_search_cand_loc] + i1_result_id;
+
+ hme_search_node_populator(ps_search_cands[i].ps_search_node, ps_mv, pi1_ref_idx[0], 1);
+
+ break;
+ }
+ default:
+ {
+ /* A candidate must be exactly one of zero MV, spatial or projected */
+ ASSERT(0);
+ }
+ }
+
+ ASSERT(ps_search_cands[i].ps_search_node->i1_ref_idx >= 0);
+ ASSERT(
+ !u1_pred_dir
+ ? (ps_ctxt->pi4_ref_id_lc_to_l0_map[ps_search_cands[i].ps_search_node->i1_ref_idx] <
+ i4_num_act_ref_l0)
+ : (ps_ctxt->pi4_ref_id_lc_to_l1_map[ps_search_cands[i].ps_search_node->i1_ref_idx] <
+ ps_ctxt->i4_num_act_ref_l1));
+ }
+
+ return i4_num_srch_cands;
+}
+
+/**
+********************************************************************************
+* @fn hme_mv_clipper
+*
+* @brief Clips the MV of each of the first i4_num_srch_cands search candidates
+* to the MV range registered for the candidate's ref idx
+*
+* @param[in,out] ps_search_prms_blk : Search params; the MVs of the candidate
+* search nodes are clipped in place
+* @param[in] i4_num_srch_cands : Number of candidates to process
+* @param[in] i1_check_for_mult_refs : not referenced in this function
+* @param[in] u1_fpel_refine_extent : passed through to CLIP_MV_WITHIN_RANGE
+* @param[in] u1_hpel_refine_extent : passed through to CLIP_MV_WITHIN_RANGE
+* @param[in] u1_qpel_refine_extent : passed through to CLIP_MV_WITHIN_RANGE
+*
+* @return None
+********************************************************************************
+*/
+void hme_mv_clipper(
+    hme_search_prms_t *ps_search_prms_blk,
+    S32 i4_num_srch_cands,
+    S08 i1_check_for_mult_refs,
+    U08 u1_fpel_refine_extent,
+    U08 u1_hpel_refine_extent,
+    U08 u1_qpel_refine_extent)
+{
+    S32 i4_cand_idx = 0;
+
+    while(i4_cand_idx < i4_num_srch_cands)
+    {
+        search_node_t *ps_search_node =
+            ps_search_prms_blk->ps_search_candts[i4_cand_idx].ps_search_node;
+        range_prms_t *ps_range_prms =
+            ps_search_prms_blk->aps_mv_range[ps_search_node->i1_ref_idx];
+
+        /* Clipping is done here because two candidates can become identical */
+        /* after clipping, and those are then removed during deduplication   */
+        CLIP_MV_WITHIN_RANGE(
+            ps_search_node->ps_mv->i2_mvx,
+            ps_search_node->ps_mv->i2_mvy,
+            ps_range_prms,
+            u1_fpel_refine_extent,
+            u1_hpel_refine_extent,
+            u1_qpel_refine_extent);
+
+        i4_cand_idx++;
+    }
+}
+
+/**
+********************************************************************************
+* @fn hme_init_pred_buf_info
+*
+* @brief Acquires one free pred buffer from the buffer manager and points the
+* pred buf info of the first PU (and of the second PU, for part types
+* other than 2Nx2N) into it
+*
+* @param[out] ps_info : Pred buf info; ps_info[0][0] and, when applicable,
+* ps_info[0][1] are written
+* @param[in,out] ps_buf_mngr : Pred buffer manager (usage indicator is updated
+* by ihevce_get_free_pred_buf_indices)
+* @param[in] u1_pu1_wd : Width of the first PU
+* @param[in] u1_pu1_ht : Height of the first PU
+* @param[in] e_part_type : Partition type of the CU
+*
+* @return None
+********************************************************************************
+*/
+void hme_init_pred_buf_info(
+    hme_pred_buf_info_t (*ps_info)[MAX_NUM_INTER_PARTS],
+    hme_pred_buf_mngr_t *ps_buf_mngr,
+    U08 u1_pu1_wd,
+    U08 u1_pu1_ht,
+    PART_TYPE_T e_part_type)
+{
+    U08 u1_buf_id;
+
+    /* Exactly one free buffer is requested; nothing is populated on failure */
+    if(1 != ihevce_get_free_pred_buf_indices(
+                &u1_buf_id, &ps_buf_mngr->u4_pred_buf_usage_indicator, 1))
+    {
+        ASSERT(0);
+        return;
+    }
+
+    /* First PU starts at the beginning of the buffer */
+    ps_info[0][0].i4_pred_stride = MAX_CU_SIZE;
+    ps_info[0][0].pu1_pred = ps_buf_mngr->apu1_pred_bufs[u1_buf_id];
+    ps_info[0][0].u1_pred_buf_array_id = u1_buf_id;
+
+    if(PRT_2Nx2N == e_part_type)
+    {
+        return;
+    }
+
+    /* Second PU shares the buffer: it starts u1_pu1_ht rows further down when  */
+    /* gai1_is_part_vertical is set for the first part id, else u1_pu1_wd       */
+    /* columns to the right                                                     */
+    ps_info[0][1].i4_pred_stride = MAX_CU_SIZE;
+    ps_info[0][1].pu1_pred = ps_buf_mngr->apu1_pred_bufs[u1_buf_id] +
+                             (gai1_is_part_vertical[ge_part_type_to_part_id[e_part_type][0]]
+                                  ? u1_pu1_ht * ps_info[0][1].i4_pred_stride
+                                  : u1_pu1_wd);
+    ps_info[0][1].u1_pred_buf_array_id = u1_buf_id;
+}
+
+void hme_debrief_bipred_eval(
+ part_type_results_t *ps_part_type_result,
+ hme_pred_buf_info_t (*ps_pred_buf_info)[MAX_NUM_INTER_PARTS],
+ hme_pred_buf_mngr_t *ps_pred_buf_mngr,
+ U08 *pu1_allocated_pred_buf_array_indixes,
+ ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list
+
+)
+{
+ PART_TYPE_T e_part_type = (PART_TYPE_T)ps_part_type_result->u1_part_type;
+
+ U32 *pu4_pred_buf_usage_indicator = &ps_pred_buf_mngr->u4_pred_buf_usage_indicator;
+ U08 u1_is_part_vertical = gai1_is_part_vertical[ge_part_type_to_part_id[e_part_type][0]];
+
+ if(0 == ps_part_type_result->u1_part_type)
+ {
+ if(ps_part_type_result->as_pu_results->pu.b2_pred_mode == PRED_BI)
+ {
+ ASSERT(UCHAR_MAX != ps_pred_buf_info[2][0].u1_pred_buf_array_id);
+
+ ps_part_type_result->pu1_pred = ps_pred_buf_info[2][0].pu1_pred;
+ ps_part_type_result->i4_pred_stride = ps_pred_buf_info[2][0].i4_pred_stride;
+
+ ihevce_set_pred_buf_as_free(
+ pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[0]);
+
+ ihevce_set_pred_buf_as_free(
+ pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[1]);
+ }
+ else
+ {
+ ps_part_type_result->pu1_pred = ps_pred_buf_info[0][0].pu1_pred;
+ ps_part_type_result->i4_pred_stride = ps_pred_buf_info[0][0].i4_pred_stride;
+
+ ihevce_set_pred_buf_as_free(
+ pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[2]);
+
+ ihevce_set_pred_buf_as_free(
+ pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[1]);
+
+ if(UCHAR_MAX == ps_pred_buf_info[0][0].u1_pred_buf_array_id)
+ {
+ ihevce_set_pred_buf_as_free(
+ pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[0]);
+ }
+ }
+ }
+ else
+ {
+ U08 *pu1_src_pred;
+ U08 *pu1_dst_pred;
+ S32 i4_src_pred_stride;
+ S32 i4_dst_pred_stride;
+
+ U08 u1_pu1_wd = (ps_part_type_result->as_pu_results[0].pu.b4_wd + 1) << 2;
+ U08 u1_pu1_ht = (ps_part_type_result->as_pu_results[0].pu.b4_ht + 1) << 2;
+ U08 u1_pu2_wd = (ps_part_type_result->as_pu_results[1].pu.b4_wd + 1) << 2;
+ U08 u1_pu2_ht = (ps_part_type_result->as_pu_results[1].pu.b4_ht + 1) << 2;
+
+ U08 u1_condition_for_switch =
+ (ps_part_type_result->as_pu_results[0].pu.b2_pred_mode == PRED_BI) |
+ ((ps_part_type_result->as_pu_results[1].pu.b2_pred_mode == PRED_BI) << 1);
+
+ switch(u1_condition_for_switch)
+ {
+ case 0:
+ {
+ ps_part_type_result->pu1_pred =
+ ps_pred_buf_mngr->apu1_pred_bufs[pu1_allocated_pred_buf_array_indixes[0]];
+ ps_part_type_result->i4_pred_stride = MAX_CU_SIZE;
+
+ ihevce_set_pred_buf_as_free(
+ pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[2]);
+
+ ihevce_set_pred_buf_as_free(
+ pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[1]);
+
+ if(UCHAR_MAX == ps_pred_buf_info[0][0].u1_pred_buf_array_id)
+ {
+ pu1_src_pred = ps_pred_buf_info[0][0].pu1_pred;
+ pu1_dst_pred = ps_part_type_result->pu1_pred;
+ i4_src_pred_stride = ps_pred_buf_info[0][0].i4_pred_stride;
+ i4_dst_pred_stride = ps_part_type_result->i4_pred_stride;
+
+ ps_cmn_utils_optimised_function_list->pf_copy_2d(
+ pu1_dst_pred,
+ i4_dst_pred_stride,
+ pu1_src_pred,
+ i4_src_pred_stride,
+ u1_pu1_wd,
+ u1_pu1_ht);
+ }
+
+ if(UCHAR_MAX == ps_pred_buf_info[0][1].u1_pred_buf_array_id)
+ {
+ pu1_src_pred = ps_pred_buf_info[0][1].pu1_pred;
+ pu1_dst_pred = ps_part_type_result->pu1_pred +
+ (u1_is_part_vertical
+ ? u1_pu1_ht * ps_part_type_result->i4_pred_stride
+ : u1_pu1_wd);
+ i4_src_pred_stride = ps_pred_buf_info[0][1].i4_pred_stride;
+ i4_dst_pred_stride = ps_part_type_result->i4_pred_stride;
+
+ ps_cmn_utils_optimised_function_list->pf_copy_2d(
+ pu1_dst_pred,
+ i4_dst_pred_stride,
+ pu1_src_pred,
+ i4_src_pred_stride,
+ u1_pu2_wd,
+ u1_pu2_ht);
+ }
+
+ break;
+ }
+ case 1:
+ {
+ ASSERT(UCHAR_MAX != ps_pred_buf_info[2][0].u1_pred_buf_array_id);
+
+ ihevce_set_pred_buf_as_free(
+ pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[1]);
+
+ /* Copy PU1 pred into PU2's pred buf */
+ if(((u1_pu1_ht < u1_pu2_ht) || (u1_pu1_wd < u1_pu2_wd)) &&
+ (UCHAR_MAX != ps_pred_buf_info[0][1].u1_pred_buf_array_id))
+ {
+ ps_part_type_result->pu1_pred =
+ ps_pred_buf_info[0][1].pu1_pred -
+ (u1_is_part_vertical ? u1_pu1_ht * ps_pred_buf_info[0][1].i4_pred_stride
+ : u1_pu1_wd);
+ ps_part_type_result->i4_pred_stride = ps_pred_buf_info[0][1].i4_pred_stride;
+
+ ihevce_set_pred_buf_as_free(
+ pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[2]);
+
+ pu1_src_pred = ps_pred_buf_info[2][0].pu1_pred;
+ pu1_dst_pred = ps_part_type_result->pu1_pred;
+ i4_src_pred_stride = ps_pred_buf_info[2][0].i4_pred_stride;
+ i4_dst_pred_stride = ps_part_type_result->i4_pred_stride;
+
+ ps_cmn_utils_optimised_function_list->pf_copy_2d(
+ pu1_dst_pred,
+ i4_dst_pred_stride,
+ pu1_src_pred,
+ i4_src_pred_stride,
+ u1_pu1_wd,
+ u1_pu1_ht);
+ }
+ else
+ {
+ ps_part_type_result->pu1_pred = ps_pred_buf_info[2][0].pu1_pred;
+ ps_part_type_result->i4_pred_stride = ps_pred_buf_info[2][0].i4_pred_stride;
+
+ ihevce_set_pred_buf_as_free(
+ pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[0]);
+
+ pu1_src_pred = ps_pred_buf_info[0][1].pu1_pred;
+ pu1_dst_pred = ps_part_type_result->pu1_pred;
+ i4_src_pred_stride = ps_pred_buf_info[0][1].i4_pred_stride;
+ i4_dst_pred_stride = ps_part_type_result->i4_pred_stride;
+
+ ps_cmn_utils_optimised_function_list->pf_copy_2d(
+ pu1_dst_pred,
+ i4_dst_pred_stride,
+ pu1_src_pred,
+ i4_src_pred_stride,
+ u1_pu2_wd,
+ u1_pu2_ht);
+ }
+
+ break;
+ }
+ case 2:
+ {
+ ASSERT(UCHAR_MAX != ps_pred_buf_info[2][1].u1_pred_buf_array_id);
+
+ ihevce_set_pred_buf_as_free(
+ pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[1]);
+
+ /* Copy PU2 pred into PU1's pred buf */
+ if(((u1_pu1_ht > u1_pu2_ht) || (u1_pu1_wd > u1_pu2_wd)) &&
+ (UCHAR_MAX != ps_pred_buf_info[0][0].u1_pred_buf_array_id))
+ {
+ ps_part_type_result->pu1_pred = ps_pred_buf_info[0][0].pu1_pred;
+ ps_part_type_result->i4_pred_stride = ps_pred_buf_info[0][0].i4_pred_stride;
+
+ ihevce_set_pred_buf_as_free(
+ pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[2]);
+
+ pu1_src_pred = ps_pred_buf_info[2][1].pu1_pred;
+ pu1_dst_pred = ps_part_type_result->pu1_pred +
+ (u1_is_part_vertical
+ ? u1_pu1_ht * ps_part_type_result->i4_pred_stride
+ : u1_pu1_wd);
+ i4_src_pred_stride = ps_pred_buf_info[2][1].i4_pred_stride;
+ i4_dst_pred_stride = ps_part_type_result->i4_pred_stride;
+
+ ps_cmn_utils_optimised_function_list->pf_copy_2d(
+ pu1_dst_pred,
+ i4_dst_pred_stride,
+ pu1_src_pred,
+ i4_src_pred_stride,
+ u1_pu2_wd,
+ u1_pu2_ht);
+ }
+ else
+ {
+ ps_part_type_result->pu1_pred =
+ ps_pred_buf_info[2][1].pu1_pred -
+ (u1_is_part_vertical ? u1_pu1_ht * ps_pred_buf_info[2][1].i4_pred_stride
+ : u1_pu1_wd);
+ ps_part_type_result->i4_pred_stride = ps_pred_buf_info[2][1].i4_pred_stride;
+
+ ihevce_set_pred_buf_as_free(
+ pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[0]);
+
+ pu1_src_pred = ps_pred_buf_info[0][0].pu1_pred;
+ pu1_dst_pred = ps_part_type_result->pu1_pred;
+ i4_src_pred_stride = ps_pred_buf_info[0][0].i4_pred_stride;
+ i4_dst_pred_stride = ps_part_type_result->i4_pred_stride;
+
+ ps_cmn_utils_optimised_function_list->pf_copy_2d(
+ pu1_dst_pred,
+ i4_dst_pred_stride,
+ pu1_src_pred,
+ i4_src_pred_stride,
+ u1_pu1_wd,
+ u1_pu1_ht);
+ }
+
+ break;
+ }
+ case 3:
+ {
+ ASSERT(UCHAR_MAX != ps_pred_buf_info[2][0].u1_pred_buf_array_id);
+ ASSERT(UCHAR_MAX != ps_pred_buf_info[2][1].u1_pred_buf_array_id);
+ ASSERT(
+ ps_pred_buf_info[2][1].u1_pred_buf_array_id ==
+ ps_pred_buf_info[2][0].u1_pred_buf_array_id);
+
+ ps_part_type_result->pu1_pred = ps_pred_buf_info[2][0].pu1_pred;
+ ps_part_type_result->i4_pred_stride = ps_pred_buf_info[2][0].i4_pred_stride;
+
+ ihevce_set_pred_buf_as_free(
+ pu4_pred_buf_usage_indicator, pu1_allocated_pred_buf_array_indixes[0]);
+
+ break;
+ }
+ }
+ }
+}
+
+/**
+ * Looks up the priority of a search candidate type for L1/L2 ME.
+ *
+ * Row 1 of gau1_search_cand_priority_in_l1_and_l2_me is used when
+ * e_quality_preset compares >= ME_MEDIUM_SPEED, row 0 otherwise; e_cand_type
+ * selects the entry within the row.
+ *
+ * An entry equal to UCHAR_MAX trips ASSERT(0), and the value is additionally
+ * asserted not to exceed MAX_INIT_CANDTS.
+ *
+ * Returns the table entry.
+ */
+U08 hme_decide_search_candidate_priority_in_l1_and_l2_me(
+    SEARCH_CANDIDATE_TYPE_T e_cand_type, ME_QUALITY_PRESETS_T e_quality_preset)
+{
+    S32 i4_table_row = (e_quality_preset >= ME_MEDIUM_SPEED);
+    U08 u1_priority = gau1_search_cand_priority_in_l1_and_l2_me[i4_table_row][e_cand_type];
+
+    /* UCHAR_MAX entries are not meant to be looked up */
+    if(UCHAR_MAX == u1_priority)
+    {
+        ASSERT(0);
+    }
+
+    ASSERT(u1_priority <= MAX_INIT_CANDTS);
+
+    return u1_priority;
+}
+
+/**
+ * Looks up the priority of a search candidate type for L0 ME.
+ *
+ * u1_index selects the row of gau1_search_cand_priority_in_l0_me and
+ * e_cand_type the entry within that row.
+ *
+ * An entry equal to UCHAR_MAX trips ASSERT(0), and the value is additionally
+ * asserted not to exceed MAX_INIT_CANDTS.
+ *
+ * Returns the table entry.
+ */
+U08 hme_decide_search_candidate_priority_in_l0_me(SEARCH_CANDIDATE_TYPE_T e_cand_type, U08 u1_index)
+{
+    U08 u1_priority = gau1_search_cand_priority_in_l0_me[u1_index][e_cand_type];
+
+    /* UCHAR_MAX entries are not meant to be looked up */
+    if(UCHAR_MAX == u1_priority)
+    {
+        ASSERT(0);
+    }
+
+    ASSERT(u1_priority <= MAX_INIT_CANDTS);
+
+    return u1_priority;
+}
+
+/**
+ * Fills in the priority ids of the zero-MV and projected colocated search
+ * candidates and reports the number of colocated candidates together with the
+ * index of the L0-ME priority list.
+ *
+ * pi4_id_Z                          [out] [0] receives the id of ZERO_MV when
+ *                                   either list has an active ref; [1] receives
+ *                                   the id of ZERO_MV_ALTREF when more than one
+ *                                   L0 ref is active and bidir is disabled
+ * pi4_id_coloc                      [out] ids of the colocated candidates
+ * pi4_num_coloc_cands               [out] one entry when bidir is disabled,
+ *                                   two entries (L0, L1) when it is enabled
+ * pu1_search_candidate_list_index   [out] same layout as pi4_num_coloc_cands
+ * i4_num_act_ref_l0                 number of active L0 refs; asserted to be
+ *                                   1..4 when bidir is disabled
+ * i4_num_act_ref_l1                 number of active L1 refs
+ * u1_is_bidir_enabled               nonzero selects the bidir path
+ * u1_4x4_blk_in_l1me                nonzero adds the TR/BL/BR colocated
+ *                                   candidates
+ */
+void hme_search_cand_data_init(
+    S32 *pi4_id_Z,
+    S32 *pi4_id_coloc,
+    S32 *pi4_num_coloc_cands,
+    U08 *pu1_search_candidate_list_index,
+    S32 i4_num_act_ref_l0,
+    S32 i4_num_act_ref_l1,
+    U08 u1_is_bidir_enabled,
+    U08 u1_4x4_blk_in_l1me)
+{
+    /* Candidates appended after the PROJECTED_COLOC0.. run in the 4x4 case */
+    static const SEARCH_CANDIDATE_TYPE_T ae_extra_coloc_cands[6] = {
+        PROJECTED_COLOC_TR0, PROJECTED_COLOC_BL0, PROJECTED_COLOC_BR0,
+        PROJECTED_COLOC_TR1, PROJECTED_COLOC_BL1, PROJECTED_COLOC_BR1
+    };
+
+    S32 i4_cand;
+    S32 i4_extra;
+    S32 i4_num_coloc;
+    U08 u1_list_idx;
+
+    if(!u1_is_bidir_enabled)
+    {
+        S32 i4_num_extra = 0;
+
+        /* Two lists per L0 ref count: even index without 4x4, odd with 4x4 */
+        u1_list_idx = (i4_num_act_ref_l0 - 1) * 2 + (u1_4x4_blk_in_l1me ? 1 : 0);
+        i4_num_coloc = i4_num_act_ref_l0 * 2;
+
+        if((i4_num_act_ref_l0 >= 1) && (i4_num_act_ref_l0 <= 4))
+        {
+            for(i4_cand = 0; i4_cand < i4_num_coloc; i4_cand++)
+            {
+                pi4_id_coloc[i4_cand] = hme_decide_search_candidate_priority_in_l0_me(
+                    (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i4_cand), u1_list_idx);
+            }
+
+            if(u1_4x4_blk_in_l1me)
+            {
+                /* A single ref adds TR0/BL0/BR0 only; more refs add all six */
+                i4_num_extra = (1 == i4_num_act_ref_l0) ? 3 : 6;
+
+                for(i4_extra = 0; i4_extra < i4_num_extra; i4_extra++)
+                {
+                    pi4_id_coloc[i4_num_coloc + i4_extra] =
+                        hme_decide_search_candidate_priority_in_l0_me(
+                            ae_extra_coloc_cands[i4_extra], u1_list_idx);
+                }
+            }
+        }
+        else
+        {
+            ASSERT(0);
+        }
+
+        *pi4_num_coloc_cands = i4_num_coloc + i4_num_extra;
+        *pu1_search_candidate_list_index = u1_list_idx;
+    }
+    else
+    {
+        /* The list index used for the lookups below is hardcoded to 10    */
+        /* (no 4x4) or 11 (4x4). These are not the values handed back to   */
+        /* the caller: the returned indices and counts depend on the       */
+        /* number of refs in L0 and L1 and are computed separately below.  */
+        u1_list_idx = u1_4x4_blk_in_l1me ? 11 : 10;
+
+        i4_num_coloc = 2 + (2 * ((i4_num_act_ref_l0 > 1) || (i4_num_act_ref_l1 > 1)));
+
+        for(i4_cand = 0; i4_cand < i4_num_coloc; i4_cand++)
+        {
+            pi4_id_coloc[i4_cand] = hme_decide_search_candidate_priority_in_l0_me(
+                (SEARCH_CANDIDATE_TYPE_T)(PROJECTED_COLOC0 + i4_cand), u1_list_idx);
+        }
+
+        if(u1_4x4_blk_in_l1me)
+        {
+            for(i4_extra = 0; i4_extra < 3; i4_extra++)
+            {
+                pi4_id_coloc[i4_num_coloc + i4_extra] =
+                    hme_decide_search_candidate_priority_in_l0_me(
+                        ae_extra_coloc_cands[i4_extra], u1_list_idx);
+            }
+        }
+
+        /* Entry 0 describes L0 */
+        pu1_search_candidate_list_index[0] =
+            8 + ((i4_num_act_ref_l0 > 1) * 2) + u1_4x4_blk_in_l1me;
+        pi4_num_coloc_cands[0] = (u1_4x4_blk_in_l1me * 3) + 2 + ((i4_num_act_ref_l0 > 1) * 2);
+
+        /* Entry 1 describes L1 */
+        pu1_search_candidate_list_index[1] =
+            8 + ((i4_num_act_ref_l1 > 1) * 2) + u1_4x4_blk_in_l1me;
+        pi4_num_coloc_cands[1] = (u1_4x4_blk_in_l1me * 3) + 2 + ((i4_num_act_ref_l1 > 1) * 2);
+    }
+
+    if(i4_num_act_ref_l0 || i4_num_act_ref_l1)
+    {
+        pi4_id_Z[0] = hme_decide_search_candidate_priority_in_l0_me(
+            (SEARCH_CANDIDATE_TYPE_T)ZERO_MV, pu1_search_candidate_list_index[0]);
+    }
+
+    if((i4_num_act_ref_l0 > 1) && !u1_is_bidir_enabled)
+    {
+        pi4_id_Z[1] = hme_decide_search_candidate_priority_in_l0_me(
+            (SEARCH_CANDIDATE_TYPE_T)ZERO_MV_ALTREF, pu1_search_candidate_list_index[0]);
+    }
+}
+
+/**
+ * Picks the base block size used to build per-partition statistics.
+ *
+ * Only the last entry of pi4_valid_part_array is inspected:
+ *  - a single valid partition (asserted to be PART_ID_2Nx2N) gives u1_cu_size
+ *  - a last id of at most PART_ID_NxN_BR gives u1_cu_size / 2
+ *  - anything else gives u1_cu_size / 4
+ *
+ * NOTE(review): this presumably relies on the array being ordered by
+ * partition id - confirm against the callers.
+ */
+static U08
+    hme_determine_base_block_size(S32 *pi4_valid_part_array, S32 i4_num_valid_parts, U08 u1_cu_size)
+{
+    S32 i4_last_part_id;
+
+    ASSERT(i4_num_valid_parts > 0);
+
+    i4_last_part_id = pi4_valid_part_array[i4_num_valid_parts - 1];
+
+    if(1 == i4_num_valid_parts)
+    {
+        ASSERT(i4_last_part_id == PART_ID_2Nx2N);
+
+        return u1_cu_size;
+    }
+
+    return (i4_last_part_id <= PART_ID_NxN_BR) ? (u1_cu_size / 2) : (u1_cu_size / 4);
+}
+
+/**
+ * Derives the variance of one partition (PU) from the per-base-block sums
+ * held in pu8_SigmaX and pu8_SigmaXSquared.
+ *
+ * Both arrays are read as row-major grids with
+ * (u1_cu_size / u1_base_block_size) entries per row. The entries covered by
+ * partition i4_part_id are added up and the result is
+ *
+ *     (N * sum(X^2) - sum(X)^2 + N^2 / 2) / N^2
+ *
+ * where N is the number of pixels in the partition, i.e. the variance
+ * rounded to the nearest integer.
+ *
+ * Partition geometry is taken from gai1_part_wd_and_ht (scaled by
+ * u1_cu_size >> 4), ge_part_id_to_part_type, gau1_part_id_to_part_num and
+ * gai1_is_part_vertical.
+ *
+ * pu8_SigmaX           per-base-block sum of pixels
+ * pu8_SigmaXSquared    per-base-block sum of squared pixels
+ * u1_cu_size           CU width/height in pixels
+ * u1_base_block_size   width/height of one base block in pixels
+ * i4_part_id           partition whose variance is wanted
+ *
+ * Returns the variance; it is asserted to fit in 32 bits.
+ */
+static U32 hme_compute_variance_of_pu_from_base_blocks(
+    ULWORD64 *pu8_SigmaX,
+    ULWORD64 *pu8_SigmaXSquared,
+    U08 u1_cu_size,
+    U08 u1_base_block_size,
+    S32 i4_part_id)
+{
+    U08 i, j;
+    ULWORD64 u8_final_variance;
+
+    U08 u1_part_dimension_multiplier = (u1_cu_size >> 4);
+    S32 i4_part_wd = gai1_part_wd_and_ht[i4_part_id][0] * u1_part_dimension_multiplier;
+    S32 i4_part_ht = gai1_part_wd_and_ht[i4_part_id][1] * u1_part_dimension_multiplier;
+    U08 u1_num_base_blocks_in_pu_row = i4_part_wd / u1_base_block_size;
+    U08 u1_num_base_blocks_in_pu_column = i4_part_ht / u1_base_block_size;
+    U08 u1_num_base_blocks_in_cu_row = u1_cu_size / u1_base_block_size;
+    U08 u1_num_base_blocks = (u1_num_base_blocks_in_pu_row * u1_num_base_blocks_in_pu_column);
+    U32 u4_num_pixels_in_base_block = u1_base_block_size * u1_base_block_size;
+    /* N: number of pixels covered by the partition */
+    U32 u4_N = u1_num_base_blocks * u4_num_pixels_in_base_block;
+    /* N^2 is formed in 64 bits so that it cannot wrap */
+    ULWORD64 u8_N_squared = (ULWORD64)u4_N * u4_N;
+    ULWORD64 u8_final_SigmaXSquared = 0;
+    ULWORD64 u8_final_SigmaX = 0;
+
+    /* Top-left base block of the partition; (0, 0) unless changed below */
+    U08 u1_row_start_index = 0;
+    U08 u1_column_start_index = 0;
+    U08 u1_row_end_index;
+    U08 u1_column_end_index;
+
+    if(ge_part_id_to_part_type[i4_part_id] != PRT_NxN)
+    {
+        /* Part 0 starts at the CU origin. A non-zero part number is placed */
+        /* flush against the bottom edge (vertical) or right edge of the CU */
+        if(gau1_part_id_to_part_num[i4_part_id])
+        {
+            if(gai1_is_part_vertical[i4_part_id])
+            {
+                u1_row_start_index = (u1_cu_size - i4_part_ht) / u1_base_block_size;
+            }
+            else
+            {
+                u1_column_start_index = (u1_cu_size - i4_part_wd) / u1_base_block_size;
+            }
+        }
+    }
+    else
+    {
+        /* NxN: the four parts tile the CU in raster order */
+        switch(gau1_part_id_to_part_num[i4_part_id])
+        {
+        case 0:
+        {
+            break;
+        }
+        case 1:
+        {
+            u1_column_start_index = u1_num_base_blocks_in_pu_row;
+
+            break;
+        }
+        case 2:
+        {
+            u1_row_start_index = u1_num_base_blocks_in_pu_column;
+
+            break;
+        }
+        case 3:
+        {
+            u1_row_start_index = u1_num_base_blocks_in_pu_column;
+            u1_column_start_index = u1_num_base_blocks_in_pu_row;
+
+            break;
+        }
+        default:
+        {
+            /* Unexpected part number: flag it and fall back to the origin */
+            /* instead of using indeterminate start indices                */
+            ASSERT(0);
+
+            break;
+        }
+        }
+    }
+
+    u1_column_end_index = u1_column_start_index + u1_num_base_blocks_in_pu_row;
+    u1_row_end_index = u1_row_start_index + u1_num_base_blocks_in_pu_column;
+
+    for(i = u1_row_start_index; i < u1_row_end_index; i++)
+    {
+        for(j = u1_column_start_index; j < u1_column_end_index; j++)
+        {
+            u8_final_SigmaXSquared += pu8_SigmaXSquared[j + i * u1_num_base_blocks_in_cu_row];
+            u8_final_SigmaX += pu8_SigmaX[j + i * u1_num_base_blocks_in_cu_row];
+        }
+    }
+
+    /* (N * sum(X^2) - sum(X)^2) / N^2, with N^2 / 2 added for rounding */
+    u8_final_variance = u4_N * u8_final_SigmaXSquared;
+    u8_final_variance -= u8_final_SigmaX * u8_final_SigmaX;
+    u8_final_variance += (u8_N_squared / 2);
+    u8_final_variance /= u8_N_squared;
+
+    ASSERT(u8_final_variance <= UINT_MAX);
+
+    return (U32)u8_final_variance;
+}
+
+/**
+ * Computes the variance of every valid partition of a CU.
+ *
+ * The CU is split into square base blocks whose size comes from
+ * hme_determine_base_block_size(). The sum and the sum of squares of each
+ * base block are gathered first; the variance of each partition is then
+ * derived from them by hme_compute_variance_of_pu_from_base_blocks().
+ *
+ * pu1_data               top-left pixel of the CU
+ * i4_data_stride         stride of pu1_data
+ * pi4_valid_part_array   ids of the partitions to evaluate
+ * pu4_variance           [out] indexed by partition id
+ * i4_num_valid_parts     number of entries in pi4_valid_part_array
+ * u1_cu_size             CU width/height in pixels
+ */
+void hme_compute_variance_for_all_parts(
+    U08 *pu1_data,
+    S32 i4_data_stride,
+    S32 *pi4_valid_part_array,
+    U32 *pu4_variance,
+    S32 i4_num_valid_parts,
+    U08 u1_cu_size)
+{
+    ULWORD64 au8_SigmaX[16];
+    ULWORD64 au8_SigmaXSquared[16];
+    U08 u1_blk_row, u1_blk_col, u1_y, u1_x, u1_part;
+    U08 u1_base_block_size;
+    U08 u1_blks_per_side;
+
+    u1_base_block_size =
+        hme_determine_base_block_size(pi4_valid_part_array, i4_num_valid_parts, u1_cu_size);
+
+    u1_blks_per_side = u1_cu_size / u1_base_block_size;
+
+    /* The local arrays hold at most a 4x4 grid of base blocks */
+    ASSERT(u1_blks_per_side <= 4);
+
+    for(u1_blk_row = 0; u1_blk_row < u1_blks_per_side; u1_blk_row++)
+    {
+        for(u1_blk_col = 0; u1_blk_col < u1_blks_per_side; u1_blk_col++)
+        {
+            U08 *pu1_blk = pu1_data + (u1_base_block_size * u1_blk_col) +
+                           (u1_base_block_size * u1_blk_row * i4_data_stride);
+            ULWORD64 u8_sum = 0;
+            ULWORD64 u8_sum_sq = 0;
+
+            for(u1_y = 0; u1_y < u1_base_block_size; u1_y++)
+            {
+                for(u1_x = 0; u1_x < u1_base_block_size; u1_x++)
+                {
+                    U08 u1_pel = pu1_blk[u1_x + u1_y * i4_data_stride];
+
+                    u8_sum += u1_pel;
+                    u8_sum_sq += u1_pel * u1_pel;
+                }
+            }
+
+            au8_SigmaX[u1_blk_col + u1_blk_row * u1_blks_per_side] = u8_sum;
+            au8_SigmaXSquared[u1_blk_col + u1_blk_row * u1_blks_per_side] = u8_sum_sq;
+        }
+    }
+
+    for(u1_part = 0; u1_part < i4_num_valid_parts; u1_part++)
+    {
+        S32 i4_part_id = pi4_valid_part_array[u1_part];
+
+        pu4_variance[i4_part_id] = hme_compute_variance_of_pu_from_base_blocks(
+            au8_SigmaX, au8_SigmaXSquared, u1_cu_size, u1_base_block_size, i4_part_id);
+    }
+}
+
+/**
+ * Accumulates the per-base-block sums that belong to one partition (PU).
+ *
+ * pu4_SigmaX and pu4_SigmaXSquared are read as row-major grids with
+ * u1_base_blk_array_stride entries per row. The entries covered by partition
+ * i4_part_id are added up into pu8_final_sigmaX[i4_part_id] and
+ * pu8_final_sigmaX_Squared[i4_part_id]; the latter is finally multiplied by
+ * N, the number of pixels in the partition.
+ *
+ * Partition geometry is taken from gai1_part_wd_and_ht (scaled by
+ * u1_cu_size >> 4), ge_part_id_to_part_type, gau1_part_id_to_part_num and
+ * gai1_is_part_vertical.
+ *
+ * pu4_SigmaX                 per-base-block sum of pixels
+ * pu4_SigmaXSquared          per-base-block sum of squared pixels
+ * pu8_final_sigmaX           [out] indexed by partition id
+ * pu8_final_sigmaX_Squared   [out] indexed by partition id, scaled by N
+ * u1_cu_size                 CU width/height in pixels
+ * u1_base_block_size         width/height of one base block in pixels
+ * i4_part_id                 partition to accumulate
+ * u1_base_blk_array_stride   row stride of the two input grids
+ */
+void hme_compute_final_sigma_of_pu_from_base_blocks(
+    U32 *pu4_SigmaX,
+    U32 *pu4_SigmaXSquared,
+    ULWORD64 *pu8_final_sigmaX,
+    ULWORD64 *pu8_final_sigmaX_Squared,
+    U08 u1_cu_size,
+    U08 u1_base_block_size,
+    S32 i4_part_id,
+    U08 u1_base_blk_array_stride)
+{
+    U08 i, j;
+
+    U08 u1_part_dimension_multiplier = (u1_cu_size >> 4);
+    S32 i4_part_wd = gai1_part_wd_and_ht[i4_part_id][0] * u1_part_dimension_multiplier;
+    S32 i4_part_ht = gai1_part_wd_and_ht[i4_part_id][1] * u1_part_dimension_multiplier;
+    U08 u1_num_base_blocks_in_pu_row = i4_part_wd / u1_base_block_size;
+    U08 u1_num_base_blocks_in_pu_column = i4_part_ht / u1_base_block_size;
+    U16 u2_num_base_blocks = (u1_num_base_blocks_in_pu_row * u1_num_base_blocks_in_pu_column);
+    U32 u4_num_pixels_in_base_block = u1_base_block_size * u1_base_block_size;
+    /* N: number of pixels covered by the partition */
+    U32 u4_N = (u2_num_base_blocks * u4_num_pixels_in_base_block);
+
+    /* Top-left base block of the partition; (0, 0) unless changed below */
+    U08 u1_row_start_index = 0;
+    U08 u1_column_start_index = 0;
+    U08 u1_row_end_index;
+    U08 u1_column_end_index;
+
+    pu8_final_sigmaX[i4_part_id] = 0;
+    pu8_final_sigmaX_Squared[i4_part_id] = 0;
+
+    if(ge_part_id_to_part_type[i4_part_id] != PRT_NxN)
+    {
+        /* Part 0 starts at the CU origin. A non-zero part number is placed */
+        /* flush against the bottom edge (vertical) or right edge of the CU */
+        if(gau1_part_id_to_part_num[i4_part_id])
+        {
+            if(gai1_is_part_vertical[i4_part_id])
+            {
+                u1_row_start_index = (u1_cu_size - i4_part_ht) / u1_base_block_size;
+            }
+            else
+            {
+                u1_column_start_index = (u1_cu_size - i4_part_wd) / u1_base_block_size;
+            }
+        }
+    }
+    else
+    {
+        /* NxN: the four parts tile the CU in raster order */
+        switch(gau1_part_id_to_part_num[i4_part_id])
+        {
+        case 0:
+        {
+            break;
+        }
+        case 1:
+        {
+            u1_column_start_index = u1_num_base_blocks_in_pu_row;
+
+            break;
+        }
+        case 2:
+        {
+            u1_row_start_index = u1_num_base_blocks_in_pu_column;
+
+            break;
+        }
+        case 3:
+        {
+            u1_row_start_index = u1_num_base_blocks_in_pu_column;
+            u1_column_start_index = u1_num_base_blocks_in_pu_row;
+
+            break;
+        }
+        default:
+        {
+            /* Unexpected part number: flag it and fall back to the origin */
+            /* instead of using indeterminate start indices                */
+            ASSERT(0);
+
+            break;
+        }
+        }
+    }
+
+    u1_column_end_index = u1_column_start_index + u1_num_base_blocks_in_pu_row;
+    u1_row_end_index = u1_row_start_index + u1_num_base_blocks_in_pu_column;
+
+    for(i = u1_row_start_index; i < u1_row_end_index; i++)
+    {
+        for(j = u1_column_start_index; j < u1_column_end_index; j++)
+        {
+            pu8_final_sigmaX_Squared[i4_part_id] +=
+                pu4_SigmaXSquared[j + i * u1_base_blk_array_stride];
+            pu8_final_sigmaX[i4_part_id] += pu4_SigmaX[j + i * u1_base_blk_array_stride];
+        }
+    }
+
+    /* Hand back N * sum(X^2) so callers can form N * sum(X^2) - sum(X)^2 */
+    pu8_final_sigmaX_Squared[i4_part_id] *= u4_N;
+}
+
+void hme_compute_stim_injected_distortion_for_all_parts(
+ U08 *pu1_pred, /* prediction block; a u1_cu_size x u1_cu_size area is read */
+ S32 i4_pred_stride, /* stride of pu1_pred */
+ S32 *pi4_valid_part_array, /* ids of the partitions to process */
+ ULWORD64 *pu8_src_sigmaX, /* forwarded to ihevce_calc_stim_injected_variance */
+ ULWORD64 *pu8_src_sigmaXSquared, /* forwarded to ihevce_calc_stim_injected_variance */
+ S32 *pi4_sad_array, /* indexed by partition id; updated in place */
+ S32 i4_alpha_stim_multiplier, /* weight applied to the noise term; 0 disables it */
+ S32 i4_inv_wt, /* forwarded to ihevce_calc_stim_injected_variance */
+ S32 i4_inv_wt_shift_val, /* forwarded to ihevce_calc_stim_injected_variance */
+ S32 i4_num_valid_parts, /* number of entries read from pi4_valid_part_array */
+ S32 i4_wpred_log_wdc, /* forwarded to ihevce_calc_stim_injected_variance */
+ U08 u1_cu_size) /* CU width/height in pixels; asserted to be >= 16 */
+{ /* Rescales the SAD of every valid partition in place:
+   *
+   *     sad = (sad * ((1 << q) - noise_term) + (1 << (q - 1))) >> q
+   *
+   * with q = STIM_Q_FORMAT + ALPHA_Q_FORMAT. The noise term is built from
+   * the variance of the prediction (ref) and the variance reported for the
+   * source by ihevce_calc_stim_injected_variance, and is then multiplied by
+   * i4_alpha_stim_multiplier. With a zero multiplier the noise term is zero.
+   */
+ U32 au4_sigmaX[16], au4_sigmaXSquared[16];
+ ULWORD64 au8_final_ref_sigmaX[17], au8_final_ref_sigmaXSquared[17];
+ S32 i4_noise_term;
+ U16 i2_count;
+
+ ULWORD64 u8_temp_var, u8_temp_var1, u8_pure_dist;
+ ULWORD64 u8_ref_X_Square, u8_src_var, u8_ref_var;
+
+ U08 u1_base_block_size;
+
+ WORD32 i4_q_level = STIM_Q_FORMAT + ALPHA_Q_FORMAT; /* shift applied when the SAD is written back */
+
+ u1_base_block_size =
+ hme_determine_base_block_size(pi4_valid_part_array, i4_num_valid_parts, u1_cu_size);
+
+ ASSERT(u1_cu_size >= 16);
+ /* Sum and sum of squares of every base block of the prediction. The '1'
+  * selects the 32-bit output arrays; the last argument is their row stride */
+ hme_compute_sigmaX_and_sigmaXSquared(
+ pu1_pred,
+ i4_pred_stride,
+ au4_sigmaX,
+ au4_sigmaXSquared,
+ u1_base_block_size,
+ u1_base_block_size,
+ u1_cu_size,
+ u1_cu_size,
+ 1,
+ u1_cu_size / u1_base_block_size);
+
+ /* Noise Term Computation */
+ for(i2_count = 0; i2_count < i4_num_valid_parts; i2_count++)
+ {
+ unsigned long u4_shift_val;
+ S32 i4_bits_req;
+ S32 part_id = pi4_valid_part_array[i2_count];
+
+ if(i4_alpha_stim_multiplier)
+ {
+ /* Final SigmaX and SigmaX-Squared Calculation */
+ hme_compute_final_sigma_of_pu_from_base_blocks(
+ au4_sigmaX,
+ au4_sigmaXSquared,
+ au8_final_ref_sigmaX,
+ au8_final_ref_sigmaXSquared,
+ u1_cu_size,
+ u1_base_block_size,
+ part_id,
+ (u1_cu_size / u1_base_block_size));
+ /* The helper already multiplied the sum of squares by N, so this is
+  * N * sum(X^2) - sum(X)^2 for the prediction */
+ u8_ref_X_Square = (au8_final_ref_sigmaX[part_id] * au8_final_ref_sigmaX[part_id]);
+ u8_ref_var = (au8_final_ref_sigmaXSquared[part_id] - u8_ref_X_Square);
+ /* Fills u8_src_var and returns a shift that is applied to u8_ref_var below;
+  * NOTE(review): presumably this brings both variances to one scale - confirm */
+ u4_shift_val = ihevce_calc_stim_injected_variance(
+ pu8_src_sigmaX,
+ pu8_src_sigmaXSquared,
+ &u8_src_var,
+ i4_inv_wt,
+ i4_inv_wt_shift_val,
+ i4_wpred_log_wdc,
+ part_id);
+
+ u8_ref_var = u8_ref_var >> u4_shift_val;
+ /* Only the range of u8_ref_var is measured; when it exceeds 27 bits both
+  * variances are shifted down by the same amount.
+  * NOTE(review): presumably keeps the products below within 64 bits - confirm */
+ GETRANGE64(i4_bits_req, u8_ref_var);
+
+ if(i4_bits_req > 27)
+ {
+ u8_ref_var = u8_ref_var >> (i4_bits_req - 27);
+ u8_src_var = u8_src_var >> (i4_bits_req - 27);
+ }
+ /* temp = 2 * round((src * ref << STIM_Q_FORMAT) / (src^2 + ref^2)); equal
+  * variances are special-cased to 1 << STIM_Q_FORMAT, which avoids 0 / 0 */
+ if(u8_src_var == u8_ref_var)
+ {
+ u8_temp_var = (1 << STIM_Q_FORMAT);
+ }
+ else
+ {
+ u8_temp_var = (u8_src_var * u8_ref_var * (1 << STIM_Q_FORMAT));
+ u8_temp_var1 = (u8_src_var * u8_src_var) + (u8_ref_var * u8_ref_var);
+ u8_temp_var = (u8_temp_var + (u8_temp_var1 / 2)); /* add half the divisor for rounding */
+ u8_temp_var = (u8_temp_var / u8_temp_var1);
+ u8_temp_var = (2 * u8_temp_var);
+ }
+
+ i4_noise_term = (UWORD32)u8_temp_var;
+
+ ASSERT(i4_noise_term >= 0);
+
+ i4_noise_term *= i4_alpha_stim_multiplier;
+ }
+ else
+ {
+ i4_noise_term = 0;
+ }
+ /* sad = (sad * ((1 << q) - noise) + rounding offset) >> q */
+ u8_pure_dist = pi4_sad_array[part_id];
+ u8_pure_dist *= ((1 << (i4_q_level)) - (i4_noise_term));
+ u8_pure_dist += (1 << ((i4_q_level)-1));
+ pi4_sad_array[part_id] = (UWORD32)(u8_pure_dist >> (i4_q_level));
+ }
+}
+
+/**
+ * Computes the sum and the sum of squares of the pixels of every base block
+ * inside a larger block.
+ *
+ * The block of u1_blk_wd x u1_blk_ht pixels at pu1_data is walked in base
+ * blocks of u1_base_blk_wd x u1_base_blk_ht pixels. The result for the base
+ * block in column c and row r is stored at index c + r * u1_array_stride.
+ *
+ * pu1_data                          top-left pixel of the block
+ * i4_buf_stride                     stride of pu1_data
+ * pv_sigmaX                         [out] sums; U32 or ULWORD64 array
+ * pv_sigmaXSquared                  [out] sums of squares; same element type
+ * u1_is_sigma_pointer_size_32_bit   nonzero: outputs are U32 arrays holding
+ *                                   the plain sums; zero: outputs are
+ *                                   ULWORD64 arrays and each sum of squares
+ *                                   is additionally multiplied by
+ *                                   u1_blk_wd * u1_blk_ht
+ * u1_array_stride                   row stride of the output arrays
+ */
+void hme_compute_sigmaX_and_sigmaXSquared(
+    U08 *pu1_data,
+    S32 i4_buf_stride,
+    void *pv_sigmaX,
+    void *pv_sigmaXSquared,
+    U08 u1_base_blk_wd,
+    U08 u1_base_blk_ht,
+    U08 u1_blk_wd,
+    U08 u1_blk_ht,
+    U08 u1_is_sigma_pointer_size_32_bit,
+    U08 u1_array_stride)
+{
+    U08 u1_blk_row, u1_blk_col, u1_y, u1_x;
+    U08 u1_blks_per_row = u1_blk_wd / u1_base_blk_wd;
+    U08 u1_blks_per_col = u1_blk_ht / u1_base_blk_ht;
+
+    for(u1_blk_row = 0; u1_blk_row < u1_blks_per_col; u1_blk_row++)
+    {
+        for(u1_blk_col = 0; u1_blk_col < u1_blks_per_row; u1_blk_col++)
+        {
+            /* Accumulated in 64 bits; narrowing to U32 on store gives the */
+            /* same value as accumulating in a U32                         */
+            ULWORD64 u8_sum = 0;
+            ULWORD64 u8_sum_sq = 0;
+            S32 i4_out_idx = u1_blk_col + u1_blk_row * u1_array_stride;
+            U08 *pu1_blk = pu1_data + (u1_base_blk_wd * u1_blk_col) +
+                           (u1_base_blk_ht * u1_blk_row * i4_buf_stride);
+
+            for(u1_y = 0; u1_y < u1_base_blk_ht; u1_y++)
+            {
+                U08 *pu1_line = pu1_blk + u1_y * i4_buf_stride;
+
+                for(u1_x = 0; u1_x < u1_base_blk_wd; u1_x++)
+                {
+                    u8_sum += pu1_line[u1_x];
+                    u8_sum_sq += (pu1_line[u1_x] * pu1_line[u1_x]);
+                }
+            }
+
+            if(u1_is_sigma_pointer_size_32_bit)
+            {
+                ((U32 *)pv_sigmaX)[i4_out_idx] = (U32)u8_sum;
+                ((U32 *)pv_sigmaXSquared)[i4_out_idx] = (U32)u8_sum_sq;
+            }
+            else
+            {
+                ((ULWORD64 *)pv_sigmaX)[i4_out_idx] = u8_sum;
+                ((ULWORD64 *)pv_sigmaXSquared)[i4_out_idx] = u8_sum_sq * u1_blk_wd * u1_blk_ht;
+            }
+        }
+    }
+}
+
+#if TEMPORAL_NOISE_DETECT
+WORD32 ihevce_16x16block_temporal_noise_detect(
+ WORD32 had_block_size,
+ WORD32 ctb_width,
+ WORD32 ctb_height,
+ ihevce_ctb_noise_params *ps_ctb_noise_params,
+ fpel_srch_cand_init_data_t *s_proj_srch_cand_init_data,
+ hme_search_prms_t *s_search_prms_blk,
+ me_frm_ctxt_t *ps_ctxt,
+ WORD32 num_pred_dir,
+ WORD32 i4_num_act_ref_l0,
+ WORD32 i4_num_act_ref_l1,
+ WORD32 i4_cu_x_off,
+ WORD32 i4_cu_y_off,
+ wgt_pred_ctxt_t *ps_wt_inp_prms,
+ WORD32 input_stride,
+ WORD32 index_8x8_block,
+ WORD32 num_horz_blocks,
+ WORD32 num_8x8_in_ctb_row,
+ WORD32 i4_16x16_index)
+{
+ WORD32 i;
+ WORD32 noise_detected;
+
+ UWORD8 *pu1_l0_block;
+ UWORD8 *pu1_l1_block;
+
+ WORD32 mean;
+ UWORD32 variance_8x8;
+
+ /* to store the mean and variance of each 8*8 block and find the variance of any higher block sizes later on. block */
+ WORD16 pi2_residue_16x16[256];
+ WORD32 mean_16x16;
+ UWORD32 variance_16x16[2];
+
+ /* throw errors in case of un- supported arguments */
+ /* assumptions size is 8 or 16 or 32 */
+ assert(
+ (had_block_size == 8) || (had_block_size == 16) || (had_block_size == 32)); //ihevc_assert
+
+ /* initialize the variables */
+ noise_detected = 0;
+ variance_8x8 = 0;
+
+ mean = 0;
+
+ {
+ i = 0;
+ /* get the ref/pred and source using the MV of both directions */
+ /* pick the best candidates in each direction */
+ /* Colocated cands */
+ {
+ // steps to be done
+ /* pick the candidates */
+ /* do motion compoensation using the candidates got from prev step : pick from the offset */
+ /* get the ref or the pred from the offset*/
+ /* get the source data */
+ /* send the pred - source to noise detect */
+ /* do noise detect on the residue of source and pred */
+
+ layer_mv_t *ps_layer_mvbank;
+ hme_mv_t *ps_mv;
+
+ //S32 i;
+ S32 wd_c, ht_c, wd_p, ht_p;
+ S32 blksize_p, blk_x, blk_y, i4_offset;
+ S08 *pi1_ref_idx;
+ fpel_srch_cand_init_data_t *ps_ctxt_2 = s_proj_srch_cand_init_data;
+ layer_ctxt_t *ps_curr_layer = ps_ctxt_2->ps_curr_layer;
+ layer_ctxt_t *ps_coarse_layer = ps_ctxt_2->ps_coarse_layer;
+ err_prms_t s_err_prms;
+ S32 i4_blk_wd;
+ S32 i4_blk_ht;
+ BLK_SIZE_T e_blk_size;
+ hme_search_prms_t *ps_search_prms;
+ S32 i4_part_mask;
+ S32 *pi4_valid_part_ids;
+
+ /* has list of valid partition to search terminated by -1 */
+ S32 ai4_valid_part_ids[TOT_NUM_PARTS + 1];
+
+ /*SEARCH_COMPLEXITY_T e_search_complexity = ps_ctxt->e_search_complexity;*/
+
+ S32 i4_pos_x;
+ S32 i4_pos_y;
+ U08 u1_pred_dir; // = ps_ctxt_2->u1_pred_dir;
+ U08 u1_default_ref_id = 0; //ps_ctxt_2->u1_default_ref_id;
+ S32 i4_inp_off, i4_ref_offset, i4_ref_stride;
+
+ /* The reference is actually an array of ptrs since there are several */
+ /* reference id. So an array gets passed form calling function */
+ U08 **ppu1_ref;
+
+ /* Atributes of input candidates */
+ search_node_t as_search_node[2];
+ wgt_pred_ctxt_t *ps_wt_inp_prms;
+
+ S32 posx;
+ S32 posy;
+ S32 i4_num_results_to_proj;
+ S32 ai4_sad_grid[9 * TOT_NUM_PARTS];
+ S32 i4_inp_stride;
+
+ /* intialize variables */
+ /* Width and ht of current and prev layers */
+ wd_c = ps_curr_layer->i4_wd;
+ ht_c = ps_curr_layer->i4_ht;
+ wd_p = ps_coarse_layer->i4_wd;
+ ht_p = ps_coarse_layer->i4_ht;
+
+ ps_search_prms = s_search_prms_blk;
+
+ ps_wt_inp_prms = &ps_ctxt->s_wt_pred;
+ e_blk_size = ps_search_prms->e_blk_size;
+ i4_part_mask = ps_search_prms->i4_part_mask;
+
+ i4_blk_wd = gau1_blk_size_to_wd[e_blk_size];
+ i4_blk_ht = gau1_blk_size_to_ht[e_blk_size];
+
+ ps_layer_mvbank = ps_coarse_layer->ps_layer_mvbank;
+ blksize_p = gau1_blk_size_to_wd_shift[ps_layer_mvbank->e_blk_size];
+
+ /* ASSERT for valid sizes */
+ ASSERT((blksize_p == 3) || (blksize_p == 4) || (blksize_p == 5));
+
+ i4_pos_x = i4_cu_x_off;
+ i4_pos_y = i4_cu_y_off;
+ posx = i4_pos_x + 2;
+ posy = i4_pos_y + 2;
+
+ i4_inp_stride = ps_search_prms->i4_inp_stride;
+ /* Move to the location of the search blk in inp buffer */
+ //i4_inp_off = i4_cu_x_off;
+ //i4_inp_off += i4_cu_y_off * i4_inp_stride;
+ i4_inp_off = (i4_16x16_index % 4) * 16;
+ i4_inp_off += (i4_16x16_index / 4) * 16 * i4_inp_stride;
+
+ /***********pick the candidates**************************************/
+ for(u1_pred_dir = 0; u1_pred_dir < num_pred_dir; u1_pred_dir++)
+ {
+ WORD32 actual_pred_dir = 0;
+
+ if(u1_pred_dir == 0 && i4_num_act_ref_l0 == 0)
+ {
+ actual_pred_dir = 1;
+ }
+ else if(u1_pred_dir == 0 && i4_num_act_ref_l0 != 0)
+ {
+ actual_pred_dir = 0;
+ }
+ else if(u1_pred_dir == 1)
+ {
+ actual_pred_dir = 1;
+ }
+
+ i4_num_results_to_proj = 1; // only the best proj
+
+ /* Safety check to avoid uninitialized access across temporal layers */
+ posx = CLIP3(posx, 0, (wd_c - blksize_p)); /* block position withing frAME */
+ posy = CLIP3(posy, 0, (ht_c - blksize_p));
+
+ /* Project the positions to prev layer */
+ blk_x = posx >> blksize_p;
+ blk_y = posy >> blksize_p;
+
+ /* Pick up the mvs from the location */
+ i4_offset = (blk_x * ps_layer_mvbank->i4_num_mvs_per_blk);
+ i4_offset += (ps_layer_mvbank->i4_num_mvs_per_row * blk_y);
+
+ ps_mv = ps_layer_mvbank->ps_mv + i4_offset;
+ pi1_ref_idx = ps_layer_mvbank->pi1_ref_idx + i4_offset;
+
+ if(actual_pred_dir == 1)
+ {
+ ps_mv += (i4_num_act_ref_l0 * ps_layer_mvbank->i4_num_mvs_per_ref);
+ pi1_ref_idx += (i4_num_act_ref_l0 * ps_layer_mvbank->i4_num_mvs_per_ref);
+ }
+
+ {
+ as_search_node[actual_pred_dir].s_mv.i2_mvx = ps_mv[0].i2_mv_x << 1;
+ as_search_node[actual_pred_dir].s_mv.i2_mvy = ps_mv[0].i2_mv_y << 1;
+ as_search_node[actual_pred_dir].i1_ref_idx = pi1_ref_idx[0];
+
+ if((as_search_node[actual_pred_dir].i1_ref_idx < 0) ||
+ (as_search_node[actual_pred_dir].s_mv.i2_mvx == INTRA_MV))
+ {
+ as_search_node[actual_pred_dir].i1_ref_idx = u1_default_ref_id;
+ as_search_node[actual_pred_dir].s_mv.i2_mvx = 0;
+ as_search_node[actual_pred_dir].s_mv.i2_mvy = 0;
+ }
+ }
+
+ /********************************************************************************************/
+ {
+ /* declare the variables */
+ //ps_fullpel_refine_ctxt = ps_search_prms->ps_fullpel_refine_ctxt;
+
+ pi4_valid_part_ids = ai4_valid_part_ids;
+ i4_ref_stride = ps_curr_layer->i4_rec_stride;
+ s_err_prms.i4_inp_stride = i4_inp_stride;
+ s_err_prms.i4_ref_stride = i4_ref_stride;
+ s_err_prms.i4_part_mask = i4_part_mask;
+ s_err_prms.pi4_sad_grid = &ai4_sad_grid[0];
+ s_err_prms.i4_blk_wd = i4_blk_wd;
+ s_err_prms.i4_blk_ht = i4_blk_ht;
+ s_err_prms.i4_step = 1;
+ s_err_prms.pi4_valid_part_ids = pi4_valid_part_ids;
+ //s_err_prms.i4_num_partitions = ps_fullpel_refine_ctxt->i4_num_valid_parts;
+
+ /*************************************************************************/
+ /* Depending on flag i4_use_rec, we use either input of previously */
+ /* encoded pictures or we use recon of previously encoded pictures. */
+ i4_ref_stride = ps_curr_layer->i4_rec_stride;
+ ppu1_ref = ps_curr_layer->ppu1_list_rec_fxfy; // pointer to the pred
+
+ i4_ref_offset = (i4_ref_stride * i4_cu_y_off) + i4_cu_x_off; //i4_x_off;
+
+ s_err_prms.pu1_ref =
+ ppu1_ref[as_search_node[actual_pred_dir].i1_ref_idx] + i4_ref_offset;
+ s_err_prms.pu1_ref += as_search_node[actual_pred_dir].s_mv.i2_mvx;
+ s_err_prms.pu1_ref +=
+ as_search_node[actual_pred_dir].s_mv.i2_mvy * i4_ref_stride;
+
+ /*get the source */
+ s_err_prms.pu1_inp =
+ ps_wt_inp_prms->apu1_wt_inp[as_search_node[actual_pred_dir].i1_ref_idx] +
+ i4_inp_off; //pu1_src_input + i4_inp_off;//ps_wt_inp_prms->apu1_wt_inp[as_search_node[actual_pred_dir].i1_ref_idx] + i4_inp_off;
+
+ /* send the pred - source to noise detect */
+ // noise_detect_hme(noise_structure, s_err_prms.pu1_inp, s_err_prms.pu1_ref);
+ }
+ /* change the l0/l1 blcok pointer names accrodingle */
+
+ /* get memory pointers the input and the reference */
+ pu1_l0_block = s_err_prms.pu1_inp;
+ pu1_l1_block = s_err_prms.pu1_ref;
+
+ {
+ WORD32 i2, j2;
+ WORD32 dim = 16;
+ UWORD8 *buf1;
+ UWORD8 *buf2;
+ for(i2 = 0; i2 < dim; i2++)
+ {
+ buf1 = pu1_l0_block + i2 * i4_inp_stride;
+ buf2 = pu1_l1_block + i2 * i4_ref_stride;
+
+ for(j2 = 0; j2 < dim; j2++)
+ {
+ pi2_residue_16x16[i2 * dim + j2] = (WORD16)(buf1[j2] - buf2[j2]);
+ }
+ }
+
+ ihevce_calc_variance_signed(
+ pi2_residue_16x16, 16, &mean_16x16, &variance_16x16[u1_pred_dir], 16, 16);
+
+ /* compare the source and residue variance for this block ps_ctb_noise_params->i4_variance_src_16x16 */
+ if(variance_16x16[u1_pred_dir] >
+ ((TEMPORAL_VARIANCE_FACTOR *
+ ps_ctb_noise_params->au4_variance_src_16x16[i4_16x16_index]) >>
+ Q_TEMPORAL_VARIANCE_FACTOR))
+ {
+ /* update noisy block count only if all best MV in diff directions indicates noise */
+ if(u1_pred_dir == num_pred_dir - 1)
+ {
+ ps_ctb_noise_params->au1_is_8x8Blk_noisy[index_8x8_block] = 1;
+ ps_ctb_noise_params->au1_is_8x8Blk_noisy[index_8x8_block + 1] = 1;
+ ps_ctb_noise_params
+ ->au1_is_8x8Blk_noisy[index_8x8_block + num_8x8_in_ctb_row] = 1;
+ ps_ctb_noise_params
+ ->au1_is_8x8Blk_noisy[index_8x8_block + num_8x8_in_ctb_row + 1] = 1;
+ noise_detected = 1;
+ }
+ }
+ else /* if any one of the direction mv says it as non noise then dont check for the other directions MV , move for next block*/
+ {
+ noise_detected = 0;
+ ps_ctb_noise_params->au1_is_8x8Blk_noisy[index_8x8_block] = 0;
+ ps_ctb_noise_params->au1_is_8x8Blk_noisy[index_8x8_block + 1] = 0;
+ ps_ctb_noise_params
+ ->au1_is_8x8Blk_noisy[index_8x8_block + num_8x8_in_ctb_row] = 0;
+ ps_ctb_noise_params
+ ->au1_is_8x8Blk_noisy[index_8x8_block + num_8x8_in_ctb_row + 1] = 0;
+ break;
+ }
+ } // variance analysis and calculation
+ } // for each direction
+ } // HME code
+
+ } // for each 16x16 block
+
+ return (noise_detected);
+}
+#endif
+
+/**
+********************************************************************************
+* @fn hme_qpel_interp_avg_1pt
+*
+* @brief Builds the blk at one QPEL mv as the point wise average of two source
+*        blks and publishes the output pointer and stride for i4_buf_id.
+*
+* i4_mv_x and i4_mv_y are in QPEL units w.r.t. the (0,0) pt of the reference
+* blk colocated to the inp blk. The two sources come from a table look up on
+* the fractional parts of the mv:
+*
+*          A j E k B
+*          l m n o p
+*          F q G r H
+*          s t u v w
+*          C x I y D
+*
+* A, B, C, D are full pel pts at offsets (0,0),(1,0),(0,1),(1,1) in the fxfy
+* buf (id 0). E and I are hxfy pts at offsets (0,0),(0,1) in the hxfy buf
+* (id 1). F and H are fxhy pts at offsets (0,0),(1,0) in the fxhy buf (id 2).
+* G is the hxhy pt at offset (0,0) in the hxhy buf (id 3). Offsets are w.r.t.
+* the motion displaced pt, so A corresponds to (i4_mv_x >> 2, i4_mv_y >> 2).
+* Example: v has a fractional comp of (3,3) and is the avg of H and I, so its
+* entry gives src 1 = buf id 2 at offset (1,0) and src 2 = buf id 1 at (0,1).
+* NOTE: For pts that are fxfy/hxfy/fxhy/hxhy, bufid 1 will be -1.
+*
+* @param[in] ps_prms : ref planes, ref/out strides, blk size and output bufs
+* @param[in] i4_mv_x : x component of the mv (QPEL units)
+* @param[in] i4_mv_y : y component of the mv (QPEL units)
+* @param[in] i4_buf_id : index of the output buf and of the result entries
+* @param[out] ppu1_final : entry i4_buf_id is set to the output buf
+* @param[out] pi4_final_stride : entry i4_buf_id is set to the output stride
+*
+* @return void
+********************************************************************************
+*/
+void hme_qpel_interp_avg_1pt(
+    interp_prms_t *ps_prms,
+    S32 i4_mv_x,
+    S32 i4_mv_y,
+    S32 i4_buf_id,
+    U08 **ppu1_final,
+    S32 *pi4_final_stride)
+{
+    S32 i4_ref_stride = ps_prms->i4_ref_stride;
+    /* Full pel part of the mv expressed as an offset in the ref planes */
+    S32 i4_fpel_offset = (i4_mv_x >> 2) + (i4_mv_y >> 2) * i4_ref_stride;
+    /* Fractional part of the mv selects the descriptor of the two sources */
+    qpel_input_buf_cfg_t *ps_cfg = &gas_qpel_inp_buf_cfg[i4_mv_y & 3][i4_mv_x & 3];
+    U08 *pu1_first = ps_prms->ppu1_ref[ps_cfg->i1_buf_id1];
+    U08 *pu1_second = ps_prms->ppu1_ref[ps_cfg->i1_buf_id2];
+    U08 *pu1_out = ps_prms->apu1_interp_out[i4_buf_id];
+
+    /* Move both sources to the displaced position given by the descriptor */
+    pu1_first += ps_cfg->i1_buf_xoff1 + i4_fpel_offset;
+    pu1_first += ps_cfg->i1_buf_yoff1 * i4_ref_stride;
+    pu1_second += ps_cfg->i1_buf_xoff2 + i4_fpel_offset;
+    pu1_second += ps_cfg->i1_buf_yoff2 * i4_ref_stride;
+
+    hevc_avg_2d(
+        pu1_first, pu1_second, i4_ref_stride, i4_ref_stride,
+        ps_prms->i4_blk_wd, ps_prms->i4_blk_ht, pu1_out, ps_prms->i4_out_stride);
+
+    /* Publish the result for this buf id */
+    ppu1_final[i4_buf_id] = pu1_out;
+    pi4_final_stride[i4_buf_id] = ps_prms->i4_out_stride;
+}
+
+void hme_qpel_interp_avg_2pt_vert_with_reuse(
+ interp_prms_t *ps_prms, S32 i4_mv_x, S32 i4_mv_y, U08 **ppu1_final, S32 *pi4_final_stride)
+{ /* Averages the two pts vertically adjacent (one QPEL unit away) to (i4_mv_x, i4_mv_y) */
+ hme_qpel_interp_avg_1pt(ps_prms, i4_mv_x, i4_mv_y + 1, 3, ppu1_final, pi4_final_stride);
+ /* Above: the pt at i4_mv_y + 1 is written to buf id 3. Below: i4_mv_y - 1 goes to buf id 1 */
+ hme_qpel_interp_avg_1pt(ps_prms, i4_mv_x, i4_mv_y - 1, 1, ppu1_final, pi4_final_stride);
+}
+
+void hme_qpel_interp_avg_2pt_horz_with_reuse(
+ interp_prms_t *ps_prms, S32 i4_mv_x, S32 i4_mv_y, U08 **ppu1_final, S32 *pi4_final_stride)
+{ /* Averages the two pts horizontally adjacent (one QPEL unit away) to (i4_mv_x, i4_mv_y) */
+ hme_qpel_interp_avg_1pt(ps_prms, i4_mv_x + 1, i4_mv_y, 2, ppu1_final, pi4_final_stride);
+ /* Above: the pt at i4_mv_x + 1 is written to buf id 2. Below: i4_mv_x - 1 goes to buf id 0 */
+ hme_qpel_interp_avg_1pt(ps_prms, i4_mv_x - 1, i4_mv_y, 0, ppu1_final, pi4_final_stride);
+}
+
+/**
+* @brief Sets the mv limits of every active reference. For bidir pics the vertical
+*        limit scales with the POC distance to the reference, capped at the layer
+*        max; for other pics the layer max limits are used directly.
+* @param[in] ps_ctxt : frame ctxt (bidir flag, cur/prev POC, ref idx to POC map)
+* @param[in] ps_curr_layer : layer ctxt supplying i2_max_mv_x and i2_max_mv_y
+* @param[out] ps_mv_limit : per reference min/max mv limits
+* @param[out] pi2_prev_enc_frm_max_mv_y : [0] gets the vertical limit of the ref whose POC is the
+*             prev. encoded frame POC (bidir pics; 0 if none matches), else the layer max mv y
+* @param[in] u1_num_act_ref_pics : number of references processed for a bidir pic
+*/
+void hme_set_mv_limit_using_dvsr_data(
+    me_frm_ctxt_t *ps_ctxt,
+    layer_ctxt_t *ps_curr_layer,
+    range_prms_t *ps_mv_limit,
+    S16 *pi2_prev_enc_frm_max_mv_y,
+    U08 u1_num_act_ref_pics)
+{
+    WORD32 ref_ctr;
+
+    /* Only for B/b pic. */
+    if(1 == ps_ctxt->s_frm_prms.bidir_enabled)
+    {
+        WORD32 cur_poc, prev_poc, i4_mv_y_per_poc;
+
+        pi2_prev_enc_frm_max_mv_y[0] = 0;
+
+        cur_poc = ps_ctxt->i4_curr_poc;
+        /* The prev. encoded frame POC is the same for every reference */
+        prev_poc = ps_ctxt->i4_prev_poc;
+        /* Get abs MAX for symmetric search, assuming P to P distance as 4 */
+        i4_mv_y_per_poc = (ps_curr_layer->i2_max_mv_y + 2) >> 2;
+
+        for(ref_ctr = 0; ref_ctr < u1_num_act_ref_pics; ref_ctr++)
+        {
+            WORD32 ref_poc = ps_ctxt->ai4_ref_idx_to_poc_lc[ref_ctr];
+            WORD32 abs_poc_diff = ABS((cur_poc - ref_poc));
+            /* Scale in 32 bit and cap before narrowing so a large product cannot wrap */
+            WORD32 i4_scaled_mv_y = i4_mv_y_per_poc * abs_poc_diff;
+            WORD16 i2_max_mv_y = (WORD16)MIN(i4_scaled_mv_y, ps_curr_layer->i2_max_mv_y);
+
+            ps_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x;
+            ps_mv_limit[ref_ctr].i2_min_y = -i2_max_mv_y;
+            ps_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x;
+            ps_mv_limit[ref_ctr].i2_max_y = i2_max_mv_y;
+
+            /* Note the MAX MV for the prev. encoded frame (reverse dependency of ME on Enc.Loop) */
+            if(ref_poc == prev_poc)
+            {
+                /* TO DO : Same thing for horz. search also */
+                pi2_prev_enc_frm_max_mv_y[0] = i2_max_mv_y;
+            }
+        }
+    }
+    else
+    {
+        ASSERT(0 == ps_ctxt->s_frm_prms.u1_num_active_ref_l1);
+
+        /* Set the Config. File Params for P pic. */
+        for(ref_ctr = 0; ref_ctr < ps_ctxt->s_frm_prms.u1_num_active_ref_l0; ref_ctr++)
+        {
+            ps_mv_limit[ref_ctr].i2_min_x = -ps_curr_layer->i2_max_mv_x;
+            ps_mv_limit[ref_ctr].i2_min_y = -ps_curr_layer->i2_max_mv_y;
+            ps_mv_limit[ref_ctr].i2_max_x = ps_curr_layer->i2_max_mv_x;
+            ps_mv_limit[ref_ctr].i2_max_y = ps_curr_layer->i2_max_mv_y;
+        }
+
+        /* For P PIC., go with Config. File Params */
+        pi2_prev_enc_frm_max_mv_y[0] = ps_curr_layer->i2_max_mv_y;
+    }
+}
+
+/* Returns the partition mask to be searched: ENABLE_NxN when u1_blk_8x8_mask != 15, */
+/* else the pruned result of hme_study_input_segmentation() or plain ENABLE_2Nx2N.   */
+S32 hme_part_mask_populator(
+    U08 *pu1_inp,
+    S32 i4_inp_stride,
+    U08 u1_limit_active_partitions,
+    U08 u1_is_bPic,
+    U08 u1_is_refPic,
+    U08 u1_blk_8x8_mask,
+    ME_QUALITY_PRESETS_T e_me_quality_preset)
+{
+    S32 i4_mask;
+    U08 u1_study_input = (ME_XTREME_SPEED_25 != e_me_quality_preset);
+
+    if(15 != u1_blk_8x8_mask)
+    {
+        return ENABLE_NxN;
+    }
+
+    /* Only ME_XTREME_SPEED_25 makes the input study depend on the picture type */
+    if(!u1_study_input)
+    {
+        u1_study_input = !u1_is_bPic ? !DISABLE_8X8CUS_IN_PPICS_IN_P6
+                                     : (u1_is_refPic ? !DISABLE_8X8CUS_IN_REFBPICS_IN_P6
+                                                     : !DISABLE_8X8CUS_IN_NREFBPICS_IN_P6);
+    }
+    if(!u1_study_input)
+    {
+        return ENABLE_2Nx2N;
+    }
+
+    i4_mask = hme_study_input_segmentation(pu1_inp, i4_inp_stride, u1_limit_active_partitions);
+
+    /* ME_XTREME_SPEED_25 drops AMP and SMP; ME_XTREME_SPEED drops AMP only */
+    if(ME_XTREME_SPEED_25 == e_me_quality_preset)
+    {
+        i4_mask &= ~ENABLE_AMP;
+        i4_mask &= ~ENABLE_SMP;
+    }
+    else if(ME_XTREME_SPEED == e_me_quality_preset)
+    {
+        i4_mask &= ~ENABLE_AMP;
+    }
+
+    return i4_mask;
+}
diff --git a/encoder/hme_utils.h b/encoder/hme_utils.h
new file mode 100644
index 0000000..6bdf1dc
--- /dev/null
+++ b/encoder/hme_utils.h
@@ -0,0 +1,1019 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file hme_utils.h
+*
+* \brief
+* Prototypes for various utilities used by coarse/refinement/subpel fxns
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _HME_UTILS_H_
+#define _HME_UTILS_H_
+
+/*****************************************************************************/
+/* Functions */
+/*****************************************************************************/
+
+/**
+********************************************************************************
+* @fn hme_init_histogram(
+*
+* @brief Initializes the mv histogram structure for the given maximum mv
+* allowed in the x and y directions.
+*
+* @param[in,out] ps_hist : the histogram structure
+*
+* @param[in] i4_max_mv_x : Maximum mv allowed in x direction (fpel units)
+*
+* @param[in] i4_max_mv_y : Maximum mv allowed in y direction (fpel units)
+*
+* @return None
+********************************************************************************
+*/
+void hme_init_histogram(mv_hist_t *ps_hist, S32 i4_max_mv_x, S32 i4_max_mv_y);
+
+/**
+********************************************************************************
+* @fn hme_update_histogram(
+*
+* @brief Updates the histogram given an mv entry
+*
+* @param[in,out] ps_hist : the histogram structure
+*
+* @param[in] i4_mv_x : x component of the mv (fpel units)
+*
+* @param[in] i4_mv_y : y component of the mv (fpel units)
+*
+* @return None
+********************************************************************************
+*/
+void hme_update_histogram(mv_hist_t *ps_hist, S32 i4_mv_x, S32 i4_mv_y);
+
+/**
+********************************************************************************
+* @fn hme_get_global_mv(
+*
+* @brief returns the global mv of a previous picture. Accounts for the fact
+* that the delta poc of the previous picture may have been different
+* from delta poc of current picture. Delta poc is POC difference
+* between a picture and its reference.
+*
+* @param[out] ps_mv: mv_t structure where the motion vector is returned
+*
+* @param[in] i4_delta_poc: the delta poc for the current pic w.r.t. reference
+*
+* @return None
+********************************************************************************
+*/
+void hme_get_global_mv(layer_ctxt_t *ps_prev_layer, hme_mv_t *ps_mv, S32 i4_delta_poc);
+
+/**
+********************************************************************************
+* @fn hme_calculate_global_mv(
+*
+* @brief Calculates global mv for a given histogram
+*
+* @param[in] ps_hist : the histogram structure
+*
+* @param[out] ps_mv : used to return the global mv
+*
+* @param[in] e_lobe_type : refer to GMV_MVTYPE_T
+*
+* @return None
+********************************************************************************
+*/
+void hme_calculate_global_mv(mv_hist_t *ps_hist, hme_mv_t *ps_mv, GMV_MVTYPE_T e_lobe_type);
+
+/**
+********************************************************************************
+* @fn hme_collate_fpel_results(search_results_t *ps_search_results,
+* S32 i1_ref_idx, S32 i1_idx_to_merge)
+*
+* @brief After full pel search and result seeding in every search iteration
+* results, this function called to collapse a given search iteration
+* results into another.
+*
+* @param[in,out] ps_search_results : Search results data structure
+* @param[in] i1_ref_idx: id of the search iteration where the results
+* will be collapsed
+* @param[in] i1_idx_to_merge : id of the search iteration from which the
+* results are picked up.
+
+*
+* @return None
+********************************************************************************
+*/
+void hme_collate_fpel_results(
+ search_results_t *ps_search_results, S08 i1_ref_idx, S08 i1_idx_to_merge);
+
+/**
+********************************************************************************
+* @fn hme_map_mvs_to_grid(mv_grid_t **pps_mv_grid,
+ search_results_t *ps_search_results, S32 i4_num_ref)
+*
+* @brief For a given CU whose results are in ps_search_results, the 17x17
+* mv grid is updated for future use within the CTB
+*
+* @param[in] ps_search_results : Search results data structure
+*
+* @param[out] pps_mv_grid: The mv grid (as many as num ref)
+*
+* @param[in] i4_num_ref: number of search iterations to update
+*
+* @param[in] mv_res_shift: Shift for resolution of mv (fpel/qpel)
+*
+* @return None
+********************************************************************************
+*/
+void hme_map_mvs_to_grid(
+ mv_grid_t **pps_mv_grid,
+ search_results_t *ps_search_results,
+ U08 *pu1_pred_dir_searched,
+ S32 i4_num_pred_dir);
+
+/**
+********************************************************************************
+* @fn hme_create_valid_part_ids(S32 i4_part_mask, S32 *pi4_valid_part_ids)
+*
+* @brief Expands the part mask to a list of valid part ids terminated by -1
+*
+* @param[in] i4_part_mask : bit mask of active partition ids
+*
+* @param[out] pi4_valid_part_ids : array, each entry has one valid part id
+* Terminated by -1 to signal end.
+*
+* @return number of partitions
+********************************************************************************
+*/
+S32 hme_create_valid_part_ids(S32 i4_part_mask, S32 *pi4_valid_part_ids);
+
+/**
+********************************************************************************
+* @fn get_num_blks_in_ctb(S32 i4_ctb_x,
+ S32 i4_ctb_y,
+ S32 i4_pic_wd,
+ S32 i4_pic_ht,
+ S32 i4_blk_size)
+*
+* @brief returns the number of blks in the ctb (64x64 ctb)
+*
+* @param[in] i4_ctb_x : pixel x offset of the top left corner of ctb in pic
+*
+* @param[in] i4_ctb_y : pixel y offset of the top left corner of ctb in pic
+*
+* @param[in] i4_pic_wd : width of the picture in pixels
+*
+* @param[in] i4_pic_ht : height of the picture in pixels
+*
+* @param[in] i4_blk_size : Size of the blk in pixels
+*
+* @return number of blks in the ctb
+********************************************************************************
+*/
+S32 get_num_blks_in_ctb(S32 i4_ctb_x, S32 i4_ctb_y, S32 i4_pic_wd, S32 i4_pic_ht, S32 i4_blk_size);
+
+/**
+********************************************************************************
+* @fn hevc_avg_2d(U08 *pu1_src1,
+* U08 *pu1_src2,
+* S32 i4_src1_stride,
+* S32 i4_src2_stride,
+* S32 i4_blk_wd,
+* S32 i4_blk_ht,
+* U08 *pu1_dst,
+* S32 i4_dst_stride)
+*
+*
+* @brief point wise average of two buffers into a third buffer
+*
+* @param[in] pu1_src1 : first source buffer
+*
+* @param[in] pu1_src2 : 2nd source buffer
+*
+* @param[in] i4_src1_stride : stride of source 1 buffer
+*
+* @param[in] i4_src2_stride : stride of source 2 buffer
+*
+* @param[in] i4_blk_wd : block width
+*
+* @param[in] i4_blk_ht : block height
+*
+* @param[out] pu1_dst : destination buffer
+*
+* @param[in] i4_dst_stride : stride of the destination buffer
+*
+* @return void
+********************************************************************************
+*/
+void hevc_avg_2d(
+ U08 *pu1_src1,
+ U08 *pu1_src2,
+ S32 i4_src1_stride,
+ S32 i4_src2_stride,
+ S32 i4_blk_wd,
+ S32 i4_blk_ht,
+ U08 *pu1_dst,
+ S32 i4_dst_stride);
+
+/**
+********************************************************************************
+* @fn hme_pick_back_search_node(search_results_t *ps_search_results,
+* search_node_t *ps_search_node_fwd,
+* S32 i4_part_idx,
+* layer_ctxt_t *ps_curr_layer)
+*
+*
+* @brief returns the search node corresponding to a ref idx in same or
+* opp direction. Preference is given to opp direction, but if that
+* does not yield results, same direction is attempted.
+*
+* @param[in] ps_search_results: search results overall
+*
+* @param[in] ps_search_node_fwd: search node corresponding to "fwd" direction
+*
+* @param[in] i4_part_idx : partition id
+*
+* @param[in] ps_curr_layer : layer context for current layer.
+*
+* @return search node corresponding to the "other direction"
+********************************************************************************
+*/
+search_node_t *hme_pick_back_search_node(
+ search_results_t *ps_search_results,
+ search_node_t *ps_search_node_fwd,
+ S32 i4_part_idx,
+ layer_ctxt_t *ps_curr_layer);
+
+/**
+********************************************************************************
+* @fn hme_study_input_segmentation(U08 *pu1_inp,
+* S32 i4_inp_stride,
+* S32 limit_active_partitions)
+*
+*
+* @brief Examines input 16x16 for possible edges and orientations of those,
+* and returns a bit mask of partitions that should be searched for
+*
+* @param[in] pu1_inp : input buffer
+*
+* @param[in] i4_inp_stride: input stride
+*
+* @param[in] limit_active_partitions : 1: Edge algo done and partitions are
+* limited, 0 : Brute force, all partitions considered
+*
+* @return part mask (bit mask of active partitions to search)
+********************************************************************************
+*/
+S32 hme_study_input_segmentation(U08 *pu1_inp, S32 i4_inp_stride, S32 limit_active_partitions);
+
+/**
+********************************************************************************
+* @fn hme_init_search_results(search_results_t *ps_search_results,
+* S32 i4_num_ref,
+* S32 i4_num_best_results,
+* S32 i4_num_results_per_part,
+* BLK_SIZE_T e_blk_size,
+* S32 i4_x_off,
+* S32 i4_y_off)
+*
+* @brief Initializes the search results structure with some key attributes
+*
+* @param[out] ps_search_results : search results structure to initialise
+*
+* @param[in] i4_num_ref: corresponds to the number of ref ids searched
+*
+* @param[in] i4_num_best_results: Number of best results for the CU to
+* be maintained in the result structure
+*
+* @param[in] i4_num_results_per_part: Per active partition the number of best
+* results to be maintained
+*
+* @param[in] e_blk_size: blk size of the CU for which this structure used
+*
+* @param[in] i4_x_off: x offset of the top left of CU from CTB top left
+*
+* @param[in] i4_y_off: y offset of the top left of CU from CTB top left
+*
+* @return void
+********************************************************************************
+*/
+void hme_init_search_results(
+ search_results_t *ps_search_results,
+ S32 i4_num_ref,
+ S32 i4_num_best_results,
+ S32 i4_num_results_per_part,
+ BLK_SIZE_T e_blk_size,
+ S32 i4_x_off,
+ S32 i4_y_off,
+ U08 *pu1_is_past);
+
+/**
+********************************************************************************
+* @fn hme_reset_search_results(search_results_t *ps_search_results,
+* S32 i4_part_mask)
+*
+*
+* @brief Resets the best results to maximum values, so as to allow search
+* for the new CU's partitions. The existing results may be from an
+* older CU using same structure.
+*
+* @param[in] ps_search_results: search results structure
+*
+* @param[in] i4_part_mask : bit mask of active partitions
+*
+* @param[in] mv_res : Resolution of the mv predictors (fpel/qpel)
+*
+* @return void
+********************************************************************************
+*/
+void hme_reset_search_results(search_results_t *ps_search_results, S32 i4_part_mask, S32 mv_res);
+
+/**
+********************************************************************************
+* @fn hme_clamp_grid_by_mvrange(search_node_t *ps_search_node,
+* S32 i4_step,
+* range_prms_t *ps_mvrange)
+*
+* @brief Given a central pt within mv range, and a grid of points surrounding
+* this pt, this function returns a grid mask of pts within search rng
+*
+* @param[in] ps_search_node: the centre pt of the grid
+*
+* @param[in] i4_step: step size of grid
+*
+* @param[in] ps_mvrange: structure containing the current mv range
+*
+* @return bitmask of the pts in grid within search range
+********************************************************************************
+*/
+S32 hme_clamp_grid_by_mvrange(search_node_t *ps_search_node, S32 i4_step, range_prms_t *ps_mvrange);
+
+/**
+********************************************************************************
+* @fn layer_ctxt_t *hme_get_past_layer_ctxt(me_ctxt_t *ps_ctxt,
+ S32 i4_layer_id)
+*
+* @brief returns the layer ctxt of the layer with given id from the temporally
+* previous frame
+*
+* @param[in] ps_ctxt : ME context
+*
+* @param[in] i4_layer_id : id of layer required
+*
+* @return layer ctxt of given layer id in temporally previous frame
+********************************************************************************
+*/
+layer_ctxt_t *hme_get_past_layer_ctxt(
+ me_ctxt_t *ps_ctxt, me_frm_ctxt_t *ps_frm_ctxt, S32 i4_layer_id, S32 i4_num_me_frm_pllel);
+
+layer_ctxt_t *hme_coarse_get_past_layer_ctxt(coarse_me_ctxt_t *ps_ctxt, S32 i4_layer_id);
+
+/**
+********************************************************************************
+* @fn void hme_init_mv_bank(layer_ctxt_t *ps_layer_ctxt,
+ BLK_SIZE_T e_blk_size,
+ S32 i4_num_ref,
+ S32 i4_num_results_per_part)
+*
+* @brief Given a blk size to be used for this layer, this function initialize
+* the mv bank to make it ready to store and return results.
+*
+* @param[in, out] ps_layer_ctxt: pointer to layer ctxt
+*
+* @param[in] e_blk_size : resolution at which mvs are stored
+*
+* @param[in] i4_num_ref: number of reference frames corresponding to which
+* results are stored.
+*
+* @param[in] e_blk_size : resolution at which mvs are stored
+*
+* @param[in] i4_num_results_per_part : Number of results to be stored per
+* ref idx. So these many best results stored
+*
+* @return void
+********************************************************************************
+*/
+void hme_init_mv_bank(
+ layer_ctxt_t *ps_layer_ctxt,
+ BLK_SIZE_T e_blk_size,
+ S32 i4_num_ref,
+ S32 i4_num_results_per_part,
+ U08 u1_enc);
+
+/**
+********************************************************************************
+* @fn void hme_derive_search_range(range_prms_t *ps_range,
+* range_prms_t *ps_pic_limit,
+* range_prms_t *ps_mv_limit,
+* S32 i4_x,
+* S32 i4_y,
+* S32 blk_wd,
+* S32 blk_ht)
+*
+* @brief given picture limits and blk dimensions and mv search limits, obtains
+* the valid search range such that the blk stays within pic boundaries,
+* where picture boundaries include padded portions of picture
+*
+* @param[out] ps_range: updated with actual search range
+*
+* @param[in] ps_pic_limit : picture boundaries
+*
+* @param[in] ps_mv_limit: Search range limits for the mvs
+*
+* @param[in] i4_x : x coordinate of the blk
+*
+* @param[in] i4_y : y coordinate of the blk
+*
+* @param[in] blk_wd : blk width
+*
+* @param[in] blk_ht : blk height
+*
+* @return void
+********************************************************************************
+*/
+void hme_derive_search_range(
+ range_prms_t *ps_range,
+ range_prms_t *ps_pic_limit,
+ range_prms_t *ps_mv_limit,
+ S32 i4_x,
+ S32 i4_y,
+ S32 blk_wd,
+ S32 blk_ht);
+
+/**
+********************************************************************************
+* @fn void hme_get_spatial_candt(layer_ctxt_t *ps_curr_layer,
+* BLK_SIZE_T e_search_blk_size,
+* S32 blk_x,
+* S32 blk_y,
+* S08 i1_ref_idx,
+* search_node_t *ps_top_neighbours,
+* search_node_t *ps_left_neighbours,
+* S32 i4_result_id);
+*
+* @brief Obtains top, top left, top right and left and bottom left candts
+*
+* @param[in] ps_curr_layer: layer ctxt, has the mv bank structure pointer
+*
+* @param[in] e_search_blk_size : search blk size of current layer
+*
+* @param[in] i4_blk_x : x coordinate of the block in mv bank
+*
+* @param[in] i4_blk_y : y coordinate of the block in mv bank
+*
+* @param[in] i1_ref_idx : Corresponds to ref idx from which to pick up mv
+* results, useful if multiple ref idx candts maintained separately.
+*
+* @param[out] ps_top_neighbours : T, TL, TR candts are output here
+*
+* @param[out] ps_left_neighbours : L BL candts output here
+*
+* @param[in] i4_result_id : If multiple results stored per ref idx, this
+* pts to the id of the result
+*
+* @return void
+********************************************************************************
+*/
+void hme_get_spatial_candt(
+ layer_ctxt_t *ps_curr_layer,
+ BLK_SIZE_T e_search_blk_size,
+ S32 blk_x,
+ S32 blk_y,
+ S08 i1_ref_idx,
+ search_node_t *ps_top_neighbours,
+ search_node_t *ps_left_neighbours,
+ S32 i4_result_id,
+ S32 i4_tr_avail,
+ S32 i4_bl_avail,
+ S32 encode);
+
+void hme_get_spatial_candt_in_l1_me(
+ layer_ctxt_t *ps_curr_layer,
+ BLK_SIZE_T e_search_blk_size,
+ S32 i4_blk_x,
+ S32 i4_blk_y,
+ S08 i1_ref_idx,
+ U08 u1_pred_dir,
+ search_node_t *ps_top_neighbours,
+ search_node_t *ps_left_neighbours,
+ S32 i4_result_id,
+ S32 tr_avail,
+ S32 bl_avail,
+ S32 i4_num_act_ref_l0,
+ S32 i4_num_act_ref_l1);
+
+/**
+********************************************************************************
+* @fn void hme_fill_ctb_neighbour_mvs(layer_ctxt_t *ps_curr_layer,
+* S32 i4_blk_x,
+* S32 i4_blk_y,
+* mvgrid_t *ps_mv_grid ,
+* S32 i1_ref_id)
+*
+* @brief The 18x18 MV grid for a ctb, is filled in first row and 1st col
+* this corresponds to neighbours (TL, T, TR, L, BL)
+*
+* @param[in] ps_curr_layer: layer ctxt, has the mv bank structure pointer
+*
+* @param[in] blk_x : x coordinate of the block in mv bank
+*
+* @param[in] blk_y : y coordinate of the block in mv bank
+*
+* @param[in] ps_mv_grid : Grid (18x18 mvs at 4x4 level)
+*
+* @param[in] u1_pred_lx : Corresponds to pred dir from which to pick up mv
+* results
+*
+* @return void
+********************************************************************************
+*/
+void hme_fill_ctb_neighbour_mvs(
+ layer_ctxt_t *ps_curr_layer,
+ S32 blk_x,
+ S32 blk_y,
+ mv_grid_t *ps_mv_grid,
+ U08 u1_pred_dir_ctr,
+ U08 u1_default_ref_id,
+ S32 i4_num_act_ref_l0);
+
+/**
+********************************************************************************
+* @fn void *hme_get_wkg_mem(buf_mgr_t *ps_buf_mgr, S32 i4_size)
+*
+* @brief Allocates a block of size = i4_size from working memory and returns
+*
+* @param[in,out] ps_buf_mgr: Buffer manager for wkg memory
+*
+* @param[in] i4_size : size required
+*
+* @return void pointer to allocated memory, NULL if failure
+********************************************************************************
+*/
+void *hme_get_wkg_mem(buf_mgr_t *ps_buf_mgr, S32 i4_size);
+
+void hme_reset_wkg_mem(buf_mgr_t *ps_buf_mgr);
+
+void hme_init_wkg_mem(buf_mgr_t *ps_buf_mgr, U08 *pu1_mem, S32 size);
+
+void hme_reset_ctb_mem_mgr(ctb_mem_mgr_t *ps_ctb_mem_mgr);
+
+void hme_init_ctb_mem_mgr(ctb_mem_mgr_t *ps_ctb_mem_mgr, U08 *pu1_mem, S32 size);
+
+void hme_fill_mvbank_intra(layer_ctxt_t *ps_layer_ctxt);
+
+void hme_scale_mv_grid(mv_grid_t *ps_mv_grid);
+
+void hme_downscale_mv_grid(mv_grid_t *ps_mv_grid);
+
+void hme_create_parent_ctb(
+ ctb_node_t *ps_ctb_node_parent,
+ ctb_node_t *ps_ctb_child_tl,
+ ctb_node_t *ps_ctb_child_tr,
+ ctb_node_t *ps_ctb_child_bl,
+ ctb_node_t *ps_ctb_child_br,
+ CU_SIZE_T e_cu_size_parent,
+ buf_mgr_t *ps_buf_mgr);
+
+void hme_create_merged_ctbs(
+ search_results_t *ps_results_merged,
+ ctb_mem_mgr_t *ps_ctb_mem_mgr,
+ buf_mgr_t *ps_buf_mgr,
+ ctb_node_t **pps_ctb_list_unified,
+ S32 num_candts);
+
+void hme_init_mv_grid(mv_grid_t *ps_mv_grid);
+
+typedef void (*pf_get_wt_inp)(
+ layer_ctxt_t *ps_curr_layer,
+ wgt_pred_ctxt_t *ps_wt_inp_prms,
+ S32 dst_stride,
+ S32 pos_x,
+ S32 pos_y,
+ S32 size,
+ S32 num_ref,
+ U08 u1_is_wt_pred_on);
+
+/**
+********************************************************************************
+* @fn void hme_pad_left(U08 *pu1_dst, S32 stride, S32 pad_wd, S32 pad_ht)
+*
+* @brief Pads horizontally to left side. Each pixel replicated across a line
+*
+* @param[in] pu1_dst : destination pointer. Points to the pixel to be repeated
+*
+* @param[in] stride : stride of destination buffer
+*
+* @param[in] pad_wd : Amt of horizontal padding to be done
+*
+* @param[in] pad_ht : Number of lines for which horizontal padding to be done
+*
+* @return void
+********************************************************************************
+*/
+void hme_pad_left(U08 *pu1_dst, S32 stride, S32 pad_wd, S32 pad_ht);
+
+/**
+********************************************************************************
+* @fn void hme_pad_right(U08 *pu1_dst, S32 stride, S32 pad_wd, S32 pad_ht)
+*
+* @brief Pads horizontally to rt side. Each pixel replicated across a line
+*
+* @param[in] pu1_dst : destination pointer. Points to the pixel to be repeated
+*
+* @param[in] stride : stride of destination buffer
+*
+* @param[in] pad_wd : Amt of horizontal padding to be done
+*
+* @param[in] pad_ht : Number of lines for which horizontal padding to be done
+*
+* @return void
+********************************************************************************
+*/
+void hme_pad_right(U08 *pu1_dst, S32 stride, S32 pad_wd, S32 pad_ht);
+
+/**
+********************************************************************************
+* @fn void hme_pad_top(U08 *pu1_dst, S32 stride, S32 pad_ht, S32 pad_wd)
+*
+* @brief Pads vertically on the top. Repeats the top line for top padding
+*
+* @param[in] pu1_dst : destination pointer. Points to the line to be repeated
+*
+* @param[in] stride : stride of destination buffer
+*
+* @param[in] pad_ht : Amt of vertical padding to be done
+*
+* @param[in] pad_wd : Number of columns for which vertical padding to be done
+*
+* @return void
+********************************************************************************
+*/
+void hme_pad_top(U08 *pu1_dst, S32 stride, S32 pad_ht, S32 pad_wd);
+
+/**
+********************************************************************************
+* @fn void hme_pad_bot(U08 *pu1_dst, S32 stride, S32 pad_ht, S32 pad_wd)
+*
+* @brief Pads vertically at the bottom. Repeats the bottom line for bottom padding
+*
+* @param[in] pu1_dst : destination pointer. Points to the line to be repeated
+*
+* @param[in] stride : stride of destination buffer
+*
+* @param[in] pad_ht : Amt of vertical padding to be done
+*
+* @param[in] pad_wd : Number of columns for which vertical padding to be done
+*
+* @return void
+********************************************************************************
+*/
+void hme_pad_bot(U08 *pu1_dst, S32 stride, S32 pad_ht, S32 pad_wd);
+
+/**
+**************************************************************************************************
+* @fn hme_populate_pus(search_results_t *ps_search_results, inter_cu_results_t *ps_cu_results)
+*
+* @brief Populates the pu_results structure with the results after the subpel refinement
+*
+* This is called post subpel refinement for 16x16s, 8x8s and
+* for post merge evaluation for 32x32,64x64 CUs
+*
+* @param[in,out] ps_search_results : Search results data structure
+* - ps_cu_results : cu_results data structure
+* ps_pu_result : Pointer to the memory for storing PU's
+*
+****************************************************************************************************
+*/
+void hme_populate_pus(
+ me_ctxt_t *ps_thrd_ctxt,
+ me_frm_ctxt_t *ps_ctxt,
+ hme_subpel_prms_t *ps_subpel_prms,
+ search_results_t *ps_search_results,
+ inter_cu_results_t *ps_cu_results,
+ inter_pu_results_t *ps_pu_results,
+ pu_result_t *ps_pu_result,
+ inter_ctb_prms_t *ps_inter_ctb_prms,
+ wgt_pred_ctxt_t *ps_wt_prms,
+ layer_ctxt_t *ps_curr_layer,
+ U08 *pu1_pred_dir_searched,
+ WORD32 i4_num_active_ref);
+
+void hme_populate_pus_8x8_cu(
+ me_ctxt_t *ps_thrd_ctxt,
+ me_frm_ctxt_t *ps_ctxt,
+ hme_subpel_prms_t *ps_subpel_prms,
+ search_results_t *ps_search_results,
+ inter_cu_results_t *ps_cu_results,
+ inter_pu_results_t *ps_pu_results,
+ pu_result_t *ps_pu_result,
+ inter_ctb_prms_t *ps_inter_ctb_prms,
+ U08 *pu1_pred_dir_searched,
+ WORD32 i4_num_active_ref,
+ U08 u1_blk_8x8_mask);
+
+S32 hme_recompute_lambda_from_min_8x8_act_in_ctb(
+ me_frm_ctxt_t *ps_ctxt, ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb);
+
+/**
+********************************************************************************
+* @fn hme_update_dynamic_search_params
+*
+* @brief Update the Dynamic search params based on the current MVs
+*
+* @param[in,out] ps_dyn_range_prms [inout] : Dyn. Range Param str.
+* i2_mvy [in] : current MV y comp.
+*
+* @return None
+********************************************************************************
+*/
+void hme_update_dynamic_search_params(dyn_range_prms_t *ps_dyn_range_prms, WORD16 i2_mvy);
+
+S32 hme_create_child_nodes_cu_tree(
+ cur_ctb_cu_tree_t *ps_cu_tree_root,
+ cur_ctb_cu_tree_t *ps_cu_tree_cur_node,
+ S32 nodes_already_created);
+
+void hme_add_new_node_to_a_sorted_array(
+ search_node_t *ps_result_node,
+ search_node_t **pps_sorted_array,
+ U08 *pu1_shifts,
+ U32 u4_num_results_updated,
+ U08 u1_shift);
+
+S32 hme_find_pos_of_implicitly_stored_ref_id(
+ S08 *pi1_ref_idx, S08 i1_ref_idx, S32 i4_result_id, S32 i4_num_results);
+
+S32 hme_populate_search_candidates(fpel_srch_cand_init_data_t *ps_ctxt);
+
+void hme_init_pred_buf_info(
+ hme_pred_buf_info_t (*ps_info)[MAX_NUM_INTER_PARTS],
+ hme_pred_buf_mngr_t *ps_buf_mngr,
+ U08 u1_pu1_wd,
+ U08 u1_pu1_ht,
+ PART_TYPE_T e_part_type);
+
+void hme_debrief_bipred_eval(
+ part_type_results_t *ps_part_type_result,
+ hme_pred_buf_info_t (*ps_pred_buf_info)[MAX_NUM_INTER_PARTS],
+ hme_pred_buf_mngr_t *ps_pred_buf_mngr,
+ U08 *pu1_allocated_pred_buf_array_indixes,
+ ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list);
+
+U08 hme_decide_search_candidate_priority_in_l1_and_l2_me(
+ SEARCH_CANDIDATE_TYPE_T e_cand_type, ME_QUALITY_PRESETS_T e_quality_preset);
+
+U08 hme_decide_search_candidate_priority_in_l0_me(SEARCH_CANDIDATE_TYPE_T e_cand_type, U08 u1_index);
+
+void hme_search_cand_data_init(
+ S32 *pi4_id_Z,
+ S32 *pi4_id_coloc,
+ S32 *pi4_num_coloc_cands,
+ U08 *pu1_search_candidate_list_index,
+ S32 i4_num_act_ref_l0,
+ S32 i4_num_act_ref_l1,
+ U08 u1_is_bidir_enabled,
+ U08 u1_4x4_blk_in_l1me);
+
+void hme_compute_variance_for_all_parts(
+ U08 *pu1_data,
+ S32 i4_data_stride,
+ S32 *pi4_valid_part_array,
+ U32 *pu4_variance,
+ S32 i4_num_valid_parts,
+ U08 u1_cu_size);
+
+void hme_compute_sigmaX_and_sigmaXSquared(
+ U08 *pu1_data,
+ S32 i4_buf_stride,
+ void *pv_sigmaX,
+ void *pv_sigmaXSquared,
+ U08 u1_base_blk_wd,
+ U08 u1_base_blk_ht,
+ U08 u1_blk_wd,
+ U08 u1_blk_ht,
+ U08 u1_is_sigma_pointer_size_32_bit,
+ U08 u1_array_stride);
+
+void hme_compute_final_sigma_of_pu_from_base_blocks(
+ U32 *pu4_SigmaX,
+ U32 *pu4_SigmaXSquared,
+ ULWORD64 *pu8_final_sigmaX,
+ ULWORD64 *pu8_final_sigmaX_Squared,
+ U08 u1_cu_size,
+ U08 u1_base_block_size,
+ S32 i4_part_id,
+ U08 u1_base_blk_array_stride);
+
+void hme_compute_stim_injected_distortion_for_all_parts(
+ U08 *pu1_pred,
+ S32 i4_pred_stride,
+ S32 *pi4_valid_part_array,
+ ULWORD64 *pu8_src_sigmaX,
+ ULWORD64 *pu8_src_sigmaXSquared,
+ S32 *pi4_sad_array,
+ S32 i4_alpha_stim_multiplier,
+ S32 i4_inv_wt,
+ S32 i4_inv_wt_shift_val,
+ S32 i4_num_valid_parts,
+ S32 i4_wpred_log_wdc,
+ U08 u1_cu_size);
+
+void sigma_for_cusize_16_and_baseblock_size_16(
+ U08 *pu1_data, S32 i4_data_stride, U32 *pu4_sigmaX, U32 *pu4_sigmaXSquared);
+
+void sigma_for_cusize_16_and_baseblock_size_8(
+ U08 *pu1_data, S32 i4_data_stride, U32 *pu4_sigmaX, U32 *pu4_sigmaXSquared, U08 diff_cu_size);
+
+void sigma_for_cusize_16_and_baseblock_size_4(
+ U08 *pu1_data, S32 i4_data_stride, U32 *pu4_sigmaX, U32 *pu4_sigmaXSquared);
+
+void sigma_for_cusize_32_and_baseblock_size_32(
+ U08 *pu1_data, S32 i4_data_stride, U32 *pu4_sigmaX, U32 *pu4_sigmaXSquared);
+
+void sigma_for_cusize_64_and_baseblock_size_64(
+ U08 *pu1_data, S32 i4_data_stride, U32 *pu4_sigmaX, U32 *pu4_sigmaXSquared);
+
+void hme_choose_best_noise_preserver_amongst_fpel_and_subpel_winners(
+ fullpel_refine_ctxt_t *ps_fullpel_winner_data,
+ search_node_t **pps_part_results,
+ layer_ctxt_t *ps_curr_layer,
+ wgt_pred_ctxt_t *ps_wt_inp_prms,
+ U32 *pu4_src_variance,
+ S32 i4_cu_x_off_in_ctb,
+ S32 i4_cu_y_off_in_ctb,
+ S32 i4_ctb_x_off,
+ S32 i4_ctb_y_off,
+ S32 i4_inp_stride,
+ S32 i4_alpha_stim_multiplier,
+ U08 u1_subpel_uses_satd);
+
+#if TEMPORAL_NOISE_DETECT
+WORD32 ihevce_16x16block_temporal_noise_detect(
+ WORD32 had_block_size,
+ WORD32 ctb_width,
+ WORD32 ctb_height,
+ ihevce_ctb_noise_params *ps_ctb_noise_params,
+ fpel_srch_cand_init_data_t *s_proj_srch_cand_init_data,
+ hme_search_prms_t *s_search_prms_blk,
+ me_frm_ctxt_t *ps_ctxt,
+ WORD32 num_pred_dir,
+ WORD32 i4_num_act_ref_l0,
+ WORD32 i4_num_act_ref_l1,
+ WORD32 i4_cu_x_off,
+ WORD32 i4_cu_y_off,
+ wgt_pred_ctxt_t *ps_wt_inp_prms,
+ WORD32 input_stride,
+ WORD32 index_8x8_block,
+ WORD32 num_horz_blocks,
+ WORD32 num_8x8_in_ctb_row,
+ WORD32 i4_index_variance);
+#endif
+
+/**
+********************************************************************************
+* @fn hme_decide_part_types(search_results_t *ps_search_results)
+*
+* @brief Does uni/bi evaluation across various partition types,
+* decides best inter partition types for the CU, compares
+* intra cost and decides the best K results for the CU
+*
+* This is called post subpel refinement for 16x16s, 8x8s and
+* for post merge evaluation for 32x32,64x64 CUs
+*
+* @param[in,out] ps_search_results : Search results data structure
+* - In : 2 lists of upto 2mvs & refids, active partition mask
+* - Out: Best results for final rdo evaluation of the cu
+*
+* @param[in] ps_subpel_prms : Sub pel params data structure
+
+*
+* @par Description
+* --------------------------------------------------------------------------------
+* Flow:
+* for each category (SMP,AMP,2Nx2N based on part mask)
+* {
+* for each part_type
+* {
+* for each part
+* pick best candidate from each list
+* combine uni part type
+* update best results for part type
+* }
+* pick the best part type for given category (for SMP & AMP)
+* }
+* ||
+* ||
+* \/
+* for upto 3 best part types
+* {
+* for each part
+* {
+* compute fixed size had for all uni and remember coeffs
+* compute bisatd
+* uni vs bi and gives upto two results
+* also gives the pt level pred buffer
+* }
+* }
+* ||
+* ||
+* \/
+* select X candidates for tu recursion as per the Note below
+* tu_rec_on_part_type (reuse transform coeffs)
+* ||
+* ||
+* \/
+* insert intra nodes at appropriate result id
+* ||
+* ||
+* \/
+* populate y best results for rdo based on preset
+*
+* Note :
+* number of TU rec for P pics : 2 2nx2n + 1 smp + 1 amp for ms or 9 for hq
+* number of TU rec for B pics : 1 2nx2n + 1 smp + 1 amp for ms or 2 uni 2nx2n + 1 smp + 1 amp for ms or 9 for hq
+* --------------------------------------------------------------------------------
+*
+* @return None
+********************************************************************************
+*/
+void hme_decide_part_types(
+ inter_cu_results_t *ps_cu_results,
+ inter_pu_results_t *ps_pu_results,
+ inter_ctb_prms_t *ps_inter_ctb_prms,
+ me_frm_ctxt_t *ps_ctxt,
+ ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list,
+ ihevce_me_optimised_function_list_t *ps_me_optimised_function_list);
+
+void hme_compute_pred_and_evaluate_bi(
+ inter_cu_results_t *ps_cu_results,
+ inter_pu_results_t *ps_pu_results,
+ inter_ctb_prms_t *ps_inter_ctb_prms,
+ part_type_results_t *ps_part_type_result,
+ ULWORD64 *pu8_winning_pred_sigmaXSquare,
+ ULWORD64 *pu8_winning_pred_sigmaX,
+ ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list,
+ ihevce_me_optimised_function_list_t *ps_me_optimised_function_list);
+
+/**
+********************************************************************************
+* @fn hme_insert_intra_nodes_post_bipred
+*
+* @brief Compares intra costs (populated by IPE) with the best inter costs
+* (populated after evaluating bi-pred) and updates the best results
+* if intra cost is better
+*
+* @param[in,out] ps_cu_results [inout] : Best results structure of CU
+* ps_cur_ipe_ctb [in] : intra results for the current CTB
+* i4_frm_qstep [in] : current frame quantizer(qscale)*
+*
+* @return None
+********************************************************************************
+*/
+void hme_insert_intra_nodes_post_bipred(
+ inter_cu_results_t *ps_cu_results,
+ ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
+ WORD32 i4_frm_qstep);
+
+void hme_set_mv_limit_using_dvsr_data(
+ me_frm_ctxt_t *ps_ctxt,
+ layer_ctxt_t *ps_curr_layer,
+ range_prms_t *ps_mv_limit,
+ S16 *pi2_prev_enc_frm_max_mv_y,
+ U08 u1_num_act_ref_pics);
+
+S32 hme_part_mask_populator(
+ U08 *pu1_inp,
+ S32 i4_inp_stride,
+ U08 u1_limit_active_partitions,
+ U08 u1_is_bPic,
+ U08 u1_is_refPic,
+ U08 u1_blk_8x8_mask,
+ ME_QUALITY_PRESETS_T e_me_quality_preset);
+
+#endif /* #ifndef _HME_UTILS_H_ */
diff --git a/encoder/ia_basic_ops32.h b/encoder/ia_basic_ops32.h
new file mode 100644
index 0000000..5f6defd
--- /dev/null
+++ b/encoder/ia_basic_ops32.h
@@ -0,0 +1,1433 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* */
+/* File name : ia_basic_ops32.h */
+/* */
+/* Description : this file has 32bit basic operations */
+/* */
+/* List of functions: 1. min32 */
+/* 2. max32 */
+/* 3. mult16x16in32 */
+/* 4. mult16x16in32_shl */
+/* 5. mult16x16in32_shl_sat */
+/* 6. shl32 */
+/* 7. shl32_sat */
+/* 8. shl32_dir */
+/* 9. shl32_dir_sat */
+/* 10. shr32 */
+/* 11. shr32_dir */
+/* 12. shr32_dir_sat */
+/* 13. add32 */
+/* 14. sub32 */
+/* 15. add32_sat */
+/* 16. sub32_sat */
+/* 17. norm32 */
+/* 18. bin_expo32 */
+/* 19. abs32 */
+/* 20. abs32_sat */
+/* 21. negate32 */
+/* 22. negate32_sat */
+/* 23. div32 */
+/* 24. mac16x16in32 */
+/* 25. mac16x16in32_shl */
+/* 26. mac16x16in32_shl_sat */
+/* 27. msu16x16in32 */
+/* 28. msu16x16in32_shl */
+/* 29. msu16x16in32_shl_sat */
+/* 30. add32_shr */
+/* 31. sub32_shr */
+/* */
+/* Issues / problems: none */
+/* */
+/* Revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 20 11 2003 aadithya kamath created */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* File includes */
+/* ia_type_def.h */
+/* ia_constants.h */
+/*****************************************************************************/
+
+#ifndef __IA_BASIC_OPS32_H__
+#define __IA_BASIC_OPS32_H__
+
+/*****************************************************************************/
+/* */
+/* Function name : min32 */
+/* */
+/* Description : returns the minima of 2 32 bit variables */
+/* */
+/* Inputs : WORD32 a, WORD32 b */
+/* */
+/* Globals : none */
+/* */
+/* Processing : minimum of 2 inputs */
+/* */
+/* Outputs : none */
+/* */
+/* Returns : WORD32 min_val - 32 bit signed value */
+/* */
+/* Issues : none */
+/* */
+/* Revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 20 11 2003 aadithya kamath created */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 min32(WORD32 a, WORD32 b)
+{
+    /* Signed comparison; b is the result when the operands are equal. */
+    if(a < b)
+        return a;
+
+    return b;
+}
+
+/*****************************************************************************/
+/* */
+/* Function name : max32 */
+/* */
+/* Description : returns the maxima of 2 32 bit variables */
+/* */
+/* Inputs : WORD32 a, WORD32 b */
+/* */
+/* Globals : none */
+/* */
+/* Processing : maximum of 2 inputs */
+/* */
+/* Outputs : none */
+/* */
+/* Returns : WORD32 max_val - 32 bit signed value */
+/* */
+/* Issues : none */
+/* */
+/* Revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 20 11 2003 aadithya kamath created */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 max32(WORD32 a, WORD32 b)
+{
+    /* Signed comparison; b is the result when the operands are equal. */
+    if(a > b)
+        return a;
+
+    return b;
+}
+
+/*****************************************************************************/
+/* */
+/* Function name : shl32 */
+/* */
+/* Description : shifts a 32-bit value left by specified bits */
+/* */
+/* Inputs : WORD32 a, WORD b */
+/* */
+/* Globals : none */
+/* */
+/* Processing : shift a by b */
+/* */
+/* Outputs : none */
+/* */
+/* Returns : WORD32 out_val - 32 bit signed value */
+/* */
+/* assumptions : 0 <= b <= 31 */
+/* */
+/* Issues : none */
+/* */
+/* Revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 20 11 2003 aadithya kamath created */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+static PLATFORM_INLINE WORD32 shl32(WORD32 a, WORD b)
+{
+    /* A shift count beyond 31 is treated as pushing every bit out. */
+    if(b > 31)
+    {
+        return 0;
+    }
+
+    /* Plain, non-saturating shift; callers are expected to pass b >= 0. */
+    return (WORD32)a << b;
+}
+/*****************************************************************************/
+/* */
+/* Function name : shr32 */
+/* */
+/* Description : shifts a 32-bit value right by specified bits */
+/* */
+/* Inputs : WORD32 a, WORD b */
+/* */
+/* Globals : none */
+/* */
+/* Processing : shift right a by b */
+/* */
+/* Outputs : none */
+/* */
+/* Returns : WORD32 out_val - 32 bit signed value */
+/* */
+/* assumptions : 0 <= b <= 31 */
+/* */
+/* Issues : none */
+/* */
+/* Revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 20 11 2003 aadithya kamath created */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 shr32(WORD32 a, WORD b)
+{
+    WORD32 shifted;
+
+    if(b <= 31)
+    {
+        /* Count fits the word: use the compiler's signed right shift. */
+        shifted = (WORD32)a >> b;
+    }
+    else
+    {
+        /* Oversized count: only the sign is left, so the result is */
+        /* -1 for a negative input and 0 otherwise.                 */
+        shifted = (a < 0) ? -1 : 0;
+    }
+
+    return shifted;
+}
+
+/*****************************************************************************/
+/* */
+/* Function name : shl32_sat */
+/* */
+/* Description : shifts a 32-bit value left by specified bits and */
+/* saturates it to 32 bits */
+/* */
+/* Inputs : WORD32 a, WORD b */
+/* */
+/* Globals : none */
+/* */
+/* Processing : shift a by 1 b times if crosses 32_bits saturate */
+/* */
+/* Outputs : none */
+/* */
+/* Returns : WORD32 out_val - 32 bit signed value */
+/* */
+/* assumptions : 0 <= b <= 31 */
+/* */
+/* Issues : none */
+/* */
+/* Revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 20 11 2003 aadithya kamath created */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 shl32_sat(WORD32 a, WORD b)
+{
+    const WORD max_shift = (WORD)(sizeof(WORD32) * 8);
+    WORD remaining = b;
+
+    /* Cap the step count at the bit width of WORD32 to bound the loop. */
+    if(remaining > max_shift)
+        remaining = max_shift;
+
+    /* Shift one bit at a time, clamping as soon as the value is too */
+    /* large in magnitude for the next shift to be attempted.        */
+    while(remaining > 0)
+    {
+        if(a > (WORD32)0X3fffffffL)
+            return MAX_32;
+        if(a < (WORD32)0xc0000000L)
+            return MIN_32;
+
+        a = shl32(a, 1);
+        remaining--;
+    }
+
+    return a;
+}
+
+/*****************************************************************************/
+/* */
+/* Function name : shl32_dir */
+/* */
+/* Description : shifts a 32-bit value left by specified bits, shifts */
+/* it right if specified no. of bits is negative */
+/* */
+/* Inputs : WORD32 a, WORD b */
+/* */
+/* Globals : none */
+/* */
+/* Processing : if b -ve shift right else shift left */
+/* */
+/* Outputs : none */
+/* */
+/* Returns : WORD32 out_val - 32 bit signed value */
+/* */
+/* assumptions : -31 <= b <= 31 */
+/* */
+/* Issues : none */
+/* */
+/* Revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 20 11 2003 aadithya kamath created */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 shl32_dir(WORD32 a, WORD b)
+{
+    /* The sign of b picks the direction: zero or positive counts shift */
+    /* left without saturation, negative counts shift right by the      */
+    /* magnitude of b.                                                  */
+
+    if(b >= 0)
+    {
+        return shl32(a, b);
+    }
+    else
+    {
+        return shr32(a, -b);
+    }
+}
+
+/*****************************************************************************/
+/* */
+/* Function name : shl32_dir_sat */
+/* */
+/* Description : shifts a 32-bit value left by specified bits with sat, */
+/* shifts it right if specified no. of bits is negative */
+/* */
+/* Inputs : WORD32 a, WORD b */
+/* */
+/* Globals : none */
+/* */
+/* Processing : if b -ve shift right else shift left with sat */
+/* */
+/* Outputs : none */
+/* */
+/* Returns : WORD32 out_val - 32 bit signed value */
+/* */
+/* assumptions : -31 <= b <= 31 */
+/* */
+/* Issues : none */
+/* */
+/* Revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 20 11 2003 aadithya kamath created */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 shl32_dir_sat(WORD32 a, WORD b)
+{
+    /* The sign of b picks the direction: zero or positive counts shift */
+    /* left with saturation, negative counts shift right by the         */
+    /* magnitude of b.                                                  */
+
+    if(b >= 0)
+    {
+        return shl32_sat(a, b);
+    }
+    else
+    {
+        return shr32(a, -b);
+    }
+}
+
+/*****************************************************************************/
+/* */
+/* Function name : shr32_dir */
+/* */
+/* Description : shifts a 32-bit value right by specified bits, shifts */
+/* it left if specified no. of bits is negative */
+/* */
+/* Inputs : WORD32 a, WORD b */
+/* */
+/* Globals : none */
+/* */
+/* Processing : if b +ve shift right else shift left */
+/* */
+/* Outputs : none */
+/* */
+/* Returns : WORD32 out_val - 32 bit signed value */
+/* */
+/* assumptions : -31 <= b <= 31 */
+/* */
+/* Issues : none */
+/* */
+/* Revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 20 11 2003 aadithya kamath created */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 shr32_dir(WORD32 a, WORD b)
+{
+    /* The sign of b picks the direction: zero or positive counts shift */
+    /* right, negative counts shift left (without saturation) by the    */
+    /* magnitude of b.                                                  */
+
+    if(b >= 0)
+    {
+        return shr32(a, b);
+    }
+    else
+    {
+        return shl32(a, -b);
+    }
+}
+
+/*****************************************************************************/
+/* */
+/* Function name : shr32_dir_sat */
+/* */
+/* Description : shifts a 32-bit value right by specified bits, shifts */
+/* it left with sat if specified no. of bits is negative */
+/* */
+/* Inputs : WORD32 a, WORD b */
+/* */
+/* Globals : none */
+/* */
+/* Processing : if b +ve shift right else shift left with sat */
+/* */
+/* Outputs : none */
+/* */
+/* Returns : WORD32 out_val - 32 bit signed value */
+/* */
+/* assumptions : -31 <= b <= 31 */
+/* */
+/* Issues : none */
+/* */
+/* Revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 20 11 2003 aadithya kamath created */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 shr32_dir_sat(WORD32 a, WORD b)
+{
+    /* The sign of b picks the direction: zero or positive counts shift */
+    /* right, negative counts shift left with saturation by the         */
+    /* magnitude of b.                                                  */
+
+    if(b >= 0)
+    {
+        return shr32(a, b);
+    }
+    else
+    {
+        return shl32_sat(a, -b);
+    }
+}
+
+/*****************************************************************************/
+/* */
+/* Function name : mult16x16in32 */
+/* */
+/* Description : multiplies two 16 bit numbers and returns their 32-bit */
+/* result */
+/* */
+/* Inputs : WORD16 a, WORD16 b */
+/* */
+/* Globals : none */
+/* */
+/* Processing : multiply 2 inputs */
+/* */
+/* Outputs : none */
+/* */
+/* Returns : WORD32 product - 32 bit signed value */
+/* */
+/* Issues : none */
+/* */
+/* Revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 20 11 2003 aadithya kamath created */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 mult16x16in32(WORD16 a, WORD16 b)
+{
+    /* Widen both operands to WORD32 first so the multiplication is */
+    /* carried out on the wider type.                               */
+    WORD32 wide_a = (WORD32)a;
+    WORD32 wide_b = (WORD32)b;
+    return wide_a * wide_b;
+}
+
+/*****************************************************************************/
+/* */
+/* Function name : mult16x16in32_shl */
+/* */
+/* Description : multiplies two 16 bit numbers and returns their 32-bit */
+/* result after removing 1 redundant sign bit. no sat */
+/* */
+/* Inputs : WORD16 a, WORD16 b */
+/* */
+/* Globals : none */
+/* */
+/* Processing : multiply 2 inputs, shift left by 1 */
+/* */
+/* Outputs : none */
+/* */
+/* Returns : WORD32 product - 32 bit signed value */
+/* */
+/* Issues : none */
+/* */
+/* Revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 20 11 2003 aadithya kamath created */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 mult16x16in32_shl(WORD16 a, WORD16 b)
+{
+    /* Full-width product first, then a one-bit left shift; nothing */
+    /* in this variant saturates.                                   */
+    WORD32 wide_product = mult16x16in32(a, b);
+
+    return shl32(wide_product, 1);
+}
+
+/*****************************************************************************/
+/* */
+/* Function name : mult16x16in32_shl_sat */
+/* */
+/* Description : multiplies two 16 bit numbers and returns their 32-bit */
+/* result after removing 1 redundant sign bit with sat */
+/* */
+/* Inputs : WORD16 a, WORD16 b */
+/* */
+/* Globals : none */
+/* */
+/* Processing : if the product is 0x40000000 return MAX_32 else */
+/* shift the product of the 2 inputs left by 1 */
+/* */
+/* Outputs : none */
+/* */
+/* Returns : WORD32 product - 32 bit signed value */
+/* */
+/* Issues : none */
+/* */
+/* Revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 20 11 2003 aadithya kamath created */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 mult16x16in32_shl_sat(WORD16 a, WORD16 b)
+{
+    WORD32 product = (WORD32)a * (WORD32)b;
+
+    /* 0x40000000 is the product singled out for clamping: doubling */
+    /* it would not fit in a signed 32-bit value.                   */
+    if(product == (WORD32)0x40000000L)
+    {
+        return MAX_32;
+    }
+
+    /* Any other product is doubled with a one-bit left shift. */
+    return shl32(product, 1);
+}
+
+/*****************************************************************************/
+/* */
+/* Function name : add32 */
+/* */
+/* Description : adds 2 32 bit variables without sat */
+/* */
+/* Inputs : WORD32 a, WORD32 b */
+/* */
+/* Globals : none */
+/* */
+/* Processing : add 2 inputs */
+/* */
+/* Outputs : none */
+/* */
+/* Returns : WORD32 sum - 32 bit signed value */
+/* */
+/* Issues : none */
+/* */
+/* Revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 20 11 2003 aadithya kamath created */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 add32(WORD32 a, WORD32 b)
+{
+    WORD32 total = (WORD32)a;
+
+    /* Plain signed addition: no overflow detection or clamping. */
+    total += (WORD32)b;
+    return total;
+}
+
+/*****************************************************************************/
+/* */
+/* Function name : sub32 */
+/* */
+/* Description : subs 2 32 bit variables without sat */
+/* */
+/* Inputs : WORD32 a, WORD32 b */
+/* */
+/* Globals : none */
+/* */
+/* Processing : sub 2 inputs */
+/* */
+/* Outputs : none */
+/* */
+/* Returns : WORD32 diff - 32 bit signed value */
+/* */
+/* Issues : none */
+/* */
+/* Revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 20 11 2003 aadithya kamath created */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 sub32(WORD32 a, WORD32 b)
+{
+    WORD32 result = (WORD32)a;
+
+    /* Plain signed subtraction: no overflow detection or clamping. */
+    result -= (WORD32)b;
+    return result;
+}
+
+/*****************************************************************************/
+/* */
+/* Function name : add32_sat */
+/* */
+/* Description : adds 2 32 bit variables with sat */
+/* */
+/* Inputs : WORD32 a, WORD32 b */
+/* */
+/* Globals : none */
+/* */
+/* Processing : add 2 inputs if overflow saturate */
+/* */
+/* Outputs : none */
+/* */
+/* Returns : WORD32 sum - 32 bit signed value */
+/* */
+/* Issues : none */
+/* */
+/* Revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 20 11 2003 aadithya kamath created */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 add32_sat(WORD32 a, WORD32 b)
+{
+    WORD32 sum;
+
+    /* Returns a + b clamped to [MIN_32, MAX_32]. The range is tested   */
+    /* before adding, because a signed overflow is undefined behaviour */
+    /* in C and must not be evaluated just to detect it afterwards.    */
+    if((b > 0) && (a > ((WORD32)MAX_32 - b)))
+        sum = MAX_32; /* a + b would exceed the top of the range */
+    else if((b < 0) && (a < ((WORD32)MIN_32 - b)))
+        sum = MIN_32; /* a + b would fall below the bottom of the range */
+    else
+        sum = a + b; /* guaranteed to be representable */
+
+    return sum;
+}
+
+/*****************************************************************************/
+/* */
+/* Function name : sub32_sat */
+/* */
+/* Description : subs 2 32 bit variables with sat */
+/* */
+/* Inputs : WORD32 a, WORD32 b */
+/* */
+/* Globals : none */
+/* */
+/* Processing : sub 2 inputs, if overflow saturate */
+/* */
+/* Outputs : none */
+/* */
+/* Returns : WORD32 diff - 32 bit signed value */
+/* */
+/* Issues : none */
+/* */
+/* Revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 20 11 2003 aadithya kamath created */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 sub32_sat(WORD32 a, WORD32 b)
+{
+    WORD32 diff;
+
+    /* Returns a - b clamped to [MIN_32, MAX_32]. The range is tested   */
+    /* before subtracting, because a signed overflow is undefined      */
+    /* behaviour in C and must not be evaluated just to detect it.     */
+    if((b > 0) && (a < ((WORD32)MIN_32 + b)))
+        diff = MIN_32; /* a - b would fall below the bottom of the range */
+    else if((b < 0) && (a > ((WORD32)MAX_32 + b)))
+        diff = MAX_32; /* a - b would exceed the top of the range */
+    else
+        diff = a - b; /* guaranteed to be representable */
+
+    return (diff);
+}
+
+/*****************************************************************************/
+/* */
+/* Function name : norm32 */
+/* */
+/* Description : returns number of redundant sign bits in a */
+/* 32-bit value. for a value of 0 or -1, returns 31 */
+/* */
+/* Inputs : WORD32 a */
+/* */
+/* Globals : none */
+/* */
+/* Processing : complement -ve input, left shift till reaches 0x40000000 */
+/* return no. of left shifts */
+/* */
+/* Outputs : none */
+/* */
+/* Returns : WORD norm_val - 0 <= norm_val < 32 */
+/* */
+/* Issues : none */
+/* */
+/* Revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 20 11 2003 aadithya kamath created */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD norm32(WORD32 a)
+{
+    WORD shift_count = 0;
+    WORD32 magnitude = a;
+
+    /* Zero and minus one are special-cased: the loop below would */
+    /* never reach its threshold for either of them.              */
+    if((a == 0) || (a == (WORD32)0xffffffffL))
+    {
+        return 31;
+    }
+
+    /* Map negative inputs onto the non-negative range with a bitwise */
+    /* complement, so a single loop serves both signs.                */
+    if(magnitude < 0)
+    {
+        magnitude = ~magnitude;
+    }
+
+    /* Count the one-bit left shifts needed before the value reaches */
+    /* 0x40000000.                                                   */
+    while(magnitude < (WORD32)0x40000000L)
+    {
+        magnitude <<= 1;
+        shift_count++;
+    }
+
+    return shift_count;
+}
+
+/*****************************************************************************/
+/* */
+/* Function name : bin_expo32 */
+/* */
+/* Description : returns the position of the most significant bit. */
+/* for negative numbers, it ignores leading ones to */
+/* determine the position of most significant bit. */
+/* note: for a value of zero returns 0 */
+/* */
+/* Inputs : WORD32 a */
+/* */
+/* Globals : none */
+/* */
+/* Processing : subtract norm_val from 31 */
+/* */
+/* Outputs : none */
+/* */
+/* Returns : WORD bin_expo_val - 0 <= val < 32 */
+/* */
+/* Issues : none */
+/* */
+/* Revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 20 11 2003 aadithya kamath created */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD bin_expo32(WORD32 a)
+{
+    /* norm32() supplies the count of redundant sign bits; the result */
+    /* is that count taken away from 31.                              */
+    WORD redundant_sign_bits = norm32(a);
+
+    return 31 - redundant_sign_bits;
+}
+
+/*****************************************************************************/
+/* */
+/* Function name : abs32 */
+/* */
+/* Description : returns the absolute value of a 32-bit number without sat. */
+/* */
+/* Inputs : WORD32 a */
+/* */
+/* Globals : none */
+/* */
+/* Processing : if -ve then negate */
+/* */
+/* Outputs : none */
+/* */
+/* Returns : WORD32 abs_val - 32 bit signed value */
+/* */
+/* Issues : none */
+/* */
+/* Revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 20 11 2003 aadithya kamath created */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 abs32(WORD32 a)
+{
+    /* Non-negative inputs pass through untouched. */
+    if(a >= 0)
+    {
+        return a;
+    }
+
+    /* Negative inputs are negated without any range check, so the */
+    /* most negative value is not clamped here (this is the        */
+    /* non-saturating variant).                                    */
+    return -a;
+}
+
+/*****************************************************************************/
+/* */
+/* Function name : abs32_sat */
+/* */
+/* Description : returns the value of 32-bit number with sat. */
+/* */
+/* Inputs : WORD32 a */
+/* */
+/* Globals : none */
+/* */
+/* Processing : if -ve then negate, abs(MIN_32) is MAX_32 */
+/* */
+/* Outputs : none */
+/* */
+/* Returns : WORD32 abs_val - 32 bit signed value */
+/* */
+/* Issues : none */
+/* */
+/* Revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 20 11 2003 aadithya kamath created */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 abs32_sat(WORD32 a)
+{
+    /* the most negative value has no positive counterpart: clamp to MAX_32 */
+    if(a == (WORD32)MIN_32)
+    {
+        return MAX_32;
+    }
+
+    /* every remaining negative value is negated */
+    if(a < 0)
+    {
+        return -a;
+    }
+
+    /* zero and positive values pass through unchanged */
+    return a;
+}
+
+/*****************************************************************************/
+/* */
+/* Function name : negate32 */
+/* */
+/* Description : returns the negated value of 32-bit number without sat. */
+/* */
+/* Inputs : WORD32 a */
+/* */
+/* Globals : none */
+/* */
+/* Processing : negate input */
+/* */
+/* Outputs : none */
+/* */
+/* Returns : WORD32 neg_val - 32 bit signed value */
+/* */
+/* Issues : none */
+/* */
+/* Revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 20 11 2003 aadithya kamath created */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 negate32(WORD32 a)
+{
+    /* Negation with the plain unary minus. This is the variant */
+    /* without saturation: no input value is special-cased      */
+    /* before the operator is applied.                           */
+
+    return -a;
+}
+
+/*****************************************************************************/
+/* */
+/* Function name : negate32_sat */
+/* */
+/* Description : returns the negated value of 32-bit number with sat. */
+/* */
+/* Inputs : WORD32 a */
+/* */
+/* Globals : none */
+/* */
+/* Processing : negate input, if MIN_32 then MAX_32 */
+/* */
+/* Outputs : none */
+/* */
+/* Returns : WORD32 neg_val - 32 bit signed value */
+/* */
+/* Issues : none */
+/* */
+/* Revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 20 11 2003 aadithya kamath created */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 negate32_sat(WORD32 a)
+{
+    /* MIN_32 has no positive counterpart in 32 bits. Test for it before */
+    /* negating, so that '-a' is never evaluated for that input (it would */
+    /* be a signed overflow); the result saturates to MAX_32 instead.     */
+    if(a == (WORD32)MIN_32)
+    {
+        return MAX_32;
+    }
+
+    return -a;
+}
+/*****************************************************************************/
+/* */
+/* Function name : subc_32 */
+/* */
+/* Description : implements the subc operation of c64x. */
+/* */
+/* Inputs : UWORD32 nr, UWORD32 dr */
+/* */
+/* Globals : none */
+/* */
+/* Processing : implements the subc operation of c64x */
+/* */
+/* Outputs : none */
+/* */
+/* Returns : UWORD32 result - 32 bit unsigned value */
+/* */
+/* Issues : none */
+/* */
+/* Revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 20 11 2003 Mudit Mehrotra created */
+/* */
+/*****************************************************************************/
+static PLATFORM_INLINE UWORD32 subc_32(UWORD32 nr, UWORD32 dr)
+{
+    /* One conditional-subtract step, selected by comparing nr with dr. */
+
+    /* nr is smaller: only shift it left by one, bit 0 stays clear */
+    if(nr < dr)
+    {
+        return (nr << 1);
+    }
+
+    /* otherwise subtract dr, shift the difference left by one, set bit 0 */
+    return (((nr - dr) << 1) + 1);
+}
+
+/*****************************************************************************/
+/* */
+/* Function name : div32 */
+/* */
+/* Description : divides 2 32 bit variables and returns the quotient */
+/* the q-format of the result is modified */
+/* ( a/b to Q30 precision) */
+/* */
+/* Inputs : WORD32 a, WORD32 b, WORD *q_format */
+/* */
+/* Globals : none */
+/* */
+/* Processing : non-restoration algo(shift & subtract) */
+/* */
+/* Outputs : none */
+/* */
+/* Returns : WORD32 quotient - 32 bit signed value */
+/* */
+/* Issues : none */
+/* */
+/* Revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 20 11 2003 aadithya kamath created */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+static PLATFORM_INLINE WORD32 div32(WORD32 a, WORD32 b, WORD *q_format)
+{
+    /* Divides a by b with 31 shift-and-subtract steps on the left-aligned */
+    /* magnitudes. *q_format receives 30 + norm32(|a|) - norm32(|b|).      */
+    /* When b is 0, *q_format is set to 0 and a is returned unchanged.     */
+    WORD32 quotient = 0;
+    UWORD32 mantissa_nr, mantissa_dr;
+    UWORD32 abs_a, abs_b;
+    WORD sign = 0;
+
+    LOOPINDEX i;
+    WORD q_nr, q_dr;
+
+    /* division by zero: nothing to compute */
+    if(0 == b)
+    {
+        *q_format = 0;
+        return (a);
+    }
+
+    /* Magnitudes are formed in unsigned arithmetic: -(x + 1) cannot overflow */
+    /* for a negative x, so the most negative input no longer depends on the  */
+    /* signed overflow of a plain '-x'. sign flips once per negative operand. */
+    if(a < 0)
+    {
+        abs_a = (UWORD32)(-(a + 1)) + 1u;
+        sign = (WORD)(sign ^ -1);
+    }
+    else
+    {
+        abs_a = (UWORD32)a;
+    }
+
+    if(b < 0)
+    {
+        abs_b = (UWORD32)(-(b + 1)) + 1u;
+        sign = (WORD)(sign ^ -1);
+    }
+    else
+    {
+        abs_b = (UWORD32)b;
+    }
+
+    /* left-align both magnitudes; the shift counts define the Q-format */
+    q_nr = norm32((WORD32)abs_a);
+    mantissa_nr = abs_a << (q_nr);
+    q_dr = norm32((WORD32)abs_b);
+    mantissa_dr = abs_b << (q_dr);
+    *q_format = (WORD)(30 + q_nr - q_dr);
+
+    for(i = 0; i < 31; i++)
+    {
+        /* subc_32() leaves the new quotient bit in bit 0 and the shifted */
+        /* remainder in the other bits; peel the bit off and keep the rest */
+        mantissa_nr = subc_32(mantissa_nr, mantissa_dr);
+        quotient = (quotient << 1) + (WORD32)(mantissa_nr & 0x00000001);
+        mantissa_nr = mantissa_nr & 0xfffffffe;
+    }
+
+    /* a non-zero sign means exactly one operand was negative */
+    if(sign < 0)
+    {
+        quotient = -quotient;
+    }
+
+    return quotient;
+}
+
+/*****************************************************************************/
+/* */
+/* Function name : mac16x16in32 */
+/* */
+/* Description : multiplies two 16 bit numbers and accumulates their */
+/* result in a 32 bit variable without sat */
+/* */
+/* Inputs : WORD32 a, WORD16 b, WORD16 c */
+/* */
+/* Globals : none */
+/* */
+/* Processing : multiply & add */
+/* */
+/* Outputs : none */
+/* */
+/* Returns : WORD32 acc - 32 bit signed value */
+/* */
+/* Issues : none */
+/* */
+/* Revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 20 11 2003 aadithya kamath created */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 mac16x16in32(WORD32 a, WORD16 b, WORD16 c)
+{
+    /* product of b and c, computed by mult16x16in32() */
+    WORD32 product = mult16x16in32(b, c);
+
+    /* fold the product into the accumulator a through add32() */
+    WORD32 total = add32(a, product);
+
+    return total;
+}
+
+/*****************************************************************************/
+/* */
+/* Function name : mac16x16in32_shl */
+/* */
+/* Description : multiplies two 16 bit numbers and accumulates their */
+/* result in a 32 bit variable without sat */
+/* after removing a redundant sign bit in the product */
+/* */
+/* Inputs : WORD32 a, WORD16 b, WORD16 c */
+/* */
+/* Globals : none */
+/* */
+/* Processing : multiply, shift left by 1 & add */
+/* */
+/* Outputs : none */
+/* */
+/* Returns : WORD32 acc - 32 bit signed value */
+/* */
+/* Issues : none */
+/* */
+/* Revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 20 11 2003 aadithya kamath created */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 mac16x16in32_shl(WORD32 a, WORD16 b, WORD16 c)
+{
+    /* product of b and c, computed by mult16x16in32_shl() */
+    WORD32 shifted_prod = mult16x16in32_shl(b, c);
+
+    /* fold the product into the accumulator a through add32() */
+    WORD32 total = add32(a, shifted_prod);
+
+    return total;
+}
+
+/*****************************************************************************/
+/* */
+/* Function name : mac16x16in32_shl_sat */
+/* */
+/* Description : multiplies two 16 bit numbers and accumulates their */
+/* result in a 32 bit variable with sat */
+/* after removing a redundant sign bit in the product */
+/* */
+/* Inputs : WORD32 a, WORD16 b, WORD16 c */
+/* */
+/* Globals : none */
+/* */
+/* Processing : multiply, shift left by 1 & add with sat */
+/* */
+/* Outputs : none */
+/* */
+/* Returns : WORD32 acc - 32 bit signed value */
+/* */
+/* Issues : none */
+/* */
+/* Revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 20 11 2003 aadithya kamath created */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 mac16x16in32_shl_sat(WORD32 a, WORD16 b, WORD16 c)
+{
+    /* product of b and c, computed by mult16x16in32_shl_sat() */
+    WORD32 sat_prod = mult16x16in32_shl_sat(b, c);
+
+    /* fold the product into the accumulator a through add32_sat() */
+    WORD32 total = add32_sat(a, sat_prod);
+
+    return total;
+}
+
+/*****************************************************************************/
+/* */
+/* Function name : msu16x16in32 */
+/* */
+/* Description : multiplies two 16 bit numbers and subtracts their */
+/* result from a 32 bit variable without sat */
+/* */
+/* Inputs : WORD32 a, WORD16 b, WORD16 c */
+/* */
+/* Globals : none */
+/* */
+/* Processing : multiply & sub */
+/* */
+/* Outputs : none */
+/* */
+/* Returns : WORD32 acc - 32 bit signed value */
+/* */
+/* Issues : none */
+/* */
+/* Revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 20 11 2003 aadithya kamath created */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 msu16x16in32(WORD32 a, WORD16 b, WORD16 c)
+{
+    /* product of b and c, computed by mult16x16in32() */
+    WORD32 product = mult16x16in32(b, c);
+
+    /* take the product away from the accumulator a through sub32() */
+    WORD32 remainder = sub32(a, product);
+
+    return remainder;
+}
+
+/*****************************************************************************/
+/* */
+/* Function name : msu16x16in32_shl */
+/* */
+/* Description : multiplies two 16 bit numbers and subtracts their */
+/* result from a 32 bit variable without sat */
+/* after removing a redundant sign bit in the product */
+/* */
+/* Inputs : WORD32 a, WORD16 b, WORD16 c */
+/* */
+/* Globals : none */
+/* */
+/* Processing : multiply, shift left by 1 & sub */
+/* */
+/* Outputs : none */
+/* */
+/* Returns : WORD32 acc - 32 bit signed value */
+/* */
+/* Issues : none */
+/* */
+/* Revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 20 11 2003 aadithya kamath created */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 msu16x16in32_shl(WORD32 a, WORD16 b, WORD16 c)
+{
+    /* product of b and c, computed by mult16x16in32_shl() */
+    WORD32 shifted_prod = mult16x16in32_shl(b, c);
+
+    /* take the product away from the accumulator a through sub32() */
+    WORD32 remainder = sub32(a, shifted_prod);
+
+    return remainder;
+}
+
+/*****************************************************************************/
+/* */
+/* Function name : msu16x16in32_shl_sat */
+/* */
+/* Description : multiplies two 16 bit numbers and subtracts their */
+/* result from a 32 bit variable with sat */
+/* after removing a redundant sign bit in the product */
+/* */
+/* Inputs : WORD32 a, WORD16 b, WORD16 c */
+/* */
+/* Globals : none */
+/* */
+/* Processing : multiply, shift left by 1 & sub with sat */
+/* */
+/* Outputs : none */
+/* */
+/* Returns : WORD32 acc - 32 bit signed value */
+/* */
+/* Issues : none */
+/* */
+/* Revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 20 11 2003 aadithya kamath created */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 msu16x16in32_shl_sat(WORD32 a, WORD16 b, WORD16 c)
+{
+    /* product of b and c, computed by mult16x16in32_shl_sat() */
+    WORD32 sat_prod = mult16x16in32_shl_sat(b, c);
+
+    /* take the product away from the accumulator a through sub32_sat() */
+    WORD32 remainder = sub32_sat(a, sat_prod);
+
+    return remainder;
+}
+
+/*****************************************************************************/
+/* */
+/* Function name : add32_shr */
+/* */
+/* Description : adding two 32 bit numbers and taking care of overflow */
+/* by downshifting both numbers before addition */
+/* */
+/* Inputs : WORD32 a, WORD32 b */
+/* */
+/* Globals : none */
+/* */
+/* Processing : shift right inputs by 1 & add */
+/* */
+/* Outputs : none */
+/* */
+/* Returns : WORD32 sum 32 bit signed value */
+/* */
+/* Issues : none */
+/* */
+/* Revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 20 11 2003 aadithya kamath created */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 add32_shr(WORD32 a, WORD32 b)
+{
+    /* shift each operand right by one through shr32() before combining */
+    WORD32 half_a = shr32(a, 1);
+    WORD32 half_b = shr32(b, 1);
+
+    /* add the two down-shifted operands through add32() */
+    WORD32 total = add32(half_a, half_b);
+
+    return total;
+}
+
+/*****************************************************************************/
+/* */
+/* Function name : sub32_shr */
+/* */
+/* Description : subtracting two 32 bit numbers and taking care of */
+/* overflow by downshifting both numbers before subtraction */
+/* */
+/* Inputs : WORD32 a, WORD32 b */
+/* */
+/* Globals : none */
+/* */
+/* Processing : shift right inputs by 1 & sub */
+/* */
+/* Outputs : none */
+/* */
+/* Returns : WORD32 diff - 32 bit signed value */
+/* */
+/* Issues : none */
+/* */
+/* Revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 20 11 2003 aadithya kamath created */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 sub32_shr(WORD32 a, WORD32 b)
+{
+    /* shift each operand right by one through shr32() before combining */
+    WORD32 half_a = shr32(a, 1);
+    WORD32 half_b = shr32(b, 1);
+
+    /* subtract the down-shifted b from the down-shifted a through sub32() */
+    WORD32 difference = sub32(half_a, half_b);
+
+    return difference;
+}
+#endif /* __IA_BASIC_OPS32_H__ */
diff --git a/encoder/ia_basic_ops40.h b/encoder/ia_basic_ops40.h
new file mode 100644
index 0000000..826f8c1
--- /dev/null
+++ b/encoder/ia_basic_ops40.h
@@ -0,0 +1,1057 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* */
+/* file name : ia_basic_ops40.h */
+/* */
+/* description : this file has all basic operations, which have */
+/* 40 bit intermediate operations */
+/* */
+/* list of functions: 1. norm40 */
+/* 2. add32_shr40 */
+/* 3. sub32_shr40 */
+/* 4. mult32x16in32_shl */
+/* 5. mult32x16in32 */
+/* 6. mult32x16in32_shl_sat */
+/* 7. mult32_shl */
+/* 8. mult32 */
+/* 9. mult32_shl_sat */
+/* 10.mac32x16in32 */
+/* 11.mac32x16in32_shl */
+/* 12.mac32x16in32_shl_sat */
+/* 13.mac32 */
+/* 14.mac32_shl */
+/* 15.mac32_shl_sat */
+/* 16.msu32x16in32 */
+/* 17.msu32x16in32_shl */
+/* 18.msu32x16in32_shl_sat */
+/* 19.msu32 */
+/* 20.msu32_shl */
+/* 21.msu32_shl_sat */
+/* 22.mac3216_arr40 */
+/* 23.mac32_arr40 */
+/* 24.mac16_arr40 */
+/* 25.add32_arr40 */
+/* */
+/* issues / problems: none */
+/* */
+/* revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 06 12 2002 ashok M/chetan K created */
+/* 21 11 2003 raghavendra K R modified(bug fixes) */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+#ifndef __IA_BASIC_OPS40_H__
+#define __IA_BASIC_OPS40_H__
+
+/*****************************************************************************/
+/* file includes */
+/* ia_type_def.h */
+/* ia_basic_ops32.h */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* */
+/* function name : norm40 */
+/* */
+/* description : normalize input to 32 bits, return denormalizing info */
+/* static function */
+/* */
+/* inputs : WORD40 *in */
+/* */
+/* globals : none */
+/* */
+/* processing : if input above 32_bits then only the upper 8 bits */
+/* normalized to fit in 32_bits else normal 32_bit norming */
+/* */
+/* outputs : normalized 32 bit value */
+/* */
+/* returns : WORD16 exponent */
+/* */
+/* assumptions : if supplied input is 0 the result returned is 31 */
+/* */
+/* issues : none */
+/* */
+/* revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 06 12 2002 ashok M/chetan K created */
+/* 21 11 2003 raghavendra K R modified(bug fixes) */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD16 norm40(WORD40 *in)
+{
+ WORD16 expo;
+ WORD32 tempo;
+ WORD40 cmp_val = (WORD40)-2147483648.0;
+ /* a zero input is reported as 31 and *in is left untouched */
+ if(0 == (*in))
+ return 31;
+ /* *in lies within [cmp_val, 0x7fffffff]: normalise it with norm32() */
+ if(((*in) <= 0x7fffffff) && ((WORD40)(*in) >= cmp_val))
+ {
+ tempo = (WORD32)(*in);
+ expo = norm32(tempo);
+ *in = tempo << expo;
+ /* the left-shift count that was applied is returned as is */
+ return (expo);
+ }
+ /* otherwise norm32() is applied to *in >> 31 to size the right shift */
+ tempo = (WORD32)((*in) >> 31);
+ expo = 31 - (norm32(tempo));
+ *in = (*in) >> expo;
+ /* the right-shift count that was applied is returned negated */
+ return (-expo);
+}
+
+/*****************************************************************************/
+/* */
+/* function name : add32_shr40 */
+/* */
+/* description : adds two numbers and right shifts once */
+/* */
+/* inputs : WORD32 a, WORD32 b */
+/* */
+/* outputs : none */
+/* */
+/* globals : none */
+/* */
+/* processing : add and right shift */
+/* */
+/* returns : WORD32 sum */
+/* */
+/* issues : none */
+/* */
+/* revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 06 12 2002 ashok M/chetan K created */
+/* 21 11 2003 raghavendra K R modified(bug fixes) */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 add32_shr40(WORD32 a, WORD32 b)
+{
+    /* widen both operands to WORD40 before adding them */
+    WORD40 wide_sum = (WORD40)a + (WORD40)b;
+
+    /* shift the wide sum right by one, then narrow it to 32 bits */
+    WORD40 halved = wide_sum >> 1;
+    return ((WORD32)halved);
+}
+
+/*****************************************************************************/
+/* */
+/* function name : sub32_shr40 */
+/* */
+/* description : subtracts and right shifts once */
+/* */
+/* inputs : WORD32 a, WORD32 b */
+/* */
+/* outputs : none */
+/* */
+/* globals : none */
+/* */
+/* processing : subtract and right shift */
+/* */
+/* returns : WORD32 sum */
+/* */
+/* issues : none */
+/* */
+/* revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 06 12 2002 ashok M/chetan K created */
+/* 21 11 2003 raghavendra K R modified(bug fixes) */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 sub32_shr40(WORD32 a, WORD32 b)
+{
+    /* widen both operands to WORD40 before subtracting b from a */
+    WORD40 wide_diff = (WORD40)a - (WORD40)b;
+
+    /* shift the wide difference right by one, then narrow it to 32 bits */
+    WORD40 halved = wide_diff >> 1;
+    return ((WORD32)halved);
+}
+
+/*****************************************************************************/
+/* */
+/* function name : mult32x16in32_shl */
+/* */
+/* description : multiply WORD32 with WORD16 return bits 46 to 15 */
+/* doesnt take care of saturation */
+/* */
+/* inputs : WORD32 a, WORD16 b */
+/* */
+/* outputs : none */
+/* */
+/* globals : none */
+/* */
+/* processing : multiply, add 16384 to round and right shift by 15 */
+/* */
+/* returns : WORD32 result */
+/* */
+/* issues : none */
+/* */
+/* revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 06 12 2002 ashok M/chetan K created */
+/* 21 11 2003 raghavendra K R modified(bug fixes) */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 mult32x16in32_shl(WORD32 a, WORD16 b)
+{
+    /* product of a and b, formed in LWORD64 */
+    LWORD64 wide_prod = (LWORD64)a * (LWORD64)b;
+
+    /* add 16384 (half of 1 << 15) to round, then drop the low 15 bits */
+    LWORD64 rounded = (wide_prod + 16384) >> 15;
+
+    /* narrow to the 32-bit result; nothing is saturated here */
+    return ((WORD32)rounded);
+}
+
+/*****************************************************************************/
+/* */
+/* function name : mult32x16in32 */
+/* */
+/* description : multiply WORD32 with WORD16 return bits 47 to 16 */
+/* */
+/* inputs : WORD32 a, WORD16 b */
+/* */
+/* outputs : none */
+/* */
+/* globals : none */
+/* */
+/* processing : multiply, add 16384 and right shift by 16 */
+/* */
+/* returns : WORD32 result */
+/* */
+/* issues : none */
+/* */
+/* revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 06 12 2002 ashok M/chetan K created */
+/* 21 11 2003 raghavendra K R modified(bug fixes) */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 mult32x16in32(WORD32 a, WORD16 b)
+{
+    /* product of a and b, formed in LWORD64 */
+    LWORD64 wide_prod = (LWORD64)a * (LWORD64)b;
+
+    /* add 16384, then drop the low 16 bits and narrow to 32 bits */
+    /* NOTE(review): 16384 is a quarter of 1 << 16, not half - confirm */
+    LWORD64 biased = (wide_prod + 16384) >> 16;
+
+    return ((WORD32)biased);
+}
+
+/*****************************************************************************/
+/* */
+/* function name : mult32x16in32_shl_sat */
+/* */
+/* description : multiply WORD32 with WORD16 return bits 46 to 15 */
+/* take care of saturation (MIN32 x MIN16 = MAX32) */
+/* */
+/* inputs : WORD32 a, WORD16 b */
+/* */
+/* outputs : none */
+/* */
+/* globals : none */
+/* */
+/* processing : if both inputs are MIN values return MAX32 else */
+/* multiply and right shift by 15 */
+/* */
+/* returns : WORD32 result */
+/* */
+/* issues : none */
+/* */
+/* revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 06 12 2002 ashok M/chetan K created */
+/* 21 11 2003 raghavendra K R modified(bug fixes) */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 mult32x16in32_shl_sat(WORD32 a, WORD16 b)
+{
+    WORD32 saturated = (WORD32)0x7fffffff;
+
+    /* Only the pair (0x80000000, 0x8000) is special-cased; any other */
+    /* operands are passed straight to mult32x16in32_shl().           */
+    if((a != (WORD32)0x80000000) || (b != (WORD16)0x8000))
+    {
+        return (mult32x16in32_shl(a, b));
+    }
+
+    /* both operands are at their most negative value: answer with the */
+    /* largest positive 32-bit value                                   */
+    return (saturated);
+}
+
+/*****************************************************************************/
+/* */
+/* function name : mult32_shl */
+/* */
+/* description : multiply WORD32 with WORD32 return bits 62 to 31 */
+/* doesnt take care of saturation */
+/* */
+/* inputs : WORD32 a, WORD32 b */
+/* */
+/* outputs : none */
+/* */
+/* globals : none */
+/* */
+/* processing : multiply and right shift by 31 */
+/* */
+/* returns : WORD32 result */
+/* */
+/* issues : none */
+/* */
+/* revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 06 12 2002 ashok M/chetan K created */
+/* 21 11 2003 raghavendra K R modified(bug fixes) */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 mult32_shl(WORD32 a, WORD32 b)
+{
+    /* product of a and b, formed in LWORD64 */
+    LWORD64 wide_prod = (LWORD64)a * (LWORD64)b;
+
+    /* drop the low 31 bits without rounding and narrow to 32 bits; */
+    /* no saturation is applied                                     */
+    WORD32 narrowed = (WORD32)(wide_prod >> 31);
+    return (narrowed);
+}
+
+/*****************************************************************************/
+/* */
+/* function name : mult32 */
+/* */
+/* description : multiply WORD32 with WORD32 return bits 63 to 32 */
+/* */
+/* inputs : WORD32 a, WORD32 b */
+/* */
+/* outputs : none */
+/* */
+/* globals : none */
+/* */
+/* processing : multiply and right shift by 32 */
+/* */
+/* returns : WORD32 result */
+/* */
+/* issues : none */
+/* */
+/* revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 06 12 2002 ashok M/chetan K created */
+/* 21 11 2003 raghavendra K R modified(bug fixes) */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 mult32(WORD32 a, WORD32 b)
+{
+    /* product of a and b, formed in LWORD64 */
+    LWORD64 wide_prod = (LWORD64)a * (LWORD64)b;
+
+    /* drop the low 32 bits without rounding and narrow what is left */
+    /* to the 32-bit result                                          */
+    WORD32 upper_word = (WORD32)(wide_prod >> 32);
+    return (upper_word);
+}
+
+/*****************************************************************************/
+/* */
+/* function name : mult32_shl_sat */
+/* */
+/* description : multiply WORD32 with WORD32 return bits 62 to 31 */
+/* take care of saturation (MIN32 x MIN32 = MAX32) */
+/* */
+/* inputs : WORD32 a, WORD32 b */
+/* */
+/* outputs : none */
+/* */
+/* globals : none */
+/* */
+/* processing : if both inputs are MIN values return MAX32 else */
+/* multiply and right shift by 31 */
+/* */
+/* returns : WORD32 result */
+/* */
+/* issues : none */
+/* */
+/* revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 06 12 2002 ashok M/chetan K created */
+/* 21 11 2003 raghavendra K R modified(bug fixes) */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+#define MPYHIRC(x, y) \
+ (((int)((short)(x >> 16) * (unsigned short)(y & 0x0000FFFF) + 0x4000) >> 15) + \
+ ((int)((short)(x >> 16) * (short)((y) >> 16)) << 1))
+/* MPYHIRC / MPYLUHS: 16-bit partial products (hi*lo rounded, hi*hi, lo*hi) */
+#define MPYLUHS(x, y) ((int)((unsigned short)(x & 0x0000FFFF) * (short)(y >> 16)))
+
+static PLATFORM_INLINE WORD32 mult32_shl_sat(WORD32 a, WORD32 b)
+{
+    /* MIN32 x MIN32 is the only product that does not fit in 32 bits; */
+    /* return MAX32 for it instead of evaluating the split multiply    */
+    return ((a == (WORD32)0x80000000) && (b == (WORD32)0x80000000))
+               ? (WORD32)0x7fffffff
+               : (MPYHIRC(a, b) + (MPYLUHS(a, b) >> 15));
+}
+
+/*****************************************************************************/
+/* */
+/* function name : mac32x16in32 */
+/* */
+/* description : multiply WORD32 with WORD16 add bits 47 to 16 to acc */
+/* */
+/* inputs : WORD32 a, WORD32 b, WORD16 c */
+/* */
+/* outputs : none */
+/* */
+/* globals : none */
+/* */
+/* processing : multiply, right shift by 16 & add to acc */
+/* */
+/* returns : WORD32 accumulated result */
+/* */
+/* issues : none */
+/* */
+/* revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 06 12 2002 ashok M/chetan K created */
+/* 21 11 2003 raghavendra K R modified(bug fixes) */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 mac32x16in32(WORD32 a, WORD32 b, WORD16 c)
+{
+    /* scaled product of b and c, taken from mult32x16in32() */
+    WORD32 scaled_prod = mult32x16in32(b, c);
+
+    /* accumulate with a plain '+'; the sum is not saturated */
+    return (a + scaled_prod);
+}
+
+/*****************************************************************************/
+/* */
+/* function name : mac32x16in32_shl */
+/* */
+/* description : multiply WORD32 with WORD16 add bits 46 to 15 to acc */
+/* doesnt take care of saturation */
+/* */
+/* inputs : WORD32 a, WORD32 b, WORD16 c */
+/* */
+/* outputs : none */
+/* */
+/* globals : none */
+/* */
+/* processing : multiply, right shift by 15 & add to acc */
+/* */
+/* returns : WORD32 accumulated result */
+/* */
+/* issues : none */
+/* */
+/* revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 06 12 2002 ashok M/chetan K created */
+/* 21 11 2003 raghavendra K R modified(bug fixes) */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 mac32x16in32_shl(WORD32 a, WORD32 b, WORD16 c)
+{
+    /* scaled product of b and c, taken from mult32x16in32_shl() */
+    WORD32 scaled_prod = mult32x16in32_shl(b, c);
+
+    /* accumulate with a plain '+'; the sum is not saturated */
+    return (a + scaled_prod);
+}
+
+/*****************************************************************************/
+/* */
+/* function name : mac32x16in32_shl_sat */
+/* */
+/* description : multiply WORD32 with WORD16 add bits 46 to 15 to acc */
+/* takes care of saturation in multiply and addition */
+/* */
+/* inputs : WORD32 a, WORD32 b, WORD16 c */
+/* */
+/* outputs : none */
+/* */
+/* globals : none */
+/* */
+/* processing : if both inputs are MIN values add MAX32 else multiply, */
+/* right shift by 15 & add to acc with saturation */
+/* */
+/* returns : WORD32 accumulated result */
+/* */
+/* issues : none */
+/* */
+/* revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 06 12 2002 ashok M/chetan K created */
+/* 21 11 2003 raghavendra K R modified(bug fixes) */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 mac32x16in32_shl_sat(WORD32 a, WORD32 b, WORD16 c)
+{
+ return (add32_sat(a, mult32x16in32_shl_sat(b, c))); /* product of b and c from mult32x16in32_shl_sat(), added to a by add32_sat() */
+}
+
+/*****************************************************************************/
+/* */
+/* function name : mac32 */
+/* */
+/* description : multiply WORD32 with WORD32 add bits 63 to 32 to acc */
+/* */
+/* inputs : WORD32 a, WORD32 b, WORD32 c */
+/* */
+/* outputs : none */
+/* */
+/* globals : none */
+/* */
+/* processing : multiply, right shift by 32 & add to acc */
+/* */
+/* returns : WORD32 accumulated result */
+/* */
+/* issues : none */
+/* */
+/* revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 06 12 2002 ashok M/chetan K created */
+/* 21 11 2003 raghavendra K R modified(bug fixes) */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 mac32(WORD32 a, WORD32 b, WORD32 c)
+{
+    /* scaled product of b and c, taken from mult32() */
+    WORD32 scaled_prod = mult32(b, c);
+
+    /* accumulate with a plain '+'; the sum is not saturated */
+    return (a + scaled_prod);
+}
+
+/*****************************************************************************/
+/* */
+/* function name : mac32_shl */
+/* */
+/* description : multiply WORD32 with WORD32 add bits 62 to 31 to acc */
+/* doesnt take care of saturation */
+/* */
+/* inputs : WORD32 a, WORD32 b, WORD32 c */
+/* */
+/* outputs : none */
+/* */
+/* globals : none */
+/* */
+/* processing : multiply, right shift by 31 & add to acc */
+/* */
+/* returns : WORD32 accumulated result */
+/* */
+/* issues : none */
+/* */
+/* revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 06 12 2002 ashok M/chetan K created */
+/* 21 11 2003 raghavendra K R modified(bug fixes) */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 mac32_shl(WORD32 a, WORD32 b, WORD32 c)
+{
+    /* scaled product of b and c, taken from mult32_shl() */
+    WORD32 scaled_prod = mult32_shl(b, c);
+
+    /* accumulate with a plain '+'; the sum is not saturated */
+    return (a + scaled_prod);
+}
+
+/*****************************************************************************/
+/* */
+/* function name : mac32_shl_sat */
+/* */
+/* description : multiply WORD32 with WORD32 add bits 62 to 31 to acc */
+/* takes care of saturation in multiply and addition */
+/* */
+/* inputs : WORD32 a, WORD32 b, WORD32 c */
+/* */
+/* outputs : none */
+/* */
+/* globals : none */
+/* */
+/* processing : if both inputs are MIN values add MAX32 else multiply, */
+/* right shift by 31 & add to acc with saturation */
+/* */
+/* returns : WORD32 accumulated result */
+/* */
+/* issues : none */
+/* */
+/* revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 06 12 2002 ashok M/chetan K created */
+/* 21 11 2003 raghavendra K R modified(bug fixes) */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 mac32_shl_sat(WORD32 a, WORD32 b, WORD32 c)
+{
+ return (add32_sat(a, mult32_shl_sat(b, c))); /* product of b and c from mult32_shl_sat(), added to a by add32_sat() */
+}
+
+/*****************************************************************************/
+/* */
+/* function name : msu32x16in32 */
+/* */
+/* description : multiply WORD32 with WORD16 sub bits 47 to 16 from acc */
+/* */
+/* inputs : WORD32 a, WORD32 b, WORD16 c */
+/* */
+/* outputs : none */
+/* */
+/* globals : none */
+/* */
+/* processing : multiply, right shift by 16 & sub from acc */
+/* */
+/* returns : WORD32 accumulated result */
+/* */
+/* issues : none */
+/* */
+/* revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 06 12 2002 ashok M/chetan K created */
+/* 21 11 2003 raghavendra K R modified(bug fixes) */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 msu32x16in32(WORD32 a, WORD32 b, WORD16 c)
+{
+    /* scaled product of b and c, taken from mult32x16in32() */
+    WORD32 scaled_prod = mult32x16in32(b, c);
+
+    /* subtract with a plain '-'; the difference is not saturated */
+    return (a - scaled_prod);
+}
+
+/*****************************************************************************/
+/* */
+/* function name : msu32x16in32_shl */
+/* */
+/* description : multiply WORD32 with WORD16 sub bits 46 to 15 from acc */
+/* doesnt take care of saturation */
+/* */
+/* inputs : WORD32 a, WORD32 b, WORD16 c */
+/* */
+/* outputs : none */
+/* */
+/* globals : none */
+/* */
+/* processing : multiply, right shift by 15 & sub from acc */
+/* */
+/* returns : WORD32 accumulated result */
+/* */
+/* issues : none */
+/* */
+/* revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 06 12 2002 ashok M/chetan K created */
+/* 21 11 2003 raghavendra K R modified(bug fixes) */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 msu32x16in32_shl(WORD32 a, WORD32 b, WORD16 c)
+{
+    /*
+     * Take the value produced by mult32x16in32_shl(b, c) away from the
+     * accumulator a with a plain '-'; nothing here clamps the result.
+     */
+    return a - mult32x16in32_shl(b, c);
+}
+
+/*****************************************************************************/
+/* */
+/* function name : msu32x16in32_shl_sat */
+/* */
+/* description : multiply WORD32 with WORD16 sub bits 46 to 15 from acc */
+/* takes care of saturation in multiply and addition */
+/* */
+/* inputs : WORD32 a, WORD32 b, WORD16 c */
+/* */
+/* outputs : none */
+/* */
+/* globals : none */
+/* */
+/* processing : if input mi_ns sub MAX32 else multiply, */
+/* right shift by 15 & sub from acc with saturation */
+/* */
+/* returns : WORD32 accumulated result */
+/* */
+/* issues : none */
+/* */
+/* revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 06 12 2002 ashok M/chetan K created */
+/* 21 11 2003 raghavendra K R modified(bug fixes) */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 msu32x16in32_shl_sat(WORD32 a, WORD32 b, WORD16 c)
+{
+ return (sub32_sat(a, mult32x16in32_shl_sat(b, c))); /* a minus the (b, c) product; both helpers are the _sat variants */
+}
+
+/*****************************************************************************/
+/* */
+/* function name : msu32 */
+/* */
+/* description : multiply WORD32 with WORD32 sub bits 63 to 32 from acc */
+/* */
+/* inputs : WORD32 a, WORD32 b, WORD32 c */
+/* */
+/* outputs : none */
+/* */
+/* globals : none */
+/* */
+/* processing : multiply, right shift by 32 & sub from acc */
+/* */
+/* returns : WORD32 accumulated result */
+/* */
+/* issues : none */
+/* */
+/* revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 06 12 2002 ashok M/chetan K created */
+/* 21 11 2003 raghavendra K R modified(bug fixes) */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 msu32(WORD32 a, WORD32 b, WORD32 c)
+{
+    /*
+     * Take the value produced by mult32(b, c) away from the accumulator a
+     * with a plain '-'; nothing in this function clamps the result.
+     */
+    return a - mult32(b, c);
+}
+
+/*****************************************************************************/
+/* */
+/* function name : msu32_shl */
+/* */
+/* description : multiply WORD32 with WORD32 sub bits 62 to 31 from acc */
+/* doesnt take care of saturation */
+/* */
+/* inputs : WORD32 a, WORD32 b, WORD32 c */
+/* */
+/* outputs : none */
+/* */
+/* globals : none */
+/* */
+/* processing : multiply, right shift by 31 & sub from acc */
+/* */
+/* returns : WORD32 accumulated result */
+/* */
+/* issues : none */
+/* */
+/* revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 06 12 2002 ashok M/chetan K created */
+/* 21 11 2003 raghavendra K R modified(bug fixes) */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 msu32_shl(WORD32 a, WORD32 b, WORD32 c)
+{
+    /*
+     * Take the value produced by mult32_shl(b, c) away from the
+     * accumulator a with a plain '-'; nothing here clamps the result.
+     */
+    return a - mult32_shl(b, c);
+}
+
+/*****************************************************************************/
+/* */
+/* function name : msu32_shl_sat */
+/* */
+/* description : multiply WORD32 with WORD32 sub bits 62 to 31 from acc */
+/* takes care of saturation in multiply and addition */
+/* */
+/* inputs : WORD32 a, WORD32 b, WORD32 c */
+/* */
+/* outputs : none */
+/* */
+/* globals : none */
+/* */
+/* processing : if input mi_ns sub MAX32 else multiply, */
+/* right shift by 31 & sub from acc with saturation */
+/* */
+/* returns : WORD32 accumulated result */
+/* */
+/* issues : none */
+/* */
+/* revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 06 12 2002 ashok M/chetan K created */
+/* 21 11 2003 raghavendra K R modified(bug fixes) */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 msu32_shl_sat(WORD32 a, WORD32 b, WORD32 c)
+{
+ return (sub32_sat(a, mult32_shl_sat(b, c))); /* a minus the (b, c) product; both helpers are the _sat variants */
+}
+
+/*****************************************************************************/
+/* */
+/* function name : mac3216_arr40 */
+/* */
+/* description : returns normalized 32 bit accumulated result and */
+/* denormalizing info */
+/* */
+/* inputs : WORD32 x[], WORD16 y[], LOOPINDEX length */
+/* */
+/* outputs : WORD16 *q_val */
+/* */
+/* globals : none */
+/* */
+/* processing : multiply and accumulate in WORD40 finally normalize */
+/* */
+/* returns : WORD32 accumulated result */
+/* */
+/* assumptions : length < 256 for strict definition of WORD40 */
+/* */
+/* issues : none */
+/* */
+/* revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 06 12 2002 ashok M/chetan K created */
+/* 21 11 2003 raghavendra K R modified(bug fixes) */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 mac3216_arr40(WORD32 *x, WORD16 *y, LOOPINDEX length, WORD16 *q_val)
+{
+    WORD40 acc = 0;
+    LOOPINDEX idx = 0;
+
+    while(idx < length)
+    {
+        acc = acc + (WORD40)(mult32x16in32(x[idx], y[idx]));
+        idx++;
+    }
+    *q_val = norm40(&acc); /* norm40 is handed &acc, so acc may be rewritten before the return */
+
+    return (WORD32)acc;
+}
+
+/*****************************************************************************/
+/* */
+/* function name : mac32_arr40 */
+/* */
+/* description : returns normalized 32 bit accumulated result and */
+/* denormalizing info */
+/* */
+/* inputs : WORD32 x[], WORD32 y[], LOOPINDEX length */
+/* */
+/* outputs : WORD16 *q_val */
+/* */
+/* globals : none */
+/* */
+/* processing : multiply and accumulate in WORD40 finally normalize */
+/* */
+/* returns : WORD32 accumulated result */
+/* */
+/* assumptions : length < 256 for strict definition of WORD40 */
+/* */
+/* issues : none */
+/* */
+/* revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 06 12 2002 ashok M/chetan K created */
+/* 21 11 2003 raghavendra K R modified(bug fixes) */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 mac32_arr40(WORD32 *x, WORD32 *y, LOOPINDEX length, WORD16 *q_val)
+{
+    WORD40 acc = 0;
+    LOOPINDEX idx = 0;
+
+    while(idx < length)
+    {
+        acc = acc + (WORD40)(mult32(x[idx], y[idx]));
+        idx++;
+    }
+    *q_val = norm40(&acc); /* norm40 is handed &acc, so acc may be rewritten before the return */
+
+    return (WORD32)acc;
+}
+
+/*****************************************************************************/
+/* */
+/* function name : mac16_arr40 */
+/* */
+/* description : returns normalized 32 bit accumulated result and */
+/* denormalizing info */
+/* */
+/* inputs : WORD16 x[], WORD16 y[], LOOPINDEX length */
+/* */
+/* outputs : WORD16 *q_val */
+/* */
+/* globals : none */
+/* */
+/* processing : multiply and accumulate in WORD40 finally normalize */
+/* */
+/* returns : WORD32 accumulated result */
+/* */
+/* assumptions : length < 256 for strict definition of WORD40 */
+/* */
+/* issues : none */
+/* */
+/* revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 06 12 2002 ashok M/chetan K created */
+/* 21 11 2003 raghavendra K R modified(bug fixes) */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 mac16_arr40(WORD16 *x, WORD16 *y, LOOPINDEX length, WORD16 *q_val)
+{
+    WORD40 acc = 0;
+    LOOPINDEX idx = 0;
+
+    while(idx < length)
+    {
+        acc = acc + (WORD40)((WORD32)x[idx] * (WORD32)y[idx]);
+        idx++;
+    }
+    *q_val = norm40(&acc); /* norm40 is handed &acc, so acc may be rewritten before the return */
+
+    return (WORD32)acc;
+}
+
+/*****************************************************************************/
+/* */
+/* function name : add32_arr40 */
+/* */
+/* description : returns normalized 32 bit accumulated result and */
+/* denormalizing info */
+/* */
+/* inputs : WORD32 x[], LOOPINDEX length */
+/* */
+/* outputs : WORD16 *q_val */
+/* */
+/* globals : none */
+/* */
+/* processing : accumulate in WORD40 finally normalize */
+/* */
+/* returns : WORD32 accumulated result */
+/* */
+/* assumptions : length < 256 for strict definition of WORD40 */
+/* */
+/* issues : none */
+/* */
+/* revision history : */
+/* */
+/* DD MM YYYY author changes */
+/* 06 12 2002 ashok M/chetan K created */
+/* 21 11 2003 raghavendra K R modified(bug fixes) */
+/* 15 11 2004 tejaswi/vishal modified(bug fixes/cleanup) */
+/* */
+/*****************************************************************************/
+
+static PLATFORM_INLINE WORD32 add32_arr40(WORD32 *in_arr, LOOPINDEX length, WORD16 *q_val)
+{
+    WORD40 acc = 0;
+    LOOPINDEX idx = 0;
+
+    while(idx < length)
+    {
+        acc = acc + (WORD40)in_arr[idx];
+        idx++;
+    }
+    *q_val = norm40(&acc); /* norm40 is handed &acc, so acc may be rewritten before the return */
+
+    return (WORD32)acc;
+}
+
+#endif /* __IA_BASIC_OPS40_H__ */
diff --git a/encoder/ia_type_def.h b/encoder/ia_type_def.h
new file mode 100644
index 0000000..60c48df
--- /dev/null
+++ b/encoder/ia_type_def.h
@@ -0,0 +1,102 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* */
+/* File Name : ia_type_def.h */
+/* */
+/* Description : Type definitions file */
+/* */
+/* List of Functions: None */
+/* */
+/* Issues / Problems: None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author Changes */
+/* 29 07 2005 ittiam Created */
+/* */
+/*****************************************************************************/
+
+#ifndef __TYPEDEFTEST_H__
+#define __TYPEDEFTEST_H__
+
+#include <stdint.h>
+
+/****************************************************************************/
+/* types type define prefix examples bytes */
+/************************ *********** ****** **************** ***** */
+typedef char CHAR8; /* c CHAR8 c_name 1 */
+typedef char *pCHAR8; /* pc pCHAR8 pc_name 1 */
+typedef int8_t WORD8; /* b WORD8 b_name 1 */
+typedef int8_t *pWORD8; /* pb pWORD8 pb_name 1 */
+typedef uint8_t UWORD8; /* ub UWORD8 ub_count 1 */
+typedef uint8_t *pUWORD8; /* pub pUWORD8 pub_count 1 */
+
+typedef int16_t WORD16; /* s WORD16 s_count 2 */
+typedef int16_t *pWORD16; /* ps pWORD16 ps_count 2 */
+typedef uint16_t UWORD16; /* us UWORD16 us_count 2 */
+typedef uint16_t *pUWORD16; /* pus pUWORD16 pus_count 2 */
+
+typedef int32_t WORD24; /* k WORD24 k_count 3 (backed by int32_t) */
+typedef int32_t *pWORD24; /* pk pWORD24 pk_count 3 (backed by int32_t) */
+typedef uint32_t UWORD24; /* uk UWORD24 uk_count 3 (backed by uint32_t) */
+typedef uint32_t *pUWORD24; /* puk pUWORD24 puk_count 3 (backed by uint32_t) */
+
+typedef int32_t WORD32; /* i WORD32 i_count 4 */
+typedef int32_t *pWORD32; /* pi pWORD32 pi_count 4 */
+typedef uint32_t UWORD32; /* ui UWORD32 ui_count 4 */
+typedef uint32_t *pUWORD32; /* pui pUWORD32 pui_count 4 */
+
+/* These typedefs remain same across C64xP and ARM */
+typedef int64_t WORD40; /* m WORD40 m_count 5 (backed by int64_t) */
+typedef int64_t *pWORD40; /* pm pWORD40 pm_count 5 (backed by int64_t) */
+typedef uint64_t UWORD40; /* um UWORD40 um_count 5 (backed by uint64_t) */
+typedef uint64_t *pUWORD40; /* pum pUWORD40 pum_count 5 (backed by uint64_t) */
+
+typedef int64_t LWORD64; /* h LWORD64 h_count 8 */
+typedef int64_t *pWORD64; /* ph pWORD64 ph_count 8 */
+typedef uint64_t ULWORD64; /* uh ULWORD64 uh_count 8 */
+typedef uint64_t *pUWORD64; /* puh pUWORD64 puh_count 8 */
+
+typedef float FLOAT32; /* f FLOAT32 f_count 4 */
+typedef float *pFLOAT32; /* pf pFLOAT32 pf_count 4 */
+typedef double FLOAT64; /* d FLOAT64 d_count 8 */
+typedef double *pFlOAT64; /* pd pFlOAT64 pd_count 8 (name is spelled with a lowercase 'l') */
+
+typedef void VOID; /* v VOID v_flag 4 */
+typedef void *pVOID; /* pv pVOID pv_flag 4 */
+
+/* variable size types: platform optimized implementation */
+typedef int32_t BOOL; /* bool BOOL bool_true */
+typedef uint32_t UBOOL; /* ubool UBOOL ubool_true */
+typedef int32_t FLAG; /* flag FLAG flag_false */
+typedef uint32_t UFLAG; /* uflag UFLAG uflag_false */
+typedef int32_t LOOPIDX; /* lp LOOPIDX lp_index */
+typedef uint32_t ULOOPIDX; /* ulp ULOOPIDX ulp_index */
+typedef int32_t WORD; /* lp WORD lp_index */
+typedef uint32_t UWORD; /* ulp UWORD ulp_index */
+
+typedef LOOPIDX LOOPINDEX; /* lp LOOPINDEX lp_index (alias of LOOPIDX) */
+typedef ULOOPIDX ULOOPINDEX; /* ulp ULOOPINDEX ulp_index (alias of ULOOPIDX) */
+
+#define PLATFORM_INLINE __inline
+
+#endif /* __TYPEDEFTEST_H__ */
diff --git a/encoder/ihevce_api.h b/encoder/ihevce_api.h
new file mode 100644
index 0000000..a8b4e25
--- /dev/null
+++ b/encoder/ihevce_api.h
@@ -0,0 +1,1432 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_api.h
+*
+* \brief
+* This file contains definitions and structures which are shared between
+* application and HEVC Encoder Processing interface layer
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_API_H_
+#define _IHEVCE_API_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+#define IHEVCE_MAX_IO_BUFFERS 3
+
+#define IHEVCE_EXTENDED_SAR 255
+
+#define IHEVCE_MBR_CORE_WEIGHTAGE 0.25f
+
+/** Maximum number of resolutions encoder can run */
+#define IHEVCE_MAX_NUM_RESOLUTIONS 1 //10
+
+/** Maximum number of bit-rate instances encoder can run */
+#define IHEVCE_MAX_NUM_BITRATES 1 //5
+
+#define MAX_NUM_CORES 8 // Supports upto 160 logical cores.
+
+/* Max length of filenames */
+#define MAX_LEN_FILENAME 200
+
+/* max number of tiles per row/cols */
+//Main/Main10 profile (=4096/256) //Don't change this
+#define MAX_TILE_COLUMNS 16
+//Main/Main10 profile (=2160/64) //Don't change this
+#define MAX_TILE_ROWS 34
+
+#define IHEVCE_ASYNCH_ERROR_START 0x0000E600
+#define IHEVCE_SYNCH_ERROR_START 0x0000E700
+
+#define MAX_NUM_DYN_BITRATE_CMDS (IHEVCE_MAX_NUM_RESOLUTIONS * IHEVCE_MAX_NUM_BITRATES)
+
+/* NAL units related definitions */
+#define MAX_NUM_PREFIX_NALS_PER_AU 20
+#define MAX_NUM_SUFFIX_NALS_PER_AU 20
+#define MAX_NUM_VCL_NALS_PER_AU 200 /* as per level 5.1 from spec */
+
+/* Maximum number of processor groups supported */
+#define MAX_NUMBER_PROC_GRPS 4
+
+/** @brief maximum length of CC User Data in a single frame */
+#define MAX_SEI_PAYLOAD_PER_TLV (0x200)
+
+#define MAX_NUMBER_OF_SEI_PAYLOAD (10)
+
+#define IHEVCE_COMMANDS_TAG_MASK (0x0000FFFF)
+
+// Upper 16 bits are used to communicate payload type
+#define IHEVCE_PAYLOAD_TYPE_MASK (0xFFFF0000)
+
+#define IHEVCE_PAYLOAD_TYPE_SHIFT (16)
+
+#define MAX_FRAME_RATE 120.0
+#define MIN_FRAME_RATE 1.0
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+/**
+ * @brief Enumerations for Quality config.
+ */
+typedef enum
+{
+ IHEVCE_QUALITY_DUMMY = 0xFFFFFFFF,
+ IHEVCE_QUALITY_P0 = 0,
+ IHEVCE_QUALITY_P2 = 2,
+ IHEVCE_QUALITY_P3,
+ IHEVCE_QUALITY_P4,
+ IHEVCE_QUALITY_P5,
+ IHEVCE_QUALITY_P6,
+ IHEVCE_QUALITY_P7,
+ IHEVCE_NUM_QUALITY_PRESET
+} IHEVCE_QUALITY_CONFIG_T;
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+/**
+ * @brief Enumerations for Quality config for auxiliary bitrate in case of MBR.
+ */
+typedef enum
+{
+ IHEVCE_MBR_DUMMY = -1,
+ IHEVCE_MBR_HIGH_QUALITY = 0,
+ IHEVCE_MBR_MEDIUM_SPEED,
+ IHEVCE_MBR_HIGH_SPEED,
+ IHEVCE_MBR_EXTREME_SPEED
+} IHEVCE_QUALITY_CONFIG_MBR_T;
+
+/**
+ * @brief Enumerations for Rate Control config.
+ */
+typedef enum
+{
+ IHEVCE_RC_DUMMY = 0xFFFFFFFF,
+ IHEVCE_RC_LOW_DELAY = 1,
+ IHEVCE_RC_STORAGE = 2,
+ IHEVCE_RC_TWOPASS = 3,
+ IHEVCE_RC_NONE = 4,
+ IHEVCE_RC_USER_DEFINED = 5,
+ IHEVCE_RC_RATECONTROLPRESET_DEFAULT = IHEVCE_RC_LOW_DELAY
+} IHEVCE_RATE_CONTROL_CONFIG_T;
+
+/**
+ * @brief Enumerations for Intra Refresh config.
+ */
+typedef enum
+{
+ IHEVCE_REFRESH_DUMMY = 0,
+ IHEVCE_I_SILICE = 1,
+ IHEVCE_COLUMN_BASED = 2,
+ IHEVCE_DBR = 3,
+ IHEVCE_GDR = 4
+} IHEVCE_REFRESH_CONFIG_T;
+
+/**
+ * @brief Enumerations for ASYNCH Control Commands Tags.
+ */
+typedef enum
+{
+ IHEVCE_ASYNCH_API_END_TAG = 0xFFFF,
+ IHEVCE_ASYNCH_API_SETBITRATE_TAG = 0x01,
+ IHEVCE_ASYNCH_API_SET_RF_TAG = 0x02,
+ IHEVCE_ASYNCH_API_FORCE_CLOSE_TAG = 0x03
+} IHEVCE_ASYNCH_API_COMMAND_TAG_T;
+
+typedef enum
+{
+ IHEVCE_ASYNCH_ERR_NO_END_TAG = IHEVCE_ASYNCH_ERROR_START + 0x01,
+ IHEVCE_ASYNCH_ERR_TLV_ERROR = IHEVCE_ASYNCH_ERROR_START + 0x02,
+ IHEVCE_ASYNCH_ERR_LENGTH_NOT_ZERO = IHEVCE_ASYNCH_ERROR_START + 0x03,
+ IHEVCE_ASYNCH_ERR_BR_NOT_BYTE = IHEVCE_ASYNCH_ERROR_START + 0x04,
+ IHEVCE_ASYNCH_FORCE_CLOSE_NOT_SUPPORTED = IHEVCE_ASYNCH_ERROR_START + 0x05
+} IHEVCE_ASYNCH_ERROR_TAG_T;
+
+/**
+ * @brief Enumerations for SYNCH Control Commands Tags.
+ */
+typedef enum
+{
+ IHEVCE_SYNCH_API_END_TAG = 0xFFFF,
+ IHEVCE_SYNCH_API_FLUSH_TAG = 0x21,
+ IHEVCE_SYNCH_API_FORCE_IDR_TAG = 0x22,
+ IHEVCE_SYNCH_API_REG_KEYFRAME_SEI_TAG = 0x23,
+ IHEVCE_SYNCH_API_REG_ALLFRAME_SEI_TAG = 0x24,
+ IHEVCE_SYNCH_API_SET_RES_TAG = 0x25
+} IHEVCE_SYNCH_API_COMMAND_TAG_T;
+
+typedef enum
+{
+ IHEVCE_SYNCH_ERR_NO_END_TAG = IHEVCE_SYNCH_ERROR_START + 0x11,
+ IHEVCE_SYNCH_ERR_TLV_ERROR = IHEVCE_SYNCH_ERROR_START + 0x12,
+ IHEVCE_SYNCH_ERR_LENGTH_NOT_ZERO = IHEVCE_SYNCH_ERROR_START + 0x13,
+ IHEVCE_SYNCH_ERR_NO_PADDING = IHEVCE_SYNCH_ERROR_START + 0x14,
+ IHEVCE_SYNCH_ERR_WRONG_LENGTH = IHEVCE_SYNCH_ERROR_START + 0x15,
+ IHEVCE_SYNCH_ERR_FREQ_FORCE_IDR_RECEIVED = IHEVCE_SYNCH_ERROR_START + 0x16,
+ IHEVCE_SYNCH_ERR_TOO_MANY_SEI_MSG = IHEVCE_SYNCH_ERROR_START + 0x17,
+ IHEVCE_SYNCH_ERR_SET_RES_NOT_SUPPORTED = IHEVCE_SYNCH_ERROR_START + 0x18
+} IHEVCE_SYNCH_ERROR_TAG_T;
+
+/**
+ * @brief Enumerations for output status identifier
+ */
+typedef enum
+{
+ IHEVCE_PROCESS = 0,
+ IHEVCE_CONTROL_STS,
+ IHEVCE_CREATE_STS,
+} IHEVCE_OUT_STS_ID_T;
+
+/**
+ * Scenetype enums
+ */
+typedef enum
+{
+ IHEVCE_SCENE_TYPE_NORMAL = 0,
+ IHEVCE_SCENE_TYPE_SCENE_CUT,
+ IHEVCE_SCENE_TYPE_FLASH,
+ IHEVCE_SCENE_TYPE_FADE_IN,
+ IHEVCE_SCENE_TYPE_FADE_OUT,
+ IHEVCE_SCENE_TYPE_DISSOLVE,
+ IHEVCE_MAX_NUM_SCENE_TYPES
+} IHEVCE_SCENE_TYPE;
+
+/**
+ * Type of data. Used for scanning the config file
+ */
+typedef enum
+{
+ IHEVCE_STRING = 0,
+ IHEVCE_INT,
+ IHEVCE_FLOAT
+} IHEVCE_DATA_TYPE;
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+
+/**
+ * @brief Structure to describe the properties of Source of encoder.
+ */
+typedef struct
+{
+ /** Used for checking version compatibility */
+ WORD32 i4_size;
+
+ /** Input chroma format
+ * @sa : IV_COLOR_FORMAT_T
+ */
+ WORD32 inp_chr_format;
+
+ /** Internal chroma format
+ * @sa : IV_COLOR_FORMAT_T
+ */
+ WORD32 i4_chr_format;
+
+ /** Width of input luma */
+ WORD32 i4_width;
+
+ /** Height of input luma */
+ WORD32 i4_height;
+
+ /** Configured Width of input luma */
+ WORD32 i4_orig_width;
+
+ /** Configured Height of input luma */
+ WORD32 i4_orig_height;
+
+ /** Width of each pixel in bits */
+ WORD32 i4_input_bit_depth;
+
+ /** Input Content Type
+ * @sa : IV_CONTENT_TYPE_T
+ */
+ WORD32 i4_field_pic;
+
+ /** Frame/Field rate numerator
+ * (final fps = frame_rate_num/frame_rate_denom)
+ */
+ WORD32 i4_frm_rate_num;
+
+ /** Can be 1000 or 1001 to allow proper representation
+ * of fractional frame-rates
+ */
+ WORD32 i4_frm_rate_denom;
+
+ /**
+ * Whether Top field is encoded first or bottom
+ */
+ WORD32 i4_topfield_first;
+
+} ihevce_src_params_t;
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+/**
+ * @brief Structure to describe attributes of a layer.
+ */
+typedef struct
+{
+ /** Used for checking version compatibility */
+ WORD32 i4_size;
+
+ /** Width of input luma */
+ WORD32 i4_width;
+
+ /** Height of input luma */
+ WORD32 i4_height;
+
+ /** Frame/Field rate
+ * (final fps = src frame_rate_num/src frame_rate_denom/i4_frm_rate_scale_factor)
+ */
+ WORD32 i4_frm_rate_scale_factor;
+
+ /** Quality vs. complexity
+ * @sa : IHEVCE_QUALITY_CONFIG_T
+ */
+ IHEVCE_QUALITY_CONFIG_T i4_quality_preset;
+
+ /** 0 : Level 4, any level above this not supported */
+ WORD32 i4_codec_level;
+
+ /** Number of bit-rate instances for the current layer
+ */
+ WORD32 i4_num_bitrate_instances;
+
+ /** Target Bit-rate in bits for Constant bitrate cases */
+ WORD32 ai4_tgt_bitrate[IHEVCE_MAX_NUM_BITRATES];
+
+ /** Peak Bit-rate in bits for each bitrate */
+ WORD32 ai4_peak_bitrate[IHEVCE_MAX_NUM_BITRATES];
+
+ /** Maximum VBV buffer size in bits for each bitrate */
+ WORD32 ai4_max_vbv_buffer_size[IHEVCE_MAX_NUM_BITRATES];
+
+ /** Frame level Qp for Constant Qp mode */
+ WORD32 ai4_frame_qp[IHEVCE_MAX_NUM_BITRATES];
+
+} ihevce_tgt_params_t;
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+/**
+ * @brief Structure to describe the properties of target
+ resolution of encoder.
+ */
+typedef struct
+{
+ /** Used for checking version compatibility */
+ WORD32 i4_size;
+
+ /** Number of resolution layers
+ */
+ WORD32 i4_num_res_layers;
+
+ /* Applicable only for multi res cases.
+ Output of only one resolution to be dumped */
+
+ WORD32 i4_mres_single_out;
+
+ /* Specify starting resolution id for mres single out case.
+ This is only valid in mres_single out mode */
+
+ WORD32 i4_start_res_id;
+
+ /** To enable reuse across layers
+ */
+ WORD32 i4_multi_res_layer_reuse;
+
+ /** Quality vs. complexity for auxiliary bitrates
+ * @sa : IHEVCE_QUALITY_CONFIG_MBR_T
+ */
+ IHEVCE_QUALITY_CONFIG_MBR_T i4_mbr_quality_setting;
+
+ /**
+ *Bit depth used by encoder
+ */
+ WORD32 i4_internal_bit_depth;
+
+ /**
+ *Temporal scalability enable Flag
+ */
+ WORD32 i4_enable_temporal_scalability;
+
+ /** Resolution and frame rate scaling factors for
+ * each layer
+ */
+ ihevce_tgt_params_t as_tgt_params[IHEVCE_MAX_NUM_RESOLUTIONS];
+
+ /*Scaler handle */
+ void *pv_scaler_handle;
+
+ /*Function pointer for scaling luma data*/
+ void (*pf_scale_luma)(
+ void *pv_scaler_handle,
+ UWORD8 *pu1_in_buf,
+ WORD32 i4_inp_width,
+ WORD32 i4_inp_height,
+ WORD32 i4_inp_stride,
+ UWORD8 *pu1_out_buf,
+ WORD32 i4_out_width,
+ WORD32 i4_out_height,
+ WORD32 i4_out_stride);
+
+ /*Function pointer for scaling chroma data*/
+ void (*pf_scale_chroma)(
+ void *pv_scaler_handle,
+ UWORD8 *pu1_in_buf,
+ WORD32 i4_inp_width,
+ WORD32 i4_inp_height,
+ WORD32 i4_inp_stride,
+ UWORD8 *pu1_out_buf,
+ WORD32 i4_out_width,
+ WORD32 i4_out_height,
+ WORD32 i4_out_stride);
+
+} ihevce_tgt_layer_params_t;
+
+/**
+ * @brief Structure to describe the stream level
+ * properties encoder should adhere to
+ */
+typedef struct
+{
+ /** Used for checking version compatibility */
+ WORD32 i4_size;
+
+ /** 0 - HEVC , no other value supported */
+ WORD32 i4_codec_type;
+
+ /**1 : Main Profile ,2: Main 10 Profile. no other value supported */
+ WORD32 i4_codec_profile;
+
+ /** 0: Main Tier ,1: High Tier. no other value supported */
+ WORD32 i4_codec_tier;
+
+ /** Enable VUI output 1: enable 0 : disable */
+ WORD32 i4_vui_enable;
+
+ /** Enable specific SEI messages in the stream
+ * 1: enable 0 : disable
+ */
+ WORD32 i4_sei_enable_flag;
+
+ /** Enable specific SEI payload (other than pic timing and buffering period) messages in the stream
+ * 1: enable 0 : disable
+ */
+ WORD32 i4_sei_payload_enable_flag;
+
+ /** Enable specific SEI buffering period messages in the stream
+ * 1: enable 0 : disable
+ */
+ WORD32 i4_sei_buffer_period_flags;
+
+ /** Enable specific SEI Picture timing messages in the stream
+ * 1: enable 0 : disable
+ */
+ WORD32 i4_sei_pic_timing_flags;
+
+ /** Enable specific SEI recovery point messages in the stream
+ * 1: enable 0 : disable
+ */
+ WORD32 i4_sei_recovery_point_flags;
+
+ /** Enable specific SEI mastering display colour volume in the stream
+ * 1: enable 0 : disable
+ */
+ WORD32 i4_sei_mastering_disp_colour_vol_flags;
+
+ /**
+ * Array to store the display_primaries_x values
+ */
+ UWORD16 au2_display_primaries_x[3];
+
+ /**
+ * Array to store the display_primaries_y values
+ */
+ UWORD16 au2_display_primaries_y[3];
+
+ /**
+ * Variable to store the white point x value
+ */
+ UWORD16 u2_white_point_x;
+
+ /**
+ * Variable to store the white point y value
+ */
+ UWORD16 u2_white_point_y;
+
+ /**
+ * Variable to store the max display mastering luminance value
+ */
+ UWORD32 u4_max_display_mastering_luminance;
+
+ /**
+ * Variable to store the min display mastering luminance value
+ */
+ UWORD32 u4_min_display_mastering_luminance;
+
+ /**
+ * Enable Content Level Light Info
+ */
+ WORD32 i4_sei_cll_enable;
+
+ /**
+ * 16bit unsigned number which indicates the maximum pixel intensity of all samples in bit-stream in units of 1 candela per square metre
+ */
+ UWORD16 u2_sei_max_cll;
+
+ /**
+ * 16bit unsigned number which indicates the average pixel intensity of all samples in bit-stream in units of 1 candela per square metre
+ */
+ UWORD16 u2_sei_avg_cll;
+
+ /** Enable/Disable SEI Hash on the Decoded picture & Hash type
+ * 3 : Checksum, 2 : CRC, 1 : MD5, 0 : disable
+ */
+ WORD32 i4_decoded_pic_hash_sei_flag;
+
+ /** Enable specific AUD messages in the stream
+ * 1: enable 0 : disable
+ */
+ WORD32 i4_aud_enable_flags;
+
+ /** Enable EOS messages in the stream
+ * 1: enable 0 : disable
+ */
+ WORD32 i4_eos_enable_flags;
+
+ /** Enable automatic insertion of SPS at each CDR
+ * 1: enable 0 : disable
+ */
+ WORD32 i4_sps_at_cdr_enable;
+
+ WORD32 i4_interop_flags;
+
+} ihevce_out_strm_params_t;
+
+/**
+ * @brief Structure to describe the Encoding Coding tools
+ * to be used by the Encoder
+ */
+typedef struct
+{
+ /** Used for checking version compatibility*/
+ WORD32 i4_size;
+
+ /** Max spacing between IDR frames -
+ * 0 indicates only at the beginning
+ */
+ WORD32 i4_max_closed_gop_period;
+
+ /** Min spacing between IDR frames -
+ * Max = Min provides fixed segment length
+ */
+ WORD32 i4_min_closed_gop_period;
+
+ /** Max spacing between CRA frames -
+ *
+ */
+ WORD32 i4_max_cra_open_gop_period;
+
+ /** Max spacing between I frames -
+ *
+ */
+ WORD32 i4_max_i_open_gop_period;
+
+ /** Maximum number of dyadic temporal layers */
+ WORD32 i4_max_temporal_layers;
+
+ /** Maximum number of reference frames */
+ WORD32 i4_max_reference_frames;
+
+ /** Enable weighted prediction
+ * 0 - disabled (default); 1 -enabled
+ */
+ WORD32 i4_weighted_pred_enable;
+
+ /** Deblocking type 0 - no deblocking;
+ * 1 - default; 2 - disable across slices
+ */
+ WORD32 i4_deblocking_type;
+
+ /** Use default scaling matrices
+ * 0 - disabled; 1 - enabled (default)
+ */
+ WORD32 i4_use_default_sc_mtx;
+
+ /** Cropping mode for cases where frame dimensions
+ * are not multiple of MIN CU size
+ * 1 - enable padding to min_cu multiple and generate cropping flags;
+ * 0 - report error
+ */
+ WORD32 i4_cropping_mode;
+
+ /** 0 - no slices; 1 - packet based; 2 - CU based */
+ WORD32 i4_slice_type;
+
+ /** Entropy sync enable flag, going by the field name
+ * 0 - disabled; 1 - enabled (NOTE(review): default value not confirmed here)
+ */
+ WORD32 i4_enable_entropy_sync;
+
+ /** VQET control parameter */
+ WORD32 i4_vqet;
+
+} ihevce_coding_params_t;
+
+/**
+ * @brief Structure to describe the Configurable parameters of Encoder
+ */
+typedef struct
+{
+ /** Used for checking version compatibility */
+ WORD32 i4_size;
+
+ /* ---------- Tiles related parameters ------------ */
+
+ /* ----------- CU related parameters -------------- */
+
+ /** 4 - 16x16; 5 - 32x32 (default); 6 - 64x64 */
+ WORD32 i4_max_log2_cu_size;
+
+ /** 3 - 8x8; 4 - 16x16 (default); 5 - 32x32 ; 6 - 64x64 */
+ WORD32 i4_min_log2_cu_size;
+
+ /** 2 - 4x4 (default) ; 3 - 8x8; 4 - 16x16; 5 - 32x32 */
+ WORD32 i4_min_log2_tu_size;
+
+ /** 2 - 4x4; 3 - 8x8 (default); 4 - 16x16; 5 - 32x32 */
+ WORD32 i4_max_log2_tu_size;
+
+ /** Max transform tree depth for intra */
+ WORD32 i4_max_tr_tree_depth_I;
+
+ /** Max transform tree depth for inter */
+ WORD32 i4_max_tr_tree_depth_nI;
+
+ /* ---------- Rate Control related parameters ------ */
+
+ /** Rate control mode 0 - constant qp (default); 1- CBR */
+ WORD32 i4_rate_control_mode;
+
+ /** CU level Qp modulation
+ 0 - No Qp modulation at CU level;
+ 1 - QP modulation level 1
+ 2 - QP modulation level 2
+ 3 - QP modulation level 3*/
+ WORD32 i4_cu_level_rc;
+
+ /* Factor used in capped VBR mode to fine tune for quality */
+ WORD32 i4_rate_factor;
+
+ /** Enable stuffing 0 - disabled (default); 1 -enabled */
+ WORD32 i4_stuffing_enable;
+
+ /*The max deviation allowed from file size (used only in VBR, in CBR vbv buffer size dictates the deviation allowed)*/
+ WORD32 i4_vbr_max_peak_rate_dur;
+
+ /*Number of frames to encode. required to control allowed bit deviation at any point of time*/
+ WORD32 i4_num_frms_to_encode;
+
+ /** Initial buffer fullness when decoding starts */
+ WORD32 i4_init_vbv_fullness;
+
+ /** Frame level I frame max qp in rate control mode */
+ WORD32 i4_max_frame_qp;
+
+ /** Frame level I frame min qp in rate control mode */
+ WORD32 i4_min_frame_qp;
+ /* --------- ME related parameters ---------------- */
+
+ /** Maximum search range in full pel units. horizontal direction */
+ WORD32 i4_max_search_range_horz;
+
+ /** Maximum search range in full pel units. vertical direction */
+ WORD32 i4_max_search_range_vert;
+
+ /* Variable used to save old rate factor */
+ /* Used only for plugin */
+ WORD32 i4_old_rate_factor;
+
+} ihevce_config_prms_t;
+
+/**
+ * @brief Structure to describe dynamically configurable
+ * parameters of the encoder
+ *
+ * these new params can be passed as async commands
+ * to the encoder by sending an IHEVCE_CMD_CTL_SETPARAMS command
+ */
+typedef struct
+{
+ /** Used for checking version compatibility */
+ WORD32 i4_size;
+
+ /** Resolution ID of the stream to which the bitrate change is applied */
+ WORD32 i4_tgt_res_id;
+
+ /** Bitrate ID, within the resolution ID, of the stream to which the bitrate change is applied */
+ WORD32 i4_tgt_br_id;
+
+ /** New target bit-rate for on the fly change */
+ WORD32 i4_new_tgt_bitrate;
+
+ /** New peak bit-rate for on the fly change */
+ WORD32 i4_new_peak_bitrate;
+
+ /** New rate factor for on the fly change */
+ WORD32 i4_new_rate_factor;
+} ihevce_dyn_config_prms_t;
+
+/**
+ * @brief Structure to describe dynamically configurable
+ * parameters of the encoder for dynamic resolution change
+ *
+ * these new params can be passed as synchronous commands
+ * to the encoder by sending an IHEVCE_SYNCH_API_SET_RES_TAG command
+ */
+typedef struct
+{
+ /** Resolution ID of the stream to which the bitrate change is applied */
+ WORD32 i4_new_res_id;
+
+ /** New target bit-rate for on the fly change */
+ WORD32 i4_new_tgt_bitrate;
+
+} ihevce_dyn_res_prms_t;
+
+/**
+ * @brief Structure to describe the Look Ahead
+ * Processing parameters of the encoder
+ */
+typedef struct
+{
+ /** Used for checking version compatibility */
+ WORD32 i4_size;
+
+ /** Number of frames to look ahead for RC and adaptive quant -
+ * each field counts as one frame for interlaced content
+ */
+ WORD32 i4_rc_look_ahead_pics;
+
+ /** Enable computation of weights & offsets for weighted prediction */
+ WORD32 i4_enable_wts_ofsts;
+
+ /* Enables the denoiser as a part of video preprocessing. */
+ WORD32 i4_denoise_enable;
+
+ /* Enable this flag if input is interlaced and output is progressive */
+ WORD32 i4_deinterlacer_enable;
+
+} ihevce_lap_params_t;
+
+/**
+ * @brief Structure to describe the parameters
+ * related to multi-bitrate encoding
+ */
+typedef struct
+{
+ /** Number of bit-rate instances */
+ WORD32 i4_num_bitrate_instances;
+
+ /* Number of intra modes to be evaluated for a derived instance */
+ WORD32 i4_num_modes_intra;
+
+ /* Number of inter modes to be evaluated for a derived instance */
+ WORD32 i4_num_modes_inter;
+
+} ihevce_mbr_params_t;
+
+/**
+ * @brief Vui/Sei parameters of the encoder
+ */
+typedef struct
+{
+ /**
+ * indicates the presence of aspect_ratio
+ */
+ UWORD8 u1_aspect_ratio_info_present_flag;
+
+ /**
+ * specifies the aspect ratio of the luma samples (one entry per resolution)
+ */
+ UWORD8 au1_aspect_ratio_idc[IHEVCE_MAX_NUM_RESOLUTIONS];
+
+ /**
+ * width of the luma samples. user dependent
+ */
+ UWORD16 au2_sar_width[IHEVCE_MAX_NUM_RESOLUTIONS];
+
+ /**
+ * height of the luma samples. user dependent
+ */
+ UWORD16 au2_sar_height[IHEVCE_MAX_NUM_RESOLUTIONS];
+
+ /**
+ * if 1, specifies that the overscan_appropriate_flag is present
+ * if 0, the preferred display method for the video signal is unspecified
+ */
+ UWORD8 u1_overscan_info_present_flag;
+
+ /**
+ * if 1, indicates that the cropped decoded pictures output
+ * are suitable for display using overscan
+ */
+ UWORD8 u1_overscan_appropriate_flag;
+
+ /**
+ * if 1, specifies that video_format, video_full_range_flag and
+ * colour_description_present_flag are present
+ */
+ UWORD8 u1_video_signal_type_present_flag;
+
+ /**
+ * value of the video_format syntax element
+ */
+ UWORD8 u1_video_format;
+
+ /**
+ * indicates the black level and range of the luma and chroma signals
+ */
+ UWORD8 u1_video_full_range_flag;
+
+ /**
+ * if 1, specifies that colour_primaries, transfer_characteristics
+ * and matrix_coefficients are present
+ */
+ UWORD8 u1_colour_description_present_flag;
+
+ /**
+ * indicates the chromaticity coordinates of the source primaries
+ */
+ UWORD8 u1_colour_primaries;
+
+ /**
+ * indicates the opto-electronic transfer characteristic of the source picture
+ */
+ UWORD8 u1_transfer_characteristics;
+
+ /**
+ * the matrix coefficients used in deriving luma and chroma signals
+ * from the green, blue, and red primaries
+ */
+ UWORD8 u1_matrix_coefficients;
+
+ /**
+ * if 1, specifies that chroma_sample_loc_type_top_field and
+ * chroma_sample_loc_type_bottom_field are present
+ */
+ UWORD8 u1_chroma_loc_info_present_flag;
+
+ /**
+ * location of chroma samples (top field, then bottom field)
+ */
+ UWORD8 u1_chroma_sample_loc_type_top_field;
+
+ UWORD8 u1_chroma_sample_loc_type_bottom_field;
+
+ /**
+ * if 1, specifies that the syntax structure hrd_parameters is present in the vui_parameters syntax structure
+ */
+ UWORD8 u1_vui_hrd_parameters_present_flag;
+
+ /**
+ * VUI level HRD parameters (member currently commented out)
+ */
+ //hrd_params_t s_vui_hrd_parameters;
+
+ /**
+ * HRD parameter: indicates the presence of
+ * num_units_in_ticks and time_scale
+ */
+ UWORD8 u1_timing_info_present_flag;
+
+ /**
+ * NAL HRD parameters present flag
+ */
+ UWORD8 u1_nal_hrd_parameters_present_flag;
+
+} ihevce_vui_sei_params_t;
+
+/**
+ * @brief Multi thread related parameters passed to the encoder during create
+ */
+
+typedef struct
+{
+ /** Kept for maintaining backwards compatibility in future */
+ WORD32 i4_size;
+
+ /** Total number of logical cores assigned to be used by the encoder
+ */
+ WORD32 i4_max_num_cores;
+
+ /** Total number of processor groups in the machine on which the encoder is run.
+ */
+ WORD32 i4_num_proc_groups;
+
+ /** Total number of logical cores present per processor group of the machine.
+ */
+ WORD32 ai4_num_cores_per_grp[MAX_NUMBER_PROC_GRPS];
+
+ /** Flag to enable the thread affinity feature
+ * 0: Thread affinity disabled
+ * 1: Thread affinity enabled
+ */
+ WORD32 i4_use_thrd_affinity;
+
+ /**
+ * Memory allocation control flag: Reserved (to be used later)
+ */
+ WORD32 i4_memory_alloc_ctrl_flag;
+
+ /**
+ * Array of thread affinity masks for frame processing threads
+ * (PRE Enc Group)
+ */
+ ULWORD64 au8_core_aff_mask[MAX_NUM_CORES];
+
+} ihevce_static_multi_thread_params_t;
+
+/**
+ * @brief File IO APIs: callback table; every callback takes the call back handle pv_cb_handle as its first argument
+ */
+typedef struct
+{
+ FILE *(*ihevce_fopen)(void *pv_cb_handle, const char *pi1_filename, const char *pi1_mode);
+
+ int (*ihevce_fclose)(void *pv_cb_handle, FILE *pf_stream);
+
+ int (*ihevce_fflush)(void *pv_cb_handle, FILE *pf_stream);
+
+ int (*ihevce_fseek)(void *pv_cb_handle, FILE *pf_stream, long i4_offset, int i4_origin);
+
+ size_t (*ihevce_fread)(
+ void *pv_cb_handle, void *pv_ptr, size_t u4_size, size_t u4_count, FILE *pf_stream);
+
+ int (*ihevce_fscanf)(
+ void *pv_cb_handle,
+ IHEVCE_DATA_TYPE e_data_type,
+ FILE *file_ptr,
+ const char *format,
+ void *pv_dst);
+
+ int (*ihevce_fprintf)(void *pv_cb_handle, FILE *pf_stream, const char *pi1_format, ...);
+
+ size_t (*ihevce_fwrite)(
+ void *pv_cb_handle, const void *pv_ptr, size_t i4_size, size_t i4_count, FILE *pf_stream);
+
+ char *(*ihevce_fgets)(void *pv_cb_handle, char *pi1_str, int i4_size, FILE *pf_stream);
+
+} ihevce_file_io_api_t;
+
+/**
+ * @brief System APIs to implement call back functions in the encoder
+ */
+typedef struct
+{
+ /* Call back handle passed as first argument to every system api */
+ void *pv_cb_handle;
+
+ /* Console APIs */
+ int (*ihevce_printf)(void *pv_cb_handle, const char *i1_str, ...);
+
+ //int (*ihevce_scanf) (void *pv_handle, const char *i1_str, ...);
+
+ int (*ihevce_sscanf)(void *pv_cb_handle, const char *pv_src, const char *format, int *p_dst_int);
+
+ int (*ihevce_sprintf)(void *pv_cb_handle, char *pi1_str, const char *format, ...);
+
+ int (*ihevce_sprintf_s)(
+ void *pv_cb_handle, char *pi1_str, size_t i4_size, const char *format, ...);
+
+ /* File I/O APIs */
+ ihevce_file_io_api_t s_file_io_api;
+
+} ihevce_sys_api_t;
+
+/**
+ * @brief Structure to describe multipass related params
+ */
+typedef struct
+{
+ /** Kept for maintaining backwards compatibility in future */
+ WORD32 i4_size;
+
+ /* 0: Normal mode 1: only dumps stat 2: 2nd pass reads from stat file and rewrites the same file */
+ WORD32 i4_pass;
+
+ /* Flag to specify the algorithm used for bit-distribution
+ in the second pass */
+ WORD32 i4_multi_pass_algo_mode;
+
+ /* Stat file to read or write data of frame statistics */
+ WORD8 *pi1_frame_stats_filename;
+
+ /* Stat file to read or write data of gop level statistics */
+ WORD8 *pi1_gop_stats_filename;
+
+ /* Stat file to read or write CTB level data */
+ WORD8 *pi1_sub_frames_stats_filename;
+
+} ihevce_pass_prms_t;
+
+/**
+ * @brief Structure to describe tile params
+ */
+typedef struct
+{
+ /** Kept for maintaining backwards compatibility in future */
+ WORD32 i4_size;
+
+ /* flag to indicate tile encoding enabled/disabled */
+ WORD32 i4_tiles_enabled_flag;
+
+ /* flag to indicate uniform spacing of tiles */
+ WORD32 i4_uniform_spacing_flag;
+
+ /* number of syntactical tile columns and rows in a frame */
+ WORD32 i4_num_tile_cols;
+ WORD32 i4_num_tile_rows;
+
+ /* Column width array to store the width of each tile column */
+ WORD32 ai4_column_width[MAX_TILE_COLUMNS];
+
+ /* Row height array to store the height of each tile row */
+ WORD32 ai4_row_height[MAX_TILE_ROWS];
+
+} ihevce_app_tile_params_t;
+
+/**
+ * @brief Structure to describe slice params
+ */
+typedef struct
+{
+ /** Kept for maintaining backwards compatibility in future */
+ WORD32 i4_size;
+
+ /** Flag to control dependent slices.
+ 0: Disable all slice segment limits
+ 1: Enforce max number of CTBs
+ 2: Enforce max number of bytes **/
+ WORD32 i4_slice_segment_mode;
+
+ /** Limit value, interpreted according to i4_slice_segment_mode:
+ 1: max number of CTBs per slice segment
+ 2: max number of bytes per slice segment **/
+ WORD32 i4_slice_segment_argument;
+
+} ihevce_slice_params_t;
+
+/**
+ * @brief Static configuration parameters of the encoder
+ */
+typedef struct
+{
+ /** Kept for maintaining backwards compatibility in future */
+ WORD32 i4_size;
+
+ /** Structure describing the input parameters - Application should populate
+ * maximum values in this structure. Run time values
+ * should always be less than create time values
+ */
+ ihevce_src_params_t s_src_prms;
+
+ /** Parameters for target use-case */
+ ihevce_tgt_layer_params_t s_tgt_lyr_prms;
+
+ /** Output stream parameters */
+ ihevce_out_strm_params_t s_out_strm_prms;
+
+ /** Coding parameters for the encoder */
+ ihevce_coding_params_t s_coding_tools_prms;
+
+ /** Configurable parameters for the encoder */
+ ihevce_config_prms_t s_config_prms;
+
+ /** VUI SEI app parameters */
+ ihevce_vui_sei_params_t s_vui_sei_prms;
+
+ /** Multi thread specific parameters */
+ ihevce_static_multi_thread_params_t s_multi_thrd_prms;
+
+ /** Look-ahead processor related parameters */
+ ihevce_lap_params_t s_lap_prms;
+
+ /** Save Recon flag */
+ WORD32 i4_save_recon;
+
+ /** Log dump level (PSNR is part of the summary only at level 2) */
+ /* 0: No logs
+ 1: (Frame level:Bits generation + POC) + (summary level: BitRate)
+ 2: (Frame level:Bits generation + POC + Qp + Pic-type) + (summary level: BitRate + PSNR)
+ */
+ WORD32 i4_log_dump_level;
+
+ WORD32 i4_enable_csv_dump;
+
+ FILE *apF_csv_file[IHEVCE_MAX_NUM_RESOLUTIONS][IHEVCE_MAX_NUM_BITRATES];
+
+ /** Enable Logo for Eval versions */
+ WORD32 i4_enable_logo;
+
+ /* API structure for exporting console and file I/O operations */
+ ihevce_sys_api_t s_sys_api;
+
+ /* Structure to describe multipass related params */
+ ihevce_pass_prms_t s_pass_prms;
+
+ /* Structure to describe tile params */
+ ihevce_app_tile_params_t s_app_tile_params;
+
+ /** Structure to describe slice segment params */
+ ihevce_slice_params_t s_slice_params;
+
+ /** Resolution ID of the current encoder context **/
+ WORD32 i4_res_id;
+
+ /** Bitrate ID of the current encoder context **/
+ WORD32 i4_br_id;
+
+ /* Architecture type */
+ IV_ARCH_T e_arch_type;
+
+ /* Control to free the entropy output buffers: */
+ /* 1 for non_blocking mode */
+ /* and 0 for blocking mode */
+ WORD32 i4_outbuf_buf_free_control;
+
+} ihevce_static_cfg_params_t;
+
+/**
+ * @brief Input structure in which input data and
+ * other parameters are sent to the encoder
+ */
+typedef struct
+{
+ /** Kept for maintaining backwards compatibility in future */
+ WORD32 i4_size;
+
+ /** Buffer id for the current buffer */
+ WORD32 i4_buf_id;
+
+ /** is bottom field: 0 = top field, 1 = bottom field */
+ WORD32 i4_bottom_field;
+
+ /** top field first input in case of interlaced input */
+ WORD32 i4_topfield_first;
+
+ /** input time stamp in terms of ticks: lower 32 */
+ WORD32 i4_inp_timestamp_low;
+
+ /** input time stamp in terms of ticks: higher 32 */
+ WORD32 i4_inp_timestamp_high;
+
+ /** colour format of input,
+ * should be same as create time value
+ */
+ WORD32 u1_colour_format;
+
+ /**
+ * Input frame buffer valid flag
+ * 1 : valid data is present in the s_input_buf
+ * 0 : Only the command buffer is valid; the input buffer is not valid (dummy input)
+ */
+ WORD32 i4_inp_frm_data_valid_flag;
+
+ /** Synchronous control commands buffer
+ * this will be a Tag Length Value (TLV) buffer.
+ * All commands must be terminated with a tag
+ * Tag should be set to IHEVCE_SYNCH_API_END_TAG
+ */
+ void *pv_synch_ctrl_bufs;
+
+ /**
+ * Synchronous control commands buffer
+ * size in number of bytes
+ */
+ WORD32 i4_cmd_buf_size;
+
+ /** for system use if run time buffer allocation is used */
+ void *pv_metadata;
+
+ /** for system to pass frame context from Input to Output
+ Same pointer will be returned on the output buffer of this frame */
+ void *pv_app_frm_ctxt;
+
+ /** Input YUV buffer pointers and related parameters
+ * are set in this structure
+ */
+ iv_yuv_buf_t s_input_buf;
+
+} iv_input_data_ctrl_buffs_t;
+
+/**
+ * @brief Input structure in which input async control
+ * commands are sent to the encoder
+ */
+typedef struct
+{
+ /** Kept for maintaining backwards compatibility in future */
+ WORD32 i4_size;
+
+ /** Buffer id for the current buffer */
+ WORD32 i4_buf_id;
+
+ /** Asynchronous control commands buffer
+ * this will be a Tag Length Value (TLV) buffer.
+ * The buffer must be ended with an IHEVCE_ASYNCH_API_END_TAG
+ */
+ void *pv_asynch_ctrl_bufs;
+
+ /**
+ * Asynchronous control commands buffer
+ * size in number of bytes
+ */
+ WORD32 i4_cmd_buf_size;
+
+} iv_input_ctrl_buffs_t;
+
+/**
+ * @brief Output structure in which output data
+ * and related parameters are sent from the encoder
+ */
+typedef struct
+{
+ /** Kept for maintaining backwards compatibility in future */
+ WORD32 i4_size;
+
+ /** Buffer id for the current buffer */
+ WORD32 i4_buf_id;
+
+ /** processing status of the current output returned */
+ WORD32 i4_process_ret_sts;
+
+ /** the error code, if an error was encountered */
+ WORD32 i4_process_error_code;
+
+ /** picture type of the current encoded output */
+ IV_PICTURE_CODING_TYPE_T i4_encoded_frame_type;
+
+ /** output time stamp of curr encoded buffer : lower 32 */
+ WORD32 i4_out_timestamp_low;
+
+ /** output time stamp of curr encoded buffer : higher 32 */
+ WORD32 i4_out_timestamp_high;
+
+ /** skip status of the current encoded output */
+ WORD32 i4_frame_skipped;
+
+ /** bytes generated in the output buffer */
+ WORD32 i4_bytes_generated;
+
+ /** End flag to communicate this is the last frame output from the encoder */
+ WORD32 i4_end_flag;
+
+ /** End flag to communicate to the encoder that this is the last buffer from the application
+ 1 - Last buf, 0 - Not last buffer. No other values are supported.
+ Application has to set the appropriate value before queuing in the encoder queue */
+ WORD32 i4_is_last_buf;
+
+ /** DBF level after the dynamic bitrate change
+ -1 - Value not set by codec
+ Encoder sets to positive value when bitrate change control call is done */
+ LWORD64 i8_cur_vbv_level;
+
+ /** Output buffer pointer */
+ void *pv_bitstream_bufs;
+
+ /** Output buffer size */
+ WORD32 i4_bitstream_buf_size;
+
+ /** Can be used for tracking purposes if run time buffer allocation is used */
+ void *pv_metadata;
+
+ /** for system to retrieve frame context from Input to Output */
+ void *pv_app_frm_ctxt;
+
+ /** Can be used for tracking the buffer that is sent back during callback */
+ WORD32 i4_cb_buf_id;
+
+ /** Number of Prefix Non-VCL NAL units in the output buffer */
+ WORD32 i4_num_non_vcl_prefix_nals;
+
+ /** Number of Suffix Non-VCL NAL units in the output buffer */
+ WORD32 i4_num_non_vcl_suffix_nals;
+
+ /** Number of VCL NAL units in the output buffer */
+ WORD32 i4_num_vcl_nals;
+
+ /************************************************************************/
+ /* Size of each NAL based on type: Non-VCL Prefix/ VCL / Non-VCL Suffix */
+ /* */
+ /* Ordering of NALS in output buffer is as follows: */
+ /* Non-VCL Prefix NALs -> VCL NALs -> Non-VCL Suffix NALs */
+ /* */
+ /* As there are no holes between adjacent NALs, these sizes can be used */
+ /* to compute the offsets w.r.t start of the output buffer */
+ /************************************************************************/
+
+ /** Array to store the size in bytes of Prefix Non-VCL NAL units */
+ WORD32 ai4_size_non_vcl_prefix_nals[MAX_NUM_PREFIX_NALS_PER_AU];
+
+ /* Array to store the size in bytes of Suffix Non-VCL NAL units */
+ WORD32 ai4_size_non_vcl_suffix_nals[MAX_NUM_SUFFIX_NALS_PER_AU];
+
+ /** Array to store the size in bytes of VCL NAL units */
+ WORD32 ai4_size_vcl_nals[MAX_NUM_VCL_NALS_PER_AU];
+
+} iv_output_data_buffs_t;
+
+/**
+ * @brief Output structure in which output async control
+ * acknowledgements are sent from the encoder
+ */
+typedef struct
+{
+ /** Kept for maintaining backwards compatibility in future */
+ WORD32 i4_size;
+
+ /** Buffer id for the current buffer */
+ WORD32 i4_buf_id;
+
+ /** Asynchronous control commands ack buffer
+ * this will be a Tag Length Value (TLV) buffer.
+ */
+ void *pv_status_bufs;
+
+} iv_output_status_buffs_t;
+
+/**
+ * @brief Structure in which recon data
+ * and related parameters are sent from the encoder
+ */
+typedef struct
+{
+ /** Kept for maintaining backwards compatibility in future */
+ WORD32 i4_size;
+
+ /** Buffer id for the current buffer */
+ WORD32 i4_buf_id;
+
+ /** POC of the current buffer */
+ WORD32 i4_poc;
+
+ /** End flag to communicate this is the last frame output from the encoder */
+ WORD32 i4_end_flag;
+
+ /** End flag to communicate to the encoder that this is the last buffer from the application
+ 1 - Last buf, 0 - Not last buffer. No other values are supported.
+ Application has to set the appropriate value before queuing in the encoder queue */
+ WORD32 i4_is_last_buf;
+
+ /** Recon luma buffer pointer */
+ void *pv_y_buf;
+
+ /** Recon cb buffer pointer */
+ void *pv_cb_buf;
+
+ /** Recon cr buffer pointer */
+ void *pv_cr_buf;
+
+ /** Luma size **/
+ WORD32 i4_y_pixels;
+
+ /** Chroma size **/
+ WORD32 i4_uv_pixels;
+
+} iv_recon_data_buffs_t;
+
+/* @brief iv_res_layer_output_bufs_req_t: This structure contains the parameters
+ * related to output (data and control) buffer requirements of the codec for all
+ * target resolution layers.
+ * Application can call the memory query API to get these requirements
+ */
+
+typedef struct
+{
+ /** i4_size of the structure : used for version tracking */
+ WORD32 i4_size;
+
+ /* Memory requirements for each target resolution and bitrate instance */
+ iv_output_bufs_req_t s_output_buf_req[IHEVCE_MAX_NUM_RESOLUTIONS][IHEVCE_MAX_NUM_BITRATES];
+
+} iv_res_layer_output_bufs_req_t;
+
+/* @brief iv_res_layer_recon_bufs_req_t: This structure contains the parameters
+ * related to recon buffer requirements of the codec for all target resolution layers.
+ * Application can call the memory query API to get these requirements
+ */
+
+typedef struct
+{
+ /** i4_size of the structure : used for version tracking */
+ WORD32 i4_size;
+
+ /* Memory requirements for each target resolution and bitrate instance */
+ iv_recon_bufs_req_t s_recon_buf_req[IHEVCE_MAX_NUM_RESOLUTIONS][IHEVCE_MAX_NUM_BITRATES];
+} iv_res_layer_recon_bufs_req_t;
+
+/* @brief iv_res_layer_output_data_buffs_desc_t: This structure contains
+ * the parameters related to output data buffers for all target resolution layers
+ */
+
+typedef struct
+{
+ /** i4_size of the structure : used for version tracking */
+ WORD32 i4_size;
+
+ /* Output data buffer descriptors for each target resolution and bitrate instance */
+ iv_output_data_buffs_desc_t s_output_data_buffs[IHEVCE_MAX_NUM_RESOLUTIONS]
+ [IHEVCE_MAX_NUM_BITRATES];
+
+} iv_res_layer_output_data_buffs_desc_t;
+
+/* @brief iv_res_layer_recon_data_buffs_desc_t: This structure contains
+ * the parameters related to recon data buffers for all target resolution layers
+ */
+
+typedef struct
+{
+ /** i4_size of the structure : used for version tracking */
+ WORD32 i4_size;
+
+ /* Recon data buffer descriptors for each target resolution and bitrate instance */
+ iv_recon_data_buffs_desc_t s_recon_data_buffs[IHEVCE_MAX_NUM_RESOLUTIONS]
+ [IHEVCE_MAX_NUM_BITRATES];
+
+} iv_res_layer_recon_data_buffs_desc_t;
+
+#endif // _IHEVCE_API_H_
diff --git a/encoder/ihevce_bitstream.c b/encoder/ihevce_bitstream.c
new file mode 100644
index 0000000..f48b0cd
--- /dev/null
+++ b/encoder/ihevce_bitstream.c
@@ -0,0 +1,466 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+******************************************************************************
+* @file ihevce_bitstream.c
+*
+* @brief
+* This file contains function definitions related to bitstream generation
+*
+* @author
+* ittiam
+*
+* @List of Functions
+* ihevce_bitstrm_init()
+* ihevce_put_bits()
+* ihevce_put_bit()
+* ihevce_put_rbsp_trailing_bits()
+* ihevce_put_uev()
+* ihevce_put_sev()
+* ihevce_put_nal_start_code_prefix()
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <assert.h>
+#include <math.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "ihevc_debug.h"
+#include "ihevc_platform_macros.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_defs.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+/**
+******************************************************************************
+*
+* @brief Resets a bitstream context so that it writes into a fresh buffer
+*
+* @par Description
+* Meant to be invoked before each slice/frame is encoded
+*
+* @param[in] ps_bitstrm
+* bitstream context (handle) to be reset
+*
+* @param[in] pu1_bitstrm_buf
+* byte-ordered output buffer into which the encoded stream is generated
+*
+* @param[in] u4_max_bitstrm_size
+* capacity of the bitstream buffer in bytes.
+* When the generated stream would exceed this capacity, the encoder should
+* 1. Not corrupt data beyond u4_max_bitstrm_size bytes
+* 2. Report an error back to application indicating overflow
+*
+* @return IHEVCE_SUCCESS always
+*
+******************************************************************************
+*/
+IHEVCE_ERROR_T
+ ihevce_bitstrm_init(bitstrm_t *ps_bitstrm, UWORD8 *pu1_bitstrm_buf, UWORD32 u4_max_bitstrm_size)
+{
+ /* Scratch state: no zero run, empty current word, nothing written yet */
+ ps_bitstrm->i4_zero_bytes_run = 0;
+ ps_bitstrm->i4_bits_left_in_cw = WORD_SIZE;
+ ps_bitstrm->u4_cur_word = 0;
+ ps_bitstrm->u4_strm_buf_offset = 0;
+
+ /* Attach the caller supplied output buffer and record its capacity */
+ ps_bitstrm->u4_max_strm_size = u4_max_bitstrm_size;
+ ps_bitstrm->pu1_strm_buffer = pu1_bitstrm_buf;
+ return IHEVCE_SUCCESS;
+}
+
+/**
+******************************************************************************
+*
+* @brief puts a code with specified number of bits into the bitstream
+*
+* @par Description
+* inserts code_len number of bits from lsb of code_val into the
+* bitstream. updates context members like u4_cur_word, u4_strm_buf_offset and
+* i4_bits_left_in_cw. If the total words (u4_strm_buf_offset) exceeds max
+* available size (u4_max_strm_size), returns error without corrupting data
+* beyond it
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle)
+*
+* @param[in] u4_code_val
+* code value that needs to be inserted in the stream.
+*
+* @param[in] code_len
+* indicates code length (in bits) of code_val that would be inserted in
+* bitstream buffer size. Range of length[1:WORD_SIZE]
+*
+* @remarks Assumptions: all bits from bit position code_len to msb of
+* code_val shall be zero
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+IHEVCE_ERROR_T ihevce_put_bits(bitstrm_t *ps_bitstrm, UWORD32 u4_code_val, WORD32 code_len)
+{
+ UWORD32 u4_cur_word = ps_bitstrm->u4_cur_word;
+ WORD32 bits_left_in_cw = ps_bitstrm->i4_bits_left_in_cw;
+
+ /* check assumptions made in the module */
+ ASSERT(code_len > 0 && code_len <= WORD_SIZE);
+
+ if(code_len < WORD_SIZE)
+ ASSERT((u4_code_val >> code_len) == 0);
+
+ /* sanity check on the bitstream engine state */
+ ASSERT(bits_left_in_cw > 0 && bits_left_in_cw <= WORD_SIZE);
+
+ ASSERT(ps_bitstrm->i4_zero_bytes_run <= EPB_ZERO_BYTES);
+
+ ASSERT(ps_bitstrm->pu1_strm_buffer != NULL);
+
+ if(bits_left_in_cw > code_len)
+ {
+ /*******************************************************************/
+ /* code fits in the local bitstream word: insert it and return */
+ /* code is inserted in position of bits left (post decrement) */
+ /*******************************************************************/
+ bits_left_in_cw -= code_len;
+ u4_cur_word |= (u4_code_val << bits_left_in_cw);
+
+ ps_bitstrm->u4_cur_word = u4_cur_word;
+ ps_bitstrm->i4_bits_left_in_cw = bits_left_in_cw;
+
+ return (IHEVCE_SUCCESS);
+ }
+ else
+ {
+ /********************************************************************/
+ /* 1. insert partial code corresponding to bits left in cur word */
+ /* 2. flush all the bits of cur word to bitstream */
+ /* 3. insert emulation prevention bytes while flushing the bits */
+ /* 4. insert remaining bits of code starting from msb of cur word */
+ /* 5. update bitsleft in current word and stream buffer offset */
+ /********************************************************************/
+ UWORD32 u4_strm_buf_offset = ps_bitstrm->u4_strm_buf_offset;
+
+ UWORD32 u4_max_strm_size = ps_bitstrm->u4_max_strm_size;
+
+ WORD32 zero_run = ps_bitstrm->i4_zero_bytes_run;
+
+ UWORD8 *pu1_strm_buf = ps_bitstrm->pu1_strm_buffer;
+
+ WORD32 i, rem_bits = (code_len - bits_left_in_cw);
+
+ /*********************************************************************/
+ /* Bitstream overflow check */
+ /* NOTE: reserves the word bytes plus worst case epb (1 per 2 bytes) */
+ /*********************************************************************/
+ if((u4_strm_buf_offset + (WORD_SIZE >> 3) + (WORD_SIZE >> 4)) > u4_max_strm_size)
+ {
+ /* return without corrupting the buffer beyond its size */
+ return (IHEVCE_BITSTREAM_BUFFER_OVERFLOW);
+ }
+
+ /* insert partial code corresponding to bits left in cur word */
+ u4_cur_word |= u4_code_val >> rem_bits;
+
+ for(i = WORD_SIZE; i > 0; i -= 8)
+ {
+ /* flush the bits in cur word byte by byte (msb first) and copy to stream */
+ UWORD8 u1_next_byte = (u4_cur_word >> (i - 8)) & 0xFF;
+
+ PUTBYTE_EPB(pu1_strm_buf, u4_strm_buf_offset, u1_next_byte, zero_run);
+ }
+
+ /* remaining bits start the new word; rem_bits == 0 avoids a full-width shift */
+ u4_cur_word = rem_bits ? (u4_code_val << (WORD_SIZE - rem_bits)) : 0;
+
+ /* update the state variables and return success */
+ ps_bitstrm->u4_cur_word = u4_cur_word;
+ ps_bitstrm->i4_bits_left_in_cw = WORD_SIZE - rem_bits;
+ ps_bitstrm->i4_zero_bytes_run = zero_run;
+ ps_bitstrm->u4_strm_buf_offset = u4_strm_buf_offset;
+ return (IHEVCE_SUCCESS);
+ }
+}
+
+/**
+******************************************************************************
+*
+* @brief inserts a 1-bit code into the bitstream
+*
+* @par Description
+* inserts 1bit lsb of code_val into the bitstream
+* updates context members like u4_cur_word, u4_strm_buf_offset and
+* i4_bits_left_in_cw. If the total words (u4_strm_buf_offset) exceeds max
+* available size (u4_max_strm_size), returns error without corrupting data
+* beyond it
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle)
+*
+* @param[in] u4_code_val
+* code value that needs to be inserted in the stream.
+*
+* @remarks Assumptions: all bits from bit position 1 to msb of code_val
+* shall be zero
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+IHEVCE_ERROR_T ihevce_put_bit(bitstrm_t *ps_bitstrm, UWORD32 u4_code_val)
+{
+ /* a single bit is simply the one-bit case of ihevce_put_bits() */
+ return ihevce_put_bits(ps_bitstrm, u4_code_val, 1);
+}
+
+/**
+******************************************************************************
+*
+* @brief inserts rbsp trailing bits at the end of stream buffer (NAL)
+*
+* @par Description
+* inserts rbsp trailing bits, updates context members like u4_cur_word and
+* i4_bits_left_in_cw and flushes the same in the bitstream buffer. If the
+* total words (u4_strm_buf_offset) exceeds max available size
+* (u4_max_strm_size), returns error without corrupting data beyond it
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle)
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+IHEVCE_ERROR_T ihevce_put_rbsp_trailing_bits(bitstrm_t *ps_bitstrm)
+{
+ WORD32 i;
+ UWORD32 u4_cur_word = ps_bitstrm->u4_cur_word;
+ WORD32 bits_left_in_cw = ps_bitstrm->i4_bits_left_in_cw;
+ WORD32 bytes_left_in_cw = (bits_left_in_cw - 1) >> 3;
+
+ UWORD32 u4_strm_buf_offset = ps_bitstrm->u4_strm_buf_offset;
+ UWORD32 u4_max_strm_size = ps_bitstrm->u4_max_strm_size;
+ WORD32 zero_run = ps_bitstrm->i4_zero_bytes_run;
+ UWORD8 *pu1_strm_buf = ps_bitstrm->pu1_strm_buffer;
+
+ /* bytes of the current word that hold data or the stop bit; only these are flushed */
+ UWORD32 u4_flush_bytes = (UWORD32)((WORD_SIZE >> 3) - bytes_left_in_cw);
+
+ /*********************************************************************/
+ /* Bitstream overflow check */
+ /* NOTE: reserves flushed bytes plus worst case epb (1 per 2 bytes) */
+ /*********************************************************************/
+ if((u4_strm_buf_offset + u4_flush_bytes + ((u4_flush_bytes + 1) >> 1)) > u4_max_strm_size)
+ {
+ /* return without corrupting the buffer beyond its size */
+ return (IHEVCE_BITSTREAM_BUFFER_OVERFLOW);
+ }
+
+ /* insert a 1 at the end of current word and flush all the bits */
+ u4_cur_word |= (1U << (bits_left_in_cw - 1));
+
+ for(i = WORD_SIZE; i > (bytes_left_in_cw * 8); i -= 8)
+ {
+ /* flush the bits in cur word byte by byte (msb first) and copy to stream */
+ UWORD8 u1_next_byte = (u4_cur_word >> (i - 8)) & 0xFF;
+
+ PUTBYTE_EPB(pu1_strm_buf, u4_strm_buf_offset, u1_next_byte, zero_run);
+ }
+
+ /* update the stream offset */
+ ps_bitstrm->u4_strm_buf_offset = u4_strm_buf_offset;
+
+ /* Default init values for scratch variables of bitstream context */
+ ps_bitstrm->u4_cur_word = 0;
+ ps_bitstrm->i4_bits_left_in_cw = WORD_SIZE;
+ ps_bitstrm->i4_zero_bytes_run = 0;
+
+ return (IHEVCE_SUCCESS);
+}
+
+/**
+******************************************************************************
+*
+* @brief puts exponential golomb code of an unsigned integer into bitstream
+*
+* @par Description
+* computes uev code for given syntax element and inserts the same into
+* bitstream via ihevce_put_bits(); codes wider than WORD_SIZE use two calls
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle)
+*
+* @param[in] u4_code_num
+* unsigned integer input whose golomb code is written in stream
+*
+* @remarks Assumptions: u4_code_num + 1 does not wrap around to zero
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+IHEVCE_ERROR_T ihevce_put_uev(bitstrm_t *ps_bitstrm, UWORD32 u4_code_num)
+{
+ /* exp-golomb bit string is code_num + 1: Table 9-2 JCTVC-J1003_d7 */
+ UWORD32 u4_range, u4_bit_str = u4_code_num + 1;
+ IHEVCE_ERROR_T e_error;
+
+ GETRANGE(u4_range, u4_bit_str);
+ /* the code is (range - 1) zero bits followed by the range bits of the bit string */
+ if((WORD32)(2 * u4_range - 1) <= WORD_SIZE)
+ return (ihevce_put_bits(ps_bitstrm, u4_bit_str, (2 * u4_range - 1)));
+
+ e_error = ihevce_put_bits(ps_bitstrm, 0, (u4_range - 1)); /* zero prefix on its own */
+ return ((IHEVCE_SUCCESS != e_error) ? e_error
+ : ihevce_put_bits(ps_bitstrm, u4_bit_str, u4_range));
+}
+
+/**
+******************************************************************************
+*
+* @brief writes a signed syntax element as an exp-golomb (sev) code
+*
+* @par Description
+*  maps the signed value to a code number (Table 9-3), forms the bit string
+*  (code number + 1) and writes it by calling ihevce_put_bits()
+*
+* @param[in]   ps_bitstrm
+*  pointer to bitstream context (handle)
+*
+* @param[in]   syntax_elem
+*  signed integer input whose golomb code is written in stream
+*
+* @remarks Assumptions: code value can be represented in less than 16bits
+*
+* @return  error code returned by ihevce_put_bits()
+*
+******************************************************************************
+*/
+IHEVCE_ERROR_T ihevce_put_sev(bitstrm_t *ps_bitstrm, WORD32 syntax_elem)
+{
+    UWORD32 u4_mapped_code;
+    UWORD32 u4_code_word;
+    UWORD32 u4_num_bits;
+    WORD32 code_len;
+
+    /************************************************************************/
+    /* map the signed syntax element to an unsigned code number             */
+    /* See Table 9-3 of standard JCTVC-J1003_d7                             */
+    /************************************************************************/
+    if(syntax_elem > 0)
+    {
+        /* positive integer x maps to codeNum 2x - 1 */
+        u4_mapped_code = (syntax_elem << 1) - 1;
+    }
+    else
+    {
+        /* non-positive integer x maps to codeNum 2 * abs(x) */
+        u4_mapped_code = ((-syntax_elem) << 1);
+    }
+
+    /* exp-golomb bit string of the code number: Table 9-2 JCTVC-J1003_d7 */
+    u4_code_word = u4_mapped_code + 1;
+
+    /* range of the bit string decides the length of the golomb code */
+    GETRANGE(u4_num_bits, u4_code_word);
+    code_len = (WORD32)((u4_num_bits << 1) - 1);
+    return ihevce_put_bits(ps_bitstrm, u4_code_word, code_len);
+}
+
+/**
+******************************************************************************
+*
+* @brief insert NAL start code prefix (0x000001) into bitstream with an option
+*  of inserting leading_zero_8bits (which makes startcode prefix as 0x00000001)
+*
+* @par Description
+*  Although start code prefix could have been put by calling ihevce_put_bits(),
+*  ihevce_put_nal_start_code_prefix() is specially added to make sure emulation
+*  prevention insertion is not done for the NAL start code prefix, which would
+*  otherwise happen when going through the ihevce_put_bits() interface.
+*
+* @param[in]   ps_bitstrm
+*  pointer to bitstream context (handle)
+*
+* @param[in]   insert_leading_zero_8bits
+*  flag indicating if one more zero byte needs to be prefixed before start code
+*
+* @return  IHEVCE_SUCCESS, or IHEVCE_BITSTREAM_BUFFER_OVERFLOW (nothing is
+*  written) when less than 4 bytes are left in the stream buffer
+******************************************************************************
+*/
+IHEVCE_ERROR_T
+    ihevce_put_nal_start_code_prefix(bitstrm_t *ps_bitstrm, WORD32 insert_leading_zero_8bits)
+{
+    UWORD32 u4_strm_buf_offset = ps_bitstrm->u4_strm_buf_offset;
+    UWORD8 *pu1_strm_buf = ps_bitstrm->pu1_strm_buffer;
+    WORD32 num_nals = ps_bitstrm->i4_num_nal;
+
+    /* Bitstream buffer overflow check assuming worst case of 4 bytes */
+    if((u4_strm_buf_offset + 4) > ps_bitstrm->u4_max_strm_size)
+    {
+        return (IHEVCE_BITSTREAM_BUFFER_OVERFLOW);
+    }
+
+    /* Update the current NAL start ptr and increment counter. The index is */
+    /* range checked at run time as well, in case ASSERT is compiled out    */
+    ASSERT((num_nals >= 0) && (num_nals < MAX_NALS_IN_AU));
+    if((num_nals >= 0) && (num_nals < MAX_NALS_IN_AU))
+    {
+        ps_bitstrm->apu1_nal_start[num_nals] = pu1_strm_buf + u4_strm_buf_offset;
+        ps_bitstrm->i4_num_nal++;
+    }
+
+    /* Insert leading zero 8 bits conditionally */
+    if(insert_leading_zero_8bits)
+    {
+        pu1_strm_buf[u4_strm_buf_offset] = 0x00;
+        u4_strm_buf_offset++;
+    }
+
+    /* Insert NAL start code prefix 0x00 00 01 */
+    pu1_strm_buf[u4_strm_buf_offset] = 0x00;
+    u4_strm_buf_offset++;
+
+    pu1_strm_buf[u4_strm_buf_offset] = 0x00;
+    u4_strm_buf_offset++;
+
+    pu1_strm_buf[u4_strm_buf_offset] = 0x01;
+    u4_strm_buf_offset++;
+
+    /* update the stream offset */
+    ps_bitstrm->u4_strm_buf_offset = u4_strm_buf_offset;
+
+    return (IHEVCE_SUCCESS);
+}
diff --git a/encoder/ihevce_bitstream.h b/encoder/ihevce_bitstream.h
new file mode 100644
index 0000000..ac0f660
--- /dev/null
+++ b/encoder/ihevce_bitstream.h
@@ -0,0 +1,200 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+*
+* @file
+* ihevce_bitstream.h
+*
+* @brief
+* This file contains encoder bitstream engine related structures and
+* interface prototypes
+*
+* @author
+* ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_BITSTREAM_H_
+#define _IHEVCE_BITSTREAM_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+* @brief defines the maximum number of bits in a bitstream word
+******************************************************************************
+*/
+#define WORD_SIZE 32
+
+/**
+******************************************************************************
+* @brief The number of consecutive zero bytes for emulation prevention check
+******************************************************************************
+*/
+#define EPB_ZERO_BYTES 2
+
+/**
+******************************************************************************
+* @brief Emulation prevention insertion byte
+******************************************************************************
+*/
+#define EPB_BYTE 0x03
+
+/**
+******************************************************************************
+* @brief Maximum number of NALs in a frame
+******************************************************************************
+*/
+#define MAX_NALS_IN_AU 256
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+* @brief Checks if an emulation prevention byte is required before next_byte
+******************************************************************************
+*/
+#define INSERT_EPB(zero_run, next_byte) (((zero_run) == EPB_ZERO_BYTES) && (0 == ((next_byte)&0xFC)))
+
+/**
+******************************************************************************
+* @brief sets bits to (2 * range - 1), range being GETRANGE() of (x + 1)
+******************************************************************************
+*/
+#define UE_LENGTH(bits, x)                                                     \
+    {                                                                          \
+        UWORD32 r_bit;                                                         \
+        GETRANGE(r_bit, ((x) + 1))                                             \
+        (bits) = (((r_bit - 1) << 1) + 1);                                     \
+    }
+
+/**
+******************************************************************************
+* @brief Puts an Emulation Prevention Byte (if required) and then byte at
+*  ptr[off], updating off and zero_run; arguments are evaluated more than once
+******************************************************************************
+*/
+#define PUTBYTE_EPB(ptr, off, byte, zero_run)                                  \
+    {                                                                          \
+        if(INSERT_EPB(zero_run, byte))                                         \
+        {                                                                      \
+            (ptr)[(off)] = EPB_BYTE;                                           \
+            (off)++;                                                           \
+            (zero_run) = 0;                                                    \
+        }                                                                      \
+                                                                               \
+        (ptr)[(off)] = (byte);                                                 \
+        (off)++;                                                               \
+        (zero_run) = (byte) ? 0 : (zero_run) + 1;                              \
+    }
+
+/**
+******************************************************************************
+* @brief Ensures Byte alignment of the slice header
+******************************************************************************
+*/
+
+#define BYTE_ALIGNMENT(ps_bitstrm) ihevce_put_rbsp_trailing_bits(ps_bitstrm)
+
+/*****************************************************************************/
+/* Structures */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+* @brief Bitstream context for encoder
+******************************************************************************
+*/
+typedef struct bitstrm
+{
+ /** points to start of stream buffer. */
+ UWORD8 *pu1_strm_buffer;
+
+ /**
+ * max bitstream size (in bytes).
+ * Encoded stream shall not exceed this size.
+ */
+ UWORD32 u4_max_strm_size;
+
+ /**
+ * byte offset (w.r.t pu1_strm_buffer) where next byte would be written.
+ * Bitstream engine makes sure it would not corrupt data beyond
+ * u4_max_strm_size bytes
+ */
+ UWORD32 u4_strm_buf_offset;
+
+ /**
+ * current bitstream word; It is a scratch word containing max of
+ * WORD_SIZE bits. Will be copied to stream buffer when the word is
+ * full
+ */
+ UWORD32 u4_cur_word;
+
+ /**
+ * signifies number of bits available in u4_cur_word.
+ * Bits from msb down to i4_bits_left_in_cw of u4_cur_word have already been
+ * inserted; next bits would be inserted from pos [i4_bits_left_in_cw-1].
+ * Range of this variable [1 : WORD_SIZE]
+ */
+ WORD32 i4_bits_left_in_cw;
+
+ /**
+ * signifies the number of consecutive zero bytes propagated from previous
+ * word. It is used for emulation prevention byte insertion in the stream
+ */
+ WORD32 i4_zero_bytes_run;
+
+ /** Total number of NAL units in the output buffer; Shall not exceed
+ * MAX_NALS_IN_AU */
+ WORD32 i4_num_nal;
+
+ /** Pointer to start of each NAL unit in the output buffer */
+ UWORD8 *apu1_nal_start[MAX_NALS_IN_AU];
+
+} bitstrm_t;
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+IHEVCE_ERROR_T
+ ihevce_bitstrm_init(bitstrm_t *ps_bitstrm, UWORD8 *pu1_bitstrm_buf, UWORD32 u4_max_bitstrm_size);
+
+IHEVCE_ERROR_T ihevce_put_bits(bitstrm_t *ps_bitstrm, UWORD32 u4_code_val, WORD32 code_len);
+
+IHEVCE_ERROR_T ihevce_put_bit(bitstrm_t *ps_bitstrm, UWORD32 u4_code_val);
+
+IHEVCE_ERROR_T ihevce_put_rbsp_trailing_bits(bitstrm_t *ps_bitstrm);
+
+IHEVCE_ERROR_T ihevce_put_uev(bitstrm_t *ps_bitstrm, UWORD32 u4_code_num);
+
+IHEVCE_ERROR_T ihevce_put_sev(bitstrm_t *ps_bitstrm, WORD32 syntax_elem);
+
+IHEVCE_ERROR_T
+ ihevce_put_nal_start_code_prefix(bitstrm_t *ps_bitstrm, WORD32 insert_leading_zero_8bits);
+
+#endif /* _IHEVCE_BITSTREAM_H_ */
diff --git a/encoder/ihevce_bs_compute_ctb.c b/encoder/ihevce_bs_compute_ctb.c
new file mode 100644
index 0000000..ecd9aaf
--- /dev/null
+++ b/encoder/ihevce_bs_compute_ctb.c
@@ -0,0 +1,785 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+******************************************************************************
+* @file ihevce_bs_compute_ctb.c
+*
+* @brief
+* This file contains functions needed for boundary strength calculation
+*
+* @author
+* ittiam
+*
+* @List of Functions
+* ihevce_bs_init_ctb()
+* ihevce_bs_compute_ctb()
+* ihevce_bs_clear_invalid()
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_entropy_structs.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+#include "ihevce_enc_loop_structs.h"
+#include "ihevce_bs_compute_ctb.h"
+#include "ihevce_global_tables.h"
+
+/*****************************************************************************/
+/* Global Tables */
+/*****************************************************************************/
+// clang-format off
+UWORD16 gau2_bs_table[2][8] = /* read as [tbl_idx][(tu_size >> 2) - 1] by ihevce_bs_compute_cu() */
+{ /* tu_size 4, 8, 16, 32 selects column 0, 1, 3, 7; the other columns hold BS_INVALID */
+ { BS_INTRA_4, BS_INTRA_8, BS_INVALID, BS_INTRA_16, BS_INVALID, BS_INVALID, BS_INVALID, BS_INTRA_32 }, /* tbl_idx 0 : PRED_MODE_INTRA CU */
+ { BS_CBF_4, BS_CBF_8, BS_INVALID, BS_CBF_16, BS_INVALID, BS_INVALID, BS_INVALID, BS_CBF_32 } /* tbl_idx 1 : non-intra CU, TU with b1_y_cbf set */
+};
+// clang-format on
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief Initialize the Boundary Strength (BS) context at a CTB level
+*
+* @par Description
+*  Clears the packed horz / vert BS arrays and derives, from the tile id map,
+*  whether a CTB is present above and to the left of the current CTB
+*
+* @param[out] ps_deblk_prms
+*  BS context : au4_horz_bs, au4_vert_bs and the two "not first ctb" flags
+*
+* @param[in] ps_frm_ctb_prms
+*  Frame level ctb context : i4_ctb_size, pi4_tile_id_map and its stride
+*
+* @param[in] ctb_ctr, vert_ctr
+*  Column and row index of the current CTB in the tile id map
+*
+* @returns none
+*
+*******************************************************************************
+*/
+void ihevce_bs_init_ctb(
+    deblk_bs_ctb_ctxt_t *ps_deblk_prms,
+    frm_ctb_ctxt_t *ps_frm_ctb_prms,
+    WORD32 ctb_ctr,
+    WORD32 vert_ctr)
+{
+    /* distance between vertically adjacent entries of the tile id map */
+    WORD32 map_strd = ps_frm_ctb_prms->i4_tile_id_ctb_map_stride;
+
+    /* tile id map entry of the current CTB */
+    WORD32 *pi4_cur_tile_id =
+        ps_frm_ctb_prms->pi4_tile_id_map + (vert_ctr * map_strd) + ctb_ctr;
+
+    /* tile ids stored for the CTB positions above and to the left */
+    WORD32 i4_top_tile_id = pi4_cur_tile_id[-map_strd];
+    WORD32 i4_left_tile_id = pi4_cur_tile_id[-1];
+
+    /* bytes to clear per BS array : (ctb_size / 8 + 1) UWORD32 words */
+    size_t bs_bytes = ((ps_frm_ctb_prms->i4_ctb_size >> 3) + 1) * sizeof(UWORD32);
+
+    /* a tile id of -1 is taken to mean that there is no CTB at that position, */
+    /* i.e. the current CTB lies in the first CTB row / column of the frame    */
+    ps_deblk_prms->u1_not_first_ctb_row_of_frame = (-1 != i4_top_tile_id);
+    ps_deblk_prms->u1_not_first_ctb_col_of_frame = (-1 != i4_left_tile_id);
+
+    /* BS should be zero on the following edges
+        Frame boundaries
+        Edges if deblocking is disabled by disable_deblocking_filter_flag
+        Slice boundaries if deblocking across slices is disabled
+        Tile boundaries if deblocking across tiles is disabled
+       These are not considered now, except the frame boundary
+    */
+
+    /* start the CTB with every packed BS value (horz and vert) at zero */
+    memset(ps_deblk_prms->au4_horz_bs, 0, bs_bytes);
+    memset(ps_deblk_prms->au4_vert_bs, 0, bs_bytes);
+}
+
+/**
+*******************************************************************************
+*
+* @brief Calculate the Boundary Strength at CU level
+*
+* @par Description
+* Calculate the Boundary Strength at CU level
+*
+* @param[in] ps_cu_final
+* Pointer to the final CU structure, of which we use the following values
+* u2_num_tus_in_cu : Total TUs in this CU
+* ps_enc_tu : Pointer to first tu of this cu. Each TU need to be
+* populated in TU order.
+* u4_pred_mode_flag : The prediction mode flag for the CU
+* cu_size : CU size in terms of min CU (8x8) units
+* cu_pos_x : X Position of CU in current ctb
+* cu_pos_y : Y Position of CU in current ctb
+* u4_part_mode : Partition information for CU. For inter 0 : @sa PART_SIZE_E
+* ps_pu : Pointer to first pu of this cu
+*
+* @param[in] ps_top_nbr_4x4
+* Pointer to top 4x4 CU nbr structure
+*
+* @param[in] ps_left_nbr_4x4
+* Pointer to left 4x4 CU nbr structure
+*
+* @param[in] ps_curr_nbr_4x4
+* Pointer to current 4x4 ctb structure
+*
+* @param[in] nbr_4x4_left_strd
+* Left nbr buffer stride in terms of 4x4 units
+*
+* @param[in] num_4x4_in_ctb
+* Current buffer stride in terms of 4x4 units
+*
+* @param[out] ps_deblk_prms
+* Pointer to structure s_deblk_prms, which contains
+* s_deblk_prms.au4_horz_bs : max of 8 such conti. bs to be comp. for 64x64 ctb
+* s_deblk_prms.au4_vert_bs : max of 8 such conti. bs to be comp. for 64x64 ctb
+*
+* @returns none
+*
+* @remarks
+* 1 : Setting all 4 edges for a TU or PU block. Which is inefficient in
+* a) may set the BS twice b) set the frame/slice boundaries
+* 2 : always update BS using bit-wise OR, which may set BS to 3 also.
+* ( Deblocking should take care of it as 2 itself )
+*
+*******************************************************************************
+*/
+void ihevce_bs_compute_cu(
+ cu_enc_loop_out_t *ps_cu_final,
+ nbr_4x4_t *ps_top_nbr_4x4,
+ nbr_4x4_t *ps_left_nbr_4x4,
+ nbr_4x4_t *ps_curr_nbr_4x4,
+ WORD32 nbr_4x4_left_strd,
+ WORD32 num_4x4_in_ctb,
+ deblk_bs_ctb_ctxt_t *ps_deblk_prms)
+{
+ WORD32 i;
+ WORD32 j;
+ /* copy required arguments from pointer to CU structure */
+ /* Total TUs in this CU */
+ UWORD16 u2_num_tus_in_cu = ps_cu_final->u2_num_tus_in_cu;
+ /* Pointer to first tu of this cu */
+ tu_enc_loop_out_t *ps_enc_tu = ps_cu_final->ps_enc_tu;
+ /* The prediction mode flag for the CU */
+ UWORD32 u4_pred_mode_flag = ps_cu_final->b1_pred_mode_flag;
+ /* X Position of CU in current ctb in (8x8) units */
+ WORD32 cu_pos_x = ps_cu_final->b3_cu_pos_x;
+ /* Y Position of CU in current ctb in (8x8) units */
+ WORD32 cu_pos_y = ps_cu_final->b3_cu_pos_y;
+
+ /* Indicates partition information for CU */
+ UWORD32 u4_part_mode = ps_cu_final->b3_part_mode;
+
+ /* Pointer to first pu of this cu */
+ pu_t *ps_pu = ps_cu_final->ps_pu;
+
+ /* Number of pus in current cu */
+ WORD32 num_pus_in_cu;
+ /* Pointer to the array to store the packed BS values in horizontal dir. */
+ UWORD32 *pu4_horz_bs = &ps_deblk_prms->au4_horz_bs[0];
+ /* Pointer to the array to store the packed BS values in vertical dir. */
+ UWORD32 *pu4_vert_bs = &ps_deblk_prms->au4_vert_bs[0];
+
+ (void)ps_curr_nbr_4x4;
+ (void)num_4x4_in_ctb;
+
+ /* CTB boundary case setting the BS for intra and cbf non zero case for CU top edge */
+ if((ps_deblk_prms->u1_not_first_ctb_row_of_frame) && (0 == ps_cu_final->b3_cu_pos_y))
+ {
+ nbr_4x4_t *ps_nbr_4x4;
+ UWORD32 u4_temp_bs = *pu4_horz_bs;
+ WORD32 horz_bit_offset;
+ WORD32 ctr;
+
+ /* every 4x4 takes 2 bits in the register this is taken care in the loop */
+ /* deriving 4x4 position */
+ horz_bit_offset = (ps_cu_final->b3_cu_pos_x << 3) >> 2;
+
+ /* scanning through each 4x4 csb along horizontal direction */
+ for(ctr = 0; ctr < ((ps_cu_final->b4_cu_size << 3) >> 2); ctr++)
+ {
+ ps_nbr_4x4 = ps_top_nbr_4x4 + ctr;
+ if(ps_nbr_4x4->b1_intra_flag)
+ {
+ /* To store in BigEnd. format. BS[0]|BS[1]| .. |BS[15] */
+ u4_temp_bs = (u4_temp_bs | (2U << (30 - 2 * (ctr + horz_bit_offset))));
+ }
+ else if(ps_nbr_4x4->b1_y_cbf)
+ {
+ /* To store in BigEnd. format. BS[0]|BS[1]| .. |BS[15] */
+ u4_temp_bs = (u4_temp_bs | (1 << (30 - 2 * (ctr + horz_bit_offset))));
+ }
+ }
+
+ /* storing the BS computed for first row based on top ctb CUs */
+ *(pu4_horz_bs) = u4_temp_bs;
+ }
+
+ /* CTB boundary case setting the BS for intra and cbf non zero case for CU left edge */
+ if((ps_deblk_prms->u1_not_first_ctb_col_of_frame) && (0 == ps_cu_final->b3_cu_pos_x))
+ {
+ nbr_4x4_t *ps_nbr_4x4;
+ UWORD32 u4_temp_bs = *pu4_vert_bs;
+ WORD32 vert_bit_offset;
+ WORD32 ctr;
+
+ /* every 4x4 takes 2 bits in the register this is taken care in the loop */
+ /* deriving 4x4 position */
+ vert_bit_offset = (ps_cu_final->b3_cu_pos_y << 3) >> 2;
+
+ /* scanning through each 4x4 csb along vertical direction */
+ for(ctr = 0; ctr < ((ps_cu_final->b4_cu_size << 3) >> 2); ctr++)
+ {
+ ps_nbr_4x4 = ps_left_nbr_4x4 + ctr * nbr_4x4_left_strd;
+ if(ps_nbr_4x4->b1_intra_flag)
+ {
+ /* To store in BigEnd. format. BS[0]|BS[1]| .. |BS[15] */
+ u4_temp_bs = (u4_temp_bs | (2U << (30 - 2 * (ctr + vert_bit_offset))));
+ }
+ else if(ps_nbr_4x4->b1_y_cbf)
+ {
+ /* To store in BigEnd. format. BS[0]|BS[1]| .. |BS[15] */
+ u4_temp_bs = (u4_temp_bs | (1 << (30 - 2 * (ctr + vert_bit_offset))));
+ }
+ }
+
+ /* storing the BS computed for first col based on left ctb Cus */
+ *(pu4_vert_bs) = u4_temp_bs;
+ }
+
+ /* Passes through each TU inside the CU */
+ for(i = 0; i < u2_num_tus_in_cu; i++)
+ {
+ UWORD32 u4_tu_pos_x, u4_tu_pos_y;
+ UWORD32 u4_tu_size;
+ UWORD32 *pu4_tu_top_edge;
+ UWORD32 *pu4_tu_bottom_edge;
+ UWORD32 *pu4_tu_left_edge;
+ UWORD32 *pu4_tu_right_edge;
+ UWORD32 u4_bs_value;
+ WORD32 set_bs_flag = 0;
+ WORD32 tbl_idx = 1;
+
+ /* TU_size calculation */
+ u4_tu_size = 1 << ((ps_enc_tu->s_tu.b3_size) + 2);
+
+ /* TU X position in terms of min TU (4x4) units wrt ctb */
+ u4_tu_pos_x = ps_enc_tu->s_tu.b4_pos_x;
+ /* TU Y position in terms of min TU (4x4) units wrt ctb */
+ u4_tu_pos_y = ps_enc_tu->s_tu.b4_pos_y;
+
+ /* pointers to the edges of current TU */
+ pu4_tu_top_edge = pu4_horz_bs + (u4_tu_pos_y >> 1);
+ pu4_tu_bottom_edge = pu4_horz_bs + ((u4_tu_pos_y + 1) >> 1) + (u4_tu_size >> 3);
+ pu4_tu_left_edge = pu4_vert_bs + (u4_tu_pos_x >> 1);
+ pu4_tu_right_edge = pu4_vert_bs + ((u4_tu_pos_x + 1) >> 1) + (u4_tu_size >> 3);
+
+ /* chooose the table index based on pred_mode */
+ if(PRED_MODE_INTRA == u4_pred_mode_flag)
+ {
+ tbl_idx = 0;
+ }
+
+ /* get the BS value from table if required */
+ if((ps_enc_tu->s_tu.b1_y_cbf) || (PRED_MODE_INTRA == u4_pred_mode_flag))
+ {
+ set_bs_flag = 1;
+ u4_bs_value = gau2_bs_table[tbl_idx][(u4_tu_size >> 2) - 1];
+ }
+
+ if(1 == set_bs_flag)
+ {
+ /* Store the BS value */
+ if(4 == u4_tu_size)
+ {
+ if(0 == (u4_tu_pos_y & 1))
+ {
+ /* Only top TU edge came on a 8 pixel bounadey */
+ SET_VALUE_BIG((pu4_tu_top_edge), u4_bs_value, u4_tu_pos_x, u4_tu_size);
+ }
+ else
+ {
+ /* Only bottom TU edge came on a 8 pixel bounadey */
+ SET_VALUE_BIG((pu4_tu_bottom_edge), u4_bs_value, u4_tu_pos_x, u4_tu_size);
+ }
+ if(0 == (u4_tu_pos_x & 1))
+ {
+ /* Only left TU edge came on a 8 pixel bounadey */
+ SET_VALUE_BIG((pu4_tu_left_edge), u4_bs_value, u4_tu_pos_y, u4_tu_size);
+ }
+ else
+ {
+ /* Only right TU edge came on a 8 pixel bounadey */
+ SET_VALUE_BIG((pu4_tu_right_edge), u4_bs_value, u4_tu_pos_y, u4_tu_size);
+ }
+ }
+ /* set all edges for other TU sizes */
+ else
+ {
+ /* setting top TU edge */
+ SET_VALUE_BIG((pu4_tu_top_edge), u4_bs_value, u4_tu_pos_x, u4_tu_size);
+ /* setting bottom TU edge */
+ SET_VALUE_BIG((pu4_tu_bottom_edge), u4_bs_value, u4_tu_pos_x, u4_tu_size);
+ /* setting left TU edge */
+ SET_VALUE_BIG((pu4_tu_left_edge), u4_bs_value, u4_tu_pos_y, u4_tu_size);
+ /* setting right TU edge */
+ SET_VALUE_BIG((pu4_tu_right_edge), u4_bs_value, u4_tu_pos_y, u4_tu_size);
+ }
+ }
+
+ /* point to next TU inside CU in TU order */
+ ps_enc_tu++;
+ }
+
+ if(PRED_MODE_INTRA == u4_pred_mode_flag)
+ {
+ /* no mv based BS computation in INTRA case */
+ return;
+ }
+ /* BS update due to PU mv.s */
+ if(u4_part_mode == SIZE_2Nx2N) /* symmetric motion partition, 2Nx2N */
+ {
+ num_pus_in_cu = 1;
+ }
+ else if(u4_part_mode == SIZE_NxN) /* symmetric motion partition, NxN */
+ {
+ num_pus_in_cu = 4;
+ }
+ else /* other sym. or asym. partiotions */
+ {
+ num_pus_in_cu = 2;
+ }
+
+ /* Go through each PU inside CU in PU order and set the top & bottom */
+ /* PU edge BS accordingly */
+ for(i = 0; i < num_pus_in_cu; i++)
+ {
+ WORD32 k;
+ /* X Position of PU in terms of min PU (4x4) units in current ctb */
+ WORD32 pu_pos_x = ps_pu->b4_pos_x;
+ /* Y Position of PU in terms of min PU (4x4) units in current ctb */
+ WORD32 pu_pos_y = ps_pu->b4_pos_y;
+ /* PU width in 4 pixel unit */
+ WORD32 pu_wd = (ps_pu->b4_wd) + 1;
+ /* PU height in 4 pixel unit */
+ WORD32 pu_ht = (ps_pu->b4_ht) + 1;
+ /* Pred L0 flag */
+ WORD32 cur_pred_l0_flag;
+ /* pointer to current PU */
+ nbr_4x4_t *ps_curr_nbr_4x4_pu;
+
+ /* go through each 4x4 block along the PU edges and do BS calculation */
+ /* can optimize further with proper checks according to PU size */
+ /* but in that case also @CTB boundary, we should go by 4x4 nbr.s only*/
+
+ /* load cur. PU parameters */
+ WORD8 i1_cur_l0_ref_pic_buf_id, i1_cur_l1_ref_pic_buf_id;
+ WORD32 cur_mv_no;
+ WORD16 i2_mv_x0, i2_mv_y0, i2_mv_x1, i2_mv_y1;
+
+ ps_curr_nbr_4x4_pu = ps_curr_nbr_4x4 + (pu_pos_x - (cu_pos_x << 1)) +
+ (pu_pos_y - (cu_pos_y << 1)) * num_4x4_in_ctb;
+
+ cur_pred_l0_flag = ps_curr_nbr_4x4_pu->b1_pred_l0_flag;
+
+ /* L0 & L1 unique ref. pic. id for cur. PU, (stored in upper 4 bits) */
+ i1_cur_l0_ref_pic_buf_id = (ps_curr_nbr_4x4_pu->mv.i1_l0_ref_pic_buf_id);
+ i1_cur_l1_ref_pic_buf_id = (ps_curr_nbr_4x4_pu->mv.i1_l1_ref_pic_buf_id);
+
+ /* Number of motion vectors used for cur. PU */
+ cur_mv_no = cur_pred_l0_flag + ps_curr_nbr_4x4_pu->b1_pred_l1_flag;
+
+ /* x and y mv for L0 and L1, for cur. PU */
+ i2_mv_x0 = ps_curr_nbr_4x4_pu->mv.s_l0_mv.i2_mvx;
+ i2_mv_y0 = ps_curr_nbr_4x4_pu->mv.s_l0_mv.i2_mvy;
+ i2_mv_x1 = ps_curr_nbr_4x4_pu->mv.s_l1_mv.i2_mvx;
+ i2_mv_y1 = ps_curr_nbr_4x4_pu->mv.s_l1_mv.i2_mvy;
+
+ /* two cases for updating TOP and LEFT edges respectively */
+ /* k = 0 : TOP edge update, k = 1 : LEFT edge update */
+ for(k = 0; k < 2; k++)
+ {
+ WORD32 pu_pos_pointer_calc, pu_pos_bit_calc;
+ UWORD32 *pu4_pu_cur_edge;
+ WORD32 pu_dim, nbr_inc;
+ nbr_4x4_t *ps_nbr_4x4;
+
+ /* TOP edge case */
+ if(0 == k)
+ {
+ pu_pos_pointer_calc = pu_pos_y;
+ pu_pos_bit_calc = pu_pos_x;
+ pu4_pu_cur_edge = pu4_horz_bs + (pu_pos_y >> 1);
+ pu_dim = pu_wd;
+
+ /* top neighbours are accessed linearly */
+ nbr_inc = 1;
+
+ /* If the current 4x4 csb is in the first row of CTB */
+ if(0 == pu_pos_pointer_calc)
+ { /* then need to check if top CTB is physically available */
+ /* (slice bound. are considered as availabale) */
+ if(ps_deblk_prms->u1_not_first_ctb_row_of_frame)
+ {
+ ps_nbr_4x4 = ps_top_nbr_4x4 + (nbr_inc * (pu_pos_x - (cu_pos_x << 1)));
+ }
+ else
+ {
+ /* This is done for avoiding uninitialized memory access at pic. boundaries*/
+ ps_nbr_4x4 = ps_curr_nbr_4x4_pu;
+ }
+ }
+ /* within ctb, so top neighbour is available */
+ else
+ {
+ ps_nbr_4x4 = ps_curr_nbr_4x4_pu - num_4x4_in_ctb;
+ }
+ }
+ /* LEFT edge case */
+ else
+ {
+ pu_pos_pointer_calc = pu_pos_x;
+ pu_pos_bit_calc = pu_pos_y;
+ pu4_pu_cur_edge = pu4_vert_bs + (pu_pos_x >> 1);
+ pu_dim = pu_ht;
+
+ /* left neighbours are accessed using stride */
+ nbr_inc = nbr_4x4_left_strd;
+
+ /* If the current 4x4 csb is in the first col of CTB */
+ if(0 == pu_pos_pointer_calc)
+ { /* then need to check if left CTB is available */
+ if(ps_deblk_prms->u1_not_first_ctb_col_of_frame)
+ {
+ ps_nbr_4x4 = ps_left_nbr_4x4 + (nbr_inc * (pu_pos_y - (cu_pos_y << 1)));
+ }
+ else
+ {
+ /* This is done for avoiding uninitialized memory access at pic. boundaries*/
+ ps_nbr_4x4 = ps_curr_nbr_4x4_pu;
+ nbr_inc = num_4x4_in_ctb;
+ }
+ }
+ /* within ctb, so left neighbour is available */
+ else
+ {
+ ps_nbr_4x4 = ps_curr_nbr_4x4_pu - 1;
+ nbr_inc = num_4x4_in_ctb;
+ }
+ }
+
+ /* Only if the current edge falls on 8 pixel grid and ... */
+ if(0 == (pu_pos_pointer_calc & 1))
+ {
+ /* go through the edge in 4x4 unit. Can be optimized */
+ /* In that case special case for CTB boundary */
+ for(j = 0; j < pu_dim; j++)
+ {
+ //nbr_4x4_t *ps_temp_nbr_4x4;
+
+ /* ... and if the BS not set yet */
+ if(0 == EXTRACT_VALUE_BIG(pu4_pu_cur_edge, (pu_pos_bit_calc + j)))
+ {
+ WORD8 i1_nbr_l0_ref_pic_buf_id, i1_nbr_l1_ref_pic_buf_id;
+ WORD32 nbr_mv_no;
+ WORD32 bs_flag = 0;
+ WORD32 nbr_pred_l0_flag = ps_nbr_4x4->b1_pred_l0_flag;
+
+ /* L0 & L1 unique ref. pic. id for nbr. csb, in upper 4 bits */
+ i1_nbr_l0_ref_pic_buf_id = (ps_nbr_4x4->mv.i1_l0_ref_pic_buf_id);
+ i1_nbr_l1_ref_pic_buf_id = (ps_nbr_4x4->mv.i1_l1_ref_pic_buf_id);
+
+ /* Number of motion vectors used */
+ nbr_mv_no = nbr_pred_l0_flag + ps_nbr_4x4->b1_pred_l1_flag;
+
+ /* If diff. no. of motion vectors used */
+ if(cur_mv_no != nbr_mv_no)
+ {
+ bs_flag = 1;
+ }
+ /* If One motion vector is used */
+ else if(1 == cur_mv_no)
+ {
+ WORD16 i2_mv_x, i2_mv_y;
+
+ if(cur_pred_l0_flag)
+ { /* L0 used for cur. */
+ if(nbr_pred_l0_flag)
+ { /* L0 used for nbr. */
+ if(i1_cur_l0_ref_pic_buf_id != i1_nbr_l0_ref_pic_buf_id)
+ {
+ /* reference pictures used are different */
+ bs_flag = 1;
+ }
+ }
+ else
+ { /* L1 used for nbr. */
+ if(i1_cur_l0_ref_pic_buf_id != i1_nbr_l1_ref_pic_buf_id)
+ {
+ /* reference pictures used are different */
+ bs_flag = 1;
+ }
+ }
+ if(!bs_flag)
+ {
+ i2_mv_x = i2_mv_x0;
+ i2_mv_y = i2_mv_y0;
+ }
+ }
+ else
+ { /* L1 used for cur. */
+ if(nbr_pred_l0_flag)
+ { /* L0 used for nbr. */
+ if(i1_cur_l1_ref_pic_buf_id != i1_nbr_l0_ref_pic_buf_id)
+ {
+ /* reference pictures used are different */
+ bs_flag = 1;
+ }
+ }
+ else
+ { /* L1 used for nbr. */
+ if(i1_cur_l1_ref_pic_buf_id != i1_nbr_l1_ref_pic_buf_id)
+ {
+ /* reference pictures used are different */
+ bs_flag = 1;
+ }
+ }
+ if(!bs_flag)
+ {
+ i2_mv_x = i2_mv_x1;
+ i2_mv_y = i2_mv_y1;
+ }
+ }
+
+ if(!bs_flag)
+ {
+ WORD16 i2_nbr_mv_x, i2_nbr_mv_y;
+
+ if(nbr_pred_l0_flag)
+ {
+ i2_nbr_mv_x = ps_nbr_4x4->mv.s_l0_mv.i2_mvx;
+ i2_nbr_mv_y = ps_nbr_4x4->mv.s_l0_mv.i2_mvy;
+ }
+ else
+ {
+ i2_nbr_mv_x = ps_nbr_4x4->mv.s_l1_mv.i2_mvx;
+ i2_nbr_mv_y = ps_nbr_4x4->mv.s_l1_mv.i2_mvy;
+ }
+ // clang-format off
+ bs_flag =
+ (abs(i2_mv_x - i2_nbr_mv_x) < 4) &&
+ (abs(i2_mv_y - i2_nbr_mv_y) < 4)
+ ? 0
+ : 1;
+ // clang-format on
+ }
+ }
+ /* If two motion vectors are used */
+ else if(2 == cur_mv_no)
+ {
+ /* check whether same reference pictures used */
+ if((i1_cur_l0_ref_pic_buf_id == i1_nbr_l0_ref_pic_buf_id &&
+ i1_cur_l1_ref_pic_buf_id == i1_nbr_l1_ref_pic_buf_id) ||
+ (i1_cur_l0_ref_pic_buf_id == i1_nbr_l1_ref_pic_buf_id &&
+ i1_cur_l1_ref_pic_buf_id == i1_nbr_l0_ref_pic_buf_id))
+ {
+ WORD16 i2_nbr_mv_x0, i2_nbr_mv_y0, i2_nbr_mv_x1, i2_nbr_mv_y1;
+
+ /* x and y mv for L0 and L1, for nbr. csb*/
+ i2_nbr_mv_x0 = ps_nbr_4x4->mv.s_l0_mv.i2_mvx;
+ i2_nbr_mv_y0 = ps_nbr_4x4->mv.s_l0_mv.i2_mvy;
+ i2_nbr_mv_x1 = ps_nbr_4x4->mv.s_l1_mv.i2_mvx;
+ i2_nbr_mv_y1 = ps_nbr_4x4->mv.s_l1_mv.i2_mvy;
+
+ /* Different L0 and L1 */
+ if(i1_cur_l0_ref_pic_buf_id != i1_cur_l1_ref_pic_buf_id)
+ {
+ if(i1_cur_l0_ref_pic_buf_id == i1_nbr_l0_ref_pic_buf_id)
+ {
+ // clang-format off
+ bs_flag =
+ (abs(i2_mv_x0 - i2_nbr_mv_x0) < 4) &&
+ (abs(i2_mv_y0 - i2_nbr_mv_y0) < 4) &&
+ (abs(i2_mv_x1 - i2_nbr_mv_x1) < 4) &&
+ (abs(i2_mv_y1 - i2_nbr_mv_y1) < 4)
+ ? 0
+ : 1;
+ // clang-format on
+ }
+ else
+ {
+ // clang-format off
+ bs_flag =
+ (abs(i2_mv_x0 - i2_nbr_mv_x1) < 4) &&
+ (abs(i2_mv_y0 - i2_nbr_mv_y1) < 4) &&
+ (abs(i2_mv_x1 - i2_nbr_mv_x0) < 4) &&
+ (abs(i2_mv_y1 - i2_nbr_mv_y0) < 4)
+ ? 0
+ : 1;
+ // clang-format on
+ }
+ }
+ else /* Same L0 and L1 */
+ {
+ // clang-format off
+ bs_flag =
+ ((abs(i2_mv_x0 - i2_nbr_mv_x0) >= 4) ||
+ (abs(i2_mv_y0 - i2_nbr_mv_y0) >= 4) ||
+ (abs(i2_mv_x1 - i2_nbr_mv_x1) >= 4) ||
+ (abs(i2_mv_y1 - i2_nbr_mv_y1) >= 4)) &&
+ ((abs(i2_mv_x0 - i2_nbr_mv_x1) >= 4) ||
+ (abs(i2_mv_y0 - i2_nbr_mv_y1) >= 4) ||
+ (abs(i2_mv_x1 - i2_nbr_mv_x0) >= 4) ||
+ (abs(i2_mv_y1 - i2_nbr_mv_y0) >= 4))
+ ? 1
+ : 0;
+ // clang-format on
+ }
+ }
+ else /* If the reference pictures used are different */
+ {
+ bs_flag = 1;
+ }
+ }
+
+ if(bs_flag)
+ { /*Storing if BS set due to PU mvs */
+ /*Storing in BigEnd. format. BS[0]|BS[1]| .. |BS[15] & edge_size is 4*/
+ SET_VALUE_BIG((pu4_pu_cur_edge), BS_CBF_4, (pu_pos_bit_calc + j), 4);
+ }
+ }
+
+ /* increment the neighbour */
+ ps_nbr_4x4 += nbr_inc;
+ }
+ }
+ }
+ /* point to the next PU */
+ ps_pu++;
+ }
+}
+
+/**
+*******************************************************************************
+*
+* @brief Clear the invalid Boundary Strength which may be set by
+* ihevce_bs_compute_cu
+*
+* @par Description
+* Clears the invalid Boundary Strength entries which may have been set by
+* ihevce_bs_compute_cu (as it computes all 4 edges in one shot in some cases)
+*
+* @param[out] ps_deblk_prms
+* Pointer to structure s_deblk_prms, which contains
+* s_deblk_prms.au4_horz_bs : max of 8 such conti. bs to be comp. for 64x64 ctb
+* s_deblk_prms.au4_vert_bs : max of 8 such conti. bs to be comp. for 64x64 ctb
+*
+* @param[in] last_ctb_row_flag
+* Flag for checking whether the current CTB is in last ctb_row
+*
+* @param[in] last_ctb_in_row_flag
+* Flag for checking whether the current CTB is the last in current row
+*
+* @param[in] last_hz_ctb_wd
+* Valid Width (pixels) in the last CTB in every row (padding cases)
+*
+* @param[in] last_vt_ctb_ht
+* Valid Height (pixels) in the last CTB row (padding cases)
+*
+* @returns none
+*
+* @remarks
+*
+*******************************************************************************
+*/
+void ihevce_bs_clear_invalid(
+    deblk_bs_ctb_ctxt_t *ps_deblk_prms,
+    WORD32 last_ctb_row_flag,
+    WORD32 last_ctb_in_row_flag,
+    WORD32 last_hz_ctb_wd,
+    WORD32 last_vt_ctb_ht)
+{
+    /* BS entry index = valid CTB dimension (in pixels) divided by 8 */
+    const WORD32 vert_bs_idx = last_hz_ctb_wd >> 3;
+    const WORD32 horz_bs_idx = last_vt_ctb_ht >> 3;
+    /* Rightmost CTB (right padding case): clear vert BS at the valid width */
+    if(1 == last_ctb_in_row_flag)
+    {
+        ps_deblk_prms->au4_vert_bs[vert_bs_idx] = 0;
+    }
+    /* Bottommost CTB row (bottom padding case): clear horz BS at valid height */
+    if(1 == last_ctb_row_flag)
+    {
+        ps_deblk_prms->au4_horz_bs[horz_bs_idx] = 0;
+    }
+}
diff --git a/encoder/ihevce_bs_compute_ctb.h b/encoder/ihevce_bs_compute_ctb.h
new file mode 100644
index 0000000..8985def
--- /dev/null
+++ b/encoder/ihevce_bs_compute_ctb.h
@@ -0,0 +1,158 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+*
+* @file ihevce_bs_compute_ctb.h
+*
+* @brief
+* This file contains encoder boundary strength related macros and
+* interface prototypes
+*
+* @author
+* ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_BS_COMPUTE_CTB_H_
+#define _IHEVCE_BS_COMPUTE_CTB_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ * @brief BS for a 32x32 TU in INTRA mode: 8 two-bit fields, each holding BS value 2
+******************************************************************************
+ */
+#define BS_INTRA_32 0xAAAA
+
+/**
+******************************************************************************
+ * @brief BS for a 16x16 TU in INTRA mode: 4 two-bit fields, each holding BS value 2
+******************************************************************************
+ */
+#define BS_INTRA_16 0xAA
+
+/**
+******************************************************************************
+ * @brief BS for a 8x8 TU in INTRA mode: 2 two-bit fields, each holding BS value 2
+******************************************************************************
+ */
+#define BS_INTRA_8 0xA
+
+/**
+******************************************************************************
+ * @brief BS for a 4x4 TU in INTRA mode: a single two-bit field holding BS value 2
+******************************************************************************
+ */
+#define BS_INTRA_4 0x2
+
+/**
+******************************************************************************
+ * @brief value defined to denote an invalid BS in the global array
+******************************************************************************
+ */
+#define BS_INVALID 0xDEAF
+
+/**
+******************************************************************************
+ * @brief BS for a coded inter 32x32 TU: 8 two-bit fields, each holding BS value 1
+******************************************************************************
+ */
+#define BS_CBF_32 0x5555
+
+/**
+******************************************************************************
+ * @brief BS for a coded inter 16x16 TU: 4 two-bit fields, each holding BS value 1
+******************************************************************************
+ */
+#define BS_CBF_16 0x55
+
+/**
+******************************************************************************
+ * @brief BS for a coded inter 8x8 TU: 2 two-bit fields, each holding BS value 1
+******************************************************************************
+ */
+#define BS_CBF_8 0x5
+
+/**
+******************************************************************************
+ * @brief BS for a coded inter 4x4 TU: a single two-bit field holding BS value 1
+******************************************************************************
+ */
+#define BS_CBF_4 0x01
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ * @brief ORs 'value' into the 32-bit word at pu4_bs, shifted left by
+ * ( 32 - (ip_pos<<1) - (edge_size>>1) ), i.e. big-endian packing with 2 bits
+ * per 4x4 unit: ip_pos is in 4x4 units, edge_size in pixels, and 'value' is
+ * (edge_size>>1) bits wide. Arguments are parenthesised and the shift is done
+ * on UWORD32, so that e.g. 0xAAAA << 16 does not overflow a signed int.
+******************************************************************************
+ */
+#define SET_VALUE_BIG(pu4_bs, value, ip_pos, edge_size) \
+    { \
+        *(pu4_bs) |= ((UWORD32)(value) << (32 - ((ip_pos) << 1) - ((edge_size) >> 1))); \
+    }
+
+/**
+******************************************************************************
+ * @brief extracts the 2-bit field at bit position (30 - 2*ip_pos) from the word
+ * pointed to by pu4_bs (big-endian packing; ip_pos may be any expression).
+******************************************************************************
+ */
+#define EXTRACT_VALUE_BIG(pu4_bs, ip_pos) (((*(pu4_bs)) >> (30 - 2 * (ip_pos))) & 0x3)
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+void ihevce_bs_init_ctb( /* NOTE(review): body not visible from this header; by name, per-CTB BS init - confirm */
+ deblk_bs_ctb_ctxt_t *ps_deblk_prms,
+ frm_ctb_ctxt_t *ps_frm_ctb_prms,
+ WORD32 ctb_ctr,
+ WORD32 vert_ctr);
+
+void ihevce_bs_compute_cu( /* sets BS; NOTE(review): presumably for the CU in ps_cu_final - confirm */
+ cu_enc_loop_out_t *ps_cu_final,
+ nbr_4x4_t *ps_top_nbr_4x4,
+ nbr_4x4_t *ps_left_nbr_4x4,
+ nbr_4x4_t *ps_curr_nbr_4x4,
+ WORD32 nbr_4x4_left_strd,
+ WORD32 num_4x4_in_ctb,
+ deblk_bs_ctb_ctxt_t *ps_deblk_prms);
+
+void ihevce_bs_clear_invalid( /* zeroes the BS entry at the valid right / bottom extent of a boundary CTB */
+ deblk_bs_ctb_ctxt_t *ps_deblk_prms, /* au4_vert_bs / au4_horz_bs are modified */
+ WORD32 last_ctb_row_flag, /* 1: clear au4_horz_bs[last_vt_ctb_ht >> 3] */
+ WORD32 last_ctb_in_row_flag, /* 1: clear au4_vert_bs[last_hz_ctb_wd >> 3] */
+ WORD32 last_hz_ctb_wd, /* valid width, in pixels, of the last CTB of a row */
+ WORD32 last_vt_ctb_ht); /* valid height, in pixels, of the last CTB row */
+
+#endif /* _IHEVCE_BS_COMPUTE_CTB_H_ */
diff --git a/encoder/ihevce_buffer_que.c b/encoder/ihevce_buffer_que.c
new file mode 100644
index 0000000..2f89a1a
--- /dev/null
+++ b/encoder/ihevce_buffer_que.c
@@ -0,0 +1,416 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file
+* ihevce_buffer_que.c
+*
+* @brief
+* This file contains all the functions related to Buffer Queue manager
+*
+* @author
+* ittiam
+*
+* @par List of Functions:
+* ihevce_buff_que_get_mem_recs
+* ihevce_buff_que_get_num_mem_recs
+* ihevce_buff_que_init
+* ihevce_buff_que_get_free_buf
+* ihevce_buff_que_get_next_buf
+* ihevce_buff_que_get_next_reorder_buf
+* ihevce_buff_que_set_buf_prod
+* ihevce_buff_que_rel_buf
+* ihevce_buff_que_get_active_bufs
+* ihevce_buff_que_set_reorder_buf
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System Include Files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <assert.h>
+
+/* User Include Files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_buffer_que_interface.h"
+#include "ihevce_buffer_que_private.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/*!
+************************************************************************
+* \brief Returns the number of memory records used by the Buffer Que manager.
+************************************************************************
+*/
+WORD32 ihevce_buff_que_get_num_mem_recs(void)
+{
+    const WORD32 i4_num_recs = NUM_BUFFER_QUE_MEM_RECS;
+    return i4_num_recs;
+}
+
+/*!
+************************************************************************
+* \brief
+*    Fills in the attributes (size, memory type, alignment) of every memory
+*    record needed by the Buffer Que manager.
+*
+* \param[out] ps_mem_tab          : table of NUM_BUFFER_QUE_MEM_RECS records to fill
+* \param[in]  max_num_bufs_in_que : entry count used to size the per-buffer arrays
+* \param[in]  i4_mem_space        : cast to IV_MEM_TYPE_T, stored in every record
+*
+* \return
+*    NUM_BUFFER_QUE_MEM_RECS, i.e. the number of records filled
+************************************************************************
+*/
+WORD32 ihevce_buff_que_get_mem_recs(
+    iv_mem_rec_t *ps_mem_tab, WORD32 max_num_bufs_in_que, WORD32 i4_mem_space)
+{
+    const IV_MEM_TYPE_T e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
+    WORD32 i4_rec;
+
+    /* sizes: one state structure plus four arrays with one entry per buffer */
+    ps_mem_tab[BUFFER_QUE_CTXT].i4_mem_size = sizeof(buf_que_t);
+    ps_mem_tab[BUFFER_QUE_NUM_USER_MEM].i4_mem_size = (sizeof(WORD32) * max_num_bufs_in_que);
+    ps_mem_tab[BUFFER_QUE_PROD_STS_MEM].i4_mem_size = (sizeof(WORD32) * max_num_bufs_in_que);
+    ps_mem_tab[BUFFER_QUE_ENC_SEQ_MEM].i4_mem_size = (sizeof(UWORD32) * max_num_bufs_in_que);
+    ps_mem_tab[BUFFER_QUE_QUED_SEQ_MEM].i4_mem_size = (sizeof(UWORD32) * max_num_bufs_in_que);
+
+    /* memory type and alignment are common to all the records */
+    for(i4_rec = 0; i4_rec < NUM_BUFFER_QUE_MEM_RECS; i4_rec++)
+    {
+        ps_mem_tab[i4_rec].e_mem_type = e_mem_type;
+        ps_mem_tab[i4_rec].i4_mem_alignment = 8;
+    }
+
+    return (NUM_BUFFER_QUE_MEM_RECS);
+}
+
+/*!
+************************************************************************
+* \brief
+*    Initialises the Buffer Que manager state structure.
+*
+* \param[in] ps_mem_tab      : memory records; the pv_base of each record backs
+*                              the state structure or one of its four arrays
+* \param[in] num_bufs_in_que : number of buffers managed by the que
+* \param[in] ppv_buff_ptrs   : array of buffer pointers, stored as is
+* \return handle to the que (state structure in the BUFFER_QUE_CTXT record)
+************************************************************************
+*/
+void *ihevce_buff_que_init(iv_mem_rec_t *ps_mem_tab, WORD32 num_bufs_in_que, void **ppv_buff_ptrs)
+{
+    buf_que_t *ps_que = (buf_que_t *)ps_mem_tab[BUFFER_QUE_CTXT].pv_base;
+    WORD32 buf_idx;
+
+    /* hook up the per-buffer status arrays */
+    ps_que->pi4_num_users = (WORD32 *)ps_mem_tab[BUFFER_QUE_NUM_USER_MEM].pv_base;
+    ps_que->pi4_produced_sts = (WORD32 *)ps_mem_tab[BUFFER_QUE_PROD_STS_MEM].pv_base;
+    ps_que->pu4_enc_seq = (UWORD32 *)ps_mem_tab[BUFFER_QUE_ENC_SEQ_MEM].pv_base;
+    ps_que->pu4_que_seq = (UWORD32 *)ps_mem_tab[BUFFER_QUE_QUED_SEQ_MEM].pv_base;
+
+    /* que level state: all counters and sequence numbers start from zero */
+    ps_que->i4_num_bufs = num_bufs_in_que;
+    ps_que->i4_num_active_bufs = 0;
+    ps_que->u4_last_prod = 0;
+    ps_que->u4_last_cons = 0;
+    ps_que->u4_next_disp_seq = 0;
+    ps_que->u4_last_disp_seq = 0;
+    ps_que->ppv_buff_ptrs = ppv_buff_ptrs;
+
+    /* per-buffer state: no users, not produced, sequence numbers UINT32_MAX */
+    for(buf_idx = 0; buf_idx < ps_que->i4_num_bufs; buf_idx++)
+    {
+        ps_que->pi4_num_users[buf_idx] = 0;
+        ps_que->pi4_produced_sts[buf_idx] = 0;
+        ps_que->pu4_enc_seq[buf_idx] = UINT32_MAX;
+        ps_que->pu4_que_seq[buf_idx] = UINT32_MAX;
+    }
+
+    return ((void *)ps_que);
+}
+
+/*!
+**************************************************************************
+* \brief Producer side call: hands out the first buffer that has no users
+*    and is not marked produced, after tagging it with one user and the next
+*    que sequence number.
+* \param[in]  pv_buf_que : que handle (buf_que_t *)
+* \param[out] pi4_id     : index of the returned buffer (written on success only)
+* \return buffer pointer, or NULL when no buffer is free
+**************************************************************************
+*/
+void *ihevce_buff_que_get_free_buf(void *pv_buf_que, WORD32 *pi4_id)
+{
+    buf_que_t *ps_que = (buf_que_t *)pv_buf_que;
+    const WORD32 total_bufs = ps_que->i4_num_bufs;
+    WORD32 idx;
+
+    for(idx = 0; idx < total_bufs; idx++)
+    {
+        /* skip buffers that still have users or are marked produced */
+        if((0 != ps_que->pi4_num_users[idx]) || (0 != ps_que->pi4_produced_sts[idx]))
+        {
+            continue;
+        }
+        *pi4_id = idx;
+        ps_que->pi4_num_users[idx] = 1;
+        ps_que->pu4_que_seq[idx] = ps_que->u4_last_prod;
+        ps_que->u4_last_prod += 1;
+        return (ps_que->ppv_buff_ptrs[idx]);
+    }
+    return (NULL);
+}
+
+/*!
+**************************************************************************
+* \brief
+*    Consumer side call: returns the next buffer in queued order, i.e. the
+*    first buffer whose que sequence number equals u4_last_cons.
+*
+* \param[in]  pv_buf_que : que handle (buf_que_t *)
+* \param[out] pi4_id     : index of the returned buffer (written on success only)
+*
+* \return
+*    Buffer pointer when the matching buffer is marked produced; u4_last_cons
+*    is then advanced by one. NULL when no buffer carries the wanted sequence
+*    number or when the matching buffer is not yet marked produced.
+*
+* \note pi4_id and u4_last_cons are left untouched when NULL is returned
+**************************************************************************
+*/
+void *ihevce_buff_que_get_next_buf(void *pv_buf_que, WORD32 *pi4_id)
+{
+    buf_que_t *ps_que = (buf_que_t *)pv_buf_que;
+    const UWORD32 wanted_seq = ps_que->u4_last_cons;
+    WORD32 idx = 0;
+
+    /* locate the first buffer carrying the que sequence number to consume */
+    while((idx < ps_que->i4_num_bufs) && (wanted_seq != ps_que->pu4_que_seq[idx]))
+    {
+        idx++;
+    }
+
+    /* Buffer not ready for Consumption: not found, or not yet produced */
+    if((idx >= ps_que->i4_num_bufs) || (1 != ps_que->pi4_produced_sts[idx]))
+    {
+        return (NULL);
+    }
+
+    *pi4_id = idx;
+    ps_que->u4_last_cons += 1;
+
+    return (ps_que->ppv_buff_ptrs[idx]);
+}
+
+/*!
+**************************************************************************
+* \brief
+*    Returns the buffer corresponding to the id passed, or NULL when the id
+*    is outside [0, i4_num_bufs) (negative ids are rejected as well).
+**************************************************************************
+*/
+void *ihevce_buff_que_get_buf(void *pv_buf_que, WORD32 i4_id)
+{
+    buf_que_t *ps_buf_que = (buf_que_t *)pv_buf_que;
+
+    if((i4_id < 0) || (i4_id >= ps_buf_que->i4_num_bufs))
+    {
+        return (NULL);
+    }
+    return (ps_buf_que->ppv_buff_ptrs[i4_id]);
+}
+
+/*!
+**************************************************************************
+* \brief
+*    Consumer side call: returns the next buffer in reordered order, i.e. the
+*    first buffer whose pu4_enc_seq entry equals u4_last_disp_seq, and then
+*    advances u4_last_disp_seq by one.
+* \param[in]  pv_buf_que : que handle (buf_que_t *)
+* \param[out] pi4_id     : index of the returned buffer (written on success only)
+* \return buffer pointer, or NULL when no buffer carries that sequence number
+**************************************************************************
+*/
+void *ihevce_buff_que_get_next_reorder_buf(void *pv_buf_que, WORD32 *pi4_id)
+{
+    buf_que_t *ps_que = (buf_que_t *)pv_buf_que;
+    const UWORD32 wanted_seq = ps_que->u4_last_disp_seq;
+    WORD32 idx;
+
+    for(idx = 0; idx < ps_que->i4_num_bufs; idx++)
+    {
+        if(wanted_seq != ps_que->pu4_enc_seq[idx])
+        {
+            continue;
+        }
+
+        /* match: report the index and move on to the next sequence number */
+        *pi4_id = idx;
+        ps_que->u4_last_disp_seq += 1;
+        return (ps_que->ppv_buff_ptrs[idx]);
+    }
+
+    /* Buffer not ready for Consumption */
+    return (NULL);
+}
+
+/*!
+**************************************************************************
+* \brief
+*    Producer side call: marks the buffer as produced, i.e. ready for
+*    consumption, adds num_users to its user count and increments the
+*    number of active buffers.
+* \param[in] pv_buf_que : que handle (buf_que_t *)
+* \param[in] buf_id     : index of the buffer, valid range [0, i4_num_bufs)
+* \param[in] num_users  : added to the user count of the buffer
+*
+* \return
+*    0 on success; -1 when buf_id is out of range (negative ids included)
+*    or when the buffer is already marked produced
+**************************************************************************
+*/
+WORD32 ihevce_buff_que_set_buf_prod(void *pv_buf_que, WORD32 buf_id, WORD32 num_users)
+{
+    buf_que_t *ps_buf_que = (buf_que_t *)pv_buf_que;
+
+    /* Unable to recognize the Buffer ID; negative ids must not index arrays */
+    if((buf_id < 0) || (buf_id >= ps_buf_que->i4_num_bufs))
+    {
+        return (-1);
+    }
+
+    /* Buffer is already marked as Produced */
+    if(ps_buf_que->pi4_produced_sts[buf_id] != 0)
+    {
+        return (-1);
+    }
+
+    ps_buf_que->pi4_num_users[buf_id] += num_users;
+    ps_buf_que->i4_num_active_bufs += 1;
+    ps_buf_que->pi4_produced_sts[buf_id] = 1;
+
+    return 0;
+}
+
+/*!
+**************************************************************************
+* \brief
+*    Drops one user of a produced buffer. When the user count reaches zero
+*    the number of active buffers is decremented and the buffer is marked
+*    as not produced, which makes it available to the producer again.
+* \param[in] pv_buf_que : que handle (buf_que_t *)
+* \param[in] buf_id     : index of the buffer, valid range [0, i4_num_bufs)
+* \return
+*    0 on success; -1 when buf_id is out of range (negative ids included),
+*    when the buffer is not marked produced, or when the last user is
+*    released while the active buffer count is already zero
+**************************************************************************
+*/
+WORD32 ihevce_buff_que_rel_buf(void *pv_buf_que, WORD32 buf_id)
+{
+    buf_que_t *ps_buf_que = (buf_que_t *)pv_buf_que;
+
+    /* Unable to recognize the Buffer ID; negative ids must not index arrays */
+    if((buf_id < 0) || (buf_id >= ps_buf_que->i4_num_bufs))
+    {
+        return (-1);
+    }
+
+    /* Illegal release of Buffer, No one is using it */
+    if(ps_buf_que->pi4_produced_sts[buf_id] <= 0)
+    {
+        return (-1);
+    }
+
+    /* decrease the number of users */
+    ps_buf_que->pi4_num_users[buf_id] -= 1;
+
+    if(ps_buf_que->pi4_num_users[buf_id] == 0)
+    {
+        /* last user gone, yet no buffer is accounted as active */
+        if(0 == ps_buf_que->i4_num_active_bufs)
+        {
+            return (-1);
+        }
+        ps_buf_que->i4_num_active_bufs -= 1;
+        ps_buf_que->pi4_produced_sts[buf_id] = 0;
+    }
+
+    return 0;
+}
+
+/*!
+**************************************************************************
+* \brief
+*    Reports the number of active buffers, i.e. buffers marked produced
+*    that have not yet been released by all of their users.
+**************************************************************************
+*/
+WORD32 ihevce_buff_que_get_active_bufs(void *pv_buf_que)
+{
+    const buf_que_t *ps_que = (const buf_que_t *)pv_buf_que;
+
+    return ps_que->i4_num_active_bufs;
+}
+
+/*!
+**************************************************************************
+* \brief
+*    Assigns the next reorder (display) sequence number to the given buffer
+*    and increments that number. ihevce_buff_que_get_next_reorder_buf hands
+*    buffers out in the order of these numbers.
+*
+* \param[in] pv_buf_que : que handle (buf_que_t *)
+* \param[in] buf_id     : index of the buffer, valid range [0, i4_num_bufs)
+*
+* \return 0 on success, -1 when buf_id is out of range (negative ids included)
+**************************************************************************
+*/
+WORD32 ihevce_buff_que_set_reorder_buf(void *pv_buf_que, WORD32 buf_id)
+{
+    buf_que_t *ps_buf_que = (buf_que_t *)pv_buf_que;
+    UWORD32 u4_seq;
+
+    /* invalid buffer id; negative ids must not index the array */
+    if((buf_id < 0) || (buf_id >= ps_buf_que->i4_num_bufs))
+    {
+        return (-1);
+    }
+
+    /* take the current seq number (kept unsigned) and increment the counter */
+    u4_seq = ps_buf_que->u4_next_disp_seq;
+    ps_buf_que->u4_next_disp_seq++;
+
+    /* set the reorder number to the corresponding id */
+    ps_buf_que->pu4_enc_seq[buf_id] = u4_seq;
+
+    return 0;
+}
diff --git a/encoder/ihevce_buffer_que_interface.h b/encoder/ihevce_buffer_que_interface.h
new file mode 100644
index 0000000..63ca764
--- /dev/null
+++ b/encoder/ihevce_buffer_que_interface.h
@@ -0,0 +1,72 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_buffer_que_interface.h
+*
+* \brief
+* This file contains interface prototypes of Buffer Queue manager functions
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_BUFFER_QUE_INTERFACE_H_
+#define _IHEVCE_BUFFER_QUE_INTERFACE_H_
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+/* Create APIs */
+WORD32 ihevce_buff_que_get_num_mem_recs(void); /*!< number of memory records needed */
+
+WORD32 ihevce_buff_que_get_mem_recs(
+ iv_mem_rec_t *ps_mem_tab, WORD32 max_num_bufs_in_que, WORD32 i4_mem_space); /*!< fills the records */
+
+void *ihevce_buff_que_init(iv_mem_rec_t *ps_mem_tab, WORD32 num_bufs_in_que, void **ppv_buff_ptrs);
+
+/* Process APIs */
+void *ihevce_buff_que_get_free_buf(void *pv_buf_que, WORD32 *pi4_id); /*!< producer: NULL if none free */
+
+void *ihevce_buff_que_get_next_buf(void *pv_buf_que, WORD32 *pi4_id); /*!< consumer: queued order */
+
+void *ihevce_buff_que_get_next_reorder_buf(void *pv_buf_que, WORD32 *pi4_id); /*!< consumer: reorder order */
+
+WORD32 ihevce_buff_que_set_buf_prod(void *pv_buf_que, WORD32 buf_id, WORD32 num_users);
+/*!< Note: The manager assumes that once a buffer is requested from the que
+ * at least one consumer will consume it, so num_users should be 0 when the
+ * buffer has only one consumer. In general num_users should be passed as
+ * (max number of consumers - 1) */
+
+WORD32 ihevce_buff_que_rel_buf(void *pv_buf_que, WORD32 buf_id); /*!< drops one user; 0 / -1 */
+
+WORD32 ihevce_buff_que_get_active_bufs(void *pv_buf_que); /*!< returns i4_num_active_bufs */
+
+WORD32 ihevce_buff_que_set_reorder_buf(void *pv_buf_que, WORD32 buf_id); /*!< assigns next reorder seq */
+
+void *ihevce_buff_que_get_buf(void *pv_buf_que, WORD32 i4_id); /*!< buffer for the id; NULL if id too large */
+
+#endif /* _IHEVCE_BUFFER_QUE_INTERFACE_H_ */
diff --git a/encoder/ihevce_buffer_que_private.h b/encoder/ihevce_buffer_que_private.h
new file mode 100644
index 0000000..dcaf910
--- /dev/null
+++ b/encoder/ihevce_buffer_que_private.h
@@ -0,0 +1,79 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*!
+******************************************************************************
+* \file ihevce_buffer_que_private.h
+*
+* \brief
+* This file contains private structures & definitions of Buffer Queue manager
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_BUFFER_QUE_PRIVATE_H_
+#define _IHEVCE_BUFFER_QUE_PRIVATE_H_
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+typedef enum
+{
+
+ BUFFER_QUE_CTXT = 0, /* record backing the buf_que_t state structure */
+ BUFFER_QUE_NUM_USER_MEM, /* record backing the pi4_num_users array */
+ BUFFER_QUE_PROD_STS_MEM, /* record backing the pi4_produced_sts array */
+ BUFFER_QUE_ENC_SEQ_MEM, /* record backing the pu4_enc_seq array */
+ BUFFER_QUE_QUED_SEQ_MEM, /* record backing the pu4_que_seq array */
+
+ /* should be last entry: doubles as the number of memory records */
+ NUM_BUFFER_QUE_MEM_RECS
+
+} BUFFER_QUE_MEM_T;
+
+/*****************************************************************************/
+/* Structures */
+/*****************************************************************************/
+
+typedef struct
+{
+ UWORD32 u4_next_disp_seq; /*!< Reorder seq number given out by the next set_reorder_buf call */
+ UWORD32 u4_last_disp_seq; /*!< Reorder seq number the next get_next_reorder_buf call looks for */
+ UWORD32 u4_last_cons; /*!< Que seq number the next get_next_buf call looks for */
+ UWORD32 u4_last_prod; /*!< Que seq number given out by the next get_free_buf call */
+ WORD32 i4_num_bufs; /*!< number of buffers */
+ WORD32 i4_num_active_bufs; /*!< number of buffers marked produced and not yet fully released */
+
+ UWORD32 *pu4_enc_seq; /*!< Array to store the reorder (encode) seq of each
+ buffer; entries are UINT32_MAX after init */
+ UWORD32 *pu4_que_seq; /*!< Array to store the queued seq of each
+ buffer; entries are UINT32_MAX after init */
+ void **ppv_buff_ptrs; /*!< Pointer to array of buffer pointers supplied at init */
+ WORD32 *pi4_num_users; /*!< Array to store number of users of each buffer */
+ WORD32 *pi4_produced_sts; /*!< Array to store produced status (1: produced, 0: not) */
+} buf_que_t;
+
+#endif //_IHEVCE_BUFFER_QUE_PRIVATE_H_
diff --git a/encoder/ihevce_cabac.c b/encoder/ihevce_cabac.c
new file mode 100644
index 0000000..6c06265
--- /dev/null
+++ b/encoder/ihevce_cabac.c
@@ -0,0 +1,914 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+******************************************************************************
+* @file ihevce_cabac.c
+*
+* @brief
+* This file contains function definitions related to bitstream generation
+*
+* @author
+* ittiam
+*
+* @List of Functions
+* ihevce_cabac_reset()
+* ihevce_cabac_init()
+* ihevce_cabac_put_byte()
+* ihevce_cabac_encode_bin()
+* ihevce_cabac_encode_bypass_bin()
+* ihevce_cabac_encode_terminate()
+* ihevce_cabac_encode_tunary()
+* ihevce_cabac_encode_tunary_bypass()
+* ihevce_cabac_encode_bypass_bins()
+* ihevce_cabac_encode_egk()
+* ihevce_cabac_encode_trunc_rice()
+* ihevce_cabac_encode_trunc_rice_ctxt()
+* ihevce_cabac_flush()
+* ihevce_cabac_ctxt_backup()
+* ihevce_cabac_ctxt_row_init()
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "ihevc_debug.h"
+#include "ihevc_macros.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_cabac_tables.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+
+#define TEST_CABAC_BITESTIMATE 0
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief Resets the encoder cabac engine
+*
+* @par Description
+* Re-initialises the cabac engine without touching the context models;
+* needs to be called at start of dependent slice encode
+*
+* @param[in,out] ps_cabac
+* pointer to cabac context (handle)
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle); read only in encode bits mode
+*
+* @param[in] e_cabac_op_mode
+* operating mode of cabac; put bits / compute bits mode @sa CABAC_OP_MODE
+*
+* @return IHEVCE_SUCCESS always
+*
+******************************************************************************
+*/
+WORD32
+    ihevce_cabac_reset(cab_ctxt_t *ps_cabac, bitstrm_t *ps_bitstrm, CABAC_OP_MODE e_cabac_op_mode)
+{
+    /* Validate the handle and the requested operating mode */
+    ASSERT(ps_cabac != NULL);
+    ASSERT(
+        (e_cabac_op_mode == CABAC_MODE_ENCODE_BITS) ||
+        (e_cabac_op_mode == CABAC_MODE_COMPUTE_BITS));
+
+    ps_cabac->e_cabac_op_mode = e_cabac_op_mode;
+
+    if(CABAC_MODE_ENCODE_BITS != e_cabac_op_mode)
+    {
+        /* Compute bits mode: clear the total and texture bit estimates */
+        ps_cabac->u4_bits_estimated_q12 = 0;
+        ps_cabac->u4_texture_bits_estimated_q12 = 0;
+
+        /* Setting range to 0 switches off AEV_TRACE in compute bits mode */
+        ps_cabac->u4_range = 0;
+
+        return (IHEVCE_SUCCESS);
+    }
+
+    /* Encode bits mode from here on: a bitstream handle is mandatory as the
+     * cabac engine writes directly into the bitstream buffer */
+    ASSERT(ps_bitstrm != NULL);
+
+    /* Bitstream context initialization */
+    ps_cabac->pu1_strm_buffer = ps_bitstrm->pu1_strm_buffer;
+    ps_cabac->u4_max_strm_size = ps_bitstrm->u4_max_strm_size;
+    ps_cabac->i4_zero_bytes_run = ps_bitstrm->i4_zero_bytes_run;
+
+    /* Default: continue from the current bitstream offset. When entropy sync
+     * is enabled, start instead from the fixed offset where the slice header
+     * extension ended, to handle emulation prevention bytes during insertion
+     * of slice offsets at end of frame */
+    ps_cabac->u4_strm_buf_offset = ps_bitstrm->u4_strm_buf_offset;
+    if(1 == ps_cabac->i1_entropy_coding_sync_enabled_flag)
+    {
+        ps_cabac->u4_strm_buf_offset = ps_cabac->u4_first_slice_start_offset;
+    }
+
+    /* cabac engine initialization */
+    ps_cabac->u4_low = 0;
+    ps_cabac->u4_range = 510;
+    ps_cabac->u4_bits_gen = 0;
+    ps_cabac->u4_out_standing_bytes = 0;
+
+    return (IHEVCE_SUCCESS);
+}
+
+/**
+******************************************************************************
+*
+* @brief Initializes the encoder cabac engine
+*
+* @par Description
+* This routine needs to be called at start of slice/frame encode. It does
+* the following depending on the operating mode:
+*  - encode bits mode : attaches the engine to the bitstream buffer and
+*    resets the arithmetic coder state (low, range, bits generated,
+*    outstanding bytes)
+*  - compute bits mode : only zeroes the range
+* In both modes the bit estimates are cleared and the context models are
+* loaded from the init table selected by cabac_init_idc and slice_qp
+*
+* @param[in,out] ps_cabac
+* pointer to cabac context (handle)
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle); read only in encode bits mode
+*
+* @param[in] slice_qp
+* current slice qp (range - [0, IHEVC_MAX_QP - 1])
+*
+* @param[in] cabac_init_idc
+* current slice init idc (range - [0, 2])
+*
+* @param[in] e_cabac_op_mode
+* operating mode of cabac; put bits / compute bits mode @sa CABAC_OP_MODE
+*
+* @return IHEVCE_SUCCESS always
+*
+******************************************************************************
+*/
+WORD32 ihevce_cabac_init(
+    cab_ctxt_t *ps_cabac,
+    bitstrm_t *ps_bitstrm,
+    WORD32 slice_qp,
+    WORD32 cabac_init_idc,
+    CABAC_OP_MODE e_cabac_op_mode)
+{
+    /* Validate the handle, qp / init idc ranges and the operating mode */
+    ASSERT(ps_cabac != NULL);
+    ASSERT((slice_qp >= 0) && (slice_qp < IHEVC_MAX_QP));
+    ASSERT((cabac_init_idc >= 0) && (cabac_init_idc < 3));
+    ASSERT(
+        (e_cabac_op_mode == CABAC_MODE_ENCODE_BITS) ||
+        (e_cabac_op_mode == CABAC_MODE_COMPUTE_BITS));
+
+    ps_cabac->e_cabac_op_mode = e_cabac_op_mode;
+
+    if(CABAC_MODE_ENCODE_BITS == e_cabac_op_mode)
+    {
+        /* The cabac engine writes into the bitstream buffer in this mode,
+         * so a bitstream handle is mandatory */
+        ASSERT(ps_bitstrm != NULL);
+
+        /* Bitstream context initialization */
+        ps_cabac->pu1_strm_buffer = ps_bitstrm->pu1_strm_buffer;
+        ps_cabac->u4_max_strm_size = ps_bitstrm->u4_max_strm_size;
+        ps_cabac->i4_zero_bytes_run = ps_bitstrm->i4_zero_bytes_run;
+
+        /* Default: continue from the current bitstream offset. When entropy
+         * sync is enabled, start instead from the fixed offset where the
+         * slice header extension ended, to handle emulation prevention bytes
+         * during insertion of slice offsets at end of frame */
+        ps_cabac->u4_strm_buf_offset = ps_bitstrm->u4_strm_buf_offset;
+        if(1 == ps_cabac->i1_entropy_coding_sync_enabled_flag)
+        {
+            ps_cabac->u4_strm_buf_offset = ps_cabac->u4_first_slice_start_offset;
+        }
+
+        /* cabac engine initialization */
+        ps_cabac->u4_low = 0;
+        ps_cabac->u4_range = 510;
+        ps_cabac->u4_bits_gen = 0;
+        ps_cabac->u4_out_standing_bytes = 0;
+    }
+    else /* (CABAC_MODE_COMPUTE_BITS == e_cabac_op_mode) */
+    {
+        /* Setting range to 0 switches off AEV_TRACE in compute bits mode */
+        ps_cabac->u4_range = 0;
+    }
+
+    /* Both modes start with the total and texture bit estimates cleared */
+    ps_cabac->u4_bits_estimated_q12 = 0;
+    ps_cabac->u4_texture_bits_estimated_q12 = 0;
+
+    /* cabac context initialization based on init idc and slice qp */
+    COPY_CABAC_STATES(
+        ps_cabac->au1_ctxt_models,
+        &gau1_ihevc_cab_ctxts[cabac_init_idc][slice_qp][0],
+        IHEVC_CAB_CTXT_END);
+
+    return (IHEVCE_SUCCESS);
+}
+
+/**
+******************************************************************************
+*
+* @brief Puts new byte (and outstanding bytes) into bitstream after cabac
+* renormalization
+*
+* @par Description
+* 1. Extract the leading byte of low(L)
+* 2. If leading byte=0xff increment outstanding bytes and return
+* (as the actual bits depend on carry propagation later)
+* 3. If leading byte is not 0xff check for any carry propagation
+* 4. Insert the carry (propagated in previous byte) along with outstanding
+* bytes (if any) and leading byte
+*
+*
+* @param[in,out] ps_cabac
+* pointer to cabac context (handle)
+*
+* @return IHEVCE_SUCCESS, or IHEVCE_BITSTREAM_BUFFER_OVERFLOW if the bytes do not fit
+*
+******************************************************************************
+*/
+WORD32 ihevce_cabac_put_byte(cab_ctxt_t *ps_cabac)
+{
+ UWORD32 u4_low = ps_cabac->u4_low;
+ UWORD32 u4_bits_gen = ps_cabac->u4_bits_gen;
+ WORD32 lead_byte = u4_low >> (u4_bits_gen + CABAC_BITS - 8);
+
+ /* Sanity checks */
+ ASSERT((ps_cabac->u4_range >= 256) && (ps_cabac->u4_range < 512));
+ ASSERT((u4_bits_gen >= 8));
+
+ /* update bits generated and low after extracting leading byte */
+ u4_bits_gen -= 8;
+ ps_cabac->u4_low &= ((1 << (CABAC_BITS + u4_bits_gen)) - 1);
+ ps_cabac->u4_bits_gen = u4_bits_gen;
+
+ /************************************************************************/
+ /* 1. Extract the leading byte of low(L) */
+ /* 2. If leading byte=0xff increment outstanding bytes and return */
+ /* (as the actual bits depend on carry propagation later) */
+ /* 3. If leading byte is not 0xff check for any carry propagation */
+ /* 4. Insert the carry (propagated in previous byte) along with */
+ /* outstanding bytes (if any) and leading byte */
+ /************************************************************************/
+ if(lead_byte == 0xff)
+ {
+ /* actual bits depend on carry propagation; nothing is written yet */
+ ps_cabac->u4_out_standing_bytes++;
+ return (IHEVCE_SUCCESS);
+ }
+ else
+ {
+ /* carry = 1 => putbit(1); carry propagated due to L renorm (bit 8 of lead_byte) */
+ WORD32 carry = (lead_byte >> 8) & 0x1;
+ UWORD8 *pu1_strm_buf = ps_cabac->pu1_strm_buffer;
+ UWORD32 u4_strm_buf_offset = ps_cabac->u4_strm_buf_offset;
+ WORD32 zero_run = ps_cabac->i4_zero_bytes_run;
+ UWORD32 u4_out_standing_bytes = ps_cabac->u4_out_standing_bytes;
+
+ /*********************************************************************/
+ /* Bitstream overflow check */
+ /* NOTE: corner case of epb bytes (max 2 for 32bit word) not handled */
+ /*********************************************************************/
+ if((u4_strm_buf_offset + u4_out_standing_bytes + 1) >= ps_cabac->u4_max_strm_size)
+ {
+ /* return without corrupting the buffer beyond its size */
+ return (IHEVCE_BITSTREAM_BUFFER_OVERFLOW);
+ }
+
+ /*********************************************************************/
+ /* Insert the carry propagated in previous byte */
+ /* */
+ /* Note : Do not worry about corruption into slice header align byte */
+ /* This is because the first bin cannot result in overflow */
+ /*********************************************************************/
+ if(carry)
+ {
+ /* CORNER CASE: if the previous data is 0x000003, then an EPB was inserted
+ and the stream holds 0x00000303; adding the carry to the last byte would
+ give 0x00000304, which is not correct as per standard */
+ /* So when the previous four bytes equal 0x00000303, step u4_strm_buf_offset
+ back by 1 so that the carry below lands on the first 0x03 instead.
+ NOTE(review): reads up to 4 bytes before the offset; assumes >= 4 bytes written - confirm */
+ if(pu1_strm_buf[u4_strm_buf_offset - 1] == 0x03 &&
+ pu1_strm_buf[u4_strm_buf_offset - 2] == 0x03 &&
+ pu1_strm_buf[u4_strm_buf_offset - 3] == 0x00 &&
+ pu1_strm_buf[u4_strm_buf_offset - 4] == 0x00)
+ {
+ u4_strm_buf_offset -= 1;
+ }
+ /* previous byte carry add will not result in overflow to */
+ /* u4_strm_buf_offset - 2 as we track 0xff as outstanding bytes */
+ pu1_strm_buf[u4_strm_buf_offset - 1] += carry;
+ zero_run = 0;
+ }
+
+ /* Insert outstanding bytes (if any): 0x00 when a carry rippled through, else 0xFF */
+ while(u4_out_standing_bytes)
+ {
+ UWORD8 u1_0_or_ff = carry ? 0 : 0xFF;
+
+ PUTBYTE_EPB(pu1_strm_buf, u4_strm_buf_offset, u1_0_or_ff, zero_run);
+
+ u4_out_standing_bytes--;
+ }
+ ps_cabac->u4_out_standing_bytes = 0;
+
+ /* Insert the leading byte (carry bit stripped) */
+ lead_byte &= 0xFF;
+ PUTBYTE_EPB(pu1_strm_buf, u4_strm_buf_offset, lead_byte, zero_run);
+
+ /* update the state variables and return success */
+ ps_cabac->u4_strm_buf_offset = u4_strm_buf_offset;
+ ps_cabac->i4_zero_bytes_run = zero_run;
+ return (IHEVCE_SUCCESS);
+ }
+}
+
+/**
+******************************************************************************
+*
+* @brief Codes a bypass bin (equi probable 0 / 1)
+*
+* @par Description
+* After encoding a bypass bin, bits gen is incremented by 1 and a byte is put
+* into the bitstream once ready; compute bits mode only adds 1 bit to the estimate
+*
+* @param[in,out] ps_cabac : pointer to cabac context (handle)
+*
+* @param[in] bin : bypass bin(0/1) to be encoded
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ihevce_cabac_encode_bypass_bin(cab_ctxt_t *ps_cabac, WORD32 bin)
+{
+    UWORD32 u4_cur_range = ps_cabac->u4_range;
+    UWORD32 u4_new_low = ps_cabac->u4_low;
+
+    if(CABAC_MODE_ENCODE_BITS != ps_cabac->e_cabac_op_mode)
+    {
+        /* compute bits mode: only account for the one bit of the bin */
+        ps_cabac->u4_bits_estimated_q12 += (1 << CABAC_FRAC_BITS_Q);
+        return (IHEVCE_SUCCESS);
+    }
+
+    /* Sanity checks */
+    ASSERT((u4_cur_range >= 256) && (u4_cur_range < 512));
+    ASSERT((bin == 0) || (bin == 1));
+
+    /* the bit estimate is kept up to date in encode mode as well, to
+     * populate the trace */
+    ps_cabac->u4_bits_estimated_q12 += (1 << CABAC_FRAC_BITS_Q);
+
+    /* L = 2 * L for bin 0; L = 2 * L + R for bin 1; R is unchanged */
+    u4_new_low <<= 1;
+    if(bin)
+    {
+        u4_new_low += u4_cur_range;
+    }
+    ps_cabac->u4_low = u4_new_low;
+
+    /* 1 bit to be inserted in the bitstream */
+    ps_cabac->u4_bits_gen++;
+
+    /* generate stream when a byte is ready */
+    if(ps_cabac->u4_bits_gen > CABAC_BITS)
+    {
+        return (ihevce_cabac_put_byte(ps_cabac));
+    }
+
+    return (IHEVCE_SUCCESS);
+}
+
+/**
+******************************************************************************
+*
+* @brief Codes a terminate bin (1:terminate 0:do not terminate)
+*
+* @par Description
+* Codes the bin with a fixed lps range of 2, renormalizes and puts a byte when
+* ready; for term_bin = 1 the cabac engine is flushed as well
+*
+* @param[in,out] ps_cabac : pointer to cabac context (handle)
+* @param[in] term_bin : (1:terminate 0:do not terminate)
+* @param[in] i4_end_of_sub_strm : passed on unchanged to ihevce_cabac_flush()
+*
+* @return IHEVCE_SUCCESS, else the error code of the failed byte put / flush
+*
+******************************************************************************
+*/
+WORD32
+    ihevce_cabac_encode_terminate(cab_ctxt_t *ps_cabac, WORD32 term_bin, WORD32 i4_end_of_sub_strm)
+{
+    UWORD32 u4_range = ps_cabac->u4_range;
+    UWORD32 u4_low = ps_cabac->u4_low;
+    UWORD32 u4_rlps;
+    WORD32 shift;
+    WORD32 error = IHEVCE_SUCCESS;
+
+    /* Sanity checks */
+    ASSERT((u4_range >= 256) && (u4_range < 512));
+    ASSERT((term_bin == 0) || (term_bin == 1));
+
+    /* term_bin = 1 has lps range = 2 */
+    u4_rlps = 2;
+    u4_range -= u4_rlps;
+
+    /* if terminate L is incremented by curR and R=2 */
+    if(term_bin)
+    {
+        /* lps path; L= L + R; R = RLPS */
+        u4_low += u4_range;
+        u4_range = u4_rlps;
+    }
+
+    /* Renormalization; calculate bits generated based on range(R) */
+    /* Note : 6 <= R < 512; R is 2 only for terminating encode     */
+    GETRANGE(shift, u4_range);
+    shift = 9 - shift;
+    u4_low <<= shift;
+    u4_range <<= shift;
+
+    /* bits to be inserted in the bitstream */
+    ps_cabac->u4_bits_gen += shift;
+    ps_cabac->u4_range = u4_range;
+    ps_cabac->u4_low = u4_low;
+
+    /* generate stream when a byte is ready */
+    if(ps_cabac->u4_bits_gen > CABAC_BITS)
+    {
+        error = ihevce_cabac_put_byte(ps_cabac);
+    }
+
+    if(term_bin)
+    {
+        /* a failed flush (bitstream overflow) must reach the caller too */
+        error |= ihevce_cabac_flush(ps_cabac, i4_end_of_sub_strm);
+    }
+
+    /*Compute bit always to populate the trace*/
+    ps_cabac->u4_bits_estimated_q12 += gau2_ihevce_cabac_bin_to_bits[(62 << 1) | term_bin];
+
+    return (error);
+}
+
+/**
+******************************************************************************
+*
+* @brief Encodes a truncated unary symbol associated with context model(s)
+*
+* @par Description
+* Does binarization of tunary symbol as per sec 9.3.2.2 and does the cabac
+* encoding of each bin. This is used for computing symbols like qp_delta,
+* last_sig_coeff_prefix_x, last_sig_coeff_prefix_y.
+*
+* The context model associated with each bin is computed as :
+* current bin context = "base context idx" + MIN(bin_idx >> shift, inc max), where
+* 1. "base context idx" is the base index for the syntax element
+* 2. "bin_idx" is the current bin index of the unary code
+* 3. "shift" is the shift factor associated with this syntax element
+*
+* @param[in,out] ps_cabac
+* pointer to cabac context (handle)
+*
+* @param[in] sym
+* syntax element to be coded as truncated unary bins
+*
+* @param[in] c_max
+* maximum value of sym (required for tunary binarization)
+*
+* @param[in] ctxt_index
+* base context model index for this syntax element
+*
+* @param[in] ctxt_shift
+* shift factor for context increments associated with this syntax element
+*
+* @param[in] ctxt_inc_max
+* max value of context increment beyond which all bins will use same ctxt
+*
+* @return success or failure error code (OR of the per bin error codes)
+*
+******************************************************************************
+*/
+WORD32 ihevce_cabac_encode_tunary(
+    cab_ctxt_t *ps_cabac,
+    WORD32 sym,
+    WORD32 c_max,
+    WORD32 ctxt_index,
+    WORD32 ctxt_shift,
+    WORD32 ctxt_inc_max)
+{
+    WORD32 error = IHEVCE_SUCCESS;
+    WORD32 bin_idx = 0;
+
+    /* Sanity checks */
+    ASSERT(c_max > 0);
+    ASSERT((sym <= c_max) && (sym >= 0));
+    ASSERT((ctxt_index >= 0) && (ctxt_index < IHEVC_CAB_CTXT_END));
+    ASSERT((ctxt_index + (c_max >> ctxt_shift)) < IHEVC_CAB_CTXT_END);
+
+    /* sym = 0 is just a single '0' bin coded with the base context */
+    if(0 == sym)
+    {
+        return (ihevce_cabac_encode_bin(ps_cabac, 0, ctxt_index));
+    }
+
+    /* sym '1' bins; context increment saturates at ctxt_inc_max */
+    while(bin_idx < sym)
+    {
+        WORD32 bin_ctxt = ctxt_index + MIN((bin_idx >> ctxt_shift), ctxt_inc_max);
+
+        error |= ihevce_cabac_encode_bin(ps_cabac, 1, bin_ctxt);
+        bin_idx++;
+    }
+
+    /* terminating '0' bin is dropped when sym has reached c_max */
+    if(sym < c_max)
+    {
+        WORD32 bin_ctxt = ctxt_index + MIN((bin_idx >> ctxt_shift), ctxt_inc_max);
+
+        error |= ihevce_cabac_encode_bin(ps_cabac, 0, bin_ctxt);
+    }
+
+    return (error);
+}
+
+/**
+******************************************************************************
+*
+* @brief Encodes a syntax element as truncated unary bypass bins
+*
+* @par Description
+* Does binarization of tunary symbol as per sec 9.3.2.2 and does the cabac
+* encoding of each bin. This is used for computing symbols like merge_idx,
+* mpm_idx etc
+*
+* The bins are (sym) '1's followed by a '0'; the terminating '0' is omitted
+* when sym equals c_max. All bins are coded in one call as bypass bins
+*
+* @param[in,out] ps_cabac
+* pointer to cabac context (handle)
+*
+* @param[in] sym
+* syntax element to be coded as truncated unary bins
+*
+* @param[in] c_max
+* maximum value of sym (required for tunary binarization)
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ihevce_cabac_encode_tunary_bypass(cab_ctxt_t *ps_cabac, WORD32 sym, WORD32 c_max)
+{
+    WORD32 num_bins;
+    WORD32 bins;
+
+    /* Sanity checks */
+    ASSERT(c_max > 0);
+    ASSERT((sym <= c_max) && (sym >= 0));
+
+    if(sym >= c_max)
+    {
+        /* sym has reached c_max: (sym) '1's without a terminating bin */
+        num_bins = sym;
+        bins = (1 << num_bins) - 1;
+    }
+    else
+    {
+        /* (sym) '1's followed by the terminating '0' bin */
+        num_bins = sym + 1;
+        bins = (1 << num_bins) - 2;
+    }
+
+    /* Encode the tunary binarized code as bypass bins */
+    return (ihevce_cabac_encode_bypass_bins(ps_cabac, bins, num_bins));
+}
+
+/**
+******************************************************************************
+*
+* @brief Encodes a syntax element as kth order Exp-Golomb code (EGK)
+*
+* @par Description
+* Does binarization of symbol as per sec 9.3.2.4 kth order Exp-Golomb(EGk)
+* process and encodes the resulting bypass bins
+*
+* @param[in,out] ps_cabac
+* pointer to cabac context (handle)
+*
+* @param[in] u4_sym
+* syntax element to be coded as EGK
+*
+* @param[in] k
+* order of EGk
+*
+* @return error code returned by ihevce_cabac_encode_bypass_bins()
+*
+******************************************************************************
+*/
+WORD32 ihevce_cabac_encode_egk(cab_ctxt_t *ps_cabac, UWORD32 u4_sym, WORD32 k)
+{
+ WORD32 num_bins, unary_length;
+ UWORD32 u4_sym_shiftk_plus1, u4_egk, u4_unary_bins;
+
+ WORD32 error = IHEVCE_SUCCESS;
+
+ /* Sanity checks */
+ ASSERT((k >= 0));
+ /* ASSERT(u4_sym >= (UWORD32)(1 << k)); */
+
+ /************************************************************************/
+ /* shift symbol by k bits to find unary code prefix (111110) */
+ /* Use GETRANGE to eliminate the while loop in sec 9.3.2.4 of HEVC spec */
+ /************************************************************************/
+ u4_sym_shiftk_plus1 = (u4_sym >> k) + 1;
+ /* GETRANGE presumably yields the bit length of its argument - TODO confirm */
+ GETRANGE(unary_length, u4_sym_shiftk_plus1);
+
+ /* unary code with (unary_length-1) '1's and terminating '0' bin */
+ u4_unary_bins = (1 << unary_length) - 2;
+
+ /* append the low (unary_length - 1) bits of ((u4_sym >> k) + 1) after the unary prefix */
+ u4_egk = (u4_unary_bins << (unary_length - 1)) |
+ (u4_sym_shiftk_plus1 & ((1 << (unary_length - 1)) - 1));
+
+ /* insert last k bits of symbol in the end */
+ u4_egk = (u4_egk << k) | (u4_sym & ((1 << k) - 1));
+
+ /* length of the code = 2 *(unary_length - 1) + 1 + k; NOTE(review): must fit the 32 bits of u4_egk - confirm callers bound u4_sym */
+ num_bins = (2 * unary_length) - 1 + k;
+
+ /* Encode the egk binarized code as bypass bins */
+ error = ihevce_cabac_encode_bypass_bins(ps_cabac, u4_egk, num_bins);
+
+ return (error);
+}
+
+/**
+******************************************************************************
+*
+* @brief Encodes a syntax element as truncated rice code (TR)
+*
+* @par Description
+* Does binarization of symbol as per sec 9.3.2.3 Truncated Rice(TR)
+* binarization process and encodes the resulting bypass bins
+* This function is used for coeff_abs_level_remaining coding when
+* level is less than c_rice_max
+*
+* The code is built as
+*   prefix : (u4_sym >> c_rice_param) '1' bins followed by a '0' bin
+*   suffix : the c_rice_param least significant bits of u4_sym
+*
+* @param[in,out] ps_cabac
+* pointer to cabac context (handle)
+*
+* @param[in] u4_sym
+* syntax element to be coded as truncated rice code
+*
+* @param[in] c_rice_param
+* shift factor for truncated unary prefix coding of (u4_sym >> c_rice_param)
+*
+* @param[in] c_rice_max
+* max symbol val below which a suffix is coded as (u4_sym%(1<<c_rice_param))
+* This is currently (4 << c_rice_param) for coeff_abs_level_remaining
+* Only used in a sanity check (ASSERT) inside this function
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ihevce_cabac_encode_trunc_rice(
+    cab_ctxt_t *ps_cabac, UWORD32 u4_sym, WORD32 c_rice_param, WORD32 c_rice_max)
+{
+    WORD32 prefix_len;
+    WORD32 prefix_bins;
+    WORD32 num_bins;
+    UWORD32 u4_suffix;
+    UWORD32 u4_code;
+
+    /* c_rice_max is only read by the sanity check below */
+    (void)c_rice_max;
+    ASSERT((c_rice_param >= 0));
+    ASSERT((UWORD32)c_rice_max > u4_sym);
+
+    /* unary prefix (111.10) : (u4_sym >> c_rice_param) '1's and a '0' bin */
+    prefix_len = (u4_sym >> c_rice_param) + 1;
+    prefix_bins = (1 << prefix_len) - 2;
+
+    /* suffix : last c_rice_param bits of the symbol */
+    u4_suffix = u4_sym & ((1 << c_rice_param) - 1);
+
+    /* tr code = prefix followed by suffix; coded in one go as bypass bins */
+    u4_code = (prefix_bins << c_rice_param) | u4_suffix;
+    num_bins = prefix_len + c_rice_param;
+
+    return (ihevce_cabac_encode_bypass_bins(ps_cabac, u4_code, num_bins));
+}
+
+/**
+******************************************************************************
+*
+* @brief Flushes the cabac encoder engine as per section 9.3.4 figure 9-12
+*
+* @par Description
+* Inserts the pending carry and outstanding bytes, then the remaining bits of
+* low followed by the rbsp stop bit (1) and zero bits up to the byte boundary
+*
+* @param[in,out] ps_cabac : pointer to cabac context (handle)
+* @param[in] i4_end_of_sub_strm : not used by this function
+* @return IHEVCE_SUCCESS, or IHEVCE_BITSTREAM_BUFFER_OVERFLOW if the bytes do not fit
+*
+******************************************************************************
+*/
+WORD32 ihevce_cabac_flush(cab_ctxt_t *ps_cabac, WORD32 i4_end_of_sub_strm)
+{
+ UWORD32 u4_low = ps_cabac->u4_low;
+ UWORD32 u4_bits_gen = ps_cabac->u4_bits_gen;
+
+ UWORD8 *pu1_strm_buf = ps_cabac->pu1_strm_buffer;
+ UWORD32 u4_strm_buf_offset = ps_cabac->u4_strm_buf_offset;
+ WORD32 zero_run = ps_cabac->i4_zero_bytes_run;
+ UWORD32 u4_out_standing_bytes = ps_cabac->u4_out_standing_bytes;
+
+ (void)i4_end_of_sub_strm;
+ /************************************************************************/
+ /* Insert the carry (propagated in previous byte) along with */
+ /* outstanding bytes (if any) and flush remaining bits */
+ /************************************************************************/
+
+ //TODO: Review this function
+ {
+ /* carry = 1 => putbit(1); carry propagated due to L renorm */
+ WORD32 carry = (u4_low >> (u4_bits_gen + CABAC_BITS)) & 0x1;
+ WORD32 last_byte;
+ WORD32 bits_left;
+ WORD32 rem_bits;
+
+ /*********************************************************************/
+ /* Bitstream overflow check */
+ /* NOTE: corner case of epb bytes (max 2 for 32bit word) not handled */
+ /*********************************************************************/
+ if((u4_strm_buf_offset + u4_out_standing_bytes + 1) >= ps_cabac->u4_max_strm_size)
+ {
+ /* return without corrupting the buffer beyond its size */
+ return (IHEVCE_BITSTREAM_BUFFER_OVERFLOW);
+ }
+
+ if(carry)
+ {
+ /* CORNER CASE: if the previous data is 0x000003, then an EPB was inserted
+ and the stream holds 0x00000303; adding the carry to the last byte would
+ give 0x00000304, which is not correct as per standard */
+ /* So when the previous four bytes equal 0x00000303, step u4_strm_buf_offset
+ back by 1 so that the carry below lands on the first 0x03 instead.
+ NOTE(review): reads up to 4 bytes before the offset; assumes >= 4 bytes written - confirm */
+ if(pu1_strm_buf[u4_strm_buf_offset - 1] == 0x03 &&
+ pu1_strm_buf[u4_strm_buf_offset - 2] == 0x03 &&
+ pu1_strm_buf[u4_strm_buf_offset - 3] == 0x00 &&
+ pu1_strm_buf[u4_strm_buf_offset - 4] == 0x00)
+ {
+ u4_strm_buf_offset -= 1;
+ }
+ /* previous byte carry add will not result in overflow to */
+ /* u4_strm_buf_offset - 2 as we track 0xff as outstanding bytes */
+ pu1_strm_buf[u4_strm_buf_offset - 1] += carry;
+ zero_run = 0;
+ }
+
+ /* Insert outstanding bytes (if any): 0x00 when a carry rippled through, else 0xFF */
+ while(u4_out_standing_bytes)
+ {
+ UWORD8 u1_0_or_ff = carry ? 0 : 0xFF;
+
+ PUTBYTE_EPB(pu1_strm_buf, u4_strm_buf_offset, u1_0_or_ff, zero_run);
+
+ u4_out_standing_bytes--;
+ }
+
+ /* clear the carry in low */
+ u4_low &= ((1 << (u4_bits_gen + CABAC_BITS)) - 1);
+
+ /* extract the remaining bits; */
+ /* includes additional msb bit of low as per Figure 9-12 */
+ bits_left = u4_bits_gen + 1;
+ rem_bits = (u4_low >> (u4_bits_gen + CABAC_BITS - bits_left));
+
+ if(bits_left >= 8)
+ {
+ last_byte = (rem_bits >> (bits_left - 8)) & 0xFF;
+ PUTBYTE_EPB(pu1_strm_buf, u4_strm_buf_offset, last_byte, zero_run);
+ bits_left -= 8;
+ }
+
+ /* insert last byte along with rbsp stop bit(1) and 0's in the end */
+ last_byte = (rem_bits << (8 - bits_left)) | (1 << (7 - bits_left));
+ last_byte &= 0xFF;
+ PUTBYTE_EPB(pu1_strm_buf, u4_strm_buf_offset, last_byte, zero_run);
+
+ /* store offset, reset zero run; NOTE(review): low, bits gen and outstanding bytes stay stale in the context - presumably re-initialised by the caller */
+ ps_cabac->u4_strm_buf_offset = u4_strm_buf_offset;
+ ps_cabac->i4_zero_bytes_run = 0;
+ return (IHEVCE_SUCCESS);
+ }
+}
+
+/**
+******************************************************************************
+*
+* @brief API to backup cabac ctxt at end of 2nd CTB row which is used to init
+* context at start of every row
+*
+* @par Description
+* Copies the current context models into the top right backup of the handle
+*
+* @param[in,out] ps_cabac : pointer to cabac context (handle)
+*
+* @return IHEVCE_SUCCESS always
+*
+******************************************************************************
+*/
+WORD32 ihevce_cabac_ctxt_backup(cab_ctxt_t *ps_cabac)
+{
+    const void *pv_cur_models = ps_cabac->au1_ctxt_models;
+    void *pv_saved_models = ps_cabac->au1_ctxt_models_top_right;
+
+    /* snapshot the complete set of current context models */
+    memcpy(pv_saved_models, pv_cur_models, sizeof(ps_cabac->au1_ctxt_models));
+
+    return (IHEVCE_SUCCESS);
+}
+
+/**
+******************************************************************************
+*
+* @brief Init cabac ctxt at every row start
+*
+* @par Description
+* Restarts the cabac engine and reloads the context models from the top right
+* backup; used at start of every row when entropy sync is enabled
+*
+* @param[in,out] ps_cabac
+* pointer to cabac context (handle)
+*
+* @return IHEVCE_SUCCESS always
+*
+******************************************************************************
+*/
+WORD32 ihevce_cabac_ctxt_row_init(cab_ctxt_t *ps_cabac)
+{
+    /* every row starts from the context models saved as top right backup */
+    COPY_CABAC_STATES(
+        ps_cabac->au1_ctxt_models, ps_cabac->au1_ctxt_models_top_right, IHEVC_CAB_CTXT_END);
+
+    /* arithmetic coder restarts: initial range of 510, nothing pending */
+    ps_cabac->u4_range = 510;
+    ps_cabac->u4_low = 0;
+    ps_cabac->u4_bits_gen = 0;
+    ps_cabac->u4_out_standing_bytes = 0;
+    ps_cabac->i4_zero_bytes_run = 0;
+
+    return (IHEVCE_SUCCESS);
+}
diff --git a/encoder/ihevce_cabac.h b/encoder/ihevce_cabac.h
new file mode 100644
index 0000000..2cce81e
--- /dev/null
+++ b/encoder/ihevce_cabac.h
@@ -0,0 +1,427 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+*
+* @file ihevce_cabac.h
+*
+* @brief
+* This file contains encoder cabac engine related structures and
+* interface prototypes
+*
+* @author
+* ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_CABAC_H_
+#define _IHEVCE_CABAC_H_
+
+#include "ihevc_debug.h"
+#include "ihevc_macros.h"
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+/**
+*******************************************************************************
+@brief Bit precision of cabac engine;
+*******************************************************************************
+ */
+#define CABAC_BITS 9
+
+/**
+*******************************************************************************
+@brief q format to account for the fractional bits encoded in cabac
+*******************************************************************************
+ */
+#define CABAC_FRAC_BITS_Q 12
+
+/**
+*******************************************************************************
+@brief Enables bit-efficient chroma cbf signalling by peeking into cbfs of
+ children nodes
+*******************************************************************************
+ */
+#define CABAC_BIT_EFFICIENT_CHROMA_PARENT_CBF 1
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+@brief converts floating point number to CABAC_FRAC_BITS_Q q format and
+ rounds the results to 16 bit integer
+*******************************************************************************
+ */
+#define ROUND_Q12(x) ((UWORD16)(((x) * (1 << CABAC_FRAC_BITS_Q)) + 0.5))
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+@brief Enums for controlling the operating mode of cabac engine
+*******************************************************************************
+ */
+typedef enum
+{
+ /** in this mode, bins are arithmetic coded and put in the bit stream buffer */
+ CABAC_MODE_ENCODE_BITS = 0,
+
+ /** in this mode, only the bits generated are estimated; nothing is put in the stream */
+ CABAC_MODE_COMPUTE_BITS = 1
+
+} CABAC_OP_MODE;
+
+/*****************************************************************************/
+/* Structures */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ * @brief Cabac context for encoder
+******************************************************************************
+ */
+typedef struct cab_ctxt
+{
+ /**
+ * indicates if cabac encode works in put bits mode or bit compute mode
+ * In put bits mode, bitstream and cabac engine fields L,R etc are used
+ * In bit compute mode, bitstream and cabac engine fields are not used
+ */
+ CABAC_OP_MODE e_cabac_op_mode;
+
+ /**
+ * total bits estimated (for a cu) when mode is CABAC_MODE_COMPUTE_BITS
+ * This is in q12 format to account for the fractional bits as well
+ */
+ UWORD32 u4_bits_estimated_q12;
+
+ /**
+ * total texture bits estimated (for a cu) when mode is CABAC_MODE_COMPUTE_BITS
+ * This is in q12 format to account for the fractional bits as well
+ */
+ UWORD32 u4_texture_bits_estimated_q12;
+
+ /**
+ * total header bits estimated (for a cu) when mode is CABAC_MODE_COMPUTE_BITS
+ * This is in q12 format to account for the fractional bits as well
+ */
+ UWORD32 u4_header_bits_estimated_q12;
+
+ UWORD32 u4_cbf_bits_q12; /* NOTE(review): undocumented; presumably cbf bits in q12 - confirm */
+
+ UWORD32 u4_true_tu_split_flag_q12; /* NOTE(review): undocumented; presumably q12 bits - confirm */
+ /*********************************************************************/
+ /* CABAC ENGINE related fields; not used in CABAC_MODE_COMPUTE_BITS */
+ /*********************************************************************/
+ /** cabac interval range R */
+ UWORD32 u4_range;
+
+ /** cabac interval start L */
+ UWORD32 u4_low;
+
+ /** bits generated during renormalization
+ * A byte is put to stream/u4_out_standing_bytes from u4_low(L) when
+ * u4_bits_gen exceeds CABAC_BITS
+ */
+ UWORD32 u4_bits_gen;
+
+ /** bytes outstanding; number of 0xFF bytes that occur during renorm
+ * These will be accumulated till the carry bit is known
+ */
+ UWORD32 u4_out_standing_bytes;
+
+ /*************************************************************************/
+ /* OUTPUT Bitstream related fields; not used in CABAC_MODE_COMPUTE_BITS */
+ /*************************************************************************/
+ /** points to start of stream buffer. */
+ UWORD8 *pu1_strm_buffer;
+
+ /**
+ * max bitstream size (in bytes).
+ * Encoded stream shall not exceed this size.
+ */
+ UWORD32 u4_max_strm_size;
+
+ /**
+ * byte offset (w.r.t pu1_strm_buffer) where next byte would be written
+ * Bitstream engine makes sure it would not corrupt data beyond
+ * u4_max_strm_size bytes
+ */
+ UWORD32 u4_strm_buf_offset;
+
+ /**
+ * signifies the number of consecutive zero bytes propagated from previous
+ * word. It is used for emulation prevention byte insertion in the stream
+ */
+ WORD32 i4_zero_bytes_run;
+
+ /*********************************************************************/
+ /* CABAC context models */
+ /*********************************************************************/
+ /** All Context models stored in packed form pState[bits6-1] | MPS[bit0] */
+ UWORD8 au1_ctxt_models[IHEVC_CAB_CTXT_END];
+
+ /**
+ * Cabac context for start of every row which is same as top right ctxt
+ */
+ UWORD8 au1_ctxt_models_top_right[IHEVC_CAB_CTXT_END];
+
+ /**
+ * copy of enable entropy coding sync flag in pps
+ */
+ WORD8 i1_entropy_coding_sync_enabled_flag;
+
+ /**
+ * store the bitstream offset from which first slice data is generated by cabac
+ */
+ UWORD32 u4_first_slice_start_offset;
+
+} cab_ctxt_t;
+
+/*****************************************************************************/
+/* Globals */
+/*****************************************************************************/
+extern UWORD16 gau2_ihevce_cabac_bin_to_bits[64 * 2];
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+WORD32
+ ihevce_cabac_reset(cab_ctxt_t *ps_cabac, bitstrm_t *ps_bitstrm, CABAC_OP_MODE e_cabac_op_mode);
+
+WORD32 ihevce_cabac_init(
+ cab_ctxt_t *ps_cabac,
+ bitstrm_t *ps_bitstrm,
+ WORD32 slice_qp,
+ WORD32 cabac_init_idc,
+ CABAC_OP_MODE e_cabac_op_mode);
+
+WORD32 ihevce_cabac_put_byte(cab_ctxt_t *ps_cabac);
+
+/**
+******************************************************************************
+*
+* @brief Codes a bin based on probability and mps packed context model
+*
+* @par Description
+* 1. Apart from encoding bin, context model is updated as per state transition
+* 2. Range and Low renormalization is done based on bin and original state
+* 3. After renorm bitstream is updated (if required)
+*
+* @param[inout] ps_cabac
+* pointer to cabac context (handle)
+*
+* @param[in] bin
+* bin(boolean) to be encoded
+*
+* @param[in] ctxt_index
+* index of cabac context model containing pState[bits6-1] | MPS[bit0]
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+static INLINE WORD32 ihevce_cabac_encode_bin(cab_ctxt_t *ps_cabac, WORD32 bin, WORD32 ctxt_index)
+{
+ UWORD32 u4_range = ps_cabac->u4_range;
+ UWORD32 u4_low = ps_cabac->u4_low;
+ UWORD32 u4_rlps;
+ UWORD8 *pu1_ctxt_model = &ps_cabac->au1_ctxt_models[ctxt_index];
+ WORD32 state_mps = *pu1_ctxt_model;
+ WORD32 shift;
+
+ /* Sanity checks; NOTE(review): ctxt_index is already used to read state_mps above */
+ ASSERT((bin == 0) || (bin == 1));
+ ASSERT((ctxt_index >= 0) && (ctxt_index < IHEVC_CAB_CTXT_END));
+ ASSERT(state_mps < 128);
+
+ if(CABAC_MODE_ENCODE_BITS == ps_cabac->e_cabac_op_mode)
+ {
+ ASSERT((u4_range >= 256) && (u4_range < 512));
+
+ /* Get the lps range from LUT based on quantized range and state */
+ u4_rlps = gau1_ihevc_cabac_rlps[state_mps >> 1][(u4_range >> 6) & 0x3];
+
+ u4_range -= u4_rlps;
+
+ /* check if bin is mps or lps (bit0 of the packed model is the MPS) */
+ if((state_mps & 0x1) ^ bin)
+ {
+ /* lps path; L= L + R; R = RLPS */
+ u4_low += u4_range;
+ u4_range = u4_rlps;
+ }
+
+ /* Bit estimate is accumulated in encode mode as well, to populate the trace */
+ /* increment bits generated based on state and bin encoded */
+ ps_cabac->u4_bits_estimated_q12 += gau2_ihevce_cabac_bin_to_bits[state_mps ^ bin];
+
+ /* update the context model from state transition LUT */
+ *pu1_ctxt_model = gau1_ihevc_next_state[(state_mps << 1) | bin];
+
+ /*****************************************************************/
+ /* Renormalization; calculate bits generated based on range(R) */
+ /* Note : 6 <= R < 512; R is 2 only for terminating encode */
+ /*****************************************************************/
+ GETRANGE(shift, u4_range);
+ shift = 9 - shift;
+ u4_low <<= shift;
+ u4_range <<= shift;
+
+ /* bits to be inserted in the bitstream; write the updated engine state back */
+ ps_cabac->u4_bits_gen += shift;
+ ps_cabac->u4_range = u4_range;
+ ps_cabac->u4_low = u4_low;
+
+ /* generate stream when u4_bits_gen exceeds CABAC_BITS */
+ if(ps_cabac->u4_bits_gen > CABAC_BITS)
+ {
+ return (ihevce_cabac_put_byte(ps_cabac));
+ }
+ }
+ else /* (CABAC_MODE_COMPUTE_BITS == e_cabac_op_mode) */
+ {
+ /* increment bits generated based on state and bin encoded */
+ ps_cabac->u4_bits_estimated_q12 += gau2_ihevce_cabac_bin_to_bits[state_mps ^ bin];
+
+ /* update the context model from state transition LUT */
+ *pu1_ctxt_model = gau1_ihevc_next_state[(state_mps << 1) | bin];
+ }
+
+ return (IHEVCE_SUCCESS);
+}
+
+WORD32 ihevce_cabac_encode_bypass_bin(cab_ctxt_t *ps_cabac, WORD32 bin);
+
+WORD32
+ ihevce_cabac_encode_terminate(cab_ctxt_t *ps_cabac, WORD32 term_bin, WORD32 i4_end_of_sub_strm);
+
+/**
+******************************************************************************
+*
+* @brief Encodes a series of bypass bins (FLC bypass bins)
+*
+* @par Description
+* This function is more optimal than calling ihevce_cabac_encode_bypass_bin()
+* in a loop as cabac low, renorm and generating the stream (8bins at a time)
+* can be done in one operation
+*
+* @param[inout] ps_cabac
+* pointer to cabac context (handle)
+*
+* @param[in] u4_bins
+* syntax element to be coded (as FLC bins)
+*
+* @param[in] num_bins
+* This is the FLC length for u4_sym
+*
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+static INLINE WORD32
+ ihevce_cabac_encode_bypass_bins(cab_ctxt_t *ps_cabac, UWORD32 u4_bins, WORD32 num_bins)
+{
+ UWORD32 u4_range = ps_cabac->u4_range; /* read once; never modified in this function */
+ WORD32 next_byte;
+ WORD32 error = IHEVCE_SUCCESS; /* accumulates (ORs) the put_byte return codes */
+
+ if(CABAC_MODE_ENCODE_BITS == ps_cabac->e_cabac_op_mode)
+ {
+ /* Sanity checks */
+ ASSERT((num_bins < 33) && (num_bins > 0));
+ ASSERT((u4_range >= 256) && (u4_range < 512));
+
+ /* Bit estimate is accumulated in encode mode as well, to populate the trace */
+ /* increment bits generated by num_bins */
+ ps_cabac->u4_bits_estimated_q12 += (num_bins << CABAC_FRAC_BITS_Q);
+
+ /* Encode 8bins at a time and put in the bit-stream */
+ while(num_bins > 8)
+ {
+ num_bins -= 8;
+
+ /* extract the leading 8 bins */
+ next_byte = (u4_bins >> num_bins) & 0xff;
+
+ /* L = (L << 8) + (R * next_byte) */
+ ps_cabac->u4_low <<= 8;
+ ps_cabac->u4_low += (next_byte * u4_range);
+ ps_cabac->u4_bits_gen += 8;
+
+ if(ps_cabac->u4_bits_gen > CABAC_BITS)
+ {
+ /* insert the leading byte of low into stream */
+ error |= ihevce_cabac_put_byte(ps_cabac);
+ }
+ }
+
+ /* Update low with remaining bins (num_bins is at most 8 here) and return */
+ next_byte = (u4_bins & ((1 << num_bins) - 1));
+
+ ps_cabac->u4_low <<= num_bins;
+ ps_cabac->u4_low += (next_byte * u4_range);
+ ps_cabac->u4_bits_gen += num_bins;
+
+ if(ps_cabac->u4_bits_gen > CABAC_BITS)
+ {
+ /* insert the leading byte of low into stream */
+ error |= ihevce_cabac_put_byte(ps_cabac);
+ }
+ }
+ else
+ {
+ /* compute mode: each bypass bin adds one bit (in q12) to the estimate */
+ ps_cabac->u4_bits_estimated_q12 += (num_bins << CABAC_FRAC_BITS_Q);
+ }
+
+ return (error);
+}
+
+WORD32 ihevce_cabac_encode_tunary(
+ cab_ctxt_t *ps_cabac,
+ WORD32 sym,
+ WORD32 c_max,
+ WORD32 ctxt_index,
+ WORD32 ctxt_shift,
+ WORD32 ctxt_inc_max);
+
+WORD32 ihevce_cabac_encode_tunary_bypass(cab_ctxt_t *ps_cabac, WORD32 sym, WORD32 c_max);
+
+WORD32 ihevce_cabac_encode_egk(cab_ctxt_t *ps_cabac, UWORD32 u4_sym, WORD32 k);
+
+WORD32 ihevce_cabac_encode_trunc_rice(
+ cab_ctxt_t *ps_cabac, UWORD32 u4_sym, WORD32 c_rice_param, WORD32 c_rice_max);
+
+WORD32 ihevce_cabac_flush(cab_ctxt_t *ps_cabac, WORD32 i4_end_of_sub_strm);
+
+WORD32 ihevce_cabac_ctxt_backup(cab_ctxt_t *ps_cabac);
+
+WORD32 ihevce_cabac_ctxt_row_init(cab_ctxt_t *ps_cabac);
+
+#endif /* _IHEVCE_CABAC_H_ */
diff --git a/encoder/ihevce_cabac_cu_pu.c b/encoder/ihevce_cabac_cu_pu.c
new file mode 100644
index 0000000..355fab0
--- /dev/null
+++ b/encoder/ihevce_cabac_cu_pu.c
@@ -0,0 +1,2264 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+******************************************************************************
+* @file ihevce_cabac_cu_pu.c
+*
+* @brief
+* This file contains function definitions for cabac entropy coding of CU
+* and PU structures in HEVC syntax
+*
+* @author
+* ittiam
+*
+* @List of Functions
+* ihevce_cabac_encode_intra_pu()
+* ihevce_cabac_encode_skip_flag()
+* ihevce_cabac_encode_part_mode()
+* ihevce_cabac_encode_merge_idx()
+* ihevce_cabac_encode_inter_pred_idc()
+* ihevce_cabac_encode_refidx()
+* ihevce_cabac_encode_mvd()
+* ihevce_cabac_encode_inter_pu()
+* ihevce_cabac_encode_coding_unit()
+* ihevce_cabac_encode_sao()
+* ihevce_encode_coding_quadtree()
+* ihevce_encode_slice_data()
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_entropy_structs.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+#include "ihevce_enc_loop_structs.h"
+#include "ihevce_trace.h"
+
+#define TEST_CABAC_BITESTIMATE 0
+
+// clang-format off
+/**
+******************************************************************************
+* @brief LUT for binarization of inter partmode bins for cu size > mincu size
+* as per Table9-34 of spec
+*
+* @input : amp_enable flag and part_mode
+*
+* @output : packed bins and count of bins as per following bit packed format
+* Bins : (bits3-bit0) first bin starts from bit3
+* Bins Count: (bits7-bit4)
+* 0xFF in the following table is invalid entry
+*
+* @remarks See Table 9-34 of HEVC spec for Binarization of part_mode
+*******************************************************************************
+*/
+#define INVALID 0xFF
+const UWORD8 gu1_hevce_inter_part_mode_bins[2][8] = {
+
+ /* row 0 (amp_enabled == 0), indexed by part_mode: cusize > minCUsize, no amp */
+ { 0x18, 0x24, 0x20, INVALID, INVALID, INVALID, INVALID, INVALID, },
+
+ /* row 1 (amp_enabled == 1), indexed by part_mode: cusize > minCUsize, amp enable */
+ { 0x18, 0x36, 0x32, INVALID, 0x44, 0x45, 0x40, 0x41, },
+
+};
+
+/**
+******************************************************************************
+* @brief LUT for binarization of inter partmode bins for cu size = mincu size
+* as per Table9-34 of spec
+*
+* @input : mincusize==8 flag and part_mode
+*
+* @output : packed bins and count of bins as per following bit packed format
+* Bins : (bits3-bit0) first bin starts from bit3
+* Bins Count: (bits7-bit4)
+* 0xFF in the following table is invalid entry
+*
+* @remarks See Table 9-34 of HEVC spec for Binarization of part_mode
+*******************************************************************************
+*/
+const UWORD8 gu1_hevce_inter_part_mode_bins_mincu[2][4] = {
+
+ /* row 0 (cu_eq_8 == 0), indexed by part_mode: cusize == minCUsize, minCUsize > 8 */
+ { 0x18, 0x24, 0x32, 0x30, },
+
+ /* row 1 (cu_eq_8 == 1), indexed by part_mode: cusize == minCUsize, minCUsize = 8 */
+ { 0x18, 0x24, 0x20, INVALID },
+
+};
+// clang-format on
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+/**
+******************************************************************************
+*
+* @brief Entropy encoding of luma and chroma intra pred modes
+*
+* @par Description
+* Encodes prev_intra_luma_pred_flag, mpm_idx and rem_intra_pred_mode for each
+* luma partition and chroma intra pred of cu as per section:7.3.9.1
+*
+* Binarization, context model as per Table 9-32 for luma
+* Binarization, context model as per Table 9-35, section 9.3.2.8 for chroma
+*
+* @param[inout] ps_entropy_ctxt
+* pointer to entropy context (handle)
+*
+* @param[in] part_mode
+* indicates whether the mode is 2Nx2N or NxN luma partition
+*
+* @param[in] ps_enc_cu
+* pointer to the intra cu whose luma and chroma pred modes are encoded
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ihevce_cabac_encode_intra_pu(
+ entropy_context_t *ps_entropy_ctxt, WORD32 part_mode, cu_enc_loop_out_t *ps_enc_cu)
+{
+ WORD32 error = IHEVCE_SUCCESS; /* accumulates (ORs) the cabac return codes */
+ cab_ctxt_t *ps_cabac = &ps_entropy_ctxt->s_cabac_ctxt;
+ intra_prev_rem_flags_t *ps_prev_mpm_rem_flags = &ps_enc_cu->as_prev_rem[0];
+ WORD32 i, num_parts;
+
+ /* intra can only be 2Nx2N partition (1 part) or a NxN partition (4 parts) */
+ num_parts = (PART_NxN == part_mode) ? 4 : 1;
+
+ if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
+ {
+ WORD32 cu_size = ps_enc_cu->b4_cu_size << 3; /* CU size in pels */
+
+ /*PIC_INFO : INTRA CU in frame (updated only in encode mode) */
+ ps_entropy_ctxt->ps_pic_level_info->i8_total_intra_cu++;
+ ps_entropy_ctxt->ps_pic_level_info->i8_total_pu += num_parts;
+ ps_entropy_ctxt->ps_pic_level_info->i8_total_intra_pu += num_parts;
+ /*PIC_INFO : 2Nx2N intra PU count per cu size; index is cu_size >> 4, 64 maps to 3 */
+
+ if(PART_2Nx2N == part_mode)
+ {
+ // clang-format off
+ if(cu_size == 64)
+ ps_entropy_ctxt->ps_pic_level_info->i8_total_2nx2n_intra_pu[3]++;
+ else
+ ps_entropy_ctxt->ps_pic_level_info->i8_total_2nx2n_intra_pu[cu_size >> 4]++;
+ // clang-format on
+ }
+ else if(PART_NxN == part_mode)
+ {
+ ps_entropy_ctxt->ps_pic_level_info->i8_total_nxn_intra_pu++;
+ }
+ }
+ /* encode prev intra pred mode flags of all parts first : context model based */
+ for(i = 0; i < num_parts; i++)
+ {
+ WORD32 prev_intra_pred_flag = ps_prev_mpm_rem_flags[i].b1_prev_intra_luma_pred_flag;
+ error |=
+ ihevce_cabac_encode_bin(ps_cabac, prev_intra_pred_flag, IHEVC_CAB_INTRA_LUMA_PRED_FLAG);
+ AEV_TRACE("prev_intra_pred_luma_flag", prev_intra_pred_flag, ps_cabac->u4_range);
+ }
+
+ /* then encode mpm_idx or rem_intra_pred_mode of each part as bypass bins */
+ for(i = 0; i < num_parts; i++)
+ {
+ if(ps_prev_mpm_rem_flags[i].b1_prev_intra_luma_pred_flag)
+ {
+ WORD32 mpm_idx = ps_prev_mpm_rem_flags[i].b2_mpm_idx;
+
+ /* tunary bins for cmax = 2 : 0 -> "0", 1 -> "10", 2 -> "11" */
+ WORD32 num_bins = mpm_idx ? 2 : 1;
+ UWORD32 bins = mpm_idx ? ((1 << 1) | (mpm_idx - 1)) : 0;
+
+ ASSERT(mpm_idx < 3);
+
+ error |= ihevce_cabac_encode_bypass_bins(ps_cabac, bins, num_bins);
+ AEV_TRACE("mpm_idx", mpm_idx, ps_cabac->u4_range);
+ }
+ else
+ {
+ WORD32 rem_intra_pred_mode = ps_prev_mpm_rem_flags[i].b5_rem_intra_pred_mode;
+ error |= ihevce_cabac_encode_bypass_bins(ps_cabac, rem_intra_pred_mode, 5);
+ AEV_TRACE("rem_intra_luma_pred_mode", rem_intra_pred_mode, ps_cabac->u4_range);
+ }
+ }
+
+ /************************************************************************/
+ /* encode the chroma intra prediction mode as per Table 9-35 */
+ /* First bin is context model based prefix : 0 if chroma_mode==4 else 1 */
+ /* If chroma pred mode is not 4, suffix bins are coded as bypass bins */
+ /************************************************************************/
+ {
+ WORD32 chroma_pred_mode = ps_enc_cu->b3_chroma_intra_pred_mode;
+ WORD32 prefix_bin = (chroma_pred_mode == 4) ? 0 : 1;
+
+ /* encode prefix bin */
+ error |= ihevce_cabac_encode_bin(ps_cabac, prefix_bin, IHEVC_CAB_CHROMA_PRED_MODE);
+
+ /* encode suffix bins : chroma_pred_mode as 2 bypass bins */
+ if(prefix_bin)
+ {
+ error |= ihevce_cabac_encode_bypass_bins(ps_cabac, chroma_pred_mode, 2);
+ }
+ AEV_TRACE("intra_chroma_pred_mode", chroma_pred_mode, ps_cabac->u4_range);
+ }
+
+ return (error);
+}
+
+/**
+******************************************************************************
+*
+* @brief Entropy encoding of skip flag (Coding Unit syntax)
+*
+* @par Description
+* context increment for skip flag is derived based on left and top skip flag
+* as per section 9.3.3.1.1, Table 9-38
+*
+* @param[inout] ps_entropy_ctxt
+* pointer to entropy context (handle)
+*
+* @param[in] ps_enc_cu
+* pointer to inter cu whose skip flag is to be coded
+*
+* @param[in] top_avail
+* top availability flag for current cu (boolean)
+*
+* @param[in] left_avail
+* left availability flag for current cu (boolean)
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ihevce_cabac_encode_skip_flag(
+ entropy_context_t *ps_entropy_ctxt,
+ cu_enc_loop_out_t *ps_enc_cu,
+ WORD32 top_avail,
+ WORD32 left_avail)
+
+{
+ WORD32 error = IHEVCE_SUCCESS;
+ WORD32 skip_flag = ps_enc_cu->b1_skip_flag;
+ cab_ctxt_t *ps_cabac = &ps_entropy_ctxt->s_cabac_ctxt;
+
+ /* CU top left co-ordinates w.r.t ctb */
+ WORD32 cu_x0 = ps_enc_cu->b3_cu_pos_x << 3;
+ WORD32 cu_y0 = ps_enc_cu->b3_cu_pos_y << 3;
+
+ /* CU size in pels */
+ WORD32 cu_size = ps_enc_cu->b4_cu_size << 3;
+
+ /* CTB x co-ordinate w.r.t frame start */
+ WORD32 ctb_x0_frm = (ps_entropy_ctxt->i4_ctb_x << ps_entropy_ctxt->i1_log2_ctb_size);
+
+ WORD32 cu_x0_frm = cu_x0 + ctb_x0_frm; /* CU x co-ordinate w.r.t frame start */
+
+ /* bit position from where top skip flag is extracted; 1bit per 8 pel */
+ WORD32 x_pos = ((cu_x0_frm >> 3) & 0x7);
+
+ /* bit position from where left skip flag is extracted; 1bit per 8 pel */
+ WORD32 y_pos = ((cu_y0 >> 3) & 0x7);
+
+ /* top flags: one byte per 64 pels of frame width; left flags: one word for the ctb */
+ UWORD8 *pu1_top_skip_flags = ps_entropy_ctxt->pu1_skip_cu_top + (cu_x0_frm >> 6);
+ UWORD32 u4_skip_left_flags = ps_entropy_ctxt->u4_skip_cu_left;
+
+ /* context increments based on top and left neighbours */
+ UWORD32 ctxt_inc = 0;
+
+ if(top_avail)
+ {
+ WORD32 val;
+ EXTRACT_BIT(val, pu1_top_skip_flags[0], x_pos);
+ ctxt_inc += val;
+ }
+
+ if(left_avail)
+ {
+ WORD32 val;
+ EXTRACT_BIT(val, u4_skip_left_flags, y_pos);
+ ctxt_inc += val;
+ }
+
+ if(CABAC_MODE_COMPUTE_BITS == ps_cabac->e_cabac_op_mode)
+ {
+ /* compute mode: the increment derived above is replaced by i4_num_nbr_skip_cus */
+ ctxt_inc = ps_entropy_ctxt->i4_num_nbr_skip_cus;
+ ASSERT(ctxt_inc < 3);
+ ASSERT((WORD32)ctxt_inc <= (top_avail + left_avail));
+ }
+
+ /* encode the skip flag */
+ error |= ihevce_cabac_encode_bin(ps_cabac, skip_flag, (IHEVC_CAB_SKIP_FLAG + ctxt_inc));
+
+ AEV_TRACE("cu_skip_flag", skip_flag, ps_cabac->u4_range);
+
+ if(CABAC_MODE_ENCODE_BITS == ps_cabac->e_cabac_op_mode)
+ {
+ /* update top and left skip flags only in encode mode; (cu_size >> 3) bits each */
+ if(skip_flag)
+ {
+ SET_BITS(pu1_top_skip_flags[0], x_pos, (cu_size >> 3));
+ SET_BITS(u4_skip_left_flags, y_pos, (cu_size >> 3));
+ }
+ else
+ {
+ CLEAR_BITS(pu1_top_skip_flags[0], x_pos, (cu_size >> 3));
+ CLEAR_BITS(u4_skip_left_flags, y_pos, (cu_size >> 3));
+ }
+
+ ps_entropy_ctxt->u4_skip_cu_left = u4_skip_left_flags; /* write back the local copy */
+ }
+
+ return (error);
+}
+
+/**
+******************************************************************************
+*
+* @brief Entropy encoding of partition mode (Coding Unit syntax)
+*
+* @par Description
+* Binarization process and context modelling of partition mode is done as per
+* section 9.3.2.6 (Table 9-34)
+*
+* @param[inout] ps_cabac
+* pointer to cabac encoding context (handle)
+*
+* @param[in] intra
+* boolean indicating if current cu is intra cu
+*
+* @param[in] is_mincu
+* boolean indicating if current cu size is equal to mincu
+*
+* @param[in] amp_enabled
+* flag to indicate if AMP(Asymmetric motion partition) is enabled at sps level
+*
+* @param[in] cu_eq_8
+* boolean indicating if current cu size is equal to 8
+*
+* @param[in] part_mode
+* partition mode of current CU
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ihevce_cabac_encode_part_mode(
+ cab_ctxt_t *ps_cabac,
+ WORD32 intra,
+ WORD32 is_mincu,
+ WORD32 amp_enabled,
+ WORD32 cu_eq_8,
+ WORD32 part_mode)
+{
+ /* Binarization depends on intra/inter, is_mincu, amp flag, cbsize == 8 */
+ WORD32 bins;
+ WORD32 bin_count, i;
+ WORD32 error = IHEVCE_SUCCESS;
+
+ (void)is_mincu;
+ (void)amp_enabled;
+ (void)cu_eq_8;
+ if(intra)
+ {
+ /* sanity checks for intra part mode */
+ ASSERT(is_mincu);
+ ASSERT((part_mode == SIZE_NxN) || (part_mode == SIZE_2Nx2N));
+
+ bins = (part_mode == SIZE_2Nx2N) ? 1 : 0; /* single context coded bin */
+ error |= ihevce_cabac_encode_bin(ps_cabac, bins, IHEVC_CAB_PART_MODE);
+ }
+ else
+ {
+ /* sanity checks for inter part mode....Too many but good to have */
+ ASSERT((amp_enabled == 0) || (amp_enabled == 1));
+ ASSERT((is_mincu == 0) || (is_mincu == 1));
+ ASSERT((cu_eq_8 == 0) || (cu_eq_8 == 1));
+ ASSERT((part_mode <= SIZE_nRx2N) && (part_mode >= SIZE_2Nx2N));
+ if(!amp_enabled)
+ ASSERT(part_mode <= SIZE_NxN);
+ if(!is_mincu)
+ ASSERT(part_mode != SIZE_NxN);
+ if(is_mincu)
+ ASSERT(part_mode <= SIZE_NxN);
+ if(cu_eq_8)
+ ASSERT(part_mode < SIZE_NxN);
+ if(cu_eq_8)
+ ASSERT(is_mincu);
+
+ /* look up table for packed bins and number of bins for inter part mode */
+ if(!is_mincu)
+ {
+ bins = gu1_hevce_inter_part_mode_bins[amp_enabled][part_mode];
+ }
+ else
+ {
+ bins = gu1_hevce_inter_part_mode_bins_mincu[cu_eq_8][part_mode];
+ }
+
+ bin_count = (bins >> 4) & 0xF; /* upper nibble is the bin count (15 for an INVALID entry) */
+
+ /* Encode the context model based bins, max of 3 */
+ for(i = 0; i < MIN(bin_count, 3); i++)
+ {
+ //TODO: HM-8.0-dev uses 0 context increment for bin2 (i == 2) when amp is enabled
+ WORD32 ctxt_inc = IHEVC_CAB_PART_MODE + i;
+ WORD32 bin = (bins >> (3 - i)) & 0x1; /* first bin is at bit3 of the packed entry */
+ error |= ihevce_cabac_encode_bin(ps_cabac, bin, ctxt_inc);
+ }
+
+ /* Encode the last bin (bit0 of the entry) as bypass bin for amp partitions */
+ if(bin_count == 4)
+ {
+ error |= ihevce_cabac_encode_bypass_bin(ps_cabac, (bins & 0x1));
+ }
+ }
+ AEV_TRACE("part_mode", part_mode, ps_cabac->u4_range);
+ return (error);
+}
+
+/**
+******************************************************************************
+*
+* @brief Entropy encoding of merge_idx of inter prediction unit
+* as per sec 9.3.2 Table9-32. (tunary binarization)
+*
+* @par Description
+* truncated unary binarization is done based on max merge candidates
+* First bin is context modelled bin and the rest are coded as bypass
+*
+* @param[inout] ps_cabac
+* pointer to cabac encoding context (handle)
+*
+* @param[in] merge_idx
+* merge idx of the pu to be encoded;
+*
+* @param[in] max_merge_cand
+* maximum merge candidates signalled in the slice header
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ihevce_cabac_encode_merge_idx(cab_ctxt_t *ps_cabac, WORD32 merge_idx, WORD32 max_merge_cand)
+{
+ WORD32 ret = IHEVCE_SUCCESS;
+ WORD32 ctxt_inc = IHEVC_CAB_MERGE_IDX_EXT; /* context used for the first bin */
+
+ /* sanity checks */
+ ASSERT((merge_idx >= 0) && (merge_idx < max_merge_cand));
+
+ /* encode the merge idx only if required (nothing is coded for a single candidate) */
+ if(max_merge_cand > 1)
+ {
+ /* encode the context modelled first bin : 1 if merge_idx is non zero */
+ ret |= ihevce_cabac_encode_bin(ps_cabac, (merge_idx > 0), ctxt_inc);
+
+ /* encode the remaining bins as bypass tunary of (merge_idx - 1) */
+ if((max_merge_cand > 2) && (merge_idx > 0))
+ {
+ ret |=
+ ihevce_cabac_encode_tunary_bypass(ps_cabac, (merge_idx - 1), (max_merge_cand - 2));
+ }
+
+ AEV_TRACE("merge_idx", merge_idx, ps_cabac->u4_range);
+ }
+
+ return (ret);
+}
+
+/**
+******************************************************************************
+*
+* @brief Entropy encoding of inter_pred_idc for prediction unit of B slice as
+* per sec 9.3.2.9 Table9-36
+*
+* @par Description
+* Max of two context modelled bins coded for pu size > 8x4 or 4x8
+* one context modelled bin coded for pu size = 8x4 or 4x8; bipred not allowed
+* for 8x4 or 4x8.
+*
+* @param[inout] ps_cabac
+* pointer to cabac encoding context (handle)
+*
+* @param[in] inter_pred_idc
+* inter pred mode to be encoded; shall be PRED_L0 or PRED_L1 or PRED_BI
+*
+* @param[in] cu_depth
+* depth of the cu to which current pu belongs (required for context increment)
+*
+* @param[in] pu_w_plus_pu_h
+* required to check if pu_w_plus_pu_h is 12 (8x4PU or 4x8PU)
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ihevce_cabac_encode_inter_pred_idc(
+ cab_ctxt_t *ps_cabac, WORD32 inter_pred_idc, WORD32 cu_depth, WORD32 pu_w_plus_pu_h)
+{
+ WORD32 ret = IHEVCE_SUCCESS;
+ WORD32 ctxt_inc;
+
+ ASSERT(inter_pred_idc <= PRED_BI);
+
+ /* check if PU is 8x4/4x8 (width + height == 12) */
+ if(pu_w_plus_pu_h == 12)
+ {
+ /* case of 8x4 or 4x8 where bi_pred is not allowed */
+ ASSERT((inter_pred_idc == PRED_L0) || (inter_pred_idc == PRED_L1));
+
+ ctxt_inc = IHEVC_CAB_INTER_PRED_IDC + 4; /* single bin selecting l0/l1 */
+ ret |= ihevce_cabac_encode_bin(ps_cabac, inter_pred_idc, ctxt_inc);
+ }
+ else
+ {
+ /* larger PUs can be encoded as bi_pred/l0/l1 inter_pred_idc */
+ WORD32 is_bipred = (inter_pred_idc == PRED_BI);
+
+ ctxt_inc = IHEVC_CAB_INTER_PRED_IDC + cu_depth; /* first bin : context chosen by cu depth */
+ ret |= ihevce_cabac_encode_bin(ps_cabac, is_bipred, ctxt_inc);
+
+ if(!is_bipred)
+ {
+ ctxt_inc = IHEVC_CAB_INTER_PRED_IDC + 4; /* second bin selecting l0/l1 */
+ ret |= ihevce_cabac_encode_bin(ps_cabac, inter_pred_idc, ctxt_inc);
+ }
+ }
+
+ AEV_TRACE("inter_pred_idc", inter_pred_idc, ps_cabac->u4_range);
+
+ return (ret);
+}
+
+/**
+******************************************************************************
+*
+* @brief Entropy encoding of refidx for prediction unit; Binarization done as
+* tunary code as per sec 9.3.2 Table9-32
+*
+* @par Description
+* First two bins are context modelled while the rest are coded as bypass
+*
+* @param[inout] ps_cabac
+* pointer to cabac encoding context (handle)
+*
+* @param[in] ref_idx
+* ref idx of partition unit
+*
+* @param[in] active_refs
+* max number of active references signalled in slice header
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ihevce_cabac_encode_refidx(cab_ctxt_t *ps_cabac, WORD32 ref_idx, WORD32 active_refs)
+{
+    /*****************************************************************/
+    /* ref_idx is binarized as a truncated unary code: bins 0 and 1  */
+    /* are context coded (consecutive contexts), later ones bypassed */
+    /*****************************************************************/
+    WORD32 status = IHEVCE_SUCCESS;
+
+    /* the index must address one of the active references */
+    ASSERT((ref_idx >= 0) && (ref_idx < active_refs));
+
+    /* with a single active reference there is nothing to signal */
+    if(active_refs <= 1)
+    {
+        return (status);
+    }
+
+    /* bin 0 : ref_idx > 0 */
+    status |= ihevce_cabac_encode_bin(ps_cabac, (ref_idx > 0), IHEVC_CAB_INTER_REF_IDX);
+
+    if((ref_idx > 0) && (active_refs > 2))
+    {
+        /* bin 1 : ref_idx > 1, coded with the next context */
+        status |= ihevce_cabac_encode_bin(ps_cabac, (ref_idx > 1), IHEVC_CAB_INTER_REF_IDX + 1);
+
+        /* bins 2 onwards : truncated unary of (ref_idx - 2) in bypass mode */
+        if((ref_idx > 1) && (active_refs > 3))
+        {
+            status |= ihevce_cabac_encode_tunary_bypass(ps_cabac, ref_idx - 2, active_refs - 3);
+        }
+    }
+
+    AEV_TRACE("ref_idx", ref_idx, ps_cabac->u4_range);
+
+    return (status);
+}
+
+/**
+******************************************************************************
+*
+* @brief Entropy encoding of mvd for inter pu as per section 7.3.10.2
+*
+* @par Description
+* syntax coded as per section 7.3.10.2 for mvdx and mvdy
+* context modeling of abs_mvd_greater0 abs_mvd_greater1 done as per Table 9-32
+* binarization of abs_mvd_minus2 is done as EG1 code as per section 9.3.2.4
+*
+* @param[inout] ps_cabac
+* pointer to cabac encoding context (handle)
+*
+* @param[in] ps_mvd
+* pointer to mvd struct containing mvdx and mvdy
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ihevce_cabac_encode_mvd(cab_ctxt_t *ps_cabac, mv_t *ps_mvd)
+{
+    /*******************************************************************/
+    /* bin order: greater0 (x, y), greater1 (x, y), then per component */
+    /* abs_mvd_minus2 followed by the sign, first for x and then for y */
+    /*******************************************************************/
+    WORD32 status = IHEVCE_SUCCESS;
+
+    /* signed components and their magnitudes */
+    const WORD32 dx = ps_mvd->i2_mvx;
+    const WORD32 dy = ps_mvd->i2_mvy;
+    const WORD32 mag_x = ABS(dx);
+    const WORD32 mag_y = ABS(dy);
+
+    /* flag values derived from the magnitudes */
+    const WORD32 x_nonzero = (mag_x > 0);
+    const WORD32 y_nonzero = (mag_y > 0);
+    const WORD32 x_above_one = (mag_x > 1);
+    const WORD32 y_above_one = (mag_y > 1);
+
+    /* abs_mvd_greater0_flag of x and y, both coded with IHEVC_CAB_MVD_GRT0 */
+    status |= ihevce_cabac_encode_bin(ps_cabac, x_nonzero, IHEVC_CAB_MVD_GRT0);
+    AEV_TRACE("abs_mvd_greater0_flag[0]", x_nonzero, ps_cabac->u4_range);
+
+    status |= ihevce_cabac_encode_bin(ps_cabac, y_nonzero, IHEVC_CAB_MVD_GRT0);
+    AEV_TRACE("abs_mvd_greater0_flag[1]", y_nonzero, ps_cabac->u4_range);
+
+    /* abs_mvd_greater1_flag, present only for non zero components */
+    if(x_nonzero)
+    {
+        status |= ihevce_cabac_encode_bin(ps_cabac, x_above_one, IHEVC_CAB_MVD_GRT1);
+        AEV_TRACE("abs_mvd_greater1_flag[0]", x_above_one, ps_cabac->u4_range);
+    }
+    if(y_nonzero)
+    {
+        status |= ihevce_cabac_encode_bin(ps_cabac, y_above_one, IHEVC_CAB_MVD_GRT1);
+        AEV_TRACE("abs_mvd_greater1_flag[1]", y_above_one, ps_cabac->u4_range);
+    }
+
+    /* x component : remainder and sign */
+    if(x_nonzero)
+    {
+        if(x_above_one)
+        {
+            /* (|mvd_x| - 2) through the EGk coder with k = 1 */
+            status |= ihevce_cabac_encode_egk(ps_cabac, (mag_x - 2), 1);
+            AEV_TRACE("abs_mvd_minus2[0]", (mag_x - 2), ps_cabac->u4_range);
+        }
+
+        /* sign as a bypass bin : 1 for a negative component */
+        status |= ihevce_cabac_encode_bypass_bin(ps_cabac, (dx < 0));
+        AEV_TRACE("mvd_sign_flag[0]", (dx < 0), ps_cabac->u4_range);
+    }
+
+    /* y component : remainder and sign */
+    if(y_nonzero)
+    {
+        if(y_above_one)
+        {
+            /* (|mvd_y| - 2) through the EGk coder with k = 1 */
+            status |= ihevce_cabac_encode_egk(ps_cabac, (mag_y - 2), 1);
+            AEV_TRACE("abs_mvd_minus2[1]", (mag_y - 2), ps_cabac->u4_range);
+        }
+
+        /* sign as a bypass bin : 1 for a negative component */
+        status |= ihevce_cabac_encode_bypass_bin(ps_cabac, (dy < 0));
+        AEV_TRACE("mvd_sign_flag[1]", (dy < 0), ps_cabac->u4_range);
+    }
+
+    return status;
+}
+
+/**
+******************************************************************************
+*
+* @brief Entropy encoding of all syntax elements of inter PUs in a CU
+*
+* @par Description
+* syntax coded as per section 7.3.10.1 for inter prediction unit
+*
+* @param[inout] ps_entropy_ctxt
+* pointer to entropy context (handle)
+*
+* @param[in] ps_enc_cu
+* pointer to current cu whose inter prediction units are to be encoded
+*
+* @param[in] cu_depth
+* depth of the current cu in coding tree
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ihevce_cabac_encode_inter_pu(
+    entropy_context_t *ps_entropy_ctxt, cu_enc_loop_out_t *ps_enc_cu, WORD32 cu_depth)
+{
+    WORD32 ret = IHEVCE_SUCCESS;
+
+    slice_header_t *ps_slice_hdr = ps_entropy_ctxt->ps_slice_hdr;
+    cab_ctxt_t *ps_cabac = &ps_entropy_ctxt->s_cabac_ctxt;
+    pu_t *ps_pu = ps_enc_cu->ps_pu;
+
+    WORD32 merge_idx = ps_pu->b3_merge_idx;
+    WORD32 max_merge_cand = ps_slice_hdr->i1_max_num_merge_cand;
+    WORD32 ctxt_inc;
+
+    if(ps_enc_cu->b1_skip_flag)
+    {
+        WORD32 cu_size = ps_enc_cu->b4_cu_size << 3;
+        /* PIC_INFO : skip cu / pu counters, updated only in CABAC_MODE_ENCODE_BITS */
+        if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
+        {
+            ps_entropy_ctxt->ps_pic_level_info->i8_total_skip_cu++;
+            ps_entropy_ctxt->ps_pic_level_info->i8_total_pu++;
+            if(cu_size == 64)
+                ps_entropy_ctxt->ps_pic_level_info->i8_total_2nx2n_inter_pu[3]++;
+            else
+                ps_entropy_ctxt->ps_pic_level_info->i8_total_2nx2n_inter_pu[cu_size >> 4]++;
+        }
+        /* for a skip cu only the merge idx of its first pu is encoded */
+        ret |= ihevce_cabac_encode_merge_idx(ps_cabac, merge_idx, max_merge_cand);
+    }
+    else
+    {
+        /* MODE_INTER : every pu partition of the cu is encoded in turn */
+        WORD32 part_mode = ps_enc_cu->b3_part_mode;
+        WORD32 num_parts, i;
+
+        num_parts = (part_mode == SIZE_2Nx2N) ? 1 : ((part_mode == SIZE_NxN) ? 4 : 2);
+
+        /* PIC_INFO : inter cu / pu counters bucketed by partition type and cu size */
+        if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
+        {
+            WORD32 cu_size = ps_enc_cu->b4_cu_size << 3;
+            ps_entropy_ctxt->ps_pic_level_info->i8_total_inter_cu++;
+            ps_entropy_ctxt->ps_pic_level_info->i8_total_pu += num_parts;
+
+            // clang-format off
+            if(PART_2Nx2N == part_mode)
+            {
+                if(cu_size == 64)
+                    ps_entropy_ctxt->ps_pic_level_info->i8_total_2nx2n_inter_pu[3]++;
+                else
+                    ps_entropy_ctxt->ps_pic_level_info->i8_total_2nx2n_inter_pu[cu_size >> 4]++;
+            }
+            else if((PART_2NxN == part_mode) || (PART_Nx2N == part_mode))
+            {
+                if(cu_size == 64)
+                    ps_entropy_ctxt->ps_pic_level_info->i8_total_smp_inter_pu[3]++;
+                else
+                    ps_entropy_ctxt->ps_pic_level_info->i8_total_smp_inter_pu[cu_size >> 4]++;
+            }
+            else if((PART_2NxnU == part_mode) || (PART_2NxnD == part_mode) ||
+                    (PART_nLx2N == part_mode) || (PART_nRx2N == part_mode))
+            {
+                ps_entropy_ctxt->ps_pic_level_info->i8_total_amp_inter_pu[cu_size >> 5]++;
+            }
+            else
+            {
+                ps_entropy_ctxt->ps_pic_level_info->i8_total_nxn_inter_pu[cu_size >> 5]++;
+            }
+            // clang-format on
+        }
+
+        /* encode each pu partition */
+        for(i = 0; i < num_parts; i++)
+        {
+            /* bit estimate taken before the merge flag, used for the PIC INFO delta */
+            WORD32 merge_flag;
+            UWORD32 u4_bits_estimated_merge_flag = 0;
+            ps_pu = ps_enc_cu->ps_pu + i;
+
+            /* encode the merge flag context modelled bin */
+            merge_flag = ps_pu->b1_merge_flag;
+            u4_bits_estimated_merge_flag = ps_cabac->u4_bits_estimated_q12;
+            ctxt_inc = IHEVC_CAB_MERGE_FLAG_EXT;
+            ret |= ihevce_cabac_encode_bin(ps_cabac, merge_flag, ctxt_inc);
+
+            if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
+            {
+                // clang-format off
+                /* NOTE(review): '=' here, the ref_id / mvd counters below use '+=' - confirm */
+                ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_merge_flag =
+                    (ps_cabac->u4_bits_estimated_q12 -
+                        u4_bits_estimated_merge_flag);
+                // clang-format on
+            }
+            AEV_TRACE("merge_flag", merge_flag, ps_cabac->u4_range);
+
+            if(merge_flag)
+            {
+                merge_idx = ps_pu->b3_merge_idx;
+                if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
+                    ps_entropy_ctxt->ps_pic_level_info->i8_total_merge_pu++;
+                /* encode the merge idx for the pu */
+                ret |= ihevce_cabac_encode_merge_idx(ps_cabac, merge_idx, max_merge_cand);
+            }
+            else
+            {
+                /* encode the inter_pred_idc, ref_idx and mvd */
+                WORD32 inter_pred_idc = ps_pu->b2_pred_mode;
+                WORD32 ref_l0_active = ps_slice_hdr->i1_num_ref_idx_l0_active;
+                WORD32 ref_l1_active = ps_slice_hdr->i1_num_ref_idx_l1_active;
+
+                /* PIC_INFO : prediction direction and reference index histograms */
+                if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
+                {
+                    ps_entropy_ctxt->ps_pic_level_info->i8_total_non_skipped_inter_pu++;
+                    // clang-format off
+                    if(inter_pred_idc == PRED_L0)
+                    {
+                        ps_entropy_ctxt->ps_pic_level_info->i8_total_L0_mode++;
+                        ps_entropy_ctxt->ps_pic_level_info->i8_total_L0_ref_idx[ps_pu->mv.i1_l0_ref_idx]++;
+                    }
+                    else if(inter_pred_idc == PRED_L1)
+                    {
+                        ps_entropy_ctxt->ps_pic_level_info->i8_total_L1_mode++;
+                        ps_entropy_ctxt->ps_pic_level_info->i8_total_L1_ref_idx[ps_pu->mv.i1_l1_ref_idx]++;
+                    }
+                    else if(inter_pred_idc == PRED_BI)
+                    {
+                        ps_entropy_ctxt->ps_pic_level_info->i8_total_BI_mode++;
+                        /* a bi-pred pu is counted against its L0 reference ... */
+                        ps_entropy_ctxt->ps_pic_level_info->i8_total_L0_ref_idx[ps_pu->mv.i1_l0_ref_idx]++;
+                        /* ... as well as against its L1 reference */
+                        ps_entropy_ctxt->ps_pic_level_info->i8_total_L1_ref_idx[ps_pu->mv.i1_l1_ref_idx]++;
+                    }
+                    // clang-format on
+                }
+                if(ps_slice_hdr->i1_slice_type == BSLICE)
+                {
+                    /* Encode inter_pred_idc as per sec 9.3.2.9 Table9-36 */
+                    WORD32 pu_w_plus_pu_h;
+
+                    /* b4_wd / b4_ht are turned into pels as (value + 1) << 2; the sum is */
+                    /* required to check if w+h==12 case */
+                    pu_w_plus_pu_h = ((ps_pu->b4_wd + 1) << 2) + ((ps_pu->b4_ht + 1) << 2);
+
+                    ret |= ihevce_cabac_encode_inter_pred_idc(
+                        ps_cabac, inter_pred_idc, cu_depth, pu_w_plus_pu_h);
+                }
+                else
+                {
+                    ASSERT(inter_pred_idc == 0);
+                }
+
+                /* Encode ref idx, mvd and mvp flag for L0 (PRED_L0 or PRED_BI) */
+                if(inter_pred_idc != PRED_L1)
+                {
+                    UWORD32 u4_bits_estimated_prev_mvd_ref_id;
+                    /* encode L0 ref_idx */
+                    WORD32 ref_idx_l0 = ps_pu->mv.i1_l0_ref_idx;
+
+                    /*PIC INFO : Populate Ref Indx L0 Bits*/
+                    u4_bits_estimated_prev_mvd_ref_id = ps_cabac->u4_bits_estimated_q12;
+                    ret |= ihevce_cabac_encode_refidx(ps_cabac, ref_idx_l0, ref_l0_active);
+
+                    if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
+                    {
+                        // clang-format off
+                        ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_ref_id +=
+                            (ps_cabac->u4_bits_estimated_q12 -
+                                u4_bits_estimated_prev_mvd_ref_id);
+                        // clang-format on
+                    }
+                    /* Encode the mvd for L0 */
+                    /*PIC INFO : Populate MVD Bits*/
+                    u4_bits_estimated_prev_mvd_ref_id = ps_cabac->u4_bits_estimated_q12;
+
+                    ret |= ihevce_cabac_encode_mvd(ps_cabac, &ps_pu->mv.s_l0_mv);
+
+                    if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
+                    { // clang-format off
+                        ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_mvd +=
+                            (ps_cabac->u4_bits_estimated_q12 -
+                                u4_bits_estimated_prev_mvd_ref_id);
+                        // clang-format on
+                    }
+
+                    /* Encode the mvp_l0_flag */
+                    ctxt_inc = IHEVC_CAB_MVP_L0L1;
+                    ret |= ihevce_cabac_encode_bin(ps_cabac, ps_pu->b1_l0_mvp_idx, ctxt_inc);
+
+                    AEV_TRACE("mvp_l0/l1_flag", ps_pu->b1_l0_mvp_idx, ps_cabac->u4_range);
+                }
+
+                /* Encode ref idx, mvd and mvp flag for L1 (PRED_L1 or PRED_BI) */
+                if(inter_pred_idc != PRED_L0)
+                {
+                    /* encode L1 ref_idx */
+                    WORD32 ref_idx_l1 = ps_pu->mv.i1_l1_ref_idx;
+
+                    UWORD32 u4_bits_estimated_prev_mvd_ref_id;
+                    /*PIC INFO : Populate Ref Indx L1 Bits*/
+                    u4_bits_estimated_prev_mvd_ref_id = ps_cabac->u4_bits_estimated_q12;
+
+                    ret |= ihevce_cabac_encode_refidx(ps_cabac, ref_idx_l1, ref_l1_active);
+
+                    if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
+                    { // clang-format off
+                        ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_ref_id +=
+                            (ps_cabac->u4_bits_estimated_q12 -
+                                u4_bits_estimated_prev_mvd_ref_id);
+                    } // clang-format on
+
+                    /* with mvd_l1_zero_flag set, a bi-pred pu sends no L1 mvd (it must be 0) */
+                    if(ps_slice_hdr->i1_mvd_l1_zero_flag && inter_pred_idc == PRED_BI)
+                    {
+                        ASSERT(ps_pu->mv.s_l1_mv.i2_mvx == 0);
+                        ASSERT(ps_pu->mv.s_l1_mv.i2_mvy == 0);
+                    }
+                    else
+                    {
+                        /* Encode the mvd for L1 */
+                        /*PIC INFO : Populate MVD Bits*/
+                        u4_bits_estimated_prev_mvd_ref_id = ps_cabac->u4_bits_estimated_q12;
+
+                        /* the L1 mvd is coded exactly like the L0 one */
+                        ret |= ihevce_cabac_encode_mvd(ps_cabac, &ps_pu->mv.s_l1_mv);
+
+                        if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
+                        {
+                            ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_mvd +=
+                                (ps_cabac->u4_bits_estimated_q12 -
+                                    u4_bits_estimated_prev_mvd_ref_id);
+                        }
+                    }
+
+                    /* Encode the mvp_l1_flag */
+                    ctxt_inc = IHEVC_CAB_MVP_L0L1;
+                    ret |= ihevce_cabac_encode_bin(ps_cabac, ps_pu->b1_l1_mvp_idx, ctxt_inc);
+
+                    AEV_TRACE("mvp_l0/l1_flag", ps_pu->b1_l1_mvp_idx, ps_cabac->u4_range);
+                }
+            }
+        }
+    }
+
+    return ret;
+}
+
+/**
+******************************************************************************
+*
+* @brief Entropy encoding of coding unit (Coding Unit syntax)
+*
+* @par Description
+* Entropy encode of coding unit (Coding Unit syntax) as per section:7.3.9.1
+* General Coding unit syntax
+*
+* @param[inout] ps_entropy_ctxt
+* pointer to entropy context (handle)
+*
+* @param[in] ps_enc_cu
+* pointer to current cu whose entropy encode is done
+*
+* @param[in] cu_depth
+* depth of the current cu in coding tree
+*
+* @param[in] top_avail
+* top availability flag for current cu (boolean)
+*
+* @param[in] left_avail
+* left availability flag for current cu (boolean)
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ihevce_cabac_encode_coding_unit(
+    entropy_context_t *ps_entropy_ctxt,
+    cu_enc_loop_out_t *ps_enc_cu,
+    WORD32 cu_depth,
+    WORD32 top_avail,
+    WORD32 left_avail)
+{
+    WORD32 ret = IHEVCE_SUCCESS;
+    sps_t *ps_sps = ps_entropy_ctxt->ps_sps;
+    pps_t *ps_pps = ps_entropy_ctxt->ps_pps;
+    slice_header_t *ps_slice_hdr = ps_entropy_ctxt->ps_slice_hdr;
+
+    WORD32 skip_flag = 0;
+    WORD32 no_res_flag = 0;
+
+    /* CU top left co-ordinates w.r.t ctb (positions are stored in 8 pel units) */
+    WORD32 cu_x0 = ps_enc_cu->b3_cu_pos_x << 3;
+    WORD32 cu_y0 = ps_enc_cu->b3_cu_pos_y << 3;
+
+    /* CU size in pels (stored in 8 pel units) */
+    WORD32 cu_size = ps_enc_cu->b4_cu_size << 3;
+    WORD32 log2_cb_size;
+
+    cab_ctxt_t *ps_cabac = &ps_entropy_ctxt->s_cabac_ctxt;
+
+    UWORD32 u4_header_bits_temp = ps_cabac->u4_bits_estimated_q12;
+
+    (void)cu_depth;
+    (void)top_avail;
+    (void)left_avail;
+    /* Sanity checks : the cu must lie inside the ctb */
+    ASSERT((cu_x0 + cu_size) <= (1 << ps_entropy_ctxt->i1_log2_ctb_size));
+    ASSERT((cu_y0 + cu_size) <= (1 << ps_entropy_ctxt->i1_log2_ctb_size));
+
+    /* transquant bypass is asserted off; the flag is still coded below if it is on */
+    ASSERT(ps_pps->i1_transquant_bypass_enable_flag == 0);
+
+    /* log2_cb_size based on cu size */
+    GETRANGE(log2_cb_size, cu_size);
+    log2_cb_size -= 1;
+
+    if(ps_pps->i1_transquant_bypass_enable_flag)
+    {
+        ret |= ihevce_cabac_encode_bin(
+            ps_cabac, ps_enc_cu->b1_tq_bypass_flag, IHEVC_CAB_CU_TQ_BYPASS_FLAG);
+
+        AEV_TRACE("cu_transquant_bypass_flag", ps_enc_cu->b1_tq_bypass_flag, ps_cabac->u4_range);
+    }
+    /* code the skip flag for inter slices */
+    if(ps_slice_hdr->i1_slice_type != ISLICE)
+    {
+        skip_flag = ps_enc_cu->b1_skip_flag;
+
+        ret |= ihevce_cabac_encode_skip_flag(ps_entropy_ctxt, ps_enc_cu, top_avail, left_avail);
+    }
+    /*PIC_INFO : Total CU in frame based on cu size */
+    if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
+    {
+        // clang-format off
+        if(cu_size == 64)
+            ps_entropy_ctxt->ps_pic_level_info->i8_total_cu_based_on_size[3]++;
+        else
+            ps_entropy_ctxt->ps_pic_level_info->i8_total_cu_based_on_size[cu_size >> 4]++;
+        // clang-format on
+    }
+    if(skip_flag)
+    {
+        /* encode merge idx for the skip cu */
+        ret |= ihevce_cabac_encode_inter_pu(ps_entropy_ctxt, ps_enc_cu, cu_depth);
+
+        if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
+        {
+            /*PIC INFO: Populated non-coded TUs in CU*/
+            ps_entropy_ctxt->ps_pic_level_info->i8_total_non_coded_tu +=
+                ps_enc_cu->u2_num_tus_in_cu;
+            // clang-format off
+            if(cu_size == 64)
+                ps_entropy_ctxt->ps_pic_level_info->i8_total_tu_based_on_size[3] +=
+                    ps_enc_cu->u2_num_tus_in_cu;
+            else if(cu_size == 32)
+                ps_entropy_ctxt->ps_pic_level_info->i8_total_tu_based_on_size[3] +=
+                    ps_enc_cu->u2_num_tus_in_cu;
+            else
+                ps_entropy_ctxt->ps_pic_level_info->i8_total_tu_based_on_size[cu_size >> 3] +=
+                    ps_enc_cu->u2_num_tus_in_cu;
+            // clang-format on
+
+            /*PIC INFO: Populate cu header bits*/
+            ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_cu_hdr_bits +=
+                (ps_cabac->u4_bits_estimated_q12 - u4_header_bits_temp);
+        }
+    }
+    else
+    {
+        WORD32 pred_mode = PRED_MODE_INTRA;
+        WORD32 part_mode = ps_enc_cu->b3_part_mode;
+        WORD32 pcm_flag = ps_enc_cu->b1_pcm_flag;
+        WORD32 is_mincu;
+        WORD32 is_intra;
+
+        is_mincu = (cu_size == (1 << ps_sps->i1_log2_min_coding_block_size));
+        /* encode pred mode flag for inter slice (I slices keep PRED_MODE_INTRA) */
+        if(ps_slice_hdr->i1_slice_type != ISLICE)
+        {
+            pred_mode = ps_enc_cu->b1_pred_mode_flag;
+
+            ret |= ihevce_cabac_encode_bin(ps_cabac, pred_mode, IHEVC_CAB_PRED_MODE);
+
+            AEV_TRACE("pred_mode_flag", pred_mode, ps_cabac->u4_range);
+        }
+        is_intra = (PRED_MODE_INTRA == pred_mode);
+
+        /* encode partition mode for inter pred or smallest intra pred cu */
+        if((!is_intra) || is_mincu)
+        {
+            WORD32 amp_enabled = ps_sps->i1_amp_enabled_flag;
+            WORD32 cusize_8 = (cu_size == 8);
+
+            ret |= ihevce_cabac_encode_part_mode(
+                ps_cabac, is_intra, is_mincu, amp_enabled, cusize_8, part_mode);
+        }
+        else
+        {
+            ASSERT(part_mode == SIZE_2Nx2N);
+        }
+
+        /* encode intra / inter pu modes of the current CU */
+        if(is_intra)
+        {
+            /* NOTE: I_PCM not supported in encoder */
+            ASSERT(0 == pcm_flag);
+            ASSERT(0 == ps_sps->i1_pcm_enabled_flag);
+
+            ret |= ihevce_cabac_encode_intra_pu(ps_entropy_ctxt, part_mode, ps_enc_cu);
+        }
+        else
+        {
+            ret |= ihevce_cabac_encode_inter_pu(ps_entropy_ctxt, ps_enc_cu, cu_depth);
+        }
+        /* encode no residue syntax flag and transform tree conditionally */
+        if(!pcm_flag)
+        {
+            pu_t *ps_pu = &ps_enc_cu->ps_pu[0];
+            WORD32 merge_cu = 0;
+            /* Encode residue syntax flag for inter cus not merged as 2Nx2N */
+            if(!is_intra)
+                merge_cu = (part_mode == PART_2Nx2N) && ps_pu->b1_merge_flag;
+
+            if(!is_intra && !merge_cu)
+            {
+                no_res_flag = ps_enc_cu->b1_no_residual_syntax_flag;
+
+#if 1 /* HACK FOR COMPLIANCE WITH HM REFERENCE DECODER */
+                /*********************************************************/
+                /* currently the HM decoder expects qtroot cbf instead of */
+                /* no_residue_flag which has opposite meaning            */
+                /* This will be fixed once the software / spec is fixed  */
+                /*********************************************************/
+                ret |= ihevce_cabac_encode_bin(ps_cabac, !no_res_flag, IHEVC_CAB_NORES_IDX);
+
+                AEV_TRACE("no_residual_syntax_flag (HACKY)", !no_res_flag, ps_cabac->u4_range);
+#else
+                ret |= ihevce_cabac_encode_bin(ps_cabac, no_res_flag, IHEVC_CAB_NORES_IDX);
+
+                AEV_TRACE("no_residual_syntax_flag", no_res_flag, ps_cabac->u4_range);
+#endif
+            }
+            /* snapshot of the bit estimate at the end of the cu header */
+            ps_cabac->u4_header_bits_estimated_q12 = ps_cabac->u4_bits_estimated_q12;
+
+            if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
+            { // clang-format off
+                /*PIC INFO: Populate cu header bits*/
+                ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_cu_hdr_bits +=
+                    (ps_cabac->u4_bits_estimated_q12 - u4_header_bits_temp);
+            } // clang-format on
+
+            ps_cabac->u4_true_tu_split_flag_q12 = 0;
+            /* encode transform tree if no_residue_flag is 0 */
+            if(!no_res_flag)
+            {
+                ps_entropy_ctxt->i4_tu_idx = 0;
+
+                ret |= ihevce_encode_transform_tree(
+                    ps_entropy_ctxt, cu_x0, cu_y0, log2_cb_size, 0, 0, ps_enc_cu);
+            }
+            else
+            {
+                if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
+                {
+                    /*PIC INFO: Populated non-coded TUs in CU*/
+                    ps_entropy_ctxt->ps_pic_level_info->i8_total_non_coded_tu +=
+                        ps_enc_cu->u2_num_tus_in_cu;
+                    // clang-format off
+                    if(cu_size == 64)
+                        ps_entropy_ctxt->ps_pic_level_info->i8_total_tu_based_on_size[3] +=
+                            ps_enc_cu->u2_num_tus_in_cu;
+                    else if(cu_size == 32)
+                        ps_entropy_ctxt->ps_pic_level_info->i8_total_tu_based_on_size[3] +=
+                            ps_enc_cu->u2_num_tus_in_cu;
+                    else
+                        ps_entropy_ctxt->ps_pic_level_info->i8_total_tu_based_on_size[cu_size >> 3] +=
+                            ps_enc_cu->u2_num_tus_in_cu;
+                    // clang-format on
+                }
+            }
+            ps_cabac->u4_cbf_bits_q12 = ps_cabac->u4_bits_estimated_q12 -
+                                        ps_cabac->u4_header_bits_estimated_q12 -
+                                        ps_cabac->u4_true_tu_split_flag_q12;
+        }
+    }
+
+    /*duplicate the qp values for 8x8 CU array to maintain neighbour qp*/
+    if(CABAC_MODE_ENCODE_BITS == ps_entropy_ctxt->s_cabac_ctxt.e_cabac_op_mode)
+    {
+        WORD32 i, j;
+        WORD32 cur_cu_offset, cur_qp, qp_left, qp_top;
+        WORD32 is_last_blk_in_qg;
+        /* CU x co-ordinate w.r.t frame start */
+        WORD32 ctb_x0_frm = (ps_entropy_ctxt->i4_ctb_x << ps_entropy_ctxt->i1_log2_ctb_size);
+
+        WORD32 cu_x0_frm = cu_x0 + ctb_x0_frm;
+
+        /* CU y co-ordinate w.r.t frame start */
+        WORD32 ctb_y0_frm = (ps_entropy_ctxt->i4_ctb_y << ps_entropy_ctxt->i1_log2_ctb_size);
+
+        WORD32 cu_y0_frm = cu_y0 + ctb_y0_frm;
+
+        WORD32 pic_width = ps_sps->i2_pic_width_in_luma_samples;
+        WORD32 pic_height = ps_sps->i2_pic_height_in_luma_samples;
+
+        /* The neighbour QP array is populated per quantization group (qg), whose   */
+        /* size follows from diff_cu_qp_delta_depth. is_last_blk_in_qg tells if the */
+        /* current cu is the last one of its qg. The shift below uses (log2 - 3)    */
+        /* because cu_pos_x / cu_pos_y are in terms of 8x8 positions in the CTB.    */
+        WORD32 log2_min_cu_qp_delta_size =
+            ps_entropy_ctxt->i1_log2_ctb_size - ps_entropy_ctxt->ps_pps->i1_diff_cu_qp_delta_depth;
+        UWORD32 min_cu_qp_delta_size = 1 << log2_min_cu_qp_delta_size;
+
+        WORD32 block_addr_align = 15 << (log2_min_cu_qp_delta_size - 3);
+
+        ps_entropy_ctxt->i4_qg_pos_x = ps_enc_cu->b3_cu_pos_x & block_addr_align;
+        ps_entropy_ctxt->i4_qg_pos_y = ps_enc_cu->b3_cu_pos_y & block_addr_align;
+
+        /* Condition for detecting last cu in a qp group.                                       */
+        /* Case 1: Current cu position + size exceed or meets the next qp group start location */
+        /* Case 2: Current cu position + size hits the incomplete ctb boundary in atleast one  */
+        /*         direction and the qp grp limit in other direction                            */
+
+        /* case 1 */
+        is_last_blk_in_qg =
+            ((cu_x0 + cu_size) >=
+                 ((ps_entropy_ctxt->i4_qg_pos_x << 3) + (WORD32)min_cu_qp_delta_size) &&
+             (cu_y0 + cu_size) >=
+                 ((ps_entropy_ctxt->i4_qg_pos_y << 3) + (WORD32)min_cu_qp_delta_size));
+
+        /* case 2 : x direction incomplete ctb */
+        if((cu_x0_frm + cu_size) >= pic_width)
+        {
+            is_last_blk_in_qg |=
+                ((cu_y0 + cu_size) >=
+                 ((ps_entropy_ctxt->i4_qg_pos_y << 3) + (WORD32)min_cu_qp_delta_size));
+        }
+
+        /* case 2 : y direction incomplete ctb */
+        if((cu_y0_frm + cu_size) >= pic_height)
+        {
+            is_last_blk_in_qg |=
+                ((cu_x0 + cu_size) >=
+                 ((ps_entropy_ctxt->i4_qg_pos_x << 3) + (WORD32)min_cu_qp_delta_size));
+        }
+
+        cur_cu_offset = ps_enc_cu->b3_cu_pos_x + (ps_enc_cu->b3_cu_pos_y * 8);
+
+        if((ps_entropy_ctxt->i4_is_cu_cbf_zero || no_res_flag || skip_flag) &&
+           ((ps_entropy_ctxt->i1_encode_qp_delta)))
+        {
+            { // clang-format off
+                /* the stored qp is the rounded average of qp_left and qp_top of the qg */
+                if(ps_entropy_ctxt->i4_qg_pos_x > 0)
+                {
+                    qp_left =
+                        ps_entropy_ctxt->ai4_8x8_cu_qp[(ps_entropy_ctxt->i4_qg_pos_x - 1) +
+                                                       (ps_entropy_ctxt->i4_qg_pos_y * 8)];
+                }
+                if(ps_entropy_ctxt->i4_qg_pos_y > 0)
+                {
+                    qp_top =
+                        ps_entropy_ctxt->ai4_8x8_cu_qp[ps_entropy_ctxt->i4_qg_pos_x +
+                                                       (ps_entropy_ctxt->i4_qg_pos_y - 1) *
+                                                           8];
+                } // clang-format on
+                if(ps_entropy_ctxt->i4_qg_pos_x == 0)
+                {
+                    /* no left neighbour inside the ctb : use the previous coded Qp */
+                    qp_left = ps_entropy_ctxt->i1_cur_qp;
+                }
+                if(ps_entropy_ctxt->i4_qg_pos_y == 0)
+                {
+                    /* no top neighbour inside the ctb : use the previous coded Qp */
+                    qp_top = ps_entropy_ctxt->i1_cur_qp;
+                }
+                cur_qp = (qp_top + qp_left + 1) >> 1;
+                /*In case of skip or zero cbf CU the previous qp used has to be updated*/
+                if(is_last_blk_in_qg)
+                    ps_entropy_ctxt->i1_cur_qp = cur_qp;
+            }
+        }
+        else
+        {
+            cur_qp = (WORD32)ps_enc_cu->ps_enc_tu->s_tu.b7_qp;
+        }
+
+        if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
+        {
+            WORD32 temp = 0;
+            /*PIC_INFO: Accumulate average qp, min qp and max qp*/
+            ps_entropy_ctxt->ps_pic_level_info->i8_total_qp += cur_qp;
+            if(cu_size == 64)
+                temp = 6;
+            else if(cu_size == 32)
+                temp = 4;
+            else if(cu_size == 16)
+                temp = 2;
+            else if(cu_size == 8)
+                temp = 0;
+            /* (1 << temp) is the number of 8x8 blocks covered by the cu */
+            ps_entropy_ctxt->ps_pic_level_info->i8_total_qp_min_cu += (cur_qp * (1 << temp));
+            if(cur_qp < ps_entropy_ctxt->ps_pic_level_info->i4_min_qp)
+                ps_entropy_ctxt->ps_pic_level_info->i4_min_qp = cur_qp;
+            if(cur_qp > ps_entropy_ctxt->ps_pic_level_info->i4_max_qp)
+                ps_entropy_ctxt->ps_pic_level_info->i4_max_qp = cur_qp;
+        }
+
+        for(i = 0; i < (WORD32)ps_enc_cu->b4_cu_size; i++)
+        {
+            for(j = 0; j < (WORD32)ps_enc_cu->b4_cu_size; j++)
+            {
+                ps_entropy_ctxt->ai4_8x8_cu_qp[cur_cu_offset + (i * 8) + j] = cur_qp;
+            }
+        }
+        ps_entropy_ctxt->i4_is_cu_cbf_zero = 1;
+    }
+
+    return ret;
+}
+
+/**
+******************************************************************************
+*
+* @brief Entropy encoding of SAO related syntax elements as per sec 7.3.8.3
+*
+* @par Description
+* Encoding of sao related syntax elements at ctb level.
+*
+* @param[inout] ps_entropy_ctxt
+* pointer to entropy context (handle)
+*
+* @param[in] ps_ctb_enc_loop_out
+* pointer to ctb level output structure from enc loop
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ihevce_cabac_encode_sao(
+ entropy_context_t *ps_entropy_ctxt, ctb_enc_loop_out_t *ps_ctb_enc_loop_out)
+{
+ WORD32 error = IHEVCE_SUCCESS;
+ sao_enc_t *ps_sao;
+ nbr_avail_flags_t *ps_ctb_nbr_avail_flags;
+ slice_header_t *ps_slice_hdr = ps_entropy_ctxt->ps_slice_hdr;
+ cab_ctxt_t *ps_cabac = &ps_entropy_ctxt->s_cabac_ctxt;
+
+ UWORD8 u1_left_avail, u1_top_avail;
+ /* neighbour availability and sao parameters both come from the enc loop output */
+ ps_ctb_nbr_avail_flags = &ps_ctb_enc_loop_out->s_ctb_nbr_avail_flags;
+
+ ps_sao = &ps_ctb_enc_loop_out->s_sao;
+
+ ASSERT(ps_sao->b1_sao_merge_left_flag < 2);
+
+ u1_left_avail = ps_ctb_nbr_avail_flags->u1_left_avail;
+ u1_top_avail = ps_ctb_nbr_avail_flags->u1_top_avail;
+ /* the merge left flag is coded only when the left ctb is available */
+ if(u1_left_avail == 1)
+ {
+ /*Encode the sao_merge_left_flag as FL as per table 9-32*/
+ error |=
+ ihevce_cabac_encode_bin(ps_cabac, ps_sao->b1_sao_merge_left_flag, IHEVC_CAB_SAO_MERGE);
+
+ AEV_TRACE("sao_merge_flag", ps_sao->b1_sao_merge_left_flag, ps_cabac->u4_range);
+ }
+ /* the merge up flag is coded only when the top ctb is available and merge left is 0 */
+ if((u1_top_avail == 1) && (!ps_sao->b1_sao_merge_left_flag))
+ {
+ /*Encode the sao_merge_up_flag as FL as per table 9-32*/
+ error |=
+ ihevce_cabac_encode_bin(ps_cabac, ps_sao->b1_sao_merge_up_flag, IHEVC_CAB_SAO_MERGE);
+
+ AEV_TRACE("sao_merge_flag", ps_sao->b1_sao_merge_up_flag, ps_cabac->u4_range);
+ }
+ /* explicit sao parameters are coded only when neither merge flag is set */
+ if((!ps_sao->b1_sao_merge_left_flag) && (!ps_sao->b1_sao_merge_up_flag))
+ {
+ WORD32 c_idx;
+ WORD32 sao_type_idx = ps_sao->b3_y_type_idx;
+
+ /*Run a loop for y,cb and cr to encode the type idx for luma and chroma*/
+ for(c_idx = 0; c_idx < 3; c_idx++)
+ {
+ if((ps_slice_hdr->i1_slice_sao_luma_flag && c_idx == 0) ||
+ (ps_slice_hdr->i1_slice_sao_chroma_flag && c_idx > 0))
+ {
+ WORD32 ctxt_bin;
+
+ /**************************************************************************/
+ /* encode the sao_type_idx as per Table 9-33 */
+ /* First bin is context model based prefix : 1 if sao_type_idx > 0 else 0 */
+ /* Second bin is coded as bypass bin if sao_type_idx > 0 */
+ /**************************************************************************/
+ /* cr (c_idx == 2) codes no type idx and reuses the value loaded for cb */
+ if(c_idx < 2)
+ {
+ WORD32 sao_type_idx_temp;
+
+ ASSERT(ps_sao->b3_cb_type_idx == ps_sao->b3_cr_type_idx);
+
+ sao_type_idx = c_idx ? ps_sao->b3_cb_type_idx : ps_sao->b3_y_type_idx;
+
+ ctxt_bin = sao_type_idx ? 1 : 0;
+ /* every type idx above 1 is coded as symbol 2 */
+ if(sao_type_idx > 1)
+ {
+ sao_type_idx_temp = 2;
+ }
+ else
+ {
+ sao_type_idx_temp = sao_type_idx;
+ }
+
+ ASSERT(sao_type_idx_temp < 3);
+
+ /*Encode the first bin as context bin as per table 9-37*/
+ error |= ihevce_cabac_encode_bin(ps_cabac, ctxt_bin, IHEVC_CAB_SAO_TYPE);
+
+ if(sao_type_idx_temp)
+ {
+ /*Binarisation for sao_type_idx is TR(truncated rice) process as per
+ * table 9-32 with cMax=2 and cRiceParam=0
+ */
+
+ /* Encode the second bin as bypass bin as per below table*/
+ /*
+ |Symbol | Prefix |Prefix length |Prefix bins|
+ | 0 | 0 | 1 | 0 |
+ | 1 | 1 | 2 | 10 |
+ | 2 | 2 | 2 | 11 |
+
+ Since cRiceParam=0, there is no suffix code
+ */
+
+ error |= ihevce_cabac_encode_bypass_bin(ps_cabac, sao_type_idx_temp - 1);
+ }
+ AEV_TRACE("sao_type_idx", sao_type_idx_temp, ps_cabac->u4_range);
+ }
+ /* offsets (and band position / eo class) follow for a non zero type idx */
+ if(sao_type_idx != 0)
+ {
+ WORD32 i;
+ UWORD8 u1_bit_depth = ps_entropy_ctxt->ps_sps->i1_bit_depth_luma_minus8 + 8;
+ WORD8 *sao_offset;
+ WORD32 sao_band_position;
+ WORD32 c_max = (1 << (MIN(u1_bit_depth, 10) - 5)) -
+ 1; // largest offset magnitude passed to the tunary coder (7 for 8 bit)
+
+ if(c_idx == 0)
+ {
+ // luma : the four coded offsets are read
+ // through sao_offset[0..3], i.e. elements
+ // [1]..[4] of u1_y_offset; element [0] is
+ // not read by this function
+ sao_offset = &ps_sao->u1_y_offset[1];
+ sao_band_position = ps_sao->b5_y_band_pos;
+ }
+ else if(c_idx == 1)
+ {
+ // cb : the four coded offsets are read
+ // through sao_offset[0..3], i.e. elements
+ // [1]..[4] of u1_cb_offset; element [0] is
+ // not read by this function
+ sao_offset = &ps_sao->u1_cb_offset[1];
+ sao_band_position = ps_sao->b5_cb_band_pos;
+ }
+ else
+ {
+ // cr : the four coded offsets are read
+ // through sao_offset[0..3], i.e. elements
+ // [1]..[4] of u1_cr_offset; element [0] is
+ // not read by this function
+ sao_offset = &ps_sao->u1_cr_offset[1];
+ sao_band_position = ps_sao->b5_cr_band_pos;
+ }
+
+ for(i = 0; i < 4; i++)
+ {
+ /*Encode the sao offset value as tunary bypass*/
+ error |=
+ ihevce_cabac_encode_tunary_bypass(ps_cabac, abs(sao_offset[i]), c_max);
+
+ AEV_TRACE("sao_offset_abs", abs(sao_offset[i]), ps_cabac->u4_range);
+ }
+
+ /*Band offset case*/
+ if(sao_type_idx == 1)
+ {
+ for(i = 0; i < 4; i++)
+ {
+ if(sao_offset[i] != 0)
+ {
+ /*Encode the sao offset sign as FL as per table 9-32*/
+ error |= ihevce_cabac_encode_bypass_bin(
+ ps_cabac,
+ (abs(sao_offset[i]) + sao_offset[i] == 0)); // 1 when negative
+ // (the sign is sent as a bypass bin, so no
+ // context index is passed)
+
+ AEV_TRACE(
+ "sao_offset_sign",
+ (abs(sao_offset[i]) + sao_offset[i] == 0),
+ ps_cabac->u4_range);
+ }
+ }
+
+ /*Encode the sao band position as FL as per table 9-32*/
+ error |= ihevce_cabac_encode_bypass_bins(ps_cabac, sao_band_position, 5);
+ AEV_TRACE("sao_band_position", sao_band_position, ps_cabac->u4_range);
+ }
+ else
+ {
+ /*Encode the sao edge offset class for luma and chroma as FL as per table 9-32*/
+ if(c_idx == 0)
+ {
+ error |= ihevce_cabac_encode_bypass_bins(
+ ps_cabac, (ps_sao->b3_y_type_idx - 2), 2);
+ AEV_TRACE(
+ "sao_eo_class", (ps_sao->b3_y_type_idx - 2), ps_cabac->u4_range);
+ }
+ /* the chroma class is coded once, with cb; nothing is coded for cr */
+ if(c_idx == 1)
+ {
+ ASSERT(ps_sao->b3_cb_type_idx == ps_sao->b3_cr_type_idx);
+ error |= ihevce_cabac_encode_bypass_bins(
+ ps_cabac, (ps_sao->b3_cb_type_idx - 2), 2);
+ AEV_TRACE(
+ "sao_eo_class", (ps_sao->b3_cb_type_idx - 2), ps_cabac->u4_range);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return (error);
+}
+
+/**
+******************************************************************************
+*
+* @brief Encodes a coding quad tree (QuadTree syntax) as per section 7.3.8
+*
+* @par Description
+* Entropy encode of coding quad tree based on cu split flags of ctb as per
+* section:7.3.8
+*
+* The split cu flag of a node is coded explicitly only when the node lies
+* fully inside the current tile width / height, is larger than the minimum
+* coding block size and i1_ctb_num_pcm_blks is 0. Otherwise it is inferred:
+* 1 when the node is larger than the minimum coding block size, else 0.
+*
+* When split, the function recurses into the child nodes, skipping the
+* children whose origin is outside the current tile width / height. At a leaf
+* the cu at index i4_cu_idx of ps_ctb->ps_enc_cu is encoded, i4_cu_idx is
+* incremented and the top / left cu depth arrays are updated with ct_depth.
+*
+* @param[inout] ps_entropy_ctxt
+* pointer to entropy context (handle)
+*
+* @param[in] x0_frm
+* x co-ordinate of current cu node of coding tree
+*
+* @param[in] y0_frm
+* y co-ordinate of current cu node of coding tree
+*
+* @param[in] log2_cb_size
+* current cu node block size
+*
+* @param[in] ct_depth
+* depth of current cu node w.r.t ctb
+*
+* @param[in] ps_ctb
+* pointer to current ctb structure
+*
+* @param[in] ps_tile_params
+* pointer to tile params; i4_curr_tile_width and i4_curr_tile_height are
+* used as the boundary (pic_width / pic_height below) for split decisions
+*
+* @return success or failure error code (return codes of all cabac calls
+* and recursive calls are OR-ed together)
+*
+******************************************************************************
+*/
+WORD32 ihevce_encode_coding_quadtree(
+ entropy_context_t *ps_entropy_ctxt,
+ WORD32 x0_frm,
+ WORD32 y0_frm,
+ WORD32 log2_cb_size,
+ WORD32 ct_depth,
+ ctb_enc_loop_out_t *ps_ctb,
+ ihevce_tile_params_t *ps_tile_params)
+{
+ WORD32 ret = IHEVCE_SUCCESS;
+ sps_t *ps_sps = ps_entropy_ctxt->ps_sps;
+ pps_t *ps_pps = ps_entropy_ctxt->ps_pps;
+ WORD32 split_cu_flag;
+ WORD32 cu_idx = ps_entropy_ctxt->i4_cu_idx;
+ cu_enc_loop_out_t *ps_enc_cu = ps_ctb->ps_enc_cu + cu_idx;
+
+ /* CU size in pels (b4_cu_size is stored in units of 8 pels) */
+ WORD32 cu_size = ps_enc_cu->b4_cu_size << 3;
+
+ WORD32 pic_width = ps_tile_params->i4_curr_tile_width;
+ WORD32 pic_height = ps_tile_params->i4_curr_tile_height;
+
+ WORD32 log2_min_cb_size = ps_sps->i1_log2_min_coding_block_size;
+ WORD32 ctb_size = (1 << (log2_cb_size + ct_depth));
+ cab_ctxt_t *ps_cabac = &ps_entropy_ctxt->s_cabac_ctxt;
+
+ /* top row cu depth stored for frm_width (1byte per mincusize=8) */
+ UWORD8 *pu1_cu_depth_top = ps_entropy_ctxt->pu1_cu_depth_top;
+
+ /* left cu depth stored for one ctb column (1byte per mincusize=8) */
+ UWORD8 *pu1_cu_depth_left = &ps_entropy_ctxt->au1_cu_depth_left[0];
+
+ /* calculation of top and left nbr availability */
+ WORD32 top_avail;
+ WORD32 left_avail;
+
+ /* top and left cu within ctb or outside ctb boundary:
+ a node that is not on the ctb's left / top edge always has its neighbour
+ available; otherwise the ctb level availability flags are used */
+ left_avail = (x0_frm & (ctb_size - 1)) ? 1 : ps_ctb->s_ctb_nbr_avail_flags.u1_left_avail;
+ top_avail = (y0_frm & (ctb_size - 1)) ? 1 : ps_ctb->s_ctb_nbr_avail_flags.u1_top_avail;
+
+ /* Sanity checks */
+ ASSERT(ct_depth <= 3);
+ ASSERT((cu_idx >= 0) && (cu_idx < ps_ctb->u1_num_cus_in_ctb));
+ ASSERT(cu_size >= (1 << log2_min_cb_size));
+ ASSERT(((ps_enc_cu->b3_cu_pos_x << 3) + cu_size) <= (UWORD32)ctb_size);
+ ASSERT(((ps_enc_cu->b3_cu_pos_y << 3) + cu_size) <= (UWORD32)ctb_size);
+
+ /* Encode cu split flags based on following conditions; See section 7.3.8*/
+ if(((x0_frm + (1 << log2_cb_size)) <= pic_width) &&
+ ((y0_frm + (1 << log2_cb_size)) <= pic_height) && (log2_cb_size > log2_min_cb_size) &&
+ (ps_entropy_ctxt->i1_ctb_num_pcm_blks == 0))
+ {
+ /* encode the split cu flag */
+ WORD32 ctxt_inc = IHEVC_CAB_SPLIT_CU_FLAG;
+ UWORD32 u4_bits_estimated_prev;
+ /* Context increment for split cu flag as per Table 9-38: one is added
+ for each available top / left neighbour whose stored cu depth is
+ greater than the current ct_depth */
+ if(top_avail)
+ {
+ ctxt_inc += (pu1_cu_depth_top[x0_frm >> 3] > ct_depth);
+ }
+
+ if(left_avail)
+ {
+ ctxt_inc += (pu1_cu_depth_left[(y0_frm >> 3) & 0x7] > ct_depth);
+ }
+
+ /* split if actual cu size is smaller than target cu size */
+ split_cu_flag = cu_size < (1 << log2_cb_size);
+ u4_bits_estimated_prev = ps_cabac->u4_bits_estimated_q12;
+ ret |= ihevce_cabac_encode_bin(ps_cabac, split_cu_flag, ctxt_inc);
+
+ if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
+ { // clang-format off
+ /*PIC INFO : populate cu split flag*/
+ ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_split_cu_flag +=
+ (ps_cabac->u4_bits_estimated_q12 - u4_bits_estimated_prev);
+ } // clang-format on
+
+ AEV_TRACE("split_cu_flag", split_cu_flag, ps_cabac->u4_range);
+ if(split_cu_flag == 0)
+ {
+ AEV_TRACE("split_cu_flag : X0", (x0_frm >> 6) << 6, ps_cabac->u4_range);
+ AEV_TRACE("split_cu_flag : Y0", (y0_frm >> 6) << 6, ps_cabac->u4_range);
+ }
+ }
+ else
+ {
+ /*********************************************************************/
+ /* split cu is implicitly derived as 1 in frame/slice boundary case */
+ /* else split cu is implicitly derived as 0 if mincu size is reached */
+ /*********************************************************************/
+ if(log2_cb_size > ps_sps->i1_log2_min_coding_block_size)
+ split_cu_flag = 1;
+ else
+ split_cu_flag = 0;
+ }
+
+ /************************************************************************/
+ /* Reset qp delta coded flag appropriately so as to signal qp rightly */
+ /* during transform coding */
+ /************************************************************************/
+ if((ps_pps->i1_cu_qp_delta_enabled_flag) && (ct_depth <= (ps_pps->i1_diff_cu_qp_delta_depth)))
+
+ {
+ ps_entropy_ctxt->i1_encode_qp_delta = 1;
+ }
+ /*else
+ {
+ ps_entropy_ctxt->i1_encode_qp_delta = 0;
+ }*/
+
+ if(split_cu_flag)
+ {
+ /* recurse quad tree till a leaf node is reached;
+ (x1_frm, y1_frm) is the centre of the current node, i.e. the origin
+ of the bottom-right child */
+ WORD32 x1_frm = x0_frm + ((1 << log2_cb_size) >> 1);
+ WORD32 y1_frm = y0_frm + ((1 << log2_cb_size) >> 1);
+
+ /* node0 of quad tree */
+ ret |= ihevce_encode_coding_quadtree(
+ ps_entropy_ctxt, x0_frm, y0_frm, log2_cb_size - 1, ct_depth + 1, ps_ctb, ps_tile_params);
+
+ if(x1_frm < pic_width)
+ { /* node1 of quad tree */
+ ret |= ihevce_encode_coding_quadtree(
+ ps_entropy_ctxt,
+ x1_frm,
+ y0_frm,
+ log2_cb_size - 1,
+ ct_depth + 1,
+ ps_ctb,
+ ps_tile_params);
+ }
+
+ if(y1_frm < pic_height)
+ {
+ /* node2 of quad tree */
+ ret |= ihevce_encode_coding_quadtree(
+ ps_entropy_ctxt,
+ x0_frm,
+ y1_frm,
+ log2_cb_size - 1,
+ ct_depth + 1,
+ ps_ctb,
+ ps_tile_params);
+ }
+
+ if((x1_frm < pic_width) && (y1_frm < pic_height))
+ {
+ /* node3 of quad tree */
+ ret |= ihevce_encode_coding_quadtree(
+ ps_entropy_ctxt,
+ x1_frm,
+ y1_frm,
+ log2_cb_size - 1,
+ ct_depth + 1,
+ ps_ctb,
+ ps_tile_params);
+ }
+ }
+ else
+ {
+ /* leaf node is reached! Encode the CU */
+ WORD32 i;
+
+ /* sanity checks */
+ ASSERT(ps_entropy_ctxt->i1_ctb_num_pcm_blks == 0);
+
+ if(ps_entropy_ctxt->i1_ctb_num_pcm_blks == 0)
+ {
+ UWORD32 u4_bits_eztimated = ps_entropy_ctxt->s_cabac_ctxt.u4_bits_estimated_q12;
+ /* Encode a non-PCM CU */
+ /*PIC INFO: populate total TUs*/
+ if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
+ {
+ ps_entropy_ctxt->ps_pic_level_info->i8_total_tu += ps_enc_cu->u2_num_tus_in_cu;
+ }
+
+ ret |= ihevce_cabac_encode_coding_unit(
+ ps_entropy_ctxt, ps_enc_cu, ct_depth, top_avail, left_avail);
+
+ if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
+ {
+ // clang-format off
+ if(PRED_MODE_INTRA == ps_enc_cu->b1_pred_mode_flag)
+ {
+ ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_intra +=
+ (ps_entropy_ctxt->s_cabac_ctxt.u4_bits_estimated_q12 -
+ u4_bits_eztimated);
+ }
+ else
+ {
+ ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_inter +=
+ (ps_entropy_ctxt->s_cabac_ctxt.u4_bits_estimated_q12 -
+ u4_bits_eztimated);
+ }
+ // clang-format on
+ }
+ }
+ else
+ { //TODO: //PCM not supported in this encoder
+ }
+
+ /* update cu_idx, left and top arrays for cudepth after encoding cu;
+ one entry is written per 8 pels covered by the cu */
+ ps_entropy_ctxt->i4_cu_idx++;
+ for(i = 0; i < (cu_size >> 3); i++)
+ {
+ pu1_cu_depth_top[(x0_frm >> 3) + i] = ct_depth;
+ pu1_cu_depth_left[((y0_frm >> 3) & 0x7) + i] = ct_depth;
+ }
+ }
+
+ return ret;
+}
+
+/**
+******************************************************************************
+*
+* @brief Encodes slice data (General Slice syntax) as per section 7.3.6.1
+*
+* @par Description
+* Entropy encode of all ctbs in a slice as per section 7.3.6.1
+*
+* Ctbs are encoded one at a time (SAO syntax, then the coding quadtree)
+* until the slice segment ends. How the segment end is decided depends on
+* ps_entropy_ctxt->i4_slice_segment_mode:
+* 0 : the segment ends only when the slice ends
+* 1 : i4_slice_seg_len is incremented once per ctb and the segment is closed
+* when it reaches i4_slice_segment_max_length
+* 2 : after every ctb an end-of-slice terminate bin is encoded tentatively
+* and i4_slice_seg_len is set to the stream offset measured from the
+* slice segment start; if it exceeds i4_slice_segment_max_length the
+* cabac / neighbour / pic-info state is reverted to the previous ctb and
+* the segment is closed there (or, if this was the first ctb, the segment
+* is closed after it with a warning); otherwise, when the slice has not
+* ended, the tentative terminate is undone
+* The ctb position where the next slice segment starts is stored in
+* i4_next_slice_seg_x / i4_next_slice_seg_y.
+*
+* When entropy coding sync is enabled, the current stream offset is stored in
+* ps_slice_hdr->pu4_entry_point_offset[i4_ctb_y] at every row start and once
+* more after the last ctb.
+* NOTE(review): only the in-loop write is guarded by an ASSERT against
+* MAX_NUM_CTB_ROWS_FRM; the write after the loop is not - confirm the array
+* has room for that index.
+*
+* @param[inout] ps_entropy_ctxt
+* pointer to entropy context (handle)
+*
+* @param[in] ps_tile_params
+* pointer to tile params; i4_curr_tile_width / i4_curr_tile_height bound the
+* ctb loop and i4_tiles_enabled_flag selects how the ctb pointer is advanced
+*
+* @param[out] pi4_end_of_slice_flag
+* written after every ctb with (y0_frm >= tile height); forced to 0 when
+* slice segment mode 2 reverts to the previous ctb
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ihevce_encode_slice_data(
+ entropy_context_t *ps_entropy_ctxt,
+ ihevce_tile_params_t *ps_tile_params,
+ WORD32 *pi4_end_of_slice_flag)
+{
+ WORD32 ret = IHEVCE_SUCCESS;
+ WORD32 end_of_slice_seg_flag = 0;
+ sps_t *ps_sps = ps_entropy_ctxt->ps_sps;
+ pps_t *ps_pps = ps_entropy_ctxt->ps_pps;
+ slice_header_t *ps_slice_hdr = ps_entropy_ctxt->ps_slice_hdr;
+
+ cab_ctxt_t *ps_cabac = &ps_entropy_ctxt->s_cabac_ctxt;
+
+ /* State of previous CTB before its terminate bin is encoded */
+ cab_ctxt_t s_cabac_prev_ctb;
+
+ /* State after current CTB's encoding is complete but before
+ the terminate bin encoding */
+ cab_ctxt_t s_cabac_after_ctb;
+
+ /* Storing the last 4 bytes before adding terminate bin
+ as these 4 bytes might get corrupted while encoding terminate bin.
+ NOTE(review): the save / restore below goes through a UWORD32 pointer cast
+ of (pu1_strm_buffer + u4_strm_buf_offset - 4); nothing here guarantees
+ 4-byte alignment or an offset of at least 4 - consider memcpy */
+ UWORD32 u4_prev_ctb_temp, u4_cur_ctb_temp;
+ WORD8 i1_last_cu_qp = 0;
+ bitstrm_t *ps_bit_strm = &ps_entropy_ctxt->s_bit_strm;
+
+ WORD32 log2_ctb_size, ctb_size;
+ //WORD32 pic_width = ps_sps->i2_pic_width_in_luma_samples;
+ //WORD32 pic_height = ps_sps->i2_pic_height_in_luma_samples;
+ WORD32 pic_width = ps_tile_params->i4_curr_tile_width;
+ WORD32 pic_height = ps_tile_params->i4_curr_tile_height;
+ WORD32 num_ctb_in_row;
+
+ WORD32 i4_curr_ctb_x, i4_curr_ctb_y;
+ UWORD32 u4_slice_seg_hdr_size = (UWORD32)ps_entropy_ctxt->i4_slice_seg_len;
+ UWORD32 u4_slice_start_offset = ps_bit_strm->u4_strm_buf_offset - u4_slice_seg_hdr_size;
+
+ WORD32 ctb_slice_address = ps_slice_hdr->i2_slice_address;
+ WORD32 slice_qp = ps_slice_hdr->i1_slice_qp_delta + ps_pps->i1_pic_init_qp;
+ WORD32 cabac_init_idc;
+ WORD32 x0_frm, y0_frm;
+ ctb_enc_loop_out_t *ps_first_ctb; // Points to first CTB of ctb-row
+ ctb_enc_loop_out_t *ps_ctb;
+ WORD32 ctb_ctr = 0; //count ctb encoded in a ctb-row
+
+ ihevce_sys_api_t *ps_sys_api = (ihevce_sys_api_t *)ps_entropy_ctxt->pv_sys_api;
+
+ /* Structure to backup pic info in case we need to revert back to previous
+ CTB when i4_slice_segment_mode is 2 */
+ s_pic_level_acc_info_t s_pic_level_info_backup; // info before
+
+ /* Initialize the CTB size from sps parameters */
+ log2_ctb_size =
+ ps_sps->i1_log2_min_coding_block_size + ps_sps->i1_log2_diff_max_min_coding_block_size;
+
+ ctb_size = (1 << log2_ctb_size);
+
+ /* sanity checks */
+ ASSERT((log2_ctb_size >= 3) && (log2_ctb_size <= 6));
+
+ ps_entropy_ctxt->i1_log2_ctb_size = (WORD8)log2_ctb_size;
+
+ /* Initialise before starting slice. For single slice case both
+ x and y will be set to zero */
+ ps_entropy_ctxt->i4_ctb_x = ps_entropy_ctxt->i4_next_slice_seg_x;
+ ps_entropy_ctxt->i4_ctb_y = ps_entropy_ctxt->i4_next_slice_seg_y;
+ num_ctb_in_row = (ps_sps->i2_pic_width_in_luma_samples + ctb_size - 1) >> log2_ctb_size;
+
+ /* initialize the cabac init idc based on slice type */
+ if(ps_slice_hdr->i1_slice_type == ISLICE)
+ {
+ cabac_init_idc = 0;
+ }
+ else if(ps_slice_hdr->i1_slice_type == PSLICE)
+ {
+ cabac_init_idc = ps_slice_hdr->i1_cabac_init_flag ? 2 : 1;
+ }
+ else
+ {
+ cabac_init_idc = ps_slice_hdr->i1_cabac_init_flag ? 1 : 2;
+ }
+ ps_cabac->i1_entropy_coding_sync_enabled_flag = ps_pps->i1_entropy_coding_sync_enabled_flag;
+
+ /* Dependent slices should be ON only when slice segment mode is enabled */
+ if(ps_slice_hdr->i1_dependent_slice_flag == 1)
+ {
+ ASSERT(
+ (ps_entropy_ctxt->i4_slice_segment_mode == 1) ||
+ (ps_entropy_ctxt->i4_slice_segment_mode == 2));
+ }
+
+ /* initialize the cabac engine. For dependent slice segments
+ cabac context models will not be reset */
+ if(ps_slice_hdr->i1_dependent_slice_flag == 1)
+ {
+ ret = ihevce_cabac_reset(ps_cabac, ps_bit_strm, CABAC_MODE_ENCODE_BITS);
+ }
+ else
+ {
+ ret = ihevce_cabac_init(
+ ps_cabac,
+ ps_bit_strm,
+ CLIP3(slice_qp, 0, IHEVC_MAX_QP),
+ cabac_init_idc,
+ CABAC_MODE_ENCODE_BITS);
+
+ /* initialize qp to slice start qp */
+ ps_entropy_ctxt->i1_cur_qp = slice_qp;
+ }
+
+ /* initialize slice x and y offset in pels w.r.t top left corner */
+ x0_frm = ps_entropy_ctxt->i4_ctb_x << log2_ctb_size;
+ y0_frm = ps_entropy_ctxt->i4_ctb_y << log2_ctb_size;
+
+ /* Pointing ctb structure to the correct CTB in frame based on
+ slice address; ps_ctb starts one before it because the non-tile path
+ pre-increments ps_ctb at the top of every loop iteration */
+ ps_first_ctb = ps_entropy_ctxt->ps_frm_ctb + ctb_slice_address;
+ ps_ctb = ps_first_ctb - 1;
+
+ //ps_entropy_ctxt->i4_ctb_slice_x = 0;
+ //ps_entropy_ctxt->i4_ctb_slice_y = 0;
+
+ /* Setting to NULL to detect if first CTB of slice itself
+ exceeds the i4_slice_segment_max_length. Will be used only if
+ i4_slice_segment_mode is non-zero */
+ s_cabac_prev_ctb.pu1_strm_buffer = NULL;
+
+ do
+ {
+ UWORD8 au1_cu_depth_top[8] = { 0 }, au1_cu_depth_left[8] = { 0 };
+ UWORD8 u1_skip_cu_top = 0;
+ UWORD32 u4_skip_cu_left = 0;
+
+ /* By default assume that slice-segment is going to end after
+ current CTB */
+ end_of_slice_seg_flag = 1;
+
+ i4_curr_ctb_x = ps_entropy_ctxt->i4_ctb_x;
+ i4_curr_ctb_y = ps_entropy_ctxt->i4_ctb_y;
+
+ if(1 == ps_tile_params->i4_tiles_enabled_flag)
+ {
+ ps_ctb = ps_first_ctb + ctb_ctr;
+ }
+ else
+ {
+ ps_ctb++;
+ }
+
+ /* Store some parameters. Will be used if current CTB's encoding
+ has to be reverted in the event of overflow beyond i4_slice_segment_max_length */
+ if(2 == ps_entropy_ctxt->i4_slice_segment_mode)
+ {
+ /* Store CU depths flag */
+ memcpy(au1_cu_depth_top, &ps_entropy_ctxt->pu1_cu_depth_top[i4_curr_ctb_x * 8], 8);
+ memcpy(au1_cu_depth_left, ps_entropy_ctxt->au1_cu_depth_left, 8);
+
+ /* Store CU skip flags */
+ u1_skip_cu_top = *(ps_entropy_ctxt->pu1_skip_cu_top + i4_curr_ctb_x);
+ u4_skip_cu_left = ps_entropy_ctxt->u4_skip_cu_left;
+
+ /* Backup current state of pic info */
+ s_pic_level_info_backup = *(ps_entropy_ctxt->ps_pic_level_info);
+ }
+
+ /* Section:7.3.7 Coding tree unit syntax */
+ /* coding_tree_unit() inlined here */
+ ps_entropy_ctxt->i1_ctb_num_pcm_blks = 0;
+
+ /* Simple Neighbour avail calculation */
+ ps_ctb->s_ctb_nbr_avail_flags.u1_left_avail = (x0_frm > 0);
+ ps_ctb->s_ctb_nbr_avail_flags.u1_top_avail = (y0_frm > 0);
+
+ ps_entropy_ctxt->i4_cu_idx = 0;
+
+ /* Encode SAO syntax as per section 7.3.8.3 */
+ if(ps_sps->i1_sample_adaptive_offset_enabled_flag)
+ {
+ if((ps_slice_hdr->i1_slice_sao_luma_flag) || (ps_slice_hdr->i1_slice_sao_chroma_flag))
+ {
+ /*PIC INFO: SAO encode bits*/
+ UWORD32 u4_bits_estimated_prev =
+ ps_entropy_ctxt->s_cabac_ctxt.u4_bits_estimated_q12;
+
+ ret |= ihevce_cabac_encode_sao(ps_entropy_ctxt, ps_ctb);
+
+ if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
+ {
+ ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_sao +=
+ (ps_entropy_ctxt->s_cabac_ctxt.u4_bits_estimated_q12 -
+ u4_bits_estimated_prev);
+ }
+ }
+ }
+
+ ps_entropy_ctxt->s_cabac_ctxt.u4_bits_estimated_q12 = 0;
+
+ if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
+ {
+ /*PIC_INFO: Update total no.of CUS*/
+ ps_entropy_ctxt->ps_pic_level_info->i8_total_cu += ps_ctb->u1_num_cus_in_ctb;
+ }
+ /* call recursive coding tree structure to encode all cus in ctb */
+ ret |= ihevce_encode_coding_quadtree(
+ ps_entropy_ctxt, x0_frm, y0_frm, log2_ctb_size, 0, ps_ctb, ps_tile_params);
+
+ /* post ctb encode increments */
+ ctb_ctr++;
+ x0_frm += ctb_size;
+ ps_entropy_ctxt->i4_ctb_x++;
+ //ps_entropy_ctxt->i4_ctb_slice_x++;
+
+ if(ps_pps->i1_entropy_coding_sync_enabled_flag && ps_entropy_ctxt->i4_ctb_x == 2)
+ {
+ /*backup cabac context at end of second CTB(top right neighbour for start of bottom row)*/
+ ihevce_cabac_ctxt_backup(ps_cabac);
+ }
+
+ /* end of row check using x0_frm offset */
+ if(x0_frm >= pic_width)
+ {
+ ctb_ctr = 0;
+ ps_first_ctb += num_ctb_in_row;
+ x0_frm = 0;
+ y0_frm += ctb_size;
+
+ ps_entropy_ctxt->i4_ctb_x = 0;
+ ps_entropy_ctxt->i4_ctb_y++;
+ //ps_entropy_ctxt->i4_ctb_slice_y++;
+ }
+
+ /* Detect end of slice. Which would mean end-of-slice-segment too */
+ *pi4_end_of_slice_flag = (y0_frm >= pic_height);
+
+ if(0 == ps_entropy_ctxt->i4_slice_segment_mode)
+ {
+ /* If slice ends then so does slice segment */
+ end_of_slice_seg_flag = *pi4_end_of_slice_flag;
+
+ /* encode terminate bin */
+ ret |= ihevce_cabac_encode_terminate(ps_cabac, end_of_slice_seg_flag, 0);
+ }
+ else if(1 == ps_entropy_ctxt->i4_slice_segment_mode)
+ {
+ ps_entropy_ctxt->i4_slice_seg_len++;
+ if((ps_entropy_ctxt->i4_slice_seg_len) >= ps_entropy_ctxt->i4_slice_segment_max_length)
+ {
+ /* Store the address of CTB from where next slice segment will start */
+ ps_entropy_ctxt->i4_next_slice_seg_x = ps_entropy_ctxt->i4_ctb_x;
+ ps_entropy_ctxt->i4_next_slice_seg_y = ps_entropy_ctxt->i4_ctb_y;
+ }
+ else
+ {
+ /* If slice ends then so does slice segment */
+ end_of_slice_seg_flag = *pi4_end_of_slice_flag;
+ }
+
+ /* encode terminate bin */
+ ret |= ihevce_cabac_encode_terminate(ps_cabac, end_of_slice_seg_flag, 0);
+ }
+ else if(2 == ps_entropy_ctxt->i4_slice_segment_mode)
+ {
+ //WORD32 i4_slice_seg_len_prev = i4_slice_seg_len;
+
+ /* Store some parameters. Will be used to revert back to this state if
+ i4_slice_segment_max_length is not exceeded after encoding end-of-slice */
+ s_cabac_after_ctb = *ps_cabac;
+ u4_cur_ctb_temp =
+ *((UWORD32 *)(ps_cabac->pu1_strm_buffer + ps_cabac->u4_strm_buf_offset - 4));
+
+ /* encode terminate bin. For dependent slices, always simulate
+ end-of-slice to check if i4_slice_segment_max_length is surpassed */
+ ret |= ihevce_cabac_encode_terminate(ps_cabac, 1, 0);
+
+ //i4_slice_seg_len_prev = i4_slice_seg_len;
+ ps_entropy_ctxt->i4_slice_seg_len =
+ (WORD32)(ps_cabac->u4_strm_buf_offset - u4_slice_start_offset);
+
+ //ps_entropy_ctxt->i4_slice_seg_len = i4_slice_seg_len; //No need to update it.
+
+ if(ps_entropy_ctxt->i4_slice_seg_len > ps_entropy_ctxt->i4_slice_segment_max_length)
+ {
+ if(s_cabac_prev_ctb.pu1_strm_buffer == NULL)
+ {
+ /* Bytes in a single CTB has exceeded the i4_slice_segment_max_length
+ set by the user. Close the slice-segment and print a warning */
+
+ /* Store the address of CTB from where next slice segment will start */
+ ps_entropy_ctxt->i4_next_slice_seg_x = ps_entropy_ctxt->i4_ctb_x;
+ ps_entropy_ctxt->i4_next_slice_seg_y = ps_entropy_ctxt->i4_ctb_y;
+
+ ps_sys_api->ihevce_printf(
+ ps_sys_api->pv_cb_handle,
+ "IHEVCE_WARNING: CTB(%2d, %2d) encoded using %d bytes; "
+ "this exceeds max slice segment size %d as requested "
+ "by the user\n",
+ i4_curr_ctb_x,
+ i4_curr_ctb_y,
+ ps_entropy_ctxt->i4_slice_seg_len,
+ ps_entropy_ctxt->i4_slice_segment_max_length);
+ }
+ else /* Revert back to previous CTB's state and close current slice */
+ {
+ *ps_cabac = s_cabac_prev_ctb;
+ *((UWORD32 *)(ps_cabac->pu1_strm_buffer + ps_cabac->u4_strm_buf_offset - 4)) =
+ u4_prev_ctb_temp;
+
+ memcpy(
+ &ps_entropy_ctxt->pu1_cu_depth_top[i4_curr_ctb_x * 8], au1_cu_depth_top, 8);
+ memcpy(ps_entropy_ctxt->au1_cu_depth_left, au1_cu_depth_left, 8);
+
+ *(ps_entropy_ctxt->pu1_skip_cu_top + i4_curr_ctb_x) = u1_skip_cu_top;
+ ps_entropy_ctxt->u4_skip_cu_left = u4_skip_cu_left;
+
+ ps_entropy_ctxt->i1_cur_qp = i1_last_cu_qp;
+
+ /* Restore pic info */
+ *(ps_entropy_ctxt->ps_pic_level_info) = s_pic_level_info_backup;
+
+ /* encode terminate bin with end-of-slice */
+ ret |= ihevce_cabac_encode_terminate(ps_cabac, 1, 0);
+
+ /* Store the address of CTB from where next slice segment will start */
+ ps_entropy_ctxt->i4_next_slice_seg_x = i4_curr_ctb_x;
+ ps_entropy_ctxt->i4_next_slice_seg_y = i4_curr_ctb_y;
+
+ /* As we have reverted back to the previous CTB, force end of slice to zero */
+ *pi4_end_of_slice_flag = 0;
+ }
+ }
+ else if(0 == *pi4_end_of_slice_flag)
+ {
+ /* As this is not the end of slice, therefore revert back
+ the end-of-slice encoding and then add terminate bit */
+
+ /* Signal that this is not slice segment end */
+ end_of_slice_seg_flag = 0;
+
+ *ps_cabac = s_cabac_after_ctb;
+ *((UWORD32 *)(ps_cabac->pu1_strm_buffer + ps_cabac->u4_strm_buf_offset - 4)) =
+ u4_cur_ctb_temp;
+
+ /* encode terminate bin */
+ ret |= ihevce_cabac_encode_terminate(ps_cabac, 0, 0);
+ }
+
+ /* Update variables storing previous CTB's state in order to be
+ able to revert to previous CTB's state */
+ s_cabac_prev_ctb = s_cabac_after_ctb;
+ u4_prev_ctb_temp = u4_cur_ctb_temp;
+
+ i1_last_cu_qp = ps_entropy_ctxt->i1_cur_qp;
+ }
+ else //No other slice segment mode supported
+ {
+ ASSERT(0);
+ }
+
+ AEV_TRACE("end_of_slice_flag", end_of_slice_seg_flag, ps_cabac->u4_range);
+
+ if((0 == ps_entropy_ctxt->i4_ctb_x) && (!end_of_slice_seg_flag) &&
+ (ps_pps->i1_entropy_coding_sync_enabled_flag))
+ {
+ /* initialize qp to slice start qp */
+ ps_entropy_ctxt->i1_cur_qp = slice_qp;
+
+ /* flush and align to byte boundary for entropy sync every row */
+ ret |= ihevce_cabac_encode_terminate(ps_cabac, 1, 1);
+
+ /*This will be entered only during row end, tap bits generated in that row to cal entry point offset*/
+ /*add error check to make sure row count doesnt exceed the size of array allocated*/
+ ASSERT(ps_entropy_ctxt->i4_ctb_y < MAX_NUM_CTB_ROWS_FRM);
+ ps_slice_hdr->pu4_entry_point_offset[ps_entropy_ctxt->i4_ctb_y] =
+ ps_cabac->u4_strm_buf_offset;
+
+ /*init the cabac context with top right neighbour*/
+ ret |= ihevce_cabac_ctxt_row_init(ps_cabac);
+ }
+
+ } while(!end_of_slice_seg_flag);
+
+ if(end_of_slice_seg_flag && ps_pps->i1_entropy_coding_sync_enabled_flag)
+ {
+ ps_slice_hdr->pu4_entry_point_offset[ps_entropy_ctxt->i4_ctb_y] =
+ ps_cabac->u4_strm_buf_offset;
+ }
+
+ return ret;
+}
diff --git a/encoder/ihevce_cabac_rdo.c b/encoder/ihevce_cabac_rdo.c
new file mode 100644
index 0000000..fadf479
--- /dev/null
+++ b/encoder/ihevce_cabac_rdo.c
@@ -0,0 +1,868 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+******************************************************************************
+* @file ihevce_cabac_rdo.c
+*
+* @brief
+* This file contains function definitions for rdopt cabac entropy modules
+*
+* @author
+* ittiam
+*
+* @List of Functions
+* ihevce_entropy_rdo_frame_init()
+* ihevce_entropy_rdo_ctb_init()
+* ihevce_entropy_rdo_encode_cu()
+* ihevce_cabac_rdo_encode_sao()
+* ihevce_update_best_sao_cabac_state()
+* ihevce_entropy_update_best_cu_states()
+* ihevce_entropy_rdo_encode_tu()
+* ihevce_entropy_rdo_encode_tu_rdoq()
+* ihevce_entropy_rdo_copy_states()
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_entropy_structs.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+#include "ihevce_enc_loop_structs.h"
+#include "ihevce_cabac_rdo.h"
+#include "ihevce_trace.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief Cabac rdopt frame level initialization.
+*
+* @par Description
+* Registers the sps,vps,pps,slice header pointers in rdopt entropy contexts
+* and initializes cabac engine (init states) for both cu entropy contexts
+* (as_cu_entropy_ctxt[0] and as_cu_entropy_ctxt[1]) in bits compute mode.
+* The resulting context models of as_cu_entropy_ctxt[0] are also copied
+* into au1_init_cabac_ctxt_states.
+*
+* @param[inout] ps_rdopt_entropy_ctxt
+* pointer to rdopt entropy context (handle)
+*
+* @param[in] ps_slice_hdr
+* pointer to current slice header
+*
+* @param[in] ps_pps
+* pointer to active PPS params
+*
+* @param[in] ps_sps
+* pointer to active SPS params
+*
+* @param[in] ps_vps
+* pointer to active VPS params
+*
+* @param[in] pu1_cu_skip_top_row
+* pointer to top row cu skip flags (registered at frame level)
+*
+* @param[in] ps_rc_quant_ctxt
+* rc quant context; only its i2_min_qp / i2_max_qp are read, inside the
+* sanity ASSERT on the slice qp
+*
+* @return none
+*
+******************************************************************************
+*/
+void ihevce_entropy_rdo_frame_init(
+ rdopt_entropy_ctxt_t *ps_rdopt_entropy_ctxt,
+ slice_header_t *ps_slice_hdr,
+ pps_t *ps_pps,
+ sps_t *ps_sps,
+ vps_t *ps_vps,
+ UWORD8 *pu1_cu_skip_top_row,
+ rc_quant_t *ps_rc_quant_ctxt)
+{
+ WORD32 slice_qp = ps_slice_hdr->i1_slice_qp_delta + ps_pps->i1_pic_init_qp;
+
+ /* Initialize the CTB size from sps parameters */
+ WORD32 log2_ctb_size =
+ ps_sps->i1_log2_min_coding_block_size + ps_sps->i1_log2_diff_max_min_coding_block_size;
+
+ WORD32 cabac_init_idc;
+
+ (void)ps_rc_quant_ctxt;
+ /* sanity checks */
+ ASSERT((log2_ctb_size >= 3) && (log2_ctb_size <= 6));
+ ASSERT((slice_qp >= ps_rc_quant_ctxt->i2_min_qp) && (slice_qp <= ps_rc_quant_ctxt->i2_max_qp));
+
+ /* register the sps,vps,pps, slice header pts in all cu entropy ctxts */
+ ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[0].ps_vps = ps_vps;
+ ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[0].ps_sps = ps_sps;
+ ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[0].ps_pps = ps_pps;
+ ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[0].ps_slice_hdr = ps_slice_hdr;
+
+ ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[1].ps_vps = ps_vps;
+ ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[1].ps_sps = ps_sps;
+ ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[1].ps_pps = ps_pps;
+ ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[1].ps_slice_hdr = ps_slice_hdr;
+
+ /* initialize the skip cu top row ptrs for all rdo entropy contexts */
+ ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[0].pu1_skip_cu_top = pu1_cu_skip_top_row;
+
+ ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[1].pu1_skip_cu_top = pu1_cu_skip_top_row;
+
+ ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[0].i1_log2_ctb_size = log2_ctb_size;
+
+ ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[1].i1_log2_ctb_size = log2_ctb_size;
+
+ /* initialize the skip cu left flags for all rdo entropy contexts */
+ ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[0].u4_skip_cu_left = 0;
+ ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[1].u4_skip_cu_left = 0;
+
+ ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[0].i1_ctb_num_pcm_blks = 0;
+ ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[1].i1_ctb_num_pcm_blks = 0;
+
+ /* residue encoding should be enabled if ZERO_CBF eval is disabled */
+#if((!RDOPT_ZERO_CBF_ENABLE) && (RDOPT_ENABLE))
+ ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[0].i4_enable_res_encode = 1;
+ ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[1].i4_enable_res_encode = 1;
+#else
+ ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[0].i4_enable_res_encode = 0;
+ ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[1].i4_enable_res_encode = 0;
+#endif
+
+ /*************************************************************************/
+ /* Note pu1_cbf_cb, pu1_cbf_cr initialization are done with array idx 1 */
+ /* This is because these flags are accessed as pu1_cbf_cb[tfr_depth - 1] */
+ /* without checking for tfr_depth= 0 */
+ /*************************************************************************/
+ ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[0].apu1_cbf_cb[0] =
+ &ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[0].au1_cbf_cb[0][1];
+
+ ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[1].apu1_cbf_cb[0] =
+ &ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[1].au1_cbf_cb[0][1];
+
+ ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[0].apu1_cbf_cr[0] =
+ &ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[0].au1_cbf_cr[0][1];
+
+ ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[1].apu1_cbf_cr[0] =
+ &ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[1].au1_cbf_cr[0][1];
+
+ ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[0].apu1_cbf_cb[1] =
+ &ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[0].au1_cbf_cb[1][1];
+
+ ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[1].apu1_cbf_cb[1] =
+ &ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[1].au1_cbf_cb[1][1];
+
+ ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[0].apu1_cbf_cr[1] =
+ &ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[0].au1_cbf_cr[1][1];
+
+ ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[1].apu1_cbf_cr[1] =
+ &ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[1].au1_cbf_cr[1][1];
+
+ memset(
+ ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[0].au1_cbf_cb,
+ 0,
+ (MAX_TFR_DEPTH + 1) * 2 * sizeof(UWORD8));
+
+ memset(
+ ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[1].au1_cbf_cb,
+ 0,
+ (MAX_TFR_DEPTH + 1) * 2 * sizeof(UWORD8));
+
+ memset(
+ ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[0].au1_cbf_cr,
+ 0,
+ (MAX_TFR_DEPTH + 1) * 2 * sizeof(UWORD8));
+
+ memset(
+ ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[1].au1_cbf_cr,
+ 0,
+ (MAX_TFR_DEPTH + 1) * 2 * sizeof(UWORD8));
+
+ /* initialize the cabac init idc based on slice type */
+ if(ps_slice_hdr->i1_slice_type == ISLICE)
+ {
+ cabac_init_idc = 0;
+ }
+ else if(ps_slice_hdr->i1_slice_type == PSLICE)
+ {
+ cabac_init_idc = ps_slice_hdr->i1_cabac_init_flag ? 2 : 1;
+ }
+ else
+ {
+ cabac_init_idc = ps_slice_hdr->i1_cabac_init_flag ? 1 : 2;
+ }
+
+ /* all the entropy contexts in rdo initialized in bit compute mode */
+ ihevce_cabac_init(
+ &ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[0].s_cabac_ctxt,
+ NULL, /* bitstream buffer not required in bits compute mode */
+ CLIP3(slice_qp, 0, IHEVC_MAX_QP),
+ cabac_init_idc,
+ CABAC_MODE_COMPUTE_BITS);
+
+ ihevce_cabac_init(
+ &ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[1].s_cabac_ctxt,
+ NULL, /* bitstream buffer not required in bits compute mode */
+ CLIP3(slice_qp, 0, IHEVC_MAX_QP),
+ cabac_init_idc,
+ CABAC_MODE_COMPUTE_BITS);
+
+ /* initialize the entropy states in rdopt struct from the context models
+ of cu entropy context 0 (macro arguments presumably dst, src, size -
+ confirm against COPY_CABAC_STATES definition) */
+ COPY_CABAC_STATES(
+ &ps_rdopt_entropy_ctxt->au1_init_cabac_ctxt_states[0],
+ &ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[0].s_cabac_ctxt.au1_ctxt_models[0],
+ sizeof(ps_rdopt_entropy_ctxt->au1_init_cabac_ctxt_states));
+}
+
+/**
+******************************************************************************
+*
+* @brief Cabac rdopt ctb level initialization.
+*
+* @par Description
+* initializes the ctb x and y co-ordinates for all the rdopt entropy contexts
+* (i4_ctb_x / i4_ctb_y of as_cu_entropy_ctxt[0] and as_cu_entropy_ctxt[1]);
+* nothing else is modified
+*
+* @param[inout] ps_rdopt_entropy_ctxt
+* pointer to rdopt entropy context (handle)
+*
+* @param[in] ctb_x
+* current ctb x offset w.r.t frame start (ctb units)
+*
+* @param[in] ctb_y
+* current ctb y offset w.r.t frame start (ctb units)
+*
+* @return none
+*
+******************************************************************************
+*/
+void ihevce_entropy_rdo_ctb_init(
+ rdopt_entropy_ctxt_t *ps_rdopt_entropy_ctxt, WORD32 ctb_x, WORD32 ctb_y)
+{
+ /* initialize the ctb x and y co-ordinates for all the rdopt entropy contexts */
+ ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[0].i4_ctb_x = ctb_x;
+ ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[1].i4_ctb_x = ctb_x;
+
+ ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[0].i4_ctb_y = ctb_y;
+ ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[1].i4_ctb_y = ctb_y;
+}
+
+/**
+******************************************************************************
+*
+* @brief Cabac rdopt cu encode function to compute luma bits for a given cu
+* only luma bits are used for rd optimization currently
+*
+* @par Description
+* uses the scratch CU entropy context selected by
+* ps_rdopt_entropy_ctxt->i4_curr_buf_idx, resets its bit counters and calls the
+* cabac entropy coding unit function to compute the total bits for current CU
+*
+* A local CU structure is prepared (on stack) as the structure that entropy
+* encode expects differs from the one rdopt provides
+*
+* @param[inout] ps_rdopt_entropy_ctxt
+* pointer to rdopt entropy context (handle)
+*
+* @param[in] ps_cu_prms
+* pointer to current CU params whose bits are computed
+*
+* @param[in] cu_pos_x
+* current CU x position w.r.t ctb (in 8x8 units, 0 to (ctb size / 8) - 1)
+*
+* @param[in] cu_pos_y
+* current CU y position w.r.t ctb (in 8x8 units, 0 to (ctb size / 8) - 1)
+*
+* @param[in] cu_size
+* current cu size (in pel units, 8 to ctb size)
+*
+* @param[in] top_avail
+* top availability flag for current CU (required for encoding skip flag)
+*
+* @param[in] left_avail
+* left availability flag for current CU (required for encoding skip flag)
+*
+* @param[in] pv_ecd_coeff
+* Compressed coeff residue buffer (for luma)
+*
+* @note the scratch context index is not a parameter; it is read from
+* ps_rdopt_entropy_ctxt->i4_curr_buf_idx and is expected to be 0 or 1
+* (asserted)
+*
+* @param[out] pi4_cu_rdopt_tex_bits
+* receives the cbf bits (u4_cbf_bits_q12) estimated for the CU, rounded to
+* whole bits
+*
+* @return total bits required to encode the current CU
+*
+******************************************************************************
+*/
+WORD32 ihevce_entropy_rdo_encode_cu(
+ rdopt_entropy_ctxt_t *ps_rdopt_entropy_ctxt,
+ enc_loop_cu_final_prms_t *ps_cu_prms,
+ WORD32 cu_pos_x,
+ WORD32 cu_pos_y,
+ WORD32 cu_size,
+ WORD32 top_avail,
+ WORD32 left_avail,
+ void *pv_ecd_coeff,
+ WORD32 *pi4_cu_rdopt_tex_bits)
+{
+ /* local cu structure for passing to entropy encode cu module */
+ cu_enc_loop_out_t s_enc_cu;
+ WORD32 rdopt_buf_idx = ps_rdopt_entropy_ctxt->i4_curr_buf_idx;
+
+ entropy_context_t *ps_cur_cu_entropy =
+ &ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[rdopt_buf_idx];
+
+ WORD32 total_bits = 0;
+
+ WORD32 log2_ctb_size = ps_cur_cu_entropy->i1_log2_ctb_size;
+ WORD32 log2_cu_size;
+
+ WORD32 cu_depth;
+
+ /* sanity checks; cu positions are 0-based 8x8 indices, so they must be strictly below ctb size / 8 */
+ ASSERT((rdopt_buf_idx == 0) || (rdopt_buf_idx == 1));
+ ASSERT((cu_size >= 8) && (cu_size <= (1 << log2_ctb_size)));
+ ASSERT((cu_pos_x >= 0) && (cu_pos_x < (1 << (log2_ctb_size - 3))));
+ ASSERT((cu_pos_y >= 0) && (cu_pos_y < (1 << (log2_ctb_size - 3))));
+
+ GETRANGE(log2_cu_size, cu_size);
+ log2_cu_size -= 1;
+ cu_depth = log2_ctb_size - log2_cu_size;
+
+ {
+ /**********************************************************/
+ /* prepare local cu structure before calling cabac encode */
+ /**********************************************************/
+
+ /* default value; overwritten below for inter CUs */
+ s_enc_cu.b1_no_residual_syntax_flag = 0;
+
+ /* initialize cu posx, posy and size */
+ s_enc_cu.b3_cu_pos_x = cu_pos_x;
+ s_enc_cu.b3_cu_pos_y = cu_pos_y;
+ s_enc_cu.b4_cu_size = (cu_size >> 3);
+
+ /* PCM not supported */
+ s_enc_cu.b1_pcm_flag = 0;
+ s_enc_cu.b1_pred_mode_flag = ps_cu_prms->u1_intra_flag;
+ s_enc_cu.b3_part_mode = ps_cu_prms->u1_part_mode;
+
+ s_enc_cu.b1_skip_flag = ps_cu_prms->u1_skip_flag;
+ s_enc_cu.b1_tq_bypass_flag = 0;
+ s_enc_cu.pv_coeff = pv_ecd_coeff;
+
+ /* store the number of TUs */
+ s_enc_cu.u2_num_tus_in_cu = ps_cu_prms->u2_num_tus_in_cu;
+
+ /* ---- initialize the PUs and TUs start ptrs for cur CU ----- */
+ s_enc_cu.ps_pu = &ps_cu_prms->as_pu_enc_loop[0];
+ s_enc_cu.ps_enc_tu = &ps_cu_prms->as_tu_enc_loop[0];
+
+ /* Corner case : If Part is 2Nx2N and Merge has all TU with zero cbf */
+ /* then it has to be coded as skip CU */
+ if((SIZE_2Nx2N == ps_cu_prms->u1_part_mode) &&
+ /*(1 == ps_cu_prms->u2_num_tus_in_cu) &&*/
+ (1 == ps_cu_prms->as_pu_enc_loop[0].b1_merge_flag) && (0 == ps_cu_prms->u1_skip_flag) &&
+ (0 == ps_cu_prms->u1_is_cu_coded))
+ {
+ s_enc_cu.b1_skip_flag = 1;
+ }
+
+ if(s_enc_cu.b1_pred_mode_flag == PRED_MODE_INTER)
+ {
+ s_enc_cu.b1_no_residual_syntax_flag = !ps_cu_prms->u1_is_cu_coded;
+ }
+ else /* b1_pred_mode_flag == PRED_MODE_INTRA */
+ {
+ /* copy prev_mode_flag, mpm_idx and rem_intra_pred_mode; u2_num_tus_in_cu entries are copied. NOTE(review): confirm as_prev_rem can hold that many */
+ memcpy(
+ &s_enc_cu.as_prev_rem[0],
+ &ps_cu_prms->as_intra_prev_rem[0],
+ ps_cu_prms->u2_num_tus_in_cu * sizeof(intra_prev_rem_flags_t));
+
+ s_enc_cu.b3_chroma_intra_pred_mode = ps_cu_prms->u1_chroma_intra_pred_mode;
+ }
+ }
+
+ /* reset the total bits in cabac engine to zero */
+ ps_cur_cu_entropy->s_cabac_ctxt.u4_bits_estimated_q12 = 0;
+ ps_cur_cu_entropy->s_cabac_ctxt.u4_texture_bits_estimated_q12 = 0;
+ ps_cur_cu_entropy->s_cabac_ctxt.u4_cbf_bits_q12 = 0;
+ ps_cur_cu_entropy->i1_encode_qp_delta = 0;
+
+ /* Call the cabac encode function of current cu to compute bits */
+ ihevce_cabac_encode_coding_unit(ps_cur_cu_entropy, &s_enc_cu, cu_depth, top_avail, left_avail);
+
+ /* return total bits after rounding the fractional bits */
+ total_bits =
+ (ps_cur_cu_entropy->s_cabac_ctxt.u4_bits_estimated_q12 + (1 << (CABAC_FRAC_BITS_Q - 1))) >>
+ CABAC_FRAC_BITS_Q;
+#if RDOPT_ZERO_CBF_ENABLE
+ ASSERT(ps_cur_cu_entropy->s_cabac_ctxt.u4_texture_bits_estimated_q12 == 0);
+#endif
+ /* report the cbf bits after rounding the fractional bits */
+ *pi4_cu_rdopt_tex_bits =
+ (ps_cur_cu_entropy->s_cabac_ctxt.u4_cbf_bits_q12 + (1 << (CABAC_FRAC_BITS_Q - 1))) >>
+ CABAC_FRAC_BITS_Q;
+
+ /* ( ps_cur_cu_entropy->s_cabac_ctxt.u4_texture_bits_estimated_q12 +
+ (1 << (CABAC_FRAC_BITS_Q - 1))
+ ) >> CABAC_FRAC_BITS_Q;*/
+
+ return (total_bits);
+}
+
+/**
+******************************************************************************
+*
+* @brief Cabac rdo encode sao function to compute bits required for a given
+* ctb to be encoded with any sao type or no SAO.
+*
+* @par Description
+* uses the scratch entropy context selected by i4_curr_buf_idx, reloads its cabac
+* context models from the backed-up init states, resets its bit counters and
+* calls the cabac encode sao function to compute the total bits for current CTB
+*
+* @param[inout] ps_rdopt_entropy_ctxt
+* pointer to rdopt entropy context (handle)
+*
+* @param[in] ps_ctb_enc_loop_out
+* pointer to current enc loop CTB output structure
+*
+* @return total bits required to encode the current CTB (rounded to whole bits)
+*
+******************************************************************************
+*/
+WORD32 ihevce_cabac_rdo_encode_sao(
+ rdopt_entropy_ctxt_t *ps_rdopt_entropy_ctxt, ctb_enc_loop_out_t *ps_ctb_enc_loop_out)
+{
+ /* index of the current scratch buffer */
+ WORD32 rdopt_buf_idx = ps_rdopt_entropy_ctxt->i4_curr_buf_idx;
+ WORD32 total_bits = 0;
+ entropy_context_t *ps_cur_ctb_entropy =
+ &ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[rdopt_buf_idx];
+
+ /* copy the initial entropy states from the backed-up buf to the current buf */
+ memcpy(
+ &ps_cur_ctb_entropy->s_cabac_ctxt.au1_ctxt_models[0],
+ &ps_rdopt_entropy_ctxt->au1_init_cabac_ctxt_states[0],
+ sizeof(ps_rdopt_entropy_ctxt->au1_init_cabac_ctxt_states));
+
+ /* reset the total bits in cabac engine to zero */
+ ps_cur_ctb_entropy->s_cabac_ctxt.u4_bits_estimated_q12 = 0;
+ ps_cur_ctb_entropy->s_cabac_ctxt.u4_texture_bits_estimated_q12 = 0;
+ ps_cur_ctb_entropy->s_cabac_ctxt.u4_cbf_bits_q12 = 0;
+ ps_cur_ctb_entropy->i1_encode_qp_delta = 0;
+ /* u4_range is not reset here; it is expected to already be 0 (asserted below) */
+
+ ASSERT(ps_cur_ctb_entropy->s_cabac_ctxt.u4_range == 0);
+ ihevce_cabac_encode_sao(ps_cur_ctb_entropy, ps_ctb_enc_loop_out);
+
+ /* return total bits after rounding the fractional bits */
+ total_bits =
+ (ps_cur_ctb_entropy->s_cabac_ctxt.u4_bits_estimated_q12 + (1 << (CABAC_FRAC_BITS_Q - 1))) >>
+ CABAC_FRAC_BITS_Q;
+
+ return (total_bits);
+}
+
+/**
+******************************************************************************
+*
+* @brief Updates best sao cabac state.
+*
+* @par Description
+* Copies the cabac states of best cand to init states buf for next ctb.
+*
+* @param[inout] ps_rdopt_entropy_ctxt
+* pointer to rdopt entropy context (handle)
+*
+* @param[in] i4_best_buf_idx
+* Index to the buffer having the cabac states of best candidate
+*
+* @return 1 always (no failure path exists in this function)
+*
+******************************************************************************
+*/
+WORD32 ihevce_update_best_sao_cabac_state(
+ rdopt_entropy_ctxt_t *ps_rdopt_entropy_ctxt, WORD32 i4_best_buf_idx)
+{
+ /* entropy context that holds the cabac states of the best candidate */
+ WORD32 rdopt_buf_idx = i4_best_buf_idx;
+ entropy_context_t *ps_cur_ctb_entropy =
+ &ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[rdopt_buf_idx];
+
+ /* copy the entropy states from the best buf to the initial states buf */
+ memcpy(
+ &ps_rdopt_entropy_ctxt->au1_init_cabac_ctxt_states[0],
+ &ps_cur_ctb_entropy->s_cabac_ctxt.au1_ctxt_models[0],
+ sizeof(ps_rdopt_entropy_ctxt->au1_init_cabac_ctxt_states));
+
+ /* reset the bit counters of the best buf's cabac engine to zero */
+ ps_cur_ctb_entropy->s_cabac_ctxt.u4_bits_estimated_q12 = 0;
+ ps_cur_ctb_entropy->s_cabac_ctxt.u4_texture_bits_estimated_q12 = 0;
+ ps_cur_ctb_entropy->s_cabac_ctxt.u4_cbf_bits_q12 = 0;
+ ps_cur_ctb_entropy->i1_encode_qp_delta = 0;
+
+ return (1);
+}
+
+/**
+******************************************************************************
+*
+* @brief Commits the cabac states and the skip flag of the best (rdopt winner)
+* CU candidate into the rdopt entropy context
+*
+* @par Description
+* Copies the cabac context models of the best candidate's entropy context to
+* the init states buffer, then sets (skip CU) or clears (non skip CU) the bits
+* covered by the CU in the top row and left skip flags (1 bit per 8 pels)
+*
+* @param[inout] ps_rdopt_entropy_ctxt
+* pointer to rdopt entropy context (handle)
+*
+* @param[in] cu_pos_x
+* current CU x position w.r.t ctb; used here in pel units (added to the ctb pel offset, then >> 3)
+*
+* @param[in] cu_pos_y
+* current CU y position w.r.t ctb; used here in pel units (shifted right by 3 to get 8 pel units)
+*
+* @param[in] cu_size
+* current cu size (in pel units)
+*
+* @param[in] cu_skip_flag
+* non-zero sets the skip bits covered by the CU, zero clears them
+*
+* @param[in] rdopt_best_cu_idx
+* id of the best CU entropy ctxt (rdopt winner candidate)
+*
+* @return none
+*
+******************************************************************************
+*/
+void ihevce_entropy_update_best_cu_states(
+ rdopt_entropy_ctxt_t *ps_rdopt_entropy_ctxt,
+ WORD32 cu_pos_x,
+ WORD32 cu_pos_y,
+ WORD32 cu_size,
+ WORD32 cu_skip_flag,
+ WORD32 rdopt_best_cu_idx)
+{
+ entropy_context_t *ps_best_cu_entropy =
+ &ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[rdopt_best_cu_idx];
+
+ /* CTB x co-ordinate w.r.t frame start (in pels) */
+ WORD32 ctb_x0_frm = (ps_best_cu_entropy->i4_ctb_x << ps_best_cu_entropy->i1_log2_ctb_size);
+
+ /* CU x co-ordinate w.r.t frame start (in pels) */
+ WORD32 cu_x0_frm = cu_pos_x + ctb_x0_frm;
+
+ /* bit position of the CU within the top skip flags byte; 1 bit per 8 pel */
+ WORD32 x_pos = ((cu_x0_frm >> 3) & 0x7);
+
+ /* bit position of the CU within the left skip flags; 1 bit per 8 pel */
+ WORD32 y_pos = ((cu_pos_y >> 3) & 0x7);
+
+ /* byte of the top row skip flags that covers this CU (one byte per 64 pels) */
+ UWORD8 *pu1_top_skip_flags = ps_best_cu_entropy->pu1_skip_cu_top + (cu_x0_frm >> 6);
+
+ UWORD32 u4_skip_left_flags = ps_best_cu_entropy->u4_skip_cu_left;
+
+ ps_best_cu_entropy = &ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[rdopt_best_cu_idx]; /* same value as the initializer above */
+
+ /* copy the entropy states from best rdopt cu states to init states */
+ COPY_CABAC_STATES(
+ &ps_rdopt_entropy_ctxt->au1_init_cabac_ctxt_states[0],
+ &ps_best_cu_entropy->s_cabac_ctxt.au1_ctxt_models[0],
+ sizeof(ps_rdopt_entropy_ctxt->au1_init_cabac_ctxt_states));
+
+ /* replicate skip flag in left and top row cu skip flags ((cu_size >> 3) bits each) */
+ if(cu_skip_flag)
+ {
+ SET_BITS(pu1_top_skip_flags[0], x_pos, (cu_size >> 3));
+ SET_BITS(u4_skip_left_flags, y_pos, (cu_size >> 3));
+ }
+ else
+ {
+ CLEAR_BITS(pu1_top_skip_flags[0], x_pos, (cu_size >> 3));
+ CLEAR_BITS(u4_skip_left_flags, y_pos, (cu_size >> 3));
+ }
+
+ /* copy the left skip flags in both the rdopt contexts */
+ ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[0].u4_skip_cu_left =
+ ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[1].u4_skip_cu_left = u4_skip_left_flags;
+}
+
+/**
+******************************************************************************
+*
+* @brief Cabac rdopt tu encode function to compute the residual bits for a given tu
+*
+* @par Description
+* uses the scratch CU entropy context selected by i4_curr_buf_idx, resets its
+* estimated bit count and calls the cabac residue coding function to compute
+* the total bits for current TU. TU bits include only residual coding bits and
+* do not include tu split, cbf and qp delta encoding bits for a TU
+*
+* @param[inout] ps_rdopt_entropy_ctxt
+* pointer to rdopt entropy context (handle)
+*
+* @param[in] pv_ecd_coeff
+* Compressed coeff residue buffer
+*
+* @param[in] transform_size
+* current tu size in pel units (asserted to be between 4 and 32)
+*
+* @param[in] is_luma
+* indicates if it is luma or chroma TU (required for residue encode)
+*
+* @param[in] perform_sbh
+* forwarded as is to ihevce_cabac_residue_encode_rdopt (presumably enables sign bit hiding - TODO confirm)
+*
+* @return total bits required to encode the current TU
+*
+******************************************************************************
+*/
+WORD32 ihevce_entropy_rdo_encode_tu(
+ rdopt_entropy_ctxt_t *ps_rdopt_entropy_ctxt,
+ void *pv_ecd_coeff,
+ WORD32 transform_size,
+ WORD32 is_luma,
+ WORD32 perform_sbh)
+{
+ WORD32 log2_tfr_size;
+ WORD32 total_bits = 0;
+ WORD32 curr_buf_idx = ps_rdopt_entropy_ctxt->i4_curr_buf_idx;
+ entropy_context_t *ps_cur_tu_entropy;
+
+ ps_cur_tu_entropy = &ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[curr_buf_idx];
+
+ ASSERT((transform_size >= 4) && (transform_size <= 32));
+
+ /* transform size to log2 transform size */
+ GETRANGE(log2_tfr_size, transform_size);
+ log2_tfr_size -= 1;
+
+ /* reset the total bits in cabac engine to zero */
+ ps_cur_tu_entropy->s_cabac_ctxt.u4_bits_estimated_q12 = 0;
+ ps_cur_tu_entropy->i1_encode_qp_delta = 0;
+
+ /* Call the cabac residue encode function to compute TU bits */
+ ihevce_cabac_residue_encode_rdopt(
+ ps_cur_tu_entropy, pv_ecd_coeff, log2_tfr_size, is_luma, perform_sbh);
+
+ /* return total bits after rounding the fractional bits */
+ total_bits =
+ (ps_cur_tu_entropy->s_cabac_ctxt.u4_bits_estimated_q12 + (1 << (CABAC_FRAC_BITS_Q - 1))) >>
+ CABAC_FRAC_BITS_Q;
+
+ return (total_bits);
+}
+
+/**
+******************************************************************************
+*
+* @brief Cabac rdopt tu encode function to compute bits for a given tu. Actual
+* RDOQ algorithm is performed by the ihevce_cabac_residue_encode_rdoq function
+* called by this function.
+*
+* @par Description
+* uses the scratch CU entropy context selected by
+* ps_rdopt_entropy_ctxt->i4_curr_buf_idx, resets its estimated bit count and
+* calls the cabac rdoq residue coding function to compute the total bits for
+* current TU
+*
+* Note : TU includes only residual coding bits and does not include
+* tu split, cbf and qp delta encoding bits for a TU
+*
+* @param[inout] ps_rdopt_entropy_ctxt
+* pointer to rdopt entropy context (handle)
+*
+* @param[in] pv_ecd_coeff
+* Compressed coeff residue buffer
+*
+* @param[in] transform_size
+* current tu size in pel units (asserted to be between 4 and 32)
+*
+* @param[in] is_luma
+* indicates if it is luma or chroma TU (required for residue encode)
+*
+* @param[inout] ps_rdoq_ctxt
+* pointer to rdoq context structure
+*
+* @param[inout] pi8_coded_tu_dist
+* Pointer to the variable which will contain the transform domain distortion
+* of the entire TU, when any of the coeffs in the TU are coded
+*
+* @param[inout] pi8_not_coded_tu_dist
+* Pointer to the variable which will contain the transform domain distortion
+* of the entire TU when it is not coded (presumably all coeffs treated as
+* zero - TODO confirm against ihevce_cabac_residue_encode_rdoq)
+*
+* @param[in] perform_sbh
+* forwarded as is to ihevce_cabac_residue_encode_rdoq (presumably enables
+* sign bit hiding - TODO confirm)
+*
+* @return total bits required to encode the current TU
+*
+* @note ps_rdoq_ctxt, pi8_coded_tu_dist and pi8_not_coded_tu_dist are passed
+* through unchanged; they are not read or written by this wrapper itself
+*
+* @note transform_size is converted to its log2 value before the call
+*
+******************************************************************************
+*/
+WORD32 ihevce_entropy_rdo_encode_tu_rdoq(
+ rdopt_entropy_ctxt_t *ps_rdopt_entropy_ctxt,
+ void *pv_ecd_coeff,
+ WORD32 transform_size,
+ WORD32 is_luma,
+ rdoq_sbh_ctxt_t *ps_rdoq_ctxt,
+ LWORD64 *pi8_coded_tu_dist,
+ LWORD64 *pi8_not_coded_tu_dist,
+ WORD32 perform_sbh)
+{
+ WORD32 log2_tfr_size;
+ WORD32 total_bits = 0;
+ WORD32 curr_buf_idx = ps_rdopt_entropy_ctxt->i4_curr_buf_idx;
+ entropy_context_t *ps_cur_tu_entropy;
+
+ ps_cur_tu_entropy = &ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[curr_buf_idx];
+
+ ASSERT((transform_size >= 4) && (transform_size <= 32));
+
+ /* transform size to log2 transform size */
+ GETRANGE(log2_tfr_size, transform_size);
+ log2_tfr_size -= 1;
+
+ /* reset the total bits in cabac engine to zero */
+ ps_cur_tu_entropy->s_cabac_ctxt.u4_bits_estimated_q12 = 0;
+ ps_cur_tu_entropy->i1_encode_qp_delta = 0;
+
+ /* Call the cabac rdoq residue encode function to compute TU bits */
+ ihevce_cabac_residue_encode_rdoq(
+ ps_cur_tu_entropy,
+ pv_ecd_coeff,
+ log2_tfr_size,
+ is_luma,
+ (void *)ps_rdoq_ctxt,
+ pi8_coded_tu_dist,
+ pi8_not_coded_tu_dist,
+ perform_sbh);
+
+ /* return total bits after rounding the fractional bits */
+ total_bits =
+ (ps_cur_tu_entropy->s_cabac_ctxt.u4_bits_estimated_q12 + (1 << (CABAC_FRAC_BITS_Q - 1))) >>
+ CABAC_FRAC_BITS_Q;
+
+ return (total_bits);
+}
+
+/**
+******************************************************************************
+*
+* @brief Cabac rdopt copy functions for copying states (which will be used later)
+*
+* @par Description
+* Does the HEVC style of entropy sync: STORE_ENT_SYNC_RDO_STATE copies the rdo init
+* states to the row level sync states, UPDATE_ENT_SYNC_RDO_STATE copies them back
+*
+* Caller needs to make sure UPDATE_ENT_SYNC_RDO_STATE is used for first ctb of
+* every row (leaving first row of slice) and STORE_ENT_SYNC_RDO_STATE is used for
+* storing the cabac states at the end of 2nd ctb of a row.
+*
+* @param[inout] ps_rdopt_entropy_ctxt
+* pointer to rdopt entropy context (handle)
+*
+* @param[inout] pu1_entropy_sync_states
+* pointer to entropy sync cabac states (written in STORE mode, read in UPDATE mode)
+*
+* @param[in] copy_mode
+* mode of copying cabac states. Shall be either UPDATE_ENT_SYNC_RDO_STATE or
+* STORE_ENT_SYNC_RDO_STATE
+*
+******************************************************************************
+*/
+void ihevce_entropy_rdo_copy_states(
+ rdopt_entropy_ctxt_t *ps_rdopt_entropy_ctxt, UWORD8 *pu1_entropy_sync_states, WORD32 copy_mode)
+{
+ /* sanity checks */
+ ASSERT((copy_mode == STORE_ENT_SYNC_RDO_STATE) || (copy_mode == UPDATE_ENT_SYNC_RDO_STATE));
+
+ if(STORE_ENT_SYNC_RDO_STATE == copy_mode)
+ {
+ COPY_CABAC_STATES(
+ pu1_entropy_sync_states,
+ &ps_rdopt_entropy_ctxt->au1_init_cabac_ctxt_states[0],
+ IHEVC_CAB_CTXT_END);
+ }
+ else if(UPDATE_ENT_SYNC_RDO_STATE == copy_mode)
+ {
+ COPY_CABAC_STATES(
+ &ps_rdopt_entropy_ctxt->au1_init_cabac_ctxt_states[0],
+ pu1_entropy_sync_states,
+ IHEVC_CAB_CTXT_END);
+ }
+}
diff --git a/encoder/ihevce_cabac_rdo.h b/encoder/ihevce_cabac_rdo.h
new file mode 100644
index 0000000..fb3ba0d
--- /dev/null
+++ b/encoder/ihevce_cabac_rdo.h
@@ -0,0 +1,120 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+*
+* @file ihevce_cabac_rdo.h
+*
+* @brief
+* This file contains function prototypes for rdopt cabac entropy modules
+*
+* @author
+* ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_CABAC_RDO_H_
+#define _IHEVCE_CABAC_RDO_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+typedef enum
+{
+ UPDATE_ENT_SYNC_RDO_STATE = 0, /* copy the entropy sync states into the rdo init cabac states */
+ STORE_ENT_SYNC_RDO_STATE = 1, /* copy the rdo init cabac states into the entropy sync states */
+} CABAC_RDO_COPY_STATE_T;
+
+/*****************************************************************************/
+/* Structures */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+void ihevce_entropy_rdo_frame_init(
+ rdopt_entropy_ctxt_t *ps_rdopt_entropy_ctxt,
+ slice_header_t *ps_slice_hdr,
+ pps_t *ps_pps,
+ sps_t *ps_sps,
+ vps_t *ps_vps,
+ UWORD8 *pu1_cu_skip_top_row,
+ rc_quant_t *ps_rc_quant_ctxt);
+
+void ihevce_entropy_rdo_ctb_init(
+ rdopt_entropy_ctxt_t *ps_rdopt_entropy_ctxt, WORD32 ctb_x, WORD32 ctb_y);
+
+WORD32 ihevce_entropy_rdo_encode_cu(
+ rdopt_entropy_ctxt_t *ps_rdopt_entropy_ctxt,
+ enc_loop_cu_final_prms_t *ps_cu_prms,
+ WORD32 cu_pos_x,
+ WORD32 cu_pos_y,
+ WORD32 cu_size,
+ WORD32 top_avail,
+ WORD32 left_avail,
+ void *pv_ecd_coeff,
+ WORD32 *pi4_cu_rdopt_tex_bits);
+
+void ihevce_entropy_update_best_cu_states(
+ rdopt_entropy_ctxt_t *ps_rdopt_entropy_ctxt,
+ WORD32 cu_pos_x,
+ WORD32 cu_pos_y,
+ WORD32 cu_size,
+ WORD32 cu_skip_flag,
+ WORD32 rdopt_best_cu_idx);
+
+WORD32 ihevce_entropy_rdo_encode_tu(
+ rdopt_entropy_ctxt_t *ps_rdopt_entropy_ctxt,
+ void *pv_ecd_coeff,
+ WORD32 transform_size,
+ WORD32 is_luma,
+ WORD32 perform_sbh);
+
+WORD32 ihevce_cabac_rdo_encode_sao(
+ rdopt_entropy_ctxt_t *ps_rdopt_entropy_ctxt, ctb_enc_loop_out_t *ps_ctb_enc_loop_out);
+
+WORD32 ihevce_update_best_sao_cabac_state(
+ rdopt_entropy_ctxt_t *ps_rdopt_entropy_ctxt, WORD32 i4_best_buf_idx);
+
+WORD32 ihevce_entropy_rdo_encode_tu_rdoq(
+ rdopt_entropy_ctxt_t *ps_rdopt_entropy_ctxt,
+ void *pv_ecd_coeff,
+ WORD32 transform_size,
+ WORD32 is_luma,
+ rdoq_sbh_ctxt_t *ps_rdoq_ctxt,
+ LWORD64 *pi8_coded_tu_dist,
+ LWORD64 *pi8_not_coded_tu_dist,
+ WORD32 perform_sbh);
+
+void ihevce_entropy_rdo_copy_states(
+ rdopt_entropy_ctxt_t *ps_rdopt_entropy_ctxt, UWORD8 *pu1_entropy_sync_states, WORD32 copy_mode);
+
+#endif /* _IHEVCE_CABAC_RDO_H_ */
diff --git a/encoder/ihevce_cabac_tu.c b/encoder/ihevce_cabac_tu.c
new file mode 100644
index 0000000..f4a1d1e
--- /dev/null
+++ b/encoder/ihevce_cabac_tu.c
@@ -0,0 +1,3496 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+******************************************************************************
+* @file ihevce_cabac_tu.c
+*
+* @brief
+* This file contains function definitions for cabac entropy coding of
+* transform units of HEVC syntax
+*
+* @author
+* ittiam
+*
+* @List of Functions
+* ihevce_cabac_encode_qp_delta()
+* ihevce_cabac_encode_last_coeff_x_y()
+* ihevce_encode_transform_tree()
+* ihevce_cabac_residue_encode()
+* ihevce_cabac_residue_encode_rdopt()
+* ihevce_cabac_residue_encode_rdoq()
+* ihevce_code_all_sig_coeffs_as_0_explicitly()
+* ihevce_find_new_last_csb()
+* ihevce_copy_backup_ctxt()
+* ihevce_estimate_num_bits_till_next_non_zero_coeff()
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+#include "ihevc_trans_macros.h"
+#include "ihevc_trans_tables.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_entropy_structs.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+#include "ihevce_enc_loop_structs.h"
+#include "ihevce_bs_compute_ctb.h"
+#include "ihevce_global_tables.h"
+#include "ihevce_common_utils.h"
+#include "ihevce_trace.h"
+
+/*****************************************************************************/
+/* Globals */
+/*****************************************************************************/
+extern UWORD16 gau2_ihevce_cabac_bin_to_bits[64 * 2];
+
+/**
+******************************************************************************
+* @brief LUT for deriving the last significant coeff prefix.
+*
+* @input : last_significant_coeff position (0 to 31)
+*
+* @output : last_significant_prefix for that position (prefix only, 0 to 9)
+*
+* @remarks Look up tables taken from HM-8.0-dev
+******************************************************************************
+*/
+const UWORD8 gu1_hevce_last_coeff_prefix[32] = { 0, 1, 2, 3, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7,
+ 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9 };
+
+/**
+*****************************************************************************
+* @brief LUT for deriving the last significant coeff suffix
+*
+* @input : last significant prefix (0 to 9)
+*
+* @output : prefix code that needs to be subtracted from last_pos to get
+* suffix as per equation 7-55 in section 7.4.12.
+*
+* It returns the following code for last_significant_prefix > 3
+* (1 << ((last_significant_coeff_x_prefix >> 1) - 1)) *
+* (2 + (last_significant_coeff_x_prefix & 1))
+*
+* For last_significant_prefix <= 3 the code is the prefix itself
+* @remarks Look up tables taken from HM-8.0-dev
+*****************************************************************************
+*/
+const UWORD8 gu1_hevce_last_coeff_prefix_code[10] = { 0, 1, 2, 3, 4, 6, 8, 12, 16, 24 };
+
+/**
+*****************************************************************************
+* @brief returns raster index of 4x4 block for diag up-right/horz/vert scans
+*
+* @input : scan type (0: diag up-right, 1: horz, 2: vert) and scan idx (0-15)
+*
+* @output : raster index (4 * y + x), i.e. y pos in bits 3:2 and x pos in bits 1:0
+*
+*****************************************************************************
+*/
+const UWORD8 gu1_hevce_scan4x4[3][16] = {
+ /* diag up right */
+ { 0, 4, 1, 8, 5, 2, 12, 9, 6, 3, 13, 10, 7, 14, 11, 15 },
+
+ /* horz */
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
+
+ /* vert */
+ { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 }
+};
+
+/**
+*****************************************************************************
+* @brief returns context increment for sig coeff based on csbf neighbour
+* flags (bottom and right) and current coeff position in 4x4 block
+* See section 9.3.3.1.4 for details on this context increment
+*
+* @input : neighbour csbf flags (bit0: right csbf, bit1: bottom csbf)
+* coeff idx in raster order (0-15), i.e. xP = idx & 3 and yP = idx >> 2
+*
+* @output : context increment for sig coeff flag
+*
+*****************************************************************************
+*/
+const UWORD8 gu1_hevce_sigcoeff_ctxtinc[4][16] = {
+ /* nbr csbf = 0: sigCtx = (xP+yP == 0) ? 2 : (xP+yP < 3) ? 1: 0 */
+ { 2, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 },
+
+ /* nbr csbf = 1: sigCtx = (yP == 0) ? 2 : (yP == 1) ? 1: 0 */
+ { 2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 },
+
+ /* nbr csbf = 2: sigCtx = (xP == 0) ? 2 : (xP == 1) ? 1: 0 */
+ { 2, 1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0 },
+
+ /* nbr csbf = 3: sigCtx = 2 */
+ { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 }
+};
+
+const UWORD8 gu1_hevce_sigcoeff_ctxtinc_00[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; /* all-zero sig coeff context increments */
+
+/**
+*****************************************************************************
+* @brief returns context increment for sig coeff for 4x4 transform size as
+* per Table 9-39 in section 9.3.3.1.4
+*
+* @input : coeff idx in raster order (0-15)
+*
+* @output : context increment for sig coeff flag
+*
+*****************************************************************************
+*/
+const UWORD8 gu1_hevce_sigcoeff_ctxtinc_tr4[16] = { 0, 1, 4, 5, 2, 3, 4, 5, 6, 6, 8, 8, 7, 7, 8, 0 };
+
+#define DISABLE_ZCSBF 0 /* compile-time switch, set to 0; NOTE(review): its use is outside this chunk - confirm meaning */
+
+#define TEST_CABAC_BITESTIMATE 0 /* compile-time switch, set to 0; NOTE(review): its use is outside this chunk - confirm meaning */
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+/**
+******************************************************************************
+*
+* @brief Entropy encoding of qp_delta in a tu as per sec 9.3.2 Table 9-32
+*
+* @par Description
+* truncated unary binarization is done for abs_delta up to TU_MAX_QP_DELTA_ABS and the rest
+* is coded as 0th order Exponential Golomb code; a bypass sign bin follows when abs_delta != 0
+*
+* @param[inout] ps_cabac
+* pointer to cabac encoding context (handle)
+*
+* @param[in] qp_delta
+* delta qp that needs to be encoded
+*
+* @return IHEVCE_SUCCESS ORed with the return codes of the cabac encode calls made
+*
+******************************************************************************
+*/
+WORD32 ihevce_cabac_encode_qp_delta(cab_ctxt_t *ps_cabac, WORD32 qp_delta)
+{
+ WORD32 qp_delta_abs = ABS(qp_delta);
+ WORD32 c_max = TU_MAX_QP_DELTA_ABS;
+ WORD32 ctxt_inc = IHEVC_CAB_QP_DELTA_ABS;
+ WORD32 ctxt_inc_max = CTXT_MAX_QP_DELTA_ABS;
+ WORD32 ret = IHEVCE_SUCCESS;
+
+ /* qp_delta_abs is coded as combination of truncated unary and eg0 code */
+ /* See Table 9-32 and Table 9-37 for details on cu_qp_delta_abs */
+ ret |= ihevce_cabac_encode_tunary(
+ ps_cabac, MIN(qp_delta_abs, c_max), c_max, ctxt_inc, 0, ctxt_inc_max);
+ if(qp_delta_abs >= c_max)
+ {
+ ret |= ihevce_cabac_encode_egk(ps_cabac, qp_delta_abs - c_max, 0);
+ }
+ AEV_TRACE("cu_qp_delta_abs", qp_delta_abs, ps_cabac->u4_range);
+
+ /* code the qp delta sign flag (1 for negative) only when the delta is non-zero */
+ if(qp_delta_abs)
+ {
+ WORD32 sign = (qp_delta < 0) ? 1 : 0;
+ ret |= ihevce_cabac_encode_bypass_bin(ps_cabac, sign);
+ AEV_TRACE("cu_qp_delta_sign", sign, ps_cabac->u4_range);
+ }
+
+ return (ret);
+}
+
+/**
+******************************************************************************
+*
+* @brief Encodes position of the last coded coeff (in scan order) of TU
+*
+* @par Description
+* Entropy encode of last coded coeff of a TU as per section:7.3.13. Both
+* prefixes are sent first as context coded truncated unary codes (x, then y);
+* the suffixes, present only when the matching prefix exceeds 3, follow as
+* fixed length bypass codes (x, then y)
+*
+* @param[inout] ps_cabac
+* pointer to cabac context (handle)
+*
+* @param[in] last_coeff_x
+* x co-ordinate of the last coded coeff of TU (in scan order)
+*
+* @param[in] last_coeff_y
+* y co-ordinate of the last coded coeff of TU (in scan order)
+*
+* @param[in] log2_tr_size
+* transform block size corresponding to this node in quad tree
+*
+* @param[in] is_luma
+* indicates if residual block corresponds to luma or chroma block
+*
+* @return bitwise OR of the status codes returned by the cabac calls made
+*
+******************************************************************************
+*/
+WORD32 ihevce_cabac_encode_last_coeff_x_y(
+    cab_ctxt_t *ps_cabac,
+    WORD32 last_coeff_x,
+    WORD32 last_coeff_y,
+    WORD32 log2_tr_size,
+    WORD32 is_luma)
+{
+    WORD32 status = IHEVCE_SUCCESS;
+    WORD32 ctxt_offset, ctxt_shift;
+
+    /* prefix of each co-ordinate and the largest prefix for this tu size */
+    const WORD32 prefix_x = gu1_hevce_last_coeff_prefix[last_coeff_x];
+    const WORD32 prefix_y = gu1_hevce_last_coeff_prefix[last_coeff_y];
+    const WORD32 prefix_max = gu1_hevce_last_coeff_prefix[(1 << log2_tr_size) - 1];
+
+    /* context offset and shift as per section 9.3.3.1.2 */
+    if(!is_luma)
+    {
+        ctxt_offset = 15;
+        ctxt_shift = log2_tr_size - 2;
+    }
+    else
+    {
+        ctxt_offset = (3 * (log2_tr_size - 2)) + ((log2_tr_size - 1) >> 2);
+        ctxt_shift = (log2_tr_size + 1) >> 2;
+    }
+
+    /* x prefix, tunary binarized and context coded */
+    status |= ihevce_cabac_encode_tunary(
+        ps_cabac,
+        prefix_x,
+        prefix_max,
+        IHEVC_CAB_COEFFX_PREFIX + ctxt_offset,
+        ctxt_shift,
+        prefix_max);
+
+    AEV_TRACE("last_coeff_x_prefix", prefix_x, ps_cabac->u4_range);
+
+    /* y prefix, tunary binarized and context coded */
+    status |= ihevce_cabac_encode_tunary(
+        ps_cabac,
+        prefix_y,
+        prefix_max,
+        IHEVC_CAB_COEFFY_PREFIX + ctxt_offset,
+        ctxt_shift,
+        prefix_max);
+
+    AEV_TRACE("last_coeff_y_prefix", prefix_y, ps_cabac->u4_range);
+
+    /* x suffix: fixed length bypass code, sent only when prefix is above 3 */
+    if(prefix_x > 3)
+    {
+        WORD32 num_bits = (prefix_x - 2) >> 1;
+        WORD32 suffix_x = last_coeff_x - gu1_hevce_last_coeff_prefix_code[prefix_x];
+
+        status |= ihevce_cabac_encode_bypass_bins(ps_cabac, suffix_x, num_bits);
+
+        AEV_TRACE("last_coeff_x_suffix", suffix_x, ps_cabac->u4_range);
+    }
+
+    /* y suffix: fixed length bypass code, sent only when prefix is above 3 */
+    if(prefix_y > 3)
+    {
+        WORD32 num_bits = (prefix_y - 2) >> 1;
+        WORD32 suffix_y = last_coeff_y - gu1_hevce_last_coeff_prefix_code[prefix_y];
+
+        status |= ihevce_cabac_encode_bypass_bins(ps_cabac, suffix_y, num_bits);
+
+        AEV_TRACE("last_coeff_y_suffix", suffix_y, ps_cabac->u4_range);
+    }
+
+    return status;
+}
+
+/**
+******************************************************************************
+*
+* @brief Encodes a transform tree as per section 7.3.11
+*
+* @par Description
+* Uses recursion till a leaf node is reached where a transform unit
+* is coded. While recursing split_transform_flag and parent chroma cbf flags
+* are coded before recursing to leaf node. At a leaf node the luma cbf, the
+* qp delta (when still pending for the cu) and the luma / chroma residues are
+* coded, after which the tu index kept in the entropy context is incremented
+*
+* @param[inout] ps_entropy_ctxt
+* pointer to entropy context (handle)
+*
+* @param[in] x0_ctb
+* x co-ordinate w.r.t ctb start of current tu node of coding tree
+*
+* @param[in] y0_ctb
+* y co-ordinate w.r.t ctb start of current tu node of coding tree
+*
+* @param[in] log2_tr_size
+* transform block size corresponding to this node in quad tree
+*
+* @param[in] tr_depth
+* current depth of the tree
+*
+* @param[in] blk_num
+* current block number in the quad tree (required for chroma 4x4 coding)
+*
+* @param[in] ps_enc_cu
+* pointer to the cu being coded; its tu array, coeff buffer, prediction mode,
+* partition mode and cu position are read by this function
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ihevce_encode_transform_tree(
+ entropy_context_t *ps_entropy_ctxt,
+ WORD32 x0_ctb,
+ WORD32 y0_ctb,
+ WORD32 log2_tr_size,
+ WORD32 tr_depth,
+ WORD32 blk_num,
+ cu_enc_loop_out_t *ps_enc_cu)
+{
+ WORD32 ret = IHEVCE_SUCCESS;
+ sps_t *ps_sps = ps_entropy_ctxt->ps_sps;
+ WORD32 split_tr_flag;
+
+ /* tu to be coded next; the index is shared across the recursion through */
+ /* the entropy context and is incremented every time a leaf is coded */
+ WORD32 tu_idx = ps_entropy_ctxt->i4_tu_idx;
+ tu_enc_loop_out_t *ps_enc_tu = ps_enc_cu->ps_enc_tu + tu_idx;
+
+ /* TU size in pels */
+ WORD32 tu_size = 4 << ps_enc_tu->s_tu.b3_size;
+
+ cab_ctxt_t *ps_cabac = &ps_entropy_ctxt->s_cabac_ctxt;
+
+ WORD32 max_tr_depth;
+ WORD32 is_intra = (ps_enc_cu->b1_pred_mode_flag == PRED_MODE_INTRA);
+ WORD32 log2_min_trafo_size, log2_max_trafo_size;
+ UWORD32 u4_bits_estimated_prev;
+
+ WORD32 intra_nxn_pu = 0;
+ WORD32 ctxt_inc;
+ WORD32 cbf_luma = 0;
+ /* chroma cbf of this node; index 1 is the second sub tu used for 422 */
+ WORD32 ai4_cbf_cb[2] = { 0, 0 };
+ WORD32 ai4_cbf_cr[2] = { 0, 0 };
+ UWORD32 tu_split_bits = 0;
+ UWORD8 u1_is_422 = (ps_sps->i1_chroma_format_idc == 2);
+
+ /* snapshot of the estimated bits on entry; used further down to */
+ /* accumulate the bits spent on the split flag of this node */
+ tu_split_bits = ps_cabac->u4_bits_estimated_q12;
+ /* initialize min / max transform sizes based on sps */
+ log2_min_trafo_size = ps_sps->i1_log2_min_transform_block_size;
+
+ log2_max_trafo_size = log2_min_trafo_size + ps_sps->i1_log2_diff_max_min_transform_block_size;
+
+ /* initialize max transform depth for intra / inter signalled in sps */
+ if(is_intra)
+ {
+ max_tr_depth = ps_sps->i1_max_transform_hierarchy_depth_intra;
+ intra_nxn_pu = ps_enc_cu->b3_part_mode == PART_NxN;
+ }
+ else
+ {
+ max_tr_depth = ps_sps->i1_max_transform_hierarchy_depth_inter;
+ }
+
+ /* Sanity checks */
+ ASSERT(tr_depth <= 4);
+ ASSERT(log2_min_trafo_size >= 2);
+ ASSERT(log2_max_trafo_size <= 5);
+ ASSERT((tu_idx >= 0) && (tu_idx < ps_enc_cu->u2_num_tus_in_cu));
+ ASSERT((tu_size >= 4) && (tu_size <= (1 << log2_tr_size)));
+
+ /* Encode split transform flag based on following conditions; sec 7.3.11 */
+ if((log2_tr_size <= log2_max_trafo_size) && (log2_tr_size > log2_min_trafo_size) &&
+ (tr_depth < max_tr_depth) && (!(intra_nxn_pu && (tr_depth == 0))))
+ {
+ /* encode the split transform flag, context derived as per Table9-37 */
+ ctxt_inc = IHEVC_CAB_SPLIT_TFM + (5 - log2_tr_size);
+
+ /* split if actual tu size is smaller than target tu size */
+ split_tr_flag = tu_size < (1 << log2_tr_size);
+ u4_bits_estimated_prev = ps_cabac->u4_bits_estimated_q12;
+ ret |= ihevce_cabac_encode_bin(ps_cabac, split_tr_flag, ctxt_inc);
+
+ if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
+ { // clang-format off
+ /*PIC INFO : populate split tu flag bits*/
+ ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_split_tu_flag +=
+ (ps_cabac->u4_bits_estimated_q12 - u4_bits_estimated_prev);
+ } // clang-format on
+
+ AEV_TRACE("split_transform_flag", split_tr_flag, ps_cabac->u4_range);
+ }
+ else
+ {
+ WORD32 inter_split;
+ /*********************************************************************/
+ /* */
+ /* split tr is implicitly derived as 1 if (see section 7.4.10) */
+ /* a. log2_tr_size > log2_max_trafo_size */
+ /* b. intra cu has NXN pu */
+ /* c. inter cu is not 2Nx2N && max_transform_hierarchy_depth_inter=0*/
+ /* */
+ /* split tu is implicitly derived as 0 otherwise */
+ /*********************************************************************/
+ inter_split = (!is_intra) && (max_tr_depth == 0) && (tr_depth == 0) &&
+ (ps_enc_cu->b3_part_mode != PART_2Nx2N);
+
+ if((log2_tr_size > log2_max_trafo_size) || (intra_nxn_pu && (tr_depth == 0)) ||
+ (inter_split))
+ {
+ split_tr_flag = 1;
+ }
+ else
+ {
+ split_tr_flag = 0;
+ }
+ }
+ /*accumulate only tu tree bits*/
+ /* (difference is zero when the split flag was derived and not coded) */
+ ps_cabac->u4_true_tu_split_flag_q12 += ps_cabac->u4_bits_estimated_q12 - tu_split_bits;
+
+ /* Encode the cbf flags for chroma before the split as per sec 7.3.11 */
+ if(log2_tr_size > 2)
+ {
+ /* encode the cbf cb, context derived as per Table 9-37 */
+ /* (the same ctxt_inc is reused for the cbf cr bins below) */
+ ctxt_inc = IHEVC_CAB_CBCR_IDX + tr_depth;
+
+ /* Note chroma cbf is coded for depth=0 or if parent cbf was coded */
+ if((tr_depth == 0) || (ps_entropy_ctxt->apu1_cbf_cb[0][tr_depth - 1]) ||
+ (ps_entropy_ctxt->apu1_cbf_cb[1][tr_depth - 1]))
+ {
+#if CABAC_BIT_EFFICIENT_CHROMA_PARENT_CBF
+ /*************************************************************/
+ /* Bit-Efficient chroma cbf signalling */
+ /* if children nodes have 0 cbf parent cbf can be coded as 0 */
+ /* peeking through all the child nodes for cb to check if */
+ /* parent can be coded as 0 */
+ /*************************************************************/
+ WORD32 tu_cnt = 0;
+ while(1)
+ {
+ WORD32 trans_size = 1 << (ps_enc_tu[tu_cnt].s_tu.b3_size + 2);
+ WORD32 tu_x = (ps_enc_tu[tu_cnt].s_tu.b4_pos_x << 2);
+ WORD32 tu_y = (ps_enc_tu[tu_cnt].s_tu.b4_pos_y << 2);
+
+ /* NOTE(review): this check runs after ps_enc_tu[tu_cnt] has */
+ /* already been read above, and ps_enc_tu is already offset */
+ /* by tu_idx while the bound is the tu count of the whole cu */
+ ASSERT(tu_cnt < ps_enc_cu->u2_num_tus_in_cu);
+
+ if((ps_enc_tu[tu_cnt].s_tu.b1_cb_cbf) || (ps_enc_tu[tu_cnt].s_tu.b1_cb_cbf_subtu1))
+ {
+ ai4_cbf_cb[0] = ps_enc_tu[tu_cnt].s_tu.b1_cb_cbf;
+ ai4_cbf_cb[1] = ps_enc_tu[tu_cnt].s_tu.b1_cb_cbf_subtu1;
+ break;
+ }
+
+ /* 8x8 parent has only one 4x4 valid chroma block for 420 */
+ if(3 == log2_tr_size)
+ break;
+
+ /* stop at the tu whose bottom right corner coincides with */
+ /* the bottom right corner of this node */
+ if((tu_x + trans_size == (x0_ctb + (1 << log2_tr_size))) &&
+ (tu_y + trans_size == (y0_ctb + (1 << log2_tr_size))))
+ {
+ ai4_cbf_cb[0] = ps_enc_tu[tu_cnt].s_tu.b1_cb_cbf;
+ ai4_cbf_cb[1] = ps_enc_tu[tu_cnt].s_tu.b1_cb_cbf_subtu1;
+ ASSERT(
+ (0 == ps_enc_tu[tu_cnt].s_tu.b1_cb_cbf) &&
+ (0 == ps_enc_tu[tu_cnt].s_tu.b1_cb_cbf_subtu1));
+ break;
+ }
+
+ tu_cnt++;
+ }
+#else
+ /* read cbf only when split is 0 (child node) else force cbf=1 */
+ ai4_cbf_cb[0] = (split_tr_flag && (log2_tr_size > 3)) ? 1 : ps_enc_tu->s_tu.b1_cb_cbf;
+ ai4_cbf_cb[1] =
+ (split_tr_flag && (log2_tr_size > 3)) ? 1 : ps_enc_tu->s_tu.b1_cb_cbf_subtu1;
+
+#endif
+ /* for 422 two cb cbf bins are coded (one per sub tu) when the node */
+ /* is not split or is 8x8; otherwise a single combined bin is coded */
+ if((u1_is_422) && ((!split_tr_flag) || (3 == log2_tr_size)))
+ {
+ u4_bits_estimated_prev = ps_cabac->u4_bits_estimated_q12;
+ ret |= ihevce_cabac_encode_bin(ps_cabac, ai4_cbf_cb[0], ctxt_inc);
+
+ if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
+ { // clang-format off
+ /*PIC INFO : Populate CBF chroma bits (cb, sub tu 0)*/
+ ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_cbf_chroma_bits +=
+ (ps_cabac->u4_bits_estimated_q12 -
+ u4_bits_estimated_prev);
+ } // clang-format on
+
+ AEV_TRACE("cbf_cb", ai4_cbf_cb[0], ps_cabac->u4_range);
+
+ u4_bits_estimated_prev = ps_cabac->u4_bits_estimated_q12;
+ ret |= ihevce_cabac_encode_bin(ps_cabac, ai4_cbf_cb[1], ctxt_inc);
+
+ if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
+ { // clang-format off
+ /*PIC INFO : Populate CBF chroma bits (cb, sub tu 1)*/
+ ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_cbf_chroma_bits +=
+ (ps_cabac->u4_bits_estimated_q12 -
+ u4_bits_estimated_prev);
+ } // clang-format on
+
+ AEV_TRACE("cbf_cb", ai4_cbf_cb[1], ps_cabac->u4_range);
+ }
+ else
+ {
+ u4_bits_estimated_prev = ps_cabac->u4_bits_estimated_q12;
+ ret |= ihevce_cabac_encode_bin(ps_cabac, ai4_cbf_cb[0] || ai4_cbf_cb[1], ctxt_inc);
+
+ if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
+ { // clang-format off
+ /*PIC INFO : Populate CBF chroma bits (cb)*/
+ ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_cbf_chroma_bits +=
+ (ps_cabac->u4_bits_estimated_q12 -
+ u4_bits_estimated_prev);
+ } // clang-format on
+
+ AEV_TRACE("cbf_cb", ai4_cbf_cb[0] || ai4_cbf_cb[1], ps_cabac->u4_range);
+ }
+ }
+ else
+ {
+ /* nothing coded: inherit the (zero) cb cbf of the parent depth */
+ ai4_cbf_cb[0] = ps_entropy_ctxt->apu1_cbf_cb[0][tr_depth - 1];
+ ai4_cbf_cb[1] = ps_entropy_ctxt->apu1_cbf_cb[1][tr_depth - 1];
+ }
+
+ if((tr_depth == 0) || (ps_entropy_ctxt->apu1_cbf_cr[0][tr_depth - 1]) ||
+ (ps_entropy_ctxt->apu1_cbf_cr[1][tr_depth - 1]))
+ {
+#if CABAC_BIT_EFFICIENT_CHROMA_PARENT_CBF
+ /*************************************************************/
+ /* Bit-Efficient chroma cbf signalling */
+ /* if children nodes have 0 cbf parent cbf can be coded as 0 */
+ /* peeking through all the child nodes for cr to check if */
+ /* parent can be coded as 0 */
+ /*************************************************************/
+ WORD32 tu_cnt = 0;
+ while(1)
+ {
+ WORD32 trans_size = 1 << (ps_enc_tu[tu_cnt].s_tu.b3_size + 2);
+ WORD32 tu_x = (ps_enc_tu[tu_cnt].s_tu.b4_pos_x << 2);
+ WORD32 tu_y = (ps_enc_tu[tu_cnt].s_tu.b4_pos_y << 2);
+
+ /* NOTE(review): same remark as for the cb loop above - the */
+ /* check follows the reads of ps_enc_tu[tu_cnt] */
+ ASSERT(tu_cnt < ps_enc_cu->u2_num_tus_in_cu);
+
+ if((ps_enc_tu[tu_cnt].s_tu.b1_cr_cbf) || (ps_enc_tu[tu_cnt].s_tu.b1_cr_cbf_subtu1))
+ {
+ ai4_cbf_cr[0] = ps_enc_tu[tu_cnt].s_tu.b1_cr_cbf;
+ ai4_cbf_cr[1] = ps_enc_tu[tu_cnt].s_tu.b1_cr_cbf_subtu1;
+ break;
+ }
+
+ /* 8x8 parent has only one 4x4 valid chroma block for 420 */
+ if(3 == log2_tr_size)
+ break;
+
+ if((tu_x + trans_size == (x0_ctb + (1 << log2_tr_size))) &&
+ (tu_y + trans_size == (y0_ctb + (1 << log2_tr_size))))
+ {
+ ai4_cbf_cr[0] = ps_enc_tu[tu_cnt].s_tu.b1_cr_cbf;
+ ai4_cbf_cr[1] = ps_enc_tu[tu_cnt].s_tu.b1_cr_cbf_subtu1;
+ ASSERT(
+ (0 == ps_enc_tu[tu_cnt].s_tu.b1_cr_cbf) &&
+ (0 == ps_enc_tu[tu_cnt].s_tu.b1_cr_cbf_subtu1));
+ break;
+ }
+
+ tu_cnt++;
+ }
+#else
+ /* read cbf only when split is 0 (child node) else force cbf=1 */
+ ai4_cbf_cr[0] = (split_tr_flag && (log2_tr_size > 3)) ? 1 : ps_enc_tu->s_tu.b1_cr_cbf;
+ ai4_cbf_cr[1] =
+ (split_tr_flag && (log2_tr_size > 3)) ? 1 : ps_enc_tu->s_tu.b1_cr_cbf_subtu1;
+#endif
+
+ /* same 422 handling as for cb: two bins, or one combined bin */
+ if((u1_is_422) && ((!split_tr_flag) || (3 == log2_tr_size)))
+ {
+ u4_bits_estimated_prev = ps_cabac->u4_bits_estimated_q12;
+ ret |= ihevce_cabac_encode_bin(ps_cabac, ai4_cbf_cr[0], ctxt_inc);
+
+ if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
+ { // clang-format off
+ /*PIC INFO : Populate CBF chroma bits (cr, sub tu 0)*/
+ ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_cbf_chroma_bits +=
+ (ps_cabac->u4_bits_estimated_q12 -
+ u4_bits_estimated_prev);
+ } // clang-format on
+
+ AEV_TRACE("cbf_cr", ai4_cbf_cr[0], ps_cabac->u4_range);
+
+ u4_bits_estimated_prev = ps_cabac->u4_bits_estimated_q12;
+ ret |= ihevce_cabac_encode_bin(ps_cabac, ai4_cbf_cr[1], ctxt_inc);
+
+ if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
+ { // clang-format off
+ /*PIC INFO : Populate CBF chroma bits (cr, sub tu 1)*/
+ ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_cbf_chroma_bits +=
+ (ps_cabac->u4_bits_estimated_q12 -
+ u4_bits_estimated_prev);
+ } // clang-format on
+
+ AEV_TRACE("cbf_cr", ai4_cbf_cr[1], ps_cabac->u4_range);
+ }
+ else
+ {
+ u4_bits_estimated_prev = ps_cabac->u4_bits_estimated_q12;
+ ret |= ihevce_cabac_encode_bin(ps_cabac, ai4_cbf_cr[0] || ai4_cbf_cr[1], ctxt_inc);
+
+ if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
+ { // clang-format off
+ /*PIC INFO : Populate CBF chroma bits (cr)*/
+ ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_cbf_chroma_bits +=
+ (ps_cabac->u4_bits_estimated_q12 -
+ u4_bits_estimated_prev);
+ } // clang-format on
+
+ AEV_TRACE("cbf_cr", ai4_cbf_cr[0] || ai4_cbf_cr[1], ps_cabac->u4_range);
+ }
+ }
+ else
+ {
+ /* nothing coded: inherit the (zero) cr cbf of the parent depth */
+ ai4_cbf_cr[0] = ps_entropy_ctxt->apu1_cbf_cr[0][tr_depth - 1];
+ ai4_cbf_cr[1] = ps_entropy_ctxt->apu1_cbf_cr[1][tr_depth - 1];
+ }
+
+ /* record the chroma cbfs of this depth for use by the child nodes */
+ ps_entropy_ctxt->apu1_cbf_cb[0][tr_depth] = ai4_cbf_cb[0];
+ ps_entropy_ctxt->apu1_cbf_cr[0][tr_depth] = ai4_cbf_cr[0];
+ ps_entropy_ctxt->apu1_cbf_cb[1][tr_depth] = ai4_cbf_cb[1];
+ ps_entropy_ctxt->apu1_cbf_cr[1][tr_depth] = ai4_cbf_cr[1];
+ }
+ else
+ {
+ /* 4x4 luma node: chroma cbfs are taken from the parent depth */
+ ai4_cbf_cb[0] = ps_entropy_ctxt->apu1_cbf_cb[0][tr_depth - 1];
+ ai4_cbf_cr[0] = ps_entropy_ctxt->apu1_cbf_cr[0][tr_depth - 1];
+ ai4_cbf_cb[1] = ps_entropy_ctxt->apu1_cbf_cb[1][tr_depth - 1];
+ ai4_cbf_cr[1] = ps_entropy_ctxt->apu1_cbf_cr[1][tr_depth - 1];
+ }
+
+ if(split_tr_flag)
+ {
+ /* recurse into quad child nodes till a leaf node is reached */
+ WORD32 x1_ctb = x0_ctb + ((1 << log2_tr_size) >> 1);
+ WORD32 y1_ctb = y0_ctb + ((1 << log2_tr_size) >> 1);
+
+ /* node0 of quad tree */
+ ret |= ihevce_encode_transform_tree(
+ ps_entropy_ctxt,
+ x0_ctb,
+ y0_ctb,
+ log2_tr_size - 1,
+ tr_depth + 1,
+ 0, /* block 0 */
+ ps_enc_cu);
+
+ /* node1 of quad tree */
+ ret |= ihevce_encode_transform_tree(
+ ps_entropy_ctxt,
+ x1_ctb,
+ y0_ctb,
+ log2_tr_size - 1,
+ tr_depth + 1,
+ 1, /* block 1 */
+ ps_enc_cu);
+
+ /* node2 of quad tree */
+ ret |= ihevce_encode_transform_tree(
+ ps_entropy_ctxt,
+ x0_ctb,
+ y1_ctb,
+ log2_tr_size - 1,
+ tr_depth + 1,
+ 2, /* block 2 */
+ ps_enc_cu);
+
+ /* node3 of quad tree */
+ ret |= ihevce_encode_transform_tree(
+ ps_entropy_ctxt,
+ x1_ctb,
+ y1_ctb,
+ log2_tr_size - 1,
+ tr_depth + 1,
+ 3, /* block 3 */
+ ps_enc_cu);
+ }
+ else
+ {
+ /* leaf node is reached! Encode the TU */
+ WORD32 encode_delta_qp;
+ void *pv_coeff;
+ void *pv_cu_coeff = ps_enc_cu->pv_coeff;
+
+ /* condition to encode qp of cu in first coded tu */
+ encode_delta_qp = ps_entropy_ctxt->i1_encode_qp_delta &&
+ (ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS);
+
+ if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
+ { // clang-format off
+ /*PIC INFO : Total TUs based on size*/
+ /* tu_size >> 3 gives index 0 / 1 / 2 for sizes 4 / 8 / 16; size 32 */
+ /* is mapped explicitly to index 3 */
+ if(32 == tu_size)
+ {
+ ps_entropy_ctxt->ps_pic_level_info->i8_total_tu_based_on_size[3]++;
+ }
+ else
+ {
+ ps_entropy_ctxt->ps_pic_level_info->i8_total_tu_based_on_size[tu_size >> 3]++;
+ }
+ } // clang-format on
+
+ /* sanity checks */
+ ASSERT(ps_entropy_ctxt->i1_ctb_num_pcm_blks == 0);
+ ASSERT((ps_enc_tu->s_tu.b4_pos_x << 2) == x0_ctb);
+ ASSERT((ps_enc_tu->s_tu.b4_pos_y << 2) == y0_ctb);
+ ASSERT(tu_size == (1 << log2_tr_size));
+
+ /********************************************************************/
+ /* encode luma cbf if any of following conditions are true */
+ /* intra cu | transform depth > 0 | any of chroma cbfs are coded */
+ /* */
+ /* Note that these conditions mean that cbf_luma need not be */
+ /* signalled and implicitly derived as 1 for inter cu whose tfr size*/
+ /* is same as cu size and cbf for cb+cr are zero as no_residue_flag */
+ /* at cu level = 1 indicated cbf luma is coded */
+ /********************************************************************/
+ if(is_intra || (tr_depth != 0) || ai4_cbf_cb[0] || ai4_cbf_cr[0] ||
+ ((u1_is_422) && (ai4_cbf_cb[1] || ai4_cbf_cr[1])))
+ {
+ /* encode cbf luma, context derived as per Table 9-37 */
+ cbf_luma = ps_enc_tu->s_tu.b1_y_cbf;
+
+ ctxt_inc = IHEVC_CAB_CBF_LUMA_IDX;
+ ctxt_inc += (tr_depth == 0) ? 1 : 0;
+
+ if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
+ {
+ if(1 == cbf_luma)
+ {
+ // clang-format off
+ /*PIC INFO: Populated coded Intra/Inter TUs in CU*/
+ if(1 == is_intra)
+ ps_entropy_ctxt->ps_pic_level_info->i8_total_intra_coded_tu++;
+ else
+ ps_entropy_ctxt->ps_pic_level_info->i8_total_inter_coded_tu++;
+ // clang-format on
+ }
+ else
+ { /*PIC INFO: Populated coded non-coded TUs in CU*/
+ ps_entropy_ctxt->ps_pic_level_info->i8_total_non_coded_tu++;
+ }
+ }
+ u4_bits_estimated_prev = ps_cabac->u4_bits_estimated_q12;
+ ret |= ihevce_cabac_encode_bin(ps_cabac, cbf_luma, ctxt_inc);
+
+ if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
+ { // clang-format off
+ /*PIC INFO : Populate CBF luma bits*/
+ ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_cbf_luma_bits +=
+ (ps_cabac->u4_bits_estimated_q12 - u4_bits_estimated_prev);
+ } // clang-format on
+ AEV_TRACE("cbf_luma", cbf_luma, ps_cabac->u4_range);
+ }
+ else
+ {
+ if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
+ {
+ /*PIC INFO: Populated coded Inter TUs in CU*/
+ ps_entropy_ctxt->ps_pic_level_info->i8_total_inter_coded_tu++;
+ }
+
+ /* shall be 1 as no_residue_flag was encoded as 1 in inter cu */
+ ASSERT(1 == ps_enc_tu->s_tu.b1_y_cbf);
+ cbf_luma = ps_enc_tu->s_tu.b1_y_cbf;
+ }
+
+ /*******************************************************************/
+ /* code qp delta conditionally if following conditions are true */
+ /* any cbf coded (luma/cb/cr) and qp_delta_coded is 0 for this cu */
+ /* see section 7.3.12 Transform unit Syntax */
+ /*******************************************************************/
+ {
+ WORD32 cbf_chroma = (ai4_cbf_cb[0] || ai4_cbf_cr[0]) ||
+ (u1_is_422 && (ai4_cbf_cb[1] || ai4_cbf_cr[1]));
+
+ if((cbf_luma || cbf_chroma) && encode_delta_qp)
+ {
+ WORD32 tu_qp = ps_enc_tu->s_tu.b7_qp;
+ WORD32 qp_pred, qp_left, qp_top;
+ /* NOTE(review): this initial value is never used; qp_delta is */
+ /* reassigned from qp_pred further down before it is coded */
+ WORD32 qp_delta = tu_qp - ps_entropy_ctxt->i1_cur_qp;
+ WORD32 x_nbr_indx, y_nbr_indx;
+
+ /* Added code for handling the QP neighbour population depending
+ on the diff_cu_qp_delta_depth: Lokesh */
+ /* minus 2 because the pos_x and pos_y are given in the order of
+ * 8x8 blocks rather than pixels */
+ WORD32 log2_min_cu_qp_delta_size =
+ ps_entropy_ctxt->i1_log2_ctb_size -
+ ps_entropy_ctxt->ps_pps->i1_diff_cu_qp_delta_depth;
+ //WORD32 min_cu_qp_delta_size = 1 << log2_min_cu_qp_delta_size;
+
+ //WORD32 curr_pos_x = ps_enc_cu->b3_cu_pos_x << 3;
+ //WORD32 curr_pos_y = ps_enc_cu->b3_cu_pos_y << 3;
+
+ /* mask that aligns the cu position (8x8 units) to the start of */
+ /* its quantization group */
+ WORD32 block_addr_align = 15 << (log2_min_cu_qp_delta_size - 3);
+
+ ps_entropy_ctxt->i4_qg_pos_x = ps_enc_cu->b3_cu_pos_x & block_addr_align;
+ ps_entropy_ctxt->i4_qg_pos_y = ps_enc_cu->b3_cu_pos_y & block_addr_align;
+
+ x_nbr_indx = ps_entropy_ctxt->i4_qg_pos_x - 1;
+ y_nbr_indx = ps_entropy_ctxt->i4_qg_pos_y - 1;
+
+ /* left / top predictors come from the 8x8 qp map (row stride of 8) */
+ /* when the neighbour lies inside the ctb, else from the previously */
+ /* coded qp */
+ if(ps_entropy_ctxt->i4_qg_pos_x > 0)
+ {
+ // clang-format off
+ qp_left =
+ ps_entropy_ctxt->ai4_8x8_cu_qp[x_nbr_indx +
+ (ps_entropy_ctxt->i4_qg_pos_y * 8)];
+ // clang-format on
+ }
+ if(ps_entropy_ctxt->i4_qg_pos_y > 0)
+ {
+ // clang-format off
+ qp_top = ps_entropy_ctxt->ai4_8x8_cu_qp[ps_entropy_ctxt->i4_qg_pos_x +
+ y_nbr_indx * 8];
+ // clang-format on
+ }
+ if(ps_entropy_ctxt->i4_qg_pos_x == 0)
+ {
+ /*previous coded Qp*/
+ qp_left = ps_entropy_ctxt->i1_cur_qp;
+ }
+ if(ps_entropy_ctxt->i4_qg_pos_y == 0)
+ {
+ /*previous coded Qp*/
+ qp_top = ps_entropy_ctxt->i1_cur_qp;
+ }
+
+ qp_pred = (qp_left + qp_top + 1) >> 1;
+ // clang-format off
+ /* start of every frame encode qp delta wrt slice qp when entropy
+ * sync is enabled */
+ if(ps_entropy_ctxt->i4_ctb_x == 0 &&
+ ps_entropy_ctxt->i4_qg_pos_x == 0 &&
+ ps_entropy_ctxt->i4_qg_pos_y == 0 &&
+ ps_entropy_ctxt->s_cabac_ctxt.i1_entropy_coding_sync_enabled_flag)
+ // clang-format on
+ {
+ qp_pred = ps_entropy_ctxt->ps_slice_hdr->i1_slice_qp_delta +
+ ps_entropy_ctxt->ps_pps->i1_pic_init_qp;
+ }
+ qp_delta = tu_qp - qp_pred;
+
+ /*PIC INFO : Populate QP delta bits*/
+ u4_bits_estimated_prev = ps_cabac->u4_bits_estimated_q12;
+
+ /* code the qp delta */
+ ret |= ihevce_cabac_encode_qp_delta(ps_cabac, qp_delta);
+
+ if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
+ {
+ // clang-format off
+ ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_qp_delta_bits +=
+ (ps_cabac->u4_bits_estimated_q12 -
+ u4_bits_estimated_prev);
+ // clang-format on
+ }
+
+ /* remember the coded qp and mark qp delta as sent */
+ ps_entropy_ctxt->i1_cur_qp = tu_qp;
+ //ps_entropy_ctxt->i1_cur_qp = Qp_pred;
+ ps_entropy_ctxt->i1_encode_qp_delta = 0;
+ //ps_entropy_ctxt->i4_is_cu_cbf_zero = 0;
+ }
+
+ if(cbf_luma || cbf_chroma)
+ {
+ ps_entropy_ctxt->i4_is_cu_cbf_zero = 0;
+ }
+
+ /* code the residue of for luma and chroma tu based on cbf */
+ if((cbf_luma) && (1 == ps_entropy_ctxt->i4_enable_res_encode))
+ {
+ u4_bits_estimated_prev = ps_entropy_ctxt->s_cabac_ctxt.u4_bits_estimated_q12;
+ /* code the luma residue */
+ pv_coeff = (void *)((UWORD8 *)pv_cu_coeff + ps_enc_tu->i4_luma_coeff_offset);
+
+ ret |= ihevce_cabac_residue_encode(ps_entropy_ctxt, pv_coeff, log2_tr_size, 1);
+
+ if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
+ { // clang-format off
+ /*PIC INFO : Populate Residue Luma Bits*/
+ ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_res_luma_bits +=
+ (ps_entropy_ctxt->s_cabac_ctxt.u4_bits_estimated_q12 -
+ u4_bits_estimated_prev);
+ } // clang-format on
+ }
+
+ /* code chroma residue based on transform size */
+ /* For Intra 4x4 pu chroma is coded after all 4 luma blks coded */
+ /* Note: chroma not encoded in rdopt mode */
+ /* NOTE(review): the op-mode test is commented out inside the */
+ /* condition below, so as written only log2_tr_size and blk_num */
+ /* decide whether this branch is taken */
+ if(((log2_tr_size > 2) || (3 == blk_num)) /* &&
+ (CABAC_MODE_ENCODE_BITS == ps_cabac->e_cabac_op_mode) */
+ )
+ {
+ WORD32 log2_chroma_tr_size;
+ WORD32 i4_subtu_idx;
+ void *pv_coeff_cb, *pv_coeff_cr;
+
+ /* two chroma sub tus per component for 422, one otherwise */
+ WORD32 i4_num_subtus = u1_is_422 + 1;
+
+ if(1 == ps_entropy_ctxt->i4_enable_res_encode)
+ {
+ for(i4_subtu_idx = 0; i4_subtu_idx < i4_num_subtus; i4_subtu_idx++)
+ {
+ if(ai4_cbf_cb[i4_subtu_idx])
+ {
+ /* initialize chroma transform size and coeff based
+ * on luma size */
+ if(2 == log2_tr_size)
+ {
+ /*********************************************************/
+ /* For Intra 4x4, chroma transform size is 4 and chroma */
+ /* coeff offset is present in the first Luma block */
+ /*********************************************************/
+ log2_chroma_tr_size = 2;
+
+ /* -3 is for going to first luma tu of the 4 TUs in min CU */
+ pv_coeff_cb =
+ (void
+ *)((UWORD8 *)pv_cu_coeff + ps_enc_tu[-3].ai4_cb_coeff_offset[i4_subtu_idx]);
+ }
+ else
+ {
+ log2_chroma_tr_size = (log2_tr_size - 1);
+
+ pv_coeff_cb =
+ (void
+ *)((UWORD8 *)pv_cu_coeff + ps_enc_tu->ai4_cb_coeff_offset[i4_subtu_idx]);
+ }
+ // clang-format off
+ u4_bits_estimated_prev =
+ ps_entropy_ctxt->s_cabac_ctxt.u4_bits_estimated_q12;
+ // clang-format on
+ /* code the cb residue */
+ ret |= ihevce_cabac_residue_encode(
+ ps_entropy_ctxt, pv_coeff_cb, log2_chroma_tr_size, 0);
+
+ if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
+ { // clang-format off
+ /*PIC INFO : Populate Residue Chroma Bits (cb)*/
+ ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_res_chroma_bits +=
+ (ps_entropy_ctxt->s_cabac_ctxt.u4_bits_estimated_q12 -
+ u4_bits_estimated_prev);
+ } // clang-format on
+ }
+ }
+ }
+
+ if(1 == ps_entropy_ctxt->i4_enable_res_encode)
+ {
+ for(i4_subtu_idx = 0; i4_subtu_idx < i4_num_subtus; i4_subtu_idx++)
+ {
+ if(ai4_cbf_cr[i4_subtu_idx])
+ {
+ /* initialize chroma transform size and coeff based on luma size */
+ if(2 == log2_tr_size)
+ {
+ /*********************************************************/
+ /* For Intra 4x4, chroma transform size is 4 and chroma */
+ /* coeff offset is present in the first Luma block */
+ /*********************************************************/
+ log2_chroma_tr_size = 2;
+
+ /* -3 is for going to first luma tu of the 4 TUs in min CU */
+ pv_coeff_cr =
+ (void
+ *)((UWORD8 *)pv_cu_coeff + ps_enc_tu[-3].ai4_cr_coeff_offset[i4_subtu_idx]);
+ }
+ else
+ {
+ log2_chroma_tr_size = (log2_tr_size - 1);
+
+ pv_coeff_cr =
+ (void
+ *)((UWORD8 *)pv_cu_coeff + ps_enc_tu->ai4_cr_coeff_offset[i4_subtu_idx]);
+ }
+ // clang-format off
+ u4_bits_estimated_prev =
+ ps_entropy_ctxt->s_cabac_ctxt.u4_bits_estimated_q12;
+ // clang-format on
+ /* code the cr residue */
+ ret |= ihevce_cabac_residue_encode(
+ ps_entropy_ctxt, pv_coeff_cr, log2_chroma_tr_size, 0);
+ if(ps_cabac->e_cabac_op_mode == CABAC_MODE_ENCODE_BITS)
+ { // clang-format off
+ /*PIC INFO : Populate Residue Chroma Bits (cr)*/
+ ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_res_chroma_bits +=
+ (ps_entropy_ctxt->s_cabac_ctxt.u4_bits_estimated_q12 -
+ u4_bits_estimated_prev);
+ } // clang-format on
+ }
+ }
+ }
+ }
+ }
+
+ /* update tu_idx after encoding current tu */
+ ps_entropy_ctxt->i4_tu_idx++;
+ }
+
+ return ret;
+}
+
+/**
+******************************************************************************
+*
+* @brief Encodes a transform residual block as per section 7.3.13
+*
+* @par Description
+* The residual block is read from a compressed coeff buffer populated during
+* the scanning of the quantized coeffs. The contents of the buffer are
+* briefly explained in param description of pv_coeff
+*
+* @remarks Does not support sign data hiding and transform skip flag currently
+*
+* @remarks Need to resolve the differences between JVT-J1003_d7 spec and
+* HM.8.0-dev for related abs_greater_than_1 context initialization
+* and rice_max parameter used for coeff abs level remaining
+*
+* @param[inout] ps_entropy_ctxt
+* pointer to entropy context (handle)
+*
+* @param[in] pv_coeff
+* Compressed residue buffer containing following information:
+*
+* HEADER(4 bytes) : last_coeff_x, last_coeff_y, scantype, last_subblock_num
+*
+* For each 4x4 subblock starting from last_subblock_num (in scan order)
+* Read 2 bytes : MSB 12bits (0xBAD marker), bit0 cur_csbf, bit1-2 nbr csbf
+*
+* If cur_csbf
+* Read 2 bytes : sig_coeff_map (16bits in scan_order 1:coded, 0:not coded)
+* Read 2 bytes : abs_gt1_flags (max of 8 only)
+* Read 2 bytes : coeff_sign_flags
+*
+* Based on abs_gt1_flags and sig_coeff_map read remaining abs levels
+* Read 2 bytes : remaining_abs_coeffs_minus1 (this is in a loop)
+*
+* @param[in] log2_tr_size
+* transform size of the current TU
+*
+* @param[in] is_luma
+* boolean indicating if the texture type is luma / chroma
+*
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ihevce_cabac_residue_encode(
+ entropy_context_t *ps_entropy_ctxt, void *pv_coeff, WORD32 log2_tr_size, WORD32 is_luma)
+{
+ WORD32 ret = IHEVCE_SUCCESS;
+ cab_ctxt_t *ps_cabac = &ps_entropy_ctxt->s_cabac_ctxt;
+ WORD32 i4_sign_data_hiding_flag, cu_tq_bypass_flag;
+
+ UWORD8 *pu1_coeff_buf_hdr = (UWORD8 *)pv_coeff;
+ UWORD16 *pu2_sig_coeff_buf = (UWORD16 *)pv_coeff;
+
+ /* last sig coeff indices in scan order */
+ WORD32 last_sig_coeff_x = pu1_coeff_buf_hdr[0];
+ WORD32 last_sig_coeff_y = pu1_coeff_buf_hdr[1];
+
+ /* read the scan type : upright diag / horz / vert */
+ WORD32 scan_type = pu1_coeff_buf_hdr[2];
+
+ /************************************************************************/
+ /* position of the last coded sub block. This sub block contains coeff */
+ /* corresponding to last_sig_coeff_x, last_sig_coeff_y. Althoug this can*/
+ /* be derived here it better to be populated by scanning module */
+ /************************************************************************/
+ WORD32 last_csb = pu1_coeff_buf_hdr[3];
+
+ WORD32 cur_csbf = 0, nbr_csbf;
+ WORD32 sig_coeff_base_ctxt; /* cabac context for sig coeff flag */
+ WORD32 abs_gt1_base_ctxt; /* cabac context for abslevel > 1 flag */
+
+ WORD32 gt1_ctxt = 1; /* required for abs_gt1_ctxt modelling */
+
+ WORD32 i;
+
+ /* sanity checks */
+ /* transform skip not supported */
+ ASSERT(0 == ps_entropy_ctxt->ps_pps->i1_transform_skip_enabled_flag);
+
+ cu_tq_bypass_flag = ps_entropy_ctxt->ps_pps->i1_transform_skip_enabled_flag;
+
+ i4_sign_data_hiding_flag = ps_entropy_ctxt->ps_pps->i1_sign_data_hiding_flag;
+
+ if(SCAN_VERT == scan_type)
+ {
+ /* last coeff x and y are swapped for vertical scan */
+ SWAP(last_sig_coeff_x, last_sig_coeff_y);
+ }
+
+ /* Encode the last_sig_coeff_x and last_sig_coeff_y */
+ ret |= ihevce_cabac_encode_last_coeff_x_y(
+ ps_cabac, last_sig_coeff_x, last_sig_coeff_y, log2_tr_size, is_luma);
+
+ /*************************************************************************/
+ /* derive base context index for sig coeff as per section 9.3.3.1.4 */
+ /* TODO; convert to look up based on luma/chroma, scan type and tfr size */
+ /*************************************************************************/
+ if(is_luma)
+ {
+ sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG;
+ abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG;
+
+ if(3 == log2_tr_size)
+ {
+ /* 8x8 transform size */
+ sig_coeff_base_ctxt += (scan_type == SCAN_DIAG_UPRIGHT) ? 9 : 15;
+ }
+ else if(3 < log2_tr_size)
+ {
+ /* larger transform sizes */
+ sig_coeff_base_ctxt += 21;
+ }
+ }
+ else
+ {
+ /* chroma context initializations */
+ sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG + 27;
+ abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG + 16;
+
+ if(3 == log2_tr_size)
+ {
+ /* 8x8 transform size */
+ sig_coeff_base_ctxt += 9;
+ }
+ else if(3 < log2_tr_size)
+ {
+ /* larger transform sizes */
+ sig_coeff_base_ctxt += 12;
+ }
+ }
+
+ /* go to csbf flags */
+ pu2_sig_coeff_buf = (UWORD16 *)(pu1_coeff_buf_hdr + COEFF_BUF_HEADER_LEN);
+
+ /************************************************************************/
+ /* encode the csbf, sig_coeff_map, abs_grt1_flags, abs_grt2_flag, sign */
+ /* and abs_coeff_remaining for each 4x4 starting from last scan to first*/
+ /************************************************************************/
+ for(i = last_csb; i >= 0; i--)
+ {
+ UWORD16 u2_marker_csbf;
+ WORD32 ctxt_idx;
+
+ u2_marker_csbf = *pu2_sig_coeff_buf;
+ pu2_sig_coeff_buf++;
+
+ /* sanity checks for marker present in every csbf flag */
+ ASSERT((u2_marker_csbf >> 4) == 0xBAD);
+
+ /* extract the current and neigbour csbf flags */
+ cur_csbf = u2_marker_csbf & 0x1;
+ nbr_csbf = (u2_marker_csbf >> 1) & 0x3;
+
+ /*********************************************************************/
+ /* code the csbf flags; last and first csb not sent as it is derived */
+ /*********************************************************************/
+ if((i < last_csb) && (i > 0))
+ {
+ ctxt_idx = IHEVC_CAB_CODED_SUBLK_IDX;
+
+ /* ctxt based on right / bottom avail csbf, section 9.3.3.1.3 */
+ ctxt_idx += nbr_csbf ? 1 : 0;
+ ctxt_idx += is_luma ? 0 : 2;
+
+ ret |= ihevce_cabac_encode_bin(ps_cabac, cur_csbf, ctxt_idx);
+ AEV_TRACE("coded_sub_block_flag", cur_csbf, ps_cabac->u4_range);
+ }
+ else
+ {
+ /* sanity check, this csb contains the last_sig_coeff */
+ if(i == last_csb)
+ {
+ ASSERT(cur_csbf == 1);
+ }
+ }
+
+ if(cur_csbf)
+ {
+ /*****************************************************************/
+ /* encode the sig coeff map as per section 7.3.13 */
+ /* significant_coeff_flags: msb=coeff15-lsb=coeff0 in scan order */
+ /*****************************************************************/
+
+ /* Added for Sign bit data hiding*/
+ WORD32 first_scan_pos = 16;
+ WORD32 last_scan_pos = -1;
+ WORD32 sign_hidden = 0;
+
+ UWORD16 u2_gt0_flags = *pu2_sig_coeff_buf;
+ WORD32 gt1_flags = *(pu2_sig_coeff_buf + 1);
+ WORD32 sign_flags = *(pu2_sig_coeff_buf + 2);
+
+ WORD32 sig_coeff_map = u2_gt0_flags;
+
+ WORD32 gt1_bins = 0; /* bins for coeffs with abslevel > 1 */
+
+ WORD32 sign_bins = 0; /* bins for sign flags of coded coeffs */
+ WORD32 num_coded = 0; /* total coeffs coded in 4x4 */
+
+ WORD32 infer_coeff; /* infer when 0,0 is the only coded coeff */
+ WORD32 bit; /* temp boolean */
+
+ /* total count of coeffs to be coded as abs level remaining */
+ WORD32 num_coeffs_remaining = 0;
+
+ /* count of coeffs to be coded as abslevel-1 */
+ WORD32 num_coeffs_base1 = 0;
+ WORD32 scan_pos;
+ WORD32 first_gt1_coeff = 0;
+
+ if((i != 0) || (0 == last_csb))
+ {
+ /* sanity check, atleast one coeff is coded as csbf is set */
+ ASSERT(sig_coeff_map != 0);
+ }
+
+ pu2_sig_coeff_buf += 3;
+
+ scan_pos = 15;
+ if(i == last_csb)
+ {
+ /*************************************************************/
+ /* clear last_scan_pos for last block in scan order as this */
+ /* is communicated throught last_coeff_x and last_coeff_y */
+ /*************************************************************/
+ WORD32 next_sig = CLZ(sig_coeff_map) + 1;
+
+ scan_pos = WORD_SIZE - next_sig;
+
+ /* prepare the bins for gt1 flags */
+ EXTRACT_BIT(bit, gt1_flags, scan_pos);
+
+ /* insert gt1 bin in lsb */
+ gt1_bins |= bit;
+
+ /* prepare the bins for sign flags */
+ EXTRACT_BIT(bit, sign_flags, scan_pos);
+
+ /* insert sign bin in lsb */
+ sign_bins |= bit;
+
+ sig_coeff_map = CLEAR_BIT(sig_coeff_map, scan_pos);
+
+ if(-1 == last_scan_pos)
+ last_scan_pos = scan_pos;
+
+ scan_pos--;
+ num_coded++;
+ }
+
+ /* infer 0,0 coeff for all 4x4 blocks except fitst and last */
+ infer_coeff = (i < last_csb) && (i > 0);
+
+ /* encode the required sigcoeff flags (abslevel > 0) */
+ while(scan_pos >= 0)
+ {
+ WORD32 y_pos_x_pos;
+ WORD32 sig_ctxinc = 0; /* 0 is default inc for DC coeff */
+
+ WORD32 sig_coeff;
+
+ EXTRACT_BIT(sig_coeff, sig_coeff_map, scan_pos);
+
+ /* derive the x,y pos */
+ y_pos_x_pos = gu1_hevce_scan4x4[scan_type][scan_pos];
+
+ /* derive the context inc as per section 9.3.3.1.4 */
+ if(2 == log2_tr_size)
+ {
+ /* 4x4 transform size increment uses lookup */
+ sig_ctxinc = gu1_hevce_sigcoeff_ctxtinc_tr4[y_pos_x_pos];
+ }
+ else if(scan_pos || i)
+ {
+ /* ctxt for AC coeff depends on curpos and neigbour csbf */
+ sig_ctxinc = gu1_hevce_sigcoeff_ctxtinc[nbr_csbf][y_pos_x_pos];
+
+ /* based on luma subblock pos */
+ sig_ctxinc += (i && is_luma) ? 3 : 0;
+ }
+ else
+ {
+ /* DC coeff has fixed context for luma and chroma */
+ sig_coeff_base_ctxt = is_luma ? IHEVC_CAB_COEFF_FLAG
+ : IHEVC_CAB_COEFF_FLAG + 27;
+ }
+
+ /*************************************************************/
+ /* encode sig coeff only if required */
+ /* decoder infers 0,0 coeff when all the other coeffs are 0 */
+ /*************************************************************/
+ if(scan_pos || (!infer_coeff))
+ {
+ ctxt_idx = sig_ctxinc + sig_coeff_base_ctxt;
+ ret |= ihevce_cabac_encode_bin(ps_cabac, sig_coeff, ctxt_idx);
+ AEV_TRACE("significant_coeff_flag", sig_coeff, ps_cabac->u4_range);
+ }
+
+ if(sig_coeff)
+ {
+ /* prepare the bins for gt1 flags */
+ EXTRACT_BIT(bit, gt1_flags, scan_pos);
+
+ /* shift and insert gt1 bin in lsb */
+ gt1_bins <<= 1;
+ gt1_bins |= bit;
+
+ /* prepare the bins for sign flags */
+ EXTRACT_BIT(bit, sign_flags, scan_pos);
+
+ /* shift and insert sign bin in lsb */
+ sign_bins <<= 1;
+ sign_bins |= bit;
+
+ num_coded++;
+
+ /* 0,0 coeff can no more be inferred :( */
+ infer_coeff = 0;
+
+ if(-1 == last_scan_pos)
+ last_scan_pos = scan_pos;
+
+ first_scan_pos = scan_pos;
+ }
+
+ scan_pos--;
+ }
+
+ /* Added for sign bit hiding*/
+ sign_hidden = ((last_scan_pos - first_scan_pos) > 3 && !cu_tq_bypass_flag);
+
+ /****************************************************************/
+ /* encode the abs level greater than 1 bins; Section 7.3.13 */
+ /* These have already been prepared during sig_coeff_map encode */
+ /* Context modelling done as per section 9.3.3.1.5 */
+ /****************************************************************/
+ {
+ WORD32 j;
+
+ /* context set based on luma subblock pos */
+ WORD32 ctxt_set = (i && is_luma) ? 2 : 0;
+
+ /* count of coeffs with abslevel > 1; max of 8 to be coded */
+ WORD32 num_gt1_bins = MIN(8, num_coded);
+
+ if(num_coded > 8)
+ {
+ /* pull back the bins to required number */
+ gt1_bins >>= (num_coded - 8);
+
+ num_coeffs_remaining += (num_coded - 8);
+ num_coeffs_base1 = (num_coded - 8);
+ }
+
+ /* See section 9.3.3.1.5 */
+ ctxt_set += (0 == gt1_ctxt) ? 1 : 0;
+
+ gt1_ctxt = 1;
+
+ for(j = num_gt1_bins - 1; j >= 0; j--)
+ {
+ /* Encodet the abs level gt1 bins */
+ ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
+
+ EXTRACT_BIT(bit, gt1_bins, j);
+
+ ret |= ihevce_cabac_encode_bin(ps_cabac, bit, ctxt_idx);
+
+ AEV_TRACE("coeff_abs_level_greater1_flag", bit, ps_cabac->u4_range);
+
+ if(bit)
+ {
+ gt1_ctxt = 0;
+ num_coeffs_remaining++;
+ }
+ else if(gt1_ctxt && (gt1_ctxt < 3))
+ {
+ gt1_ctxt++;
+ }
+ }
+
+ /*************************************************************/
+ /* encode abs level greater than 2 bin; Section 7.3.13 */
+ /*************************************************************/
+ if(gt1_bins)
+ {
+ WORD32 gt2_bin;
+
+ first_gt1_coeff = pu2_sig_coeff_buf[0] + 1;
+ gt2_bin = (first_gt1_coeff > 2);
+
+ /* atleast one level > 2 */
+ ctxt_idx = IHEVC_CAB_COEFABS_GRTR2_FLAG;
+
+ ctxt_idx += (is_luma) ? ctxt_set : (ctxt_set + 4);
+
+ ret |= ihevce_cabac_encode_bin(ps_cabac, gt2_bin, ctxt_idx);
+
+ if(!gt2_bin)
+ {
+ /* sanity check */
+ ASSERT(first_gt1_coeff == 2);
+
+ /* no need to send this coeff as bypass bins */
+ pu2_sig_coeff_buf++;
+ num_coeffs_remaining--;
+ }
+
+ AEV_TRACE("coeff_abs_level_greater2_flag", gt2_bin, ps_cabac->u4_range);
+ }
+ }
+
+ /*************************************************************/
+ /* encode the coeff signs and abs remaing levels */
+ /*************************************************************/
+ if(num_coded)
+ {
+ WORD32 base_level;
+ WORD32 rice_param = 0;
+ WORD32 j;
+
+ /*************************************************************/
+ /* encode the coeff signs populated in sign_bins */
+ /*************************************************************/
+
+ if(sign_hidden && i4_sign_data_hiding_flag)
+ {
+ sign_bins >>= 1;
+ num_coded--;
+ }
+
+ if(num_coded > 0)
+ {
+ ret |= ihevce_cabac_encode_bypass_bins(ps_cabac, sign_bins, num_coded);
+ }
+
+ AEV_TRACE("sign_flags", sign_bins, ps_cabac->u4_range);
+
+ /*************************************************************/
+ /* encode the coeff_abs_level_remaining as TR / EGK bins */
+ /* See section 9.3.2.7 for details */
+ /*************************************************************/
+
+ /* first remaining coeff baselevel */
+ if(first_gt1_coeff > 2)
+ {
+ base_level = 3;
+ }
+ else if(num_coeffs_remaining > num_coeffs_base1)
+ {
+ /* atleast one coeff in first 8 is gt > 1 */
+ base_level = 2;
+ }
+ else
+ {
+ /* all coeffs have base of 1 */
+ base_level = 1;
+ }
+
+ for(j = 0; j < num_coeffs_remaining; j++)
+ {
+ WORD32 abs_coeff = pu2_sig_coeff_buf[0] + 1;
+ WORD32 abs_coeff_rem;
+ WORD32 rice_max = (4 << rice_param);
+
+ pu2_sig_coeff_buf++;
+
+ /* sanity check */
+ ASSERT(abs_coeff >= base_level);
+
+ abs_coeff_rem = (abs_coeff - base_level);
+
+ /* TODO://HM-8.0-dev uses (3 << rice_param) as rice_max */
+ /* TODO://HM-8.0-dev does either TR or EGK but not both */
+ if(abs_coeff_rem >= rice_max)
+ {
+ UWORD32 u4_suffix = (abs_coeff_rem - rice_max);
+
+ /* coeff exceeds max rice limit */
+ /* encode the TR prefix as tunary code */
+ /* prefix = 1111 as (rice_max >> rice_praram) = 4 */
+ ret |= ihevce_cabac_encode_bypass_bins(ps_cabac, 0xF, 4);
+
+ /* encode the exponential golomb code suffix */
+ ret |= ihevce_cabac_encode_egk(ps_cabac, u4_suffix, (rice_param + 1));
+ }
+ else
+ {
+ /* code coeff as truncated rice code */
+ ret |= ihevce_cabac_encode_trunc_rice(
+ ps_cabac, abs_coeff_rem, rice_param, rice_max);
+ }
+
+ AEV_TRACE("coeff_abs_level_remaining", abs_coeff_rem, ps_cabac->u4_range);
+
+ /* update the rice param based on coeff level */
+ if((abs_coeff > (3 << rice_param)) && (rice_param < 4))
+ {
+ rice_param++;
+ }
+
+ /* change base level to 1 if more than 8 coded coeffs */
+ if((j + 1) < (num_coeffs_remaining - num_coeffs_base1))
+ {
+ base_level = 2;
+ }
+ else
+ {
+ base_level = 1;
+ }
+ }
+ }
+ }
+ }
+ /*tap texture bits*/
+ if(ps_cabac->e_cabac_op_mode == CABAC_MODE_COMPUTE_BITS)
+ { // clang-format off
+ ps_cabac->u4_texture_bits_estimated_q12 +=
+ (ps_cabac->u4_bits_estimated_q12 -
+ ps_cabac->u4_header_bits_estimated_q12); //(ps_cabac->u4_bits_estimated_q12 - temp_tex_bits_q12);
+ } // clang-format on
+
+ return (ret);
+}
+
+/**
+******************************************************************************
+*
+* @brief Get the bits estimate for a transform residual block (section 7.3.13)
+*
+* @par Description
+* The residual block is read from a compressed coeff buffer populated during
+* the scanning of the quantized coeffs; its layout is given under pv_coeff
+*
+* @remarks Transform skip is not supported (asserted off in the PPS)
+*
+* @remarks Need to resolve the differences between JVT-J1003_d7 spec and
+* HM.8.0-dev for related abs_greater_than_1 context initialization
+* and rice_max parameter used for coeff abs level remaining
+*
+* @param[inout] ps_entropy_ctxt
+* pointer to entropy context (handle)
+*
+* @param[in] pv_coeff
+* Compressed residue buffer containing following information:
+*
+* HEADER(4 bytes) : last_coeff_x, last_coeff_y, scantype, last_subblock_num
+*
+* For each 4x4 subblock starting from last_subblock_num (in scan order)
+* Read 2 bytes : MSB 12bits (0xBAD marker), bit0 cur_csbf, bit1-2 nbr csbf
+*
+* If cur_csbf
+* Read 2 bytes : sig_coeff_map (16bits in scan_order 1:coded, 0:not coded)
+* Read 2 bytes : abs_gt1_flags (max of 8 only)
+* Read 2 bytes : coeff_sign_flags
+*
+* Based on abs_gt1_flags and sig_coeff_map read remaining abs levels
+* Read 2 bytes : remaining_abs_coeffs_minus1 (this is in a loop)
+*
+* @param[in] log2_tr_size
+* transform size of the current TU
+*
+* @param[in] is_luma
+* boolean indicating if the texture type is luma / chroma
+*
+* @param[in] perform_sbh
+* when zero, no sign bit is treated as hidden in the estimate
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ihevce_cabac_residue_encode_rdopt(
+ entropy_context_t *ps_entropy_ctxt,
+ void *pv_coeff,
+ WORD32 log2_tr_size,
+ WORD32 is_luma,
+ WORD32 perform_sbh)
+{
+ WORD32 ret = IHEVCE_SUCCESS;
+ cab_ctxt_t *ps_cabac = &ps_entropy_ctxt->s_cabac_ctxt;
+ UWORD32 temp_tex_bits_q12; /* bit estimate on entry; used for the texture bits delta */
+ WORD32 i4_sign_data_hiding_flag, cu_tq_bypass_flag;
+
+ UWORD8 *pu1_coeff_buf_hdr = (UWORD8 *)pv_coeff;
+ UWORD16 *pu2_sig_coeff_buf = (UWORD16 *)pv_coeff;
+
+ /* last sig coeff indices in scan order */
+ WORD32 last_sig_coeff_x = pu1_coeff_buf_hdr[0];
+ WORD32 last_sig_coeff_y = pu1_coeff_buf_hdr[1];
+
+ /* read the scan type : upright diag / horz / vert */
+ WORD32 scan_type = pu1_coeff_buf_hdr[2];
+
+ /************************************************************************/
+ /* position of the last coded sub block. This sub block contains coeff */
+ /* corresponding to last_sig_coeff_x, last_sig_coeff_y. Although this */
+ /* can be derived here it is better populated by the scanning module */
+ /************************************************************************/
+ WORD32 last_csb = pu1_coeff_buf_hdr[3];
+
+ WORD32 cur_csbf = 0, nbr_csbf;
+ WORD32 sig_coeff_base_ctxt; /* cabac context for sig coeff flag */
+ WORD32 abs_gt1_base_ctxt; /* cabac context for abslevel > 1 flag */
+
+ WORD32 gt1_ctxt = 1; /* required for abs_gt1_ctxt modelling; carried across sub blocks */
+
+ WORD32 i;
+
+ UWORD8 *pu1_ctxt_model = &ps_cabac->au1_ctxt_models[0];
+
+ /* sanity checks */
+ /* transform skip not supported */
+ ASSERT(0 == ps_entropy_ctxt->ps_pps->i1_transform_skip_enabled_flag);
+ /* NOTE(review): named "bypass" but loaded from the transform skip PPS flag - confirm */
+ cu_tq_bypass_flag = ps_entropy_ctxt->ps_pps->i1_transform_skip_enabled_flag;
+
+ i4_sign_data_hiding_flag = ps_entropy_ctxt->ps_pps->i1_sign_data_hiding_flag;
+
+ {
+ temp_tex_bits_q12 = ps_cabac->u4_bits_estimated_q12;
+ }
+
+ if(SCAN_VERT == scan_type)
+ {
+ /* last coeff x and y are swapped for vertical scan */
+ SWAP(last_sig_coeff_x, last_sig_coeff_y);
+ }
+
+ /* Encode the last_sig_coeff_x and last_sig_coeff_y (only call here that updates ret) */
+ ret |= ihevce_cabac_encode_last_coeff_x_y(
+ ps_cabac, last_sig_coeff_x, last_sig_coeff_y, log2_tr_size, is_luma);
+
+ /*************************************************************************/
+ /* derive base context index for sig coeff as per section 9.3.3.1.4 */
+ /* TODO: convert to look up based on luma/chroma, scan type and tfr size */
+ /*************************************************************************/
+ if(is_luma)
+ {
+ sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG;
+ abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG;
+
+ if(3 == log2_tr_size)
+ {
+ /* 8x8 transform size */
+ sig_coeff_base_ctxt += (scan_type == SCAN_DIAG_UPRIGHT) ? 9 : 15;
+ }
+ else if(3 < log2_tr_size)
+ {
+ /* larger transform sizes */
+ sig_coeff_base_ctxt += 21;
+ }
+ }
+ else
+ {
+ /* chroma context initializations */
+ sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG + 27;
+ abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG + 16;
+
+ if(3 == log2_tr_size)
+ {
+ /* 8x8 transform size */
+ sig_coeff_base_ctxt += 9;
+ }
+ else if(3 < log2_tr_size)
+ {
+ /* larger transform sizes */
+ sig_coeff_base_ctxt += 12;
+ }
+ }
+
+ /* go to csbf flags */
+ pu2_sig_coeff_buf = (UWORD16 *)(pu1_coeff_buf_hdr + COEFF_BUF_HEADER_LEN);
+
+ /************************************************************************/
+ /* estimate the csbf, sig_coeff_map, abs_grt1_flags, abs_grt2_flag, sign*/
+ /* and abs_coeff_remaining for each 4x4 starting from last scan to first*/
+ /************************************************************************/
+ for(i = last_csb; i >= 0; i--)
+ {
+ UWORD16 u2_marker_csbf;
+ WORD32 ctxt_idx;
+
+ u2_marker_csbf = *pu2_sig_coeff_buf;
+ pu2_sig_coeff_buf++;
+
+ /* sanity checks for marker present in every csbf flag */
+ ASSERT((u2_marker_csbf >> 4) == 0xBAD);
+
+ /* extract the current and neighbour csbf flags */
+ cur_csbf = u2_marker_csbf & 0x1;
+ nbr_csbf = (u2_marker_csbf >> 1) & 0x3;
+
+ /*********************************************************************/
+ /* code the csbf flags; last and first csb not sent as it is derived */
+ /*********************************************************************/
+ if((i < last_csb) && (i > 0))
+ {
+ ctxt_idx = IHEVC_CAB_CODED_SUBLK_IDX;
+
+ /* ctxt based on right / bottom avail csbf, section 9.3.3.1.3 */
+ ctxt_idx += nbr_csbf ? 1 : 0;
+ ctxt_idx += is_luma ? 0 : 2;
+
+ {
+ WORD32 state_mps = pu1_ctxt_model[ctxt_idx];
+
+ /* increment bits generated based on state and bin encoded */
+ ps_cabac->u4_bits_estimated_q12 +=
+ gau2_ihevce_cabac_bin_to_bits[state_mps ^ cur_csbf];
+
+ /* update the context model from state transition LUT */
+ pu1_ctxt_model[ctxt_idx] = gau1_ihevc_next_state[(state_mps << 1) | cur_csbf];
+ }
+ }
+ else
+ {
+ /* sanity check, this csb contains the last_sig_coeff */
+ if(i == last_csb)
+ {
+ ASSERT(cur_csbf == 1);
+ }
+ }
+
+ if(cur_csbf)
+ {
+ /*****************************************************************/
+ /* encode the sig coeff map as per section 7.3.13 */
+ /* significant_coeff_flags: msb=coeff15-lsb=coeff0 in scan order */
+ /*****************************************************************/
+
+ /* Added for Sign bit data hiding */
+ WORD32 first_scan_pos = 16;
+ WORD32 last_scan_pos = -1;
+ WORD32 sign_hidden;
+
+ UWORD16 u2_gt0_flags = *pu2_sig_coeff_buf;
+ WORD32 gt1_flags = *(pu2_sig_coeff_buf + 1);
+ WORD32 sign_flags = *(pu2_sig_coeff_buf + 2);
+
+ WORD32 sig_coeff_map = u2_gt0_flags;
+
+ WORD32 gt1_bins = 0; /* bins for coeffs with abslevel > 1 */
+
+ WORD32 sign_bins = 0; /* bins for sign flags of coded coeffs */
+ WORD32 num_coded = 0; /* total coeffs coded in 4x4 */
+
+ WORD32 infer_coeff; /* infer when 0,0 is the only coded coeff */
+ WORD32 bit; /* temp boolean */
+
+ /* total count of coeffs to be coded as abs level remaining */
+ WORD32 num_coeffs_remaining = 0;
+
+ /* count of coeffs to be coded as abslevel-1 */
+ WORD32 num_coeffs_base1 = 0;
+ WORD32 scan_pos;
+ WORD32 first_gt1_coeff = 0;
+
+ if((i != 0) || (0 == last_csb))
+ {
+ /* sanity check, at least one coeff is coded as csbf is set */
+ ASSERT(sig_coeff_map != 0);
+ }
+
+ pu2_sig_coeff_buf += 3; /* step past sig_coeff_map, abs_gt1_flags and sign flags */
+
+ scan_pos = 15;
+ if(i == last_csb)
+ {
+ /*************************************************************/
+ /* clear last_scan_pos for last block in scan order as this */
+ /* is communicated through last_coeff_x and last_coeff_y */
+ /*************************************************************/
+ WORD32 next_sig = CLZ(sig_coeff_map) + 1;
+
+ scan_pos = WORD_SIZE - next_sig;
+
+ /* prepare the bins for gt1 flags */
+ EXTRACT_BIT(bit, gt1_flags, scan_pos);
+
+ /* insert gt1 bin in lsb */
+ gt1_bins |= bit;
+
+ /* prepare the bins for sign flags */
+ EXTRACT_BIT(bit, sign_flags, scan_pos);
+
+ /* insert sign bin in lsb */
+ sign_bins |= bit;
+
+ sig_coeff_map = CLEAR_BIT(sig_coeff_map, scan_pos);
+
+ if(-1 == last_scan_pos)
+ last_scan_pos = scan_pos;
+
+ scan_pos--;
+ num_coded++;
+ }
+
+ /* infer 0,0 coeff for all 4x4 blocks except first and last */
+ infer_coeff = (i < last_csb) && (i > 0);
+
+ /* encode the required sigcoeff flags (abslevel > 0) */
+ while(scan_pos >= 0)
+ {
+ WORD32 y_pos_x_pos;
+ WORD32 sig_ctxinc = 0; /* 0 is default inc for DC coeff */
+
+ WORD32 sig_coeff;
+
+ EXTRACT_BIT(sig_coeff, sig_coeff_map, scan_pos);
+
+ /* derive the x,y pos */
+ y_pos_x_pos = gu1_hevce_scan4x4[scan_type][scan_pos];
+
+ /* derive the context inc as per section 9.3.3.1.4 */
+ if(2 == log2_tr_size)
+ {
+ /* 4x4 transform size increment uses lookup */
+ sig_ctxinc = gu1_hevce_sigcoeff_ctxtinc_tr4[y_pos_x_pos];
+ }
+ else if(scan_pos || i)
+ {
+ /* ctxt for AC coeff depends on curpos and neighbour csbf */
+ sig_ctxinc = gu1_hevce_sigcoeff_ctxtinc[nbr_csbf][y_pos_x_pos];
+
+ /* based on luma subblock pos */
+ sig_ctxinc += (i && is_luma) ? 3 : 0;
+ }
+ else
+ {
+ /* DC coeff has fixed context for luma and chroma */
+ sig_coeff_base_ctxt = is_luma ? IHEVC_CAB_COEFF_FLAG
+ : IHEVC_CAB_COEFF_FLAG + 27;
+ }
+
+ /*************************************************************/
+ /* encode sig coeff only if required */
+ /* decoder infers 0,0 coeff when all the other coeffs are 0 */
+ /*************************************************************/
+ if(scan_pos || (!infer_coeff))
+ {
+ ctxt_idx = sig_ctxinc + sig_coeff_base_ctxt;
+
+ // inline estimate replacing ihevce_cabac_encode_bin(ps_cabac, sig_coeff, ctxt_idx)
+ {
+ WORD32 state_mps = pu1_ctxt_model[ctxt_idx];
+
+ /* increment bits generated based on state and bin encoded */
+ ps_cabac->u4_bits_estimated_q12 +=
+ gau2_ihevce_cabac_bin_to_bits[state_mps ^ sig_coeff];
+
+ /* update the context model from state transition LUT */
+ pu1_ctxt_model[ctxt_idx] =
+ gau1_ihevc_next_state[(state_mps << 1) | sig_coeff];
+ }
+ }
+
+ if(sig_coeff)
+ {
+ /* prepare the bins for gt1 flags */
+ EXTRACT_BIT(bit, gt1_flags, scan_pos);
+
+ /* shift and insert gt1 bin in lsb */
+ gt1_bins <<= 1;
+ gt1_bins |= bit;
+
+ /* prepare the bins for sign flags */
+ EXTRACT_BIT(bit, sign_flags, scan_pos);
+
+ /* shift and insert sign bin in lsb */
+ sign_bins <<= 1;
+ sign_bins |= bit;
+
+ num_coded++;
+
+ /* 0,0 coeff can no more be inferred :( */
+ infer_coeff = 0;
+
+ if(-1 == last_scan_pos)
+ last_scan_pos = scan_pos;
+
+ first_scan_pos = scan_pos;
+ }
+
+ scan_pos--;
+ }
+
+ /* Added for sign bit hiding; additionally gated by the caller through perform_sbh */
+ sign_hidden =
+ (((last_scan_pos - first_scan_pos) > 3 && !cu_tq_bypass_flag) && (perform_sbh));
+
+ /****************************************************************/
+ /* encode the abs level greater than 1 bins; Section 7.3.13 */
+ /* These have already been prepared during sig_coeff_map encode */
+ /* Context modelling done as per section 9.3.3.1.5 */
+ /****************************************************************/
+ {
+ WORD32 j;
+
+ /* context set based on luma subblock pos */
+ WORD32 ctxt_set = (i && is_luma) ? 2 : 0;
+
+ /* count of coeffs with abslevel > 1; max of 8 to be coded */
+ WORD32 num_gt1_bins = MIN(8, num_coded);
+
+ if(num_coded > 8)
+ {
+ /* pull back the bins to required number */
+ gt1_bins >>= (num_coded - 8);
+
+ num_coeffs_remaining += (num_coded - 8);
+ num_coeffs_base1 = (num_coded - 8);
+ }
+
+ /* See section 9.3.3.1.5 */
+ ctxt_set += (0 == gt1_ctxt) ? 1 : 0;
+
+ gt1_ctxt = 1;
+
+ for(j = num_gt1_bins - 1; j >= 0; j--)
+ {
+ /* Encode the abs level gt1 bins */
+ ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
+
+ EXTRACT_BIT(bit, gt1_bins, j);
+
+ // inline estimate replacing ihevce_cabac_encode_bin(ps_cabac, bit, ctxt_idx)
+ {
+ WORD32 state_mps = pu1_ctxt_model[ctxt_idx];
+
+ /* increment bits generated based on state and bin encoded */
+ ps_cabac->u4_bits_estimated_q12 +=
+ gau2_ihevce_cabac_bin_to_bits[state_mps ^ bit];
+
+ /* update the context model from state transition LUT */
+ pu1_ctxt_model[ctxt_idx] = gau1_ihevc_next_state[(state_mps << 1) | bit];
+ }
+
+ if(bit)
+ {
+ gt1_ctxt = 0;
+ num_coeffs_remaining++;
+ }
+ else if(gt1_ctxt && (gt1_ctxt < 3))
+ {
+ gt1_ctxt++;
+ }
+ }
+
+ /*************************************************************/
+ /* encode abs level greater than 2 bin; Section 7.3.13 */
+ /*************************************************************/
+ if(gt1_bins)
+ {
+ WORD32 gt2_bin;
+
+ first_gt1_coeff = pu2_sig_coeff_buf[0] + 1;
+ gt2_bin = (first_gt1_coeff > 2);
+
+ /* at least one level > 1, so the greater2 flag is coded */
+ ctxt_idx = IHEVC_CAB_COEFABS_GRTR2_FLAG;
+
+ ctxt_idx += (is_luma) ? ctxt_set : (ctxt_set + 4);
+
+ // inline estimate replacing ihevce_cabac_encode_bin(ps_cabac, gt2_bin, ctxt_idx)
+ {
+ WORD32 state_mps = pu1_ctxt_model[ctxt_idx];
+
+ /* increment bits generated based on state and bin encoded */
+ ps_cabac->u4_bits_estimated_q12 +=
+ gau2_ihevce_cabac_bin_to_bits[state_mps ^ gt2_bin];
+
+ /* update the context model from state transition LUT */
+ pu1_ctxt_model[ctxt_idx] =
+ gau1_ihevc_next_state[(state_mps << 1) | gt2_bin];
+ }
+
+ if(!gt2_bin)
+ {
+ /* sanity check */
+ ASSERT(first_gt1_coeff == 2);
+
+ /* no need to send this coeff as bypass bins */
+ pu2_sig_coeff_buf++;
+ num_coeffs_remaining--;
+ }
+ }
+ }
+
+ /*************************************************************/
+ /* encode the coeff signs and abs remaining levels */
+ /*************************************************************/
+ if(num_coded)
+ {
+ WORD32 base_level;
+ WORD32 rice_param = 0;
+ WORD32 j;
+
+ /*************************************************************/
+ /* encode the coeff signs populated in sign_bins */
+ /*************************************************************/
+ if(sign_hidden && i4_sign_data_hiding_flag)
+ {
+ sign_bins >>= 1;
+ num_coded--;
+ }
+
+ if(num_coded > 0)
+ {
+ /* Inline estimate replacing the call
+ ihevce_cabac_encode_bypass_bins(ps_cabac, sign_bins, num_coded)
+ so only the count of sign bins matters here, not their values
+ */
+
+ /* increment bits generated based on num bypass bins */
+ ps_cabac->u4_bits_estimated_q12 += (num_coded << CABAC_FRAC_BITS_Q);
+ }
+
+ /*************************************************************/
+ /* encode the coeff_abs_level_remaining as TR / EGK bins */
+ /* See section 9.3.2.7 for details */
+ /*************************************************************/
+
+ /* first remaining coeff baselevel */
+ if(first_gt1_coeff > 2)
+ {
+ base_level = 3;
+ }
+ else if(num_coeffs_remaining > num_coeffs_base1)
+ {
+ /* at least one coeff in first 8 is > 1 */
+ base_level = 2;
+ }
+ else
+ {
+ /* all coeffs have base of 1 */
+ base_level = 1;
+ }
+
+ for(j = 0; j < num_coeffs_remaining; j++)
+ {
+ WORD32 abs_coeff = pu2_sig_coeff_buf[0] + 1;
+ WORD32 abs_coeff_rem;
+ WORD32 rice_max = (4 << rice_param);
+ WORD32 num_bins, unary_length;
+ UWORD32 u4_sym_shiftk_plus1;
+
+ pu2_sig_coeff_buf++;
+
+ /* sanity check */
+ ASSERT(abs_coeff >= base_level);
+
+ abs_coeff_rem = (abs_coeff - base_level);
+
+ /* TODO://HM-8.0-dev uses (3 << rice_param) as rice_max */
+ /* TODO://HM-8.0-dev does either TR or EGK but not both */
+ if(abs_coeff_rem >= rice_max)
+ {
+ UWORD32 u4_suffix = (abs_coeff_rem - rice_max);
+
+ /* coeff exceeds max rice limit */
+ /* encode the TR prefix as unary code */
+ /* prefix = 1111 as (rice_max >> rice_param) = 4 */
+ /* replaces: ret |= ihevce_cabac_encode_bypass_bins(ps_cabac, 0xF, 4); */
+
+ /* increment bits generated based on num bypass bins */
+ ps_cabac->u4_bits_estimated_q12 += (4 << CABAC_FRAC_BITS_Q);
+
+ /* encode the exponential golomb code suffix */
+ /* replaces: ret |= ihevce_cabac_encode_egk(ps_cabac,
+ u4_suffix,
+ (rice_param+1)
+ ); */
+
+ /* k = rice_param+1 */
+ /************************************************************************/
+ /* shift symbol by k bits to find unary code prefix (111110) */
+ /* Use GETRANGE to eliminate the while loop in sec 9.3.2.4 of HEVC spec */
+ /************************************************************************/
+ u4_sym_shiftk_plus1 = (u4_suffix >> (rice_param + 1)) + 1;
+
+ /* GETRANGE(unary_length, (u4_sym_shiftk_plus1 + 1)); */
+ GETRANGE(unary_length, u4_sym_shiftk_plus1);
+
+ /* length of the code = 2 *(unary_length - 1) + 1 + k */
+ num_bins = (2 * unary_length) + rice_param;
+
+ /* increment bits generated based on num bypass bins */
+ ps_cabac->u4_bits_estimated_q12 += (num_bins << CABAC_FRAC_BITS_Q);
+ }
+ else
+ {
+ /* code coeff as truncated rice code */
+ /* replaces: ret |= ihevce_cabac_encode_trunc_rice(ps_cabac,
+ abs_coeff_rem,
+ rice_param,
+ rice_max);
+ */
+
+ /************************************************************************/
+ /* shift symbol by c_rice_param bits to find unary code prefix (111.10) */
+ /************************************************************************/
+ unary_length = (abs_coeff_rem >> rice_param) + 1;
+
+ /* length of the code */
+ num_bins = unary_length + rice_param;
+
+ /* increment bits generated based on num bypass bins */
+ ps_cabac->u4_bits_estimated_q12 += (num_bins << CABAC_FRAC_BITS_Q);
+ }
+
+ /* update the rice param based on coeff level */
+ if((abs_coeff > (3 << rice_param)) && (rice_param < 4))
+ {
+ rice_param++;
+ }
+
+ /* change base level to 1 if more than 8 coded coeffs */
+ if((j + 1) < (num_coeffs_remaining - num_coeffs_base1))
+ {
+ base_level = 2;
+ }
+ else
+ {
+ base_level = 1;
+ }
+ }
+ }
+ }
+ }
+ /* add the bits estimated during this call to the running texture bits total */
+ {
+ ps_cabac->u4_texture_bits_estimated_q12 +=
+ (ps_cabac->u4_bits_estimated_q12 - temp_tex_bits_q12);
+ }
+
+ return (ret);
+}
+
+/**
+******************************************************************************
+*
+* @brief Encodes a transform residual block as per section 7.3.13
+*
+* @par Description
+* RDOQ optimization is carried out here. When sub-blk RDOQ is turned on, we calculate
+* the distortion (D) and bits (R) for when the sub-blk is coded and when not coded. We
+* then use the D+lambdaR metric to decide whether the sub-blk should be coded or not, and
+* appropriately signal it. When coeff RDOQ is turned on, we traverse through the TU to
+* find all non-zero coeffs. If the non-zero coeff is a 1, then we make a decision (based on the
+* D+lambdaR metric) as to whether to code it as a 0 or 1. In case the coeff is > 1 (say L where
+* L > 1) we choose between L and L+1
+*
+* @remarks Does not support sign data hiding and transform skip flag currently
+*
+* @remarks Need to resolve the differences between JVT-J1003_d7 spec and
+* HM.8.0-dev for related abs_greater_than_1 context initialization
+* and rice_max parameter used for coeff abs level remaining
+*
+* @param[inout] ps_entropy_ctxt
+* pointer to entropy context (handle)
+*
+* @param[in] pv_coeff
+* Compressed residue buffer containing following information:
+*
+*
+* HEADER(4 bytes) : last_coeff_x, last_coeff_y, scantype, last_subblock_num
+*
+* For each 4x4 subblock starting from last_subblock_num (in scan order)
+* Read 2 bytes : MSB 12bits (0xBAD marker), bit0 cur_csbf, bit1-2 nbr csbf
+*
+* If cur_csbf
+* Read 2 bytes : sig_coeff_map (16bits in scan_order 1:coded, 0:not coded)
+* Read 2 bytes : abs_gt1_flags (max of 8 only)
+* Read 2 bytes : coeff_sign_flags
+*
+* Based on abs_gt1_flags and sig_coeff_map read remaining abs levels
+* Read 2 bytes : remaining_abs_coeffs_minus1 (this is in a loop)
+*
+* @param[in] log2_tr_size
+* transform size of the current TU
+*
+* @param[in] is_luma
+* boolean indicating if the texture type is luma / chroma
+*
+* @param[out] pi8_tu_coded_dist
+* The distortion when the TU is coded (not all coeffs are set to 0) is stored here
+*
+* @param[out] pi8_tu_not_coded_dist
+* The distortion when the entire TU is not coded (all coeffs are set to 0) is stored here
+*
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+
+WORD32 ihevce_cabac_residue_encode_rdoq(
+    entropy_context_t *ps_entropy_ctxt,
+    void *pv_coeff,
+    WORD32 log2_tr_size,
+    WORD32 is_luma,
+    void *pv_rdoq_ctxt,
+    LWORD64 *pi8_tu_coded_dist,
+    LWORD64 *pi8_tu_not_coded_dist,
+    WORD32 perform_sbh)
+{
+    WORD32 *pi4_subBlock2csbfId_map;
+
+    WORD32 ret = IHEVCE_SUCCESS;
+
+    cab_ctxt_t *ps_cabac = &ps_entropy_ctxt->s_cabac_ctxt;
+    cab_ctxt_t s_sub_blk_not_coded_cabac_ctxt;
+    backup_ctxt_t s_backup_ctxt;
+    backup_ctxt_t s_backup_ctxt_sub_blk_not_coded;
+
+    UWORD32 temp_tex_bits_q12;
+
+    UWORD8 *pu1_coeff_buf_hdr = (UWORD8 *)pv_coeff;
+    UWORD16 *pu2_sig_coeff_buf = (UWORD16 *)pv_coeff;
+
+    LWORD64 i8_sub_blk_not_coded_dist = 0, i8_sub_blk_coded_dist = 0;
+    WORD32 i4_sub_blk_not_coded_bits = 0, i4_sub_blk_coded_bits = 0;
+    LWORD64 i8_sub_blk_not_coded_metric, i8_sub_blk_coded_metric;
+    LWORD64 i8_tu_not_coded_dist = 0, i8_tu_coded_dist = 0;
+    WORD32 i4_tu_coded_bits = 0;
+    WORD32 temp_zero_col = 0, temp_zero_row = 0;
+
+    UWORD8 *pu1_last_sig_coeff_x;
+    UWORD8 *pu1_last_sig_coeff_y;
+    WORD32 scan_type;
+    WORD32 last_csb;
+
+    WORD32 cur_csbf = 0, nbr_csbf;
+    // WORD32 i4_temp_bits;
+
+    WORD32 sig_coeff_base_ctxt; /* cabac context for sig coeff flag */
+    WORD32 abs_gt1_base_ctxt; /* cabac context for abslevel > 1 flag */
+
+    UWORD8 *pu1_ctxt_model = &ps_cabac->au1_ctxt_models[0];
+
+    rdoq_sbh_ctxt_t *ps_rdoq_ctxt = (rdoq_sbh_ctxt_t *)pv_rdoq_ctxt;
+    WORD16 *pi2_coeffs = ps_rdoq_ctxt->pi2_quant_coeffs;
+    WORD16 *pi2_tr_coeffs = ps_rdoq_ctxt->pi2_trans_values;
+    WORD32 trans_size = ps_rdoq_ctxt->i4_trans_size;
+    WORD32 i4_round_val = ps_rdoq_ctxt->i4_round_val_ssd_in_td;
+    WORD32 i4_shift_val = ps_rdoq_ctxt->i4_shift_val_ssd_in_td;
+    WORD32 scan_idx = ps_rdoq_ctxt->i4_scan_idx;
+
+    UWORD8 *pu1_csb_table, *pu1_trans_table;
+    WORD32 shift_value, mask_value;
+
+    WORD32 gt1_ctxt = 1; /* required for abs_gt1_ctxt modelling */
+    WORD32 temp_gt1_ctxt = gt1_ctxt;
+
+    WORD32 i;
+#if DISABLE_ZCSBF
+    WORD32 i4_skip_zero_cbf = 0;
+    WORD32 i4_skip_zero_csbf = 0;
+    WORD32 i4_num_abs_1_coeffs = 0;
+#endif
+    (void)perform_sbh;
+    pi4_subBlock2csbfId_map = ps_rdoq_ctxt->pi4_subBlock2csbfId_map;
+
+    /* scan order inside a csb */
+    pu1_csb_table = (UWORD8 *)&(g_u1_scan_table_4x4[scan_idx][0]);
+    /*Initializing the backup_ctxt structures*/
+    s_backup_ctxt.i4_num_bits = 0;
+    s_backup_ctxt_sub_blk_not_coded.i4_num_bits = 0;
+
+    memset(&s_backup_ctxt.au1_ctxt_to_backup, 0, MAX_NUM_CONTEXT_ELEMENTS);
+    memset(&s_backup_ctxt_sub_blk_not_coded.au1_ctxt_to_backup, 0, MAX_NUM_CONTEXT_ELEMENTS);
+
+    pu1_coeff_buf_hdr = (UWORD8 *)pv_coeff;
+    pu2_sig_coeff_buf = (UWORD16 *)pv_coeff;
+
+    /* last sig coeff indices in scan order */
+    pu1_last_sig_coeff_x = &pu1_coeff_buf_hdr[0];
+    pu1_last_sig_coeff_y = &pu1_coeff_buf_hdr[1];
+
+    /* read the scan type : upright diag / horz / vert */
+    scan_type = pu1_coeff_buf_hdr[2];
+
+    /************************************************************************/
+    /* position of the last coded sub block. This sub block contains coeff  */
+    /* corresponding to last_sig_coeff_x, last_sig_coeff_y. Although this   */
+    /* can be derived here it is better populated by the scanning module    */
+    /************************************************************************/
+    last_csb = pu1_coeff_buf_hdr[3];
+
+    shift_value = ps_rdoq_ctxt->i4_log2_trans_size + 1;
+    /* for finding. row no. from scan index */
+    shift_value = shift_value - 3;
+    /*for finding the col. no. from scan index*/
+    mask_value = (ps_rdoq_ctxt->i4_trans_size / 4) - 1;
+
+    switch(ps_rdoq_ctxt->i4_trans_size)
+    {
+    case 32:
+        pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_8x8[scan_idx][0]);
+        break;
+    case 16:
+        pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_4x4[scan_idx][0]);
+        break;
+    case 8:
+        pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_2x2[scan_idx][0]);
+        break;
+    case 4:
+        pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_1x1[0]);
+        break;
+    default:
+        DBG_PRINTF("Invalid Trans Size\n");
+        return -1;
+        break;
+    }
+
+    /* sanity checks */
+    /* transform skip not supported */
+    ASSERT(0 == ps_entropy_ctxt->ps_pps->i1_transform_skip_enabled_flag);
+    {
+        temp_tex_bits_q12 = ps_cabac->u4_bits_estimated_q12;
+    }
+    /*************************************************************************/
+    /* derive base context index for sig coeff as per section 9.3.3.1.4      */
+    /* TODO; convert to look up based on luma/chroma, scan type and tfr size */
+    /*************************************************************************/
+    if(is_luma)
+    {
+        sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG;
+        abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG;
+
+        if(3 == log2_tr_size)
+        {
+            /* 8x8 transform size */
+            sig_coeff_base_ctxt += (scan_type == SCAN_DIAG_UPRIGHT) ? 9 : 15;
+        }
+        else if(3 < log2_tr_size)
+        {
+            /* larger transform sizes */
+            sig_coeff_base_ctxt += 21;
+        }
+    }
+    else
+    {
+        /* chroma context initializations */
+        sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG + 27;
+        abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG + 16;
+
+        if(3 == log2_tr_size)
+        {
+            /* 8x8 transform size */
+            sig_coeff_base_ctxt += 9;
+        }
+        else if(3 < log2_tr_size)
+        {
+            /* larger transform sizes */
+            sig_coeff_base_ctxt += 12;
+        }
+    }
+
+    /* go to csbf flags */
+    pu2_sig_coeff_buf = (UWORD16 *)(pu1_coeff_buf_hdr + COEFF_BUF_HEADER_LEN);
+
+    /*Calculating the distortion produced by all the zero coeffs in the TU*/
+    for(i = (trans_size * trans_size) - 1; i >= 0; i--)
+    {
+        WORD32 i4_dist;
+        WORD16 *pi2_orig_coeff = ps_rdoq_ctxt->pi2_trans_values;
+
+        if(pi2_coeffs[i] == 0)
+        {
+            i4_dist = CALC_SSD_IN_TRANS_DOMAIN(pi2_orig_coeff[i], 0, 0, 0);
+            i8_tu_not_coded_dist += i4_dist;
+            i8_tu_coded_dist += i4_dist;
+        }
+    }
+
+    /*Backup of the various cabac ctxts*/
+    memcpy(&s_sub_blk_not_coded_cabac_ctxt, ps_cabac, sizeof(cab_ctxt_t));
+    /************************************************************************/
+    /* encode the csbf, sig_coeff_map, abs_grt1_flags, abs_grt2_flag, sign  */
+    /* and abs_coeff_remaining for each 4x4 starting from last scan to first*/
+    /************************************************************************/
+
+    for(i = last_csb; i >= 0; i--)
+    {
+        UWORD16 u2_marker_csbf;
+        WORD32 ctxt_idx;
+        WORD32 i4_sub_blk_is_coded = 0;
+        WORD32 blk_row, blk_col;
+        WORD32 scaled_blk_row;
+        WORD32 scaled_blk_col;
+        WORD32 infer_coeff;
+
+        gt1_ctxt = temp_gt1_ctxt;
+#if DISABLE_ZCSBF
+        /*Initialize skip zero cbf flag to 0*/
+        i4_skip_zero_csbf = 0;
+        i4_num_abs_1_coeffs = 0;
+#endif
+
+#if OPT_MEMCPY
+        ihevce_copy_backup_ctxt(
+            (void *)&s_sub_blk_not_coded_cabac_ctxt,
+            (void *)ps_cabac,
+            (void *)&s_backup_ctxt_sub_blk_not_coded,
+            (void *)&s_backup_ctxt);
+        memset(s_backup_ctxt_sub_blk_not_coded.au1_ctxt_to_backup, 0, MAX_NUM_CONTEXT_ELEMENTS);
+        memset(s_backup_ctxt.au1_ctxt_to_backup, 0, MAX_NUM_CONTEXT_ELEMENTS);
+#else
+        memcpy(&s_sub_blk_not_coded_cabac_ctxt, ps_cabac, sizeof(cab_ctxt_t));
+#endif
+        // i4_temp_bits = s_sub_blk_not_coded_cabac_ctxt.u4_bits_estimated_q12;
+
+        blk_row = pu1_trans_table[i] >> shift_value; /*row of csb*/
+        blk_col = pu1_trans_table[i] & mask_value; /*col of csb*/
+
+        scaled_blk_row = blk_row << 2;
+        scaled_blk_col = blk_col << 2;
+
+        infer_coeff = (i < last_csb) && (i > 0);
+        u2_marker_csbf = *pu2_sig_coeff_buf;
+
+        if((blk_col + 1 < trans_size / 4)) /* checking right boundary */
+        {
+            if(!ps_rdoq_ctxt
+                    ->pu1_csbf_buf[pi4_subBlock2csbfId_map[blk_row * trans_size / 4 + blk_col + 1]])
+            {
+                /* clear the 2nd bit if the right csb is 0 */
+                u2_marker_csbf = u2_marker_csbf & (~(1 << 1));
+            }
+        }
+        if((blk_row + 1 < trans_size / 4)) /* checking bottom boundary */
+        {
+            if(!ps_rdoq_ctxt
+                    ->pu1_csbf_buf[pi4_subBlock2csbfId_map[(blk_row + 1) * trans_size / 4 + blk_col]])
+            {
+                /* clear the 3rd bit if the bottom csb is 0*/
+                u2_marker_csbf = u2_marker_csbf & (~(1 << 2));
+            }
+        }
+        pu2_sig_coeff_buf++;
+
+        /* sanity checks for marker present in every csbf flag */
+        ASSERT((u2_marker_csbf >> 4) == 0xBAD);
+
+        /* extract the current and neighbour csbf flags */
+        cur_csbf = u2_marker_csbf & 0x1;
+        nbr_csbf = (u2_marker_csbf >> 1) & 0x3;
+
+        if((i < last_csb) && (i > 0))
+        {
+            ctxt_idx = IHEVC_CAB_CODED_SUBLK_IDX;
+
+            /* ctxt based on right / bottom avail csbf, section 9.3.3.1.3 */
+            ctxt_idx += nbr_csbf ? 1 : 0;
+            ctxt_idx += is_luma ? 0 : 2;
+
+            ret |= ihevce_cabac_encode_bin(ps_cabac, cur_csbf, ctxt_idx);
+
+            s_backup_ctxt.au1_ctxt_to_backup[SUB_BLK_CODED_FLAG] = 1;
+
+            if(cur_csbf)
+            {
+                ret |= ihevce_cabac_encode_bin(&s_sub_blk_not_coded_cabac_ctxt, 0, ctxt_idx);
+                // clang-format off
+                i4_sub_blk_not_coded_bits =
+                    s_sub_blk_not_coded_cabac_ctxt.u4_bits_estimated_q12; // - i4_temp_bits;
+                s_backup_ctxt_sub_blk_not_coded.au1_ctxt_to_backup[SUB_BLK_CODED_FLAG] = 1;
+                // clang-format on
+            }
+        }
+        else
+        {
+            /* sanity check, this csb contains the last_sig_coeff */
+            if(i == last_csb)
+            {
+                ASSERT(cur_csbf == 1);
+            }
+        }
+        /*If any block in the TU is coded and the 0th block is not coded, the 0th
+          block is still signalled as csbf = 1, and with all sig_coeffs sent as
+          0(HEVC requirement)*/
+        if((ps_rdoq_ctxt->i1_tu_is_coded == 1) && (i == 0))
+        {
+            i4_sub_blk_not_coded_bits = ihevce_code_all_sig_coeffs_as_0_explicitly(
+                (void *)ps_rdoq_ctxt,
+                i,
+                pu1_trans_table,
+                is_luma,
+                scan_type,
+                infer_coeff,
+                nbr_csbf,
+                &s_sub_blk_not_coded_cabac_ctxt);
+        }
+
+        if(i == last_csb)
+        {
+            WORD32 i4_last_x = *pu1_last_sig_coeff_x;
+            WORD32 i4_last_y = *pu1_last_sig_coeff_y;
+            if(SCAN_VERT == scan_type)
+            {
+                /* last coeff x and y are swapped for vertical scan */
+                SWAP(i4_last_x, i4_last_y);
+            }
+            /* Encode the last_sig_coeff_x and last_sig_coeff_y */
+            ret |= ihevce_cabac_encode_last_coeff_x_y(
+                ps_cabac, i4_last_x, i4_last_y, log2_tr_size, is_luma);
+            s_backup_ctxt.au1_ctxt_to_backup[LASTXY] = 1;
+        }
+
+        if(cur_csbf)
+        {
+            /*****************************************************************/
+            /* encode the sig coeff map as per section 7.3.13                */
+            /* significant_coeff_flags: msb=coeff15-lsb=coeff0 in scan order */
+            /*****************************************************************/
+
+            WORD32 i4_bit_depth;
+            WORD32 i4_shift_iq;
+            WORD32 i4_dequant_val;
+            WORD32 bit; /* temp boolean */
+
+            UWORD16 u2_gt0_flags = *pu2_sig_coeff_buf;
+            WORD32 sig_coeff_map = u2_gt0_flags;
+            WORD32 gt1_flags = *(pu2_sig_coeff_buf + 1);
+            WORD32 sign_flags = *(pu2_sig_coeff_buf + 2);
+
+            WORD32 gt1_bins = 0; /* bins for coeffs with abslevel > 1 */
+
+            WORD16 *pi2_dequant_coeff = ps_rdoq_ctxt->pi2_dequant_coeff;
+            WORD16 i2_qp_rem = ps_rdoq_ctxt->i2_qp_rem;
+            WORD32 i4_qp_div = ps_rdoq_ctxt->i4_qp_div;
+
+            WORD32 sign_bins = 0; /* bins for sign flags of coded coeffs */
+            WORD32 num_coded = 0; /* total coeffs coded in 4x4 */
+
+            /* total count of coeffs to be coded as abs level remaining */
+            WORD32 num_coeffs_remaining = 0;
+
+            /* count of coeffs to be coded as abslevel-1 */
+            WORD32 num_coeffs_base1 = 0;
+            WORD32 scan_pos;
+            WORD32 first_gt1_coeff = 0;
+
+            i4_bit_depth = ps_entropy_ctxt->ps_sps->i1_bit_depth_luma_minus8 + 8;
+            i4_shift_iq = i4_bit_depth + ps_rdoq_ctxt->i4_log2_trans_size - 5;
+
+            i4_sub_blk_is_coded = 1;
+
+            if((i != 0) || (0 == last_csb))
+            {
+                /* sanity check, at least one coeff is coded as csbf is set */
+                ASSERT(sig_coeff_map != 0);
+            }
+            /* Distortion of this csb, coded vs all zero; every walking pointer steps row by row */
+            {
+                WORD32 k, j;
+                WORD16 *pi2_temp_coeff =
+                    &pi2_coeffs[scaled_blk_col + (scaled_blk_row * trans_size)];
+                WORD16 *pi2_temp_tr_coeff =
+                    &pi2_tr_coeffs[scaled_blk_col + (scaled_blk_row * trans_size)];
+                WORD16 *pi2_temp_dequant_coeff =
+                    &pi2_dequant_coeff[scaled_blk_col + (scaled_blk_row * trans_size)];
+
+                for(k = 0; k < 4; k++)
+                {
+                    for(j = 0; j < 4; j++)
+                    {
+                        if(*pi2_temp_coeff)
+                        {
+                            /*Inverse quantizing for distortion calculation*/
+                            if(ps_rdoq_ctxt->i4_trans_size != 4)
+                            {
+                                IQUANT(
+                                    i4_dequant_val,
+                                    *pi2_temp_coeff,
+                                    *pi2_temp_dequant_coeff * g_ihevc_iquant_scales[i2_qp_rem],
+                                    i4_shift_iq,
+                                    i4_qp_div);
+                            }
+                            else
+                            {
+                                IQUANT_4x4(
+                                    i4_dequant_val,
+                                    *pi2_temp_coeff,
+                                    *pi2_temp_dequant_coeff * g_ihevc_iquant_scales[i2_qp_rem],
+                                    i4_shift_iq,
+                                    i4_qp_div);
+                            }
+
+                            i8_sub_blk_coded_dist +=
+                                CALC_SSD_IN_TRANS_DOMAIN(*pi2_temp_tr_coeff, i4_dequant_val, 0, 0);
+
+                            i8_sub_blk_not_coded_dist +=
+                                CALC_SSD_IN_TRANS_DOMAIN(*pi2_temp_tr_coeff, 0, 0, 0);
+                        }
+#if DISABLE_ZCSBF
+                        if(abs(*pi2_temp_coeff) > 1)
+                        {
+                            i4_skip_zero_csbf = 1;
+                        }
+                        else if(abs(*pi2_temp_coeff) == 1)
+                        {
+                            i4_num_abs_1_coeffs++;
+                        }
+#endif
+                        pi2_temp_coeff++;
+                        pi2_temp_tr_coeff++;
+                        pi2_temp_dequant_coeff++;
+                    }
+                    pi2_temp_tr_coeff += ps_rdoq_ctxt->i4_trans_size - 4;
+                    pi2_temp_coeff += ps_rdoq_ctxt->i4_q_data_strd - 4;
+                    pi2_temp_dequant_coeff += ps_rdoq_ctxt->i4_trans_size - 4;
+                }
+            }
+
+#if DISABLE_ZCSBF
+            i4_skip_zero_csbf = i4_skip_zero_csbf || (i4_num_abs_1_coeffs > 3);
+#endif
+            pu2_sig_coeff_buf += 3;
+
+            scan_pos = 15;
+            if(i == last_csb)
+            {
+                /*************************************************************/
+                /* clear last_scan_pos for last block in scan order as this  */
+                /* is communicated through last_coeff_x and last_coeff_y     */
+                /*************************************************************/
+                WORD32 next_sig = CLZ(sig_coeff_map) + 1;
+
+                scan_pos = WORD_SIZE - next_sig;
+
+                /* prepare the bins for gt1 flags */
+                EXTRACT_BIT(bit, gt1_flags, scan_pos);
+
+                /* insert gt1 bin in lsb */
+                gt1_bins |= bit;
+
+                /* prepare the bins for sign flags */
+                EXTRACT_BIT(bit, sign_flags, scan_pos);
+
+                /* insert sign bin in lsb */
+                sign_bins |= bit;
+
+                sig_coeff_map = CLEAR_BIT(sig_coeff_map, scan_pos);
+
+                scan_pos--;
+                num_coded++;
+            }
+
+            /* encode the required sigcoeff flags (abslevel > 0) */
+            while(scan_pos >= 0)
+            {
+                WORD32 y_pos_x_pos;
+                WORD32 sig_ctxinc = 0; /* 0 is default inc for DC coeff */
+
+                WORD32 sig_coeff;
+
+                EXTRACT_BIT(sig_coeff, sig_coeff_map, scan_pos);
+
+                /* derive the x,y pos */
+                y_pos_x_pos = gu1_hevce_scan4x4[scan_type][scan_pos];
+
+                /* derive the context inc as per section 9.3.3.1.4 */
+                if(2 == log2_tr_size)
+                {
+                    /* 4x4 transform size increment uses lookup */
+                    sig_ctxinc = gu1_hevce_sigcoeff_ctxtinc_tr4[y_pos_x_pos];
+                }
+                else if(scan_pos || i)
+                {
+                    /* ctxt for AC coeff depends on curpos and neighbour csbf */
+                    sig_ctxinc = gu1_hevce_sigcoeff_ctxtinc[nbr_csbf][y_pos_x_pos];
+
+                    /* based on luma subblock pos */
+                    sig_ctxinc += (i && is_luma) ? 3 : 0;
+                }
+                else
+                {
+                    /* DC coeff has fixed context for luma and chroma */
+                    sig_coeff_base_ctxt = is_luma ? IHEVC_CAB_COEFF_FLAG
+                                                  : IHEVC_CAB_COEFF_FLAG + 27;
+                }
+
+                /*************************************************************/
+                /* encode sig coeff only if required                         */
+                /* decoder infers 0,0 coeff when all the other coeffs are 0  */
+                /*************************************************************/
+                if(scan_pos || (!infer_coeff))
+                {
+                    ctxt_idx = sig_ctxinc + sig_coeff_base_ctxt;
+                    //ret |= ihevce_cabac_encode_bin(ps_cabac, sig_coeff, ctxt_idx);
+                    {
+                        WORD32 state_mps = pu1_ctxt_model[ctxt_idx];
+
+                        /* increment bits generated based on state and bin encoded */
+                        ps_cabac->u4_bits_estimated_q12 +=
+                            gau2_ihevce_cabac_bin_to_bits[state_mps ^ sig_coeff];
+
+                        /* update the context model from state transition LUT */
+                        pu1_ctxt_model[ctxt_idx] =
+                            gau1_ihevc_next_state[(state_mps << 1) | sig_coeff];
+                    }
+                }
+
+                if(sig_coeff)
+                {
+                    /* prepare the bins for gt1 flags */
+                    EXTRACT_BIT(bit, gt1_flags, scan_pos);
+
+                    /* shift and insert gt1 bin in lsb */
+                    gt1_bins <<= 1;
+                    gt1_bins |= bit;
+
+                    /* prepare the bins for sign flags */
+                    EXTRACT_BIT(bit, sign_flags, scan_pos);
+
+                    /* shift and insert sign bin in lsb */
+                    sign_bins <<= 1;
+                    sign_bins |= bit;
+
+                    num_coded++;
+
+                    /* 0,0 coeff can no more be inferred :( */
+                    infer_coeff = 0;
+                }
+
+                scan_pos--;
+            }
+
+            s_backup_ctxt.au1_ctxt_to_backup[SIG_COEFF] = 1;
+
+            /****************************************************************/
+            /* encode the abs level greater than 1 bins; Section 7.3.13     */
+            /* These have already been prepared during sig_coeff_map encode */
+            /* Context modelling done as per section 9.3.3.1.5              */
+            /****************************************************************/
+            {
+                WORD32 j;
+
+                /* context set based on luma subblock pos */
+                WORD32 ctxt_set = (i && is_luma) ? 2 : 0;
+
+                /* count of coeffs with abslevel > 1; max of 8 to be coded */
+                WORD32 num_gt1_bins = MIN(8, num_coded);
+
+                if(num_coded > 8)
+                {
+                    /* pull back the bins to required number */
+                    gt1_bins >>= (num_coded - 8);
+
+                    num_coeffs_remaining += (num_coded - 8);
+                    num_coeffs_base1 = (num_coded - 8);
+                }
+
+                /* See section 9.3.3.1.5 */
+                ctxt_set += (0 == gt1_ctxt) ? 1 : 0;
+
+                gt1_ctxt = 1;
+
+                for(j = num_gt1_bins - 1; j >= 0; j--)
+                {
+                    /* Encode the abs level gt1 bins */
+                    ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
+
+                    EXTRACT_BIT(bit, gt1_bins, j);
+
+                    //ret |= ihevce_cabac_encode_bin(ps_cabac, bit, ctxt_idx);
+                    {
+                        WORD32 state_mps = pu1_ctxt_model[ctxt_idx];
+
+                        /* increment bits generated based on state and bin encoded */
+                        ps_cabac->u4_bits_estimated_q12 +=
+                            gau2_ihevce_cabac_bin_to_bits[state_mps ^ bit];
+
+                        /* update the context model from state transition LUT */
+                        pu1_ctxt_model[ctxt_idx] = gau1_ihevc_next_state[(state_mps << 1) | bit];
+                    }
+
+                    if(bit)
+                    {
+                        gt1_ctxt = 0;
+                        num_coeffs_remaining++;
+                    }
+                    else if(gt1_ctxt && (gt1_ctxt < 3))
+                    {
+                        gt1_ctxt++;
+                    }
+                }
+                s_backup_ctxt.au1_ctxt_to_backup[GRTR_THAN_1] = 1;
+                /*************************************************************/
+                /* encode abs level greater than 2 bin; Section 7.3.13       */
+                /*************************************************************/
+                if(gt1_bins)
+                {
+                    WORD32 gt2_bin;
+
+                    first_gt1_coeff = pu2_sig_coeff_buf[0] + 1;
+                    gt2_bin = (first_gt1_coeff > 2);
+
+                    /* at least one level > 2 */
+                    ctxt_idx = IHEVC_CAB_COEFABS_GRTR2_FLAG;
+
+                    ctxt_idx += (is_luma) ? ctxt_set : (ctxt_set + 4);
+
+                    //ret |= ihevce_cabac_encode_bin(ps_cabac, gt2_bin, ctxt_idx);
+                    {
+                        WORD32 state_mps = pu1_ctxt_model[ctxt_idx];
+
+                        /* increment bits generated based on state and bin encoded */
+                        ps_cabac->u4_bits_estimated_q12 +=
+                            gau2_ihevce_cabac_bin_to_bits[state_mps ^ gt2_bin];
+
+                        /* update the context model from state transition LUT */
+                        pu1_ctxt_model[ctxt_idx] =
+                            gau1_ihevc_next_state[(state_mps << 1) | gt2_bin];
+                    }
+
+                    if(!gt2_bin)
+                    {
+                        /* sanity check */
+                        ASSERT(first_gt1_coeff == 2);
+
+                        /* no need to send this coeff as bypass bins */
+                        pu2_sig_coeff_buf++;
+                        num_coeffs_remaining--;
+                    }
+                    s_backup_ctxt.au1_ctxt_to_backup[GRTR_THAN_2] = 1;
+                }
+            }
+
+            /*************************************************************/
+            /* encode the coeff signs and abs remaining levels           */
+            /*************************************************************/
+            if(num_coded)
+            {
+                WORD32 base_level;
+                WORD32 rice_param = 0;
+                WORD32 j;
+
+                /*************************************************************/
+                /* encode the coeff signs populated in sign_bins             */
+                /*************************************************************/
+                if(num_coded > 0)
+                {
+                    ret |= ihevce_cabac_encode_bypass_bins(ps_cabac, sign_bins, num_coded);
+                }
+                /*************************************************************/
+                /* encode the coeff_abs_level_remaining as TR / EGK bins     */
+                /* See section 9.3.2.7 for details                           */
+                /*************************************************************/
+
+                /* first remaining coeff baselevel */
+                if(first_gt1_coeff > 2)
+                {
+                    base_level = 3;
+                }
+                else if(num_coeffs_remaining > num_coeffs_base1)
+                {
+                    /* at least one coeff in first 8 is gt > 1 */
+                    base_level = 2;
+                }
+                else
+                {
+                    /* all coeffs have base of 1 */
+                    base_level = 1;
+                }
+
+                for(j = 0; j < num_coeffs_remaining; j++)
+                {
+                    WORD32 abs_coeff = pu2_sig_coeff_buf[0] + 1;
+                    WORD32 abs_coeff_rem;
+                    WORD32 rice_max = (4 << rice_param);
+
+                    pu2_sig_coeff_buf++;
+
+                    /* sanity check */
+                    ASSERT(abs_coeff >= base_level);
+
+                    abs_coeff_rem = (abs_coeff - base_level);
+
+                    /* TODO://HM-8.0-dev uses (3 << rice_param) as rice_max */
+                    /* TODO://HM-8.0-dev does either TR or EGK but not both */
+                    if(abs_coeff_rem >= rice_max)
+                    {
+                        UWORD32 u4_suffix = (abs_coeff_rem - rice_max);
+
+                        /* coeff exceeds max rice limit                    */
+                        /* encode the TR prefix as truncated unary code    */
+                        /* prefix = 1111 as (rice_max >> rice_param) = 4   */
+                        ret |= ihevce_cabac_encode_bypass_bins(ps_cabac, 0xF, 4);
+
+                        /* encode the exponential golomb code suffix */
+                        ret |= ihevce_cabac_encode_egk(ps_cabac, u4_suffix, (rice_param + 1));
+                    }
+                    else
+                    {
+                        /* code coeff as truncated rice code  */
+                        ret |= ihevce_cabac_encode_trunc_rice(
+                            ps_cabac, abs_coeff_rem, rice_param, rice_max);
+                    }
+
+                    /* update the rice param based on coeff level */
+                    if((abs_coeff > (3 << rice_param)) && (rice_param < 4))
+                    {
+                        rice_param++;
+                    }
+
+                    /* change base level to 1 if more than 8 coded coeffs */
+                    if((j + 1) < (num_coeffs_remaining - num_coeffs_base1))
+                    {
+                        base_level = 2;
+                    }
+                    else
+                    {
+                        base_level = 1;
+                    }
+                }
+            }
+
+            i4_sub_blk_coded_bits = ps_cabac->u4_bits_estimated_q12;
+            /**********************************************************/
+            /**********************************************************/
+            /**********************************************************/
+            /*Decide whether sub block should be coded or not*/
+            /**********************************************************/
+            /**********************************************************/
+            /**********************************************************/
+            i8_sub_blk_coded_metric = CALC_CUMMUL_SSD_IN_TRANS_DOMAIN(
+                                          i8_sub_blk_coded_dist, 0, i4_round_val, i4_shift_val) +
+                                      COMPUTE_RATE_COST_CLIP30_RDOQ(
+                                          i4_sub_blk_coded_bits,
+                                          ps_rdoq_ctxt->i8_cl_ssd_lambda_qf,
+                                          (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
+            i8_sub_blk_not_coded_metric =
+                CALC_CUMMUL_SSD_IN_TRANS_DOMAIN(
+                    i8_sub_blk_not_coded_dist, 0, i4_round_val, i4_shift_val) +
+                COMPUTE_RATE_COST_CLIP30_RDOQ(
+                    i4_sub_blk_not_coded_bits,
+                    ps_rdoq_ctxt->i8_cl_ssd_lambda_qf,
+                    (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
+
+#if DISABLE_ZCSBF
+            if(((i8_sub_blk_not_coded_metric < i8_sub_blk_coded_metric) ||
+                (i4_sub_blk_is_coded == 0)) &&
+               (i4_skip_zero_csbf == 0))
+#else
+            if((i8_sub_blk_not_coded_metric < i8_sub_blk_coded_metric) ||
+               (i4_sub_blk_is_coded == 0))
+#endif
+            {
+#if OPT_MEMCPY
+                ihevce_copy_backup_ctxt(
+                    (void *)ps_cabac,
+                    (void *)&s_sub_blk_not_coded_cabac_ctxt,
+                    (void *)&s_backup_ctxt,
+                    (void *)&s_backup_ctxt_sub_blk_not_coded);
+#else
+                memcpy(ps_cabac, &s_sub_blk_not_coded_cabac_ctxt, sizeof(cab_ctxt_t));
+#endif
+                scan_pos = 15;
+                i4_sub_blk_is_coded = 0;
+
+                {
+                    WORD32 k, j;
+                    WORD16 *pi2_temp_coeff =
+                        &pi2_coeffs[scaled_blk_col + (scaled_blk_row * ps_rdoq_ctxt->i4_q_data_strd)];
+                    WORD16 *pi2_temp_iquant_coeff =
+                        &ps_rdoq_ctxt->pi2_iquant_coeffs
+                             [scaled_blk_col + (scaled_blk_row * ps_rdoq_ctxt->i4_iq_data_strd)];
+                    for(k = 0; k < 4; k++)
+                    {
+                        for(j = 0; j < 4; j++)
+                        {
+                            *pi2_temp_coeff = 0;
+                            *pi2_temp_iquant_coeff = 0;
+
+                            pi2_temp_coeff++;
+                            pi2_temp_iquant_coeff++;
+                        }
+                        pi2_temp_coeff += ps_rdoq_ctxt->i4_q_data_strd - 4;
+                        pi2_temp_iquant_coeff += ps_rdoq_ctxt->i4_iq_data_strd - 4;
+                    }
+                }
+
+                /* If the csb to be masked is the last csb, then we should
+                 * signal last x and last y from the next coded sub_blk */
+                if(i == last_csb)
+                {
+                    pu1_coeff_buf_hdr = (UWORD8 *)pu2_sig_coeff_buf;
+
+                    ps_rdoq_ctxt->pu1_csbf_buf[pi4_subBlock2csbfId_map[pu1_trans_table[i]]] = 0;
+                    last_csb = ihevce_find_new_last_csb(
+                        pi4_subBlock2csbfId_map,
+                        i,
+                        (void *)ps_rdoq_ctxt,
+                        pu1_trans_table,
+                        pu1_csb_table,
+                        pi2_coeffs,
+                        shift_value,
+                        mask_value,
+                        &pu1_coeff_buf_hdr);
+                    /*We are in a for loop. This means that the decrement to i happens immediately right
+                      at the end of the for loop. This would decrement the value of i to (last_csb - 1).
+                      Hence we increment i by 1, so that after the decrement i becomes last_csb.*/
+                    i = last_csb + 1;
+                    pu1_last_sig_coeff_x = &pu1_coeff_buf_hdr[0];
+                    pu1_last_sig_coeff_y = &pu1_coeff_buf_hdr[1];
+                    scan_type = pu1_coeff_buf_hdr[2];
+                    pu2_sig_coeff_buf = (UWORD16 *)(pu1_coeff_buf_hdr + 4);
+                }
+                i8_tu_coded_dist += i8_sub_blk_not_coded_dist;
+                i4_tu_coded_bits += i4_sub_blk_not_coded_bits;
+            }
+            else
+            {
+                ps_rdoq_ctxt->i1_tu_is_coded = 1;
+                temp_gt1_ctxt = gt1_ctxt;
+
+                i8_tu_coded_dist += i8_sub_blk_coded_dist;
+                i4_tu_coded_bits += i4_sub_blk_coded_bits;
+            }
+#if DISABLE_ZCSBF
+            i4_skip_zero_cbf = i4_skip_zero_cbf || i4_skip_zero_csbf;
+#endif
+            /*Cumulating the distortion for the entire TU*/
+            i8_tu_not_coded_dist += i8_sub_blk_not_coded_dist;
+            //i4_tu_coded_dist += i4_sub_blk_coded_dist;
+            //i4_tu_coded_bits += i4_sub_blk_coded_bits;
+            i8_sub_blk_not_coded_dist = 0;
+            i4_sub_blk_not_coded_bits = 0;
+            i8_sub_blk_coded_dist = 0;
+            i4_sub_blk_coded_bits = 0;
+
+            if(i4_sub_blk_is_coded)
+            {
+                ps_rdoq_ctxt->pu1_csbf_buf[pi4_subBlock2csbfId_map[pu1_trans_table[i]]] = 1;
+                temp_zero_col = (temp_zero_col) | (0xF << scaled_blk_col);
+                temp_zero_row = (temp_zero_row) | (0xF << scaled_blk_row);
+            }
+            else
+            {
+                if(!((ps_rdoq_ctxt->i1_tu_is_coded == 1) && (i == 0)))
+                {
+                    ps_rdoq_ctxt->pu1_csbf_buf[pi4_subBlock2csbfId_map[pu1_trans_table[i]]] = 0;
+                }
+            }
+        }
+    }
+
+    /*tap texture bits*/
+    {
+        ps_cabac->u4_texture_bits_estimated_q12 +=
+            (ps_cabac->u4_bits_estimated_q12 - temp_tex_bits_q12);
+    }
+
+    i8_tu_not_coded_dist =
+        CALC_CUMMUL_SSD_IN_TRANS_DOMAIN(i8_tu_not_coded_dist, 0, i4_round_val, i4_shift_val);
+
+    /* i4_tu_coded_dist = CALC_CUMMUL_SSD_IN_TRANS_DOMAIN(
+        i4_tu_coded_dist, 0, i4_round_val, i4_shift_val); */
+    *pi8_tu_coded_dist = i8_tu_coded_dist;
+    *pi8_tu_not_coded_dist = i8_tu_not_coded_dist;
+#if DISABLE_ZCSBF
+    if(i4_skip_zero_cbf == 1)
+    {
+        *pi8_tu_not_coded_dist = 0x7FFFFFFF;
+    }
+#endif
+
+    *ps_rdoq_ctxt->pi4_zero_col = ~temp_zero_col;
+    *ps_rdoq_ctxt->pi4_zero_row = ~temp_zero_row;
+
+    return (ret);
+}
+
+/**
+******************************************************************************
+*
+* @brief Codes all the sig coeffs as 0
+*
+* @param[in] i
+* Index of the current csb
+*
+* @param[in] pu1_trans_table
+* Pointer to the trans table
+*
+* @param[in] scan_type
+* Determines the scan order
+*
+* @param[in] infer_coeff
+* Indicates whether the 0,0 coeff can be inferred or not
+*
+* @param[in] nbr_csbf
+* Indicates whether the neighbouring csbs (right and bottom) are coded or not
+*
+* @param[in] ps_cabac
+* Cabac state
+*
+* @param[in] is_luma
+* boolean indicating if the texture type is luma / chroma
+*
+* @return The number of bits generated when the 0th sub blk is coded as all 0s
+* This is the cumulate bits(i.e. for all blocks in the TU), and not only
+* the bits generated for this block
+*
+******************************************************************************
+*/
+WORD32 ihevce_code_all_sig_coeffs_as_0_explicitly(
+    void *pv_rdoq_ctxt,
+    WORD32 i,
+    UWORD8 *pu1_trans_table,
+    WORD32 is_luma,
+    WORD32 scan_type,
+    WORD32 infer_coeff,
+    WORD32 nbr_csbf,
+    cab_ctxt_t *ps_cabac)
+{
+    rdoq_sbh_ctxt_t *ps_ctxt = (rdoq_sbh_ctxt_t *)pv_rdoq_ctxt;
+    const WORD32 tr_size_log2 = ps_ctxt->i4_log2_trans_size;
+
+    /* sig coeff context base of the DC coeff: fixed per texture type (chroma at +27) */
+    const WORD32 dc_base_ctxt = is_luma ? IHEVC_CAB_COEFF_FLAG : (IHEVC_CAB_COEFF_FLAG + 27);
+
+    /* context base of the other coeffs; offset below by transform size / scan type */
+    WORD32 base_ctxt = dc_base_ctxt;
+
+    /* status of the bin encodes; collected but not reported (a bit count is returned) */
+    WORD32 status = 0;
+    WORD32 pos;
+
+    (void)pu1_trans_table;
+
+    /*************************************************************************/
+    /* derive base context index for sig coeff as per section 9.3.3.1.4      */
+    /*************************************************************************/
+    if(3 == tr_size_log2)
+    {
+        /* 8x8 transform size */
+        if(is_luma)
+        {
+            base_ctxt += (SCAN_DIAG_UPRIGHT == scan_type) ? 9 : 15;
+        }
+        else
+        {
+            base_ctxt += 9;
+        }
+    }
+    else if(tr_size_log2 > 3)
+    {
+        /* larger transform sizes */
+        base_ctxt += is_luma ? 21 : 12;
+    }
+
+    /* code a zero sig coeff flag for the 16 scan positions, last position first */
+    for(pos = 15; pos >= 0; pos--)
+    {
+        WORD32 sig_coeff = 0;
+        WORD32 ctxt_inc = 0; /* increment stays 0 for the DC coeff */
+
+        /* derive the x,y pos */
+        const WORD32 yx_pos = gu1_hevce_scan4x4[scan_type][pos];
+
+        /* derive the context inc as per section 9.3.3.1.4 */
+        if(2 == tr_size_log2)
+        {
+            /* 4x4 transform: increment comes straight from a lookup */
+            ctxt_inc = gu1_hevce_sigcoeff_ctxtinc_tr4[yx_pos];
+        }
+        else if((0 != pos) || (0 != i))
+        {
+            /* AC coeff: depends on the position and on the neighbour csbf */
+            ctxt_inc = gu1_hevce_sigcoeff_ctxtinc[nbr_csbf][yx_pos];
+
+            if(i && is_luma)
+            {
+                ctxt_inc += 3; /* luma csb other than the first one */
+            }
+        }
+        else
+        {
+            /* DC coeff has a fixed context for luma and chroma */
+            base_ctxt = dc_base_ctxt;
+        }
+
+        /* the flag at position 0 is not coded when infer_coeff is set */
+        if((0 != pos) || (0 == infer_coeff))
+        {
+            status |= ihevce_cabac_encode_bin(ps_cabac, sig_coeff, ctxt_inc + base_ctxt);
+            AEV_TRACE("significant_coeff_flag", sig_coeff, ps_cabac->u4_range);
+        }
+    }
+
+    return (ps_cabac->u4_bits_estimated_q12);
+}
+
+/**
+******************************************************************************
+*
+* @brief Finds the next csb with a non-zero coeff
+*
+* @param[in] cur_last_csb_pos
+* The index of the current csb with a non-zero coeff
+*
+* @param[inout] pv_rdoq_ctxt
+* RDOQ context structure
+*
+* @param[in] pu1_trans_table
+* Pointer to the trans table
+*
+* @param[in] pi2_coeffs
+* Pointer to all the quantized coefficients
+*
+* @param[in] shift_value
+* Determines the shifting value for determining appropriate position of coeff
+*
+* @param[in] mask_value
+* Determines the masking value for determining appropriate position of coeff
+*
+* @param[in] pi4_subBlock2csbfId_map
+* Maps the position of a csb to its index in the csbf buffer of the RDOQ context
+*
+* @param[in] pu1_csb_table
+* Scan order of the coefficients inside a csb
+*
+* @param[inout] ppu1_addr
+* Pointer to the header(i.e. pointer used for traversing the ecd data generated
+* in ihevce_scan_coeffs)
+*
+* @return The index of the csb with the next non-zero coeff
+*
+******************************************************************************
+*/
+WORD32 ihevce_find_new_last_csb(
+    WORD32 *pi4_subBlock2csbfId_map,
+    WORD32 cur_last_csb_pos,
+    void *pv_rdoq_ctxt,
+    UWORD8 *pu1_trans_table,
+    UWORD8 *pu1_csb_table,
+    WORD16 *pi2_coeffs,
+    WORD32 shift_value,
+    WORD32 mask_value,
+    UWORD8 **ppu1_addr)
+{
+    rdoq_sbh_ctxt_t *ps_ctxt = (rdoq_sbh_ctxt_t *)pv_rdoq_ctxt;
+    const WORD32 coeff_strd = ps_ctxt->i4_trans_size;
+
+    /* working copy of the header pointer; written back only when a csb is found */
+    UWORD8 *pu1_hdr = *ppu1_addr;
+    WORD32 csb_idx;
+
+    /* walk back through the csbs that precede the current last one */
+    for(csb_idx = cur_last_csb_pos - 1; csb_idx >= 0; csb_idx--)
+    {
+        const WORD32 csb_pos = pu1_trans_table[csb_idx];
+        WORD32 csb_x, csb_y;
+        WORD32 x_pos, y_pos;
+        WORD32 scan_pos;
+
+        if(!ps_ctxt->pu1_csbf_buf[pi4_subBlock2csbfId_map[csb_pos]])
+        {
+            /* csbf not set: advance 2 bytes (presumably this csb's 16 bit csbf entry) */
+            pu1_hdr += 2;
+            continue;
+        }
+
+        /* move the pointer back to where the 4 byte header has to start */
+        pu1_hdr -= 4;
+
+        /* top-left coeff of the csb (col from the mask, row from the shift) */
+        csb_x = (csb_pos & mask_value) * 4;
+        csb_y = (csb_pos >> shift_value) * 4;
+
+        /* find the first non-zero coeff inside the csb in reverse scan order */
+        scan_pos = 15;
+        do
+        {
+            x_pos = csb_x + (pu1_csb_table[scan_pos] & 0x3);
+            y_pos = csb_y + (pu1_csb_table[scan_pos] >> 2);
+
+            if(0 != pi2_coeffs[x_pos + (y_pos * coeff_strd)])
+            {
+                break;
+            }
+            scan_pos--;
+        } while(scan_pos >= 0);
+
+        ASSERT(scan_pos >= 0);
+
+        /* header: last_x, last_y, scan order, last csb pos in scan order */
+        pu1_hdr[0] = (UWORD8)x_pos;
+        pu1_hdr[1] = (UWORD8)y_pos;
+        pu1_hdr[2] = ps_ctxt->i4_scan_idx;
+        pu1_hdr[3] = csb_idx;
+
+        /* the header is followed by 16 bit data; first comes the csbf marker */
+        /* with only the current csbf bit set, since right & bottom csbf are 0 */
+        *((UWORD16 *)(pu1_hdr + 4)) = 0xBAD0 | 1;
+
+        /* hand the new header position back to the caller */
+        *ppu1_addr = pu1_hdr;
+
+        /* only the first coded csb is needed */
+        break;
+    }
+
+    /* index of the new last csb, or -1 when no coded csb precedes it */
+    return csb_idx;
+}
+
+/**
+******************************************************************************
+*
+* @brief Used to optimize the memcpy of cabac states. It copies only those
+* states in the cabac context which have been altered.
+*
+* @param[inout] pv_dest
+* Pointer to destination cabac state.
+*
+* @param[inout] pv_backup_ctxt_dest
+* Pointer to destination backup context
+*
+* @param[inout] pv_backup_ctxt_src
+* Pointer to source backup context
+*
+* @Desc:
+* We go through each element in the backup_ctxt structure which will tell us
+* if the states corresponding to lastxlasty, sigcoeffs, grtr_than_1_bins,
+* grtr_than_2_bins and sub_blk_coded_flag(i.e. 0xBAD0) context elements
+* have been altered. If they have been altered, we will memcpy the states
+* corresponding to these context elements alone
+*
+* @return Nothing
+*
+******************************************************************************
+*/
+void ihevce_copy_backup_ctxt(
+    void *pv_dest, void *pv_src, void *pv_backup_ctxt_dest, void *pv_backup_ctxt_src)
+{
+    UWORD8 *pu1_dest = (UWORD8 *)(((cab_ctxt_t *)pv_dest)->au1_ctxt_models);
+    UWORD8 *pu1_src = (UWORD8 *)(((cab_ctxt_t *)pv_src)->au1_ctxt_models);
+    backup_ctxt_t *ps_backup_dest_ctxt = ((backup_ctxt_t *)pv_backup_ctxt_dest);
+    backup_ctxt_t *ps_backup_src_ctxt = ((backup_ctxt_t *)pv_backup_ctxt_src);
+
+    /*
+    0 IHEVC_CAB_COEFFX_PREFIX lastx last y has been coded
+    1 IHEVC_CAB_CODED_SUBLK_IDX sub-blk coded or not flag has been coded
+    2 IHEVC_CAB_COEFF_FLAG sigcoeff has been coded
+    3 IHEVC_CAB_COEFABS_GRTR1_FLAG greater than 1 bin has been coded
+    4 IHEVC_CAB_COEFABS_GRTR2_FLAG greater than 2 bin has been coded*/
+    assert(MAX_NUM_CONTEXT_ELEMENTS == 5);
+
+    /* A group of context models is copied from src to dest when either backup */
+    /* context flags it; both flags are cleared right after the copy, so one   */
+    /* pass over the five groups is enough (a repeat pass would copy nothing). */
+    if((ps_backup_src_ctxt->au1_ctxt_to_backup[SIG_COEFF]) ||
+       (ps_backup_dest_ctxt->au1_ctxt_to_backup[SIG_COEFF]))
+    {
+        memcpy(&pu1_dest[IHEVC_CAB_COEFF_FLAG], &pu1_src[IHEVC_CAB_COEFF_FLAG], 42);
+        ps_backup_dest_ctxt->au1_ctxt_to_backup[SIG_COEFF] = 0;
+        ps_backup_src_ctxt->au1_ctxt_to_backup[SIG_COEFF] = 0;
+    }
+    if((ps_backup_src_ctxt->au1_ctxt_to_backup[GRTR_THAN_1]) ||
+       (ps_backup_dest_ctxt->au1_ctxt_to_backup[GRTR_THAN_1]))
+    {
+        memcpy(
+            &pu1_dest[IHEVC_CAB_COEFABS_GRTR1_FLAG],
+            &pu1_src[IHEVC_CAB_COEFABS_GRTR1_FLAG],
+            24);
+        ps_backup_dest_ctxt->au1_ctxt_to_backup[GRTR_THAN_1] = 0;
+        ps_backup_src_ctxt->au1_ctxt_to_backup[GRTR_THAN_1] = 0;
+    }
+    if((ps_backup_src_ctxt->au1_ctxt_to_backup[GRTR_THAN_2]) ||
+       (ps_backup_dest_ctxt->au1_ctxt_to_backup[GRTR_THAN_2]))
+    {
+        memcpy(
+            &pu1_dest[IHEVC_CAB_COEFABS_GRTR2_FLAG], &pu1_src[IHEVC_CAB_COEFABS_GRTR2_FLAG], 6);
+        ps_backup_dest_ctxt->au1_ctxt_to_backup[GRTR_THAN_2] = 0;
+        ps_backup_src_ctxt->au1_ctxt_to_backup[GRTR_THAN_2] = 0;
+    }
+    if((ps_backup_src_ctxt->au1_ctxt_to_backup[SUB_BLK_CODED_FLAG]) ||
+       (ps_backup_dest_ctxt->au1_ctxt_to_backup[SUB_BLK_CODED_FLAG]))
+    {
+        memcpy(&pu1_dest[IHEVC_CAB_CODED_SUBLK_IDX], &pu1_src[IHEVC_CAB_CODED_SUBLK_IDX], 4);
+        ps_backup_dest_ctxt->au1_ctxt_to_backup[SUB_BLK_CODED_FLAG] = 0;
+        ps_backup_src_ctxt->au1_ctxt_to_backup[SUB_BLK_CODED_FLAG] = 0;
+    }
+    if((ps_backup_src_ctxt->au1_ctxt_to_backup[LASTXY]) ||
+       (ps_backup_dest_ctxt->au1_ctxt_to_backup[LASTXY]))
+    {
+        memcpy(&pu1_dest[IHEVC_CAB_COEFFX_PREFIX], &pu1_src[IHEVC_CAB_COEFFX_PREFIX], 36);
+        ps_backup_dest_ctxt->au1_ctxt_to_backup[LASTXY] = 0;
+        ps_backup_src_ctxt->au1_ctxt_to_backup[LASTXY] = 0;
+    }
+
+    /* the estimated bit count always follows the source cabac state */
+    ((cab_ctxt_t *)pv_dest)->u4_bits_estimated_q12 = ((cab_ctxt_t *)pv_src)->u4_bits_estimated_q12;
+}
diff --git a/encoder/ihevce_chroma_had_satd.c b/encoder/ihevce_chroma_had_satd.c
new file mode 100644
index 0000000..5e158ae
--- /dev/null
+++ b/encoder/ihevce_chroma_had_satd.c
@@ -0,0 +1,558 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_chroma_had_satd.c
+*
+* \brief
+* This file contains function definitions of chroma HAD SATD functions
+*
+* \date
+* 15/07/2013
+*
+* \author
+* Ittiam
+*
+* List of Functions
+* ihevce_chroma_HAD_4x4_8bit()
+* ihevce_chroma_compute_AC_HAD_4x4_8bit()
+* ihevce_hbd_chroma_HAD_4x4()
+* ihevce_hbd_chroma_compute_AC_HAD_4x4()
+* ihevce_chroma_HAD_8x8_8bit()
+* ihevce_hbd_chroma_HAD_8x8()
+* ihevce_chroma_HAD_16x16_8bit()
+* ihevce_hbd_chroma_HAD_16x16()
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "ihevc_debug.h"
+#include "itt_video_api.h"
+
+#include "ihevce_api.h"
+#include "ihevce_defs.h"
+#include "ihevce_had_satd.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+* Chroma Hadamard Transform of 4x4 block (8-bit input)
+*
+* @par Description:
+*
+* @param[in] pu1_origin
+* UWORD8 pointer to the source block (u or v, interleaved)
+*
+* @param[in] src_strd
+* WORD32 Source stride
+*
+* @param[in] pu1_pred_buf
+* UWORD8 pointer to the prediction block (u or v, interleaved)
+*
+* @param[in] pred_strd
+* WORD32 Pred stride
+*
+* @param[in] pi2_dst
+* WORD16 pointer to the transform block (not written, see remarks)
+*
+* @param[in] dst_strd
+* WORD32 Destination stride (u or v, interleaved); unused, see remarks
+*
+* @returns
+* Hadamard SAD
+*
+* @remarks
+* Not updating the transform destination now. Only returning the SATD
+*
+*******************************************************************************
+*/
+UWORD32 ihevce_chroma_HAD_4x4_8bit(
+    UWORD8 *pu1_origin,
+    WORD32 src_strd,
+    UWORD8 *pu1_pred_buf,
+    WORD32 pred_strd,
+    WORD16 *pi2_dst,
+    WORD32 dst_strd)
+{
+    WORD16 ai2_resi[16], ai2_tmp[16], ai2_coef[16];
+    UWORD32 u4_satd = 0;
+    WORD32 row, col, idx;
+
+    /* The transform output is not written; only the SATD is returned */
+    (void)pi2_dst;
+    (void)dst_strd;
+
+    /* Residual of one plane: samples are interleaved, so every second */
+    /* byte of each row is read                                        */
+    for(row = 0; row < 4; row++)
+    {
+        for(col = 0; col < 4; col++)
+        {
+            ai2_resi[(4 * row) + col] = pu1_origin[2 * col] - pu1_pred_buf[2 * col];
+        }
+        pu1_origin += src_strd;
+        pu1_pred_buf += pred_strd;
+    }
+
+    /*===== hadamard transform =====*/
+    /* Vertical pass: both butterfly stages of a column touch only that column */
+    for(col = 0; col < 4; col++)
+    {
+        ai2_tmp[col] = ai2_resi[col] + ai2_resi[12 + col];
+        ai2_tmp[4 + col] = ai2_resi[4 + col] + ai2_resi[8 + col];
+        ai2_tmp[8 + col] = ai2_resi[4 + col] - ai2_resi[8 + col];
+        ai2_tmp[12 + col] = ai2_resi[col] - ai2_resi[12 + col];
+
+        ai2_coef[col] = ai2_tmp[col] + ai2_tmp[4 + col];
+        ai2_coef[4 + col] = ai2_tmp[8 + col] + ai2_tmp[12 + col];
+        ai2_coef[8 + col] = ai2_tmp[col] - ai2_tmp[4 + col];
+        ai2_coef[12 + col] = ai2_tmp[12 + col] - ai2_tmp[8 + col];
+    }
+
+    /* Horizontal pass: idx is the offset of the first element of a row */
+    for(idx = 0; idx < 16; idx += 4)
+    {
+        ai2_tmp[idx] = ai2_coef[idx] + ai2_coef[idx + 3];
+        ai2_tmp[idx + 1] = ai2_coef[idx + 1] + ai2_coef[idx + 2];
+        ai2_tmp[idx + 2] = ai2_coef[idx + 1] - ai2_coef[idx + 2];
+        ai2_tmp[idx + 3] = ai2_coef[idx] - ai2_coef[idx + 3];
+
+        ai2_coef[idx] = ai2_tmp[idx] + ai2_tmp[idx + 1];
+        ai2_coef[idx + 1] = ai2_tmp[idx] - ai2_tmp[idx + 1];
+        ai2_coef[idx + 2] = ai2_tmp[idx + 2] + ai2_tmp[idx + 3];
+        ai2_coef[idx + 3] = ai2_tmp[idx + 3] - ai2_tmp[idx + 2];
+    }
+
+    /*===== sad =====*/
+    for(idx = 0; idx < 16; idx++)
+    {
+        WORD32 i4_coef = ai2_coef[idx];
+
+        u4_satd += (i4_coef > 0) ? i4_coef : -i4_coef;
+    }
+
+    /* Rounded division by 4 */
+    return ((u4_satd + 2) >> 2);
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Chroma Hadamard Transform of 4x4 block (8-bit input) with DC suppressed
+*
+* @par Description:
+*
+* @param[in] pu1_origin
+* UWORD8 pointer to the source block (u or v, interleaved)
+*
+* @param[in] src_strd
+* WORD32 Source stride
+*
+* @param[in] pu1_pred_buf
+* UWORD8 pointer to the prediction block (u or v, interleaved)
+*
+* @param[in] pred_strd
+* WORD32 Pred stride
+*
+* @param[in] pi2_dst
+* WORD16 pointer to the transform block (not written, see remarks)
+*
+* @param[in] dst_strd
+* WORD32 Destination stride (u or v, interleaved); unused, see remarks
+*
+* @returns
+* Hadamard SAD
+*
+* @remarks
+* Not updating the transform destination now. Only returning the SATD
+*
+*******************************************************************************
+*/
+UWORD32 ihevce_chroma_compute_AC_HAD_4x4_8bit(
+    UWORD8 *pu1_origin,
+    WORD32 src_strd,
+    UWORD8 *pu1_pred_buf,
+    WORD32 pred_strd,
+    WORD16 *pi2_dst,
+    WORD32 dst_strd)
+{
+    WORD16 ai2_resi[16], ai2_tmp[16], ai2_coef[16];
+    UWORD32 u4_satd = 0;
+    WORD32 row, col, idx;
+
+    /* The transform output is not written; only the SATD is returned */
+    (void)pi2_dst;
+    (void)dst_strd;
+
+    /* Residual of one plane: samples are interleaved, so every second */
+    /* byte of each row is read                                        */
+    for(row = 0; row < 4; row++)
+    {
+        for(col = 0; col < 4; col++)
+        {
+            ai2_resi[(4 * row) + col] = pu1_origin[2 * col] - pu1_pred_buf[2 * col];
+        }
+        pu1_origin += src_strd;
+        pu1_pred_buf += pred_strd;
+    }
+
+    /*===== hadamard transform =====*/
+    /* Vertical pass: both butterfly stages of a column touch only that column */
+    for(col = 0; col < 4; col++)
+    {
+        ai2_tmp[col] = ai2_resi[col] + ai2_resi[12 + col];
+        ai2_tmp[4 + col] = ai2_resi[4 + col] + ai2_resi[8 + col];
+        ai2_tmp[8 + col] = ai2_resi[4 + col] - ai2_resi[8 + col];
+        ai2_tmp[12 + col] = ai2_resi[col] - ai2_resi[12 + col];
+
+        ai2_coef[col] = ai2_tmp[col] + ai2_tmp[4 + col];
+        ai2_coef[4 + col] = ai2_tmp[8 + col] + ai2_tmp[12 + col];
+        ai2_coef[8 + col] = ai2_tmp[col] - ai2_tmp[4 + col];
+        ai2_coef[12 + col] = ai2_tmp[12 + col] - ai2_tmp[8 + col];
+    }
+
+    /* Horizontal pass: idx is the offset of the first element of a row */
+    for(idx = 0; idx < 16; idx += 4)
+    {
+        ai2_tmp[idx] = ai2_coef[idx] + ai2_coef[idx + 3];
+        ai2_tmp[idx + 1] = ai2_coef[idx + 1] + ai2_coef[idx + 2];
+        ai2_tmp[idx + 2] = ai2_coef[idx + 1] - ai2_coef[idx + 2];
+        ai2_tmp[idx + 3] = ai2_coef[idx] - ai2_coef[idx + 3];
+
+        ai2_coef[idx] = ai2_tmp[idx] + ai2_tmp[idx + 1];
+        ai2_coef[idx + 1] = ai2_tmp[idx] - ai2_tmp[idx + 1];
+        ai2_coef[idx + 2] = ai2_tmp[idx + 2] + ai2_tmp[idx + 3];
+        ai2_coef[idx + 3] = ai2_tmp[idx + 3] - ai2_tmp[idx + 2];
+    }
+
+    /* DC masking: the first coefficient is excluded from the sum */
+    ai2_coef[0] = 0;
+
+    /*===== sad =====*/
+    for(idx = 0; idx < 16; idx++)
+    {
+        WORD32 i4_coef = ai2_coef[idx];
+
+        u4_satd += (i4_coef > 0) ? i4_coef : -i4_coef;
+    }
+
+    /* Rounded division by 4 */
+    return ((u4_satd + 2) >> 2);
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Chroma Hadamard Transform of 8x8 block (8-bit input)
+*
+* @par Description:
+*
+* @param[in] pu1_origin
+* UWORD8 pointer to the source block (u or v, interleaved)
+*
+* @param[in] src_strd
+* WORD32 Source stride
+*
+* @param[in] pu1_pred_buf
+* UWORD8 pointer to the prediction block (u or v, interleaved)
+*
+* @param[in] pred_strd
+* WORD32 Pred stride
+*
+* @param[in] pi2_dst
+* WORD16 pointer to the transform block (not written, see remarks)
+*
+* @param[in] dst_strd
+* WORD32 Destination stride (u or v, interleaved); unused, see remarks
+*
+* @returns
+* Hadamard SAD
+*
+* @remarks
+* Not updating the transform destination now. Only returning the SATD
+*
+*******************************************************************************
+*/
+UWORD32 ihevce_chroma_HAD_8x8_8bit(
+    UWORD8 *pu1_origin,
+    WORD32 src_strd,
+    UWORD8 *pu1_pred_buf,
+    WORD32 pred_strd,
+    WORD16 *pi2_dst,
+    WORD32 dst_strd)
+{
+    WORD16 ai2_resi[64];
+    WORD16 ai2_vert[8][8], ai2_mid[8][8], ai2_out[8][8];
+    UWORD32 u4_satd = 0;
+    WORD32 row, col, n;
+
+    /* The transform output is not written; only the SATD is returned */
+    (void)pi2_dst;
+    (void)dst_strd;
+
+    /* Residual of one plane: samples are interleaved, so every second */
+    /* byte of each row is read                                        */
+    for(row = 0; row < 8; row++)
+    {
+        for(col = 0; col < 8; col++)
+        {
+            ai2_resi[(row << 3) + col] = pu1_origin[2 * col] - pu1_pred_buf[2 * col];
+        }
+        pu1_origin += src_strd;
+        pu1_pred_buf += pred_strd;
+    }
+
+    /*===== hadamard transform =====*/
+    /* Horizontal pass: three butterfly stages per row (distance 4, 2, 1) */
+    for(row = 0; row < 8; row++)
+    {
+        const WORD16 *pi2_in = &ai2_resi[row << 3];
+
+        for(n = 0; n < 4; n++)
+        {
+            ai2_out[row][n] = pi2_in[n] + pi2_in[n + 4];
+            ai2_out[row][n + 4] = pi2_in[n] - pi2_in[n + 4];
+        }
+
+        for(n = 0; n < 8; n += 4)
+        {
+            ai2_mid[row][n] = ai2_out[row][n] + ai2_out[row][n + 2];
+            ai2_mid[row][n + 1] = ai2_out[row][n + 1] + ai2_out[row][n + 3];
+            ai2_mid[row][n + 2] = ai2_out[row][n] - ai2_out[row][n + 2];
+            ai2_mid[row][n + 3] = ai2_out[row][n + 1] - ai2_out[row][n + 3];
+        }
+
+        for(n = 0; n < 8; n += 2)
+        {
+            ai2_out[row][n] = ai2_mid[row][n] + ai2_mid[row][n + 1];
+            ai2_out[row][n + 1] = ai2_mid[row][n] - ai2_mid[row][n + 1];
+        }
+    }
+
+    /* Vertical pass: same three stages down each column; the final stage */
+    /* overwrites only the column that was just consumed                   */
+    for(col = 0; col < 8; col++)
+    {
+        for(n = 0; n < 4; n++)
+        {
+            ai2_vert[n][col] = ai2_out[n][col] + ai2_out[n + 4][col];
+            ai2_vert[n + 4][col] = ai2_out[n][col] - ai2_out[n + 4][col];
+        }
+
+        for(n = 0; n < 8; n += 4)
+        {
+            ai2_mid[n][col] = ai2_vert[n][col] + ai2_vert[n + 2][col];
+            ai2_mid[n + 1][col] = ai2_vert[n + 1][col] + ai2_vert[n + 3][col];
+            ai2_mid[n + 2][col] = ai2_vert[n][col] - ai2_vert[n + 2][col];
+            ai2_mid[n + 3][col] = ai2_vert[n + 1][col] - ai2_vert[n + 3][col];
+        }
+
+        for(n = 0; n < 8; n += 2)
+        {
+            ai2_out[n][col] = ai2_mid[n][col] + ai2_mid[n + 1][col];
+            ai2_out[n + 1][col] = ai2_mid[n][col] - ai2_mid[n + 1][col];
+        }
+    }
+
+    /*===== sad =====*/
+    for(row = 0; row < 8; row++)
+    {
+        for(col = 0; col < 8; col++)
+        {
+            WORD32 i4_coef = ai2_out[row][col];
+
+            u4_satd += (i4_coef > 0) ? i4_coef : -i4_coef;
+        }
+    }
+
+    /* Rounded division by 8 */
+    return ((u4_satd + 4) >> 3);
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Chroma Hadamard Transform of 16x16 block (8-bit input)
+*
+* @par Description:
+*
+* @param[in] pu1_origin
+* UWORD8 pointer to the source block (u or v, interleaved)
+*
+* @param[in] src_strd
+* WORD32 Source stride
+*
+* @param[in] pu1_pred_buf
+* UWORD8 pointer to the prediction block (u or v, interleaved)
+*
+* @param[in] pred_strd
+* WORD32 Pred stride
+*
+* @param[in] pi2_dst
+* WORD16 pointer to the transform block (only forwarded to the 8x8 HAD)
+*
+* @param[in] dst_strd
+* WORD32 Destination stride (u or v, interleaved); only forwarded to the 8x8 HAD
+*
+* @returns
+* Hadamard SAD
+*
+* @remarks
+* Not updating the transform destination now. Only returning the SATD
+*
+*******************************************************************************
+*/
+UWORD32 ihevce_chroma_HAD_16x16_8bit(
+    UWORD8 *pu1_origin,
+    WORD32 src_strd,
+    UWORD8 *pu1_pred_buf,
+    WORD32 pred_strd,
+    WORD16 *pi2_dst,
+    WORD32 dst_strd)
+{
+    UWORD32 u4_total = 0;
+    WORD32 quad;
+
+    /* The block is covered by four 8x8 HADs, visited in the order */
+    /* top-left, top-right, bottom-left, bottom-right               */
+    for(quad = 0; quad < 4; quad++)
+    {
+        /* Right-hand quadrants start 16 bytes in, lower quadrants 8 rows down */
+        WORD32 x_off = (quad & 1) ? 16 : 0;
+        WORD32 y_off = (quad >> 1) ? 8 : 0;
+
+        u4_total += ihevce_chroma_HAD_8x8_8bit(
+            pu1_origin + ((y_off * src_strd) + x_off),
+            src_strd,
+            pu1_pred_buf + ((y_off * pred_strd) + x_off),
+            pred_strd,
+            pi2_dst,
+            dst_strd);
+    }
+
+    return u4_total;
+}
diff --git a/encoder/ihevce_cmn_utils_instr_set_router.c b/encoder/ihevce_cmn_utils_instr_set_router.c
new file mode 100644
index 0000000..91a72c5
--- /dev/null
+++ b/encoder/ihevce_cmn_utils_instr_set_router.c
@@ -0,0 +1,124 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_cmn_utils_instr_set_router.c
+*
+* \brief
+* This file contains function pointer initialization of common utility
+* functions
+*
+* \date
+* 15/07/2013
+*
+* \author
+* Ittiam
+*
+* List of Functions
+* ihevce_cmn_utils_instr_set_router()
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevc_debug.h"
+
+#include "ihevce_cmn_utils_instr_set_router.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_cmn_utils_instr_set_router \endif
+*
+* \brief
+*    Function pointer initialization of common utils struct
+*
+* \param[out] ps_func_list
+*    Table whose function pointers are all assigned by this call
+*
+* \param[in] u1_is_popcnt_available
+*    Not used by this function
+*
+* \param[in] e_arch
+*    Architecture selector. When ENABLE_NEON is defined, ARCH_ARM_A9Q and
+*    ARCH_ARM_V8_NEON select the _neon implementations; every other value
+*    selects the C implementations
+*
+* \return
+*    None
+*
+*****************************************************************************
+*/
+void ihevce_cmn_utils_instr_set_router(
+    ihevce_cmn_opt_func_t *ps_func_list, UWORD8 u1_is_popcnt_available, IV_ARCH_T e_arch)
+{
+    /* Kept ahead of the switch: a statement between the switch header and */
+    /* its first case label is never executed (GCC -Wswitch-unreachable)   */
+    (void)u1_is_popcnt_available;
+
+    // clang-format off
+    switch(e_arch)
+    {
+#ifdef ENABLE_NEON
+    case ARCH_ARM_A9Q:
+    case ARCH_ARM_V8_NEON:
+        ps_func_list->pf_2d_square_copy = ihevce_2d_square_copy_luma_neon;
+        ps_func_list->pf_AC_HAD_8x8_8bit = ihevce_compute_ac_had_8x8_8bit_neon;
+        ps_func_list->pf_chroma_AC_HAD_4x4_8bit = ihevce_chroma_compute_AC_HAD_4x4_8bit_neon;
+        ps_func_list->pf_chroma_HAD_16x16_8bit = ihevce_chroma_HAD_16x16_8bit_neon;
+        ps_func_list->pf_chroma_HAD_4x4_8bit = ihevce_chroma_HAD_4x4_8bit_neon;
+        ps_func_list->pf_chroma_HAD_8x8_8bit = ihevce_chroma_HAD_8x8_8bit_neon;
+        ps_func_list->pf_chroma_interleave_2d_copy = ihevce_chroma_interleave_2d_copy_neon;
+        ps_func_list->pf_chroma_interleave_ssd_calculator = ihevce_chroma_interleave_ssd_calculator_neon;
+        ps_func_list->pf_copy_2d = ihevce_copy_2d_neon;
+        ps_func_list->pf_get_chroma_eo_sao_params = ihevce_get_chroma_eo_sao_params_neon;
+        ps_func_list->pf_get_luma_eo_sao_params = ihevce_get_luma_eo_sao_params_neon;
+        ps_func_list->pf_HAD_16x16_8bit = ihevce_HAD_16x16_8bit_neon;
+        ps_func_list->pf_HAD_32x32_8bit = ihevce_HAD_32x32_8bit_neon;
+        ps_func_list->pf_HAD_4x4_8bit = ihevce_HAD_4x4_8bit_neon;
+        ps_func_list->pf_HAD_8x8_8bit = ihevce_HAD_8x8_8bit_neon;
+        ps_func_list->pf_itrans_recon_dc = ihevce_itrans_recon_dc_neon;
+        ps_func_list->pf_scan_coeffs = ihevce_scan_coeffs_neon;
+        ps_func_list->pf_ssd_and_sad_calculator = ihevce_ssd_and_sad_calculator_neon;
+        ps_func_list->pf_ssd_calculator = ihevce_ssd_calculator_neon;
+        ps_func_list->pf_wt_avg_2d = ihevce_wt_avg_2d_neon;
+        break;
+#endif
+    default:
+        /* Portable C implementations for every other architecture value */
+        ps_func_list->pf_2d_square_copy = ihevce_2d_square_copy_luma;
+        ps_func_list->pf_AC_HAD_8x8_8bit = ihevce_compute_ac_had_8x8_8bit;
+        ps_func_list->pf_chroma_AC_HAD_4x4_8bit = ihevce_chroma_compute_AC_HAD_4x4_8bit;
+        ps_func_list->pf_chroma_HAD_16x16_8bit = ihevce_chroma_HAD_16x16_8bit;
+        ps_func_list->pf_chroma_HAD_4x4_8bit = ihevce_chroma_HAD_4x4_8bit;
+        ps_func_list->pf_chroma_HAD_8x8_8bit = ihevce_chroma_HAD_8x8_8bit;
+        ps_func_list->pf_chroma_interleave_2d_copy = ihevce_chroma_interleave_2d_copy;
+        ps_func_list->pf_chroma_interleave_ssd_calculator = ihevce_chroma_interleave_ssd_calculator;
+        ps_func_list->pf_copy_2d = ihevce_copy_2d;
+        ps_func_list->pf_get_chroma_eo_sao_params = ihevce_get_chroma_eo_sao_params;
+        ps_func_list->pf_get_luma_eo_sao_params = ihevce_get_luma_eo_sao_params;
+        ps_func_list->pf_HAD_16x16_8bit = ihevce_HAD_16x16_8bit;
+        ps_func_list->pf_HAD_32x32_8bit = ihevce_HAD_32x32_8bit;
+        ps_func_list->pf_HAD_4x4_8bit = ihevce_HAD_4x4_8bit;
+        ps_func_list->pf_HAD_8x8_8bit = ihevce_HAD_8x8_8bit;
+        ps_func_list->pf_itrans_recon_dc = ihevce_itrans_recon_dc;
+        ps_func_list->pf_scan_coeffs = ihevce_scan_coeffs;
+        ps_func_list->pf_ssd_and_sad_calculator = ihevce_ssd_and_sad_calculator;
+        ps_func_list->pf_ssd_calculator = ihevce_ssd_calculator;
+        ps_func_list->pf_wt_avg_2d = ihevce_wt_avg_2d;
+        break;
+    }
+    // clang-format on
+}
diff --git a/encoder/ihevce_cmn_utils_instr_set_router.h b/encoder/ihevce_cmn_utils_instr_set_router.h
new file mode 100644
index 0000000..be7554f
--- /dev/null
+++ b/encoder/ihevce_cmn_utils_instr_set_router.h
@@ -0,0 +1,161 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_cmn_utils_instr_set_router.h
+*
+* \brief
+* This file contains declarations related to common utilities used in encoder
+*
+* \date
+* 15/07/2013
+*
+* \author
+* Ittiam
+*
+* List of Functions
+*
+*
+******************************************************************************
+*/
+
+#ifndef __IHEVCE_CMN_UTILS_INSTR_SET_ROUTER_H_
+#define __IHEVCE_CMN_UTILS_INSTR_SET_ROUTER_H_
+
+#include "ihevc_typedefs.h"
+#include "ihevce_defs.h"
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+typedef UWORD32 FT_CALC_HAD_SATD_8BIT(UWORD8 *, WORD32, UWORD8 *, WORD32, WORD16 *, WORD32); /* args: src, src_strd, pred, pred_strd, dst, dst_strd; returns the SATD */
+
+typedef LWORD64 FT_SSD_CALCULATOR(UWORD8 *, UWORD8 *, UWORD32, UWORD32, UWORD32, UWORD32); /* type of ihevce_ssd_calculator and ihevce_chroma_interleave_ssd_calculator */
+
+typedef LWORD64 FT_SSD_AND_SAD_CALCULATOR(UWORD8 *, WORD32, UWORD8 *, WORD32, WORD32, UWORD32 *); /* type of ihevce_ssd_and_sad_calculator */
+
+typedef void FT_CHROMA_INTERLEAVE_2D_COPY(
+ UWORD8 *, WORD32, UWORD8 *, WORD32, WORD32, WORD32, CHROMA_PLANE_ID_T); /* type of ihevce_chroma_interleave_2d_copy */
+
+typedef void FT_COPY_2D(UWORD8 *, WORD32, UWORD8 *, WORD32, WORD32, WORD32); /* type of ihevce_copy_2d */
+
+typedef void FT_2D_SQUARE_COPY(void *, WORD32, void *, WORD32, WORD32, WORD32); /* type of ihevce_2d_square_copy_luma */
+
+typedef void FT_WT_AVG_2D(
+ UWORD8 *,
+ UWORD8 *,
+ WORD32,
+ WORD32,
+ WORD32,
+ WORD32,
+ UWORD8 *,
+ WORD32,
+ WORD32,
+ WORD32,
+ WORD32,
+ WORD32,
+ WORD32); /* type of ihevce_wt_avg_2d */
+
+typedef void
+ FT_ITRANS_RECON_DC(UWORD8 *, WORD32, UWORD8 *, WORD32, WORD32, WORD16, CHROMA_PLANE_ID_T); /* type of ihevce_itrans_recon_dc */
+
+typedef WORD32 FT_SCAN_COEFFS(WORD16 *, WORD32 *, WORD32, WORD32, UWORD8 *, UWORD8 *, WORD32); /* type of ihevce_scan_coeffs */
+
+typedef void FT_GET_EO_SAO_PARAMS(void *, WORD32, WORD32 *, WORD32 *); /* shared by the luma and chroma eo_sao_params functions */
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+typedef struct /* function pointer table; every member is assigned by ihevce_cmn_utils_instr_set_router() */
+{
+ FT_CALC_HAD_SATD_8BIT *pf_HAD_4x4_8bit;
+ FT_CALC_HAD_SATD_8BIT *pf_HAD_8x8_8bit;
+ FT_CALC_HAD_SATD_8BIT *pf_HAD_16x16_8bit;
+ FT_CALC_HAD_SATD_8BIT *pf_HAD_32x32_8bit;
+ FT_CALC_HAD_SATD_8BIT *pf_AC_HAD_8x8_8bit; /* set to ihevce_compute_ac_had_8x8_8bit or its _neon variant */
+ FT_CALC_HAD_SATD_8BIT *pf_chroma_HAD_4x4_8bit; /* the C version reads every second byte of each row */
+ FT_CALC_HAD_SATD_8BIT *pf_chroma_AC_HAD_4x4_8bit; /* set to ihevce_chroma_compute_AC_HAD_4x4_8bit or its _neon variant */
+ FT_CALC_HAD_SATD_8BIT *pf_chroma_HAD_8x8_8bit; /* the C version reads every second byte of each row */
+ FT_CALC_HAD_SATD_8BIT *pf_chroma_HAD_16x16_8bit; /* the C version sums four pf_chroma_HAD_8x8_8bit-style 8x8 results */
+ FT_SSD_CALCULATOR *pf_ssd_calculator;
+ FT_SSD_CALCULATOR *pf_chroma_interleave_ssd_calculator;
+ FT_SSD_AND_SAD_CALCULATOR *pf_ssd_and_sad_calculator;
+ FT_CHROMA_INTERLEAVE_2D_COPY *pf_chroma_interleave_2d_copy;
+ FT_COPY_2D *pf_copy_2d;
+ FT_2D_SQUARE_COPY *pf_2d_square_copy; /* set to ihevce_2d_square_copy_luma or its _neon variant */
+ FT_WT_AVG_2D *pf_wt_avg_2d;
+ FT_ITRANS_RECON_DC *pf_itrans_recon_dc;
+ FT_SCAN_COEFFS *pf_scan_coeffs;
+ FT_GET_EO_SAO_PARAMS *pf_get_luma_eo_sao_params;
+ FT_GET_EO_SAO_PARAMS *pf_get_chroma_eo_sao_params;
+} ihevce_cmn_opt_func_t;
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+void ihevce_cmn_utils_instr_set_router(
+ ihevce_cmn_opt_func_t *ps_func_list, UWORD8 u1_is_popcnt_available, IV_ARCH_T e_arch);
+
+/* Function List - C (installed by the default case of ihevce_cmn_utils_instr_set_router) */
+FT_CALC_HAD_SATD_8BIT ihevce_HAD_4x4_8bit; /* each line declares a function through its function-type typedef */
+FT_CALC_HAD_SATD_8BIT ihevce_HAD_8x8_8bit;
+FT_CALC_HAD_SATD_8BIT ihevce_HAD_16x16_8bit;
+FT_CALC_HAD_SATD_8BIT ihevce_HAD_32x32_8bit;
+FT_CALC_HAD_SATD_8BIT ihevce_compute_ac_had_8x8_8bit;
+FT_CALC_HAD_SATD_8BIT ihevce_chroma_HAD_4x4_8bit;
+FT_CALC_HAD_SATD_8BIT ihevce_chroma_compute_AC_HAD_4x4_8bit;
+FT_CALC_HAD_SATD_8BIT ihevce_chroma_HAD_8x8_8bit;
+FT_CALC_HAD_SATD_8BIT ihevce_chroma_HAD_16x16_8bit;
+FT_SSD_CALCULATOR ihevce_ssd_calculator;
+FT_SSD_CALCULATOR ihevce_chroma_interleave_ssd_calculator;
+FT_SSD_AND_SAD_CALCULATOR ihevce_ssd_and_sad_calculator;
+FT_CHROMA_INTERLEAVE_2D_COPY ihevce_chroma_interleave_2d_copy;
+FT_COPY_2D ihevce_copy_2d;
+FT_2D_SQUARE_COPY ihevce_2d_square_copy_luma;
+FT_WT_AVG_2D ihevce_wt_avg_2d;
+FT_ITRANS_RECON_DC ihevce_itrans_recon_dc;
+FT_SCAN_COEFFS ihevce_scan_coeffs;
+FT_GET_EO_SAO_PARAMS ihevce_get_luma_eo_sao_params;
+FT_GET_EO_SAO_PARAMS ihevce_get_chroma_eo_sao_params;
+
+#ifdef ENABLE_NEON /* NEON variants, installed by the router for ARCH_ARM_A9Q and ARCH_ARM_V8_NEON */
+FT_CALC_HAD_SATD_8BIT ihevce_HAD_4x4_8bit_neon;
+FT_CALC_HAD_SATD_8BIT ihevce_HAD_8x8_8bit_neon;
+FT_CALC_HAD_SATD_8BIT ihevce_chroma_compute_AC_HAD_4x4_8bit_neon;
+FT_CALC_HAD_SATD_8BIT ihevce_compute_ac_had_8x8_8bit_neon;
+FT_CALC_HAD_SATD_8BIT ihevce_HAD_16x16_8bit_neon;
+FT_CALC_HAD_SATD_8BIT ihevce_chroma_HAD_4x4_8bit_neon;
+FT_CALC_HAD_SATD_8BIT ihevce_chroma_HAD_8x8_8bit_neon;
+FT_CALC_HAD_SATD_8BIT ihevce_chroma_HAD_16x16_8bit_neon;
+FT_CALC_HAD_SATD_8BIT ihevce_HAD_32x32_8bit_neon;
+FT_SSD_CALCULATOR ihevce_ssd_calculator_neon;
+FT_SSD_CALCULATOR ihevce_chroma_interleave_ssd_calculator_neon;
+FT_SSD_AND_SAD_CALCULATOR ihevce_ssd_and_sad_calculator_neon;
+FT_2D_SQUARE_COPY ihevce_2d_square_copy_luma_neon;
+FT_CHROMA_INTERLEAVE_2D_COPY ihevce_chroma_interleave_2d_copy_neon;
+FT_COPY_2D ihevce_copy_2d_neon;
+FT_GET_EO_SAO_PARAMS ihevce_get_luma_eo_sao_params_neon;
+FT_ITRANS_RECON_DC ihevce_itrans_recon_dc_neon;
+FT_GET_EO_SAO_PARAMS ihevce_get_chroma_eo_sao_params_neon;
+FT_SCAN_COEFFS ihevce_scan_coeffs_neon;
+FT_WT_AVG_2D ihevce_wt_avg_2d_neon;
+#endif /* ENABLE_NEON */
+
+#endif
diff --git a/encoder/ihevce_coarse_me_pass.c b/encoder/ihevce_coarse_me_pass.c
new file mode 100644
index 0000000..d8b7a72
--- /dev/null
+++ b/encoder/ihevce_coarse_me_pass.c
@@ -0,0 +1,1473 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*!
+******************************************************************************
+* \file ihevce_coarse_me_pass.c
+*
+* \brief
+* Converts the language of the encoder to language of me. This is an i/f
+* between the encoder style APIs and ME style APIs. This is basically
+* a memoryless glue layer.
+*
+* \date
+* 22/10/2012
+*
+* \author
+* Ittiam
+*
+*
+* List of Functions
+*
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_entropy_structs.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+#include "ihevce_enc_loop_structs.h"
+#include "ihevce_bs_compute_ctb.h"
+#include "ihevce_global_tables.h"
+#include "ihevce_dep_mngr_interface.h"
+#include "hme_datatype.h"
+#include "hme_interface.h"
+#include "hme_common_defs.h"
+#include "hme_defs.h"
+#include "ihevce_me_instr_set_router.h"
+#include "ihevce_ipe_instr_set_router.h"
+#include "ihevce_ipe_structs.h"
+#include "hme_globals.h"
+#include "hme_utils.h"
+#include "hme_coarse.h"
+#include "hme_refine.h"
+#include "ihevce_me_pass.h"
+#include "ihevce_coarse_me_pass.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_coarse_me_get_num_mem_recs \endif
+*
+* \brief
+* Returns the number of memory records needed by the coarse ME module.
+* Note : Sum of the records for HME coarse and for the HME coarse Dep Mngr
+*
+* \return
+* hme_coarse_num_alloc() + hme_coarse_dep_mngr_num_alloc()
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+WORD32 ihevce_coarse_me_get_num_mem_recs()
+{
+ WORD32 hme_mem_recs = hme_coarse_num_alloc(); /* records requested by HME coarse */
+ WORD32 hme_dep_mngr_mem_recs = hme_coarse_dep_mngr_num_alloc(); /* records requested by its Dep Mngr */
+
+ return ((hme_mem_recs + hme_dep_mngr_mem_recs));
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_coarse_me_get_mem_recs \endif
+*
+* \brief
+* Memory requirements are returned for coarse ME (HME records first, Dep Mngr records after them).
+*
+* \param[in,out] ps_mem_tab : pointer to memory descriptors table
+* \param[in] ps_init_prms : Create time static parameters
+* \param[in] i4_num_proc_thrds : Number of processing threads for this module
+* \param[in] i4_mem_space : memspace in which memory request should be done
+* \param[in] i4_resolution_id : resolution id, forwarded to the HME init-prms derivation and Dep Mngr alloc
+* \return
+* Number of records (HME coarse records + Dep Mngr records)
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+WORD32 ihevce_coarse_me_get_mem_recs(
+ iv_mem_rec_t *ps_mem_tab,
+ ihevce_static_cfg_params_t *ps_init_prms,
+ WORD32 i4_num_proc_thrds,
+ WORD32 i4_mem_space,
+ WORD32 i4_resolution_id)
+{
+ hme_memtab_t as_memtabs[HME_COARSE_TOT_MEMTABS];
+ WORD32 n_tabs, i;
+
+ /* Init prms structure specific to HME */
+ hme_init_prms_t s_hme_init_prms;
+
+ //return (ihevce_coarse_me_get_num_mem_recs());
+ /*************************************************************************/
+ /* code flow: we call hme alloc function and then remap those memtabs */
+ /* to a different type of memtab structure. */
+ /*************************************************************************/
+ ASSERT(HME_COARSE_TOT_MEMTABS >= hme_coarse_num_alloc()); /* local as_memtabs[] must be large enough */
+
+ /*************************************************************************/
+ /* POPULATE THE HME INIT PRMS */
+ /*************************************************************************/
+ ihevce_derive_me_init_prms(ps_init_prms, &s_hme_init_prms, i4_num_proc_thrds, i4_resolution_id);
+
+ /*************************************************************************/
+ /* CALL THE ME FUNCTION TO GET MEMTABS */
+ /*************************************************************************/
+ n_tabs = hme_coarse_alloc(&as_memtabs[0], &s_hme_init_prms);
+ ASSERT(n_tabs == hme_coarse_num_alloc());
+
+ /*************************************************************************/
+ /* REMAP RESULTS TO ENCODER MEMTAB STRUCTURE */
+ /*************************************************************************/
+ for(i = 0; i < n_tabs; i++)
+ {
+ ps_mem_tab[i].i4_mem_size = as_memtabs[i].size;
+ ps_mem_tab[i].i4_mem_alignment = as_memtabs[i].align;
+ ps_mem_tab[i].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space; /* same mem space for every record */
+ ps_mem_tab[i].i4_size = sizeof(iv_mem_rec_t); /* size of the record structure itself */
+ }
+
+ /*************************************************************************/
+ /* --- HME Coarse sync Dep Mngr Mem requests -- */
+ /*************************************************************************/
+ {
+ WORD32 n_dep_tabs;
+
+ ps_mem_tab += n_tabs; /* Dep Mngr records are written right after the HME records */
+
+ n_dep_tabs = hme_coarse_dep_mngr_alloc(
+ ps_mem_tab, ps_init_prms, i4_mem_space, i4_num_proc_thrds, i4_resolution_id);
+
+ ASSERT(n_dep_tabs == hme_coarse_dep_mngr_num_alloc());
+
+ /* Update the total no. of mem tabs */
+ n_tabs += n_dep_tabs;
+ }
+
+ return (n_tabs);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_coarse_me_init \endif
+*
+* \brief Initialization of the coarse ME context state structure.
+*
+* \param[in] ps_mem_tab : pointer to memory descriptors table
+* \param[in] ps_init_prms : Create time static parameters
+* \param[in] i4_num_proc_thrds : number of processing threads, used to derive the HME init prms
+* \param[in] pv_osal_handle : Osal handle
+* \param[in] i4_resolution_id : resolution id, forwarded to the HME init-prms derivation and Dep Mngr init
+* \param[in] u1_is_popcnt_available : forwarded to ihevce_cmn_utils_instr_set_router()
+*
+* \return Handle to the ME context (base of memtab 0), or NULL if hme_coarse_init() returns -1
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void *ihevce_coarse_me_init(
+ iv_mem_rec_t *ps_mem_tab,
+ ihevce_static_cfg_params_t *ps_init_prms,
+ WORD32 i4_num_proc_thrds,
+ void *pv_osal_handle,
+ WORD32 i4_resolution_id,
+ UWORD8 u1_is_popcnt_available)
+{
+ /* ME handle to be returned */
+ void *pv_me_ctxt;
+ WORD32 status;
+ coarse_me_master_ctxt_t *ps_ctxt;
+
+ /* Init prms structure specific to HME */
+ hme_init_prms_t s_hme_init_prms;
+
+ /* memtabs to be passed to hme */
+ hme_memtab_t as_memtabs[HME_COARSE_TOT_MEMTABS];
+ WORD32 n_tabs, n_dep_tabs, i;
+
+ /*************************************************************************/
+ /* POPULATE THE HME INIT PRMS */
+ /*************************************************************************/
+ ihevce_derive_me_init_prms(ps_init_prms, &s_hme_init_prms, i4_num_proc_thrds, i4_resolution_id);
+
+ /*************************************************************************/
+ /* Ensure local declaration is sufficient */
+ /*************************************************************************/
+ n_tabs = hme_coarse_num_alloc();
+ ASSERT(HME_COARSE_TOT_MEMTABS >= n_tabs);
+
+ /*************************************************************************/
+ /* MAP RESULTS TO HME MEMTAB STRUCTURE */
+ /*************************************************************************/
+ for(i = 0; i < n_tabs; i++)
+ {
+ as_memtabs[i].size = ps_mem_tab[i].i4_mem_size;
+ as_memtabs[i].align = ps_mem_tab[i].i4_mem_alignment;
+ as_memtabs[i].pu1_mem = (U08 *)ps_mem_tab[i].pv_base; /* base pointer taken from the encoder record */
+ }
+ /*************************************************************************/
+ /* CALL THE ME FUNCTION TO INIT THE CONTEXT (CONTEXT LIVES IN MEMTAB 0) */
+ /*************************************************************************/
+ pv_me_ctxt = (void *)as_memtabs[0].pu1_mem;
+ status = hme_coarse_init(pv_me_ctxt, &as_memtabs[0], &s_hme_init_prms);
+ ps_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt;
+ if(status == -1) /* init failure is reported to the caller as a NULL handle */
+ return NULL;
+
+ /*************************************************************************/
+ /* --- HME sync Dep Mngr Mem init -- */
+ /*************************************************************************/
+
+ ps_mem_tab += n_tabs; /* Dep Mngr records follow the HME records in the table */
+
+ n_dep_tabs = hme_coarse_dep_mngr_init(
+ ps_mem_tab, ps_init_prms, pv_me_ctxt, pv_osal_handle, i4_num_proc_thrds, i4_resolution_id);
+ ASSERT(n_dep_tabs <= hme_coarse_dep_mngr_num_alloc());
+
+ n_tabs += n_dep_tabs; /* total is not read again in this function */
+
+ ihevce_me_instr_set_router(
+ (ihevce_me_optimised_function_list_t *)ps_ctxt->pv_me_optimised_function_list,
+ ps_init_prms->e_arch_type);
+
+ ihevce_cmn_utils_instr_set_router(
+ &ps_ctxt->s_cmn_opt_func, u1_is_popcnt_available, ps_init_prms->e_arch_type);
+
+ return (pv_me_ctxt);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_coarse_me_reg_thrds_sem \endif
+*
+* \brief
+* Registers the semaphore handles with the coarse ME context (via hme_coarse_dep_mngr_reg_sem()).
+*
+* \param[in] pv_me_ctxt : pointer to Coarse ME ctxt
+* \param[in] ppv_sem_hdls : Array of semaphore handles
+* \param[in] i4_num_proc_thrds : Number of processing threads
+*
+* \return
+* none
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_coarse_me_reg_thrds_sem(void *pv_me_ctxt, void **ppv_sem_hdls, WORD32 i4_num_proc_thrds)
+{
+ hme_coarse_dep_mngr_reg_sem(pv_me_ctxt, ppv_sem_hdls, i4_num_proc_thrds); /* all arguments forwarded unchanged */
+
+ return;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_coarse_me_delete \endif
+*
+* \brief
+* Destroy Coarse ME module
+* Note : Only Destroys the resources allocated in the module like
+* semaphore,etc. Memory free is done separately using memtabs
+*
+* \param[in] pv_me_ctxt : pointer to Coarse ME ctxt
+* \param[in] ps_init_prms : Create time static parameters
+* \param[in] i4_resolution_id : resolution id, forwarded to hme_coarse_dep_mngr_delete()
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_coarse_me_delete(
+ void *pv_me_ctxt, ihevce_static_cfg_params_t *ps_init_prms, WORD32 i4_resolution_id)
+{
+ /* --- HME sync Dep Mngr Delete (the only action taken here) --*/
+ hme_coarse_dep_mngr_delete(pv_me_ctxt, ps_init_prms, i4_resolution_id);
+}
+
+/**
+*******************************************************************************
+* \if Function name : ihevce_coarse_me_set_resolution \endif
+*
+* \brief
+* Sets the resolution for ME state
+*
+* \par Description:
+* ME requires information of resolution to prime up its layer descriptors
+* and contexts. This API is called whenever a control call from application
+* causes a change of resolution. Has to be called once initially before
+* processing any frame. Again this is just a glue function and calls the
+* actual ME API (hme_coarse_set_resolution) once per processing thread context.
+*
+* \param[in,out] pv_me_ctxt: Handle to the ME context
+* \param[in] n_enc_layers: Number of layers getting encoded
+* \param[in] p_wd : Pointer containing widths of each layer getting encoded.
+* \param[in] p_ht : Pointer containing heights of each layer getting encoded.
+*
+* \returns
+* none
+*
+* \author
+* Ittiam
+*
+*******************************************************************************
+*/
+void ihevce_coarse_me_set_resolution(
+ void *pv_me_ctxt, WORD32 n_enc_layers, WORD32 *p_wd, WORD32 *p_ht)
+{
+ /* local variables */
+ coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt;
+ WORD32 thrds;
+
+ for(thrds = 0; thrds < ps_master_ctxt->i4_num_proc_thrds; thrds++) /* every thread ctxt gets the same layer sizes */
+ {
+ coarse_me_ctxt_t *ps_me_thrd_ctxt;
+
+ ps_me_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[thrds];
+
+ hme_coarse_set_resolution((void *)ps_me_thrd_ctxt, n_enc_layers, p_wd, p_ht);
+ }
+}
+void ihevce_coarse_me_get_rc_param( /* Collates per-thread coarse ME statistics into the caller's accumulators */
+ void *pv_me_ctxt, /* [in] coarse ME master context */
+ LWORD64 *i8_acc_frame_hme_cost, /* [out] reset to 0 here, then sum of i4_L1_hme_best_cost over all threads */
+ LWORD64 *i8_acc_frame_hme_sad, /* [out] reset to 0 here, then sum of i4_L1_hme_sad over all threads */
+ LWORD64 *i8_acc_num_blks_higher_sad, /* [in,out] NOT reset here; i4_num_blks_high_sad is added on top */
+ LWORD64 *i8_total_blks, /* [in,out] NOT reset here; i4_num_blks is added on top */
+ WORD32 i4_is_prev_pic_same_scene) /* not referenced in this function */
+{
+ coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt;
+ WORD32 thrds;
+ coarse_me_ctxt_t *ps_me_thrd_ctxt;
+
+ *i8_acc_frame_hme_cost = 0;
+ *i8_acc_frame_hme_sad = 0;
+
+ for(thrds = 0; thrds < ps_master_ctxt->i4_num_proc_thrds; thrds++)
+ {
+ ps_me_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[thrds];
+ *i8_acc_frame_hme_cost += ps_me_thrd_ctxt->i4_L1_hme_best_cost;
+
+ /* Block counts are accumulated only when is_i_pic == bidir_enabled (presumably the P frame case) */
+ if(ps_me_thrd_ctxt->s_frm_prms.is_i_pic == ps_me_thrd_ctxt->s_frm_prms.bidir_enabled)
+ {
+ *i8_acc_num_blks_higher_sad += ps_me_thrd_ctxt->i4_num_blks_high_sad; /* NOTE(review): caller presumably zeroes this - confirm */
+ *i8_total_blks += ps_me_thrd_ctxt->i4_num_blks; /* NOTE(review): caller presumably zeroes this - confirm */
+ }
+
+ *i8_acc_frame_hme_sad += ps_me_thrd_ctxt->i4_L1_hme_sad;
+ }
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_coarse_me_process \endif
+*
+* \brief
+* Frame level ME function
+*
+* \par Description:
+* Processing of all layers starting from coarse and going
+* to the refinement layers, except encode layer
+*
+* \param[in] pv_me_ctxt : pointer to coarse ME master context
+* \param[in] ps_enc_lap_inp : pointer to input yuv buffer (frame buffer); not referenced in the body
+* \param[in] ps_multi_thrd_ctxt : pointer to multi thread ctxt
+* \param[in] thrd_id : Thread id of the current thrd in which function is executed
+* \param[in] i4_ping_pong : passed through unchanged to hme_coarse_process_frm()
+*
+* \note thrd_id selects the per-thread context and is also stored into it before processing.
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_coarse_me_process(
+ void *pv_me_ctxt,
+ ihevce_lap_enc_buf_t *ps_enc_lap_inp,
+ multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
+ WORD32 thrd_id,
+ WORD32 i4_ping_pong)
+
+{
+ /* local variables */
+ coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt;
+ coarse_me_ctxt_t *ps_thrd_ctxt;
+
+ /* get the current thread ctxt pointer */
+ ps_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[thrd_id];
+ ps_thrd_ctxt->thrd_id = thrd_id;
+
+ /* frame level processing function: ref map, frame prms and dep mngr handles come from the master ctxt */
+ hme_coarse_process_frm(
+ (void *)ps_thrd_ctxt,
+ &ps_master_ctxt->s_ref_map,
+ &ps_master_ctxt->s_frm_prms,
+ ps_multi_thrd_ctxt,
+ i4_ping_pong,
+ &ps_master_ctxt->apv_dep_mngr_hme_sync[0]);
+
+ return;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_coarse_me_frame_end \endif
+*
+* \brief
+* End of frame update function performs
+* - GMV collation
+* - Dynamic Search Range collation
+*
+* \param[in] pv_me_ctxt : pointer to coarse ME master context
+*
+* \return
+* None (returns early, doing nothing, when the current picture is an I pic)
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_coarse_me_frame_end(void *pv_me_ctxt)
+{
+ /* local variables */
+ coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt;
+ coarse_me_ctxt_t *ps_thrd0_ctxt;
+ layer_ctxt_t *ps_curr_layer;
+ WORD32 num_ref, num_thrds, cur_poc;
+ WORD32 coarse_layer_id;
+ WORD32 i4_num_ref;
+ ME_QUALITY_PRESETS_T e_me_quality_preset;
+
+ /* GMV collation is done for coarse Layer only */
+ ps_thrd0_ctxt = ps_master_ctxt->aps_me_ctxt[0];
+ coarse_layer_id = ps_thrd0_ctxt->num_layers - 1; /* coarsest layer is the last one */
+ ps_curr_layer = ps_thrd0_ctxt->ps_curr_descr->aps_layers[coarse_layer_id];
+ i4_num_ref = ps_master_ctxt->s_ref_map.i4_num_ref;
+ e_me_quality_preset = ps_thrd0_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets;
+
+ /* No processing is required if current pic is I pic */
+ if(1 == ps_master_ctxt->s_frm_prms.is_i_pic)
+ {
+ return;
+ }
+
+ /* use thrd 0 ctxt to collate the GMVs histogram and Dynamic Search Range */
+ /* across all threads */
+ for(num_ref = 0; num_ref < i4_num_ref; num_ref++)
+ {
+ WORD32 i4_offset, i4_lobe_size, i4_layer_id;
+ mv_hist_t *ps_hist_thrd0;
+ dyn_range_prms_t *aps_dyn_range_prms_thrd0[MAX_NUM_LAYERS]; /* filled and read only under the P pic condition below */
+
+ ps_hist_thrd0 = ps_thrd0_ctxt->aps_mv_hist[num_ref];
+
+ /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
+ if(ps_thrd0_ctxt->s_frm_prms.is_i_pic == ps_thrd0_ctxt->s_frm_prms.bidir_enabled)
+ {
+ for(i4_layer_id = coarse_layer_id; i4_layer_id > 0; i4_layer_id--) /* layer 0 is not visited */
+ {
+ aps_dyn_range_prms_thrd0[i4_layer_id] =
+ &ps_thrd0_ctxt->s_coarse_dyn_range_prms.as_dyn_range_prms[i4_layer_id][num_ref];
+ }
+ }
+
+ i4_lobe_size = ps_hist_thrd0->i4_lobe1_size;
+ i4_offset = i4_lobe_size >> 1; /* computed but not read again in this function */
+
+ /* run a loop over all the other threads to add up the histogram */
+ /* and to update the dynamic search range */
+ for(num_thrds = 1; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
+ {
+ dyn_range_prms_t *ps_dyn_range_prms;
+
+ if(ME_XTREME_SPEED_25 != e_me_quality_preset) /* histogram merge is skipped for this preset */
+ {
+ mv_hist_t *ps_hist;
+ WORD32 i4_y, i4_x;
+ /* get current thrd histogram pointer */
+ ps_hist = ps_master_ctxt->aps_me_ctxt[num_thrds]->aps_mv_hist[num_ref];
+
+ /* Accumulate the bin counts of this thread into thread 0's histogram */
+ for(i4_y = 0; i4_y < ps_hist_thrd0->i4_num_rows; i4_y++)
+ {
+ for(i4_x = 0; i4_x < ps_hist_thrd0->i4_num_cols; i4_x++)
+ {
+ S32 i4_bin_id;
+
+ i4_bin_id = i4_x + (i4_y * ps_hist_thrd0->i4_num_cols); /* row-major bin index */
+
+ ps_hist_thrd0->ai4_bin_count[i4_bin_id] +=
+ ps_hist->ai4_bin_count[i4_bin_id];
+ }
+ }
+ }
+
+ /* Update the dynamic search range for each Layer */
+ /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
+ if(ps_thrd0_ctxt->s_frm_prms.is_i_pic == ps_thrd0_ctxt->s_frm_prms.bidir_enabled)
+ {
+ for(i4_layer_id = coarse_layer_id; i4_layer_id > 0; i4_layer_id--)
+ {
+ /* get current thrd, layer dynamic search range param. pointer */
+ ps_dyn_range_prms =
+ &ps_master_ctxt->aps_me_ctxt[num_thrds]
+ ->s_coarse_dyn_range_prms.as_dyn_range_prms[i4_layer_id][num_ref];
+ /* TODO : These calls can be optimized further. No need for min in 1st call and max in 2nd call */
+ hme_update_dynamic_search_params(
+ aps_dyn_range_prms_thrd0[i4_layer_id], ps_dyn_range_prms->i2_dyn_max_y);
+
+ hme_update_dynamic_search_params(
+ aps_dyn_range_prms_thrd0[i4_layer_id], ps_dyn_range_prms->i2_dyn_min_y);
+ }
+ }
+ }
+ }
+
+ /*************************************************************************/
+ /* Get the MAX/MIN per POC distance based on the all the ref. pics */
+ /*************************************************************************/
+ /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
+ if(ps_thrd0_ctxt->s_frm_prms.is_i_pic == ps_thrd0_ctxt->s_frm_prms.bidir_enabled)
+ {
+ WORD32 i4_layer_id;
+ cur_poc = ps_thrd0_ctxt->i4_curr_poc;
+
+ for(i4_layer_id = coarse_layer_id; i4_layer_id > 0; i4_layer_id--) /* start the running max/min from 0 */
+ {
+ ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[i4_layer_id] = 0;
+ ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_min_y_per_poc[i4_layer_id] = 0;
+ }
+
+ for(num_ref = 0; num_ref < i4_num_ref; num_ref++)
+ {
+ for(i4_layer_id = coarse_layer_id; i4_layer_id > 0; i4_layer_id--)
+ {
+ WORD16 i2_mv_per_poc;
+ WORD32 ref_poc, poc_diff;
+ dyn_range_prms_t *ps_dyn_range_prms_thrd0;
+
+ ps_dyn_range_prms_thrd0 =
+ &ps_thrd0_ctxt->s_coarse_dyn_range_prms.as_dyn_range_prms[i4_layer_id][num_ref];
+
+ ref_poc = ps_dyn_range_prms_thrd0->i4_poc;
+ ASSERT(ref_poc < cur_poc); /* keeps poc_diff positive, so the divisions below are by a non-zero value */
+ poc_diff = (cur_poc - ref_poc);
+
+ /* cur. ref. pic. max y per POC; the (poc_diff - 1) bias rounds up when i2_dyn_max_y is non-negative */
+ i2_mv_per_poc = (ps_dyn_range_prms_thrd0->i2_dyn_max_y + (poc_diff - 1)) / poc_diff;
+ /* update the max y per POC */
+ ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[i4_layer_id] =
+ MAX(ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[i4_layer_id],
+ i2_mv_per_poc);
+
+ /* cur. ref. pic. min y per POC; the bias is subtracted, rounding away from zero when i2_dyn_min_y is non-positive */
+ i2_mv_per_poc = (ps_dyn_range_prms_thrd0->i2_dyn_min_y - (poc_diff - 1)) / poc_diff;
+ /* update the min y per POC */
+ ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_min_y_per_poc[i4_layer_id] =
+ MIN(ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_min_y_per_poc[i4_layer_id],
+ i2_mv_per_poc);
+ }
+ }
+
+ /*************************************************************************/
+ /* Populate the results to all thread ctxt */
+ /*************************************************************************/
+ for(num_thrds = 1; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
+ {
+ for(i4_layer_id = coarse_layer_id; i4_layer_id > 0; i4_layer_id--)
+ {
+ ps_master_ctxt->aps_me_ctxt[num_thrds]
+ ->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[i4_layer_id] =
+ ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_max_y_per_poc[i4_layer_id];
+
+ ps_master_ctxt->aps_me_ctxt[num_thrds]
+ ->s_coarse_dyn_range_prms.i2_dyn_min_y_per_poc[i4_layer_id] =
+ ps_thrd0_ctxt->s_coarse_dyn_range_prms.i2_dyn_min_y_per_poc[i4_layer_id];
+ }
+ }
+ }
+
+ if(ME_XTREME_SPEED_25 != e_me_quality_preset)
+ {
+ /* call the function which calculates the GMV */
+ /* layer pointer is shared across all threads */
+ /* hence all threads will have access to updated */
+ /* GMVs populated using thread 0 ctxt */
+ for(num_ref = 0; num_ref < i4_num_ref; num_ref++)
+ {
+ hme_calculate_global_mv(
+ ps_thrd0_ctxt->aps_mv_hist[num_ref],
+ &ps_curr_layer->s_global_mv[num_ref][GMV_THICK_LOBE],
+ GMV_THICK_LOBE);
+ }
+ }
+ return;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_coarse_me_frame_dpb_update \endif
+*
+* \brief
+* Frame level update of the coarse ME DPB
+*
+* \par Description:
+* Updation of ME's internal DPB
+* based on available ref list information
+*
+* \param[in] pv_me_ctxt : pointer to coarse ME master context
+* \param[in] num_ref_l0 : Number of reference pics in L0 list
+* \param[in] num_ref_l1 : Number of reference pics in L1 list
+* \param[in] pps_rec_list_l0 : List of recon pics in L0 list
+* \param[in] pps_rec_list_l1 : List of recon pics in L1 list
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_coarse_me_frame_dpb_update(
+ void *pv_me_ctxt,
+ WORD32 num_ref_l0,
+ WORD32 num_ref_l1,
+ recon_pic_buf_t **pps_rec_list_l0,
+ recon_pic_buf_t **pps_rec_list_l1)
+{
+ coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt;
+ coarse_me_ctxt_t *ps_thrd0_ctxt;
+ WORD32 a_pocs_buffered_in_me[MAX_NUM_REF + 1];
+ WORD32 a_pocs_to_remove[MAX_NUM_REF + 2];
+ WORD32 poc_remove_id = 0;
+ WORD32 i, count;
+
+ /* All processing done using shared / common memory across */
+ /* threads is done using thrd 0 ctxt */
+ ps_thrd0_ctxt = ps_master_ctxt->aps_me_ctxt[0];
+
+ /*************************************************************************/
+ /* Updation of ME's DPB list. This involves the following steps: */
+ /* 1. Obtain list of active POCs maintained within ME. */
+ /* 2. Search each of them in the ref list. Whatever is not found goes to */
+ /* the list to be removed. Note: a_pocs_buffered_in_me holds the */
+ /* currently active POC list within ME. a_pocs_to_remove holds the */
+ /* list of POCs to be removed, terminated by -1. */
+ /*************************************************************************/
+ hme_coarse_get_active_pocs_list((void *)ps_thrd0_ctxt, a_pocs_buffered_in_me);
+
+ count = 0;
+ while(a_pocs_buffered_in_me[count] != -1) /* presumably the callee terminates the list with -1 - confirm */
+ {
+ WORD32 poc_to_search = a_pocs_buffered_in_me[count];
+ WORD32 match_found_flag = 0;
+
+ /*********************************************************************/
+ /* Both lists (L0 and L1) are searched; the L1 search runs even when */
+ /* a match was already found in L0. */
+ /*********************************************************************/
+ for(i = 0; i < num_ref_l0; i++)
+ {
+ if(poc_to_search == pps_rec_list_l0[i]->i4_poc)
+ {
+ match_found_flag = 1;
+ break;
+ }
+ }
+ for(i = 0; i < num_ref_l1; i++)
+ {
+ if(poc_to_search == pps_rec_list_l1[i]->i4_poc)
+ {
+ match_found_flag = 1;
+ break;
+ }
+ }
+
+ if(0 == match_found_flag)
+ {
+ /*****************************************************************/
+ /* POC buffered inside ME but not part of ref list given by DPB */
+ /* Hence this needs to be flagged to ME for removal. */
+ /*****************************************************************/
+ a_pocs_to_remove[poc_remove_id] = poc_to_search;
+ poc_remove_id++;
+ }
+ count++;
+ }
+
+ /* List termination */
+ a_pocs_to_remove[poc_remove_id] = -1;
+
+ /* Call the ME API to remove "outdated" POCs */
+ hme_coarse_discard_frm(ps_thrd0_ctxt, a_pocs_to_remove);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_coarse_me_frame_init \endif
+*
+* \brief
+* Coarse Frame level ME initialisation function
+*
+* \par Description:
+* The following pre-conditions exist for this function: a. We have the input
+* pic ready for encode, b. We have the reference list with POC, L0/L1 IDs
+* and ref ptrs ready for this picture and c. ihevce_me_set_resolution has
+* been called atleast once. Once these are supplied, the following are
+* done here: a. Input pyramid creation, b. Updation of ME's internal DPB
+* based on available ref list information
+*
+* \param[in] pv_ctxt : pointer to ME module
+* \param[in] ps_frm_ctb_prms : CTB characteristics parameters
+* \param[in] ps_frm_lamda : Frame level Lambda params
+* \param[in] num_ref_l0 : Number of reference pics in L0 list
+* \param[in] num_ref_l1 : Number of reference pics in L1 list
+* \param[in] num_ref_l0_active : Active reference pics in L0 dir for current frame (shall be <= num_ref_l0)
+* \param[in] num_ref_l1_active : Active reference pics in L1 dir for current frame (shall be <= num_ref_l1)
+* \param[in] pps_rec_list_l0 : List of recon pics in L0 list
+* \param[in] pps_rec_list_l1 : List of recon pics in L1 list
+* \param[in] ps_enc_lap_inp : pointer to input yuv buffer (frame buffer)
+* \param[in] i4_frm_qp : current picture QP
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_coarse_me_frame_init(
+ void *pv_me_ctxt,
+ ihevce_static_cfg_params_t *ps_stat_prms,
+ frm_ctb_ctxt_t *ps_frm_ctb_prms,
+ frm_lambda_ctxt_t *ps_frm_lamda,
+ WORD32 num_ref_l0,
+ WORD32 num_ref_l1,
+ WORD32 num_ref_l0_active,
+ WORD32 num_ref_l1_active,
+ recon_pic_buf_t **pps_rec_list_l0,
+ recon_pic_buf_t **pps_rec_list_l1,
+ ihevce_lap_enc_buf_t *ps_enc_lap_inp,
+ WORD32 i4_frm_qp,
+ ihevce_ed_blk_t *ps_layer1_buf, //EIID
+ ihevce_ed_ctb_l1_t *ps_ed_ctb_l1,
+ UWORD8 *pu1_me_reverse_map_info,
+ WORD32 i4_temporal_layer_id)
+{
+ /* local variables */
+ coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt;
+ coarse_me_ctxt_t *ps_ctxt;
+ coarse_me_ctxt_t *ps_thrd0_ctxt;
+ WORD32 inp_poc, num_ref;
+ WORD32 i;
+
+ /* Input POC is derived from input buffer */
+ inp_poc = ps_enc_lap_inp->s_lap_out.i4_poc;
+ num_ref = num_ref_l0 + num_ref_l1;
+
+ /* All processing done using shared / common memory across */
+ /* threads is done using thrd 0 ctxt */
+ ps_thrd0_ctxt = ps_master_ctxt->aps_me_ctxt[0];
+
+ ps_master_ctxt->s_frm_prms.u1_num_active_ref_l0 = num_ref_l0_active;
+ ps_master_ctxt->s_frm_prms.u1_num_active_ref_l1 = num_ref_l1_active;
+
+ /* store the frm ctb ctxt to all the thrd ctxt */
+ {
+ WORD32 num_thrds;
+
+ /* initialise the parameters for all the threads */
+ for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
+ {
+ ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
+ ps_ctxt->pv_ext_frm_prms = (void *)ps_frm_ctb_prms;
+ /*EIID: early decision buffer pointer */
+ ps_ctxt->ps_ed_blk = ps_layer1_buf;
+ ps_ctxt->ps_ed_ctb_l1 = ps_ed_ctb_l1;
+
+ /* weighted pred enable flag */
+ ps_ctxt->i4_wt_pred_enable_flag = ps_enc_lap_inp->s_lap_out.i1_weighted_pred_flag |
+ ps_enc_lap_inp->s_lap_out.i1_weighted_bipred_flag;
+
+ if(1 == ps_ctxt->i4_wt_pred_enable_flag)
+ {
+ /* log2 weight denom */
+ ps_ctxt->s_wt_pred.wpred_log_wdc =
+ ps_enc_lap_inp->s_lap_out.i4_log2_luma_wght_denom;
+ }
+ else
+ {
+ /* default value */
+ ps_ctxt->s_wt_pred.wpred_log_wdc = DENOM_DEFAULT;
+ }
+ ps_ctxt->i4_L1_hme_best_cost = 0;
+ ps_ctxt->i4_L1_hme_sad = 0;
+ ps_ctxt->i4_num_blks_high_sad = 0;
+ ps_ctxt->i4_num_blks = 0;
+
+ ps_ctxt->pv_me_optimised_function_list = ps_master_ctxt->pv_me_optimised_function_list;
+ ps_ctxt->ps_cmn_utils_optimised_function_list = &ps_master_ctxt->s_cmn_opt_func;
+ }
+ }
+ /* Create the reference map for ME */
+ ihevce_me_create_ref_map(
+ pps_rec_list_l0,
+ pps_rec_list_l1,
+ num_ref_l0_active,
+ num_ref_l1_active,
+ num_ref,
+ &ps_master_ctxt->s_ref_map);
+ /*************************************************************************/
+ /* Call the ME frame level processing for further actiion. */
+ /* ToDo: Support Row Level API. */
+ /*************************************************************************/
+ ps_master_ctxt->s_frm_prms.i2_mv_range_x = ps_thrd0_ctxt->s_init_prms.max_horz_search_range;
+ ps_master_ctxt->s_frm_prms.i2_mv_range_y = ps_thrd0_ctxt->s_init_prms.max_vert_search_range;
+
+ ps_master_ctxt->s_frm_prms.is_i_pic = 0;
+ ps_master_ctxt->s_frm_prms.i4_temporal_layer_id = i4_temporal_layer_id;
+
+ ps_master_ctxt->s_frm_prms.is_pic_second_field =
+ (!(ps_enc_lap_inp->s_input_buf.i4_bottom_field ^
+ ps_enc_lap_inp->s_input_buf.i4_topfield_first));
+ {
+ S32 pic_type = ps_enc_lap_inp->s_lap_out.i4_pic_type;
+
+ /*********************************************************************/
+ /* For I Pic, we do not call update fn at ctb level, instead we do */
+ /* one shot update for entire picture. */
+ /*********************************************************************/
+ if((pic_type == IV_I_FRAME) || (pic_type == IV_II_FRAME) || (pic_type == IV_IDR_FRAME))
+ {
+ ps_master_ctxt->s_frm_prms.is_i_pic = 1;
+ ps_master_ctxt->s_frm_prms.bidir_enabled = 0;
+ }
+ else if((pic_type == IV_P_FRAME) || (pic_type == IV_PP_FRAME))
+ {
+ ps_master_ctxt->s_frm_prms.bidir_enabled = 0;
+ }
+ else if((pic_type == IV_B_FRAME) || (pic_type == IV_BB_FRAME))
+ {
+ ps_master_ctxt->s_frm_prms.bidir_enabled = 1;
+ }
+ else
+ {
+ /* not sure whether we need to handle mixed frames like IP, */
+ /* they should ideally come as single field. */
+ /* TODO : resolve thsi ambiguity */
+ ASSERT(0);
+ }
+ }
+ /************************************************************************/
+ /* Lambda calculations moved outside ME and to one place, so as to have */
+ /* consistent lambda across ME, IPE, CL RDOPT etc */
+ /************************************************************************/
+
+ {
+#define CLIP3_F(min, max, val) (((val) < (min)) ? (min) : (((val) > (max)) ? (max) : (val)))
+ double q_steps[6] = { 0.625, 0.703, 0.79, 0.889, 1.0, 1.125 };
+ double d_b_pic_factor;
+ double d_q_factor;
+ //double d_lambda;
+ UWORD8 u1_temp_hier = ps_enc_lap_inp->s_lap_out.i4_temporal_lyr_id;
+
+ if(u1_temp_hier)
+ {
+ d_b_pic_factor = CLIP3_F(2.0, 4.0, (i4_frm_qp - 12.0) / 6.0);
+ }
+ else
+ d_b_pic_factor = 1.0;
+
+ d_q_factor = (1 << (i4_frm_qp / 6)) * q_steps[i4_frm_qp % 6];
+ ps_master_ctxt->s_frm_prms.qstep = (WORD32)d_q_factor;
+ ps_master_ctxt->s_frm_prms.i4_frame_qp = i4_frm_qp;
+ }
+
+ /* HME Dependency Manager : Reset the num ctb processed in every row */
+ /* for ME sync in every layer */
+ {
+ WORD32 ctr;
+ for(ctr = 1; ctr < ps_thrd0_ctxt->num_layers; ctr++)
+ {
+ void *pv_dep_mngr_state;
+ pv_dep_mngr_state = ps_master_ctxt->apv_dep_mngr_hme_sync[ctr - 1];
+
+ ihevce_dmgr_rst_row_row_sync(pv_dep_mngr_state);
+ }
+ }
+
+ /* Frame level init of all threads of ME */
+ {
+ WORD32 num_thrds;
+
+ /* initialise the parameters for all the threads */
+ for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
+ {
+ ps_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
+
+ hme_coarse_process_frm_init(
+ (void *)ps_ctxt, ps_ctxt->ps_hme_ref_map, ps_ctxt->ps_hme_frm_prms);
+ }
+ }
+
+ ps_master_ctxt->s_frm_prms.i4_cl_sad_lambda_qf = ps_frm_lamda->i4_cl_sad_lambda_qf;
+ ps_master_ctxt->s_frm_prms.i4_cl_satd_lambda_qf = ps_frm_lamda->i4_cl_satd_lambda_qf;
+ ps_master_ctxt->s_frm_prms.i4_ol_sad_lambda_qf = ps_frm_lamda->i4_ol_sad_lambda_qf;
+ ps_master_ctxt->s_frm_prms.i4_ol_satd_lambda_qf = ps_frm_lamda->i4_ol_satd_lambda_qf;
+ ps_master_ctxt->s_frm_prms.lambda_q_shift = LAMBDA_Q_SHIFT;
+
+ ps_master_ctxt->s_frm_prms.pf_interp_fxn = NULL;
+
+ /*************************************************************************/
+ /* If num ref is 0, that means that it has to be coded as I. Do nothing */
+ /* However mv bank update needs to happen with "intra" mv. */
+ /*************************************************************************/
+ if(ps_master_ctxt->s_ref_map.i4_num_ref == 0 || ps_master_ctxt->s_frm_prms.is_i_pic)
+ {
+ for(i = 1; i < ps_thrd0_ctxt->num_layers; i++)
+ {
+ layer_ctxt_t *ps_layer_ctxt = ps_thrd0_ctxt->ps_curr_descr->aps_layers[i];
+ BLK_SIZE_T e_blk_size;
+ S32 use_4x4;
+
+ /* The mv bank is filled with "intra" mv */
+ use_4x4 = hme_get_mv_blk_size(
+ ps_thrd0_ctxt->s_init_prms.use_4x4,
+ i,
+ ps_thrd0_ctxt->num_layers,
+ ps_thrd0_ctxt->u1_encode[i]);
+ e_blk_size = use_4x4 ? BLK_4x4 : BLK_8x8;
+ hme_init_mv_bank(ps_layer_ctxt, e_blk_size, 2, 1, ps_ctxt->u1_encode[i]);
+ hme_fill_mvbank_intra(ps_layer_ctxt);
+
+ /* Clear out the global mvs */
+ memset(
+ ps_layer_ctxt->s_global_mv,
+ 0,
+ sizeof(hme_mv_t) * ps_thrd0_ctxt->max_num_ref * NUM_GMV_LOBES);
+ }
+
+ return;
+ }
+
+ /*************************************************************************/
+ /* Coarse & refine Layer frm init (layer mem is common across thrds) */
+ /*************************************************************************/
+ {
+ coarse_prms_t s_coarse_prms;
+ refine_prms_t s_refine_prms;
+ S16 i2_max;
+ S32 layer_id;
+
+ layer_id = ps_thrd0_ctxt->num_layers - 1;
+ i2_max = ps_thrd0_ctxt->ps_curr_descr->aps_layers[layer_id]->i2_max_mv_x;
+ i2_max = MAX(i2_max, ps_thrd0_ctxt->ps_curr_descr->aps_layers[layer_id]->i2_max_mv_y);
+ s_coarse_prms.i4_layer_id = layer_id;
+
+ {
+ S32 log_start_step;
+ /* Based on Preset, set the starting step size for Refinement */
+ if(ME_MEDIUM_SPEED > ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets)
+ {
+ log_start_step = 0;
+ }
+ else
+ {
+ log_start_step = 1;
+ }
+ s_coarse_prms.i4_max_iters = i2_max >> log_start_step;
+ s_coarse_prms.i4_start_step = 1 << log_start_step;
+ }
+ s_coarse_prms.i4_num_ref = ps_master_ctxt->s_ref_map.i4_num_ref;
+ s_coarse_prms.do_full_search = 1;
+ s_coarse_prms.num_results = ps_thrd0_ctxt->max_num_results_coarse;
+
+ hme_coarse_frm_init(ps_thrd0_ctxt, &s_coarse_prms);
+
+ layer_id--;
+
+ /*************************************************************************/
+ /* This loop will run for all refine layers (non- encode layers) */
+ /*************************************************************************/
+ while(layer_id > 0)
+ {
+ layer_ctxt_t *ps_curr_layer;
+ layer_ctxt_t *ps_coarse_layer;
+
+ ps_coarse_layer = ps_thrd0_ctxt->ps_curr_descr->aps_layers[layer_id + 1];
+
+ ps_curr_layer = ps_thrd0_ctxt->ps_curr_descr->aps_layers[layer_id];
+
+ hme_set_refine_prms(
+ &s_refine_prms,
+ ps_thrd0_ctxt->u1_encode[layer_id],
+ ps_master_ctxt->s_ref_map.i4_num_ref,
+ layer_id,
+ ps_thrd0_ctxt->num_layers,
+ ps_thrd0_ctxt->num_layers_explicit_search,
+ ps_thrd0_ctxt->s_init_prms.use_4x4,
+ &ps_master_ctxt->s_frm_prms,
+ NULL,
+ &ps_thrd0_ctxt->s_init_prms.s_me_coding_tools);
+
+ hme_refine_frm_init(ps_curr_layer, &s_refine_prms, ps_coarse_layer);
+
+ layer_id--;
+ }
+ }
+
+ return;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_coarse_me_get_lyr_buf_desc \endif
+*
+* \brief
+*    Selects a free layer descriptor and exports its non-encode layer buffers
+*
+* \par Description:
+*    Entry (n - 1) of each output array describes layer n; layer 0 is skipped.
+*
+* \param[in]  pv_me_ctxt           : coarse ME master context handle
+* \param[out] ppu1_decomp_lyr_bufs : receives one input buffer pointer per layer
+* \param[out] pi4_lyr_buf_stride   : receives one input buffer stride per layer
+*
+* \return
+*    Index of the descriptor returned by hme_coarse_find_free_descr_idx()
+*
+* \author
+*    Ittiam
+*
+******************************************************************************
+*/
+WORD32 ihevce_coarse_me_get_lyr_buf_desc(
+    void *pv_me_ctxt, UWORD8 **ppu1_decomp_lyr_bufs, WORD32 *pi4_lyr_buf_stride)
+{
+    /* Layer memory is common to all threads, so thread 0's context is used */
+    coarse_me_master_ctxt_t *ps_master = (coarse_me_master_ctxt_t *)pv_me_ctxt;
+    coarse_me_ctxt_t *ps_ctxt0 = ps_master->aps_me_ctxt[0];
+    layers_descr_t *ps_descr;
+    WORD32 i4_descr_idx;
+    WORD32 i4_out_idx = 0;
+
+    /* Pick a descriptor that is currently unused */
+    i4_descr_idx = hme_coarse_find_free_descr_idx((void *)ps_ctxt0);
+    ps_descr = &ps_ctxt0->as_ref_descr[i4_descr_idx];
+
+    /* Walk layers 1 .. num_layers - 1; the output slot is (layer - 1) */
+    while((i4_out_idx + 1) < ps_ctxt0->num_layers)
+    {
+        layer_ctxt_t *ps_layer = ps_descr->aps_layers[i4_out_idx + 1];
+        pi4_lyr_buf_stride[i4_out_idx] = ps_layer->i4_inp_stride;
+        ppu1_decomp_lyr_bufs[i4_out_idx] = ps_layer->pu1_inp;
+        i4_out_idx++;
+    }
+
+    return i4_descr_idx;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_coarse_me_get_lyr_prms_job_que \endif
+*
+* \brief Returns to the caller key attributes related to dependency between layers
+* for multi-thread execution
+*
+*
+* \par Description:
+* This function requires the precondition that the width and ht of encode
+* layer is known, and ME API ihevce_me_set_resolution() API called with
+* this info. Based on this, ME populates useful information for the encoder
+* to execute the multi-thread (concurrent across layers) in this API:
+* the number of layers, number of vertical units in each layer, and for
+* each vertical unit in each layer, its dependency on previous layer's units.
+* From ME's perspective, a vertical unit is one which is smallest min size
+* vertically (and spans the entire row horizontally). This is CTB for encode
+* layer, and 8x8 / 4x4 for non encode layers.
+*
+* \param[in] pv_me_ctxt : ME handle
+* \param[in] ps_curr_inp : Input buffer descriptor (not referenced in the body)
+* \param[out] pi4_num_hme_lyrs : Num of HME layers (ME updates)
+* \param[out] pi4_num_vert_units_in_lyr : Array of size N (num layers); entry 0
+* is the coarsest layer (id N-1, one extra row included), entry N-1 is layer 0
+* \param[out] ps_me_job_q_prms : Array of job queue prms, one for each unit in a
+* layer. Note that this is contiguous in order of processing
+* All k units of layer N-1 from top to bottom, followed by
+* all m units of layer N-2 .... ends with X units of layer 0
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_coarse_me_get_lyr_prms_job_que(
+ void *pv_me_ctxt,
+ ihevce_lap_enc_buf_t *ps_curr_inp,
+ WORD32 *pi4_num_hme_lyrs,
+ WORD32 *pi4_num_vert_units_in_lyr,
+ multi_thrd_me_job_q_prms_t *ps_me_job_q_prms)
+{
+ coarse_me_ctxt_t *ps_ctxt;
+ coarse_me_master_ctxt_t *ps_master_ctxt = (coarse_me_master_ctxt_t *)pv_me_ctxt;
+
+ /* These arrays and ptrs track input dependencies for units of a layer */
+ /* This is a ping-pong design: while one half is read, the other is updated */
+ U08 au1_inp_dep[2][MAX_NUM_VERT_UNITS_FRM];
+ U08 *pu1_inp_dep_c, *pu1_inp_dep_n;
+
+ /* Height of current and next layers */
+ S32 ht_c, ht_n;
+
+ /* Blk ht and vertical unit ht at a given layer and next layer */
+ S32 unit_ht_c, unit_ht_n, blk_ht_c, blk_ht_n;
+
+ /* Number of vertical units in current and next layer */
+ S32 num_vert_c, num_vert_n;
+
+ S32 ctb_size = 64, num_layers, i, j, k;
+
+ /* since same layer desc pointer is stored in all thread ctxt */
+ /* the 0th thread ctxt pointer is used for all the reads below */
+ ps_ctxt = ps_master_ctxt->aps_me_ctxt[0];
+
+ /* Set the number of layers */
+ num_layers = ps_ctxt->num_layers;
+ *pi4_num_hme_lyrs = num_layers;
+
+ pu1_inp_dep_c = &au1_inp_dep[0][0];
+ pu1_inp_dep_n = &au1_inp_dep[1][0];
+
+ ASSERT(num_layers >= 2);
+
+ ht_n = ps_ctxt->a_ht[num_layers - 2];
+ ht_c = ps_ctxt->a_ht[num_layers - 1];
+
+ /* compute blk ht and unit ht for "c" (the coarsest layer to begin with) */
+ if(ps_ctxt->u1_encode[num_layers - 1])
+ {
+ blk_ht_c = 16;
+ unit_ht_c = ctb_size;
+ }
+ else
+ {
+ blk_ht_c = hme_get_blk_size(ps_ctxt->s_init_prms.use_4x4, num_layers - 1, num_layers, 0);
+ unit_ht_c = blk_ht_c;
+ }
+
+ num_vert_c = (ht_c + unit_ht_c - 1) / unit_ht_c;
+
+ /* For new design in Coarsest HME layer we need */
+ /* one additional row extra at the end of frame */
+ /* hence num_vert_c is incremented by 1 */
+ num_vert_c++;
+
+ /* Coarsest layer units start with an input dependency count of 0 */
+ memset(pu1_inp_dep_c, 0, num_vert_c);
+
+ /*************************************************************************/
+ /* Run through each layer, set the number of vertical units and job queue*/
+ /* attrs for each vert unit in the layer */
+ /*************************************************************************/
+ for(i = num_layers - 1; i > 0; i--)
+ {
+ /* 0th entry is actually layer id num_layers - 1 */
+ /* and entry num_layers-1 equals the biggest layer (id = 0) */
+ pi4_num_vert_units_in_lyr[num_layers - 1 - i] = num_vert_c;
+ /* Geometry of the next layer "n" (layer id i - 1) */
+ ht_n = ps_ctxt->a_ht[i - 1];
+ blk_ht_n = hme_get_blk_size(ps_ctxt->s_init_prms.use_4x4, i - 1, num_layers, 0);
+ unit_ht_n = blk_ht_n;
+ if(ps_ctxt->u1_encode[i - 1])
+ unit_ht_n = ctb_size;
+
+ num_vert_n = (ht_n + unit_ht_n - 1) / unit_ht_n;
+ /* Initialize all units' inp dep in next layer to 0 */
+ memset(pu1_inp_dep_n, 0, num_vert_n * sizeof(U08));
+
+ /* Evaluate dependencies for this layer */
+ for(j = 0; j < num_vert_c; j++)
+ {
+ S32 v1, v2;
+
+ /* Output dependencies. When one unit in current layer finishes, */
+ /* how many in the next layer it affects?. Assuming that the top */
+ /* of this vertical unit and bottom of this vertical unit project*/
+ /* somewhere in the next layer. The top of this vertical unit */
+ /* becomes the bottom right point for somebody, and the bottom of*/
+ /* this vertical unit becomes the colocated pt for somebody, this*/
+ /* is the extremum. */
+
+ /* for the initial unit affected by j in "c" layer, take j-1th */
+ /* unit top, project it to "n" and step back one more unit. */
+ v1 = (j - 1) * unit_ht_c * ht_n;
+ v1 /= (ht_c * unit_ht_n);
+ v1 -= 1;
+
+ /* for the final unit affected by j in "c" layer, take jth unit */
+ /* bottom, project it to "n" and step forward one more unit. */
+
+ v2 = (j + 1) * unit_ht_c * ht_n;
+ v2 /= (ht_c * unit_ht_n);
+ v2 += 1;
+
+ /* Clip to be within valid limits */
+ v1 = HME_CLIP(v1, 0, (num_vert_n - 1));
+ v2 = HME_CLIP(v2, 0, (num_vert_n - 1));
+
+ /* In the layer "n", units starting at offset v1, and upto v2 are*/
+ /* dependent on unit j of layer "c". So for each of these units */
+ /* increment the dependency by 1 corresponding to "jth" unit in */
+ /* layer "c" */
+ ps_me_job_q_prms->i4_num_output_dep = v2 - v1 + 1;
+ ASSERT(ps_me_job_q_prms->i4_num_output_dep <= MAX_OUT_DEP);
+ for(k = v1; k <= v2; k++)
+ pu1_inp_dep_n[k]++;
+
+ /* Input dependency would have been calculated in prev run */
+ ps_me_job_q_prms->i4_num_inp_dep = pu1_inp_dep_c[j];
+ ASSERT(ps_me_job_q_prms->i4_num_inp_dep <= MAX_OUT_DEP);
+
+ /* Offsets: unit indices v1..v2 of layer "n" that wait on unit j */
+ for(k = v1; k <= v2; k++)
+ ps_me_job_q_prms->ai4_out_dep_unit_off[k - v1] = k;
+
+ ps_me_job_q_prms++;
+ }
+
+ /* Carry the blk size and vert unit size over to the next iteration: */
+ /* "c" denotes curr layer, and "n" denotes the layer to which result */
+ /* is projected to */
+ ht_c = ht_n;
+ blk_ht_c = blk_ht_n;
+ unit_ht_c = unit_ht_n;
+ num_vert_c = num_vert_n;
+
+ /* Input dep count for next layer was computed this iteration. */
+ /* Swap so that pu1_inp_dep_n becomes current for next iteration, */
+ /* and pu1_inp_dep_c will become update area during next iteration*/
+ /* for next to next. */
+ {
+ U08 *pu1_tmp = pu1_inp_dep_n;
+ pu1_inp_dep_n = pu1_inp_dep_c;
+ pu1_inp_dep_c = pu1_tmp;
+ }
+ }
+
+ /* LAYER 0 OR ENCODE LAYER UPDATE : NO OUTPUT DEPS */
+
+ /* set the number of vertical units */
+ pi4_num_vert_units_in_lyr[num_layers - 1] = num_vert_c;
+ for(j = 0; j < num_vert_c; j++)
+ {
+ /* Here there is no output dependency for ME. However this data is used for encode, */
+ /* and there is a 1-1 correspondence between this and the encode */
+ /* Hence we set output dependency of 1 */
+ ps_me_job_q_prms->i4_num_output_dep = 1;
+ ps_me_job_q_prms->ai4_out_dep_unit_off[0] = j;
+ ps_me_job_q_prms->i4_num_inp_dep = pu1_inp_dep_c[j];
+ ASSERT(ps_me_job_q_prms->i4_num_inp_dep <= MAX_OUT_DEP);
+ ps_me_job_q_prms++;
+ }
+
+ return;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_coarse_me_set_lyr1_mv_bank \endif
+*
+* \brief
+*    Adds the current input to coarse ME and registers the layer 1 MV bank
+*
+* \par Description:
+*    Fills an input descriptor (POC, layer 0 plane pointers and strides) from
+*    the LAP output, hands it to hme_coarse_add_inp(), and then stores the
+*    caller's MV and reference index buffers in layer 1's MV bank.
+*
+* \param[in] pv_me_ctxt      : coarse ME master context handle
+* \param[in] ps_enc_lap_inp  : LAP buffer supplying the POC and input planes
+* \param[in] pv_mv_bank      : buffer registered as the MV base
+* \param[in] pv_ref_idx_bank : buffer registered as the reference index base
+* \param[in] i4_curr_idx     : index forwarded to hme_coarse_add_inp()
+*
+* \return
+*    None
+*
+* \author
+*    Ittiam
+*
+******************************************************************************
+*/
+void ihevce_coarse_me_set_lyr1_mv_bank(
+    void *pv_me_ctxt,
+    ihevce_lap_enc_buf_t *ps_enc_lap_inp,
+    void *pv_mv_bank,
+    void *pv_ref_idx_bank,
+    WORD32 i4_curr_idx)
+{
+    coarse_me_master_ctxt_t *ps_master = (coarse_me_master_ctxt_t *)pv_me_ctxt;
+    layer_mv_t *ps_lyr1_mv_bank;
+    hme_inp_desc_t s_curr_inp;
+
+    /* Describe the picture coming out of LAP; only layer 0 is filled in */
+    s_curr_inp.s_layer_desc[0].luma_stride = ps_enc_lap_inp->s_lap_out.s_input_buf.i4_y_strd;
+    s_curr_inp.s_layer_desc[0].chroma_stride = ps_enc_lap_inp->s_lap_out.s_input_buf.i4_uv_strd;
+
+    s_curr_inp.s_layer_desc[0].pu1_y = (UWORD8 *)ps_enc_lap_inp->s_lap_out.s_input_buf.pv_y_buf;
+    s_curr_inp.s_layer_desc[0].pu1_u = (UWORD8 *)ps_enc_lap_inp->s_lap_out.s_input_buf.pv_u_buf;
+    s_curr_inp.s_layer_desc[0].pu1_v = (UWORD8 *)ps_enc_lap_inp->s_lap_out.s_input_buf.pv_v_buf;
+
+    s_curr_inp.i4_poc = ps_enc_lap_inp->s_lap_out.i4_poc;
+
+    /* Hand the input to coarse ME. The module's own notes state that this */
+    /* adds it to ME's DPB and creates the pyramids of the HME layers that */
+    /* are not encoded.                                                    */
+    hme_coarse_add_inp(pv_me_ctxt, &s_curr_inp, i4_curr_idx);
+
+    /* Layer contexts are shared by all threads, so thread 0's context is  */
+    /* used. The lookup is done only after hme_coarse_add_inp() has run.   */
+    ps_lyr1_mv_bank = ps_master->aps_me_ctxt[0]->ps_curr_descr->aps_layers[1]->ps_layer_mvbank;
+
+    /* Point layer 1's MV bank at the caller supplied buffers */
+    ps_lyr1_mv_bank->ps_mv_base = (hme_mv_t *)pv_mv_bank;
+    ps_lyr1_mv_bank->pi1_ref_idx_base = (S08 *)pv_ref_idx_bank;
+
+    return;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_coarse_me_get_lyr1_ctxt \endif
+*
+* \brief
+*    Copies the layer 1 context and its MV bank structure to caller memory
+*
+* \par Description:
+*    Copies sizeof(layer_ctxt_t) bytes of the current descriptor's layer 1
+*    context into pv_layer_ctxt and sizeof(layer_mv_t) bytes of its MV bank
+*    into pv_layer_mv_bank_ctxt, then makes the copied context point at the
+*    copied MV bank. Buffers referenced by either structure are not copied.
+*
+* \param[in]  pv_me_ctxt            : coarse ME master context handle
+* \param[out] pv_layer_ctxt         : destination for the layer context
+* \param[out] pv_layer_mv_bank_ctxt : destination for the MV bank structure
+*
+* \return
+*    None
+*
+* \author
+*    Ittiam
+*
+******************************************************************************
+*/
+void ihevce_coarse_me_get_lyr1_ctxt(
+    void *pv_me_ctxt, void *pv_layer_ctxt, void *pv_layer_mv_bank_ctxt)
+{
+    coarse_me_master_ctxt_t *ps_master = (coarse_me_master_ctxt_t *)pv_me_ctxt;
+    layer_ctxt_t *ps_dst_layer = (layer_ctxt_t *)pv_layer_ctxt;
+    layer_mv_t *ps_dst_mv_bank = (layer_mv_t *)pv_layer_mv_bank_ctxt;
+    layer_ctxt_t *ps_src_layer;
+
+    /* Layer contexts are shared by all threads; read layer 1 via thread 0 */
+    ps_src_layer = ps_master->aps_me_ctxt[0]->ps_curr_descr->aps_layers[1];
+
+    /* Shallow copy of the layer context structure */
+    memcpy(ps_dst_layer, ps_src_layer, sizeof(*ps_dst_layer));
+
+    /* Shallow copy of the MV bank structure itself */
+    memcpy(ps_dst_mv_bank, ps_src_layer->ps_layer_mvbank, sizeof(*ps_dst_mv_bank));
+
+    /* Make the copied layer context refer to the copied MV bank */
+    ps_dst_layer->ps_layer_mvbank = ps_dst_mv_bank;
+
+    return;
+}
diff --git a/encoder/ihevce_coarse_me_pass.h b/encoder/ihevce_coarse_me_pass.h
new file mode 100644
index 0000000..0dc0537
--- /dev/null
+++ b/encoder/ihevce_coarse_me_pass.h
@@ -0,0 +1,417 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_coarse_me_pass.h
+*
+* \brief
+* Interfaces to create, control and run the Coarse ME module
+*
+* \date
+* 22/10/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_COARSE_ME_PASS_H_
+#define _IHEVCE_COARSE_ME_PASS_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Variable Declarations */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_coarse_me_get_num_mem_recs \endif
+*
+* \brief
+* Number of memory records are returned for coarse ME module
+*
+* Declared with (void) so that the declaration is a real prototype.
+*
+* \return
+* Number of memory records
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+WORD32 ihevce_coarse_me_get_num_mem_recs(void);
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_coarse_me_get_mem_recs \endif
+*
+* \brief
+* Memory requirements are returned for coarse ME.
+*
+* \param[in,out] ps_mem_tab : pointer to memory descriptors table
+* \param[in] ps_init_prms : Create time static parameters
+* \param[in] i4_num_proc_thrds : Number of processing threads for this module
+* \param[in] i4_mem_space : memspace in which memory request should be done
+* \param[in] i4_resolution_id : resolution id (meaning not visible here - TODO confirm)
+*
+* \return
+* Number of records
+*
+* \author
+* Ittiam
+*****************************************************************************
+*/
+WORD32 ihevce_coarse_me_get_mem_recs(
+ iv_mem_rec_t *ps_mem_tab,
+ ihevce_static_cfg_params_t *ps_init_prms,
+ WORD32 i4_num_proc_thrds,
+ WORD32 i4_mem_space,
+ WORD32 i4_resolution_id);
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_coarse_me_init \endif
+*
+* \brief
+* Initialization for ME context state structure.
+*
+* \param[in] ps_mem_tab : pointer to memory descriptors table
+* \param[in] ps_init_prms : Create time static parameters
+* \param[in] i4_num_proc_thrds : Number of processing threads for this module
+* \param[in] pv_osal_handle : Osal handle
+* \param[in] i4_resolution_id : resolution id (meaning not visible here - TODO confirm)
+* \param[in] u1_is_popcnt_available : presumably a popcount-support flag - TODO confirm
+*
+* \return Handle to the ME context
+*
+* \author Ittiam
+*****************************************************************************
+*/
+void *ihevce_coarse_me_init(
+ iv_mem_rec_t *ps_mem_tab,
+ ihevce_static_cfg_params_t *ps_init_prms,
+ WORD32 i4_num_proc_thrds,
+ void *pv_osal_handle,
+ WORD32 i4_resolution_id,
+ UWORD8 u1_is_popcnt_available);
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_coarse_me_reg_thrds_sem \endif
+*
+* \brief
+* Initialization for ME context state structure with semaphores.
+*
+* \param[in] pv_me_ctxt : pointer to Coarse ME ctxt
+* \param[in] ppv_sem_hdls : Array of semaphore handles
+* \param[in] i4_num_proc_thrds : Number of processing threads
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_coarse_me_reg_thrds_sem(void *pv_me_ctxt, void **ppv_sem_hdls, WORD32 i4_num_proc_thrds);
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_coarse_me_delete \endif
+*
+* \brief
+* Destroy Coarse ME module
+* Note : Only destroys the resources allocated in the module like
+* semaphore, etc. Memory free is done separately using memtabs
+*
+* \param[in] pv_me_ctxt : pointer to Coarse ME ctxt
+* \param[in] ps_init_prms : Create time static parameters
+* \param[in] i4_resolution_id : resolution id (meaning not visible here - TODO confirm)
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*****************************************************************************
+*/
+void ihevce_coarse_me_delete(
+ void *pv_me_ctxt, ihevce_static_cfg_params_t *ps_init_prms, WORD32 i4_resolution_id);
+
+/**
+*******************************************************************************
+* \if Function name : ihevce_coarse_me_set_resolution \endif
+*
+* \brief
+* Sets the resolution for coarse ME state
+*
+* \par Description:
+* ME requires information of resolution to prime up its layer descriptors
+* and contexts. This API is called whenever a control call from application
+* causes a change of resolution. Has to be called once initially before
+* processing any frame. Again this is just a glue function and calls the
+* actual ME API for the same.
+*
+* \param[in,out] pv_me_ctxt : Handle to the ME context
+* \param[in] n_enc_layers : Number of layers getting encoded
+* \param[in] p_wd : Pointer containing widths of each layer getting encoded.
+* \param[in] p_ht : Pointer containing heights of each layer getting encoded.
+*
+* \returns
+* none
+*
+* \author
+* Ittiam
+*
+*******************************************************************************
+*/
+void ihevce_coarse_me_set_resolution(
+ void *pv_me_ctxt, WORD32 n_enc_layers, WORD32 *p_wd, WORD32 *p_ht);
+/* NOTE(review): undocumented; names suggest accumulated HME cost/SAD/block counts for rate control - confirm */
+void ihevce_coarse_me_get_rc_param(
+ void *pv_me_ctxt,
+ LWORD64 *i8_acc_frame_hme_cost,
+ LWORD64 *i8_acc_frame_hme_sad,
+ LWORD64 *i8_acc_num_blks_higher_sad,
+ LWORD64 *i8_total_blks,
+ WORD32 i4_is_prev_pic_same_scene);
+/*!
+******************************************************************************
+* \if Function name : ihevce_coarse_me_frame_init \endif
+*
+* \brief Frame level coarse ME initialisation function
+*
+* \par Description:
+* The following pre-conditions exist for this function: a. We have the input
+* pic ready for encode, b. We have the reference list with POC, L0/L1 IDs
+* and ref ptrs ready for this picture and c. ihevce_me_set_resolution has
+* been called at least once. Once these are supplied, the following are
+* done here: a. Input pyramid creation, b. Update of ME's internal DPB
+* based on available ref list information
+*
+* \param[in] pv_me_ctxt : pointer to ME module
+* \param[in] ps_stat_prms : static config params (usage not visible here - TODO confirm)
+* \param[in] ps_frm_ctb_prms : CTB characteristics parameters
+* \param[in] ps_frm_lamda : Frame level Lambda params
+* \param[in] num_ref_l0 : Number of reference pics in L0 list
+* \param[in] num_ref_l1 : Number of reference pics in L1 list
+* \param[in] num_ref_l0_active : Active reference pics in L0 dir for current frame (shall be <= num_ref_l0)
+* \param[in] num_ref_l1_active : Active reference pics in L1 dir for current frame (shall be <= num_ref_l1)
+* \param[in] pps_rec_list_l0 : List of recon pics in L0 list
+* \param[in] pps_rec_list_l1 : List of recon pics in L1 list
+* \param[in] ps_enc_lap_inp : pointer to input yuv buffer (frame buffer)
+* \param[in] i4_frm_qp : current picture QP
+* \param[in] ps_layer1_buf, ps_ed_ctb_l1, pu1_me_reverse_map_info :
+* not described in this header - TODO document (direction unconfirmed)
+* \param[in] i4_temporal_layer_id : temporal layer id recorded in the ME frame params
+*
+* \return None
+*
+* \author Ittiam
+*****************************************************************************
+*/
+void ihevce_coarse_me_frame_init(
+ void *pv_me_ctxt,
+ ihevce_static_cfg_params_t *ps_stat_prms,
+ frm_ctb_ctxt_t *ps_frm_ctb_prms,
+ frm_lambda_ctxt_t *ps_frm_lamda,
+ WORD32 num_ref_l0,
+ WORD32 num_ref_l1,
+ WORD32 num_ref_l0_active,
+ WORD32 num_ref_l1_active,
+ recon_pic_buf_t **pps_rec_list_l0,
+ recon_pic_buf_t **pps_rec_list_l1,
+ ihevce_lap_enc_buf_t *ps_enc_lap_inp,
+ WORD32 i4_frm_qp,
+ ihevce_ed_blk_t *ps_layer1_buf, //EIID
+ ihevce_ed_ctb_l1_t *ps_ed_ctb_l1,
+ UWORD8 *pu1_me_reverse_map_info,
+ WORD32 i4_temporal_layer_id);
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_coarse_me_process \endif
+*
+* \brief
+* Frame level coarse ME function
+*
+* \par Description:
+* Processing of all layers starting from coarse and going
+* to the refinement layers, all layers
+* that are encoded go CTB by CTB. Outputs of this function are populated
+* ctb_analyse_t structures, one per CTB.
+*
+* \param[in] pv_me_ctxt : pointer to ME module
+* \param[in] ps_enc_lap_inp : pointer to input yuv buffer (frame buffer)
+* \param[in] ps_multi_thrd_ctxt : pointer to multi thread ctxt
+* \param[in] thrd_id : Thread id of the current thrd in which function is executed
+* \param[in] i4_ping_pong : presumably selects a ping/pong instance - TODO confirm
+*
+* NOTE(review): the description mentions ctb_analyse_t outputs, but no such
+* argument is part of this prototype - confirm against the definition.
+*
+* \return None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_coarse_me_process(
+ void *pv_me_ctxt,
+ ihevce_lap_enc_buf_t *ps_enc_lap_inp,
+ multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
+ WORD32 thrd_id,
+ WORD32 i4_ping_pong);
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_coarse_me_frame_dpb_update \endif
+*
+* \brief
+* Frame level update of coarse ME's internal DPB
+*
+* \par Description:
+* Update of ME's internal DPB
+* based on available ref list information
+*
+* \param[in] pv_me_ctxt : pointer to ME module
+* \param[in] num_ref_l0 : Number of reference pics in L0 list
+* \param[in] num_ref_l1 : Number of reference pics in L1 list
+* \param[in] pps_rec_list_l0 : List of recon pics in L0 list
+* \param[in] pps_rec_list_l1 : List of recon pics in L1 list
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_coarse_me_frame_dpb_update(
+ void *pv_me_ctxt,
+ WORD32 num_ref_l0,
+ WORD32 num_ref_l1,
+ recon_pic_buf_t **pps_rec_list_l0,
+ recon_pic_buf_t **pps_rec_list_l1);
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_coarse_me_get_lyr_prms_job_que \endif
+*
+* \brief Returns to the caller key attributes related to dependency between layers
+* for multi-thread execution
+*
+*
+* \par Description:
+* This function requires the precondition that the width and ht of encode
+* layer is known, and ME API ihevce_me_set_resolution() API called with
+* this info. Based on this, ME populates useful information for the encoder
+* to execute the multi-thread (concurrent across layers) in this API:
+* the number of layers, number of vertical units in each layer, and for
+* each vertical unit in each layer, its dependency on previous layer's units.
+* From ME's perspective, a vertical unit is one which is smallest min size
+* vertically (and spans the entire row horizontally). This is CTB for encode
+* layer, and 8x8 / 4x4 for non encode layers.
+*
+* \param[in] pv_me_ctxt : ME handle
+* \param[in] ps_curr_inp : Input buffer descriptor
+* \param[out] pi4_num_hme_lyrs : Num of HME layers (ME updates)
+* \param[out] pi4_num_vert_units_in_lyr : Array of size N (num layers), each
+* entry has num vertical units in that particular layer
+* \param[out] ps_me_job_q_prms : Array of job queue prms, one for each unit in a
+* layer. Note that this is contiguous in order of processing
+* All k units of layer N-1 from top to bottom, followed by
+* all m units of layer N-2 .... ends with X units of layer 0
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_coarse_me_get_lyr_prms_job_que(
+ void *pv_me_ctxt,
+ ihevce_lap_enc_buf_t *ps_curr_inp,
+ WORD32 *pi4_num_hme_lyrs,
+ WORD32 *pi4_num_vert_units_in_lyr,
+ multi_thrd_me_job_q_prms_t *ps_me_job_q_prms);
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_coarse_me_frame_end \endif
+*
+* \brief
+* End of frame update function performs GMV collation
+*
+* \param[in] pv_me_ctxt : pointer to ME module
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_coarse_me_frame_end(void *pv_me_ctxt);
+/* Copies the layer 1 context and its MV bank structure into caller supplied memory */
+void ihevce_coarse_me_get_lyr1_ctxt(
+ void *pv_me_ctxt, void *pv_layer_ctxt, void *pv_layer_mv_bank_ctxt);
+/* Adds the current input to coarse ME and registers the layer 1 MV / ref idx bank buffers */
+void ihevce_coarse_me_set_lyr1_mv_bank(
+ void *pv_me_ctxt,
+ ihevce_lap_enc_buf_t *ps_enc_lap_inp,
+ void *pv_mv_bank,
+ void *pv_ref_idx_bank,
+ WORD32 i4_curr_idx);
+/* Exports layer buffers and strides of a free descriptor (layer 0 excluded); returns its index */
+WORD32 ihevce_coarse_me_get_lyr_buf_desc(
+ void *pv_me_ctxt, UWORD8 **ppu1_decomp_lyr_bufs, WORD32 *pi4_lyr_buf_stride);
+
+#endif /* _IHEVCE_COARSE_ME_PASS_H_ */
diff --git a/encoder/ihevce_common_utils.c b/encoder/ihevce_common_utils.c
new file mode 100644
index 0000000..dd99132
--- /dev/null
+++ b/encoder/ihevce_common_utils.c
@@ -0,0 +1,1470 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_common_utils.c
+*
+* \brief
+* Contains definitions of common utility functions used across encoder
+*
+* \date
+* 18/09/2012
+*
+* \author
+* ittiam
+*
+* List of Functions
+* ihevce_copy_2d()
+* ihevce_hbd_copy_2d()
+* ihevce_2d_square_copy_luma()
+* ihevce_wt_avg_2d()
+* ihevce_itrans_recon_dc_compute()
+* ihevce_itrans_recon_dc()
+* ihevce_hbd_itrans_recon_dc()
+* ihevce_truncate_16bit_data_to_8bit()
+* ihevce_convert_16bit_recon_to_8bit()
+* ihevce_convert_16bit_input_to_8bit()
+* ihevce_find_num_clusters_of_identical_points_1D()
+* ihevce_hbd_compute_ssd()
+* ihevce_compare_pu_mv_t()
+* ihevce_set_pred_buf_as_free()
+* ihevce_get_free_pred_buf_indices()
+* ihevce_scale_mv()
+* ihevce_osal_alloc()
+* ihevce_osal_free()
+* ihevce_osal_init()
+* ihevce_osal_delete()
+* ihevce_sum_abs_seq()
+* ihevce_ssd_calculator()
+* ihevce_chroma_interleave_ssd_calculator()
+* ihevce_ssd_and_sad_calculator()
+* ihevce_chroma_interleave_2d_copy()
+* ihevce_hbd_chroma_interleave_2d_copy()
+* ihevce_hbd_chroma_interleave_ssd_calculator()
+* ihevce_get_chroma_eo_sao_params()
+* ihevce_get_chroma_eo_sao_params_hbd()
+* ihevce_compute_area_of_valid_cus_in_ctb()
+* ihevce_create_cuNode_children()
+* ihevce_cu_tree_init()
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_debug.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_hle_interface.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_entropy_structs.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+#include "ihevce_enc_loop_structs.h"
+#include "ihevce_common_utils.h"
+#include "ihevce_global_tables.h"
+
+#include "cast_types.h"
+#include "osal.h"
+#include "osal_defaults.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief Performs the 2D copy
+*
+* @par Description
+* This routine Performs the 2D copy
+*
+* @param[inout] pu1_dst
+* pointer to the destination buffer
+*
+* @param[in] dst_strd
+* destination stride in terms of the size of input/output unit
+*
+* @param[inout] pu1_src
+* pointer to the source buffer
+*
+* @param[in] src_strd
+* source stride in terms of the size of input/output unit
+*
+* @param[in] blk_wd
+* number of samples to copy in a row
+*
+* @param[in] blk_ht
+* number of rows to copy
+*
+******************************************************************************
+*/
+void ihevce_copy_2d(
+    UWORD8 *pu1_dst,
+    WORD32 dst_stride,
+    UWORD8 *pu1_src,
+    WORD32 src_stride,
+    WORD32 blk_wd,
+    WORD32 blk_ht)
+{
+    /* Rows still to be copied; a non-positive height copies nothing */
+    WORD32 rows_left = blk_ht;
+
+    while(rows_left > 0)
+    {
+        /* One row is blk_wd bytes wide */
+        memcpy(pu1_dst, pu1_src, blk_wd);
+
+        /* Advance both buffers by their own stride to reach the next row */
+        pu1_src += src_stride;
+        pu1_dst += dst_stride;
+        rows_left--;
+    }
+}
+
+/**
+******************************************************************************
+*
+* @brief Performs the 2D copy of luma data
+*
+* @par Description
+* This routine performs the 2D square copy of luma data
+*
+* @param[inout] p_dst
+* pointer to the destination buffer
+*
+* @param[in] dst_strd
+* destination stride in terms of the size of input/output unit
+*
+* @param[inout] p_src
+* pointer to the source buffer
+*
+* @param[in] src_strd
+* source stride in terms of the size of input/output unit
+*
+* @param[in] num_cols_to_copy
+* number of units in a line to copy from src to dst buffer
+* Assumption : num_cols_to_copy <= min (dst_strd, src_strd)
+*
+* @param[in] unit_size
+* size of the unit in bytes
+*
+* @return none
+*
+* Assumptions : num_cols_to_copy = num_lines_to_copy,
+* num_lines_to_copy can have {4, 16, 32, 64}
+*
+******************************************************************************
+*/
+void ihevce_2d_square_copy_luma(
+    void *p_dst,
+    WORD32 dst_strd,
+    void *p_src,
+    WORD32 src_strd,
+    WORD32 num_cols_to_copy,
+    WORD32 unit_size)
+{
+    /* Byte-wise views of the two buffers; strides and widths are given in */
+    /* units and are converted to bytes with unit_size                     */
+    UWORD8 *pu1_dst_row = (UWORD8 *)p_dst;
+    UWORD8 *pu1_src_row = (UWORD8 *)p_src;
+    WORD32 rows_left = num_cols_to_copy; /* square block: rows == columns */
+
+    while(rows_left > 0)
+    {
+        memcpy(pu1_dst_row, pu1_src_row, (num_cols_to_copy * unit_size));
+
+        pu1_src_row += (src_strd * unit_size);
+        pu1_dst_row += (dst_strd * unit_size);
+        rows_left--;
+    }
+}
+
+/**
+********************************************************************************
+*
+* @brief Weighted pred of 2 predictor buffers as per spec
+*
+* @param[in] pu1_pred0 : Pred0 buffer
+*
+* @param[in] pu1_pred1 : Pred1 buffer
+*
+* @param[in] pred0_strd : Stride of pred0 buffer
+*
+* @param[in] pred1_strd : Stride of pred1 buffer
+*
+* @param[in] wd : Width of pred block
+*
+* @param[in] ht : Height of pred block
+*
+* @param[out] pu1_dst : Destination buffer that will hold result
+*
+* @param[in] dst_strd : Stride of dest buffer
+*
+* @param[in] w0 : Weighting factor of Pred0
+*
+* @param[in] w1 : weighting factor of pred1
+*
+* @param[in] o0 : offset for pred0
+*
+* @param[in] o1 : offset for pred1
+*
+* @param[in] log_wdc : shift factor as per spec
+*
+* @return none
+*
+********************************************************************************
+*/
+void ihevce_wt_avg_2d(
+    UWORD8 *pu1_pred0,
+    UWORD8 *pu1_pred1,
+    WORD32 pred0_strd,
+    WORD32 pred1_strd,
+    WORD32 wd,
+    WORD32 ht,
+    UWORD8 *pu1_dst,
+    WORD32 dst_strd,
+    WORD32 w0,
+    WORD32 w1,
+    WORD32 o0,
+    WORD32 o1,
+    WORD32 log_wdc)
+{
+    /* Constant additive term handed to IHEVCE_WT_PRED: (o0 + o1 + 1) >> 1 */
+    WORD32 rnd = (o0 + o1 + 1) >> 1;
+    /* Down-shift handed to IHEVCE_WT_PRED */
+    WORD32 shift = log_wdc + 1;
+    WORD32 row, col;
+
+    /* Both terms above are independent of the sample position, so they */
+    /* are evaluated once outside the loops                             */
+    for(row = 0; row < ht; row++)
+    {
+        for(col = 0; col < wd; col++)
+        {
+            /* Weighted combination of the co-located samples ... */
+            WORD32 wt_sum = IHEVCE_WT_PRED(pu1_pred0[col], pu1_pred1[col], w0, w1, rnd, shift);
+
+            /* ... saturated to the 8-bit sample range */
+            pu1_dst[col] = (UWORD8)(CLIP3(wt_sum, 0, 255));
+        }
+
+        /* Move all three buffers down by one row */
+        pu1_dst += dst_strd;
+        pu1_pred0 += pred0_strd;
+        pu1_pred1 += pred1_strd;
+    }
+}
+/**
+******************************************************************************
+*
+* @brief Performs the Recon for DC only coefficient case
+*
+* @par Description
+* This routine performs the Recon for DC only coefficient case
+*
+* @param[inout] pu1_dst
+* pointer to the destination buffer
+*
+* @param[in] pu1_pred
+* pointer to the pred buffer
+*
+* @param[in] dst_strd
+* destination stride
+*
+* @param[in] pred_strd
+* pred buffer stride
+*
+* @param[in] trans_size
+* transform size
+*
+* @param[in] col_mult
+* chroma multiplier
+*
+* @param[in] dc_value
+* residue value
+*
+* @return none
+*
+******************************************************************************
+*/
+static INLINE void ihevce_itrans_recon_dc_compute(
+    UWORD8 *pu1_dst,
+    UWORD8 *pu1_pred,
+    WORD32 dst_strd,
+    WORD32 pred_strd,
+    WORD32 trans_size,
+    WORD32 col_mult,
+    WORD32 dc_value)
+{
+    WORD32 y, x;
+
+    /* Every sample of the trans_size x trans_size block gets the same   */
+    /* residue (dc_value) added to its prediction; col_mult is the       */
+    /* distance between horizontally adjacent samples of the block       */
+    for(y = 0; y < trans_size; y++)
+    {
+        for(x = 0; x < trans_size; x++)
+        {
+            WORD32 x_off = x * col_mult;
+            WORD32 recon = pu1_pred[y * pred_strd + x_off] + dc_value;
+
+            pu1_dst[y * dst_strd + x_off] = CLIP_U8(recon);
+        }
+    }
+}
+
+/**
+******************************************************************************
+*
+* @brief Performs the IQ+IT+Recon for DC only coefficient case
+*
+* @par Description
+* This routine performs the IQ+IT+Recon for DC only coefficient case
+*
+* @param[in] pu1_pred
+* pointer to the pred buffer
+*
+* @param[in] pred_strd
+* pred buffer stride
+*
+* @param[inout] pu1_dst
+* pointer to the destination buffer
+*
+* @param[in] dst_strd
+* destination stride
+*
+* @param[in] trans_size
+* transform size
+*
+* @param[in] i2_deq_value
+* Dequant Coeffs
+*
+* @param[in] chroma plane
+* -1 : luma, 0 : chroma U, 1 : chroma V
+*
+* @return none
+*
+******************************************************************************
+*/
+void ihevce_itrans_recon_dc(
+    UWORD8 *pu1_pred,
+    WORD32 pred_strd,
+    UWORD8 *pu1_dst,
+    WORD32 dst_strd,
+    WORD32 trans_size,
+    WORD16 i2_deq_value,
+    CHROMA_PLANE_ID_T e_chroma_plane)
+{
+    WORD32 col_mult;
+    WORD32 plane_offset;
+    WORD32 rnd;
+    WORD32 dc_value;
+
+    assert(e_chroma_plane == NULL_PLANE || e_chroma_plane == U_PLANE || e_chroma_plane == V_PLANE);
+
+    if(NULL_PLANE != e_chroma_plane)
+    {
+        /* Chroma: samples are two apart and the plane id itself is used */
+        /* as the offset of the first sample of the requested plane      */
+        col_mult = 2;
+        plane_offset = e_chroma_plane;
+    }
+    else
+    {
+        /* Luma: contiguous samples starting at the buffer base */
+        col_mult = 1;
+        plane_offset = 0;
+    }
+
+    /* First stage: scale by 64, round and shift by IT_SHIFT_STAGE_1 */
+    rnd = 1 << (IT_SHIFT_STAGE_1 - 1);
+    dc_value = CLIP_S16((i2_deq_value * 64 + rnd) >> IT_SHIFT_STAGE_1);
+
+    /* Second stage: same operation with IT_SHIFT_STAGE_2 */
+    rnd = 1 << (IT_SHIFT_STAGE_2 - 1);
+    dc_value = CLIP_S16((dc_value * 64 + rnd) >> IT_SHIFT_STAGE_2);
+
+    /* Add the resulting constant residue to the prediction */
+    ihevce_itrans_recon_dc_compute(
+        pu1_dst + plane_offset,
+        pu1_pred + plane_offset,
+        dst_strd,
+        pred_strd,
+        trans_size,
+        col_mult,
+        dc_value);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_find_num_clusters_of_identical_points_1D \endif
+*
+* \brief
+* Groups equal byte values of a 1D array. Each distinct value is written
+* once to the output array, in order of first appearance, and the number
+* of times it occurs in the input is written to the same index of the
+* frequency array.
+*
+* \param[in] pu1_inp_array : input values; element 0 is read unconditionally,
+* so at least one element must be present
+* \param[out] pu1_out_array : receives the distinct values
+* \param[out] pu1_freq_of_out_data_in_inp : receives the occurrence count of
+* each distinct value (stored as UWORD8)
+* \param[in] i4_num_inp_array_elements : number of input elements
+*
+* \return
+* Number of distinct values found
+*
+*****************************************************************************
+*/
+WORD32 ihevce_find_num_clusters_of_identical_points_1D(
+ UWORD8 *pu1_inp_array,
+ UWORD8 *pu1_out_array,
+ UWORD8 *pu1_freq_of_out_data_in_inp,
+ WORD32 i4_num_inp_array_elements)
+{
+ WORD32 i;
+ /* The first element defines the cluster handled by this invocation */
+ UWORD8 u1_value = pu1_inp_array[0];
+ /* Starts at the element count and drops by one per duplicate found */
+ WORD32 i4_num_clusters = i4_num_inp_array_elements;
+ /* Slot 0 of the output holds u1_value; leftovers are packed from 1 */
+ WORD32 i4_output_array_idx = 1;
+
+ pu1_freq_of_out_data_in_inp[0] = 1;
+ pu1_out_array[0] = u1_value;
+
+ if(1 == i4_num_inp_array_elements)
+ {
+ return 1;
+ }
+
+ /* Count the duplicates of u1_value and compact every other element */
+ /* into pu1_out_array[1..]. The write index never exceeds the read */
+ /* index, so this is also valid when input and output are the same */
+ /* buffer, as in the recursive call below */
+ for(i = 1; i < i4_num_inp_array_elements; i++)
+ {
+ if(pu1_inp_array[i] == u1_value)
+ {
+ pu1_freq_of_out_data_in_inp[0]++;
+ i4_num_clusters--;
+ }
+ else
+ {
+ pu1_out_array[i4_output_array_idx] = pu1_inp_array[i];
+
+ i4_output_array_idx++;
+ }
+ }
+
+ /* At this point i4_num_clusters - 1 is the number of elements that */
+ /* differed from u1_value; cluster them in place, one slot further on */
+ if(i4_num_clusters > 1)
+ {
+ WORD32 i4_num_sub_clusters;
+
+ i4_num_sub_clusters = ihevce_find_num_clusters_of_identical_points_1D(
+ &pu1_out_array[1],
+ &pu1_out_array[1],
+ &pu1_freq_of_out_data_in_inp[1],
+ i4_num_clusters - 1);
+
+ /* This invocation's cluster plus those found among the leftovers */
+ i4_num_clusters = 1 + i4_num_sub_clusters;
+ }
+
+ return i4_num_clusters;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Compare Motion vectors function
+*
+* @par Description:
+* Checks if MVs and Reference idx are exactly matching.
+*
+* @param[inout] ps_1
+* motion vector 1 to be compared
+*
+* @param[in] ps_2
+* motion vector 2 to be compared
+*
+* @returns
+* 0 : if not matching 1 : if matching
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ihevce_compare_pu_mv_t(
+    pu_mv_t *ps_pu_mv_1, pu_mv_t *ps_pu_mv_2, WORD32 i4_pred_mode_1, WORD32 i4_pred_mode_2)
+{
+    WORD32 uses_l0, uses_l1;
+    WORD32 l0_same, l1_same;
+
+    /* Different prediction modes can never match */
+    if(i4_pred_mode_1 != i4_pred_mode_2)
+    {
+        return 0;
+    }
+
+    /* L0 data is relevant unless the mode is PRED_L1, and vice versa */
+    uses_l0 = (PRED_L1 != i4_pred_mode_1);
+    uses_l1 = (PRED_L0 != i4_pred_mode_1);
+
+    /* A list matches when it is in use, the reference indices agree and */
+    /* the motion vectors are byte-wise identical                        */
+    l0_same = uses_l0 && (ps_pu_mv_1->i1_l0_ref_idx == ps_pu_mv_2->i1_l0_ref_idx) &&
+              (0 == memcmp(&ps_pu_mv_1->s_l0_mv, &ps_pu_mv_2->s_l0_mv, sizeof(mv_t)));
+
+    l1_same = uses_l1 && (ps_pu_mv_1->i1_l1_ref_idx == ps_pu_mv_2->i1_l1_ref_idx) &&
+              (0 == memcmp(&ps_pu_mv_1->s_l1_mv, &ps_pu_mv_2->s_l1_mv, sizeof(mv_t)));
+
+    /* When both lists are in use, both must match */
+    if(uses_l0 && uses_l1)
+    {
+        return (l0_same & l1_same);
+    }
+
+    return uses_l0 ? l0_same : l1_same;
+
+} /* End of ihevce_compare_pu_mv_t */
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_set_pred_buf_as_free \endif
+*
+* \brief
+*    Mark buffer as free by clearing its bit in the bitfield
+*
+* \param[inout] pu4_idx_array : bitfield with one bit per buffer
+* \param[in] u1_buf_id : index of the bit to clear; values of 32 and above
+*                        have no corresponding bit and are ignored
+*
+* \return
+*    None
+*
+*****************************************************************************
+*/
+void ihevce_set_pred_buf_as_free(UWORD32 *pu4_idx_array, UWORD8 u1_buf_id)
+{
+    /* The mask is built from an unsigned one: shifting a signed 1 into  */
+    /* bit 31 or by 32 and more is undefined behaviour                   */
+    if(u1_buf_id < 32)
+    {
+        (*pu4_idx_array) &= ~((UWORD32)1 << u1_buf_id);
+    }
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_get_free_pred_buf_indices \endif
+*
+* \brief
+*    Finds the lowest-numbered clear bits of the bitfield, records their
+*    indices and marks them as set
+*
+* \param[out] pu1_idx_array : receives the indices of the buffers handed out
+* \param[inout] pu4_bitfield : bitfield with one bit per buffer; a set bit
+*                              means the buffer is not free
+* \param[in] u1_num_bufs_requested : number of buffers wanted
+*
+* \return
+*    Number of indices written to pu1_idx_array. Equals
+*    u1_num_bufs_requested whenever that many bits were clear.
+*
+*****************************************************************************
+*/
+UWORD8 ihevce_get_free_pred_buf_indices(
+    UWORD8 *pu1_idx_array, UWORD32 *pu4_bitfield, UWORD8 u1_num_bufs_requested)
+{
+    UWORD8 i;
+
+    UWORD8 u1_num_free_bufs_found = 0;
+    UWORD32 u4_local_bitfield = *pu4_bitfield;
+
+    ASSERT(u1_num_bufs_requested <= (32 - ihevce_num_ones_generic(u4_local_bitfield)));
+
+    /* The scan is limited to the 32 bits of the field. Without the      */
+    /* limit a request that cannot be met (ASSERT disabled) would shift  */
+    /* by 32 or more and never terminate                                 */
+    for(i = 0; (i < 32) && (u1_num_free_bufs_found < u1_num_bufs_requested); i++)
+    {
+        /* Unsigned mask: a signed 1 must not be shifted into bit 31 */
+        UWORD32 u4_mask = (UWORD32)1 << i;
+
+        if(0 == (u4_local_bitfield & u4_mask))
+        {
+            pu1_idx_array[u1_num_free_bufs_found++] = i;
+            u4_local_bitfield |= u4_mask;
+        }
+    }
+
+    (*pu4_bitfield) = u4_local_bitfield;
+
+    return u1_num_free_bufs_found;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_scale_mv \endif
+*
+* \brief
+*    Scales a motion vector in place by the ratio of two POC distances
+*
+* \param[inout] ps_mv : motion vector to scale; overwritten with the result
+* \param[in] i4_poc_to : POC defining the numerator distance (curr - to)
+* \param[in] i4_poc_from : POC defining the denominator distance (curr - from)
+* \param[in] i4_curr_poc : POC of the current picture
+*
+* \return
+*    None. When i4_poc_from equals i4_curr_poc no scale factor exists and
+*    the motion vector is left unchanged.
+*
+*****************************************************************************
+*/
+void ihevce_scale_mv(mv_t *ps_mv, WORD32 i4_poc_to, WORD32 i4_poc_from, WORD32 i4_curr_poc)
+{
+    WORD32 td, tb, tx;
+    WORD32 dist_scale_factor;
+    WORD32 scaled_x, scaled_y;
+    WORD32 mvx, mvy;
+
+    td = CLIP_S8(i4_curr_poc - i4_poc_from);
+    tb = CLIP_S8(i4_curr_poc - i4_poc_to);
+
+    /* td is the divisor below; a zero distance would divide by zero.   */
+    /* NOTE(review): leaving the MV unscaled is a defensive fallback -  */
+    /* confirm that no caller relies on a different behaviour           */
+    if(0 == td)
+    {
+        return;
+    }
+
+    tx = (16384 + (abs(td) >> 1)) / td;
+
+    dist_scale_factor = (tb * tx + 32) >> 6;
+    dist_scale_factor = CLIP3(dist_scale_factor, -4096, 4095);
+
+    /* The products are evaluated once; the scaling is applied to the   */
+    /* magnitude and the sign is restored afterwards                    */
+    scaled_x = dist_scale_factor * ps_mv->i2_mvx;
+    scaled_y = dist_scale_factor * ps_mv->i2_mvy;
+
+    mvx = SIGN(scaled_x) * ((abs(scaled_x) + 127) >> 8);
+    mvy = SIGN(scaled_y) * ((abs(scaled_y) + 127) >> 8);
+
+    ps_mv->i2_mvx = CLIP_S16(mvx);
+    ps_mv->i2_mvy = CLIP_S16(mvy);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_osal_alloc \endif
+*
+* \brief
+*    Memory allocate call back function passed to OSAL. Forwards the
+*    request to the ihevce_mem_alloc callback of the HLE context.
+*
+* \param[in] pv_handle : handle to hle ctxt
+* \param[in] u4_size   : size of memory required
+*
+* \return
+*    Memory pointer reported by the allocator callback; NULL if the
+*    callback did not provide one
+*
+* \author
+*  Ittiam
+*
+*****************************************************************************
+*/
+void *ihevce_osal_alloc(void *pv_handle, UWORD32 u4_size)
+{
+    ihevce_hle_ctxt_t *ps_hle_ctxt = (ihevce_hle_ctxt_t *)pv_handle;
+    iv_mem_rec_t s_mem_tab;
+
+    /* def init of memtab */
+    s_mem_tab.i4_size = sizeof(iv_mem_rec_t);
+    s_mem_tab.i4_mem_alignment = 8;
+    s_mem_tab.e_mem_type = IV_EXT_CACHEABLE_NORMAL_MEM;
+
+    /* allocate memory for required size */
+    s_mem_tab.i4_mem_size = u4_size;
+
+    /* Start from a known value so that an allocator which leaves the  */
+    /* field untouched yields NULL instead of an indeterminate pointer */
+    s_mem_tab.pv_base = NULL;
+
+    ps_hle_ctxt->ihevce_mem_alloc(
+        ps_hle_ctxt->pv_mem_mgr_hdl, &ps_hle_ctxt->ps_static_cfg_prms->s_sys_api, &s_mem_tab);
+
+    return (s_mem_tab.pv_base);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_osal_free \endif
+*
+* \brief
+* Memory free call back function passed to OSAL
+*
+* \param[in] pv_handle : handle to hle ctxt
+* \param[in] pv_mem : memory to be freed
+*
+* \return
+* none
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_osal_free(void *pv_handle, void *pv_mem)
+{
+    iv_mem_rec_t s_mem_rec;
+    ihevce_hle_ctxt_t *ps_ctxt = (ihevce_hle_ctxt_t *)pv_handle;
+
+    /* Describe the block to release: same defaults as used for          */
+    /* allocation, with the base address supplied by the caller          */
+    s_mem_rec.pv_base = pv_mem;
+    s_mem_rec.e_mem_type = IV_EXT_CACHEABLE_NORMAL_MEM;
+    s_mem_rec.i4_mem_alignment = 8;
+    s_mem_rec.i4_size = sizeof(iv_mem_rec_t);
+
+    /* Hand the record to the free callback of the HLE context */
+    ps_ctxt->ihevce_mem_free(ps_ctxt->pv_mem_mgr_hdl, &s_mem_rec);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_osal_init \endif
+*
+* \brief
+*    Function to initialise OSAL handle. Allocates the handle memory via
+*    the HLE context's allocator, initialises OSAL on it, registers the
+*    alloc/free callbacks and stores the handle in the HLE context.
+*
+* \param[in] pv_hle_ctxt : handle to hle ctxt
+*
+* \return
+*    0 on success, -1 on failure (an error message is printed through the
+*    ihevce_printf callback and the handle memory is released again)
+*
+* \author
+*  Ittiam
+*
+*****************************************************************************
+*/
+WORD32 ihevce_osal_init(void *pv_hle_ctxt)
+{
+    /* local variables */
+    ihevce_hle_ctxt_t *ps_hle_ctxt;
+    osal_cb_funcs_t s_cb_funcs;
+    WORD32 status = 0;
+    void *pv_osal_handle;
+    iv_mem_rec_t s_mem_tab;
+
+    ps_hle_ctxt = (ihevce_hle_ctxt_t *)pv_hle_ctxt;
+
+    /* def init of memtab */
+    s_mem_tab.i4_size = sizeof(iv_mem_rec_t);
+    s_mem_tab.i4_mem_alignment = 8;
+    s_mem_tab.e_mem_type = IV_EXT_CACHEABLE_NORMAL_MEM;
+
+    /* --------------------------------------------------------------------- */
+    /*                        OSAL Handle create                             */
+    /* --------------------------------------------------------------------- */
+
+    /* Allocate memory for the handle */
+    s_mem_tab.i4_mem_size = OSAL_HANDLE_SIZE;
+
+    /* Known starting value, so that the NULL check below is meaningful */
+    /* even if the allocator leaves the field untouched                 */
+    s_mem_tab.pv_base = NULL;
+
+    ps_hle_ctxt->ihevce_mem_alloc(
+        ps_hle_ctxt->pv_mem_mgr_hdl, &ps_hle_ctxt->ps_static_cfg_prms->s_sys_api, &s_mem_tab);
+    if(NULL == s_mem_tab.pv_base)
+    {
+        ps_hle_ctxt->ps_static_cfg_prms->s_sys_api.ihevce_printf(
+            ps_hle_ctxt->ps_static_cfg_prms->s_sys_api.pv_cb_handle,
+            "IHEVCE ERROR: Error in OSAL initialization\n");
+        return (-1);
+    }
+
+    pv_osal_handle = s_mem_tab.pv_base;
+
+    /* Initialize OSAL call back functions */
+    s_cb_funcs.mmr_handle = (void *)ps_hle_ctxt;
+    s_cb_funcs.osal_alloc = &ihevce_osal_alloc;
+    s_cb_funcs.osal_free = &ihevce_osal_free;
+
+    /* Callbacks are registered only once the handle is initialised */
+    status = osal_init(pv_osal_handle);
+    if(OSAL_SUCCESS == status)
+    {
+        status = osal_register_callbacks(pv_osal_handle, &s_cb_funcs);
+    }
+
+    if(OSAL_SUCCESS != status)
+    {
+        ps_hle_ctxt->ps_static_cfg_prms->s_sys_api.ihevce_printf(
+            ps_hle_ctxt->ps_static_cfg_prms->s_sys_api.pv_cb_handle,
+            "IHEVCE ERROR: Error in OSAL initialization\n");
+
+        /* The handle is not stored in the context on this path, so    */
+        /* nobody else could release its memory later                  */
+        ps_hle_ctxt->ihevce_mem_free(ps_hle_ctxt->pv_mem_mgr_hdl, &s_mem_tab);
+        return (-1);
+    }
+
+    ps_hle_ctxt->pv_osal_handle = pv_osal_handle;
+
+    return (0);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_osal_delete \endif
+*
+* \brief
+* Function to delete OSAL handle
+*
+* \return
+* 0 on success, -1 if the OSAL handle could not be closed
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+WORD32 ihevce_osal_delete(void *pv_hle_ctxt)
+{
+    ihevce_hle_ctxt_t *ps_ctxt = (ihevce_hle_ctxt_t *)pv_hle_ctxt;
+    void *pv_handle = ps_ctxt->pv_osal_handle;
+    iv_mem_rec_t s_mem_rec;
+
+    /* Close OSAL first; the handle memory is kept if this fails */
+    if(0 != osal_close(pv_handle))
+    {
+        ps_ctxt->ps_static_cfg_prms->s_sys_api.ihevce_printf(
+            ps_ctxt->ps_static_cfg_prms->s_sys_api.pv_cb_handle,
+            "IHEVCE ERROR>> Unable to close OSAL\n");
+        return (-1);
+    }
+
+    /* Describe the handle memory and give it back through the free */
+    /* callback of the HLE context                                   */
+    s_mem_rec.i4_size = sizeof(iv_mem_rec_t);
+    s_mem_rec.i4_mem_alignment = 8;
+    s_mem_rec.e_mem_type = IV_EXT_CACHEABLE_NORMAL_MEM;
+    s_mem_rec.pv_base = pv_handle;
+
+    ps_ctxt->ihevce_mem_free(ps_ctxt->pv_mem_mgr_hdl, &s_mem_rec);
+
+    return (0);
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Compute SSD between two blocks (8 bit input)
+*
+* @par Description:
+*
+* @param[in] pu1_inp
+* UWORD8 pointer to the src block
+*
+* @param[in] pu1_ref
+* UWORD8 pointer to the ref block
+*
+* @param[in] inp_stride
+* UWORD32 Source stride
+*
+* @param[in] ref_stride
+* UWORD32 ref stride
+*
+* @param[in] wd
+* UWORD32 width of the block
+*
+* @param[in] ht
+* UWORD32 height of the block
+*
+* @returns SSD
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+LWORD64 ihevce_ssd_calculator(
+    UWORD8 *pu1_inp, UWORD8 *pu1_ref, UWORD32 inp_stride, UWORD32 ref_stride, UWORD32 wd, UWORD32 ht)
+{
+    LWORD64 ssd = 0;
+    UWORD32 row, col;
+
+    for(row = 0; row < ht; row++)
+    {
+        for(col = 0; col < wd; col++)
+        {
+            /* Squared difference of the co-located samples */
+            ssd += (pu1_inp[col] - pu1_ref[col]) * (pu1_inp[col] - pu1_ref[col]);
+        }
+
+        /* Next row of each block */
+        pu1_ref += ref_stride;
+        pu1_inp += inp_stride;
+    }
+
+    return ssd;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Compute SSD between two blocks (8 bit input, chroma interleaved input)
+*
+* @par Description:
+*
+* @param[in] pu1_inp
+* UWORD8 pointer to the src block
+*
+* @param[in] pu1_ref
+* UWORD8 pointer to the ref block
+*
+* @param[in] inp_stride
+* UWORD32 Source stride
+*
+* @param[in] ref_stride
+* UWORD32 ref stride
+*
+* @param[in] wd
+* UWORD32 width of the block
+*
+* @param[in] ht
+* UWORD32 height of the block
+*
+* @returns SSD
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+LWORD64 ihevce_chroma_interleave_ssd_calculator(
+    UWORD8 *pu1_inp, UWORD8 *pu1_ref, UWORD32 inp_stride, UWORD32 ref_stride, UWORD32 wd, UWORD32 ht)
+{
+    LWORD64 ssd = 0;
+    UWORD32 row;
+
+    for(row = 0; row < ht; row++)
+    {
+        UWORD32 col;
+
+        for(col = 0; col < wd; col++)
+        {
+            /* Interleaved chroma: samples of one plane sit at every */
+            /* second position, starting at the pointers passed in   */
+            WORD32 diff = pu1_inp[col * 2] - pu1_ref[col * 2];
+
+            ssd += diff * diff;
+        }
+
+        /* Step to the next row */
+        pu1_ref += ref_stride;
+        pu1_inp += inp_stride;
+    }
+
+    return (ssd);
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Compute SSD & SAD between two blocks (8 bit input)
+*
+* @par Description:
+*
+* @param[in] pu1_recon
+* UWORD8 pointer to the block 1
+*
+* @param[in] recon_strd
+* UWORD32 stride of block 1
+*
+* @param[in] pu1_src
+* UWORD8 pointer to the block 2
+*
+* @param[in] src_strd
+* UWORD32 stride of block 2
+*
+* @param[in] trans_size
+* UWORD32 block wd/ht
+*
+* @param[out] *pu4_blk_sad
+* UWORD32 block SAD
+*
+* @returns SSD
+*
+* @remarks none
+*
+*******************************************************************************
+*/
+LWORD64 ihevce_ssd_and_sad_calculator(
+    UWORD8 *pu1_recon,
+    WORD32 recon_strd,
+    UWORD8 *pu1_src,
+    WORD32 src_strd,
+    WORD32 trans_size,
+    UWORD32 *pu4_blk_sad)
+{
+    LWORD64 ssd = 0;
+    WORD32 sad = 0;
+    WORD32 row, col;
+
+    for(row = 0; row < trans_size; row++)
+    {
+        for(col = 0; col < trans_size; col++)
+        {
+            /* One difference feeds both metrics */
+            WORD32 diff = pu1_src[col] - pu1_recon[col];
+
+            ssd += diff * diff;
+            sad += abs(diff);
+        }
+
+        /* Step to the next row of each block */
+        pu1_src += src_strd;
+        pu1_recon += recon_strd;
+    }
+
+    /* SAD is reported through the output parameter */
+    *pu4_blk_sad = sad;
+
+    /* The SSD is saturated to the range [0, 0x7fffffff] before return */
+    ssd = CLIP3(ssd, 0, 0x7fffffff);
+
+    return (ssd);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_chroma_interleave_2d_copy \endif
+*
+* \brief
+* This function copies one plane (u/v) of interleaved chroma buffer from
+* source to destination
+******************************************************************************
+*/
+void ihevce_chroma_interleave_2d_copy(
+    UWORD8 *pu1_uv_src_bp,
+    WORD32 src_strd,
+    UWORD8 *pu1_uv_dst_bp,
+    WORD32 dst_strd,
+    WORD32 w,
+    WORD32 h,
+    CHROMA_PLANE_ID_T e_chroma_plane)
+{
+    /* U_PLANE starts at the buffer base, any other plane id one byte in */
+    WORD32 plane_offset = (U_PLANE == e_chroma_plane) ? 0 : 1;
+    UWORD8 *pu1_src_row = pu1_uv_src_bp + plane_offset;
+    UWORD8 *pu1_dst_row = pu1_uv_dst_bp + plane_offset;
+    WORD32 row, col;
+
+    for(row = 0; row < h; row++)
+    {
+        /* Copy every second byte so the other plane stays untouched */
+        for(col = 0; col < w; col++)
+        {
+            pu1_dst_row[col * 2] = pu1_src_row[col * 2];
+        }
+
+        pu1_dst_row += dst_strd;
+        pu1_src_row += src_strd;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Gets edge offset params
+*
+* @par Description:
+* Given the ctb and sao angle this function will calculate accumulated
+* error between source and recon and the corresponding count for 4 edge
+* indexes one each for peak,valley, half peak and half valley.
+*
+* @param[in]
+* ps_sao_ctxt: Pointer to SAO context
+* eo_sao_class: specifies edge offset class
+* pi4_acc_error_category: pointer to an array to store accumulated error between source and recon
+* pi4_category_count : pointer to an array to store number of peaks,valleys,half peaks and half valleys.
+* @returns
+*
+* @remarks
+* None
+*
+*******************************************************************************/
+void ihevce_get_chroma_eo_sao_params(
+    void *pv_sao_ctxt,
+    WORD32 eo_sao_class,
+    WORD32 *pi4_acc_error_category,
+    WORD32 *pi4_category_count)
+{
+    sao_ctxt_t *ps_sao_ctxt = (sao_ctxt_t *)pv_sao_ctxt;
+
+    /* The 90 degree class has no horizontal neighbour, the 0 degree */
+    /* class no vertical one                                         */
+    WORD32 has_horz_nbr = (SAO_EDGE_90_DEG != eo_sao_class);
+    WORD32 has_vert_nbr = (SAO_EDGE_0_DEG != eo_sao_class);
+
+    /* Position of this CTB relative to the picture borders */
+    WORD32 at_left = (0 == ps_sao_ctxt->i4_ctb_x);
+    WORD32 at_right = ((ps_sao_ctxt->i4_ctb_x + 1) == ps_sao_ctxt->ps_sps->i2_pic_wd_in_ctb);
+    WORD32 at_top = (0 == ps_sao_ctxt->i4_ctb_y);
+    WORD32 at_bottom = ((ps_sao_ctxt->i4_ctb_y + 1) == ps_sao_ctxt->ps_sps->i2_pic_ht_in_ctb);
+
+    WORD32 recon_strd = ps_sao_ctxt->i4_cur_chroma_recon_stride;
+    WORD32 src_strd = ps_sao_ctxt->i4_cur_chroma_src_stride;
+
+    /* Default window: full block width, half the block height */
+    WORD32 row_start = 0;
+    WORD32 row_end = ps_sao_ctxt->i4_sao_blk_ht >> 1;
+    WORD32 col_start = 0;
+    WORD32 col_end = ps_sao_ctxt->i4_sao_blk_wd;
+    WORD32 row_offset = 0, col_offset = 0;
+    WORD32 row, col;
+
+    /* Shrink the window at picture borders where a neighbour would be */
+    /* missing; columns move in steps of 2 as U and V are interleaved  */
+    if(has_horz_nbr)
+    {
+        if(at_left)
+        {
+            col_start = 2;
+        }
+        if(at_right)
+        {
+            col_end -= 2;
+        }
+    }
+    if(has_vert_nbr)
+    {
+        if(at_top)
+        {
+            row_start = 1;
+        }
+        if(at_bottom)
+        {
+            row_end -= 1;
+        }
+    }
+
+    /* Displacement from the current sample to neighbour 'b'; neighbour */
+    /* 'a' lies at the mirrored position                                */
+    if(SAO_EDGE_0_DEG == eo_sao_class)
+    {
+        row_offset = 0;
+        col_offset = 2;
+    }
+    else if(SAO_EDGE_90_DEG == eo_sao_class)
+    {
+        row_offset = 1;
+        col_offset = 0;
+    }
+    else if(SAO_EDGE_135_DEG == eo_sao_class)
+    {
+        row_offset = 1;
+        col_offset = 2;
+    }
+    else if(SAO_EDGE_45_DEG == eo_sao_class)
+    {
+        row_offset = 1;
+        col_offset = -2;
+    }
+
+    for(row = row_start; row < row_end; row++)
+    {
+        /* Column-independent parts of the four buffer indices */
+        WORD32 cur_off = row * recon_strd;
+        WORD32 nbr_a_off = (row - row_offset) * recon_strd - col_offset;
+        WORD32 nbr_b_off = (row + row_offset) * recon_strd + col_offset;
+        WORD32 src_off = row * src_strd;
+
+        for(col = col_start; col < col_end; col++)
+        {
+            WORD32 c = ps_sao_ctxt->pu1_cur_chroma_recon_buf[col + cur_off];
+            WORD32 a = ps_sao_ctxt->pu1_cur_chroma_recon_buf[col + nbr_a_off];
+            WORD32 b = ps_sao_ctxt->pu1_cur_chroma_recon_buf[col + nbr_b_off];
+            /* Source minus recon at the current position */
+            WORD32 pel_error = ps_sao_ctxt->pu1_cur_chroma_src_buf[col + src_off] - c;
+            /* Category index in the range 0..4 */
+            WORD32 edgeidx = 2 + SIGN(c - a) + SIGN(c - b);
+
+            /* Samples that already match the source are not counted */
+            if(0 != pel_error)
+            {
+                pi4_acc_error_category[edgeidx] += pel_error;
+                pi4_category_count[edgeidx]++;
+            }
+        }
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Gets edge offset params
+*
+* @par Description:
+* Given the ctb and sao angle this function will calculate accumulated
+* error between source and recon and the corresponding count for 4 edge
+* indexes one each for peak,valley, half peak and half valley.
+*
+* @param[in]
+* ps_sao_ctxt: Pointer to SAO context
+* eo_sao_class: specifies edge offset class
+* pi4_acc_error_category: pointer to an array to store accumulated error between source and recon
+* pi4_category_count : pointer to an array to store number of peaks,valleys,half peaks and half valleys.
+* @returns
+*
+* @remarks
+* None
+*
+*******************************************************************************/
+void ihevce_get_luma_eo_sao_params(
+    void *pv_sao_ctxt,
+    WORD32 eo_sao_class,
+    WORD32 *pi4_acc_error_category,
+    WORD32 *pi4_category_count)
+{
+    sao_ctxt_t *ps_sao_ctxt = (sao_ctxt_t *)pv_sao_ctxt;
+
+    /* The 90 degree class has no horizontal neighbour, the 0 degree */
+    /* class no vertical one                                         */
+    WORD32 has_horz_nbr = (SAO_EDGE_90_DEG != eo_sao_class);
+    WORD32 has_vert_nbr = (SAO_EDGE_0_DEG != eo_sao_class);
+
+    /* Position of this CTB relative to the picture borders */
+    WORD32 at_left = (0 == ps_sao_ctxt->i4_ctb_x);
+    WORD32 at_right = ((ps_sao_ctxt->i4_ctb_x + 1) == ps_sao_ctxt->ps_sps->i2_pic_wd_in_ctb);
+    WORD32 at_top = (0 == ps_sao_ctxt->i4_ctb_y);
+    WORD32 at_bottom = ((ps_sao_ctxt->i4_ctb_y + 1) == ps_sao_ctxt->ps_sps->i2_pic_ht_in_ctb);
+
+    WORD32 recon_strd = ps_sao_ctxt->i4_cur_luma_recon_stride;
+    WORD32 src_strd = ps_sao_ctxt->i4_cur_luma_src_stride;
+
+    /* Default window: the whole SAO block */
+    WORD32 row_start = 0;
+    WORD32 row_end = ps_sao_ctxt->i4_sao_blk_ht;
+    WORD32 col_start = 0;
+    WORD32 col_end = ps_sao_ctxt->i4_sao_blk_wd;
+    WORD32 row_offset = 0, col_offset = 0;
+    WORD32 row, col;
+
+    /* Shrink the window at picture borders where a neighbour would be */
+    /* missing                                                         */
+    if(has_horz_nbr)
+    {
+        if(at_left)
+        {
+            col_start = 1;
+        }
+        if(at_right)
+        {
+            col_end -= 1;
+        }
+    }
+    if(has_vert_nbr)
+    {
+        if(at_top)
+        {
+            row_start = 1;
+        }
+        if(at_bottom)
+        {
+            row_end -= 1;
+        }
+    }
+
+    /* Displacement from the current sample to neighbour 'b'; neighbour */
+    /* 'a' lies at the mirrored position                                */
+    if(SAO_EDGE_0_DEG == eo_sao_class)
+    {
+        row_offset = 0;
+        col_offset = 1;
+    }
+    else if(SAO_EDGE_90_DEG == eo_sao_class)
+    {
+        row_offset = 1;
+        col_offset = 0;
+    }
+    else if(SAO_EDGE_135_DEG == eo_sao_class)
+    {
+        row_offset = 1;
+        col_offset = 1;
+    }
+    else if(SAO_EDGE_45_DEG == eo_sao_class)
+    {
+        row_offset = 1;
+        col_offset = -1;
+    }
+
+    for(row = row_start; row < row_end; row++)
+    {
+        /* Column-independent parts of the four buffer indices */
+        WORD32 cur_off = row * recon_strd;
+        WORD32 nbr_a_off = (row - row_offset) * recon_strd - col_offset;
+        WORD32 nbr_b_off = (row + row_offset) * recon_strd + col_offset;
+        WORD32 src_off = row * src_strd;
+
+        for(col = col_start; col < col_end; col++)
+        {
+            WORD32 c = ps_sao_ctxt->pu1_cur_luma_recon_buf[col + cur_off];
+            WORD32 a = ps_sao_ctxt->pu1_cur_luma_recon_buf[col + nbr_a_off];
+            WORD32 b = ps_sao_ctxt->pu1_cur_luma_recon_buf[col + nbr_b_off];
+            /* Source minus recon at the current position */
+            WORD32 pel_error = ps_sao_ctxt->pu1_cur_luma_src_buf[col + src_off] - c;
+            /* Category index in the range 0..4 */
+            WORD32 edgeidx = 2 + SIGN(c - a) + SIGN(c - b);
+
+            /* Samples that already match the source are not counted */
+            if(0 != pel_error)
+            {
+                pi4_acc_error_category[edgeidx] += pel_error;
+                pi4_category_count[edgeidx]++;
+            }
+        }
+    }
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_compute_area_of_valid_cus_in_ctb \endif
+*
+* \brief
+*
+*
+*****************************************************************************
+*/
+WORD32 ihevce_compute_area_of_valid_cus_in_ctb(cur_ctb_cu_tree_t *ps_cu_tree)
+{
+    /* A missing node contributes nothing */
+    if(NULL == ps_cu_tree)
+    {
+        return 0;
+    }
+
+    /* A valid node contributes its own square area; its children are */
+    /* not visited                                                    */
+    if(ps_cu_tree->is_node_valid)
+    {
+        return ps_cu_tree->u1_cu_size * ps_cu_tree->u1_cu_size;
+    }
+
+    /* Otherwise the area is whatever the four quadrants add up to */
+    return ihevce_compute_area_of_valid_cus_in_ctb(ps_cu_tree->ps_child_node_tl) +
+           ihevce_compute_area_of_valid_cus_in_ctb(ps_cu_tree->ps_child_node_tr) +
+           ihevce_compute_area_of_valid_cus_in_ctb(ps_cu_tree->ps_child_node_bl) +
+           ihevce_compute_area_of_valid_cus_in_ctb(ps_cu_tree->ps_child_node_br);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_create_cuNode_children \endif
+*
+* \brief
+*    Makes four consecutive entries of the node array the TL, TR, BL and BR
+*    children of ps_cu_tree_cur_node.
+*
+* \param[in] ps_cu_tree_root : start of the node array
+* \param[in,out] ps_cu_tree_cur_node : node whose child pointers are set
+* \param[in] nodes_already_created : array index of the first child (TL)
+*
+* \return
+*    Number of array entries handed out, always 4
+*****************************************************************************
+*/
+static WORD32 ihevce_create_cuNode_children(
+    cur_ctb_cu_tree_t *ps_cu_tree_root,
+    cur_ctb_cu_tree_t *ps_cu_tree_cur_node,
+    WORD32 nodes_already_created)
+{
+    cur_ctb_cu_tree_t *ps_first_child;
+
+    /* The four children sit back to back in the array, beginning at the */
+    /* entry indexed by nodes_already_created                            */
+    ps_first_child = ps_cu_tree_root + nodes_already_created;
+
+    /* Every child is linked unconditionally; none is left NULL here */
+    ps_cu_tree_cur_node->ps_child_node_tl = ps_first_child;
+    ps_cu_tree_cur_node->ps_child_node_tr = ps_first_child + 1;
+    ps_cu_tree_cur_node->ps_child_node_bl = ps_first_child + 2;
+    ps_cu_tree_cur_node->ps_child_node_br = ps_first_child + 3;
+
+    /* Report how many array entries were used up */
+    return 4;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_cu_tree_init \endif
+*
+* \brief
+*    Fills the node ps_cu_tree for the given tree_depth and, for depths other
+*    than 3, attaches four children from ps_cu_tree_root and recurses into them
+*****************************************************************************
+*/
+void ihevce_cu_tree_init(
+ cur_ctb_cu_tree_t *ps_cu_tree,
+ cur_ctb_cu_tree_t *ps_cu_tree_root,
+ WORD32 *pi4_nodes_created_in_cu_tree,
+ WORD32 tree_depth,
+ CU_POS_T e_grandparent_blk_pos,
+ CU_POS_T e_parent_blk_pos,
+ CU_POS_T e_cur_blk_pos)
+{
+ WORD32 cu_pos_x = 0;
+ WORD32 cu_pos_y = 0;
+ WORD32 cu_size = 0;
+
+ WORD32 children_nodes_required = 1; /* cleared only for depth 3 (8x8) */
+ WORD32 node_validity = 0; /* passed as the valid flag of the node being filled */
+
+ switch(tree_depth) /* NOTE(review): no default case - a depth outside 0..3 keeps cu_size 0 and still recurses */
+ {
+ case 0:
+ {
+ /* 64x64 block */
+ cu_size = 64;
+ cu_pos_x = 0;
+ cu_pos_y = 0;
+
+ break;
+ }
+ case 1:
+ {
+ /* 32x32 block */
+ cu_size = 32;
+
+ /* Explanation for logic below - */
+ /* * pos_x and pos_y are in units of 8x8 CU's */
+ /* * pos_x = 0 for TL and BL children */
+ /* * pos_x = 4 for TR and BR children */
+ /* * pos_y = 0 for TL and TR children */
+ /* * pos_y = 4 for BL and BR children */
+ cu_pos_x = (e_cur_blk_pos & 1) << 2;
+ cu_pos_y = (e_cur_blk_pos & 2) << 1;
+
+ break;
+ }
+ case 2:
+ {
+ /* 16x16 block */
+ WORD32 cu_pos_x_parent;
+ WORD32 cu_pos_y_parent;
+
+ cu_size = 16;
+
+ /* Explanation for logic below - */
+ /* Parent (32x32) offset is 0 or 4; this block adds a further 0 or 2 */
+ cu_pos_x_parent = (e_parent_blk_pos & 1) << 2;
+ cu_pos_y_parent = (e_parent_blk_pos & 2) << 1;
+ cu_pos_x = cu_pos_x_parent + ((e_cur_blk_pos & 1) << 1);
+ cu_pos_y = cu_pos_y_parent + (e_cur_blk_pos & 2);
+
+ break;
+ }
+ case 3:
+ {
+ /* 8x8 block */
+ WORD32 cu_pos_x_grandparent;
+ WORD32 cu_pos_y_grandparent;
+
+ WORD32 cu_pos_x_parent;
+ WORD32 cu_pos_y_parent;
+
+ cu_size = 8;
+
+ cu_pos_x_grandparent = (e_grandparent_blk_pos & 1) << 2;
+ cu_pos_y_grandparent = (e_grandparent_blk_pos & 2) << 1;
+ cu_pos_x_parent = cu_pos_x_grandparent + ((e_parent_blk_pos & 1) << 1);
+ cu_pos_y_parent = cu_pos_y_grandparent + (e_parent_blk_pos & 2);
+ cu_pos_x = cu_pos_x_parent + (e_cur_blk_pos & 1);
+ cu_pos_y = cu_pos_y_parent + ((e_cur_blk_pos & 2) >> 1);
+
+ children_nodes_required = 0; /* recursion ends at this depth */
+
+ break;
+ }
+ }
+
+ /* Fill the current cu_tree node (last argument 1 is the inter_eval_enable value) */
+ CU_TREE_NODE_FILL(ps_cu_tree, node_validity, cu_pos_x, cu_pos_y, cu_size, 1);
+
+ if(children_nodes_required)
+ {
+ tree_depth++; /* the children are initialised one level deeper */
+
+ (*pi4_nodes_created_in_cu_tree) += ihevce_create_cuNode_children(
+ ps_cu_tree_root, ps_cu_tree, (*pi4_nodes_created_in_cu_tree));
+
+ ihevce_cu_tree_init(
+ ps_cu_tree->ps_child_node_tl,
+ ps_cu_tree_root,
+ pi4_nodes_created_in_cu_tree,
+ tree_depth,
+ e_parent_blk_pos, /* this node's parent is the child's grandparent */
+ e_cur_blk_pos, /* this node is the child's parent */
+ POS_TL);
+
+ ihevce_cu_tree_init(
+ ps_cu_tree->ps_child_node_tr,
+ ps_cu_tree_root,
+ pi4_nodes_created_in_cu_tree,
+ tree_depth,
+ e_parent_blk_pos,
+ e_cur_blk_pos,
+ POS_TR);
+
+ ihevce_cu_tree_init(
+ ps_cu_tree->ps_child_node_bl,
+ ps_cu_tree_root,
+ pi4_nodes_created_in_cu_tree,
+ tree_depth,
+ e_parent_blk_pos,
+ e_cur_blk_pos,
+ POS_BL);
+
+ ihevce_cu_tree_init(
+ ps_cu_tree->ps_child_node_br,
+ ps_cu_tree_root,
+ pi4_nodes_created_in_cu_tree,
+ tree_depth,
+ e_parent_blk_pos,
+ e_cur_blk_pos,
+ POS_BR);
+ }
+ else
+ {
+ NULLIFY_THE_CHILDREN_NODES(ps_cu_tree);
+ }
+}
diff --git a/encoder/ihevce_common_utils.h b/encoder/ihevce_common_utils.h
new file mode 100644
index 0000000..08f6f02
--- /dev/null
+++ b/encoder/ihevce_common_utils.h
@@ -0,0 +1,217 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file ihevce_common_utils.h
+*
+* @brief
+* Contains the declarations and definitions of common utils for encoder
+*
+* @author
+* ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_COMMON_UTILS_H_
+#define _IHEVCE_COMMON_UTILS_H_
+
+#include <math.h>
+#include <limits.h>
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+* @macro IHEVCE_WT_PRED
+* @brief Weighted sum of two samples: ((w0 * p0 + w1 * p1) >> shift) + rnd; each argument is parenthesized
+******************************************************************************
+*/
+#define IHEVCE_WT_PRED(p0, p1, w0, w1, rnd, shift) \
+    (((((WORD32)(w0)) * ((WORD32)(p0)) + ((WORD32)(w1)) * ((WORD32)(p1))) >> (shift)) + (rnd))
+
+#define SORT_PRIMARY_INTTYPE_ARRAY_AND_REORDER_GENERIC_COMPANION_ARRAY( \
+ primary_array, companion_array, array_length, type_companion) \
+ { \
+ WORD32 i, j; \
+ /* Orders primary_array ascending; i and j are local to this block, so arguments must not use those names */ \
+ for(i = 0; i < (array_length - 1); i++) \
+ { \
+ for(j = (i + 1); j < array_length; j++) \
+ { \
+ if(primary_array[i] > primary_array[j]) \
+ { \
+ type_companion t; \
+ /* primary entries go through SWAP (defined outside this header) */ \
+ SWAP(primary_array[i], primary_array[j]); \
+ /* companion entries are exchanged through a temporary of type_companion */ \
+ t = companion_array[i]; \
+ companion_array[i] = companion_array[j]; \
+ companion_array[j] = t; \
+ } \
+ } \
+ } \
+ }
+
+#define SORT_PRIMARY_INTTYPE_ARRAY_AND_REORDER_INTTYPE_COMPANION_ARRAY( \
+    primary_array, companion_array, array_length) \
+    { \
+        WORD32 i, j; \
+        /* Orders primary_array ascending and applies every exchange to companion_array as well */ \
+        for(i = 0; i < (array_length - 1); i++) \
+        { \
+            for(j = (i + 1); j < array_length; j++) \
+            { \
+                if(primary_array[i] > primary_array[j]) \
+                { \
+                    /* Both arrays go through SWAP, so no typed temporary is declared here */ \
+                    \
+                    SWAP(primary_array[i], primary_array[j]); \
+                    SWAP(companion_array[i], companion_array[j]); \
+                } \
+            } \
+        } \
+    }
+
+#define SORT_INTTYPE_ARRAY(primary_array, array_length) \
+ { \
+ WORD32 i, j; \
+ /* Orders primary_array ascending in place; i and j are local to this block, so arguments must not use those names */ \
+ for(i = 0; i < (array_length - 1); i++) \
+ { \
+ for(j = (i + 1); j < array_length; j++) \
+ { \
+ if(primary_array[i] > primary_array[j]) \
+ { \
+ SWAP(primary_array[i], primary_array[j]); \
+ } \
+ } \
+ } \
+ }
+
+#define SET_BIT(x, bitpos) ((x) | (1 << (bitpos))) /* yields x with bit 'bitpos' set; x itself is not modified. NOTE(review): the shifted 1 is a signed int - confirm bitpos 31 is never passed */
+
+#define CLEAR_BIT(x, bitpos) ((x) & (~(1 << (bitpos)))) /* yields x with bit 'bitpos' cleared; x itself is not modified */
+
+#define CU_TREE_NODE_FILL(ps_node, valid_flag, posx, posy, size, inter_eval_enable) \
+ { /* writes six fields of *ps_node; the arguments are expanded without parentheses */ \
+ ps_node->is_node_valid = valid_flag; \
+ ps_node->u1_cu_size = size; \
+ ps_node->u1_intra_eval_enable = 0; /* always cleared; no macro argument controls it */ \
+ ps_node->b3_cu_pos_x = posx; \
+ ps_node->b3_cu_pos_y = posy; \
+ ps_node->u1_inter_eval_enable = inter_eval_enable; \
+ }
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+void ihevce_copy_2d(
+ UWORD8 *pu1_dst,
+ WORD32 dst_stride,
+ UWORD8 *pu1_src,
+ WORD32 src_stride,
+ WORD32 blk_wd,
+ WORD32 blk_ht);
+
+void ihevce_2d_square_copy_luma(
+ void *p_dst,
+ WORD32 dst_strd,
+ void *p_src,
+ WORD32 src_strd,
+ WORD32 num_cols_to_copy,
+ WORD32 unit_size);
+
+void ihevce_wt_avg_2d(
+ UWORD8 *pu1_pred0,
+ UWORD8 *pu1_pred1,
+ WORD32 pred0_strd,
+ WORD32 pred1_strd,
+ WORD32 wd,
+ WORD32 ht,
+ UWORD8 *pu1_dst,
+ WORD32 dst_strd,
+ WORD32 w0,
+ WORD32 w1,
+ WORD32 o0,
+ WORD32 o1,
+ WORD32 log_wdc);
+
+void ihevce_itrans_recon_dc(
+ UWORD8 *pu1_pred,
+ WORD32 pred_strd,
+ UWORD8 *pu1_dst,
+ WORD32 dst_strd,
+ WORD32 trans_size,
+ WORD16 i2_deq_value,
+ CHROMA_PLANE_ID_T e_chroma_plane);
+
+WORD32 ihevce_find_num_clusters_of_identical_points_1D(
+ UWORD8 *pu1_inp_array,
+ UWORD8 *pu1_out_array,
+ UWORD8 *pu1_freq_of_out_data_in_inp,
+ WORD32 i4_num_inp_array_elements);
+
+void ihevce_scale_mv(mv_t *ps_mv, WORD32 i4_poc_to, WORD32 i4_poc_from, WORD32 i4_curr_poc);
+
+WORD32 ihevce_compare_pu_mv_t(
+ pu_mv_t *ps_1, pu_mv_t *ps_2, WORD32 i4_pred_mode_1, WORD32 i4_pred_mode_2);
+
+void ihevce_set_pred_buf_as_free(UWORD32 *pu4_idx_array, UWORD8 u1_buf_id);
+
+UWORD8 ihevce_get_free_pred_buf_indices(
+ UWORD8 *pu1_idx_array, UWORD32 *pu4_bitfield, UWORD8 u1_num_bufs_requested);
+
+WORD32 ihevce_osal_init(void *pv_hle_ctxt);
+
+WORD32 ihevce_osal_delete(void *pv_hle_ctxt);
+
+static INLINE UWORD32 ihevce_num_ones_generic(UWORD32 bitfield)
+{
+    UWORD32 u4_pair_sums = bitfield - ((bitfield >> 1) & 0x55555555); /* one count per 2-bit field */
+    UWORD32 u4_nibble_sums = (u4_pair_sums & 0x33333333) + ((u4_pair_sums >> 2) & 0x33333333);
+    return (((u4_nibble_sums + (u4_nibble_sums >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24;
+}
+
+static INLINE UWORD32 ihevce_num_ones_popcnt(UWORD32 bitfield)
+{
+ return __builtin_popcount(bitfield); /* number of 1-bits in bitfield, taken from the compiler builtin */
+}
+
+WORD32 ihevce_compute_area_of_valid_cus_in_ctb(cur_ctb_cu_tree_t *ps_cu_tree);
+
+void ihevce_cu_tree_init(
+ cur_ctb_cu_tree_t *ps_cu_tree,
+ cur_ctb_cu_tree_t *ps_cu_tree_root,
+ WORD32 *pi4_nodes_created_in_cu_tree,
+ WORD32 tree_depth,
+ CU_POS_T e_grandparent_blk_pos,
+ CU_POS_T e_parent_blk_pos,
+ CU_POS_T e_cur_blk_pos);
+
+#endif /* _IHEVCE_COMMON_UTILS_H_ */
diff --git a/encoder/ihevce_deblk.c b/encoder/ihevce_deblk.c
new file mode 100644
index 0000000..5e37a71
--- /dev/null
+++ b/encoder/ihevce_deblk.c
@@ -0,0 +1,685 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ihevce_deblk.c
+*
+* @brief
+* Contains definition for the ctb level deblk function
+*
+* @author
+* ittiam
+*
+* @List of Functions:
+* ihevce_deblk_populate_qp_map()
+* ihevce_deblk_ctb()
+* ihevce_hbd_deblk_ctb() (listed here, but no definition appears in this file)
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_debug.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_deblk_tables.h"
+#include "ihevc_common_tables.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_hle_interface.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_entropy_structs.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+#include "ihevce_enc_loop_structs.h"
+#include "ihevce_common_utils.h"
+#include "ihevce_global_tables.h"
+#include "ihevce_deblk.h"
+#include "ihevce_tile_interface.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_deblk_populate_qp_map \endif
+*
+* \brief
+*    Writes the QP of every CU of one CTB row of a tile into the 4x4-level QP
+*    map used for deblocking, then saves the map's bottom row for the row below
+*****************************************************************************
+*/
+void ihevce_deblk_populate_qp_map(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ deblk_ctbrow_prms_t *ps_deblk_ctb_row_params,
+ ctb_enc_loop_out_t *ps_ctb_out_dblk,
+ WORD32 vert_ctr,
+ frm_ctb_ctxt_t *ps_frm_ctb_prms,
+ ihevce_tile_params_t *ps_col_tile_params)
+{
+ ctb_enc_loop_out_t *ps_ctb_out;
+ WORD32 ctb_ctr, ctb_start, ctb_end;
+ WORD32 tile_qp_offset, tile_qp_size, i4_offset_for_last_cu_qp;
+ /* Create the Qp map for the entire current CTB-row for deblocking purpose(only)*/
+ /* Do this iff cur pic is referred or recon dump is enabled or psnr calc is on*/
+ /*Qp of the last CU of previous CTB row*/
+ WORD8 i1_last_cu_qp;
+ /*A pointer pointing to the top 4x4 block's Qp for all CTb rows*/
+ WORD8 *pi1_qp_top_4x4_ctb_row =
+ ps_deblk_ctb_row_params->api1_qp_top_4x4_ctb_row[ps_ctxt->i4_enc_frm_id] +
+ (ps_deblk_ctb_row_params->u4_qp_top_4x4_buf_size * ps_ctxt->i4_bitrate_instance_num);
+
+ UWORD32 u4_qp_top_4x4_buf_strd = ps_deblk_ctb_row_params->u4_qp_top_4x4_buf_strd;
+
+ /*The Qp map which has to be populated*/
+ UWORD32 u4_qp_buffer_stride = ps_deblk_ctb_row_params->u4_qp_buffer_stride;
+ WORD8 *pi1_ctb_tile_qp = ps_deblk_ctb_row_params->pi1_ctb_row_qp;
+
+ /*Temporary pointers to Qp map at CTB level*/
+ WORD8 *pi1_ctb_qp_map_tile;
+
+ i4_offset_for_last_cu_qp = ps_ctxt->pi4_offset_for_last_cu_qp[ps_ctxt->i4_tile_col_idx];
+ /* total QPs to be copied for current row is : */
+ tile_qp_size = i4_offset_for_last_cu_qp + 1;
+ /*Pointing to the first CTB of current CTB row*/
+ ps_ctb_out = ps_ctb_out_dblk;
+ /* Offset req. for the row QP to the tile start */
+ tile_qp_offset = ps_col_tile_params->i4_first_ctb_x * (ps_frm_ctb_prms->i4_ctb_size / 4);
+
+ ctb_start = ps_col_tile_params->i4_first_ctb_x;
+ ctb_end =
+ (ps_col_tile_params->i4_first_ctb_x + ps_col_tile_params->i4_curr_tile_wd_in_ctb_unit);
+
+ if(vert_ctr) /*Not first CTB row of frame*/
+ {
+ /*copy from top4x4_array data stored by upper CTB-row to qp-map*/
+ memcpy(
+ pi1_ctb_tile_qp,
+ (pi1_qp_top_4x4_ctb_row + (vert_ctr - 1) * u4_qp_top_4x4_buf_strd + tile_qp_offset),
+ tile_qp_size);
+ }
+
+ /*pi1_ctb_tile_qp points to the top 4x4 row in the Qp-map.
+ Now point pi1_ctb_qp_map_tile to the current 4x4 row*/
+ pi1_ctb_qp_map_tile = pi1_ctb_tile_qp + u4_qp_buffer_stride;
+
+ /* This i1_last_cu_qp will be conditionally overwritten later */
+ i1_last_cu_qp = ps_ctxt->i4_frame_qp;
+
+ /* -- Loop over all the CTBs in a CTB-row for populating the Qp-map ----- */
+ for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
+ {
+ WORD32 cu_ctr;
+ cu_enc_loop_out_t *ps_curr_cu;
+
+ /* Update i1_last_cu_qp based on CTB's position in tile */
+ update_last_coded_cu_qp(
+ (ps_deblk_ctb_row_params->pi1_ctb_row_qp + i4_offset_for_last_cu_qp),
+ ps_ctxt->i1_entropy_coding_sync_enabled_flag,
+ ps_frm_ctb_prms,
+ ps_ctxt->i4_frame_qp,
+ vert_ctr,
+ ctb_ctr,
+ &i1_last_cu_qp);
+
+ /* store the pointer of first cu of current ctb */
+ ps_curr_cu = ps_ctb_out->ps_enc_cu;
+
+ /* --------- loop over all the CUs in the CTB --------------- */
+ for(cu_ctr = 0; cu_ctr < ps_ctb_out->u1_num_cus_in_ctb; cu_ctr++)
+ {
+ UWORD8 u1_vert_4x4, u1_horz_4x4; //for_loop counters
+ WORD8 *pi1_cu_qp_map;
+
+ WORD8 i1_qp, i1_qp_left, i1_qp_top;
+
+ pi1_cu_qp_map = pi1_ctb_qp_map_tile +
+ (ps_curr_cu->b3_cu_pos_y * 2) * u4_qp_buffer_stride +
+ (ps_curr_cu->b3_cu_pos_x * 2);
+
+ /*If the current CU is coded in skip_mode/zero_CBF then
+ for deblocking, Qp of the previously coded CU will be used*/
+ if(ps_curr_cu->b1_skip_flag || ps_curr_cu->b1_no_residual_syntax_flag)
+ {
+ if(0 == ps_curr_cu->b3_cu_pos_x)
+ i1_qp_left = i1_last_cu_qp;
+ else
+ i1_qp_left = *(pi1_cu_qp_map - 1);
+
+ if(0 == ps_curr_cu->b3_cu_pos_y)
+ i1_qp_top = i1_last_cu_qp;
+ else
+ i1_qp_top = *(pi1_cu_qp_map - u4_qp_buffer_stride);
+
+ i1_qp = (i1_qp_left + i1_qp_top + 1) / 2; /* mean of left and top QP, 1 added before halving */
+
+ if(0 == ps_curr_cu->b1_first_cu_in_qg)
+ {
+ i1_qp = i1_last_cu_qp; /* the mean above is kept only when b1_first_cu_in_qg is set */
+ }
+ }
+ else
+ {
+ i1_qp = ps_curr_cu->i1_cu_qp;
+ }
+
+ i1_last_cu_qp = i1_qp;
+
+ /*---- Loop for populating Qp map for the current CU -------*/
+ for(u1_vert_4x4 = 0; u1_vert_4x4 < (ps_curr_cu->b4_cu_size * 2); u1_vert_4x4++)
+ {
+ for(u1_horz_4x4 = 0; u1_horz_4x4 < (ps_curr_cu->b4_cu_size * 2); u1_horz_4x4++)
+ {
+ pi1_cu_qp_map[u1_horz_4x4] = i1_qp;
+ }
+ pi1_cu_qp_map += u4_qp_buffer_stride;
+ }
+ /*Next CU. Qp map is at 4x4 level but b4_cu_size is at 8x8 level, hence the doubling above*/
+ ps_curr_cu++;
+ }
+ pi1_ctb_qp_map_tile += (ps_frm_ctb_prms->i4_ctb_size / 4); //one qp per 4x4 block.
+ ps_ctb_out++;
+
+ } //for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
+
+ /*fill into the top4x4_array Qp for the lower CTB-row from bottom part of cur CTB row*/
+ memcpy(
+ (pi1_qp_top_4x4_ctb_row + vert_ctr * u4_qp_top_4x4_buf_strd + tile_qp_offset),
+ (pi1_ctb_tile_qp + (ps_frm_ctb_prms->i4_ctb_size / 4) * u4_qp_buffer_stride),
+ tile_qp_size);
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Deblock CTB level function.
+*
+* @par Description:
+* For a given CTB, deblocking on both vertical and
+* horizontal edges is done. Both the luma and chroma
+* blocks are processed
+*
+* @param[in]
+* ps_deblk: Pointer to the deblock context
+* last_col: if the CTB is the last CTB of current CTB-row value is 1 else 0
+* ps_deblk_ctb_row_params: deblk ctb row params
+*
+* @returns
+* None
+* @remarks
+* Edges are processed in this order: luma vertical, chroma vertical,
+* luma horizontal, chroma horizontal
+*******************************************************************************
+*/
+void ihevce_deblk_ctb(
+ deblk_ctb_params_t *ps_deblk, WORD32 last_col, deblk_ctbrow_prms_t *ps_deblk_ctb_row_params)
+{
+ WORD32 ctb_size;
+ UWORD32 u4_bs;
+ WORD32 bs_lz; /*Leading zeros in boundary strength*/
+ WORD32 qp_p, qp_q;
+ UWORD8 *pu1_src;
+ UWORD8 *pu1_src_uv;
+ UWORD8 *pu1_curr_src;
+ WORD32 col_size;
+ WORD32 col, row, i4_edge_count;
+ WORD32 num_columns_for_vert_filt;
+ WORD32 num_blks_for_vert_filt;
+ WORD32 num_rows_for_horz_filt;
+
+ ihevc_deblk_chroma_horz_ft *pf_deblk_chroma_horz;
+ ihevc_deblk_chroma_horz_ft *pf_deblk_chroma_vert; /* NOTE(review): declared with the horz function type - confirm the vert type has the same signature */
+
+ /* Filter flags are packed along with the qp info.
+ 6 out of the 8 bits correspond to qp and 1 to filter flag. */
+ /* filter_p and filter_q are initialized to 1.
+ They are to be extracted along with the qp info. */
+ WORD32 filter_p, filter_q;
+ WORD8 *pi1_ctb_row_qp_p, *pi1_ctb_row_qp_temp;
+ WORD8 *pi1_ctb_row_qp_q;
+
+ func_selector_t *ps_func_slector = ps_deblk->ps_func_selector;
+
+ WORD32 left_luma_edge_filter_flag = ps_deblk->i4_deblock_left_ctb_edge;
+ WORD32 top_luma_edge_filter_flag = ps_deblk->i4_deblock_top_ctb_edge;
+ WORD32 left_chroma_edge_filter_flag = ps_deblk->i4_deblock_left_ctb_edge;
+ WORD32 top_chroma_edge_filter_flag = ps_deblk->i4_deblock_top_ctb_edge;
+ UWORD32 *bs_vert = ps_deblk_ctb_row_params->pu4_ctb_row_bs_vert;
+ UWORD32 *bs_horz = ps_deblk_ctb_row_params->pu4_ctb_row_bs_horz;
+ UWORD32 *bs_vert_uv = bs_vert;
+ UWORD32 *bs_horz_uv = bs_horz;
+ UWORD32 u4_qp_buffer_stride = ps_deblk_ctb_row_params->u4_qp_buffer_stride;
+ UWORD8 u1_is_422 = (ps_deblk->u1_chroma_array_type == 2);
+
+ if(u1_is_422)
+ {
+ pf_deblk_chroma_horz = ps_func_slector->ihevc_deblk_422chroma_horz_fptr;
+ pf_deblk_chroma_vert = ps_func_slector->ihevc_deblk_422chroma_vert_fptr;
+ }
+ else
+ {
+ pf_deblk_chroma_horz = ps_func_slector->ihevc_deblk_chroma_horz_fptr;
+ pf_deblk_chroma_vert = ps_func_slector->ihevc_deblk_chroma_vert_fptr;
+ }
+
+ ctb_size = ps_deblk->i4_ctb_size;
+
+ /* The PCM filter flag and bypass trans flag are always set to 1 in encoder profile */
+ /* Can be removed during optimization */
+ filter_q = 1;
+ filter_p = 1;
+
+ //////////////////////////////////////////////////////////////////////////////
+ /* Luma Vertical Edge */
+ pu1_src = ps_deblk->pu1_ctb_y;
+ pi1_ctb_row_qp_temp = ps_deblk_ctb_row_params->pi1_ctb_row_qp + u4_qp_buffer_stride;
+ num_columns_for_vert_filt = ctb_size / 8;
+ num_blks_for_vert_filt = ctb_size / 4;
+
+ for(i4_edge_count = 0; i4_edge_count < num_columns_for_vert_filt; i4_edge_count++)
+ {
+ u4_bs = *bs_vert;
+ /* get the current 4x4 vertical pointer */
+ pu1_curr_src = pu1_src;
+ pi1_ctb_row_qp_q = pi1_ctb_row_qp_temp + (i4_edge_count << 1);
+
+ /* If the current edge is not the 1st edge of frame or slice */
+ if(1 == left_luma_edge_filter_flag)
+ {
+ for(row = 0; row < num_blks_for_vert_filt;)
+ {
+ bs_lz = CLZ(u4_bs) >> 1;
+ /* If BS = 0, skip the edge filtering */
+ if(0 != bs_lz)
+ {
+ u4_bs = u4_bs << (bs_lz << 1);
+ pu1_curr_src += ((bs_lz << 2) * ps_deblk->i4_luma_pic_stride);
+ pi1_ctb_row_qp_q += (bs_lz * u4_qp_buffer_stride);
+ row += bs_lz;
+ continue;
+ }
+ qp_p = *(pi1_ctb_row_qp_q - 1);
+ qp_q = *pi1_ctb_row_qp_q;
+
+ ps_func_slector->ihevc_deblk_luma_vert_fptr(
+ pu1_curr_src,
+ ps_deblk->i4_luma_pic_stride,
+ (u4_bs >> 30), /* bits 31 and 30 are extracted */
+ qp_p,
+ qp_q,
+ ps_deblk->i4_beta_offset_div2,
+ ps_deblk->i4_tc_offset_div2,
+ filter_p,
+ filter_q);
+
+ u4_bs = u4_bs << 2;
+ pu1_curr_src += (ps_deblk->i4_luma_pic_stride << 2);
+ pi1_ctb_row_qp_q += u4_qp_buffer_stride;
+ row++;
+ }
+ }
+
+ /* Increment the boundary strength and src pointer for the next column */
+ bs_vert += 1;
+ pu1_src += 8;
+
+ /* Enable for the next edges of ctb*/
+ left_luma_edge_filter_flag = 1;
+ }
+
+ //////////////////////////////////////////////////////////////////////////////
+ /* Chroma Vertical Edge */
+ pu1_src_uv = ps_deblk->pu1_ctb_uv;
+ pi1_ctb_row_qp_temp = ps_deblk_ctb_row_params->pi1_ctb_row_qp + u4_qp_buffer_stride;
+
+ /* Column spacing is 4 for each chroma component */
+ /* and hence 8 when they are interleaved. */
+ /* But, only those columns with a x co-ordinate */
+ /* that is divisible by 8 are filtered */
+ /* Hence, denominator is 16 */
+ num_columns_for_vert_filt = ctb_size / 16;
+ /* blk_size is 4 and chroma_ctb_height is ctb_size/2 */
+ num_blks_for_vert_filt = (0 == u1_is_422) ? (ctb_size / 2) / 4 : (ctb_size) / 4;
+
+ for(i4_edge_count = 0; i4_edge_count < num_columns_for_vert_filt; i4_edge_count++)
+ {
+ /* Every alternate boundary strength value is used for 420 chroma */
+ u4_bs = *(bs_vert_uv) & ((0 == u1_is_422) ? 0x88888888 : 0xaaaaaaaa);
+ pu1_curr_src = pu1_src_uv;
+ pi1_ctb_row_qp_q = pi1_ctb_row_qp_temp + (i4_edge_count << 2);
+
+ /* If the current edge is not the 1st edge of frame or slice */
+ if(1 == left_chroma_edge_filter_flag)
+ {
+ /* Each 'bs' is 2 bits long */
+ /* The divby4 in 420 is */
+ /* necessitated by the fact that */
+ /* chroma ctb_ht is half that of luma */
+ WORD32 i4_log2_num_bits_per_bs = ((0 == u1_is_422) + 1);
+ /* i4_sub_heightC = 2 for 420 */
+ /* i4_sub_heightC = 1 for 422 */
+ WORD32 i4_sub_heightC = i4_log2_num_bits_per_bs;
+
+ for(row = 0; row < num_blks_for_vert_filt;)
+ {
+ bs_lz = CLZ(u4_bs) >> i4_log2_num_bits_per_bs;
+
+ /* If BS = 0, skip the edge filtering */
+ if(0 != bs_lz)
+ {
+ row += bs_lz;
+ u4_bs = u4_bs << (bs_lz << i4_log2_num_bits_per_bs);
+ /* '<<2' because of blk_size being 4x4 */
+ pu1_curr_src += ((bs_lz << 2) * ps_deblk->i4_chroma_pic_stride);
+
+ /* In 420, every alternate QP row is skipped, because chroma height is half that of luma */
+ /* In 422, no row is skipped */
+ pi1_ctb_row_qp_q += ((u4_qp_buffer_stride << (i4_sub_heightC - 1)) * bs_lz);
+
+ continue;
+ }
+
+ qp_p = *(pi1_ctb_row_qp_q - i4_sub_heightC);
+ qp_q = *pi1_ctb_row_qp_q;
+
+ pf_deblk_chroma_vert(
+ pu1_curr_src,
+ ps_deblk->i4_chroma_pic_stride,
+ qp_p,
+ qp_q,
+ ps_deblk->i4_cb_qp_indx_offset,
+ ps_deblk->i4_cr_qp_indx_offset,
+ ps_deblk->i4_tc_offset_div2,
+ filter_p,
+ filter_q);
+
+ u4_bs = u4_bs << (1 << i4_log2_num_bits_per_bs);
+ pu1_curr_src += (ps_deblk->i4_chroma_pic_stride << 2);
+ pi1_ctb_row_qp_q += (u4_qp_buffer_stride << (i4_sub_heightC - 1));
+ row++;
+ }
+ }
+ /* Increment the boundary strength by 2 and src pointer for the next column */
+ /* As the edge filtering happens for alternate column */
+ bs_vert_uv += 2;
+ pu1_src_uv += 16;
+ left_chroma_edge_filter_flag = 1;
+ }
+
+ //////////////////////////////////////////////////////////////////////////////
+
+ /* Luma Horizontal Edge */
+ pu1_src = ps_deblk->pu1_ctb_y;
+ col_size = ctb_size / 4;
+
+ /* If the ctb is the 1st ctb of row, */
+ /* Decrement the loop count to exclude filtering of last 4 pixels */
+ /* else shift the src pointer by 4 pixels to do filtering for shifted ctb */
+ if(ps_deblk->i4_deblock_left_ctb_edge == 1)
+ {
+ pu1_src -= 4;
+ /*If the ctb is at the horizontal end of PIC*/
+ /* Increase the column size to filter last 4 pixels */
+ col_size += last_col;
+ }
+ else if(!last_col)
+ {
+ col_size -= 1;
+ }
+ {
+ UWORD8 *pu1_src_temp = pu1_src;
+ //pi1_ctb_row_qp_p and pi1_ctb_row_qp_q point to alternate rows
+ pi1_ctb_row_qp_p = ps_deblk_ctb_row_params->pi1_ctb_row_qp;
+
+ num_rows_for_horz_filt = ctb_size / 8;
+
+ for(i4_edge_count = 0; i4_edge_count < num_rows_for_horz_filt; i4_edge_count++)
+ {
+ WORD32 col_size_temp = col_size;
+ pi1_ctb_row_qp_q = pi1_ctb_row_qp_p + u4_qp_buffer_stride;
+ pu1_src = pu1_src_temp + (i4_edge_count * 8 * ps_deblk->i4_luma_pic_stride);
+
+ if(1 == top_luma_edge_filter_flag)
+ {
+ //Deblock the last vertical_4x4_column of previous CTB
+ if(ps_deblk->i4_deblock_left_ctb_edge == 1)
+ {
+ u4_bs = ps_deblk->au1_prev_bs[i4_edge_count] & 0x3;
+ if(u4_bs != 0)
+ {
+ qp_p = *(pi1_ctb_row_qp_p - 1);
+ qp_q = *(pi1_ctb_row_qp_q - 1);
+
+ ps_func_slector->ihevc_deblk_luma_horz_fptr(
+ pu1_src,
+ ps_deblk->i4_luma_pic_stride,
+ u4_bs,
+ qp_p,
+ qp_q,
+ ps_deblk->i4_beta_offset_div2,
+ ps_deblk->i4_tc_offset_div2,
+ 1,
+ 1);
+ }
+
+ pu1_src += 4;
+ col_size_temp--;
+ }
+ //Start deblocking current CTB
+ u4_bs = *(bs_horz);
+
+ for(col = 0; col < col_size_temp;)
+ {
+ bs_lz = CLZ(u4_bs) >> 1;
+ if(0 != bs_lz)
+ {
+ u4_bs = u4_bs << (bs_lz << 1);
+ pu1_src += 4 * bs_lz;
+ col += bs_lz;
+ continue;
+ }
+ qp_p = *(pi1_ctb_row_qp_p + col);
+ qp_q = *(pi1_ctb_row_qp_q + col);
+
+ ps_func_slector->ihevc_deblk_luma_horz_fptr(
+ pu1_src,
+ ps_deblk->i4_luma_pic_stride,
+ u4_bs >> (sizeof(u4_bs) * 8 - 2),
+ qp_p,
+ qp_q,
+ ps_deblk->i4_beta_offset_div2,
+ ps_deblk->i4_tc_offset_div2,
+ filter_p,
+ filter_q);
+
+ pu1_src += 4;
+ u4_bs = u4_bs << 2;
+ col++;
+ }
+ //Store the last vertical_4x4 column of CTB's info for next CTB deblocking
+ u4_bs = *bs_horz;
+ ps_deblk->au1_prev_bs[i4_edge_count] =
+ (UWORD8)(((u4_bs << ((ctb_size >> 1) - 2))) >> 30);
+ }
+ bs_horz += 1;
+ pi1_ctb_row_qp_p += (u4_qp_buffer_stride << 1);
+ top_luma_edge_filter_flag = 1;
+ }
+ }
+
+ //////////////////////////////////////////////////////////////////////////////
+ /* Chroma Horizontal Edge */
+ pu1_src_uv = ps_deblk->pu1_ctb_uv;
+ col_size = ctb_size / 8;
+
+ /* If the ctb is the 1st ctb of row, */
+ /* Decrement the loop count to exclude filtering of last 4 pixels */
+ /* else shift the src pointer by 8 (uv) pixels to do filtering for shifted ctb */
+ if(ps_deblk->i4_deblock_left_ctb_edge == 1)
+ {
+ pu1_src_uv -= 8;
+
+ /*If the ctb is at the horizontal end of PIC*/
+ /* Increase the column size to filter last 8 (uv) pixels */
+ col_size += last_col;
+ }
+ else if(!last_col)
+ {
+ col_size--;
+ }
+
+ {
+ UWORD8 *pu1_src_temp = pu1_src_uv;
+
+ //pi1_ctb_row_qp_p and pi1_ctb_row_qp_q point to alternate rows
+ pi1_ctb_row_qp_p = ps_deblk_ctb_row_params->pi1_ctb_row_qp;
+ num_rows_for_horz_filt = ctb_size / ((0 == u1_is_422) ? 16 : 8);
+
+ for(i4_edge_count = 0; i4_edge_count < num_rows_for_horz_filt; i4_edge_count++)
+ {
+ WORD32 col_size_temp = col_size;
+
+ pi1_ctb_row_qp_q = pi1_ctb_row_qp_p + u4_qp_buffer_stride;
+ pu1_src_uv = pu1_src_temp + (i4_edge_count * 8 * ps_deblk->i4_chroma_pic_stride);
+
+ if(1 == top_chroma_edge_filter_flag)
+ {
+ //Deblock the last vertical _4x4_column of previous CTB
+ if(ps_deblk->i4_deblock_left_ctb_edge == 1)
+ {
+ u4_bs = ps_deblk->au1_prev_bs_uv[i4_edge_count] & 0x2;
+
+ if(u4_bs == 2)
+ {
+ qp_p = *(pi1_ctb_row_qp_p - 1);
+ qp_q = *(pi1_ctb_row_qp_q - 1);
+
+ pf_deblk_chroma_horz(
+ pu1_src_uv,
+ ps_deblk->i4_chroma_pic_stride,
+ qp_p,
+ qp_q,
+ ps_deblk->i4_cb_qp_indx_offset,
+ ps_deblk->i4_cr_qp_indx_offset,
+ ps_deblk->i4_tc_offset_div2,
+ 1,
+ 1);
+ }
+
+ pu1_src_uv += 8;
+ col_size_temp--;
+ }
+
+ //Start deblocking current CTB
+ u4_bs = *(bs_horz_uv)&0x88888888;
+
+ for(col = 0; col < col_size_temp;)
+ {
+ bs_lz = CLZ(u4_bs) >> 2;
+
+ if(0 != bs_lz)
+ {
+ u4_bs = u4_bs << (bs_lz << 2);
+ pu1_src_uv += (8 * bs_lz);
+
+ col += bs_lz;
+ continue;
+ }
+
+ qp_p = *(pi1_ctb_row_qp_p + (col << 1));
+ qp_q = *(pi1_ctb_row_qp_q + (col << 1));
+
+ pf_deblk_chroma_horz(
+ pu1_src_uv,
+ ps_deblk->i4_chroma_pic_stride,
+ qp_p,
+ qp_q,
+ ps_deblk->i4_cb_qp_indx_offset,
+ ps_deblk->i4_cr_qp_indx_offset,
+ ps_deblk->i4_tc_offset_div2,
+ filter_p,
+ filter_q);
+
+ pu1_src_uv += 8;
+ u4_bs = u4_bs << 4;
+ col++;
+ }
+
+ //Store the last vertical_4x4 column of CTB's info for next CTB deblocking
+ u4_bs = *bs_horz_uv;
+ ps_deblk->au1_prev_bs_uv[i4_edge_count] =
+ (UWORD8)(((u4_bs << ((ctb_size >> 1) - 4))) >> 30);
+ }
+
+ bs_horz_uv += ((0 == u1_is_422) + 1);
+ pi1_ctb_row_qp_p += (u4_qp_buffer_stride << ((0 == u1_is_422) + 1));
+ top_chroma_edge_filter_flag = 1;
+ }
+ }
+
+ return;
+}
diff --git a/encoder/ihevce_deblk.h b/encoder/ihevce_deblk.h
new file mode 100644
index 0000000..2e8276f
--- /dev/null
+++ b/encoder/ihevce_deblk.h
@@ -0,0 +1,78 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_deblk.h
+*
+* \brief
+* This file contains the interface definition of the deblock ctb functions
+*
+* \date
+* 06/11/2012
+*
+* \author
+* ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_DEBLK_H_
+#define _IHEVCE_DEBLK_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Variable Declarations */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+void ihevce_deblk_populate_qp_map(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ deblk_ctbrow_prms_t *ps_deblk_ctb_row_params,
+ ctb_enc_loop_out_t *ps_ctb_out_dblk,
+ WORD32 vert_ctr,
+ frm_ctb_ctxt_t *ps_frm_ctb_prms,
+ ihevce_tile_params_t *ps_col_tile_params);
+
+void ihevce_deblk_ctb(
+ deblk_ctb_params_t *ps_deblk, WORD32 last_col, deblk_ctbrow_prms_t *ps_deblk_ctb_row_params);
+
+#endif /* _IHEVCE_DEBLK_H_ */
diff --git a/encoder/ihevce_decomp_pre_intra_pass.c b/encoder/ihevce_decomp_pre_intra_pass.c
new file mode 100644
index 0000000..a6bd77e
--- /dev/null
+++ b/encoder/ihevce_decomp_pre_intra_pass.c
@@ -0,0 +1,3600 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*!
+******************************************************************************
+* \file ihevce_decomp_pre_intra_pass.c
+*
+* \brief
+* This file contains definitions related to frame decomposition done during
+* pre intra processing
+*
+* \date
+* 19/02/2013
+*
+* \author
+* Ittiam
+*
+* List of Functions
+* ihevce_intra_populate_mode_bits_cost()
+* ihevce_8x8_sad_computer()
+* ihevce_4x4_sad_computer()
+* ihevce_ed_4x4_find_best_modes()
+* ihevce_ed_calc_4x4_blk()
+* ihevce_ed_calc_8x8_blk()
+* ihevce_ed_calc_incomplete_ctb()
+* ihevce_cu_level_qp_mod()
+* ihevce_ed_calc_ctb()
+* ihevce_ed_frame_init()
+* ihevce_scale_by_2()
+* ihevce_decomp_pre_intra_process_row()
+* ihevce_decomp_pre_intra_process()
+* ihevce_decomp_pre_intra_get_num_mem_recs()
+* ihevce_decomp_pre_intra_get_mem_recs()
+* ihevce_decomp_pre_intra_init()
+* ihevce_decomp_pre_intra_frame_init()
+* ihevce_merge_sort()
+* ihevce_decomp_pre_intra_curr_frame_pre_intra_deinit()
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+#include <limits.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_debug.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_hle_interface.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_multi_thrd_funcs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_entropy_structs.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+#include "ihevce_ipe_instr_set_router.h"
+#include "ihevce_decomp_pre_intra_structs.h"
+#include "ihevce_decomp_pre_intra_pass.h"
+#include "ihevce_enc_loop_structs.h"
+#include "hme_datatype.h"
+#include "hme_interface.h"
+#include "hme_common_defs.h"
+#include "ihevce_global_tables.h"
+
+/*****************************************************************************/
+/* Typedefs: pf_ed_calc_ctb has the parameter list of ihevce_ed_calc_incomplete_ctb() */
+/*****************************************************************************/
+typedef void (*pf_ed_calc_ctb)(
+ ihevce_ed_ctxt_t *ps_ed_ctxt,
+ ihevce_ed_blk_t *ps_ed_ctb,
+ ihevce_ed_ctb_l1_t *ps_ed_ctb_l1,
+ UWORD8 *pu1_src,
+ WORD32 src_stride,
+ WORD32 num_4x4_blks_x,
+ WORD32 num_4x4_blks_y,
+ WORD32 *nbr_flags,
+ WORD32 i4_layer_id,
+ WORD32 row_block_no,
+ WORD32 col_block_no,
+ ihevce_ipe_optimised_function_list_t *ps_ipe_optimised_function_list,
+ ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list);
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+#define SATD_NOISE_FLOOR_THRESHOLD 16
+#define MINIMUM_VARIANCE 15
+#define SCALE_FACTOR_VARIANCE 20
+#define SCALE_FACTOR_VARIANCE_8x8 60
+#define MIN_SATD_THRSHLD 0
+#define MAX_SATD_THRSHLD 64
+#define SUB_NOISE_THRSHLD 0
+#define MIN_BLKS 2
+
+/*****************************************************************************/
+/* Global variables */
+/*****************************************************************************/
+
+/**
+*****************************************************************************
+* @brief list of pointers to luma intra pred functions
+*****************************************************************************
+*/
+pf_intra_pred g_apf_lum_ip[NUM_IP_FUNCS];
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_intra_populate_mode_bits_cost \endif
+*
+* \brief: fills mode_bits_cost[0 .. NUM_MODES-1] with one lambda-derived cost;
+* the neighbour mode/availability and cu_pos_y arguments are not used
+*
+*****************************************************************************
+*/
+void ihevce_intra_populate_mode_bits_cost(
+    WORD32 top_intra_mode,
+    WORD32 left_intra_mode,
+    WORD32 available_top,
+    WORD32 available_left,
+    WORD32 cu_pos_y,
+    UWORD16 *mode_bits_cost,
+    WORD32 lambda)
+{
+    /* Uniform cost (5.5 * lambda) charged for signalling any intra mode */
+    const UWORD16 u2_flat_cost = COMPUTE_RATE_COST_CLIP30(11, lambda, (LAMBDA_Q_SHIFT + 1));
+    WORD32 mode_idx = NUM_MODES;
+
+    (void)top_intra_mode;
+    (void)left_intra_mode;
+    (void)available_top;
+    (void)available_left;
+    (void)cu_pos_y;
+
+    /* Every table entry receives the same value, so fill order is irrelevant */
+    while(mode_idx-- > 0)
+        mode_bits_cost[mode_idx] = u2_flat_cost;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_8x8_sad_computer \endif
+*
+* \brief: sum of absolute differences of two 8x8 blocks (UWORD16 accumulator)
+*
+*****************************************************************************
+*/
+UWORD16
+    ihevce_8x8_sad_computer(UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD32 src_strd, WORD32 pred_strd)
+{
+    UWORD16 u2_sad_acc = 0;
+    WORD32 row, col;
+
+    /* One iteration per row; both pointers hop by their own stride after it */
+    for(row = 0; row < 8; row++)
+    {
+        for(col = 0; col < 8; col++)
+        {
+            u2_sad_acc += ABS(pu1_src[col] - pu1_pred[col]);
+        }
+
+        pu1_src += src_strd;
+        pu1_pred += pred_strd;
+    }
+
+    return u2_sad_acc;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_4x4_sad_computer \endif
+*
+* \brief: sum of absolute differences of two 4x4 blocks (UWORD16 accumulator)
+*
+*****************************************************************************
+*/
+UWORD16
+    ihevce_4x4_sad_computer(UWORD8 *pu1_src, UWORD8 *pu1_pred, WORD32 src_strd, WORD32 pred_strd)
+{
+    UWORD16 u2_sad_acc = 0;
+    WORD32 row, col;
+
+    /* One iteration per row; both pointers hop by their own stride after it */
+    for(row = 0; row < 4; row++)
+    {
+        for(col = 0; col < 4; col++)
+        {
+            u2_sad_acc += ABS(pu1_src[col] - pu1_pred[col]);
+        }
+
+        pu1_src += src_strd;
+        pu1_pred += pred_strd;
+    }
+
+    return u2_sad_acc;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_ed_4x4_find_best_modes \endif
+*
+* \brief: walks gau1_modes_to_eval[] for one 4x4 block, predicting each mode
+* from ref and scoring it as SAD + mode_bits_cost[mode]
+*
+* \param[out] pu1_best_modes : [0] best mode >= 2; [1] best mode < 2, written
+*                              only when u1_low_resol is 1
+* \param[out] pu1_best_sad_costs : costs matching pu1_best_modes (WORD32)
+* \param[in] u1_low_resol : 1 scans all 11 list entries, else the first two
+*                           entries are skipped
+*
+*****************************************************************************
+*/
+void ihevce_ed_4x4_find_best_modes(
+    UWORD8 *pu1_src,
+    WORD32 src_stride,
+    UWORD8 *ref,
+    UWORD16 *mode_bits_cost,
+    UWORD8 *pu1_best_modes,
+    WORD32 *pu1_best_sad_costs,
+    WORD32 u1_low_resol,
+    FT_SAD_COMPUTER *pf_4x4_sad_computer)
+{
+    /* Slot 0 tracks modes >= 2, slot 1 tracks modes 0 and 1 */
+    UWORD8 au1_winner[2] = { 0, 0 };
+    WORD32 ai4_winner_cost[2] = { 0xFFFFF, 0xFFFFF };
+    UWORD8 au1_pred_4x4[16];
+    WORD32 cand_idx;
+
+    /* If lower layers, l1 or l2, all the 11 modes are evaluated */
+    /* If L0 layer, all modes excluding DC and Planar are evaluated */
+    cand_idx = (1 == u1_low_resol) ? 0 : 2;
+
+    while(cand_idx < 11)
+    {
+        const UWORD8 u1_mode = gau1_modes_to_eval[cand_idx];
+        const WORD32 slot = (u1_mode < 2) ? 1 : 0;
+        WORD32 i4_sad;
+        WORD32 i4_cost;
+
+        /* Build the 4x4 prediction for this mode from the reference samples */
+        g_apf_lum_ip[g_i4_ip_funcs[u1_mode]](&ref[0], 0, &au1_pred_4x4[0], 4, 4, u1_mode);
+
+        /* Distortion against the source plus the signalling cost of the mode */
+        i4_sad = pf_4x4_sad_computer(pu1_src, &au1_pred_4x4[0], src_stride, 4);
+        i4_cost = i4_sad;
+        i4_cost += mode_bits_cost[u1_mode];
+
+        /* Strict '<' keeps the earliest candidate when costs tie */
+        if(i4_cost < ai4_winner_cost[slot])
+        {
+            au1_winner[slot] = u1_mode;
+            ai4_winner_cost[slot] = i4_cost;
+        }
+
+        cand_idx++;
+    }
+
+    pu1_best_modes[0] = au1_winner[0];
+    pu1_best_sad_costs[0] = ai4_winner_cost[0];
+
+    /* The non-angular winner is reported only for the l1 and l2 layers */
+    if(1 == u1_low_resol)
+    {
+        pu1_best_modes[1] = au1_winner[1];
+        pu1_best_sad_costs[1] = ai4_winner_cost[1];
+    }
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_ed_calc_4x4_blk \endif
+*
+* \brief: picks the intra mode of one 4x4 block. A coarse search (through
+* pf_ed_4x4_find_best_modes) is followed by SAD based refinement of the
+* angular winner at +/-2 and, for presets below IHEVCE_QUALITY_P4, at +/-1.
+* The cheaper of the two winners is kept (ties go to the non-angular one).
+*
+* \param[out] ps_ed : best_mode is set; intra_or_inter and merge_success
+*                     are reset to 0
+* \param[in] pu1_src : source 4x4 block handed to the SAD function
+* \param[in] src_stride : stride used with pu1_src
+* \param[in] ref : reference samples handed to the prediction functions
+* \param[in] mode_bits_cost : per-mode cost, indexed by mode, added to SAD
+* \param[out] sad_ptr : indexed by mode; written only for the modes probed
+*                       during refinement
+* \param[out] pi4_best_satd : SAD (cost minus mode bits) of the best angular
+*                             mode, even when a non-angular mode is chosen
+* \param[in] i4_quality_preset : gates the +/-1 refinement pass
+* \param[out] pi4_best_sad_cost : cost of the mode stored in ps_ed->best_mode
+* \param[in] ps_ipe_optimised_function_list : supplies
+*            pf_ed_4x4_find_best_modes and pf_4x4_sad_computer
+*
+* \return none
+*
+*****************************************************************************
+*/
+static void ihevce_ed_calc_4x4_blk(
+    ihevce_ed_blk_t *ps_ed,
+    UWORD8 *pu1_src,
+    WORD32 src_stride,
+    UWORD8 *ref,
+    UWORD16 *mode_bits_cost,
+    WORD32 *sad_ptr,
+    WORD32 *pi4_best_satd,
+    WORD32 i4_quality_preset,
+    WORD32 *pi4_best_sad_cost,
+    ihevce_ipe_optimised_function_list_t *ps_ipe_optimised_function_list)
+{
+    /* Slot 0: angular winner, slot 1: non-angular winner (coarse search) */
+    UWORD8 au1_coarse_mode[2];
+    WORD32 ai4_coarse_cost[2];
+    UWORD8 au1_pred_4x4[16];
+
+    UWORD8 u1_ang_mode, u1_nonang_mode;
+    WORD32 i4_ang_cost, i4_nonang_cost;
+
+    /* Pass 0 probes (best +/- 2); pass 1 probes (best +/- 1) and is run */
+    /* only for presets below IHEVCE_QUALITY_P4                          */
+    /*To be done : Add a flag here instead of preset condn*/
+    const WORD32 num_passes = (i4_quality_preset < IHEVCE_QUALITY_P4) ? 2 : 1;
+    WORD32 pass;
+
+    /* Placeholder value; replaced right after the coarse search below */
+    *pi4_best_satd = -1;
+
+    /* Coarse search. The last-but-one argument (1) selects the low */
+    /* resolution behaviour, i.e. L1/L2 layers                      */
+    ps_ipe_optimised_function_list->pf_ed_4x4_find_best_modes(
+        pu1_src,
+        src_stride,
+        ref,
+        mode_bits_cost,
+        au1_coarse_mode,
+        ai4_coarse_cost,
+        1,
+        ps_ipe_optimised_function_list->pf_4x4_sad_computer);
+
+    u1_ang_mode = au1_coarse_mode[0];
+    i4_ang_cost = ai4_coarse_cost[0];
+    u1_nonang_mode = au1_coarse_mode[1];
+    i4_nonang_cost = ai4_coarse_cost[1];
+
+    /* Distortion part of the coarse angular winner; stays in place unless a */
+    /* refined neighbour below turns out cheaper                             */
+    *pi4_best_satd = i4_ang_cost - mode_bits_cost[u1_ang_mode];
+
+    for(pass = 0; pass < num_passes; pass++)
+    {
+        const WORD32 step = (0 == pass) ? 2 : 1;
+        UWORD8 au1_nbr_mode[2];
+        WORD32 k, k_end;
+
+        /* Neighbours are fixed from the winner at the start of the pass */
+        au1_nbr_mode[0] = u1_ang_mode - step;
+        au1_nbr_mode[1] = u1_ang_mode + step;
+
+        /* Winner 2 skips the lower neighbour, winner 34 skips the upper one */
+        k = (2 == u1_ang_mode) ? 1 : 0;
+        k_end = (34 == u1_ang_mode) ? 1 : 2;
+
+        for(; k < k_end; k++)
+        {
+            const UWORD8 u1_mode = au1_nbr_mode[k];
+            UWORD16 u2_sad;
+            WORD32 i4_cost;
+
+            g_apf_lum_ip[g_i4_ip_funcs[u1_mode]](
+                &ref[0], 0, &au1_pred_4x4[0], 4, 4, u1_mode);
+            u2_sad = ps_ipe_optimised_function_list->pf_4x4_sad_computer(
+                pu1_src, &au1_pred_4x4[0], src_stride, 4);
+
+            i4_cost = u2_sad;
+            i4_cost += mode_bits_cost[u1_mode];
+
+            if(i4_cost < i4_ang_cost)
+            {
+                u1_ang_mode = u1_mode;
+                i4_ang_cost = i4_cost;
+                *pi4_best_satd = u2_sad;
+            }
+
+            /* SAD of every probed neighbour is recorded, winner or not */
+            sad_ptr[u1_mode] = u2_sad;
+        }
+    }
+
+    /* Angular wins only when strictly cheaper than the non-angular winner */
+    if(i4_ang_cost < i4_nonang_cost)
+    {
+        ps_ed->best_mode = u1_ang_mode;
+        *pi4_best_sad_cost = i4_ang_cost;
+    }
+    else
+    {
+        ps_ed->best_mode = u1_nonang_mode;
+        *pi4_best_sad_cost = i4_nonang_cost;
+    }
+
+    ps_ed->intra_or_inter = 0;
+    ps_ed->merge_success = 0;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_ed_calc_8x8_blk \endif
+*
+* \brief: runs the 4x4 intra mode search on the four 4x4 blocks of an 8x8
+* block, then decides whether they merge into one 8x8 mode; the result is
+* stored in the first 4x4 entry (merge_success / best_merge_mode).
+*
+*****************************************************************************
+*/
+static void ihevce_ed_calc_8x8_blk(
+ ihevce_ed_ctxt_t *ps_ed_ctxt,
+ ihevce_ed_blk_t *ps_ed_8x8,
+ UWORD8 *pu1_src,
+ WORD32 src_stride,
+ WORD32 *nbr_flags_ptr,
+ WORD32 *top_intra_mode_ptr,
+ WORD32 *left_intra_mode_ptr,
+ WORD32 cu_pos_y,
+ WORD32 lambda,
+ WORD32 *sad_ptr_8x8,
+ WORD32 *pi4_best_satd,
+ WORD32 i4_layer_id,
+ WORD32 i4_quality_preset,
+ WORD32 i4_slice_type,
+ WORD32 *pi4_best_sad_cost_8x8_l1_ipe,
+ WORD32 *pi4_best_sad_8x8_l1_ipe,
+ WORD32 *pi4_sum_4x4_satd,
+ WORD32 *pi4_min_4x4_satd,
+ ihevce_ipe_optimised_function_list_t *ps_ipe_optimised_function_list,
+ ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list)
+{
+ WORD32 i, j;
+ WORD32 nbr_flags, nbr_flags_TR;
+ UWORD8 *pu1_src_4x4;
+ WORD32 top_available;
+ WORD32 left_available;
+ ihevce_ed_blk_t *ps_ed_4x4 = ps_ed_8x8;
+ WORD32 top_intra_mode;
+ WORD32 left_intra_mode;
+ WORD32 next_left_intra_mode;
+ WORD32 *sad_ptr = sad_ptr_8x8;
+ UWORD8 *pu1_src_arr[4];
+ WORD32 i4_4x4_best_sad_cost[4];
+ func_selector_t *ps_func_selector = ps_ed_ctxt->ps_func_selector;
+ ihevc_intra_pred_luma_ref_substitution_ft *pf_intra_pred_luma_ref_substitution =
+ ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr;
+
+ (void)i4_slice_type;
+
+ /* Compute ref samples for 8x8 merge block */
+ nbr_flags = nbr_flags_ptr[0];
+ nbr_flags_TR = nbr_flags_ptr[1];
+
+ if(CHECK_TR_AVAILABLE(nbr_flags_TR))
+ {
+ SET_TR_AVAILABLE(nbr_flags);
+ }
+ else
+ {
+ SET_TR_UNAVAILABLE(nbr_flags);
+ }
+
+ if(CHECK_BL_AVAILABLE(nbr_flags)) /* NOTE(review): checks and then sets the same nbr_flags - confirm intent */
+ {
+ SET_BL_AVAILABLE(nbr_flags);
+ }
+ else
+ {
+ SET_BL_UNAVAILABLE(nbr_flags);
+ }
+
+ /* call the function which populates ref data for intra prediction */
+ pf_intra_pred_luma_ref_substitution(
+ pu1_src - src_stride - 1,
+ pu1_src - src_stride,
+ pu1_src - 1,
+ src_stride,
+ 8,
+ nbr_flags,
+ &ps_ed_ctxt->au1_ref_8x8[0][0],
+ 0);
+
+ for(i = 0; i < 2; i++)
+ {
+ pu1_src_4x4 = pu1_src + i * 4 * src_stride;
+ cu_pos_y += i * 4;
+ next_left_intra_mode = left_intra_mode_ptr[i];
+ for(j = 0; j < 2; j++)
+ {
+ WORD32 i4_best_satd;
+ pu1_src_arr[i * 2 + j] = pu1_src_4x4;
+ nbr_flags = nbr_flags_ptr[i * 8 + j];
+ top_intra_mode = top_intra_mode_ptr[j];
+ left_intra_mode = next_left_intra_mode;
+ /* call the function which populates ref data for intra prediction */
+ pf_intra_pred_luma_ref_substitution(
+ pu1_src_4x4 - src_stride - 1,
+ pu1_src_4x4 - src_stride,
+ pu1_src_4x4 - 1,
+ src_stride,
+ 4,
+ nbr_flags,
+ &ps_ed_ctxt->au1_ref_full_ctb[i * 2 + j][0],
+ 0);
+
+ top_available = CHECK_T_AVAILABLE(nbr_flags);
+ left_available = CHECK_L_AVAILABLE(nbr_flags);
+ /* call the function which populates the mode bits cost for all the modes */
+ ihevce_intra_populate_mode_bits_cost(
+ top_intra_mode,
+ left_intra_mode,
+ top_available,
+ left_available,
+ cu_pos_y,
+ &ps_ed_ctxt->au2_mode_bits_cost_full_ctb[i * 2 + j][0],
+ lambda);
+ ihevce_ed_calc_4x4_blk(
+ ps_ed_4x4,
+ pu1_src_4x4,
+ src_stride,
+ &ps_ed_ctxt->au1_ref_full_ctb[i * 2 + j][0],
+ &ps_ed_ctxt->au2_mode_bits_cost_full_ctb[i * 2 + j][0],
+ sad_ptr,
+ &i4_best_satd,
+ i4_quality_preset,
+ &i4_4x4_best_sad_cost[i * 2 + j],
+ ps_ipe_optimised_function_list);
+
+ top_intra_mode_ptr[j] = ps_ed_4x4->best_mode;
+ next_left_intra_mode = ps_ed_4x4->best_mode;
+ pu1_src_4x4 += 4;
+ ps_ed_4x4 += 1;
+ sad_ptr += NUM_MODES;
+ }
+ left_intra_mode_ptr[i] = next_left_intra_mode;
+ }
+
+ /* 8x8 merge */
+ {
+ UWORD8 modes_to_eval[6];
+ WORD32 sad;
+ UWORD8 pred[16];
+ UWORD8 pred_8x8[64] = { 0 };
+ WORD32 merge_success;
+ UWORD8 mode;
+
+ ps_ed_4x4 = ps_ed_8x8;
+ mode = (ps_ed_4x4)->best_mode;
+
+ *pi4_best_satd = -1;
+
+ merge_success =
+ ((((ps_ed_4x4)->best_mode == (ps_ed_4x4 + 1)->best_mode) +
+ ((ps_ed_4x4)->best_mode == (ps_ed_4x4 + 2)->best_mode) +
+ ((ps_ed_4x4)->best_mode == (ps_ed_4x4 + 3)->best_mode)) == 3);
+
+ {
+ WORD32 i4_satd;
+ //UWORD16 au2_4x4_sad_cost_array[4];/*SAD of 4x4 blocks*/
+ UWORD16 u2_sum_best_4x4_sad_cost; /*Sum of 4x4 sad costs*/
+ UWORD16 u2_sum_best_4x4_satd_cost; /*Sum of 4x4 satd costs*/
+ UWORD8 u1_best_8x8_mode; /*8x8 mode.*/
+ UWORD16 u2_best_8x8_cost; /*8x8 Cost. Can store SATD/SAD cost*/
+ WORD32 i4_best_8x8_sad_satd; /* SATD/SAD value of 8x8 block*/
+ UWORD16 au2_8x8_costs[6] = { 0 }; /*Cost of 8x8 block for 6 modes*/
+ UWORD8 u1_cond_4x4_satd; /*condition if 4x4 SATD needs to be done*/
+ UWORD8 u1_cond_8x8_satd; /*condition if 8x8 SATD needs to be done*/
+ UWORD8 u1_good_quality;
+ WORD32 i4_merge_success_stage2;
+
+ /*Initialization*/
+ *pi4_best_satd = 0;
+ u2_best_8x8_cost = (UWORD16)(-1) /*max value*/;
+ u2_sum_best_4x4_sad_cost = 0;
+ *pi4_sum_4x4_satd = -1;
+ *pi4_min_4x4_satd = 0x7FFFFFFF;
+ i4_best_8x8_sad_satd = 0;
+ u2_sum_best_4x4_satd_cost = 0;
+ u1_best_8x8_mode = ps_ed_4x4->best_mode;
+
+ /*We thought of "replacing" SATDs by SADs for 4x4 vs 8x8 decision
+ for speed improvement, but it gave opposite results. Setting
+ good_quality to 1 in order to throw away the idea of "replacing".*/
+ u1_good_quality = 1;
+ //u1_good_quality = ((i4_quality_preset != IHEVCE_QUALITY_P5)
+ // && (i4_quality_preset != IHEVCE_QUALITY_P4));
+
+ /*Needed to disable some processing based on speed preset*/
+ i4_merge_success_stage2 = 0;
+
+ /*Store SAD cost of 4x4 blocks */
+ for(i = 0; i < 4; i++)
+ {
+ //au2_4x4_sad_cost_array[i] = (ps_ed_4x4 + i)->best_sad_cost;
+ u2_sum_best_4x4_sad_cost +=
+ i4_4x4_best_sad_cost[i]; //(ps_ed_4x4 + i)->best_sad_cost;
+ modes_to_eval[i] = (ps_ed_4x4 + i)->best_mode;
+ /*NOTE_01: i4_4x4_satd is not used anywhere at present.
+ Setting it to zero to avoid ASSERT failure */
+ /*Now taken care of incomplete CTB*/
+ //(ps_ed_4x4 + i)->i4_4x4_satd = 0;
+ }
+
+ /*Calculate SATD/SAD for 4x4 blocks*/
+ /*For (layer_2 && high_speed): No need to get 4x4 SATDs bcoz
+ it won't have any impact on quality but speed will improve.*/
+ u1_cond_4x4_satd = ((1 == i4_layer_id) || (u1_good_quality && (!merge_success)));
+
+ if(u1_cond_4x4_satd)
+ {
+ *pi4_sum_4x4_satd = 0;
+ /*FYI: 1. Level 2 doesn't need the SATD.
+ 2. The 4x4 vs. 8x8 decision for high_speed will
+ happen based on SAD. */
+ /*Get SATD for 4x4 blocks */
+ for(i = 0; i < 4; i++)
+ {
+ mode = modes_to_eval[i];
+ g_apf_lum_ip[g_i4_ip_funcs[mode]](
+ &ps_ed_ctxt->au1_ref_full_ctb[i][0], 0, &pred[0], 4, 4, mode);
+
+ i4_satd = ps_cmn_utils_optimised_function_list->pf_HAD_4x4_8bit(
+ pu1_src_arr[i], src_stride, &pred[0], 4, NULL, 0);
+
+ {
+ /*Save 4x4x satd in ed blk struct */
+ (ps_ed_4x4 + i)->i4_4x4_satd = i4_satd;
+ }
+
+ /*(ps_ed_4x4 + i)->i4_4x4_satd = i4_satd; // See NOTE_01*/
+ u2_sum_best_4x4_satd_cost +=
+ ((UWORD16)i4_satd + ps_ed_ctxt->au2_mode_bits_cost_full_ctb[i][mode]);
+ *pi4_best_satd += i4_satd;
+ }
+ }
+ /* Taken only when (1 != i4_layer_id) and merge_success is set */
+ else /* (Level_2 && extreme_speed) */
+ {
+ /* NOTE(review): the original author marked this path as not executed; */
+ /* *pi4_sum_4x4_satd still holds -1 here, so the += below yields the */
+ /* SAD sum minus one - confirm whether that offset is intended */
+ if(2 != i4_layer_id)
+ ASSERT(0);
+ /*Update values by SAD_cost_array */
+ for(i = 0; i < 4; i++)
+ {
+ mode = modes_to_eval[i];
+ //u2_sum_best_4x4_satd_cost += au2_4x4_sad_cost_array[i];
+ //sad = (WORD32)((ps_ed_4x4 + i)->best_sad_cost - ps_ed_ctxt->au2_mode_bits_cost_full_ctb[i][mode]);
+ sad = (WORD32)(
+ i4_4x4_best_sad_cost[i] - ps_ed_ctxt->au2_mode_bits_cost_full_ctb[i][mode]);
+ *pi4_sum_4x4_satd += sad;
+ /*(ps_ed_4x4 + i)->i4_4x4_satd = sad;// See NOTE_01*/
+ *pi4_best_satd += sad;
+
+ if(*pi4_min_4x4_satd > sad)
+ *pi4_min_4x4_satd = sad;
+ }
+ }
+ if(!merge_success) /*If the modes are not identical*/
+ {
+ UWORD8 i1_start; /* no of modes to evaluate */
+ UWORD8 ai1_modes[6];
+
+ /* Prepare 6 candidates for 8x8 block. Two are DC and planar */
+ ai1_modes[4] = 0;
+ ai1_modes[5] = 1;
+ i1_start = 4;
+
+ /*Assign along with removing duplicates rest 4 candidates. */
+ for(i = 3; i >= 0; i--)
+ {
+ WORD8 i1_fresh_mode_flag = 1;
+ mode = modes_to_eval[i];
+ /*Check if duplicate already exists in ai1_modes*/
+ for(j = i1_start; j < 6; j++)
+ {
+ if(mode == ai1_modes[j])
+ i1_fresh_mode_flag = 0;
+ }
+ if(i1_fresh_mode_flag)
+ {
+ i1_start--;
+ ai1_modes[i1_start] = mode;
+ }
+ }
+
+ /*Calculate SATD/SAD of 8x8 block for all modes*/
+ /*If (u1_good_quality == 0) then SATD gets replaced by SAD*/
+ if(u1_good_quality && (i4_quality_preset <= IHEVCE_QUALITY_P4))
+ {
+ //rate cost for transform flags (original note: 7.5 * lambda; factor passed is 12 - TODO confirm)
+ u2_sum_best_4x4_satd_cost +=
+ (COMPUTE_RATE_COST_CLIP30(12, lambda, (LAMBDA_Q_SHIFT + 1)));
+
+ /*Loop over all modes for calculating SATD*/
+ for(i = i1_start; i < 6; i++)
+ {
+ mode = ai1_modes[i];
+ g_apf_lum_ip[g_i4_ip_funcs[mode]](
+ &ps_ed_ctxt->au1_ref_8x8[0][0], 0, &pred_8x8[0], 8, 8, mode);
+
+ i4_satd = ps_cmn_utils_optimised_function_list->pf_HAD_8x8_8bit(
+ pu1_src_arr[0], src_stride, &pred_8x8[0], 8, NULL, 0);
+
+ au2_8x8_costs[i] =
+ ((UWORD16)i4_satd + ps_ed_ctxt->au2_mode_bits_cost_full_ctb[0][mode]);
+
+ /*Update data corresponding to least 8x8 cost */
+ if(au2_8x8_costs[i] <= u2_best_8x8_cost)
+ {
+ u2_best_8x8_cost = au2_8x8_costs[i];
+ i4_best_8x8_sad_satd = i4_satd;
+ u1_best_8x8_mode = mode;
+ }
+ }
+ /*8x8 vs 4x4 decision based on SATD values*/
+ if((u2_best_8x8_cost <= u2_sum_best_4x4_satd_cost) || (u2_best_8x8_cost <= 300))
+ {
+ i4_merge_success_stage2 = 1;
+ }
+
+ /* EIID: Early inter-intra decision */
+ /* Find the SAD based cost for 8x8 block for best mode */
+ if(/*(ISLICE != i4_slice_type) && */ (1 == i4_layer_id))
+ {
+ UWORD8 i4_best_8x8_mode = u1_best_8x8_mode;
+ WORD32 i4_best_8x8_sad_curr;
+
+ g_apf_lum_ip[g_i4_ip_funcs[i4_best_8x8_mode]](
+ &ps_ed_ctxt->au1_ref_8x8[0][0], 0, &pred_8x8[0], 8, 8, i4_best_8x8_mode);
+
+ i4_best_8x8_sad_curr = ps_ipe_optimised_function_list->pf_8x8_sad_computer(
+ pu1_src_arr[0], &pred_8x8[0], src_stride, 8);
+
+ //register best sad in the context
+ //ps_ed_8x8->i4_best_sad_8x8_l1_ipe = i4_best_8x8_sad_curr;
+
+ //register the best cost in the context
+ //[0]th index is used since all 4 blocks are having same cost right now
+ //also it does not depend on mode. It only depends on the lambda
+
+ *pi4_best_sad_cost_8x8_l1_ipe =
+ i4_best_8x8_sad_curr +
+ ps_ed_ctxt->au2_mode_bits_cost_full_ctb[0][i4_best_8x8_mode];
+ *pi4_best_sad_8x8_l1_ipe = i4_best_8x8_sad_curr;
+ }
+ }
+ else /*If high_speed or extreme speed*/
+ {
+ //rate cost for transform flags (original note: 7.5 * lambda; factor passed is 12 - TODO confirm)
+ u2_sum_best_4x4_sad_cost +=
+ (COMPUTE_RATE_COST_CLIP30(12, lambda, (LAMBDA_Q_SHIFT + 1)));
+
+ /*Loop over all modes for calculating SAD*/
+ for(i = i1_start; i < 6; i++)
+ {
+ mode = ai1_modes[i];
+ g_apf_lum_ip[g_i4_ip_funcs[mode]](
+ &ps_ed_ctxt->au1_ref_8x8[0][0], 0, &pred_8x8[0], 8, 8, mode);
+
+ sad = ps_ipe_optimised_function_list->pf_8x8_sad_computer(
+ pu1_src_arr[0], &pred_8x8[0], src_stride, 8);
+
+ au2_8x8_costs[i] +=
+ ((UWORD16)sad + ps_ed_ctxt->au2_mode_bits_cost_full_ctb[0][mode]);
+
+ /*Find the data corresponding to least cost */
+ if(au2_8x8_costs[i] <= u2_best_8x8_cost)
+ {
+ u2_best_8x8_cost = au2_8x8_costs[i];
+ i4_best_8x8_sad_satd = sad;
+ u1_best_8x8_mode = mode;
+ }
+ }
+ /*8x8 vs 4x4 decision based on SAD values*/
+ if((u2_best_8x8_cost <= u2_sum_best_4x4_sad_cost) || (u2_best_8x8_cost <= 300))
+ {
+ i4_merge_success_stage2 = 1;
+ }
+
+ /* EIID: Early inter-intra decision */
+ /* Find the SAD based cost for 8x8 block for best mode */
+ if(/*(ISLICE != i4_slice_type) && */ (1 == i4_layer_id))
+ {
+ //UWORD8 i4_best_8x8_mode = u1_best_8x8_mode;
+ WORD32 i4_best_8x8_sad_cost_curr = u2_best_8x8_cost;
+
+ //register best sad in the context
+ //ps_ed_8x8->i4_best_sad_8x8_l1_ipe = i4_best_8x8_sad_curr;
+
+ //register the best cost in the context
+ *pi4_best_sad_cost_8x8_l1_ipe = i4_best_8x8_sad_cost_curr;
+ *pi4_best_sad_8x8_l1_ipe =
+ i4_best_8x8_sad_satd; //i4_best_8x8_sad_cost_curr;
+ }
+ }
+ }
+
+ /***** Modes for 4x4 and 8x8 are decided before this point ****/
+ if(merge_success || i4_merge_success_stage2)
+ {
+ /*FYI: 1. 8x8 SATD is not needed if merge is failed.
+ 2. For layer_2: SATD won't be calculated for 8x8. So
+ the best_8x8_cost is SAD-cost. */
+
+ /* Store the 8x8 level data in the first 4x4 block*/
+ ps_ed_4x4->merge_success = 1;
+ ps_ed_4x4->best_merge_mode = u1_best_8x8_mode;
+ /* ps_ed_4x4->best_merge_sad_cost = u2_best_8x8_cost;
+ This data is not getting consumed anywhere at present */
+
+ top_intra_mode_ptr[0] = u1_best_8x8_mode;
+ top_intra_mode_ptr[1] = u1_best_8x8_mode;
+ left_intra_mode_ptr[0] = u1_best_8x8_mode;
+ left_intra_mode_ptr[1] = u1_best_8x8_mode;
+
+ /*If it is layer_1 and high_speed*/
+ u1_cond_8x8_satd =
+ ((1 == i4_layer_id) &&
+ (merge_success || ((!u1_good_quality) && i4_merge_success_stage2)));
+ if(u1_cond_8x8_satd)
+ {
+ mode = u1_best_8x8_mode;
+ g_apf_lum_ip[g_i4_ip_funcs[mode]](
+ &ps_ed_ctxt->au1_ref_8x8[0][0], 0, &pred_8x8[0], 8, 8, mode);
+
+ if(i4_quality_preset > IHEVCE_QUALITY_P3)
+ {
+ i4_satd = ps_ipe_optimised_function_list->pf_8x8_sad_computer(
+ pu1_src_arr[0], &pred_8x8[0], src_stride, 8);
+ }
+ else
+ {
+ i4_satd = ps_cmn_utils_optimised_function_list->pf_HAD_8x8_8bit(
+ pu1_src_arr[0], src_stride, &pred_8x8[0], 8, NULL, 0);
+ }
+ /* u2_best_8x8_cost = ((UWORD16)i4_satd + mode_bits_cost[0][mode]);
+ This data is not getting consumed at present */
+ i4_best_8x8_sad_satd = i4_satd;
+ }
+ *pi4_best_satd = i4_best_8x8_sad_satd;
+
+ /* EIID: Early inter-intra decision */
+ /* Find the SAD based cost for 8x8 block for best mode */
+ if(/*(ISLICE != i4_slice_type) && */ (1 == i4_layer_id))
+ {
+ UWORD8 i4_best_8x8_mode = u1_best_8x8_mode;
+ WORD32 i4_best_8x8_sad_curr;
+
+ g_apf_lum_ip[g_i4_ip_funcs[i4_best_8x8_mode]](
+ &ps_ed_ctxt->au1_ref_8x8[0][0], 0, &pred_8x8[0], 8, 8, i4_best_8x8_mode);
+
+ i4_best_8x8_sad_curr = ps_ipe_optimised_function_list->pf_8x8_sad_computer(
+ pu1_src_arr[0], &pred_8x8[0], src_stride, 8);
+ //register best sad in the context
+ //ps_ed_8x8->i4_best_sad_8x8_l1_ipe = i4_best_8x8_sad_curr;
+
+ //register the best cost in the context
+ //[0]th index is used since all 4 blocks are having same cost right now
+ //also it does not depend on mode. It only depends on the lambda
+
+ *pi4_best_sad_cost_8x8_l1_ipe =
+ i4_best_8x8_sad_curr +
+ ps_ed_ctxt->au2_mode_bits_cost_full_ctb[0][i4_best_8x8_mode];
+ *pi4_best_sad_8x8_l1_ipe = i4_best_8x8_sad_curr;
+
+ } // EIID ends
+
+ } //if(merge_success || i4_merge_success_stage2)
+ }
+ }
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_ed_calc_incomplete_ctb \endif
+* \brief: Intra mode analysis of an incomplete CTB: complete 8x8 blocks are
+*  analysed first, then the leftover right column and bottom row of 4x4 blocks.
+*  The ps_ed_ctb_l1 statistics are written only when i4_layer_id is 1.
+*****************************************************************************
+*/
+void ihevce_ed_calc_incomplete_ctb(
+ ihevce_ed_ctxt_t *ps_ed_ctxt,
+ ihevce_ed_blk_t *ps_ed_ctb,
+ ihevce_ed_ctb_l1_t *ps_ed_ctb_l1,
+ UWORD8 *pu1_src,
+ WORD32 src_stride,
+ WORD32 num_4x4_blks_x,
+ WORD32 num_4x4_blks_y,
+ WORD32 *nbr_flags,
+ WORD32 i4_layer_id,
+ WORD32 i4_row_block_no,
+ WORD32 i4_col_block_no,
+ ihevce_ipe_optimised_function_list_t *ps_ipe_optimised_function_list,
+ ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list)
+{
+ WORD32 i, j, k;
+ WORD32 z_scan_idx = 0;
+ WORD32 z_scan_act_idx = 0;
+ ihevc_intra_pred_luma_ref_substitution_ft *pf_intra_pred_luma_ref_substitution =
+ ps_ed_ctxt->ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr;
+
+ //UWORD8 ref[18];
+ //WORD32 top_intra_modes[20];
+ WORD32 *sad_ptr = &ps_ed_ctxt->sad[0]; /* indexed below as z_scan_idx * NUM_MODES */
+ WORD32 lambda = ps_ed_ctxt->lambda;
+ //UWORD16 mode_bits_cost[NUM_MODES];
+
+ UWORD8 *pu1_src_8x8;
+ ihevce_ed_blk_t *ps_ed_8x8, *ps_ed_4x4;
+ WORD32 *top_intra_mode_ptr;
+ WORD32 *left_intra_mode_ptr = ps_ed_ctxt->left_ctb_intra_modes;
+ WORD32 *nbr_flags_ptr;
+ WORD32 top_intra_mode;
+ WORD32 left_intra_mode;
+ WORD32 next_left_intra_mode;
+ WORD32 nbr_flag = 0;
+ WORD32 top_available;
+ WORD32 left_available;
+ UWORD8 *pu1_src_4x4;
+ WORD32 left_over_4x4_blks;
+ WORD32 i4_incomplete_sum_4x4_satd = 0; /* accumulated below, never written to an output here */
+ WORD32 i4_incomplete_min_4x4_satd = 0x7FFFFFFF; /* tracked below, never written to an output here */
+ WORD32 i4_best_sad_cost_8x8_l1_ipe, i4_best_sad_8x8_l1_ipe, i4_sum_4x4_satd, i4_min_4x4_satd;
+
+ (void)i4_row_block_no;
+ (void)i4_col_block_no;
+ /*Find the modulated qp of 16*16 at L2 from 8*8 SATDs in L2
+ This is used as 64*64 Qp in L0*/
+ /*For an incomplete CTB, init all SATDs to -1/-2 and then populate them for the complete 8x8 blocks (CU 16 in L0)*/
+ /* Not populated for 4x4 blocks (CU 8 in L0), can be done */
+ /*Also, the 32x32 satd is not populated, as it would correspond to CU 64, which cannot occur in an incomplete CTB */
+ if(i4_layer_id == 1)
+ {
+ WORD32 i4_i;
+
+ for(i4_i = 0; i4_i < 64; i4_i++)
+ {
+ (ps_ed_ctb + i4_i)->i4_4x4_satd = -1;
+ (ps_ed_ctb + i4_i)->i4_4x4_cur_satd = -1;
+ }
+
+ for(i4_i = 0; i4_i < 16; i4_i++)
+ {
+ ps_ed_ctb_l1->i4_sum_4x4_satd[i4_i] = -2;
+ ps_ed_ctb_l1->i4_min_4x4_satd[i4_i] = 0x7FFFFFFF;
+ ps_ed_ctb_l1->i4_8x8_satd[i4_i][0] = -2;
+ ps_ed_ctb_l1->i4_8x8_satd[i4_i][1] = -2;
+ }
+
+ for(i4_i = 0; i4_i < 4; i4_i++)
+ {
+ ps_ed_ctb_l1->i4_16x16_satd[i4_i][0] = -2;
+ ps_ed_ctb_l1->i4_16x16_satd[i4_i][1] = -2;
+ ps_ed_ctb_l1->i4_16x16_satd[i4_i][2] = -2;
+ }
+ ps_ed_ctb_l1->i4_32x32_satd[0][0] = -2;
+ ps_ed_ctb_l1->i4_32x32_satd[0][1] = -2;
+ ps_ed_ctb_l1->i4_32x32_satd[0][2] = -2;
+
+ ps_ed_ctb_l1->i4_32x32_satd[0][3] = -2;
+
+ for(i4_i = 0; i4_i < 16; i4_i++)
+ {
+ ps_ed_ctb_l1->i4_best_satd_8x8[i4_i] = -1;
+ ps_ed_ctb_l1->i4_best_sad_cost_8x8_l1_ipe[i4_i] = -1;
+ ps_ed_ctb_l1->i4_best_sad_8x8_l1_ipe[i4_i] = -1;
+ ps_ed_ctb_l1->i4_best_sad_cost_8x8_l1_me[i4_i] = -1;
+ ps_ed_ctb_l1->i4_sad_cost_me_for_ref[i4_i] = -1;
+ ps_ed_ctb_l1->i4_sad_me_for_ref[i4_i] = -1;
+ ps_ed_ctb_l1->i4_best_sad_8x8_l1_me[i4_i] = -1;
+
+ ps_ed_ctb_l1->i4_best_sad_8x8_l1_me_for_decide[i4_i] = -1;
+ }
+ }
+ /*
+ * src scan happens in raster scan order. ps_ed update happens in z-scan order.
+ */
+ for(i = 0; i < num_4x4_blks_x; i++)
+ {
+ ps_ed_ctxt->ai4_top_intra_modes_ic_ctb[i] = INTRA_DC;
+ }
+ next_left_intra_mode = left_intra_mode_ptr[0];
+ for(i = 0; i < num_4x4_blks_y / 2; i++)
+ {
+ pu1_src_8x8 = pu1_src + i * 2 * 4 * src_stride;
+ top_intra_mode_ptr = &ps_ed_ctxt->ai4_top_intra_modes_ic_ctb[0];
+ nbr_flags_ptr = &nbr_flags[0] + 2 * 8 * i;
+
+ for(j = 0; j < num_4x4_blks_x / 2; j++)
+ {
+ WORD32 i4_best_satd;
+ // Multiply i by 16 since the
+ // matrix is prepared for ctb_size = 64
+ z_scan_idx = gau1_ctb_raster_to_zscan[i * 2 * 16 + j * 2];
+ z_scan_act_idx = gau1_ctb_raster_to_zscan[i * 16 + j];
+ ASSERT(z_scan_act_idx <= 15);
+ ps_ed_8x8 = ps_ed_ctb + z_scan_idx;
+
+ ihevce_ed_calc_8x8_blk(
+ ps_ed_ctxt,
+ ps_ed_8x8,
+ pu1_src_8x8,
+ src_stride,
+ nbr_flags_ptr,
+ top_intra_mode_ptr,
+ left_intra_mode_ptr,
+ i * 8,
+ lambda,
+ sad_ptr + z_scan_idx * NUM_MODES,
+ &i4_best_satd,
+ i4_layer_id,
+ ps_ed_ctxt->i4_quality_preset,
+ ps_ed_ctxt->i4_slice_type,
+ &i4_best_sad_cost_8x8_l1_ipe,
+ &i4_best_sad_8x8_l1_ipe,
+ &i4_sum_4x4_satd,
+ &i4_min_4x4_satd,
+ ps_ipe_optimised_function_list,
+ ps_cmn_utils_optimised_function_list);
+
+ ASSERT(i4_best_satd >= 0);
+ if(i4_layer_id == 1)
+ {
+ ps_ed_ctb_l1->i4_best_sad_cost_8x8_l1_ipe[z_scan_act_idx] =
+ i4_best_sad_cost_8x8_l1_ipe;
+ ps_ed_ctb_l1->i4_best_sad_8x8_l1_ipe[z_scan_act_idx] = i4_best_sad_8x8_l1_ipe;
+ ps_ed_ctb_l1->i4_best_satd_8x8[z_scan_act_idx] = i4_best_satd;
+ ps_ed_ctxt->i8_sum_best_satd += i4_best_satd;
+ ps_ed_ctxt->i8_sum_sq_best_satd += (i4_best_satd * i4_best_satd);
+ //ps_ed_ctb_l1->i4_sum_4x4_satd[z_scan_act_idx] = i4_sum_4x4_satd;
+ //ps_ed_ctb_l1->i4_min_4x4_satd[z_scan_act_idx] = i4_min_4x4_satd;
+ }
+
+ pu1_src_8x8 += 8;
+ //ps_ed_8x8 += 4;
+ top_intra_mode_ptr += 2;
+ nbr_flags_ptr += 2;
+ }
+
+ next_left_intra_mode = left_intra_mode_ptr[0];
+ left_over_4x4_blks = (num_4x4_blks_x - (2 * (num_4x4_blks_x / 2))); /* 1 when num_4x4_blks_x is odd, else 0 */
+ left_over_4x4_blks = left_over_4x4_blks * 2; /* two 4x4 rows per 8x8 row */
+
+ pu1_src_4x4 = pu1_src_8x8;
+
+ i4_incomplete_sum_4x4_satd = 0;
+ i4_incomplete_min_4x4_satd = 0x7FFFFFFF;
+
+ /* For leftover right 4x4 blks (num_4x4_blks_x - 2 *(num_4x4_blks_x/2))*/
+ for(k = 0; k < left_over_4x4_blks; k++)
+ {
+ WORD32 i4_best_satd;
+ WORD32 i4_dummy_sad_cost;
+ // Multiply i by 16 since the
+ // matrix is prepared for ctb_size = 64
+ ASSERT(left_over_4x4_blks == 2);
+ z_scan_idx = gau1_ctb_raster_to_zscan[i * 2 * 16 + k * 16 + j * 2];
+ ps_ed_4x4 = ps_ed_ctb + z_scan_idx;
+
+ top_intra_mode = ps_ed_ctxt->ai4_top_intra_modes_ic_ctb[j]; /* NOTE(review): read at [j] but updated at [j * 2] below - confirm the intended index */
+ left_intra_mode = next_left_intra_mode;
+
+ nbr_flag = nbr_flags[i * 2 * 8 + k * 8 + j * 2];
+
+ /* call the function which populates ref data for intra prediction */
+ pf_intra_pred_luma_ref_substitution(
+ pu1_src_4x4 - src_stride - 1,
+ pu1_src_4x4 - src_stride,
+ pu1_src_4x4 - 1,
+ src_stride,
+ 4,
+ nbr_flag,
+ &ps_ed_ctxt->au1_ref_ic_ctb[0],
+ 0);
+
+ top_available = CHECK_T_AVAILABLE(nbr_flag);
+ left_available = CHECK_L_AVAILABLE(nbr_flag);
+ /* call the function which populates the mode bits cost for all the modes */
+ ihevce_intra_populate_mode_bits_cost(
+ top_intra_mode,
+ left_intra_mode,
+ top_available,
+ left_available,
+ i * 4,
+ &ps_ed_ctxt->au2_mode_bits_cost_ic_ctb[0],
+ lambda);
+
+ ihevce_ed_calc_4x4_blk(
+ ps_ed_4x4,
+ pu1_src_4x4,
+ src_stride,
+ &ps_ed_ctxt->au1_ref_ic_ctb[0],
+ &ps_ed_ctxt->au2_mode_bits_cost_ic_ctb[0],
+ sad_ptr + z_scan_idx * NUM_MODES,
+ &i4_best_satd,
+ ps_ed_ctxt->i4_quality_preset,
+ &i4_dummy_sad_cost,
+ ps_ipe_optimised_function_list);
+
+ ASSERT(i4_best_satd >= 0);
+ if(i4_layer_id == 1) //Can we ignore this check?
+ {
+ z_scan_act_idx = gau1_ctb_raster_to_zscan[i * 16 + j];
+ /* Note : the 4x4 sum/min SATD is not stored for the leftover 4x4 blocks of an incomplete CTB, */
+ /* which correspond to CU 8 in L0 */
+
+ /*MAM_VAR_L1 */
+ i4_incomplete_sum_4x4_satd = i4_incomplete_sum_4x4_satd + i4_best_satd;
+ if(i4_incomplete_min_4x4_satd >= i4_best_satd)
+ i4_incomplete_min_4x4_satd = i4_best_satd;
+ ps_ed_ctxt->i8_sum_best_satd += i4_best_satd;
+ ps_ed_ctxt->i8_sum_sq_best_satd += (i4_best_satd * i4_best_satd);
+ if((k & 1) == 0)
+ {
+ ps_ed_ctb_l1->i4_best_satd_8x8[z_scan_act_idx] = 0;
+ }
+ ps_ed_ctb_l1->i4_best_satd_8x8[z_scan_act_idx] += i4_best_satd;
+ }
+
+ ps_ed_ctxt->ai4_top_intra_modes_ic_ctb[j * 2] = ps_ed_4x4->best_mode;
+ next_left_intra_mode = ps_ed_4x4->best_mode;
+ pu1_src_4x4 += src_stride;
+ left_intra_mode_ptr[k] = next_left_intra_mode;
+ }
+ left_intra_mode_ptr += 2;
+ }
+
+ if(num_4x4_blks_y & 1)
+ {
+ /* For leftover bottom 4x4 blks. (num_4x4_blks_x) */
+ pu1_src_4x4 = pu1_src + i * 2 * 4 * src_stride;
+ //memset(&ps_ed_ctb_l1->i4_best_satd_8x8[i][0],0,4*sizeof(WORD32));
+ for(j = 0; j < num_4x4_blks_x; j++)
+ {
+ WORD32 i4_best_satd;
+ WORD32 i4_dummy_sad_cost;
+ // Multiply i by 16 since the
+ // matrix is prepared for ctb_size = 64
+ z_scan_idx = gau1_ctb_raster_to_zscan[i * 2 * 16 + j];
+ ps_ed_4x4 = ps_ed_ctb + z_scan_idx;
+
+ if((j & 1) == 0)
+ {
+ i4_incomplete_sum_4x4_satd = 0;
+ i4_incomplete_min_4x4_satd = 0x7FFFFFFF;
+ }
+
+ top_intra_mode = ps_ed_ctxt->ai4_top_intra_modes_ic_ctb[j];
+ left_intra_mode = next_left_intra_mode;
+
+ nbr_flag = nbr_flags[i * 2 * 8 + j];
+
+ /* call the function which populates ref data for intra prediction */
+ pf_intra_pred_luma_ref_substitution(
+ pu1_src_4x4 - src_stride - 1,
+ pu1_src_4x4 - src_stride,
+ pu1_src_4x4 - 1,
+ src_stride,
+ 4,
+ nbr_flag,
+ &ps_ed_ctxt->au1_ref_ic_ctb[0],
+ 0);
+
+ top_available = CHECK_T_AVAILABLE(nbr_flag);
+ left_available = CHECK_L_AVAILABLE(nbr_flag);
+ /* call the function which populates the mode bits cost for all the modes */
+ ihevce_intra_populate_mode_bits_cost(
+ top_intra_mode,
+ left_intra_mode,
+ top_available,
+ left_available,
+ i * 4,
+ &ps_ed_ctxt->au2_mode_bits_cost_ic_ctb[0],
+ lambda);
+
+ ihevce_ed_calc_4x4_blk(
+ ps_ed_4x4,
+ pu1_src_4x4,
+ src_stride,
+ &ps_ed_ctxt->au1_ref_ic_ctb[0],
+ &ps_ed_ctxt->au2_mode_bits_cost_ic_ctb[0],
+ sad_ptr + z_scan_idx * NUM_MODES,
+ &i4_best_satd,
+ ps_ed_ctxt->i4_quality_preset,
+ &i4_dummy_sad_cost,
+ ps_ipe_optimised_function_list);
+
+ /* Note : the 4x4 sum/min SATD is not stored for the leftover 4x4 blocks of an incomplete CTB, */
+ /* which correspond to CU 8 in L0 */
+
+ /*MAM_VAR_L1 */
+ ASSERT(i4_best_satd >= 0);
+ if(i4_layer_id == 1) //Can we ignore this check?
+ {
+ z_scan_act_idx = gau1_ctb_raster_to_zscan[i * 16 + (j >> 1)];
+ if((j & 1) == 0)
+ {
+ ps_ed_ctb_l1->i4_best_satd_8x8[z_scan_act_idx] = 0;
+ }
+ ps_ed_ctb_l1->i4_best_satd_8x8[z_scan_act_idx] += i4_best_satd;
+ ps_ed_ctxt->i8_sum_best_satd += i4_best_satd;
+ ps_ed_ctxt->i8_sum_sq_best_satd += (i4_best_satd * i4_best_satd);
+ i4_incomplete_sum_4x4_satd = i4_incomplete_sum_4x4_satd + i4_best_satd;
+ if(i4_incomplete_min_4x4_satd >= i4_best_satd)
+ i4_incomplete_min_4x4_satd = i4_best_satd;
+ }
+
+ ps_ed_ctxt->ai4_top_intra_modes_ic_ctb[j] = ps_ed_4x4->best_mode;
+ next_left_intra_mode = ps_ed_4x4->best_mode;
+ pu1_src_4x4 += 4;
+ }
+ }
+ left_intra_mode_ptr[0] = next_left_intra_mode;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_cu_level_qp_mod \endif
+* \brief: Performs CU level QP modulation. i4_qscale is scaled by an activity
+*  factor derived from i4_satd (-1 disables modulation); the factor and the
+*  clamped qscale are stored via the out pointers. Returns the clamped QP.
+*****************************************************************************
+*/
+WORD32 ihevce_cu_level_qp_mod(
+    WORD32 i4_qscale,
+    WORD32 i4_satd,
+    long double ld_curr_frame_log_avg_act,
+    float f_mod_strength,
+    WORD32 *pi4_act_factor,
+    WORD32 *pi4_q_scale_mod,
+    rc_quant_t *ps_rc_quant_ctxt)
+{
+    WORD32 i4_temp_qscale;
+    WORD32 i4_temp_qp;
+
+    if(i4_satd != -1)
+    {
+        WORD32 i4_loc_satd = i4_satd;
+        if(i4_loc_satd < 1)
+        {
+            i4_loc_satd = 1;
+        }
+        if((WORD32)ld_curr_frame_log_avg_act == 0)
+        {
+            *pi4_act_factor = (1 << (QP_LEVEL_MOD_ACT_FACTOR));
+        }
+        else
+        {
+            UWORD32 u4_log2_sq_cur_satd;
+            ULWORD64 u8_sq_cur_satd;
+            WORD32 qp_offset;
+
+            ASSERT(USE_SQRT_AVG_OF_SATD_SQR);
+            /* Square in 64 bits: a 32 bit signed product overflows once the SATD exceeds 46340 */
+            u8_sq_cur_satd = (ULWORD64)i4_loc_satd * (ULWORD64)i4_loc_satd;
+            GET_POS_MSB_64(u4_log2_sq_cur_satd, u8_sq_cur_satd);
+            /* Round log2 up when the square is nearer 2^n * POW_2_TO_3_BY_4; 2^n is formed in 64 bits */
+            if(ABS((long double)((((ULWORD64)1 << u4_log2_sq_cur_satd) * POW_2_TO_1_BY_4) -
+                                 ((long double)u8_sq_cur_satd))) >
+               ABS((long double)((((ULWORD64)1 << u4_log2_sq_cur_satd) * POW_2_TO_3_BY_4) -
+                                 ((long double)u8_sq_cur_satd))))
+            {
+                u4_log2_sq_cur_satd += 1;
+            }
+            qp_offset = (WORD32)(
+                f_mod_strength * (float)((long double)u4_log2_sq_cur_satd - ld_curr_frame_log_avg_act));
+            qp_offset = CLIP3(qp_offset, MIN_QP_MOD_OFFSET, MAX_QP_MOD_OFFSET);
+            *pi4_act_factor = (WORD32)(
+                gad_look_up_activity[qp_offset + ABS(MIN_QP_MOD_OFFSET)] * (1 << QP_LEVEL_MOD_ACT_FACTOR));
+        }
+
+        ASSERT(*pi4_act_factor > 0);
+        i4_temp_qscale = ((i4_qscale * (*pi4_act_factor)) + (1 << (QP_LEVEL_MOD_ACT_FACTOR - 1))) >>
+                         QP_LEVEL_MOD_ACT_FACTOR;
+    }
+    else
+    {
+        i4_temp_qscale = i4_qscale;
+        *pi4_act_factor = (1 << QP_LEVEL_MOD_ACT_FACTOR);
+    }
+    ASSERT(*pi4_act_factor > 0);
+
+    if(i4_temp_qscale > ps_rc_quant_ctxt->i2_max_qscale)
+    {
+        i4_temp_qscale = ps_rc_quant_ctxt->i2_max_qscale;
+    }
+    else if(i4_temp_qscale < ps_rc_quant_ctxt->i2_min_qscale)
+    {
+        i4_temp_qscale = ps_rc_quant_ctxt->i2_min_qscale;
+    }
+    /*store q scale for stat gen for I frame model*/
+    /*Here activity factor is not modified as the cu qp would be clipped in rd-opt stage*/
+    *pi4_q_scale_mod = i4_temp_qscale;
+    i4_temp_qp = ps_rc_quant_ctxt->pi4_qscale_to_qp[i4_temp_qscale];
+    if(i4_temp_qp > ps_rc_quant_ctxt->i2_max_qp)
+    {
+        i4_temp_qp = ps_rc_quant_ctxt->i2_max_qp;
+    }
+    else if(i4_temp_qp < ps_rc_quant_ctxt->i2_min_qp)
+    {
+        i4_temp_qp = ps_rc_quant_ctxt->i2_min_qp;
+    }
+    return (i4_temp_qp);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_ed_calc_ctb \endif
+* \brief: Intra mode analysis of a CTB made only of whole 8x8 blocks: every
+*  8x8 block is passed to ihevce_ed_calc_8x8_blk in raster order. The
+*  ps_ed_ctb_l1 statistics are reset and filled only when i4_layer_id is 1.
+*****************************************************************************
+*/
+void ihevce_ed_calc_ctb(
+    ihevce_ed_ctxt_t *ps_ed_ctxt,
+    ihevce_ed_blk_t *ps_ed_ctb,
+    ihevce_ed_ctb_l1_t *ps_ed_ctb_l1,
+    UWORD8 *pu1_src,
+    WORD32 src_stride,
+    WORD32 num_4x4_blks_x,
+    WORD32 num_4x4_blks_y,
+    WORD32 *nbr_flags,
+    WORD32 i4_layer_id,
+    WORD32 i4_row_block_no,
+    WORD32 i4_col_block_no,
+    ihevce_ipe_optimised_function_list_t *ps_ipe_optimised_function_list,
+    ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list)
+{
+    /* Values read once from the context before any block is analysed */
+    WORD32 *pi4_left_modes = ps_ed_ctxt->left_ctb_intra_modes;
+    WORD32 *pi4_sad_base = &ps_ed_ctxt->sad[0];
+    WORD32 i4_lambda = ps_ed_ctxt->lambda;
+
+    /* Top neighbour intra modes handed to the 8x8 analysis, two per 8x8 column */
+    WORD32 ai4_top_modes[20];
+
+    /* Outputs of the 8x8 analysis; only the first two are stored, and only for layer 1 */
+    WORD32 i4_ipe_sad_cost, i4_ipe_sad, i4_sum_4x4_satd, i4_min_4x4_satd;
+    WORD32 z_scan_idx = 0;
+    WORD32 z_scan_act_idx = 0;
+    WORD32 num_8x8_blks;
+    WORD32 i, j;
+
+    (void)num_4x4_blks_y;
+    (void)i4_row_block_no;
+    (void)i4_col_block_no;
+    ASSERT(num_4x4_blks_x % 2 == 0);
+    ASSERT(num_4x4_blks_y % 2 == 0);
+    ASSERT((num_4x4_blks_x == 4) || (num_4x4_blks_x == 8));
+    ASSERT((num_4x4_blks_y == 4) || (num_4x4_blks_y == 8));
+
+    /* Layer 1 only: reset the per-CTB statistics to their sentinel values */
+    if(1 == i4_layer_id)
+    {
+        WORD32 idx, sub;
+
+        /* First 64 entries of the ps_ed_ctb array */
+        for(idx = 0; idx < 64; idx++)
+        {
+            ps_ed_ctb[idx].i4_4x4_satd = -1;
+            ps_ed_ctb[idx].i4_4x4_cur_satd = -1;
+        }
+
+        /* Arrays reset over indices 0..15 */
+        for(idx = 0; idx < 16; idx++)
+        {
+            ps_ed_ctb_l1->i4_sum_4x4_satd[idx] = -2;
+            ps_ed_ctb_l1->i4_min_4x4_satd[idx] = 0x7FFFFFFF;
+            ps_ed_ctb_l1->i4_8x8_satd[idx][0] = -2;
+            ps_ed_ctb_l1->i4_8x8_satd[idx][1] = -2;
+
+            ps_ed_ctb_l1->i4_best_sad_cost_8x8_l1_me[idx] = -2;
+            ps_ed_ctb_l1->i4_sad_cost_me_for_ref[idx] = -2;
+            ps_ed_ctb_l1->i4_sad_me_for_ref[idx] = -2;
+            ps_ed_ctb_l1->i4_best_sad_8x8_l1_me[idx] = -2;
+            ps_ed_ctb_l1->i4_best_sad_8x8_l1_me_for_decide[idx] = -2;
+
+            ps_ed_ctb_l1->i4_best_satd_8x8[idx] = -2;
+            ps_ed_ctb_l1->i4_best_sad_cost_8x8_l1_ipe[idx] = -2;
+            ps_ed_ctb_l1->i4_best_sad_8x8_l1_ipe[idx] = -2;
+        }
+
+        /* i4_16x16_satd: indices 0..3, three values each */
+        for(idx = 0; idx < 4; idx++)
+        {
+            for(sub = 0; sub < 3; sub++)
+            {
+                ps_ed_ctb_l1->i4_16x16_satd[idx][sub] = -2;
+            }
+        }
+
+        /* i4_32x32_satd: index 0, four values */
+        for(sub = 0; sub < 4; sub++)
+        {
+            ps_ed_ctb_l1->i4_32x32_satd[0][sub] = -2;
+        }
+    }
+
+    /*
+     * src scan happens in raster scan order. ps_ed update happens in z-scan order.
+     */
+    for(i = 0; i < num_4x4_blks_x; i++)
+    {
+        ai4_top_modes[i] = INTRA_DC;
+    }
+
+    /* The same 8x8 block count bounds both the row and the column loop */
+    num_8x8_blks = num_4x4_blks_x / 2;
+    for(i = 0; i < num_8x8_blks; i++)
+    {
+        for(j = 0; j < num_8x8_blks; j++)
+        {
+            WORD32 i4_best_satd;
+            ihevce_ed_blk_t *ps_blk;
+
+            ASSERT(i <= 3);
+            ASSERT(j <= 3);
+
+            /* The raster to z-scan table is prepared for ctb_size = 64 (row multiplier 16) */
+            z_scan_idx = gau1_ctb_raster_to_zscan[i * 2 * 16 + j * 2];
+            z_scan_act_idx = gau1_ctb_raster_to_zscan[i * 16 + j];
+            ASSERT(z_scan_act_idx <= 15);
+
+            ps_blk = &ps_ed_ctb[z_scan_idx];
+
+            /* Source, neighbour flag and mode pointers all follow from (i, j) */
+            ihevce_ed_calc_8x8_blk(
+                ps_ed_ctxt,
+                ps_blk,
+                pu1_src + (i * 2 * 4 * src_stride) + (j * 8),
+                src_stride,
+                nbr_flags + (2 * 8 * i) + (2 * j),
+                &ai4_top_modes[2 * j],
+                pi4_left_modes + (2 * i),
+                i * 8,
+                i4_lambda,
+                pi4_sad_base + z_scan_idx * NUM_MODES,
+                &i4_best_satd,
+                i4_layer_id,
+                ps_ed_ctxt->i4_quality_preset,
+                ps_ed_ctxt->i4_slice_type,
+                &i4_ipe_sad_cost,
+                &i4_ipe_sad,
+                &i4_sum_4x4_satd,
+                &i4_min_4x4_satd,
+                ps_ipe_optimised_function_list,
+                ps_cmn_utils_optimised_function_list);
+
+            if(1 != i4_layer_id)
+            {
+                continue;
+            }
+
+            /* Layer 1: record the 8x8 results and update the running sums */
+            ps_ed_ctb_l1->i4_best_sad_cost_8x8_l1_ipe[z_scan_act_idx] = i4_ipe_sad_cost;
+            ps_ed_ctb_l1->i4_best_sad_8x8_l1_ipe[z_scan_act_idx] = i4_ipe_sad;
+            ps_ed_ctb_l1->i4_best_satd_8x8[z_scan_act_idx] = i4_best_satd;
+            ps_ed_ctxt->i8_sum_best_satd += i4_best_satd;
+            ps_ed_ctxt->i8_sum_sq_best_satd += (i4_best_satd * i4_best_satd);
+        }
+    }
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_ed_frame_init \endif
+* \brief: Initialize frame context for early decision: fills the g_apf_lum_ip
+*  table from the context's function selector and, for layer 1 only, clears
+*  the best SATD sum and sum of squares kept in the context.
+*****************************************************************************
+*/
+void ihevce_ed_frame_init(void *pv_ed_ctxt, WORD32 i4_layer_no)
+{
+    ihevce_ed_ctxt_t *ps_ctxt = (ihevce_ed_ctxt_t *)pv_ed_ctxt;
+
+    /* One luma intra prediction function per IP_FUNC_MODE_* slot */
+    g_apf_lum_ip[IP_FUNC_MODE_0] = ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_planar_fptr;
+    g_apf_lum_ip[IP_FUNC_MODE_1] = ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_dc_fptr;
+    g_apf_lum_ip[IP_FUNC_MODE_2] = ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_mode2_fptr;
+    g_apf_lum_ip[IP_FUNC_MODE_3TO9] = ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_mode_3_to_9_fptr;
+    g_apf_lum_ip[IP_FUNC_MODE_10] = ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_horz_fptr;
+    g_apf_lum_ip[IP_FUNC_MODE_11TO17] = ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_mode_11_to_17_fptr;
+    g_apf_lum_ip[IP_FUNC_MODE_18_34] = ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_mode_18_34_fptr;
+    g_apf_lum_ip[IP_FUNC_MODE_19TO25] = ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_mode_19_to_25_fptr;
+    g_apf_lum_ip[IP_FUNC_MODE_26] = ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_ver_fptr;
+    g_apf_lum_ip[IP_FUNC_MODE_27TO33] = ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_mode_27_to_33_fptr;
+
+    /* The SATD accumulators are touched for layer 1 only */
+    if(1 != i4_layer_no)
+    {
+        return;
+    }
+
+    /* Start the frame with empty accumulators */
+    ps_ctxt->i8_sum_best_satd = 0;
+    ps_ctxt->i8_sum_sq_best_satd = 0;
+}
+
+/**
+********************************************************************************
+*
+* @brief downscales by 2 in horz and vertical direction, creates output of
+* size wd/2 * ht/2
+* (these parameters are the ones taken by ihevce_scale_by_2 further below)
+* @param[in] pu1_src : source pointer
+* @param[in] src_strd : source stride
+* @param[out] pu1_dst : destination pointer. Start of a row.
+* @param[in] dst_strd : destination stride
+* @param[in] wd : width
+* @param[in] ht : height
+* @param[in] pu1_wkg_mem : working memory (at least of size CEIL16(wd) * ht)
+* @param[in] ht_offset : height offset of the block to be scaled
+* @param[in] block_ht : height of the block to be scaled
+* @param[in] wd_offset : width offset of the block to be scaled
+* @param[in] block_wd : width of the block to be scaled
+*
+* @return void
+*
+* @remarks Assumes block_ht is a multiple of 2. LANCZOS_SCALER
+*
+********************************************************************************
+*/
+void ihevce_scaling_filter_mxn(
+    UWORD8 *pu1_src,
+    WORD32 src_strd,
+    UWORD8 *pu1_scrtch,
+    WORD32 scrtch_strd,
+    UWORD8 *pu1_dst,
+    WORD32 dst_strd,
+    WORD32 ht,
+    WORD32 wd)
+{
+#define FILT_TAP_Q 8
+#define N_TAPS 7
+    /* Symmetric 7 tap kernel; the taps add up to 1 << FILT_TAP_Q */
+    const WORD16 i4_ftaps[N_TAPS] = { -18, 0, 80, 132, 80, 0, -18 };
+    const WORD32 i4_round = 1 << (FILT_TAP_Q - 1);
+    WORD32 row, col;
+    WORD32 acc;
+
+    /* Pass 1, horizontal: every even column of source rows -3 .. ht + 1 is
+     * filtered and stored at half width in scratch rows 0 .. ht + 4. The
+     * source is read up to 3 pixels to the left and right of each column. */
+    for(row = -3; row < ht + 2; row++)
+    {
+        UWORD8 *pu1_in = pu1_src + row * src_strd;
+        UWORD8 *pu1_out = pu1_scrtch + (row + 3) * scrtch_strd;
+
+        for(col = 0; col < wd; col += 2)
+        {
+            acc = i4_ftaps[3] * pu1_in[col];
+            acc += i4_ftaps[2] * (pu1_in[col - 1] + pu1_in[col + 1]);
+            acc += i4_ftaps[1] * (pu1_in[col + 2] + pu1_in[col - 2]);
+            acc += i4_ftaps[0] * (pu1_in[col + 3] + pu1_in[col - 3]);
+            acc = (acc + i4_round) >> FILT_TAP_Q;
+            pu1_out[col >> 1] = CLIP_U8(acc);
+        }
+    }
+
+    /* Pass 2, vertical: every second scratch row, starting at scratch row 3
+     * (source row 0), is filtered and produces one destination row */
+    for(row = 0; row < ht; row += 2)
+    {
+        UWORD8 *pu1_in = pu1_scrtch + (row + 3) * scrtch_strd;
+        UWORD8 *pu1_out = pu1_dst + (row >> 1) * dst_strd;
+
+        for(col = 0; col < (wd >> 1); col++)
+        {
+            acc = i4_ftaps[3] * pu1_in[col];
+            acc += i4_ftaps[2] * (pu1_in[col + scrtch_strd] + pu1_in[col - scrtch_strd]);
+            acc += i4_ftaps[1] * (pu1_in[col + 2 * scrtch_strd] + pu1_in[col - 2 * scrtch_strd]);
+            acc += i4_ftaps[0] * (pu1_in[col + 3 * scrtch_strd] + pu1_in[col - 3 * scrtch_strd]);
+            acc = (acc + i4_round) >> FILT_TAP_Q;
+            pu1_out[col] = CLIP_U8(acc);
+        }
+    }
+}
+
+void ihevce_scale_by_2(
+ UWORD8 *pu1_src,
+ WORD32 src_strd,
+ UWORD8 *pu1_dst,
+ WORD32 dst_strd,
+ WORD32 wd,
+ WORD32 ht,
+ UWORD8 *pu1_wkg_mem,
+ WORD32 ht_offset,
+ WORD32 block_ht,
+ WORD32 wd_offset,
+ WORD32 block_wd,
+ FT_COPY_2D *pf_copy_2d,
+ FT_SCALING_FILTER_BY_2 *pf_scaling_filter_mxn)
+{
+#define N_TAPS 7
+#define MAX_BLK_SZ (MAX_CTB_SIZE + ((N_TAPS >> 1) << 1))
+ UWORD8 au1_cpy[MAX_BLK_SZ * MAX_BLK_SZ]; /* block copy with room for a 3 pixel border on each side */
+ UWORD32 cpy_strd = MAX_BLK_SZ;
+ UWORD8 *pu1_cpy = au1_cpy + cpy_strd * (N_TAPS >> 1) + (N_TAPS >> 1); /* block origin inside au1_cpy: row 3, column 3 */
+
+ UWORD8 *pu1_in, *pu1_out;
+ WORD32 in_strd, wkg_mem_strd;
+
+ WORD32 row_start, row_end;
+ WORD32 col_start, col_end;
+ WORD32 i, fun_select;
+ WORD32 ht_tmp, wd_tmp;
+ FT_SCALING_FILTER_BY_2 *ihevce_scaling_filters[2];
+
+ assert((wd & 1) == 0);
+ assert((ht & 1) == 0);
+ assert(block_wd <= MAX_CTB_SIZE);
+ assert(block_ht <= MAX_CTB_SIZE);
+
+ /* function pointers for filtering different dimensions: [0] is the C version, [1] the caller supplied one */
+ ihevce_scaling_filters[0] = ihevce_scaling_filter_mxn;
+ ihevce_scaling_filters[1] = pf_scaling_filter_mxn;
+
+ /* handle boundary blks: a flag is 1 when the block lies within 3 pixels of that frame edge */
+ col_start = (wd_offset < (N_TAPS >> 1)) ? 1 : 0;
+ row_start = (ht_offset < (N_TAPS >> 1)) ? 1 : 0;
+ col_end = ((wd_offset + block_wd) > (wd - (N_TAPS >> 1))) ? 1 : 0;
+ row_end = ((ht_offset + block_ht) > (ht - (N_TAPS >> 1))) ? 1 : 0;
+ if(col_end && (wd % block_wd != 0)) /* block at the right edge: use the remainder width when wd is not a multiple of block_wd */
+ {
+ block_wd = (wd % block_wd);
+ }
+ if(row_end && (ht % block_ht != 0)) /* block at the bottom edge: use the remainder height when ht is not a multiple of block_ht */
+ {
+ block_ht = (ht % block_ht);
+ }
+
+ /* boundary blks need to be padded: copy src to the tmp buffer, including the 3 pixel border on sides not at a frame edge */
+ if(col_start || col_end || row_end || row_start)
+ {
+ UWORD8 *pu1_src_tmp = pu1_src + wd_offset + ht_offset * src_strd;
+
+ pu1_cpy -= (3 * (1 - col_start) + cpy_strd * 3 * (1 - row_start));
+ pu1_src_tmp -= (3 * (1 - col_start) + src_strd * 3 * (1 - row_start));
+ ht_tmp = block_ht + 3 * (1 - row_start) + 3 * (1 - row_end);
+ wd_tmp = block_wd + 3 * (1 - col_start) + 3 * (1 - col_end);
+ pf_copy_2d(pu1_cpy, cpy_strd, pu1_src_tmp, src_strd, wd_tmp, ht_tmp);
+ pu1_in = au1_cpy + cpy_strd * 3 + 3;
+ in_strd = cpy_strd;
+ }
+ else
+ {
+ pu1_in = pu1_src + wd_offset + ht_offset * src_strd;
+ in_strd = src_strd;
+ }
+
+ /*top padding: replicate the first block row of au1_cpy into the 3 rows above it*/
+ if(row_start)
+ {
+ UWORD8 *pu1_cpy_tmp = au1_cpy + cpy_strd * 3;
+
+ pu1_cpy = au1_cpy + cpy_strd * (3 - 1);
+ memcpy(pu1_cpy, pu1_cpy_tmp, block_wd + 6);
+ pu1_cpy -= cpy_strd;
+ memcpy(pu1_cpy, pu1_cpy_tmp, block_wd + 6);
+ pu1_cpy -= cpy_strd;
+ memcpy(pu1_cpy, pu1_cpy_tmp, block_wd + 6);
+ }
+
+ /*bottom padding: replicate the last block row of au1_cpy into the 3 rows below it*/
+ if(row_end)
+ {
+ UWORD8 *pu1_cpy_tmp = au1_cpy + cpy_strd * 3 + (block_ht - 1) * cpy_strd;
+
+ pu1_cpy = pu1_cpy_tmp + cpy_strd;
+ memcpy(pu1_cpy, pu1_cpy_tmp, block_wd + 6);
+ pu1_cpy += cpy_strd;
+ memcpy(pu1_cpy, pu1_cpy_tmp, block_wd + 6);
+ pu1_cpy += cpy_strd;
+ memcpy(pu1_cpy, pu1_cpy_tmp, block_wd + 6);
+ }
+
+ /*left padding: columns 0..2 of each of the block_ht + 6 rows take the value of column 3*/
+ if(col_start)
+ {
+ UWORD8 *pu1_cpy_tmp = au1_cpy + 3;
+
+ pu1_cpy = au1_cpy;
+ for(i = 0; i < block_ht + 6; i++)
+ {
+ pu1_cpy[0] = pu1_cpy[1] = pu1_cpy[2] = pu1_cpy_tmp[0];
+ pu1_cpy += cpy_strd;
+ pu1_cpy_tmp += cpy_strd;
+ }
+ }
+
+ /*right padding: the 3 columns after the block take the value of the last block column*/
+ if(col_end)
+ {
+ UWORD8 *pu1_cpy_tmp = au1_cpy + 3 + block_wd - 1;
+
+ pu1_cpy = au1_cpy + 3 + block_wd;
+ for(i = 0; i < block_ht + 6; i++)
+ {
+ pu1_cpy[0] = pu1_cpy[1] = pu1_cpy[2] = pu1_cpy_tmp[0];
+ pu1_cpy += cpy_strd;
+ pu1_cpy_tmp += cpy_strd;
+ }
+ }
+
+ wkg_mem_strd = block_wd >> 1;
+ pu1_out = pu1_dst + (wd_offset >> 1);
+ fun_select = (block_wd % 16 == 0); /* widths that are multiples of 16 use the caller supplied filter */
+ ihevce_scaling_filters[fun_select](
+ pu1_in, in_strd, pu1_wkg_mem, wkg_mem_strd, pu1_out, dst_strd, block_ht, block_wd);
+
+ /* Left padding of 16 for 1st block of every row */
+ if(wd_offset == 0)
+ {
+ UWORD8 u1_val;
+ WORD32 pad_wd = 16;
+ WORD32 pad_ht = block_ht >> 1;
+ UWORD8 *dst = pu1_dst;
+
+ for(i = 0; i < pad_ht; i++)
+ {
+ u1_val = dst[0];
+ memset(&dst[-pad_wd], u1_val, pad_wd);
+ dst += dst_strd;
+ }
+ }
+
+ if(wd == wd_offset + block_wd)
+ {
+ /* Right padding of (16 + (CEIL16(wd/2))-wd/2) plus 4 more pixels for last block of every row */
+ /* Right padding is done only after processing of last block of that row is done*/
+ UWORD8 u1_val;
+ WORD32 pad_wd = 16 + CEIL16((wd >> 1)) - (wd >> 1) + 4;
+ WORD32 pad_ht = block_ht >> 1;
+ UWORD8 *dst = pu1_dst + (wd >> 1) - 1;
+
+ for(i = 0; i < pad_ht; i++)
+ {
+ u1_val = dst[0];
+ memset(&dst[1], u1_val, pad_wd);
+ dst += dst_strd;
+ }
+
+ if(ht_offset == 0)
+ {
+ /* Top padding of 16 is done for 1st row only after we reach end of that row */
+ WORD32 pad_wd = dst_strd;
+ WORD32 pad_ht = 16;
+ UWORD8 *dst = pu1_dst - 16;
+
+ for(i = 1; i <= pad_ht; i++)
+ {
+ memcpy(dst - (i * dst_strd), dst, pad_wd);
+ }
+ }
+
+ /* Bottom padding of (16 + (CEIL16(ht/2)) - ht/2) plus 4 more rows is done only if we have
+ reached end of frame */
+ if(ht - ht_offset - block_ht == 0)
+ {
+ WORD32 pad_wd = dst_strd;
+ WORD32 pad_ht = 16 + CEIL16((ht >> 1)) - (ht >> 1) + 4;
+ UWORD8 *dst = pu1_dst + (((block_ht >> 1) - 1) * dst_strd) - 16;
+
+ for(i = 1; i <= pad_ht; i++)
+ memcpy(dst + (i * dst_strd), dst, pad_wd);
+ }
+ }
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_decomp_pre_intra_process_row \endif
+*
+* \brief
+* Row level function which down scales a given row by 2 in horz and
+* vertical direction creates output of size wd/2 * ht/2.
+*
+* @param[in] pu1_src : soource pointer
+* @param[in] src_stride : source stride
+* @param[out] pu1_dst : desitnation pointer
+* @param[in] dst_stride : destination stride
+* @param[in] layer_wd : layer width
+* @param[in] layer_ht : layer height
+* @param[in] ht_offset : height offset of the block to be scaled
+* @param[in] block_ht : height of the block to be scaled
+* @param[in] wd_offset : width offset of the block to be scaled
+* @param[in] block_wd : width of the block to be scaled
+* @param[in] num_col_blks : number of col blks in that row
+*
+* \return None
+*
+* @NOTE : When decompositionis done from L1 to L2 pre intra analysis is
+* done on L1
+*
+*****************************************************************************
+*/
+void ihevce_decomp_pre_intra_process_row(
+ UWORD8 *pu1_src,
+ WORD32 src_stride,
+ UWORD8 *pu1_dst_decomp,
+ WORD32 dst_stride,
+ WORD32 layer_wd,
+ WORD32 layer_ht,
+ UWORD8 *pu1_wkg_mem,
+ WORD32 ht_offset,
+ WORD32 block_ht,
+ WORD32 block_wd,
+ WORD32 i4_cu_aligned_pic_wd,
+ WORD32 i4_cu_aligned_pic_ht,
+ WORD32 num_col_blks,
+ WORD32 layer_no,
+ ihevce_ed_ctxt_t *ps_ed_ctxt,
+ ihevce_ed_blk_t *ps_ed_row,
+ ihevce_ed_ctb_l1_t *ps_ed_ctb_l1_row,
+ ihevce_8x8_L0_satd_t *ps_layer0_cur_satd,
+ ihevce_8x8_L0_mean_t *ps_layer0_cur_mean,
+ WORD32 num_4x4_blks_ctb_y,
+ WORD32 num_4x4_blks_last_ctb_x,
+ WORD32 skip_decomp,
+ WORD32 skip_pre_intra,
+ WORD32 row_block_no,
+ WORD32 i4_enable_noise_detection,
+ ctb_analyse_t *ps_ctb_analyse,
+ ihevce_ipe_optimised_function_list_t *ps_ipe_optimised_function_list,
+ ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list)
+{
+ WORD32 col_block_no;
+
+ //ihevce_ed_ctxt_t *ps_ed_ctxt = (ihevce_ed_ctxt_t *)pv_ed_ctxt;
+ UWORD8 *pu1_src_pre_intra = pu1_src + (ht_offset * src_stride);
+ WORD32 num_4x4_blks_in_ctb = block_wd >> 2;
+ //WORD32 nbr_flags[64];
+ WORD32 *nbr_flags_ptr = &ps_ed_ctxt->ai4_nbr_flags[0];
+ WORD32 src_inc_pre_intra = num_4x4_blks_in_ctb * 4;
+ WORD32 inc_ctb = 0;
+ ihevce_ed_blk_t *ps_ed_ctb = ps_ed_row;
+ ihevce_ed_ctb_l1_t *ps_ed_ctb_l1 = ps_ed_ctb_l1_row;
+ WORD32 i, j;
+ WORD32 do_pre_intra_analysis;
+ pf_ed_calc_ctb ed_calc_ctb;
+ ctb_analyse_t *ps_ctb_analyse_curr;
+
+ (void)i4_cu_aligned_pic_wd;
+ (void)i4_cu_aligned_pic_ht;
+ (void)ps_layer0_cur_satd;
+ (void)ps_layer0_cur_mean;
+ (void)i4_enable_noise_detection;
+ /*increment the struct pointer to point to the first CTB of the current row. */
+ ps_ctb_analyse_curr = ps_ctb_analyse + row_block_no * num_col_blks;
+
+ //if((num_4x4_blks_ctb_x == num_4x4_blks_ctb_y) && (num_4x4_blks_in_ctb == num_4x4_blks_ctb_x) )
+ if(num_4x4_blks_in_ctb == num_4x4_blks_ctb_y)
+ {
+ ed_calc_ctb = ihevce_ed_calc_ctb;
+ }
+ else
+ {
+ ed_calc_ctb = ihevce_ed_calc_incomplete_ctb;
+ }
+
+ inc_ctb = num_4x4_blks_in_ctb * num_4x4_blks_in_ctb;
+
+ do_pre_intra_analysis = ((layer_no == 1) || (layer_no == 2)) && (!skip_pre_intra);
+
+ /*
+ * For optimal pre intra analysis first block is processed outside
+ * the loop.
+ */
+ if(!skip_decomp)
+ {
+ ihevce_scale_by_2(
+ pu1_src,
+ src_stride,
+ pu1_dst_decomp,
+ dst_stride,
+ layer_wd,
+ layer_ht,
+ pu1_wkg_mem,
+ ht_offset,
+ block_ht,
+ block_wd * 0,
+ block_wd,
+ ps_cmn_utils_optimised_function_list->pf_copy_2d,
+ ps_ipe_optimised_function_list->pf_scaling_filter_mxn);
+ /* Disable noise detection */
+ ps_ctb_analyse_curr->s_ctb_noise_params.i4_noise_present = 0;
+
+ memset(
+ ps_ctb_analyse_curr->s_ctb_noise_params.au1_is_8x8Blk_noisy,
+ 0,
+ sizeof(ps_ctb_analyse_curr->s_ctb_noise_params.au1_is_8x8Blk_noisy));
+ }
+
+ /*
+ * Pre intra analysis for the first ctb.
+ * To analyse any given CTB we need to set the availability flags of the
+ * following neighbouring CTB: BL,L,TL,T,TR.
+ */
+ if(do_pre_intra_analysis)
+ {
+ /*
+ * At the beginning of ctb row set left intra modes to default value.
+ */
+ for(j = 0; j < num_4x4_blks_ctb_y; j++)
+ {
+ ps_ed_ctxt->left_ctb_intra_modes[j] = INTRA_DC;
+ }
+
+ /*
+ * Copy the neighbor flags for a general ctb (ctb inside the frame; not any corners).
+ * The table gau4_nbr_flags_8x8_4x4blks generated for 16x16 4x4 blocks(ctb_size = 64).
+ * But the same table holds good for other 4x4 blocks 2d arrays(eg 8x8 4x4 blks,4x4 4x4blks).
+ * But the flags must be accessed with stride of 16 since the table has been generated for
+ * ctb_size = 64. For odd 4x4 2d arrays(eg 3x3 4x4 blks) the flags needs modification.
+ * The flags also need modification for corner ctbs.
+ */
+ memcpy(
+ ps_ed_ctxt->ai4_nbr_flags,
+ gau4_nbr_flags_8x8_4x4blks,
+ sizeof(gau4_nbr_flags_8x8_4x4blks));
+
+ /*
+ * Since this is the fist ctb in the ctb row, set left flags unavailable for 1st CTB col
+ */
+ for(j = 0; j < num_4x4_blks_ctb_y; j++)
+ {
+ SET_L_UNAVAILABLE(ps_ed_ctxt->ai4_nbr_flags[j * 8]);
+ SET_BL_UNAVAILABLE(ps_ed_ctxt->ai4_nbr_flags[j * 8]);
+ SET_TL_UNAVAILABLE(ps_ed_ctxt->ai4_nbr_flags[j * 8]);
+ }
+ /*
+ * If this is the fist ctb row, set top flags unavailable.
+ */
+ if(ht_offset == 0)
+ {
+ for(j = 0; j < num_4x4_blks_in_ctb; j++)
+ {
+ SET_T_UNAVAILABLE(ps_ed_ctxt->ai4_nbr_flags[j]);
+ SET_TR_UNAVAILABLE(ps_ed_ctxt->ai4_nbr_flags[j]);
+ SET_TL_UNAVAILABLE(ps_ed_ctxt->ai4_nbr_flags[j]);
+ }
+ }
+
+ /* If this is last ctb row,set BL as not available. */
+ if(ht_offset + block_ht >= layer_ht)
+ {
+ for(j = 0; j < num_4x4_blks_in_ctb; j++)
+ {
+ SET_BL_UNAVAILABLE(ps_ed_ctxt->ai4_nbr_flags[(num_4x4_blks_ctb_y - 1) * 8 + j]);
+ }
+ }
+ col_block_no = 0;
+ /* Call intra analysis for the ctb */
+ ed_calc_ctb(
+ ps_ed_ctxt,
+ ps_ed_ctb,
+ ps_ed_ctb_l1,
+ pu1_src_pre_intra,
+ src_stride,
+ num_4x4_blks_in_ctb,
+ num_4x4_blks_ctb_y,
+ nbr_flags_ptr,
+ layer_no,
+ row_block_no,
+ col_block_no,
+ ps_ipe_optimised_function_list,
+ ps_cmn_utils_optimised_function_list
+
+ );
+
+ pu1_src_pre_intra += src_inc_pre_intra;
+ ps_ed_ctb += inc_ctb;
+ ps_ed_ctb_l1 += 1;
+ /*
+ * For the rest of the ctbs, set left flags available.
+ */
+ for(j = 0; j < num_4x4_blks_ctb_y; j++)
+ {
+ SET_L_AVAILABLE(ps_ed_ctxt->ai4_nbr_flags[j * 8]);
+ }
+ for(j = 0; j < num_4x4_blks_ctb_y - 1; j++)
+ {
+ SET_BL_AVAILABLE(ps_ed_ctxt->ai4_nbr_flags[j * 8]);
+ SET_TL_AVAILABLE(ps_ed_ctxt->ai4_nbr_flags[(j + 1) * 8]);
+ }
+ if(ht_offset != 0)
+ {
+ SET_TL_AVAILABLE(ps_ed_ctxt->ai4_nbr_flags[0]);
+ }
+ }
+
+ /* The first ctb is processed before the loop.
+ * The last one is processed after the loop.
+ */
+ for(col_block_no = 1; col_block_no < num_col_blks - 1; col_block_no++)
+ {
+ if(!skip_decomp)
+ {
+ ihevce_scale_by_2(
+ pu1_src,
+ src_stride,
+ pu1_dst_decomp,
+ dst_stride,
+ layer_wd,
+ layer_ht,
+ pu1_wkg_mem,
+ ht_offset,
+ block_ht,
+ block_wd * col_block_no,
+ block_wd,
+ ps_cmn_utils_optimised_function_list->pf_copy_2d,
+ ps_ipe_optimised_function_list->pf_scaling_filter_mxn);
+ /* Disable noise detection */
+ memset(
+ ps_ctb_analyse_curr->s_ctb_noise_params.au1_is_8x8Blk_noisy,
+ 0,
+ sizeof(ps_ctb_analyse_curr->s_ctb_noise_params.au1_is_8x8Blk_noisy));
+
+ ps_ctb_analyse_curr->s_ctb_noise_params.i4_noise_present = 0;
+ }
+
+ if(do_pre_intra_analysis)
+ {
+ ed_calc_ctb(
+ ps_ed_ctxt,
+ ps_ed_ctb,
+ ps_ed_ctb_l1,
+ pu1_src_pre_intra,
+ src_stride,
+ num_4x4_blks_in_ctb,
+ num_4x4_blks_ctb_y,
+ nbr_flags_ptr,
+ layer_no,
+ row_block_no,
+ col_block_no,
+ ps_ipe_optimised_function_list,
+ ps_cmn_utils_optimised_function_list);
+ pu1_src_pre_intra += src_inc_pre_intra;
+ ps_ed_ctb += inc_ctb;
+ ps_ed_ctb_l1 += 1;
+ }
+ }
+
+ /* Last ctb in row */
+ if((!skip_decomp) && (col_block_no == (num_col_blks - 1)))
+ {
+ ihevce_scale_by_2(
+ pu1_src,
+ src_stride,
+ pu1_dst_decomp,
+ dst_stride,
+ layer_wd,
+ layer_ht,
+ pu1_wkg_mem,
+ ht_offset,
+ block_ht,
+ block_wd * col_block_no,
+ block_wd,
+ ps_cmn_utils_optimised_function_list->pf_copy_2d,
+ ps_ipe_optimised_function_list->pf_scaling_filter_mxn);
+ {
+ /* Disable noise detection */
+ memset(
+ ps_ctb_analyse_curr->s_ctb_noise_params.au1_is_8x8Blk_noisy,
+ 0,
+ sizeof(ps_ctb_analyse_curr->s_ctb_noise_params.au1_is_8x8Blk_noisy));
+
+ ps_ctb_analyse_curr->s_ctb_noise_params.i4_noise_present = 0;
+ }
+ }
+
+ if(do_pre_intra_analysis && (col_block_no == (num_col_blks - 1)))
+ {
+ /*
+ * The last ctb can be complete or incomplete. The complete
+ * ctb is handled in the if and incomplete is handled in the
+ * else case
+ */
+ //if(num_4x4_blks_last_ctb == num_4x4_blks_in_ctb)
+ if((num_4x4_blks_last_ctb_x == num_4x4_blks_ctb_y) &&
+ (num_4x4_blks_in_ctb == num_4x4_blks_last_ctb_x))
+ {
+ /* Last ctb so set top right not available */
+ SET_TR_UNAVAILABLE(ps_ed_ctxt->ai4_nbr_flags[num_4x4_blks_in_ctb - 1]);
+
+ ed_calc_ctb(
+ ps_ed_ctxt,
+ ps_ed_ctb,
+ ps_ed_ctb_l1,
+ pu1_src_pre_intra,
+ src_stride,
+ num_4x4_blks_in_ctb,
+ num_4x4_blks_in_ctb,
+ nbr_flags_ptr,
+ layer_no,
+ row_block_no,
+ col_block_no,
+ ps_ipe_optimised_function_list,
+ ps_cmn_utils_optimised_function_list);
+ pu1_src_pre_intra += src_inc_pre_intra;
+ ps_ed_ctb += inc_ctb;
+ ps_ed_ctb_l1 += 1;
+ }
+ else
+ {
+ /* Last ctb so set top right not available */
+ for(i = 0; i < num_4x4_blks_ctb_y; i++)
+ {
+ SET_TR_UNAVAILABLE(ps_ed_ctxt->ai4_nbr_flags[i * 8 + num_4x4_blks_in_ctb - 1]);
+ }
+
+ ihevce_ed_calc_incomplete_ctb(
+ ps_ed_ctxt,
+ ps_ed_ctb,
+ ps_ed_ctb_l1,
+ pu1_src_pre_intra,
+ src_stride,
+ num_4x4_blks_last_ctb_x,
+ num_4x4_blks_ctb_y,
+ nbr_flags_ptr,
+ layer_no,
+ row_block_no,
+ col_block_no,
+ ps_ipe_optimised_function_list,
+ ps_cmn_utils_optimised_function_list);
+ }
+ }
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_decomp_pre_intra_process \endif
+*
+* \brief
+* Frame level function to decompose given layer L0 into coarser layers.
+* For every layer it runs two passes over the rows this thread picked up:
+* pass 1 fetches row jobs from the pre-enc job queue and calls the row
+* function with pre-intra skipped; pass 2 (layers other than 0, subject to
+* the quality preset / temporal layer conditions below) replays the recorded
+* rows through the same row function with decomposition skipped.
+*
+* \param[in] pv_ctxt : pointer to master context of decomp_pre_intra module
+* \param[in] ps_lap_out_prms : LAP output params; the luma buffer, stride,
+* width and height are registered as layer 0, and
+* i4_temporal_lyr_id / i4_pic_type gate pre-intra
+* \param[in] ps_frm_ctb_prms : frame CTB params (cu aligned pic wd/ht and
+* i4_num_ctbs_horz are read here)
+* \param[in] pv_multi_thrd_ctxt : pointer to multithread context
+* \param[in] thrd_id : thread id, selects the per-thread context
+* \param[in] i4_ping_pong : forwarded to the job queue calls and used as
+* first index of aai4_l1_pre_intra_done
+* \param ps_layer0_cur_satd : base of the L0 8x8 satd buffer; a row offset
+* into it is handed to the row function for layer 0
+* (presumably filled there - confirm in row function)
+* \param ps_layer0_cur_mean : base of the L0 8x8 mean buffer, handled the
+* same way as ps_layer0_cur_satd
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_decomp_pre_intra_process(
+ void *pv_ctxt,
+ ihevce_lap_output_params_t *ps_lap_out_prms,
+ frm_ctb_ctxt_t *ps_frm_ctb_prms,
+ void *pv_multi_thrd_ctxt,
+ WORD32 thrd_id,
+ WORD32 i4_ping_pong,
+ ihevce_8x8_L0_satd_t *ps_layer0_cur_satd,
+ ihevce_8x8_L0_mean_t *ps_layer0_cur_mean)
+{
+ WORD32 i4_layer_no;
+ WORD32 i4_num_layers;
+ WORD32 end_of_layer;
+ UWORD8 *pu1_src, *pu1_dst;
+ WORD32 src_stride, dst_stride;
+ WORD32 i4_layer_wd, i4_layer_ht;
+ WORD32 ht_offset, block_ht;
+ WORD32 row_block_no, num_row_blocks;
+ UWORD8 *pu1_wkg_mem;
+ WORD32 block_wd;
+ WORD32 num_col_blks;
+ WORD32 skip_decomp, skip_pre_intra;
+ WORD32 i4_cu_aligned_pic_wd, i4_cu_aligned_pic_ht;
+ ihevce_decomp_pre_intra_master_ctxt_t *ps_master_ctxt =
+ (ihevce_decomp_pre_intra_master_ctxt_t *)pv_ctxt;
+
+ /* Per-thread private context selected by thread id */
+ ihevce_decomp_pre_intra_ctxt_t *ps_ctxt =
+ ps_master_ctxt->aps_decomp_pre_intra_thrd_ctxt[thrd_id];
+ multi_thrd_ctxt_t *ps_multi_thrd = (multi_thrd_ctxt_t *)pv_multi_thrd_ctxt;
+
+ ihevce_ed_ctxt_t *ps_ed_ctxt;
+ ihevce_ed_blk_t *ps_ed;
+ ihevce_ed_ctb_l1_t *ps_ed_ctb_l1;
+ WORD32 inc_ctb = 0;
+ WORD32 num_4x4_blks_lyr;
+
+ i4_num_layers = ps_ctxt->i4_num_layers;
+
+ ASSERT(i4_num_layers >= 3);
+
+ /*
+ * Always force minimum layers as 4 so that we would have both l1 and l2
+ * pre intra analysis
+ */
+ if(i4_num_layers == 3)
+ {
+ i4_num_layers = 4;
+ }
+
+ /* Register the actual input (L0) luma plane; coarser layers are registered in frame init */
+ ps_ctxt->as_layers[0].pu1_inp = (UWORD8 *)ps_lap_out_prms->s_input_buf.pv_y_buf;
+ ps_ctxt->as_layers[0].i4_inp_stride = ps_lap_out_prms->s_input_buf.i4_y_strd;
+ ps_ctxt->as_layers[0].i4_actual_wd = ps_lap_out_prms->s_input_buf.i4_y_wd;
+ ps_ctxt->as_layers[0].i4_actual_ht = ps_lap_out_prms->s_input_buf.i4_y_ht;
+
+ /* ------------ Loop over all the layers --------------- */
+ /* Each iteration first does decomp for the layer by picking jobs from the job queue, */
+ /* then (conditionally) pre intra analysis on the rows recorded during that decomp pass */
+ for(i4_layer_no = 0; i4_layer_no < (i4_num_layers - 1); i4_layer_no++)
+ {
+ /* idx counts the rows this thread handled in the current layer */
+ WORD32 idx = 0;
+ src_stride = ps_ctxt->as_layers[i4_layer_no].i4_inp_stride;
+ pu1_src = ps_ctxt->as_layers[i4_layer_no].pu1_inp;
+ i4_layer_wd = ps_ctxt->as_layers[i4_layer_no].i4_actual_wd;
+ i4_layer_ht = ps_ctxt->as_layers[i4_layer_no].i4_actual_ht;
+ pu1_dst = ps_ctxt->as_layers[i4_layer_no + 1].pu1_inp;
+ dst_stride = ps_ctxt->as_layers[i4_layer_no + 1].i4_inp_stride;
+ block_wd = ps_ctxt->as_layers[i4_layer_no].i4_decomp_blk_wd;
+ block_ht = ps_ctxt->as_layers[i4_layer_no].i4_decomp_blk_ht;
+ num_col_blks = ps_ctxt->as_layers[i4_layer_no].i4_num_col_blks;
+ num_row_blocks = ps_ctxt->as_layers[i4_layer_no].i4_num_row_blks;
+ i4_cu_aligned_pic_wd = ps_frm_ctb_prms->i4_cu_aligned_pic_wd;
+ i4_cu_aligned_pic_ht = ps_frm_ctb_prms->i4_cu_aligned_pic_ht;
+
+ /* register ed_ctxt buffer pointer */
+ //pv_ed_ctxt = &ps_ctxt->as_layers[i4_layer_no].s_early_decision;
+ //ps_ed_ctxt = (ihevce_ed_ctxt_t *)pv_ed_ctxt;
+ //ps_ed = ps_ed_ctxt->ps_ed;
+
+ //pv_ed_ctxt = &ps_ctxt->ps_ed_ctxt;
+ ps_ed_ctxt = ps_ctxt->ps_ed_ctxt;
+
+ /* initialize ed_ctxt here */
+ /* init is done here because only one ed_ctxt instance is allocated per thread
+ and it is re-used for each layer */
+ ps_ed_ctxt->lambda = ps_ctxt->ai4_lambda[i4_layer_no];
+ ps_ed_ctxt->i4_slice_type = ps_ctxt->i4_slice_type;
+ ps_ed_ctxt->level = ps_ctxt->i4_codec_level;
+ /* Point the ed_ctxt at the per-layer result buffers (none for layer 0; */
+ /* layers >= 3 leave the pointers from the previous iteration untouched) */
+ if(0 == i4_layer_no)
+ {
+ ps_ed_ctxt->ps_ed_pic = NULL;
+ ps_ed_ctxt->ps_ed = NULL;
+ ps_ed_ctxt->ps_ed_ctb_l1_pic = NULL;
+ ps_ed_ctxt->ps_ed_ctb_l1 = NULL;
+ }
+ else if(1 == i4_layer_no)
+ {
+ ps_ed_ctxt->ps_ed_pic = ps_ctxt->ps_layer1_buf;
+ ps_ed_ctxt->ps_ed = ps_ctxt->ps_layer1_buf;
+ ps_ed_ctxt->ps_ed_ctb_l1_pic = ps_ctxt->ps_ed_ctb_l1;
+ ps_ed_ctxt->ps_ed_ctb_l1 = ps_ctxt->ps_ed_ctb_l1;
+ ps_ctxt->ps_layer0_cur_satd = NULL;
+ ps_ctxt->ps_layer0_cur_mean = NULL;
+ }
+ else if(2 == i4_layer_no)
+ {
+ ps_ed_ctxt->ps_ed_pic = ps_ctxt->ps_layer2_buf;
+ ps_ed_ctxt->ps_ed = ps_ctxt->ps_layer2_buf;
+ ps_ed_ctxt->ps_ed_ctb_l1_pic = NULL;
+ ps_ed_ctxt->ps_ed_ctb_l1 = NULL;
+ ps_ctxt->ps_layer0_cur_satd = NULL;
+ ps_ctxt->ps_layer0_cur_mean = NULL;
+ }
+
+ /*Calculate the number of 4x4 blocks in a CTB in that layer*/
+ /*Divide block_wd by 4 to get no of 4x4 blks per CTB side; inc_ctb is the count per CTB*/
+ num_4x4_blks_lyr = block_wd >> 2;
+ inc_ctb = num_4x4_blks_lyr * num_4x4_blks_lyr;
+
+ ps_ed = ps_ed_ctxt->ps_ed;
+ ps_ed_ctb_l1 = ps_ed_ctxt->ps_ed_ctb_l1;
+
+ /* Pass 1 setup: decomposition enabled, pre intra skipped */
+ end_of_layer = 0;
+ skip_decomp = 0;
+ skip_pre_intra = 1;
+ //if( i4_layer_no >= ps_ctxt->i4_num_layers)
+ /* Compared against the context's layer count, not the local i4_num_layers that may */
+ /* have been forced to 4: the last real layer has no coarser layer to decompose into */
+ if(i4_layer_no >= (ps_ctxt->i4_num_layers - 1))
+ {
+ skip_decomp = 1;
+ }
+ /* ------------ Loop over all the CTB rows --------------- */
+ while(0 == end_of_layer)
+ {
+ job_queue_t *ps_pre_enc_job;
+ /* Passed as 0 in this pass; only computed in the pre intra pass below */
+ WORD32 num_4x4_blks_ctb_y = 0;
+ WORD32 num_4x4_blks_last_ctb_x = 0;
+
+ /* Get the current row from the job queue */
+ ps_pre_enc_job = (job_queue_t *)ihevce_pre_enc_grp_get_next_job(
+ pv_multi_thrd_ctxt, (DECOMP_JOB_LYR0 + i4_layer_no), 1, i4_ping_pong);
+
+ pu1_wkg_mem = ps_ctxt->pu1_wkg_mem;
+
+ /* If all rows are done, set the end of layer flag to 1, */
+ if(NULL == ps_pre_enc_job)
+ {
+ end_of_layer = 1;
+ }
+ else
+ {
+ /* Obtain the current row's details from the job */
+ row_block_no = ps_pre_enc_job->s_job_info.s_decomp_job_info.i4_vert_unit_row_no;
+ /* Remember the row so the pre intra pass can replay it */
+ ps_ctxt->as_layers[i4_layer_no].ai4_curr_row_no[idx] = row_block_no;
+ ht_offset = row_block_no * block_ht;
+
+ if(row_block_no < (num_row_blocks))
+ {
+ /* Destination row start in the next (half height) layer */
+ pu1_dst = ps_ctxt->as_layers[i4_layer_no + 1].pu1_inp +
+ ((block_ht >> 1) * dst_stride * row_block_no);
+
+ /*L0 8x8 curr satd for qp mod*/
+ if(i4_layer_no == 0)
+ {
+ /* Offset by the number of 8x8 blocks in all preceding CTB rows */
+ ps_ctxt->ps_layer0_cur_satd =
+ ps_layer0_cur_satd + (row_block_no * num_col_blks /*num ctbs*/ *
+ (block_wd >> 3) * (block_ht >> 3));
+ ps_ctxt->ps_layer0_cur_mean =
+ ps_layer0_cur_mean + (row_block_no * num_col_blks /*num ctbs*/ *
+ (block_wd >> 3) * (block_ht >> 3));
+ }
+
+ /* call the row level processing function */
+ ihevce_decomp_pre_intra_process_row(
+ pu1_src,
+ src_stride,
+ pu1_dst,
+ dst_stride,
+ i4_layer_wd,
+ i4_layer_ht,
+ pu1_wkg_mem,
+ ht_offset,
+ block_ht,
+ block_wd,
+ i4_cu_aligned_pic_wd,
+ i4_cu_aligned_pic_ht,
+ num_col_blks,
+ i4_layer_no,
+ ps_ed_ctxt,
+ ps_ed,
+ ps_ed_ctb_l1,
+ ps_ctxt->ps_layer0_cur_satd,
+ ps_ctxt->ps_layer0_cur_mean,
+ num_4x4_blks_ctb_y,
+ num_4x4_blks_last_ctb_x,
+ skip_decomp,
+ skip_pre_intra,
+ row_block_no,
+ ps_ctxt->i4_enable_noise_detection,
+ ps_ctxt->ps_ctb_analyse,
+ &ps_ctxt->s_ipe_optimised_function_list,
+ &ps_ctxt->s_cmn_opt_func);
+
+ /*When decomposition is done from L1 to L2
+ pre intra analysis is done on L1*/
+ /* NOTE: the body below is commented out, so this branch currently has no effect */
+ if(i4_layer_no == 1 || i4_layer_no == 2)
+ {
+ // ps_ed = ps_ed_ctxt->ps_ed +
+ // (row_block_no * inc_ctb * (num_col_blks));
+ }
+ }
+ idx++;
+ /* set the output dependency */
+ ihevce_pre_enc_grp_job_set_out_dep(
+ pv_multi_thrd_ctxt, ps_pre_enc_job, i4_ping_pong);
+ }
+ }
+ /* Number of rows recorded in ai4_curr_row_no[] for this layer */
+ ps_ctxt->as_layers[i4_layer_no].i4_num_rows_processed = idx;
+
+ ihevce_ed_frame_init(ps_ed_ctxt, i4_layer_no);
+
+ /* Layer 1 at quality preset P6 with temporal layer id above TEMPORAL_LAYER_DISABLE: */
+ /* fill the L1 8x8 best sad / sad cost entries of every CTB with 0x7fffffff. */
+ /* (the inner preset check repeats the outer one) */
+ if((1 == i4_layer_no) && (IHEVCE_QUALITY_P6 == ps_ctxt->i4_quality_preset))
+ {
+ WORD32 vert_ctr, ctb_ctr, i;
+ WORD32 ctb_ctr_blks = ps_ctxt->as_layers[1].i4_num_col_blks;
+ WORD32 vert_ctr_blks = ps_ctxt->as_layers[1].i4_num_row_blks;
+
+ if((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6) &&
+ (ps_lap_out_prms->i4_temporal_lyr_id > TEMPORAL_LAYER_DISABLE))
+ {
+ for(vert_ctr = 0; vert_ctr < vert_ctr_blks; vert_ctr++)
+ {
+ /* Row stride of the L1 CTB array is i4_num_ctbs_horz */
+ ihevce_ed_ctb_l1_t *ps_ed_ctb_row_l1 =
+ ps_ctxt->ps_ed_ctb_l1 + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
+
+ for(ctb_ctr = 0; ctb_ctr < ctb_ctr_blks; ctb_ctr++)
+ {
+ ihevce_ed_ctb_l1_t *ps_ed_ctb_curr_l1 = ps_ed_ctb_row_l1 + ctb_ctr;
+ for(i = 0; i < 16; i++)
+ {
+ ps_ed_ctb_curr_l1->i4_best_sad_cost_8x8_l1_ipe[i] = 0x7fffffff;
+ ps_ed_ctb_curr_l1->i4_best_sad_8x8_l1_ipe[i] = 0x7fffffff;
+ }
+ }
+ }
+ }
+ }
+
+ /* Pass 2 gate: decide whether pre intra analysis runs for this layer */
+#if DISABLE_L2_IPE_IN_PB_L1_IN_B
+ if(((2 == i4_layer_no) && (ps_lap_out_prms->i4_pic_type == IV_I_FRAME ||
+ ps_lap_out_prms->i4_pic_type == IV_IDR_FRAME)) ||
+ ((1 == i4_layer_no) &&
+ (ps_lap_out_prms->i4_temporal_lyr_id <= TEMPORAL_LAYER_DISABLE)) ||
+ ((IHEVCE_QUALITY_P6 != ps_ctxt->i4_quality_preset) && (0 != i4_layer_no)))
+#else
+ /* Any layer except 0, unless (preset is P6 and temporal layer id > TEMPORAL_LAYER_DISABLE) */
+ if((0 != i4_layer_no) &&
+ (1 != ((IHEVCE_QUALITY_P6 == ps_ctxt->i4_quality_preset) &&
+ (ps_lap_out_prms->i4_temporal_lyr_id > TEMPORAL_LAYER_DISABLE))))
+#endif
+ {
+ WORD32 i4_num_rows = ps_ctxt->as_layers[i4_layer_no].i4_num_rows_processed;
+
+ src_stride = ps_ctxt->as_layers[i4_layer_no].i4_inp_stride;
+ pu1_src = ps_ctxt->as_layers[i4_layer_no].pu1_inp;
+ i4_layer_wd = ps_ctxt->as_layers[i4_layer_no].i4_actual_wd;
+ i4_layer_ht = ps_ctxt->as_layers[i4_layer_no].i4_actual_ht;
+ pu1_dst = ps_ctxt->as_layers[i4_layer_no + 1].pu1_inp;
+ dst_stride = ps_ctxt->as_layers[i4_layer_no + 1].i4_inp_stride;
+ block_wd = ps_ctxt->as_layers[i4_layer_no].i4_decomp_blk_wd;
+ block_ht = ps_ctxt->as_layers[i4_layer_no].i4_decomp_blk_ht;
+ num_col_blks = ps_ctxt->as_layers[i4_layer_no].i4_num_col_blks;
+ num_row_blocks = ps_ctxt->as_layers[i4_layer_no].i4_num_row_blks;
+ i4_cu_aligned_pic_wd = ps_frm_ctb_prms->i4_cu_aligned_pic_wd;
+ i4_cu_aligned_pic_ht = ps_frm_ctb_prms->i4_cu_aligned_pic_ht;
+
+ /* register ed_ctxt buffer pointer */
+ ps_ed_ctxt = ps_ctxt->ps_ed_ctxt;
+
+ /* initialize ed_ctxt here */
+ /* init is done here because only one ed_ctxt instance is allocated per thread
+ and it is re-used for each layer */
+ ps_ed_ctxt->lambda = ps_ctxt->ai4_lambda[i4_layer_no];
+ ps_ed_ctxt->i4_slice_type = ps_ctxt->i4_slice_type;
+ ps_ed_ctxt->level = ps_ctxt->i4_codec_level;
+ if(1 == i4_layer_no)
+ {
+ ps_ed_ctxt->ps_ed_pic = ps_ctxt->ps_layer1_buf;
+ ps_ed_ctxt->ps_ed = ps_ctxt->ps_layer1_buf;
+ ps_ed_ctxt->ps_ed_ctb_l1_pic = ps_ctxt->ps_ed_ctb_l1;
+ ps_ed_ctxt->ps_ed_ctb_l1 = ps_ctxt->ps_ed_ctb_l1;
+ ps_ctxt->ps_layer0_cur_satd = NULL;
+ ps_ctxt->ps_layer0_cur_mean = NULL;
+ }
+ else if(2 == i4_layer_no)
+ {
+ ps_ed_ctxt->ps_ed_pic = ps_ctxt->ps_layer2_buf;
+ ps_ed_ctxt->ps_ed = ps_ctxt->ps_layer2_buf;
+ ps_ed_ctxt->ps_ed_ctb_l1_pic = NULL;
+ ps_ed_ctxt->ps_ed_ctb_l1 = NULL;
+ ps_ctxt->ps_layer0_cur_satd = NULL;
+ ps_ctxt->ps_layer0_cur_mean = NULL;
+ }
+
+ /*Calculate the number of 4x4 blocks in a CTB in that layer*/
+ /*Divide block_wd by 4 to get no of 4x4 blks per CTB side; inc_ctb is the count per CTB*/
+ num_4x4_blks_lyr = block_wd >> 2;
+ inc_ctb = num_4x4_blks_lyr * num_4x4_blks_lyr;
+
+ ps_ed = ps_ed_ctxt->ps_ed;
+ ps_ed_ctb_l1 = ps_ed_ctxt->ps_ed_ctb_l1;
+ /* Pass 2 setup: decomposition skipped, pre intra enabled */
+ skip_decomp = 1;
+ skip_pre_intra = 0;
+ /* Replay exactly the rows this thread recorded during pass 1 */
+ for(idx = 0; idx < i4_num_rows; idx++)
+ {
+ WORD32 num_4x4_blks_ctb_y = 0;
+ WORD32 num_4x4_blks_last_ctb_x = 0;
+
+ pu1_wkg_mem = ps_ctxt->pu1_wkg_mem;
+
+ {
+ /* Obtain the current row's number recorded during the decomp pass */
+ row_block_no = ps_ctxt->as_layers[i4_layer_no].ai4_curr_row_no[idx];
+ ht_offset = row_block_no * block_ht;
+
+ if(row_block_no < (num_row_blocks))
+ {
+ pu1_dst = ps_ctxt->as_layers[i4_layer_no + 1].pu1_inp +
+ ((block_ht >> 1) * dst_stride * row_block_no);
+
+ if(i4_layer_no == 1 || i4_layer_no == 2)
+ {
+ /* Start of this CTB row in the per-4x4 and per-CTB result buffers */
+ ps_ed = ps_ed_ctxt->ps_ed + (row_block_no * inc_ctb * (num_col_blks));
+ ps_ed_ctb_l1 = ps_ed_ctxt->ps_ed_ctb_l1 + (row_block_no * num_col_blks);
+
+ ps_ed_ctxt->i4_quality_preset = ps_ctxt->i4_quality_preset;
+ /* Default: complete CTB in both directions */
+ num_4x4_blks_ctb_y = block_ht >> 2;
+ num_4x4_blks_last_ctb_x = block_wd >> 2;
+
+ /* Last CTB row: partial height rounded up to whole 4x4 blocks */
+ if(row_block_no == num_row_blocks - 1)
+ {
+ if(i4_layer_ht % block_ht)
+ {
+ num_4x4_blks_ctb_y = ((i4_layer_ht % block_ht) + 3) >> 2;
+ }
+ }
+
+ /* Last CTB column: partial width rounded up to whole 4x4 blocks */
+ if(i4_layer_wd % block_wd)
+ {
+ num_4x4_blks_last_ctb_x = ((i4_layer_wd % block_wd) + 3) >> 2;
+ }
+ }
+
+ /* call the row level processing function */
+ /* (noise detection argument is 0 and ctb analyse pointer is NULL in this pass) */
+ ihevce_decomp_pre_intra_process_row(
+ pu1_src,
+ src_stride,
+ pu1_dst,
+ dst_stride,
+ i4_layer_wd,
+ i4_layer_ht,
+ pu1_wkg_mem,
+ ht_offset,
+ block_ht,
+ block_wd,
+ i4_cu_aligned_pic_wd,
+ i4_cu_aligned_pic_ht,
+ num_col_blks,
+ i4_layer_no,
+ ps_ed_ctxt,
+ ps_ed,
+ ps_ed_ctb_l1,
+ ps_ctxt->ps_layer0_cur_satd,
+ ps_ctxt->ps_layer0_cur_mean,
+ num_4x4_blks_ctb_y,
+ num_4x4_blks_last_ctb_x,
+ skip_decomp,
+ skip_pre_intra,
+ row_block_no,
+ 0,
+ NULL,
+ &ps_ctxt->s_ipe_optimised_function_list,
+ &ps_ctxt->s_cmn_opt_func);
+ }
+ }
+ /* Flag this L1 row as pre intra done in the multi thread context */
+ if(1 == i4_layer_no)
+ {
+ ps_multi_thrd->aai4_l1_pre_intra_done[i4_ping_pong][row_block_no] = 1;
+ }
+ }
+ /* Reset the per-layer row bookkeeping for the next frame */
+ for(idx = 0; idx < MAX_NUM_CTB_ROWS_FRM; idx++)
+ {
+ ps_ctxt->as_layers[i4_layer_no].ai4_curr_row_no[idx] = -1;
+ }
+ ps_ctxt->as_layers[i4_layer_no].i4_num_rows_processed = 0;
+ }
+
+ /* Pre intra not run above: still flag the recorded L1 rows as done and reset bookkeeping */
+#if DISABLE_L2_IPE_IN_PB_L1_IN_B
+ if((IHEVCE_QUALITY_P6 == ps_ctxt->i4_quality_preset) &&
+ (((i4_layer_no == 2) && (ps_lap_out_prms->i4_pic_type == ISLICE)) ||
+ ((i4_layer_no == 1) && (ps_lap_out_prms->i4_temporal_lyr_id > TEMPORAL_LAYER_DISABLE))))
+ {
+ WORD32 i4_num_rows = ps_ctxt->as_layers[i4_layer_no].i4_num_rows_processed;
+ if(1 == i4_layer_no)
+ {
+ for(idx = 0; idx < i4_num_rows; idx++)
+ {
+ row_block_no = ps_ctxt->as_layers[i4_layer_no].ai4_curr_row_no[idx];
+
+ {
+ ps_multi_thrd->aai4_l1_pre_intra_done[i4_ping_pong][row_block_no] = 1;
+ }
+ }
+ }
+ for(idx = 0; idx < MAX_NUM_CTB_ROWS_FRM; idx++)
+ {
+ ps_ctxt->as_layers[i4_layer_no].ai4_curr_row_no[idx] = -1;
+ }
+ ps_ctxt->as_layers[i4_layer_no].i4_num_rows_processed = 0;
+ }
+#else
+ /* Complement (for layers other than 0) of the pass 2 gate in the #else branch above */
+ if((i4_layer_no != 0) && ((IHEVCE_QUALITY_P6 == ps_ctxt->i4_quality_preset) &&
+ (ps_lap_out_prms->i4_temporal_lyr_id > TEMPORAL_LAYER_DISABLE)))
+ {
+ WORD32 i4_num_rows = ps_ctxt->as_layers[i4_layer_no].i4_num_rows_processed;
+ for(idx = 0; idx < i4_num_rows; idx++)
+ {
+ row_block_no = ps_ctxt->as_layers[i4_layer_no].ai4_curr_row_no[idx];
+ if(1 == i4_layer_no)
+ {
+ ps_multi_thrd->aai4_l1_pre_intra_done[i4_ping_pong][row_block_no] = 1;
+ }
+ }
+ for(idx = 0; idx < MAX_NUM_CTB_ROWS_FRM; idx++)
+ {
+ ps_ctxt->as_layers[i4_layer_no].ai4_curr_row_no[idx] = -1;
+ }
+ ps_ctxt->as_layers[i4_layer_no].i4_num_rows_processed = 0;
+ }
+#endif
+ }
+}
+
+/*!
+************************************************************************
+* \brief
+*    Reports how many memory records the decomp pre intra module uses.
+*
+* \return
+*    NUM_DECOMP_PRE_INTRA_MEM_RECS
+************************************************************************
+*/
+WORD32 ihevce_decomp_pre_intra_get_num_mem_recs(void)
+{
+    const WORD32 i4_num_recs = NUM_DECOMP_PRE_INTRA_MEM_RECS;
+
+    return i4_num_recs;
+}
+
+/*!
+************************************************************************
+* @brief
+*    Fills in size, memory type and alignment for each memory record
+*    requested by the decomp pre intra module.
+*
+* @param[out] ps_mem_tab        : record table; the DECOMP_PRE_INTRA_CTXT,
+*                                 DECOMP_PRE_INTRA_THRDS_CTXT and
+*                                 DECOMP_PRE_INTRA_ED_CTXT entries are written
+* @param[in]  i4_num_proc_thrds : number of processing threads; scales the
+*                                 per-thread records
+* @param[in]  i4_mem_space      : memory type, stored as IV_MEM_TYPE_T
+*
+* @return
+*    NUM_DECOMP_PRE_INTRA_MEM_RECS
+************************************************************************
+*/
+WORD32 ihevce_decomp_pre_intra_get_mem_recs(
+    iv_mem_rec_t *ps_mem_tab, WORD32 i4_num_proc_thrds, WORD32 i4_mem_space)
+{
+    /* Every record lives in the same memory space */
+    const IV_MEM_TYPE_T e_mem_space = (IV_MEM_TYPE_T)i4_mem_space;
+    iv_mem_rec_t *ps_rec;
+
+    /* Sizes are requested assuming worst case requirements */
+
+    /* Record 1: module (master) context, a single instance */
+    ps_rec = &ps_mem_tab[DECOMP_PRE_INTRA_CTXT];
+    ps_rec->i4_mem_size = sizeof(ihevce_decomp_pre_intra_master_ctxt_t);
+    ps_rec->e_mem_type = e_mem_space;
+    ps_rec->i4_mem_alignment = 8;
+
+    /* Record 2: one thread context per processing thread */
+    ps_rec = &ps_mem_tab[DECOMP_PRE_INTRA_THRDS_CTXT];
+    ps_rec->i4_mem_size = i4_num_proc_thrds * sizeof(ihevce_decomp_pre_intra_ctxt_t);
+    ps_rec->e_mem_type = e_mem_space;
+    ps_rec->i4_mem_alignment = 8;
+
+    /* Record 3: one early decision context per processing thread */
+    ps_rec = &ps_mem_tab[DECOMP_PRE_INTRA_ED_CTXT];
+    ps_rec->i4_mem_size = i4_num_proc_thrds * sizeof(ihevce_ed_ctxt_t);
+    ps_rec->e_mem_type = e_mem_space;
+    ps_rec->i4_mem_alignment = 8;
+
+    return NUM_DECOMP_PRE_INTRA_MEM_RECS;
+}
+
+/*!
+************************************************************************
+* @brief
+*    Init decomp pre intra context.
+*
+*    Derives the layer dimensions once, then fills one thread context and
+*    one early decision context per processing thread from the memory
+*    records in ps_mem_tab.
+*
+* @param[in] ps_mem_tab             : memory records; pv_base of the three
+*                                     decomp pre intra records is used
+* @param[in] ps_init_prms           : static config (cu sizes, target
+*                                     resolution, quality preset, vqet flags,
+*                                     arch type)
+* @param[in] i4_num_proc_thrds      : number of processing threads
+* @param[in] ps_func_selector       : stored in every early decision context
+* @param[in] i4_resolution_id       : index into as_tgt_params
+* @param[in] u1_is_popcnt_available : forwarded to the common utils router
+*
+* @return
+*    Master context handle (as void *)
+************************************************************************
+*/
+void *ihevce_decomp_pre_intra_init(
+    iv_mem_rec_t *ps_mem_tab,
+    ihevce_static_cfg_params_t *ps_init_prms,
+    WORD32 i4_num_proc_thrds,
+    func_selector_t *ps_func_selector,
+    WORD32 i4_resolution_id,
+    UWORD8 u1_is_popcnt_available)
+{
+    /* Base addresses of the three memory records owned by this module */
+    ihevce_decomp_pre_intra_master_ctxt_t *ps_master_ctxt =
+        (ihevce_decomp_pre_intra_master_ctxt_t *)ps_mem_tab[DECOMP_PRE_INTRA_CTXT].pv_base;
+    ihevce_decomp_pre_intra_ctxt_t *ps_ctxt =
+        (ihevce_decomp_pre_intra_ctxt_t *)ps_mem_tab[DECOMP_PRE_INTRA_THRDS_CTXT].pv_base;
+    ihevce_ed_ctxt_t *ps_ed_ctxt =
+        (ihevce_ed_ctxt_t *)ps_mem_tab[DECOMP_PRE_INTRA_ED_CTXT].pv_base;
+
+    WORD32 a_wd[MAX_NUM_HME_LAYERS], a_ht[MAX_NUM_HME_LAYERS];
+    WORD32 a_disp_wd[MAX_NUM_LAYERS], a_disp_ht[MAX_NUM_LAYERS];
+    WORD32 n_tot_layers;
+    WORD32 thread_no, layer_no, row_idx;
+
+    /* Min cu size in pixels, from its log2 value in the config params */
+    WORD32 min_cu_size = 1 << ps_init_prms->s_config_prms.i4_min_log2_cu_size;
+
+    /* If CTB size = 64: decomp block is 64 for L0, 32 for L1, 16 for L2, 8 for L3 */
+    const WORD32 max_ctb_size = 1 << ps_init_prms->s_config_prms.i4_max_log2_cu_size;
+
+    /* Layer 0 dimensions: target width / height plus the alignment term */
+    a_wd[0] = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width +
+              SET_CTB_ALIGN(
+                  ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width,
+                  min_cu_size);
+    a_ht[0] = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height +
+              SET_CTB_ALIGN(
+                  ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height,
+                  min_cu_size);
+
+    /* Remaining layer dimensions and the total layer count */
+    n_tot_layers = hme_derive_num_layers(1, a_wd, a_ht, a_disp_wd, a_disp_ht);
+
+    ps_master_ctxt->i4_num_proc_thrds = i4_num_proc_thrds;
+
+    /* One thread context and one early decision context per thread */
+    for(thread_no = 0; thread_no < ps_master_ctxt->i4_num_proc_thrds;
+        thread_no++, ps_ctxt++, ps_ed_ctxt++)
+    {
+        ps_master_ctxt->aps_decomp_pre_intra_thrd_ctxt[thread_no] = ps_ctxt;
+
+        ps_ctxt->i4_num_layers = n_tot_layers;
+        ps_ctxt->pu1_wkg_mem = &ps_ctxt->au1_wkg_mem[0];
+        ps_ctxt->ps_ed_ctxt = ps_ed_ctxt;
+
+        for(layer_no = 0; layer_no < n_tot_layers; layer_no++)
+        {
+            WORD32 decomp_blk_ht, decomp_blk_wd;
+            /* Layer 0 is not padded; every other layer gets 32 + 4 extra */
+            const WORD32 i4_pad = (0 == layer_no) ? 0 : (32 + 4);
+
+            ps_ctxt->as_layers[layer_no].i4_actual_wd = a_wd[layer_no];
+            ps_ctxt->as_layers[layer_no].i4_actual_ht = a_ht[layer_no];
+
+            /* Buffers and strides are registered later, per frame */
+            ps_ctxt->as_layers[layer_no].i4_inp_stride = 0;
+            ps_ctxt->as_layers[layer_no].pu1_inp = NULL;
+
+            /* No rows processed yet */
+            ps_ctxt->as_layers[layer_no].i4_num_rows_processed = 0;
+            for(row_idx = 0; row_idx < MAX_NUM_CTB_ROWS_FRM; row_idx++)
+            {
+                ps_ctxt->as_layers[layer_no].ai4_curr_row_no[row_idx] = -1;
+            }
+
+            ps_ctxt->as_layers[layer_no].i4_padded_ht = a_ht[layer_no] + i4_pad;
+            ps_ctxt->as_layers[layer_no].i4_padded_wd = a_wd[layer_no] + i4_pad;
+
+            /* Decomp block size halves with every layer */
+            ps_ctxt->as_layers[layer_no].i4_decomp_blk_ht = max_ctb_size >> layer_no;
+            ps_ctxt->as_layers[layer_no].i4_decomp_blk_wd = max_ctb_size >> layer_no;
+
+            decomp_blk_ht = ps_ctxt->as_layers[layer_no].i4_decomp_blk_ht;
+            decomp_blk_wd = ps_ctxt->as_layers[layer_no].i4_decomp_blk_wd;
+
+            /* Block counts, rounded up so partial blocks are included */
+            ps_ctxt->as_layers[layer_no].i4_num_row_blks =
+                ((a_ht[layer_no] + (decomp_blk_ht - 1)) / decomp_blk_ht);
+            ps_ctxt->as_layers[layer_no].i4_num_col_blks =
+                ((a_wd[layer_no] + (decomp_blk_wd - 1)) / decomp_blk_wd);
+        }
+
+        ps_ed_ctxt->ps_func_selector = ps_func_selector;
+
+        /* Quality preset P7 is handled as P6 by this module */
+        ps_ctxt->i4_quality_preset =
+            ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;
+        if(IHEVCE_QUALITY_P7 == ps_ctxt->i4_quality_preset)
+        {
+            ps_ctxt->i4_quality_preset = IHEVCE_QUALITY_P6;
+        }
+
+        /* Noise detection needs both the control toggler bit and the */
+        /* noise preservation bit set in i4_vqet                      */
+        if((ps_init_prms->s_coding_tools_prms.i4_vqet &
+            (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER)) &&
+           (ps_init_prms->s_coding_tools_prms.i4_vqet &
+            (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_NOISE_PRESERVATION)))
+        {
+            ps_ctxt->i4_enable_noise_detection = 1;
+        }
+        else
+        {
+            ps_ctxt->i4_enable_noise_detection = 0;
+        }
+
+        /* Fill the function tables for the configured architecture */
+        ihevce_cmn_utils_instr_set_router(
+            &ps_ctxt->s_cmn_opt_func, u1_is_popcnt_available, ps_init_prms->e_arch_type);
+
+        ihevce_ipe_instr_set_router(
+            &ps_ctxt->s_ipe_optimised_function_list, ps_init_prms->e_arch_type);
+    }
+
+    /* return the handle to caller */
+    return ((void *)ps_master_ctxt);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_decomp_pre_intra_frame_init \endif
+*
+* \brief
+*    Frame initialization for decomp intra pre analysis. For every thread
+*    context it registers the layer buffers (layers 1 and above), the
+*    per-layer lambda, the slice type and the ctb analyse pointer.
+*
+* \param[in] pv_ctxt : pointer to module ctxt
+* \param[in] ppu1_decomp_lyr_bufs : pointer to array of layer buffer pointers
+*                                   (entry n belongs to layer n + 1)
+* \param[in] pi4_lyr_buf_stride : pointer to array of layer buffer strides
+*                                 (entry n belongs to layer n + 1)
+* \param[in] ps_layer1_buf : buffer of per 4x4 block results for layer 1
+* \param[in] ps_layer2_buf : buffer of per 4x4 block results for layer 2
+* \param[in] ps_ed_ctb_l1 : per ctb layer 1 result buffer
+* \param[in] i4_ol_sad_lambda_qf : lambda from which the layer 1 (3/4) and
+*                                  layer 2 (1/2) lambdas are derived
+* \param[in] i4_slice_type : slice type stored in every thread context
+* \param[in] ps_ctb_analyse : ctb analyse buffer stored in every thread context
+*
+* \return
+*    None
+*
+* \author
+*  Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_decomp_pre_intra_frame_init(
+    void *pv_ctxt,
+    UWORD8 **ppu1_decomp_lyr_bufs,
+    WORD32 *pi4_lyr_buf_stride,
+    ihevce_ed_blk_t *ps_layer1_buf,
+    ihevce_ed_blk_t *ps_layer2_buf,
+    ihevce_ed_ctb_l1_t *ps_ed_ctb_l1,
+    WORD32 i4_ol_sad_lambda_qf,
+    WORD32 i4_slice_type,
+    ctb_analyse_t *ps_ctb_analyse)
+{
+    ihevce_decomp_pre_intra_master_ctxt_t *ps_master_ctxt =
+        (ihevce_decomp_pre_intra_master_ctxt_t *)pv_ctxt;
+    /* Lower bound applied to the layer 1 and layer 2 lambdas */
+    const WORD32 i4_lambda_floor = 1 << LAMBDA_Q_SHIFT;
+    WORD32 thread_no;
+
+    for(thread_no = 0; thread_no < ps_master_ctxt->i4_num_proc_thrds; thread_no++)
+    {
+        ihevce_decomp_pre_intra_ctxt_t *ps_ctxt =
+            ps_master_ctxt->aps_decomp_pre_intra_thrd_ctxt[thread_no];
+        WORD32 layer_no;
+
+        /* L0 layer (actual input) is registered in process call */
+        for(layer_no = 1; layer_no < ps_ctxt->i4_num_layers; layer_no++)
+        {
+            WORD32 i4_lambda;
+
+            ps_ctxt->as_layers[layer_no].i4_inp_stride = pi4_lyr_buf_stride[layer_no - 1];
+            ps_ctxt->as_layers[layer_no].pu1_inp = ppu1_decomp_lyr_bufs[layer_no - 1];
+
+            /* Layers 1 and 2 get the buffers that hold the per 4x4 block */
+            /* results of pre intra analysis, plus a clamped lambda; any  */
+            /* other layer gets a lambda of -1                            */
+            switch(layer_no)
+            {
+            case 1:
+                ps_ctxt->ps_layer1_buf = ps_layer1_buf;
+                ps_ctxt->ps_ed_ctb_l1 = ps_ed_ctb_l1;
+
+                /* three quarters of the input lambda */
+                i4_lambda = (3 * i4_ol_sad_lambda_qf) >> 2;
+                if(i4_lambda_floor > i4_lambda)
+                {
+                    i4_lambda = i4_lambda_floor;
+                }
+                break;
+
+            case 2:
+                ps_ctxt->ps_layer2_buf = ps_layer2_buf;
+
+                /* half of the input lambda */
+                i4_lambda = i4_ol_sad_lambda_qf >> 1;
+                if(i4_lambda_floor > i4_lambda)
+                {
+                    i4_lambda = i4_lambda_floor;
+                }
+                break;
+
+            default:
+                i4_lambda = -1;
+                break;
+            }
+
+            /* Stores common to every layer */
+            ps_ctxt->ai4_lambda[layer_no] = i4_lambda;
+            ps_ctxt->i4_codec_level = 0;
+            ps_ctxt->i4_slice_type = i4_slice_type;
+        }
+
+        /* make the ps_ctb_analyse reference a part of the private context */
+        ps_ctxt->ps_ctb_analyse = ps_ctb_analyse;
+    }
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Bottom-up merge sort with intermediate results.
+*
+* @par Description:
+*  Sorts the input array in ascending order by repeatedly merging adjacent
+*  sorted runs. Each pass doubles the run length. The result of every second
+*  pass is written to a row of the output 2-D array; the passes in between
+*  go to a local scratch buffer of 64 entries.
+*
+* @param[in]
+*  pi4_input_val   : Input 1-D array (read only by the first pass)
+*  aai4_output_val : Output 2-D array containing elements sorted in sets of
+*                    4,16,64 etc.
+*  i4_length       : length of the array
+*  i4_ip_sort_level: Input sort level. Specifies the level upto which array is
+*                    sorted. It should be 1 if the array is unsorted. Should
+*                    be 4 if array is sorted in sets of 4.
+*  i4_op_sort_level: Output sort level. Specify the level upto which sorting
+*                    is required. If it is given as length of array it sorts
+*                    for whole array.
+*
+* @returns
+*  None
+*
+* @remarks
+*  The number of passes derived from the two sort levels is asserted to be
+*  even.
+*
+*******************************************************************************
+*/
+void ihevce_merge_sort(
+    WORD32 *pi4_input_val,
+    WORD32 aai4_output_val[][64],
+    WORD32 i4_length,
+    WORD32 i4_ip_sort_level,
+    WORD32 i4_op_sort_level)
+{
+    WORD32 ai4_scratch[64];
+    WORD32 *pi4_src = pi4_input_val;
+    WORD32 *pi4_dst = ai4_scratch;
+    WORD32 run_len = i4_ip_sort_level;
+    WORD32 num_passes;
+    WORD32 pass, offset;
+
+    GETRANGE(num_passes, i4_op_sort_level / i4_ip_sort_level);
+    num_passes -= 1;
+
+    /*** Written under the assumption that only the intermediate results for
+         runs of 4,16,64 etc. are needed, i.e. passes come in pairs ***/
+    ASSERT((num_passes % 2) == 0);
+
+    for(pass = 0; pass < num_passes; pass++)
+    {
+        const WORD32 pair_len = run_len * 2;
+
+        /** Merge every pair of adjacent runs of the current length **/
+        for(offset = 0; offset < i4_length; offset += pair_len)
+        {
+            const WORD32 *pi4_left = pi4_src;
+            const WORD32 *pi4_right = pi4_src + run_len;
+
+            if(pi4_left[run_len - 1] < pi4_right[0])
+            {
+                /*** Early exit: the two runs are already in order ***/
+                memcpy(pi4_dst, pi4_src, sizeof(WORD32) * run_len * 2);
+            }
+            else
+            {
+                WORD32 l = 0, r = 0, out = 0;
+
+                /** Take the smaller head; on a tie the left run wins **/
+                while((l < run_len) && (r < run_len))
+                {
+                    if(pi4_left[l] > pi4_right[r])
+                    {
+                        pi4_dst[out++] = pi4_right[r++];
+                    }
+                    else
+                    {
+                        pi4_dst[out++] = pi4_left[l++];
+                    }
+                }
+                /** Left run exhausted: drain the right run **/
+                while(r < run_len)
+                {
+                    pi4_dst[out++] = pi4_right[r++];
+                }
+                /** Right run exhausted: drain the left run **/
+                while(l < run_len)
+                {
+                    pi4_dst[out++] = pi4_left[l++];
+                }
+            }
+
+            pi4_src += pair_len;
+            pi4_dst += pair_len;
+        }
+
+        /** The output just written becomes the input of the next pass **/
+        pi4_src = pi4_dst - i4_length;
+
+        /** Odd passes hand the next pass the scratch buffer (its output is not
+            needed); even passes hand it a row of the output 2-D array **/
+        pi4_dst = (pass % 2) ? ai4_scratch : aai4_output_val[pass / 2];
+
+        run_len *= 2;
+    }
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_decomp_pre_intra_curr_frame_pre_intra_deinit \endif
+*
+* \brief
+*    Frame level wrap-up of the layer-1 pre-intra analysis. All the work is
+*    done only when i4_is_last_thread is 1; otherwise the function returns
+*    without touching ps_curr_out.
+*
+*    1. A histogram of the valid (!= -1) L1 4x4 SATDs below MAX_SATD_THRSHLD
+*       is built and the average of the lowest bins (starting at
+*       MIN_SATD_THRSHLD, until more than MIN_BLKS percent of the blocks are
+*       covered) is stored as ps_curr_out->i4_avg_noise_thrshld_4x4.
+*    2. Every valid 4x4 SATD is noise compensated in place (subtracted or
+*       floored, depending on SUB_NOISE_THRSHLD) and per CTB the 8x8, 16x16
+*       and 32x32 activity values of ps_curr_out->ps_ed_ctb_l1 are populated.
+*    3. Frame level sums, sums of squares and block counts of these
+*       activities are written to ps_curr_out.
+*
+* \param[in]     pv_pre_intra_ctxt : master context
+*                                    (ihevce_decomp_pre_intra_master_ctxt_t *);
+*                                    layer geometry is read from thread ctxt 0
+* \param[in,out] ps_curr_out       : L1 4x4 / CTB buffers are read and updated,
+*                                    frame level statistics are written
+* \param[in]     i4_is_last_thread : 1 => do the processing
+* \param[in]     ps_frm_ctb_prms   : supplies i4_num_ctbs_horz, used as the row
+*                                    stride of ps_ed_ctb_l1
+* \param[in]     i4_temporal_lyr_id        : unused
+* \param[in]     i4_enable_noise_detection : unused
+*
+* \return
+*    None
+*
+* \author
+*  Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_decomp_pre_intra_curr_frame_pre_intra_deinit(
+    void *pv_pre_intra_ctxt,
+    pre_enc_me_ctxt_t *ps_curr_out,
+    WORD32 i4_is_last_thread,
+    frm_ctb_ctxt_t *ps_frm_ctb_prms,
+    WORD32 i4_temporal_lyr_id,
+    WORD32 i4_enable_noise_detection)
+{
+    ihevce_decomp_pre_intra_master_ctxt_t *ps_pre_intra_master_ctxt =
+        (ihevce_decomp_pre_intra_master_ctxt_t *)pv_pre_intra_ctxt;
+    ihevce_decomp_pre_intra_ctxt_t *ps_pre_intra_ctxt =
+        ps_pre_intra_master_ctxt->aps_decomp_pre_intra_thrd_ctxt[0];
+
+    WORD32 i4_k;
+    WORD32 ctb_ctr, vert_ctr;
+
+    /* Frame level accumulators. Index 0 : "sum" flavour, 1 and 2 : "min"   */
+    /* (median of sorted values) flavours, see the per CTB code below       */
+    WORD32 ai4_curr_frame_8x8_sum_act[2] = { 0, 0 };
+    LWORD64 ai8_curr_frame_8x8_sum_act_sqr[2] = { 0, 0 };
+    WORD32 ai4_curr_frame_8x8_sum_blks[2] = { 0, 0 };
+    ULWORD64 u8_curr_frame_8x8_sum_act_sqr = 0;
+
+    LWORD64 ai8_curr_frame_16x16_sum_act_sqr[3] = { 0, 0, 0 };
+    WORD32 ai4_curr_frame_16x16_sum_act[3] = { 0, 0, 0 };
+    WORD32 ai4_curr_frame_16x16_sum_blks[3] = { 0, 0, 0 };
+
+    LWORD64 ai8_curr_frame_32x32_sum_act_sqr[3] = { 0, 0, 0 };
+    WORD32 ai4_curr_frame_32x32_sum_act[3] = { 0, 0, 0 };
+    WORD32 ai4_curr_frame_32x32_sum_blks[3] = { 0, 0, 0 };
+
+    (void)i4_temporal_lyr_id;
+    (void)i4_enable_noise_detection;
+
+    if(i4_is_last_thread == 1)
+    {
+        WORD32 i4_slice_type = ps_curr_out->s_slice_hdr.i1_slice_type;
+        //ps_pre_intra_ctxt->i4_slice_type;
+        WORD32 ctb_ctr_blks = ps_pre_intra_ctxt->as_layers[1].i4_num_col_blks;
+        WORD32 vert_ctr_blks = ps_pre_intra_ctxt->as_layers[1].i4_num_row_blks;
+        ihevce_ed_ctb_l1_t *ps_ed_ctb_pic_l1 = ps_curr_out->ps_ed_ctb_l1;
+        WORD32 block_wd = ps_pre_intra_ctxt->as_layers[1].i4_decomp_blk_wd;
+        /* Number of 4x4 entries per decomposition block (CTB) in L1 */
+        WORD32 inc_ctb = ((block_wd >> 2) * (block_wd >> 2));
+        ihevce_ed_blk_t *ps_ed_blk_l1 = ps_curr_out->ps_layer1_buf;
+        ihevce_ed_blk_t *ps_ed;
+        WORD32 i, j;
+        WORD32 i4_avg_noise_satd;
+        WORD32 k;
+        WORD32 i4_layer_wd = ps_pre_intra_ctxt->as_layers[1].i4_actual_wd;
+        WORD32 i4_layer_ht = ps_pre_intra_ctxt->as_layers[1].i4_actual_ht;
+
+        /* Calculate min noise threshold */
+        /* Min noise threshold is calculated by taking the average of the lowest */
+        /* MIN_BLKS percent satd values of the complete frame's 4x4 satds        */
+        //ihevce_ed_ctxt_t *ps_ed_ctxt = ps_pre_intra_ctxt->ps_ed_ctxt;
+        WORD32 i4_min_blk = ((MIN_BLKS * (i4_layer_wd >> 1) * (i4_layer_ht >> 1)) / 100);
+        WORD32 ai4_noise_thr_hstrgm[MAX_SATD_THRSHLD];
+        memset(&ai4_noise_thr_hstrgm[0], 0, (sizeof(WORD32) * MAX_SATD_THRSHLD));
+        ASSERT(!(USE_CUR_L0_SATD && USE_CUR_SATD));
+
+        /* Pass 1 : histogram of all valid 4x4 satds below MAX_SATD_THRSHLD */
+        for(vert_ctr = 0; vert_ctr < vert_ctr_blks; vert_ctr++)
+        {
+            ps_ed = ps_ed_blk_l1 + (vert_ctr * inc_ctb * (ctb_ctr_blks));
+            for(ctb_ctr = 0; ctb_ctr < ctb_ctr_blks; ctb_ctr++)
+            {
+                /* Walk the 4 x 4 x 4 = 64 4x4 entries of this CTB */
+                for(i = 0; i < 4; i++)
+                {
+                    for(j = 0; j < 4; j++)
+                    {
+                        for(k = 0; k < 4; k++)
+                        {
+                            /* -1 marks an entry without a satd value */
+                            if(-1 != (ps_ed + j * 4 + i * 16 + k)->i4_4x4_satd)
+                            {
+                                WORD32 i4_satd_lim;
+                                i4_satd_lim = (ps_ed + j * 4 + i * 16 + k)->i4_4x4_satd;
+                                /* Histogram creation for Noise threshold */
+                                if(i4_satd_lim < MAX_SATD_THRSHLD)
+                                {
+                                    ai4_noise_thr_hstrgm[i4_satd_lim]++;
+                                }
+                            }
+                        }
+                    }
+                }
+                ps_ed += inc_ctb;
+            }
+        }
+        {
+            WORD32 i4_total_blks = 0;
+            LWORD64 i8_acc_satd = 0;
+            for(i = MIN_SATD_THRSHLD; i < MAX_SATD_THRSHLD; i++)
+            {
+                i4_total_blks += ai4_noise_thr_hstrgm[i];
+                /* Widen before multiplying so that the product is formed in 64 bit */
+                i8_acc_satd += ((LWORD64)i * ai4_noise_thr_hstrgm[i]);
+
+                if(i4_total_blks > i4_min_blk)
+                    break;
+            }
+            /* Too few blocks to average. The explicit zero check avoids a     */
+            /* division by zero when i4_min_blk is 0 and no bin was populated  */
+            if((i4_total_blks < i4_min_blk) || (0 == i4_total_blks))
+            {
+                i4_avg_noise_satd = SATD_NOISE_FLOOR_THRESHOLD;
+            }
+            else
+            {
+                /* Rounded average; divide in 64 bit and narrow the quotient */
+                i4_avg_noise_satd =
+                    (WORD32)((i8_acc_satd + (i4_total_blks >> 1)) / i4_total_blks);
+            }
+        }
+
+        ps_curr_out->i4_avg_noise_thrshld_4x4 = i4_avg_noise_satd;
+
+        /* Pass 2 : noise compensation and activity population per CTB */
+        for(vert_ctr = 0; vert_ctr < vert_ctr_blks; vert_ctr++)
+        {
+            ihevce_ed_ctb_l1_t *ps_ed_ctb_row_l1 =
+                ps_ed_ctb_pic_l1 + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
+            ps_ed = ps_ed_blk_l1 + (vert_ctr * inc_ctb * (ctb_ctr_blks));
+
+            for(ctb_ctr = 0; ctb_ctr < ctb_ctr_blks; ctb_ctr++)
+            {
+                /*sum of (sum of L1_4x4 @ L1_8x8) @ L1_16x16 level */
+                WORD32 ai4_sum_sum_4x4_satd_16x16[4] = { 0, 0, 0, 0 };
+                /*min of (sum of L1_4x4 @ L1_8x8) @ L1_16x16 level */
+                WORD32 ai4_min_sum_4x4_satd_16x16[4] = {
+                    MAX_32BIT_VAL, MAX_32BIT_VAL, MAX_32BIT_VAL, MAX_32BIT_VAL
+                };
+                /*min of (min of L1_4x4 @ L1_8x8) @ L1_16x16 level */
+                WORD32 ai4_min_min_4x4_satd_16x16[4] = {
+                    MAX_32BIT_VAL, MAX_32BIT_VAL, MAX_32BIT_VAL, MAX_32BIT_VAL
+                };
+                WORD32 i4_sum_4x4_satd, i4_min_4x4_satd;
+                ihevce_ed_ctb_l1_t *ps_ed_ctb_curr_l1 = ps_ed_ctb_row_l1 + ctb_ctr;
+
+                WORD32 is_min_block_uncompensated_in_l32x32 = 0;
+
+                /*min of L1_4x4 @ L1_8x8*/
+                WORD32 ai4_min_satd_ctb[MAX_CTB_SIZE];
+                /*** This 2-D array will contain 4x4 satds sorted in ascending order in sets of 4,16,64 ***/
+                /*** For example : '5 10 2 7 6 12 3 1' array input will return '2 5 7 10 1 3 6 12' if sorted in sets of 4 ***/
+                WORD32 aai4_min_4_16_64_satd[3][MAX_CTB_SIZE];
+
+                /*sum of L1_4x4 @ L1_8x8*/
+                WORD32 ai4_sum_satd_ctb[MAX_CTB_SIZE >> 2];
+                /*** This 2-D array will contain 4x4 satds sorted in ascending order in sets of 4,16***/
+                WORD32 aai4_sum_4_16_satd_ctb[2][MAX_CTB_SIZE];
+
+                /* sum of (sum of L1_4x4 @ L1_8x8) @ L1_16x16 */
+                WORD32 ai4_sum_sum_satd_ctb[(MAX_CTB_SIZE >> 2) >> 2];
+
+                /*L1_32x32 = L0_64x64
+                so in L1_32x32 there are 64 L1_4x4blocks*/
+                for(i = 0; i < MAX_CTB_SIZE; i++)
+                {
+                    ai4_min_satd_ctb[i] = -1;
+                }
+                for(j = 0; j < 3; j++)
+                {
+                    for(i = 0; i < MAX_CTB_SIZE; i++)
+                    {
+                        aai4_min_4_16_64_satd[j][i] = -1;
+                    }
+                }
+                /*L1_32x32 = L0_64x64
+                so in L1_32x32 there are 16 L1_8x8blocks*/
+                for(i = 0; i < (MAX_CTB_SIZE >> 2); i++)
+                {
+                    ai4_sum_satd_ctb[i] = -1;
+                }
+                for(j = 0; j < 2; j++)
+                {
+                    for(i = 0; i < (MAX_CTB_SIZE >> 2); i++)
+                    {
+                        aai4_sum_4_16_satd_ctb[j][i] = -1;
+                    }
+                }
+                /*L1_32x32 = L0_64x64
+                so in L1_32x32 there are 4 L1_16x16blocks*/
+                for(i = 0; i < ((MAX_CTB_SIZE >> 2) >> 2); i++)
+                {
+                    ai4_sum_sum_satd_ctb[i] = 0;
+                }
+
+                /*Populate sum min 4x4 activty */
+                /*loop over the L1_16x16 blocks of the L1_32x32 block*/
+                for(i = 0; i < 4; i++)
+                {
+                    /*loop over the L1_8x8 blocks of the L1_16x16 block*/
+                    for(j = 0; j < 4; j++)
+                    {
+                        WORD32 i4_sum_satd_dumyy = 0;
+                        WORD32 i4_num_satd_blks = 0;
+                        /*loop over the L1_4x4 blocks of the L1_8x8 block*/
+                        for(k = 0; k < 4; k++)
+                        {
+                            WORD32 i4_satd_lim;
+                            i4_satd_lim = (ps_ed + j * 4 + i * 16 + k)->i4_4x4_satd;
+
+                            /*complete ctb will not have i4_4x4_satd = -1*/
+                            if(-1 != i4_satd_lim)
+                            {
+#if SUB_NOISE_THRSHLD
+                                /* Remove the noise estimate, clamping at zero */
+                                i4_satd_lim = i4_satd_lim - i4_avg_noise_satd;
+                                if(i4_satd_lim < 0)
+                                {
+                                    i4_satd_lim = 0;
+                                }
+#else
+                                /* Floor the satd at the noise estimate */
+                                if(i4_satd_lim < i4_avg_noise_satd)
+                                {
+                                    i4_satd_lim = i4_avg_noise_satd;
+                                }
+#endif
+                                i4_num_satd_blks++;
+                                /*populate 4x4 data to calculate modulation index */
+                                (ps_ed + j * 4 + i * 16 + k)->i4_4x4_satd = i4_satd_lim;
+
+                                i4_sum_satd_dumyy += i4_satd_lim;
+                                ai4_min_satd_ctb[j * 4 + i * 16 + k] = i4_satd_lim;
+                            }
+                        }
+                        if(i4_num_satd_blks != 0)
+                        {
+                            /*make the sum of satd always for 4 blocks even it is incomplete ctb */
+                            i4_sum_satd_dumyy = i4_sum_satd_dumyy * 4 / i4_num_satd_blks;
+                        }
+                        else
+                        {
+                            /* no valid 4x4 inside this 8x8 */
+                            i4_sum_satd_dumyy = -1;
+                        }
+                        /*sum of L1_4x4 @ L1_8x8block level*/
+                        ai4_sum_satd_ctb[j + i * 4] = i4_sum_satd_dumyy;
+                        /*sum of L1_8x8 @ L1_16x16block level*/
+                        ai4_sum_sum_satd_ctb[i] += i4_sum_satd_dumyy;
+                        /*store sum of 4x4 @ L1_8x8block level*/
+                        ps_ed_ctb_curr_l1->i4_sum_4x4_satd[i * 4 + j] = i4_sum_satd_dumyy;
+                        /*store min of 4x4 @ L1_8x8block level */
+                        //ps_ed_ctb_curr_l1->i4_min_4x4_satd[i * 4 + j] = i4_min_satd_dumyy;
+                    }
+                }
+                {
+                    WORD32 i4_array_length = sizeof(ai4_min_satd_ctb) / sizeof(WORD32);
+
+                    /*** This function will sort 64 elements in array ai4_min_satd_ctb in ascending order to ***/
+                    /*** 3 arrays in sets of 4,16,64 into the 2-D array aai4_min_4_16_64_satd                ***/
+                    ihevce_merge_sort(
+                        &ai4_min_satd_ctb[0], aai4_min_4_16_64_satd, i4_array_length, 1, 64);
+
+                    i4_array_length = sizeof(ai4_sum_satd_ctb) / sizeof(WORD32);
+
+                    /*** This function will sort 16 elements in array ai4_sum_satd_ctb in ascending order to ***/
+                    /*** 2 arrays in sets of 4,16 into the 2-D array aai4_sum_4_16_satd_ctb                  ***/
+                    ihevce_merge_sort(
+                        &ai4_sum_satd_ctb[0], aai4_sum_4_16_satd_ctb, i4_array_length, 1, 16);
+                }
+
+                /*Populate avg satd to calculate MI and activity factors*/
+                for(i = 0; i < 4; i++)
+                {
+                    WORD32 is_min_block_uncompensated_in_l116x16 = 0;
+                    ps_ed_ctb_curr_l1->i4_16x16_satd[i][0] = -1;
+                    ps_ed_ctb_curr_l1->i4_16x16_satd[i][1] = -1;
+                    ps_ed_ctb_curr_l1->i4_16x16_satd[i][2] = -1;
+
+                    for(j = 0; j < 4; j++)
+                    {
+                        /* "min" of an 8x8 is the MEDIAN_CU_TU-th entry of its sorted set of 4 */
+                        ps_ed_ctb_curr_l1->i4_min_4x4_satd[i * 4 + j] =
+                            aai4_min_4_16_64_satd[0][i * 16 + j * 4 + MEDIAN_CU_TU];
+                        /*Accumulate the sum of 8*8 activities in the current layer (16*16 CU in L0)*/
+                        i4_sum_4x4_satd = ps_ed_ctb_curr_l1->i4_sum_4x4_satd[i * 4 + j];
+                        i4_min_4x4_satd = ps_ed_ctb_curr_l1->i4_min_4x4_satd[i * 4 + j];
+                        ps_ed_ctb_curr_l1->i4_8x8_satd[i * 4 + j][0] = -1;
+                        ps_ed_ctb_curr_l1->i4_8x8_satd[i * 4 + j][1] = -1;
+                        ASSERT(-2 != i4_sum_4x4_satd);
+
+                        if((-1 != i4_sum_4x4_satd))
+                        {
+                            WORD32 not_skipped = 1;
+
+                            if((i4_slice_type == ISLICE) || (1 == not_skipped))
+                            {
+                                is_min_block_uncompensated_in_l116x16 = 1;
+                                is_min_block_uncompensated_in_l32x32 = 1;
+
+                                /* Squares are formed in 64 bit: a WORD32 product */
+                                /* overflows once the satd exceeds 46340          */
+                                u8_curr_frame_8x8_sum_act_sqr +=
+                                    (ULWORD64)((LWORD64)i4_sum_4x4_satd * i4_sum_4x4_satd);
+
+                                ai4_curr_frame_8x8_sum_act[0] += i4_sum_4x4_satd;
+                                ai8_curr_frame_8x8_sum_act_sqr[0] +=
+                                    ((LWORD64)i4_sum_4x4_satd * i4_sum_4x4_satd);
+                                ai4_curr_frame_8x8_sum_blks[0] += 1;
+                                ai4_curr_frame_8x8_sum_act[1] += i4_min_4x4_satd;
+                                ai8_curr_frame_8x8_sum_act_sqr[1] +=
+                                    ((LWORD64)i4_min_4x4_satd * i4_min_4x4_satd);
+                                ai4_curr_frame_8x8_sum_blks[1] += 1;
+                            }
+
+                            ps_ed_ctb_curr_l1->i4_8x8_satd[i * 4 + j][0] = i4_sum_4x4_satd;
+                            ps_ed_ctb_curr_l1->i4_8x8_satd[i * 4 + j][1] = i4_min_4x4_satd;
+                        }
+                        else
+                        {
+                            /* An invalid 8x8 invalidates the enclosing 16x16 */
+                            ai4_sum_sum_4x4_satd_16x16[i] = MAX_32BIT_VAL;
+                            ai4_min_sum_4x4_satd_16x16[i] = MAX_32BIT_VAL;
+                            ai4_min_min_4x4_satd_16x16[i] = MAX_32BIT_VAL;
+                        }
+                    }
+
+                    //if(1 == is_min_block_comensated_in_l116x16)
+                    {
+                        /* These two are taken from the sorted arrays regardless */
+                        /* of the MAX_32BIT_VAL marking done in the loop above   */
+                        ai4_min_sum_4x4_satd_16x16[i] =
+                            aai4_sum_4_16_satd_ctb[0][i * 4 + MEDIAN_CU_TU];
+                        ai4_min_min_4x4_satd_16x16[i] =
+                            aai4_min_4_16_64_satd[1][i * 16 + MEDIAN_CU_TU_BY_2];
+
+                        if(ai4_sum_sum_4x4_satd_16x16[i] != MAX_32BIT_VAL)
+                        {
+                            ai4_sum_sum_4x4_satd_16x16[i] = 0;
+                            for(j = 0; j < 4; j++)
+                            {
+                                ai4_sum_sum_4x4_satd_16x16[i] +=
+                                    ps_ed_ctb_curr_l1->i4_sum_4x4_satd[i * 4 + j];
+                            }
+                            ps_ed_ctb_curr_l1->i4_16x16_satd[i][0] = ai4_sum_sum_4x4_satd_16x16[i];
+                            ps_ed_ctb_curr_l1->i4_16x16_satd[i][1] = ai4_min_sum_4x4_satd_16x16[i];
+                            ps_ed_ctb_curr_l1->i4_16x16_satd[i][2] = ai4_min_min_4x4_satd_16x16[i];
+                        }
+                    }
+                    if(1 == is_min_block_uncompensated_in_l116x16)
+                    {
+                        if(MAX_32BIT_VAL != ai4_sum_sum_4x4_satd_16x16[i])
+                        {
+                            ai4_curr_frame_16x16_sum_act[0] += ai4_sum_sum_4x4_satd_16x16[i];
+                            ai8_curr_frame_16x16_sum_act_sqr[0] +=
+                                ((LWORD64)ai4_sum_sum_4x4_satd_16x16[i] *
+                                 ai4_sum_sum_4x4_satd_16x16[i]);
+                            ai4_curr_frame_16x16_sum_blks[0] += 1;
+                        }
+                        if(MAX_32BIT_VAL != ai4_min_sum_4x4_satd_16x16[i])
+                        {
+                            ai4_curr_frame_16x16_sum_act[1] += ai4_min_sum_4x4_satd_16x16[i];
+                            ai8_curr_frame_16x16_sum_act_sqr[1] +=
+                                ((LWORD64)ai4_min_sum_4x4_satd_16x16[i] *
+                                 ai4_min_sum_4x4_satd_16x16[i]);
+                            ai4_curr_frame_16x16_sum_blks[1] += 1;
+                            ai4_curr_frame_16x16_sum_act[2] += ai4_min_min_4x4_satd_16x16[i];
+                            ai8_curr_frame_16x16_sum_act_sqr[2] +=
+                                ((LWORD64)ai4_min_min_4x4_satd_16x16[i] *
+                                 ai4_min_min_4x4_satd_16x16[i]);
+                            ai4_curr_frame_16x16_sum_blks[2] += 1;
+                        }
+                    }
+                }
+                /*32x32*/
+                {
+                    ps_ed_ctb_curr_l1->i4_32x32_satd[0][0] = -1;
+                    ps_ed_ctb_curr_l1->i4_32x32_satd[0][1] = -1;
+                    ps_ed_ctb_curr_l1->i4_32x32_satd[0][2] = -1;
+                    ps_ed_ctb_curr_l1->i4_32x32_satd[0][3] = -1;
+
+                    /* Proceed if at least one of the four 16x16 blocks is valid */
+                    if((MAX_32BIT_VAL != ai4_sum_sum_4x4_satd_16x16[0]) ||
+                       (MAX_32BIT_VAL != ai4_sum_sum_4x4_satd_16x16[2]) ||
+                       (MAX_32BIT_VAL != ai4_sum_sum_4x4_satd_16x16[1]) ||
+                       (MAX_32BIT_VAL != ai4_sum_sum_4x4_satd_16x16[3]))
+                    {
+                        //if(1 == is_min_block_comensated_in_l32x32)
+                        {
+                            {
+                                WORD32 aai4_min_sum_sum_4x4_satd_16x16[1][64];
+                                WORD32 i4_array_length =
+                                    sizeof(ai4_sum_sum_4x4_satd_16x16) / sizeof(WORD32);
+                                /*** Sort 4 elements in ascending order ***/
+                                ihevce_merge_sort(
+                                    &ai4_sum_sum_4x4_satd_16x16[0],
+                                    aai4_min_sum_sum_4x4_satd_16x16,
+                                    i4_array_length,
+                                    1,
+                                    4);
+
+                                ps_ed_ctb_curr_l1->i4_32x32_satd[0][0] =
+                                    aai4_min_sum_sum_4x4_satd_16x16[0][MEDIAN_CU_TU];
+                            }
+                            {
+                                ps_ed_ctb_curr_l1->i4_32x32_satd[0][1] =
+                                    aai4_sum_4_16_satd_ctb[1][MEDIAN_CU_TU_BY_2];
+                            }
+                            {
+                                ps_ed_ctb_curr_l1->i4_32x32_satd[0][2] =
+                                    aai4_min_4_16_64_satd[2][MEDIAN_CU_TU_BY_4];
+                            }
+
+                            /*Sum of all 32x32 activity */
+                            ps_ed_ctb_curr_l1->i4_32x32_satd[0][3] = 0;
+                            for(j = 0; j < 4; j++)
+                            {
+                                if(MAX_32BIT_VAL != ai4_sum_sum_4x4_satd_16x16[j])
+                                    ps_ed_ctb_curr_l1->i4_32x32_satd[0][3] +=
+                                        ai4_sum_sum_4x4_satd_16x16[j];
+                            }
+
+                            if(1 == is_min_block_uncompensated_in_l32x32)
+                            {
+                                /*Accumulate the sum of 32*32 activities in the current layer (64*64 CU in L0)*/
+                                if(MAX_32BIT_VAL != ps_ed_ctb_curr_l1->i4_32x32_satd[0][0])
+                                {
+                                    ai4_curr_frame_32x32_sum_act[0] +=
+                                        ps_ed_ctb_curr_l1->i4_32x32_satd[0][0];
+                                    ai8_curr_frame_32x32_sum_act_sqr[0] +=
+                                        ((LWORD64)ps_ed_ctb_curr_l1->i4_32x32_satd[0][0] *
+                                         ps_ed_ctb_curr_l1->i4_32x32_satd[0][0]);
+                                    ai4_curr_frame_32x32_sum_blks[0] += 1;
+                                }
+
+                                if(MAX_32BIT_VAL != ps_ed_ctb_curr_l1->i4_32x32_satd[0][1])
+                                {
+                                    ai4_curr_frame_32x32_sum_act[1] +=
+                                        ps_ed_ctb_curr_l1->i4_32x32_satd[0][1];
+                                    ai8_curr_frame_32x32_sum_act_sqr[1] +=
+                                        ((LWORD64)ps_ed_ctb_curr_l1->i4_32x32_satd[0][1] *
+                                         ps_ed_ctb_curr_l1->i4_32x32_satd[0][1]);
+                                    ai4_curr_frame_32x32_sum_blks[1] += 1;
+                                }
+
+                                if(MAX_32BIT_VAL != ps_ed_ctb_curr_l1->i4_32x32_satd[0][2])
+                                {
+                                    ai4_curr_frame_32x32_sum_act[2] +=
+                                        ps_ed_ctb_curr_l1->i4_32x32_satd[0][2];
+                                    ai8_curr_frame_32x32_sum_act_sqr[2] +=
+                                        ((LWORD64)ps_ed_ctb_curr_l1->i4_32x32_satd[0][2] *
+                                         ps_ed_ctb_curr_l1->i4_32x32_satd[0][2]);
+                                    ai4_curr_frame_32x32_sum_blks[2] += 1;
+                                }
+                            }
+                        }
+                    }
+                }
+                /*Increment ctb count*/
+                ps_ed += inc_ctb;
+            }
+        }
+
+        /* Spatial Variation and modulation index calculated for the frame */
+        {
+            for(i4_k = 0; i4_k < 2; i4_k++)
+            {
+                /*8x8*/
+#if USE_SQRT_AVG_OF_SATD_SQR
+                ps_curr_out->i8_curr_frame_8x8_sum_act[i4_k] = ai8_curr_frame_8x8_sum_act_sqr[i4_k];
+#else
+                ps_curr_out->i8_curr_frame_8x8_sum_act[i4_k] = ai4_curr_frame_8x8_sum_act[i4_k];
+#endif
+                ps_curr_out->i4_curr_frame_8x8_sum_act_for_strength[i4_k] =
+                    ai4_curr_frame_8x8_sum_act[i4_k];
+                ps_curr_out->i4_curr_frame_8x8_num_blks[i4_k] = ai4_curr_frame_8x8_sum_blks[i4_k];
+                ps_curr_out->u8_curr_frame_8x8_sum_act_sqr = u8_curr_frame_8x8_sum_act_sqr;
+
+                /*16x16*/
+#if USE_SQRT_AVG_OF_SATD_SQR
+                ps_curr_out->i8_curr_frame_16x16_sum_act[i4_k] =
+                    ai8_curr_frame_16x16_sum_act_sqr[i4_k];
+#else
+                ps_curr_out->i8_curr_frame_16x16_sum_act[i4_k] = ai4_curr_frame_16x16_sum_act[i4_k];
+#endif
+                ps_curr_out->i4_curr_frame_16x16_num_blks[i4_k] =
+                    ai4_curr_frame_16x16_sum_blks[i4_k];
+
+                /*32x32*/
+#if USE_SQRT_AVG_OF_SATD_SQR
+                ps_curr_out->i8_curr_frame_32x32_sum_act[i4_k] =
+                    ai8_curr_frame_32x32_sum_act_sqr[i4_k];
+#else
+                ps_curr_out->i8_curr_frame_32x32_sum_act[i4_k] = ai4_curr_frame_32x32_sum_act[i4_k];
+#endif
+                ps_curr_out->i4_curr_frame_32x32_num_blks[i4_k] =
+                    ai4_curr_frame_32x32_sum_blks[i4_k];
+            }
+
+            /* Index 2 exists only for the 16x16 and 32x32 statistics */
+            /*16x16*/
+#if USE_SQRT_AVG_OF_SATD_SQR
+            ps_curr_out->i8_curr_frame_16x16_sum_act[2] = ai8_curr_frame_16x16_sum_act_sqr[2];
+#else
+            ps_curr_out->i8_curr_frame_16x16_sum_act[2] = ai4_curr_frame_16x16_sum_act[2];
+#endif
+
+            ps_curr_out->i4_curr_frame_16x16_num_blks[2] = ai4_curr_frame_16x16_sum_blks[2];
+
+            /*32x32*/
+#if USE_SQRT_AVG_OF_SATD_SQR
+            ps_curr_out->i8_curr_frame_32x32_sum_act[2] = ai8_curr_frame_32x32_sum_act_sqr[2];
+#else
+            ps_curr_out->i8_curr_frame_32x32_sum_act[2] = ai4_curr_frame_32x32_sum_act[2];
+#endif
+            ps_curr_out->i4_curr_frame_32x32_num_blks[2] = ai4_curr_frame_32x32_sum_blks[2];
+        }
+    }
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_decomp_pre_intra_get_frame_satd \endif
+*
+* \brief
+*    Adds up i8_sum_best_satd of the early decision context of every
+*    processing thread and reports the actual layer 1 dimensions.
+*
+* \param[in]  pv_ctxt   : master context (ihevce_decomp_pre_intra_master_ctxt_t *)
+* \param[out] i4_width  : as_layers[1].i4_actual_wd of the last thread visited;
+*                         left untouched when i4_num_proc_thrds is not positive
+* \param[out] i4_hieght : as_layers[1].i4_actual_ht, same remark as i4_width
+*
+* \return
+*    Sum of i8_sum_best_satd over all processing threads
+*
+* \author
+*  Ittiam
+*
+*****************************************************************************
+*/
+LWORD64 ihevce_decomp_pre_intra_get_frame_satd(void *pv_ctxt, WORD32 *i4_width, WORD32 *i4_hieght)
+{
+    ihevce_decomp_pre_intra_master_ctxt_t *ps_master =
+        (ihevce_decomp_pre_intra_master_ctxt_t *)pv_ctxt;
+    LWORD64 i8_frame_satd = 0;
+    WORD32 i4_thrd = 0;
+
+    /* Accumulate the SATD across all threads. Every thread enters this function, */
+    /* hence all of them must have completed the pre-intra pass by now            */
+    while(i4_thrd < ps_master->i4_num_proc_thrds)
+    {
+        ihevce_decomp_pre_intra_ctxt_t *ps_thrd =
+            ps_master->aps_decomp_pre_intra_thrd_ctxt[i4_thrd];
+
+        i8_frame_satd += ps_thrd->ps_ed_ctxt->i8_sum_best_satd;
+
+        /* Rewritten on every iteration; the last thread's values remain */
+        *i4_width = ps_thrd->as_layers[1].i4_actual_wd;
+        *i4_hieght = ps_thrd->as_layers[1].i4_actual_ht;
+
+        i4_thrd++;
+    }
+
+    return i8_frame_satd;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_decomp_pre_intra_get_frame_satd_squared \endif
+*
+* \brief
+*    Adds up i8_sum_sq_best_satd of the early decision context of every
+*    processing thread and reports the actual layer 1 dimensions.
+*
+* \param[in]  pv_ctxt   : master context (ihevce_decomp_pre_intra_master_ctxt_t *)
+* \param[out] i4_width  : as_layers[1].i4_actual_wd of the last thread visited;
+*                         left untouched when i4_num_proc_thrds is not positive
+* \param[out] i4_hieght : as_layers[1].i4_actual_ht, same remark as i4_width
+*
+* \return
+*    Sum of i8_sum_sq_best_satd over all processing threads
+*
+*****************************************************************************
+*/
+LWORD64 ihevce_decomp_pre_intra_get_frame_satd_squared(
+    void *pv_ctxt, WORD32 *i4_width, WORD32 *i4_hieght)
+{
+    ihevce_decomp_pre_intra_master_ctxt_t *ps_master =
+        (ihevce_decomp_pre_intra_master_ctxt_t *)pv_ctxt;
+    LWORD64 i8_frame_satd_sq = 0;
+    WORD32 i4_thrd = 0;
+
+    /* Accumulate across all threads. Every thread enters this function, hence */
+    /* all of them must have completed the pre-intra pass by now               */
+    while(i4_thrd < ps_master->i4_num_proc_thrds)
+    {
+        ihevce_decomp_pre_intra_ctxt_t *ps_thrd =
+            ps_master->aps_decomp_pre_intra_thrd_ctxt[i4_thrd];
+
+        i8_frame_satd_sq += (ps_thrd->ps_ed_ctxt->i8_sum_sq_best_satd);
+
+        /* Rewritten on every iteration; the last thread's values remain */
+        *i4_width = ps_thrd->as_layers[1].i4_actual_wd;
+        *i4_hieght = ps_thrd->as_layers[1].i4_actual_ht;
+
+        i4_thrd++;
+    }
+
+    return i8_frame_satd_sq;
+}
diff --git a/encoder/ihevce_decomp_pre_intra_pass.h b/encoder/ihevce_decomp_pre_intra_pass.h
new file mode 100644
index 0000000..94f5a02
--- /dev/null
+++ b/encoder/ihevce_decomp_pre_intra_pass.h
@@ -0,0 +1,153 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_decomp_pre_intra_pass.h
+*
+* \brief
+* This file contains declarations related to frame decomposition done during
+* pre intra processing
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_DECOMP_PRE_INTRA_PASS_H_
+#define _IHEVCE_DECOMP_PRE_INTRA_PASS_H_
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Globals */
+/*****************************************************************************/
+extern WORD32 g_i4_ip_funcs[MAX_NUM_IP_MODES];
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+/* 2^(1/4), rounded to four decimals */
+#define POW_2_TO_1_BY_4 (1.1892)
+/* 2^(3/4), rounded to four decimals */
+#define POW_2_TO_3_BY_4 (1.6818)
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+void ihevce_ed_4x4_find_best_modes(
+ UWORD8 *pu1_src,
+ WORD32 src_stride,
+ UWORD8 *pu1_nbr,
+ UWORD16 *pu2_mode_bits_cost,
+ UWORD8 *pu1_best_modes,
+ WORD32 *pu1_best_sad_costs,
+ WORD32 u1_low_resol,
+ FT_SAD_COMPUTER *pf_4x4_sad_computer);
+
+WORD32 ihevce_decomp_pre_intra_get_num_mem_recs(void);
+
+WORD32 ihevce_decomp_pre_intra_get_mem_recs(
+ iv_mem_rec_t *ps_mem_tab, WORD32 i4_num_proc_thrds, WORD32 i4_mem_space);
+
+void *ihevce_decomp_pre_intra_init(
+ iv_mem_rec_t *ps_mem_tab,
+ ihevce_static_cfg_params_t *ps_init_prms,
+ WORD32 i4_num_proc_thrds,
+ func_selector_t *ps_func_selector,
+ WORD32 i4_resolution_id,
+ UWORD8 u1_is_popcnt_available);
+
+void ihevce_decomp_pre_intra_process(
+ void *pv_ctxt,
+ ihevce_lap_output_params_t *ps_lap_out_prms,
+ frm_ctb_ctxt_t *ps_frm_ctb_prms,
+ void *pv_multi_thrd_ctxt,
+ WORD32 thrd_id,
+ WORD32 i4_ping_pong,
+ ihevce_8x8_L0_satd_t *ps_layer0_cur_satd,
+ ihevce_8x8_L0_mean_t *ps_layer0_cur_mean);
+
+void ihevce_decomp_pre_intra_frame_init(
+ void *pv_ctxt,
+ UWORD8 **ppu1_decomp_lyr_bufs,
+ WORD32 *pi4_lyr_buf_stride,
+ ihevce_ed_blk_t *ps_layer1_buf,
+ ihevce_ed_blk_t *ps_layer2_buf,
+ ihevce_ed_ctb_l1_t *ps_ed_ctb_l1,
+ WORD32 i4_ol_sad_lambda_qf,
+ WORD32 i4_slice_type,
+ ctb_analyse_t *ps_ctb_analyse);
+
+/* Calculate the average activities at 16*16 (8*8 in L1)
+and 32*32 (8*8 in L2) block sizes */
+void ihevce_decomp_pre_intra_curr_frame_pre_intra_deinit(
+ void *pv_pre_intra_ctxt,
+ pre_enc_me_ctxt_t *ps_curr_out,
+ WORD32 i4_is_last_thread,
+ frm_ctb_ctxt_t *ps_frm_ctb_prms,
+ WORD32 i4_temporal_lyr_id,
+ WORD32 i4_enable_noise_detection);
+
+void ihevce_scale_by_2(
+ UWORD8 *pu1_src,
+ WORD32 src_stride,
+ UWORD8 *pu1_dst,
+ WORD32 dst_stride,
+ WORD32 wd,
+ WORD32 ht,
+ UWORD8 *pu1_wkg_mem,
+ WORD32 ht_offset,
+ WORD32 block_ht,
+ WORD32 wd_offset,
+ WORD32 block_wd,
+ FT_COPY_2D *pf_copy_2d,
+ FT_SCALING_FILTER_BY_2 *pf_scaling_filter_mxn);
+
+void ihevce_ed_frame_init(void *pv_ed_ctxt, WORD32 i4_layer_no);
+
+void ihevce_intra_populate_mode_bits_cost(
+ WORD32 top_intra_mode,
+ WORD32 left_intra_mode,
+ WORD32 available_top,
+ WORD32 available_left,
+ WORD32 cu_pos_y,
+ UWORD16 *mode_bits_cost,
+ WORD32 lambda);
+
+WORD32 ihevce_cu_level_qp_mod(
+ WORD32 i4_qscale,
+ WORD32 i4_satd,
+ long double ld_curr_frame_log_avg,
+ float f_mod_strength,
+ WORD32 *pi4_8x8_act_factor,
+ WORD32 *pi4_qscale_mod,
+ rc_quant_t *ps_rc_quant_ctxt);
+
+/*return intra SATD of entire frame*/
+LWORD64 ihevce_decomp_pre_intra_get_frame_satd(void *pv_ctxt, WORD32 *i4_width, WORD32 *i4_hieght);
+
+LWORD64 ihevce_decomp_pre_intra_get_frame_satd_squared(
+ void *pv_ctxt, WORD32 *i4_width, WORD32 *i4_hieght);
+
+#endif
diff --git a/encoder/ihevce_decomp_pre_intra_structs.h b/encoder/ihevce_decomp_pre_intra_structs.h
new file mode 100644
index 0000000..3e20d1d
--- /dev/null
+++ b/encoder/ihevce_decomp_pre_intra_structs.h
@@ -0,0 +1,269 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_decomp_pre_intra_structs.h
+*
+* \brief
+* This file contains structures of pre_enc_loop pass
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_DECOMP_PRE_INTRA_STRUCTS_H_
+#define _IHEVCE_DECOMP_PRE_INTRA_STRUCTS_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/* For decomposition of every row we need some extra rows above and below that row */
+#define NUM_EXTRA_ROWS_REQ 3
+
+/*Macros for pre intra early decisions*/
+#define NUM_MODES 35
+#define SAD_NOT_VALID 0xFFFFF
+
+/*
+ * Neighbour availability flag helpers. T / L / TL / TR / BL presumably stand
+ * for the top, left, top-left, top-right and bottom-left neighbours (TODO
+ * confirm against the users of these flags). Bit positions: T = 8, L = 7,
+ * TL = 16, TR = 12, BL = 3.
+ * The argument is parenthesised in every expansion so that an expression
+ * argument (e.g. CHECK_T_AVAILABLE(a | b)) is evaluated as a unit.
+ */
+
+/* Set one (or all) availability bits in the lvalue x */
+#define SET_T_AVAILABLE(x) ((x) = (x) | (1 << 8))
+#define SET_L_AVAILABLE(x) ((x) = (x) | (1 << 7))
+#define SET_TL_AVAILABLE(x) ((x) = (x) | (1 << 16))
+#define SET_TR_AVAILABLE(x) ((x) = (x) | (1 << 12))
+#define SET_BL_AVAILABLE(x) ((x) = (x) | (1 << 3))
+#define SET_ALL_AVAILABLE(x) ((x) = (1 << 8) + (1 << 7) + (1 << 16) + (1 << 12) + (1 << 3))
+
+/* Clear one (or all) availability bits in the lvalue x */
+#define SET_T_UNAVAILABLE(x) ((x) = (x) & ~((WORD32)1 << 8))
+#define SET_L_UNAVAILABLE(x) ((x) = (x) & ~((WORD32)1 << 7))
+#define SET_TL_UNAVAILABLE(x) ((x) = (x) & ~((WORD32)1 << 16))
+#define SET_TR_UNAVAILABLE(x) ((x) = (x) & ~((WORD32)1 << 12))
+#define SET_BL_UNAVAILABLE(x) ((x) = (x) & ~((WORD32)1 << 3))
+#define SET_ALL_UNAVAILABLE(x) ((x) = 0)
+
+/* Evaluate to 1 when the corresponding availability bit of x is set, else 0 */
+#define CHECK_T_AVAILABLE(x) (((x) & (1 << 8)) >> 8)
+#define CHECK_L_AVAILABLE(x) (((x) & (1 << 7)) >> 7)
+#define CHECK_TL_AVAILABLE(x) (((x) & (1 << 16)) >> 16)
+#define CHECK_TR_AVAILABLE(x) (((x) & (1 << 12)) >> 12)
+#define CHECK_BL_AVAILABLE(x) (((x) & (1 << 3)) >> 3)
+
+/* q format for lambda used in the encoder */
+#define LAMBDA_Q_SHIFT 8
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+/* Memory record ("mem tab") identifiers of the decomp pre-intra module. */
+/* NOTE(review): presumably these index the iv_mem_rec_t array handed to  */
+/* ihevce_decomp_pre_intra_get_mem_recs() / _init() - confirm in the .c   */
+typedef enum
+{
+ DECOMP_PRE_INTRA_CTXT = 0,
+ DECOMP_PRE_INTRA_THRDS_CTXT,
+ DECOMP_PRE_INTRA_ED_CTXT,
+
+ /* should always be the last entry; doubles as the number of records */
+ NUM_DECOMP_PRE_INTRA_MEM_RECS
+} DECOMP_PRE_INTRA_MEM_TABS_T;
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ * @brief Context for early intra or inter decision (pre-intra "ed" pass).
+ *        Besides the frame / CTB level pointers it also holds scratch arrays
+ *        that were moved here from the stack of the processing functions.
+******************************************************************************
+ */
+typedef struct
+{
+ /** lambda for cost calculation */
+ WORD32 lambda;
+
+ /* slice (picture) type */
+ WORD32 i4_slice_type;
+
+ /**
+ * Until what level, intra is evaluated.
+ * 0 - 8
+ * 1 - 4
+ * 2 - 2
+ * 3 - 4
+ * NOTE(review): value 3 maps to 4 again, same as value 1 - confirm whether
+ * this table is intended.
+ */
+ WORD32 level;
+
+ /*Pointer to 4x4 blocks of entire frame */
+ ihevce_ed_blk_t *ps_ed_pic;
+
+ /*Pointer to present 4x4 block */
+ ihevce_ed_blk_t *ps_ed;
+
+ /*Pointer to ctb level data of entire frame */
+ ihevce_ed_ctb_l1_t *ps_ed_ctb_l1_pic;
+
+ /*Pointer to ctb level data of current ctb */
+ ihevce_ed_ctb_l1_t *ps_ed_ctb_l1;
+
+ /* NOTE(review): presumably the intra modes of the left CTB's border blocks - confirm */
+ WORD32 left_ctb_intra_modes[20];
+
+ /* SAD scratch, NUM_MODES entries for each of 64 blocks (presumably the 4x4s of a CTB) */
+ WORD32 sad[64 * NUM_MODES];
+
+ /*Sum of best SATDs at L1*/
+ LWORD64 i8_sum_best_satd;
+
+ /* Sum of squares of best SATDs at L1 (presumably, going by the name); */
+ /* returned per frame by ihevce_decomp_pre_intra_get_frame_satd_squared */
+ LWORD64 i8_sum_sq_best_satd;
+
+ /** Encoder quality preset : See IHEVCE_QUALITY_CONFIG_T for presets */
+ WORD32 i4_quality_preset;
+
+ /*following are the changes for reducing the stack memory used by this module. Local variables are copied to context memory */
+
+ /** Neighbour flags. Used as local variable in pre_intra_process_row function. Shouldn't be used by other functions */
+ WORD32 ai4_nbr_flags[64];
+
+ /** reference data for four 4x4 blocks. This is used as local variable in ed_calc_8x8_blk */
+ UWORD8 au1_ref_full_ctb[4][18];
+
+ /** reference data for 8x8 block. This is used as local variable in ed_calc_8x8_blk */
+ UWORD8 au1_ref_8x8[1][33];
+
+ /** mode bits costs array. This is used as local variable in ed_calc_8x8_blk */
+ UWORD16 au2_mode_bits_cost_full_ctb[4][NUM_MODES];
+
+ /** reference data for 4x4 block for incomplete ctb proc. Variable local to ed_calc_incomplete_ctb*/
+ UWORD8 au1_ref_ic_ctb[18];
+
+ /** top intra modes for incomplete ctb proc. Variable local to ed_calc_incomplete_ctb*/
+ WORD32 ai4_top_intra_modes_ic_ctb[20];
+
+ /** mode bits cost for incomplete ctb proc. Variable local to ed_calc_incomplete_ctb*/
+ UWORD16 au2_mode_bits_cost_ic_ctb[NUM_MODES];
+
+ /** Pointer to structure containing function pointers of common*/
+ func_selector_t *ps_func_selector;
+
+} ihevce_ed_ctxt_t; //early decision
+
+/* Geometry, input buffer and row bookkeeping of one decomposition layer */
+typedef struct
+{
+ /** Actual Width of this layer */
+ WORD32 i4_actual_wd;
+
+ /** Actual height of this layer */
+ WORD32 i4_actual_ht;
+
+ /** Padded width of this layer */
+ WORD32 i4_padded_wd;
+
+ /** Padded height of this layer */
+ WORD32 i4_padded_ht;
+
+ /** input pointer. */
+ UWORD8 *pu1_inp;
+
+ /** stride of input buffer */
+ WORD32 i4_inp_stride;
+
+ /** Decomposition block height size */
+ WORD32 i4_decomp_blk_ht;
+
+ /** Decomposition block width size */
+ WORD32 i4_decomp_blk_wd;
+
+ /** Number of blocks in a row */
+ WORD32 i4_num_col_blks;
+
+ /** Number of rows in a layer */
+ WORD32 i4_num_row_blks;
+
+ /* NOTE(review): presumably the row numbers handed out for processing, */
+ /* one slot per CTB row of the frame - confirm against the .c file     */
+ WORD32 ai4_curr_row_no[MAX_NUM_CTB_ROWS_FRM];
+
+ /* NOTE(review): presumably the count of rows processed so far - confirm */
+ WORD32 i4_num_rows_processed;
+} decomp_layer_ctxt_t;
+
+/* Per thread context of the decomposition + pre-intra pass. One instance */
+/* per processing thread is referenced from the master context.           */
+typedef struct
+{
+ /* Number of layers */
+ WORD32 i4_num_layers;
+
+ /** Handles for all layers. Entry 0 refers to L0 , 3 refers to L3 */
+ decomp_layer_ctxt_t as_layers[MAX_NUM_HME_LAYERS];
+
+ /** Array for working memory of the thread */
+ UWORD8 au1_wkg_mem[((MAX_CTB_SIZE >> 1) * (MAX_CTB_SIZE + 2 * NUM_EXTRA_ROWS_REQ))];
+
+ /** Pointer for working memory of the thread */
+ UWORD8 *pu1_wkg_mem;
+
+ /** Encoder quality preset : See IHEVCE_QUALITY_CONFIG_T for presets */
+ WORD32 i4_quality_preset;
+
+ /** ed_ctxt pointer. This memory is re-used across layers now */
+ ihevce_ed_ctxt_t *ps_ed_ctxt;
+
+ /* NOTE(review): presumably the buffers passed to                   */
+ /* ihevce_decomp_pre_intra_frame_init() (4x4 data of layer 1 and   */
+ /* layer 2, CTB level data of layer 1) - confirm in the .c file     */
+ ihevce_ed_blk_t *ps_layer1_buf;
+ ihevce_ed_blk_t *ps_layer2_buf;
+ ihevce_ed_ctb_l1_t *ps_ed_ctb_l1;
+
+ /** to store the L0 8x8 cur SATD */
+ ihevce_8x8_L0_satd_t *ps_layer0_cur_satd;
+
+ /** to store the L0 8x8 cur mean*/
+ ihevce_8x8_L0_mean_t *ps_layer0_cur_mean;
+
+ WORD32 i4_slice_type;
+ WORD32 i4_codec_level;
+
+ /* One lambda per HME layer */
+ WORD32 ai4_lambda[MAX_NUM_HME_LAYERS];
+
+ /* pointer to the structure ps_ctb_analyse in pre_enc_me_ctxt_t */
+ ctb_analyse_t *ps_ctb_analyse;
+
+ WORD32 i4_enable_noise_detection;
+
+ ihevce_ipe_optimised_function_list_t s_ipe_optimised_function_list;
+
+ ihevce_cmn_opt_func_t s_cmn_opt_func;
+
+} ihevce_decomp_pre_intra_ctxt_t;
+
+/**
+******************************************************************************
+ * @brief Decomp pre-intra master context structure: owns the per thread
+ *        contexts of the decomposition / pre-intra pass
+******************************************************************************
+*/
+typedef struct
+{
+ /** Array of per thread decomp pre-intra contexts */
+ ihevce_decomp_pre_intra_ctxt_t *aps_decomp_pre_intra_thrd_ctxt[MAX_NUM_FRM_PROC_THRDS_PRE_ENC];
+
+ /** Number of processing threads created run time */
+ WORD32 i4_num_proc_thrds;
+
+} ihevce_decomp_pre_intra_master_ctxt_t;
+
+/*****************************************************************************/
+/* Extern Variable Declarations */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+#endif /* _IHEVCE_DECOMP_PRE_INTRA_STRUCTS_H_ */
diff --git a/encoder/ihevce_defs.h b/encoder/ihevce_defs.h
new file mode 100644
index 0000000..f1ccc33
--- /dev/null
+++ b/encoder/ihevce_defs.h
@@ -0,0 +1,982 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ihevce_defs.h
+*
+* @brief
+* Definitions used in the codec
+*
+* @author
+* Ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+#ifndef _IHEVCE_DEFS_H_
+#define _IHEVCE_DEFS_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+#define SINGLE_THREAD_INTERFACE 0
+
+#define DEFAULT_VPS_ID 0
+
+#define DEFAULT_SPS_ID 0
+
+#define DEFAULT_PPS_ID 0
+
+#define DEFAULT_CHROMA_FORMAT_IDC 1
+
+#define AMP_ENABLED 1
+
+#define AMP_DISABLED 0
+
+#define LISTS_MODIFICATION_ABSENT 0
+
+#define LISTS_MODIFICATION_PRESENT 1
+
+#define LONG_TERM_REF_PICS_PRESENT 1
+
+#define LONG_TERM_REF_PICS_ABSENT 0
+
+#define PCM_ENABLED 1
+
+#define PCM_DISABLED 0
+
+#define PCM_LOOP_FILTER_DISABLED 1
+
+#define PCM_LOOP_FILTER_ENABLED 0
+
+#define REF_PIC_LISTS_RESTRICTED 1
+
+#define REF_PIC_LISTS_UNRESTRICTED 0
+
+#define SCALING_LIST_DISABLED 0
+
+#define SCALING_LIST_ENABLED 1
+
+#define DEFAULT_SPS_MAX_SUB_LAYERS 1
+
+#define VPS_SUB_LAYER_ORDERING_INFO_PRESENT 1
+
+#define VPS_SUB_LAYER_ORDERING_INFO_ABSENT 0
+
+#define SPS_SUB_LAYER_ORDERING_INFO_PRESENT 1
+
+#define SPS_SUB_LAYER_ORDERING_INFO_ABSENT 0
+
+#define SCALING_LIST_DATA_PRESENT 1
+
+#define SCALING_LIST_DATA_ABSENT 0
+
+#define SPS_TEMPORAL_ID_NESTING_DONE 1
+
+#define NO_SPS_TEMPORAL_ID_NESTING_DONE 0
+
+#define STRONG_INTRA_SMOOTHING_FLAG_DISABLE 0
+
+#define STRONG_INTRA_SMOOTHING_FLAG_ENABLE 1
+
+#define DEFAULT_LOG2_MAX_POC_LSB 10
+
+#define DEFAULT_PIC_CROP_TOP_OFFSET 0
+
+#define DEFAULT_PIC_CROP_LEFT_OFFSET 0
+
+#define DEFAULT_PIC_CROP_RIGHT_OFFSET 0
+
+#define DEFAULT_PIC_CROP_BOTTOM_OFFSET 0
+
+#define DEFAULT_MAX_DEC_PIC_BUFFERING 8
+
+#define DEFAULT_MAX_NUM_REORDER_PICS 8
+
+#define DEFAULT_MAX_LATENCY_INCREASE 8
+
+#define HIGH_TIER 1
+
+#define MAIN_TIER 0
+
+#define DEFAULT_BETA_OFFSET 0
+
+#define CABAC_INIT_PRESENT 1
+
+#define CABAC_INIT_ABSENT 0
+
+#define CU_QP_DELTA_ENABLED 1
+
+#define CU_QP_DELTA_DISABLED 0
+
+#define MAX_MERGE_CANDIDATES 5
+
+#define CONSTR_IPRED_ENABLED 1
+
+#define CONSTR_IPRED_DISABLED 0
+
+#define DISABLE_DEBLOCKING_FLAG 1
+
+#define ENABLE_DEBLOCKING_FLAG 0
+
+#define DEBLOCKING_FILTER_CONTROL_PRESENT 1
+
+#define DEBLOCKING_FILTER_CONTROL_ABSENT 0
+
+#define DEBLOCKING_FILTER_OVERRIDE_ENABLED 1
+
+#define DEBLOCKING_FILTER_OVERRIDE_DISABLED 0
+
+#define DEPENDENT_SLICE_ENABLED 1
+
+#define DEPENDENT_SLICE_DISABLED 0
+
+#define DEFAULT_DIFF_CU_QP_DELTA_DEPTH 0
+
+#define ENTROPY_CODING_SYNC_ENABLED 1
+
+#define ENTROPY_CODING_SYNC_DISABLED 0
+
+#define ENTROPY_SLICE_ENABLED 1
+
+#define ENTROPY_SLICE_DISABLED 0
+
+#define DEFAULT_PARALLEL_MERGE_LEVEL 2
+
+#define DEFAULT_NUM_REF_IDX_L0_DEFAULT_ACTIVE 6
+
+#define DEFAULT_NUM_REF_IDX_L1_DEFAULT_ACTIVE 6
+
+#define NUM_TILES_COLS 0
+
+#define NUM_TILES_ROWS 0
+
+#define OUTPUT_FLAG_PRESENT 1
+
+#define OUTPUT_FLAG_ABSENT 0
+
+#define DEFAULT_PIC_CB_QP_OFFSET 0
+
+#define DEFAULT_PIC_CR_QP_OFFSET 0
+
+#define SLICE_LEVEL_CHROMA_QP_OFFSETS_PRESENT 1
+
+#define SLICE_LEVEL_CHROMA_QP_OFFSETS_ABSENT 0
+
+#define DEBLOCKING_FILTER_DISABLED 1
+
+#define DEBLOCKING_FILTER_ENABLED 0
+
+#define LF_ACROSS_SLICES_ENABLED 1
+
+#define LF_ACROSS_SLICES_DISABLED 0
+
+#define SAO_ENABLED 1
+
+#define SAO_DISABLED 0
+
+#define SCALING_LIST_DATA_PRESENT 1
+
+#define SCALING_LIST_DATA_ABSENT 0
+
+#define SIGN_DATA_HIDDEN 1
+
+#define SIGN_DATA_UNHIDDEN 0
+
+#define SLICE_EXTENSION_PRESENT 1
+
+#define SLICE_EXTENSION_ABSENT 0
+
+#define SLICE_HEADER_EXTENSION_PRESENT 1
+
+#define SLICE_HEADER_EXTENSION_ABSENT 0
+
+#define DEFAULT_TC_OFFSET 0
+
+#define TRANSFORM_SKIP_ENABLED 1
+
+#define TRANSFORM_SKIP_DISABLED 0
+
+#define TRANSFORM_BYPASS_ENABLED 1
+
+#define TRANSFORM_BYPASS_DISABLED 0
+
+#define SPACING_IS_UNIFORM 1
+
+#define SPACING_IS_NONUNIFORM 0
+
+#define TILES_ENABLED 1
+
+#define TILES_DISABLED 0
+
+#define TOTAL_NUM_TIERS 2
+
+#define TOTAL_NUM_LEVELS 13
+
+#define SET_CTB_ALIGN(x, y) ((((x) & ((y)-1)) == 0) ? 0 : (y) - ((x) & ((y)-1)))
+
+/* Enables HM-8.1 compatible stream, setting to 0 will make it 8.2 compatible*/
+#define HM_8DOT1_SYNTAX 0
+
+/* Enables cu level RD optimized encoding by computing cabac bits for the cu */
+#define RDOPT_ENABLE 1
+
+/* Enables inclusion of chroma coding cost for RD opt decisions */
+#define CHROMA_RDOPT_ENABLE 1
+
+/* Enables tu level zero cbf based RD optimized encoding */
+#define RDOPT_ZERO_CBF_ENABLE 1
+
+/* Enables bit savings in tu tree of inter cus by merging not coded child nodes to parent node */
+#define SHRINK_INTER_TUTREE 1
+
+/* Q format for lambda used in the encoder */
+#define LAMBDA_Q_SHIFT 8
+
+/* If 0, Align PIC Wd/ht to Min CU size */
+/* If 1, Align PIC Wd/ht to CTB size */
+#define PIC_ALIGN_CTB_SIZE 0
+
+/** Enables DCT integer transform / Hadamard Transform based SATD evaluation
+ * 1 : DCT integer Transform, 0 : Hadamard Transform
+ */
+#define USE_EXACT_TFR 0
+
+/** Enable colocated PU population */
+#define ENABLE_COL_PU_POPULATION 1
+
+#define MAX_MVX_SUPPORTED_IN_COARSE_LAYER 128
+
+#define MAX_MVY_SUPPORTED_IN_COARSE_LAYER 64
+
+//ME_Experiments
+
+#define USE_4x4_IN_L1 0
+
+#define DIAMOND_GRID 1
+
+#define SUBPEL_DEDUPLICATE_ENABLE 1
+
+/** Enables CU delta QP population within a frame : Random for now */
+//#define RANDOM_CU_QP 0
+
+/**
+ * @brief Mapping of Minimum HEVC qp to MPEG2 QP
+ */
+#define MIN_RC_QP (1)
+/**
+ * @brief Mapping of Maximum HEVC qp to MPEG2 QP
+ */
+#define MAX_RC_QP (228)
+/**
+ * @brief Total number of MPEG2 QPs
+ */
+#define MPEG2_QP_ELEM (MAX_RC_QP + 1)
+/**
+ * @brief Total number of HEVC QPs
+ */
+#define HEVC_QP_ELEM (MAX_HEVC_QP_10bit + 1)
+
+#define QP_LEVEL_MOD_ACT_FACTOR 10
+
+#define TWO_POW_QP_LEVEL_MOD_ACT_FACTOR (1 << (QP_LEVEL_MOD_ACT_FACTOR))
+
+#define DEFAULT_NON_PACKED_CONSTRAINT_FLAG 1
+
+#define DEFAULT_FRAME_ONLY_CONSTRAINT_FLAG 0
+
+#define ENABLE_CU_TREE_CULLING (1 && ENABLE_4CTB_EVALUATION)
+
+#define RATIONALISE_NUM_RDO_MODES_IN_PQ_AND_HQ 1
+
+#define MAX_NUMBER_OF_INTER_RDOPT_CANDS_IN_PQ_AND_HQ 2
+
+#define MAX_NUMBER_OF_INTER_RDOPT_CANDS_IN_MS 2
+
+#define MAX_NUMBER_OF_INTER_RDOPT_CANDS_IN_HS_AND_XS 1
+
+#define BUFFER_SIZE_MULTIPLIER_IF_HBD 3
+
+/* If */
+/* qp_bdoffset = 6 * (bit_depth - 8) */
+/* and */
+/* lambda = pow(2.0, (((i4_cur_frame_qp + qp_bdoffset - 12)) / 3)), */
+/* Then 'Lambda Types' are - */
+/* 0, when bit_depth_in_module = 8 => qp_bdoffset = 0 always */
+/* 1, when bit_depth_in_module > 8, => qp_bdoffset = value derived above */
+/* 2, when both of the lambdas referred to in the previous cases are required */
+
+#define PRE_ENC_LAMBDA_TYPE 0
+
+#define ENC_LAMBDA_TYPE 0
+
+#define IPE_LAMBDA_TYPE 0
+
+#define ME_LAMBDA_TYPE 0
+
+#define ENC_LOOP_LAMBDA_TYPE 2
+
+#define ENABLE_SSIM 0
+
+#define VUI_BIT_RATE_SCALE 6
+
+#define VUI_CPB_SIZE_SCALE 8
+
+#define ENABLE_REFINED_QP_MOD 1
+
+#if ENABLE_REFINED_QP_MOD
+/* Detect the uncovered (new) region that will be used as reference for the upcoming pictures,
+ so that it can be coded well to enhance coding efficiency */
+#define ENABLE_TEMPORAL_WEIGHING 0
+
+/* to enable modulation factor based on spatial variance when we calculate activity factor using
+ the following equation
+ act_factor = (m * c + a )/(c + m * a)*/
+// SATD_NOISE_FLOOR_THRESHOLD was earlier controlled using this
+#define ENABLE_QP_MOD_BASED_ON_SPATIAL_VARIANCE 0
+
+/* To enable the trace for delta Qp bits */
+#define QP_DELTA_BITS_TRACE
+
+/* to enable modulation based LAP2 average satd*/
+#define MODULATION_OVER_LAP 1
+
+/* 0 - Lamda and Qp are decoupled,
+ 1 - Lamda and Qp are coupled*/
+#define LAMDA_BASED_ON_QUANT 0
+
+/*
+ 0 - act_factor = (m * c + a )/(c + m * a)
+ m = modulation factor
+ c = cur satd
+ a = average satd
+ ----------------------------------------
+ 1 - act_factor = (c/a) ^ (s/3)
+ s = strength
+ c = cur satd
+ a = average satd
+ */
+#define LOGARITHMIC_MODULATION 1
+
+#define MEDIAN_ENABLE 1
+#define MIN_ENABLE 0
+
+/* well compensatable regions are not considered for
+ QP modulation*/
+#define DISABLE_COMPENSATION 1
+
+#define CST_NOISE_THRSHLD 0
+
+/*decrease intra cu qp by 1 in Inter Pictures*/
+#define DECREASE_QP 0
+
+/*strength calculation based on deviation*/
+#define STRENGTH_BASED_ON_DEVIATION 1
+
+/* allow clipping of the activity factor such that
+ the deviation of qp in modulation is controlled */
+#define ALLOW_ACT_FACTOR_CLIP_IN_QP_MOD 1
+
+/*instead of avg activity use sqrt(avg of satd square)*/
+#define USE_SQRT_AVG_OF_SATD_SQR 1
+
+/*use sum of squared transform coeff*/
+#define USE_SQR_SATD_COEFF 0
+
+/*instead of L1 IPE SATD, use L1 CUR SATD*/
+#define USE_CUR_SATD 0 // else it will use satd of cur - pred
+
+/*use L0 CUR SATD */
+#define USE_CUR_L0_SATD 0
+
+/* strength based on only curr frame deviation else it is based on average over lap2 */
+#define STRENGTH_BASED_ON_CURR_FRM 0
+
+#define POW_OPT 1
+
+#else /*INITIAL QP MOD*/
+/*Same as 11_0 Mod version */
+// SATD_NOISE_FLOOR_THRESHOLD was earlier controlled using this
+#define ENABLE_QP_MOD_BASED_ON_SPATIAL_VARIANCE 0
+#define ENABLE_TEMPORAL_WEIGHING 0
+#define MODULATION_OVER_LAP 0
+#define LAMDA_BASED_ON_QUANT 1
+#define LOGARITHMIC_MODULATION 0
+#define MIN_ENABLE 1
+#define DISABLE_COMPENSATION 1
+#define CST_NOISE_THRSHLD 1
+#define DECREASE_QP 0
+#endif
+
+#define MASK_4AC 0xFFFFFFFEFEFEFCE0
+#define MASK_3AC 0xFFFFFFFFFEFEFCF0
+#define MASK_2AC 0xFFFFFFFFFFFEFCF8
+#define MASK_DC 0xFFFFFFFFFFFFFFFE
+#define I_PIC_LAMDA_MODIFIER 0.5
+#define CONST_LAMDA_MODIFIER 1
+#define NO_EXTRA_MULTIPLIER 1
+#define NEW_LAMDA_MODIFIER (!CONST_LAMDA_MODIFIER)
+#define LAMDA_MODIFIER(QP, Tid) /* 0.85 * 2^(Tid * (clip((QP + 5) / 25, 1, 2) - 1) / 3); args parenthesised */ \
+ (0.85 * pow(2.0, ((Tid) * (CLIP3((((QP) + 5.0) / 25.0), 1.0, 2.0) - 1.0)) / 3.0))
+#define CONST_LAMDA_MOD_VAL (0.85)
+#define MEAN_BASED_QP_MOD 0
+
+#if MEDIAN_ENABLE
+#define MEDIAN_CU_TU 1
+#define MEDIAN_CU_TU_BY_2 3
+#define MEDIAN_CU_TU_BY_4 10
+#endif
+
+#if MIN_ENABLE
+#define MEDIAN_CU_TU 0
+#define MEDIAN_CU_TU_BY_2 0
+#define MEDIAN_CU_TU_BY_4 0
+#endif
+
+#define COMP_RATIO_NORM 5
+#define COMP_RATIO_MIN 0
+#define COMP_RATIO_MAX 3
+#define NOISE_THRE_MAP_TO_8 3
+
+#define REF_MOD_VARIANCE (0.6696)
+
+#define REF_MOD_DEVIATION (473.0) //(0.6696) //
+#define NO_MOD_DEVIATION (220.0)
+#define BELOW_REF_DEVIATION (0.0)
+#define ABOVE_REF_DEVIATION (220.0)
+
+#define MIN_QP_MOD_OFFSET -10
+#define MAX_QP_MOD_OFFSET 3
+#define TOT_QP_MOD_OFFSET (MAX_QP_MOD_OFFSET - MIN_QP_MOD_OFFSET + 1)
+
+#define ENABLE_UNIFORM_CU_SIZE_16x16 0
+
+#define ENABLE_UNIFORM_CU_SIZE_8x8 0
+
+#define MAX_QP_BD_OFFSET 24
+
+// chroma mode index for derived from luma intra mode
+#define DM_CHROMA_IDX 36
+
+#define DISABLE_RDOQ 0
+
+#define DISABLE_SKIP_AND_MERGE_EVAL 0
+
+#define ENABLE_PICKING_4_BEST_IN_B_PIC_IN_ME 0
+
+#define ENABLE_TU_TREE_DETERMINATION_IN_RDOPT 0
+
+#define MAX_NUM_MIXED_MODE_INTER_RDO_CANDS (MAX_NUMBER_OF_INTER_RDOPT_CANDS_IN_PQ_AND_HQ * 2)
+
+#define MAX_NUM_CU_MERGE_SKIP_CAND (MAX_NUMBER_OF_INTER_RDOPT_CANDS_IN_PQ_AND_HQ + 1)
+
+#define NUM_MODE_COMBINATIONS_IN_INTER_CU_WITH_2_PUS 4
+
+/* +1 for skip candidate */
+#define MAX_NUM_INTER_RDO_CANDS \
+ (NUM_MODE_COMBINATIONS_IN_INTER_CU_WITH_2_PUS * MAX_NUMBER_OF_INTER_RDOPT_CANDS_IN_PQ_AND_HQ + \
+ 1)
+
+#define UNI_SATD_SCALE 1
+
+#define ENABLE_MIXED_INTER_MODE_EVAL 1
+
+#define DISABLE_SAO 0
+
+#define DISABLE_LUMA_SAO (0 || (DISABLE_SAO))
+
+#define DISABLE_CHROMA_SAO (0 || (DISABLE_SAO))
+
+#define MAX_NUM_INTER_CANDS_PQ 4 /*MAX_NUM_INTER_RDO_CANDS*/
+
+#define MAX_NUM_INTER_CANDS_HQ 4 /*MAX_NUM_INTER_RDO_CANDS*/
+
+#define MAX_NUM_INTER_CANDS_MS 3
+
+#define MAX_NUM_INTER_CANDS_HS 2
+
+#define MAX_NUM_INTER_CANDS_ES 2
+
+#define RESTRICT_NUM_INTER_CANDS_PER_PART_TYPE 0
+
+#define MAX_NUM_INTER_CANDS_PER_PART_TYPE 3
+
+#define PICK_ONLY_BEST_CAND_PER_PART_TYPE 0
+
+#define REUSE_ME_COMPUTED_ERROR_FOR_INTER_CAND_SIFTING 0
+
+#define DISABLE_SBH 0
+
+#define DISABLE_TMVP 0
+
+#define DISABLE_QUANT_ROUNDING 0
+
+#define ENABLE_SEPARATE_LUMA_CHROMA_INTRA_MODE 1
+
+#define FORCE_INTRA_TU_DEPTH_TO_0 0
+
+#define WEIGH_CHROMA_COST 1
+
+#define ENABLE_ZERO_CBF_IN_INTRA 0
+
+#define DISABLE_ZERO_ZBF_IN_INTER 0
+
+#define ENABLE_INTER_ZCU_COST 1
+
+#define ADAPT_COLOCATED_FROM_L0_FLAG 1
+
+#define CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT 10
+
+#define ENABLE_SSD_CALC_RC 0
+
+#define SRC_PADDING_FOR_TRAQO 1
+
+#define ENABLE_CU_SPLIT_FLAG_RATE_ESTIMATION 1
+
+#define ZCBF_SKIP_DISTORTION_THRESHOLD (1.2)
+
+#define ENABLE_CHROMA_RDOPT_EVAL_IN_PQ 1
+
+#define ENABLE_CHROMA_RDOPT_EVAL_IN_HQ 1
+
+#define ENABLE_CHROMA_RDOPT_EVAL_IN_MS 1
+
+#define ENABLE_CHROMA_RDOPT_EVAL_IN_HS 0
+
+#define ENABLE_CHROMA_RDOPT_EVAL_IN_XS 0
+
+#define ENABLE_CHROMA_RDOPT_EVAL_IN_XS6 0
+
+#define ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ \
+ (1 && (ENABLE_CHROMA_RDOPT_EVAL_IN_PQ) && (ENABLE_SEPARATE_LUMA_CHROMA_INTRA_MODE))
+
+#define ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ \
+ (1 && (ENABLE_CHROMA_RDOPT_EVAL_IN_HQ) && (ENABLE_SEPARATE_LUMA_CHROMA_INTRA_MODE))
+
+#define ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS \
+ (0 && (ENABLE_CHROMA_RDOPT_EVAL_IN_MS) && (ENABLE_SEPARATE_LUMA_CHROMA_INTRA_MODE))
+
+#define ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS \
+ (0 && (ENABLE_CHROMA_RDOPT_EVAL_IN_HS) && (ENABLE_SEPARATE_LUMA_CHROMA_INTRA_MODE))
+
+#define ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS \
+ (0 && (ENABLE_CHROMA_RDOPT_EVAL_IN_XS) && (ENABLE_SEPARATE_LUMA_CHROMA_INTRA_MODE))
+
+#define ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS6 \
+ (0 && (ENABLE_CHROMA_RDOPT_EVAL_IN_XS6) && (ENABLE_SEPARATE_LUMA_CHROMA_INTRA_MODE))
+
+#define RC_BUFFER_INFO 0
+
+#define DISABLE_SMP_IN_XS25 1
+
+#define DISABLE_64X64_BLOCK_MERGE_IN_ME_IN_XS25 1
+
+#define MAX_NUM_TU_RECUR_CANDS_IN_XS25 1
+
+#define MAX_NUM_CANDS_FOR_FPEL_REFINE_IN_XS25 1
+
+#define MAX_NUM_CONSTITUENT_MVS_TO_ENABLE_32MERGE_IN_XS25 4
+
+#define NUM_INIT_SEARCH_CANDS_IN_L1_AND_L2_ME_IN_XS25 2
+
+#define DISABLE_TOP_SYNC 0
+
+#define ENABLE_MULTI_THREAD_FILE_WRITES 0
+
+#define DISABLE_EARLY_ZCBF 0
+
+#define EARLY_CBF_ON 1
+
+#define DUMP_CBF_HIST_DATA 0
+
+#define ENABLE_INTRA_MODE_FILTERING_IN_XS25 1
+
+#define MAX_NUM_INTRA_MODES_PER_TU_DISTRIBUTION_IN_XS25 2
+
+#define MAX_NUM_REFS_IN_PPICS_IN_XS25 1
+
+#define USE_CONSTANT_LAMBDA_MODIFIER 0
+
+/* Actual Lambda in ME -> (100 - ME_LAMBDA_DISCOUNT) * lambda / 100 */
+#define ME_LAMBDA_DISCOUNT 0
+
+#define FORCE_AT_LEAST_1_UNICAND_IN_BPICS 0
+
+#define MULTI_REF_ENABLE 1
+
+#define PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3 0
+
+#define ENABLE_INTRA_GATING_FOR_HQ 0
+
+#define ADD_NOISE_TERM_TO_COST 1
+
+#define ALPHA_Q_FORMAT 4
+#define ALPHA_FOR_NOISE_TERM_IN_ME_P (1 << ((ALPHA_Q_FORMAT)-2)) //0.25
+#define ALPHA_FOR_NOISE_TERM_IN_ME_BREF (1 << ((ALPHA_Q_FORMAT)-2)) //0.25
+#define ALPHA_FOR_NOISE_TERM_IN_RDOPT_P (1 << ((ALPHA_Q_FORMAT)-2))
+
+#define ALPHA_FOR_NOISE_TERM (1 << ((ALPHA_Q_FORMAT)-2))
+
+#define ALPHA_FOR_NOISE_TERM_IN_ME (ALPHA_FOR_NOISE_TERM)
+
+#define ALPHA_FOR_NOISE_TERM_IN_RDOPT (ALPHA_FOR_NOISE_TERM)
+
+#define ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT 50
+
+#define ALPHA_FOR_ZERO_CODING_DECISIONS (ALPHA_FOR_NOISE_TERM_IN_RDOPT)
+
+#define STIM_Q_FORMAT 8
+
+#define USE_NOISE_TERM_IN_L0_ME (1 && (ADD_NOISE_TERM_TO_COST))
+
+#define USE_NOISE_TERM_IN_ENC_LOOP (1 && (ADD_NOISE_TERM_TO_COST))
+
+#define COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL (1 && (USE_NOISE_TERM_IN_ENC_LOOP))
+
+#define DISABLE_SUBPEL_REFINEMENT_WHEN_SRC_IS_NOISY (0 && (USE_NOISE_TERM_IN_L0_ME))
+
+#define USE_NOISE_TERM_DURING_BICAND_SEARCH (1 && (USE_NOISE_TERM_IN_L0_ME))
+
+#define DISABLE_BLK_MERGE_WHEN_NOISY (0 && (USE_NOISE_TERM_IN_L0_ME))
+
+/* Macros for Noise detection implementation */
+#define NOISE_DETECT (ADD_NOISE_TERM_TO_COST)
+
+#define PSY_RD_DEBUG_CTBX 2048
+#define PSY_RD_DEBUG_CTBY 1600
+#define DEBUG_POC_NO 0
+
+#define DISABLE_LARGE_INTRA_PQ 1
+
+#define EVERYWHERE_NOISY 0
+
+#define DEBUG_PRINT_NOISE_SPATIAL 0
+
+#define DEBUG_PRINT_NOISE_TEMPORAL 0
+
+#define TEMPORAL_NOISE_DETECT (1 && (USE_NOISE_TERM_IN_L0_ME) && !(EVERYWHERE_NOISY))
+
+#define MIN_NOISY_BLOCKS_CNT_16x16 7
+
+#define ALTERNATE_METRIC 0
+
+#define PSY_STRENGTH_CHROMA 2 // 0.5 in Q2
+
+#define Q_PSY_STRENGTH_CHROMA 2
+
+#define PSY_STRENGTH 4 // 1.0 in Q2 (4 / (1 << 2)); was labelled 0.5 - TODO confirm intended value
+
+#define Q_PSY_STRENGTH 2
+
+/* between 0 and 100 */
+#define MIN_ENERGY_FOR_NOISE_PERCENT_16x16 20
+
+/* normalised value between 0 and 1 */
+#define MIN_VARIANCE_FOR_NOISE_16x16 0.6
+
+/* HAD size is restricted to square blocks only. so we specify only one dimension */
+#define HAD_BLOCK_SIZE_16x16 16
+
+#define MIN_NUM_COEFFS_ABOVE_AVG_16x16 41
+
+#define MIN_COEFF_AVG_ENERGY_16x16 0
+
+#define MIN_NOISY_BLOCKS_CNT_8x8 30
+
+/* between 0 and 100 */
+#define MIN_ENERGY_FOR_NOISE_PERCENT_8x8 20
+
+/* normalised value between 0 and 1 */
+#define MIN_VARIANCE_FOR_NOISE_8x8 0.6
+
+/* HAD size is restricted to square blocks only. so we specify only one dimension */
+#define HAD_BLOCK_SIZE_8x8 8
+
+#define MIN_NUM_COEFFS_ABOVE_AVG_8x8 17
+
+#define MIN_COEFF_AVG_ENERGY_8x8 0
+
+#define SATD_NOISE_FLOOR_THRESHOLD 16
+
+#define ENABLE_DEBUG_PRINTS_IN_ME 0
+
+#define RC_DEBUG_LEVEL_1 0
+
+#define RC_2PASS_GOP_DEBUG 0
+
+#define DUMP_NOISE_MAP 0
+
+#define DISABLE_SKIP 0
+
+#define DISABLE_NOISE_DETECTION_IN_P_PICS (0 && (NOISE_DETECT))
+
+#define MAX_LAYER_ID_OF_B_PICS_WITHOUT_NOISE_DETECTION \
+ ((1 == (DISABLE_NOISE_DETECTION_IN_P_PICS)) ? 0 : 0)
+
+#define DISABLE_INTRA_WHEN_NOISY (0 && (NOISE_DETECT))
+
+#define DISABLE_BIPRED_MODES_WHEN_NOISY (0 && (ADD_NOISE_TERM_TO_COST))
+
+#define TEMPORAL_VARIANCE_FACTOR 3 // in Q2
+
+#define Q_TEMPORAL_VARIANCE_FACTOR 2
+
+/* Actual Lambda -> (100 - ME_LAMBDA_DISCOUNT_WHEN_NOISY) * lambda / 100 */
+/*(((100 * (ALPHA_FOR_NOISE_TERM_IN_ME) + (1 << ((ALPHA_Q_FORMAT) - 1)))) >> (ALPHA_Q_FORMAT))*/
+#define ME_LAMBDA_DISCOUNT_WHEN_NOISY 50
+
+/*(((100 * (ALPHA_FOR_NOISE_TERM_IN_RDOPT) + (1 << ((ALPHA_Q_FORMAT) - 1)))) >> (ALPHA_Q_FORMAT))*/
+#define RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY 25
+
+#define DISABLE_SKIP_AND_MERGE_WHEN_NOISY (0 && (USE_NOISE_TERM_IN_ENC_LOOP))
+
+#define NO_QP_OFFSET 0
+
+#define CONVERT_SSDS_TO_SPATIAL_DOMAIN (1 || (USE_NOISE_TERM_IN_ENC_LOOP))
+
+#define MAX_QP_WHERE_SPATIAL_SSD_ENABLED 18
+
+#define USE_RECON_TO_EVALUATE_STIM_IN_RDOPT (1 && (USE_NOISE_TERM_IN_ENC_LOOP))
+
+#define DISABLE_SAO_WHEN_NOISY (1 && (USE_NOISE_TERM_IN_ENC_LOOP))
+
+#define MAX_TU_SIZE_WHEN_NOISY 64
+
+#define RANDOMIZE_MERGE_IDX_WHEN_NOISY (0 && (USE_NOISE_TERM_IN_ENC_LOOP))
+
+#define MAX_CU_SIZE_WHERE_MERGE_AND_SKIPS_ENABLED_AND_WHEN_NOISY 64
+
+#define NUM_ELEMENTS_IN_RANDOMIZED_MERGE_IDX_LIST 113
+
+#define NUM_MERGE_INDICES_TO_PICK_WHEN_LIST_RANDOMIZED_MAXIDX4 \
+ ((DISABLE_SKIP_AND_MERGE_WHEN_NOISY) ? 0 : 2)
+
+#define NUM_MERGE_INDICES_TO_PICK_WHEN_LIST_RANDOMIZED_MAXIDX3 \
+ ((DISABLE_SKIP_AND_MERGE_WHEN_NOISY) ? 0 : 2)
+
+#define NUM_MERGE_INDICES_TO_PICK_WHEN_LIST_RANDOMIZED_MAXIDX2 \
+ ((DISABLE_SKIP_AND_MERGE_WHEN_NOISY) ? 0 : 1)
+
+#define NUM_MERGE_INDICES_TO_PICK_WHEN_LIST_RANDOMIZED_MAXIDX1 \
+ ((DISABLE_SKIP_AND_MERGE_WHEN_NOISY) ? 0 : 1)
+
+#define USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS \
+ (1 && (COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL))
+
+#define BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER 0
+
+#define BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_NOISE_PRESERVATION 29
+
+#define BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_1 5
+#define BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_2 6
+#define BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_3 7
+
+#define MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON 1
+
+#define MODULATE_LAMDA_WHEN_TRAQO_MOD_ON 1
+
+#define ENABLE_RUNTIME_ARCH_SWITCH 1
+
+#define DISABLE_8X8CUS_IN_NREFBPICS_IN_P6 1
+
+#define DISABLE_8X8CUS_IN_REFBPICS_IN_P6 (0 && (DISABLE_8X8CUS_IN_NREFBPICS_IN_P6))
+
+#define DISABLE_8X8CUS_IN_PPICS_IN_P6 (0 && (DISABLE_8X8CUS_IN_REFBPICS_IN_P6))
+
+#define L0ME_IN_OPENLOOP_MODE 0
+
+#define DISABLE_INTRAS_IN_BPIC 0
+
+#define MAX_RE_ENC 1
+
+#define ENABLE_RDO_BASED_TU_RECURSION 1
+
+#define ENABLE_TOP_DOWN_TU_RECURSION 1
+
+#define INCLUDE_CHROMA_DURING_TU_RECURSION (0 && (ENABLE_RDO_BASED_TU_RECURSION))
+
+#define PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS (0)
+
+#define PROCESS_INTRA_AND_INTER_CU_TREES_SEPARATELY \
+ (0 && (PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS))
+
+#define RESTRICT_NUM_PARTITION_LEVEL_L0ME_RESULTS_TO_1 1
+
+#define RESTRICT_NUM_PARTITION_LEVEL_L1ME_RESULTS_TO_1 1
+
+#define RESTRICT_NUM_PARTITION_LEVEL_L2ME_RESULTS_TO_1 1
+
+#define RESTRICT_NUM_SEARCH_CANDS_PER_SEARCH_CAND_LOC 1
+
+#define RESTRICT_NUM_2NX2N_TU_RECUR_CANDS 1
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+#define CREATE_SUBBLOCK2CSBFID_MAP(map, numMapElements, transSize, csbfBufStride) \
+ { /* fills map[] row by row: entry (i, j) gets j + i * csbfBufStride */ \
+ WORD32 i, j; \
+ /* transSize / 4 entries per row and per column; arguments are parenthesised */ \
+ WORD32 i4NumSubBlocksPerRow = (transSize) / 4; \
+ WORD32 i4NumSubBlocksPerColumn = i4NumSubBlocksPerRow; \
+ /* the caller's map must be able to hold every entry written below */ \
+ ASSERT((numMapElements) >= i4NumSubBlocksPerRow * i4NumSubBlocksPerColumn); \
+ \
+ for(i = 0; i < i4NumSubBlocksPerColumn; i++) \
+ { \
+ for(j = 0; j < i4NumSubBlocksPerRow; j++) \
+ { \
+ (map)[j + i * i4NumSubBlocksPerRow] = j + i * (csbfBufStride); \
+ } \
+ } \
+ }
+
+#define COPY_CABAC_STATES(dest, src, size) \
+ { /* plain memcpy of size bytes from src to dest */ \
+ memcpy(dest, src, size); \
+ }
+
+#define COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(dest, src, size) \
+ { /* plain memcpy of size bytes from src to dest */ \
+ memcpy(dest, src, size); \
+ }
+
+#define PAD_BUF(pu1_start, stride, wd, ht, p_x, p_y, plane, function_pointer1, function_pointer2) \
+ { /* 1st call uses p_x; 2nd starts p_x earlier and spans wd + 2 * p_x, using p_y; plane is unused */ \
+ function_pointer1(pu1_start, stride, ht, wd, p_x); \
+ function_pointer2((pu1_start) - (p_x), stride, ht, (wd) + ((p_x) << 1), p_y); \
+ }
+
+#define PAD_BUF_HOR(pu1_start, stride, ht, p_x, p_y, function_pointer) \
+ { /* forwards (pu1_start, stride, ht, p_x) to function_pointer; p_y is unused */ \
+ function_pointer(pu1_start, stride, ht, p_x); \
+ }
+
+#define PAD_BUF_VER(pu1_start, stride, wd, p_x, p_y, function_pointer) \
+ { /* forwards (pu1_start, stride, wd, p_y) to function_pointer; p_x is unused */ \
+ function_pointer(pu1_start, stride, wd, p_y); \
+ }
+
+#define POPULATE_PART_RESULTS(ps_part_results, ps_search_node) \
+ { /* copies ref idx, mv x/y and sad from the search node; pointer args may be expressions */ \
+ (ps_part_results)->i1_ref_idx = (ps_search_node)->i1_ref_idx; \
+ (ps_part_results)->i2_mv_x = (ps_search_node)->i2_mv_x; \
+ (ps_part_results)->i2_mv_y = (ps_search_node)->i2_mv_y; \
+ (ps_part_results)->i4_sad = (ps_search_node)->i4_sad; \
+ }
+
+#define GET_IDX_CIRCULAR_BUF(idx, increment, size) \
+ { /* steps idx by increment and wraps it; idx must be a modifiable lvalue */ \
+ if((increment) < 0) \
+ { \
+ (idx) += (increment); \
+ if((idx) < 0) \
+ { \
+ (idx) += (size); /* wraps once only: assumes |increment| <= size */ \
+ } \
+ } \
+ else \
+ { \
+ (idx) += (increment); \
+ if((idx) >= (size)) \
+ { \
+ (idx) %= (size); \
+ } \
+ } \
+ }
+
+#define CLIPUCHAR(x) CLIP3((x), 0, 255)
+
+#define CLIPUCHAR10BIT(x) CLIP3((x), 0, 1023)
+
+#define CEIL4(x) ((((x) + 3) >> 2) << 2) /* round x up to a multiple of 4 */
+
+#define CEIL8(x) ((((x) + 7) >> 3) << 3) /* round x up to a multiple of 8 */
+
+#define CEIL2(x) ((((x) + 1) >> 1) << 1) /* round x up to a multiple of 2 */
+
+#define CEIL16(x) (((x) + 15) & (~15))
+
+#define CEIL_POW2(x, align) (((x) + (align)-1) & (~((align)-1)))
+
+#define PAD_SUBPEL PAD_BUF
+#define PAD_FPEL PAD_BUF
+#define PAD_FPEL_HOR PAD_BUF_HOR
+#define PAD_FPEL_VER PAD_BUF_VER
+
+/* Defining a printf macro: To disable all prints inside codec in release mode */
+#ifdef _DEBUG
+#define DBG_PRINTF(...) printf(__VA_ARGS__)
+#else
+#define DBG_PRINTF(...)
+#endif
+
+/*****************************************************************************/
+/* Enumerations */
+/*****************************************************************************/
+
+typedef enum /* level identifiers; each value is 30 x the level number (LEVEL2_1 = 63 = 30 * 2.1) */
+{
+ LEVEL1 = 30,
+ LEVEL2 = 60,
+ LEVEL2_1 = 63,
+ LEVEL3 = 90,
+ LEVEL3_1 = 93,
+ LEVEL4 = 120,
+ LEVEL4_1 = 123,
+ LEVEL5 = 150,
+ LEVEL5_1 = 153,
+ LEVEL5_2 = 156,
+ LEVEL6 = 180,
+ LEVEL6_1 = 183,
+ LEVEL6_2 = 186
+} LEVEL_T;
+
+typedef enum /* reference list indices */
+{
+ LIST_0, /* = 0 */
+ LIST_1, /* = 1 */
+
+ NUM_REF_LISTS /* count of the lists above (evaluates to 2) */
+
+} REF_LISTS_t;
+
+typedef enum /* chroma plane identifiers */
+{
+ NULL_PLANE = -1, /* presumably "no plane selected" - TODO confirm against users */
+ U_PLANE = 0,
+ V_PLANE = 1
+} CHROMA_PLANE_ID_T;
+
+typedef enum SSD_TYPE_T /* domain selector for SSD */
+{
+ NULL_TYPE = -1, /* presumably "no type selected" - TODO confirm against users */
+ SPATIAL_DOMAIN_SSD, /* = 0 (follows NULL_TYPE = -1) */
+ FREQUENCY_DOMAIN_SSD /* = 1 */
+
+} SSD_TYPE_T;
+
+#endif
diff --git a/encoder/ihevce_dep_mngr.c b/encoder/ihevce_dep_mngr.c
new file mode 100644
index 0000000..ba269de
--- /dev/null
+++ b/encoder/ihevce_dep_mngr.c
@@ -0,0 +1,1215 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_dep_mngr.c
+*
+* \brief
+* This file contains all the functions related to Sync manager
+*
+* \date
+* 12/12/2013
+*
+* \author
+* Ittiam
+*
+* List of Functions
+* <TODO: TO BE ADDED>
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevc_debug.h"
+#include "ihevc_macros.h"
+#include "ihevc_platform_macros.h"
+
+#include "ihevce_api.h"
+#include "ihevce_dep_mngr_interface.h"
+#include "ihevce_dep_mngr_private.h"
+
+#include "cast_types.h"
+#include "osal.h"
+#include "osal_defaults.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_dmgr_get_num_mem_recs \endif
+*
+* \brief
+* Returns the number of memory records used by the Dependency manager.
+*
+* \return
+* Number of memory records (NUM_DEP_MNGR_MEM_RECS)
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+WORD32 ihevce_dmgr_get_num_mem_recs()
+{
+ return (NUM_DEP_MNGR_MEM_RECS);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_dmgr_get_mem_recs \endif
+*
+* \brief
+* Fills size, type and alignment of each memory record of the Dependency manager.
+*
+* \param[in,out] ps_mem_tab : pointer to memory descriptors table
+* \param[in] dep_mngr_mode : Mode of operation of dependency manager
+* \param[in] max_num_vert_units : Maximum number of units to be processed
+* \param[in] num_tile_cols : Number of column tiles for which encoder is working
+* \param[in] num_threads : Number of threads among which sync will be established
+* \param[in] i4_mem_space : memspace in which memory request should be done
+*
+* \return
+* Number of memory records (NUM_DEP_MNGR_MEM_RECS)
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+WORD32 ihevce_dmgr_get_mem_recs(
+ iv_mem_rec_t *ps_mem_tab,
+ WORD32 dep_mngr_mode,
+ WORD32 max_num_vert_units,
+ WORD32 num_tile_cols,
+ WORD32 num_threads,
+ WORD32 i4_mem_space)
+{
+ WORD32 num_vert_units;
+ WORD32 num_wait_thrd_ids;
+
+ /* Dependency manager state structure */
+ ps_mem_tab[DEP_MNGR_CTXT].i4_mem_size = sizeof(dep_mngr_state_t);
+ ps_mem_tab[DEP_MNGR_CTXT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
+ ps_mem_tab[DEP_MNGR_CTXT].i4_mem_alignment = 8;
+
+ /* SANITY CHECK: only the three sync modes below are supported */
+ ASSERT(
+ (DEP_MNGR_FRM_FRM_SYNC == dep_mngr_mode) || (DEP_MNGR_ROW_FRM_SYNC == dep_mngr_mode) ||
+ (DEP_MNGR_ROW_ROW_SYNC == dep_mngr_mode));
+
+ /* Default value: anything below 1 is treated as a single tile column */
+ if(num_tile_cols < 1)
+ {
+ num_tile_cols = 1;
+ }
+
+ /**************** Get Processed status Memory Requirements *********************/
+ if(DEP_MNGR_FRM_FRM_SYNC == dep_mngr_mode)
+ {
+ /* for frame to frame sync, per thread:
+ 2 WORD32 words are used for holding num units processed prev
+ 2 WORD32 words are used for holding num units processed curr
+ */
+ num_vert_units = (2 + 2) * num_threads;
+ }
+ else
+ {
+ /* for both frm-row and row-row num vertical units in frame is allocated */
+ /* (* num_tile_cols) as each column tile can separately update and check */
+ num_vert_units = max_num_vert_units * num_tile_cols;
+ }
+
+ ps_mem_tab[DEP_MNGR_UNITS_PRCSD_MEM].i4_mem_size = (sizeof(WORD32) * num_vert_units);
+ ps_mem_tab[DEP_MNGR_UNITS_PRCSD_MEM].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
+ ps_mem_tab[DEP_MNGR_UNITS_PRCSD_MEM].i4_mem_alignment = 8;
+
+ /**************** Get Wait thread ids Memory Requirements *********************/
+ if(DEP_MNGR_FRM_FRM_SYNC == dep_mngr_mode)
+ {
+ /* for frame to frame sync number of threads worth memory is allocated */
+ num_wait_thrd_ids = num_threads;
+ }
+ else if(DEP_MNGR_ROW_ROW_SYNC == dep_mngr_mode)
+ {
+ /* for row to row sync number of vertical rows worth memory is allocated */
+ num_wait_thrd_ids = max_num_vert_units;
+ }
+ else
+ {
+ /* for row to frame sync number of threads * number of vertical rows worth memory is allocated */
+ num_wait_thrd_ids = max_num_vert_units * num_threads;
+ }
+
+ ps_mem_tab[DEP_MNGR_WAIT_THRD_ID_MEM].i4_mem_size = (sizeof(WORD32) * num_wait_thrd_ids);
+ ps_mem_tab[DEP_MNGR_WAIT_THRD_ID_MEM].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
+ ps_mem_tab[DEP_MNGR_WAIT_THRD_ID_MEM].i4_mem_alignment = 8;
+
+ /**************** Get Semaphore Requirements: one handle (void *) per thread ****/
+ ps_mem_tab[DEP_MNGR_SEM_HANDLE_MEM].i4_mem_size = (sizeof(void *) * num_threads);
+ ps_mem_tab[DEP_MNGR_SEM_HANDLE_MEM].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
+ ps_mem_tab[DEP_MNGR_SEM_HANDLE_MEM].i4_mem_alignment = 8;
+
+ return (NUM_DEP_MNGR_MEM_RECS);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_dmgr_map_get_mem_recs \endif
+*
+* \brief
+* Fills the memory records of the Dependency manager for map-mode operation.
+*
+* \param[in,out] ps_mem_tab : pointer to memory descriptors table
+* \param[in] num_units : Number of units in the map
+* \param[in] num_threads : Number of threads among which sync will be established
+* \param[in] i4_mem_space : memspace in which memory request should be done
+*
+* \return
+* Number of memory records (NUM_DEP_MNGR_MEM_RECS)
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+WORD32 ihevce_dmgr_map_get_mem_recs(
+ iv_mem_rec_t *ps_mem_tab, WORD32 num_units, WORD32 num_threads, WORD32 i4_mem_space)
+{
+ /* Dependency manager state structure */
+ ps_mem_tab[DEP_MNGR_CTXT].i4_mem_size = sizeof(dep_mngr_state_t);
+ ps_mem_tab[DEP_MNGR_CTXT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
+ ps_mem_tab[DEP_MNGR_CTXT].i4_mem_alignment = 8;
+
+ /**************** Get Processed status Memory Requirements: one WORD8 per unit ****/
+ ps_mem_tab[DEP_MNGR_UNITS_PRCSD_MEM].i4_mem_size = (sizeof(WORD8) * num_units);
+ ps_mem_tab[DEP_MNGR_UNITS_PRCSD_MEM].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
+ ps_mem_tab[DEP_MNGR_UNITS_PRCSD_MEM].i4_mem_alignment = 8;
+
+ /**************** Get Wait thread ids Memory Requirements *********************/
+ /* Map-mode: semaphore post is unconditionally done on all threads */
+ ps_mem_tab[DEP_MNGR_WAIT_THRD_ID_MEM].i4_mem_size = (sizeof(WORD32) * num_threads);
+ ps_mem_tab[DEP_MNGR_WAIT_THRD_ID_MEM].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
+ ps_mem_tab[DEP_MNGR_WAIT_THRD_ID_MEM].i4_mem_alignment = 8;
+
+ /**************** Get Semaphore Requirements: one handle (void *) per thread ****/
+ ps_mem_tab[DEP_MNGR_SEM_HANDLE_MEM].i4_mem_size = (sizeof(void *) * num_threads);
+ ps_mem_tab[DEP_MNGR_SEM_HANDLE_MEM].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
+ ps_mem_tab[DEP_MNGR_SEM_HANDLE_MEM].i4_mem_alignment = 8;
+
+ return (NUM_DEP_MNGR_MEM_RECS);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_dmgr_rst_frm_frm_sync \endif
+*
+* \brief
+* Resets frame-frame sync state: per-thread curr/prev counters to 0, wait thread ids to -1
+*
+* \param[in,out] pv_dep_mngr_state : Pointer to Sync Manager handle.
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_dmgr_rst_frm_frm_sync(void *pv_dep_mngr_state)
+{
+ dep_mngr_state_t *ps_dep_mngr_state;
+ WORD32 thrds;
+ ULWORD64 *pu8_num_units_proc_prev;
+ ULWORD64 *pu8_num_units_proc_curr;
+
+ /* dep manager state structure */
+ ps_dep_mngr_state = (dep_mngr_state_t *)pv_dep_mngr_state;
+
+ /* curr counters start the buffer; prev counters follow i4_num_thrds entries later */
+ pu8_num_units_proc_curr = (ULWORD64 *)ps_dep_mngr_state->pv_units_prcsd_in_row;
+ pu8_num_units_proc_prev = pu8_num_units_proc_curr + ps_dep_mngr_state->i4_num_thrds;
+
+ /* Reset both counters and the waiting thread id of every thread */
+ for(thrds = 0; thrds < ps_dep_mngr_state->i4_num_thrds; thrds++)
+ {
+ pu8_num_units_proc_prev[thrds] = 0;
+ pu8_num_units_proc_curr[thrds] = 0;
+ ps_dep_mngr_state->pi4_wait_thrd_id[thrds] = -1;
+ }
+
+ return;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_dmgr_rst_row_frm_sync \endif
+*
+* \brief
+* Resets row-frame sync state: units processed per row to 0, all wait thread ids to -1
+*
+* \param[in,out] pv_dep_mngr_state : Pointer to Sync Manager handle.
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_dmgr_rst_row_frm_sync(void *pv_dep_mngr_state)
+{
+ dep_mngr_state_t *ps_dep_mngr_state;
+ WORD32 ctr, thrds;
+
+ /* dep manager state structure */
+ ps_dep_mngr_state = (dep_mngr_state_t *)pv_dep_mngr_state;
+
+ /* Reset the values of number of units processed in a row */
+ for(ctr = 0; ctr < ps_dep_mngr_state->i4_num_vert_units; ctr++)
+ {
+ ((WORD32 *)ps_dep_mngr_state->pv_units_prcsd_in_row)[ctr] = 0;
+ }
+
+ /* Reset the waiting thread ids: i4_num_thrds consecutive entries for each row */
+ for(ctr = 0; ctr < ps_dep_mngr_state->i4_num_vert_units; ctr++)
+ {
+ for(thrds = 0; thrds < ps_dep_mngr_state->i4_num_thrds; thrds++)
+ {
+ ps_dep_mngr_state->pi4_wait_thrd_id[thrds + (ps_dep_mngr_state->i4_num_thrds * ctr)] =
+ -1;
+ }
+ }
+
+ return;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_dmgr_map_rst_sync \endif
+*
+* \brief
+* Resets every entry of the dependency map to MAP_CTB_INIT
+*
+* \param[in,out] pv_dep_mngr_state : Pointer to Sync Manager handle.
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_dmgr_map_rst_sync(void *pv_dep_mngr_state)
+{
+ dep_mngr_state_t *ps_dep_mngr_state;
+ WORD8 *pi1_ptr;
+
+ /* dep manager state structure */
+ ps_dep_mngr_state = (dep_mngr_state_t *)pv_dep_mngr_state;
+
+ pi1_ptr = (WORD8 *)ps_dep_mngr_state->pv_units_prcsd_in_row -
+ ps_dep_mngr_state->ai4_tile_xtra_ctb[0] * ps_dep_mngr_state->i4_num_horz_units -
+ ps_dep_mngr_state->ai4_tile_xtra_ctb[1]; /* step back over the extra CTBs above/left of the tile */
+
+ memset(
+ pi1_ptr,
+ MAP_CTB_INIT,
+ ps_dep_mngr_state->i4_num_vert_units * ps_dep_mngr_state->i4_num_horz_units *
+ sizeof(WORD8));
+
+ //ps_dep_mngr_state->i4_frame_map_complete = 0;
+
+ return;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_dmgr_rst_row_row_sync \endif
+*
+* \brief
+* Resets row-row sync state: units processed per (tile col, row) to 0, per-row wait id to -1
+*
+* \param[in,out] pv_dep_mngr_state : Pointer to Sync Manager handle.
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_dmgr_rst_row_row_sync(void *pv_dep_mngr_state)
+{
+ dep_mngr_state_t *ps_dep_mngr_state;
+ WORD32 ctr;
+
+ /* dep manager state structure */
+ ps_dep_mngr_state = (dep_mngr_state_t *)pv_dep_mngr_state;
+
+ /* Reset the number of units processed: one entry per row for every tile column */
+ for(ctr = 0; ctr < (ps_dep_mngr_state->i4_num_vert_units * ps_dep_mngr_state->i4_num_tile_cols);
+ ctr++)
+ {
+ ((WORD32 *)ps_dep_mngr_state->pv_units_prcsd_in_row)[ctr] = 0;
+ }
+
+ /* Reset the thread id waiting on each row (a single entry per row) */
+ for(ctr = 0; ctr < ps_dep_mngr_state->i4_num_vert_units; ctr++)
+ {
+ ps_dep_mngr_state->pi4_wait_thrd_id[ctr] = -1;
+ }
+
+ return;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_dmgr_init \endif
+*
+* \brief
+* Initialization for Dependency manager state structure.
+*
+* \param[in] ps_mem_tab : pointer to memory descriptors table
+* \param[in] pv_osal_handle : osal handle (only checked to be non-NULL here)
+* \param[in] dep_mngr_mode : Mode of operation of dependency manager
+* \param[in] max_num_vert_units : Maximum number of units to be processed (Frame Data)
+* \param[in] max_num_horz_units : Maximum Number of Horizontal units to be processed (Frame Data)
+* \param[in] num_tile_cols : Number of column tiles for which encoder is working
+* (values less than 1 are treated as 1)
+* \param[in] num_threads : Number of threads among which sync will be established
+* \param[in] sem_enable : Whether you want to enable semaphore or not
+ 1 : Sem. Enabled, 0 : Spin lock enabled (do-while)
+*
+* \return
+* Handle to context
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void *ihevce_dmgr_init(
+ iv_mem_rec_t *ps_mem_tab,
+ void *pv_osal_handle,
+ WORD32 dep_mngr_mode,
+ WORD32 max_num_vert_units,
+ WORD32 max_num_horz_units,
+ WORD32 num_tile_cols,
+ WORD32 num_threads,
+ WORD32 sem_enable)
+{
+ dep_mngr_state_t *ps_dep_mngr_state;
+
+ (void)pv_osal_handle;
+ /* dep manager state structure */
+ ps_dep_mngr_state = (dep_mngr_state_t *)ps_mem_tab[DEP_MNGR_CTXT].pv_base;
+
+ /* dep manager memory init */
+ ps_dep_mngr_state->ppv_thrd_sem_handles = (void **)ps_mem_tab[DEP_MNGR_SEM_HANDLE_MEM].pv_base;
+ ps_dep_mngr_state->pi4_wait_thrd_id = (WORD32 *)ps_mem_tab[DEP_MNGR_WAIT_THRD_ID_MEM].pv_base;
+ ps_dep_mngr_state->pv_units_prcsd_in_row = ps_mem_tab[DEP_MNGR_UNITS_PRCSD_MEM].pv_base;
+
+ /* SANITY CHECK */
+ ASSERT(NULL != pv_osal_handle);
+ ASSERT(
+ (DEP_MNGR_FRM_FRM_SYNC == dep_mngr_mode) || (DEP_MNGR_ROW_FRM_SYNC == dep_mngr_mode) ||
+ (DEP_MNGR_ROW_ROW_SYNC == dep_mngr_mode));
+
+ /* Default value */
+ if(num_tile_cols < 1)
+ {
+ num_tile_cols = 1;
+ }
+
+ /* store the configuration in the state structure */
+ ps_dep_mngr_state->i4_num_horz_units = max_num_horz_units;
+ ps_dep_mngr_state->i4_num_vert_units = max_num_vert_units;
+ ps_dep_mngr_state->i1_sem_enable = sem_enable;
+ ps_dep_mngr_state->i4_dep_mngr_mode = dep_mngr_mode;
+ ps_dep_mngr_state->i4_num_thrds = num_threads;
+ ps_dep_mngr_state->i4_num_tile_cols = num_tile_cols;
+
+ /* call the reset function based on mode */
+ if(DEP_MNGR_FRM_FRM_SYNC == dep_mngr_mode)
+ {
+ ihevce_dmgr_rst_frm_frm_sync((void *)ps_dep_mngr_state);
+ }
+ else if(DEP_MNGR_ROW_ROW_SYNC == dep_mngr_mode)
+ {
+ ihevce_dmgr_rst_row_row_sync((void *)ps_dep_mngr_state);
+ }
+ else
+ {
+ ihevce_dmgr_rst_row_frm_sync((void *)ps_dep_mngr_state);
+ }
+
+ return ((void *)ps_dep_mngr_state);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_dmgr_map_init \endif
+*
+* \brief
+* Initialization for Dependency manager state structure in map mode.
+*
+* \param[in] ps_mem_tab : pointer to memory descriptors table
+* \param[in] max_num_vert_units : Maximum number of units to be processed
+* \param[in] max_num_horz_units : Maximum Number of Horizontal units to be processed
+* \param[in] sem_enable : Whether you want to enable semaphore or not
+ 1 : Sem. Enabled, 0 : Spin lock enabled (do-while)
+* \param[in] num_threads : Number of threads among which sync will be established
+* \param[in] ai4_tile_xtra_ctb : Array containing the number of CTBs which
+* are present in the Search Range outside the tile in dist-client mode.
+* In standalone mode this array should be zero.
+*
+* \return
+* Handle to context
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void *ihevce_dmgr_map_init(
+ iv_mem_rec_t *ps_mem_tab,
+ WORD32 max_num_vert_units,
+ WORD32 max_num_horz_units,
+ WORD32 sem_enable,
+ WORD32 num_threads,
+ WORD32 ai4_tile_xtra_ctb[4])
+{
+ WORD32 ctr;
+ dep_mngr_state_t *ps_dep_mngr_state;
+
+ /* dep manager state structure */
+ ps_dep_mngr_state = (dep_mngr_state_t *)ps_mem_tab[DEP_MNGR_CTXT].pv_base;
+
+ ps_dep_mngr_state->ai4_tile_xtra_ctb[0] = ai4_tile_xtra_ctb[0];
+ ps_dep_mngr_state->ai4_tile_xtra_ctb[1] = ai4_tile_xtra_ctb[1];
+ ps_dep_mngr_state->ai4_tile_xtra_ctb[2] = ai4_tile_xtra_ctb[2];
+ ps_dep_mngr_state->ai4_tile_xtra_ctb[3] = ai4_tile_xtra_ctb[3];
+
+ /* dep manager memory init */
+ ps_dep_mngr_state->pv_units_prcsd_in_row = ps_mem_tab[DEP_MNGR_UNITS_PRCSD_MEM].pv_base;
+ ps_dep_mngr_state->pi4_wait_thrd_id = (WORD32 *)ps_mem_tab[DEP_MNGR_WAIT_THRD_ID_MEM].pv_base;
+ ps_dep_mngr_state->ppv_thrd_sem_handles = (void **)ps_mem_tab[DEP_MNGR_SEM_HANDLE_MEM].pv_base;
+
+ /* Pointing to first CTB of tile: skip xtra_ctb[0] rows and xtra_ctb[1] columns of the map */
+ ps_dep_mngr_state->pv_units_prcsd_in_row =
+ (void*)((WORD8*)ps_dep_mngr_state->pv_units_prcsd_in_row +
+ ps_dep_mngr_state->ai4_tile_xtra_ctb[1] +
+ max_num_horz_units * ps_dep_mngr_state->ai4_tile_xtra_ctb[0]);
+
+ /* Map-mode: semaphore post is unconditionally done on all threads. Hence
+ store these one time IDs. The use of pi4_wait_thrd_id itself can be removed
+ altogether for map-mode, but keeping it for the sake of laziness */
+ for(ctr = 0; ctr < num_threads; ctr++)
+ {
+ ps_dep_mngr_state->pi4_wait_thrd_id[ctr] = ctr;
+ }
+
+ /* store the configuration in the state structure */
+ ps_dep_mngr_state->i4_num_horz_units = max_num_horz_units;
+ ps_dep_mngr_state->i4_num_vert_units = max_num_vert_units;
+ ps_dep_mngr_state->i1_sem_enable = sem_enable;
+ ps_dep_mngr_state->i4_dep_mngr_mode = DEP_MNGR_MAP_SYNC;
+ ps_dep_mngr_state->i4_num_thrds = num_threads;
+
+ /* reset the map entries to their initial value */
+ ihevce_dmgr_map_rst_sync((void *)ps_dep_mngr_state);
+
+ return ((void *)ps_dep_mngr_state);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_dmgr_del \endif
+*
+* \brief
+* Delete hook for the Dependency manager state structure.
+* Note : Currently releases nothing. System has to free the allocated memory
+*
+* \param[in,out] pv_dep_mngr_state : Pointer to Sync Manager handle.
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_dmgr_del(void *pv_dep_mngr_state)
+{
+ dep_mngr_state_t *ps_dep_mngr_state;
+
+ /* dep manager state structure; assigned before the (void) use so that no indeterminate value is read */
+ ps_dep_mngr_state = (dep_mngr_state_t *)pv_dep_mngr_state;
+ (void)ps_dep_mngr_state;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_dmgr_reg_sem_hdls \endif
+*
+* \brief
+* Register sem handles of threads which are part of dependency group
+*
+* \param[in,out] pv_dep_mngr_state : Pointer to Sync Manager handle.
+* \param[in] ppv_thread_sem_hdl : array of pointers to all the sem handles
+* \param[in] num_threads : Number of threads part of this dependency group
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_dmgr_reg_sem_hdls(void *pv_dep_mngr_state, void **ppv_thread_sem_hdl, WORD32 num_threads)
+{
+ dep_mngr_state_t *ps_dep_mngr_state;
+ WORD32 ctr;
+
+ /* dep manager state structure */
+ ps_dep_mngr_state = (dep_mngr_state_t *)pv_dep_mngr_state;
+
+ ASSERT(num_threads <= ps_dep_mngr_state->i4_num_thrds);
+
+ for(ctr = 0; ctr < num_threads; ctr++)
+ {
+ ps_dep_mngr_state->ppv_thrd_sem_handles[ctr] = ppv_thread_sem_hdl[ctr];
+ }
+
+ return;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_dmgr_set_prev_done_frm_frm_sync \endif
+*
+* \brief
+* Sets the prev counter of every thread to 1 and clears the waiting thread ids
+*
+* \param[in,out] pv_dep_mngr_state : Pointer to Sync Manager handle.
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_dmgr_set_prev_done_frm_frm_sync(void *pv_dep_mngr_state)
+{
+ dep_mngr_state_t *ps_dep_mngr_state;
+ WORD32 thrds;
+ ULWORD64 *pu8_num_units_proc_curr;
+ ULWORD64 *pu8_num_units_proc_prev;
+
+ /* dep manager state structure */
+ ps_dep_mngr_state = (dep_mngr_state_t *)pv_dep_mngr_state;
+
+ /* prev counters are located i4_num_thrds entries after the curr counters */
+ pu8_num_units_proc_curr = (ULWORD64 *)ps_dep_mngr_state->pv_units_prcsd_in_row;
+ pu8_num_units_proc_prev =
+ (ULWORD64 *)(pu8_num_units_proc_curr + ps_dep_mngr_state->i4_num_thrds);
+
+ /* Set the prev counters and reset the thread ids waiting (curr counters are untouched) */
+ for(thrds = 0; thrds < ps_dep_mngr_state->i4_num_thrds; thrds++)
+ {
+ pu8_num_units_proc_prev[thrds] = 1;
+ ps_dep_mngr_state->pi4_wait_thrd_id[thrds] = -1;
+ }
+
+ return;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_dmgr_set_done_frm_frm_sync \endif
+*
+* \brief
+* Sets the curr counter of every thread to 1 and clears the waiting thread ids
+*
+* \param[in,out] pv_dep_mngr_state : Pointer to Sync Manager handle.
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_dmgr_set_done_frm_frm_sync(void *pv_dep_mngr_state)
+{
+ dep_mngr_state_t *ps_dep_mngr_state;
+ WORD32 thrds;
+ ULWORD64 *pu8_num_units_proc_curr;
+
+ /* dep manager state structure */
+ ps_dep_mngr_state = (dep_mngr_state_t *)pv_dep_mngr_state;
+
+ /* curr counters are at the start of the units processed buffer */
+ pu8_num_units_proc_curr = (ULWORD64 *)ps_dep_mngr_state->pv_units_prcsd_in_row;
+
+ /* Set the curr counters and reset the thread ids waiting (prev counters are untouched) */
+ for(thrds = 0; thrds < ps_dep_mngr_state->i4_num_thrds; thrds++)
+ {
+ pu8_num_units_proc_curr[thrds] = 1;
+ ps_dep_mngr_state->pi4_wait_thrd_id[thrds] = -1;
+ }
+
+ return;
+}
+
+/*!
+**************************************************************************
+* \if Function name : ihevce_dmgr_chk_row_row_sync \endif
+*
+* \brief
+* This function checks whether the dependency is met to proceed with
+* processing. If condition is not met, it goes to a sem_wait state when
+* semaphores are enabled (otherwise it keeps polling), else starts processing.
+*
+* \param[in] pv_dep_mngr_state : Pointer to Sync Manager handle.
+* \param[in] cur_offset : Current offset of the dep. variable
+* \param[in] dep_offset : Offset from the current value to meet the dep.
+* \param[in] dep_row : The position of the Ref.
+* \param[in] cur_tile_col : The current column tile number (not tile id)
+* Assuming the dependency is within the tile only (Across tiles won't work now)
+* \param[in] thrd_id : Thread id of the current thread checking for dependency
+*
+* \return
+* 0 always (returns only once the dependency is met)
+*
+* \author
+* Ittiam
+*
+**************************************************************************
+*/
+WORD32 ihevce_dmgr_chk_row_row_sync(
+ void *pv_dep_mngr_state,
+ WORD32 cur_offset,
+ WORD32 dep_offset,
+ WORD32 dep_row,
+ WORD32 cur_tile_col,
+ WORD32 thrd_id)
+{
+ dep_mngr_state_t *ps_dep_mngr_state;
+ volatile WORD32 *pi4_ref_value;
+ WORD32 ref_value;
+
+ ps_dep_mngr_state = (dep_mngr_state_t *)pv_dep_mngr_state;
+
+ /* Sanity Check */
+ ASSERT(dep_row >= 0);
+ ASSERT(dep_row < ps_dep_mngr_state->i4_num_vert_units);
+ ASSERT(cur_tile_col >= 0);
+ ASSERT(cur_tile_col < ps_dep_mngr_state->i4_num_tile_cols);
+
+ pi4_ref_value = ((volatile WORD32 *)(ps_dep_mngr_state->pv_units_prcsd_in_row)) +
+ (cur_tile_col * ps_dep_mngr_state->i4_num_vert_units) + dep_row;
+
+ /* Sanity Check */
+ ASSERT((cur_offset + dep_offset) <= ps_dep_mngr_state->i4_num_horz_units);
+
+ /* Check whether Dep. is met: loop until the reference row has progressed far enough */
+ while(1)
+ {
+ ref_value = *pi4_ref_value;
+
+ if(ref_value >= (cur_offset + dep_offset))
+ break;
+
+ if(1 == ps_dep_mngr_state->i1_sem_enable)
+ {
+ void *pv_sem_handle;
+ WORD32 ret_val;
+
+ (void)ret_val;
+ pv_sem_handle = ps_dep_mngr_state->ppv_thrd_sem_handles[thrd_id];
+
+ /* register the thread id before going to pend state (single entry per row) */
+ ps_dep_mngr_state->pi4_wait_thrd_id[dep_row] = thrd_id;
+
+ /* go to the pend state */
+ ret_val = osal_sem_wait(pv_sem_handle);
+ //ASSERT(0 == ret_val);
+ }
+ }
+
+ return 0;
+}
+
+/*!
+**************************************************************************
+* \if Function name : ihevce_dmgr_set_row_row_sync \endif
+*
+* \brief
+* This function records the progress of a row and wakes up the semaphores
+* of threads waiting on it so that they can start processing.
+*
+* \param[in] pv_dep_mngr_state : Pointer to Sync Manager handle.
+* \param[in] cur_offset : Current offset processed
+* \param[in] cur_row : The cur. vertical position (0 .. number of vertical units - 1)
+* \param[in] cur_tile_col : The current column tile number (not tile id)
+* Assuming the dependency is within the tile only (Across tiles won't work now)
+*
+* \return
+* 0 always
+*
+* \author
+* Ittiam
+*
+**************************************************************************
+*/
+WORD32 ihevce_dmgr_set_row_row_sync(
+ void *pv_dep_mngr_state, WORD32 cur_offset, WORD32 cur_row, WORD32 cur_tile_col)
+{
+ dep_mngr_state_t *ps_dep_mngr_state;
+ WORD32 *pi4_units_prcsd;
+ void *pv_sem_handle;
+ WORD32 ret_val;
+
+ (void)ret_val;
+ ps_dep_mngr_state = (dep_mngr_state_t *)pv_dep_mngr_state;
+
+ /* Sanity Check: cur_row indexes the per-row arrays, so it must be a valid row index */
+ ASSERT(cur_offset >= 0);
+ ASSERT(cur_offset <= ps_dep_mngr_state->i4_num_horz_units);
+ ASSERT((cur_row >= 0) && (cur_row < ps_dep_mngr_state->i4_num_vert_units));
+ ASSERT(cur_tile_col >= 0);
+ ASSERT(cur_tile_col < ps_dep_mngr_state->i4_num_tile_cols);
+
+ DATA_SYNC();
+
+ pi4_units_prcsd = ((WORD32 *)(ps_dep_mngr_state->pv_units_prcsd_in_row)) +
+ (cur_tile_col * ps_dep_mngr_state->i4_num_vert_units) + cur_row;
+
+ /* Update the number of units processed */
+ *pi4_units_prcsd = cur_offset;
+
+ if(1 == ps_dep_mngr_state->i1_sem_enable)
+ {
+ WORD32 wait_thrd_id;
+
+ wait_thrd_id = ps_dep_mngr_state->pi4_wait_thrd_id[cur_row];
+
+ /* Post on the thread registered as waiting on the current row, if any */
+ if(-1 != wait_thrd_id)
+ {
+ pv_sem_handle = ps_dep_mngr_state->ppv_thrd_sem_handles[wait_thrd_id];
+ /* Post on the semaphore */
+ ret_val = osal_sem_post(pv_sem_handle);
+ //ASSERT(0 == ret_val);
+
+ ps_dep_mngr_state->pi4_wait_thrd_id[cur_row] = -1;
+ }
+
+ /* towards end of row all threads are posted (to avoid any corner cases) */
+ if(cur_offset == ps_dep_mngr_state->i4_num_horz_units)
+ {
+ WORD32 ctr;
+
+ for(ctr = 0; ctr < ps_dep_mngr_state->i4_num_thrds; ctr++)
+ {
+ ret_val = osal_sem_post(ps_dep_mngr_state->ppv_thrd_sem_handles[ctr]);
+ //ASSERT(0 == ret_val);
+ }
+ }
+ }
+
+ return 0;
+}
+
+/*!
+**************************************************************************
+* \if Function name : ihevce_dmgr_chk_frm_frm_sync \endif
+*
+* \brief
+* This function checks whether the dependency is met to proceed with
+* processing. If condition is not met, it goes to a sem_wait state,
+* else starts processing.
+* For Barrier case, the thread will wait till all threads have completed
+* the processing on the previous instance of same stage
+* \param[in] pv_dep_mngr_state : Pointer to Sync Manager handle.
+* \param[in] thrd_id : Thread id checking for dependency
+*
+* \return
+* 0 always (returns only once the dependency is met)
+*
+* \author
+* Ittiam
+*
+**************************************************************************
+*/
+WORD32 ihevce_dmgr_chk_frm_frm_sync(void *pv_dep_mngr_state, WORD32 thrd_id)
+{
+ dep_mngr_state_t *ps_dep_mngr_state;
+ void *pv_sem_handle;
+ volatile ULWORD64 *pu8_num_units_proc_prev;
+ volatile ULWORD64 *pu8_num_units_proc_curr;
+ ULWORD64 prev_value;
+ ULWORD64 curr_value;
+
+ ps_dep_mngr_state = (dep_mngr_state_t *)pv_dep_mngr_state;
+ pv_sem_handle = ps_dep_mngr_state->ppv_thrd_sem_handles[thrd_id];
+
+ pu8_num_units_proc_curr = (volatile ULWORD64 *)ps_dep_mngr_state->pv_units_prcsd_in_row;
+ pu8_num_units_proc_prev =
+ (volatile ULWORD64 *)(pu8_num_units_proc_curr + ps_dep_mngr_state->i4_num_thrds);
+
+ /* Check whether Dep. is met: curr counter must be exactly one ahead of prev */
+ while(1)
+ {
+ WORD32 ret_val;
+
+ (void)ret_val;
+ curr_value = pu8_num_units_proc_curr[thrd_id];
+ prev_value = pu8_num_units_proc_prev[thrd_id];
+
+ if(curr_value == (prev_value + 1))
+ {
+ break;
+ }
+ else
+ {
+ /* register the thread id before going to pend state */
+ ps_dep_mngr_state->pi4_wait_thrd_id[thrd_id] = thrd_id;
+
+ /* go to the pend state (i1_sem_enable is not consulted in this mode) */
+ ret_val = osal_sem_wait(pv_sem_handle);
+ //ASSERT(0 == ret_val);
+ }
+ }
+
+ /* store curr value to prev for next iteration */
+ pu8_num_units_proc_prev[thrd_id] = pu8_num_units_proc_curr[thrd_id];
+
+ return 0;
+}
+
+/*!
+**************************************************************************
+* \if Function name : ihevce_dmgr_update_frm_frm_sync \endif
+*
+* \brief
+* This function increments the curr counter of every thread and posts
+* on every thread's semaphore so that they can start processing.
+* For barrier case, if the dep. is met, all waiting threads should be woken up
+*
+* \param[in] pv_dep_mngr_state : Pointer to Sync Manager handle.
+*
+* \return
+* 0 always
+*
+* \author
+* Ittiam
+*
+**************************************************************************
+*/
+WORD32 ihevce_dmgr_update_frm_frm_sync(void *pv_dep_mngr_state)
+{
+ dep_mngr_state_t *ps_dep_mngr_state;
+ void *pv_sem_handle;
+ volatile ULWORD64 *pu8_num_units_proc_curr;
+ WORD32 ctr;
+
+ ps_dep_mngr_state = (dep_mngr_state_t *)pv_dep_mngr_state;
+
+ pu8_num_units_proc_curr = (volatile ULWORD64 *)ps_dep_mngr_state->pv_units_prcsd_in_row;
+
+ /* Post on all threads' semaphores & update the cur unit proc */
+ for(ctr = 0; ctr < ps_dep_mngr_state->i4_num_thrds; ctr++)
+ {
+ WORD32 ret_val;
+ WORD32 wait_thrd_id;
+
+ (void)ret_val;
+ /* increment the curr unit counter for all threads */
+ pu8_num_units_proc_curr[ctr] = pu8_num_units_proc_curr[ctr] + 1;
+
+ wait_thrd_id = ctr; /* never -1 here, so the check below always passes */
+ //wait_thrd_id = ps_dep_mngr_state->pi4_wait_thrd_id[ctr];
+
+ if(-1 != wait_thrd_id)
+ {
+ pv_sem_handle = ps_dep_mngr_state->ppv_thrd_sem_handles[wait_thrd_id];
+ /* Post on the semaphore */
+ ret_val = osal_sem_post(pv_sem_handle);
+ //ASSERT(0 == ret_val);
+
+ ps_dep_mngr_state->pi4_wait_thrd_id[ctr] = -1;
+ }
+ }
+
+ return 0;
+}
+
+/*!
+**************************************************************************
+* \if Function name : ihevce_dmgr_map_chk \endif
+*
+* \brief
+* This function checks whether all entries in a rectangle of the dependency map are set
+*
+* \param[in] pu1_start : Pointer to the start of the search area
+* \param[in] i4_num_ctb_x : Width of search area (entries per row)
+* \param[in] i4_num_ctb_y : Height of search area (number of rows)
+* \param[in] i4_stride : Stride of search area
+*
+* \return
+* 1 if the bitwise AND of all entries equals MAP_CTB_COMPLETE, otherwise 0
+*
+* \author
+* Ittiam
+*
+**************************************************************************
+*/
+WORD32
+ ihevce_dmgr_map_chk(WORD8 *pu1_start, WORD32 i4_num_ctb_x, WORD32 i4_num_ctb_y, WORD32 i4_stride)
+{
+ WORD8 *pi1_ctb = pu1_start;
+ WORD32 row, col;
+ WORD32 map_ready_flag = MAP_CTB_COMPLETE;
+
+ for(row = 0; row < i4_num_ctb_y; row++)
+ {
+ for(col = 0; col < i4_num_ctb_x; col++)
+ {
+ map_ready_flag &= pi1_ctb[col];
+ }
+ pi1_ctb += i4_stride;
+ }
+
+ /* NOTE: an early exit from the above loop can be taken if map_ready_flag
+ is found to be zero somewhere at the start itself */
+ return (map_ready_flag == MAP_CTB_COMPLETE);
+}
+
+/*!
+**************************************************************************
+* \if Function name : ihevce_dmgr_map_chk_sync \endif
+*
+* \brief
+* This function checks whether the dependency is met by searching in a
+* rectangular area. If condition is not met, it goes to a sem_wait state when
+* semaphores are enabled (otherwise it keeps polling), else starts processing.
+*
+* \param[in] pv_dep_mngr_state : Pointer to Sync Manager handle.
+* \param[in] thrd_id : Thread id of the current thread checking for dependency
+* \param[in] offset_x : Offset of current CTB in Tile in ctb-unit
+* \param[in] offset_y : Offset of current CTB in Tile in ctb-unit
+* \param[in] i4_sr_ctb_x : Search Range in ctb-unit
+* \param[in] i4_sr_ctb_y : Search Range in ctb-unit
+*
+* \return
+* 0 always (returns only once the dependency is met)
+*
+* \author
+* Ittiam
+*
+**************************************************************************
+*/
+WORD32 ihevce_dmgr_map_chk_sync(
+ void *pv_dep_mngr_state,
+ WORD32 thrd_id,
+ WORD32 offset_x,
+ WORD32 offset_y,
+ WORD32 i4_sr_ctb_x,
+ WORD32 i4_sr_ctb_y)
+{
+ dep_mngr_state_t *ps_dep_mngr_state;
+ volatile WORD8 *pi1_ctb;
+ WORD8 *pi1_tile_start;
+ WORD32 i4_avail_left, i4_avail_right, i4_avail_top, i4_avail_bot;
+ WORD32 i4_num_ctb_x, i4_num_ctb_y;
+ WORD32 i4_stride;
+ WORD32 i4_tile_wd, i4_tile_ht; //in ctb units
+
+ ps_dep_mngr_state = (dep_mngr_state_t *)pv_dep_mngr_state;
+
+ i4_tile_wd = ps_dep_mngr_state->i4_num_horz_units - ps_dep_mngr_state->ai4_tile_xtra_ctb[1] -
+ ps_dep_mngr_state->ai4_tile_xtra_ctb[2];
+
+ i4_tile_ht = ps_dep_mngr_state->i4_num_vert_units - ps_dep_mngr_state->ai4_tile_xtra_ctb[0] -
+ ps_dep_mngr_state->ai4_tile_xtra_ctb[3];
+
+ i4_stride = ps_dep_mngr_state->i4_num_horz_units;
+
+ /* Sanity Checks, confirm if ctb offsets are within tiles */
+ ASSERT(offset_x >= 0);
+ ASSERT(offset_y >= 0);
+ ASSERT(offset_x < i4_tile_wd);
+ ASSERT(offset_y < i4_tile_ht);
+
+ pi1_tile_start = (WORD8 *)ps_dep_mngr_state->pv_units_prcsd_in_row;
+ pi1_ctb = (volatile WORD8 *)pi1_tile_start;
+
+ if(ps_dep_mngr_state->ai4_tile_xtra_ctb[0])
+ {
+ i4_avail_top = i4_sr_ctb_y; /* extra CTBs present on this side: full search range is used */
+ }
+ else
+ {
+ i4_avail_top = MIN(i4_sr_ctb_y, offset_y); /* otherwise clip the range at the tile edge */
+ }
+
+ if(ps_dep_mngr_state->ai4_tile_xtra_ctb[1])
+ {
+ i4_avail_left = i4_sr_ctb_x;
+ }
+ else
+ {
+ i4_avail_left = MIN(i4_sr_ctb_x, offset_x);
+ }
+
+ if(ps_dep_mngr_state->ai4_tile_xtra_ctb[2])
+ {
+ i4_avail_right = i4_sr_ctb_x;
+ }
+ else
+ {
+ i4_avail_right = MIN(i4_sr_ctb_x, (i4_tile_wd - offset_x - 1));
+ }
+
+ if(ps_dep_mngr_state->ai4_tile_xtra_ctb[3])
+ {
+ i4_avail_bot = i4_sr_ctb_y;
+ }
+ else
+ {
+ i4_avail_bot = MIN(i4_sr_ctb_y, (i4_tile_ht - offset_y - 1));
+ }
+
+ i4_num_ctb_x = (i4_avail_left + 1 + i4_avail_right);
+ i4_num_ctb_y = (i4_avail_top + 1 + i4_avail_bot);
+
+ /* Point to the start (top-left entry) of the search-area */
+ pi1_ctb += ((offset_y - i4_avail_top) * i4_stride + (offset_x - i4_avail_left));
+
+ /* Check whether Dep. is met: loop until the whole search-area is reported complete */
+ while(1)
+ {
+ if(1 == ihevce_dmgr_map_chk((WORD8 *)pi1_ctb, i4_num_ctb_x, i4_num_ctb_y, i4_stride))
+ {
+ break;
+ }
+ else
+ {
+ if(1 == ps_dep_mngr_state->i1_sem_enable)
+ {
+ osal_sem_wait(ps_dep_mngr_state->ppv_thrd_sem_handles[thrd_id]);
+ }
+ }
+ }
+
+ return 0;
+}
+
+/*!
+**************************************************************************
+* \if Function name : ihevce_dmgr_map_set_sync \endif
+*
+* \brief
+* This function stores a value in the dependency map for one CTB and wakes
+* up the semaphores of all threads so that they can re-check their dependency.
+*
+* \param[in] pv_dep_mngr_state : Pointer to Sync Manager handle.
+* \param[in] offset_x : Offset of current CTB in Tile(ctb unit)
+* \param[in] offset_y : Offset of current CTB in Tile(ctb unit)
+* \param[in] i4_map_value : Value stored (as WORD8) in the map entry of this CTB
+* \return
+* 0 always
+*
+* \author
+* Ittiam
+*
+**************************************************************************
+*/
+WORD32 ihevce_dmgr_map_set_sync(
+ void *pv_dep_mngr_state, WORD32 offset_x, WORD32 offset_y, WORD32 i4_map_value)
+{
+ dep_mngr_state_t *ps_dep_mngr_state;
+ WORD8 *pi1_tile_start;
+ WORD32 map_stride;
+
+ ps_dep_mngr_state = (dep_mngr_state_t *)pv_dep_mngr_state;
+
+ /* Sanity Checks: offsets may be negative by up to the number of extra CTBs */
+ ASSERT(offset_x >= (-ps_dep_mngr_state->ai4_tile_xtra_ctb[1]));
+ ASSERT(offset_y >= (-ps_dep_mngr_state->ai4_tile_xtra_ctb[0]));
+
+ ASSERT(
+ offset_x <
+ (ps_dep_mngr_state->i4_num_horz_units - ps_dep_mngr_state->ai4_tile_xtra_ctb[1]));
+
+ ASSERT(
+ offset_y <
+ (ps_dep_mngr_state->i4_num_vert_units - ps_dep_mngr_state->ai4_tile_xtra_ctb[0]));
+
+ DATA_SYNC();
+
+ map_stride = ps_dep_mngr_state->i4_num_horz_units;
+
+ pi1_tile_start = (WORD8 *)ps_dep_mngr_state->pv_units_prcsd_in_row;
+
+ /* Store the caller supplied map value in the entry of this CTB */
+ *(pi1_tile_start + offset_y * map_stride + offset_x) = (WORD8)i4_map_value;
+
+ if(1 == ps_dep_mngr_state->i1_sem_enable)
+ {
+ WORD32 wait_thrd_id;
+
+ /* Post on the semaphores of all the threads */
+ for(wait_thrd_id = 0; wait_thrd_id < ps_dep_mngr_state->i4_num_thrds; wait_thrd_id++)
+ {
+ /* Post on the semaphore */
+ /* Map-mode: semaphore post is unconditionally done on all threads */
+ osal_sem_post(ps_dep_mngr_state->ppv_thrd_sem_handles[wait_thrd_id]);
+ }
+ }
+
+ return 0;
+}
diff --git a/encoder/ihevce_dep_mngr_interface.h b/encoder/ihevce_dep_mngr_interface.h
new file mode 100644
index 0000000..28d22b7
--- /dev/null
+++ b/encoder/ihevce_dep_mngr_interface.h
@@ -0,0 +1,191 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_dep_mngr_interface.h
+*
+* \brief
+* This file contains interface prototypes of Sync manager functions
+*
+* \date
+* 13/12/2013
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_DEPENDENCY_MANAGER_INTERFACE_H_
+#define _IHEVCE_DEPENDENCY_MANAGER_INTERFACE_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+
+typedef enum
+{
+ DEP_MNGR_FRM_FRM_SYNC = 0, /*!< To be used for multi threads Frame-
+ Frame level sync, where threads entering
+ a particular frame processing stage at
+ a particular index wait for all the threads
+ to complete that stage at the same index
+ in the previous iteration
+ Ex: Wait for Encloop at Index i frame
+ to complete before starting encloop of
+ MAX_NUM_ENCLOOP + i frame at Index i
+ (FRAME LEVEL SYNCS)*/
+
+ DEP_MNGR_ROW_FRM_SYNC, /*!< To be used for multi threads Row-
+ Frame level sync, where multiple threads
+ entering a particular frame processing stage at
+ a particular index wait for the corresponding
+ row to be completely processed in the
+ dependent stage
+ Ex: Multiple threads Wait in ME at a
+ particular row X (of Frame I)
+ until encloop of row X in Frame I
+ is completed
+ (REVERSE ME DEPENDENCY SYNC)*/
+
+ DEP_MNGR_ROW_ROW_SYNC, /*!< To be used for multi threads Row-
+ Row level sync, where a thread
+ entering a particular frame processing stage at
+ a particular index waits for the corresponding
+ row to be processed till the dependent position
+ in the dependent stage
+ Ex: (ENC LOOP to ME FORWARD Sync)
+ ( TOP RIGHT SYNC) */
+
+ DEP_MNGR_MAP_SYNC /*!< Map based sync; presumably the mode served by the ihevce_dmgr_map_* APIs - TODO confirm */
+
+} DEP_MNGR_MODE_T;
+
+typedef enum /* per-CTB status values; presumably what ihevce_dmgr_map_set_sync() stores in the map - TODO confirm */
+{
+ MAP_CTB_INIT = 0, /* presumably the state of a map entry after reset - TODO confirm */
+ MAP_CTB_RECON_DONE = 1, /* presumably set once the recon of the CTB is available - TODO confirm */
+ MAP_CTB_COMPLETE = 2, //after hpel plane creation
+
+} DEP_MNGR_MAP_CTB_STATUS_T;
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Variable Declarations */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+/* Create APIs : memory record queries and instance initialisation */
+WORD32 ihevce_dmgr_get_num_mem_recs(void);
+
+WORD32 ihevce_dmgr_get_mem_recs(
+ iv_mem_rec_t *ps_mem_tab,
+ WORD32 dep_mngr_mode, /* should be part of DEP_MNGR_MODE_T*/
+ WORD32 max_num_vert_units,
+ WORD32 num_tile_cols,
+ WORD32 num_threads,
+ WORD32 i4_mem_space);
+
+WORD32 ihevce_dmgr_map_get_mem_recs(
+ iv_mem_rec_t *ps_mem_tab, WORD32 num_units, WORD32 num_threads, WORD32 i4_mem_space);
+
+void *ihevce_dmgr_init(
+ iv_mem_rec_t *ps_mem_tab,
+ void *pv_osal_handle,
+ WORD32 dep_mngr_mode, /* should be part of DEP_MNGR_MODE_T*/
+ WORD32 max_num_vert_units,
+ WORD32 max_num_horz_units,
+ WORD32 num_tile_cols,
+ WORD32 num_threads,
+ WORD32 sem_enable);
+
+void *ihevce_dmgr_map_init(
+ iv_mem_rec_t *ps_mem_tab,
+ WORD32 max_num_vert_units,
+ WORD32 max_num_horz_units,
+ WORD32 sem_enable,
+ WORD32 num_threads,
+ WORD32 ai4_tile_xtra_ctb[4]); /* NOTE(review): dep_mngr_state_t documents 0: top, 1: left, 2: right, 3: bottom - confirm same order here */
+
+void ihevce_dmgr_reg_sem_hdls(
+ void *pv_dep_mngr_state, void **ppv_thread_sem_hdl, WORD32 num_threads);
+
+/* Row-Row sync Process APIs : reset / check / set */
+void ihevce_dmgr_rst_row_row_sync(void *pv_dep_mngr_state);
+
+WORD32 ihevce_dmgr_chk_row_row_sync(
+ void *pv_dep_mngr_state,
+ WORD32 cur_offset,
+ WORD32 dep_offset,
+ WORD32 dep_row,
+ WORD32 cur_tile_col,
+ WORD32 thrd_id);
+
+WORD32 ihevce_dmgr_set_row_row_sync(
+ void *pv_dep_mngr_state, WORD32 cur_offset, WORD32 cur_row, WORD32 cur_tile_col);
+
+/* Row-Frame sync Process APIs */
+void ihevce_dmgr_rst_row_frm_sync(void *pv_dep_mngr_state);
+
+/* Frame-Frame sync Process APIs */
+void ihevce_dmgr_set_done_frm_frm_sync(void *pv_dep_mngr_state);
+
+void ihevce_dmgr_set_prev_done_frm_frm_sync(void *pv_dep_mngr_state);
+
+WORD32 ihevce_dmgr_chk_frm_frm_sync(void *pv_dep_mngr_state, WORD32 thrd_id);
+
+WORD32 ihevce_dmgr_update_frm_frm_sync(void *pv_dep_mngr_state);
+
+/* Map sync Process APIs : reset / check / set */
+void ihevce_dmgr_map_rst_sync(void *pv_dep_mngr_state);
+
+WORD32 ihevce_dmgr_map_chk_sync(
+ void *pv_dep_mngr_state,
+ WORD32 thrd_id,
+ WORD32 offset_x,
+ WORD32 offset_y,
+ WORD32 i4_sr_ctb_x,
+ WORD32 i4_sr_ctb_y);
+
+WORD32 ihevce_dmgr_map_set_sync(
+ void *pv_dep_mngr_state, WORD32 offset_x, WORD32 offset_y, WORD32 e_map_value); /* named i4_map_value in the definition; presumably a DEP_MNGR_MAP_CTB_STATUS_T - confirm */
+
+/* Delete APIs */
+void ihevce_dmgr_del(void *pv_dep_mngr_state);
+
+#endif /* _IHEVCE_DEPENDENCY_MANAGER_INTERFACE_H_ */
diff --git a/encoder/ihevce_dep_mngr_private.h b/encoder/ihevce_dep_mngr_private.h
new file mode 100644
index 0000000..3ab57d0
--- /dev/null
+++ b/encoder/ihevce_dep_mngr_private.h
@@ -0,0 +1,137 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_dep_mngr_private.h
+*
+* \brief
+* This file contains private structures & definitions of Sync manager
+*
+* \date
+* 13/12/2013
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_DEP_MANAGER_PRIVATE_H_
+#define _IHEVCE_DEP_MANAGER_PRIVATE_H_
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+typedef enum /* indices of the dependency manager memory records */
+{
+ DEP_MNGR_CTXT = 0, /* presumably the record holding dep_mngr_state_t - TODO confirm */
+ DEP_MNGR_UNITS_PRCSD_MEM, /* presumably backs pv_units_prcsd_in_row - TODO confirm */
+ DEP_MNGR_WAIT_THRD_ID_MEM, /* presumably backs pi4_wait_thrd_id - TODO confirm */
+ DEP_MNGR_SEM_HANDLE_MEM, /* presumably backs ppv_thrd_sem_handles - TODO confirm */
+
+ /* should be last entry : its value equals the number of records listed above */
+ NUM_DEP_MNGR_MEM_RECS
+} DEP_MNGR_MEM_T;
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+
+typedef struct
+{
+ /*! Number of Vertical units to be processed */
+ WORD32 i4_num_vert_units;
+
+ /*! Maximum Number of Horizontal units to be processed (also the row stride of the map in ihevce_dmgr_map_set_sync) */
+ WORD32 i4_num_horz_units;
+
+ /*! Number of column tiles for which encoder is working */
+ WORD32 i4_num_tile_cols;
+
+ /*! Array to update the units which got processed in each row (accessed as a WORD8 map by ihevce_dmgr_map_set_sync) */
+ /*! For num_tile_cols > 1 , the memory layout is
+ 0-max_num_vert_units for col_tile 0
+ 0-max_num_vert_units for col_tile 1
+ ..
+ ..
+ 0-max_num_vert_units for col_tile N-1
+ */
+ void *pv_units_prcsd_in_row;
+
+ /*! Array to register the thread ids of waiting threads in each row */
+ /*! Memory Layout : (Row - Row) 1 entry per row
+ Memory Layout : (Frame - Frame) Num threads per frame
+ Memory layout : (Row - Frame)
+ Num threads for Row 0
+ Num threads for Row 1
+ Num threads for Row 2
+ ..
+ ..
+ Num threads for Row N-1
+ */
+ WORD32 *pi4_wait_thrd_id;
+
+ /*! Number of threads in the dependency chain */
+ WORD32 i4_num_thrds;
+
+ /*! Pointer to Array of Thread semaphore handles (one per thread, indexed up to i4_num_thrds) */
+ void **ppv_thrd_sem_handles;
+
+ /*! Dependency Manager Mode */
+ WORD32 i4_dep_mngr_mode; /* @sa DEP_MNGR_MODE_T */
+
+ /*! 0 : Semaphore not used., 1 : Uses semaphore */
+ /*! Note : This is required for using spin-lock for some dependencies. */
+ /*! If 0, uses spin-lock(do-while) rather than semaphore for Sync */
+ WORD8 i1_sem_enable;
+
+ /* Extra CTBs around the tile, 0: top, 1: left, 2: right, 3: bottom */
+ WORD8 ai4_tile_xtra_ctb[4]; /* NOTE(review): WORD8 storage despite the ai4_ prefix; ihevce_dmgr_map_init() takes WORD32[4] */
+
+ /* temp var: delete it */
+ //WORD32 i4_frame_map_complete;
+
+} dep_mngr_state_t;
+
+/*****************************************************************************/
+/* Extern Variable Declarations */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+#endif //_IHEVCE_DEP_MANAGER_PRIVATE_H_
diff --git a/encoder/ihevce_enc_cu_recursion.c b/encoder/ihevce_enc_cu_recursion.c
new file mode 100644
index 0000000..2589daa
--- /dev/null
+++ b/encoder/ihevce_enc_cu_recursion.c
@@ -0,0 +1,3267 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* \file ihevce_enc_cu_recursion.c
+*
+* \brief
+* This file contains Encoder normative loop pass related functions
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+*
+* List of Functions
+*
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_macros.h"
+#include "ihevc_debug.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_hle_interface.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_multi_thrd_funcs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_entropy_structs.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+#include "ihevce_ipe_instr_set_router.h"
+#include "ihevce_decomp_pre_intra_structs.h"
+#include "ihevce_decomp_pre_intra_pass.h"
+#include "ihevce_enc_loop_structs.h"
+#include "ihevce_global_tables.h"
+#include "ihevce_nbr_avail.h"
+#include "ihevce_enc_loop_utils.h"
+#include "ihevce_bs_compute_ctb.h"
+#include "ihevce_cabac_rdo.h"
+#include "ihevce_dep_mngr_interface.h"
+#include "ihevce_enc_loop_pass.h"
+#include "ihevce_rc_enc_structs.h"
+#include "ihevce_enc_cu_recursion.h"
+#include "ihevce_stasino_helpers.h"
+
+#include "cast_types.h"
+#include "osal.h"
+#include "osal_defaults.h"
+
+/*****************************************************************************/
+/* Macros */
+/*****************************************************************************/
+#define NUM_CTB_QUANT_ROUNDING 6 /* presumably the number of CTB level quant rounding entries - TODO confirm against its users */
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+*********************************************************************************
+* Function name : ihevce_store_cu_final
+*
+* \brief
+* Stores the best CU results into the output CU structure, accumulates the
+* rate control statistics and, if enabled, computes the CU boundary strengths
+* \param[in,out] ps_ctxt : pointer to enc loop context structure
+* \param[out] ps_cu_final : pointer to enc loop output CU structure
+* \param[in] pu1_ecd_data : ecd data pointer
+* \param[in] ps_enc_out_ctxt : pointer to CU information structure
+* \param[in] ps_cu_prms : pointer to cu level parameters for SATD / RDOPT
+*
+* \return
+* None
+*
+**********************************************************************************/
+void ihevce_store_cu_final(
+    ihevce_enc_loop_ctxt_t *ps_ctxt,
+    cu_enc_loop_out_t *ps_cu_final,
+    UWORD8 *pu1_ecd_data,
+    ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt,
+    enc_loop_cu_prms_t *ps_cu_prms)
+{
+    enc_loop_cu_final_prms_t *ps_enc_loop_bestprms;
+    WORD32 i4_8x8_blks_in_cu;
+    WORD32 i4_br_id, i4_enc_frm_id;
+    /* bit counts are kept in WORD32 although the names carry a u4_ prefix */
+    WORD32 u4_tex_bits, u4_hdr_bits;
+    WORD32 i4_qscale, i4_qscale_ctb;
+    ps_enc_loop_bestprms = ps_enc_out_ctxt->ps_cu_prms;
+    i4_qscale = ((ps_ctxt->ps_rc_quant_ctxt->pi4_qp_to_qscale
+                      [ps_enc_out_ctxt->i1_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]));
+    i4_qscale_ctb = ((
+        ps_ctxt->ps_rc_quant_ctxt
+            ->pi4_qp_to_qscale[ps_ctxt->i4_frame_mod_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]));
+
+    /* All texture bits accumulated */
+    u4_tex_bits = ps_enc_loop_bestprms->u4_cu_luma_res_bits +
+                  ps_enc_loop_bestprms->u4_cu_chroma_res_bits +
+                  ps_enc_loop_bestprms->u4_cu_cbf_bits;
+
+    u4_hdr_bits = ps_enc_loop_bestprms->u4_cu_hdr_bits;
+
+    i4_br_id = ps_ctxt->i4_bitrate_instance_num;
+    i4_enc_frm_id = ps_ctxt->i4_enc_frm_id;
+
+    i4_8x8_blks_in_cu = ((ps_enc_out_ctxt->u1_cu_size >> 3) * (ps_enc_out_ctxt->u1_cu_size >> 3));
+
+    ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_frame_open_loop_ssd +=
+        ps_enc_loop_bestprms
+            ->i8_cu_ssd; /* only the ssd is added; header bits are not weighted in here */
+
+    ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_open_loop_intra_sad +=
+        (UWORD32)(
+            ps_enc_loop_bestprms->u4_cu_open_intra_sad +
+            (((float)(ps_ctxt->i4_sad_lamda) / (1 << LAMBDA_Q_SHIFT)) *
+             ps_enc_loop_bestprms->u4_cu_hdr_bits));
+
+    if(1 == ps_enc_loop_bestprms->u1_intra_flag)
+    {
+        ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_intra_sad_acc +=
+            ps_enc_loop_bestprms->u4_cu_sad;
+        ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_frame_intra_cost_acc +=
+            ps_enc_loop_bestprms->i8_best_rdopt_cost;
+    }
+    else
+    {
+        ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_inter_sad_acc +=
+            ps_enc_loop_bestprms->u4_cu_sad;
+        ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_frame_inter_cost_acc +=
+            ps_enc_loop_bestprms->i8_best_rdopt_cost;
+    }
+    /* accumulating the frame level stats (intra and inter together) */
+    ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_sad_acc +=
+        ps_enc_loop_bestprms->u4_cu_sad;
+
+    ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_frame_cost_acc +=
+        ps_enc_loop_bestprms->i8_best_rdopt_cost;
+
+    ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_rdopt_bits +=
+        (u4_tex_bits + u4_hdr_bits);
+
+    /* Total and header bits of the CTB; bits are widened to 64 bit BEFORE the qscale multiply so the product cannot overflow WORD32 */
+    ps_ctxt->u4_total_cu_bits += (u4_tex_bits + u4_hdr_bits);
+    ps_ctxt->u4_total_cu_bits_mul_qs +=
+        (((ULWORD64)(u4_tex_bits + u4_hdr_bits) * (i4_qscale_ctb)) + (1 << (QSCALE_Q_FAC_3 - 1))) >>
+        QSCALE_Q_FAC_3;
+    ps_ctxt->u4_total_cu_hdr_bits += u4_hdr_bits;
+    ps_ctxt->u4_cu_tot_bits_into_qscale +=
+        (((ULWORD64)(u4_tex_bits + u4_hdr_bits) * (i4_qscale)) + (1 << (QSCALE_Q_FAC_3 - 1))) >>
+        QSCALE_Q_FAC_3;
+    ps_ctxt->u4_cu_tot_bits += (u4_tex_bits + u4_hdr_bits);
+
+    ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_rdopt_header_bits +=
+        u4_hdr_bits;
+
+    ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]
+        ->i8_sad_by_qscale[ps_enc_loop_bestprms->u1_intra_flag] +=
+        ((((LWORD64)ps_enc_loop_bestprms->u4_cu_sad) << SAD_BY_QSCALE_Q) / i4_qscale);
+
+    ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]
+        ->i4_qp_normalized_8x8_cu_sum[ps_enc_loop_bestprms->u1_intra_flag] +=
+        (i4_8x8_blks_in_cu * i4_qscale);
+
+    ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]
+        ->i4_8x8_cu_sum[ps_enc_loop_bestprms->u1_intra_flag] += i4_8x8_blks_in_cu;
+
+    /* PCM not supported */
+    ps_cu_final->b1_pcm_flag = 0;
+    ps_cu_final->b1_pred_mode_flag = ps_enc_loop_bestprms->u1_intra_flag;
+
+    ps_cu_final->b1_skip_flag = ps_enc_loop_bestprms->u1_skip_flag;
+    ps_cu_final->b1_tq_bypass_flag = 0;
+    ps_cu_final->b3_part_mode = ps_enc_loop_bestprms->u1_part_mode;
+
+    ps_cu_final->pv_coeff = pu1_ecd_data;
+
+    ps_cu_final->i1_cu_qp = ps_enc_out_ctxt->i1_cu_qp;
+    if(ps_enc_loop_bestprms->u1_is_cu_coded)
+    {
+        ps_ctxt->i4_last_cu_qp_from_prev_ctb = ps_enc_out_ctxt->i1_cu_qp;
+    }
+    else
+    {
+        ps_ctxt->i4_last_cu_qp_from_prev_ctb = ps_ctxt->i4_pred_qp;
+    }
+    ps_cu_final->b1_first_cu_in_qg = ps_enc_out_ctxt->b1_first_cu_in_qg;
+
+    /* Update the no residue flag. Needed for inter cu. */
+    /* Needed for deblocking inter/intra both */
+    /* hence set for every CU, without a check on the pred mode */
+    {
+        ps_cu_final->b1_no_residual_syntax_flag = !ps_enc_loop_bestprms->u1_is_cu_coded;
+    }
+
+    /* store the number of TUs */
+    ps_cu_final->u2_num_tus_in_cu = ps_enc_loop_bestprms->u2_num_tus_in_cu;
+
+    /* ---- copy the TUs to final structure ----- */
+    memcpy(
+        ps_cu_final->ps_enc_tu,
+        &ps_enc_loop_bestprms->as_tu_enc_loop[0],
+        ps_enc_loop_bestprms->u2_num_tus_in_cu * sizeof(tu_enc_loop_out_t));
+
+    /* ---- copy the PUs to final structure ----- */
+    memcpy(
+        ps_cu_final->ps_pu,
+        &ps_enc_loop_bestprms->as_pu_enc_loop[0],
+        ps_enc_loop_bestprms->u2_num_pus_in_cu * sizeof(pu_t));
+
+    /* --- copy remainder and prev_flags ----- */
+    /* only required for intra */
+    if(PRED_MODE_INTRA == ps_cu_final->b1_pred_mode_flag)
+    {
+        memcpy(
+            &ps_cu_final->as_prev_rem[0],
+            &ps_enc_loop_bestprms->as_intra_prev_rem[0],
+            ps_enc_loop_bestprms->u2_num_tus_in_cu * sizeof(intra_prev_rem_flags_t));
+
+        ps_cu_final->b3_chroma_intra_pred_mode = ps_enc_loop_bestprms->u1_chroma_intra_pred_mode;
+    }
+
+    /* --------------------------------------------------- */
+    /* ---- Boundary Strength Calculation at CU level ---- */
+    /* --------------------------------------------------- */
+    if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
+    {
+        WORD32 num_4x4_in_ctb;
+        nbr_4x4_t *ps_left_nbr_4x4;
+        nbr_4x4_t *ps_top_nbr_4x4;
+        nbr_4x4_t *ps_curr_nbr_4x4;
+        WORD32 nbr_4x4_left_strd;
+
+        num_4x4_in_ctb = (ps_cu_prms->i4_ctb_size >> 2);
+
+        ps_curr_nbr_4x4 = &ps_ctxt->as_ctb_nbr_arr[0];
+        ps_curr_nbr_4x4 += (ps_enc_out_ctxt->b3_cu_pos_x << 1);
+        ps_curr_nbr_4x4 += ((ps_enc_out_ctxt->b3_cu_pos_y << 1) * num_4x4_in_ctb);
+
+        /* CU left */
+        if(0 == ps_enc_out_ctxt->b3_cu_pos_x)
+        {
+            ps_left_nbr_4x4 = &ps_ctxt->as_left_col_nbr[0];
+            ps_left_nbr_4x4 += ps_enc_out_ctxt->b3_cu_pos_y << 1;
+            nbr_4x4_left_strd = 1;
+        }
+        else
+        {
+            /* inside CTB */
+            ps_left_nbr_4x4 = ps_curr_nbr_4x4 - 1;
+            nbr_4x4_left_strd = num_4x4_in_ctb;
+        }
+
+        /* CU top */
+        if(0 == ps_enc_out_ctxt->b3_cu_pos_y)
+        {
+            /* CTB boundary */
+            ps_top_nbr_4x4 = ps_ctxt->ps_top_row_nbr;
+            ps_top_nbr_4x4 += (ps_cu_prms->i4_ctb_pos * (ps_cu_prms->i4_ctb_size >> 2));
+            ps_top_nbr_4x4 += (ps_enc_out_ctxt->b3_cu_pos_x << 1);
+        }
+        else
+        {
+            /* inside CTB */
+            ps_top_nbr_4x4 = ps_curr_nbr_4x4 - num_4x4_in_ctb;
+        }
+
+        ihevce_bs_compute_cu(
+            ps_cu_final,
+            ps_top_nbr_4x4,
+            ps_left_nbr_4x4,
+            ps_curr_nbr_4x4,
+            nbr_4x4_left_strd,
+            num_4x4_in_ctb,
+            &ps_ctxt->s_deblk_bs_prms);
+    }
+}
+
+/**
+*********************************************************************************
+* Function name : ihevce_store_cu_results
+*
+* \brief
+* Stores the CU results (recon copy, qp per 8x8, 4x4 nbr info) to the CTB level context
+*
+* \param[in] ps_ctxt : pointer to enc loop context structure
+* \param[out] ps_cu_prms : pointer to cu level parameters for SATD / RDOPT
+* \param[in] ps_final_state : unused (explicitly voided in the body)
+*
+* \return None
+*
+**********************************************************************************/
+void ihevce_store_cu_results(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ enc_loop_cu_prms_t *ps_cu_prms,
+ final_mode_state_t *ps_final_state)
+{
+ ihevce_enc_cu_node_ctxt_t *ps_enc_tmp_out_ctxt;
+ nbr_4x4_t *ps_nbr_4x4, *ps_tmp_nbr_4x4, *ps_curr_nbr_4x4;
+
+ UWORD8 *pu1_recon, *pu1_final_recon;
+ WORD32 num_4x4_in_ctb, ctr;
+ WORD32 num_4x4_in_cu;
+ UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
+ WORD32 cu_depth, log2_ctb_size, log2_cu_size;
+
+ ps_enc_tmp_out_ctxt = ps_ctxt->ps_enc_out_ctxt;
+ (void)ps_final_state; /* parameter is not used by this function */
+#if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
+ {
+ /* ---- copy the child luma recon back to curr. recon -------- */
+ pu1_recon = (UWORD8 *)ps_ctxt->pv_cu_luma_recon;
+
+ /* based on CU position (in 8x8 units) derive the luma pointers */
+ pu1_final_recon = ps_cu_prms->pu1_luma_recon + (ps_enc_tmp_out_ctxt->b3_cu_pos_x << 3);
+
+ pu1_final_recon +=
+ ((ps_enc_tmp_out_ctxt->b3_cu_pos_y << 3) * ps_cu_prms->i4_luma_recon_stride);
+
+ ps_ctxt->s_cmn_opt_func.pf_copy_2d(
+ pu1_final_recon,
+ ps_cu_prms->i4_luma_recon_stride,
+ pu1_recon,
+ ps_enc_tmp_out_ctxt->u1_cu_size,
+ ps_enc_tmp_out_ctxt->u1_cu_size,
+ ps_enc_tmp_out_ctxt->u1_cu_size);
+
+ /* ---- copy the child chroma recon back to curr. recon -------- */
+ pu1_recon = (UWORD8 *)ps_ctxt->pv_cu_chrma_recon;
+
+ /* based on CU position derive the chroma pointers */
+ pu1_final_recon = ps_cu_prms->pu1_chrm_recon + (ps_enc_tmp_out_ctxt->b3_cu_pos_x << 3);
+
+ pu1_final_recon +=
+ ((ps_enc_tmp_out_ctxt->b3_cu_pos_y << (u1_is_422 + 2)) *
+ ps_cu_prms->i4_chrm_recon_stride);
+
+ /* Cb and Cr pixel interleaved; last argument is halved unless chroma array type is 2 */
+ ps_ctxt->s_cmn_opt_func.pf_copy_2d(
+ pu1_final_recon,
+ ps_cu_prms->i4_chrm_recon_stride,
+ pu1_recon,
+ ps_enc_tmp_out_ctxt->u1_cu_size,
+ ps_enc_tmp_out_ctxt->u1_cu_size,
+ (ps_enc_tmp_out_ctxt->u1_cu_size >> (0 == u1_is_422)));
+ }
+#else /* same copies as above, but only for presets below IHEVCE_QUALITY_P2 */
+ if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
+ {
+ /* ---- copy the child luma recon back to curr. recon -------- */
+ pu1_recon = (UWORD8 *)ps_ctxt->pv_cu_luma_recon;
+
+ /* based on CU position (in 8x8 units) derive the luma pointers */
+ pu1_final_recon = ps_cu_prms->pu1_luma_recon + (ps_enc_tmp_out_ctxt->b3_cu_pos_x << 3);
+
+ pu1_final_recon +=
+ ((ps_enc_tmp_out_ctxt->b3_cu_pos_y << 3) * ps_cu_prms->i4_luma_recon_stride);
+
+ ps_ctxt->s_cmn_opt_func.pf_copy_2d(
+ pu1_final_recon,
+ ps_cu_prms->i4_luma_recon_stride,
+ pu1_recon,
+ ps_enc_tmp_out_ctxt->u1_cu_size,
+ ps_enc_tmp_out_ctxt->u1_cu_size,
+ ps_enc_tmp_out_ctxt->u1_cu_size);
+
+ /* ---- copy the child chroma recon back to curr. recon -------- */
+ pu1_recon = (UWORD8 *)ps_ctxt->pv_cu_chrma_recon;
+
+ /* based on CU position derive the chroma pointers */
+ pu1_final_recon = ps_cu_prms->pu1_chrm_recon + (ps_enc_tmp_out_ctxt->b3_cu_pos_x << 3);
+
+ pu1_final_recon +=
+ ((ps_enc_tmp_out_ctxt->b3_cu_pos_y << (u1_is_422 + 2)) *
+ ps_cu_prms->i4_chrm_recon_stride);
+
+ ps_ctxt->s_cmn_opt_func.pf_copy_2d(
+ pu1_final_recon,
+ ps_cu_prms->i4_chrm_recon_stride,
+ pu1_recon,
+ ps_enc_tmp_out_ctxt->u1_cu_size,
+ ps_enc_tmp_out_ctxt->u1_cu_size,
+ (ps_enc_tmp_out_ctxt->u1_cu_size >> (0 == u1_is_422)));
+ }
+#endif /* PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS */
+ /* copy qp for qg : each 8x8 of the CU gets i4_cu_qp when the CU is coded, else i4_pred_qp (map rows are 8 entries wide) */
+ {
+ WORD32 i4_num_8x8, i4_x, i4_y;
+ WORD32 i4_cu_pos_x, i4_cu_pox_y;
+ i4_num_8x8 = ps_enc_tmp_out_ctxt->u1_cu_size >> 3;
+ i4_cu_pos_x = ps_enc_tmp_out_ctxt->b3_cu_pos_x;
+ i4_cu_pox_y = ps_enc_tmp_out_ctxt->b3_cu_pos_y;
+ for(i4_y = 0; i4_y < i4_num_8x8; i4_y++)
+ {
+ for(i4_x = 0; i4_x < i4_num_8x8; i4_x++)
+ {
+ if(ps_enc_tmp_out_ctxt->ps_cu_prms->u1_is_cu_coded)
+ {
+ ps_ctxt->ai4_qp_qg[((i4_cu_pox_y + i4_y) * 8) + (i4_cu_pos_x + i4_x)] =
+ ps_ctxt->i4_cu_qp;
+ }
+ else
+ {
+ ps_ctxt->ai4_qp_qg[((i4_cu_pox_y + i4_y) * 8) + (i4_cu_pos_x + i4_x)] =
+ ps_ctxt->i4_pred_qp;
+ }
+ }
+ }
+ }
+
+ /* ------ copy the nbr 4x4 to final output ------ */
+ num_4x4_in_cu = ps_enc_tmp_out_ctxt->u1_cu_size >> 2;
+ num_4x4_in_ctb = (ps_cu_prms->i4_ctb_size >> 2);
+
+ ps_curr_nbr_4x4 = &ps_ctxt->as_ctb_nbr_arr[0];
+ ps_curr_nbr_4x4 += (ps_enc_tmp_out_ctxt->b3_cu_pos_x << 1);
+ ps_curr_nbr_4x4 += ((ps_enc_tmp_out_ctxt->b3_cu_pos_y << 1) * num_4x4_in_ctb);
+ ps_tmp_nbr_4x4 = ps_curr_nbr_4x4;
+
+ ps_nbr_4x4 = ps_ctxt->ps_cu_recur_nbr;
+
+ GETRANGE(log2_ctb_size, ps_cu_prms->i4_ctb_size);
+ GETRANGE(log2_cu_size, ps_enc_tmp_out_ctxt->u1_cu_size);
+ cu_depth = log2_ctb_size - log2_cu_size;
+
+ ASSERT(cu_depth <= 3);
+ ASSERT(cu_depth >= 0);
+
+ /* assign skip flag, cu depth and qp (always i4_cu_qp here) to all 4x4 nbr blocks of the CU */
+ for(ctr = 0; ctr < num_4x4_in_cu * num_4x4_in_cu; ctr++, ps_nbr_4x4++)
+ {
+ ps_nbr_4x4->b1_skip_flag = ps_enc_tmp_out_ctxt->s_cu_prms.u1_skip_flag; /* NOTE(review): reads s_cu_prms while the qg loop above reads ps_cu_prms - confirm intended */
+ ps_nbr_4x4->b2_cu_depth = cu_depth;
+ ps_nbr_4x4->b8_qp = ps_ctxt->i4_cu_qp;
+ }
+
+ ps_nbr_4x4 = ps_ctxt->ps_cu_recur_nbr;
+
+ for(ctr = 0; ctr < num_4x4_in_cu; ctr++) /* copy one row of CU 4x4 entries at a time into the CTB level nbr array */
+ {
+ memcpy(ps_tmp_nbr_4x4, ps_nbr_4x4, num_4x4_in_cu * sizeof(nbr_4x4_t));
+
+ ps_tmp_nbr_4x4 += num_4x4_in_ctb;
+ ps_nbr_4x4 += num_4x4_in_cu;
+ }
+}
+
+/**
+*********************************************************************************
+* Function name : ihevce_populate_cu_struct
+*
+* \brief
+* This function populate cu struct
+*
+* \param[in] ps_ctxt : pointer to enc loop context structure
+* \param[in] ps_cur_ipe_ctb : pointer to IPE L0 analyze structure
+* \param[in] ps_cu_tree_analyse : pointer to Structure for CU recursion
+* \param[in] ps_best_results : pointer to structure containing the results for partition types of the CU
+* \param[in] ps_cu_out : pointer to structure containing the mode analysis info
+* \param[in] i4_32x32_id : noise estimation id
+* \param[in] u1_num_best_results : num best result value
+*
+* \return
+* None
+*
+**********************************************************************************/
+void ihevce_populate_cu_struct(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
+ cur_ctb_cu_tree_t *ps_cu_tree_analyse,
+ part_type_results_t *ps_best_results,
+ cu_analyse_t *ps_cu_out,
+ WORD32 i4_32x32_id,
+#if DISABLE_INTRA_WHEN_NOISY && USE_NOISE_TERM_IN_ENC_LOOP
+ UWORD8 u1_is_cu_noisy,
+#endif
+ UWORD8 u1_num_best_results)
+{
+ cu_inter_cand_t *ps_cu_candt;
+
+ WORD32 j;
+ /* open loop intra cost by IPE */
+ WORD32 intra_cost_ol;
+ /* closed loop intra cost based on empirical coding noise estimate */
+ WORD32 intra_cost_cl_est = 0;
+ /* closed loop intra coding noise estimate */
+ WORD32 intra_noise_cl_est;
+ WORD32 num_results_to_copy = 0;
+
+ WORD32 found_intra = 0;
+ WORD32 quality_preset = ps_ctxt->i4_quality_preset;
+ WORD32 frm_qp = ps_ctxt->i4_frame_qp;
+ WORD32 frm_qstep_multiplier = gau4_frame_qstep_multiplier[frm_qp - 1];
+ WORD32 frm_qstep = ps_ctxt->i4_frame_qstep;
+ UWORD8 u1_cu_size = ps_cu_tree_analyse->u1_cu_size;
+ UWORD8 u1_x_off = ps_cu_tree_analyse->b3_cu_pos_x << 3;
+ UWORD8 u1_y_off = ps_cu_tree_analyse->b3_cu_pos_y << 3;
+ UWORD8 u1_threshold_multi;
+ switch(quality_preset)
+ {
+ case IHEVCE_QUALITY_P0:
+ case IHEVCE_QUALITY_P2:
+ {
+ num_results_to_copy =
+ MIN(MAX_NUMBER_OF_INTER_RDOPT_CANDS_IN_PQ_AND_HQ, u1_num_best_results);
+ break;
+ }
+ case IHEVCE_QUALITY_P3:
+ {
+ num_results_to_copy = MIN(MAX_NUMBER_OF_INTER_RDOPT_CANDS_IN_MS, u1_num_best_results);
+ break;
+ }
+ case IHEVCE_QUALITY_P4:
+ case IHEVCE_QUALITY_P5:
+ case IHEVCE_QUALITY_P6:
+ {
+ num_results_to_copy =
+ MIN(MAX_NUMBER_OF_INTER_RDOPT_CANDS_IN_HS_AND_XS, u1_num_best_results);
+ break;
+ }
+ }
+
+ ps_cu_out->u1_num_inter_cands = 0;
+
+ /***************************************************************/
+ /* Depending CU size that has won in ME, */
+ /* Estimate the closed loop intra cost for enabling intra */
+ /* evaluation in rdopt stage based on preset */
+ /***************************************************************/
+ switch(u1_cu_size)
+ {
+ case 64:
+ {
+ /* coding noise estimate for intra closed loop cost */
+ intra_cost_ol = ps_cur_ipe_ctb->i4_best64x64_intra_cost - frm_qstep * 256;
+
+ intra_noise_cl_est = (frm_qstep * frm_qstep_multiplier) + (intra_cost_ol >> 4);
+
+ intra_noise_cl_est = MIN(intra_noise_cl_est, (frm_qstep * 16)) * 16;
+
+ intra_cost_cl_est = intra_cost_ol + intra_noise_cl_est;
+ break;
+ }
+ case 32:
+ {
+ /* coding noise estimate for intra closed loop cost */
+ intra_cost_ol = ps_cur_ipe_ctb->ai4_best32x32_intra_cost[i4_32x32_id] - frm_qstep * 64;
+
+ intra_noise_cl_est = (frm_qstep * frm_qstep_multiplier) + (intra_cost_ol >> 4);
+
+ intra_noise_cl_est = MIN(intra_noise_cl_est, (frm_qstep * 16)) * 4;
+
+ intra_cost_cl_est = intra_cost_ol + intra_noise_cl_est;
+ break;
+ }
+ case 16:
+ {
+ /* coding noise estimate for intra closed loop cost */
+ intra_cost_ol =
+ ps_cur_ipe_ctb->ai4_best16x16_intra_cost[(u1_x_off >> 4) + ((u1_y_off >> 4) << 2)] -
+ frm_qstep * 16;
+
+ intra_noise_cl_est = (frm_qstep * frm_qstep_multiplier) + (intra_cost_ol >> 4);
+
+ intra_noise_cl_est = MIN(intra_noise_cl_est, (frm_qstep * 16));
+
+ intra_cost_cl_est = intra_cost_ol + intra_noise_cl_est;
+ break;
+ }
+ case 8:
+ {
+ /* coding noise estimate for intra closed loop cost */
+ intra_cost_ol =
+ ps_cur_ipe_ctb->ai4_best8x8_intra_cost[(u1_x_off >> 3) + u1_y_off] - frm_qstep * 4;
+
+ intra_noise_cl_est = (frm_qstep * frm_qstep_multiplier) + (intra_cost_ol >> 4);
+
+ intra_noise_cl_est = MIN(intra_noise_cl_est, (frm_qstep * 16)) >> 2;
+
+ intra_cost_cl_est = intra_cost_ol + intra_noise_cl_est;
+ break;
+ }
+ }
+#if DISABLE_INTER_CANDIDATES
+ return;
+#endif
+
+ u1_threshold_multi = 1;
+#if DISABLE_INTRA_WHEN_NOISY && USE_NOISE_TERM_IN_ENC_LOOP
+ if(u1_is_cu_noisy)
+ {
+ intra_cost_cl_est = INT_MAX;
+ }
+#endif
+
+ ps_cu_candt = ps_cu_out->as_cu_inter_cand;
+
+ /* Check if the first best candidate is inter or intra */
+ if(ps_best_results[0].as_pu_results[0].pu.b1_intra_flag)
+ {
+ ps_cu_out->u1_best_is_intra = 1;
+ }
+ else
+ {
+ ps_cu_out->u1_best_is_intra = 0;
+ }
+
+ for(j = 0; j < u1_num_best_results; j++)
+ {
+ part_type_results_t *ps_best = &ps_best_results[j];
+
+ if(ps_best->as_pu_results[0].pu.b1_intra_flag)
+ {
+ found_intra = 1;
+ }
+ else
+ {
+ /* populate the TU split flags, 4 flags copied as max cu can be 64 */
+ memcpy(ps_cu_candt->ai4_tu_split_flag, ps_best->ai4_tu_split_flag, 4 * sizeof(WORD32));
+
+ /* populate the TU early CBF flags, 4 flags copied as max cu can be 64 */
+ memcpy(ps_cu_candt->ai4_tu_early_cbf, ps_best->ai4_tu_early_cbf, 4 * sizeof(WORD32));
+
+ /* Note: the enums of part size and me part types shall match */
+ ps_cu_candt->b3_part_size = ps_best->u1_part_type;
+
+ /* ME will always set the skip flag to 0 */
+ /* in closed loop skip will be added as a candidate */
+ ps_cu_candt->b1_skip_flag = 0;
+
+ /* copy the inter pus : Note: assuming NxN part type is not supported */
+ ps_cu_candt->as_inter_pu[0] = ps_best->as_pu_results[0].pu;
+
+ ps_cu_candt->as_inter_pu[0].b1_merge_flag = 0;
+
+ /* Copy the total cost of the CU candt */
+ ps_cu_candt->i4_total_cost = ps_best->i4_tot_cost;
+
+ ps_cu_out->ai4_mv_cost[ps_cu_out->u1_num_inter_cands][0] =
+ ps_best->as_pu_results[0].i4_mv_cost;
+
+#if REUSE_ME_COMPUTED_ERROR_FOR_INTER_CAND_SIFTING
+ ps_cu_out->ai4_err_metric[ps_cu_out->u1_num_inter_cands][0] =
+ ps_best->as_pu_results[0].i4_tot_cost - ps_best->as_pu_results[0].i4_mv_cost;
+#endif
+
+ if(ps_best->u1_part_type)
+ {
+ ps_cu_candt->as_inter_pu[1] = ps_best->as_pu_results[1].pu;
+ ps_cu_out->ai4_mv_cost[ps_cu_out->u1_num_inter_cands][1] =
+ ps_best->as_pu_results[1].i4_mv_cost;
+#if REUSE_ME_COMPUTED_ERROR_FOR_INTER_CAND_SIFTING
+ ps_cu_out->ai4_err_metric[ps_cu_out->u1_num_inter_cands][1] =
+ ps_best->as_pu_results[1].i4_tot_cost - ps_best->as_pu_results[1].i4_mv_cost;
+#endif
+
+ ps_cu_candt->as_inter_pu[1].b1_merge_flag = 0;
+ }
+
+ ps_cu_candt++;
+ ps_cu_out->u1_num_inter_cands++;
+ if(intra_cost_cl_est < ((ps_best->i4_tot_cost * u1_threshold_multi) >> 0))
+ {
+ /* The rationale - */
+ /* Artefacts were being observed in some sequences, */
+ /* Brooklyn_1080p in particular - where it was readily */
+ /* apparent. The cause was coding of CU's as inter CU's */
+ /* when they actually needed to be coded as intra CU's. */
+ /* This was observed during either fade-outs aor flashes. */
+ /* After tinkering with the magnitude of the coding noise */
+ /* factor that was added to the intra cost to see when the */
+ /* artefacts in Brooklyn vanished, it was observed that the */
+ /* factor multiplied with the frame_qstep followed a pattern. */
+ /* When the pattern was subjected to a regression analysis, the */
+ /* formula seen below emerged. Also note the fact that the coding */
+ /* noise factor is the product of the frame_qstep and a constant */
+ /* multiplier */
+
+ /*UWORD32 frm_qstep_multiplier =
+ -3.346 * log((float)frm_qstep) + 15.925;*/
+ found_intra = 1;
+ }
+
+ if(ps_cu_out->u1_num_inter_cands >= num_results_to_copy)
+ {
+ break;
+ }
+ }
+ }
+
+ if(quality_preset < IHEVCE_QUALITY_P4)
+ {
+ found_intra = 1;
+ }
+
+ if(!found_intra)
+ {
+ /* rdopt evaluation of intra disabled as inter is clear winner */
+ ps_cu_out->u1_num_intra_rdopt_cands = 0;
+
+ /* all the modes invalidated */
+ ps_cu_out->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
+ ps_cu_out->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 255;
+ ps_cu_out->s_cu_intra_cand.au1_intra_luma_modes_nxn[0][0] = 255;
+ ps_cu_out->u1_chroma_intra_pred_mode = 255;
+
+ /* no intra candt to verify */
+ ps_cu_out->s_cu_intra_cand.b6_num_intra_cands = 0;
+ }
+}
+
+/**
+*********************************************************************************
+* Function name : ihevce_create_child_nodes_cu_tree
+*
+* \brief
+* Links four consecutive entries of the CU tree array as children of a node
+*
+* \param[in] ps_cu_tree_root : base of the CU tree node array
+* \param[in,out] ps_cu_tree_cur_node : node that receives the four child links
+* \param[in] ai4_child_node_enable : four enable flags, in TL, TR, BL, BR order
+* \param[in] nodes_already_created : offset from the root of the first child entry
+* \return
+* Always 4
+*
+**********************************************************************************/
+WORD32 ihevce_create_child_nodes_cu_tree(
+ cur_ctb_cu_tree_t *ps_cu_tree_root,
+ cur_ctb_cu_tree_t *ps_cu_tree_cur_node,
+ WORD32 *ai4_child_node_enable,
+ WORD32 nodes_already_created)
+{
+ cur_ctb_cu_tree_t *ps_tl;
+ cur_ctb_cu_tree_t *ps_tr;
+ cur_ctb_cu_tree_t *ps_bl;
+ cur_ctb_cu_tree_t *ps_br;
+ /* The four children are consecutive array entries starting at the given offset */
+ ps_tl = ps_cu_tree_root + nodes_already_created;
+ ps_tr = ps_tl + 1;
+ ps_bl = ps_tr + 1;
+ ps_br = ps_bl + 1;
+ /* The enable flags are applied only when the current node is marked valid */
+ if(1 == ps_cu_tree_cur_node->is_node_valid)
+ {
+ ps_tl = (ai4_child_node_enable[0]) ? ps_tl : NULL;
+ ps_tr = (ai4_child_node_enable[1]) ? ps_tr : NULL;
+ ps_bl = (ai4_child_node_enable[2]) ? ps_bl : NULL;
+ ps_br = (ai4_child_node_enable[3]) ? ps_br : NULL;
+
+ /* In an incomplete CTB, if any of the child nodes is assigned NULL */
+ /* then the parent node ceases to be valid */
+ if((ps_tl == NULL) || (ps_tr == NULL) || (ps_br == NULL) || (ps_bl == NULL))
+ {
+ ps_cu_tree_cur_node->is_node_valid = 0;
+ }
+ }
+ ps_cu_tree_cur_node->ps_child_node_tl = ps_tl;
+ ps_cu_tree_cur_node->ps_child_node_tr = ps_tr;
+ ps_cu_tree_cur_node->ps_child_node_bl = ps_bl;
+ ps_cu_tree_cur_node->ps_child_node_br = ps_br;
+
+ return 4;
+}
+
+/**
+*********************************************************************************
+* Function name : ihevce_populate_cu_tree
+*
+* \brief
+* Recursively fills a CU tree node and its children from the IPE L0 CTB analysis
+*
+* \param[in,out] ps_cur_ipe_ctb : IPE L0 CTB data; its created-node count is updated
+* \param[out] ps_cu_tree : node to fill (validity, size, position); NULL is a no-op
+* \param[in] tree_depth : 0, 1, 2, 3 select 64x64, 32x32, 16x16, 8x8 handling
+* \param[in] e_quality_preset : preset; selects merge-flag or split-flag decisions
+* \param[in] e_grandparent_blk_pos : position index of the grandparent (used at depth 3)
+* \param[in] e_parent_blk_pos : position index of the parent (used at depths 2 and 3)
+* \param[in] e_cur_blk_pos : position index of this block within its parent
+*
+* \return
+* None
+*
+**********************************************************************************/
+void ihevce_populate_cu_tree(
+ ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
+ cur_ctb_cu_tree_t *ps_cu_tree,
+ WORD32 tree_depth,
+ IHEVCE_QUALITY_CONFIG_T e_quality_preset,
+ CU_POS_T e_grandparent_blk_pos,
+ CU_POS_T e_parent_blk_pos,
+ CU_POS_T e_cur_blk_pos)
+{
+ WORD32 ai4_child_enable[4];
+ WORD32 children_nodes_required = 0;
+ WORD32 cu_pos_x = 0;
+ WORD32 cu_pos_y = 0;
+ WORD32 cu_size = 0;
+ WORD32 i;
+ WORD32 node_validity = 0;
+
+ if(NULL == ps_cu_tree)
+ {
+ return;
+ }
+
+ switch(tree_depth)
+ {
+ case 0:
+ {
+ /* 64x64 block at position (0, 0); valid when the CTB split flag is clear */
+ intra32_analyse_t *ps_intra32_analyse = ps_cur_ipe_ctb->as_intra32_analyse;
+
+ children_nodes_required = 1;
+ cu_size = 64;
+ cu_pos_x = 0;
+ cu_pos_y = 0;
+
+ node_validity = !ps_cur_ipe_ctb->u1_split_flag;
+
+ if(e_quality_preset >= IHEVCE_QUALITY_P2)
+ {
+ if(node_validity == 1)
+ {
+ children_nodes_required = 0;
+ }
+ }
+ /* Child enables come from the valid flags of the four 32x32 blocks */
+ for(i = 0; i < 4; i++)
+ {
+ ai4_child_enable[i] = ps_intra32_analyse[i].b1_valid_cu;
+ }
+
+ break;
+ }
+ case 1:
+ {
+ /* 32x32 block, indexed by e_cur_blk_pos inside the CTB */
+ WORD32 valid_flag_32 = (ps_cur_ipe_ctb->as_intra32_analyse[e_cur_blk_pos].b1_valid_cu);
+
+ intra16_analyse_t *ps_intra16_analyse =
+ ps_cur_ipe_ctb->as_intra32_analyse[e_cur_blk_pos].as_intra16_analyse;
+
+ cu_size = 32;
+
+ /* Explanation for logic below - */
+ /* * pos_x and pos_y are in units of 8x8 CU's */
+ /* * pos_x = 0 for TL and BL children */
+ /* * pos_x = 4 for TR and BR children */
+ /* * pos_y = 0 for TL and TR children */
+ /* * pos_y = 4 for BL and BR children */
+ cu_pos_x = (e_cur_blk_pos & 1) << 2;
+ cu_pos_y = (e_cur_blk_pos & 2) << 1;
+
+ {
+ node_validity = (ps_cur_ipe_ctb->as_intra32_analyse[e_cur_blk_pos].b1_merge_flag);
+
+ if(e_quality_preset >= IHEVCE_QUALITY_P2)
+ {
+ node_validity = (!ps_cur_ipe_ctb->as_intra32_analyse[e_cur_blk_pos].b1_split_flag);
+ }
+
+ node_validity = node_validity && valid_flag_32;
+ children_nodes_required = !node_validity || ps_cur_ipe_ctb->u1_split_flag;
+ }
+ /* At P2 and above a valid node gets no children; otherwise the split flag decides */
+ if(e_quality_preset >= IHEVCE_QUALITY_P2)
+ {
+ if(node_validity == 1)
+ {
+ children_nodes_required = 0;
+ }
+ else
+ {
+ children_nodes_required =
+ (ps_cur_ipe_ctb->as_intra32_analyse[e_cur_blk_pos].b1_split_flag);
+ }
+ }
+ /* Child enables come from the valid flags of the four 16x16 blocks */
+ for(i = 0; i < 4; i++)
+ {
+ ai4_child_enable[i] = ps_intra16_analyse[i].b1_valid_cu;
+ }
+
+ break;
+ }
+ case 2:
+ {
+ /* 16x16 block, indexed by e_cur_blk_pos inside 32x32 block e_parent_blk_pos */
+ WORD32 cu_pos_x_parent;
+ WORD32 cu_pos_y_parent;
+ WORD32 merge_flag_16;
+ WORD32 merge_flag_32;
+
+ intra8_analyse_t *ps_intra8_analyse = ps_cur_ipe_ctb->as_intra32_analyse[e_parent_blk_pos]
+ .as_intra16_analyse[e_cur_blk_pos]
+ .as_intra8_analyse;
+
+ WORD32 valid_flag_16 = (ps_cur_ipe_ctb->as_intra32_analyse[e_parent_blk_pos]
+ .as_intra16_analyse[e_cur_blk_pos]
+ .b1_valid_cu);
+
+ cu_size = 16;
+
+ /* Position in 8x8 units: the parent's 32x32 offset (0 or 4) plus */
+ /* this block's offset inside the parent (0 or 2) */
+ cu_pos_x_parent = (e_parent_blk_pos & 1) << 2;
+ cu_pos_y_parent = (e_parent_blk_pos & 2) << 1;
+ cu_pos_x = cu_pos_x_parent + ((e_cur_blk_pos & 1) << 1);
+ cu_pos_y = cu_pos_y_parent + (e_cur_blk_pos & 2);
+
+ merge_flag_16 = (ps_cur_ipe_ctb->as_intra32_analyse[e_parent_blk_pos]
+ .as_intra16_analyse[e_cur_blk_pos]
+ .b1_merge_flag);
+ merge_flag_32 = (ps_cur_ipe_ctb->as_intra32_analyse[e_parent_blk_pos].b1_merge_flag);
+
+#if !ENABLE_UNIFORM_CU_SIZE_8x8
+ node_validity = (merge_flag_16) || ((ps_cur_ipe_ctb->u1_split_flag) && (!merge_flag_32));
+#else
+ node_validity = 0;
+#endif
+ /* NOTE(review): the next assignment overwrites the #if/#else result above - confirm intent */
+ node_validity = (merge_flag_16) || ((ps_cur_ipe_ctb->u1_split_flag) && (!merge_flag_32));
+
+ if(e_quality_preset >= IHEVCE_QUALITY_P2)
+ {
+ node_validity = (!ps_cur_ipe_ctb->as_intra32_analyse[e_parent_blk_pos]
+ .as_intra16_analyse[e_cur_blk_pos]
+ .b1_split_flag);
+ }
+
+ node_validity = node_validity && valid_flag_16;
+
+ children_nodes_required = ((ps_cur_ipe_ctb->u1_split_flag) && (!merge_flag_32)) ||
+ !merge_flag_16;
+ /* At P2 and above children are created exactly when this node is not valid */
+ if(e_quality_preset >= IHEVCE_QUALITY_P2)
+ {
+ children_nodes_required = !node_validity;
+ }
+
+ for(i = 0; i < 4; i++)
+ {
+ ai4_child_enable[i] = ps_intra8_analyse[i].b1_valid_cu;
+ }
+ break;
+ }
+ case 3:
+ {
+ /* 8x8 block: validity follows its valid flag and no children are requested */
+ WORD32 cu_pos_x_grandparent;
+ WORD32 cu_pos_y_grandparent;
+
+ WORD32 cu_pos_x_parent;
+ WORD32 cu_pos_y_parent;
+
+ WORD32 valid_flag_8 = (ps_cur_ipe_ctb->as_intra32_analyse[e_grandparent_blk_pos]
+ .as_intra16_analyse[e_parent_blk_pos]
+ .as_intra8_analyse[e_cur_blk_pos]
+ .b1_valid_cu);
+
+ cu_size = 8;
+ /* Position in 8x8 units: 32x32 offset (0 or 4) + 16x16 offset (0 or 2) + 8x8 offset (0 or 1) */
+ cu_pos_x_grandparent = (e_grandparent_blk_pos & 1) << 2;
+ cu_pos_y_grandparent = (e_grandparent_blk_pos & 2) << 1;
+ cu_pos_x_parent = cu_pos_x_grandparent + ((e_parent_blk_pos & 1) << 1);
+ cu_pos_y_parent = cu_pos_y_grandparent + (e_parent_blk_pos & 2);
+ cu_pos_x = cu_pos_x_parent + (e_cur_blk_pos & 1);
+ cu_pos_y = cu_pos_y_parent + ((e_cur_blk_pos & 2) >> 1);
+
+ node_validity = 1 && valid_flag_8;
+
+ children_nodes_required = 0;
+
+ break;
+ }
+ }
+
+ /* Fill the current cu_tree node with the values derived above */
+ ps_cu_tree->is_node_valid = node_validity;
+ ps_cu_tree->u1_cu_size = cu_size;
+ ps_cu_tree->b3_cu_pos_x = cu_pos_x;
+ ps_cu_tree->b3_cu_pos_y = cu_pos_y;
+
+ if(children_nodes_required)
+ {
+ tree_depth++;
+ /* Link four child entries and add the returned count to the created-node total */
+ ps_cur_ipe_ctb->nodes_created_in_cu_tree += ihevce_create_child_nodes_cu_tree(
+ ps_cur_ipe_ctb->ps_cu_tree_root,
+ ps_cu_tree,
+ ai4_child_enable,
+ ps_cur_ipe_ctb->nodes_created_in_cu_tree);
+ /* Recurse into TL, TR, BL, BR; the position arguments shift up one generation */
+ ihevce_populate_cu_tree(
+ ps_cur_ipe_ctb,
+ ps_cu_tree->ps_child_node_tl,
+ tree_depth,
+ e_quality_preset,
+ e_parent_blk_pos,
+ e_cur_blk_pos,
+ POS_TL);
+
+ ihevce_populate_cu_tree(
+ ps_cur_ipe_ctb,
+ ps_cu_tree->ps_child_node_tr,
+ tree_depth,
+ e_quality_preset,
+ e_parent_blk_pos,
+ e_cur_blk_pos,
+ POS_TR);
+
+ ihevce_populate_cu_tree(
+ ps_cur_ipe_ctb,
+ ps_cu_tree->ps_child_node_bl,
+ tree_depth,
+ e_quality_preset,
+ e_parent_blk_pos,
+ e_cur_blk_pos,
+ POS_BL);
+
+ ihevce_populate_cu_tree(
+ ps_cur_ipe_ctb,
+ ps_cu_tree->ps_child_node_br,
+ tree_depth,
+ e_quality_preset,
+ e_parent_blk_pos,
+ e_cur_blk_pos,
+ POS_BR);
+ }
+ else
+ {
+ ps_cu_tree->ps_child_node_tl = NULL;
+ ps_cu_tree->ps_child_node_tr = NULL;
+ ps_cu_tree->ps_child_node_bl = NULL;
+ ps_cu_tree->ps_child_node_br = NULL;
+ }
+}
+
+/**
+*********************************************************************************
+* Function name : ihevce_intra_mode_populator
+*
+* \brief
+* Copies the IPE best intra luma mode lists for a CU into its intra candidate struct
+*
+* \param[out] ps_cu_intra_cand : CU intra candidate structure to be filled
+* \param[in] ps_ipe_data : pointer to IPE L0 analyse structure (source of the modes)
+* \param[in] ps_cu_tree_data : CU tree node giving the CU size and position
+* \param[in] i1_slice_type : slice type value
+* \param[in] i4_quality_preset : quality preset value
+*
+* \return
+* None
+*
+**********************************************************************************/
+static void ihevce_intra_mode_populator(
+ cu_intra_cand_t *ps_cu_intra_cand,
+ ipe_l0_ctb_analyse_for_me_t *ps_ipe_data,
+ cur_ctb_cu_tree_t *ps_cu_tree_data,
+ WORD8 i1_slice_type,
+ WORD32 i4_quality_preset)
+{
+ WORD32 i4_32x32_id, i4_16x16_id, i4_8x8_id;
+
+ UWORD8 u1_cu_pos_x = ps_cu_tree_data->b3_cu_pos_x;
+ UWORD8 u1_cu_pos_y = ps_cu_tree_data->b3_cu_pos_y;
+ /* Bit 2, bit 1 and bit 0 of the CU position give the 32x32, 16x16 and 8x8 index */
+ i4_32x32_id = ((u1_cu_pos_x & 4) >> 2) + ((u1_cu_pos_y & 4) >> 1);
+
+ i4_16x16_id = ((u1_cu_pos_x & 2) >> 1) + ((u1_cu_pos_y & 2));
+
+ i4_8x8_id = (u1_cu_pos_x & 1) + ((u1_cu_pos_y & 1) << 1);
+
+ if(i4_quality_preset < IHEVCE_QUALITY_P3)
+ {
+ switch(ps_cu_tree_data->u1_cu_size)
+ {
+ case 64:
+ {
+ memcpy(
+ ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
+ ps_ipe_data->au1_best_modes_32x32_tu,
+ MAX_INTRA_CU_CANDIDATES + 1);
+
+ break;
+ }
+ case 32:
+ {
+ intra32_analyse_t *ps_32x32_ipe_analyze = &ps_ipe_data->as_intra32_analyse[i4_32x32_id];
+
+ memcpy(
+ ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu,
+ ps_32x32_ipe_analyze->au1_best_modes_32x32_tu,
+ MAX_INTRA_CU_CANDIDATES + 1);
+ /* CU/2 list: set to 255 at P0 for non-I slices, and for I slices whose BL child is valid */
+ if((i1_slice_type != ISLICE) && (i4_quality_preset == IHEVCE_QUALITY_P0))
+ {
+ ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 255;
+ }
+ else if((i1_slice_type == ISLICE) && (i4_quality_preset == IHEVCE_QUALITY_P0))
+ {
+ if((ps_cu_tree_data->ps_child_node_bl != NULL) &&
+ (ps_cu_tree_data->ps_child_node_bl->is_node_valid))
+ {
+ ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 255;
+ }
+ else
+ {
+ memcpy(
+ ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
+ ps_32x32_ipe_analyze->au1_best_modes_16x16_tu,
+ MAX_INTRA_CU_CANDIDATES + 1);
+ }
+ }
+ else
+ {
+ memcpy(
+ ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
+ ps_32x32_ipe_analyze->au1_best_modes_16x16_tu,
+ MAX_INTRA_CU_CANDIDATES + 1);
+ }
+
+ break;
+ }
+ case 16:
+ {
+ /* Copy best 16x16 CU modes */
+ intra32_analyse_t *ps_32x32_ipe_analyze = &ps_ipe_data->as_intra32_analyse[i4_32x32_id];
+
+ intra16_analyse_t *ps_16x16_ipe_analyze =
+ &ps_32x32_ipe_analyze->as_intra16_analyse[i4_16x16_id];
+
+ memcpy(
+ ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu,
+ ps_16x16_ipe_analyze->au1_best_modes_16x16_tu,
+ MAX_INTRA_CU_CANDIDATES + 1);
+ /* CU/2 list: set to 255 at P0 for non-I slices, and for I slices whose BL child is valid */
+ if((i1_slice_type != ISLICE) && (i4_quality_preset == IHEVCE_QUALITY_P0))
+ {
+ ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 255;
+ }
+ else if((i1_slice_type == ISLICE) && (i4_quality_preset == IHEVCE_QUALITY_P0))
+ {
+ if((ps_cu_tree_data->ps_child_node_bl != NULL) &&
+ (ps_cu_tree_data->ps_child_node_bl->is_node_valid))
+ {
+ ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 255;
+ }
+ else
+ {
+ memcpy(
+ ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
+ ps_16x16_ipe_analyze->au1_best_modes_8x8_tu,
+ MAX_INTRA_CU_CANDIDATES + 1);
+ }
+ }
+ else
+ {
+ memcpy(
+ ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
+ ps_16x16_ipe_analyze->au1_best_modes_8x8_tu,
+ MAX_INTRA_CU_CANDIDATES + 1);
+ }
+
+ break;
+ }
+ case 8:
+ {
+ intra32_analyse_t *ps_32x32_ipe_analyze = &ps_ipe_data->as_intra32_analyse[i4_32x32_id];
+
+ intra16_analyse_t *ps_16x16_ipe_analyze =
+ &ps_32x32_ipe_analyze->as_intra16_analyse[i4_16x16_id];
+
+ intra8_analyse_t *ps_8x8_ipe_analyze =
+ &ps_16x16_ipe_analyze->as_intra8_analyse[i4_8x8_id];
+
+ memcpy(
+ ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu,
+ ps_8x8_ipe_analyze->au1_best_modes_8x8_tu,
+ MAX_INTRA_CU_CANDIDATES + 1);
+
+ ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 255;
+
+ /* Reset the per-partition mode counts and hash, then add the 4x4 best modes */
+ {
+ WORD32 i, j;
+
+ for(i = 0; i < NUM_PU_PARTS; i++)
+ {
+ ps_cu_intra_cand->au1_num_modes_added[i] = 0;
+
+ for(j = 0; j < MAX_INTRA_CANDIDATES; j++)
+ {
+ ps_cu_intra_cand->au1_intra_luma_mode_nxn_hash[i][j] = 0;
+ }
+ }
+ /* Copy modes per partition up to the first 255 entry, marking each mode in the hash */
+ for(i = 0; i < NUM_PU_PARTS; i++)
+ {
+ for(j = 0; j < MAX_INTRA_CU_CANDIDATES; j++)
+ {
+ if(ps_8x8_ipe_analyze->au1_4x4_best_modes[i][j] == 255)
+ {
+ ps_cu_intra_cand->au1_intra_luma_modes_nxn[i][j] = 255;
+ break;
+ }
+
+ ps_cu_intra_cand->au1_intra_luma_modes_nxn[i][j] =
+ ps_8x8_ipe_analyze->au1_4x4_best_modes[i][j];
+
+ ps_cu_intra_cand->au1_intra_luma_mode_nxn_hash
+ [i][ps_8x8_ipe_analyze->au1_4x4_best_modes[i][j]] = 1;
+
+ ps_cu_intra_cand->au1_num_modes_added[i]++;
+ }
+ /* Full list on a non-B slice: the count is replaced by the hash updater's return value */
+ if(ps_cu_intra_cand->au1_num_modes_added[i] == MAX_INTRA_CU_CANDIDATES)
+ {
+ if(i1_slice_type != BSLICE)
+ {
+ ps_cu_intra_cand->au1_num_modes_added[i] =
+ ihevce_intra_mode_nxn_hash_updater(
+ ps_cu_intra_cand->au1_intra_luma_modes_nxn[i],
+ ps_cu_intra_cand->au1_intra_luma_mode_nxn_hash[i],
+ ps_cu_intra_cand->au1_num_modes_added[i]);
+ }
+ }
+ }
+ }
+
+ break;
+ }
+ }
+ }
+ else if(i4_quality_preset == IHEVCE_QUALITY_P6)
+ {
+ switch(ps_cu_tree_data->u1_cu_size)
+ {
+ case 64:
+ {
+ memcpy(
+ ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
+ ps_ipe_data->au1_best_modes_32x32_tu,
+ (NUM_BEST_MODES + 1) * sizeof(UWORD8));
+ /* 64x64: the TU == CU list is invalidated and only the CU/2 eval flag is set */
+ ps_cu_intra_cand->b1_eval_tx_cusize = 0;
+ ps_cu_intra_cand->b1_eval_tx_cusize_by2 = 1;
+ ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
+
+#if ENABLE_INTRA_MODE_FILTERING_IN_XS25
+ ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2
+ [MAX_NUM_INTRA_MODES_PER_TU_DISTRIBUTION_IN_XS25] = 255;
+#endif
+
+ break;
+ }
+ case 32:
+ {
+ intra32_analyse_t *ps_32x32_ipe_analyze = &ps_ipe_data->as_intra32_analyse[i4_32x32_id];
+
+ memcpy(
+ ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu,
+ ps_32x32_ipe_analyze->au1_best_modes_32x32_tu,
+ (NUM_BEST_MODES + 1) * sizeof(UWORD8));
+
+ memcpy(
+ ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
+ ps_32x32_ipe_analyze->au1_best_modes_16x16_tu,
+ (NUM_BEST_MODES + 1) * sizeof(UWORD8));
+
+#if ENABLE_INTRA_MODE_FILTERING_IN_XS25
+ ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu
+ [MAX_NUM_INTRA_MODES_PER_TU_DISTRIBUTION_IN_XS25] = 255;
+ ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2
+ [MAX_NUM_INTRA_MODES_PER_TU_DISTRIBUTION_IN_XS25] = 255;
+#endif
+
+ break;
+ }
+ case 16:
+ {
+ /* Copy best 16x16 CU modes */
+ intra32_analyse_t *ps_32x32_ipe_analyze = &ps_ipe_data->as_intra32_analyse[i4_32x32_id];
+
+ intra16_analyse_t *ps_16x16_ipe_analyze =
+ &ps_32x32_ipe_analyze->as_intra16_analyse[i4_16x16_id];
+
+ memcpy(
+ ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu,
+ ps_16x16_ipe_analyze->au1_best_modes_16x16_tu,
+ (NUM_BEST_MODES + 1) * sizeof(UWORD8));
+
+ memcpy(
+ ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
+ ps_16x16_ipe_analyze->au1_best_modes_8x8_tu,
+ (NUM_BEST_MODES + 1) * sizeof(UWORD8));
+
+#if ENABLE_INTRA_MODE_FILTERING_IN_XS25
+ ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu
+ [MAX_NUM_INTRA_MODES_PER_TU_DISTRIBUTION_IN_XS25] = 255;
+ ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2
+ [MAX_NUM_INTRA_MODES_PER_TU_DISTRIBUTION_IN_XS25] = 255;
+#endif
+
+ break;
+ }
+ case 8:
+ {
+ WORD32 i;
+
+ intra32_analyse_t *ps_32x32_ipe_analyze = &ps_ipe_data->as_intra32_analyse[i4_32x32_id];
+
+ intra16_analyse_t *ps_16x16_ipe_analyze =
+ &ps_32x32_ipe_analyze->as_intra16_analyse[i4_16x16_id];
+
+ intra8_analyse_t *ps_8x8_ipe_analyze =
+ &ps_16x16_ipe_analyze->as_intra8_analyse[i4_8x8_id];
+
+ memcpy(
+ ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu,
+ ps_8x8_ipe_analyze->au1_best_modes_8x8_tu,
+ (NUM_BEST_MODES + 1) * sizeof(UWORD8));
+
+#if !ENABLE_INTRA_MODE_FILTERING_IN_XS25
+ memcpy(
+ ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
+ ps_8x8_ipe_analyze->au1_best_modes_4x4_tu,
+ (NUM_BEST_MODES + 1) * sizeof(UWORD8));
+
+ for(i = 0; i < 4; i++)
+ {
+ memcpy(
+ ps_cu_intra_cand->au1_intra_luma_modes_nxn[i],
+ ps_8x8_ipe_analyze->au1_4x4_best_modes[i],
+ (NUM_BEST_MODES + 1) * sizeof(UWORD8));
+
+ ps_cu_intra_cand->au1_intra_luma_modes_nxn[i][MAX_INTRA_CU_CANDIDATES] = 255;
+ }
+#else
+ if(255 == ps_8x8_ipe_analyze->au1_4x4_best_modes[0][0])
+ {
+ memcpy(
+ ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
+ ps_8x8_ipe_analyze->au1_best_modes_4x4_tu,
+ (NUM_BEST_MODES + 1) * sizeof(UWORD8));
+
+ ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2
+ [MAX_NUM_INTRA_MODES_PER_TU_DISTRIBUTION_IN_XS25] = 255;
+ }
+ else
+ {
+ for(i = 0; i < 4; i++)
+ {
+ memcpy(
+ ps_cu_intra_cand->au1_intra_luma_modes_nxn[i],
+ ps_8x8_ipe_analyze->au1_4x4_best_modes[i],
+ (NUM_BEST_MODES + 1) * sizeof(UWORD8));
+
+ ps_cu_intra_cand->au1_intra_luma_modes_nxn
+ [i][MAX_NUM_INTRA_MODES_PER_TU_DISTRIBUTION_IN_XS25] = 255;
+ }
+ }
+
+ ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu
+ [MAX_NUM_INTRA_MODES_PER_TU_DISTRIBUTION_IN_XS25] = 255;
+#endif
+
+#if FORCE_NXN_MODE_BASED_ON_OL_IPE
+ if((i4_quality_preset == IHEVCE_QUALITY_P6) && (i1_slice_type != ISLICE))
+ {
+ /* Evaluate nxn mode for 8x8 if ol ipe wins for nxn over cu=tu and cu=4tu. */
+ /* Disable CU=TU and CU=4TU modes and keep one nxn mode per partition */
+ if(ps_8x8_ipe_analyze->b1_enable_nxn == 1)
+ {
+ ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
+ ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 255;
+ ps_cu_intra_cand->au1_intra_luma_modes_nxn[0][1] = 255;
+ ps_cu_intra_cand->au1_intra_luma_modes_nxn[1][1] = 255;
+ ps_cu_intra_cand->au1_intra_luma_modes_nxn[2][1] = 255;
+ ps_cu_intra_cand->au1_intra_luma_modes_nxn[3][1] = 255;
+ }
+ }
+#endif
+
+ break;
+ }
+ }
+ }
+ else /* presets that are neither below P3 nor equal to P6 */
+ {
+ switch(ps_cu_tree_data->u1_cu_size)
+ {
+ case 64:
+ {
+ memcpy(
+ ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
+ ps_ipe_data->au1_best_modes_32x32_tu,
+ (NUM_BEST_MODES + 1) * sizeof(UWORD8));
+ /* 64x64: the TU == CU list is invalidated and only the CU/2 eval flag is set */
+ ps_cu_intra_cand->b1_eval_tx_cusize = 0;
+ ps_cu_intra_cand->b1_eval_tx_cusize_by2 = 1;
+ ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
+
+ break;
+ }
+ case 32:
+ {
+ intra32_analyse_t *ps_32x32_ipe_analyze = &ps_ipe_data->as_intra32_analyse[i4_32x32_id];
+
+ memcpy(
+ ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu,
+ ps_32x32_ipe_analyze->au1_best_modes_32x32_tu,
+ (NUM_BEST_MODES + 1) * sizeof(UWORD8));
+
+ memcpy(
+ ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
+ ps_32x32_ipe_analyze->au1_best_modes_16x16_tu,
+ (NUM_BEST_MODES + 1) * sizeof(UWORD8));
+
+ break;
+ }
+ case 16:
+ {
+ /* Copy best 16x16 CU modes */
+ intra32_analyse_t *ps_32x32_ipe_analyze = &ps_ipe_data->as_intra32_analyse[i4_32x32_id];
+
+ intra16_analyse_t *ps_16x16_ipe_analyze =
+ &ps_32x32_ipe_analyze->as_intra16_analyse[i4_16x16_id];
+
+ memcpy(
+ ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu,
+ ps_16x16_ipe_analyze->au1_best_modes_16x16_tu,
+ (NUM_BEST_MODES + 1) * sizeof(UWORD8));
+
+ memcpy(
+ ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
+ ps_16x16_ipe_analyze->au1_best_modes_8x8_tu,
+ (NUM_BEST_MODES + 1) * sizeof(UWORD8));
+
+ break;
+ }
+ case 8:
+ {
+ WORD32 i;
+
+ intra32_analyse_t *ps_32x32_ipe_analyze = &ps_ipe_data->as_intra32_analyse[i4_32x32_id];
+
+ intra16_analyse_t *ps_16x16_ipe_analyze =
+ &ps_32x32_ipe_analyze->as_intra16_analyse[i4_16x16_id];
+
+ intra8_analyse_t *ps_8x8_ipe_analyze =
+ &ps_16x16_ipe_analyze->as_intra8_analyse[i4_8x8_id];
+
+ memcpy(
+ ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu,
+ ps_8x8_ipe_analyze->au1_best_modes_8x8_tu,
+ (NUM_BEST_MODES + 1) * sizeof(UWORD8));
+
+ memcpy(
+ ps_cu_intra_cand->au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2,
+ ps_8x8_ipe_analyze->au1_best_modes_4x4_tu,
+ (NUM_BEST_MODES + 1) * sizeof(UWORD8));
+ /* Copy the 4x4 mode list of each of the four partitions and write 255 after it */
+ for(i = 0; i < 4; i++)
+ {
+ memcpy(
+ ps_cu_intra_cand->au1_intra_luma_modes_nxn[i],
+ ps_8x8_ipe_analyze->au1_4x4_best_modes[i],
+ (NUM_BEST_MODES + 1) * sizeof(UWORD8));
+
+ ps_cu_intra_cand->au1_intra_luma_modes_nxn[i][MAX_INTRA_CU_CANDIDATES] = 255;
+ }
+
+ break;
+ }
+ }
+ }
+}
+/**
+******************************************************************************
+* \if Function name : ihevce_compute_rdo \endif
+*
+* \brief
+* Coding Unit mode decide function. Performs RD opt and decides the best mode
+*
+* \param[in] ps_ctxt : pointer to enc_loop module
+* \param[in] ps_cu_prms : pointer to coding unit params (position, buffer pointers)
+* \param[in] ps_cu_analyse : pointer to cu analyse
+* \param[out] ps_cu_final : pointer to cu final
+* \param[out] pu1_ecd_data : pointer to store coeff data for ECD
+* \param[out] ps_col_pu : colocated pu buffer pointer
+* \param[out] pu1_col_pu_map : colocated pu map buffer pointer
+* \param[in]col_start_pu_idx : pu index start value
+*
+* \return
+* None
+*
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+LWORD64 ihevce_compute_rdo(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ enc_loop_cu_prms_t *ps_cu_prms,
+ cur_ctb_cu_tree_t *ps_cu_tree_analyse,
+ ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
+ me_ctb_data_t *ps_cu_me_data,
+ pu_col_mv_t *ps_col_pu,
+ final_mode_state_t *ps_final_mode_state,
+ UWORD8 *pu1_col_pu_map,
+ UWORD8 *pu1_ecd_data,
+ WORD32 col_start_pu_idx,
+ WORD32 i4_ctb_x_off,
+ WORD32 i4_ctb_y_off)
+{
+ /* Populate the rdo candiates to the structure */
+ cu_analyse_t s_cu_analyse;
+ LWORD64 rdopt_best_cost;
+ /* Populate candidates of child nodes to CU analyse struct for further evaluation */
+ cu_analyse_t *ps_cu_analyse;
+ WORD32 curr_cu_pos_in_row;
+ WORD32 cu_top_right_offset, cu_top_right_dep_pos;
+ WORD32 is_first_cu_in_ctb, is_ctb_level_quant_rounding, is_nctb_level_quant_rounding;
+
+ WORD32 cu_pos_x = ps_cu_tree_analyse->b3_cu_pos_x;
+ WORD32 cu_pos_y = ps_cu_tree_analyse->b3_cu_pos_y;
+
+ /*Derive the indices of 32*32, 16*16 and 8*8 blocks*/
+ WORD32 i4_32x32_id = ((cu_pos_x & 4) >> 2) + ((cu_pos_y & 4) >> 1);
+
+ WORD32 i4_16x16_id = ((cu_pos_x & 2) >> 1) + ((cu_pos_y & 2));
+
+ WORD32 i4_8x8_id = (cu_pos_x & 1) + ((cu_pos_y & 1) << 1);
+ if(i4_ctb_y_off == 0)
+ {
+ /* No wait for 1st row */
+ cu_top_right_offset = -(MAX_CTB_SIZE);
+ {
+ ihevce_tile_params_t *ps_col_tile_params =
+ ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base + ps_ctxt->i4_tile_col_idx);
+
+ cu_top_right_offset = -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
+ }
+
+ cu_top_right_dep_pos = 0;
+ }
+ else
+ {
+ cu_top_right_offset = ps_cu_tree_analyse->u1_cu_size << 1;
+ cu_top_right_dep_pos = (i4_ctb_y_off >> 6) - 1;
+ }
+ ps_cu_analyse = &s_cu_analyse;
+
+ ps_cu_analyse->b3_cu_pos_x = cu_pos_x;
+ ps_cu_analyse->b3_cu_pos_y = cu_pos_y;
+ ps_cu_analyse->u1_cu_size = ps_cu_tree_analyse->u1_cu_size;
+
+ /* Default initializations */
+ ps_cu_analyse->u1_num_intra_rdopt_cands = MAX_INTRA_CU_CANDIDATES;
+ ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_nxn[0][0] = 255;
+ ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
+ ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 255;
+
+ ps_cu_analyse->s_cu_intra_cand.b1_eval_tx_cusize = 1;
+ ps_cu_analyse->s_cu_intra_cand.b1_eval_tx_cusize_by2 = 1;
+
+ switch(ps_cu_tree_analyse->u1_cu_size)
+ {
+ case 64:
+ {
+ memcpy(
+ ps_cu_analyse[0].i4_act_factor,
+ ps_cur_ipe_ctb->i4_64x64_act_factor,
+ 4 * 2 * sizeof(WORD32));
+
+ ps_cu_analyse[0].s_cu_intra_cand.b1_eval_tx_cusize = 0;
+ ps_cu_analyse[0].s_cu_intra_cand.b1_eval_tx_cusize_by2 = 1;
+ ps_cu_analyse[0].s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
+
+ break;
+ }
+ case 32:
+ {
+ memcpy(
+ ps_cu_analyse[0].i4_act_factor,
+ ps_cur_ipe_ctb->i4_32x32_act_factor[i4_32x32_id],
+ 3 * 2 * sizeof(WORD32));
+
+ break;
+ }
+ case 16:
+ {
+ memcpy(
+ ps_cu_analyse[0].i4_act_factor,
+ ps_cur_ipe_ctb->i4_16x16_act_factor[(i4_32x32_id << 2) + i4_16x16_id],
+ 2 * 2 * sizeof(WORD32));
+
+ break;
+ }
+ case 8:
+ {
+ memcpy(
+ ps_cu_analyse[0].i4_act_factor,
+ ps_cur_ipe_ctb->i4_16x16_act_factor[(i4_32x32_id << 2) + i4_16x16_id],
+ 2 * 2 * sizeof(WORD32));
+
+ break;
+ }
+ }
+
+ /* Populate the me data in cu_analyse struct */
+ /* For CU size 32 and 64, add me data to array of cu analyse struct */
+ if(ISLICE != ps_ctxt->i1_slice_type)
+ {
+ if((ps_cu_tree_analyse->u1_cu_size >= 32) && (ps_cu_tree_analyse->u1_inter_eval_enable))
+ {
+ if(32 == ps_cu_tree_analyse->u1_cu_size)
+ {
+ ihevce_populate_cu_struct(
+ ps_ctxt,
+ ps_cur_ipe_ctb,
+ ps_cu_tree_analyse,
+ ps_cu_me_data->as_32x32_block_data[i4_32x32_id].as_best_results,
+ ps_cu_analyse,
+ i4_32x32_id,
+#if DISABLE_INTRA_WHEN_NOISY && USE_NOISE_TERM_IN_ENC_LOOP
+ ps_cu_prms->u1_is_cu_noisy,
+#endif
+ ps_cu_me_data->as_32x32_block_data[i4_32x32_id].num_best_results);
+ }
+ else
+ {
+ ihevce_populate_cu_struct(
+ ps_ctxt,
+ ps_cur_ipe_ctb,
+ ps_cu_tree_analyse,
+ ps_cu_me_data->s_64x64_block_data.as_best_results,
+ ps_cu_analyse,
+ i4_32x32_id,
+#if DISABLE_INTRA_WHEN_NOISY && USE_NOISE_TERM_IN_ENC_LOOP
+ ps_cu_prms->u1_is_cu_noisy,
+#endif
+ ps_cu_me_data->s_64x64_block_data.num_best_results);
+ }
+ }
+ else if(ps_cu_tree_analyse->u1_cu_size < 32)
+ {
+ i4_8x8_id += (i4_32x32_id << 4) + (i4_16x16_id << 2);
+ i4_16x16_id += (i4_32x32_id << 2);
+
+ if(16 == ps_cu_tree_analyse->u1_cu_size)
+ {
+ block_data_16x16_t *ps_data = &ps_cu_me_data->as_block_data[i4_16x16_id];
+
+ if(ps_cu_tree_analyse->u1_inter_eval_enable)
+ {
+ ihevce_populate_cu_struct(
+ ps_ctxt,
+ ps_cur_ipe_ctb,
+ ps_cu_tree_analyse,
+ ps_data->as_best_results,
+ ps_cu_analyse,
+ i4_32x32_id,
+#if DISABLE_INTRA_WHEN_NOISY && USE_NOISE_TERM_IN_ENC_LOOP
+ ps_cu_prms->u1_is_cu_noisy,
+#endif
+ ps_data->num_best_results);
+ }
+ else
+ {
+ ps_cu_analyse->u1_num_inter_cands = 0;
+ ps_cu_analyse->u1_best_is_intra = 1;
+ }
+ }
+ else /* If CU size is 8 */
+ {
+ block_data_8x8_t *ps_data = &ps_cu_me_data->as_8x8_block_data[i4_8x8_id];
+
+ if(ps_cu_tree_analyse->u1_inter_eval_enable)
+ {
+ ihevce_populate_cu_struct(
+ ps_ctxt,
+ ps_cur_ipe_ctb,
+ ps_cu_tree_analyse,
+ ps_data->as_best_results,
+ ps_cu_analyse,
+ i4_32x32_id,
+#if DISABLE_INTRA_WHEN_NOISY && USE_NOISE_TERM_IN_ENC_LOOP
+ ps_cu_prms->u1_is_cu_noisy,
+#endif
+ ps_data->num_best_results);
+ }
+ else
+ {
+ ps_cu_analyse->u1_num_inter_cands = 0;
+ ps_cu_analyse->u1_best_is_intra = 1;
+ }
+ }
+ }
+ else
+ {
+ ps_cu_analyse->u1_num_inter_cands = 0;
+ ps_cu_analyse->u1_best_is_intra = 1;
+ }
+ }
+ else
+ {
+ ps_cu_analyse->u1_num_inter_cands = 0;
+ ps_cu_analyse->u1_best_is_intra = 1;
+ }
+
+ if(!ps_ctxt->i1_cu_qp_delta_enable)
+ {
+ ps_cu_analyse->i1_cu_qp = ps_ctxt->i4_frame_qp;
+
+ /*cu qp must be populated in cu_analyse_t struct*/
+ ps_ctxt->i4_cu_qp = ps_cu_analyse->i1_cu_qp;
+ }
+ else
+ {
+ ASSERT(ps_cu_analyse->i4_act_factor[0] > 0);
+ ASSERT(
+ ((ps_cu_analyse->i4_act_factor[1] > 0) && (ps_cu_analyse->u1_cu_size != 8)) ||
+ ((ps_cu_analyse->u1_cu_size == 8)));
+ ASSERT(
+ ((ps_cu_analyse->i4_act_factor[2] > 0) && (ps_cu_analyse->u1_cu_size == 32)) ||
+ ((ps_cu_analyse->u1_cu_size != 32)));
+ }
+
+ if(ps_ctxt->u1_disable_intra_eval)
+ {
+ /* rdopt evaluation of intra disabled as inter is clear winner */
+ ps_cu_analyse->u1_num_intra_rdopt_cands = 0;
+
+ /* all the modes invalidated */
+ ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
+ ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 255;
+ ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_nxn[0][0] = 255;
+ ps_cu_analyse->u1_chroma_intra_pred_mode = 255;
+
+ /* no intra candt to verify */
+ ps_cu_analyse->s_cu_intra_cand.b6_num_intra_cands = 0;
+ }
+
+#if DISABLE_L2_IPE_IN_PB_L1_IN_B
+ if((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6) && (ps_cu_analyse->u1_cu_size == 32) &&
+ (ps_ctxt->i1_slice_type != ISLICE))
+ {
+ /* rdopt evaluation of intra disabled as inter is clear winner */
+ ps_cu_analyse->u1_num_intra_rdopt_cands = 0;
+
+ /* all the modes invalidated */
+ ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
+ ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] = 255;
+ ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_nxn[0][0] = 255;
+ ps_cu_analyse->u1_chroma_intra_pred_mode = 255;
+
+ /* no intra candt to verify */
+ ps_cu_analyse->s_cu_intra_cand.b6_num_intra_cands = 0;
+ }
+#endif
+
+ if(DISABLE_INTRA_WHEN_NOISY && ps_cu_prms->u1_is_cu_noisy)
+ {
+ ps_cu_analyse->u1_num_intra_rdopt_cands = 0;
+ }
+
+ if(ps_cu_analyse->u1_num_intra_rdopt_cands || ps_cu_tree_analyse->u1_intra_eval_enable)
+ {
+ ihevce_intra_mode_populator(
+ &ps_cu_analyse->s_cu_intra_cand,
+ ps_cur_ipe_ctb,
+ ps_cu_tree_analyse,
+ ps_ctxt->i1_slice_type,
+ ps_ctxt->i4_quality_preset);
+
+ ps_cu_analyse->u1_num_intra_rdopt_cands = 1;
+ }
+
+ ASSERT(!!ps_cu_analyse->u1_num_intra_rdopt_cands || ps_cu_analyse->u1_num_inter_cands);
+
+ if(ps_ctxt->u1_use_top_at_ctb_boundary)
+ {
+ /* Wait till top data is ready */
+ /* Currently checking till top right CU */
+ curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
+
+ if(0 == ps_cu_analyse->b3_cu_pos_y)
+ {
+ ihevce_dmgr_chk_row_row_sync(
+ ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
+ curr_cu_pos_in_row,
+ cu_top_right_offset,
+ cu_top_right_dep_pos,
+ ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
+ ps_ctxt->thrd_id);
+ }
+ }
+
+#if !DISABLE_TOP_SYNC
+ {
+ if(0 == ps_cu_analyse->b3_cu_pos_y)
+ {
+ if((0 == i4_ctb_x_off) && (i4_ctb_y_off != 0))
+ {
+ if(ps_cu_analyse->b3_cu_pos_x == 0)
+ {
+ if(!ps_ctxt->u1_use_top_at_ctb_boundary)
+ {
+ /* Wait till top data is ready */
+ /* Currently checking till top right CU */
+ curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
+
+ if(0 == ps_cu_analyse->b3_cu_pos_y)
+ {
+ ihevce_dmgr_chk_row_row_sync(
+ ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
+ curr_cu_pos_in_row,
+ cu_top_right_offset,
+ cu_top_right_dep_pos,
+ ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
+ ps_ctxt->thrd_id);
+ }
+ }
+
+ ihevce_entropy_rdo_copy_states(
+ &ps_ctxt->s_rdopt_entropy_ctxt,
+ ps_ctxt->pu1_top_rt_cabac_state,
+ UPDATE_ENT_SYNC_RDO_STATE);
+ }
+ }
+ }
+ }
+#else
+ {
+ if((0 == ps_cu_analyse->b3_cu_pos_y) && (IHEVCE_QUALITY_P6 != ps_ctxt->i4_quality_preset))
+ {
+ if((0 == i4_ctb_x_off) && (i4_ctb_y_off != 0))
+ {
+ if(ps_cu_analyse->b3_cu_pos_x == 0)
+ {
+ if(!ps_ctxt->u1_use_top_at_ctb_boundary)
+ {
+ /* Wait till top data is ready */
+ /* Currently checking till top right CU */
+ curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
+
+ if(0 == ps_cu_analyse->b3_cu_pos_y)
+ {
+ ihevce_dmgr_chk_row_row_sync(
+ ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
+ curr_cu_pos_in_row,
+ cu_top_right_offset,
+ cu_top_right_dep_pos,
+ ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
+ ps_ctxt->thrd_id);
+ }
+ }
+
+ ihevce_entropy_rdo_copy_states(
+ &ps_ctxt->s_rdopt_entropy_ctxt,
+ ps_ctxt->pu1_top_rt_cabac_state,
+ UPDATE_ENT_SYNC_RDO_STATE);
+ }
+ }
+ }
+ else if((0 == ps_cu_analyse->b3_cu_pos_y) && (IHEVCE_QUALITY_P6 == ps_ctxt->i4_quality_preset))
+ {
+ UWORD8 u1_cabac_init_idc;
+ WORD8 i1_cabac_init_flag =
+ ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt->ps_slice_hdr->i1_cabac_init_flag;
+
+ if(ps_ctxt->i1_slice_type == ISLICE)
+ {
+ u1_cabac_init_idc = 0;
+ }
+ else if(ps_ctxt->i1_slice_type == PSLICE)
+ {
+ u1_cabac_init_idc = i1_cabac_init_flag ? 2 : 1;
+ }
+ else
+ {
+ u1_cabac_init_idc = i1_cabac_init_flag ? 1 : 2;
+ }
+
+ ihevce_entropy_rdo_copy_states(
+ &ps_ctxt->s_rdopt_entropy_ctxt,
+ (UWORD8 *)gau1_ihevc_cab_ctxts[u1_cabac_init_idc][ps_ctxt->i4_frame_qp],
+ UPDATE_ENT_SYNC_RDO_STATE);
+ }
+ }
+#endif
+
+ /*2 Multi-dimensional array based on trans size of rounding factor to be added here */
+ /* arrays are for rounding factor corr. to 0-1 decision and 1-2 decision */
+ /* Currently the complete array will contain only single value*/
+ /*The rounding factor is calculated with the formula
+ Deadzone val = (((R1 - R0) * (2^(-8/3)) * lamMod) + 1)/2
+ rounding factor = (1 - DeadZone Val)
+
+ Assumption: Cabac states of All the sub-blocks in the TU are considered independent
+ */
+
+ /*As long as coef level rdoq is enabled perform this operation */
+ is_first_cu_in_ctb = ((0 == ps_cu_analyse->b3_cu_pos_x) && (0 == ps_cu_analyse->b3_cu_pos_y));
+ is_ctb_level_quant_rounding =
+ ((ps_ctxt->i4_quant_rounding_level == CTB_LEVEL_QUANT_ROUNDING) &&
+ (1 == is_first_cu_in_ctb));
+ is_nctb_level_quant_rounding =
+ ((ps_ctxt->i4_quant_rounding_level == NCTB_LEVEL_QUANT_ROUNDING) &&
+ (1 == is_first_cu_in_ctb) && (((i4_ctb_x_off >> 6) % NUM_CTB_QUANT_ROUNDING) == 0));
+
+ if((ps_ctxt->i4_quant_rounding_level == CU_LEVEL_QUANT_ROUNDING) ||
+ (ps_ctxt->i4_quant_rounding_level == TU_LEVEL_QUANT_ROUNDING) ||
+ (1 == is_ctb_level_quant_rounding) || (1 == is_nctb_level_quant_rounding))
+ {
+ double i4_lamda_modifier, i4_lamda_modifier_uv;
+ WORD32 trans_size, trans_size_cr;
+ trans_size = ps_cu_analyse->u1_cu_size;
+
+ if((1 == is_ctb_level_quant_rounding) || (1 == is_nctb_level_quant_rounding))
+ {
+ trans_size = MAX_TU_SIZE;
+ }
+ else
+ {
+ if(ps_cu_analyse->u1_cu_size == 64)
+ {
+ trans_size >>= 1;
+ }
+ }
+
+ /*Chroma trans size = half of luma trans size */
+ trans_size_cr = trans_size >> 1;
+
+ if((BSLICE == ps_ctxt->i1_slice_type) && (ps_ctxt->i4_temporal_layer_id))
+ {
+ i4_lamda_modifier = ps_ctxt->i4_lamda_modifier *
+ CLIP3((((double)(ps_ctxt->i4_cu_qp - 12)) / 6.0), 2.00, 4.00);
+ i4_lamda_modifier_uv =
+ ps_ctxt->i4_uv_lamda_modifier *
+ CLIP3((((double)(ps_ctxt->i4_chrm_cu_qp - 12)) / 6.0), 2.00, 4.00);
+ }
+ else
+ {
+ i4_lamda_modifier = ps_ctxt->i4_lamda_modifier;
+ i4_lamda_modifier_uv = ps_ctxt->i4_uv_lamda_modifier;
+ }
+ if(ps_ctxt->i4_use_const_lamda_modifier)
+ {
+ if(ISLICE == ps_ctxt->i1_slice_type)
+ {
+ i4_lamda_modifier = ps_ctxt->f_i_pic_lamda_modifier;
+ i4_lamda_modifier_uv = ps_ctxt->f_i_pic_lamda_modifier;
+ }
+ else
+ {
+ i4_lamda_modifier = CONST_LAMDA_MOD_VAL;
+ i4_lamda_modifier_uv = CONST_LAMDA_MOD_VAL;
+ }
+ }
+
+ do
+ {
+ memset(
+ ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[trans_size >> 3],
+ 0,
+ trans_size * trans_size * sizeof(WORD32));
+ memset(
+ ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[trans_size >> 3],
+ 0,
+ trans_size * trans_size * sizeof(WORD32));
+
+ /*ps_ctxt->i4_quant_rnd_factor[intra_flag], is currently not used */
+ ihevce_quant_rounding_factor_gen(
+ trans_size,
+ 1, //is_luma = 1
+ &ps_ctxt->s_rdopt_entropy_ctxt,
+ ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[trans_size >> 3],
+ ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[trans_size >> 3],
+ i4_lamda_modifier,
+ 0); //is_tu_level_quant rounding = 0
+
+ trans_size = trans_size >> 1;
+
+ } while(trans_size >= 4);
+
+ /*CHROMA Quant Rounding is to be enabled with CU/TU/CTB/NCTB Luma rounding */
+ /*Please note chroma is calculated only for 1st TU at TU level Rounding */
+ if(ps_ctxt->i4_chroma_quant_rounding_level == CHROMA_QUANT_ROUNDING)
+ {
+ do
+ {
+ memset(
+ ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size_cr >> 3],
+ 0,
+ trans_size_cr * trans_size_cr * sizeof(WORD32));
+ memset(
+ ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size_cr >> 3],
+ 0,
+ trans_size_cr * trans_size_cr * sizeof(WORD32));
+
+ ihevce_quant_rounding_factor_gen(
+ trans_size_cr,
+ 0, //is_luma = 0
+ &ps_ctxt->s_rdopt_entropy_ctxt,
+ ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size_cr >> 3],
+ ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size_cr >> 3],
+ i4_lamda_modifier_uv,
+ 0); //is_tu_level_quant rounding = 0
+
+ trans_size_cr = trans_size_cr >> 1;
+
+ } while(trans_size_cr >= 4);
+ }
+ }
+
+#if DISABLE_INTRAS_IN_BPIC
+ if((ps_ctxt->i1_slice_type == BSLICE) && (ps_cu_analyse->u1_num_inter_cands))
+ {
+ ps_cu_analyse->u1_num_intra_rdopt_cands = 0;
+ }
+#endif
+
+ rdopt_best_cost = ihevce_cu_mode_decide(
+ ps_ctxt,
+ ps_cu_prms,
+ ps_cu_analyse,
+ ps_final_mode_state,
+ pu1_ecd_data,
+ ps_col_pu,
+ pu1_col_pu_map,
+ col_start_pu_idx);
+
+ return rdopt_best_cost;
+}
+
+/**
+******************************************************************************
+* \if Function name : ihevce_enc_loop_cu_bot_copy \endif
+*
+* \brief
+*    Copies, for the width of one CU, the last recon row of the CTB (luma and
+*    interleaved cb/cr chroma) and the last row of the CTB level 4x4 neighbour
+*    array into the bottom row buffers held in the enc loop context
+*
+* \param[in] ps_ctxt : enc loop context (bottom row buffers, CTB nbr array,
+*                      chroma array type)
+* \param[in] ps_cu_prms : recon pointers, recon strides and CTB size
+* \param[in] ps_enc_out_ctxt : CU node whose u1_cu_size gives the copy width
+* \param[in] curr_cu_pos_in_row : offset of the CU inside the row buffers
+* \param[in] curr_cu_pos_in_ctb : offset of the CU inside the CTB recon row
+*
+* \date
+*  18/09/2012
+*
+* \author
+*  Ittiam
+*
+* \return
+*  None
+*
+******************************************************************************
+*/
+void ihevce_enc_loop_cu_bot_copy(
+    ihevce_enc_loop_ctxt_t *ps_ctxt,
+    enc_loop_cu_prms_t *ps_cu_prms,
+    ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt,
+    WORD32 curr_cu_pos_in_row,
+    WORD32 curr_cu_pos_in_ctb)
+{
+    /* 0 when chroma array type is 2 (4:2:2), 1 otherwise; used as the shift */
+    /* applied to the CTB size to locate the last chroma recon row           */
+    WORD32 chroma_ht_shift = (2 != ps_ctxt->u1_chroma_array_type);
+
+    /* number of entries in one row of the CTB level 4x4 neighbour array */
+    WORD32 nbr_row_pitch = ps_cu_prms->i4_ctb_size >> 2;
+
+    /* destination : bottom row buffers, at the position of this CU */
+    UWORD8 *pu1_dst_luma = ((UWORD8 *)ps_ctxt->pv_bot_row_luma) + curr_cu_pos_in_row;
+    UWORD8 *pu1_dst_chroma = ((UWORD8 *)ps_ctxt->pv_bot_row_chroma) + curr_cu_pos_in_row;
+    nbr_4x4_t *ps_dst_nbr = ps_ctxt->ps_bot_row_nbr + (curr_cu_pos_in_row >> 2);
+
+    /* source : start of the last luma recon row of the CTB */
+    UWORD8 *pu1_src_luma =
+        ps_cu_prms->pu1_luma_recon +
+        (ps_cu_prms->i4_luma_recon_stride * (ps_cu_prms->i4_ctb_size - 1));
+
+    /* source : start of the last chroma recon row (cb and cr pixel interleaved) */
+    UWORD8 *pu1_src_chroma =
+        ps_cu_prms->pu1_chrm_recon +
+        (ps_cu_prms->i4_chrm_recon_stride *
+         ((ps_cu_prms->i4_ctb_size >> chroma_ht_shift) - 1));
+
+    /* source : start of the last row of the CTB level neighbour array */
+    nbr_4x4_t *ps_src_nbr =
+        &ps_ctxt->as_ctb_nbr_arr[0] + (((ps_cu_prms->i4_ctb_size >> 2) - 1) * nbr_row_pitch);
+
+    /* bottom luma row, CU width in bytes */
+    memcpy(pu1_dst_luma, pu1_src_luma + curr_cu_pos_in_ctb, ps_enc_out_ctxt->u1_cu_size);
+
+    /* bottom chroma row, same byte count as luma */
+    memcpy(pu1_dst_chroma, pu1_src_chroma + curr_cu_pos_in_ctb, ps_enc_out_ctxt->u1_cu_size);
+
+    /* bottom row of 4x4 neighbour entries covered by the CU */
+    memcpy(
+        ps_dst_nbr,
+        ps_src_nbr + (curr_cu_pos_in_ctb >> 2),
+        (ps_enc_out_ctxt->u1_cu_size >> 2) * sizeof(nbr_4x4_t));
+}
+
+/**
+******************************************************************************
+* \if Function name : ihevce_update_final_cu_results \endif
+*
+* \brief
+*    Fills the next final CU output entry (position, size, PU and TU pointers)
+*    from the given CU node, calls ihevce_store_cu_final and then advances the
+*    PU / TU / ecd data / CU output pointers and counters by what this CU used.
+*    When the CU ends on the last row of the current CTB, the bottom data is
+*    copied to the row buffers and the CU top-right dependency is set
+*
+* \param[in] ps_ctxt : enc loop context
+* \param[in] ps_enc_out_ctxt : CU node being written out
+* \param[in] ps_cu_prms : CU params (CTB size is read from here)
+* \param[in,out] pps_row_col_pu : advanced by the CU's PU count; may be NULL
+* \param[in,out] pi4_col_pu_map_idx : advanced by the CU's PU count; may be NULL
+* \param[in,out] ps_cu_update_prms : holds the output pointers and counters
+*                                    that are advanced
+* \param[in] ctb_ctr : multiplied by the CTB size to get the CU position in row
+* \param[in] vert_ctb_ctr : passed on to ihevce_dmgr_set_row_row_sync
+*
+* \return
+*  None
+*
+* \author
+*  Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_update_final_cu_results(
+    ihevce_enc_loop_ctxt_t *ps_ctxt,
+    ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt,
+    enc_loop_cu_prms_t *ps_cu_prms,
+    pu_col_mv_t **pps_row_col_pu,
+    WORD32 *pi4_col_pu_map_idx,
+    cu_final_update_prms *ps_cu_update_prms,
+    WORD32 ctb_ctr,
+    WORD32 vert_ctb_ctr)
+{
+    /* output entry for this CU and the row level cursors that follow it */
+    cu_enc_loop_out_t *ps_cu_out = *ps_cu_update_prms->pps_cu_final;
+    pu_t **pps_pu_cursor = ps_cu_update_prms->pps_row_pu;
+    tu_enc_loop_out_t **pps_tu_cursor = ps_cu_update_prms->pps_row_tu;
+    UWORD8 **ppu1_ecd_cursor = ps_cu_update_prms->ppu1_row_ecd_data;
+    WORD32 *pi4_pu_count = ps_cu_update_prms->pi4_num_pus_in_ctb;
+    UWORD32 u4_cu_dim = ps_enc_out_ctxt->u1_cu_size;
+    WORD32 cu_x_in_row;
+
+    /* CU position (8 pel units) and size (8 pel units) */
+    ps_cu_out->b3_cu_pos_x = ps_enc_out_ctxt->b3_cu_pos_x;
+    ps_cu_out->b3_cu_pos_y = ps_enc_out_ctxt->b3_cu_pos_y;
+    ps_cu_out->b4_cu_size = ps_enc_out_ctxt->u1_cu_size >> 3;
+
+    /* the CU's PUs and TUs start at the current row cursors */
+    ps_cu_out->ps_pu = *pps_pu_cursor;
+    ps_cu_out->ps_enc_tu = *pps_tu_cursor;
+
+    /* horizontal position of the CU within the CTB row, in pels */
+    cu_x_in_row = (ps_cu_out->b3_cu_pos_x << 3) + (ctb_ctr * ps_cu_prms->i4_ctb_size);
+
+    ihevce_store_cu_final(ps_ctxt, ps_cu_out, *ppu1_ecd_cursor, ps_enc_out_ctxt, ps_cu_prms);
+
+    /* colocated PU pointer and map index are optional */
+    if(pps_row_col_pu != NULL)
+    {
+        *pps_row_col_pu += ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
+    }
+    if(pi4_col_pu_map_idx != NULL)
+    {
+        *pi4_col_pu_map_idx += ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
+    }
+
+    /* move every output cursor / counter past this CU */
+    *pi4_pu_count += ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
+    *pps_tu_cursor += ps_cu_out->u2_num_tus_in_cu;
+    *pps_pu_cursor += ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
+    *ppu1_ecd_cursor += ps_enc_out_ctxt->ps_cu_prms->i4_num_bytes_ecd_data;
+
+    (*ps_cu_update_prms->pps_cu_final)++;
+    (*ps_cu_update_prms->pu1_num_cus_in_ctb_out)++;
+
+    /* Nothing more to do unless the CU lies in the bottom row of the CTB */
+    if(((ps_cu_out->b3_cu_pos_y << 3) + u4_cu_dim) != ps_ctxt->u4_cur_ctb_ht)
+    {
+        return;
+    }
+
+    /* copy the bottom data to row buffers */
+    ((pf_enc_loop_cu_bot_copy)ps_ctxt->pv_enc_loop_cu_bot_copy)(
+        ps_ctxt,
+        ps_cu_prms,
+        ps_enc_out_ctxt,
+        cu_x_in_row,
+        (ps_enc_out_ctxt->b3_cu_pos_x << 3));
+
+    /* Setting Dependency for CU TopRight */
+    ihevce_dmgr_set_row_row_sync(
+        ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
+        (cu_x_in_row + ps_enc_out_ctxt->u1_cu_size),
+        vert_ctb_ctr,
+        ps_ctxt->i4_tile_col_idx /* Col Tile No. */);
+
+    /* Setting Dependency for Entropy to consume is made at CTB level */
+}
+
+/**
+******************************************************************************
+* \if Function name : ihevce_cu_recurse_decide \endif
+*
+* \brief
+* Coding Unit mode decide function. Performs RD opt and decides the best mode
+*
+* \param[in] pv_ctxt : pointer to enc_loop module
+* \param[in] ps_cu_prms : pointer to coding unit params (position, buffer pointers)
+* \param[in] ps_cu_analyse : pointer to cu analyse
+* \param[out] ps_cu_final : pointer to cu final
+* \param[out] pu1_ecd_data :pointer to store coeff data for ECD
+* \param[in,out] pps_col_pu : colocated pu buffer pointer
+* \param[out] pu1_col_pu_map : colocated pu map buffer pointer
+* \param[in,out] pi4_col_start_pu_idx : pu index start value
+*
+* \return
+* WORD32; the recursive call site adds this value to num_children_encoded
+*
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+WORD32 ihevce_cu_recurse_decide(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ enc_loop_cu_prms_t *ps_cu_prms,
+ cur_ctb_cu_tree_t *ps_cu_tree_analyse,
+ cur_ctb_cu_tree_t *ps_cu_tree_analyse_parent,
+ ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
+ me_ctb_data_t *ps_cu_me_data,
+ pu_col_mv_t **pps_col_pu,
+ cu_final_update_prms *ps_cu_update_prms,
+ UWORD8 *pu1_col_pu_map,
+ WORD32 *pi4_col_start_pu_idx,
+ WORD32 i4_tree_depth,
+ WORD32 i4_ctb_x_off,
+ WORD32 i4_ctb_y_off,
+ WORD32 cur_ctb_ht)
+{
+ cur_ctb_cu_tree_t *ps_cu_tree_analyse_child[4];
+ final_mode_state_t s_final_mode_state;
+
+ WORD32 i;
+ WORD32 child_nodes_null;
+ LWORD64 i8_least_child_cost;
+
+ WORD32 num_children_encoded = 0;
+
+ /* Take backup of collocated start PU index for parent node rdo for PQ */
+ WORD32 i4_col_pu_idx_bkup = *pi4_col_start_pu_idx;
+ pu_col_mv_t *ps_col_mv_bkup = *pps_col_pu;
+
+#if ENABLE_CU_SPLIT_FLAG_RATE_ESTIMATION
+ WORD32 x0_frm = i4_ctb_x_off + (ps_cu_tree_analyse->b3_cu_pos_x << 3);
+ WORD32 y0_frm = i4_ctb_y_off + (ps_cu_tree_analyse->b3_cu_pos_y << 3);
+ WORD32 pic_wd = ps_ctxt->s_sao_ctxt_t.ps_sps->i2_pic_width_in_luma_samples;
+ WORD32 pic_ht = ps_ctxt->s_sao_ctxt_t.ps_sps->i2_pic_height_in_luma_samples;
+ WORD32 log2_min_cb_size = ps_ctxt->s_sao_ctxt_t.ps_sps->i1_log2_min_coding_block_size;
+ WORD32 cu_size = ps_cu_tree_analyse->u1_cu_size;
+
+ /* bits for coding split_cu_flag = 1 */
+ WORD32 split_cu1_bits_q12 = 0;
+
+ /* bits for coding split_cu_flag = 0 */
+ WORD32 split_cu0_bits_q12 = 0;
+#endif
+
+ UWORD8 u1_is_cu_noisy = ps_ctxt->u1_is_stasino_enabled
+ ? ihevce_determine_cu_noise_based_on_8x8Blk_data(
+ ps_cu_prms->pu1_is_8x8Blk_noisy,
+ ((ps_cu_tree_analyse->b3_cu_pos_x << 3) >> 4) << 4,
+ ((ps_cu_tree_analyse->b3_cu_pos_y << 3) >> 4) << 4,
+ MAX(16, ps_cu_tree_analyse->u1_cu_size))
+ : 0;
+
+#if ENABLE_CU_SPLIT_FLAG_RATE_ESTIMATION
+ LWORD64 i8_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
+#endif
+
+ (void)ps_cu_tree_analyse_parent;
+
+#if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
+ if(!ps_ctxt->u1_enable_psyRDOPT && u1_is_cu_noisy)
+ {
+ ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
+ ps_ctxt->i8_cl_ssd_lambda_chroma_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
+ }
+#endif
+
+ if(u1_is_cu_noisy && !ps_ctxt->u1_enable_psyRDOPT)
+ {
+ i8_lambda_qf = ((float)i8_lambda_qf * (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
+ }
+
+ ps_cu_tree_analyse_child[0] = ps_cu_tree_analyse->ps_child_node_tl;
+ ps_cu_tree_analyse_child[1] = ps_cu_tree_analyse->ps_child_node_tr;
+ ps_cu_tree_analyse_child[2] = ps_cu_tree_analyse->ps_child_node_bl;
+ ps_cu_tree_analyse_child[3] = ps_cu_tree_analyse->ps_child_node_br;
+
+ child_nodes_null =
+ ((ps_cu_tree_analyse_child[0] == NULL) + (ps_cu_tree_analyse_child[1] == NULL) +
+ (ps_cu_tree_analyse_child[2] == NULL) + (ps_cu_tree_analyse_child[3] == NULL));
+
+#if ENABLE_CU_SPLIT_FLAG_RATE_ESTIMATION
+#if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
+ if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
+#endif
+ {
+ /*----------------------------------------------*/
+ /* ---------- CU Depth Bit Estimation --------- */
+ /*----------------------------------------------*/
+
+ /* Encode cu split flags based on following conditions; See section 7.3.8*/
+ if(((x0_frm + cu_size) <= pic_wd) && ((y0_frm + cu_size) <= pic_ht) &&
+ (cu_size > (1 << log2_min_cb_size))) /* &&(ps_entropy_ctxt->i1_ctb_num_pcm_blks == 0)) */
+ {
+ WORD32 left_cu_depth = 0;
+ WORD32 top_cu_depth = 0;
+ WORD32 pos_x_4x4 = ps_cu_tree_analyse->b3_cu_pos_x << 1;
+ WORD32 pos_y_4x4 = ps_cu_tree_analyse->b3_cu_pos_y << 1;
+ WORD32 num_4x4_in_ctb = (ps_cu_prms->i4_ctb_size >> 2);
+ WORD32 cur_4x4_in_ctb = pos_x_4x4 + (pos_y_4x4 * num_4x4_in_ctb);
+ UWORD8 u1_split_cu_flag_cab_model;
+ WORD32 split_cu_ctxt_inc;
+
+ /* Left and Top CU depth is required for cabac context */
+
+ /* CU left */
+ if(0 == pos_x_4x4)
+ {
+ /* CTB boundary */
+ if(i4_ctb_x_off)
+ {
+ left_cu_depth = ps_ctxt->as_left_col_nbr[pos_y_4x4].b2_cu_depth;
+ }
+ }
+ else
+ {
+ /* inside CTB */
+ left_cu_depth = ps_ctxt->as_ctb_nbr_arr[cur_4x4_in_ctb - 1].b2_cu_depth;
+ }
+
+ /* CU top */
+ if(0 == pos_y_4x4)
+ {
+ /* CTB boundary */
+ if(i4_ctb_y_off)
+ {
+ /* Wait till top cu depth is available */
+ ihevce_dmgr_chk_row_row_sync(
+ ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
+ (i4_ctb_x_off) + (pos_x_4x4 << 2),
+ 4,
+ ((i4_ctb_y_off >> 6) - 1),
+ ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
+ ps_ctxt->thrd_id);
+
+ top_cu_depth =
+ ps_ctxt->ps_top_row_nbr[(i4_ctb_x_off >> 2) + pos_x_4x4].b2_cu_depth;
+ }
+ }
+ else
+ {
+ /* inside CTB */
+ top_cu_depth = ps_ctxt->as_ctb_nbr_arr[cur_4x4_in_ctb - num_4x4_in_ctb].b2_cu_depth;
+ }
+
+ split_cu_ctxt_inc = IHEVC_CAB_SPLIT_CU_FLAG + (left_cu_depth > i4_tree_depth) +
+ (top_cu_depth > i4_tree_depth);
+
+ u1_split_cu_flag_cab_model =
+ ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][split_cu_ctxt_inc];
+
+ /* bits for coding split_cu_flag = 1 */
+ split_cu1_bits_q12 = gau2_ihevce_cabac_bin_to_bits[u1_split_cu_flag_cab_model ^ 1];
+
+ /* bits for coding split_cu_flag = 0 */
+ split_cu0_bits_q12 = gau2_ihevce_cabac_bin_to_bits[u1_split_cu_flag_cab_model ^ 0];
+
+ /* update the cu split cabac context of all child nodes before evaluating child */
+ for(i = (i4_tree_depth + 1); i < 4; i++)
+ {
+ ps_ctxt->au1_rdopt_recur_ctxt_models[i][split_cu_ctxt_inc] =
+ gau1_ihevc_next_state[(u1_split_cu_flag_cab_model << 1) | 1];
+ }
+
+ /* update the cu split cabac context of the parent node with split flag = 0 */
+ ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][split_cu_ctxt_inc] =
+ gau1_ihevc_next_state[(u1_split_cu_flag_cab_model << 1) | 0];
+ }
+ }
+#endif
+
+ /* If all the child nodes are null, then do rdo for this node and return the cost */
+ if((1 == ps_cu_tree_analyse->is_node_valid) && (4 == child_nodes_null))
+ {
+ WORD32 i4_num_bytes_ecd_data;
+
+#if(PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS)
+ COPY_CABAC_STATES(
+ &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
+ &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][0],
+ IHEVC_CAB_CTXT_END * sizeof(UWORD8));
+#else
+ if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
+ {
+ COPY_CABAC_STATES(
+ &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
+ &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][0],
+ IHEVC_CAB_CTXT_END * sizeof(UWORD8));
+ }
+#endif
+
+ ps_cu_prms->u1_is_cu_noisy = u1_is_cu_noisy;
+ ihevce_update_pred_qp(
+ ps_ctxt, ps_cu_tree_analyse->b3_cu_pos_x, ps_cu_tree_analyse->b3_cu_pos_y);
+ /* DO rdo for current node here */
+ /* return rdo cost for current node*/
+ ps_cu_tree_analyse->i8_best_rdopt_cost = ihevce_compute_rdo(
+ ps_ctxt,
+ ps_cu_prms,
+ ps_cu_tree_analyse,
+ ps_cur_ipe_ctb,
+ ps_cu_me_data,
+ *pps_col_pu,
+ &s_final_mode_state,
+ pu1_col_pu_map,
+ *ps_cu_update_prms->ppu1_row_ecd_data,
+ *pi4_col_start_pu_idx,
+ i4_ctb_x_off,
+ i4_ctb_y_off);
+
+ if((((ps_cu_tree_analyse->b3_cu_pos_y << 3) + ps_cu_tree_analyse->u1_cu_size) ==
+ cur_ctb_ht) &&
+ (ps_cu_tree_analyse->b3_cu_pos_x == 0) && (i4_ctb_x_off == 0))
+ {
+ /* copy the state to row level context after 1st Cu, in the Last CU row of CTB */
+ /* copy current ctb CU states into a entropy sync state */
+ /* to be used for next row */
+ COPY_CABAC_STATES(
+ ps_ctxt->pu1_curr_row_cabac_state,
+ &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
+ IHEVC_CAB_CTXT_END * sizeof(UWORD8));
+ }
+
+#if(PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS)
+ {
+#if ENABLE_CU_SPLIT_FLAG_RATE_ESTIMATION
+ /* Add parent split cu = 0 cost signalling */
+ ps_cu_tree_analyse->i8_best_rdopt_cost += COMPUTE_RATE_COST_CLIP30(
+ split_cu0_bits_q12, i8_lambda_qf, (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
+#endif
+ for(i = (i4_tree_depth); i < 4; i++)
+ {
+ COPY_CABAC_STATES(
+ &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
+ &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
+ IHEVC_CAB_CTXT_END * sizeof(UWORD8));
+ }
+ }
+#else
+ if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
+ {
+#if ENABLE_CU_SPLIT_FLAG_RATE_ESTIMATION
+ /* Add parent split cu = 0 cost signalling */
+ ps_cu_tree_analyse->i8_best_rdopt_cost += COMPUTE_RATE_COST_CLIP30(
+ split_cu0_bits_q12, i8_lambda_qf, (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
+#endif
+
+ for(i = (i4_tree_depth); i < 4; i++)
+ {
+ COPY_CABAC_STATES(
+ &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
+ &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
+ IHEVC_CAB_CTXT_END * sizeof(UWORD8));
+ }
+ }
+#endif
+
+ ((pf_store_cu_results)ps_ctxt->pv_store_cu_results)(
+ ps_ctxt, ps_cu_prms, &s_final_mode_state);
+
+#if(!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS)
+ if(ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P2)
+ {
+ ihevce_update_final_cu_results(
+ ps_ctxt,
+ ps_ctxt->ps_enc_out_ctxt,
+ ps_cu_prms,
+ pps_col_pu,
+ pi4_col_start_pu_idx,
+ ps_cu_update_prms,
+ i4_ctb_x_off >> 6,
+ i4_ctb_y_off >> 6);
+ }
+ else
+ {
+ /* ---- copy the luma & chroma coeffs to final output -------- */
+ i4_num_bytes_ecd_data = ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->i4_num_bytes_ecd_data;
+
+ if(0 != i4_num_bytes_ecd_data)
+ {
+ memcpy(
+ ps_ctxt->pu1_ecd_data,
+ &ps_ctxt->pu1_cu_recur_coeffs[0],
+ i4_num_bytes_ecd_data * sizeof(UWORD8));
+
+ ps_ctxt->pu1_ecd_data += i4_num_bytes_ecd_data;
+ }
+
+ /* Collocated PU updates */
+ *pps_col_pu += ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
+ *pi4_col_start_pu_idx += ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
+ }
+#else
+ /* ---- copy the luma & chroma coeffs to final output -------- */
+ i4_num_bytes_ecd_data = ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->i4_num_bytes_ecd_data;
+ if(0 != i4_num_bytes_ecd_data)
+ {
+ memcpy(
+ ps_ctxt->pu1_ecd_data,
+ &ps_ctxt->pu1_cu_recur_coeffs[0],
+ i4_num_bytes_ecd_data * sizeof(UWORD8));
+
+ ps_ctxt->pu1_ecd_data += i4_num_bytes_ecd_data;
+ }
+
+ /* Collocated PU updates */
+ *pps_col_pu += ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
+ *pi4_col_start_pu_idx += ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
+#endif
+
+ ps_ctxt->ps_enc_out_ctxt++;
+ num_children_encoded++;
+ }
+ else
+ {
+ i8_least_child_cost = 0;
+
+ for(i = 0; i < 4; i++)
+ {
+ if(ps_cu_tree_analyse_child[i] != NULL)
+ {
+ num_children_encoded += ihevce_cu_recurse_decide(
+ ps_ctxt,
+ ps_cu_prms,
+ ps_cu_tree_analyse_child[i],
+ ps_cu_tree_analyse,
+ ps_cur_ipe_ctb,
+ ps_cu_me_data,
+ pps_col_pu,
+ ps_cu_update_prms,
+ pu1_col_pu_map,
+ pi4_col_start_pu_idx,
+ i4_tree_depth + 1,
+ i4_ctb_x_off,
+ i4_ctb_y_off,
+ cur_ctb_ht);
+
+ /* In case of incomplete ctb, */
+ //if(MAX_COST != ps_cu_tree_analyse_child[i]->i4_best_rdopt_cost)
+ if(((ULWORD64)(
+ i8_least_child_cost + ps_cu_tree_analyse_child[i]->i8_best_rdopt_cost)) >
+ MAX_COST_64)
+ {
+ i8_least_child_cost = MAX_COST_64;
+ }
+ else
+ {
+ i8_least_child_cost += ps_cu_tree_analyse_child[i]->i8_best_rdopt_cost;
+ }
+ }
+ else
+ {
+ /* If the child node is NULL, return MAX_COST*/
+ i8_least_child_cost = MAX_COST_64;
+ }
+ }
+
+ if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
+ {
+#if !ENABLE_4CTB_EVALUATION
+ if((ps_cu_tree_analyse->u1_cu_size == 64) && (num_children_encoded > 10) &&
+ (ps_ctxt->i1_slice_type != ISLICE))
+ {
+ ps_cu_tree_analyse->is_node_valid = 0;
+ }
+#endif
+ }
+
+ /* If current CU node is valid, do rdo for the node and decide btwn child nodes and parent nodes */
+ if(ps_cu_tree_analyse->is_node_valid)
+ {
+ UWORD8 au1_cu_pu_map[(MAX_CTB_SIZE / MIN_PU_SIZE) * (MAX_CTB_SIZE / MIN_PU_SIZE)];
+ pu_col_mv_t as_col_mv[2]; /* Max of 2 PUs only per CU */
+
+ WORD32 i4_col_pu_idx_start = i4_col_pu_idx_bkup;
+
+ /* Copy the collocated PU map to the local array */
+ memcpy(
+ au1_cu_pu_map,
+ pu1_col_pu_map,
+ (MAX_CTB_SIZE / MIN_PU_SIZE) * (MAX_CTB_SIZE / MIN_PU_SIZE));
+
+#if(PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS)
+ COPY_CABAC_STATES(
+ &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
+ &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][0],
+ IHEVC_CAB_CTXT_END * sizeof(UWORD8));
+
+ /* Reset the nbr maps while computing Parent CU node ()*/
+ /* set the neighbour map to 0 */
+ ihevce_set_nbr_map(
+ ps_ctxt->pu1_ctb_nbr_map,
+ ps_ctxt->i4_nbr_map_strd,
+ (ps_cu_tree_analyse->b3_cu_pos_x << 1),
+ (ps_cu_tree_analyse->b3_cu_pos_y << 1),
+ (ps_cu_tree_analyse->u1_cu_size >> 2),
+ 0);
+#else
+ if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
+ {
+ COPY_CABAC_STATES(
+ &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
+ &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][0],
+ IHEVC_CAB_CTXT_END * sizeof(UWORD8));
+
+ /* Reset the nbr maps while computing Parent CU node ()*/
+ /* set the neighbour map to 0 */
+ ihevce_set_nbr_map(
+ ps_ctxt->pu1_ctb_nbr_map,
+ ps_ctxt->i4_nbr_map_strd,
+ (ps_cu_tree_analyse->b3_cu_pos_x << 1),
+ (ps_cu_tree_analyse->b3_cu_pos_y << 1),
+ (ps_cu_tree_analyse->u1_cu_size >> 2),
+ 0);
+ }
+#endif
+
+ /* Do rdo for the parent node */
+ /* Compare parent node cost vs child node costs */
+ ps_ctxt->is_parent_cu_rdopt = 1;
+
+ ps_cu_prms->u1_is_cu_noisy = u1_is_cu_noisy;
+
+ ihevce_update_pred_qp(
+ ps_ctxt, ps_cu_tree_analyse->b3_cu_pos_x, ps_cu_tree_analyse->b3_cu_pos_y);
+
+ ps_cu_tree_analyse->i8_best_rdopt_cost = ihevce_compute_rdo(
+ ps_ctxt,
+ ps_cu_prms,
+ ps_cu_tree_analyse,
+ ps_cur_ipe_ctb,
+ ps_cu_me_data,
+ as_col_mv,
+ &s_final_mode_state,
+ au1_cu_pu_map,
+ *ps_cu_update_prms->ppu1_row_ecd_data,
+ i4_col_pu_idx_start,
+ i4_ctb_x_off,
+ i4_ctb_y_off);
+
+ ps_ctxt->is_parent_cu_rdopt = 0;
+
+#if(PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS)
+ /* Add parent split cu cost signalling */
+ ps_cu_tree_analyse->i8_best_rdopt_cost += COMPUTE_RATE_COST_CLIP30(
+ split_cu0_bits_q12, i8_lambda_qf, (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
+
+ COPY_CABAC_STATES(
+ &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][0],
+ &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
+ IHEVC_CAB_CTXT_END * sizeof(UWORD8));
+
+ /* i8_least_child_cost += (num_children_encoded * ps_ctxt->i4_sad_lamda\
+ + ((1 << (LAMBDA_Q_SHIFT)))) >> (LAMBDA_Q_SHIFT + 1) */
+ ;
+ /* bits for coding cu split flag as 1 */
+ i8_least_child_cost += COMPUTE_RATE_COST_CLIP30(
+ split_cu1_bits_q12, i8_lambda_qf, (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
+#else
+#if ENABLE_CU_SPLIT_FLAG_RATE_ESTIMATION
+ if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
+ {
+ /* Add parent split cu cost signalling */
+ ps_cu_tree_analyse->i8_best_rdopt_cost += COMPUTE_RATE_COST_CLIP30(
+ split_cu0_bits_q12, i8_lambda_qf, (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
+
+ COPY_CABAC_STATES(
+ &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][0],
+ &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
+ IHEVC_CAB_CTXT_END * sizeof(UWORD8));
+
+ /* i8_least_child_cost += (num_children_encoded * ps_ctxt->i4_sad_lamda\
+ + ((1 << (LAMBDA_Q_SHIFT)))) >> (LAMBDA_Q_SHIFT + 1) */
+ ;
+ /* bits for coding cu split flag as 1 */
+ i8_least_child_cost += COMPUTE_RATE_COST_CLIP30(
+ split_cu1_bits_q12, i8_lambda_qf, (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
+ }
+#else
+ i8_least_child_cost +=
+ (num_children_encoded * ps_ctxt->i4_sad_lamda + ((1 << (LAMBDA_Q_SHIFT)))) >>
+ (LAMBDA_Q_SHIFT + 1);
+#endif
+#endif
+
+ /* If child modes win over parent, discard parent enc ctxt */
+ /* else discard child ctxt */
+ if(ps_cu_tree_analyse->i8_best_rdopt_cost > i8_least_child_cost)
+ {
+#if(PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS)
+ /* Store child node Models for evalution of next CU */
+ for(i = (i4_tree_depth); i < 4; i++)
+ {
+ COPY_CABAC_STATES(
+ &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
+ &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth + 1][0],
+ IHEVC_CAB_CTXT_END * sizeof(UWORD8));
+ }
+ /* Reset cabac states if child has won */
+ COPY_CABAC_STATES(
+ &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
+ &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth + 1][0],
+ IHEVC_CAB_CTXT_END * sizeof(UWORD8));
+#else
+ if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
+ {
+ for(i = i4_tree_depth; i < 4; i++)
+ {
+ COPY_CABAC_STATES(
+ &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
+ &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth + 1][0],
+ IHEVC_CAB_CTXT_END * sizeof(UWORD8));
+ }
+ /* Reset cabac states if child has won */
+ COPY_CABAC_STATES(
+ &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
+ &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth + 1][0],
+ IHEVC_CAB_CTXT_END * sizeof(UWORD8));
+ }
+#endif
+ ps_cu_tree_analyse->i8_best_rdopt_cost = i8_least_child_cost;
+ ps_cu_tree_analyse->is_node_valid = 0;
+ }
+ else
+ {
+ /* Parent node wins over child node */
+ ihevce_enc_cu_node_ctxt_t *ps_enc_tmp_out_ctxt;
+ WORD32 i4_num_bytes_ecd_data;
+ WORD32 num_child_nodes = 0;
+ WORD32 i4_num_pus_in_cu;
+
+ if((((ps_cu_tree_analyse->b3_cu_pos_y << 3) + ps_cu_tree_analyse->u1_cu_size) ==
+ cur_ctb_ht) &&
+ (ps_cu_tree_analyse->b3_cu_pos_x == 0) && (i4_ctb_x_off == 0))
+ {
+ /* copy the state to row level context after 1st Cu, in the Last CU row of CTB */
+ /* copy current ctb CU states into a entropy sync state */
+ /* to be used for next row */
+ COPY_CABAC_STATES(
+ ps_ctxt->pu1_curr_row_cabac_state,
+ &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
+ IHEVC_CAB_CTXT_END * sizeof(UWORD8));
+ }
+
+#if(PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS)
+ /* Store parent node Models for evaluation of next CU */
+ for(i = (i4_tree_depth + 1); i < 4; i++)
+ {
+ COPY_CABAC_STATES(
+ &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
+ &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][0],
+ IHEVC_CAB_CTXT_END * sizeof(UWORD8));
+ }
+#else
+ if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
+ {
+ for(i = (i4_tree_depth + 1); i < 4; i++)
+ {
+ COPY_CABAC_STATES(
+ &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
+ &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth][0],
+ IHEVC_CAB_CTXT_END * sizeof(UWORD8));
+ }
+ }
+#endif
+ ((pf_store_cu_results)ps_ctxt->pv_store_cu_results)(
+ ps_ctxt, ps_cu_prms, &s_final_mode_state);
+
+#if(!PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS)
+ if(ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P2)
+ {
+ ihevce_update_final_cu_results(
+ ps_ctxt,
+ ps_ctxt->ps_enc_out_ctxt,
+ ps_cu_prms,
+ pps_col_pu,
+ pi4_col_start_pu_idx,
+ ps_cu_update_prms,
+ i4_ctb_x_off >> 6,
+ i4_ctb_y_off >> 6);
+
+ ps_ctxt->ps_enc_out_ctxt++;
+ }
+ else
+ {
+ ps_enc_tmp_out_ctxt = ps_ctxt->ps_enc_out_ctxt;
+
+ num_child_nodes = num_children_encoded;
+
+ /* ---- copy the luma & chroma coeffs to final output -------- */
+ for(i = 0; i < num_child_nodes; i++)
+ {
+ i4_num_bytes_ecd_data =
+ (ps_ctxt->ps_enc_out_ctxt - i - 1)->ps_cu_prms->i4_num_bytes_ecd_data;
+ ps_ctxt->pu1_ecd_data -= i4_num_bytes_ecd_data;
+ }
+
+ i4_num_bytes_ecd_data =
+ ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->i4_num_bytes_ecd_data;
+ if(0 != i4_num_bytes_ecd_data)
+ {
+ memcpy(
+ ps_ctxt->pu1_ecd_data,
+ &ps_ctxt->pu1_cu_recur_coeffs[0],
+ i4_num_bytes_ecd_data);
+
+ ps_ctxt->pu1_ecd_data += i4_num_bytes_ecd_data;
+ }
+
+ ps_enc_tmp_out_ctxt = ps_ctxt->ps_enc_out_ctxt - num_child_nodes;
+
+ memcpy(
+ ps_enc_tmp_out_ctxt,
+ ps_ctxt->ps_enc_out_ctxt,
+ sizeof(ihevce_enc_cu_node_ctxt_t));
+ ps_enc_tmp_out_ctxt->ps_cu_prms = &ps_enc_tmp_out_ctxt->s_cu_prms;
+
+ /* Collocated PU updates */
+ i4_num_pus_in_cu = ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
+ /* Copy the collocated MVs and the PU map to frame buffers */
+ memcpy(ps_col_mv_bkup, as_col_mv, sizeof(pu_col_mv_t) * i4_num_pus_in_cu);
+ memcpy(
+ pu1_col_pu_map,
+ au1_cu_pu_map,
+ (MAX_CTB_SIZE / MIN_PU_SIZE) * (MAX_CTB_SIZE / MIN_PU_SIZE));
+ /* Update the frame buffer pointer and the map index */
+ *pps_col_pu = ps_col_mv_bkup + i4_num_pus_in_cu;
+ *pi4_col_start_pu_idx = i4_col_pu_idx_bkup + i4_num_pus_in_cu;
+
+ ps_ctxt->ps_enc_out_ctxt = ps_enc_tmp_out_ctxt + 1;
+ }
+#else
+
+ ps_enc_tmp_out_ctxt = ps_ctxt->ps_enc_out_ctxt;
+
+ num_child_nodes = num_children_encoded;
+
+ /* ---- copy the luma & chroma coeffs to final output -------- */
+ for(i = 0; i < num_child_nodes; i++)
+ {
+ i4_num_bytes_ecd_data =
+ (ps_ctxt->ps_enc_out_ctxt - i - 1)->ps_cu_prms->i4_num_bytes_ecd_data;
+ ps_ctxt->pu1_ecd_data -= i4_num_bytes_ecd_data;
+ }
+
+ i4_num_bytes_ecd_data = ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->i4_num_bytes_ecd_data;
+ if(0 != i4_num_bytes_ecd_data)
+ {
+ memcpy(
+ ps_ctxt->pu1_ecd_data,
+ &ps_ctxt->pu1_cu_recur_coeffs[0],
+ i4_num_bytes_ecd_data * sizeof(UWORD8));
+
+ ps_ctxt->pu1_ecd_data += i4_num_bytes_ecd_data;
+ }
+
+ ps_enc_tmp_out_ctxt = ps_ctxt->ps_enc_out_ctxt - num_child_nodes;
+
+ memcpy(
+ ps_enc_tmp_out_ctxt,
+ ps_ctxt->ps_enc_out_ctxt,
+ sizeof(ihevce_enc_cu_node_ctxt_t));
+
+ ps_enc_tmp_out_ctxt->ps_cu_prms = &ps_enc_tmp_out_ctxt->s_cu_prms;
+
+ /* Collocated PU updates */
+ i4_num_pus_in_cu = ps_ctxt->ps_enc_out_ctxt->ps_cu_prms->u2_num_pus_in_cu;
+ /* Copy the collocated MVs and the PU map to frame buffers */
+ memcpy(ps_col_mv_bkup, as_col_mv, sizeof(pu_col_mv_t) * i4_num_pus_in_cu);
+ memcpy(
+ pu1_col_pu_map,
+ au1_cu_pu_map,
+ (MAX_CTB_SIZE / MIN_PU_SIZE) * (MAX_CTB_SIZE / MIN_PU_SIZE));
+ /* Update the frame buffer pointer and the map index */
+ *pps_col_pu = ps_col_mv_bkup + i4_num_pus_in_cu;
+ *pi4_col_start_pu_idx = i4_col_pu_idx_bkup + i4_num_pus_in_cu;
+
+ ps_ctxt->ps_enc_out_ctxt = ps_enc_tmp_out_ctxt + 1;
+#endif
+
+ num_children_encoded = 1;
+ DISABLE_THE_CHILDREN_NODES(ps_cu_tree_analyse);
+ }
+ }
+ else /* if(ps_cu_tree_analyse->is_node_valid) */
+ {
+ ps_cu_tree_analyse->i8_best_rdopt_cost = i8_least_child_cost;
+
+ /* Tree depth of four will occur for Incomplete CTB */
+ if((i8_least_child_cost > 0) && (i4_tree_depth != 3))
+ {
+#if(PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS)
+ /* Store child node Models for evaluation of next CU */
+ for(i = i4_tree_depth; i < 4; i++)
+ {
+ COPY_CABAC_STATES(
+ &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
+ &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth + 1][0],
+ IHEVC_CAB_CTXT_END * sizeof(UWORD8));
+ }
+#else
+ if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
+ {
+ for(i = (i4_tree_depth); i < 4; i++)
+ {
+ COPY_CABAC_STATES(
+ &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
+ &ps_ctxt->au1_rdopt_recur_ctxt_models[i4_tree_depth + 1][0],
+ IHEVC_CAB_CTXT_END * sizeof(UWORD8));
+ }
+ }
+#endif
+ }
+ }
+ }
+
+ return num_children_encoded;
+}
+
+/*
+ * Folds the 8x8-level indicator array into a single flag for one CU.
+ *
+ * The CU is described by its size in pixels (u1_cu_size) and the position of
+ * its top-left corner in 8x8 units. For an 8x8 CU the result is the logical
+ * negation of the indicator entry at x + MAX_CU_IN_CTB_ROW * y, i.e. 1 when
+ * that entry is zero. A larger CU is split into four quadrants of half the
+ * size and the result is the bitwise OR of the four quadrant results, so it
+ * is non-zero as soon as any covered 8x8 entry is zero.
+ */
+static UWORD8 ihevce_intraData_availability_extractor(
+    WORD8 *pi1_8x8CULevel_intraData_availability_indicator,
+    UWORD8 u1_cu_size,
+    UWORD8 u1_x_8x8CU_units,
+    UWORD8 u1_y_8x8CU_units)
+{
+    UWORD8 u1_quadrant_size;
+    UWORD8 u1_quadrant_step;
+    UWORD8 u1_merged_flag = 0;
+    int quadrant;
+
+    /* Leaf of the recursion: a single entry of the indicator array */
+    if(8 == u1_cu_size)
+    {
+        return (!pi1_8x8CULevel_intraData_availability_indicator
+                    [u1_x_8x8CU_units + MAX_CU_IN_CTB_ROW * u1_y_8x8CU_units]);
+    }
+
+    u1_quadrant_size = u1_cu_size / 2;
+    /* Distance between quadrant origins, in 8x8 units */
+    u1_quadrant_step = u1_quadrant_size / 8;
+
+    /* Visit TL, TR, BL, BR: bit 0 selects the right half, bit 1 the bottom half */
+    for(quadrant = 0; quadrant < 4; quadrant++)
+    {
+        UWORD8 u1_quadrant_x = u1_x_8x8CU_units + (quadrant & 1) * u1_quadrant_step;
+        UWORD8 u1_quadrant_y = u1_y_8x8CU_units + (quadrant >> 1) * u1_quadrant_step;
+
+        u1_merged_flag |= ihevce_intraData_availability_extractor(
+            pi1_8x8CULevel_intraData_availability_indicator,
+            u1_quadrant_size,
+            u1_quadrant_x,
+            u1_quadrant_y);
+    }
+
+    return u1_merged_flag;
+}
+
+/*
+ * Recursively merges an intra CU tree and an inter CU tree into
+ * ps_merged_tree, node by node.
+ *
+ * For every visited node:
+ *  - when an intra node exists, its is_node_valid flag is first ANDed with
+ *    the result of ihevce_intraData_availability_extractor() for that CU;
+ *  - the merged node is valid when either source node is valid, and its
+ *    u1_intra_eval_enable / u1_inter_eval_enable flags mirror the validity of
+ *    the corresponding source node (0 when that source node is absent);
+ *  - the four children are visited in the order TL, TR, BL, BR. A child is
+ *    visited when at least one source tree has it; the missing source child,
+ *    if any, is passed down as NULL.
+ *
+ * ps_merged_tree is dereferenced unconditionally, and so is each of its
+ * children that gets visited, so the merged tree must provide a node wherever
+ * either source tree does.
+ *
+ * Passing NULL for both source trees is a caller error: it trips ASSERT and
+ * the function returns without touching ps_merged_tree.
+ */
+void ihevce_intra_and_inter_cuTree_merger(
+    cur_ctb_cu_tree_t *ps_merged_tree,
+    cur_ctb_cu_tree_t *ps_intra_tree,
+    cur_ctb_cu_tree_t *ps_inter_tree,
+    WORD8 *pi1_8x8CULevel_intraData_availability_indicator)
+{
+    /* Child nodes in visiting order: TL, TR, BL, BR */
+    cur_ctb_cu_tree_t *aps_merged_child[4];
+    cur_ctb_cu_tree_t *aps_intra_child[4] = { NULL, NULL, NULL, NULL };
+    cur_ctb_cu_tree_t *aps_inter_child[4] = { NULL, NULL, NULL, NULL };
+    int i;
+
+    if((NULL == ps_intra_tree) && (NULL == ps_inter_tree))
+    {
+        /* The swamps of Dagobah! */
+        ASSERT(0);
+
+        /* When ASSERT is compiled out, bail out instead of recursing on */
+        /* undefined child information and dereferencing NULL trees      */
+        return;
+    }
+
+    if(NULL != ps_intra_tree)
+    {
+        /* An intra node stays valid only if the extractor reports data for its CU */
+        ps_intra_tree->is_node_valid =
+            ps_intra_tree->is_node_valid &
+            ihevce_intraData_availability_extractor(
+                pi1_8x8CULevel_intraData_availability_indicator,
+                ps_intra_tree->u1_cu_size,
+                ps_intra_tree->b3_cu_pos_x & ((8 == ps_intra_tree->u1_cu_size) ? 0xfe : 0xff),
+                ps_intra_tree->b3_cu_pos_y & ((8 == ps_intra_tree->u1_cu_size) ? 0xfe : 0xff));
+
+        aps_intra_child[0] = ps_intra_tree->ps_child_node_tl;
+        aps_intra_child[1] = ps_intra_tree->ps_child_node_tr;
+        aps_intra_child[2] = ps_intra_tree->ps_child_node_bl;
+        aps_intra_child[3] = ps_intra_tree->ps_child_node_br;
+    }
+
+    if(NULL != ps_inter_tree)
+    {
+        aps_inter_child[0] = ps_inter_tree->ps_child_node_tl;
+        aps_inter_child[1] = ps_inter_tree->ps_child_node_tr;
+        aps_inter_child[2] = ps_inter_tree->ps_child_node_bl;
+        aps_inter_child[3] = ps_inter_tree->ps_child_node_br;
+    }
+
+    if((NULL != ps_intra_tree) && (NULL != ps_inter_tree))
+    {
+        /* Intra and inter nodes both present */
+        ps_merged_tree->is_node_valid = ps_intra_tree->is_node_valid ||
+                                        ps_inter_tree->is_node_valid;
+        ps_merged_tree->u1_inter_eval_enable = ps_inter_tree->is_node_valid;
+        ps_merged_tree->u1_intra_eval_enable = ps_intra_tree->is_node_valid;
+    }
+    else if(NULL != ps_intra_tree)
+    {
+        /* Only the intra node is present */
+        ps_merged_tree->is_node_valid = ps_intra_tree->is_node_valid;
+        ps_merged_tree->u1_inter_eval_enable = 0;
+        ps_merged_tree->u1_intra_eval_enable = ps_intra_tree->is_node_valid;
+    }
+    else
+    {
+        /* Only the inter node is present */
+        ps_merged_tree->is_node_valid = ps_inter_tree->is_node_valid;
+        ps_merged_tree->u1_inter_eval_enable = ps_inter_tree->is_node_valid;
+        ps_merged_tree->u1_intra_eval_enable = 0;
+    }
+
+    aps_merged_child[0] = ps_merged_tree->ps_child_node_tl;
+    aps_merged_child[1] = ps_merged_tree->ps_child_node_tr;
+    aps_merged_child[2] = ps_merged_tree->ps_child_node_bl;
+    aps_merged_child[3] = ps_merged_tree->ps_child_node_br;
+
+    for(i = 0; i < 4; i++)
+    {
+        /* Nothing to merge below this position when neither tree has a child */
+        if((NULL == aps_intra_child[i]) && (NULL == aps_inter_child[i]))
+        {
+            continue;
+        }
+
+        ihevce_intra_and_inter_cuTree_merger(
+            aps_merged_child[i],
+            aps_intra_child[i],
+            aps_inter_child[i],
+            pi1_8x8CULevel_intraData_availability_indicator);
+    }
+}
diff --git a/encoder/ihevce_enc_cu_recursion.h b/encoder/ihevce_enc_cu_recursion.h
new file mode 100644
index 0000000..d4bafcd
--- /dev/null
+++ b/encoder/ihevce_enc_cu_recursion.h
@@ -0,0 +1,108 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_enc_cu_recursion.h
+*
+* \brief
+* This file contains interface declarations of encoder normative loop pass
+* related functions
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_ENC_CU_RECURION_H_
+#define _IHEVCE_ENC_CU_RECURION_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+#define ENABLE_TREE_DUMP 0
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+void ihevce_store_cu_final(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ cu_enc_loop_out_t *ps_cu_final,
+ UWORD8 *pu1_ecd_data,
+ ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt,
+ enc_loop_cu_prms_t *ps_cu_prms);
+
+void ihevce_populate_cu_tree(
+ ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
+ cur_ctb_cu_tree_t *ps_cu_tree,
+ WORD32 tree_depth,
+ IHEVCE_QUALITY_CONFIG_T e_quality_preset,
+ CU_POS_T e_grandparent_blk_pos,
+ CU_POS_T e_parent_blk_pos,
+ CU_POS_T e_cur_blk_pos);
+
+void ihevce_update_final_cu_results(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt,
+ enc_loop_cu_prms_t *ps_cu_prms,
+ pu_col_mv_t **pps_row_col_pu,
+ WORD32 *pi4_col_pu_map_idx,
+ cu_final_update_prms *ps_cu_update_prms,
+ WORD32 ctb_ctr,
+ WORD32 vert_ctb_ctr);
+
+WORD32 ihevce_cu_recurse_decide(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ enc_loop_cu_prms_t *ps_cu_prms,
+ cur_ctb_cu_tree_t *ps_cu_tree_analyse,
+ cur_ctb_cu_tree_t *ps_cu_tree_analyse_parent,
+ ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb,
+ me_ctb_data_t *ps_cu_me_data,
+ pu_col_mv_t **pps_col_pu,
+ cu_final_update_prms *ps_cu_update_prms,
+ UWORD8 *pu1_col_pu_map,
+ WORD32 *pi4_col_start_pu_idx,
+ WORD32 i4_tree_depth,
+ WORD32 i4_ctb_x_off,
+ WORD32 i4_ctb_y_off,
+ WORD32 cur_ctb_ht);
+
+void ihevce_store_cu_results(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ enc_loop_cu_prms_t *ps_cu_prms,
+ final_mode_state_t *ps_final_state);
+
+void ihevce_enc_loop_cu_bot_copy(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ enc_loop_cu_prms_t *ps_cu_prms,
+ ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt,
+ WORD32 curr_cu_pos_in_row,
+ WORD32 curr_cu_pos_in_ctb);
+
+void ihevce_intra_and_inter_cuTree_merger(
+ cur_ctb_cu_tree_t *ps_merged_tree,
+ cur_ctb_cu_tree_t *ps_intra_tree,
+ cur_ctb_cu_tree_t *ps_inter_tree,
+ WORD8 *pi1_8x8CULevel_intraData_availability_indicator);
+
+#endif /* _IHEVCE_ENC_CU_RECURION_H_ */
diff --git a/encoder/ihevce_enc_loop_inter_mode_sifter.c b/encoder/ihevce_enc_loop_inter_mode_sifter.c
new file mode 100644
index 0000000..6a05cda
--- /dev/null
+++ b/encoder/ihevce_enc_loop_inter_mode_sifter.c
@@ -0,0 +1,2745 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*!
+******************************************************************************
+* \file ihevce_enc_loop_inter_mode_sifter.c
+*
+* \brief
+* This file contains functions for selecting best inter candidates for RDOPT evaluation
+*
+* \date
+* 10/09/2014
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+#include <limits.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_macros.h"
+#include "ihevc_debug.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_hle_interface.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_multi_thrd_funcs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_entropy_structs.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+#include "ihevce_ipe_instr_set_router.h"
+#include "ihevce_decomp_pre_intra_structs.h"
+#include "ihevce_decomp_pre_intra_pass.h"
+#include "ihevce_enc_loop_structs.h"
+#include "ihevce_global_tables.h"
+#include "ihevce_nbr_avail.h"
+#include "ihevce_enc_loop_utils.h"
+#include "ihevce_bs_compute_ctb.h"
+#include "ihevce_cabac_rdo.h"
+#include "ihevce_dep_mngr_interface.h"
+#include "ihevce_enc_loop_pass.h"
+#include "ihevce_rc_enc_structs.h"
+#include "ihevce_common_utils.h"
+#include "ihevce_stasino_helpers.h"
+
+#include "hme_datatype.h"
+#include "hme_common_defs.h"
+#include "hme_common_utils.h"
+#include "hme_interface.h"
+#include "hme_defs.h"
+#include "ihevce_me_instr_set_router.h"
+#include "hme_err_compute.h"
+#include "hme_globals.h"
+#include "ihevce_mv_pred.h"
+#include "ihevce_mv_pred_merge.h"
+#include "ihevce_inter_pred.h"
+#include "ihevce_enc_loop_inter_mode_sifter.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+/*
+ * Collects the distinct partition types found in the ME candidate list, in
+ * order of first appearance.
+ *
+ * Candidates are scanned from index 0 until either all u1_num_me_cands have
+ * been seen or u1_max_num_part_types_to_select distinct types have been
+ * recorded. For each new type, the type is stored in pu1_part_type_ref_cand
+ * and the index of the candidate that introduced it in pu1_idx_ref_cand.
+ * When the new type is SIZE_2Nx2N, *pu1_diff_skip_cand_flag is cleared and
+ * both skip merge index outputs are set to the slot the type was stored in.
+ *
+ * Returns the number of distinct partition types recorded.
+ */
+static WORD32 ihevce_get_num_part_types_in_me_cand_list(
+    cu_inter_cand_t *ps_me_cand_list,
+    UWORD8 *pu1_part_type_ref_cand,
+    UWORD8 *pu1_idx_ref_cand,
+    UWORD8 *pu1_diff_skip_cand_flag,
+    WORD8 *pi1_skip_cand_from_merge_idx,
+    WORD8 *pi1_final_skip_cand_merge_idx,
+    UWORD8 u1_max_num_part_types_to_select,
+    UWORD8 u1_num_me_cands)
+{
+    UWORD8 u1_cand_idx;
+    UWORD8 u1_num_recorded = 0;
+
+    for(u1_cand_idx = 0;
+        (u1_cand_idx < u1_num_me_cands) && (u1_num_recorded < u1_max_num_part_types_to_select);
+        u1_cand_idx++)
+    {
+        UWORD8 u1_part_type = ps_me_cand_list[u1_cand_idx].b3_part_size;
+        UWORD8 u1_slot = 0;
+
+        /* Look for this partition type among the ones recorded so far */
+        while((u1_slot < u1_num_recorded) && (pu1_part_type_ref_cand[u1_slot] != u1_part_type))
+        {
+            u1_slot++;
+        }
+
+        /* Stopped before the end of the recorded list => duplicate type */
+        if(u1_slot < u1_num_recorded)
+        {
+            continue;
+        }
+
+        if(SIZE_2Nx2N == u1_part_type)
+        {
+            *pu1_diff_skip_cand_flag = 0;
+            *pi1_skip_cand_from_merge_idx = u1_num_recorded;
+            *pi1_final_skip_cand_merge_idx = u1_num_recorded;
+        }
+
+        pu1_part_type_ref_cand[u1_num_recorded] = u1_part_type;
+        pu1_idx_ref_cand[u1_num_recorded] = u1_cand_idx;
+        u1_num_recorded++;
+    }
+
+    return u1_num_recorded;
+}
+
+/*
+ * Runs luma inter prediction for one PU and, on request, measures its error
+ * against the source.
+ *
+ * pf_luma_inter_pred_pu is always invoked and writes the prediction to
+ * pv_pred. The error function pf_sad_func is invoked only when
+ * u1_compute_error is non-zero and the prediction call returned IV_SUCCESS;
+ * the block size handed to it is derived from the PU as (b4_wd + 1) << 2 by
+ * (b4_ht + 1) << 2.
+ *
+ * Returns the value pf_sad_func stored through pi4_sad_grid, or INT_MAX when
+ * no error was requested or the prediction call did not succeed.
+ */
+static WORD32 ihevce_compute_inter_pred_and_cost(
+    inter_pred_ctxt_t *ps_mc_ctxt,
+    PF_LUMA_INTER_PRED_PU pf_luma_inter_pred_pu,
+    PF_SAD_FXN_T pf_sad_func,
+    pu_t *ps_pu,
+    void *pv_src,
+    void *pv_pred,
+    WORD32 i4_src_stride,
+    WORD32 i4_pred_stride,
+    UWORD8 u1_compute_error,
+    ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list)
+{
+    err_prms_t s_sad_args;
+    WORD32 i4_sad;
+    IV_API_CALL_STATUS_T e_pred_status;
+
+    e_pred_status = pf_luma_inter_pred_pu(ps_mc_ctxt, ps_pu, pv_pred, i4_pred_stride, 0);
+
+    /* No error wanted, or no usable prediction: report the maximum cost */
+    if((!u1_compute_error) || (IV_SUCCESS != e_pred_status))
+    {
+        return INT_MAX;
+    }
+
+    /* The same buffers are exposed through both the 8-bit and 16-bit pointers */
+    s_sad_args.pu1_inp = (UWORD8 *)pv_src;
+    s_sad_args.pu2_inp = (UWORD16 *)pv_src;
+    s_sad_args.i4_inp_stride = i4_src_stride;
+
+    s_sad_args.pu1_ref = (UWORD8 *)pv_pred;
+    s_sad_args.pu2_ref = (UWORD16 *)pv_pred;
+    s_sad_args.i4_ref_stride = i4_pred_stride;
+
+    s_sad_args.i4_blk_wd = (ps_pu->b4_wd + 1) << 2;
+    s_sad_args.i4_blk_ht = (ps_pu->b4_ht + 1) << 2;
+
+    s_sad_args.pi4_sad_grid = &i4_sad;
+    s_sad_args.ps_cmn_utils_optimised_function_list = ps_cmn_utils_optimised_function_list;
+
+    pf_sad_func(&s_sad_args);
+
+    return i4_sad;
+}
+/*
+ * Picks the lowest-cost merge candidate for one partition of a CU.
+ *
+ * Walks the first u1_num_cands entries of ps_prms->ps_list. Each candidate's
+ * motion, merge index and prediction mode are written into ps_pu_merge and a
+ * cost is obtained for it: either i4_me_cand_cost is reused (when
+ * u1_force_pred_evaluation is 0 and ihevce_compare_pu_mv_t() returns non-zero
+ * for the candidate versus ps_pu_me), or the candidate is predicted into a
+ * scratch buffer and measured with ihevce_compute_inter_pred_and_cost().
+ * A merge-index cost from COMPUTE_MERGE_IDX_COST is added to every cost below
+ * INT_MAX. When u1_is_cu_noisy and i4_alpha_stim_multiplier are both non-zero,
+ * a noise term and prediction variance are tracked alongside the cost.
+ *
+ * On return ps_pu_merge holds the mv, pred mode and merge index of the best
+ * candidate (if any was accepted), and the MERGE_DERIVED rows of
+ * pau1_best_pred_buf_id / pai4_noise_term / pau4_pred_variance are updated
+ * for u1_part_id.
+ *
+ * ps_prms                  : merge parameters; read-only here apart from the
+ *                            tables it points to
+ * ps_pu_merge              : PU updated in place; its b1_merge_flag must be set
+ * ps_pu_me                 : PU carrying the ME result used for the reuse check
+ * pv_src                   : source block handed to the error function
+ * i4_me_cand_cost          : cost reused when the candidate matches ps_pu_me
+ * i4_pred_buf_offset       : byte offset added to the scratch buffer base
+ * u1_num_cands             : number of list entries to examine
+ * u1_part_id               : partition index into the per-partition tables
+ * u1_force_pred_evaluation : non-zero disables reuse of the ME result
+ *
+ * Returns the best cost found, or INT_MAX when no candidate was accepted.
+ */
+static WORD32 ihevce_determine_best_merge_pu(
+ merge_prms_t *ps_prms,
+ pu_t *ps_pu_merge,
+ pu_t *ps_pu_me,
+ void *pv_src,
+ WORD32 i4_me_cand_cost,
+ WORD32 i4_pred_buf_offset,
+ UWORD8 u1_num_cands,
+ UWORD8 u1_part_id,
+ UWORD8 u1_force_pred_evaluation)
+{
+ pu_t *ps_pu;
+
+ INTER_CANDIDATE_ID_T e_cand_id;
+
+ UWORD8 i;
+ UWORD8 u1_best_pred_mode;
+ WORD32 i4_mean;
+ UWORD32 u4_cur_variance, u4_best_variance;
+ /* Local copies of the fields of ps_prms used below */
+ merge_cand_list_t *ps_list = ps_prms->ps_list;
+ inter_pred_ctxt_t *ps_mc_ctxt = ps_prms->ps_mc_ctxt;
+ PF_LUMA_INTER_PRED_PU pf_luma_inter_pred_pu = ps_prms->pf_luma_inter_pred_pu;
+ PF_SAD_FXN_T pf_sad_fxn = ps_prms->pf_sad_fxn;
+
+ ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list =
+ ps_prms->ps_cmn_utils_optimised_function_list;
+
+ WORD32(*pai4_noise_term)[MAX_NUM_INTER_PARTS] = ps_prms->pai4_noise_term;
+ UWORD32(*pau4_pred_variance)[MAX_NUM_INTER_PARTS] = ps_prms->pau4_pred_variance;
+ WORD32 i4_alpha_stim_multiplier = ps_prms->i4_alpha_stim_multiplier;
+ UWORD32 *pu4_src_variance = ps_prms->pu4_src_variance;
+ UWORD8 u1_is_cu_noisy = ps_prms->u1_is_cu_noisy;
+ UWORD8 u1_is_hbd = ps_prms->u1_is_hbd;
+ UWORD8 *pu1_valid_merge_indices = ps_prms->au1_valid_merge_indices;
+ void **ppv_pred_buf_list = ps_prms->ppv_pred_buf_list;
+ UWORD8 *pu1_merge_pred_buf_array = ps_prms->pu1_merge_pred_buf_array;
+ UWORD8(*pau1_best_pred_buf_id)[MAX_NUM_INTER_PARTS] = ps_prms->pau1_best_pred_buf_id;
+ UWORD8 u1_merge_idx_cabac_model = ps_prms->u1_merge_idx_cabac_model;
+ WORD32 i4_lambda = ps_prms->i4_lambda;
+ WORD32 i4_src_stride = ps_prms->i4_src_stride;
+ WORD32 i4_pred_stride = ps_prms->i4_pred_stride;
+ UWORD8 u1_max_cands = ps_prms->u1_max_cands;
+ UWORD8 u1_best_buf_id = pu1_merge_pred_buf_array[0];
+ UWORD8 u1_cur_buf_id = pu1_merge_pred_buf_array[1];
+ UWORD8 u1_best_cand_id = UCHAR_MAX;
+ WORD32 i4_best_cost = INT_MAX;
+ WORD32 i4_cur_noise_term = 0;
+ WORD32 i4_best_noise_term = 0;
+ /* Every candidate is tried out directly in the caller's merge PU */
+ ps_pu = ps_pu_merge;
+ e_cand_id = MERGE_DERIVED;
+
+ ASSERT(ps_pu->b1_merge_flag);
+ /* u1_best_cand_id stays UCHAR_MAX until a candidate beats i4_best_cost */
+ for(i = 0; i < u1_num_cands; i++)
+ {
+ WORD32 i4_cur_cost;
+
+ void *pv_pred = (UWORD8 *)ppv_pred_buf_list[u1_cur_buf_id] + i4_pred_buf_offset;
+ UWORD8 u1_is_pred_available = 0;
+ /* Skip candidates flagged in pu1_is_top_used when top-row candidates are disallowed */
+ if(!ps_prms->u1_use_merge_cand_from_top_row && ps_prms->pu1_is_top_used[i])
+ {
+ continue;
+ }
+
+ ps_pu->mv = ps_list[i].mv;
+ ps_pu->b3_merge_idx = pu1_valid_merge_indices[i];
+
+ /* set the prediction mode */
+ if(ps_list[i].u1_pred_flag_l0 && ps_list[i].u1_pred_flag_l1)
+ {
+ ps_pu->b2_pred_mode = PRED_BI;
+ }
+ else if(ps_list[i].u1_pred_flag_l0)
+ {
+ ps_pu->b2_pred_mode = PRED_L0;
+ }
+ else
+ {
+ ps_pu->b2_pred_mode = PRED_L1;
+ }
+
+ /* PUs with width + height < 16 (8x4 / 4x8) must not use bipred mode as per std */
+ {
+ WORD32 i4_part_wd, i4_part_ht;
+
+ i4_part_wd = (ps_pu->b4_wd + 1) << 2;
+ i4_part_ht = (ps_pu->b4_ht + 1) << 2;
+
+ if((PRED_BI == ps_pu->b2_pred_mode) && ((i4_part_wd + i4_part_ht) < 16))
+ {
+ continue;
+ }
+ }
+ /* Reuse the ME cost when allowed and ihevce_compare_pu_mv_t() returns non-zero (presumably: same motion as the ME PU - confirm) */
+ if((!u1_force_pred_evaluation) &&
+ (ihevce_compare_pu_mv_t(
+ &ps_pu->mv, &ps_pu_me->mv, ps_pu->b2_pred_mode, ps_pu_me->b2_pred_mode)))
+ {
+ i4_cur_cost = i4_me_cand_cost;
+ u1_is_pred_available = 1;
+ /* Noise statistics are taken from the ME_OR_SKIP_DERIVED rows rather than recomputed */
+ if((i4_cur_cost < INT_MAX) && u1_is_cu_noisy && i4_alpha_stim_multiplier)
+ {
+ i4_cur_noise_term = pai4_noise_term[ME_OR_SKIP_DERIVED][u1_part_id];
+ u4_cur_variance = pau4_pred_variance[ME_OR_SKIP_DERIVED][u1_part_id];
+ }
+ }
+ else
+ {
+ i4_cur_cost = ihevce_compute_inter_pred_and_cost(
+ ps_mc_ctxt,
+ pf_luma_inter_pred_pu,
+ pf_sad_fxn,
+ ps_pu,
+ pv_src,
+ pv_pred,
+ i4_src_stride,
+ i4_pred_stride,
+ 1,
+ ps_cmn_utils_optimised_function_list);
+ /* For noisy CUs, derive variance and noise term from the fresh prediction and fold the noise term into the cost */
+ if((i4_cur_cost < INT_MAX) && u1_is_cu_noisy && i4_alpha_stim_multiplier)
+ {
+ ihevce_calc_variance(
+ pv_pred,
+ i4_pred_stride,
+ &i4_mean,
+ &u4_cur_variance,
+ (ps_pu->b4_ht + 1) << 2,
+ (ps_pu->b4_wd + 1) << 2,
+ u1_is_hbd,
+ 0);
+
+ i4_cur_noise_term = ihevce_compute_noise_term(
+ i4_alpha_stim_multiplier, pu4_src_variance[u1_part_id], u4_cur_variance);
+
+ MULTIPLY_STIM_WITH_DISTORTION(
+ i4_cur_cost, i4_cur_noise_term, STIM_Q_FORMAT, ALPHA_Q_FORMAT);
+ }
+ }
+ /* INT_MAX marks an unusable candidate; it gets no merge-index cost and can never win below */
+ if(i4_cur_cost < INT_MAX)
+ {
+ WORD32 i4_merge_idx_cost = 0;
+ COMPUTE_MERGE_IDX_COST(
+ u1_merge_idx_cabac_model, i, u1_max_cands, i4_lambda, i4_merge_idx_cost);
+ i4_cur_cost += i4_merge_idx_cost;
+ }
+ /* Strict comparison: on a tie the earlier candidate is kept */
+ if(i4_cur_cost < i4_best_cost)
+ {
+ i4_best_cost = i4_cur_cost;
+
+ if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
+ {
+ i4_best_noise_term = i4_cur_noise_term;
+ u4_best_variance = u4_cur_variance;
+ }
+
+ u1_best_cand_id = i;
+ u1_best_pred_mode = ps_pu->b2_pred_mode;
+ /* Record the buffer holding the best prediction: the ME buffer on reuse; otherwise the one just written, which SWAP (presumably an exchange - confirm) moves into the best slot */
+ if(u1_is_pred_available)
+ {
+ pau1_best_pred_buf_id[e_cand_id][u1_part_id] =
+ pau1_best_pred_buf_id[ME_OR_SKIP_DERIVED][u1_part_id];
+ }
+ else
+ {
+ SWAP(u1_best_buf_id, u1_cur_buf_id);
+ pau1_best_pred_buf_id[e_cand_id][u1_part_id] = u1_best_buf_id;
+ }
+ }
+ }
+ /* Restore the winner into the merge PU; the loop left the last examined candidate there */
+ if(u1_best_cand_id != UCHAR_MAX)
+ {
+ ps_pu->mv = ps_list[u1_best_cand_id].mv;
+ ps_pu->b2_pred_mode = u1_best_pred_mode;
+ ps_pu->b3_merge_idx = pu1_valid_merge_indices[u1_best_cand_id];
+
+ if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
+ {
+ pai4_noise_term[MERGE_DERIVED][u1_part_id] = i4_best_noise_term;
+ pau4_pred_variance[MERGE_DERIVED][u1_part_id] = u4_best_variance;
+ }
+ }
+ /* INT_MAX here means no candidate was accepted and ps_pu holds the last one tried */
+ return i4_best_cost;
+}
+
+/*
+ * Gathers the prediction of a merge candidate into a single buffer.
+ *
+ * pau1_final_pred_buf_id[MERGE_DERIVED][p] names the buffer (index into
+ * ppv_pred_buf_list) that holds the merge prediction of partition p. When the
+ * two partitions live in different buffers, one partition is copied with
+ * pf_copy_2d so that both end up in the same buffer. ps_cand->pu1_pred_data,
+ * ps_cand->pu2_pred_data and ps_cand->i4_pred_data_stride are then pointed at
+ * that buffer.
+ *
+ * ppv_pred_buf_list      : table of prediction buffers
+ * ps_cand                : candidate whose as_inter_pu[0..1] give the
+ *                          partition sizes; its pred data fields are written
+ * pau1_final_pred_buf_id : per candidate-type, per partition buffer ids
+ * i4_pred_stride         : prediction stride in pixels
+ * u1_cu_size             : CU size in pixels
+ * u1_part_type           : partition type; 0 means a single partition
+ * u1_num_bytes_per_pel   : bytes per pixel, used to scale offsets and widths
+ * pf_copy_2d             : 2D copy routine (dst, dst stride, src, src stride,
+ *                          width in bytes, height in rows)
+ *
+ * Returns the id of the buffer that holds the assembled prediction.
+ */
+static WORD8 ihevce_merge_cand_pred_buffer_preparation(
+    void **ppv_pred_buf_list,
+    cu_inter_cand_t *ps_cand,
+    UWORD8 (*pau1_final_pred_buf_id)[MAX_NUM_INTER_PARTS],
+    WORD32 i4_pred_stride,
+    UWORD8 u1_cu_size,
+    UWORD8 u1_part_type,
+    UWORD8 u1_num_bytes_per_pel,
+    FT_COPY_2D *pf_copy_2d)
+{
+    WORD32 i4_part_wd;
+    WORD32 i4_part_ht;
+    WORD32 i4_part_wd_pu2;
+    WORD32 i4_part_ht_pu2;
+    WORD32 i4_buf_offset;
+    UWORD8 *pu1_pred_src;
+    UWORD8 *pu1_pred_dst;
+    WORD8 i1_retval = pau1_final_pred_buf_id[MERGE_DERIVED][0];
+
+    /* Row pitch in bytes */
+    WORD32 i4_stride = i4_pred_stride * u1_num_bytes_per_pel;
+
+    if((0 == u1_part_type) ||
+       (pau1_final_pred_buf_id[MERGE_DERIVED][0] == pau1_final_pred_buf_id[MERGE_DERIVED][1]))
+    {
+        /* Single partition, or both partitions already share a buffer */
+        ps_cand->pu1_pred_data =
+            (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]];
+        ps_cand->pu2_pred_data =
+            (UWORD16 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]];
+        ps_cand->i4_pred_data_stride = i4_pred_stride;
+
+        i1_retval = pau1_final_pred_buf_id[MERGE_DERIVED][0];
+    }
+    else if(pau1_final_pred_buf_id[MERGE_DERIVED][0] == pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0])
+    {
+        /* Partition 0 sits in the ME buffer: copy it (offset 0) into the */
+        /* buffer of partition 1 and use that buffer                      */
+        i4_part_wd = (ps_cand->as_inter_pu[0].b4_wd + 1) << 2;
+        i4_part_ht = (ps_cand->as_inter_pu[0].b4_ht + 1) << 2;
+
+        i4_buf_offset = 0;
+
+        pu1_pred_src = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]] +
+                       i4_buf_offset;
+        pu1_pred_dst =
+            (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]] + i4_buf_offset;
+
+        pf_copy_2d(
+            pu1_pred_dst,
+            i4_stride,
+            pu1_pred_src,
+            i4_stride,
+            i4_part_wd * u1_num_bytes_per_pel,
+            i4_part_ht);
+
+        ps_cand->pu1_pred_data =
+            (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]];
+        ps_cand->pu2_pred_data =
+            (UWORD16 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]];
+        ps_cand->i4_pred_data_stride = i4_pred_stride;
+
+        i1_retval = pau1_final_pred_buf_id[MERGE_DERIVED][1];
+    }
+    else if(pau1_final_pred_buf_id[MERGE_DERIVED][1] == pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1])
+    {
+        /* Partition 1 sits in the ME buffer: copy it into the buffer of */
+        /* partition 0 and use that buffer                               */
+        i4_part_wd = (ps_cand->as_inter_pu[0].b4_wd + 1) << 2;
+        i4_part_ht = (ps_cand->as_inter_pu[0].b4_ht + 1) << 2;
+
+        /* Start of partition 1: below partition 0 when that is shorter than */
+        /* the CU, to its right when it is narrower than the CU              */
+        i4_buf_offset = (i4_part_ht < u1_cu_size) * i4_part_ht * i4_pred_stride +
+                        (i4_part_wd < u1_cu_size) * i4_part_wd;
+
+        i4_buf_offset *= u1_num_bytes_per_pel;
+
+        /* From here on the dimensions are those of partition 1 */
+        i4_part_wd = (ps_cand->as_inter_pu[1].b4_wd + 1) << 2;
+        i4_part_ht = (ps_cand->as_inter_pu[1].b4_ht + 1) << 2;
+
+        pu1_pred_src = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1]] +
+                       i4_buf_offset;
+        pu1_pred_dst =
+            (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]] + i4_buf_offset;
+
+        pf_copy_2d(
+            pu1_pred_dst,
+            i4_stride,
+            pu1_pred_src,
+            i4_stride,
+            i4_part_wd * u1_num_bytes_per_pel,
+            i4_part_ht);
+
+        ps_cand->pu1_pred_data =
+            (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]];
+        ps_cand->pu2_pred_data =
+            (UWORD16 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]];
+        ps_cand->i4_pred_data_stride = i4_pred_stride;
+
+        i1_retval = pau1_final_pred_buf_id[MERGE_DERIVED][0];
+    }
+    else
+    {
+        /* The partitions sit in two distinct merge buffers: the partition */
+        /* type decides which one is copied across                         */
+        i4_part_wd = (ps_cand->as_inter_pu[0].b4_wd + 1) << 2;
+        i4_part_ht = (ps_cand->as_inter_pu[0].b4_ht + 1) << 2;
+
+        i4_part_wd_pu2 = (ps_cand->as_inter_pu[1].b4_wd + 1) << 2;
+        i4_part_ht_pu2 = (ps_cand->as_inter_pu[1].b4_ht + 1) << 2;
+
+        /* Stay NULL for a partition type not handled by the switch below */
+        pu1_pred_src = NULL;
+        pu1_pred_dst = NULL;
+
+        switch((PART_TYPE_T)u1_part_type)
+        {
+        case PRT_2NxN:
+        case PRT_Nx2N:
+        case PRT_2NxnU:
+        case PRT_nLx2N:
+        {
+            /* Copy partition 0 into the buffer of partition 1 */
+            pu1_pred_src = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]];
+            pu1_pred_dst = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]];
+
+            ps_cand->pu1_pred_data =
+                (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]];
+            ps_cand->pu2_pred_data =
+                (UWORD16 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]];
+
+            i1_retval = pau1_final_pred_buf_id[MERGE_DERIVED][1];
+
+            break;
+        }
+        case PRT_nRx2N:
+        case PRT_2NxnD:
+        {
+            /* Copy partition 1 into the buffer of partition 0 */
+            i4_buf_offset = (i4_part_ht < u1_cu_size) * i4_part_ht * i4_pred_stride +
+                            (i4_part_wd < u1_cu_size) * i4_part_wd;
+
+            i4_buf_offset *= u1_num_bytes_per_pel;
+
+            pu1_pred_src = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]] +
+                           i4_buf_offset;
+            pu1_pred_dst = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]] +
+                           i4_buf_offset;
+
+            i4_part_wd = i4_part_wd_pu2;
+            i4_part_ht = i4_part_ht_pu2;
+
+            ps_cand->pu1_pred_data =
+                (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]];
+            ps_cand->pu2_pred_data =
+                (UWORD16 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]];
+
+            i1_retval = pau1_final_pred_buf_id[MERGE_DERIVED][0];
+
+            break;
+        }
+        default:
+        {
+            /* Unexpected partition type: nothing to copy, the candidate's */
+            /* pred data pointers are left as they were                    */
+            break;
+        }
+        }
+
+        /* Copy only when the switch selected a source and a destination; */
+        /* otherwise the pointers would be read uninitialised             */
+        if((NULL != pu1_pred_src) && (NULL != pu1_pred_dst))
+        {
+            pf_copy_2d(
+                pu1_pred_dst,
+                i4_stride,
+                pu1_pred_src,
+                i4_stride,
+                i4_part_wd * u1_num_bytes_per_pel,
+                i4_part_ht);
+        }
+
+        ps_cand->i4_pred_data_stride = i4_pred_stride;
+    }
+
+    return i1_retval;
+}
+
+/**
+ * Prepares the prediction data of the MIXED_MODE_TYPE1 candidate of a
+ * two-partition CU and returns the id of the buffer that holds it.
+ *
+ * When pau1_final_pred_buf_id[MIXED_MODE_TYPE1][1] equals
+ * pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1], the ME_OR_SKIP_DERIVED[0]
+ * buffer is used unchanged and nothing is copied. Otherwise a 4-bit selector
+ * built from the arguments decides which buffers act as source and
+ * destination of the pf_copy_2d call(s).
+ *
+ * @param ppv_pred_buf_list            prediction buffers, indexed by buffer id
+ * @param ps_cand                      candidate whose pu1_pred_data, pu2_pred_data and
+ *                                     i4_pred_data_stride are written; the sizes of
+ *                                     as_inter_pu[0] and as_inter_pu[1] are read
+ * @param pau1_final_pred_buf_id       buffer id per [candidate type][partition]
+ * @param pu1_merge_pred_buf_idx_array three buffer ids, searched for one that is not
+ *                                     referenced by MERGE_DERIVED
+ * @param i4_pred_stride               buffer stride; scaled by u1_num_bytes_per_pel
+ *                                     before it is handed to pf_copy_2d
+ * @param u1_me_pred_buf_id            UCHAR_MAX sets bit 2 of the selector
+ * @param u1_merge_pred_buf_id         UCHAR_MAX sets bit 3 of the selector
+ * @param u1_type0_cand_is_valid       zero sets bit 1 of the selector
+ * @param u1_cu_size                   CU size the partition sizes are compared against
+ * @param u1_part_type                 partition type; asserted to be non-zero
+ * @param u1_num_bytes_per_pel         bytes per pixel
+ * @param pf_copy_2d                   2D copy routine; presumably takes
+ *                                     (dst, dst stride, src, src stride, width in
+ *                                     bytes, height) - confirm against FT_COPY_2D
+ *
+ * @return id of the buffer ps_cand points to on return
+ */
+static WORD8 ihevce_mixed_mode_cand_type1_pred_buffer_preparation(
+ void **ppv_pred_buf_list,
+ cu_inter_cand_t *ps_cand,
+ UWORD8 (*pau1_final_pred_buf_id)[MAX_NUM_INTER_PARTS],
+ UWORD8 *pu1_merge_pred_buf_idx_array,
+ WORD32 i4_pred_stride,
+ UWORD8 u1_me_pred_buf_id,
+ UWORD8 u1_merge_pred_buf_id,
+ UWORD8 u1_type0_cand_is_valid,
+ UWORD8 u1_cu_size,
+ UWORD8 u1_part_type,
+ UWORD8 u1_num_bytes_per_pel,
+ FT_COPY_2D *pf_copy_2d)
+{
+ WORD32 i4_part_wd;
+ WORD32 i4_part_ht;
+ WORD32 i4_part_wd_pu2;
+ WORD32 i4_part_ht_pu2;
+ UWORD8 *pu1_pred_src;
+ UWORD8 *pu1_pred_dst = NULL;
+ WORD8 i1_retval = pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0];
+
+ /* Stride in bytes, as used by every pf_copy_2d call below */
+ WORD32 i4_stride = i4_pred_stride * u1_num_bytes_per_pel;
+
+ ASSERT(0 != u1_part_type);
+
+ /* The PU size fields are converted with (field + 1) * 4 */
+ i4_part_wd = (ps_cand->as_inter_pu[0].b4_wd + 1) << 2;
+ i4_part_ht = (ps_cand->as_inter_pu[0].b4_ht + 1) << 2;
+
+ i4_part_wd_pu2 = (ps_cand->as_inter_pu[1].b4_wd + 1) << 2;
+ i4_part_ht_pu2 = (ps_cand->as_inter_pu[1].b4_ht + 1) << 2;
+
+ if(pau1_final_pred_buf_id[MIXED_MODE_TYPE1][1] == pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1])
+ {
+ /* No copy needed: the ME_OR_SKIP_DERIVED[0] buffer is used directly */
+ ps_cand->pu1_pred_data =
+ (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
+ ps_cand->pu2_pred_data =
+ (UWORD16 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
+ ps_cand->i4_pred_data_stride = i4_pred_stride;
+
+ i1_retval = pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0];
+
+ return i1_retval;
+ }
+ else
+ {
+ /* Selector bits: */
+ /* bit 3 - u1_merge_pred_buf_id is UCHAR_MAX */
+ /* bit 2 - u1_me_pred_buf_id is UCHAR_MAX */
+ /* bit 1 - u1_type0_cand_is_valid is zero */
+ /* bit 0 - MIXED_MODE_TYPE1[1] and MERGE_DERIVED[1] name the same buffer */
+ UWORD8 u1_bitfield = ((u1_merge_pred_buf_id == UCHAR_MAX) << 3) |
+ ((u1_me_pred_buf_id == UCHAR_MAX) << 2) |
+ ((!u1_type0_cand_is_valid) << 1) |
+ (pau1_final_pred_buf_id[MIXED_MODE_TYPE1][1] ==
+ pau1_final_pred_buf_id[MERGE_DERIVED][1]);
+
+ /* Offset derived from the size of PU 0: PU 0's height in rows when it is */
+ /* shorter than the CU, plus PU 0's width when it is narrower than the CU. */
+ /* Presumably this is where PU 1 starts inside the buffer - confirm. */
+ WORD32 i4_buf_offset = (i4_part_ht < u1_cu_size) * i4_part_ht * i4_pred_stride +
+ (i4_part_wd < u1_cu_size) * i4_part_wd;
+
+ /* Convert the offset from pixels to bytes */
+ i4_buf_offset *= u1_num_bytes_per_pel;
+
+ /* All 16 selector values are handled by the cases below */
+ switch(u1_bitfield)
+ {
+ case 15:
+ case 14:
+ case 6:
+ {
+ /* A single copy between the ME_OR_SKIP_DERIVED[0] and MIXED_MODE_TYPE1[1] */
+ /* buffers; the direction and the copied PU depend on the partition type. */
+ /* NOTE(review): for a part type not listed below pu1_pred_src is never */
+ /* assigned and pu1_pred_dst stays NULL - confirm such types cannot occur. */
+ switch((PART_TYPE_T)u1_part_type)
+ {
+ case PRT_2NxN:
+ case PRT_Nx2N:
+ case PRT_2NxnU:
+ case PRT_nLx2N:
+ {
+ /* PU 0 is copied from the start of ME_OR_SKIP_DERIVED[0] into MIXED_MODE_TYPE1[1] */
+ pu1_pred_src =
+ (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
+ pu1_pred_dst =
+ (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MIXED_MODE_TYPE1][1]];
+
+ i1_retval = pau1_final_pred_buf_id[MIXED_MODE_TYPE1][1];
+
+ break;
+ }
+ case PRT_nRx2N:
+ case PRT_2NxnD:
+ {
+ /* PU 1 is copied, at i4_buf_offset, from MIXED_MODE_TYPE1[1] into ME_OR_SKIP_DERIVED[0] */
+ pu1_pred_src =
+ (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MIXED_MODE_TYPE1][1]] +
+ i4_buf_offset;
+ pu1_pred_dst =
+ (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]] +
+ i4_buf_offset;
+
+ /* The copy below now uses the size of PU 1 */
+ i4_part_wd = i4_part_wd_pu2;
+ i4_part_ht = i4_part_ht_pu2;
+
+ i1_retval = pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0];
+
+ break;
+ }
+ }
+
+ ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
+ ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
+ ps_cand->i4_pred_data_stride = i4_pred_stride;
+
+ pf_copy_2d(
+ pu1_pred_dst,
+ i4_stride,
+ pu1_pred_src,
+ i4_stride,
+ i4_part_wd * u1_num_bytes_per_pel,
+ i4_part_ht);
+
+ break;
+ }
+ case 13:
+ case 9:
+ case 5:
+ {
+ UWORD8 i;
+
+ /* Pick the first of the three listed buffers that is referenced by */
+ /* neither MERGE_DERIVED[0] nor MERGE_DERIVED[1]. */
+ /* NOTE(review): if none qualifies, pu1_pred_dst stays NULL and is */
+ /* passed to pf_copy_2d - confirm one always qualifies. */
+ for(i = 0; i < 3; i++)
+ {
+ if((pu1_merge_pred_buf_idx_array[i] != pau1_final_pred_buf_id[MERGE_DERIVED][1]) &&
+ (pu1_merge_pred_buf_idx_array[i] != pau1_final_pred_buf_id[MERGE_DERIVED][0]))
+ {
+ pu1_pred_dst = (UWORD8 *)ppv_pred_buf_list[pu1_merge_pred_buf_idx_array[i]] +
+ i4_buf_offset;
+
+ i1_retval = pu1_merge_pred_buf_idx_array[i];
+
+ break;
+ }
+ }
+
+ /* Copy PU2 from the MERGE_DERIVED[1] buffer, at i4_buf_offset */
+ pu1_pred_src = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]] +
+ i4_buf_offset;
+
+ pf_copy_2d(
+ pu1_pred_dst,
+ i4_stride,
+ pu1_pred_src,
+ i4_stride,
+ i4_part_wd_pu2 * u1_num_bytes_per_pel,
+ i4_part_ht_pu2);
+ /* Copy PU1 */
+ pu1_pred_src =
+ (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
+ pu1_pred_dst = (UWORD8 *)ppv_pred_buf_list[i1_retval];
+
+ pf_copy_2d(
+ pu1_pred_dst,
+ i4_stride,
+ pu1_pred_src,
+ i4_stride,
+ i4_part_wd * u1_num_bytes_per_pel,
+ i4_part_ht);
+
+ ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
+ ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
+ ps_cand->i4_pred_data_stride = i4_pred_stride;
+
+ break;
+ }
+ case 12:
+ case 10:
+ case 8:
+ case 4:
+ case 2:
+ case 0:
+ {
+ /* PU 0 is copied from ME_OR_SKIP_DERIVED[0] into the MIXED_MODE_TYPE1[1] */
+ /* buffer, which becomes the candidate's buffer */
+ pu1_pred_src =
+ (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
+ pu1_pred_dst = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MIXED_MODE_TYPE1][1]];
+
+ i1_retval = pau1_final_pred_buf_id[MIXED_MODE_TYPE1][1];
+
+ ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
+ ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
+ ps_cand->i4_pred_data_stride = i4_pred_stride;
+
+ pf_copy_2d(
+ pu1_pred_dst,
+ i4_stride,
+ pu1_pred_src,
+ i4_stride,
+ i4_part_wd * u1_num_bytes_per_pel,
+ i4_part_ht);
+
+ break;
+ }
+ case 11:
+ {
+ /* PU 0 is copied from ME_OR_SKIP_DERIVED[0] into the MERGE_DERIVED[1] */
+ /* buffer, which becomes the candidate's buffer */
+ pu1_pred_src =
+ (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
+ pu1_pred_dst = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]];
+
+ i1_retval = pau1_final_pred_buf_id[MERGE_DERIVED][1];
+
+ ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
+ ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
+ ps_cand->i4_pred_data_stride = i4_pred_stride;
+
+ pf_copy_2d(
+ pu1_pred_dst,
+ i4_stride,
+ pu1_pred_src,
+ i4_stride,
+ i4_part_wd * u1_num_bytes_per_pel,
+ i4_part_ht);
+
+ break;
+ }
+ case 7:
+ {
+ /* PU 1 is copied, at i4_buf_offset, from MERGE_DERIVED[1] into the */
+ /* ME_OR_SKIP_DERIVED[1] buffer, which becomes the candidate's buffer */
+ pu1_pred_src = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]] +
+ i4_buf_offset;
+ pu1_pred_dst =
+ (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1]] +
+ i4_buf_offset;
+
+ i1_retval = pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1];
+
+ ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
+ ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
+ ps_cand->i4_pred_data_stride = i4_pred_stride;
+
+ pf_copy_2d(
+ pu1_pred_dst,
+ i4_stride,
+ pu1_pred_src,
+ i4_stride,
+ i4_part_wd_pu2 * u1_num_bytes_per_pel,
+ i4_part_ht_pu2);
+
+ break;
+ }
+ case 3:
+ case 1:
+ {
+ /* If the merge candidate's buffer is MERGE_DERIVED[0] and differs from */
+ /* MERGE_DERIVED[1], PU 0 is copied into the MERGE_DERIVED[1] buffer */
+ if((u1_merge_pred_buf_id == pau1_final_pred_buf_id[MERGE_DERIVED][0]) &&
+ (u1_merge_pred_buf_id != pau1_final_pred_buf_id[MERGE_DERIVED][1]))
+ {
+ pu1_pred_src =
+ (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
+ pu1_pred_dst =
+ (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]];
+
+ i1_retval = pau1_final_pred_buf_id[MERGE_DERIVED][1];
+
+ ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
+ ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
+ ps_cand->i4_pred_data_stride = i4_pred_stride;
+
+ pf_copy_2d(
+ pu1_pred_dst,
+ i4_stride,
+ pu1_pred_src,
+ i4_stride,
+ i4_part_wd * u1_num_bytes_per_pel,
+ i4_part_ht);
+ }
+ else
+ {
+ UWORD8 i;
+
+ /* Otherwise both PUs are assembled in the first listed buffer that */
+ /* is referenced by neither MERGE_DERIVED[0] nor MERGE_DERIVED[1]. */
+ /* NOTE(review): pu1_pred_dst stays NULL if none qualifies. */
+ for(i = 0; i < 3; i++)
+ {
+ if((pu1_merge_pred_buf_idx_array[i] !=
+ pau1_final_pred_buf_id[MERGE_DERIVED][1]) &&
+ (pu1_merge_pred_buf_idx_array[i] !=
+ pau1_final_pred_buf_id[MERGE_DERIVED][0]))
+ {
+ pu1_pred_dst =
+ (UWORD8 *)ppv_pred_buf_list[pu1_merge_pred_buf_idx_array[i]] +
+ i4_buf_offset;
+
+ i1_retval = pu1_merge_pred_buf_idx_array[i];
+
+ break;
+ }
+ }
+
+ /* Copy PU2 from the MERGE_DERIVED[1] buffer, at i4_buf_offset */
+ pu1_pred_src =
+ (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][1]] +
+ i4_buf_offset;
+
+ pf_copy_2d(
+ pu1_pred_dst,
+ i4_stride,
+ pu1_pred_src,
+ i4_stride,
+ i4_part_wd_pu2 * u1_num_bytes_per_pel,
+ i4_part_ht_pu2);
+
+ /* Copy PU1 */
+ pu1_pred_src =
+ (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
+ pu1_pred_dst = (UWORD8 *)ppv_pred_buf_list[i1_retval];
+
+ pf_copy_2d(
+ pu1_pred_dst,
+ i4_stride,
+ pu1_pred_src,
+ i4_stride,
+ i4_part_wd * u1_num_bytes_per_pel,
+ i4_part_ht);
+
+ ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
+ ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
+ ps_cand->i4_pred_data_stride = i4_pred_stride;
+
+ break;
+ }
+ }
+ }
+ }
+
+ return i1_retval;
+}
+
+/**
+ * Prepares the prediction data of the MIXED_MODE_TYPE0 candidate of a
+ * two-partition CU and returns the id of the buffer that holds it.
+ *
+ * When pau1_final_pred_buf_id[MIXED_MODE_TYPE0][0] equals
+ * pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0], the ME_OR_SKIP_DERIVED[0]
+ * buffer is used unchanged and nothing is copied. Otherwise a 3-bit selector
+ * built from the arguments decides which buffers act as source and
+ * destination of the pf_copy_2d call(s).
+ *
+ * @param ppv_pred_buf_list            prediction buffers, indexed by buffer id
+ * @param ps_cand                      candidate whose pu1_pred_data, pu2_pred_data and
+ *                                     i4_pred_data_stride are written; the sizes of
+ *                                     as_inter_pu[0] and as_inter_pu[1] are read
+ * @param pau1_final_pred_buf_id       buffer id per [candidate type][partition]
+ * @param pu1_merge_pred_buf_idx_array three buffer ids, searched for one that differs
+ *                                     from u1_merge_pred_buf_id and
+ *                                     u1_mixed_tyep1_pred_buf_id
+ * @param u1_me_pred_buf_id            UCHAR_MAX sets bit 1 of the selector
+ * @param u1_merge_pred_buf_id         UCHAR_MAX sets bit 2 of the selector
+ * @param u1_mixed_tyep1_pred_buf_id   sets bit 0 of the selector when it differs from
+ *                                     pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]
+ * @param i4_pred_stride               buffer stride; scaled by u1_num_bytes_per_pel
+ *                                     before it is handed to pf_copy_2d
+ * @param u1_cu_size                   CU size the partition sizes are compared against
+ * @param u1_part_type                 partition type; asserted to be non-zero
+ * @param u1_num_bytes_per_pel         bytes per pixel
+ * @param pf_copy_2d                   2D copy routine; presumably takes
+ *                                     (dst, dst stride, src, src stride, width in
+ *                                     bytes, height) - confirm against FT_COPY_2D
+ *
+ * @return id of the buffer selected for the candidate
+ */
+static WORD8 ihevce_mixed_mode_cand_type0_pred_buffer_preparation(
+ void **ppv_pred_buf_list,
+ cu_inter_cand_t *ps_cand,
+ UWORD8 (*pau1_final_pred_buf_id)[MAX_NUM_INTER_PARTS],
+ UWORD8 *pu1_merge_pred_buf_idx_array,
+ UWORD8 u1_me_pred_buf_id,
+ UWORD8 u1_merge_pred_buf_id,
+ UWORD8 u1_mixed_tyep1_pred_buf_id,
+ WORD32 i4_pred_stride,
+ UWORD8 u1_cu_size,
+ UWORD8 u1_part_type,
+ UWORD8 u1_num_bytes_per_pel,
+ FT_COPY_2D *pf_copy_2d)
+{
+ WORD32 i4_part_wd;
+ WORD32 i4_part_ht;
+ WORD32 i4_part_wd_pu2;
+ WORD32 i4_part_ht_pu2;
+ WORD32 i4_buf_offset;
+ UWORD8 *pu1_pred_src;
+ UWORD8 *pu1_pred_dst = NULL;
+ WORD8 i1_retval = pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0];
+
+ /* Stride in bytes, as used by every pf_copy_2d call below */
+ WORD32 i4_stride = i4_pred_stride * u1_num_bytes_per_pel;
+
+ ASSERT(0 != u1_part_type);
+
+ /* The PU size fields are converted with (field + 1) * 4 */
+ i4_part_wd = (ps_cand->as_inter_pu[0].b4_wd + 1) << 2;
+ i4_part_ht = (ps_cand->as_inter_pu[0].b4_ht + 1) << 2;
+ i4_part_wd_pu2 = (ps_cand->as_inter_pu[1].b4_wd + 1) << 2;
+ i4_part_ht_pu2 = (ps_cand->as_inter_pu[1].b4_ht + 1) << 2;
+
+ /* Offset derived from the size of PU 0: PU 0's height in rows when it is */
+ /* shorter than the CU, plus PU 0's width when it is narrower than the CU. */
+ /* Presumably this is where PU 1 starts inside the buffer - confirm. */
+ i4_buf_offset = (i4_part_ht < u1_cu_size) * i4_part_ht * i4_pred_stride +
+ (i4_part_wd < u1_cu_size) * i4_part_wd;
+
+ /* Convert the offset from pixels to bytes */
+ i4_buf_offset *= u1_num_bytes_per_pel;
+
+ if(pau1_final_pred_buf_id[MIXED_MODE_TYPE0][0] == pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0])
+ {
+ /* No copy needed: the ME_OR_SKIP_DERIVED[0] buffer is used directly */
+ ps_cand->pu1_pred_data =
+ (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
+ ps_cand->pu2_pred_data =
+ (UWORD16 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
+ ps_cand->i4_pred_data_stride = i4_pred_stride;
+
+ i1_retval = pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0];
+ }
+ else
+ {
+ /* Selector bits: */
+ /* bit 2 - u1_merge_pred_buf_id is UCHAR_MAX */
+ /* bit 1 - u1_me_pred_buf_id is UCHAR_MAX */
+ /* bit 0 - u1_mixed_tyep1_pred_buf_id differs from ME_OR_SKIP_DERIVED[0] */
+ UWORD8 u1_bitfield =
+ ((u1_merge_pred_buf_id == UCHAR_MAX) << 2) | ((u1_me_pred_buf_id == UCHAR_MAX) << 1) |
+ (u1_mixed_tyep1_pred_buf_id != pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]);
+
+ /* All 8 selector values are handled by the cases below */
+ switch(u1_bitfield)
+ {
+ case 7:
+ {
+ /* A single copy between the MIXED_MODE_TYPE0[0] and ME_OR_SKIP_DERIVED[1] */
+ /* buffers; the direction and the copied PU depend on the partition type. */
+ /* NOTE(review): for a part type not listed below pu1_pred_src is never */
+ /* assigned and pu1_pred_dst stays NULL - confirm such types cannot occur. */
+ switch((PART_TYPE_T)u1_part_type)
+ {
+ case PRT_2NxN:
+ case PRT_Nx2N:
+ case PRT_2NxnU:
+ case PRT_nLx2N:
+ {
+ /* PU 0 is copied from the start of MIXED_MODE_TYPE0[0] into ME_OR_SKIP_DERIVED[1]. */
+ /* NOTE(review): the destination is ME_OR_SKIP_DERIVED[1] but the returned */
+ /* id is MIXED_MODE_TYPE0[1] - confirm both always name the same buffer here. */
+ pu1_pred_src =
+ (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MIXED_MODE_TYPE0][0]];
+ pu1_pred_dst =
+ (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1]];
+
+ i1_retval = pau1_final_pred_buf_id[MIXED_MODE_TYPE0][1];
+
+ break;
+ }
+ case PRT_nRx2N:
+ case PRT_2NxnD:
+ {
+ /* PU 1 is copied, at i4_buf_offset, from ME_OR_SKIP_DERIVED[1] into MIXED_MODE_TYPE0[0] */
+ pu1_pred_src =
+ (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1]] +
+ i4_buf_offset;
+ pu1_pred_dst =
+ (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MIXED_MODE_TYPE0][0]] +
+ i4_buf_offset;
+
+ /* The copy below now uses the size of PU 1 */
+ i4_part_wd = i4_part_wd_pu2;
+ i4_part_ht = i4_part_ht_pu2;
+
+ i1_retval = pau1_final_pred_buf_id[MIXED_MODE_TYPE0][0];
+
+ break;
+ }
+ }
+
+ ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
+ ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
+ ps_cand->i4_pred_data_stride = i4_pred_stride;
+
+ pf_copy_2d(
+ pu1_pred_dst,
+ i4_stride,
+ pu1_pred_src,
+ i4_stride,
+ i4_part_wd * u1_num_bytes_per_pel,
+ i4_part_ht);
+
+ break;
+ }
+ case 6:
+ case 5:
+ case 4:
+ {
+ /* PU 1 is copied, at i4_buf_offset, from ME_OR_SKIP_DERIVED[1] into the */
+ /* MIXED_MODE_TYPE0[0] buffer, which becomes the candidate's buffer */
+ pu1_pred_src =
+ (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1]] +
+ i4_buf_offset;
+ pu1_pred_dst =
+ (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MIXED_MODE_TYPE0][0]] +
+ i4_buf_offset;
+
+ i1_retval = pau1_final_pred_buf_id[MIXED_MODE_TYPE0][0];
+
+ ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
+ ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
+ ps_cand->i4_pred_data_stride = i4_pred_stride;
+
+ pf_copy_2d(
+ pu1_pred_dst,
+ i4_stride,
+ pu1_pred_src,
+ i4_stride,
+ i4_part_wd_pu2 * u1_num_bytes_per_pel,
+ i4_part_ht_pu2);
+ break;
+ }
+ case 3:
+ {
+ /* PU 0 is copied from MIXED_MODE_TYPE0[0] into the ME_OR_SKIP_DERIVED[1] */
+ /* buffer, which becomes the candidate's buffer */
+ pu1_pred_src = (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MIXED_MODE_TYPE0][0]];
+ pu1_pred_dst =
+ (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1]];
+
+ i1_retval = pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1];
+
+ ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
+ ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
+ ps_cand->i4_pred_data_stride = i4_pred_stride;
+
+ pf_copy_2d(
+ pu1_pred_dst,
+ i4_stride,
+ pu1_pred_src,
+ i4_stride,
+ i4_part_wd * u1_num_bytes_per_pel,
+ i4_part_ht);
+
+ break;
+ }
+ case 2:
+ case 1:
+ case 0:
+ {
+ /* If the merge candidate's buffer is MERGE_DERIVED[1] and differs from */
+ /* MERGE_DERIVED[0], PU 1 is copied into the MERGE_DERIVED[0] buffer */
+ if((u1_merge_pred_buf_id == pau1_final_pred_buf_id[MERGE_DERIVED][1]) &&
+ (u1_merge_pred_buf_id != pau1_final_pred_buf_id[MERGE_DERIVED][0]))
+ {
+ pu1_pred_src =
+ (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1]] +
+ i4_buf_offset;
+ pu1_pred_dst =
+ (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]] +
+ i4_buf_offset;
+
+ i1_retval = pau1_final_pred_buf_id[MERGE_DERIVED][0];
+
+ ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
+ ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
+ ps_cand->i4_pred_data_stride = i4_pred_stride;
+
+ pf_copy_2d(
+ pu1_pred_dst,
+ i4_stride,
+ pu1_pred_src,
+ i4_stride,
+ i4_part_wd_pu2 * u1_num_bytes_per_pel,
+ i4_part_ht_pu2);
+ }
+ else
+ {
+ UWORD8 i;
+
+ /* Otherwise both PUs are assembled in the first listed buffer that differs */
+ /* from u1_merge_pred_buf_id and from u1_mixed_tyep1_pred_buf_id. */
+ /* NOTE(review): pu1_pred_dst stays NULL if none qualifies. */
+ for(i = 0; i < 3; i++)
+ {
+ if((pu1_merge_pred_buf_idx_array[i] != u1_merge_pred_buf_id) &&
+ (pu1_merge_pred_buf_idx_array[i] != u1_mixed_tyep1_pred_buf_id))
+ {
+ pu1_pred_dst =
+ (UWORD8 *)ppv_pred_buf_list[pu1_merge_pred_buf_idx_array[i]] +
+ i4_buf_offset;
+
+ i1_retval = pu1_merge_pred_buf_idx_array[i];
+
+ break;
+ }
+ }
+
+ /* Copy PU2 from the ME_OR_SKIP_DERIVED[1] buffer, at i4_buf_offset */
+ pu1_pred_src =
+ (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][1]] +
+ i4_buf_offset;
+
+ pf_copy_2d(
+ pu1_pred_dst,
+ i4_stride,
+ pu1_pred_src,
+ i4_stride,
+ i4_part_wd_pu2 * u1_num_bytes_per_pel,
+ i4_part_ht_pu2);
+
+ /* Copy PU1 */
+ pu1_pred_src =
+ (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[MERGE_DERIVED][0]];
+ pu1_pred_dst = (UWORD8 *)ppv_pred_buf_list[i1_retval];
+
+ pf_copy_2d(
+ pu1_pred_dst,
+ i4_stride,
+ pu1_pred_src,
+ i4_stride,
+ i4_part_wd * u1_num_bytes_per_pel,
+ i4_part_ht);
+
+ ps_cand->pu1_pred_data = (UWORD8 *)ppv_pred_buf_list[i1_retval];
+ ps_cand->pu2_pred_data = (UWORD16 *)ppv_pred_buf_list[i1_retval];
+ ps_cand->i4_pred_data_stride = i4_pred_stride;
+
+ break;
+ }
+ }
+ }
+ }
+
+ return i1_retval;
+}
+
+/**
+ * Returns the index of the largest value in pu4_cost_array.
+ *
+ * Only the first u1_array_size entries are considered. On ties the lowest
+ * index wins. The array is not read at all when u1_array_size is 0 or 1,
+ * in which case 0 is returned.
+ */
+static UWORD8 ihevce_find_idx_of_worst_cost(UWORD32 *pu4_cost_array, UWORD8 u1_array_size)
+{
+    UWORD8 u1_idx_of_max = 0;
+    WORD32 i4_probe = 1;
+
+    while(i4_probe < u1_array_size)
+    {
+        /* Strictly greater, so an equal cost never displaces an earlier index */
+        if(pu4_cost_array[u1_idx_of_max] < pu4_cost_array[i4_probe])
+        {
+            u1_idx_of_max = (UWORD8)i4_probe;
+        }
+
+        i4_probe++;
+    }
+
+    return u1_idx_of_max;
+}
+
+/**
+ * Releases the prediction buffers that are no longer needed once the
+ * candidates of one evaluation pass have been merged into the best list.
+ *
+ * Two paths exist:
+ *  - skip evaluation, or neither skip nor merge evaluation: buffers are
+ *    released directly with ihevce_set_pred_buf_as_free;
+ *  - merge evaluation (without skip): the ME buffer and the merge buffers
+ *    go through COMPUTE_NUM_POSITIVE_REFERENCES_AND_FREE_IF_ZERO, and the
+ *    third merge buffer is only processed when u1_part_type is non-zero.
+ *
+ * In both paths the ids listed in pu1_buf_id_to_free are released as well,
+ * except those that the path has already dealt with.
+ *
+ * u1_num_available_cands is not used.
+ */
+static void ihevce_free_unused_buf_indices(
+    UWORD32 *pu4_pred_buf_usage_indicator,
+    UWORD8 *pu1_merge_pred_buf_idx_array,
+    UWORD8 *pu1_buf_id_in_use,
+    UWORD8 *pu1_buf_id_to_free,
+    UWORD8 u1_me_buf_id,
+    UWORD8 u1_num_available_cands,
+    UWORD8 u1_num_bufs_to_free,
+    UWORD8 u1_eval_merge,
+    UWORD8 u1_eval_skip,
+    UWORD8 u1_part_type)
+{
+    UWORD8 i;
+
+    if(u1_eval_skip || !u1_eval_merge)
+    {
+        if(u1_eval_skip)
+        {
+            const UWORD8 u1_buf_kept = pu1_buf_id_in_use[ME_OR_SKIP_DERIVED];
+            const UWORD8 u1_skip_buf0 = pu1_merge_pred_buf_idx_array[0];
+            const UWORD8 u1_skip_buf1 = pu1_merge_pred_buf_idx_array[1];
+
+            /* Buffer 0 is released unless it is the one kept for the skip candidate */
+            if(u1_buf_kept != u1_skip_buf0)
+            {
+                ihevce_set_pred_buf_as_free(pu4_pred_buf_usage_indicator, u1_skip_buf0);
+            }
+
+            /* Buffer 1 survives only when it is the kept one and buffer 0 is not */
+            if((u1_buf_kept == u1_skip_buf0) || (u1_buf_kept != u1_skip_buf1))
+            {
+                ihevce_set_pred_buf_as_free(pu4_pred_buf_usage_indicator, u1_skip_buf1);
+            }
+        }
+        else if(UCHAR_MAX == pu1_buf_id_in_use[ME_OR_SKIP_DERIVED])
+        {
+            /* The ME candidate did not make it into the list */
+            ihevce_set_pred_buf_as_free(pu4_pred_buf_usage_indicator, u1_me_buf_id);
+        }
+
+        /* Release the displaced buffers; the ME buffer is never released here */
+        for(i = 0; i < u1_num_bufs_to_free; i++)
+        {
+            if(u1_me_buf_id != pu1_buf_id_to_free[i])
+            {
+                ihevce_set_pred_buf_as_free(pu4_pred_buf_usage_indicator, pu1_buf_id_to_free[i]);
+            }
+        }
+    }
+    else
+    {
+        /* ME pred buf */
+        COMPUTE_NUM_POSITIVE_REFERENCES_AND_FREE_IF_ZERO(
+            u1_me_buf_id,
+            pu1_buf_id_in_use,
+            pu1_buf_id_to_free,
+            4,
+            u1_num_bufs_to_free,
+            pu4_pred_buf_usage_indicator);
+
+        /* Merge pred buf 0 */
+        COMPUTE_NUM_POSITIVE_REFERENCES_AND_FREE_IF_ZERO(
+            pu1_merge_pred_buf_idx_array[0],
+            pu1_buf_id_in_use,
+            pu1_buf_id_to_free,
+            4,
+            u1_num_bufs_to_free,
+            pu4_pred_buf_usage_indicator);
+
+        /* Merge pred buf 1 */
+        COMPUTE_NUM_POSITIVE_REFERENCES_AND_FREE_IF_ZERO(
+            pu1_merge_pred_buf_idx_array[1],
+            pu1_buf_id_in_use,
+            pu1_buf_id_to_free,
+            4,
+            u1_num_bufs_to_free,
+            pu4_pred_buf_usage_indicator);
+
+        if(0 != u1_part_type)
+        {
+            /* Merge pred buf 2 */
+            COMPUTE_NUM_POSITIVE_REFERENCES_AND_FREE_IF_ZERO(
+                pu1_merge_pred_buf_idx_array[2],
+                pu1_buf_id_in_use,
+                pu1_buf_id_to_free,
+                4,
+                u1_num_bufs_to_free,
+                pu4_pred_buf_usage_indicator);
+        }
+
+        /* Release the displaced buffers that were not handled above */
+        for(i = 0; i < u1_num_bufs_to_free; i++)
+        {
+            if((u1_me_buf_id != pu1_buf_id_to_free[i]) &&
+               (pu1_buf_id_to_free[i] != pu1_merge_pred_buf_idx_array[0]) &&
+               (pu1_buf_id_to_free[i] != pu1_merge_pred_buf_idx_array[1]))
+            {
+                ihevce_set_pred_buf_as_free(pu4_pred_buf_usage_indicator, pu1_buf_id_to_free[i]);
+            }
+        }
+    }
+}
+
+/**
+ * Tells whether a prediction buffer is referenced by exactly one of the
+ * candidates already in the best list.
+ *
+ * Returns 0 as soon as a second entry of pu1_pred_id_of_winners (within the
+ * first u1_num_cands_previously_added entries) equals
+ * u1_idx_of_worst_cost_in_pred_buf_array, and 1 otherwise. At least one
+ * match is asserted.
+ */
+static UWORD8 ihevce_check_if_buf_can_be_freed(
+    UWORD8 *pu1_pred_id_of_winners,
+    UWORD8 u1_idx_of_worst_cost_in_pred_buf_array,
+    UWORD8 u1_num_cands_previously_added)
+{
+    UWORD8 u1_num_trysts = 0;
+    UWORD8 u1_winner;
+
+    for(u1_winner = 0; u1_winner < u1_num_cands_previously_added; u1_winner++)
+    {
+        if(pu1_pred_id_of_winners[u1_winner] != u1_idx_of_worst_cost_in_pred_buf_array)
+        {
+            continue;
+        }
+
+        /* A second user means the buffer is shared and has to stay allocated */
+        if(++u1_num_trysts > 1)
+        {
+            return 0;
+        }
+    }
+
+    ASSERT(u1_num_trysts > 0);
+
+    return 1;
+}
+
+/**
+ * Collects the u1_num_cands_to_pick highest costs of pu4_cost_src together
+ * with their indices.
+ *
+ * The first pick is given by the caller (u1_worst_cost_idx_in_dst_array);
+ * every further pick is found with ihevce_find_idx_of_worst_cost after the
+ * previous picks have been temporarily zeroed in pu4_cost_src. The zeroed
+ * entries are written back from pu4_cost_dst before returning.
+ */
+static void ihevce_get_worst_costs_and_indices(
+    UWORD32 *pu4_cost_src,
+    UWORD32 *pu4_cost_dst,
+    UWORD8 *pu1_worst_dst_cand_idx,
+    UWORD8 u1_src_array_length,
+    UWORD8 u1_num_cands_to_pick,
+    UWORD8 u1_worst_cost_idx_in_dst_array)
+{
+    WORD32 i4_pick;
+
+    /* Pick 0 is already known */
+    pu4_cost_dst[0] = pu4_cost_src[u1_worst_cost_idx_in_dst_array];
+    pu4_cost_src[u1_worst_cost_idx_in_dst_array] = 0;
+    pu1_worst_dst_cand_idx[0] = u1_worst_cost_idx_in_dst_array;
+
+    i4_pick = 1;
+
+    while(i4_pick < u1_num_cands_to_pick)
+    {
+        pu1_worst_dst_cand_idx[i4_pick] =
+            ihevce_find_idx_of_worst_cost(pu4_cost_src, u1_src_array_length);
+
+        /* Record the cost, then knock it out so the next search skips it */
+        pu4_cost_dst[i4_pick] = pu4_cost_src[pu1_worst_dst_cand_idx[i4_pick]];
+        pu4_cost_src[pu1_worst_dst_cand_idx[i4_pick]] = 0;
+
+        i4_pick++;
+    }
+
+    /* Undo the temporary zeroing, in the same order the picks were made */
+    for(i4_pick = 0; i4_pick < u1_num_cands_to_pick; i4_pick++)
+    {
+        pu4_cost_src[pu1_worst_dst_cand_idx[i4_pick]] = pu4_cost_dst[i4_pick];
+    }
+}
+
+/**
+ * Decides which of the newly evaluated candidates enter the best-so-far
+ * list when the list cannot take all of them, and which existing entries
+ * they displace.
+ *
+ * Candidates that fit into the free slots of the list are accepted without
+ * comparison. The remaining ones are compared against the worst costs that
+ * are currently in the list and replace them when cheaper.
+ *
+ * @param pu4_cost_src               costs of the new candidates; sorted in place
+ * @param pu4_cost_dst               costs currently in the list
+ * @param pe_cand_id                 ids of the new candidates; reordered together with
+ *                                   pu4_cost_src and finally rewritten to hold the
+ *                                   accepted ids in default order
+ * @param pu1_cand_idx_in_dst_array  out: list slot for each accepted candidate
+ * @param pu1_buf_id_to_free         out: buffer ids of displaced entries that had no
+ *                                   other user
+ * @param pu1_pred_id_of_winners     buffer id per list entry; set to UCHAR_MAX for a
+ *                                   displaced entry whose buffer is still shared
+ * @param pu1_num_bufs_to_free       in/out: number of valid entries in
+ *                                   pu1_buf_id_to_free
+ * @param i4_max_num_inter_rdopt_cands  capacity of the list
+ * @param u1_num_cands_previously_added number of entries currently in the list
+ * @param u1_num_available_cands     number of new candidates (at most 4)
+ * @param u1_worst_cost_idx_in_dst_array index of the worst entry currently in the list
+ *
+ * @return number of accepted candidates, i.e. the number of valid entries in
+ *         pu1_cand_idx_in_dst_array and pe_cand_id
+ */
+static UWORD8 ihevce_select_cands_to_replace_previous_worst(
+    UWORD32 *pu4_cost_src,
+    UWORD32 *pu4_cost_dst,
+    INTER_CANDIDATE_ID_T *pe_cand_id,
+    UWORD8 *pu1_cand_idx_in_dst_array,
+    UWORD8 *pu1_buf_id_to_free,
+    UWORD8 *pu1_pred_id_of_winners,
+    UWORD8 *pu1_num_bufs_to_free,
+    WORD32 i4_max_num_inter_rdopt_cands,
+    UWORD8 u1_num_cands_previously_added,
+    UWORD8 u1_num_available_cands,
+    UWORD8 u1_worst_cost_idx_in_dst_array)
+{
+    WORD32 i, j, k;
+    UWORD32 au4_worst_dst_costs[4];
+    UWORD8 au1_worst_dst_cand_idx[4];
+
+    INTER_CANDIDATE_ID_T ae_default_cand_id[4] = {
+        ME_OR_SKIP_DERIVED, MERGE_DERIVED, MIXED_MODE_TYPE1, MIXED_MODE_TYPE0
+    };
+
+    /* The counts are derived in signed arithmetic first: checking them after */
+    /* they have been narrowed to UWORD8 could never fail. */
+    WORD32 i4_num_free_slots = i4_max_num_inter_rdopt_cands - u1_num_cands_previously_added;
+    WORD32 i4_num_contenders = u1_num_available_cands - i4_num_free_slots;
+
+    UWORD8 u1_num_cands_to_add_wo_comparisons = (UWORD8)i4_num_free_slots;
+    UWORD8 u1_num_cands_to_add_after_comparisons = (UWORD8)i4_num_contenders;
+    UWORD8 u1_num_cands_to_add = 0;
+    UWORD8 au1_valid_src_cands[4] = { 0, 0, 0, 0 };
+
+    ASSERT(i4_num_free_slots >= 0);
+    ASSERT(i4_num_contenders >= 0);
+
+    /* Sorting src costs */
+    /* (presumably ascending, as the cheapest entries are taken first - confirm */
+    /* against the macro definition) */
+    SORT_PRIMARY_INTTYPE_ARRAY_AND_REORDER_GENERIC_COMPANION_ARRAY(
+        pu4_cost_src, pe_cand_id, u1_num_available_cands, INTER_CANDIDATE_ID_T);
+
+    /* The first candidates simply occupy the free slots at the end of the list */
+    for(i = 0; i < u1_num_cands_to_add_wo_comparisons; i++)
+    {
+        pu1_cand_idx_in_dst_array[u1_num_cands_to_add++] = u1_num_cands_previously_added + i;
+        au1_valid_src_cands[pe_cand_id[i]] = 1;
+    }
+
+    if(u1_num_cands_previously_added)
+    {
+        WORD8 i1_last_index = 0;
+
+        /* au4_worst_dst_costs[0] is the worst list entry, followed by the next worst ones */
+        ihevce_get_worst_costs_and_indices(
+            pu4_cost_dst,
+            au4_worst_dst_costs,
+            au1_worst_dst_cand_idx,
+            u1_num_cands_previously_added,
+            u1_num_cands_to_add_after_comparisons,
+            u1_worst_cost_idx_in_dst_array);
+
+        /* Walk the remaining new candidates from the last sorted position backwards. */
+        /* For each, look for a list entry it beats, scanning the collected worst */
+        /* entries from the last one towards i1_last_index. */
+        for(i = u1_num_available_cands - 1; i >= u1_num_cands_to_add_wo_comparisons; i--)
+        {
+            for(j = u1_num_cands_to_add_after_comparisons - 1; j >= i1_last_index; j--)
+            {
+                if((pu4_cost_src[i] < au4_worst_dst_costs[j]))
+                {
+                    if((i - u1_num_cands_to_add_wo_comparisons) <= j)
+                    {
+                        /* Candidate i and every remaining candidate before it are accepted; */
+                        /* they displace worst entries 0 .. (i - wo_comparisons) */
+                        for(k = 0; k <= (i - u1_num_cands_to_add_wo_comparisons); k++)
+                        {
+                            pu1_cand_idx_in_dst_array[u1_num_cands_to_add++] =
+                                au1_worst_dst_cand_idx[k];
+                            au1_valid_src_cands[pe_cand_id[u1_num_cands_to_add_wo_comparisons + k]] =
+                                1;
+
+                            /* A buffer used by the displaced entry alone can be released; */
+                            /* a shared one is only detached from this entry */
+                            if(1 == ihevce_check_if_buf_can_be_freed(
+                                        pu1_pred_id_of_winners,
+                                        pu1_pred_id_of_winners[au1_worst_dst_cand_idx[k]],
+                                        u1_num_cands_previously_added))
+                            {
+                                pu1_buf_id_to_free[(*pu1_num_bufs_to_free)++] =
+                                    pu1_pred_id_of_winners[au1_worst_dst_cand_idx[k]];
+                            }
+                            else
+                            {
+                                pu1_pred_id_of_winners[au1_worst_dst_cand_idx[k]] = UCHAR_MAX;
+                            }
+                        }
+
+                        /* -1 marks the selection as complete */
+                        i1_last_index = -1;
+                    }
+                    else
+                    {
+                        /* Not enough worse entries for this candidate; the next */
+                        /* candidate need not look below j */
+                        i1_last_index = j;
+                    }
+
+                    break;
+                }
+            }
+
+            if(-1 == i1_last_index)
+            {
+                break;
+            }
+        }
+    }
+
+    /* Hand the accepted ids back in default order. All four default ids are */
+    /* visited: when the caller has dropped invalid candidates, the number of */
+    /* available candidates is smaller than the highest accepted id, and bounding */
+    /* the loop by that number would skip an accepted candidate. */
+    for(i = 0, j = 0;
+        i < (WORD32)(sizeof(ae_default_cand_id) / sizeof(ae_default_cand_id[0]));
+        i++)
+    {
+        if(au1_valid_src_cands[ae_default_cand_id[i]])
+        {
+            pe_cand_id[j++] = ae_default_cand_id[i];
+        }
+    }
+
+    return u1_num_cands_to_add;
+}
+
+/**
+ * Merges the candidates of the current evaluation pass (ME/skip, merge and
+ * the two mixed-mode types) into the best-candidate list of ps_mode_info.
+ *
+ * Steps, in order:
+ *  1. compute the cost of every available candidate and drop those whose
+ *     cost is INT_MAX or more;
+ *  2. choose a list slot for each remaining candidate, displacing worse
+ *     entries when the list would overflow;
+ *  3. for every accepted candidate store its cost, prepare its prediction
+ *     buffer and record the buffer id;
+ *  4. release the buffers that are no longer referenced;
+ *  5. write the updated counters back to ps_mode_info.
+ *
+ * @param ps_mode_info         best-candidate list and its bookkeeping (updated)
+ * @param pps_cand_src         candidates of this pass, indexed by candidate id
+ * @param pas_mvp_winner       not used in this function
+ * @param pau4_cost            cost per [candidate id][partition]
+ * @param ppv_pred_buf_list    prediction buffers, indexed by buffer id
+ * @param pau1_final_pred_buf_id  buffer id per [candidate id][partition]
+ * @param pu4_pred_buf_usage_indicator  passed on to ihevce_free_unused_buf_indices
+ * @param pu1_num_merge_cands, pu1_num_skip_cands,
+ *        pu1_num_mixed_mode_type0_cands, pu1_num_mixed_mode_type1_cands
+ *                             counters incremented for each accepted candidate
+ * @param pu1_merge_pred_buf_idx_array  buffer ids passed on to the buffer helpers
+ * @param pf_copy_2d           2D copy routine passed on to the buffer helpers
+ * @param i4_pred_stride       prediction buffer stride
+ * @param i4_max_num_inter_rdopt_cands  capacity of the best-candidate list
+ * @param u1_cu_size, u1_part_type, u1_num_bytes_per_pel  passed on to the buffer helpers;
+ *                             u1_part_type also selects the number of candidates
+ * @param u1_eval_merge        non-zero when merge candidates were evaluated
+ * @param u1_eval_skip         non-zero when the ME_OR_SKIP_DERIVED candidate is a skip
+ *                             candidate
+ *
+ * @return non-zero when a skip or merge candidate was accepted
+ */
+static UWORD8 ihevce_merge_cands_with_existing_best(
+ inter_cu_mode_info_t *ps_mode_info,
+ cu_inter_cand_t **pps_cand_src,
+ pu_mv_t (*pas_mvp_winner)[NUM_INTER_PU_PARTS],
+ UWORD32 (*pau4_cost)[MAX_NUM_INTER_PARTS],
+ void **ppv_pred_buf_list,
+ UWORD8 (*pau1_final_pred_buf_id)[MAX_NUM_INTER_PARTS],
+ UWORD32 *pu4_pred_buf_usage_indicator,
+ UWORD8 *pu1_num_merge_cands,
+ UWORD8 *pu1_num_skip_cands,
+ UWORD8 *pu1_num_mixed_mode_type0_cands,
+ UWORD8 *pu1_num_mixed_mode_type1_cands,
+ UWORD8 *pu1_merge_pred_buf_idx_array,
+
+ FT_COPY_2D *pf_copy_2d,
+
+ WORD32 i4_pred_stride,
+ WORD32 i4_max_num_inter_rdopt_cands,
+ UWORD8 u1_cu_size,
+ UWORD8 u1_part_type,
+ UWORD8 u1_eval_merge,
+ UWORD8 u1_eval_skip,
+ UWORD8 u1_num_bytes_per_pel)
+{
+ UWORD32 au4_cost_src[4];
+ WORD32 i;
+ WORD32 u1_num_available_cands;
+ UWORD8 au1_buf_id_in_use[4];
+ UWORD8 au1_buf_id_to_free[4];
+ UWORD8 au1_cand_idx_in_dst_array[4];
+
+ /* Candidate ids in their default processing order */
+ INTER_CANDIDATE_ID_T ae_cand_id[4] = {
+ ME_OR_SKIP_DERIVED, MERGE_DERIVED, MIXED_MODE_TYPE1, MIXED_MODE_TYPE0
+ };
+
+ cu_inter_cand_t **pps_cand_dst = ps_mode_info->aps_cu_data;
+
+ /* Local copies of the list bookkeeping; written back at the end */
+ UWORD8 u1_num_cands_previously_added = ps_mode_info->u1_num_inter_cands;
+ UWORD8 u1_worst_cost_idx = ps_mode_info->u1_idx_of_worst_cost_in_cost_array;
+ UWORD8 u1_idx_of_worst_cost_in_pred_buf_array =
+ ps_mode_info->u1_idx_of_worst_cost_in_pred_buf_array;
+ UWORD32 *pu4_cost_dst = ps_mode_info->au4_cost;
+ UWORD8 *pu1_pred_id_of_winners = ps_mode_info->au1_pred_buf_idx;
+ UWORD8 u1_num_bufs_to_free = 0;
+ UWORD8 u1_skip_or_merge_cand_is_valid = 0;
+ UWORD8 u1_num_invalid_cands = 0;
+
+ /* UCHAR_MAX marks "no buffer recorded for this candidate id" */
+ memset(au1_buf_id_in_use, UCHAR_MAX, sizeof(au1_buf_id_in_use));
+
+ /* 1 candidate without merge evaluation; with it, 3 when u1_part_type is 0 and 4 otherwise */
+ u1_num_available_cands = (u1_eval_merge) ? 2 + ((u1_part_type != 0) + 1) : 1;
+
+ /* Step 1: compute costs and compact away candidates with a cost of INT_MAX or more. */
+ /* i4_idx is the position of the current candidate after compaction. */
+ for(i = 0; i < u1_num_available_cands; i++)
+ {
+ WORD32 i4_idx = i - u1_num_invalid_cands;
+
+ if(u1_part_type == 0)
+ {
+ au4_cost_src[i4_idx] = pau4_cost[ae_cand_id[i4_idx]][0];
+ }
+ else
+ {
+ /* Two partitions: the candidate's cost is the sum over both */
+ au4_cost_src[i4_idx] =
+ pau4_cost[ae_cand_id[i4_idx]][0] + pau4_cost[ae_cand_id[i4_idx]][1];
+ }
+
+ if(au4_cost_src[i4_idx] >= INT_MAX)
+ {
+ /* Remove the id by shifting the ids that follow it one position down */
+ memmove(
+ &ae_cand_id[i4_idx],
+ &ae_cand_id[i4_idx + 1],
+ sizeof(INTER_CANDIDATE_ID_T) * (u1_num_available_cands - i - 1));
+
+ u1_num_invalid_cands++;
+ }
+ }
+
+ u1_num_available_cands -= u1_num_invalid_cands;
+
+ /* Step 2: assign a list slot to every candidate */
+ if((u1_num_cands_previously_added + u1_num_available_cands) > i4_max_num_inter_rdopt_cands)
+ {
+ /* The list would overflow: let the helper pick the candidates that are */
+ /* accepted and the entries they displace. ae_cand_id is rewritten to the */
+ /* accepted ids and the return value is their number. */
+ u1_num_available_cands = ihevce_select_cands_to_replace_previous_worst(
+ au4_cost_src,
+ pu4_cost_dst,
+ ae_cand_id,
+ au1_cand_idx_in_dst_array,
+ au1_buf_id_to_free,
+ pu1_pred_id_of_winners,
+ &u1_num_bufs_to_free,
+ i4_max_num_inter_rdopt_cands,
+ u1_num_cands_previously_added,
+ u1_num_available_cands,
+ u1_worst_cost_idx);
+ }
+ else
+ {
+ /* Everything fits: append after the existing entries */
+ for(i = 0; i < u1_num_available_cands; i++)
+ {
+ au1_cand_idx_in_dst_array[i] = u1_num_cands_previously_added + i;
+ }
+ }
+
+ /* Step 3: store each accepted candidate and prepare its prediction buffer */
+ for(i = 0; i < u1_num_available_cands; i++)
+ {
+ UWORD8 u1_dst_array_idx = au1_cand_idx_in_dst_array[i];
+
+ /* Costs are recomputed because ae_cand_id may have been reordered in step 2 */
+ if(u1_part_type == 0)
+ {
+ au4_cost_src[i] = pau4_cost[ae_cand_id[i]][0];
+ }
+ else
+ {
+ au4_cost_src[i] = pau4_cost[ae_cand_id[i]][0] + pau4_cost[ae_cand_id[i]][1];
+ }
+
+ pps_cand_dst[u1_dst_array_idx] = pps_cand_src[ae_cand_id[i]];
+
+ /* Adding a skip candidate identical to the merge winner */
+ /* (with a single partition the MIXED_MODE_TYPE1 slot carries this skip candidate) */
+ if((u1_eval_merge) && (0 == u1_part_type) && (MIXED_MODE_TYPE1 == ae_cand_id[i]))
+ {
+ (*pu1_num_skip_cands)++;
+
+ pu4_cost_dst[u1_dst_array_idx] = au4_cost_src[i];
+
+ if(u1_num_cands_previously_added >= i4_max_num_inter_rdopt_cands)
+ {
+ /* List is full: an entry was replaced, so the worst entry is searched again */
+ u1_worst_cost_idx =
+ ihevce_find_idx_of_worst_cost(pu4_cost_dst, u1_num_cands_previously_added);
+
+ u1_idx_of_worst_cost_in_pred_buf_array = pu1_pred_id_of_winners[u1_worst_cost_idx];
+ }
+ else
+ {
+ u1_num_cands_previously_added++;
+ }
+
+ if(u1_skip_or_merge_cand_is_valid)
+ {
+ /* A skip or merge candidate was accepted earlier in this loop: */
+ /* reuse the buffer recorded for MERGE_DERIVED */
+ pps_cand_dst[u1_dst_array_idx]->pu1_pred_data =
+ (UWORD8 *)ppv_pred_buf_list[au1_buf_id_in_use[MERGE_DERIVED]];
+ pps_cand_dst[u1_dst_array_idx]->pu2_pred_data =
+ (UWORD16 *)ppv_pred_buf_list[au1_buf_id_in_use[MERGE_DERIVED]];
+ pps_cand_dst[u1_dst_array_idx]->i4_pred_data_stride = i4_pred_stride;
+
+ au1_buf_id_in_use[MIXED_MODE_TYPE1] = au1_buf_id_in_use[MERGE_DERIVED];
+ pu1_pred_id_of_winners[u1_dst_array_idx] = au1_buf_id_in_use[MERGE_DERIVED];
+ }
+ else
+ {
+ u1_skip_or_merge_cand_is_valid = 1;
+
+ au1_buf_id_in_use[MIXED_MODE_TYPE1] = ihevce_merge_cand_pred_buffer_preparation(
+ ppv_pred_buf_list,
+ pps_cand_dst[u1_dst_array_idx],
+ pau1_final_pred_buf_id,
+ i4_pred_stride,
+ u1_cu_size,
+ u1_part_type,
+ u1_num_bytes_per_pel,
+ pf_copy_2d);
+
+ pu1_pred_id_of_winners[u1_dst_array_idx] = au1_buf_id_in_use[MIXED_MODE_TYPE1];
+ }
+
+ continue;
+ }
+
+ if(u1_num_cands_previously_added < i4_max_num_inter_rdopt_cands)
+ {
+ /* Appending: the new entry becomes the worst one if it costs more than */
+ /* the current worst entry */
+ if(u1_num_cands_previously_added)
+ {
+ if(au4_cost_src[i] > pu4_cost_dst[u1_worst_cost_idx])
+ {
+ u1_worst_cost_idx = u1_num_cands_previously_added;
+ }
+ }
+
+ pu4_cost_dst[u1_dst_array_idx] = au4_cost_src[i];
+
+ u1_num_cands_previously_added++;
+ }
+ else
+ {
+ /* Replacing an entry of a full list: search the worst entry again */
+ pu4_cost_dst[u1_dst_array_idx] = au4_cost_src[i];
+
+ u1_worst_cost_idx = ihevce_find_idx_of_worst_cost(
+ ps_mode_info->au4_cost, u1_num_cands_previously_added);
+
+ u1_idx_of_worst_cost_in_pred_buf_array = pu1_pred_id_of_winners[u1_worst_cost_idx];
+ }
+
+ /* Prepare the prediction buffer according to the candidate type. In every */
+ /* case the buffer id is recorded in au1_buf_id_in_use and */
+ /* pu1_pred_id_of_winners, and also as the worst entry's buffer when this */
+ /* slot is the worst entry. */
+ switch(ae_cand_id[i])
+ {
+ case ME_OR_SKIP_DERIVED:
+ {
+ (*pu1_num_skip_cands) += u1_eval_skip;
+
+ /* No copy: the candidate points straight at the ME_OR_SKIP_DERIVED[0] buffer */
+ pps_cand_dst[u1_dst_array_idx]->pu1_pred_data =
+ (UWORD8 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
+ pps_cand_dst[u1_dst_array_idx]->pu2_pred_data =
+ (UWORD16 *)ppv_pred_buf_list[pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0]];
+ pps_cand_dst[u1_dst_array_idx]->i4_pred_data_stride = i4_pred_stride;
+
+ if(u1_worst_cost_idx == u1_dst_array_idx)
+ {
+ u1_idx_of_worst_cost_in_pred_buf_array =
+ pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0];
+ }
+
+ u1_skip_or_merge_cand_is_valid = u1_eval_skip;
+
+ au1_buf_id_in_use[ME_OR_SKIP_DERIVED] = pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0];
+ pu1_pred_id_of_winners[u1_dst_array_idx] =
+ pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0];
+
+ break;
+ }
+ case MERGE_DERIVED:
+ {
+ (*pu1_num_merge_cands)++;
+
+ au1_buf_id_in_use[MERGE_DERIVED] = ihevce_merge_cand_pred_buffer_preparation(
+ ppv_pred_buf_list,
+ pps_cand_dst[u1_dst_array_idx],
+ pau1_final_pred_buf_id,
+ i4_pred_stride,
+ u1_cu_size,
+ u1_part_type,
+ u1_num_bytes_per_pel,
+ pf_copy_2d
+
+ );
+
+ pu1_pred_id_of_winners[u1_dst_array_idx] = au1_buf_id_in_use[MERGE_DERIVED];
+
+ if(u1_worst_cost_idx == u1_dst_array_idx)
+ {
+ u1_idx_of_worst_cost_in_pred_buf_array = au1_buf_id_in_use[MERGE_DERIVED];
+ }
+
+ u1_skip_or_merge_cand_is_valid = 1;
+
+ break;
+ }
+ case MIXED_MODE_TYPE1:
+ {
+ (*pu1_num_mixed_mode_type1_cands)++;
+
+ /* The "type0 candidate is valid" argument is true when at least one more */
+ /* accepted candidate follows this one */
+ au1_buf_id_in_use[MIXED_MODE_TYPE1] =
+ ihevce_mixed_mode_cand_type1_pred_buffer_preparation(
+ ppv_pred_buf_list,
+ pps_cand_dst[u1_dst_array_idx],
+ pau1_final_pred_buf_id,
+ pu1_merge_pred_buf_idx_array,
+ i4_pred_stride,
+ au1_buf_id_in_use[ME_OR_SKIP_DERIVED],
+ au1_buf_id_in_use[MERGE_DERIVED],
+ (u1_num_available_cands - i) > 1,
+ u1_cu_size,
+ u1_part_type,
+ u1_num_bytes_per_pel,
+ pf_copy_2d
+
+ );
+
+ pu1_pred_id_of_winners[u1_dst_array_idx] = au1_buf_id_in_use[MIXED_MODE_TYPE1];
+
+ if(u1_worst_cost_idx == u1_dst_array_idx)
+ {
+ u1_idx_of_worst_cost_in_pred_buf_array = au1_buf_id_in_use[MIXED_MODE_TYPE1];
+ }
+
+ break;
+ }
+ case MIXED_MODE_TYPE0:
+ {
+ (*pu1_num_mixed_mode_type0_cands)++;
+
+ au1_buf_id_in_use[MIXED_MODE_TYPE0] =
+ ihevce_mixed_mode_cand_type0_pred_buffer_preparation(
+ ppv_pred_buf_list,
+ pps_cand_dst[u1_dst_array_idx],
+ pau1_final_pred_buf_id,
+ pu1_merge_pred_buf_idx_array,
+ au1_buf_id_in_use[ME_OR_SKIP_DERIVED],
+ au1_buf_id_in_use[MERGE_DERIVED],
+ au1_buf_id_in_use[MIXED_MODE_TYPE1],
+ i4_pred_stride,
+ u1_cu_size,
+ u1_part_type,
+ u1_num_bytes_per_pel,
+ pf_copy_2d);
+
+ pu1_pred_id_of_winners[u1_dst_array_idx] = au1_buf_id_in_use[MIXED_MODE_TYPE0];
+
+ if(u1_worst_cost_idx == u1_dst_array_idx)
+ {
+ u1_idx_of_worst_cost_in_pred_buf_array = au1_buf_id_in_use[MIXED_MODE_TYPE0];
+ }
+
+ break;
+ }
+ }
+ }
+
+ /* Step 4: release the buffers nothing refers to any more */
+ ihevce_free_unused_buf_indices(
+ pu4_pred_buf_usage_indicator,
+ pu1_merge_pred_buf_idx_array,
+ au1_buf_id_in_use,
+ au1_buf_id_to_free,
+ pau1_final_pred_buf_id[ME_OR_SKIP_DERIVED][0],
+ u1_num_available_cands,
+ u1_num_bufs_to_free,
+ u1_eval_merge,
+ u1_eval_skip,
+ u1_part_type);
+
+ /* Step 5: write the bookkeeping back */
+ ps_mode_info->u1_idx_of_worst_cost_in_cost_array = u1_worst_cost_idx;
+ ps_mode_info->u1_num_inter_cands = u1_num_cands_previously_added;
+ ps_mode_info->u1_idx_of_worst_cost_in_pred_buf_array = u1_idx_of_worst_cost_in_pred_buf_array;
+
+ return u1_skip_or_merge_cand_is_valid;
+}
+
+static UWORD8 ihevce_prepare_cand_containers(
+ ihevce_inter_cand_sifter_prms_t *ps_ctxt, /* sifter context: candidate stores and pred buffer pool */
+ cu_inter_cand_t **pps_cands, /* out: container per INTER_CANDIDATE_ID_T entry that this call sets up */
+ UWORD8 *pu1_merge_pred_buf_idx_array, /* out: pred buffer ids reserved for skip / merge / mixed evaluation */
+ UWORD8 *pu1_me_pred_buf_idx, /* out: pred buffer id reserved for the ME candidate */
+ UWORD8 u1_part_type, /* 0 takes the two-buffer merge path, non-zero the three-buffer path with mixed modes */
+ UWORD8 u1_me_cand_list_idx, /* index into ps_ctxt->ps_me_cands */
+ UWORD8 u1_eval_merge, /* non-zero: merge (and, for u1_part_type > 0, mixed mode) containers are prepared too */
+ UWORD8 u1_eval_skip) /* non-zero (with u1_eval_merge == 0): only the skip container is prepared */
+{ /* Returns 0 when a buffer request is not fully met, otherwise the count reported by the last request */
+ UWORD8 u1_num_bufs_currently_allocated;
+
+ WORD32 i4_pred_stride = ps_ctxt->ps_pred_buf_data->i4_pred_stride;
+ UWORD8 u1_cu_size = ps_ctxt->u1_cu_size;
+ UWORD8 u1_cu_pos_x = ps_ctxt->u1_cu_pos_x;
+ UWORD8 u1_cu_pos_y = ps_ctxt->u1_cu_pos_y;
+ void **ppv_pred_buf_list = ps_ctxt->ps_pred_buf_data->apv_inter_pred_data;
+
+ if(!u1_eval_merge)
+ {
+ if(u1_eval_skip)
+ { /* Skip evaluation only */
+ u1_num_bufs_currently_allocated = ihevce_get_free_pred_buf_indices(
+ pu1_merge_pred_buf_idx_array, &ps_ctxt->ps_pred_buf_data->u4_is_buf_in_use, 2);
+ /* NOTE(review): the helper presumably returns how many of the 2 requested buffers it obtained - confirm */
+ if(u1_num_bufs_currently_allocated < 2)
+ {
+ return 0;
+ }
+ /* Skip containers are handed out from the tail of the shared merge/skip array */
+ pps_cands[ME_OR_SKIP_DERIVED] =
+ &ps_ctxt->ps_cu_inter_merge_skip->as_cu_inter_merge_skip_cand
+ [MAX_NUM_CU_MERGE_SKIP_CAND - 1 -
+ ps_ctxt->ps_cu_inter_merge_skip->u1_num_skip_cands];
+ /* The skip candidate is one merge-flagged PU covering the CU; position and size fields are in 4x4 units */
+ pps_cands[ME_OR_SKIP_DERIVED]->b1_skip_flag = 1;
+ pps_cands[ME_OR_SKIP_DERIVED]->b1_eval_mark = 1;
+ pps_cands[ME_OR_SKIP_DERIVED]->b1_eval_tx_cusize = 1;
+ pps_cands[ME_OR_SKIP_DERIVED]->b1_eval_tx_cusize_by2 = 1;
+ pps_cands[ME_OR_SKIP_DERIVED]->b1_intra_has_won = 0;
+ pps_cands[ME_OR_SKIP_DERIVED]->b3_part_size = 0;
+ pps_cands[ME_OR_SKIP_DERIVED]->i4_pred_data_stride = i4_pred_stride;
+ pps_cands[ME_OR_SKIP_DERIVED]->as_inter_pu->b1_intra_flag = 0;
+ pps_cands[ME_OR_SKIP_DERIVED]->as_inter_pu->b1_merge_flag = 1;
+ pps_cands[ME_OR_SKIP_DERIVED]->as_inter_pu->b4_pos_x = u1_cu_pos_x >> 2;
+ pps_cands[ME_OR_SKIP_DERIVED]->as_inter_pu->b4_pos_y = u1_cu_pos_y >> 2;
+ pps_cands[ME_OR_SKIP_DERIVED]->as_inter_pu->b4_wd = (u1_cu_size >> 2) - 1;
+ pps_cands[ME_OR_SKIP_DERIVED]->as_inter_pu->b4_ht = (u1_cu_size >> 2) - 1;
+ /* In skip evaluation MERGE_DERIVED refers to the very same container */
+ pps_cands[MERGE_DERIVED] = pps_cands[ME_OR_SKIP_DERIVED];
+ }
+ else
+ { /* Plain ME candidate, no merge and no skip */
+ u1_num_bufs_currently_allocated = ihevce_get_free_pred_buf_indices(
+ pu1_me_pred_buf_idx, &ps_ctxt->ps_pred_buf_data->u4_is_buf_in_use, 1);
+
+ if(u1_num_bufs_currently_allocated < 1)
+ {
+ return 0;
+ }
+ /* The ME candidate is used in place and pointed at the reserved pred buffer (8 bit and 16 bit views) */
+ pps_cands[ME_OR_SKIP_DERIVED] = &ps_ctxt->ps_me_cands[u1_me_cand_list_idx];
+ pps_cands[ME_OR_SKIP_DERIVED]->i4_pred_data_stride = i4_pred_stride;
+ pps_cands[ME_OR_SKIP_DERIVED]->pu1_pred_data =
+ (UWORD8 *)ppv_pred_buf_list[*pu1_me_pred_buf_idx];
+ pps_cands[ME_OR_SKIP_DERIVED]->pu2_pred_data =
+ (UWORD16 *)ppv_pred_buf_list[*pu1_me_pred_buf_idx];
+ }
+ }
+ else
+ { /* ME candidate together with its merge (and possibly mixed mode) derivatives */
+ u1_num_bufs_currently_allocated = ihevce_get_free_pred_buf_indices(
+ pu1_me_pred_buf_idx, &ps_ctxt->ps_pred_buf_data->u4_is_buf_in_use, 1);
+
+ if(u1_num_bufs_currently_allocated < 1)
+ {
+ return 0;
+ }
+ /* Unlike the ME-only path, the pred data pointers of the ME candidate are not written here */
+ pps_cands[ME_OR_SKIP_DERIVED] = &ps_ctxt->ps_me_cands[u1_me_cand_list_idx];
+
+ if(u1_part_type > 0)
+ { /* Non-zero part type: three more buffers, and mixed mode containers in addition to merge */
+ u1_num_bufs_currently_allocated = ihevce_get_free_pred_buf_indices(
+ pu1_merge_pred_buf_idx_array, &ps_ctxt->ps_pred_buf_data->u4_is_buf_in_use, 3);
+
+ if(u1_num_bufs_currently_allocated < 3)
+ {
+ return 0;
+ }
+ /* Merge containers are handed out from the head of the shared merge/skip array */
+ pps_cands[MERGE_DERIVED] = &ps_ctxt->ps_cu_inter_merge_skip->as_cu_inter_merge_skip_cand
+ [ps_ctxt->ps_cu_inter_merge_skip->u1_num_merge_cands];
+ /* Mixed type0 containers come from the head of the datastore, type1 from its tail */
+ pps_cands[MIXED_MODE_TYPE0] =
+ &ps_ctxt->ps_mixed_modes_datastore
+ ->as_cu_data[ps_ctxt->ps_mixed_modes_datastore->u1_num_mixed_mode_type0_cands];
+
+ pps_cands[MIXED_MODE_TYPE1] =
+ &ps_ctxt->ps_mixed_modes_datastore->as_cu_data
+ [MAX_NUM_MIXED_MODE_INTER_RDO_CANDS - 1 -
+ ps_ctxt->ps_mixed_modes_datastore->u1_num_mixed_mode_type1_cands];
+ /* Every derived candidate starts out as a copy of the ME candidate */
+ *pps_cands[MERGE_DERIVED] = *pps_cands[ME_OR_SKIP_DERIVED];
+ *pps_cands[MIXED_MODE_TYPE0] = *pps_cands[ME_OR_SKIP_DERIVED];
+ *pps_cands[MIXED_MODE_TYPE1] = *pps_cands[ME_OR_SKIP_DERIVED];
+ }
+ else
+ { /* Part type 0: two more buffers and a merge container only */
+ u1_num_bufs_currently_allocated = ihevce_get_free_pred_buf_indices(
+ pu1_merge_pred_buf_idx_array, &ps_ctxt->ps_pred_buf_data->u4_is_buf_in_use, 2);
+
+ if(u1_num_bufs_currently_allocated < 2)
+ {
+ return 0;
+ }
+
+ pps_cands[MERGE_DERIVED] = &ps_ctxt->ps_cu_inter_merge_skip->as_cu_inter_merge_skip_cand
+ [ps_ctxt->ps_cu_inter_merge_skip->u1_num_merge_cands];
+
+ *pps_cands[MERGE_DERIVED] = *pps_cands[ME_OR_SKIP_DERIVED];
+ }
+ /* Both PU slots of the merge candidate are flagged as merge, also when u1_part_type is 0 */
+ pps_cands[MERGE_DERIVED]->as_inter_pu[0].b1_merge_flag = 1;
+ pps_cands[MERGE_DERIVED]->as_inter_pu[1].b1_merge_flag = 1;
+ }
+ /* On the merge paths this is the count of the second request (2 or 3), not a running total */
+ return u1_num_bufs_currently_allocated;
+}
+
+static __inline void ihevce_merge_prms_init(
+    merge_prms_t *ps_prms,
+    merge_cand_list_t *ps_list,
+    inter_pred_ctxt_t *ps_mc_ctxt,
+    mv_pred_ctxt_t *ps_mv_pred_ctxt,
+    PF_LUMA_INTER_PRED_PU pf_luma_inter_pred_pu,
+    PF_SAD_FXN_T pf_sad_fxn,
+    void **ppv_pred_buf_list,
+    ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list,
+    UWORD8 *pu1_merge_pred_buf_array,
+    UWORD8 (*pau1_best_pred_buf_id)[MAX_NUM_INTER_PARTS],
+    UWORD8 *pu1_is_top_used,
+    WORD32 (*pai4_noise_term)[MAX_NUM_INTER_PARTS],
+    UWORD32 (*pau4_pred_variance)[MAX_NUM_INTER_PARTS],
+    UWORD32 *pu4_src_variance,
+    WORD32 i4_alpha_stim_multiplier,
+    WORD32 i4_src_stride,
+    WORD32 i4_pred_stride,
+    WORD32 i4_lambda,
+    UWORD8 u1_is_cu_noisy,
+    UWORD8 u1_is_hbd,
+    UWORD8 u1_max_cands,
+    UWORD8 u1_merge_idx_cabac_model,
+    UWORD8 u1_use_merge_cand_from_top_row)
+{
+    /* Plain field-by-field hand-over of the arguments; au1_valid_merge_indices is not written here */
+    ps_prms->u1_max_cands = u1_max_cands;
+    ps_prms->u1_is_hbd = u1_is_hbd;
+    ps_prms->u1_is_cu_noisy = u1_is_cu_noisy;
+    ps_prms->u1_merge_idx_cabac_model = u1_merge_idx_cabac_model;
+    ps_prms->u1_use_merge_cand_from_top_row = u1_use_merge_cand_from_top_row;
+    ps_prms->i4_lambda = i4_lambda;
+    ps_prms->i4_src_stride = i4_src_stride;
+    ps_prms->i4_pred_stride = i4_pred_stride;
+    ps_prms->i4_alpha_stim_multiplier = i4_alpha_stim_multiplier;
+    ps_prms->pu4_src_variance = pu4_src_variance;
+    ps_prms->pau4_pred_variance = pau4_pred_variance;
+    ps_prms->pai4_noise_term = pai4_noise_term;
+    ps_prms->pu1_is_top_used = pu1_is_top_used;
+    ps_prms->pau1_best_pred_buf_id = pau1_best_pred_buf_id;
+    ps_prms->pu1_merge_pred_buf_array = pu1_merge_pred_buf_array;
+    ps_prms->ppv_pred_buf_list = ppv_pred_buf_list;
+    ps_prms->ps_cmn_utils_optimised_function_list = ps_cmn_utils_optimised_function_list;
+    ps_prms->pf_sad_fxn = pf_sad_fxn;
+    ps_prms->pf_luma_inter_pred_pu = pf_luma_inter_pred_pu;
+    ps_prms->ps_mv_pred_ctxt = ps_mv_pred_ctxt;
+    ps_prms->ps_mc_ctxt = ps_mc_ctxt;
+    ps_prms->ps_list = ps_list;
+}
+
+static UWORD8 ihevce_merge_candidate_seive(
+    nbr_avail_flags_t *ps_nbr,
+    merge_cand_list_t *ps_merge_cand,
+    UWORD8 *pu1_is_top_used,
+    UWORD8 u1_num_merge_cands,
+    UWORD8 u1_use_merge_cand_from_top_row)
+{
+    /*
+     * Returns how many leading merge-list entries are to be evaluated.
+     * Top-row candidates allowed: the incoming count, unchanged.
+     * Otherwise: 1 when a left or bottom-left neighbour exists and entry 0
+     * is not flagged in pu1_is_top_used, else 0. ps_merge_cand is not read.
+     */
+    UWORD8 u1_left_side_nbr_present;
+
+    if(u1_use_merge_cand_from_top_row)
+    {
+        return u1_num_merge_cands;
+    }
+
+    u1_left_side_nbr_present = (ps_nbr->u1_left_avail || ps_nbr->u1_bot_lt_avail);
+    return (u1_left_side_nbr_present && !pu1_is_top_used[0]);
+}
+
+static UWORD8 ihevce_compute_pred_and_populate_modes(
+ ihevce_inter_cand_sifter_prms_t *ps_ctxt, /* sifter context */
+ PF_SAD_FXN_T pf_sad_func, /* cost function handed to the pred/cost helpers */
+ UWORD32 *pu4_src_variance, /* per-PU source variance; read here only for noisy CUs, also forwarded to the merge params */
+ UWORD8 u1_part_type, /* partition type; PRT_2Nx2N means a single PU, anything else two PUs */
+ UWORD8 u1_me_cand_list_idx, /* index of the ME candidate, must be below ps_ctxt->u1_num_me_cands */
+ UWORD8 u1_eval_merge, /* non-zero: also evaluate merge (and mixed modes for two-PU candidates) */
+ UWORD8 u1_eval_skip) /* non-zero: evaluate the skip candidate instead; never set together with u1_eval_merge */
+{ /* Returns 0 when pred buffers could not be reserved, else the result of ihevce_merge_cands_with_existing_best */
+ cu_inter_cand_t *aps_cands[4];
+ pu_mv_t as_mvp_winner[4][NUM_INTER_PU_PARTS];
+ merge_prms_t s_merge_prms;
+ merge_cand_list_t as_merge_cand[MAX_NUM_MERGE_CAND];
+ /* First index of the 4-entry arrays below is an INTER_CANDIDATE_ID_T value, second index the PU */
+ UWORD8 i, j;
+ UWORD32 au4_cost[4][NUM_INTER_PU_PARTS];
+ UWORD8 au1_final_pred_buf_id[4][NUM_INTER_PU_PARTS];
+ UWORD8 au1_merge_pred_buf_idx_array[3];
+ UWORD8 au1_is_top_used[MAX_NUM_MERGE_CAND];
+ UWORD8 u1_me_pred_buf_idx;
+ UWORD8 u1_num_bufs_currently_allocated;
+ WORD32 i4_mean;
+ UWORD32 au4_pred_variance[4][NUM_INTER_PU_PARTS];
+ WORD32 ai4_noise_term[4][NUM_INTER_PU_PARTS];
+
+ UWORD8 u1_cu_pos_x = ps_ctxt->u1_cu_pos_x;
+ UWORD8 u1_cu_pos_y = ps_ctxt->u1_cu_pos_y;
+
+ inter_cu_mode_info_t *ps_cu_mode_info = ps_ctxt->ps_inter_cu_mode_info;
+ inter_pred_ctxt_t *ps_mc_ctxt = ps_ctxt->ps_mc_ctxt;
+ nbr_4x4_t *ps_cu_nbr_buf = ps_ctxt->aps_cu_nbr_buf[0];
+ nbr_4x4_t *ps_pu_left_nbr = ps_ctxt->ps_left_nbr_4x4;
+ nbr_4x4_t *ps_pu_top_nbr = ps_ctxt->ps_top_nbr_4x4;
+ nbr_4x4_t *ps_pu_topleft_nbr = ps_ctxt->ps_topleft_nbr_4x4;
+
+ ihevce_inter_pred_buf_data_t *ps_pred_buf_info = ps_ctxt->ps_pred_buf_data;
+ mv_pred_ctxt_t *ps_mv_pred_ctxt = ps_ctxt->ps_mv_pred_ctxt;
+
+ PF_LUMA_INTER_PRED_PU pf_luma_inter_pred_pu = ps_ctxt->pf_luma_inter_pred_pu;
+
+ void *pv_src = ps_ctxt->pv_src;
+ WORD32 i4_src_stride = ps_ctxt->i4_src_strd;
+ WORD32 i4_pred_stride = ps_ctxt->ps_pred_buf_data->i4_pred_stride;
+ UWORD8 u1_num_parts = (u1_part_type != PRT_2Nx2N) + 1;
+ UWORD8 u1_num_bytes_per_pel = ps_ctxt->u1_is_hbd + 1;
+ void **ppv_pred_buf_list = ps_ctxt->ps_pred_buf_data->apv_inter_pred_data;
+ UWORD8 u1_cu_size = ps_ctxt->u1_cu_size;
+ WORD32 i4_nbr_4x4_left_stride = ps_ctxt->i4_nbr_4x4_left_strd;
+ UWORD8 *pu1_ctb_nbr_map = ps_ctxt->pu1_ctb_nbr_map;
+ WORD32 i4_nbr_map_stride = ps_ctxt->i4_ctb_nbr_map_stride;
+ UWORD8 u1_max_merge_candidates = ps_ctxt->u1_max_merge_candidates;
+ WORD32 i4_max_num_inter_rdopt_cands = ps_ctxt->i4_max_num_inter_rdopt_cands;
+ WORD32 i4_pred_buf_offset = 0;
+ WORD32 i4_src_buf_offset = 0;
+ UWORD8 u1_single_mcl_flag =
+ ((8 == u1_cu_size) && (ps_mv_pred_ctxt->i4_log2_parallel_merge_level_minus2 > 0));
+ UWORD8 u1_skip_or_merge_cand_is_valid = 0;
+ WORD32 i4_lambda_qf = ps_ctxt->i4_lambda_qf;
+ UWORD8 u1_is_cu_noisy = ps_ctxt->u1_is_cu_noisy;
+
+ ASSERT(0 == (u1_eval_skip && u1_eval_merge));
+ ASSERT(u1_me_cand_list_idx < ps_ctxt->u1_num_me_cands);
+
+ /*
+ Algorithm -
+ 1. Determine pred and satd for ME cand.
+ 2. Determine merge winner for PU1.
+ 3. Determine pred and satd for mixed_type0 cand.
+ 4. Determine merge winner for PU2 and hence derive pred and satd for merge cand.
+ 5. Determine merge winner for PU2 assuming ME cand as PU1 winner and hence derive
+ pred and satd for mixed_type1 cand.
+ 6. Sort the 4 preceding costs and hence, the cand list.
+ 7. Merge the sorted lists with the final cand list.
+
+ PS : 2 - 7 will be relevant only if u1_eval_merge = 1 and u1_eval_skip = 0
+ PPS : 1 will not be relevant if u1_eval_skip = 1
+ */
+
+ /*
+ Explanatory notes -
+ 1. Motion Vector Merge candidates and nbr's in all merge mode (RealD)
+ 2. Motion Vector Merge candidates and nbr's in mixed mode (AltD)
+ */
+ /* Pick the candidate containers and reserve the pred buffers for this evaluation */
+ u1_num_bufs_currently_allocated = ihevce_prepare_cand_containers(
+ ps_ctxt,
+ aps_cands,
+ au1_merge_pred_buf_idx_array,
+ &u1_me_pred_buf_idx,
+ u1_part_type,
+ u1_me_cand_list_idx,
+ u1_eval_merge,
+ u1_eval_skip);
+
+ if(0 == u1_num_bufs_currently_allocated)
+ {
+ return 0;
+ }
+ /* s_merge_prms is initialised, and later read, only for merge / skip evaluation */
+ if((u1_eval_merge) || (u1_eval_skip))
+ {
+ ihevce_merge_prms_init(
+ &s_merge_prms,
+ as_merge_cand,
+ ps_mc_ctxt,
+ ps_mv_pred_ctxt,
+ pf_luma_inter_pred_pu,
+ pf_sad_func,
+ ppv_pred_buf_list,
+ ps_ctxt->ps_cmn_utils_optimised_function_list,
+ au1_merge_pred_buf_idx_array,
+ au1_final_pred_buf_id,
+ au1_is_top_used,
+ ai4_noise_term,
+ au4_pred_variance,
+ pu4_src_variance,
+ ps_ctxt->i4_alpha_stim_multiplier,
+ i4_src_stride,
+ i4_pred_stride,
+ i4_lambda_qf,
+ u1_is_cu_noisy,
+ ps_ctxt->u1_is_hbd,
+ u1_max_merge_candidates,
+ ps_ctxt->u1_merge_idx_cabac_model,
+ ps_ctxt->u1_use_merge_cand_from_top_row);
+ }
+ /* One pass per PU of the candidate */
+ for(i = 0; i < u1_num_parts; i++)
+ {
+ nbr_avail_flags_t s_nbr;
+
+ UWORD8 u1_part_wd;
+ UWORD8 u1_part_ht;
+ UWORD8 u1_pu_pos_x_4x4;
+ UWORD8 u1_pu_pos_y_4x4;
+
+ /* aps_cands[MERGE_DERIVED] is set up only for merge / skip evaluation, so the merge PU pointer is formed in that branch */
+
+ PART_SIZE_E e_part_size = (PART_SIZE_E)aps_cands[ME_OR_SKIP_DERIVED]->b3_part_size;
+
+ void *pv_pu_src = (UWORD8 *)pv_src + i4_src_buf_offset;
+ UWORD8 u1_num_merge_cands = 0;
+ /* PU geometry comes from entry 0 (ME_OR_SKIP_DERIVED); sizes are converted from 4x4 units to pels */
+ u1_part_wd = (aps_cands[0]->as_inter_pu[i].b4_wd + 1) << 2;
+ u1_part_ht = (aps_cands[0]->as_inter_pu[i].b4_ht + 1) << 2;
+ u1_pu_pos_x_4x4 = aps_cands[0]->as_inter_pu[i].b4_pos_x;
+ u1_pu_pos_y_4x4 = aps_cands[0]->as_inter_pu[i].b4_pos_y;
+
+ /* Inter cand pred and satd */
+ if(!u1_eval_skip)
+ {
+ void *pv_pu_pred = (UWORD8 *)ppv_pred_buf_list[u1_me_pred_buf_idx] + i4_pred_buf_offset;
+
+ if(ps_ctxt->u1_reuse_me_sad)
+ {
+ ihevce_compute_inter_pred_and_cost(
+ ps_mc_ctxt,
+ pf_luma_inter_pred_pu,
+ pf_sad_func,
+ &aps_cands[ME_OR_SKIP_DERIVED]->as_inter_pu[i],
+ pv_pu_src,
+ pv_pu_pred,
+ i4_src_stride,
+ i4_pred_stride,
+ 0,
+ ps_ctxt->ps_cmn_utils_optimised_function_list);
+ /* The helper's return value is ignored on this path; the ME error metric becomes the cost */
+ au4_cost[ME_OR_SKIP_DERIVED][i] =
+ ps_ctxt->pai4_me_err_metric[u1_me_cand_list_idx][i];
+ }
+ else
+ {
+ au4_cost[ME_OR_SKIP_DERIVED][i] = ihevce_compute_inter_pred_and_cost(
+ ps_mc_ctxt,
+ pf_luma_inter_pred_pu,
+ pf_sad_func,
+ &aps_cands[ME_OR_SKIP_DERIVED]->as_inter_pu[i],
+ pv_pu_src,
+ pv_pu_pred,
+ i4_src_stride,
+ i4_pred_stride,
+ 1,
+ ps_ctxt->ps_cmn_utils_optimised_function_list);
+ }
+
+ au1_final_pred_buf_id[ME_OR_SKIP_DERIVED][i] = u1_me_pred_buf_idx;
+ /* Noisy CU: the cost goes through MULTIPLY_STIM_WITH_DISTORTION with a noise term built from source and pred variances */
+ if(u1_is_cu_noisy && ps_ctxt->i4_alpha_stim_multiplier)
+ {
+ ihevce_calc_variance(
+ pv_pu_pred,
+ i4_pred_stride,
+ &i4_mean,
+ &au4_pred_variance[ME_OR_SKIP_DERIVED][i],
+ u1_part_ht,
+ u1_part_wd,
+ ps_ctxt->u1_is_hbd,
+ 0);
+
+ ai4_noise_term[ME_OR_SKIP_DERIVED][i] = ihevce_compute_noise_term(
+ ps_ctxt->i4_alpha_stim_multiplier,
+ pu4_src_variance[i],
+ au4_pred_variance[ME_OR_SKIP_DERIVED][i]);
+
+ MULTIPLY_STIM_WITH_DISTORTION(
+ au4_cost[ME_OR_SKIP_DERIVED][i],
+ ai4_noise_term[ME_OR_SKIP_DERIVED][i],
+ STIM_Q_FORMAT,
+ ALPHA_Q_FORMAT);
+ }
+ }
+
+ if(u1_eval_skip || u1_eval_merge)
+ {
+ pu_t s_pu, *ps_pu_merge;
+ pu_t *ps_pu = &aps_cands[MERGE_DERIVED]->as_inter_pu[i];
+ UWORD8 u1_is_any_top_available = 1;
+ UWORD8 u1_are_valid_merge_cands_available = 1;
+
+ /* get the neighbour availability flags */
+ if((u1_num_parts > 1) && u1_single_mcl_flag)
+ { /* 8x8 SMPs take the 2Nx2N neighbours */
+ ihevce_get_only_nbr_flag(
+ &s_nbr,
+ pu1_ctb_nbr_map,
+ i4_nbr_map_stride,
+ aps_cands[0]->as_inter_pu[0].b4_pos_x,
+ aps_cands[0]->as_inter_pu[0].b4_pos_y,
+ u1_cu_size >> 2,
+ u1_cu_size >> 2);
+
+ /* Make the PU width and height as 8 */
+ memcpy(&s_pu, ps_pu, sizeof(pu_t));
+ s_pu.b4_pos_x = u1_cu_pos_x >> 2;
+ s_pu.b4_pos_y = u1_cu_pos_y >> 2;
+ s_pu.b4_wd = (u1_cu_size >> 2) - 1;
+ s_pu.b4_ht = (u1_cu_size >> 2) - 1;
+
+ /* Give the local PU structure to MV merge */
+ ps_pu_merge = &s_pu;
+ }
+ else
+ {
+ ihevce_get_only_nbr_flag(
+ &s_nbr,
+ pu1_ctb_nbr_map,
+ i4_nbr_map_stride,
+ u1_pu_pos_x_4x4,
+ u1_pu_pos_y_4x4,
+ u1_part_wd >> 2,
+ u1_part_ht >> 2);
+
+ u1_is_any_top_available = s_nbr.u1_top_avail || s_nbr.u1_top_rt_avail ||
+ s_nbr.u1_top_lt_avail;
+
+ if(!ps_ctxt->u1_use_merge_cand_from_top_row)
+ { /* Top-row candidates disallowed: hide the top neighbours, or give up on merge when nothing else is available */
+ if(u1_is_any_top_available)
+ {
+ if(s_nbr.u1_left_avail || s_nbr.u1_bot_lt_avail)
+ {
+ s_nbr.u1_top_avail = 0;
+ s_nbr.u1_top_rt_avail = 0;
+ s_nbr.u1_top_lt_avail = 0;
+ }
+ else
+ {
+ u1_are_valid_merge_cands_available = 0;
+ }
+ }
+ }
+
+ /* Actual PU passed to MV merge */
+ ps_pu_merge = ps_pu;
+ }
+ if(u1_are_valid_merge_cands_available)
+ {
+ u1_num_merge_cands = ihevce_mv_pred_merge(
+ ps_mv_pred_ctxt,
+ ps_pu_top_nbr,
+ ps_pu_left_nbr,
+ ps_pu_topleft_nbr,
+ i4_nbr_4x4_left_stride,
+ &s_nbr,
+ NULL,
+ ps_pu_merge,
+ e_part_size,
+ i,
+ u1_single_mcl_flag,
+ as_merge_cand,
+ au1_is_top_used);
+ /* Cap the list at the configured maximum */
+ if(u1_num_merge_cands > u1_max_merge_candidates)
+ {
+ u1_num_merge_cands = u1_max_merge_candidates;
+ }
+ /* The sieve may cut the count further when top-row candidates are disallowed */
+ u1_num_merge_cands = ihevce_merge_candidate_seive(
+ &s_nbr,
+ as_merge_cand,
+ au1_is_top_used,
+ u1_num_merge_cands,
+ ps_ctxt->u1_use_merge_cand_from_top_row || !u1_is_any_top_available);
+
+ for(j = 0; j < u1_num_merge_cands; j++)
+ {
+ s_merge_prms.au1_valid_merge_indices[j] = j;
+ }
+
+ au4_cost[MERGE_DERIVED][i] = ihevce_determine_best_merge_pu(
+ &s_merge_prms,
+ &aps_cands[MERGE_DERIVED]->as_inter_pu[i],
+ &aps_cands[ME_OR_SKIP_DERIVED]->as_inter_pu[i],
+ pv_pu_src,
+ au4_cost[ME_OR_SKIP_DERIVED][i],
+ i4_pred_buf_offset,
+ u1_num_merge_cands,
+ i,
+ u1_eval_skip);
+ }
+ else
+ {
+ au4_cost[MERGE_DERIVED][i] = INT_MAX;
+ }
+ /* The mixed mode whose merge PU is this one takes the merge cost: type0 for PU0, type1 for PU1 */
+ au4_cost[(i) ? MIXED_MODE_TYPE1 : MIXED_MODE_TYPE0][i] = au4_cost[MERGE_DERIVED][i];
+
+ if(u1_eval_skip)
+ {
+ /* This statement ensures that the skip candidate is always added */
+ au4_cost[ME_OR_SKIP_DERIVED][i] =
+ (au4_cost[MERGE_DERIVED][0] < INT_MAX) ? SKIP_MODE_COST : INT_MAX;
+ au1_final_pred_buf_id[ME_OR_SKIP_DERIVED][i] =
+ au1_final_pred_buf_id[MERGE_DERIVED][i];
+ }
+ else
+ { /* ME cost gets its MV cost added; the other mixed mode takes the ME cost for this PU */
+ au4_cost[ME_OR_SKIP_DERIVED][i] += ps_ctxt->pai4_mv_cost[u1_me_cand_list_idx][i];
+ au4_cost[(i) ? MIXED_MODE_TYPE0 : MIXED_MODE_TYPE1][i] =
+ au4_cost[ME_OR_SKIP_DERIVED][i];
+ }
+ /* Pred buffer ids follow the same pairing as the costs above */
+ au1_final_pred_buf_id[(i) ? MIXED_MODE_TYPE1 : MIXED_MODE_TYPE0][i] =
+ au1_final_pred_buf_id[MERGE_DERIVED][i];
+ au1_final_pred_buf_id[(i) ? MIXED_MODE_TYPE0 : MIXED_MODE_TYPE1][i] =
+ au1_final_pred_buf_id[ME_OR_SKIP_DERIVED][i];
+ }
+ else
+ {
+ au4_cost[ME_OR_SKIP_DERIVED][i] += ps_ctxt->pai4_mv_cost[u1_me_cand_list_idx][i];
+ }
+ /* After PU0 of a two-PU merge evaluation: mark PU0 in the neighbour map, store the ME PU0 data and step to PU1 */
+ if(!i && (u1_num_parts > 1) && u1_eval_merge)
+ {
+ ihevce_set_inter_nbr_map(
+ pu1_ctb_nbr_map,
+ i4_nbr_map_stride,
+ u1_pu_pos_x_4x4,
+ u1_pu_pos_y_4x4,
+ (u1_part_wd >> 2),
+ (u1_part_ht >> 2),
+ 1);
+ ihevce_populate_nbr_4x4_with_pu_data(
+ ps_cu_nbr_buf, &aps_cands[ME_OR_SKIP_DERIVED]->as_inter_pu[i], u1_cu_size >> 2);
+
+ if(u1_part_wd < u1_cu_size)
+ { /* PU1 lies to the right of PU0 */
+ i4_pred_buf_offset = i4_src_buf_offset = u1_part_wd;
+
+ if(!u1_single_mcl_flag) /* 8x8 SMPs take the 2Nx2N neighbours */
+ {
+ ps_cu_nbr_buf += (u1_part_wd >> 2);
+ ps_pu_left_nbr = ps_cu_nbr_buf - 1;
+ ps_pu_top_nbr += (u1_part_wd >> 2);
+ ps_pu_topleft_nbr = ps_pu_top_nbr - 1;
+ /* the left neighbour now lives in the CU neighbour buffer, hence its stride */
+ i4_nbr_4x4_left_stride = (u1_cu_size >> 2);
+ }
+ }
+ else if(u1_part_ht < u1_cu_size)
+ { /* PU1 lies below PU0 */
+ i4_pred_buf_offset = u1_part_ht * i4_pred_stride;
+ i4_src_buf_offset = u1_part_ht * i4_src_stride;
+
+ if(!u1_single_mcl_flag) /* 8x8 SMPs take the 2Nx2N neighbours */
+ {
+ ps_cu_nbr_buf += (u1_part_ht >> 2) * (u1_cu_size >> 2);
+ ps_pu_left_nbr += (u1_part_ht >> 2) * i4_nbr_4x4_left_stride;
+ ps_pu_top_nbr = ps_cu_nbr_buf - (u1_cu_size >> 2);
+ ps_pu_topleft_nbr = ps_pu_left_nbr - i4_nbr_4x4_left_stride;
+ }
+ }
+ /* Offsets were computed in pels; convert to bytes */
+ i4_pred_buf_offset *= u1_num_bytes_per_pel;
+ i4_src_buf_offset *= u1_num_bytes_per_pel;
+ /* mixed type0 = merge winner for PU0, PU1 stays as copied from the ME candidate */
+ aps_cands[MIXED_MODE_TYPE0]->as_inter_pu[0] = aps_cands[MERGE_DERIVED]->as_inter_pu[0];
+ }
+ else if(!i && (u1_num_parts > 1) && (!u1_eval_merge))
+ { /* Without merge evaluation only the buffer offsets move on to PU1 */
+ if(u1_part_wd < u1_cu_size)
+ {
+ i4_pred_buf_offset = i4_src_buf_offset = u1_part_wd;
+ }
+ else if(u1_part_ht < u1_cu_size)
+ {
+ i4_pred_buf_offset = u1_part_ht * i4_pred_stride;
+ i4_src_buf_offset = u1_part_ht * i4_src_stride;
+ }
+
+ i4_pred_buf_offset *= u1_num_bytes_per_pel;
+ i4_src_buf_offset *= u1_num_bytes_per_pel;
+ }
+ else if(i && (u1_num_parts > 1) && u1_eval_merge)
+ { /* mixed type1 = PU0 as copied from the ME candidate, merge winner for PU1 */
+ aps_cands[MIXED_MODE_TYPE1]->as_inter_pu[1] = aps_cands[MERGE_DERIVED]->as_inter_pu[1];
+ }
+ }
+
+ /* Adding a skip candidate */
+ if((u1_eval_merge) && (0 == u1_part_type))
+ {
+ cu_inter_cand_t *ps_cand = &ps_ctxt->ps_cu_inter_merge_skip->as_cu_inter_merge_skip_cand
+ [MAX_NUM_CU_MERGE_SKIP_CAND - 1 -
+ ps_ctxt->ps_cu_inter_merge_skip->u1_num_skip_cands];
+ /* The skip candidate is a copy of the merge candidate with the skip flag set */
+ (*ps_cand) = (*aps_cands[MERGE_DERIVED]);
+
+ ps_cand->b1_skip_flag = 1;
+ /* For part type 0 the MIXED_MODE_TYPE1 slot carries this skip candidate */
+ aps_cands[MIXED_MODE_TYPE1] = ps_cand;
+ au4_cost[MIXED_MODE_TYPE1][0] = (au4_cost[MERGE_DERIVED][0] < INT_MAX) ? SKIP_MODE_COST
+ : INT_MAX;
+ }
+
+ /* Sort and populate */
+ u1_skip_or_merge_cand_is_valid = ihevce_merge_cands_with_existing_best(
+ ps_cu_mode_info,
+ aps_cands,
+ as_mvp_winner,
+ au4_cost,
+ ppv_pred_buf_list,
+ au1_final_pred_buf_id,
+ &ps_pred_buf_info->u4_is_buf_in_use,
+ &ps_ctxt->ps_cu_inter_merge_skip->u1_num_merge_cands,
+ &ps_ctxt->ps_cu_inter_merge_skip->u1_num_skip_cands,
+ &ps_ctxt->ps_mixed_modes_datastore->u1_num_mixed_mode_type0_cands,
+ &ps_ctxt->ps_mixed_modes_datastore->u1_num_mixed_mode_type1_cands,
+ au1_merge_pred_buf_idx_array,
+ ps_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d,
+
+ i4_pred_stride,
+ i4_max_num_inter_rdopt_cands,
+ u1_cu_size,
+ u1_part_type,
+ u1_eval_merge,
+ u1_eval_skip,
+ u1_num_bytes_per_pel);
+
+ return u1_skip_or_merge_cand_is_valid;
+}
+
+static __inline void ihevce_redundant_candidate_pruner(inter_cu_mode_info_t *ps_inter_cu_mode_info)
+{ /* Removes, in place, candidates that match another candidate's motion but differ from it in merge flags */
+ WORD8 i, j;
+ WORD8 i1_num_merge_vs_mvds;
+ /* 1 marks a candidate for removal; indexed like aps_cu_data */
+ UWORD8 au1_redundant_cand_indices[MAX_NUM_INTER_RDO_CANDS] = { 0 };
+ /* Pass 1: compare every pair (i, j > i); skip candidates and already marked entries are left out */
+ for(i = 0; i < (ps_inter_cu_mode_info->u1_num_inter_cands - 1); i++)
+ {
+ if(au1_redundant_cand_indices[i] || ps_inter_cu_mode_info->aps_cu_data[i]->b1_skip_flag)
+ {
+ continue;
+ }
+
+ for(j = i + 1; j < ps_inter_cu_mode_info->u1_num_inter_cands; j++)
+ {
+ if(au1_redundant_cand_indices[j] || ps_inter_cu_mode_info->aps_cu_data[j]->b1_skip_flag)
+ {
+ continue;
+ }
+ /* Net merge-flag difference (i minus j) over the PUs whose motion compares as matching */
+ i1_num_merge_vs_mvds = 0;
+ /* NOTE(review): ihevce_compare_pu_mv_t presumably returns non-zero when MVs and pred modes match - confirm */
+ if(ps_inter_cu_mode_info->aps_cu_data[j]->b3_part_size ==
+ ps_inter_cu_mode_info->aps_cu_data[i]->b3_part_size)
+ {
+ if(ihevce_compare_pu_mv_t(
+ &ps_inter_cu_mode_info->aps_cu_data[i]->as_inter_pu->mv,
+ &ps_inter_cu_mode_info->aps_cu_data[j]->as_inter_pu->mv,
+ ps_inter_cu_mode_info->aps_cu_data[i]->as_inter_pu->b2_pred_mode,
+ ps_inter_cu_mode_info->aps_cu_data[j]->as_inter_pu->b2_pred_mode))
+ {
+ i1_num_merge_vs_mvds +=
+ ps_inter_cu_mode_info->aps_cu_data[i]->as_inter_pu->b1_merge_flag -
+ ps_inter_cu_mode_info->aps_cu_data[j]->as_inter_pu->b1_merge_flag;
+ /* Second PU is compared only for non-zero part sizes */
+ if(ps_inter_cu_mode_info->aps_cu_data[i]->b3_part_size)
+ { /* NOTE(review): a PU1 mismatch does not clear the PU0 contribution, so the pair can still get marked - confirm intended */
+ if(ihevce_compare_pu_mv_t(
+ &ps_inter_cu_mode_info->aps_cu_data[i]->as_inter_pu[1].mv,
+ &ps_inter_cu_mode_info->aps_cu_data[j]->as_inter_pu[1].mv,
+ ps_inter_cu_mode_info->aps_cu_data[i]->as_inter_pu[1].b2_pred_mode,
+ ps_inter_cu_mode_info->aps_cu_data[j]->as_inter_pu[1].b2_pred_mode))
+ {
+ i1_num_merge_vs_mvds +=
+ ps_inter_cu_mode_info->aps_cu_data[i]->as_inter_pu[1].b1_merge_flag -
+ ps_inter_cu_mode_info->aps_cu_data[j]->as_inter_pu[1].b1_merge_flag;
+ }
+ }
+ }
+ }
+ /* The candidate with fewer merge flags is marked: j when the difference is positive, i otherwise; a zero difference marks nothing */
+ if(i1_num_merge_vs_mvds != 0)
+ {
+ au1_redundant_cand_indices[(i1_num_merge_vs_mvds > 0) ? j : i] = 1;
+ }
+ }
+ }
+ /* Pass 2: close the gaps in the parallel arrays (candidates, costs, pred buffer ids) and in the marker array itself */
+ for(i = 0; i < ps_inter_cu_mode_info->u1_num_inter_cands; i++)
+ {
+ if(au1_redundant_cand_indices[i])
+ {
+ memmove(
+ &ps_inter_cu_mode_info->aps_cu_data[i],
+ &ps_inter_cu_mode_info->aps_cu_data[i + 1],
+ (ps_inter_cu_mode_info->u1_num_inter_cands - i - 1) *
+ sizeof(ps_inter_cu_mode_info->aps_cu_data[i]));
+
+ memmove(
+ &ps_inter_cu_mode_info->au4_cost[i],
+ &ps_inter_cu_mode_info->au4_cost[i + 1],
+ (ps_inter_cu_mode_info->u1_num_inter_cands - i - 1) *
+ sizeof(ps_inter_cu_mode_info->au4_cost[i]));
+
+ memmove(
+ &ps_inter_cu_mode_info->au1_pred_buf_idx[i],
+ &ps_inter_cu_mode_info->au1_pred_buf_idx[i + 1],
+ (ps_inter_cu_mode_info->u1_num_inter_cands - i - 1) *
+ sizeof(ps_inter_cu_mode_info->au1_pred_buf_idx[i]));
+
+ memmove(
+ &au1_redundant_cand_indices[i],
+ &au1_redundant_cand_indices[i + 1],
+ (ps_inter_cu_mode_info->u1_num_inter_cands - i - 1) *
+ sizeof(au1_redundant_cand_indices[i]));
+ /* One candidate less; step back so the entry shifted into slot i is examined as well */
+ ps_inter_cu_mode_info->u1_num_inter_cands--;
+ i--;
+ }
+ }
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_inter_cand_sifter \endif
+*
+* \brief
+* Selects the best inter candidate modes amongst ME, merge, skip and mixed
+* modes, computes the corresponding preds and prunes redundant candidates
+*
+* \param[in,out] ps_ctxt : sifter context; results land in ps_inter_cu_mode_info
+*
+* \author Ittiam
+*****************************************************************************
+*/
+void ihevce_inter_cand_sifter(ihevce_inter_cand_sifter_prms_t *ps_ctxt)
+{
+ PF_SAD_FXN_T pf_sad_func;
+
+ UWORD8 au1_final_cand_idx[MAX_INTER_CU_CANDIDATES];
+ UWORD8 au1_part_types_evaluated[MAX_INTER_CU_CANDIDATES];
+ UWORD8 u1_num_unique_parts;
+ UWORD8 i, j;
+ UWORD32 au4_src_variance[NUM_INTER_PU_PARTS];
+ WORD32 i4_mean;
+
+ cu_inter_cand_t *ps_me_cands = ps_ctxt->ps_me_cands;
+ inter_cu_mode_info_t *ps_cu_mode_info = ps_ctxt->ps_inter_cu_mode_info;
+
+ UWORD8 u1_diff_skip_cand_flag = 1;
+ WORD8 i1_skip_cand_from_merge_idx = -1;
+ WORD8 i1_final_skip_cand_merge_idx = -1;
+ UWORD8 u1_max_num_part_types_to_select = MAX_INTER_CU_CANDIDATES;
+ UWORD8 u1_num_me_cands = ps_ctxt->u1_num_me_cands;
+ UWORD8 u1_num_parts_evaluated_for_merge = 0;
+ UWORD8 u1_is_cu_noisy = ps_ctxt->u1_is_cu_noisy;
+ /* B slices with quality preset value >= IHEVCE_QUALITY_P3 limit the part types to select to one */
+ if((ps_ctxt->u1_quality_preset >= IHEVCE_QUALITY_P3) && (ps_ctxt->i1_slice_type == BSLICE))
+ {
+ u1_max_num_part_types_to_select = 1;
+ }
+ /* Cost function used for the pred cost of all candidates below */
+ {
+ pf_sad_func = (ps_ctxt->u1_use_satd_for_merge_eval) ? compute_satd_8bit
+ : ps_ctxt->pf_evalsad_pt_npu_mxn_8bit;
+ }
+
+ u1_num_unique_parts = ihevce_get_num_part_types_in_me_cand_list(
+ ps_me_cands,
+ au1_part_types_evaluated,
+ au1_final_cand_idx,
+ &u1_diff_skip_cand_flag,
+ &i1_skip_cand_from_merge_idx,
+ &i1_final_skip_cand_merge_idx,
+ u1_max_num_part_types_to_select,
+ u1_num_me_cands);
+ /* Whole-CU source variance, computed only for noisy CUs with a non-zero alpha stim multiplier */
+ if((u1_num_me_cands + u1_diff_skip_cand_flag) && u1_is_cu_noisy &&
+ ps_ctxt->i4_alpha_stim_multiplier)
+ {
+ ihevce_calc_variance(
+ ps_ctxt->pv_src,
+ ps_ctxt->i4_src_strd,
+ &i4_mean,
+ &ps_cu_mode_info->u4_src_variance,
+ ps_ctxt->u1_cu_size,
+ ps_ctxt->u1_cu_size,
+ ps_ctxt->u1_is_hbd,
+ 0);
+ }
+ /* Noisy CUs: the separate skip candidate is dropped, always or above a CU size limit, depending on the build-time switch */
+ if(DISABLE_SKIP_AND_MERGE_WHEN_NOISY && u1_is_cu_noisy)
+ {
+ u1_diff_skip_cand_flag = 0;
+ }
+ else if(!DISABLE_SKIP_AND_MERGE_WHEN_NOISY && u1_is_cu_noisy)
+ {
+ if(ps_ctxt->u1_cu_size > MAX_CU_SIZE_WHERE_MERGE_AND_SKIPS_ENABLED_AND_WHEN_NOISY)
+ {
+ u1_diff_skip_cand_flag = 0;
+ }
+ }
+ /* One iteration per ME candidate, plus a final one for the separate skip candidate when it is enabled */
+ for(i = 0; i < u1_num_me_cands + u1_diff_skip_cand_flag; i++)
+ {
+ UWORD8 u1_part_type;
+ UWORD8 u1_eval_skip;
+ UWORD8 u1_eval_merge;
+
+ /* Iteration i == u1_num_me_cands is the skip candidate; every other iteration is an ME candidate */
+ if(i == u1_num_me_cands)
+ {
+ u1_eval_skip = 1;
+ u1_eval_merge = 0;
+ u1_part_type = 0;
+ }
+ else
+ {
+ u1_eval_skip = 0;
+ u1_part_type = ps_me_cands[i].b3_part_size;
+ /* Merge is evaluated only when i is the next entry of au1_final_cand_idx, and never after u1_num_unique_parts evaluations */
+ if(u1_num_parts_evaluated_for_merge >= u1_num_unique_parts)
+ {
+ u1_eval_merge = 0;
+ u1_num_parts_evaluated_for_merge = u1_num_unique_parts;
+ }
+ else
+ {
+ u1_eval_merge = (i == au1_final_cand_idx[u1_num_parts_evaluated_for_merge]);
+ }
+ /* ...and not when the part type equals an au1_part_types_evaluated entry below the running count */
+ for(j = 0; (j < u1_num_parts_evaluated_for_merge) && (u1_eval_merge); j++)
+ {
+ if(u1_part_type == au1_part_types_evaluated[j])
+ {
+ u1_eval_merge = 0;
+ break;
+ }
+ }
+ }
+ /* Noisy CU, two-PU candidate: one source variance per PU */
+ if(u1_is_cu_noisy && u1_part_type && ps_ctxt->i4_alpha_stim_multiplier)
+ {
+ void *pv_src = ps_ctxt->pv_src;
+ UWORD8 u1_pu_wd = (ps_me_cands[i].as_inter_pu[0].b4_wd + 1) << 2;
+ UWORD8 u1_pu_ht = (ps_me_cands[i].as_inter_pu[0].b4_ht + 1) << 2;
+
+ ihevce_calc_variance(
+ pv_src,
+ ps_ctxt->i4_src_strd,
+ &i4_mean,
+ &au4_src_variance[0],
+ u1_pu_ht,
+ u1_pu_wd,
+ ps_ctxt->u1_is_hbd,
+ 0);
+ /* PU1 starts u1_pu_ht rows down when PU0 spans the CU width, else u1_pu_wd pels to the right (scaled to bytes) */
+ pv_src = (void *) (((UWORD8 *) pv_src) +
+ ((ps_ctxt->u1_cu_size == u1_pu_wd) ? ps_ctxt->i4_src_strd * u1_pu_ht : u1_pu_wd)
+ * (ps_ctxt->u1_is_hbd + 1));
+ u1_pu_wd = (ps_me_cands[i].as_inter_pu[1].b4_wd + 1) << 2;
+ u1_pu_ht = (ps_me_cands[i].as_inter_pu[1].b4_ht + 1) << 2;
+
+ ihevce_calc_variance(
+ pv_src,
+ ps_ctxt->i4_src_strd,
+ &i4_mean,
+ &au4_src_variance[1],
+ u1_pu_ht,
+ u1_pu_wd,
+ ps_ctxt->u1_is_hbd,
+ 0);
+ }
+ else if(u1_is_cu_noisy && !u1_part_type && ps_ctxt->i4_alpha_stim_multiplier)
+ { /* Single PU: reuse the whole-CU variance computed before the loop */
+ au4_src_variance[0] = ps_cu_mode_info->u4_src_variance;
+ }
+ /* Noisy CUs: merge evaluation is suppressed under the same rule as the skip candidate above */
+ if(DISABLE_SKIP_AND_MERGE_WHEN_NOISY && u1_is_cu_noisy)
+ {
+ u1_eval_merge = 0;
+ }
+ else if(!DISABLE_SKIP_AND_MERGE_WHEN_NOISY && u1_is_cu_noisy)
+ {
+ if(ps_ctxt->u1_cu_size > MAX_CU_SIZE_WHERE_MERGE_AND_SKIPS_ENABLED_AND_WHEN_NOISY)
+ {
+ u1_eval_merge = 0;
+ }
+ }
+ /* The validity flag returned by the helper is not needed here, so it is discarded explicitly */
+ (void)ihevce_compute_pred_and_populate_modes(
+ ps_ctxt,
+ pf_sad_func,
+ au4_src_variance,
+ u1_part_type,
+ MIN(i, (u1_num_me_cands - 1)), /* skip iteration reuses the last ME index; NOTE(review): operand is -1 when u1_num_me_cands == 0 - confirm that cannot occur */
+ u1_eval_merge,
+ u1_eval_skip);
+
+ u1_num_parts_evaluated_for_merge += u1_eval_merge;
+
+ /* set the neighbour map to 0 */
+ if(u1_part_type)
+ {
+ ihevce_set_nbr_map(
+ ps_ctxt->pu1_ctb_nbr_map,
+ ps_ctxt->i4_ctb_nbr_map_stride,
+ (ps_ctxt->u1_cu_pos_x >> 2),
+ (ps_ctxt->u1_cu_pos_y >> 2),
+ (ps_ctxt->u1_cu_size >> 2),
+ 0);
+ }
+ }
+ /* Finally remove the candidates the pruner marks as redundant */
+ ihevce_redundant_candidate_pruner(ps_ctxt->ps_inter_cu_mode_info);
+}
diff --git a/encoder/ihevce_enc_loop_inter_mode_sifter.h b/encoder/ihevce_enc_loop_inter_mode_sifter.h
new file mode 100644
index 0000000..153c4e4
--- /dev/null
+++ b/encoder/ihevce_enc_loop_inter_mode_sifter.h
@@ -0,0 +1,225 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_enc_loop_inter_mode_sifter.h
+*
+* \brief
+* Macros, enums, parameter structures and entry point of the inter candidate sifter
+* \date
+* 11/09/2014
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+#ifndef _IHEVCE_ENC_LOOP_INTER_MODE_SIFTER
+#define _IHEVCE_ENC_LOOP_INTER_MODE_SIFTER
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+#define SKIP_MODE_COST ((DISABLE_SKIP) ? INT_MAX : 1) /* cost given to a valid skip candidate: 1, or INT_MAX when DISABLE_SKIP is non-zero */
+
+#define COMPUTE_NUM_POSITIVE_REFERENCES_AND_FREE_IF_ZERO( \
+ value_referred, pos_array, neg_array, length_pos_array, length_neg_array, usage_indicator) \
+ { /* calls ihevce_set_pred_buf_as_free(usage_indicator, value_referred) when the net count below is <= 0 */ \
+ UWORD8 i; \
+ /* NOTE(review): this 'i' hides any 'i' of the expanding scope for the duration of the macro body */ \
+ UWORD8 num_references = 0; \
+ /* +1 for every pos_array entry equal to value_referred */ \
+ for(i = 0; i < length_pos_array; i++) \
+ { \
+ num_references += (value_referred == pos_array[i]); \
+ } \
+ /* -1 for every neg_array entry equal to value_referred */ \
+ for(i = 0; i < length_neg_array; i++) \
+ { \
+ num_references -= (value_referred == neg_array[i]); \
+ } \
+ /* NOTE(review): if UWORD8 is unsigned, going below zero wraps and only an exact 0 passes this test - confirm intent */ \
+ if(num_references <= 0) \
+ { \
+ ihevce_set_pred_buf_as_free(usage_indicator, value_referred); \
+ } \
+ }
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+
+typedef enum
+{ /* Row index into the sifter's per-candidate arrays (containers, costs, pred buffer ids) */
+ ME_OR_SKIP_DERIVED = 0, /* the ME candidate, or the skip candidate during skip evaluation */
+ MERGE_DERIVED = 1, /* candidate whose PUs are all merge winners */
+ MIXED_MODE_TYPE0 = 2, /* merge winner for PU0, ME data for PU1 */
+ MIXED_MODE_TYPE1 = 3 /* ME data for PU0, merge winner for PU1; for part type 0 the slot carries the added skip candidate */
+
+} INTER_CANDIDATE_ID_T;
+
+typedef enum
+{ /* NOTE(review): not referenced anywhere in this header; meaning of the classes must be taken from the users - confirm */
+ CLASS1,
+ CLASS2,
+ CLASS3
+
+} UNIVERSE_CLASS_ID_T;
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+typedef struct
+{ /* Everything ihevce_inter_cand_sifter() needs for one CU */
+ cu_inter_merge_skip_t *ps_cu_inter_merge_skip; /* shared store: merge candidates fill it from the head, skip candidates from the tail */
+
+ cu_mixed_mode_inter_t *ps_mixed_modes_datastore; /* mixed mode store: type0 candidates from the head, type1 from the tail */
+
+ cu_inter_cand_t *ps_me_cands; /* ME candidate list, u1_num_me_cands entries */
+
+ inter_cu_mode_info_t *ps_inter_cu_mode_info; /* resulting candidate list of the CU (updated by the sifter) */
+
+ mv_pred_ctxt_t *ps_mv_pred_ctxt; /* context handed to the MV merge derivation */
+
+ inter_pred_ctxt_t *ps_mc_ctxt; /* context handed to the inter pred computation */
+
+ WORD32 (*pai4_mv_cost)[NUM_INTER_PU_PARTS]; /* [ME candidate][PU] MV cost added to the ME candidate's cost */
+
+ WORD32 (*pai4_me_err_metric)[NUM_INTER_PU_PARTS]; /* [ME candidate][PU] ME error metric, used as cost when u1_reuse_me_sad is set */
+
+ void *pv_src; /* source samples of the CU */
+
+ ihevce_inter_pred_buf_data_t *ps_pred_buf_data; /* pred buffer pool: buffer list, in-use indicator and pred stride */
+
+ UWORD8 *pu1_ctb_nbr_map; /* neighbour availability map, addressed in 4x4 units */
+
+ nbr_4x4_t *aps_cu_nbr_buf[2]; /* CU neighbour buffers; the sifter code uses entry 0 */
+
+ nbr_4x4_t *ps_left_nbr_4x4; /* left neighbour 4x4 data */
+
+ nbr_4x4_t *ps_top_nbr_4x4; /* top neighbour 4x4 data */
+
+ nbr_4x4_t *ps_topleft_nbr_4x4; /* top-left neighbour 4x4 data */
+
+ cu_me_intra_pred_prms_t *ps_cu_me_intra_pred_prms; /* NOTE(review): not read by the sifter functions - confirm whether still needed */
+
+ PF_LUMA_INTER_PRED_PU pf_luma_inter_pred_pu; /* luma inter prediction function */
+
+ WORD32 i4_ctb_nbr_map_stride; /* stride of pu1_ctb_nbr_map */
+
+ WORD32 i4_src_strd; /* stride of pv_src, in pels */
+
+ WORD32 i4_nbr_4x4_left_strd; /* stride of the left neighbour buffer */
+
+ WORD32 i4_max_num_inter_rdopt_cands; /* limit passed on when merging into the CU's candidate list */
+
+ WORD32 i4_lambda_qf; /* lambda handed to merge evaluation */
+
+ UWORD8 u1_cu_size; /* CU size in pels */
+
+ UWORD8 u1_cu_pos_x; /* CU x position in pels (shifted right by 2 for 4x4 units) */
+
+ UWORD8 u1_cu_pos_y; /* CU y position in pels (shifted right by 2 for 4x4 units) */
+
+ UWORD8 u1_num_me_cands; /* number of entries in ps_me_cands */
+
+ UWORD8 u1_max_merge_candidates; /* upper bound applied to the merge candidate count */
+
+ UWORD8 u1_use_satd_for_merge_eval; /* non-zero: compute_satd_8bit is the cost function, else pf_evalsad_pt_npu_mxn_8bit */
+
+ UWORD8 u1_quality_preset; /* compared against IHEVCE_QUALITY_P3 to limit part types in B slices */
+
+ WORD8 i1_slice_type; /* compared against BSLICE */
+
+ UWORD8 u1_is_hbd; /* 0 or 1; bytes per pel is this value + 1 */
+
+ UWORD8 u1_reuse_me_sad; /* non-zero: take the ME error metric as the ME candidate's cost */
+
+ UWORD8 u1_merge_idx_cabac_model; /* forwarded unchanged to the merge params */
+
+ UWORD8 u1_use_merge_cand_from_top_row; /* zero: merge candidates from top neighbours are not used */
+
+ UWORD8 u1_is_cu_noisy; /* non-zero enables the noise-term handling and the noisy-CU merge/skip restrictions */
+
+ WORD32 i4_alpha_stim_multiplier; /* noise-term handling is active only when this is non-zero */
+
+ ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list; /* optimised utility functions (pf_copy_2d is taken from here) */
+
+ FT_SAD_EVALUATOR *pf_evalsad_pt_npu_mxn_8bit; /* cost function used when u1_use_satd_for_merge_eval is zero */
+
+} ihevce_inter_cand_sifter_prms_t;
+
+typedef struct
+{ /* Parameter bundle for merge evaluation; all but the first member are filled by ihevce_merge_prms_init() */
+ UWORD8 au1_valid_merge_indices[MAX_NUM_MERGE_CAND]; /* merge indices to evaluate; written by the caller per PU, not by the init function */
+
+ merge_cand_list_t *ps_list; /* merge candidate list */
+
+ inter_pred_ctxt_t *ps_mc_ctxt; /* inter prediction context */
+
+ mv_pred_ctxt_t *ps_mv_pred_ctxt; /* MV prediction context */
+
+ PF_LUMA_INTER_PRED_PU pf_luma_inter_pred_pu; /* luma inter prediction function */
+
+ PF_SAD_FXN_T pf_sad_fxn; /* cost function */
+
+ void **ppv_pred_buf_list; /* list of pred buffers */
+
+ UWORD8 *pu1_merge_pred_buf_array; /* ids of the pred buffers reserved for merge evaluation */
+
+ UWORD8 (*pau1_best_pred_buf_id)[MAX_NUM_INTER_PARTS]; /* [candidate id][PU] pred buffer id of the winner */
+
+ UWORD8 *pu1_is_top_used; /* per merge candidate flag array */
+
+ WORD32 (*pai4_noise_term)[MAX_NUM_INTER_PARTS]; /* [candidate id][PU] noise term */
+
+ UWORD32 (*pau4_pred_variance)[MAX_NUM_INTER_PARTS]; /* [candidate id][PU] pred variance */
+
+ UWORD32 *pu4_src_variance; /* per-PU source variance */
+
+ WORD32 i4_alpha_stim_multiplier; /* copied from the sifter context */
+
+ UWORD8 u1_merge_idx_cabac_model; /* copied from the sifter context */
+ WORD32 i4_src_stride; /* source stride */
+
+ WORD32 i4_pred_stride; /* pred stride */
+
+ WORD32 i4_lambda; /* receives the sifter context's i4_lambda_qf */
+
+ UWORD8 u1_max_cands; /* receives the sifter context's u1_max_merge_candidates */
+
+ UWORD8 u1_use_merge_cand_from_top_row; /* copied from the sifter context */
+
+ UWORD8 u1_is_cu_noisy; /* copied from the sifter context */
+
+ UWORD8 u1_is_hbd; /* copied from the sifter context */
+
+ ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list; /* optimised utility functions */
+
+} merge_prms_t;
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+void ihevce_inter_cand_sifter(ihevce_inter_cand_sifter_prms_t *ps_ctxt);
+
+#endif /* _IHEVCE_ENC_LOOP_INTER_MODE_SIFTER */
diff --git a/encoder/ihevce_enc_loop_pass.c b/encoder/ihevce_enc_loop_pass.c
new file mode 100644
index 0000000..f1cb79c
--- /dev/null
+++ b/encoder/ihevce_enc_loop_pass.c
@@ -0,0 +1,6354 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*!
+******************************************************************************
+* \file ihevce_enc_loop_pass.c
+*
+* \brief
+* This file contains Encoder normative loop pass related functions
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+*
+* List of Functions
+*
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+#include <limits.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_macros.h"
+#include "ihevc_debug.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+#include "ihevc_common_tables.h"
+#include "ihevc_quant_tables.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_hle_interface.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_multi_thrd_funcs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_entropy_structs.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+#include "ihevce_ipe_instr_set_router.h"
+#include "ihevce_decomp_pre_intra_structs.h"
+#include "ihevce_decomp_pre_intra_pass.h"
+#include "ihevce_enc_loop_structs.h"
+#include "ihevce_nbr_avail.h"
+#include "ihevce_enc_loop_utils.h"
+#include "ihevce_sub_pic_rc.h"
+#include "ihevce_global_tables.h"
+#include "ihevce_bs_compute_ctb.h"
+#include "ihevce_cabac_rdo.h"
+#include "ihevce_deblk.h"
+#include "ihevce_frame_process.h"
+#include "ihevce_rc_enc_structs.h"
+#include "hme_datatype.h"
+#include "hme_interface.h"
+#include "hme_common_defs.h"
+#include "hme_defs.h"
+#include "ihevce_me_instr_set_router.h"
+#include "ihevce_enc_subpel_gen.h"
+#include "ihevce_inter_pred.h"
+#include "ihevce_mv_pred.h"
+#include "ihevce_mv_pred_merge.h"
+#include "ihevce_enc_loop_inter_mode_sifter.h"
+#include "ihevce_enc_cu_recursion.h"
+#include "ihevce_enc_loop_pass.h"
+#include "ihevce_common_utils.h"
+#include "ihevce_dep_mngr_interface.h"
+#include "ihevce_sao.h"
+#include "ihevce_tile_interface.h"
+#include "ihevce_profile.h"
+
+#include "cast_types.h"
+#include "osal.h"
+#include "osal_defaults.h"
+
+/*****************************************************************************/
+/* Globals */
+/*****************************************************************************/
+extern PART_ID_T ge_part_type_to_part_id[MAX_PART_TYPES][MAX_NUM_PARTS];
+/* Both tables are only declared here (extern); going by the dimension macros they are indexed by part type, the first one also by part number. */
+extern UWORD8 gau1_num_parts_in_part_type[MAX_PART_TYPES];
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+#define UPDATE_QP_AT_CTB 6 /* NOTE(review): meaning/unit not evident at this point; confirm at its use sites */
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_enc_loop_ctb_left_copy \endif
+*
+* \brief
+*    Saves the right-most column of the current CTB into the left-neighbour
+*    buffers held in the enc loop context: one luma recon byte per row, one
+*    pair of chroma recon bytes per chroma row and one 4x4 neighbour entry
+*    per 4x4 row.
+*
+* \param[in,out] ps_ctxt : enc loop context; the buffers behind
+*                          pv_left_luma_data, pv_left_chrm_data and
+*                          as_left_col_nbr are written
+* \param[in] ps_cu_prms : supplies the CTB size, recon pointers and strides
+*
+* \return
+*    None
+*
+* \author
+*  Ittiam
+*
+******************************************************************************
+*/
+void ihevce_enc_loop_ctb_left_copy(ihevce_enc_loop_ctxt_t *ps_ctxt, enc_loop_cu_prms_t *ps_cu_prms)
+{
+    const WORD32 i4_ctb_size = ps_cu_prms->i4_ctb_size;
+
+    /* 4x4 units along one CTB edge; also used as the row pitch of as_ctb_nbr_arr */
+    const WORD32 i4_num_4x4 = i4_ctb_size >> 2;
+
+    /* chroma array type 2 (4:2:2) copies every row, any other type half of them */
+    const WORD32 i4_chrm_rows =
+        (2 == ps_ctxt->u1_chroma_array_type) ? i4_ctb_size : (i4_ctb_size >> 1);
+
+    WORD32 i4_row;
+
+    /* ---------------- luma : last pixel of every row ---------------- */
+    {
+        UWORD8 *pu1_dst = (UWORD8 *)ps_ctxt->pv_left_luma_data;
+        UWORD8 *pu1_last_col = ps_cu_prms->pu1_luma_recon + i4_ctb_size - 1;
+        const WORD32 i4_strd = ps_cu_prms->i4_luma_recon_stride;
+
+        for(i4_row = 0; i4_row < i4_ctb_size; i4_row++)
+        {
+            pu1_dst[i4_row] = pu1_last_col[i4_row * i4_strd];
+        }
+    }
+
+    /* ------------- chroma : last two bytes of every row ------------- */
+    {
+        UWORD8 *pu1_dst = (UWORD8 *)ps_ctxt->pv_left_chrm_data;
+        UWORD8 *pu1_last_pair = ps_cu_prms->pu1_chrm_recon + i4_ctb_size - 2;
+        const WORD32 i4_strd = ps_cu_prms->i4_chrm_recon_stride;
+
+        for(i4_row = 0; i4_row < i4_chrm_rows; i4_row++)
+        {
+            const WORD32 i4_src_off = i4_row * i4_strd;
+
+            pu1_dst[(2 * i4_row)] = pu1_last_pair[i4_src_off];
+            pu1_dst[(2 * i4_row) + 1] = pu1_last_pair[i4_src_off + 1];
+        }
+    }
+
+    /* --------- 4x4 neighbour info : last entry of every row --------- */
+    {
+        nbr_4x4_t *ps_dst = &ps_ctxt->as_left_col_nbr[0];
+        nbr_4x4_t *ps_last_col = &ps_ctxt->as_ctb_nbr_arr[0] + (i4_num_4x4 - 1);
+
+        for(i4_row = 0; i4_row < i4_num_4x4; i4_row++)
+        {
+            ps_dst[i4_row] = ps_last_col[i4_row * i4_num_4x4];
+        }
+    }
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_mark_all_modes_to_evaluate \endif
+*
+* \brief
+*    Sets the evaluation mark of every inter candidate slot and, when the CU
+*    has at least one intra RD-opt candidate, of every intra candidate slot
+*    (2Nx2N TU = CU, 2Nx2N TU = CU/2 and each NxN part). Per the original
+*    note, this function is called by the ref instance.
+*
+* \param[in] pv_ctxt : pointer to enc_loop module (not used here)
+* \param[in,out] ps_cu_analyse : pointer to cu analyse; its marks are set to 1
+*
+* \return
+*    None
+*
+* \author
+*  Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_mark_all_modes_to_evaluate(void *pv_ctxt, cu_analyse_t *ps_cu_analyse)
+{
+    WORD32 i4_cand;
+    WORD32 i4_pu;
+
+    (void)pv_ctxt;
+
+    /* inter: all MAX_INTER_CU_CANDIDATES slots are marked */
+    for(i4_cand = 0; i4_cand < MAX_INTER_CU_CANDIDATES; i4_cand++)
+    {
+        ps_cu_analyse->as_cu_inter_cand[i4_cand].b1_eval_mark = 1;
+    }
+
+    /* intra marks are left untouched when there is no intra RD-opt candidate */
+    if(0 == ps_cu_analyse->u1_num_intra_rdopt_cands)
+    {
+        return;
+    }
+
+    /* intra: MAX_INTRA_CU_CANDIDATES + 1 slots per mode list */
+    for(i4_cand = 0; i4_cand <= MAX_INTRA_CU_CANDIDATES; i4_cand++)
+    {
+        ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_eval_mark[i4_cand] = 1;
+        ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_by_2_eval_mark[i4_cand] = 1;
+
+        for(i4_pu = 0; i4_pu < NUM_PU_PARTS; i4_pu++)
+        {
+            ps_cu_analyse->s_cu_intra_cand.au1_nxn_eval_mark[i4_pu][i4_cand] = 1;
+        }
+    }
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_cu_mode_decide \endif
+*
+* \brief
+* Coding Unit mode decide function. Performs RD opt and decides the best mode
+*
+* \param[in] ps_ctxt : pointer to enc_loop module
+* \param[in] ps_cu_prms : pointer to coding unit params (position, buffer pointers)
+* \param[in] ps_cu_analyse : pointer to cu analyse
+* \param[out] ps_final_mode_state : pointer to final mode state (cu final)
+* \param[out] pu1_ecd_data : pointer to store coeff data for ECD
+* \param[out] ps_col_pu : colocated pu buffer pointer
+* \param[out] pu1_col_pu_map : colocated pu map buffer pointer
+* \param[in] col_start_pu_idx : pu index start value
+*
+* \return
+* LWORD64 value (presumably the best RD opt cost - TODO confirm at the return)
+*
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+LWORD64 ihevce_cu_mode_decide(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ enc_loop_cu_prms_t *ps_cu_prms,
+ cu_analyse_t *ps_cu_analyse,
+ final_mode_state_t *ps_final_mode_state,
+ UWORD8 *pu1_ecd_data,
+ pu_col_mv_t *ps_col_pu,
+ UWORD8 *pu1_col_pu_map,
+ WORD32 col_start_pu_idx)
+{
+ enc_loop_chrm_cu_buf_prms_t s_chrm_cu_buf_prms;
+ cu_nbr_prms_t s_cu_nbr_prms;
+ inter_cu_mode_info_t s_inter_cu_mode_info;
+ cu_inter_cand_t *ps_best_inter_cand = NULL;
+ UWORD8 *pu1_cu_top;
+ UWORD8 *pu1_cu_top_left;
+ UWORD8 *pu1_cu_left;
+ UWORD8 *pu1_final_recon = NULL;
+ UWORD8 *pu1_curr_src = NULL;
+ void *pv_curr_src = NULL;
+ void *pv_cu_left = NULL;
+ void *pv_cu_top = NULL;
+ void *pv_cu_top_left = NULL;
+
+ WORD32 cu_left_stride = 0;
+ WORD32 ctr;
+ WORD32 rd_opt_best_idx;
+ LWORD64 rd_opt_least_cost;
+ WORD32 rd_opt_curr_idx;
+ WORD32 num_4x4_in_ctb;
+ WORD32 nbr_4x4_left_strd = 0;
+
+ nbr_4x4_t *ps_topleft_nbr_4x4;
+ nbr_4x4_t *ps_left_nbr_4x4 = NULL;
+ nbr_4x4_t *ps_top_nbr_4x4 = NULL;
+ nbr_4x4_t *ps_curr_nbr_4x4;
+ WORD32 enable_intra_eval_flag;
+ WORD32 i4_best_cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp - 1;
+ WORD32 curr_cu_pos_in_row;
+ WORD32 cu_top_right_offset;
+ WORD32 cu_top_right_dep_pos;
+ WORD32 i4_ctb_x_off, i4_ctb_y_off;
+
+ UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
+ (void)ps_final_mode_state;
+ /* default init */
+ rd_opt_least_cost = MAX_COST_64;
+ ps_ctxt->as_cu_prms[0].i8_best_rdopt_cost = MAX_COST_64;
+ ps_ctxt->as_cu_prms[1].i8_best_rdopt_cost = MAX_COST_64;
+
+ /* Zero cbf tool is enabled by default for all presets */
+ ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
+
+ rd_opt_best_idx = 1;
+ rd_opt_curr_idx = 0;
+ enable_intra_eval_flag = 1;
+
+ /* CU params in enc ctxt*/
+ ps_ctxt->ps_enc_out_ctxt->b3_cu_pos_x = ps_cu_analyse->b3_cu_pos_x;
+ ps_ctxt->ps_enc_out_ctxt->b3_cu_pos_y = ps_cu_analyse->b3_cu_pos_y;
+ ps_ctxt->ps_enc_out_ctxt->u1_cu_size = ps_cu_analyse->u1_cu_size;
+
+ num_4x4_in_ctb = (ps_cu_prms->i4_ctb_size >> 2);
+ ps_curr_nbr_4x4 = &ps_ctxt->as_ctb_nbr_arr[0];
+ ps_curr_nbr_4x4 += (ps_cu_analyse->b3_cu_pos_x << 1);
+ ps_curr_nbr_4x4 += ((ps_cu_analyse->b3_cu_pos_y << 1) * num_4x4_in_ctb);
+
+ /* CB and Cr are pixel interleaved */
+ s_chrm_cu_buf_prms.i4_chrm_recon_stride = ps_cu_prms->i4_chrm_recon_stride;
+
+ s_chrm_cu_buf_prms.i4_chrm_src_stride = ps_cu_prms->i4_chrm_src_stride;
+
+ if(!ps_ctxt->u1_is_input_data_hbd)
+ {
+ /* --------------------------------------- */
+ /* ----- Luma Pointers Derivation -------- */
+ /* --------------------------------------- */
+
+ /* based on CU position derive the pointers */
+ pu1_final_recon = ps_cu_prms->pu1_luma_recon + (ps_cu_analyse->b3_cu_pos_x << 3);
+
+ pu1_curr_src = ps_cu_prms->pu1_luma_src + (ps_cu_analyse->b3_cu_pos_x << 3);
+
+ pu1_final_recon += ((ps_cu_analyse->b3_cu_pos_y << 3) * ps_cu_prms->i4_luma_recon_stride);
+
+ pu1_curr_src += ((ps_cu_analyse->b3_cu_pos_y << 3) * ps_cu_prms->i4_luma_src_stride);
+
+ pv_curr_src = pu1_curr_src;
+
+ /* CU left */
+ if(0 == ps_cu_analyse->b3_cu_pos_x)
+ {
+ /* CTB boundary */
+ pu1_cu_left = (UWORD8 *)ps_ctxt->pv_left_luma_data;
+ pu1_cu_left += (ps_cu_analyse->b3_cu_pos_y << 3);
+ cu_left_stride = 1;
+
+ ps_left_nbr_4x4 = &ps_ctxt->as_left_col_nbr[0];
+ ps_left_nbr_4x4 += ps_cu_analyse->b3_cu_pos_y << 1;
+ nbr_4x4_left_strd = 1;
+ }
+ else
+ {
+ /* inside CTB */
+ pu1_cu_left = pu1_final_recon - 1;
+ cu_left_stride = ps_cu_prms->i4_luma_recon_stride;
+
+ ps_left_nbr_4x4 = ps_curr_nbr_4x4 - 1;
+ nbr_4x4_left_strd = num_4x4_in_ctb;
+ }
+
+ pv_cu_left = pu1_cu_left;
+
+ /* CU top */
+ if(0 == ps_cu_analyse->b3_cu_pos_y)
+ {
+ /* CTB boundary */
+ pu1_cu_top = (UWORD8 *)ps_ctxt->pv_top_row_luma;
+ pu1_cu_top += ps_cu_prms->i4_ctb_pos * ps_cu_prms->i4_ctb_size;
+ pu1_cu_top += (ps_cu_analyse->b3_cu_pos_x << 3);
+
+ ps_top_nbr_4x4 = ps_ctxt->ps_top_row_nbr;
+ ps_top_nbr_4x4 += (ps_cu_prms->i4_ctb_pos * (ps_cu_prms->i4_ctb_size >> 2));
+ ps_top_nbr_4x4 += (ps_cu_analyse->b3_cu_pos_x << 1);
+ }
+ else
+ {
+ /* inside CTB */
+ pu1_cu_top = pu1_final_recon - ps_cu_prms->i4_luma_recon_stride;
+
+ ps_top_nbr_4x4 = ps_curr_nbr_4x4 - num_4x4_in_ctb;
+ }
+
+ pv_cu_top = pu1_cu_top;
+
+ /* CU top left */
+ if((0 == ps_cu_analyse->b3_cu_pos_x) && (0 != ps_cu_analyse->b3_cu_pos_y))
+ {
+ /* left ctb boundary but not first row */
+ pu1_cu_top_left = pu1_cu_left - 1; /* stride is 1 */
+ ps_topleft_nbr_4x4 = ps_left_nbr_4x4 - 1; /* stride is 1 */
+ }
+ else
+ {
+ /* rest all cases topleft is top -1 */
+ pu1_cu_top_left = pu1_cu_top - 1;
+ ps_topleft_nbr_4x4 = ps_top_nbr_4x4 - 1;
+ }
+
+ pv_cu_top_left = pu1_cu_top_left;
+
+ /* Store the CU nbr information in the ctxt for final reconstruction fun. */
+ s_cu_nbr_prms.nbr_4x4_left_strd = nbr_4x4_left_strd;
+ s_cu_nbr_prms.ps_left_nbr_4x4 = ps_left_nbr_4x4;
+ s_cu_nbr_prms.ps_topleft_nbr_4x4 = ps_topleft_nbr_4x4;
+ s_cu_nbr_prms.ps_top_nbr_4x4 = ps_top_nbr_4x4;
+ s_cu_nbr_prms.pu1_cu_left = pu1_cu_left;
+ s_cu_nbr_prms.pu1_cu_top = pu1_cu_top;
+ s_cu_nbr_prms.pu1_cu_top_left = pu1_cu_top_left;
+ s_cu_nbr_prms.cu_left_stride = cu_left_stride;
+
+ /* ------------------------------------------------------------ */
+ /* -- Initialize the number of neigbour skip cu count for rdo --*/
+ /* ------------------------------------------------------------ */
+ {
+ nbr_avail_flags_t s_nbr;
+ WORD32 i4_num_nbr_skip_cus = 0;
+
+ /* get the neighbour availability flags for current cu */
+ ihevce_get_nbr_intra(
+ &s_nbr,
+ ps_ctxt->pu1_ctb_nbr_map,
+ ps_ctxt->i4_nbr_map_strd,
+ (ps_cu_analyse->b3_cu_pos_x << 1),
+ (ps_cu_analyse->b3_cu_pos_y << 1),
+ (ps_cu_analyse->u1_cu_size >> 2));
+ if(s_nbr.u1_top_avail)
+ {
+ i4_num_nbr_skip_cus += ps_top_nbr_4x4->b1_skip_flag;
+ }
+
+ if(s_nbr.u1_left_avail)
+ {
+ i4_num_nbr_skip_cus += ps_left_nbr_4x4->b1_skip_flag;
+ }
+ ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[0].i4_num_nbr_skip_cus =
+ i4_num_nbr_skip_cus;
+ ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[1].i4_num_nbr_skip_cus =
+ i4_num_nbr_skip_cus;
+ }
+
+ /* --------------------------------------- */
+ /* --- Chroma Pointers Derivation -------- */
+ /* --------------------------------------- */
+
+ /* based on CU position derive the pointers */
+ s_chrm_cu_buf_prms.pu1_final_recon =
+ ps_cu_prms->pu1_chrm_recon + (ps_cu_analyse->b3_cu_pos_x << 3);
+
+ s_chrm_cu_buf_prms.pu1_curr_src =
+ ps_cu_prms->pu1_chrm_src + (ps_cu_analyse->b3_cu_pos_x << 3);
+
+ s_chrm_cu_buf_prms.pu1_final_recon +=
+ ((ps_cu_analyse->b3_cu_pos_y << (u1_is_422 + 2)) * ps_cu_prms->i4_chrm_recon_stride);
+
+ s_chrm_cu_buf_prms.pu1_curr_src +=
+ ((ps_cu_analyse->b3_cu_pos_y << (u1_is_422 + 2)) * ps_cu_prms->i4_chrm_src_stride);
+
+ /* CU left */
+ if(0 == ps_cu_analyse->b3_cu_pos_x)
+ {
+ /* CTB boundary */
+ s_chrm_cu_buf_prms.pu1_cu_left = (UWORD8 *)ps_ctxt->pv_left_chrm_data;
+ s_chrm_cu_buf_prms.pu1_cu_left += (ps_cu_analyse->b3_cu_pos_y << (u1_is_422 + 3));
+ s_chrm_cu_buf_prms.i4_cu_left_stride = 2;
+ }
+ else
+ {
+ /* inside CTB */
+ s_chrm_cu_buf_prms.pu1_cu_left = s_chrm_cu_buf_prms.pu1_final_recon - 2;
+ s_chrm_cu_buf_prms.i4_cu_left_stride = ps_cu_prms->i4_chrm_recon_stride;
+ }
+
+ /* CU top */
+ if(0 == ps_cu_analyse->b3_cu_pos_y)
+ {
+ /* CTB boundary */
+ s_chrm_cu_buf_prms.pu1_cu_top = (UWORD8 *)ps_ctxt->pv_top_row_chroma;
+ s_chrm_cu_buf_prms.pu1_cu_top += ps_cu_prms->i4_ctb_pos * ps_cu_prms->i4_ctb_size;
+ s_chrm_cu_buf_prms.pu1_cu_top += (ps_cu_analyse->b3_cu_pos_x << 3);
+ }
+ else
+ {
+ /* inside CTB */
+ s_chrm_cu_buf_prms.pu1_cu_top =
+ s_chrm_cu_buf_prms.pu1_final_recon - ps_cu_prms->i4_chrm_recon_stride;
+ }
+
+ /* CU top left */
+ if((0 == ps_cu_analyse->b3_cu_pos_x) && (0 != ps_cu_analyse->b3_cu_pos_y))
+ {
+ /* left ctb boundary but not first row */
+ s_chrm_cu_buf_prms.pu1_cu_top_left =
+ s_chrm_cu_buf_prms.pu1_cu_left - 2; /* stride is 1 (2 pixels) */
+ }
+ else
+ {
+ /* rest all cases topleft is top -2 */
+ s_chrm_cu_buf_prms.pu1_cu_top_left = s_chrm_cu_buf_prms.pu1_cu_top - 2;
+ }
+ }
+
+ /* Set Variables for Dep. Checking and Setting */
+ i4_ctb_x_off = (ps_cu_prms->i4_ctb_pos << 6);
+
+ i4_ctb_y_off = ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y;
+ ps_ctxt->i4_satd_buf_idx = rd_opt_curr_idx;
+
+ /* Set the pred pointer count for ME/intra to 0 to start */
+ ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count = 0;
+
+ ASSERT(
+ (ps_cu_analyse->u1_num_inter_cands > 0) || (ps_cu_analyse->u1_num_intra_rdopt_cands > 0));
+
+ ASSERT(ps_cu_analyse->u1_num_inter_cands <= MAX_INTER_CU_CANDIDATES);
+ s_inter_cu_mode_info.u1_num_inter_cands = 0;
+ s_inter_cu_mode_info.u1_idx_of_worst_cost_in_cost_array = 0;
+ s_inter_cu_mode_info.u1_idx_of_worst_cost_in_pred_buf_array = 0;
+
+ ps_ctxt->s_cu_inter_merge_skip.u1_num_merge_cands = 0;
+ ps_ctxt->s_cu_inter_merge_skip.u1_num_skip_cands = 0;
+ ps_ctxt->s_mixed_mode_inter_cu.u1_num_mixed_mode_type0_cands = 0;
+ ps_ctxt->s_mixed_mode_inter_cu.u1_num_mixed_mode_type1_cands = 0;
+ ps_ctxt->s_pred_buf_data.i4_pred_stride = ps_cu_analyse->u1_cu_size;
+ if(0 != ps_cu_analyse->u1_num_inter_cands)
+ {
+ ihevce_inter_cand_sifter_prms_t s_prms;
+
+ UWORD8 u1_enable_top_row_sync;
+
+ if(ps_ctxt->u1_disable_intra_eval)
+ {
+ u1_enable_top_row_sync = !DISABLE_TOP_SYNC;
+ }
+ else
+ {
+ u1_enable_top_row_sync = 1;
+ }
+
+ if((!ps_ctxt->u1_use_top_at_ctb_boundary) && u1_enable_top_row_sync)
+ {
+ /* Wait till top data is ready */
+ /* Currently checking till top right CU */
+ curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
+
+ if(i4_ctb_y_off == 0)
+ {
+ /* No wait for 1st row */
+ cu_top_right_offset = -(MAX_CTB_SIZE);
+ {
+ ihevce_tile_params_t *ps_col_tile_params =
+ ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base +
+ ps_ctxt->i4_tile_col_idx);
+ /* No wait for 1st row */
+ cu_top_right_offset = -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
+ }
+ cu_top_right_dep_pos = 0;
+ }
+ else
+ {
+ cu_top_right_offset = (ps_cu_analyse->u1_cu_size) + 4;
+ cu_top_right_dep_pos = (i4_ctb_y_off >> 6) - 1;
+ }
+
+ if(0 == ps_cu_analyse->b3_cu_pos_y)
+ {
+ ihevce_dmgr_chk_row_row_sync(
+ ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
+ curr_cu_pos_in_row,
+ cu_top_right_offset,
+ cu_top_right_dep_pos,
+ ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
+ ps_ctxt->thrd_id);
+ }
+ }
+
+ s_prms.i4_ctb_nbr_map_stride = ps_ctxt->i4_nbr_map_strd;
+ s_prms.i4_max_num_inter_rdopt_cands = ps_ctxt->i4_max_num_inter_rdopt_cands;
+ s_prms.i4_nbr_4x4_left_strd = nbr_4x4_left_strd;
+ s_prms.i4_src_strd = ps_cu_prms->i4_luma_src_stride;
+ s_prms.ps_cu_inter_merge_skip = &ps_ctxt->s_cu_inter_merge_skip;
+ s_prms.aps_cu_nbr_buf[0] = &ps_ctxt->as_cu_nbr[ps_ctxt->i4_satd_buf_idx][0];
+ s_prms.aps_cu_nbr_buf[1] = &ps_ctxt->as_cu_nbr[!ps_ctxt->i4_satd_buf_idx][0];
+ s_prms.ps_left_nbr_4x4 = ps_left_nbr_4x4;
+ s_prms.ps_mc_ctxt = &ps_ctxt->s_mc_ctxt;
+ s_prms.ps_me_cands = ps_cu_analyse->as_cu_inter_cand;
+ s_prms.ps_mixed_modes_datastore = &ps_ctxt->s_mixed_mode_inter_cu;
+ s_prms.ps_mv_pred_ctxt = &ps_ctxt->s_mv_pred_ctxt;
+ s_prms.ps_pred_buf_data = &ps_ctxt->s_pred_buf_data;
+ s_prms.ps_topleft_nbr_4x4 = ps_topleft_nbr_4x4;
+ s_prms.ps_top_nbr_4x4 = ps_top_nbr_4x4;
+ s_prms.pu1_ctb_nbr_map = ps_ctxt->pu1_ctb_nbr_map;
+ s_prms.pv_src = pv_curr_src;
+ s_prms.u1_cu_pos_x = ps_cu_analyse->b3_cu_pos_x << 3;
+ s_prms.u1_cu_pos_y = ps_cu_analyse->b3_cu_pos_y << 3;
+ s_prms.u1_cu_size = ps_cu_analyse->u1_cu_size;
+ s_prms.u1_max_merge_candidates = ps_ctxt->i4_max_merge_candidates;
+ s_prms.u1_num_me_cands = ps_cu_analyse->u1_num_inter_cands;
+ s_prms.u1_use_satd_for_merge_eval = ps_ctxt->i4_use_satd_for_merge_eval;
+ s_prms.u1_quality_preset = ps_ctxt->i4_quality_preset;
+ s_prms.i1_slice_type = ps_ctxt->i1_slice_type;
+ s_prms.ps_cu_me_intra_pred_prms = &ps_ctxt->s_cu_me_intra_pred_prms;
+ s_prms.u1_is_hbd = (ps_ctxt->u1_bit_depth > 8);
+ s_prms.ps_inter_cu_mode_info = &s_inter_cu_mode_info;
+ s_prms.pai4_mv_cost = ps_cu_analyse->ai4_mv_cost;
+ s_prms.i4_lambda_qf = ps_ctxt->i4_sad_lamda;
+ s_prms.u1_use_merge_cand_from_top_row =
+ (u1_enable_top_row_sync || (s_prms.u1_cu_pos_y > 0));
+ s_prms.u1_merge_idx_cabac_model =
+ ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[IHEVC_CAB_MERGE_IDX_EXT];
+#if REUSE_ME_COMPUTED_ERROR_FOR_INTER_CAND_SIFTING
+ s_prms.pai4_me_err_metric = ps_cu_analyse->ai4_err_metric;
+ s_prms.u1_reuse_me_sad = 1;
+#else
+ s_prms.u1_reuse_me_sad = 0;
+#endif
+
+ if(ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_slice_type != PSLICE)
+ {
+ if(ps_ctxt->i4_temporal_layer == 1)
+ {
+ s_prms.i4_alpha_stim_multiplier = ALPHA_FOR_NOISE_TERM_IN_ME_BREF;
+ }
+ else
+ {
+ s_prms.i4_alpha_stim_multiplier = ALPHA_FOR_NOISE_TERM_IN_ME;
+ }
+ }
+ else
+ {
+ s_prms.i4_alpha_stim_multiplier = ALPHA_FOR_NOISE_TERM_IN_ME_P;
+ }
+ s_prms.u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy;
+
+ if(s_prms.u1_is_cu_noisy)
+ {
+ s_prms.i4_lambda_qf =
+ ((float)s_prms.i4_lambda_qf) * (100.0f - ME_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f;
+ }
+ s_prms.pf_luma_inter_pred_pu = ihevce_luma_inter_pred_pu;
+
+ s_prms.ps_cmn_utils_optimised_function_list = &ps_ctxt->s_cmn_opt_func;
+
+ s_prms.pf_evalsad_pt_npu_mxn_8bit = (FT_SAD_EVALUATOR *)ps_ctxt->pv_evalsad_pt_npu_mxn_8bit;
+ ihevce_inter_cand_sifter(&s_prms);
+ }
+ if(u1_is_422)
+ {
+ UWORD8 au1_buf_ids[NUM_CU_ME_INTRA_PRED_IDX - 1];
+ UWORD8 u1_num_bufs_allocated;
+
+ u1_num_bufs_allocated = ihevce_get_free_pred_buf_indices(
+ au1_buf_ids, &ps_ctxt->s_pred_buf_data.u4_is_buf_in_use, NUM_CU_ME_INTRA_PRED_IDX - 1);
+
+ ASSERT(u1_num_bufs_allocated == (NUM_CU_ME_INTRA_PRED_IDX - 1));
+
+ for(ctr = ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count; ctr < u1_num_bufs_allocated;
+ ctr++)
+ {
+ {
+ ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[ctr] =
+ (UWORD8 *)ps_ctxt->s_pred_buf_data.apv_inter_pred_data[au1_buf_ids[ctr]];
+ }
+
+ ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[ctr] = ps_cu_analyse->u1_cu_size;
+
+ ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count++;
+ }
+
+ {
+ ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[ctr] =
+ (UWORD8 *)ps_ctxt->pv_422_chroma_intra_pred_buf;
+ }
+
+ ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[ctr] = ps_cu_analyse->u1_cu_size;
+
+ ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count++;
+ }
+ else
+ {
+ UWORD8 au1_buf_ids[NUM_CU_ME_INTRA_PRED_IDX];
+ UWORD8 u1_num_bufs_allocated;
+
+ u1_num_bufs_allocated = ihevce_get_free_pred_buf_indices(
+ au1_buf_ids, &ps_ctxt->s_pred_buf_data.u4_is_buf_in_use, NUM_CU_ME_INTRA_PRED_IDX);
+
+ ASSERT(u1_num_bufs_allocated == NUM_CU_ME_INTRA_PRED_IDX);
+
+ for(ctr = ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count; ctr < u1_num_bufs_allocated;
+ ctr++)
+ {
+ {
+ ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[ctr] =
+ (UWORD8 *)ps_ctxt->s_pred_buf_data.apv_inter_pred_data[au1_buf_ids[ctr]];
+ }
+
+ ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[ctr] = ps_cu_analyse->u1_cu_size;
+
+ ps_ctxt->s_cu_me_intra_pred_prms.i4_pointer_count++;
+ }
+ }
+
+ ihevce_mark_all_modes_to_evaluate(ps_ctxt, ps_cu_analyse);
+
+ ps_ctxt->as_cu_prms[0].s_recon_datastore.u1_is_lumaRecon_available = 0;
+ ps_ctxt->as_cu_prms[1].s_recon_datastore.u1_is_lumaRecon_available = 0;
+ ps_ctxt->as_cu_prms[0].s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
+ ps_ctxt->as_cu_prms[1].s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
+ ps_ctxt->as_cu_prms[0].s_recon_datastore.au1_is_chromaRecon_available[1] = 0;
+ ps_ctxt->as_cu_prms[1].s_recon_datastore.au1_is_chromaRecon_available[1] = 0;
+ ps_ctxt->as_cu_prms[0].s_recon_datastore.au1_is_chromaRecon_available[2] = 0;
+ ps_ctxt->as_cu_prms[1].s_recon_datastore.au1_is_chromaRecon_available[2] = 0;
+ /* --------------------------------------- */
+ /* ------ Inter RD OPT stage ------------- */
+ /* --------------------------------------- */
+ if(0 != s_inter_cu_mode_info.u1_num_inter_cands)
+ {
+ UWORD8 u1_ssd_bit_info_ctr = 0;
+
+ /* -- run a loop over all Inter rd opt cands ------ */
+ for(ctr = 0; ctr < s_inter_cu_mode_info.u1_num_inter_cands; ctr++)
+ {
+ cu_inter_cand_t *ps_inter_cand;
+
+ LWORD64 rd_opt_cost = 0;
+
+ ps_inter_cand = s_inter_cu_mode_info.aps_cu_data[ctr];
+
+ if((ps_inter_cand->b1_skip_flag) || (ps_inter_cand->as_inter_pu[0].b1_merge_flag) ||
+ (ps_inter_cand->b3_part_size && ps_inter_cand->as_inter_pu[1].b1_merge_flag))
+ {
+ ps_inter_cand->b1_eval_mark = 1;
+ }
+
+ /****************************************************************/
+ /* This check is only valid for derived instances. */
+ /* check if this mode needs to be evaluated or not. */
+ /* if it is a skip candidate, go ahead and evaluate it even if */
+ /* it has not been marked while sorting. */
+ /****************************************************************/
+ if((0 == ps_inter_cand->b1_eval_mark) && (0 == ps_inter_cand->b1_skip_flag))
+ {
+ continue;
+ }
+
+ /* RDOPT related copies and settings */
+ ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx;
+
+ /* RDOPT copy States : Prev Cu best to current init */
+ COPY_CABAC_STATES(
+ &ps_ctxt->au1_rdopt_init_ctxt_models[0],
+ &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
+ IHEVC_CAB_CTXT_END * sizeof(UWORD8));
+ /* MVP ,MVD calc and Motion compensation */
+ rd_opt_cost = ((pf_inter_rdopt_cu_mc_mvp)ps_ctxt->pv_inter_rdopt_cu_mc_mvp)(
+ ps_ctxt,
+ ps_inter_cand,
+ ps_cu_analyse->u1_cu_size,
+ ps_cu_analyse->b3_cu_pos_x,
+ ps_cu_analyse->b3_cu_pos_y,
+ ps_left_nbr_4x4,
+ ps_top_nbr_4x4,
+ ps_topleft_nbr_4x4,
+ nbr_4x4_left_strd,
+ rd_opt_curr_idx);
+
+#if ENABLE_TU_TREE_DETERMINATION_IN_RDOPT
+ if((ps_ctxt->u1_bit_depth == 8) && (!ps_inter_cand->b1_skip_flag))
+ {
+ ihevce_determine_tu_tree_distribution(
+ ps_inter_cand,
+ (me_func_selector_t *)ps_ctxt->pv_err_func_selector,
+ ps_ctxt->ai2_scratch,
+ (UWORD8 *)pv_curr_src,
+ ps_cu_prms->i4_luma_src_stride,
+ ps_ctxt->i4_satd_lamda,
+ LAMBDA_Q_SHIFT,
+ ps_cu_analyse->u1_cu_size,
+ ps_ctxt->u1_max_tr_depth);
+ }
+#endif
+#if DISABLE_ZERO_ZBF_IN_INTER
+ ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF;
+#else
+ ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
+#endif
+ /* Recon loop with different TUs based on partition type*/
+ rd_opt_cost += ((pf_inter_rdopt_cu_ntu)ps_ctxt->pv_inter_rdopt_cu_ntu)(
+ ps_ctxt,
+ ps_cu_prms,
+ pv_curr_src,
+ ps_cu_analyse->u1_cu_size,
+ ps_cu_analyse->b3_cu_pos_x,
+ ps_cu_analyse->b3_cu_pos_y,
+ rd_opt_curr_idx,
+ &s_chrm_cu_buf_prms,
+ ps_inter_cand,
+ ps_cu_analyse,
+ !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
+ : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
+ (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
+ 100.0);
+
+#if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
+ if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
+ {
+ ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
+ ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
+ ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
+ }
+#endif
+
+ /* based on the rd opt cost choose the best and current index */
+ if(rd_opt_cost < rd_opt_least_cost)
+ {
+ /* swap the best and current indx */
+ rd_opt_best_idx = !rd_opt_best_idx;
+ rd_opt_curr_idx = !rd_opt_curr_idx;
+
+ ps_ctxt->as_cu_prms[rd_opt_best_idx].i8_best_rdopt_cost = rd_opt_cost;
+ rd_opt_least_cost = rd_opt_cost;
+ i4_best_cu_qp = ps_ctxt->i4_cu_qp;
+
+ /* Store the best Inter cand. for final_recon function */
+ ps_best_inter_cand = ps_inter_cand;
+ }
+
+ /* set the neighbour map to 0 */
+ ihevce_set_nbr_map(
+ ps_ctxt->pu1_ctb_nbr_map,
+ ps_ctxt->i4_nbr_map_strd,
+ (ps_cu_analyse->b3_cu_pos_x << 1),
+ (ps_cu_analyse->b3_cu_pos_y << 1),
+ (ps_cu_analyse->u1_cu_size >> 2),
+ 0);
+
+ } /* end of loop for all the Inter RD OPT cand */
+ }
+ /* --------------------------------------- */
+ /* ---- Conditional Eval of Intra -------- */
+ /* --------------------------------------- */
+ {
+ enc_loop_cu_final_prms_t *ps_enc_loop_bestprms;
+ ps_enc_loop_bestprms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
+
+ /* check if inter candidates are valid */
+ if(0 != ps_cu_analyse->u1_num_inter_cands)
+ {
+ /* if skip or no residual inter candidates has won then */
+ /* evaluation of intra candidates is disabled */
+ if((1 == ps_enc_loop_bestprms->u1_skip_flag) ||
+ (0 == ps_enc_loop_bestprms->u1_is_cu_coded))
+ {
+ enable_intra_eval_flag = 0;
+ }
+ }
+ /* Disable Intra Gating for HIGH QUALITY PRESET */
+#if !ENABLE_INTRA_GATING_FOR_HQ
+ if(IHEVCE_QUALITY_P3 > ps_ctxt->i4_quality_preset)
+ {
+ enable_intra_eval_flag = 1;
+
+#if DISABLE_LARGE_INTRA_PQ
+ if((IHEVCE_QUALITY_P0 == ps_ctxt->i4_quality_preset) && (ps_cu_prms->u1_is_cu_noisy) &&
+ (ps_ctxt->i1_slice_type != ISLICE) && (0 != s_inter_cu_mode_info.u1_num_inter_cands))
+ {
+ if(ps_cu_analyse->u1_cu_size > 16)
+ {
+ /* Disable 32x32 / 64x64 Intra in PQ P and B pics */
+ enable_intra_eval_flag = 0;
+ }
+ else if(ps_cu_analyse->u1_cu_size == 16)
+ {
+ /* Disable tu equal to cu mode in 16x16 Intra in PQ P and B pics */
+ ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0] = 255;
+ }
+ }
+#endif
+ }
+#endif
+ }
+
+ /* --------------------------------------- */
+ /* ------ Intra RD OPT stage ------------- */
+ /* --------------------------------------- */
+
+ /* -- run a loop over all Intra rd opt cands ------ */
+ if((0 != ps_cu_analyse->u1_num_intra_rdopt_cands) && (1 == enable_intra_eval_flag))
+ {
+ LWORD64 rd_opt_cost;
+ WORD32 end_flag = 0;
+ WORD32 cu_eval_done = 0;
+ WORD32 subcu_eval_done = 0;
+ WORD32 subpu_eval_done = 0;
+ WORD32 max_trans_size;
+ WORD32 sync_wait_stride;
+ max_trans_size = MIN(MAX_TU_SIZE, (ps_cu_analyse->u1_cu_size));
+ sync_wait_stride = (ps_cu_analyse->u1_cu_size) + max_trans_size;
+
+ if(!ps_ctxt->u1_use_top_at_ctb_boundary)
+ {
+ /* Wait till top data is ready */
+ /* Currently checking till top right CU */
+ curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
+
+ if(i4_ctb_y_off == 0)
+ {
+ /* No wait for 1st row */
+ cu_top_right_offset = -(MAX_CTB_SIZE);
+ {
+ ihevce_tile_params_t *ps_col_tile_params =
+ ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base +
+ ps_ctxt->i4_tile_col_idx);
+ /* No wait for 1st row */
+ cu_top_right_offset = -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
+ }
+ cu_top_right_dep_pos = 0;
+ }
+ else
+ {
+ cu_top_right_offset = sync_wait_stride;
+ cu_top_right_dep_pos = (i4_ctb_y_off >> 6) - 1;
+ }
+
+ if(0 == ps_cu_analyse->b3_cu_pos_y)
+ {
+ ihevce_dmgr_chk_row_row_sync(
+ ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
+ curr_cu_pos_in_row,
+ cu_top_right_offset,
+ cu_top_right_dep_pos,
+ ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
+ ps_ctxt->thrd_id);
+ }
+ }
+ ctr = 0;
+
+ /* Zero cbf tool is disabled for intra CUs */
+#if ENABLE_ZERO_CBF_IN_INTRA
+ ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
+#else
+ ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF;
+#endif
+
+ /* Intra Mode gating based on MPM cand list and encoder quality preset */
+ if((ps_ctxt->i1_slice_type != ISLICE) && (ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3))
+ {
+ ihevce_mpm_idx_based_filter_RDOPT_cand(
+ ps_ctxt,
+ ps_cu_analyse,
+ ps_left_nbr_4x4,
+ ps_top_nbr_4x4,
+ &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[0],
+ &ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_eval_mark[0]);
+
+ ihevce_mpm_idx_based_filter_RDOPT_cand(
+ ps_ctxt,
+ ps_cu_analyse,
+ ps_left_nbr_4x4,
+ ps_top_nbr_4x4,
+ &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0],
+ &ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_by_2_eval_mark[0]);
+ }
+
+ /* Call Chroma SATD function for curr_func_mode in HIGH QUALITY mode */
+ if(1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd)
+ {
+ /* For cu_size = 64, there won't be any TU_EQ_CU case */
+ if(64 != ps_cu_analyse->u1_cu_size)
+ {
+ /* RDOPT copy States : Prev Cu best to current init */
+ COPY_CABAC_STATES(
+ &ps_ctxt->au1_rdopt_init_ctxt_models[0],
+ &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
+ IHEVC_CAB_CTXT_END);
+
+ /* RDOPT related copies and settings */
+ ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx;
+
+ /* Calc. best SATD mode for TU_EQ_CU case */
+ ((pf_intra_chroma_pred_mode_selector)ps_ctxt->pv_intra_chroma_pred_mode_selector)(
+ ps_ctxt,
+ &s_chrm_cu_buf_prms,
+ ps_cu_analyse,
+ rd_opt_curr_idx,
+ TU_EQ_CU,
+ !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
+ : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
+ (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
+ 100.0,
+ ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY);
+
+#if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
+ if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
+ {
+ ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
+ ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
+ ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
+ }
+#endif
+ }
+
+ /* For cu_size=8 case, chroma cost will be same for TU_EQ_CU and
+ TU_EQ_CU_DIV2 case */
+
+ if((ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] !=
+ 255) &&
+ (8 != ps_cu_analyse->u1_cu_size))
+ {
+ /* RDOPT copy States : Prev Cu best to current init */
+ COPY_CABAC_STATES(
+ &ps_ctxt->au1_rdopt_init_ctxt_models[0],
+ &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
+ IHEVC_CAB_CTXT_END);
+
+ /* RDOPT related copies and settings */
+ ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx;
+
+ /* Calc. best SATD mode for TU_EQ_CU_DIV2 case */
+ ((pf_intra_chroma_pred_mode_selector)ps_ctxt->pv_intra_chroma_pred_mode_selector)(
+ ps_ctxt,
+ &s_chrm_cu_buf_prms,
+ ps_cu_analyse,
+ rd_opt_curr_idx,
+ TU_EQ_CU_DIV2,
+ !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
+ : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
+ (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
+ 100.0,
+ ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY);
+
+#if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
+ if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
+ {
+ ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
+ ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
+ ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
+ }
+#endif
+ }
+ }
+
+ while(0 == end_flag)
+ {
+ UWORD8 *pu1_mode = NULL;
+ WORD32 curr_func_mode = 0;
+ void *pv_pred;
+
+ ASSERT(ctr < 36);
+
+ /* TU equal to CU size evaluation of different modes */
+ if(0 == cu_eval_done)
+ {
+ /* check if the all the modes have been evaluated */
+ if(255 == ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[ctr])
+ {
+ cu_eval_done = 1;
+ ctr = 0;
+ }
+ else if(
+ (1 == ctr) &&
+ ((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P5) ||
+ (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6)) &&
+ (ps_ctxt->i1_slice_type != ISLICE))
+ {
+ ctr = 0;
+ cu_eval_done = 1;
+ subcu_eval_done = 1;
+ subpu_eval_done = 1;
+ }
+ else
+ {
+ if(0 == ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_eval_mark[ctr])
+ {
+ ctr++;
+ continue;
+ }
+
+ pu1_mode =
+ &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu[ctr];
+ ctr++;
+ curr_func_mode = TU_EQ_CU;
+ }
+ }
+ /* Sub CU (NXN) mode evaluation of different pred modes */
+ if((0 == subpu_eval_done) && (1 == cu_eval_done))
+ {
+ /*For NxN modes evaluation all candidates for all PU parts are evaluated */
+ /*inside the ihevce_intra_rdopt_cu_ntu function, so the subpu_eval_done is set to 1 */
+ {
+ pu1_mode = &ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_nxn[0][ctr];
+
+ curr_func_mode = TU_EQ_SUBCU;
+ /* check if the any modes have to be evaluated */
+ if(255 == *pu1_mode)
+ {
+ subpu_eval_done = 1;
+ ctr = 0;
+ }
+ else if(ctr != 0) /* If the modes have to be evaluated, then terminate, as all modes are already evaluated */
+ {
+ subpu_eval_done = 1;
+ ctr = 0;
+ }
+ else
+ {
+ ctr++;
+ }
+ }
+ }
+
+ /* TU size equal to CU div2 mode evaluation of different pred modes */
+ if((0 == subcu_eval_done) && (1 == subpu_eval_done) && (1 == cu_eval_done))
+ {
+ /* check if the all the modes have been evaluated */
+ if(255 ==
+ ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[ctr])
+ {
+ subcu_eval_done = 1;
+ }
+ else if(
+ (1 == ctr) &&
+ ((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P5) ||
+ (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6)) &&
+ (ps_ctxt->i1_slice_type != ISLICE) && (ps_cu_analyse->u1_cu_size == 64))
+ {
+ subcu_eval_done = 1;
+ }
+ else
+ {
+ if(0 == ps_cu_analyse->s_cu_intra_cand.au1_2nx2n_tu_eq_cu_by_2_eval_mark[ctr])
+ {
+ ctr++;
+ continue;
+ }
+
+ pu1_mode = &ps_cu_analyse->s_cu_intra_cand
+ .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[ctr];
+
+ ctr++;
+ curr_func_mode = TU_EQ_CU_DIV2;
+ }
+ }
+
+ /* check if all CU option have been evalueted */
+ if((1 == cu_eval_done) && (1 == subcu_eval_done) && (1 == subpu_eval_done))
+ {
+ break;
+ }
+
+ /* RDOPT related copies and settings */
+ ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_curr_idx;
+
+ /* Assign ME/Intra pred buf. to the current intra cand. since we
+ are storing pred data for final_reon function */
+ {
+ pv_pred = ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[rd_opt_curr_idx];
+ }
+
+ /* RDOPT copy States : Prev Cu best to current init */
+ COPY_CABAC_STATES(
+ &ps_ctxt->au1_rdopt_init_ctxt_models[0],
+ &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
+ IHEVC_CAB_CTXT_END);
+
+ /* call the function which performs the normative Intra encode */
+ rd_opt_cost = ((pf_intra_rdopt_cu_ntu)ps_ctxt->pv_intra_rdopt_cu_ntu)(
+ ps_ctxt,
+ ps_cu_prms,
+ pv_pred,
+ ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[rd_opt_curr_idx],
+ &s_chrm_cu_buf_prms,
+ pu1_mode,
+ ps_cu_analyse,
+ pv_curr_src,
+ pv_cu_left,
+ pv_cu_top,
+ pv_cu_top_left,
+ ps_left_nbr_4x4,
+ ps_top_nbr_4x4,
+ nbr_4x4_left_strd,
+ cu_left_stride,
+ rd_opt_curr_idx,
+ curr_func_mode,
+ !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
+ : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
+ (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
+ 100.0);
+
+#if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
+ if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
+ {
+ ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
+ ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
+ ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
+ }
+#endif
+
+ /* based on the rd opt cost choose the best and current index */
+ if(rd_opt_cost < rd_opt_least_cost)
+ {
+ /* swap the best and current indx */
+ rd_opt_best_idx = !rd_opt_best_idx;
+ rd_opt_curr_idx = !rd_opt_curr_idx;
+ i4_best_cu_qp = ps_ctxt->i4_cu_qp;
+
+ rd_opt_least_cost = rd_opt_cost;
+ ps_ctxt->as_cu_prms[rd_opt_best_idx].i8_best_rdopt_cost = rd_opt_cost;
+ }
+
+ if((TU_EQ_SUBCU == curr_func_mode) &&
+ (ps_ctxt->as_cu_prms[rd_opt_best_idx].u1_intra_flag) &&
+ (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P2) && !FORCE_INTRA_TU_DEPTH_TO_0)
+ {
+ UWORD8 au1_tu_eq_cu_div2_modes[4];
+ UWORD8 au1_freq_of_mode[4];
+
+ if(ps_ctxt->as_cu_prms[rd_opt_best_idx].u1_part_mode == SIZE_2Nx2N)
+ {
+ ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] =
+ 255; //ps_ctxt->as_cu_prms[rd_opt_best_idx].au1_intra_pred_mode[0];
+ ps_cu_analyse->s_cu_intra_cand.au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] =
+ 255;
+ }
+ else
+ {
+ WORD32 i4_num_clusters = ihevce_find_num_clusters_of_identical_points_1D(
+ ps_ctxt->as_cu_prms[rd_opt_best_idx].au1_intra_pred_mode,
+ au1_tu_eq_cu_div2_modes,
+ au1_freq_of_mode,
+ 4);
+
+ if(2 == i4_num_clusters)
+ {
+ if(au1_freq_of_mode[0] == 3)
+ {
+ ps_cu_analyse->s_cu_intra_cand
+ .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] =
+ au1_tu_eq_cu_div2_modes[0];
+ ps_cu_analyse->s_cu_intra_cand
+ .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] = 255;
+ }
+ else if(au1_freq_of_mode[1] == 3)
+ {
+ ps_cu_analyse->s_cu_intra_cand
+ .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] =
+ au1_tu_eq_cu_div2_modes[1];
+ ps_cu_analyse->s_cu_intra_cand
+ .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] = 255;
+ }
+ else
+ {
+ ps_cu_analyse->s_cu_intra_cand
+ .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[0] =
+ au1_tu_eq_cu_div2_modes[0];
+ ps_cu_analyse->s_cu_intra_cand
+ .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[1] =
+ au1_tu_eq_cu_div2_modes[1];
+ ps_cu_analyse->s_cu_intra_cand
+ .au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[2] = 255;
+ }
+ }
+ }
+ }
+
+ /* set the neighbour map to 0 */
+ ihevce_set_nbr_map(
+ ps_ctxt->pu1_ctb_nbr_map,
+ ps_ctxt->i4_nbr_map_strd,
+ (ps_cu_analyse->b3_cu_pos_x << 1),
+ (ps_cu_analyse->b3_cu_pos_y << 1),
+ (ps_cu_analyse->u1_cu_size >> 2),
+ 0);
+ }
+
+ } /* end of Intra RD OPT cand evaluation */
+
+ ASSERT(i4_best_cu_qp > (ps_ctxt->ps_rc_quant_ctxt->i2_min_qp - 1));
+ ps_ctxt->i4_cu_qp = i4_best_cu_qp;
+ ps_cu_analyse->i1_cu_qp = i4_best_cu_qp;
+
+ /* --------------------------------------- */
+ /* --------Final mode Recon ---------- */
+ /* --------------------------------------- */
+ {
+ enc_loop_cu_final_prms_t *ps_enc_loop_bestprms;
+ void *pv_final_pred = NULL;
+ WORD32 final_pred_strd = 0;
+ void *pv_final_pred_chrm = NULL;
+ WORD32 final_pred_strd_chrm = 0;
+ WORD32 packed_pred_mode;
+
+#if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
+ if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
+ {
+ pu1_ecd_data = &ps_ctxt->pu1_cu_recur_coeffs[0];
+ }
+#else
+ pu1_ecd_data = &ps_ctxt->pu1_cu_recur_coeffs[0];
+#endif
+
+ ps_enc_loop_bestprms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
+ packed_pred_mode =
+ ps_enc_loop_bestprms->u1_intra_flag + (ps_enc_loop_bestprms->u1_skip_flag) * 2;
+
+ if(!ps_ctxt->u1_is_input_data_hbd)
+ {
+ if(ps_enc_loop_bestprms->u1_intra_flag)
+ {
+ pv_final_pred = ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[rd_opt_best_idx];
+ final_pred_strd =
+ ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[rd_opt_best_idx];
+ }
+ else
+ {
+ pv_final_pred = ps_best_inter_cand->pu1_pred_data;
+ final_pred_strd = ps_best_inter_cand->i4_pred_data_stride;
+ }
+
+ pv_final_pred_chrm =
+ ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[CU_ME_INTRA_PRED_CHROMA_IDX] +
+ rd_opt_best_idx * ((MAX_CTB_SIZE * MAX_CTB_SIZE >> 1) +
+ (u1_is_422 * (MAX_CTB_SIZE * MAX_CTB_SIZE >> 1)));
+ final_pred_strd_chrm =
+ ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[CU_ME_INTRA_PRED_CHROMA_IDX];
+ }
+
+ ihevce_set_eval_flags(ps_ctxt, ps_enc_loop_bestprms);
+
+ {
+ final_mode_process_prms_t s_prms;
+
+ void *pv_cu_luma_recon;
+ void *pv_cu_chroma_recon;
+ WORD32 luma_stride, chroma_stride;
+
+ if(!ps_ctxt->u1_is_input_data_hbd)
+ {
+#if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
+ if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
+ {
+ pv_cu_luma_recon = ps_ctxt->pv_cu_luma_recon;
+ pv_cu_chroma_recon = ps_ctxt->pv_cu_chrma_recon;
+ luma_stride = ps_cu_analyse->u1_cu_size;
+ chroma_stride = ps_cu_analyse->u1_cu_size;
+ }
+ else
+ {
+ /* based on CU position derive the luma pointers */
+ pv_cu_luma_recon = pu1_final_recon;
+
+ /* based on CU position derive the chroma pointers */
+ pv_cu_chroma_recon = s_chrm_cu_buf_prms.pu1_final_recon;
+
+ luma_stride = ps_cu_prms->i4_luma_recon_stride;
+
+ chroma_stride = ps_cu_prms->i4_chrm_recon_stride;
+ }
+#else
+ pv_cu_luma_recon = ps_ctxt->pv_cu_luma_recon;
+ pv_cu_chroma_recon = ps_ctxt->pv_cu_chrma_recon;
+ luma_stride = ps_cu_analyse->u1_cu_size;
+ chroma_stride = ps_cu_analyse->u1_cu_size;
+#endif
+
+ s_prms.ps_cu_nbr_prms = &s_cu_nbr_prms;
+ s_prms.ps_best_inter_cand = ps_best_inter_cand;
+ s_prms.ps_chrm_cu_buf_prms = &s_chrm_cu_buf_prms;
+ s_prms.packed_pred_mode = packed_pred_mode;
+ s_prms.rd_opt_best_idx = rd_opt_best_idx;
+ s_prms.pv_src = pu1_curr_src;
+ s_prms.src_strd = ps_cu_prms->i4_luma_src_stride;
+ s_prms.pv_pred = pv_final_pred;
+ s_prms.pred_strd = final_pred_strd;
+ s_prms.pv_pred_chrm = pv_final_pred_chrm;
+ s_prms.pred_chrm_strd = final_pred_strd_chrm;
+ s_prms.pu1_final_ecd_data = pu1_ecd_data;
+ s_prms.pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0];
+ s_prms.csbf_strd = ps_ctxt->i4_cu_csbf_strd;
+ s_prms.pv_luma_recon = pv_cu_luma_recon;
+ s_prms.recon_luma_strd = luma_stride;
+ s_prms.pv_chrm_recon = pv_cu_chroma_recon;
+ s_prms.recon_chrma_strd = chroma_stride;
+ s_prms.u1_cu_pos_x = ps_cu_analyse->b3_cu_pos_x;
+ s_prms.u1_cu_pos_y = ps_cu_analyse->b3_cu_pos_y;
+ s_prms.u1_cu_size = ps_cu_analyse->u1_cu_size;
+ s_prms.i1_cu_qp = ps_cu_analyse->i1_cu_qp;
+ s_prms.u1_will_cabac_state_change = 1;
+ s_prms.u1_recompute_sbh_and_rdoq = 0;
+ s_prms.u1_is_first_pass = 1;
+ }
+
+#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ s_prms.u1_is_cu_noisy = !ps_enc_loop_bestprms->u1_intra_flag
+ ? ps_cu_prms->u1_is_cu_noisy
+ : ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY;
+#endif
+
+ ((pf_final_rdopt_mode_prcs)ps_ctxt->pv_final_rdopt_mode_prcs)(ps_ctxt, &s_prms);
+
+#if USE_NOISE_TERM_IN_ENC_LOOP && RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY
+ if(!ps_ctxt->u1_enable_psyRDOPT && ps_cu_prms->u1_is_cu_noisy)
+ {
+ ps_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf;
+ ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
+ ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf;
+ }
+#endif
+ }
+ }
+
+ /* --------------------------------------- */
+ /* --------Populate CU out prms ---------- */
+ /* --------------------------------------- */
+ {
+ enc_loop_cu_final_prms_t *ps_enc_loop_bestprms;
+ UWORD8 *pu1_pu_map;
+ ps_enc_loop_bestprms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
+
+ /* Corner case : If Part is 2Nx2N and Merge has all TU with zero cbf */
+ /* then it has to be coded as skip CU */
+ if((SIZE_2Nx2N == ps_enc_loop_bestprms->u1_part_mode) &&
+ (1 == ps_enc_loop_bestprms->as_pu_enc_loop[0].b1_merge_flag) &&
+ (0 == ps_enc_loop_bestprms->u1_skip_flag) && (0 == ps_enc_loop_bestprms->u1_is_cu_coded))
+ {
+ ps_enc_loop_bestprms->u1_skip_flag = 1;
+ }
+
+ /* update number PUs in CU */
+ ps_cu_prms->i4_num_pus_in_cu = ps_enc_loop_bestprms->u2_num_pus_in_cu;
+
+ /* ---- populate the colocated pu map index --- */
+ for(ctr = 0; ctr < ps_enc_loop_bestprms->u2_num_pus_in_cu; ctr++)
+ {
+ WORD32 i;
+ WORD32 vert_ht;
+ WORD32 horz_wd;
+
+ if(ps_enc_loop_bestprms->u1_intra_flag)
+ {
+ ps_enc_loop_bestprms->as_col_pu_enc_loop[ctr].b1_intra_flag = 1;
+ vert_ht = ps_cu_analyse->u1_cu_size >> 2;
+ horz_wd = ps_cu_analyse->u1_cu_size >> 2;
+ }
+ else
+ {
+ vert_ht = (((ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_ht + 1) << 2) >> 2);
+ horz_wd = (((ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_wd + 1) << 2) >> 2);
+ }
+
+ pu1_pu_map = pu1_col_pu_map + ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_pos_x;
+ pu1_pu_map += (ps_enc_loop_bestprms->as_pu_enc_loop[ctr].b4_pos_y * num_4x4_in_ctb);
+
+ for(i = 0; i < vert_ht; i++)
+ {
+ memset(pu1_pu_map, col_start_pu_idx, horz_wd);
+ pu1_pu_map += num_4x4_in_ctb;
+ }
+ /* increment the index */
+ col_start_pu_idx++;
+ }
+ /* ---- copy the colocated PUs to frm pu ----- */
+ memcpy(
+ ps_col_pu,
+ &ps_enc_loop_bestprms->as_col_pu_enc_loop[0],
+ ps_enc_loop_bestprms->u2_num_pus_in_cu * sizeof(pu_col_mv_t));
+
+ /*---populate qp for 4x4 nbr array based on skip and cbf zero flag---*/
+ {
+ entropy_context_t *ps_entropy_ctxt;
+
+ WORD32 diff_cu_qp_delta_depth, log2_ctb_size;
+
+ WORD32 log2_min_cu_qp_delta_size;
+ UWORD32 block_addr_align;
+ ps_entropy_ctxt = ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt;
+
+ log2_ctb_size = ps_entropy_ctxt->i1_log2_ctb_size;
+ diff_cu_qp_delta_depth = ps_entropy_ctxt->ps_pps->i1_diff_cu_qp_delta_depth;
+
+ log2_min_cu_qp_delta_size = log2_ctb_size - diff_cu_qp_delta_depth;
+ block_addr_align = 15 << (log2_min_cu_qp_delta_size - 3);
+
+ ps_entropy_ctxt->i4_qg_pos_x = ps_cu_analyse->b3_cu_pos_x & block_addr_align;
+ ps_entropy_ctxt->i4_qg_pos_y = ps_cu_analyse->b3_cu_pos_y & block_addr_align;
+ /*Update the Qp value used. It will not have a valid value iff
+ current CU is (skipped/no_cbf). In that case the Qp needed for
+ deblocking is calculated from top/left/previous coded CU*/
+
+ ps_ctxt->ps_enc_out_ctxt->i1_cu_qp = ps_cu_analyse->i1_cu_qp;
+
+ if(ps_entropy_ctxt->i4_qg_pos_x == ps_cu_analyse->b3_cu_pos_x &&
+ ps_entropy_ctxt->i4_qg_pos_y == ps_cu_analyse->b3_cu_pos_y)
+ {
+ ps_ctxt->ps_enc_out_ctxt->b1_first_cu_in_qg = 1;
+ }
+ else
+ {
+ ps_ctxt->ps_enc_out_ctxt->b1_first_cu_in_qg = 0;
+ }
+ }
+
+ /* -- at the end of CU set the neighbour map to 1 -- */
+ ihevce_set_nbr_map(
+ ps_ctxt->pu1_ctb_nbr_map,
+ ps_ctxt->i4_nbr_map_strd,
+ (ps_cu_analyse->b3_cu_pos_x << 1),
+ (ps_cu_analyse->b3_cu_pos_y << 1),
+ (ps_cu_analyse->u1_cu_size >> 2),
+ 1);
+
+ /* -- at the end of CU update best cabac rdopt states -- */
+ /* -- and also set the top row skip flags ------------- */
+ ihevce_entropy_update_best_cu_states(
+ &ps_ctxt->s_rdopt_entropy_ctxt,
+ ps_cu_analyse->b3_cu_pos_x,
+ ps_cu_analyse->b3_cu_pos_y,
+ ps_cu_analyse->u1_cu_size,
+ 0,
+ rd_opt_best_idx);
+ }
+
+ /* Store Output struct */
+#if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
+ {
+ {
+ memcpy(
+ &ps_ctxt->ps_enc_out_ctxt->s_cu_prms,
+ &ps_ctxt->as_cu_prms[rd_opt_best_idx],
+ sizeof(enc_loop_cu_final_prms_t));
+ }
+
+ memcpy(
+ &ps_ctxt->as_cu_recur_nbr[0],
+ &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0],
+ sizeof(nbr_4x4_t) * (ps_cu_analyse->u1_cu_size >> 2) *
+ (ps_cu_analyse->u1_cu_size >> 2));
+
+ ps_ctxt->ps_enc_out_ctxt->ps_cu_prms = &ps_ctxt->ps_enc_out_ctxt->s_cu_prms;
+
+ ps_ctxt->ps_cu_recur_nbr = &ps_ctxt->as_cu_recur_nbr[0];
+ }
+#else
+ if(ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P2)
+ {
+ ps_ctxt->ps_enc_out_ctxt->ps_cu_prms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
+
+ ps_ctxt->ps_cu_recur_nbr = &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0];
+
+ if(ps_ctxt->u1_disable_intra_eval && ps_ctxt->i4_deblk_pad_hpel_cur_pic)
+ {
+ /* Wait till top data is ready */
+ /* Currently checking till top right CU */
+ curr_cu_pos_in_row = i4_ctb_x_off + (ps_cu_analyse->b3_cu_pos_x << 3);
+
+ if(i4_ctb_y_off == 0)
+ {
+ /* No wait for 1st row */
+ cu_top_right_offset = -(MAX_CTB_SIZE);
+ {
+ ihevce_tile_params_t *ps_col_tile_params =
+ ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base +
+ ps_ctxt->i4_tile_col_idx);
+
+ /* No wait for 1st row */
+ cu_top_right_offset = -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
+ }
+ cu_top_right_dep_pos = 0;
+ }
+ else
+ {
+ cu_top_right_offset = (ps_cu_analyse->u1_cu_size);
+ cu_top_right_dep_pos = (i4_ctb_y_off >> 6) - 1;
+ }
+
+ if(0 == ps_cu_analyse->b3_cu_pos_y)
+ {
+ ihevce_dmgr_chk_row_row_sync(
+ ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
+ curr_cu_pos_in_row,
+ cu_top_right_offset,
+ cu_top_right_dep_pos,
+ ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
+ ps_ctxt->thrd_id);
+ }
+ }
+ }
+ else
+ {
+ {
+ memcpy(
+ &ps_ctxt->ps_enc_out_ctxt->s_cu_prms,
+ &ps_ctxt->as_cu_prms[rd_opt_best_idx],
+ sizeof(enc_loop_cu_final_prms_t));
+ }
+
+ memcpy(
+ &ps_ctxt->as_cu_recur_nbr[0],
+ &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0],
+ sizeof(nbr_4x4_t) * (ps_cu_analyse->u1_cu_size >> 2) *
+ (ps_cu_analyse->u1_cu_size >> 2));
+
+ ps_ctxt->ps_enc_out_ctxt->ps_cu_prms = &ps_ctxt->ps_enc_out_ctxt->s_cu_prms;
+
+ ps_ctxt->ps_cu_recur_nbr = &ps_ctxt->as_cu_recur_nbr[0];
+ }
+#endif
+
+ ps_ctxt->s_pred_buf_data.u4_is_buf_in_use &=
+ ~((1 << (ps_ctxt->i4_max_num_inter_rdopt_cands + 4)) - 1);
+
+ return rd_opt_least_cost;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_enc_loop_process_row \endif
+*
+* \brief
+* Row level enc_loop pass function
+*
+* \param[in] ps_ctxt : pointer to enc_loop module
+* \param[in] ps_curr_src_bufs : pointer to input yuv buffer (row buffer)
+* \param[out] ps_curr_recon_bufs : pointer recon picture structure pointer (row buffer)
+* \param[in] ps_ctb_in : pointer CTB structure (output of ME/IPE) (row buffer)
+* \param[out] ps_ctb_out : pointer CTB output structure (row buffer)
+* \param[out] ps_cu_out : pointer CU output structure (row buffer)
+* \param[out] ps_tu_out : pointer TU output structure (row buffer)
+* \param[out] pi2_frm_coeffs : pointer coeff output (row buffer)
+* \param[in] i4_poc : current poc. Needed to send recon in dist-client mode
+*
+* \return
+* None
+*
+* Note : Currently the frame level calculations assume that the
+* frame width of the input/recon is an exact multiple of ctbsize
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_enc_loop_process_row(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ iv_enc_yuv_buf_t *ps_curr_src_bufs,
+ iv_enc_yuv_buf_t *ps_curr_recon_bufs,
+ iv_enc_yuv_buf_src_t *ps_curr_recon_bufs_src,
+ UWORD8 **ppu1_y_subpel_planes,
+ ctb_analyse_t *ps_ctb_in,
+ ctb_enc_loop_out_t *ps_ctb_out,
+ ipe_l0_ctb_analyse_for_me_t *ps_row_ipe_analyse,
+ cur_ctb_cu_tree_t *ps_row_cu_tree,
+ cu_enc_loop_out_t *ps_row_cu,
+ tu_enc_loop_out_t *ps_row_tu,
+ pu_t *ps_row_pu,
+ pu_col_mv_t *ps_row_col_pu,
+ UWORD16 *pu2_num_pu_map,
+ UWORD8 *pu1_row_pu_map,
+ UWORD8 *pu1_row_ecd_data,
+ UWORD32 *pu4_pu_offsets,
+ frm_ctb_ctxt_t *ps_frm_ctb_prms,
+ WORD32 vert_ctr,
+ recon_pic_buf_t *ps_frm_recon,
+ void *pv_dep_mngr_encloop_dep_me,
+ pad_interp_recon_frm_t *ps_pad_interp_recon,
+ WORD32 i4_pass,
+ multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
+ ihevce_tile_params_t *ps_tile_params)
+{
+ enc_loop_cu_prms_t s_cu_prms;
+ ctb_enc_loop_out_t *ps_ctb_out_dblk;
+
+ WORD32 ctb_ctr, ctb_start, ctb_end;
+ WORD32 col_pu_map_idx;
+ WORD32 num_ctbs_horz_pic;
+ WORD32 ctb_size;
+ WORD32 last_ctb_row_flag;
+ WORD32 last_ctb_col_flag;
+ WORD32 last_hz_ctb_wd;
+ WORD32 last_vt_ctb_ht;
+ void *pv_dep_mngr_enc_loop_dblk;
+ void *pv_dep_mngr_enc_loop_cu_top_right;
+ WORD32 dblk_offset, dblk_check_dep_pos;
+ WORD32 aux_offset, aux_check_dep_pos;
+ void *pv_dep_mngr_me_dep_encloop;
+ ctb_enc_loop_out_t *ps_ctb_out_sao;
+ /*Structure to store deblocking parameters at CTB-row level*/
+ deblk_ctbrow_prms_t s_deblk_ctb_row_params;
+ UWORD8 is_inp_422 = (ps_ctxt->u1_chroma_array_type == 2);
+
+ pv_dep_mngr_me_dep_encloop = (void *)ps_frm_recon->pv_dep_mngr_recon;
+ num_ctbs_horz_pic = ps_frm_ctb_prms->i4_num_ctbs_horz;
+ ctb_size = ps_frm_ctb_prms->i4_ctb_size;
+
+ /* Store the num_ctb_horz in sao context*/
+ ps_ctxt->s_sao_ctxt_t.u4_num_ctbs_horz = ps_frm_ctb_prms->i4_num_ctbs_horz;
+ ps_ctxt->s_sao_ctxt_t.u4_num_ctbs_vert = ps_frm_ctb_prms->i4_num_ctbs_vert;
+
+ /* Get the EncLoop Deblock Dep Mngr */
+ pv_dep_mngr_enc_loop_dblk = ps_ctxt->pv_dep_mngr_enc_loop_dblk;
+ /* Get the EncLoop Top-Right CU Dep Mngr */
+ pv_dep_mngr_enc_loop_cu_top_right = ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right;
+ /* Set Variables for Dep. Checking and Setting */
+ aux_check_dep_pos = vert_ctr;
+ aux_offset = 2; /* Should be there for 0th row also */
+ if(vert_ctr > 0)
+ {
+ dblk_check_dep_pos = vert_ctr - 1;
+ dblk_offset = 2;
+ }
+ else
+ {
+ /* First row should run without waiting */
+ dblk_check_dep_pos = 0;
+ dblk_offset = -(ps_tile_params->i4_first_sample_x + 1);
+ }
+
+ /* check if the current row processed in last CTb row */
+ last_ctb_row_flag = (vert_ctr == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1));
+
+ /* Valid Width (pixels) in the last CTB in every row (padding cases) */
+ last_hz_ctb_wd = ps_frm_ctb_prms->i4_cu_aligned_pic_wd - ((num_ctbs_horz_pic - 1) * ctb_size);
+
+ /* Valid Height (pixels) in the last CTB row (padding cases) */
+ last_vt_ctb_ht = ps_frm_ctb_prms->i4_cu_aligned_pic_ht -
+ ((ps_frm_ctb_prms->i4_num_ctbs_vert - 1) * ctb_size);
+ /* reset the states copied flag */
+ ps_ctxt->u1_cabac_states_next_row_copied_flag = 0;
+ ps_ctxt->u1_cabac_states_first_cu_copied_flag = 0;
+
+ /* populate the cu prms which are common for entire ctb row */
+ s_cu_prms.i4_luma_src_stride = ps_curr_src_bufs->i4_y_strd;
+ s_cu_prms.i4_chrm_src_stride = ps_curr_src_bufs->i4_uv_strd;
+ s_cu_prms.i4_luma_recon_stride = ps_curr_recon_bufs->i4_y_strd;
+ s_cu_prms.i4_chrm_recon_stride = ps_curr_recon_bufs->i4_uv_strd;
+ s_cu_prms.i4_ctb_size = ctb_size;
+
+ ps_ctxt->i4_is_first_cu_qg_coded = 0;
+
+ /* Initialize the number of PUs for the first CTB to 0 */
+ *pu2_num_pu_map = 0;
+
+ /*Getting the address of BS and Qp arrays and other info*/
+ memcpy(&s_deblk_ctb_row_params, &ps_ctxt->s_deblk_ctbrow_prms, sizeof(deblk_ctbrow_prms_t));
+ {
+ WORD32 num_ctbs_horz_tile;
+ /* Update the pointers which are accessed not by using ctb_ctr
+ to the tile start here! */
+ ps_ctb_in += ps_tile_params->i4_first_ctb_x;
+ ps_ctb_out += ps_tile_params->i4_first_ctb_x;
+
+ ps_row_cu += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_cus_in_ctb);
+ ps_row_tu += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_tus_in_ctb);
+ ps_row_pu += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_pus_in_ctb);
+ pu1_row_pu_map += (ps_tile_params->i4_first_ctb_x * ps_frm_ctb_prms->i4_num_pus_in_ctb);
+ pu1_row_ecd_data +=
+ (ps_tile_params->i4_first_ctb_x *
+ ((is_inp_422 == 1) ? (ps_frm_ctb_prms->i4_num_tus_in_ctb << 1)
+ : ((ps_frm_ctb_prms->i4_num_tus_in_ctb * 3) >> 1)) *
+ MAX_SCAN_COEFFS_BYTES_4x4);
+
+ /* Update the pointers to the tile start */
+ s_deblk_ctb_row_params.pu4_ctb_row_bs_vert +=
+ (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3)); //one vertical edge per 8x8 block
+ s_deblk_ctb_row_params.pu4_ctb_row_bs_horz +=
+ (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3)); //one horizontal edge per 8x8 block
+ s_deblk_ctb_row_params.pi1_ctb_row_qp += (ps_tile_params->i4_first_ctb_x * (ctb_size >> 2));
+
+ num_ctbs_horz_tile = ps_tile_params->i4_curr_tile_wd_in_ctb_unit;
+
+ ctb_start = ps_tile_params->i4_first_ctb_x;
+ ctb_end = ps_tile_params->i4_first_ctb_x + num_ctbs_horz_tile;
+ }
+ ps_ctb_out_dblk = ps_ctb_out;
+
+ ps_ctxt->i4_last_cu_qp_from_prev_ctb = ps_ctxt->i4_frame_qp;
+
+ /* --------- Loop over all the CTBs in a row --------------- */
+ for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
+ {
+ cu_final_update_prms s_cu_update_prms;
+
+ cur_ctb_cu_tree_t *ps_cu_tree_analyse;
+ me_ctb_data_t *ps_cu_me_data;
+ ipe_l0_ctb_analyse_for_me_t *ps_ctb_ipe_analyse;
+ cu_enc_loop_out_t *ps_cu_final;
+ pu_col_mv_t *ps_ctb_col_pu;
+
+ WORD32 cur_ctb_ht, cur_ctb_wd;
+ WORD32 last_cu_pos_in_ctb;
+ WORD32 last_cu_size;
+ WORD32 num_pus_in_ctb;
+ UWORD8 u1_is_ctb_noisy;
+ ps_ctb_col_pu = ps_row_col_pu + ctb_ctr * ps_frm_ctb_prms->i4_num_pus_in_ctb;
+
+ if(ctb_ctr)
+ {
+ ps_ctxt->i4_prev_QP = ps_ctxt->i4_last_cu_qp_from_prev_ctb;
+ }
+ /*If Sup pic rc is enabled*/
+ if(ps_ctxt->i4_sub_pic_level_rc)
+ {
+ ihevce_sub_pic_rc_scale_query((void *)ps_multi_thrd_ctxt, (void *)ps_ctxt);
+ }
+ /* check if the current row processed in last CTb row */
+ last_ctb_col_flag = (ctb_ctr == (num_ctbs_horz_pic - 1));
+ if(1 == last_ctb_col_flag)
+ {
+ cur_ctb_wd = last_hz_ctb_wd;
+ }
+ else
+ {
+ cur_ctb_wd = ctb_size;
+ }
+
+ /* If it's the last CTB, get the actual ht of CTB */
+ if(1 == last_ctb_row_flag)
+ {
+ cur_ctb_ht = last_vt_ctb_ht;
+ }
+ else
+ {
+ cur_ctb_ht = ctb_size;
+ }
+
+ ps_ctxt->u4_cur_ctb_ht = cur_ctb_ht;
+ ps_ctxt->u4_cur_ctb_wd = cur_ctb_wd;
+
+ /* Wait till reference frame recon is available */
+
+ /* ------------ Wait till current data is ready from ME -------------- */
+
+ /*only for ref instance and Non I pics */
+ if((ps_ctxt->i4_bitrate_instance_num == 0) &&
+ ((ISLICE != ps_ctxt->i1_slice_type) || L0ME_IN_OPENLOOP_MODE))
+ {
+ if(ctb_ctr < (num_ctbs_horz_pic))
+ {
+ ihevce_dmgr_chk_row_row_sync(
+ pv_dep_mngr_encloop_dep_me,
+ ctb_ctr,
+ 1,
+ vert_ctr,
+ ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
+ ps_ctxt->thrd_id);
+ }
+ }
+
+ /* store the cu pointer for current ctb out */
+ ps_ctb_out->ps_enc_cu = ps_row_cu;
+ ps_cu_final = ps_row_cu;
+
+ /* Get the base point of CU recursion tree */
+ if(ISLICE != ps_ctxt->i1_slice_type)
+ {
+ ps_cu_tree_analyse = ps_ctb_in->ps_cu_tree;
+ ASSERT(ps_ctb_in->ps_cu_tree == (ps_row_cu_tree + (ctb_ctr * MAX_NUM_NODES_CU_TREE)));
+ }
+ else
+ {
+ /* Initialize ptr to current CTB */
+ ps_cu_tree_analyse = ps_row_cu_tree + (ctb_ctr * MAX_NUM_NODES_CU_TREE);
+ }
+
+ /* Get the ME data pointer for 16x16 block data in ctb */
+ ps_cu_me_data = ps_ctb_in->ps_me_ctb_data;
+ u1_is_ctb_noisy = ps_ctb_in->s_ctb_noise_params.i4_noise_present;
+ s_cu_prms.u1_is_cu_noisy = u1_is_ctb_noisy;
+ s_cu_prms.pu1_is_8x8Blk_noisy = ps_ctb_in->s_ctb_noise_params.au1_is_8x8Blk_noisy;
+
+ /* store the ctb level prms in cu prms */
+ s_cu_prms.i4_ctb_pos = ctb_ctr;
+
+ s_cu_prms.pu1_luma_src = (UWORD8 *)ps_curr_src_bufs->pv_y_buf + ctb_ctr * ctb_size;
+ s_cu_prms.pu1_luma_recon = (UWORD8 *)ps_curr_recon_bufs->pv_y_buf + ctb_ctr * ctb_size;
+
+ {
+ s_cu_prms.pu1_chrm_src = (UWORD8 *)ps_curr_src_bufs->pv_u_buf + ctb_ctr * ctb_size;
+ s_cu_prms.pu1_chrm_recon = (UWORD8 *)ps_curr_recon_bufs->pv_u_buf + ctb_ctr * ctb_size;
+ }
+
+ s_cu_prms.pu1_sbpel_hxfy = (UWORD8 *)ppu1_y_subpel_planes[0] + ctb_ctr * ctb_size;
+
+ s_cu_prms.pu1_sbpel_fxhy = (UWORD8 *)ppu1_y_subpel_planes[1] + ctb_ctr * ctb_size;
+
+ s_cu_prms.pu1_sbpel_hxhy = (UWORD8 *)ppu1_y_subpel_planes[2] + ctb_ctr * ctb_size;
+
+ /* Initialize ptr to current CTB */
+ ps_ctb_ipe_analyse = ps_row_ipe_analyse + ctb_ctr; // * ctb_size;
+
+ /* reset the map idx for current ctb */
+ col_pu_map_idx = 0;
+ num_pus_in_ctb = 0;
+
+ /* reset the map buffer to 0*/
+
+ memset(
+ &ps_ctxt->au1_nbr_ctb_map[0][0],
+ 0,
+ (MAX_PU_IN_CTB_ROW + 1 + 8) * (MAX_PU_IN_CTB_ROW + 1 + 8));
+
+ /* set the CTB neighbour availability flags */
+ ihevce_set_ctb_nbr(
+ &ps_ctb_out->s_ctb_nbr_avail_flags,
+ ps_ctxt->pu1_ctb_nbr_map,
+ ps_ctxt->i4_nbr_map_strd,
+ ctb_ctr,
+ vert_ctr,
+ ps_frm_ctb_prms);
+
+ /* -------- update the cur CTB offsets for inter prediction-------- */
+ ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_x = ctb_ctr * ctb_size;
+ ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y = vert_ctr * ctb_size;
+
+ /* -------- update the cur CTB offsets for MV prediction-------- */
+ ps_ctxt->s_mv_pred_ctxt.i4_ctb_x = ctb_ctr;
+ ps_ctxt->s_mv_pred_ctxt.i4_ctb_y = vert_ctr;
+
+ /* -------------- Boundary Strength Initialization ----------- */
+ if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
+ {
+ ihevce_bs_init_ctb(&ps_ctxt->s_deblk_bs_prms, ps_frm_ctb_prms, ctb_ctr, vert_ctr);
+ }
+
+ /* -------- update cur CTB offsets for entropy rdopt context------- */
+ ihevce_entropy_rdo_ctb_init(&ps_ctxt->s_rdopt_entropy_ctxt, ctb_ctr, vert_ctr);
+
+ /* --------- CU Recursion --------------- */
+
+ {
+#if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
+ WORD32 i4_max_tree_depth = 4;
+#endif
+ WORD32 i4_tree_depth = 0;
+ /* Init no. of CU in CTB to 0*/
+ ps_ctb_out->u1_num_cus_in_ctb = 0;
+
+#if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
+ if(ps_ctxt->i4_bitrate_instance_num == 0)
+ {
+ WORD32 i4_max_tree_depth = 4;
+ WORD32 i;
+ for(i = 0; i < i4_max_tree_depth; i++)
+ {
+ COPY_CABAC_STATES(
+ &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
+ &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
+ IHEVC_CAB_CTXT_END * sizeof(UWORD8));
+ }
+ }
+#else
+ if(ps_ctxt->i4_bitrate_instance_num == 0)
+ {
+ if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
+ {
+ WORD32 i4_max_tree_depth = 4;
+ WORD32 i;
+ for(i = 0; i < i4_max_tree_depth; i++)
+ {
+ COPY_CABAC_STATES(
+ &ps_ctxt->au1_rdopt_recur_ctxt_models[i][0],
+ &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
+ IHEVC_CAB_CTXT_END * sizeof(UWORD8));
+ }
+ }
+ }
+
+#endif
+ if(ps_ctxt->i4_bitrate_instance_num == 0)
+ {
+ /* FOR I- PIC populate the curr_ctb accordingly */
+ if(ISLICE == ps_ctxt->i1_slice_type)
+ {
+ ps_ctb_ipe_analyse->ps_cu_tree_root = ps_cu_tree_analyse;
+ ps_ctb_ipe_analyse->nodes_created_in_cu_tree = 1;
+
+ ihevce_populate_cu_tree(
+ ps_ctb_ipe_analyse,
+ ps_cu_tree_analyse,
+ 0,
+ (IHEVCE_QUALITY_CONFIG_T)ps_ctxt->i4_quality_preset,
+ POS_NA,
+ POS_NA,
+ POS_NA);
+ }
+ }
+ ps_ctb_ipe_analyse->nodes_created_in_cu_tree = 1;
+ ps_ctxt->ps_enc_out_ctxt = &ps_ctxt->as_enc_cu_ctxt[0];
+ ps_ctxt->pu1_ecd_data = pu1_row_ecd_data;
+ if(ps_ctxt->i4_use_ctb_level_lamda)
+ {
+ ihevce_compute_cu_level_QP(
+ ps_ctxt, -1, ps_ctb_ipe_analyse->i4_64x64_act_factor[3][1], 0);
+ }
+
+ s_cu_update_prms.ppu1_row_ecd_data = &pu1_row_ecd_data;
+ s_cu_update_prms.pi4_last_cu_pos_in_ctb = &last_cu_pos_in_ctb;
+ s_cu_update_prms.pi4_last_cu_size = &last_cu_size;
+ s_cu_update_prms.pi4_num_pus_in_ctb = &num_pus_in_ctb;
+ s_cu_update_prms.pps_cu_final = &ps_cu_final;
+ s_cu_update_prms.pps_row_pu = &ps_row_pu;
+ s_cu_update_prms.pps_row_tu = &ps_row_tu;
+ s_cu_update_prms.pu1_num_cus_in_ctb_out = &ps_ctb_out->u1_num_cus_in_ctb;
+
+ // source satd computation
+ /* compute the source 8x8 SATD for the current CTB */
+ /* populate pui4_source_satd in some structure and pass it inside */
+ if(ps_ctxt->u1_enable_psyRDOPT)
+ {
+ /* declare local variables */
+ WORD32 i;
+ WORD32 ctb_size;
+ WORD32 num_comp_had_blocks;
+ UWORD8 *pu1_l0_block;
+ WORD32 block_ht;
+ WORD32 block_wd;
+ WORD32 ht_offset;
+ WORD32 wd_offset;
+
+ WORD32 num_horz_blocks;
+ WORD32 had_block_size;
+ WORD32 total_had_block_size;
+ WORD16 pi2_residue_had_zscan[64];
+ UWORD8 ai1_zeros_buffer[64];
+
+ WORD32 index_satd;
+ WORD32 is_hbd;
+ /* initialize the variables */
+ block_ht = cur_ctb_ht;
+ block_wd = cur_ctb_wd;
+
+ is_hbd = ps_ctxt->u1_is_input_data_hbd;
+
+ had_block_size = 8;
+ total_had_block_size = had_block_size * had_block_size;
+
+ for(i = 0; i < total_had_block_size; i++)
+ {
+ ai1_zeros_buffer[i] = 0;
+ }
+
+ ctb_size = block_ht * block_wd; //ctb_width * ctb_height;
+ num_comp_had_blocks = ctb_size / (had_block_size * had_block_size);
+
+ num_horz_blocks = block_wd / had_block_size; //ctb_width / had_block_size;
+ ht_offset = -had_block_size;
+ wd_offset = -had_block_size;
+
+ index_satd = 0;
+ /* Loop over all 8x8 blocks in the CTB */
+ for(i = 0; i < num_comp_had_blocks; i++)
+ {
+ if(i % num_horz_blocks == 0)
+ {
+ wd_offset = -had_block_size;
+ ht_offset += had_block_size;
+ }
+ wd_offset += had_block_size;
+
+ if(!is_hbd)
+ {
+ /* get memory pointers for each of L0 and L1 blocks whose hadamard has to be computed */
+ pu1_l0_block = s_cu_prms.pu1_luma_src +
+ ps_curr_src_bufs->i4_y_strd * ht_offset + wd_offset;
+
+ ps_ctxt->ai4_source_satd_8x8[index_satd] =
+
+ ps_ctxt->s_cmn_opt_func.pf_AC_HAD_8x8_8bit(
+ pu1_l0_block,
+ ps_curr_src_bufs->i4_y_strd,
+ ai1_zeros_buffer,
+ had_block_size,
+ pi2_residue_had_zscan,
+ had_block_size);
+ }
+ index_satd++;
+ }
+ }
+
+ if(ps_ctxt->u1_enable_psyRDOPT)
+ {
+ /* declare local variables */
+ WORD32 i;
+ WORD32 ctb_size;
+ WORD32 num_comp_had_blocks;
+ UWORD8 *pu1_l0_block;
+ UWORD8 *pu1_l0_block_prev = NULL;
+ WORD32 block_ht;
+ WORD32 block_wd;
+ WORD32 ht_offset;
+ WORD32 wd_offset;
+
+ WORD32 num_horz_blocks;
+ WORD32 had_block_size;
+ WORD16 pi2_residue_had[64];
+ UWORD8 ai1_zeros_buffer[64];
+ WORD32 index_satd = 0;
+
+ WORD32 is_hbd;
+ is_hbd = ps_ctxt->u1_is_input_data_hbd; // 8 bit
+
+ /* initialize the variables */
+ /* change this based on the bit depth */
+ // ps_ctxt->u1_chroma_array_type
+ if(ps_ctxt->u1_chroma_array_type == 1)
+ {
+ block_ht = cur_ctb_ht / 2;
+ block_wd = cur_ctb_wd / 2;
+ }
+ else
+ {
+ block_ht = cur_ctb_ht;
+ block_wd = cur_ctb_wd / 2;
+ }
+
+ had_block_size = 4;
+ memset(ai1_zeros_buffer, 0, 64 * sizeof(UWORD8));
+
+ ctb_size = block_ht * block_wd; //ctb_width * ctb_height;
+ num_comp_had_blocks = 2 * ctb_size / (had_block_size * had_block_size);
+
+ num_horz_blocks = 2 * block_wd / had_block_size; //ctb_width / had_block_size;
+ ht_offset = -had_block_size;
+ wd_offset = -had_block_size;
+
+ if(!is_hbd)
+ {
+ /* loop over for every 4x4 blocks in the CU for Cb */
+ for(i = 0; i < num_comp_had_blocks; i++)
+ {
+ if(i % num_horz_blocks == 0)
+ {
+ wd_offset = -had_block_size;
+ ht_offset += had_block_size;
+ }
+ wd_offset += had_block_size;
+
+ /* get memory pointers for each of L0 and L1 blocks whose hadamard has to be computed */
+ if(i % 2 != 0)
+ {
+ if(!is_hbd)
+ {
+ pu1_l0_block = pu1_l0_block_prev + 1;
+ }
+ }
+ else
+ {
+ if(!is_hbd)
+ {
+ pu1_l0_block = s_cu_prms.pu1_chrm_src +
+ s_cu_prms.i4_chrm_src_stride * ht_offset + wd_offset;
+ pu1_l0_block_prev = pu1_l0_block;
+ }
+ }
+
+ if(had_block_size == 4)
+ {
+ if(!is_hbd)
+ {
+ ps_ctxt->ai4_source_chroma_satd[index_satd] =
+ ps_ctxt->s_cmn_opt_func.pf_chroma_AC_HAD_4x4_8bit(
+ pu1_l0_block,
+ s_cu_prms.i4_chrm_src_stride,
+ ai1_zeros_buffer,
+ had_block_size,
+ pi2_residue_had,
+ had_block_size);
+ }
+
+ index_satd++;
+
+ } // block size of 4x4
+
+ } // for all blocks
+
+ } // is hbd check
+ }
+
+ ihevce_cu_recurse_decide(
+ ps_ctxt,
+ &s_cu_prms,
+ ps_cu_tree_analyse,
+ ps_cu_tree_analyse,
+ ps_ctb_ipe_analyse,
+ ps_cu_me_data,
+ &ps_ctb_col_pu,
+ &s_cu_update_prms,
+ pu1_row_pu_map,
+ &col_pu_map_idx,
+ i4_tree_depth,
+ ctb_ctr << 6,
+ vert_ctr << 6,
+ cur_ctb_ht);
+
+ if(ps_ctxt->i1_slice_type != ISLICE)
+ {
+ ASSERT(
+ (cur_ctb_wd * cur_ctb_ht) <=
+ ihevce_compute_area_of_valid_cus_in_ctb(ps_cu_tree_analyse));
+ }
+ /* If sub pic rc is enabled */
+ if(1 == ps_ctxt->i4_sub_pic_level_rc)
+ {
+ /*In a row, after the required CTB is reached, send data and query scale from Bit Control thread */
+ ihevce_sub_pic_rc_in_data(
+ (void *)ps_multi_thrd_ctxt,
+ (void *)ps_ctxt,
+ (void *)ps_ctb_ipe_analyse,
+ (void *)ps_frm_ctb_prms);
+ }
+
+ ps_ctxt->ps_enc_out_ctxt->u1_cu_size = 128;
+
+ } /* End of CU recursion block */
+
+#if PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
+ {
+ ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt = &ps_ctxt->as_enc_cu_ctxt[0];
+ enc_loop_cu_prms_t *ps_cu_prms = &s_cu_prms;
+ ps_ctxt->pu1_ecd_data = pu1_row_ecd_data;
+
+ do
+ {
+ ihevce_update_final_cu_results(
+ ps_ctxt,
+ ps_enc_out_ctxt,
+ ps_cu_prms,
+ NULL, /* &ps_ctb_col_pu */
+ NULL, /* &col_pu_map_idx */
+ &s_cu_update_prms,
+ ctb_ctr,
+ vert_ctr);
+
+ ps_enc_out_ctxt++;
+
+ ASSERT(ps_ctb_in->u1_num_cus_in_ctb <= MAX_CTB_SIZE);
+
+ } while(ps_enc_out_ctxt->u1_cu_size != 128);
+ }
+#else
+ if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
+ {
+ ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt = &ps_ctxt->as_enc_cu_ctxt[0];
+ enc_loop_cu_prms_t *ps_cu_prms = &s_cu_prms;
+ ps_ctxt->pu1_ecd_data = pu1_row_ecd_data;
+
+ do
+ {
+ ihevce_update_final_cu_results(
+ ps_ctxt,
+ ps_enc_out_ctxt,
+ ps_cu_prms,
+ NULL, /* &ps_ctb_col_pu */
+ NULL, /* &col_pu_map_idx */
+ &s_cu_update_prms,
+ ctb_ctr,
+ vert_ctr);
+
+ ps_enc_out_ctxt++;
+
+ ASSERT(ps_ctb_in->u1_num_cus_in_ctb <= MAX_CTB_SIZE);
+
+ } while(ps_enc_out_ctxt->u1_cu_size != 128);
+ }
+#endif
+
+ /* --- ctb level copy of data to left buffers--*/
+ ((pf_enc_loop_ctb_left_copy)ps_ctxt->pv_enc_loop_ctb_left_copy)(ps_ctxt, &s_cu_prms);
+
+ if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
+ {
+ /* For the Unaligned CTB, make the invalid edge boundary strength 0 */
+ ihevce_bs_clear_invalid(
+ &ps_ctxt->s_deblk_bs_prms,
+ last_ctb_row_flag,
+ (ctb_ctr == (num_ctbs_horz_pic - 1)),
+ last_hz_ctb_wd,
+ last_vt_ctb_ht);
+
+ /* -----------------Read boundary strengths for current CTB------------- */
+
+ if((0 == ps_ctxt->i4_deblock_type) && (ps_ctxt->i4_deblk_pad_hpel_cur_pic))
+ {
+ /*Storing boundary strengths of current CTB*/
+ UWORD32 *pu4_bs_horz = &ps_ctxt->s_deblk_bs_prms.au4_horz_bs[0];
+ UWORD32 *pu4_bs_vert = &ps_ctxt->s_deblk_bs_prms.au4_vert_bs[0];
+
+ memcpy(s_deblk_ctb_row_params.pu4_ctb_row_bs_vert, pu4_bs_vert, (ctb_size * 4) / 8);
+ memcpy(s_deblk_ctb_row_params.pu4_ctb_row_bs_horz, pu4_bs_horz, (ctb_size * 4) / 8);
+ }
+ //Increment for storing next CTB info
+ s_deblk_ctb_row_params.pu4_ctb_row_bs_vert +=
+ (ctb_size >> 3); //one vertical edge per 8x8 block
+ s_deblk_ctb_row_params.pu4_ctb_row_bs_horz +=
+ (ctb_size >> 3); //one horizontal edge per 8x8 block
+ }
+
+ /* -------------- ctb level updates ----------------- */
+ ps_row_cu += ps_ctb_out->u1_num_cus_in_ctb;
+
+ pu1_row_pu_map += (ctb_size >> 2) * (ctb_size >> 2);
+
+ /* first ctb offset will be populated by the caller */
+ if(0 != ctb_ctr)
+ {
+ pu4_pu_offsets[ctb_ctr] = pu4_pu_offsets[ctb_ctr - 1] + num_pus_in_ctb;
+ }
+ pu2_num_pu_map[ctb_ctr] = num_pus_in_ctb;
+ ASSERT(ps_ctb_out->u1_num_cus_in_ctb != 0);
+
+ ps_ctb_in++;
+ ps_ctb_out++;
+ }
+
+ /* ---------- Encloop end of row updates ----------------- */
+
+ /* at the end of row processing cu pixel counter is set to */
+ /* (num ctb * ctb size) + ctb size */
+ /* this is to set the dependency for right most cu of last */
+ /* ctb's top right data dependency */
+ /* this even takes care of entropy dependency for */
+ /* incomplete ctb as well */
+ ihevce_dmgr_set_row_row_sync(
+ pv_dep_mngr_enc_loop_cu_top_right,
+ (ctb_ctr * ctb_size + ctb_size),
+ vert_ctr,
+ ps_ctxt->i4_tile_col_idx /* Col Tile No. */);
+
+ ps_ctxt->s_sao_ctxt_t.ps_cmn_utils_optimised_function_list = &ps_ctxt->s_cmn_opt_func;
+
+ /* Restore structure.
+ Getting the address of stored-BS and Qp-map and other info */
+ memcpy(&s_deblk_ctb_row_params, &ps_ctxt->s_deblk_ctbrow_prms, sizeof(deblk_ctbrow_prms_t));
+ {
+ /* Update the pointers to the tile start */
+ s_deblk_ctb_row_params.pu4_ctb_row_bs_vert +=
+ (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3)); //one vertical edge per 8x8 block
+ s_deblk_ctb_row_params.pu4_ctb_row_bs_horz +=
+ (ps_tile_params->i4_first_ctb_x * (ctb_size >> 3)); //one horizontal edge per 8x8 block
+ s_deblk_ctb_row_params.pi1_ctb_row_qp += (ps_tile_params->i4_first_ctb_x * (ctb_size >> 2));
+ }
+
+#if PROFILE_ENC_REG_DATA
+ s_profile.u8_enc_reg_data[vert_ctr] = 0;
+#endif
+
+ /* -- Loop over all the CTBs in a row for Deblocking and Subpel gen --- */
+ if(!ps_ctxt->u1_is_input_data_hbd)
+ {
+ WORD32 last_col_pic, last_col_tile;
+
+ for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
+ {
+ /* store the ctb level prms in cu prms */
+ s_cu_prms.i4_ctb_pos = ctb_ctr;
+ s_cu_prms.pu1_luma_src = (UWORD8 *)ps_curr_src_bufs->pv_y_buf + ctb_ctr * ctb_size;
+ s_cu_prms.pu1_chrm_src = (UWORD8 *)ps_curr_src_bufs->pv_u_buf + ctb_ctr * ctb_size;
+
+ s_cu_prms.pu1_luma_recon = (UWORD8 *)ps_curr_recon_bufs->pv_y_buf + ctb_ctr * ctb_size;
+ s_cu_prms.pu1_chrm_recon = (UWORD8 *)ps_curr_recon_bufs->pv_u_buf + ctb_ctr * ctb_size;
+ s_cu_prms.pu1_sbpel_hxfy = (UWORD8 *)ppu1_y_subpel_planes[0] + ctb_ctr * ctb_size;
+
+ s_cu_prms.pu1_sbpel_fxhy = (UWORD8 *)ppu1_y_subpel_planes[1] + ctb_ctr * ctb_size;
+
+ s_cu_prms.pu1_sbpel_hxhy = (UWORD8 *)ppu1_y_subpel_planes[2] + ctb_ctr * ctb_size;
+
+ /* If last ctb in the horizontal row */
+ if(ctb_ctr == (num_ctbs_horz_pic - 1))
+ {
+ last_col_pic = 1;
+ }
+ else
+ {
+ last_col_pic = 0;
+ }
+
+ /* If last ctb in the tile row */
+ if(ctb_ctr == (ctb_end - 1))
+ {
+ last_col_tile = 1;
+ }
+ else
+ {
+ last_col_tile = 0;
+ }
+
+ if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
+ {
+ /* Wait till top neighbour CTB has done it's deblocking*/
+ if(ctb_ctr < (ctb_end)-1)
+ {
+ ihevce_dmgr_chk_row_row_sync(
+ pv_dep_mngr_enc_loop_dblk,
+ ctb_ctr,
+ dblk_offset,
+ dblk_check_dep_pos,
+ ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
+ ps_ctxt->thrd_id);
+ }
+
+ if((0 == ps_ctxt->i4_deblock_type))
+ {
+ /* Populate Qp-map */
+ if(ctb_start == ctb_ctr)
+ {
+ ihevce_deblk_populate_qp_map(
+ ps_ctxt,
+ &s_deblk_ctb_row_params,
+ ps_ctb_out_dblk,
+ vert_ctr,
+ ps_frm_ctb_prms,
+ ps_tile_params);
+ }
+ ps_ctxt->s_deblk_prms.i4_ctb_size = ctb_size;
+
+ /* recon pointers and stride */
+ ps_ctxt->s_deblk_prms.pu1_ctb_y = s_cu_prms.pu1_luma_recon;
+ ps_ctxt->s_deblk_prms.pu1_ctb_uv = s_cu_prms.pu1_chrm_recon;
+ ps_ctxt->s_deblk_prms.i4_luma_pic_stride = s_cu_prms.i4_luma_recon_stride;
+ ps_ctxt->s_deblk_prms.i4_chroma_pic_stride = s_cu_prms.i4_chrm_recon_stride;
+
+ ps_ctxt->s_deblk_prms.i4_deblock_top_ctb_edge = (0 == vert_ctr) ? 0 : 1;
+ {
+ ps_ctxt->s_deblk_prms.i4_deblock_top_ctb_edge =
+ (ps_tile_params->i4_first_ctb_y == vert_ctr) ? 0 : 1;
+ }
+ ps_ctxt->s_deblk_prms.i4_deblock_left_ctb_edge = (ctb_start == ctb_ctr) ? 0 : 1;
+ //or according to slice boundary. Support yet to be added !!!!
+
+ ihevce_deblk_ctb(
+ &ps_ctxt->s_deblk_prms, last_col_tile, &s_deblk_ctb_row_params);
+
+ //Increment for storing next CTB info
+ s_deblk_ctb_row_params.pu4_ctb_row_bs_vert +=
+ (ctb_size >> 3); //one vertical edge per 8x8 block
+ s_deblk_ctb_row_params.pu4_ctb_row_bs_horz +=
+ (ctb_size >> 3); //one horizontal edge per 8x8 block
+ s_deblk_ctb_row_params.pi1_ctb_row_qp +=
+ (ctb_size >> 2); //one qp per 4x4 block.
+
+ } //end of if((0 == ps_ctxt->i4_deblock_type)
+ } // end of if(ps_ctxt->i4_deblk_pad_hpel_cur_pic)
+
+ /* Apply SAO over the previous CTB-row */
+ if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
+ ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
+ {
+ sao_ctxt_t *ps_sao_ctxt = &ps_ctxt->s_sao_ctxt_t;
+
+ if((vert_ctr > ps_tile_params->i4_first_ctb_y) &&
+ (ctb_ctr > ctb_start)) //if((vert_ctr > 0) && (ctb_ctr > 0))
+ {
+ /* Call the sao function to do sao for the current ctb*/
+
+ /* Register the curr ctb's x pos in sao context*/
+ ps_sao_ctxt->i4_ctb_x = ctb_ctr - 1;
+
+ /* Register the curr ctb's y pos in sao context*/
+ ps_sao_ctxt->i4_ctb_y = vert_ctr - 1;
+
+ ps_ctb_out_sao = ps_sao_ctxt->ps_ctb_out +
+ (vert_ctr - 1) * ps_frm_ctb_prms->i4_num_ctbs_horz +
+ (ctb_ctr - 1);
+ ps_sao_ctxt->ps_sao = &ps_ctb_out_sao->s_sao;
+ ps_sao_ctxt->i4_sao_blk_wd = ctb_size;
+ ps_sao_ctxt->i4_sao_blk_ht = ctb_size;
+
+ ps_sao_ctxt->i4_is_last_ctb_row = 0;
+ ps_sao_ctxt->i4_is_last_ctb_col = 0;
+
+ /* Calculate the recon buf pointer and stride for the current ctb */
+ ps_sao_ctxt->pu1_cur_luma_recon_buf =
+ ps_sao_ctxt->pu1_frm_luma_recon_buf +
+ (ps_sao_ctxt->i4_frm_luma_recon_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
+ (ps_sao_ctxt->i4_ctb_x * ctb_size);
+
+ ps_sao_ctxt->i4_cur_luma_recon_stride = ps_sao_ctxt->i4_frm_luma_recon_stride;
+
+ ps_sao_ctxt->pu1_cur_chroma_recon_buf =
+ ps_sao_ctxt->pu1_frm_chroma_recon_buf +
+ (ps_sao_ctxt->i4_frm_chroma_recon_stride * ps_sao_ctxt->i4_ctb_y *
+ (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
+ (ps_sao_ctxt->i4_ctb_x * ctb_size);
+
+ ps_sao_ctxt->i4_cur_chroma_recon_stride =
+ ps_sao_ctxt->i4_frm_chroma_recon_stride;
+
+ ps_sao_ctxt->pu1_cur_luma_src_buf =
+ ps_sao_ctxt->pu1_frm_luma_src_buf +
+ (ps_sao_ctxt->i4_frm_luma_src_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
+ (ps_sao_ctxt->i4_ctb_x * ctb_size);
+
+ ps_sao_ctxt->i4_cur_luma_src_stride = ps_sao_ctxt->i4_frm_luma_src_stride;
+
+ ps_sao_ctxt->pu1_cur_chroma_src_buf =
+ ps_sao_ctxt->pu1_frm_chroma_src_buf +
+ (ps_sao_ctxt->i4_frm_chroma_src_stride * ps_sao_ctxt->i4_ctb_y *
+ (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
+ (ps_sao_ctxt->i4_ctb_x * ctb_size);
+
+ ps_sao_ctxt->i4_cur_chroma_src_stride = ps_sao_ctxt->i4_frm_chroma_src_stride;
+
+ /* Calculate the pointer to buff to store the (x,y)th sao
+ * for the top merge of (x,y+1)th ctb
+ */
+ ps_sao_ctxt->ps_top_ctb_sao =
+ &ps_sao_ctxt->aps_frm_top_ctb_sao[ps_ctxt->i4_enc_frm_id]
+ [ps_sao_ctxt->i4_ctb_x +
+ (ps_sao_ctxt->i4_ctb_y) *
+ ps_frm_ctb_prms->i4_num_ctbs_horz +
+ (ps_ctxt->i4_bitrate_instance_num *
+ ps_sao_ctxt->i4_num_ctb_units)];
+
+ /* Calculate the pointer to buff to store the top pixels of curr ctb*/
+ ps_sao_ctxt->pu1_curr_sao_src_top_luma =
+ ps_sao_ctxt->apu1_sao_src_frm_top_luma[ps_ctxt->i4_enc_frm_id] +
+ (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_luma_buf_stride +
+ ps_sao_ctxt->i4_ctb_x * ctb_size +
+ ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
+ ps_sao_ctxt->i4_top_chroma_buf_size);
+
+ /* Calculate the pointer to buff to store the top pixels of curr ctb*/
+ ps_sao_ctxt->pu1_curr_sao_src_top_chroma =
+ ps_sao_ctxt->apu1_sao_src_frm_top_chroma[ps_ctxt->i4_enc_frm_id] +
+ (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_chroma_buf_stride +
+ ps_sao_ctxt->i4_ctb_x * ctb_size +
+ ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
+ ps_sao_ctxt->i4_top_chroma_buf_size);
+
+ {
+ UWORD32 u4_ctb_sao_bits;
+
+ ihevce_sao_analyse(
+ &ps_ctxt->s_sao_ctxt_t,
+ ps_ctb_out_sao,
+ &u4_ctb_sao_bits,
+ ps_tile_params);
+ ps_ctxt
+ ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
+ [ps_ctxt->i4_bitrate_instance_num]
+ ->u4_frame_rdopt_header_bits += u4_ctb_sao_bits;
+ ps_ctxt
+ ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
+ [ps_ctxt->i4_bitrate_instance_num]
+ ->u4_frame_rdopt_bits += u4_ctb_sao_bits;
+ }
+ if(ps_ctxt->i4_deblk_pad_hpel_cur_pic &
+ 0x1) /** Subpel generation not done for non-ref picture **/
+ {
+ /* Padding and Subpel Plane Generation */
+ ihevce_pad_interp_recon_ctb(
+ ps_pad_interp_recon,
+ ctb_ctr - 1,
+ vert_ctr - 1,
+ ps_ctxt->i4_quality_preset,
+ ps_frm_ctb_prms,
+ ps_ctxt->ai2_scratch,
+ ps_ctxt->i4_bitrate_instance_num,
+ ps_ctxt->ps_func_selector);
+ }
+ }
+
+ /* Call the sao function again for the last ctb of the previous row*/
+ if(((ctb_ctr + 1) == (ctb_end)) &&
+ (vert_ctr >
+ ps_tile_params
+ ->i4_first_ctb_y)) //( ((ctb_ctr+1) == ps_frm_ctb_prms->i4_num_ctbs_horz) && (vert_ctr > 0) )
+ {
+ /* Register the curr ctb's x pos in sao context*/
+ ps_ctxt->s_sao_ctxt_t.i4_ctb_x = ctb_ctr;
+
+ /* Register the curr ctb's y pos in sao context*/
+ ps_ctxt->s_sao_ctxt_t.i4_ctb_y = vert_ctr - 1;
+
+ ps_ctb_out_sao = ps_ctxt->s_sao_ctxt_t.ps_ctb_out +
+ (vert_ctr - 1) * ps_frm_ctb_prms->i4_num_ctbs_horz + (ctb_ctr);
+
+ ps_ctxt->s_sao_ctxt_t.ps_sao = &ps_ctb_out_sao->s_sao;
+
+ ps_ctxt->s_sao_ctxt_t.i4_sao_blk_wd =
+ ctb_size - ((ps_tile_params->i4_curr_tile_wd_in_ctb_unit * ctb_size) -
+ ps_tile_params->i4_curr_tile_width);
+
+ ps_ctxt->s_sao_ctxt_t.i4_sao_blk_ht = ps_ctxt->s_sao_ctxt_t.i4_ctb_size;
+
+ ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_row = 0;
+ ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_col = 1;
+
+ /* Calculate the recon buf pointer and stride for the current ctb */
+ ps_sao_ctxt->pu1_cur_luma_recon_buf =
+ ps_sao_ctxt->pu1_frm_luma_recon_buf +
+ (ps_sao_ctxt->i4_frm_luma_recon_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
+ (ps_sao_ctxt->i4_ctb_x * ctb_size);
+
+ ps_sao_ctxt->i4_cur_luma_recon_stride = ps_sao_ctxt->i4_frm_luma_recon_stride;
+
+ ps_sao_ctxt->pu1_cur_chroma_recon_buf =
+ ps_sao_ctxt->pu1_frm_chroma_recon_buf +
+ (ps_sao_ctxt->i4_frm_chroma_recon_stride * ps_sao_ctxt->i4_ctb_y *
+ (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
+ (ps_sao_ctxt->i4_ctb_x * ctb_size);
+
+ ps_sao_ctxt->i4_cur_chroma_recon_stride =
+ ps_sao_ctxt->i4_frm_chroma_recon_stride;
+
+ ps_sao_ctxt->pu1_cur_luma_src_buf =
+ ps_sao_ctxt->pu1_frm_luma_src_buf +
+ (ps_sao_ctxt->i4_frm_luma_src_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
+ (ps_sao_ctxt->i4_ctb_x * ctb_size);
+
+ ps_sao_ctxt->i4_cur_luma_src_stride = ps_sao_ctxt->i4_frm_luma_src_stride;
+
+ ps_sao_ctxt->pu1_cur_chroma_src_buf =
+ ps_sao_ctxt->pu1_frm_chroma_src_buf +
+ (ps_sao_ctxt->i4_frm_chroma_src_stride * ps_sao_ctxt->i4_ctb_y *
+ (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
+ (ps_sao_ctxt->i4_ctb_x * ctb_size);
+
+ ps_sao_ctxt->i4_cur_chroma_src_stride = ps_sao_ctxt->i4_frm_chroma_src_stride;
+
+ /* Calculate the pointer to buff to store the (x,y)th sao
+ * for the top merge of (x,y+1)th ctb
+ */
+ ps_sao_ctxt->ps_top_ctb_sao =
+ &ps_sao_ctxt->aps_frm_top_ctb_sao[ps_ctxt->i4_enc_frm_id]
+ [ps_sao_ctxt->i4_ctb_x +
+ (ps_sao_ctxt->i4_ctb_y) *
+ ps_frm_ctb_prms->i4_num_ctbs_horz +
+ (ps_ctxt->i4_bitrate_instance_num *
+ ps_sao_ctxt->i4_num_ctb_units)];
+
+ /* Calculate the pointer to buff to store the top pixels of curr ctb*/
+ ps_sao_ctxt->pu1_curr_sao_src_top_luma =
+ ps_sao_ctxt->apu1_sao_src_frm_top_luma[ps_ctxt->i4_enc_frm_id] +
+ (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_luma_buf_stride +
+ ps_sao_ctxt->i4_ctb_x * ctb_size +
+ ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
+ ps_sao_ctxt->i4_top_chroma_buf_size);
+
+ /* Calculate the pointer to buff to store the top pixels of curr ctb*/
+ ps_sao_ctxt->pu1_curr_sao_src_top_chroma =
+ ps_sao_ctxt->apu1_sao_src_frm_top_chroma[ps_ctxt->i4_enc_frm_id] +
+ (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_chroma_buf_stride +
+ ps_sao_ctxt->i4_ctb_x * ctb_size +
+ ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
+ ps_sao_ctxt->i4_top_chroma_buf_size);
+
+ {
+ UWORD32 u4_ctb_sao_bits;
+
+ ihevce_sao_analyse(
+ &ps_ctxt->s_sao_ctxt_t,
+ ps_ctb_out_sao,
+ &u4_ctb_sao_bits,
+ ps_tile_params);
+ ps_ctxt
+ ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
+ [ps_ctxt->i4_bitrate_instance_num]
+ ->u4_frame_rdopt_header_bits += u4_ctb_sao_bits;
+ ps_ctxt
+ ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
+ [ps_ctxt->i4_bitrate_instance_num]
+ ->u4_frame_rdopt_bits += u4_ctb_sao_bits;
+ }
+ if(ps_ctxt->i4_deblk_pad_hpel_cur_pic &
+ 0x1) /** Subpel generation not done for non-ref picture **/
+ {
+ /* Padding and Subpel Plane Generation */
+ ihevce_pad_interp_recon_ctb(
+ ps_pad_interp_recon,
+ ctb_ctr,
+ vert_ctr - 1,
+ ps_ctxt->i4_quality_preset,
+ ps_frm_ctb_prms,
+ ps_ctxt->ai2_scratch,
+ ps_ctxt->i4_bitrate_instance_num,
+ ps_ctxt->ps_func_selector);
+ }
+ }
+ }
+ else //SAO Disabled
+ {
+ if(1 == ps_ctxt->i4_deblk_pad_hpel_cur_pic)
+ {
+ /* Padding and Subpel Plane Generation */
+ ihevce_pad_interp_recon_ctb(
+ ps_pad_interp_recon,
+ ctb_ctr,
+ vert_ctr,
+ ps_ctxt->i4_quality_preset,
+ ps_frm_ctb_prms,
+ ps_ctxt->ai2_scratch,
+ ps_ctxt->i4_bitrate_instance_num,
+ ps_ctxt->ps_func_selector);
+ }
+ }
+
+ /* update the number of ctbs deblocked for this row */
+ ihevce_dmgr_set_row_row_sync(
+ pv_dep_mngr_enc_loop_dblk,
+ (ctb_ctr + 1),
+ vert_ctr,
+ ps_ctxt->i4_tile_col_idx /* Col Tile No. */);
+ } //end of loop over CTBs in current CTB-row
+ {
+ if(!ps_ctxt->i4_bitrate_instance_num)
+ {
+ if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
+ ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
+ {
+ /* If SAO is on, then signal completion of previous CTB row */
+ if(0 != vert_ctr)
+ {
+ {
+ WORD32 post_ctb_ctr;
+
+ for(post_ctb_ctr = ctb_start; post_ctb_ctr < ctb_end; post_ctb_ctr++)
+ {
+ ihevce_dmgr_map_set_sync(
+ pv_dep_mngr_me_dep_encloop,
+ post_ctb_ctr,
+ (vert_ctr - 1),
+ MAP_CTB_COMPLETE);
+ }
+ }
+ }
+ }
+ else
+ {
+ {
+ WORD32 post_ctb_ctr;
+
+ for(post_ctb_ctr = ctb_start; post_ctb_ctr < ctb_end; post_ctb_ctr++)
+ {
+ ihevce_dmgr_map_set_sync(
+ pv_dep_mngr_me_dep_encloop,
+ post_ctb_ctr,
+ vert_ctr,
+ MAP_CTB_COMPLETE);
+ }
+ }
+ }
+ }
+ }
+
+ /* Call the sao function again for the last ctb row of frame */
+ if(ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
+ ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
+ {
+ sao_ctxt_t *ps_sao_ctxt = &ps_ctxt->s_sao_ctxt_t;
+
+ for(ctb_ctr = ctb_start; ctb_ctr < ctb_end; ctb_ctr++)
+ {
+ if((vert_ctr == (ps_tile_params->i4_first_ctb_y +
+ ps_tile_params->i4_curr_tile_ht_in_ctb_unit - 1)) &&
+ (ctb_ctr >
+ ctb_start)) //((vert_ctr == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1)) && (ctb_ctr > 0))
+ {
+ /* Register the curr ctb's x pos in sao context*/
+ ps_ctxt->s_sao_ctxt_t.i4_ctb_x = ctb_ctr - 1;
+
+ /* Register the curr ctb's y pos in sao context*/
+ ps_ctxt->s_sao_ctxt_t.i4_ctb_y = vert_ctr;
+
+ ps_ctb_out_sao = ps_ctxt->s_sao_ctxt_t.ps_ctb_out +
+ (vert_ctr)*ps_frm_ctb_prms->i4_num_ctbs_horz + (ctb_ctr - 1);
+
+ ps_ctxt->s_sao_ctxt_t.ps_sao = &ps_ctb_out_sao->s_sao;
+
+ ps_ctxt->s_sao_ctxt_t.i4_sao_blk_wd = ps_ctxt->s_sao_ctxt_t.i4_ctb_size;
+ ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_col = 0;
+
+ ps_ctxt->s_sao_ctxt_t.i4_sao_blk_ht =
+ ctb_size - ((ps_tile_params->i4_curr_tile_ht_in_ctb_unit * ctb_size) -
+ ps_tile_params->i4_curr_tile_height);
+
+ ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_row = 1;
+
+ /* Calculate the recon buf pointer and stride for the current ctb */
+ ps_sao_ctxt->pu1_cur_luma_recon_buf =
+ ps_sao_ctxt->pu1_frm_luma_recon_buf +
+ (ps_sao_ctxt->i4_frm_luma_recon_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
+ (ps_sao_ctxt->i4_ctb_x * ctb_size);
+
+ ps_sao_ctxt->i4_cur_luma_recon_stride = ps_sao_ctxt->i4_frm_luma_recon_stride;
+
+ ps_sao_ctxt->pu1_cur_chroma_recon_buf =
+ ps_sao_ctxt->pu1_frm_chroma_recon_buf +
+ (ps_sao_ctxt->i4_frm_chroma_recon_stride * ps_sao_ctxt->i4_ctb_y *
+ (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
+ (ps_sao_ctxt->i4_ctb_x * ctb_size);
+
+ ps_sao_ctxt->i4_cur_chroma_recon_stride =
+ ps_sao_ctxt->i4_frm_chroma_recon_stride;
+
+ ps_sao_ctxt->pu1_cur_luma_src_buf =
+ ps_sao_ctxt->pu1_frm_luma_src_buf +
+ (ps_sao_ctxt->i4_frm_luma_src_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
+ (ps_sao_ctxt->i4_ctb_x * ctb_size);
+
+ ps_sao_ctxt->i4_cur_luma_src_stride = ps_sao_ctxt->i4_frm_luma_src_stride;
+
+ ps_sao_ctxt->pu1_cur_chroma_src_buf =
+ ps_sao_ctxt->pu1_frm_chroma_src_buf +
+ (ps_sao_ctxt->i4_frm_chroma_src_stride * ps_sao_ctxt->i4_ctb_y *
+ (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
+ (ps_sao_ctxt->i4_ctb_x * ctb_size);
+
+ ps_sao_ctxt->i4_cur_chroma_src_stride = ps_sao_ctxt->i4_frm_chroma_src_stride;
+
+ /* Calculate the pointer to buff to store the (x,y)th sao
+ * for the top merge of (x,y+1)th ctb
+ */
+ ps_sao_ctxt->ps_top_ctb_sao =
+ &ps_sao_ctxt->aps_frm_top_ctb_sao[ps_ctxt->i4_enc_frm_id]
+ [ps_sao_ctxt->i4_ctb_x +
+ (ps_sao_ctxt->i4_ctb_y) *
+ ps_frm_ctb_prms->i4_num_ctbs_horz +
+ (ps_ctxt->i4_bitrate_instance_num *
+ ps_sao_ctxt->i4_num_ctb_units)];
+
+ /* Calculate the pointer to buff to store the top pixels of curr ctb*/
+ ps_sao_ctxt->pu1_curr_sao_src_top_luma =
+ ps_sao_ctxt->apu1_sao_src_frm_top_luma[ps_ctxt->i4_enc_frm_id] +
+ (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_luma_buf_stride +
+ ps_sao_ctxt->i4_ctb_x * ctb_size +
+ ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
+ ps_sao_ctxt->i4_top_chroma_buf_size);
+
+ /* Calculate the pointer to buff to store the top pixels of curr ctb*/
+ ps_sao_ctxt->pu1_curr_sao_src_top_chroma =
+ ps_sao_ctxt->apu1_sao_src_frm_top_chroma[ps_ctxt->i4_enc_frm_id] +
+ (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_chroma_buf_stride +
+ ps_sao_ctxt->i4_ctb_x * ctb_size +
+ ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
+ ps_sao_ctxt->i4_top_chroma_buf_size);
+
+ {
+ UWORD32 u4_ctb_sao_bits;
+ ihevce_sao_analyse(
+ &ps_ctxt->s_sao_ctxt_t,
+ ps_ctb_out_sao,
+ &u4_ctb_sao_bits,
+ ps_tile_params);
+ ps_ctxt
+ ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
+ [ps_ctxt->i4_bitrate_instance_num]
+ ->u4_frame_rdopt_header_bits += u4_ctb_sao_bits;
+ ps_ctxt
+ ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
+ [ps_ctxt->i4_bitrate_instance_num]
+ ->u4_frame_rdopt_bits += u4_ctb_sao_bits;
+ }
+ if(ps_ctxt->i4_deblk_pad_hpel_cur_pic &
+ 0x1) /** Subpel generation not done for non-ref picture **/
+ {
+ /* Padding and Subpel Plane Generation */
+ ihevce_pad_interp_recon_ctb(
+ ps_pad_interp_recon,
+ ctb_ctr - 1,
+ vert_ctr,
+ ps_ctxt->i4_quality_preset,
+ ps_frm_ctb_prms,
+ ps_ctxt->ai2_scratch,
+ ps_ctxt->i4_bitrate_instance_num,
+ ps_ctxt->ps_func_selector);
+ }
+ }
+ /* Call the sao function again for the last ctb of the last ctb row of frame */
+ if((vert_ctr == (ps_tile_params->i4_first_ctb_y +
+ ps_tile_params->i4_curr_tile_ht_in_ctb_unit - 1)) &&
+ ((ctb_ctr + 1) ==
+ (ctb_end))) //( ((ctb_ctr+1) == ps_frm_ctb_prms->i4_num_ctbs_horz))
+ {
+ /* Register the curr ctb's x pos in sao context*/
+ ps_ctxt->s_sao_ctxt_t.i4_ctb_x = ctb_ctr;
+
+ /* Register the curr ctb's y pos in sao context*/
+ ps_ctxt->s_sao_ctxt_t.i4_ctb_y = vert_ctr;
+
+ ps_ctb_out_sao = ps_ctxt->s_sao_ctxt_t.ps_ctb_out +
+ (vert_ctr)*ps_frm_ctb_prms->i4_num_ctbs_horz + (ctb_ctr);
+
+ ps_ctxt->s_sao_ctxt_t.ps_sao = &ps_ctb_out_sao->s_sao;
+
+ ps_ctxt->s_sao_ctxt_t.i4_sao_blk_wd =
+ ctb_size - ((ps_tile_params->i4_curr_tile_wd_in_ctb_unit * ctb_size) -
+ ps_tile_params->i4_curr_tile_width);
+
+ ps_ctxt->s_sao_ctxt_t.i4_sao_blk_ht =
+ ctb_size - ((ps_tile_params->i4_curr_tile_ht_in_ctb_unit * ctb_size) -
+ ps_tile_params->i4_curr_tile_height);
+
+ ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_row = 1;
+ ps_ctxt->s_sao_ctxt_t.i4_is_last_ctb_col = 1;
+
+ /* Calculate the recon buf pointer and stride for the current ctb */
+ ps_sao_ctxt->pu1_cur_luma_recon_buf =
+ ps_sao_ctxt->pu1_frm_luma_recon_buf +
+ (ps_sao_ctxt->i4_frm_luma_recon_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
+ (ps_sao_ctxt->i4_ctb_x * ctb_size);
+
+ ps_sao_ctxt->i4_cur_luma_recon_stride = ps_sao_ctxt->i4_frm_luma_recon_stride;
+
+ ps_sao_ctxt->pu1_cur_chroma_recon_buf =
+ ps_sao_ctxt->pu1_frm_chroma_recon_buf +
+ (ps_sao_ctxt->i4_frm_chroma_recon_stride * ps_sao_ctxt->i4_ctb_y *
+ (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
+ (ps_sao_ctxt->i4_ctb_x * ctb_size);
+
+ ps_sao_ctxt->i4_cur_chroma_recon_stride =
+ ps_sao_ctxt->i4_frm_chroma_recon_stride;
+
+ ps_sao_ctxt->pu1_cur_luma_src_buf =
+ ps_sao_ctxt->pu1_frm_luma_src_buf +
+ (ps_sao_ctxt->i4_frm_luma_src_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
+ (ps_sao_ctxt->i4_ctb_x * ctb_size);
+
+ ps_sao_ctxt->i4_cur_luma_src_stride = ps_sao_ctxt->i4_frm_luma_src_stride;
+
+ ps_sao_ctxt->pu1_cur_chroma_src_buf =
+ ps_sao_ctxt->pu1_frm_chroma_src_buf +
+ (ps_sao_ctxt->i4_frm_chroma_src_stride * ps_sao_ctxt->i4_ctb_y *
+ (ctb_size >> (ps_ctxt->u1_chroma_array_type == 1))) +
+ (ps_sao_ctxt->i4_ctb_x * ctb_size);
+
+ ps_sao_ctxt->i4_cur_chroma_src_stride = ps_sao_ctxt->i4_frm_chroma_src_stride;
+
+ /* Calculate the pointer to buff to store the (x,y)th sao
+ * for the top merge of (x,y+1)th ctb
+ */
+ ps_sao_ctxt->ps_top_ctb_sao =
+ &ps_sao_ctxt->aps_frm_top_ctb_sao[ps_ctxt->i4_enc_frm_id]
+ [ps_sao_ctxt->i4_ctb_x +
+ ps_sao_ctxt->i4_ctb_y *
+ ps_frm_ctb_prms->i4_num_ctbs_horz +
+ (ps_ctxt->i4_bitrate_instance_num *
+ ps_sao_ctxt->i4_num_ctb_units)];
+
+ /* Calculate the pointer to buff to store the top pixels of curr ctb*/
+ ps_sao_ctxt->pu1_curr_sao_src_top_luma =
+ ps_sao_ctxt->apu1_sao_src_frm_top_luma[ps_ctxt->i4_enc_frm_id] +
+ (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_luma_buf_stride +
+ ps_sao_ctxt->i4_ctb_x * ctb_size +
+ ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
+ ps_sao_ctxt->i4_top_chroma_buf_size);
+
+ /* Calculate the pointer to buff to store the top pixels of curr ctb*/
+ ps_sao_ctxt->pu1_curr_sao_src_top_chroma =
+ ps_sao_ctxt->apu1_sao_src_frm_top_chroma[ps_ctxt->i4_enc_frm_id] +
+ (ps_sao_ctxt->i4_ctb_y - 1) * ps_sao_ctxt->i4_frm_top_chroma_buf_stride +
+ ps_sao_ctxt->i4_ctb_x * ctb_size +
+ ps_ctxt->i4_bitrate_instance_num * (ps_sao_ctxt->i4_top_luma_buf_size +
+ ps_sao_ctxt->i4_top_chroma_buf_size);
+
+ {
+ UWORD32 u4_ctb_sao_bits;
+
+ ihevce_sao_analyse(
+ &ps_ctxt->s_sao_ctxt_t,
+ ps_ctb_out_sao,
+ &u4_ctb_sao_bits,
+ ps_tile_params);
+ ps_ctxt
+ ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
+ [ps_ctxt->i4_bitrate_instance_num]
+ ->u4_frame_rdopt_header_bits += u4_ctb_sao_bits;
+ ps_ctxt
+ ->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id]
+ [ps_ctxt->i4_bitrate_instance_num]
+ ->u4_frame_rdopt_bits += u4_ctb_sao_bits;
+ }
+ if(ps_ctxt->i4_deblk_pad_hpel_cur_pic &
+ 0x1) /** Subpel generation not done for non-ref picture **/
+ {
+ /* Padding and Subpel Plane Generation */
+ ihevce_pad_interp_recon_ctb(
+ ps_pad_interp_recon,
+ ctb_ctr,
+ vert_ctr,
+ ps_ctxt->i4_quality_preset,
+ ps_frm_ctb_prms,
+ ps_ctxt->ai2_scratch,
+ ps_ctxt->i4_bitrate_instance_num,
+ ps_ctxt->ps_func_selector);
+ }
+ }
+ } //end of loop over CTBs in current CTB-row
+
+ /* If SAO is on, then signal completion of the last CTB row of frame */
+ {
+ if(vert_ctr == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1))
+ {
+ if(!ps_ctxt->i4_bitrate_instance_num)
+ {
+ {
+ WORD32 post_ctb_ctr;
+
+ for(post_ctb_ctr = ctb_start; post_ctb_ctr < ctb_end; post_ctb_ctr++)
+ {
+ ihevce_dmgr_map_set_sync(
+ pv_dep_mngr_me_dep_encloop,
+ post_ctb_ctr,
+ vert_ctr,
+ MAP_CTB_COMPLETE);
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_enc_loop_process \endif
+*
+* \brief
+*    Frame level enc_loop function: fetches CTB-row jobs from the job queue and
+*    calls ihevce_enc_loop_process_row() for each one until no job is returned
+*
+* \param[in] pv_ctxt : pointer to enc_loop master context
+* \param[in] ps_curr_inp : current input buffer (source yuv and lap_out info)
+* \param[in] ps_ctb_in, ps_ipe_analyse : ME/IPE CTB analysis (frame buffers)
+* \param[in,out] ps_frm_recon : recon picture structure pointer (frame buffer)
+* \param[out] ps_cu_tree_out, ps_ctb_out, ps_cu_out, ps_tu_out, ps_pu_out :
+*    CU-tree, CTB, CU, TU and PU output structures (frame buffers)
+* \param[out] pu1_frm_ecd_data : coeff output frame buffer
+* \param[in] ps_frm_ctb_prms : frame level CTB parameters
+* \param[in] ps_frm_lamda : frame level lambda params
+* \param[in] ps_multi_thrd_ctxt : multi thread context handed to the job queue
+* \param[in] thrd_id, i4_enc_frm_id : thread context index, encode frame id
+* \param[in] i4_pass : forwarded unchanged to the row level function
+*
+* \return None
+*
+* Note : Currently the frame level calculations assume that the frame width
+*    of the input / recon is an exact multiple of the ctb size
+* \author Ittiam
+*****************************************************************************
+*/
+void ihevce_enc_loop_process(
+    void *pv_ctxt,
+    ihevce_lap_enc_buf_t *ps_curr_inp,
+    ctb_analyse_t *ps_ctb_in,
+    ipe_l0_ctb_analyse_for_me_t *ps_ipe_analyse,
+    recon_pic_buf_t *ps_frm_recon,
+    cur_ctb_cu_tree_t *ps_cu_tree_out,
+    ctb_enc_loop_out_t *ps_ctb_out,
+    cu_enc_loop_out_t *ps_cu_out,
+    tu_enc_loop_out_t *ps_tu_out,
+    pu_t *ps_pu_out,
+    UWORD8 *pu1_frm_ecd_data,
+    frm_ctb_ctxt_t *ps_frm_ctb_prms,
+    frm_lambda_ctxt_t *ps_frm_lamda,
+    multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
+    WORD32 thrd_id,
+    WORD32 i4_enc_frm_id,
+    WORD32 i4_pass)
+{
+    WORD32 vert_ctr;
+    WORD32 tile_col_idx;
+    iv_enc_yuv_buf_t s_curr_src_bufs;
+    iv_enc_yuv_buf_t s_curr_recon_bufs;
+    iv_enc_yuv_buf_src_t s_curr_recon_bufs_src;
+    UWORD32 *pu4_pu_offsets;
+    WORD32 end_of_frame;
+    UWORD8 *apu1_y_sub_pel_planes[3];
+    pad_interp_recon_frm_t s_pad_interp_recon;
+    ihevce_enc_loop_master_ctxt_t *ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_ctxt;
+
+    ihevce_enc_loop_ctxt_t *ps_ctxt = ps_master_ctxt->aps_enc_loop_thrd_ctxt[thrd_id];
+
+    WORD32 i4_bitrate_instance_num = ps_ctxt->i4_bitrate_instance_num;
+
+    /* initialize the closed loop lambda for the current frame */
+    ps_ctxt->i8_cl_ssd_lambda_qf = ps_frm_lamda->i8_cl_ssd_lambda_qf;
+    ps_ctxt->i8_cl_ssd_lambda_chroma_qf = ps_frm_lamda->i8_cl_ssd_lambda_chroma_qf;
+    ps_ctxt->u4_chroma_cost_weighing_factor = ps_frm_lamda->u4_chroma_cost_weighing_factor;
+    ps_ctxt->i4_satd_lamda = ps_frm_lamda->i4_cl_satd_lambda_qf;
+    ps_ctxt->i4_sad_lamda = ps_frm_lamda->i4_cl_sad_type2_lambda_qf;
+    ps_ctxt->thrd_id = thrd_id;
+    ps_ctxt->u1_is_refPic = ps_curr_inp->s_lap_out.i4_is_ref_pic;
+
+#if DISABLE_SAO_WHEN_NOISY
+    ps_ctxt->s_sao_ctxt_t.ps_ctb_data = ps_ctb_in;
+    ps_ctxt->s_sao_ctxt_t.i4_ctb_data_stride = ps_frm_ctb_prms->i4_num_ctbs_horz;
+#endif
+    /* NOTE(review): ps_func_selector is not declared in this function; confirm it is in scope */
+#if ENABLE_TU_TREE_DETERMINATION_IN_RDOPT
+    ps_ctxt->pv_err_func_selector = ps_func_selector;
+#endif
+
+    /* Bit0 - current picture needs deblocking, padding and hpel plane generation.
+       Bit1 - set to 1 if sao is enabled, so that deblocking still runs when sao is on.
+       Combined with bitwise OR: a logical OR would collapse both bits into a plain 0/1 */
+    ps_ctxt->i4_deblk_pad_hpel_cur_pic =
+        (0 != ps_frm_recon->i4_deblk_pad_hpel_cur_pic) |
+        ((ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_luma_flag ||
+          ps_ctxt->s_sao_ctxt_t.ps_slice_hdr->i1_slice_sao_chroma_flag)
+         << 1);
+
+    /* Share all reference pictures with nbr clients. This flag will be used only
+       in case of dist-enc mode */
+    ps_ctxt->i4_share_flag = (ps_frm_recon->i4_is_reference != 0);
+    ps_ctxt->pv_frm_recon = (void *)ps_frm_recon;
+
+    /* Register the frame level ssd lambda for both luma and chroma */
+    ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_qf = ps_frm_lamda->i8_cl_ssd_lambda_qf;
+    ps_ctxt->s_sao_ctxt_t.i8_cl_ssd_lambda_chroma_qf = ps_frm_lamda->i8_cl_ssd_lambda_chroma_qf;
+
+    ihevce_populate_cl_cu_lambda_prms(
+        ps_ctxt,
+        ps_frm_lamda,
+        (WORD32)ps_ctxt->i1_slice_type,
+        ps_curr_inp->s_lap_out.i4_temporal_lyr_id,
+        ENC_LOOP_LAMBDA_TYPE);
+
+    ps_ctxt->u1_disable_intra_eval = DISABLE_INTRA_IN_BPICS &&
+                                     (IHEVCE_QUALITY_P6 == ps_ctxt->i4_quality_preset) &&
+                                     (ps_ctxt->i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE);
+
+    end_of_frame = 0;
+
+    /* ----------------------------------------------------- */
+    /* store the stride and dimensions of source and recon   */
+    /* buffer pointers will be overwritten at every CTB row  */
+    /* ----------------------------------------------------- */
+    memcpy(&s_curr_src_bufs, &ps_curr_inp->s_lap_out.s_input_buf, sizeof(iv_enc_yuv_buf_t));
+
+    memcpy(&s_curr_recon_bufs, &ps_frm_recon->s_yuv_buf_desc, sizeof(iv_enc_yuv_buf_t));
+
+    memcpy(&s_curr_recon_bufs_src, &ps_frm_recon->s_yuv_buf_desc_src, sizeof(iv_enc_yuv_buf_src_t));
+
+    /* get the frame level pu offset pointer */
+    pu4_pu_offsets = ps_frm_recon->pu4_pu_off;
+
+    s_pad_interp_recon.u1_chroma_array_type = ps_ctxt->u1_chroma_array_type;
+
+    /* ------------ Loop over all the CTB rows --------------- */
+    while(0 == end_of_frame)
+    {
+        UWORD8 *pu1_tmp;
+        UWORD8 *pu1_row_pu_map;
+        UWORD8 *pu1_row_ecd_data;
+        ctb_analyse_t *ps_ctb_row_in;
+        ctb_enc_loop_out_t *ps_ctb_row_out;
+        cu_enc_loop_out_t *ps_row_cu;
+        tu_enc_loop_out_t *ps_row_tu;
+        pu_t *ps_row_pu;
+        pu_col_mv_t *ps_row_col_pu;
+        job_queue_t *ps_job;
+        UWORD32 *pu4_pu_row_offsets;
+        UWORD16 *pu2_num_pu_row;
+
+        ipe_l0_ctb_analyse_for_me_t *ps_row_ipe_analyse;
+        cur_ctb_cu_tree_t *ps_row_cu_tree;
+        UWORD8 is_inp_422 = (ps_ctxt->u1_chroma_array_type == 2);
+
+        /* Get the current row from the job queue */
+        ps_job = (job_queue_t *)ihevce_enc_grp_get_next_job(
+            ps_multi_thrd_ctxt, ENC_LOOP_JOB + i4_bitrate_instance_num, 1, i4_enc_frm_id);
+
+        /* Register the pointer to ctb out of the current frame */
+        ps_ctxt->s_sao_ctxt_t.ps_ctb_out = ps_ctb_out;
+
+        /* If all rows are done, set the end of process flag to 1, */
+        /* and the current row to -1 */
+        if(NULL == ps_job)
+        {
+            vert_ctr = -1;
+            tile_col_idx = -1;
+            end_of_frame = 1;
+        }
+        else
+        {
+            ihevce_tile_params_t *ps_col_tile_params_temp;
+            ihevce_tile_params_t *ps_tile_params;
+            WORD32 i4_tile_id;
+
+            ASSERT((ENC_LOOP_JOB + i4_bitrate_instance_num) == ps_job->i4_task_type);
+            /* set the output dependency */
+            ihevce_enc_grp_job_set_out_dep(ps_multi_thrd_ctxt, ps_job, i4_enc_frm_id);
+
+            /* Obtain the current row's details from the job */
+            vert_ctr = ps_job->s_job_info.s_enc_loop_job_info.i4_ctb_row_no;
+            {
+                /* Obtain the current column tile index from the job */
+                tile_col_idx = ps_job->s_job_info.s_enc_loop_job_info.i4_tile_col_idx;
+
+                /* The tile parameter for the col. idx. Use only the properties
+                   which are the same for all the bottom tiles like width, start_x, etc.
+                   Don't use height, start_y, etc. */
+                ps_col_tile_params_temp =
+                    ((ihevce_tile_params_t *)ps_master_ctxt->pv_tile_params_base + tile_col_idx);
+
+                /* Derive actual tile_id based on vert_ctr */
+                i4_tile_id =
+                    *(ps_frm_ctb_prms->pi4_tile_id_map +
+                      vert_ctr * ps_frm_ctb_prms->i4_tile_id_ctb_map_stride +
+                      ps_col_tile_params_temp->i4_first_ctb_x);
+                /* Derive pointer to current tile prms */
+                ps_tile_params =
+                    ((ihevce_tile_params_t *)ps_master_ctxt->pv_tile_params_base + i4_tile_id);
+            }
+
+            ps_ctxt->i4_tile_col_idx = tile_col_idx;
+            /* derive the current ctb row pointers */
+
+            /* luma src */
+            pu1_tmp = (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_y_buf +
+                      (ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_y *
+                       ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd) +
+                      ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_x;
+
+            pu1_tmp +=
+                (vert_ctr * ps_frm_ctb_prms->i4_ctb_size *
+                 ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd);
+
+            s_curr_src_bufs.pv_y_buf = pu1_tmp;
+
+            if(!ps_ctxt->u1_is_input_data_hbd)
+            {
+                /* cb src : the row height is halved unless the input is 422 */
+                pu1_tmp = (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_u_buf;
+                pu1_tmp +=
+                    (vert_ctr * (ps_frm_ctb_prms->i4_ctb_size >> ((is_inp_422 == 1) ? 0 : 1)) *
+                     ps_curr_inp->s_lap_out.s_input_buf.i4_uv_strd);
+
+                s_curr_src_bufs.pv_u_buf = pu1_tmp;
+            }
+
+            /* luma recon */
+            pu1_tmp = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_y_buf;
+            pu1_tmp +=
+                (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_frm_recon->s_yuv_buf_desc.i4_y_strd);
+
+            s_curr_recon_bufs.pv_y_buf = pu1_tmp;
+            s_pad_interp_recon.pu1_luma_recon = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_y_buf;
+            s_pad_interp_recon.i4_luma_recon_stride = ps_frm_recon->s_yuv_buf_desc.i4_y_strd;
+            if(!ps_ctxt->u1_is_input_data_hbd)
+            {
+                /* cb recon : the row height is halved unless the input is 422 */
+                pu1_tmp = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_u_buf;
+                pu1_tmp +=
+                    (vert_ctr * (ps_frm_ctb_prms->i4_ctb_size >> ((is_inp_422 == 1) ? 0 : 1)) *
+                     ps_frm_recon->s_yuv_buf_desc.i4_uv_strd);
+
+                s_curr_recon_bufs.pv_u_buf = pu1_tmp;
+                s_pad_interp_recon.pu1_chrm_recon = (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_u_buf;
+                s_pad_interp_recon.i4_chrm_recon_stride = ps_frm_recon->s_yuv_buf_desc.i4_uv_strd;
+
+                s_pad_interp_recon.i4_ctb_size = ps_frm_ctb_prms->i4_ctb_size;
+
+                /* Register the source buffer pointers in sao context */
+                ps_ctxt->s_sao_ctxt_t.pu1_frm_luma_src_buf =
+                    (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_y_buf +
+                    (ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_y *
+                     ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd) +
+                    ps_curr_inp->s_lap_out.s_input_buf.i4_start_offset_x;
+
+                ps_ctxt->s_sao_ctxt_t.i4_frm_luma_src_stride =
+                    ps_curr_inp->s_lap_out.s_input_buf.i4_y_strd;
+
+                ps_ctxt->s_sao_ctxt_t.pu1_frm_chroma_src_buf =
+                    (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_u_buf;
+
+                ps_ctxt->s_sao_ctxt_t.i4_frm_chroma_src_stride =
+                    ps_curr_inp->s_lap_out.s_input_buf.i4_uv_strd;
+            }
+
+            /* Subpel planes hxfy, fxhy, hxhy : row pointer plus frame base for each */
+            pu1_tmp = ps_frm_recon->apu1_y_sub_pel_planes[0];
+            pu1_tmp +=
+                (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_frm_recon->s_yuv_buf_desc.i4_y_strd);
+            apu1_y_sub_pel_planes[0] = pu1_tmp;
+            s_pad_interp_recon.pu1_sbpel_hxfy = ps_frm_recon->apu1_y_sub_pel_planes[0];
+
+            pu1_tmp = ps_frm_recon->apu1_y_sub_pel_planes[1];
+            pu1_tmp +=
+                (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_frm_recon->s_yuv_buf_desc.i4_y_strd);
+            apu1_y_sub_pel_planes[1] = pu1_tmp;
+            s_pad_interp_recon.pu1_sbpel_fxhy = ps_frm_recon->apu1_y_sub_pel_planes[1];
+
+            pu1_tmp = ps_frm_recon->apu1_y_sub_pel_planes[2];
+            pu1_tmp +=
+                (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_frm_recon->s_yuv_buf_desc.i4_y_strd);
+            apu1_y_sub_pel_planes[2] = pu1_tmp;
+            s_pad_interp_recon.pu1_sbpel_hxhy = ps_frm_recon->apu1_y_sub_pel_planes[2];
+
+            /* row level coeffs buffer : 2x max TUs per row for 422, else 1.5x */
+            pu1_row_ecd_data =
+                pu1_frm_ecd_data +
+                (vert_ctr *
+                 ((is_inp_422 == 1) ? (ps_frm_ctb_prms->i4_max_tus_in_row << 1)
+                                    : ((ps_frm_ctb_prms->i4_max_tus_in_row * 3) >> 1)) *
+                 MAX_SCAN_COEFFS_BYTES_4x4);
+
+            /* Row level CU buffer */
+            ps_row_cu = ps_cu_out + (vert_ctr * ps_frm_ctb_prms->i4_max_cus_in_row);
+
+            /* Row level TU buffer */
+            ps_row_tu = ps_tu_out + (vert_ctr * ps_frm_ctb_prms->i4_max_tus_in_row);
+
+            /* Row level PU buffer */
+            ps_row_pu = ps_pu_out + (vert_ctr * ps_frm_ctb_prms->i4_max_pus_in_row);
+
+            /* Row level colocated PU buffer */
+            /* ps_frm_col_mv has (i4_num_ctbs_horz + 1) CTBs for stride */
+            ps_row_col_pu =
+                ps_frm_recon->ps_frm_col_mv + (vert_ctr * (ps_frm_ctb_prms->i4_num_ctbs_horz + 1) *
+                                               ps_frm_ctb_prms->i4_num_pus_in_ctb);
+            /* Row level col PU map buffer */
+            /* pu1_frm_pu_map has (i4_num_ctbs_horz + 1) CTBs for stride */
+            pu1_row_pu_map =
+                ps_frm_recon->pu1_frm_pu_map + (vert_ctr * (ps_frm_ctb_prms->i4_num_ctbs_horz + 1) *
+                                                ps_frm_ctb_prms->i4_num_pus_in_ctb);
+            /* row ctb in pointer */
+            ps_ctb_row_in = ps_ctb_in + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
+
+            /* row ctb out pointer */
+            ps_ctb_row_out = ps_ctb_out + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
+
+            /* row number of PUs map pointer */
+            pu2_num_pu_row =
+                ps_frm_recon->pu2_num_pu_map + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
+
+            /* row pu offsets pointer */
+            pu4_pu_row_offsets = pu4_pu_offsets + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
+            /* store the first CTB pu offset of this row */
+            *pu4_pu_row_offsets = vert_ctr * ps_frm_ctb_prms->i4_max_pus_in_row;
+            /* Initialize ptr to current IPE row */
+            ps_row_ipe_analyse = ps_ipe_analyse + (vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz);
+
+            /* Initialize ptr to current CU tree row */
+            ps_row_cu_tree = ps_cu_tree_out +
+                             (vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz * MAX_NUM_NODES_CU_TREE);
+
+            /* Get the EncLoop Top-Right CU Dep Mngr */
+            ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right =
+                ps_master_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[ps_ctxt->i4_enc_frm_id]
+                                                                   [i4_bitrate_instance_num];
+            /* Get the EncLoop Deblock Dep Mngr */
+            ps_ctxt->pv_dep_mngr_enc_loop_dblk =
+                ps_master_ctxt
+                    ->aapv_dep_mngr_enc_loop_dblk[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num];
+
+            ps_ctxt->pu1_curr_row_cabac_state = &ps_master_ctxt->au1_ctxt_models[vert_ctr][0];
+
+            {
+                /* derive the pointers of top row buffers (entry of row vert_ctr - 1) */
+                ps_ctxt->pv_top_row_luma =
+                    (UWORD8 *)ps_ctxt->apv_frm_top_row_luma[ps_ctxt->i4_enc_frm_id] +
+                    (ps_ctxt->i4_frm_top_row_luma_size * ps_ctxt->i4_bitrate_instance_num) +
+                    (vert_ctr - 1) * ps_ctxt->i4_top_row_luma_stride;
+
+                ps_ctxt->pv_top_row_chroma =
+                    (UWORD8 *)ps_ctxt->apv_frm_top_row_chroma[ps_ctxt->i4_enc_frm_id] +
+                    (ps_ctxt->i4_frm_top_row_chroma_size * ps_ctxt->i4_bitrate_instance_num) +
+                    (vert_ctr - 1) * ps_ctxt->i4_top_row_chroma_stride;
+
+                /* derive the pointers of bottom row buffers to update current row data */
+                ps_ctxt->pv_bot_row_luma =
+                    (UWORD8 *)ps_ctxt->apv_frm_top_row_luma[ps_ctxt->i4_enc_frm_id] +
+                    (ps_ctxt->i4_frm_top_row_luma_size * ps_ctxt->i4_bitrate_instance_num) +
+                    (vert_ctr)*ps_ctxt->i4_top_row_luma_stride;
+
+                ps_ctxt->pv_bot_row_chroma =
+                    (UWORD8 *)ps_ctxt->apv_frm_top_row_chroma[ps_ctxt->i4_enc_frm_id] +
+                    (ps_ctxt->i4_frm_top_row_chroma_size * ps_ctxt->i4_bitrate_instance_num) +
+                    (vert_ctr)*ps_ctxt->i4_top_row_chroma_stride;
+
+                /* Register the recon buffer pointers in sao context */
+                ps_ctxt->s_sao_ctxt_t.pu1_frm_luma_recon_buf =
+                    (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_y_buf;
+                ps_ctxt->s_sao_ctxt_t.i4_frm_luma_recon_stride =
+                    ps_frm_recon->s_yuv_buf_desc.i4_y_strd;
+
+                ps_ctxt->s_sao_ctxt_t.pu1_frm_chroma_recon_buf =
+                    (UWORD8 *)ps_frm_recon->s_yuv_buf_desc.pv_u_buf;
+                ps_ctxt->s_sao_ctxt_t.i4_frm_chroma_recon_stride =
+                    ps_frm_recon->s_yuv_buf_desc.i4_uv_strd;
+
+                ps_ctxt->s_sao_ctxt_t.ps_rdopt_entropy_ctxt = &ps_ctxt->s_rdopt_entropy_ctxt;
+
+                ps_ctxt->s_sao_ctxt_t.i4_frm_top_luma_buf_stride =
+                    ps_ctxt->s_sao_ctxt_t.u4_ctb_aligned_wd + 1;
+
+                ps_ctxt->s_sao_ctxt_t.i4_frm_top_chroma_buf_stride =
+                    ps_ctxt->s_sao_ctxt_t.u4_ctb_aligned_wd + 2;
+            }
+
+            ps_ctxt->ps_top_row_nbr =
+                ps_ctxt->aps_frm_top_row_nbr[ps_ctxt->i4_enc_frm_id] +
+                (ps_ctxt->i4_frm_top_row_nbr_size * ps_ctxt->i4_bitrate_instance_num) +
+                (vert_ctr - 1) * ps_ctxt->i4_top_row_nbr_stride;
+
+            ps_ctxt->ps_bot_row_nbr =
+                ps_ctxt->aps_frm_top_row_nbr[ps_ctxt->i4_enc_frm_id] +
+                (ps_ctxt->i4_frm_top_row_nbr_size * ps_ctxt->i4_bitrate_instance_num) +
+                (vert_ctr)*ps_ctxt->i4_top_row_nbr_stride;
+
+            if(vert_ctr > 0)
+            {
+                ps_ctxt->pu1_top_rt_cabac_state = &ps_master_ctxt->au1_ctxt_models[vert_ctr - 1][0];
+            }
+            else
+            {
+                ps_ctxt->pu1_top_rt_cabac_state = NULL;
+            }
+
+            ASSERT(
+                ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[0]
+                    .ps_pps->i1_sign_data_hiding_flag ==
+                ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[1]
+                    .ps_pps->i1_sign_data_hiding_flag);
+
+            /* call the row level processing function */
+            ihevce_enc_loop_process_row(
+                ps_ctxt,
+                &s_curr_src_bufs,
+                &s_curr_recon_bufs,
+                &s_curr_recon_bufs_src,
+                &apu1_y_sub_pel_planes[0],
+                ps_ctb_row_in,
+                ps_ctb_row_out,
+                ps_row_ipe_analyse,
+                ps_row_cu_tree,
+                ps_row_cu,
+                ps_row_tu,
+                ps_row_pu,
+                ps_row_col_pu,
+                pu2_num_pu_row,
+                pu1_row_pu_map,
+                pu1_row_ecd_data,
+                pu4_pu_row_offsets,
+                ps_frm_ctb_prms,
+                vert_ctr,
+                ps_frm_recon,
+                ps_ctxt->pv_dep_mngr_encloop_dep_me,
+                &s_pad_interp_recon,
+                i4_pass,
+                ps_multi_thrd_ctxt,
+                ps_tile_params);
+        }
+    }
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_enc_loop_dblk_get_prms_dep_mngr \endif
+*
+* \brief Returns to the caller the attribute relevant for the dependency
+*        manager, ie, the number of vertical units covering the given height
+*
+* \par Description:
+*    One vertical unit is taken as 64 rows; the count is rounded up
+* \param[in] i4_ht : height to be covered
+* \param[out] pi4_num_vert_units_in_lyr : Pointer to store num vertical units
+*                                         for deblocking
+*
+* \return
+*    None
+*
+* \author
+*    Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_enc_loop_dblk_get_prms_dep_mngr(WORD32 i4_ht, WORD32 *pi4_num_vert_units_in_lyr)
+{
+    /* Height of one vertical unit: the ctb size, fixed at 64 here */
+    const WORD32 i4_rows_per_unit = 64;
+    WORD32 i4_num_units;
+
+    /* round the height up to a whole number of units */
+    i4_num_units = (i4_ht + i4_rows_per_unit - 1) / i4_rows_per_unit;
+
+    /* set the number of vertical units */
+    *pi4_num_vert_units_in_lyr = i4_num_units;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_enc_loop_get_num_mem_recs \endif
+*
+* \brief
+*    Returns the number of memory records for enc_loop and its Dep Mngrs
+* \param[in] i4_num_bitrate_inst : number of bitrate instances
+* \param[in] i4_num_enc_loop_frm_pllel : number of enc_loop frames in parallel
+* \return
+*    Total number of memory records
+*
+* \author
+*    Ittiam
+*
+*****************************************************************************
+*/
+WORD32
+    ihevce_enc_loop_get_num_mem_recs(WORD32 i4_num_bitrate_inst, WORD32 i4_num_enc_loop_frm_pllel)
+{
+    WORD32 i4_total_recs = NUM_ENC_LOOP_MEM_RECS; /* records of enc_loop itself */
+
+    /* Dblk Dep Mngr records, per frame in parallel and per bitrate instance */
+    i4_total_recs += i4_num_enc_loop_frm_pllel * i4_num_bitrate_inst * ihevce_dmgr_get_num_mem_recs();
+    /* CU top-right Dep Mngr records, same multiplicity as above */
+    i4_total_recs += i4_num_enc_loop_frm_pllel * i4_num_bitrate_inst * ihevce_dmgr_get_num_mem_recs();
+    /* aux bitrate Dep Mngr records, counted for one bitrate instance fewer */
+    i4_total_recs +=
+        i4_num_enc_loop_frm_pllel * (i4_num_bitrate_inst - 1) * ihevce_dmgr_get_num_mem_recs();
+
+    return i4_total_recs;
+}
+/*!
+******************************************************************************
+* \if Function name : ihevce_enc_loop_get_mem_recs \endif
+*
+* \brief
+* Memory requirements are returned for ENC_LOOP.
+*
+* \param[in,out] ps_mem_tab : pointer to memory descriptors table
+* \param[in] ps_init_prms : Create time static parameters
+* \param[in] i4_num_proc_thrds : Number of processing threads for this module
+* \param[in] i4_mem_space : memspace in which memory request should be done
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+WORD32 ihevce_enc_loop_get_mem_recs(
+ iv_mem_rec_t *ps_mem_tab,
+ ihevce_static_cfg_params_t *ps_init_prms,
+ WORD32 i4_num_proc_thrds,
+ WORD32 i4_num_bitrate_inst,
+ WORD32 i4_num_enc_loop_frm_pllel,
+ WORD32 i4_mem_space,
+ WORD32 i4_resolution_id)
+{
+ UWORD32 u4_width, u4_height, n_tabs;
+ UWORD32 u4_ctb_in_a_row, u4_ctb_rows_in_a_frame;
+ WORD32 ctr;
+ WORD32 i4_chroma_format = ps_init_prms->s_src_prms.i4_chr_format;
+
+ /* derive frame dimensions */
+ /*width of the input YUV to be encoded */
+ u4_width = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
+ /*making the width a multiple of CTB size*/
+ u4_width += SET_CTB_ALIGN(
+ ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, MAX_CTB_SIZE);
+
+ /*height of the input YUV to be encoded */
+ u4_height = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
+ /*making the height a multiple of CTB size*/
+ u4_height += SET_CTB_ALIGN(
+ ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, MAX_CTB_SIZE);
+ u4_ctb_in_a_row = (u4_width / MAX_CTB_SIZE);
+ u4_ctb_rows_in_a_frame = (u4_height / MAX_CTB_SIZE);
+    /* memories should be requested assuming worst case requirements */
+
+ /* Module context structure */
+ ps_mem_tab[ENC_LOOP_CTXT].i4_mem_size = sizeof(ihevce_enc_loop_master_ctxt_t);
+
+ ps_mem_tab[ENC_LOOP_CTXT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
+
+ ps_mem_tab[ENC_LOOP_CTXT].i4_mem_alignment = 8;
+
+ /* Thread context structure */
+ ps_mem_tab[ENC_LOOP_THRDS_CTXT].i4_mem_size =
+ i4_num_proc_thrds * sizeof(ihevce_enc_loop_ctxt_t);
+
+ ps_mem_tab[ENC_LOOP_THRDS_CTXT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
+
+ ps_mem_tab[ENC_LOOP_THRDS_CTXT].i4_mem_alignment = 16;
+
+ /* Scale matrices */
+ ps_mem_tab[ENC_LOOP_SCALE_MAT].i4_mem_size = 2 * MAX_TU_SIZE * MAX_TU_SIZE * sizeof(WORD16);
+
+ ps_mem_tab[ENC_LOOP_SCALE_MAT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
+
+ ps_mem_tab[ENC_LOOP_SCALE_MAT].i4_mem_alignment = 8;
+
+ /* Rescale matrices */
+ ps_mem_tab[ENC_LOOP_RESCALE_MAT].i4_mem_size = 2 * MAX_TU_SIZE * MAX_TU_SIZE * sizeof(WORD16);
+
+ ps_mem_tab[ENC_LOOP_RESCALE_MAT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
+
+ ps_mem_tab[ENC_LOOP_RESCALE_MAT].i4_mem_alignment = 8;
+
+ /* top row luma one row of pixel data per CTB row */
+ if(ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8)
+ {
+ ps_mem_tab[ENC_LOOP_TOP_LUMA].i4_mem_size = (u4_ctb_rows_in_a_frame + 1) *
+ (u4_width + MAX_CU_SIZE + 1) * sizeof(UWORD16) *
+ i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel;
+ }
+ else
+ {
+ ps_mem_tab[ENC_LOOP_TOP_LUMA].i4_mem_size = (u4_ctb_rows_in_a_frame + 1) *
+ (u4_width + MAX_CU_SIZE + 1) * sizeof(UWORD8) *
+ i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel;
+ }
+
+ ps_mem_tab[ENC_LOOP_TOP_LUMA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
+
+ ps_mem_tab[ENC_LOOP_TOP_LUMA].i4_mem_alignment = 8;
+
+ /* top row chroma */
+ if(ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8)
+ {
+ ps_mem_tab[ENC_LOOP_TOP_CHROMA].i4_mem_size =
+ (u4_ctb_rows_in_a_frame + 1) * (u4_width + MAX_CU_SIZE + 2) * sizeof(UWORD16) *
+ i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel;
+ }
+ else
+ {
+ ps_mem_tab[ENC_LOOP_TOP_CHROMA].i4_mem_size =
+ (u4_ctb_rows_in_a_frame + 1) * (u4_width + MAX_CU_SIZE + 2) * sizeof(UWORD8) *
+ i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel;
+ }
+
+ ps_mem_tab[ENC_LOOP_TOP_CHROMA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
+
+ ps_mem_tab[ENC_LOOP_TOP_CHROMA].i4_mem_alignment = 8;
+
+ /* top row neighbour 4x4 */
+ ps_mem_tab[ENC_LOOP_TOP_NBR4X4].i4_mem_size =
+ (u4_ctb_rows_in_a_frame + 1) * (((u4_width + MAX_CU_SIZE) >> 2) + 1) * sizeof(nbr_4x4_t) *
+ i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel;
+
+ ps_mem_tab[ENC_LOOP_TOP_NBR4X4].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
+
+ ps_mem_tab[ENC_LOOP_TOP_NBR4X4].i4_mem_alignment = 8;
+
+ /* memory to dump rate control parameters by each thread for each bit-rate instance */
+ /* RC params collated by each thread for each bit-rate instance separately */
+ ps_mem_tab[ENC_LOOP_RC_PARAMS].i4_mem_size = i4_num_bitrate_inst * i4_num_enc_loop_frm_pllel *
+ i4_num_proc_thrds * sizeof(enc_loop_rc_params_t);
+
+ ps_mem_tab[ENC_LOOP_RC_PARAMS].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
+
+ ps_mem_tab[ENC_LOOP_RC_PARAMS].i4_mem_alignment = 8;
+ /* Memory required for deblocking */
+ {
+ /* Memory to store Qp of top4x4 blocks for each CTB row.
+ This memory is allocated at frame level and shared across
+ all cores. The Qp values are needed to form Qp-map(described
+ in the ENC_LOOP_DEBLOCKING section below)*/
+
+ UWORD32 u4_size_bs_memory, u4_size_qp_memory;
+ UWORD32 u4_size_top_4x4_qp_memory;
+
+ /*Memory required to store Qp of top4x4 blocks for a CTB row for entire frame*/
+ /*Space required per CTB*/
+ u4_size_top_4x4_qp_memory = (MAX_CTB_SIZE / 4);
+ /*Space required for entire CTB row*/
+ u4_size_top_4x4_qp_memory *= u4_ctb_in_a_row;
+ /*Space required for entire frame*/
+ u4_size_top_4x4_qp_memory *= u4_ctb_rows_in_a_frame;
+ /*Space required for multiple bitrate*/
+ u4_size_top_4x4_qp_memory *= i4_num_bitrate_inst;
+ /*Space required for multiple frames in parallel*/
+ u4_size_top_4x4_qp_memory *= i4_num_enc_loop_frm_pllel;
+
+ ps_mem_tab[ENC_LOOP_QP_TOP_4X4].i4_mem_size = u4_size_top_4x4_qp_memory;
+ ps_mem_tab[ENC_LOOP_QP_TOP_4X4].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
+ ps_mem_tab[ENC_LOOP_QP_TOP_4X4].i4_mem_alignment = 8;
+
+ /* Memory allocation of BS and Qp-map for deblocking at CTB-row level:
+ ## Boundary Strength(Vertical):
+ BS stored per CTB at one stretch i.e. for a 64x CTB first 8 entries belongs to first CTB
+ of the row followed by 8 entries of second CTB and so on.
+ 8 entries: Includes left edge of current CTB and excludes right edge.
+ ## Boundary Strength(Horizontal):
+ Same as Vertical.
+ 8 entries: Includes top edge of current CTB and excludes bottom edge.
+
+ ## Qp-map storage:
+ T0 T1 T2 T3 T4 T5 ..........to the end of the CTB row
+ 00 01 02 03 04 05 ..........to the end of the CTB row
+ 10 11 12 13 14 15 ..........to the end of the CTB row
+ 20 21 22 23 24 25 ..........to the end of the CTB row
+ 30 31 32 33 34 35 ..........to the end of the CTB row
+ 40 41 42 43 44 45 ..........to the end of the CTB row
+ ............................to the end of the CTB row
+ upto height_of_CTB..........to the end of the CTB row
+
+ Qp is stored for each "4x4 block" in a proper 2-D array format (One entry for each 4x4).
+ A 2-D array of height= (height_of_CTB +1), and width = (width_of_CTB).
+ where,
+ => height_of_CTB = number of 4x4 blocks in a CTB vertically,
+ => +1 is done to store Qp of lowest 4x4-block layer of top-CTB
+ in order to deblock top edge of current CTB.
+ => width_of_CTB = number of 4x4 blocks in a CTB horizontally,
+ */
+
+ /*Memory(in bytes) required for storing Boundary Strength for entire CTB row*/
+ /*1 vertical edge per 8 pixel*/
+ u4_size_bs_memory = (MAX_CTB_SIZE >> 3);
+ /*Vertical edges for entire width of CTB row*/
+ u4_size_bs_memory *= u4_ctb_in_a_row;
+ /*Each vertical edge of CTB row is 4 bytes*/
+ u4_size_bs_memory = u4_size_bs_memory << 2;
+ /*Adding Memory required for storing horizontal BS by doubling*/
+ u4_size_bs_memory = u4_size_bs_memory << 1;
+
+ /*Memory(in bytes) required for storing Qp at 4x4 level for entire CTB row*/
+ /*Number of 4x4 blocks in the width of a CTB*/
+ u4_size_qp_memory = (MAX_CTB_SIZE >> 2);
+ /*Number of 4x4 blocks in the height of a CTB. Adding 1 to store Qp of lowest
+ 4x4-block layer of top-CTB in order to deblock top edge of current CTB*/
+ u4_size_qp_memory *= ((MAX_CTB_SIZE >> 2) + 1);
+ /*Storage for entire CTB row*/
+ u4_size_qp_memory *= u4_ctb_in_a_row;
+
+ /*Multiplying by i4_num_proc_thrds to assign memory for each core*/
+ ps_mem_tab[ENC_LOOP_DEBLOCKING].i4_mem_size =
+ i4_num_proc_thrds * (u4_size_bs_memory + u4_size_qp_memory);
+
+ ps_mem_tab[ENC_LOOP_DEBLOCKING].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
+
+ ps_mem_tab[ENC_LOOP_DEBLOCKING].i4_mem_alignment = 8;
+ }
+
+ /* Memory required to store pred for 422 chroma */
+ ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].i4_mem_size =
+ i4_num_proc_thrds * MAX_CTB_SIZE * MAX_CTB_SIZE * 2 *
+ (i4_chroma_format == IV_YUV_422SP_UV) *
+ ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
+
+ ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
+
+ ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].i4_mem_alignment = 8;
+
+ /* Memory for inter pred buffers */
+ {
+ WORD32 i4_num_bufs_per_thread = 0;
+
+ WORD32 i4_buf_size_per_cand =
+ (MAX_CTB_SIZE) * (MAX_CTB_SIZE) *
+ ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
+ WORD32 i4_quality_preset =
+ ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;
+ switch(i4_quality_preset)
+ {
+ case IHEVCE_QUALITY_P0:
+ {
+ i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_PQ;
+ break;
+ }
+ case IHEVCE_QUALITY_P2:
+ {
+ i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_HQ;
+ break;
+ }
+ case IHEVCE_QUALITY_P3:
+ {
+ i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_MS;
+ break;
+ }
+ case IHEVCE_QUALITY_P4:
+ {
+ i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_HS;
+ break;
+ }
+ case IHEVCE_QUALITY_P5:
+ case IHEVCE_QUALITY_P6:
+ case IHEVCE_QUALITY_P7:
+ {
+ i4_num_bufs_per_thread = MAX_NUM_INTER_CANDS_ES;
+ break;
+ }
+ default:
+ {
+ ASSERT(0);
+ }
+ }
+
+ i4_num_bufs_per_thread += 4;
+
+ ps_mem_tab[ENC_LOOP_INTER_PRED].i4_mem_size =
+ i4_num_bufs_per_thread * i4_num_proc_thrds * i4_buf_size_per_cand;
+
+ ps_mem_tab[ENC_LOOP_INTER_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
+
+ ps_mem_tab[ENC_LOOP_INTER_PRED].i4_mem_alignment = 8;
+ }
+
+ /* Memory required to store chroma intra pred */
+ ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].i4_mem_size =
+ i4_num_proc_thrds * (MAX_TU_SIZE) * (MAX_TU_SIZE)*2 * NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD *
+ ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1) *
+ ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
+
+ ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
+
+ ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].i4_mem_alignment = 8;
+
+ /* Memory required to store pred for reference substitution output */
+ ps_mem_tab[ENC_LOOP_REF_SUB_OUT].i4_mem_size =
+ i4_num_proc_thrds * ((MAX_TU_SIZE * 2 * 2) + 4) *
+ ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
+
+ ps_mem_tab[ENC_LOOP_REF_SUB_OUT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
+
+ ps_mem_tab[ENC_LOOP_REF_SUB_OUT].i4_mem_alignment = 8;
+
+ /* Memory required to store pred for reference filtering output */
+ ps_mem_tab[ENC_LOOP_REF_FILT_OUT].i4_mem_size =
+ i4_num_proc_thrds * ((MAX_TU_SIZE * 2 * 2) + 4) *
+ ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
+
+ ps_mem_tab[ENC_LOOP_REF_FILT_OUT].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
+
+ ps_mem_tab[ENC_LOOP_REF_FILT_OUT].i4_mem_alignment = 8;
+
+#if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
+ if(ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset == 0)
+#endif
+ {
+ /* Memory assignments for recon storage during CU Recursion */
+ ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].i4_mem_size =
+ i4_num_proc_thrds * (MAX_CU_SIZE * MAX_CU_SIZE) *
+ ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
+
+ ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
+
+ ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].i4_mem_alignment = 8;
+
+ ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].i4_mem_size =
+ i4_num_proc_thrds * (MAX_CU_SIZE * (MAX_CU_SIZE >> 1)) *
+ ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1) *
+ ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
+
+ ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
+
+ ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].i4_mem_alignment = 8;
+ }
+#if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
+ else
+ {
+ /* Memory assignments for recon storage during CU Recursion */
+ ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].i4_mem_size = 0;
+
+ ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
+
+ ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].i4_mem_alignment = 8;
+
+ ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].i4_mem_size = 0;
+
+ ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
+
+ ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].i4_mem_alignment = 8;
+ }
+#endif
+
+#if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
+ if(ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset == 0)
+#endif
+ {
+ /* Memory assignments for pred storage during CU Recursion */
+ ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].i4_mem_size =
+ i4_num_proc_thrds * (MAX_CU_SIZE * MAX_CU_SIZE) *
+ ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
+
+ ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
+
+ ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].i4_mem_alignment = 8;
+
+ ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].i4_mem_size =
+ i4_num_proc_thrds * (MAX_CU_SIZE * (MAX_CU_SIZE >> 1)) *
+ ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1) *
+ ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
+
+ ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
+
+ ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].i4_mem_alignment = 8;
+ }
+#if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
+ else
+ {
+ /* Memory assignments for pred storage during CU Recursion */
+ ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].i4_mem_size = 0;
+
+ ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
+
+ ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].i4_mem_alignment = 8;
+
+ ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].i4_mem_size = 0;
+
+ ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
+
+ ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].i4_mem_alignment = 8;
+ }
+#endif
+
+ /* Memory assignments for CTB left luma data storage */
+ ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].i4_mem_size =
+ i4_num_proc_thrds * (MAX_CTB_SIZE + MAX_TU_SIZE) *
+ ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
+
+ ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
+
+ ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].i4_mem_alignment = 8;
+
+ /* Memory assignments for CTB left chroma data storage */
+ ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].i4_mem_size =
+ i4_num_proc_thrds * (MAX_CTB_SIZE + MAX_TU_SIZE) *
+ ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
+ ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].i4_mem_size <<=
+ ((i4_chroma_format == IV_YUV_422SP_UV) ? 1 : 0);
+
+ ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
+
+ ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].i4_mem_alignment = 8;
+
+ /* Memory required for SAO */
+ {
+ WORD32 num_vert_units;
+ WORD32 num_horz_units;
+ WORD32 ctb_aligned_ht, ctb_aligned_wd;
+ WORD32 luma_buf, chroma_buf;
+
+ num_vert_units = u4_height / MAX_CTB_SIZE;
+ num_horz_units = u4_width / MAX_CTB_SIZE;
+
+ ctb_aligned_ht = u4_height;
+ ctb_aligned_wd = u4_width;
+
+ /* Memory for top buffer. 1 extra width is required for top buf ptr for row 0
+ * and 1 extra location is required for top left buf ptr for row 0
+ * Also 1 extra byte is required for every row for top left pixel if
+ * the top left ptr is to be passed to leaf level unconditionally
+ */
+ luma_buf = (ctb_aligned_ht + (ctb_aligned_wd + 1) * (num_vert_units + 1)) *
+ ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
+ chroma_buf = (ctb_aligned_ht + (ctb_aligned_wd + 2) * (num_vert_units + 1)) *
+ ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
+
+ ps_mem_tab[ENC_LOOP_SAO].i4_mem_size =
+ (luma_buf + chroma_buf) * (i4_num_bitrate_inst) * (i4_num_enc_loop_frm_pllel);
+
+ /* Add the memory required to store the sao information of top ctb for top merge
+ * This is frame level buffer.
+ */
+ ps_mem_tab[ENC_LOOP_SAO].i4_mem_size +=
+ ((num_horz_units * sizeof(sao_enc_t)) * num_vert_units) * (i4_num_bitrate_inst) *
+ (i4_num_enc_loop_frm_pllel);
+
+ ps_mem_tab[ENC_LOOP_SAO].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
+
+ ps_mem_tab[ENC_LOOP_SAO].i4_mem_alignment = 8;
+ }
+
+ /* Memory for CU level Coeff data buffer */
+ {
+ /* 16 additional bytes are required to ensure alignment */
+ {
+ ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].i4_mem_size =
+ i4_num_proc_thrds *
+ (((MAX_LUMA_COEFFS_CTB +
+ (MAX_CHRM_COEFFS_CTB << ((i4_chroma_format == IV_YUV_422SP_UV) ? 1 : 0))) +
+ 16) *
+ (2) * sizeof(UWORD8));
+ }
+
+ ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
+
+ ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].i4_mem_alignment = 16;
+
+ ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].i4_mem_size =
+ i4_num_proc_thrds *
+ (MAX_LUMA_COEFFS_CTB +
+ (MAX_CHRM_COEFFS_CTB << ((i4_chroma_format == IV_YUV_422SP_UV) ? 1 : 0))) *
+ sizeof(UWORD8);
+
+ ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
+
+ ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].i4_mem_alignment = 16;
+ }
+
+ /* Memory for CU dequant data buffer */
+ {
+ /* 16 additional bytes are required to ensure alignment */
+ {
+ ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].i4_mem_size =
+ i4_num_proc_thrds *
+ (((i4_chroma_format == IV_YUV_422SP_UV) ? (MAX_CU_SIZE * (MAX_CU_SIZE << 1))
+ : (MAX_CU_SIZE * (MAX_CU_SIZE >> 1) * 3)) +
+ 8) *
+ (2) * sizeof(WORD16);
+ }
+
+ ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
+
+ ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].i4_mem_alignment = 16;
+ }
+
+ /* Memory for Recon Datastore (Used around and within the RDOPT loop) */
+ {
+ WORD32 i4_memSize_perThread;
+
+ WORD32 i4_chroma_memSize_perThread = 0;
+ /* 2 bufs each allocated to the two 'enc_loop_cu_final_prms_t' structs */
+ /* used in RDOPT to store cur and best modes' data */
+ WORD32 i4_luma_memSize_perThread =
+ 4 * MAX_CU_SIZE * MAX_CU_SIZE *
+ ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
+
+ /* 'Glossary' for comments in the following codeBlock */
+ /* 1 - 2 Bufs for storing recons of the best modes determined in the */
+ /* function 'ihevce_intra_chroma_pred_mode_selector' */
+ /* 2 - 1 buf each allocated to the two 'enc_loop_cu_final_prms_t' structs */
+ /* used in RDOPT to store cur and best modes' data */
+ if(i4_chroma_format == IV_YUV_422SP_UV)
+ {
+ WORD32 i4_quality_preset =
+ ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;
+ switch(i4_quality_preset)
+ {
+ case IHEVCE_QUALITY_P0:
+ {
+ /* 1 */
+ i4_chroma_memSize_perThread +=
+ 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_PQ *
+ ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
+
+ /* 2 */
+ i4_chroma_memSize_perThread +=
+ 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ *
+ ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
+
+ break;
+ }
+ case IHEVCE_QUALITY_P2:
+ {
+ /* 1 */
+ i4_chroma_memSize_perThread +=
+ 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_HQ *
+ ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
+
+ /* 2 */
+ i4_chroma_memSize_perThread +=
+ 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ *
+ ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
+
+ break;
+ }
+ case IHEVCE_QUALITY_P3:
+ {
+ /* 1 */
+ i4_chroma_memSize_perThread +=
+ 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_MS *
+ ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
+
+ /* 2 */
+ i4_chroma_memSize_perThread +=
+ 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS *
+ ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
+
+ break;
+ }
+ case IHEVCE_QUALITY_P4:
+ {
+ /* 1 */
+ i4_chroma_memSize_perThread +=
+ 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_HS *
+ ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
+
+ /* 2 */
+ i4_chroma_memSize_perThread +=
+ 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS *
+ ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
+
+ break;
+ }
+ case IHEVCE_QUALITY_P5:
+ {
+ /* 1 */
+ i4_chroma_memSize_perThread +=
+ 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_XS *
+ ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
+
+ /* 2 */
+ i4_chroma_memSize_perThread +=
+ 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS *
+ ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
+
+ break;
+ }
+ case IHEVCE_QUALITY_P6:
+ case IHEVCE_QUALITY_P7:
+ {
+ /* 1 */
+ i4_chroma_memSize_perThread +=
+ 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_CHROMA_RDOPT_EVAL_IN_XS6 *
+ ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
+
+ /* 2 */
+ i4_chroma_memSize_perThread +=
+ 2 * MAX_CU_SIZE * MAX_CU_SIZE * ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS6 *
+ ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
+
+ break;
+ }
+ }
+ }
+ else
+ {
+ WORD32 i4_quality_preset =
+ ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;
+ switch(i4_quality_preset)
+ {
+ case IHEVCE_QUALITY_P0:
+ {
+ /* 1 */
+ i4_chroma_memSize_perThread +=
+ 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_PQ *
+ ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
+
+ /* 2 */
+ i4_chroma_memSize_perThread +=
+ 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
+ ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ *
+ ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
+
+ break;
+ }
+ case IHEVCE_QUALITY_P2:
+ {
+ /* 1 */
+ i4_chroma_memSize_perThread +=
+ 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_HQ *
+ ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
+
+ /* 2 */
+ i4_chroma_memSize_perThread +=
+ 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
+ ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ *
+ ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
+
+ break;
+ }
+ case IHEVCE_QUALITY_P3:
+ {
+ /* 1 */
+ i4_chroma_memSize_perThread +=
+ 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_MS *
+ ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
+
+ /* 2 */
+ i4_chroma_memSize_perThread +=
+ 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
+ ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS *
+ ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
+
+ break;
+ }
+ case IHEVCE_QUALITY_P4:
+ {
+ /* 1 */
+ i4_chroma_memSize_perThread +=
+ 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_HS *
+ ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
+
+ /* 2 */
+ i4_chroma_memSize_perThread +=
+ 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
+ ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS *
+ ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
+
+ break;
+ }
+ case IHEVCE_QUALITY_P5:
+ {
+ /* 1 */
+ i4_chroma_memSize_perThread +=
+ 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_XS *
+ ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
+
+ /* 2 */
+ i4_chroma_memSize_perThread +=
+ 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
+ ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS *
+ ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
+
+ break;
+ }
+ case IHEVCE_QUALITY_P6:
+ case IHEVCE_QUALITY_P7:
+ {
+ /* 1 */
+ i4_chroma_memSize_perThread +=
+ 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ENABLE_CHROMA_RDOPT_EVAL_IN_XS6 *
+ ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
+
+ /* 2 */
+ i4_chroma_memSize_perThread +=
+ 2 * MAX_CU_SIZE * (MAX_CU_SIZE / 2) *
+ ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS6 *
+ ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1);
+
+ break;
+ }
+ }
+ }
+
+ i4_memSize_perThread = i4_luma_memSize_perThread + i4_chroma_memSize_perThread;
+
+ ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].i4_mem_size =
+ i4_num_proc_thrds * i4_memSize_perThread * sizeof(UWORD8);
+
+ ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
+
+ ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].i4_mem_alignment = 16;
+ }
+
+ n_tabs = NUM_ENC_LOOP_MEM_RECS;
+
+ /*************************************************************************/
+ /* --- EncLoop Deblock sync Dep Mngr Mem requests -- */
+ /*************************************************************************/
+
+ /* Fill the memtabs for EncLoop Deblock Dep Mngr */
+ {
+ WORD32 count;
+ WORD32 num_vert_units;
+ WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
+
+ ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
+ ASSERT(num_vert_units > 0);
+ for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
+ {
+ for(ctr = 0; ctr < i4_num_bitrate_inst; ctr++)
+ {
+ n_tabs += ihevce_dmgr_get_mem_recs(
+ &ps_mem_tab[n_tabs],
+ DEP_MNGR_ROW_ROW_SYNC,
+ num_vert_units,
+ ps_init_prms->s_app_tile_params.i4_num_tile_cols,
+ i4_num_proc_thrds,
+ i4_mem_space);
+ }
+ }
+ }
+
+ /*************************************************************************/
+ /* --- EncLoop Top-Right CU sync Dep Mngr Mem requests -- */
+ /*************************************************************************/
+
+ /* Fill the memtabs for Top-Right CU sync Dep Mngr */
+ {
+ WORD32 count;
+ WORD32 num_vert_units;
+ WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
+ ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
+ ASSERT(num_vert_units > 0);
+
+ for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
+ {
+ for(ctr = 0; ctr < i4_num_bitrate_inst; ctr++)
+ {
+ n_tabs += ihevce_dmgr_get_mem_recs(
+ &ps_mem_tab[n_tabs],
+ DEP_MNGR_ROW_ROW_SYNC,
+ num_vert_units,
+ ps_init_prms->s_app_tile_params.i4_num_tile_cols,
+ i4_num_proc_thrds,
+ i4_mem_space);
+ }
+ }
+ }
+
+ /*************************************************************************/
+ /* --- EncLoop Aux. on Ref. bitrate sync Dep Mngr Mem requests -- */
+ /*************************************************************************/
+
+ /* Fill the memtabs for EncLoop Aux. on Ref. bitrate Dep Mngr */
+ {
+ WORD32 count;
+ WORD32 num_vert_units;
+ WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
+
+ ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
+ ASSERT(num_vert_units > 0);
+
+ for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
+ {
+ for(ctr = 1; ctr < i4_num_bitrate_inst; ctr++)
+ {
+ n_tabs += ihevce_dmgr_get_mem_recs(
+ &ps_mem_tab[n_tabs],
+ DEP_MNGR_ROW_ROW_SYNC,
+ num_vert_units,
+ ps_init_prms->s_app_tile_params.i4_num_tile_cols,
+ i4_num_proc_thrds,
+ i4_mem_space);
+ }
+ }
+ }
+
+ return (n_tabs);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_enc_loop_init \endif
+*
+* \brief
+* Initialization for ENC_LOOP context state structure.
+*
+* \param[in] ps_mem_tab : pointer to memory descriptors table
+* \param[in] ps_init_prms : Create time static parameters
+* \param[in] pv_osal_handle : Osal handle
+*
+* \return
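+* Opaque pointer (void *); presumably the ENC_LOOP master context - TODO confirm against the return statement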
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void *ihevce_enc_loop_init(
+ iv_mem_rec_t *ps_mem_tab,
+ ihevce_static_cfg_params_t *ps_init_prms,
+ WORD32 i4_num_proc_thrds,
+ void *pv_osal_handle,
+ func_selector_t *ps_func_selector,
+ rc_quant_t *ps_rc_quant_ctxt,
+ ihevce_tile_params_t *ps_tile_params_base,
+ WORD32 i4_resolution_id,
+ WORD32 i4_num_enc_loop_frm_pllel,
+ UWORD8 u1_is_popcnt_available)
+{
+ ihevce_enc_loop_master_ctxt_t *ps_master_ctxt;
+ ihevce_enc_loop_ctxt_t *ps_ctxt;
+ WORD32 ctr, n_tabs;
+ UWORD32 u4_width, u4_height;
+ UWORD32 u4_ctb_in_a_row, u4_ctb_rows_in_a_frame;
+ UWORD32 u4_size_bs_memory, u4_size_qp_memory;
+ UWORD8 *pu1_deblk_base; /*Store the base address of deblcoking memory*/
+ WORD32 i;
+ WORD32 i4_num_bitrate_inst =
+ ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_num_bitrate_instances;
+ enc_loop_rc_params_t *ps_enc_loop_rc_params;
+ UWORD8 *pu1_sao_base; /* store the base address of sao*/
+ UWORD32 u4_ctb_aligned_wd, ctb_size, u4_ctb_aligned_ht, num_vert_units;
+ WORD32 i4_chroma_format = ps_init_prms->s_src_prms.i4_chr_format;
+ WORD32 is_hbd_mode = (ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8);
+ WORD32 i4_enc_frm_id;
+ WORD32 num_cu_in_ctb;
+ WORD32 i4_num_tile_cols = 1; //Default value is 1
+
+ /* ENC_LOOP state structure */
+ ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)ps_mem_tab[ENC_LOOP_CTXT].pv_base;
+
+ ps_master_ctxt->i4_num_proc_thrds = i4_num_proc_thrds;
+
+ ps_ctxt = (ihevce_enc_loop_ctxt_t *)ps_mem_tab[ENC_LOOP_THRDS_CTXT].pv_base;
+ ps_enc_loop_rc_params = (enc_loop_rc_params_t *)ps_mem_tab[ENC_LOOP_RC_PARAMS].pv_base;
+ ps_ctxt->ps_rc_quant_ctxt = ps_rc_quant_ctxt;
+ /*Calculation of memory sizes for deblocking*/
+ {
+ /*width of the input YUV to be encoded. */
+ u4_width = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
+ /*making the width a multiple of CTB size*/
+ u4_width += SET_CTB_ALIGN(
+ ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width, MAX_CTB_SIZE);
+
+ u4_ctb_in_a_row = (u4_width / MAX_CTB_SIZE);
+
+ /*height of the input YUV to be encoded */
+ u4_height = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
+ /*making the height a multiple of CTB size*/
+ u4_height += SET_CTB_ALIGN(
+ ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height, MAX_CTB_SIZE);
+
+ u4_ctb_rows_in_a_frame = (u4_height / MAX_CTB_SIZE);
+
+ /*Memory(in bytes) required for storing Boundary Strength for entire CTB row*/
+ /*1 vertical edge per 8 pixel*/
+ u4_size_bs_memory = (MAX_CTB_SIZE >> 3);
+ /*Vertical edges for entire width of CTB row*/
+ u4_size_bs_memory *= u4_ctb_in_a_row;
+ /*Each vertical edge of CTB row is 4 bytes*/
+ u4_size_bs_memory = u4_size_bs_memory << 2;
+ /*Adding Memory required for storing horizontal BS by doubling*/
+ u4_size_bs_memory = u4_size_bs_memory << 1;
+
+ /*Memory(in bytes) required for storing Qp at 4x4 level for entire CTB row*/
+ /*Number of 4x4 blocks in the width of a CTB*/
+ u4_size_qp_memory = (MAX_CTB_SIZE >> 2);
+ /*Number of 4x4 blocks in the height of a CTB. Adding 1 to store Qp of lowest
+ 4x4-block layer of top-CTB in order to deblock top edge of current CTB*/
+ u4_size_qp_memory *= ((MAX_CTB_SIZE >> 2) + 1);
+ /*Storage for entire CTB row*/
+ u4_size_qp_memory *= u4_ctb_in_a_row;
+
+ pu1_deblk_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_DEBLOCKING].pv_base;
+ }
+
+ /*Derive the base pointer of sao*/
+ pu1_sao_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_SAO].pv_base;
+ ctb_size = (1 << ps_init_prms->s_config_prms.i4_max_log2_cu_size);
+ u4_ctb_aligned_wd = u4_width;
+ u4_ctb_aligned_ht = u4_height;
+ num_vert_units = (u4_height) / ctb_size;
+
+ for(ctr = 0; ctr < ps_master_ctxt->i4_num_proc_thrds; ctr++)
+ {
+ ps_master_ctxt->aps_enc_loop_thrd_ctxt[ctr] = ps_ctxt;
+ /* Store Tile params base into EncLoop context */
+ ps_ctxt->pv_tile_params_base = (void *)ps_tile_params_base;
+ ihevce_cmn_utils_instr_set_router(
+ &ps_ctxt->s_cmn_opt_func, u1_is_popcnt_available, ps_init_prms->e_arch_type);
+ ihevce_sifter_sad_fxn_assigner(
+ (FT_SAD_EVALUATOR **)(&ps_ctxt->pv_evalsad_pt_npu_mxn_8bit), ps_init_prms->e_arch_type);
+ ps_ctxt->i4_max_search_range_horizontal =
+ ps_init_prms->s_config_prms.i4_max_search_range_horz;
+ ps_ctxt->i4_max_search_range_vertical =
+ ps_init_prms->s_config_prms.i4_max_search_range_vert;
+
+ ps_ctxt->i4_quality_preset =
+ ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;
+
+ if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P7)
+ {
+ ps_ctxt->i4_quality_preset = IHEVCE_QUALITY_P6;
+ }
+
+ ps_ctxt->i4_num_proc_thrds = ps_master_ctxt->i4_num_proc_thrds;
+
+ ps_ctxt->i4_rc_pass = ps_init_prms->s_pass_prms.i4_pass;
+
+ ps_ctxt->u1_chroma_array_type = (i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1;
+
+ ps_ctxt->s_deblk_prms.u1_chroma_array_type = ps_ctxt->u1_chroma_array_type;
+
+ ps_ctxt->pi2_scal_mat = (WORD16 *)ps_mem_tab[ENC_LOOP_SCALE_MAT].pv_base;
+
+ ps_ctxt->pi2_rescal_mat = (WORD16 *)ps_mem_tab[ENC_LOOP_RESCALE_MAT].pv_base;
+
+ if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P2)
+ {
+ ps_ctxt->i4_use_ctb_level_lamda = 0;
+ }
+ else
+ {
+ ps_ctxt->i4_use_ctb_level_lamda = 0;
+ }
+
+ /** Register the function selector pointer*/
+ ps_ctxt->ps_func_selector = ps_func_selector;
+
+ ps_ctxt->s_mc_ctxt.ps_func_selector = ps_func_selector;
+
+ /* Initialization for non-distributed mode */
+ ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[0] = 0;
+ ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[1] = 0;
+ ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[2] = 0;
+ ps_ctxt->s_mc_ctxt.ai4_tile_xtra_pel[3] = 0;
+
+ ps_ctxt->s_deblk_prms.ps_func_selector = ps_func_selector;
+ ps_ctxt->i4_top_row_luma_stride = (u4_width + MAX_CU_SIZE + 1);
+
+ ps_ctxt->i4_frm_top_row_luma_size =
+ ps_ctxt->i4_top_row_luma_stride * (u4_ctb_rows_in_a_frame + 1);
+
+ ps_ctxt->i4_top_row_chroma_stride = (u4_width + MAX_CU_SIZE + 2);
+
+ ps_ctxt->i4_frm_top_row_chroma_size =
+ ps_ctxt->i4_top_row_chroma_stride * (u4_ctb_rows_in_a_frame + 1);
+
+ {
+ for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
+ {
+ /* +1 is to provision top left pel */
+ ps_ctxt->apv_frm_top_row_luma[i4_enc_frm_id] =
+ (UWORD8 *)ps_mem_tab[ENC_LOOP_TOP_LUMA].pv_base + 1 +
+ (ps_ctxt->i4_frm_top_row_luma_size * i4_enc_frm_id * i4_num_bitrate_inst);
+
+ /* pointer incremented by 1 row to avoid OOB access in 0th row */
+ ps_ctxt->apv_frm_top_row_luma[i4_enc_frm_id] =
+ (UWORD8 *)ps_ctxt->apv_frm_top_row_luma[i4_enc_frm_id] +
+ ps_ctxt->i4_top_row_luma_stride;
+
+ /* +2 is to provision top left pel */
+ ps_ctxt->apv_frm_top_row_chroma[i4_enc_frm_id] =
+ (UWORD8 *)ps_mem_tab[ENC_LOOP_TOP_CHROMA].pv_base + 2 +
+ (ps_ctxt->i4_frm_top_row_chroma_size * i4_enc_frm_id * i4_num_bitrate_inst);
+
+ /* pointer incremented by 1 row to avoid OOB access in 0th row */
+ ps_ctxt->apv_frm_top_row_chroma[i4_enc_frm_id] =
+ (UWORD8 *)ps_ctxt->apv_frm_top_row_chroma[i4_enc_frm_id] +
+ ps_ctxt->i4_top_row_chroma_stride;
+ }
+ }
+
+ /* +1 is to provision top left nbr */
+ ps_ctxt->i4_top_row_nbr_stride = (((u4_width + MAX_CU_SIZE) >> 2) + 1);
+ ps_ctxt->i4_frm_top_row_nbr_size =
+ ps_ctxt->i4_top_row_nbr_stride * (u4_ctb_rows_in_a_frame + 1);
+ for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
+ {
+ ps_ctxt->aps_frm_top_row_nbr[i4_enc_frm_id] =
+ (nbr_4x4_t *)ps_mem_tab[ENC_LOOP_TOP_NBR4X4].pv_base + 1 +
+ (ps_ctxt->i4_frm_top_row_nbr_size * i4_enc_frm_id * i4_num_bitrate_inst);
+ ps_ctxt->aps_frm_top_row_nbr[i4_enc_frm_id] += ps_ctxt->i4_top_row_nbr_stride;
+ }
+
+ num_cu_in_ctb = ctb_size / MIN_CU_SIZE;
+ num_cu_in_ctb *= num_cu_in_ctb;
+
+ /* pointer incremented by 1 row to avoid OOB access in 0th row */
+
+ /* Memory for CU level Coeff data buffer */
+ {
+ WORD32 i4_16byte_boundary_overshoot;
+ WORD32 buf_size_per_cu;
+ WORD32 buf_size_per_thread_wo_alignment_req;
+ WORD32 buf_size_per_thread;
+
+ buf_size_per_cu =
+ ((MAX_LUMA_COEFFS_CTB +
+ (MAX_CHRM_COEFFS_CTB << ((i4_chroma_format == IV_YUV_422SP_UV) ? 1 : 0))) +
+ 16) *
+ sizeof(UWORD8);
+ buf_size_per_thread_wo_alignment_req = buf_size_per_cu - 16 * sizeof(UWORD8);
+
+ {
+ buf_size_per_thread = buf_size_per_cu * (2);
+
+ for(i = 0; i < 2; i++)
+ {
+ ps_ctxt->as_cu_prms[i].pu1_cu_coeffs =
+ (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_COEFF_DATA].pv_base +
+ (ctr * buf_size_per_thread) + (i * buf_size_per_cu);
+
+ i4_16byte_boundary_overshoot =
+ ((LWORD64)ps_ctxt->as_cu_prms[i].pu1_cu_coeffs & 0xf);
+
+ ps_ctxt->as_cu_prms[i].pu1_cu_coeffs += (16 - i4_16byte_boundary_overshoot);
+ }
+ }
+
+ ps_ctxt->pu1_cu_recur_coeffs =
+ (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_COEFF_DATA].pv_base +
+ (ctr * buf_size_per_thread_wo_alignment_req);
+ }
+
+ /* Memory for CU dequant data buffer */
+ {
+ WORD32 buf_size_per_thread;
+ WORD32 i4_16byte_boundary_overshoot;
+
+ WORD32 buf_size_per_cu =
+ (((i4_chroma_format == IV_YUV_422SP_UV) ? (MAX_CU_SIZE * (MAX_CU_SIZE << 1))
+ : (MAX_CU_SIZE * (MAX_CU_SIZE >> 1) * 3)) +
+ 8) *
+ sizeof(WORD16);
+
+ {
+ buf_size_per_thread = buf_size_per_cu * 2;
+
+ for(i = 0; i < 2; i++)
+ {
+ ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs =
+ (WORD16
+ *)((UWORD8 *)ps_mem_tab[ENC_LOOP_CU_DEQUANT_DATA].pv_base + (ctr * buf_size_per_thread) + (i * buf_size_per_cu));
+
+ i4_16byte_boundary_overshoot =
+ ((LWORD64)ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs & 0xf);
+
+ ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs =
+ (WORD16
+ *)((UWORD8 *)ps_ctxt->as_cu_prms[i].pi2_cu_deq_coeffs + (16 - i4_16byte_boundary_overshoot));
+ }
+ }
+ }
+
+ /* ------ Deblocking memory pointer assignments start ------ */
+
+ /*Assign stride = 4x4 blocks in horizontal edge*/
+ ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_strd = (MAX_CTB_SIZE / 4) * u4_ctb_in_a_row;
+
+ ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_size =
+ ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_strd * u4_ctb_rows_in_a_frame;
+
+ /*Assign frame level memory to store the Qp of
+ top 4x4 neighbours of each CTB row*/
+ for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
+ {
+ ps_ctxt->s_deblk_ctbrow_prms.api1_qp_top_4x4_ctb_row[i4_enc_frm_id] =
+ (WORD8 *)ps_mem_tab[ENC_LOOP_QP_TOP_4X4].pv_base +
+ (ps_ctxt->s_deblk_ctbrow_prms.u4_qp_top_4x4_buf_size * i4_num_bitrate_inst *
+ i4_enc_frm_id);
+ }
+
+ ps_ctxt->s_deblk_ctbrow_prms.pu4_ctb_row_bs_vert = (UWORD32 *)pu1_deblk_base;
+
+ ps_ctxt->s_deblk_ctbrow_prms.pu4_ctb_row_bs_horz =
+ (UWORD32 *)(pu1_deblk_base + (u4_size_bs_memory >> 1));
+
+ ps_ctxt->s_deblk_ctbrow_prms.pi1_ctb_row_qp = (WORD8 *)pu1_deblk_base + u4_size_bs_memory;
+
+ /*Assign stride = 4x4 blocks in horizontal edge*/
+ ps_ctxt->s_deblk_ctbrow_prms.u4_qp_buffer_stride = (MAX_CTB_SIZE / 4) * u4_ctb_in_a_row;
+
+ pu1_deblk_base += (u4_size_bs_memory + u4_size_qp_memory);
+
+ /* ------ Deblocking memory pointer assignments end ------ */
+
+ /*------SAO memory's pointer assignment starts------------*/
+ if(!is_hbd_mode)
+ {
+ /* 2 is added to allocate top left pixel */
+ ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size =
+ u4_ctb_aligned_ht + (u4_ctb_aligned_wd + 1) * (num_vert_units + 1);
+ ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size =
+ u4_ctb_aligned_ht + (u4_ctb_aligned_wd + 2) * (num_vert_units + 1);
+ ps_ctxt->s_sao_ctxt_t.i4_num_ctb_units =
+ num_vert_units * (u4_ctb_aligned_wd / MAX_CTB_SIZE);
+
+ for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
+ {
+ ps_ctxt->s_sao_ctxt_t.apu1_sao_src_frm_top_luma[i4_enc_frm_id] =
+ pu1_sao_base +
+ ((ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size +
+ ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size) *
+ i4_num_bitrate_inst * i4_enc_frm_id) + // move to the next frame_id
+ u4_ctb_aligned_wd +
+ 2;
+
+ ps_ctxt->s_sao_ctxt_t.apu1_sao_src_frm_top_chroma[i4_enc_frm_id] =
+ pu1_sao_base +
+ ((ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size +
+ ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size) *
+ i4_num_bitrate_inst * i4_enc_frm_id) +
+ +u4_ctb_aligned_ht + (u4_ctb_aligned_wd + 1) * (num_vert_units + 1) +
+ u4_ctb_aligned_wd + 4;
+
+ ps_ctxt->s_sao_ctxt_t.aps_frm_top_ctb_sao[i4_enc_frm_id] = (sao_enc_t *) (pu1_sao_base +
+ ((ps_ctxt->s_sao_ctxt_t.i4_top_luma_buf_size + ps_ctxt->s_sao_ctxt_t.i4_top_chroma_buf_size)
+ *i4_num_bitrate_inst*i4_num_enc_loop_frm_pllel) +
+ (ps_ctxt->s_sao_ctxt_t.i4_num_ctb_units * sizeof(sao_enc_t) *i4_num_bitrate_inst * i4_enc_frm_id));
+ }
+ ps_ctxt->s_sao_ctxt_t.i4_ctb_size =
+ (1 << ps_init_prms->s_config_prms.i4_max_log2_cu_size);
+ ps_ctxt->s_sao_ctxt_t.u4_ctb_aligned_wd = u4_ctb_aligned_wd;
+ }
+
+ /*------SAO memory's pointer assignment ends------------*/
+
+ /* perform all one time initialisation here */
+ ps_ctxt->i4_nbr_map_strd = MAX_PU_IN_CTB_ROW + 1 + 8;
+
+ ps_ctxt->pu1_ctb_nbr_map = ps_ctxt->au1_nbr_ctb_map[0];
+
+ ps_ctxt->i4_deblock_type = ps_init_prms->s_coding_tools_prms.i4_deblocking_type;
+
+ /* move the pointer to 1,2 location */
+ ps_ctxt->pu1_ctb_nbr_map += ps_ctxt->i4_nbr_map_strd;
+ ps_ctxt->pu1_ctb_nbr_map++;
+
+ ps_ctxt->i4_cu_csbf_strd = MAX_TU_IN_CTB_ROW;
+
+ CREATE_SUBBLOCK2CSBFID_MAP(gai4_subBlock2csbfId_map4x4TU, 1, 4, ps_ctxt->i4_cu_csbf_strd);
+
+ CREATE_SUBBLOCK2CSBFID_MAP(gai4_subBlock2csbfId_map8x8TU, 4, 8, ps_ctxt->i4_cu_csbf_strd);
+
+ CREATE_SUBBLOCK2CSBFID_MAP(
+ gai4_subBlock2csbfId_map16x16TU, 16, 16, ps_ctxt->i4_cu_csbf_strd);
+
+ CREATE_SUBBLOCK2CSBFID_MAP(
+ gai4_subBlock2csbfId_map32x32TU, 64, 32, ps_ctxt->i4_cu_csbf_strd);
+
+ /* For both instance initialise the chroma dequant start idx */
+ ps_ctxt->as_cu_prms[0].i4_chrm_deq_coeff_strt_idx = (MAX_CU_SIZE * MAX_CU_SIZE);
+ ps_ctxt->as_cu_prms[1].i4_chrm_deq_coeff_strt_idx = (MAX_CU_SIZE * MAX_CU_SIZE);
+
+ /* initialise all the function pointer tables */
+ {
+ ps_ctxt->pv_inter_rdopt_cu_mc_mvp =
+ (pf_inter_rdopt_cu_mc_mvp)ihevce_inter_rdopt_cu_mc_mvp;
+
+ ps_ctxt->pv_inter_rdopt_cu_ntu = (pf_inter_rdopt_cu_ntu)ihevce_inter_rdopt_cu_ntu;
+
+#if ENABLE_RDO_BASED_TU_RECURSION
+ if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0)
+ {
+ ps_ctxt->pv_inter_rdopt_cu_ntu =
+ (pf_inter_rdopt_cu_ntu)ihevce_inter_tu_tree_selector_and_rdopt_cost_computer;
+ }
+#endif
+ ps_ctxt->pv_intra_chroma_pred_mode_selector =
+ (pf_intra_chroma_pred_mode_selector)ihevce_intra_chroma_pred_mode_selector;
+ ps_ctxt->pv_intra_rdopt_cu_ntu = (pf_intra_rdopt_cu_ntu)ihevce_intra_rdopt_cu_ntu;
+ ps_ctxt->pv_final_rdopt_mode_prcs =
+ (pf_final_rdopt_mode_prcs)ihevce_final_rdopt_mode_prcs;
+ ps_ctxt->pv_store_cu_results = (pf_store_cu_results)ihevce_store_cu_results;
+ ps_ctxt->pv_enc_loop_cu_bot_copy = (pf_enc_loop_cu_bot_copy)ihevce_enc_loop_cu_bot_copy;
+ ps_ctxt->pv_enc_loop_ctb_left_copy =
+ (pf_enc_loop_ctb_left_copy)ihevce_enc_loop_ctb_left_copy;
+
+ /* Memory assignments for chroma intra pred buffer */
+ {
+ WORD32 pred_buf_size =
+ MAX_TU_SIZE * MAX_TU_SIZE * 2 * ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1);
+ WORD32 pred_buf_size_per_thread =
+ NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD * pred_buf_size;
+ UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CHROMA_PRED_INTRA].pv_base +
+ (ctr * pred_buf_size_per_thread);
+
+ for(i = 0; i < NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD; i++)
+ {
+ ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[i].pv_pred_data = pu1_base;
+ pu1_base += pred_buf_size;
+ }
+ }
+
+ /* Memory assignments for reference substitution output */
+ {
+ WORD32 pred_buf_size = ((MAX_TU_SIZE * 2 * 2) + 4);
+ WORD32 pred_buf_size_per_thread = pred_buf_size;
+ UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_REF_SUB_OUT].pv_base +
+ (ctr * pred_buf_size_per_thread);
+
+ ps_ctxt->pv_ref_sub_out = pu1_base;
+ }
+
+ /* Memory assignments for reference filtering output */
+ {
+ WORD32 pred_buf_size = ((MAX_TU_SIZE * 2 * 2) + 4);
+ WORD32 pred_buf_size_per_thread = pred_buf_size;
+ UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_REF_FILT_OUT].pv_base +
+ (ctr * pred_buf_size_per_thread);
+
+ ps_ctxt->pv_ref_filt_out = pu1_base;
+ }
+
+ /* Memory assignments for recon storage during CU Recursion */
+#if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
+ if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0)
+#endif
+ {
+ {
+ WORD32 pred_buf_size = (MAX_CU_SIZE * MAX_CU_SIZE);
+ WORD32 pred_buf_size_per_thread = pred_buf_size;
+ UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_RECON].pv_base +
+ (ctr * pred_buf_size_per_thread);
+
+ ps_ctxt->pv_cu_luma_recon = pu1_base;
+ }
+
+ {
+ WORD32 pred_buf_size = ((MAX_CU_SIZE * MAX_CU_SIZE) >> 1) *
+ ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1);
+ WORD32 pred_buf_size_per_thread = pred_buf_size;
+ UWORD8 *pu1_base =
+ (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_RECON].pv_base +
+ (ctr * pred_buf_size_per_thread);
+
+ ps_ctxt->pv_cu_chrma_recon = pu1_base;
+ }
+ }
+
+ /* Memory assignments for pred storage during CU Recursion */
+#if !PROCESS_GT_1CTB_VIA_CU_RECUR_IN_FAST_PRESETS
+ if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0)
+#endif
+ {
+ {
+ WORD32 pred_buf_size = (MAX_CU_SIZE * MAX_CU_SIZE);
+ WORD32 pred_buf_size_per_thread = pred_buf_size;
+ UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_LUMA_PRED].pv_base +
+ (ctr * pred_buf_size_per_thread);
+
+ ps_ctxt->pv_CTB_pred_luma = pu1_base;
+ }
+
+ {
+ WORD32 pred_buf_size = ((MAX_CU_SIZE * MAX_CU_SIZE) >> 1) *
+ ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1);
+ WORD32 pred_buf_size_per_thread = pred_buf_size;
+ UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_CU_RECUR_CHROMA_PRED].pv_base +
+ (ctr * pred_buf_size_per_thread);
+
+ ps_ctxt->pv_CTB_pred_chroma = pu1_base;
+ }
+ }
+
+ /* Memory assignments for CTB left luma data storage */
+ {
+ WORD32 pred_buf_size = (MAX_CTB_SIZE + MAX_TU_SIZE);
+ WORD32 pred_buf_size_per_thread = pred_buf_size;
+ UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_LEFT_LUMA_DATA].pv_base +
+ (ctr * pred_buf_size_per_thread);
+
+ ps_ctxt->pv_left_luma_data = pu1_base;
+ }
+
+ /* Memory assignments for CTB left chroma data storage */
+ {
+ WORD32 pred_buf_size =
+ (MAX_CTB_SIZE + MAX_TU_SIZE) * ((i4_chroma_format == IV_YUV_422SP_UV) ? 2 : 1);
+ WORD32 pred_buf_size_per_thread = pred_buf_size;
+ UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_LEFT_CHROMA_DATA].pv_base +
+ (ctr * pred_buf_size_per_thread);
+
+ ps_ctxt->pv_left_chrm_data = pu1_base;
+ }
+ }
+
+ /* Memory for inter pred buffers */
+ {
+ WORD32 i4_num_bufs_per_thread;
+
+ WORD32 i4_buf_size_per_cand =
+ (MAX_CTB_SIZE) * (MAX_CTB_SIZE) *
+ ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) * sizeof(UWORD8);
+
+ i4_num_bufs_per_thread =
+ (ps_mem_tab[ENC_LOOP_INTER_PRED].i4_mem_size / i4_num_proc_thrds) /
+ i4_buf_size_per_cand;
+
+ ps_ctxt->i4_max_num_inter_rdopt_cands = i4_num_bufs_per_thread - 4;
+
+ ps_ctxt->s_pred_buf_data.u4_is_buf_in_use = UINT_MAX;
+
+ {
+ UWORD8 *pu1_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_INTER_PRED].pv_base +
+ +(ctr * i4_buf_size_per_cand * i4_num_bufs_per_thread);
+
+ for(i = 0; i < i4_num_bufs_per_thread; i++)
+ {
+ ps_ctxt->s_pred_buf_data.apv_inter_pred_data[i] =
+ pu1_base + i * i4_buf_size_per_cand;
+ ps_ctxt->s_pred_buf_data.u4_is_buf_in_use ^= (1 << i);
+ }
+ }
+ }
+
+ /* Memory required to store pred for 422 chroma */
+ if(i4_chroma_format == IV_YUV_422SP_UV)
+ {
+ WORD32 pred_buf_size = MAX_CTB_SIZE * MAX_CTB_SIZE * 2;
+ WORD32 pred_buf_size_per_thread =
+ pred_buf_size * ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8) ? 2 : 1) *
+ sizeof(UWORD8);
+ void *pv_base = (UWORD8 *)ps_mem_tab[ENC_LOOP_422_CHROMA_INTRA_PRED].pv_base +
+ (ctr * pred_buf_size_per_thread);
+
+ ps_ctxt->pv_422_chroma_intra_pred_buf = pv_base;
+ }
+ else
+ {
+ ps_ctxt->pv_422_chroma_intra_pred_buf = NULL;
+ }
+
+ /* Memory for Recon Datastore (Used around and within the RDOPT loop) */
+ {
+ WORD32 i4_lumaBufSize = MAX_CU_SIZE * MAX_CU_SIZE;
+ WORD32 i4_chromaBufSize =
+ MAX_CU_SIZE * (MAX_CU_SIZE / 2) * ((i4_chroma_format == IV_YUV_422SP_UV) + 1);
+ WORD32 i4_memSize_perThread = ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].i4_mem_size /
+ (i4_num_proc_thrds * sizeof(UWORD8) * (is_hbd_mode + 1));
+ WORD32 i4_quality_preset = ps_ctxt->i4_quality_preset;
+ {
+ UWORD8 *pu1_mem_base =
+ (((UWORD8 *)ps_mem_tab[ENC_LOOP_RECON_DATA_STORE].pv_base) +
+ ctr * i4_memSize_perThread);
+
+ ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_luma_recon_bufs[0] =
+ pu1_mem_base + i4_lumaBufSize * 0;
+ ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_luma_recon_bufs[1] =
+ pu1_mem_base + i4_lumaBufSize * 1;
+ ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_luma_recon_bufs[0] =
+ pu1_mem_base + i4_lumaBufSize * 2;
+ ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_luma_recon_bufs[1] =
+ pu1_mem_base + i4_lumaBufSize * 3;
+
+ pu1_mem_base += i4_lumaBufSize * 4;
+
+ switch(i4_quality_preset)
+ {
+ case IHEVCE_QUALITY_P0:
+ {
+#if ENABLE_CHROMA_RDOPT_EVAL_IN_PQ
+ ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
+ pu1_mem_base + i4_chromaBufSize * 0;
+ ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
+ pu1_mem_base + i4_chromaBufSize * 1;
+#else
+ ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
+ ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
+#endif
+
+#if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ
+ ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
+ pu1_mem_base + i4_chromaBufSize * 2;
+ ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
+ pu1_mem_base + i4_chromaBufSize * 3;
+ ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
+ pu1_mem_base + i4_chromaBufSize * 2;
+ ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
+ pu1_mem_base + i4_chromaBufSize * 3;
+#else
+ ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
+ ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
+ ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
+ ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
+#endif
+
+ break;
+ }
+ case IHEVCE_QUALITY_P2:
+ {
+#if ENABLE_CHROMA_RDOPT_EVAL_IN_HQ
+ ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
+ pu1_mem_base + i4_chromaBufSize * 0;
+ ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
+ pu1_mem_base + i4_chromaBufSize * 1;
+#else
+ ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
+ ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
+#endif
+
+#if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ
+ ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
+ pu1_mem_base + i4_chromaBufSize * 2;
+ ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
+ pu1_mem_base + i4_chromaBufSize * 3;
+ ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
+ pu1_mem_base + i4_chromaBufSize * 2;
+ ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
+ pu1_mem_base + i4_chromaBufSize * 3;
+#else
+ ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
+ ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
+ ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
+ ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
+#endif
+
+ break;
+ }
+ case IHEVCE_QUALITY_P3:
+ {
+#if ENABLE_CHROMA_RDOPT_EVAL_IN_MS
+ ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
+ pu1_mem_base + i4_chromaBufSize * 0;
+ ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
+ pu1_mem_base + i4_chromaBufSize * 1;
+#else
+ ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
+ ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
+#endif
+
+#if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS
+ ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
+ pu1_mem_base + i4_chromaBufSize * 2;
+ ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
+ pu1_mem_base + i4_chromaBufSize * 3;
+ ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
+ pu1_mem_base + i4_chromaBufSize * 2;
+ ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
+ pu1_mem_base + i4_chromaBufSize * 3;
+#else
+ ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
+ ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
+ ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
+ ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
+#endif
+
+ break;
+ }
+ case IHEVCE_QUALITY_P4:
+ {
+#if ENABLE_CHROMA_RDOPT_EVAL_IN_HS
+ ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
+ pu1_mem_base + i4_chromaBufSize * 0;
+ ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
+ pu1_mem_base + i4_chromaBufSize * 1;
+#else
+ ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
+ ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
+#endif
+
+#if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS
+ ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
+ pu1_mem_base + i4_chromaBufSize * 2;
+ ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
+ pu1_mem_base + i4_chromaBufSize * 3;
+ ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
+ pu1_mem_base + i4_chromaBufSize * 2;
+ ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
+ pu1_mem_base + i4_chromaBufSize * 3;
+#else
+ ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
+ ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
+ ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
+ ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
+#endif
+
+ break;
+ }
+ case IHEVCE_QUALITY_P5:
+ {
+#if ENABLE_CHROMA_RDOPT_EVAL_IN_XS
+ ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] =
+ pu1_mem_base + i4_chromaBufSize * 0;
+ ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] =
+ pu1_mem_base + i4_chromaBufSize * 1;
+#else
+ ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
+ ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[0] = NULL;
+#endif
+
+#if ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS
+ ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] =
+ pu1_mem_base + i4_chromaBufSize * 2;
+ ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] =
+ pu1_mem_base + i4_chromaBufSize * 3;
+ ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] =
+ pu1_mem_base + i4_chromaBufSize * 2;
+ ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] =
+ pu1_mem_base + i4_chromaBufSize * 3;
+#else
+ ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
+ ps_ctxt->as_cu_prms[0].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
+ ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[1] = NULL;
+ ps_ctxt->as_cu_prms[1].s_recon_datastore.apv_chroma_recon_bufs[2] = NULL;
+#endif
+
+ break;
+ }
+ }
+ }
+
+ ps_ctxt->as_cu_prms[0].s_recon_datastore.i4_lumaRecon_stride = MAX_CU_SIZE;
+ ps_ctxt->as_cu_prms[1].s_recon_datastore.i4_lumaRecon_stride = MAX_CU_SIZE;
+ ps_ctxt->as_cu_prms[0].s_recon_datastore.i4_chromaRecon_stride = MAX_CU_SIZE;
+ ps_ctxt->as_cu_prms[1].s_recon_datastore.i4_chromaRecon_stride = MAX_CU_SIZE;
+
+ } /* Recon Datastore */
+
+ /****************************************************/
+ /****************************************************/
+ /* ps_pps->i1_sign_data_hiding_flag == UNHIDDEN */
+ /* when NO_SBH. else HIDDEN */
+ /****************************************************/
+ /****************************************************/
+ /* Zero cbf tool is enabled by default for all presets */
+ ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
+
+ if(ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3)
+ {
+ ps_ctxt->i4_quant_rounding_level = CU_LEVEL_QUANT_ROUNDING;
+ ps_ctxt->i4_chroma_quant_rounding_level = CHROMA_QUANT_ROUNDING;
+ ps_ctxt->i4_rdoq_level = ALL_CAND_RDOQ;
+ ps_ctxt->i4_sbh_level = ALL_CAND_SBH;
+ }
+ else if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P3)
+ {
+ ps_ctxt->i4_quant_rounding_level = FIXED_QUANT_ROUNDING;
+ ps_ctxt->i4_chroma_quant_rounding_level = FIXED_QUANT_ROUNDING;
+ ps_ctxt->i4_rdoq_level = NO_RDOQ;
+ ps_ctxt->i4_sbh_level = NO_SBH;
+ }
+ else
+ {
+ ps_ctxt->i4_quant_rounding_level = FIXED_QUANT_ROUNDING;
+ ps_ctxt->i4_chroma_quant_rounding_level = FIXED_QUANT_ROUNDING;
+ ps_ctxt->i4_rdoq_level = NO_RDOQ;
+ ps_ctxt->i4_sbh_level = NO_SBH;
+ }
+
+#if DISABLE_QUANT_ROUNDING
+ ps_ctxt->i4_quant_rounding_level = FIXED_QUANT_ROUNDING;
+ ps_ctxt->i4_chroma_quant_rounding_level = FIXED_QUANT_ROUNDING;
+#endif
+ /*Disabling RDOQ only when spatial modulation is enabled
+ as RDOQ degrades visual quality*/
+ if(ps_init_prms->s_config_prms.i4_cu_level_rc & 1)
+ {
+ ps_ctxt->i4_rdoq_level = NO_RDOQ;
+ }
+
+#if DISABLE_RDOQ
+ ps_ctxt->i4_rdoq_level = NO_RDOQ;
+#endif
+
+#if DISABLE_SBH
+ ps_ctxt->i4_sbh_level = NO_SBH;
+#endif
+
+ /*Rounding factor calc based on previous cabac states */
+
+ ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[0] = &ps_ctxt->i4_quant_round_4x4[0][0];
+ ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[1] = &ps_ctxt->i4_quant_round_8x8[0][0];
+ ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[2] = &ps_ctxt->i4_quant_round_16x16[0][0];
+ ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[4] = &ps_ctxt->i4_quant_round_32x32[0][0];
+
+ ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[0] = &ps_ctxt->i4_quant_round_4x4[1][0];
+ ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[1] = &ps_ctxt->i4_quant_round_8x8[1][0];
+ ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[2] = &ps_ctxt->i4_quant_round_16x16[1][0];
+ ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[4] = &ps_ctxt->i4_quant_round_32x32[1][0];
+
+ ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[0] = &ps_ctxt->i4_quant_round_cr_4x4[0][0];
+ ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[1] = &ps_ctxt->i4_quant_round_cr_8x8[0][0];
+ ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[2] = &ps_ctxt->i4_quant_round_cr_16x16[0][0];
+
+ ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[0] = &ps_ctxt->i4_quant_round_cr_4x4[1][0];
+ ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[1] = &ps_ctxt->i4_quant_round_cr_8x8[1][0];
+ ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[2] = &ps_ctxt->i4_quant_round_cr_16x16[1][0];
+
+ /****************************************************************************************/
+ /* Setting the perform rdoq and sbh flags appropriately */
+ /****************************************************************************************/
+ {
+ /******************************************/
+ /* For best cand rdoq and/or sbh */
+ /******************************************/
+ ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq =
+ (ps_ctxt->i4_rdoq_level == BEST_CAND_RDOQ);
+ /* To do SBH we need the quant and iquant data. This would mean we need to do quantization again, which would mean
+ we would have to do RDOQ again.*/
+ ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq =
+ ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq ||
+ ((BEST_CAND_SBH == ps_ctxt->i4_sbh_level) &&
+ (ALL_CAND_RDOQ == ps_ctxt->i4_rdoq_level));
+
+ ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh =
+ (ps_ctxt->i4_sbh_level == BEST_CAND_SBH);
+
+ /* SBH should be performed if
+ a) i4_sbh_level is BEST_CAND_SBH.
+ b) For all quality presets above medium speed(i.e. high speed and extreme speed) and
+ if SBH has to be done because for these presets the quant, iquant and scan coeff
+ data are calculated in this function and not during the RDOPT stage*/
+
+ /* RDOQ will change the coefficients. If coefficients are changed, we will have to do sbh again*/
+ ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh =
+ ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh ||
+ ((BEST_CAND_RDOQ == ps_ctxt->i4_rdoq_level) &&
+ (ALL_CAND_SBH == ps_ctxt->i4_sbh_level));
+
+ /******************************************/
+ /* For all cand rdoq and/or sbh */
+ /******************************************/
+ ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq =
+ (ps_ctxt->i4_rdoq_level == ALL_CAND_RDOQ);
+ ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh =
+ (ps_ctxt->i4_sbh_level == ALL_CAND_SBH);
+ ps_ctxt->s_rdoq_sbh_ctxt.i4_bit_depth =
+ ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth;
+ }
+
+ if(!is_hbd_mode)
+ {
+ if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 1)
+ {
+ if(ps_ctxt->i4_rdoq_level == NO_RDOQ)
+ {
+ ps_ctxt->apf_quant_iquant_ssd[0] =
+ ps_func_selector->ihevc_quant_iquant_ssd_fptr;
+ ps_ctxt->apf_quant_iquant_ssd[2] = ps_func_selector->ihevc_quant_iquant_fptr;
+ }
+ else
+ {
+ ps_ctxt->apf_quant_iquant_ssd[0] =
+ ps_func_selector->ihevc_quant_iquant_ssd_rdoq_fptr;
+ ps_ctxt->apf_quant_iquant_ssd[2] =
+ ps_func_selector->ihevc_quant_iquant_rdoq_fptr;
+ }
+
+ /*If coef level RDOQ is enabled, quantization based on corr. error to be done */
+ if(ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING)
+ {
+ ps_ctxt->apf_quant_iquant_ssd[1] =
+ ps_func_selector->ihevc_q_iq_ssd_var_rnd_fact_fptr;
+ ps_ctxt->apf_quant_iquant_ssd[3] =
+ ps_func_selector->ihevc_q_iq_var_rnd_fact_fptr;
+ }
+ else
+ {
+ ps_ctxt->apf_quant_iquant_ssd[1] =
+ ps_func_selector->ihevc_quant_iquant_ssd_fptr;
+ ps_ctxt->apf_quant_iquant_ssd[3] = ps_func_selector->ihevc_quant_iquant_fptr;
+ }
+ }
+ else if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 0)
+ {
+ if(ps_ctxt->i4_rdoq_level == NO_RDOQ)
+ {
+ ps_ctxt->apf_quant_iquant_ssd[0] =
+ ps_func_selector->ihevc_quant_iquant_ssd_flat_scale_mat_fptr;
+ ps_ctxt->apf_quant_iquant_ssd[2] =
+ ps_func_selector->ihevc_quant_iquant_flat_scale_mat_fptr;
+ }
+ else
+ {
+ ps_ctxt->apf_quant_iquant_ssd[0] =
+ ps_func_selector->ihevc_quant_iquant_ssd_flat_scale_mat_rdoq_fptr;
+ ps_ctxt->apf_quant_iquant_ssd[2] =
+ ps_func_selector->ihevc_quant_iquant_flat_scale_mat_rdoq_fptr;
+ }
+
+ /*If coef level RDOQ is enabled, quantization based on corr. error to be done */
+ if(ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING)
+ {
+ ps_ctxt->apf_quant_iquant_ssd[1] =
+ ps_func_selector->ihevc_q_iq_ssd_flat_scale_mat_var_rnd_fact_fptr;
+ ps_ctxt->apf_quant_iquant_ssd[3] =
+ ps_func_selector->ihevc_q_iq_flat_scale_mat_var_rnd_fact_fptr;
+ }
+ else
+ {
+ ps_ctxt->apf_quant_iquant_ssd[1] =
+ ps_func_selector->ihevc_quant_iquant_ssd_flat_scale_mat_fptr;
+ ps_ctxt->apf_quant_iquant_ssd[3] =
+ ps_func_selector->ihevc_quant_iquant_flat_scale_mat_fptr;
+ }
+ }
+
+ ps_ctxt->s_sao_ctxt_t.apf_sao_luma[0] =
+ ps_func_selector->ihevc_sao_edge_offset_class0_fptr;
+ ps_ctxt->s_sao_ctxt_t.apf_sao_luma[1] =
+ ps_func_selector->ihevc_sao_edge_offset_class1_fptr;
+ ps_ctxt->s_sao_ctxt_t.apf_sao_luma[2] =
+ ps_func_selector->ihevc_sao_edge_offset_class2_fptr;
+ ps_ctxt->s_sao_ctxt_t.apf_sao_luma[3] =
+ ps_func_selector->ihevc_sao_edge_offset_class3_fptr;
+
+ ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[0] =
+ ps_func_selector->ihevc_sao_edge_offset_class0_chroma_fptr;
+ ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[1] =
+ ps_func_selector->ihevc_sao_edge_offset_class1_chroma_fptr;
+ ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[2] =
+ ps_func_selector->ihevc_sao_edge_offset_class2_chroma_fptr;
+ ps_ctxt->s_sao_ctxt_t.apf_sao_chroma[3] =
+ ps_func_selector->ihevc_sao_edge_offset_class3_chroma_fptr;
+
+ ps_ctxt->apf_it_recon[0] = ps_func_selector->ihevc_itrans_recon_4x4_ttype1_fptr;
+ ps_ctxt->apf_it_recon[1] = ps_func_selector->ihevc_itrans_recon_4x4_fptr;
+ ps_ctxt->apf_it_recon[2] = ps_func_selector->ihevc_itrans_recon_8x8_fptr;
+ ps_ctxt->apf_it_recon[3] = ps_func_selector->ihevc_itrans_recon_16x16_fptr;
+ ps_ctxt->apf_it_recon[4] = ps_func_selector->ihevc_itrans_recon_32x32_fptr;
+
+ ps_ctxt->apf_chrm_it_recon[0] = ps_func_selector->ihevc_chroma_itrans_recon_4x4_fptr;
+ ps_ctxt->apf_chrm_it_recon[1] = ps_func_selector->ihevc_chroma_itrans_recon_8x8_fptr;
+ ps_ctxt->apf_chrm_it_recon[2] = ps_func_selector->ihevc_chroma_itrans_recon_16x16_fptr;
+
+ ps_ctxt->apf_resd_trns[0] = ps_func_selector->ihevc_resi_trans_4x4_ttype1_fptr;
+ ps_ctxt->apf_resd_trns[1] = ps_func_selector->ihevc_resi_trans_4x4_fptr;
+ ps_ctxt->apf_resd_trns[2] = ps_func_selector->ihevc_resi_trans_8x8_fptr;
+ ps_ctxt->apf_resd_trns[3] = ps_func_selector->ihevc_resi_trans_16x16_fptr;
+ ps_ctxt->apf_resd_trns[4] = ps_func_selector->ihevc_resi_trans_32x32_fptr;
+
+ ps_ctxt->apf_chrm_resd_trns[0] = ps_func_selector->ihevc_resi_trans_4x4_fptr;
+ ps_ctxt->apf_chrm_resd_trns[1] = ps_func_selector->ihevc_resi_trans_8x8_fptr;
+ ps_ctxt->apf_chrm_resd_trns[2] = ps_func_selector->ihevc_resi_trans_16x16_fptr;
+
+ ps_ctxt->apf_lum_ip[IP_FUNC_MODE_0] =
+ ps_func_selector->ihevc_intra_pred_luma_planar_fptr;
+ ps_ctxt->apf_lum_ip[IP_FUNC_MODE_1] = ps_func_selector->ihevc_intra_pred_luma_dc_fptr;
+ ps_ctxt->apf_lum_ip[IP_FUNC_MODE_2] =
+ ps_func_selector->ihevc_intra_pred_luma_mode2_fptr;
+ ps_ctxt->apf_lum_ip[IP_FUNC_MODE_3TO9] =
+ ps_func_selector->ihevc_intra_pred_luma_mode_3_to_9_fptr;
+ ps_ctxt->apf_lum_ip[IP_FUNC_MODE_10] =
+ ps_func_selector->ihevc_intra_pred_luma_horz_fptr;
+ ps_ctxt->apf_lum_ip[IP_FUNC_MODE_11TO17] =
+ ps_func_selector->ihevc_intra_pred_luma_mode_11_to_17_fptr;
+ ps_ctxt->apf_lum_ip[IP_FUNC_MODE_18_34] =
+ ps_func_selector->ihevc_intra_pred_luma_mode_18_34_fptr;
+ ps_ctxt->apf_lum_ip[IP_FUNC_MODE_19TO25] =
+ ps_func_selector->ihevc_intra_pred_luma_mode_19_to_25_fptr;
+ ps_ctxt->apf_lum_ip[IP_FUNC_MODE_26] = ps_func_selector->ihevc_intra_pred_luma_ver_fptr;
+ ps_ctxt->apf_lum_ip[IP_FUNC_MODE_27TO33] =
+ ps_func_selector->ihevc_intra_pred_luma_mode_27_to_33_fptr;
+
+ ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_0] =
+ ps_func_selector->ihevc_intra_pred_chroma_planar_fptr;
+ ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_1] =
+ ps_func_selector->ihevc_intra_pred_chroma_dc_fptr;
+ ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_2] =
+ ps_func_selector->ihevc_intra_pred_chroma_mode2_fptr;
+ ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_3TO9] =
+ ps_func_selector->ihevc_intra_pred_chroma_mode_3_to_9_fptr;
+ ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_10] =
+ ps_func_selector->ihevc_intra_pred_chroma_horz_fptr;
+ ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_11TO17] =
+ ps_func_selector->ihevc_intra_pred_chroma_mode_11_to_17_fptr;
+ ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_18_34] =
+ ps_func_selector->ihevc_intra_pred_chroma_mode_18_34_fptr;
+ ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_19TO25] =
+ ps_func_selector->ihevc_intra_pred_chroma_mode_19_to_25_fptr;
+ ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_26] =
+ ps_func_selector->ihevc_intra_pred_chroma_ver_fptr;
+ ps_ctxt->apf_chrm_ip[IP_FUNC_MODE_27TO33] =
+ ps_func_selector->ihevc_intra_pred_chroma_mode_27_to_33_fptr;
+
+ ps_ctxt->apf_chrm_resd_trns_had[0] =
+ (pf_res_trans_luma_had_chroma)ps_ctxt->s_cmn_opt_func.pf_chroma_HAD_4x4_8bit;
+ ps_ctxt->apf_chrm_resd_trns_had[1] =
+ (pf_res_trans_luma_had_chroma)ps_ctxt->s_cmn_opt_func.pf_chroma_HAD_8x8_8bit;
+ ps_ctxt->apf_chrm_resd_trns_had[2] =
+ (pf_res_trans_luma_had_chroma)ps_ctxt->s_cmn_opt_func.pf_chroma_HAD_16x16_8bit;
+ }
+
+ if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 0)
+ {
+ /* initialise the scale & rescale matrices */
+ ps_ctxt->api2_scal_mat[0] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
+ ps_ctxt->api2_scal_mat[1] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
+ ps_ctxt->api2_scal_mat[2] = (WORD16 *)&gi2_flat_scale_mat_8x8[0];
+ ps_ctxt->api2_scal_mat[3] = (WORD16 *)&gi2_flat_scale_mat_16x16[0];
+ ps_ctxt->api2_scal_mat[4] = (WORD16 *)&gi2_flat_scale_mat_32x32[0];
+ /*init for inter matrix*/
+ ps_ctxt->api2_scal_mat[5] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
+ ps_ctxt->api2_scal_mat[6] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
+ ps_ctxt->api2_scal_mat[7] = (WORD16 *)&gi2_flat_scale_mat_8x8[0];
+ ps_ctxt->api2_scal_mat[8] = (WORD16 *)&gi2_flat_scale_mat_16x16[0];
+ ps_ctxt->api2_scal_mat[9] = (WORD16 *)&gi2_flat_scale_mat_32x32[0];
+
+ /*init for rescale matrix*/
+ ps_ctxt->api2_rescal_mat[0] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
+ ps_ctxt->api2_rescal_mat[1] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
+ ps_ctxt->api2_rescal_mat[2] = (WORD16 *)&gi2_flat_rescale_mat_8x8[0];
+ ps_ctxt->api2_rescal_mat[3] = (WORD16 *)&gi2_flat_rescale_mat_16x16[0];
+ ps_ctxt->api2_rescal_mat[4] = (WORD16 *)&gi2_flat_rescale_mat_32x32[0];
+ /*init for rescale inter matrix*/
+ ps_ctxt->api2_rescal_mat[5] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
+ ps_ctxt->api2_rescal_mat[6] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
+ ps_ctxt->api2_rescal_mat[7] = (WORD16 *)&gi2_flat_rescale_mat_8x8[0];
+ ps_ctxt->api2_rescal_mat[8] = (WORD16 *)&gi2_flat_rescale_mat_16x16[0];
+ ps_ctxt->api2_rescal_mat[9] = (WORD16 *)&gi2_flat_rescale_mat_32x32[0];
+ }
+ else if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 1)
+ {
+ /* initialise the scale & rescale matrices */
+ ps_ctxt->api2_scal_mat[0] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
+ ps_ctxt->api2_scal_mat[1] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
+ ps_ctxt->api2_scal_mat[2] = (WORD16 *)&gi2_intra_default_scale_mat_8x8[0];
+ ps_ctxt->api2_scal_mat[3] = (WORD16 *)&gi2_intra_default_scale_mat_16x16[0];
+ ps_ctxt->api2_scal_mat[4] = (WORD16 *)&gi2_intra_default_scale_mat_32x32[0];
+ /*init for inter matrix*/
+ ps_ctxt->api2_scal_mat[5] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
+ ps_ctxt->api2_scal_mat[6] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
+ ps_ctxt->api2_scal_mat[7] = (WORD16 *)&gi2_inter_default_scale_mat_8x8[0];
+ ps_ctxt->api2_scal_mat[8] = (WORD16 *)&gi2_inter_default_scale_mat_16x16[0];
+ ps_ctxt->api2_scal_mat[9] = (WORD16 *)&gi2_inter_default_scale_mat_32x32[0];
+
+ /*init for rescale matrix*/
+ ps_ctxt->api2_rescal_mat[0] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
+ ps_ctxt->api2_rescal_mat[1] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
+ ps_ctxt->api2_rescal_mat[2] = (WORD16 *)&gi2_intra_default_rescale_mat_8x8[0];
+ ps_ctxt->api2_rescal_mat[3] = (WORD16 *)&gi2_intra_default_rescale_mat_16x16[0];
+ ps_ctxt->api2_rescal_mat[4] = (WORD16 *)&gi2_intra_default_rescale_mat_32x32[0];
+ /*init for rescale inter matrix*/
+ ps_ctxt->api2_rescal_mat[5] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
+ ps_ctxt->api2_rescal_mat[6] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
+ ps_ctxt->api2_rescal_mat[7] = (WORD16 *)&gi2_inter_default_rescale_mat_8x8[0];
+ ps_ctxt->api2_rescal_mat[8] = (WORD16 *)&gi2_inter_default_rescale_mat_16x16[0];
+ ps_ctxt->api2_rescal_mat[9] = (WORD16 *)&gi2_inter_default_rescale_mat_32x32[0];
+ }
+ else
+ {
+ ASSERT(0);
+ }
+
+ /* Not recomputing Luma pred-data and header data for any preset now */
+ ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 0;
+ ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data = 0;
+ ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data = 1;
+
+ switch(ps_ctxt->i4_quality_preset)
+ {
+ case IHEVCE_QUALITY_P0:
+ {
+ ps_ctxt->i4_max_merge_candidates = 5;
+ ps_ctxt->i4_use_satd_for_merge_eval = 1;
+ ps_ctxt->u1_use_top_at_ctb_boundary = 1;
+ ps_ctxt->u1_use_early_cbf_data = 0;
+ ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_PQ;
+ ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
+ ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_PQ;
+
+ break;
+ }
+ case IHEVCE_QUALITY_P2:
+ {
+ ps_ctxt->i4_max_merge_candidates = 5;
+ ps_ctxt->i4_use_satd_for_merge_eval = 1;
+ ps_ctxt->u1_use_top_at_ctb_boundary = 1;
+ ps_ctxt->u1_use_early_cbf_data = 0;
+
+ ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_HQ;
+ ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
+ ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HQ;
+
+ break;
+ }
+ case IHEVCE_QUALITY_P3:
+ {
+ ps_ctxt->i4_max_merge_candidates = 3;
+ ps_ctxt->i4_use_satd_for_merge_eval = 1;
+ ps_ctxt->u1_use_top_at_ctb_boundary = 0;
+
+ ps_ctxt->u1_use_early_cbf_data = 0;
+ ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_MS;
+ ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
+ ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_MS;
+
+ break;
+ }
+ case IHEVCE_QUALITY_P4:
+ {
+ ps_ctxt->i4_max_merge_candidates = 2;
+ ps_ctxt->i4_use_satd_for_merge_eval = 1;
+ ps_ctxt->u1_use_top_at_ctb_boundary = 0;
+ ps_ctxt->u1_use_early_cbf_data = 0;
+ ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_HS;
+ ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
+ ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_HS;
+
+ break;
+ }
+ case IHEVCE_QUALITY_P5:
+ {
+ ps_ctxt->i4_max_merge_candidates = 2;
+ ps_ctxt->i4_use_satd_for_merge_eval = 0;
+ ps_ctxt->u1_use_top_at_ctb_boundary = 0;
+ ps_ctxt->u1_use_early_cbf_data = 0;
+ ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt = ENABLE_CHROMA_RDOPT_EVAL_IN_XS;
+ ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd =
+ ENABLE_ADDITIONAL_CHROMA_MODES_EVAL_IN_XS;
+
+ break;
+ }
+ case IHEVCE_QUALITY_P6:
+ {
+ ps_ctxt->i4_max_merge_candidates = 2;
+ ps_ctxt->i4_use_satd_for_merge_eval = 0;
+ ps_ctxt->u1_use_top_at_ctb_boundary = 0;
+ ps_ctxt->u1_use_early_cbf_data = EARLY_CBF_ON;
+ break;
+ }
+ default:
+ {
+ ASSERT(0);
+ }
+ }
+
+#if DISABLE_SKIP_AND_MERGE_EVAL
+ ps_ctxt->i4_max_merge_candidates = 0;
+#endif
+
+ ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data =
+ !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
+
+ /*initialize memory for RC related parameters required/populated by enc_loop */
+ /* the allocated memory is distributed as follows assuming encoder is running for 3 bit-rate instances
+ |-------|-> Thread 0, instance 0
+ | |
+ | |
+ | |
+ |-------|-> thread 0, instance 1
+ | |
+ | |
+ | |
+ |-------|-> thread 0, instance 2
+ | |
+ | |
+ | |
+ |-------|-> thread 1, instance 0
+ | |
+ | |
+ | |
+ |-------|-> thread 1, instance 1
+ | |
+ | |
+ | |
+ |-------|-> thread 1, instance 2
+ ... ...
+
+ Each thread will collate the data corresponding to the bit-rate instance it's running at the appropriate place.
+ Finally, one thread will become master and collate the data from all the threads */
+ for(i4_enc_frm_id = 0; i4_enc_frm_id < i4_num_enc_loop_frm_pllel; i4_enc_frm_id++)
+ {
+ for(i = 0; i < i4_num_bitrate_inst; i++)
+ {
+ ps_ctxt->aaps_enc_loop_rc_params[i4_enc_frm_id][i] = ps_enc_loop_rc_params;
+ ps_enc_loop_rc_params++;
+ }
+ }
+ /* Non-Luma modes for Chroma are evaluated only in HIGH QUALITY preset */
+
+#if !ENABLE_SEPARATE_LUMA_CHROMA_INTRA_MODE
+ ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd = 0;
+#endif
+
+ ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU].i4_iq_buff_stride =
+ MAX_TU_SIZE;
+ ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU_DIV2].i4_iq_buff_stride =
+ MAX_TU_SIZE;
+ /*Multiplying by two to account for interleaving of cb and cr*/
+ ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU].i4_pred_stride = MAX_TU_SIZE
+ << 1;
+ ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[TU_EQ_CU_DIV2].i4_pred_stride =
+ MAX_TU_SIZE << 1;
+
+ /* Memory for a frame level memory to store tile-id */
+ /* corresponding to each CTB of frame */
+ ps_ctxt->pi4_offset_for_last_cu_qp = &ps_master_ctxt->ai4_offset_for_last_cu_qp[0];
+
+ ps_ctxt->i4_qp_mod = ps_init_prms->s_config_prms.i4_cu_level_rc & 1;
+ /* psy rd strength is a run-time parameter controlled by bits 5-7 of the VQET field. */
+ /* we disable psyrd if the psy strength is zero or the BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER field is not set */
+ if(ps_init_prms->s_coding_tools_prms.i4_vqet &
+ (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER))
+ {
+ UWORD32 psy_strength;
+ UWORD32 psy_strength_mask =
+ 224; // only bits 5,6,7 are ones. These three bits represent the psy strength
+ psy_strength = ps_init_prms->s_coding_tools_prms.i4_vqet & psy_strength_mask;
+ ps_ctxt->u1_enable_psyRDOPT = 1;
+ ps_ctxt->u4_psy_strength = psy_strength >> BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_1;
+ if(psy_strength == 0)
+ {
+ ps_ctxt->u1_enable_psyRDOPT = 0;
+ ps_ctxt->u4_psy_strength = 0;
+ }
+ }
+
+ ps_ctxt->u1_is_stasino_enabled =
+ ((ps_init_prms->s_coding_tools_prms.i4_vqet &
+ (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER)) &&
+ (ps_init_prms->s_coding_tools_prms.i4_vqet &
+ (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_NOISE_PRESERVATION)));
+
+ ps_ctxt->u1_max_inter_tr_depth = ps_init_prms->s_config_prms.i4_max_tr_tree_depth_nI;
+ ps_ctxt->u1_max_intra_tr_depth = ps_init_prms->s_config_prms.i4_max_tr_tree_depth_I;
+ ps_ctxt++;
+ }
+ /* Store Tile params base into EncLoop Master context */
+ ps_master_ctxt->pv_tile_params_base = (void *)ps_tile_params_base;
+
+ if(1 == ps_tile_params_base->i4_tiles_enabled_flag)
+ {
+ i4_num_tile_cols = ps_tile_params_base->i4_num_tile_cols;
+ }
+
+ /* Updating ai4_offset_for_last_cu_qp[] array for all tile-columns of frame */
+ /* Loop over all tile-cols in frame */
+ for(ctr = 0; ctr < i4_num_tile_cols; ctr++)
+ {
+ WORD32 i4_tile_col_wd_in_ctb_unit =
+ (ps_tile_params_base + ctr)->i4_curr_tile_wd_in_ctb_unit;
+ WORD32 offset_x;
+
+ if(ctr == (i4_num_tile_cols - 1))
+ { /* Last tile-row of frame */
+ WORD32 min_cu_size = 1 << ps_init_prms->s_config_prms.i4_min_log2_cu_size;
+
+ WORD32 cu_aligned_pic_wd =
+ ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width +
+ SET_CTB_ALIGN(
+ ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width,
+ min_cu_size);
+
+ WORD32 last_hz_ctb_wd = MAX_CTB_SIZE - (u4_width - cu_aligned_pic_wd);
+
+ offset_x = (i4_tile_col_wd_in_ctb_unit - 1) * MAX_CTB_SIZE;
+ offset_x += last_hz_ctb_wd;
+ }
+ else
+ { /* Not the last tile-row of frame */
+ offset_x = (i4_tile_col_wd_in_ctb_unit)*MAX_CTB_SIZE;
+ }
+
+ offset_x /= 4;
+ offset_x -= 1;
+
+ ps_master_ctxt->ai4_offset_for_last_cu_qp[ctr] = offset_x;
+ }
+
+ n_tabs = NUM_ENC_LOOP_MEM_RECS;
+
+ /*store num bit-rate instances in the master context */
+ ps_master_ctxt->i4_num_bitrates = i4_num_bitrate_inst;
+ ps_master_ctxt->i4_num_enc_loop_frm_pllel = i4_num_enc_loop_frm_pllel;
+ /*************************************************************************/
+ /* --- EncLoop Deblock sync Dep Mngr Mem init -- */
+ /*************************************************************************/
+ {
+ WORD32 count;
+ WORD32 num_vert_units, num_blks_in_row;
+ WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
+ WORD32 wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
+
+ ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
+ ihevce_enc_loop_dblk_get_prms_dep_mngr(wd, &num_blks_in_row);
+ ASSERT(num_vert_units > 0);
+ ASSERT(num_blks_in_row > 0);
+
+ for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
+ {
+ for(i = 0; i < i4_num_bitrate_inst; i++)
+ {
+ ps_master_ctxt->aapv_dep_mngr_enc_loop_dblk[count][i] = ihevce_dmgr_init(
+ &ps_mem_tab[n_tabs],
+ pv_osal_handle,
+ DEP_MNGR_ROW_ROW_SYNC,
+ num_vert_units,
+ num_blks_in_row,
+ i4_num_tile_cols, /* Number of Col Tiles */
+ i4_num_proc_thrds,
+ 0 /*Sem Disabled*/
+ );
+
+ n_tabs += ihevce_dmgr_get_num_mem_recs();
+ }
+ }
+ }
+ /*************************************************************************/
+ /* --- EncLoop Top-Right CU sync Dep Mngr Mem init -- */
+ /*************************************************************************/
+ {
+ WORD32 count;
+ WORD32 num_vert_units, num_blks_in_row;
+ WORD32 ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
+ WORD32 wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
+
+ WORD32 i4_sem = 0;
+
+ if(ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset >=
+ IHEVCE_QUALITY_P4)
+ i4_sem = 0;
+ else
+ i4_sem = 1;
+ ihevce_enc_loop_dblk_get_prms_dep_mngr(ht, &num_vert_units);
+ /* For Top-Right CU sync, adding one more CTB since value updation */
+ /* happens in that way for the last CTB in the row */
+ num_blks_in_row = wd + SET_CTB_ALIGN(wd, MAX_CU_SIZE);
+ num_blks_in_row += MAX_CTB_SIZE;
+
+ ASSERT(num_vert_units > 0);
+ ASSERT(num_blks_in_row > 0);
+
+ for(count = 0; count < i4_num_enc_loop_frm_pllel; count++)
+ {
+ for(i = 0; i < i4_num_bitrate_inst; i++)
+ {
+ /* For ES/HS, CU level updates uses spin-locks than semaphore */
+ {
+ ps_master_ctxt->aapv_dep_mngr_enc_loop_cu_top_right[count][i] =
+ ihevce_dmgr_init(
+ &ps_mem_tab[n_tabs],
+ pv_osal_handle,
+ DEP_MNGR_ROW_ROW_SYNC,
+ num_vert_units,
+ num_blks_in_row,
+ i4_num_tile_cols, /* Number of Col Tiles */
+ i4_num_proc_thrds,
+ i4_sem /*Sem Disabled*/
+ );
+ }
+ n_tabs += ihevce_dmgr_get_num_mem_recs();
+ }
+ }
+ }
+
+ for(i = 1; i < 5; i++)
+ {
+ WORD32 i4_log2_trans_size = i + 1;
+ WORD32 i4_bit_depth = ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth;
+
+ ga_trans_shift[i] = (MAX_TR_DYNAMIC_RANGE - i4_bit_depth - i4_log2_trans_size) << 1;
+ }
+
+ ga_trans_shift[0] = ga_trans_shift[1];
+
+ /* return the handle to caller */
+ return ((void *)ps_master_ctxt);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_enc_loop_reg_sem_hdls \endif
+*
+* \brief
+*    Registers the semaphore handles with the EncLoop Deblock and Top-Right CU Dep Mngrs
+*
+* \param[in] pv_enc_loop_ctxt : pointer to EncLoop master ctxt
+* \param[in] ppv_sem_hdls : Array of semaphore handles
+* \param[in] i4_num_proc_thrds : Number of processing threads
+*
+* \return
+*    None
+*
+* \author
+*  Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_enc_loop_reg_sem_hdls(
+    void *pv_enc_loop_ctxt, void **ppv_sem_hdls, WORD32 i4_num_proc_thrds)
+{
+    ihevce_enc_loop_master_ctxt_t *ps_master = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
+    WORD32 frm_idx, br_idx;
+
+    /* Pass 1 : Deblock sync Dep Mngr of every frame-parallel / bitrate instance */
+    frm_idx = 0;
+    while(frm_idx < ps_master->i4_num_enc_loop_frm_pllel)
+    {
+        br_idx = 0;
+        while(br_idx < ps_master->i4_num_bitrates)
+        {
+            ihevce_dmgr_reg_sem_hdls(
+                ps_master->aapv_dep_mngr_enc_loop_dblk[frm_idx][br_idx],
+                ppv_sem_hdls,
+                i4_num_proc_thrds);
+            br_idx++;
+        }
+        frm_idx++;
+    }
+
+    /* Pass 2 : Top-Right CU sync Dep Mngr of every frame-parallel / bitrate instance */
+    frm_idx = 0;
+    while(frm_idx < ps_master->i4_num_enc_loop_frm_pllel)
+    {
+        br_idx = 0;
+        while(br_idx < ps_master->i4_num_bitrates)
+        {
+            ihevce_dmgr_reg_sem_hdls(
+                ps_master->aapv_dep_mngr_enc_loop_cu_top_right[frm_idx][br_idx],
+                ppv_sem_hdls,
+                i4_num_proc_thrds);
+            br_idx++;
+        }
+        frm_idx++;
+    }
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_enc_loop_delete \endif
+*
+* \brief
+*    Deletes the Dep Mngrs owned by the EncLoop module
+*    Note : Only resources held by the module (semaphores etc.) are destroyed;
+*    memory free is done separately using memtabs
+*
+* \param[in] pv_enc_loop_ctxt : pointer to EncLoop ctxt
+*
+* \return
+*    None
+*
+* \author
+*  Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_enc_loop_delete(void *pv_enc_loop_ctxt)
+{
+    ihevce_enc_loop_master_ctxt_t *ps_master = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
+    WORD32 frm_idx = 0;
+
+    while(frm_idx < ps_master->i4_num_enc_loop_frm_pllel)
+    {
+        WORD32 br_idx;
+
+        for(br_idx = 0; br_idx < ps_master->i4_num_bitrates; br_idx++)
+        {
+            /* Deblock sync Dep Mngr first, then the Top-Right CU sync Dep Mngr */
+            ihevce_dmgr_del(ps_master->aapv_dep_mngr_enc_loop_dblk[frm_idx][br_idx]);
+            ihevce_dmgr_del(ps_master->aapv_dep_mngr_enc_loop_cu_top_right[frm_idx][br_idx]);
+        }
+        frm_idx++;
+    }
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_enc_loop_dep_mngr_frame_reset \endif
+*
+* \brief
+*    Frame level reset of the Dep Mngrs local to EncLoop (CU_TopRight and Dblk)
+*
+* \param[in] pv_enc_loop_ctxt : Enc_loop context pointer
+* \param[in] enc_frm_id : frame-parallel instance id (0 is used when only one exists)
+*
+* \return
+*    None
+*
+* \author
+*  Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_enc_loop_dep_mngr_frame_reset(void *pv_enc_loop_ctxt, WORD32 enc_frm_id)
+{
+    ihevce_enc_loop_master_ctxt_t *ps_master =
+        (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
+
+    /* With one frame-parallel instance the Dep Mngrs of slot 0 are used, */
+    /* whatever id is passed in; otherwise enc_frm_id selects the slot    */
+    WORD32 slot = (1 == ps_master->i4_num_enc_loop_frm_pllel) ? 0 : enc_frm_id;
+    WORD32 br_idx;
+
+    /* One Dblk and one CU_TopRight Dep Mngr are reset */
+    /* for every bitrate instance of the selected slot  */
+    br_idx = 0;
+    while(br_idx < ps_master->i4_num_bitrates)
+    {
+        /* Dep. Mngr : Reset the num ctb Deblocked in every row for ENC sync */
+        ihevce_dmgr_rst_row_row_sync(
+            ps_master->aapv_dep_mngr_enc_loop_dblk[slot][br_idx]);
+
+        /* Dep. Mngr : Reset the TopRight CU Processed in every row for ENC sync */
+        ihevce_dmgr_rst_row_row_sync(
+            ps_master->aapv_dep_mngr_enc_loop_cu_top_right[slot][br_idx]);
+
+        br_idx++;
+    }
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_enc_loop_frame_init \endif
+*
+* \brief
+* Frame level init of the encode loop context of one thread.
+*
+* \param[in] pv_enc_loop_ctxt : Enc_loop context pointer
+* \param[in] i4_frm_qp : frame level QP
+* \param[in] aps_ref_list : ref pic list for the current frame
+* \param[in,out] ps_slice_hdr : ptr to current slice header params (current POC is stored in it)
+* \param[in] ps_pps : ptr to active pps params
+* \param[in] ps_sps : ptr to active sps params
+* \param[in] ps_vps : ptr to active vps params
+* \param[in,out] ps_frm_recon : recon buffer of current frame (collocated L0/L1 POC lists are filled)
+* \param[in] i4_thrd_id : index of the thread context that is initialised
+* \param[in] i1_weighted_pred_flag : weighted pred enable flag (unidir)
+* \param[in] i1_weighted_bipred_flag : weighted pred enable flag (bidir)
+* \param[in] log2_luma_wght_denom : down shift factor for weighted pred of luma
+* \param[in] log2_chroma_wght_denom : down shift factor for weighted pred of chroma
+* \param[in] cur_poc : current frame poc
+* \param[in] i4_bitrate_instance_num : number indicating the instance of bit-rate for multi-rate encoder
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_enc_loop_frame_init(
+ void *pv_enc_loop_ctxt,
+ WORD32 i4_frm_qp,
+ recon_pic_buf_t *(*aps_ref_list)[HEVCE_MAX_REF_PICS * 2],
+ recon_pic_buf_t *ps_frm_recon,
+ slice_header_t *ps_slice_hdr,
+ pps_t *ps_pps,
+ sps_t *ps_sps,
+ vps_t *ps_vps,
+ WORD8 i1_weighted_pred_flag,
+ WORD8 i1_weighted_bipred_flag,
+ WORD32 log2_luma_wght_denom,
+ WORD32 log2_chroma_wght_denom,
+ WORD32 cur_poc,
+ WORD32 i4_display_num,
+ enc_ctxt_t *ps_enc_ctxt,
+ me_enc_rdopt_ctxt_t *ps_curr_inp_prms,
+ WORD32 i4_bitrate_instance_num,
+ WORD32 i4_thrd_id,
+ WORD32 i4_enc_frm_id,
+ WORD32 i4_num_bitrates,
+ WORD32 i4_quality_preset,
+ void *pv_dep_mngr_encloop_dep_me)
+{
+ /* local variables */
+ ihevce_enc_loop_master_ctxt_t *ps_master_ctxt;
+ ihevce_enc_loop_ctxt_t *ps_ctxt;
+ WORD32 chroma_qp_offset, i4_div_factor;
+ WORD8 i1_slice_type = ps_slice_hdr->i1_slice_type;
+ WORD8 i1_strong_intra_smoothing_enable_flag = ps_sps->i1_strong_intra_smoothing_enable_flag;
+
+ /* ENC_LOOP master state structure */
+ ps_master_ctxt = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
+
+ /* Store the current POC in the slice header */
+ ps_slice_hdr->i4_abs_pic_order_cnt = cur_poc;
+
+ /* Update the POC list of the current frame to the recon buffer */
+ if(ps_slice_hdr->i1_num_ref_idx_l0_active != 0)
+ {
+ int i4_i;
+ for(i4_i = 0; i4_i < ps_slice_hdr->i1_num_ref_idx_l0_active; i4_i++)
+ {
+ ps_frm_recon->ai4_col_l0_poc[i4_i] = aps_ref_list[0][i4_i]->i4_poc;
+ }
+ }
+ if(ps_slice_hdr->i1_num_ref_idx_l1_active != 0)
+ {
+ int i4_i;
+ for(i4_i = 0; i4_i < ps_slice_hdr->i1_num_ref_idx_l1_active; i4_i++)
+ {
+ ps_frm_recon->ai4_col_l1_poc[i4_i] = aps_ref_list[1][i4_i]->i4_poc;
+ }
+ }
+
+ /* Only the context of thread i4_thrd_id is initialised; the loop over all threads is disabled */
+ // for(ctr = 0; ctr < ps_master_ctxt->i4_num_proc_thrds; ctr++)
+ {
+ /* ENC_LOOP state structure */
+ ps_ctxt = ps_master_ctxt->aps_enc_loop_thrd_ctxt[i4_thrd_id];
+
+ /* SAO ctxt structure initialization*/
+ ps_ctxt->s_sao_ctxt_t.ps_pps = ps_pps;
+ ps_ctxt->s_sao_ctxt_t.ps_sps = ps_sps;
+ ps_ctxt->s_sao_ctxt_t.ps_slice_hdr = ps_slice_hdr;
+
+ /*bit-rate instance number for Multi-bitrate (MBR) encode */
+ ps_ctxt->i4_bitrate_instance_num = i4_bitrate_instance_num;
+ ps_ctxt->i4_num_bitrates = i4_num_bitrates;
+ ps_ctxt->i4_chroma_format = ps_enc_ctxt->ps_stat_prms->s_src_prms.i4_chr_format;
+ ps_ctxt->i4_is_first_query = 1;
+ ps_ctxt->i4_is_ctb_qp_modified = 0;
+
+ /* enc_frm_id for multiframe encode : forced to 0 (also locally) with one frame in parallel */
+
+ if(1 == ps_enc_ctxt->s_multi_thrd.i4_num_enc_loop_frm_pllel)
+ {
+ ps_ctxt->i4_enc_frm_id = 0;
+ i4_enc_frm_id = 0;
+ }
+ else
+ {
+ ps_ctxt->i4_enc_frm_id = i4_enc_frm_id;
+ }
+
+ /* Initialize the sub pic rc buffers of this frame / bitrate instance / thread */
+
+ /* Set the thrd id flag */
+ ps_enc_ctxt->s_multi_thrd
+ .ai4_thrd_id_valid_flag[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 1;
+
+ ps_enc_ctxt->s_multi_thrd
+ .ai8_nctb_ipe_sad[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
+ ps_enc_ctxt->s_multi_thrd
+ .ai8_nctb_me_sad[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
+
+ ps_enc_ctxt->s_multi_thrd
+ .ai8_nctb_l0_ipe_sad[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
+ ps_enc_ctxt->s_multi_thrd
+ .ai8_nctb_act_factor[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
+
+ ps_enc_ctxt->s_multi_thrd
+ .ai8_nctb_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
+ ps_enc_ctxt->s_multi_thrd
+ .ai8_acc_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
+ ps_enc_ctxt->s_multi_thrd
+ .ai8_acc_bits_mul_qs_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
+ ps_enc_ctxt->s_multi_thrd
+ .ai8_nctb_hdr_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
+ ps_enc_ctxt->s_multi_thrd
+ .ai8_nctb_mpm_bits_consumed[i4_enc_frm_id][i4_bitrate_instance_num][i4_thrd_id] = 0;
+ ps_enc_ctxt->s_multi_thrd.ai4_prev_chunk_qp[i4_enc_frm_id][i4_bitrate_instance_num] =
+ i4_frm_qp;
+
+ /* Frame level data for Sub Pic rc is initialized here */
+ /* Can be sent once per frame */
+ {
+ WORD32 i4_tot_frame_ctb = ps_enc_ctxt->s_frm_ctb_prms.i4_num_ctbs_vert *
+ ps_enc_ctxt->s_frm_ctb_prms.i4_num_ctbs_horz;
+
+ /* Accumulated bits of all cu for required CTBS estimated during RDO evaluation */
+ ps_ctxt->u4_total_cu_bits = 0;
+ ps_ctxt->u4_total_cu_hdr_bits = 0;
+
+ ps_ctxt->u4_cu_tot_bits_into_qscale = 0;
+ ps_ctxt->u4_cu_tot_bits = 0;
+ ps_ctxt->u4_total_cu_bits_mul_qs = 0;
+ ps_ctxt->i4_display_num = i4_display_num;
+ ps_ctxt->i4_sub_pic_level_rc = ps_enc_ctxt->s_multi_thrd.i4_in_frame_rc_enabled;
+ /* CTB count for the Qscale update : UPDATE_QP_AT_CTB percent of the CTBs in the frame */
+ //ps_ctxt->i4_num_ctb_for_out_scale = (10 * i4_tot_frame_ctb)/100 ;
+ ps_ctxt->i4_num_ctb_for_out_scale = (UPDATE_QP_AT_CTB * i4_tot_frame_ctb) / 100;
+
+ ps_ctxt->i4_cu_qp_sub_pic_rc = (1 << QP_LEVEL_MOD_ACT_FACTOR);
+ /*Sub Pic RC frame level params */
+ ps_ctxt->i8_frame_l1_ipe_sad =
+ ps_curr_inp_prms->ps_curr_inp->s_rc_lap_out.i8_raw_pre_intra_sad;
+ ps_ctxt->i8_frame_l0_ipe_satd =
+ ps_curr_inp_prms->ps_curr_inp->s_lap_out.i8_frame_l0_acc_satd;
+ ps_ctxt->i8_frame_l1_me_sad =
+ ps_curr_inp_prms->ps_curr_inp->s_rc_lap_out.i8_raw_l1_coarse_me_sad;
+ ps_ctxt->i8_frame_l1_activity_fact =
+ ps_curr_inp_prms->ps_curr_inp->s_lap_out.i8_frame_level_activity_fact;
+ if(ps_ctxt->i4_sub_pic_level_rc)
+ {
+ ASSERT(
+ ps_curr_inp_prms->ps_curr_inp->s_lap_out
+ .ai4_frame_bits_estimated[ps_ctxt->i4_bitrate_instance_num] != 0);
+
+ ps_ctxt->ai4_frame_bits_estimated[ps_ctxt->i4_enc_frm_id]
+ [ps_ctxt->i4_bitrate_instance_num] =
+ ps_curr_inp_prms->ps_curr_inp->s_lap_out
+ .ai4_frame_bits_estimated[ps_ctxt->i4_bitrate_instance_num];
+ }
+ //ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_scene_type = 1;
+
+ ps_ctxt->i4_is_I_scenecut =
+ ((ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_scene_type == SCENE_TYPE_SCENE_CUT) &&
+ (ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_pic_type == IV_IDR_FRAME ||
+ ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_pic_type == IV_I_FRAME));
+
+ ps_ctxt->i4_is_non_I_scenecut =
+ ((ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_scene_type == SCENE_TYPE_SCENE_CUT) &&
+ (ps_ctxt->i4_is_I_scenecut == 0));
+
+ /*ps_ctxt->i4_is_I_only_scd = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_is_I_only_scd;
+ ps_ctxt->i4_is_non_I_scd = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_is_non_I_scd;*/
+ ps_ctxt->i4_is_model_valid =
+ ps_curr_inp_prms->ps_curr_inp->s_rc_lap_out.i4_is_model_valid;
+ }
+ /* cb and cr offsets are assumed to be same (asserted below), so only cb is used */
+ chroma_qp_offset = ps_slice_hdr->i1_slice_cb_qp_offset + ps_pps->i1_pic_cb_qp_offset;
+
+ /* assumption of cb = cr qp */
+ ASSERT(ps_slice_hdr->i1_slice_cb_qp_offset == ps_slice_hdr->i1_slice_cr_qp_offset);
+ ASSERT(ps_pps->i1_pic_cb_qp_offset == ps_pps->i1_pic_cr_qp_offset);
+
+ ps_ctxt->u1_is_input_data_hbd = (ps_sps->i1_bit_depth_luma_minus8 > 0);
+
+ ps_ctxt->u1_bit_depth = ps_sps->i1_bit_depth_luma_minus8 + 8;
+
+ ps_ctxt->s_mc_ctxt.i4_bit_depth = ps_ctxt->u1_bit_depth;
+ ps_ctxt->s_mc_ctxt.u1_chroma_array_type = ps_ctxt->u1_chroma_array_type;
+
+ /* remember chroma qp offset as qp related parameters are calculated at CU level */
+ ps_ctxt->i4_chroma_qp_offset = chroma_qp_offset;
+ ps_ctxt->i1_cu_qp_delta_enable = ps_pps->i1_cu_qp_delta_enabled_flag;
+ ps_ctxt->i1_entropy_coding_sync_enabled_flag = ps_pps->i1_entropy_coding_sync_enabled_flag;
+
+ ps_ctxt->i4_is_ref_pic = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_is_ref_pic;
+ ps_ctxt->i4_temporal_layer = ps_curr_inp_prms->ps_curr_inp->s_lap_out.i4_temporal_lyr_id;
+ ps_ctxt->i4_use_const_lamda_modifier = USE_CONSTANT_LAMBDA_MODIFIER;
+ ps_ctxt->i4_use_const_lamda_modifier =
+ ps_ctxt->i4_use_const_lamda_modifier ||
+ ((ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
+ (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER)) &&
+ ((ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
+ (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_NOISE_PRESERVATION)) ||
+ (ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
+ (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_1)) ||
+ (ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
+ (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_2)) ||
+ (ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
+ (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_3))));
+
+ {
+ ps_ctxt->f_i_pic_lamda_modifier =
+ ps_curr_inp_prms->ps_curr_inp->s_lap_out.f_i_pic_lamda_modifier;
+ }
+
+ ps_ctxt->i4_frame_qp = i4_frm_qp;
+ ps_ctxt->i4_frame_mod_qp = i4_frm_qp;
+ ps_ctxt->i4_cu_qp = i4_frm_qp;
+ ps_ctxt->i4_prev_cu_qp = i4_frm_qp;
+ ps_ctxt->i4_chrm_cu_qp =
+ (ps_ctxt->u1_chroma_array_type == 2)
+ ? MIN(i4_frm_qp + chroma_qp_offset, 51)
+ : gai1_ihevc_chroma_qp_scale[i4_frm_qp + chroma_qp_offset + MAX_QP_BD_OFFSET];
+
+ ps_ctxt->i4_cu_qp_div6 = (i4_frm_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) / 6;
+ i4_div_factor = (i4_frm_qp + 3) / 6;
+ i4_div_factor = CLIP3(i4_div_factor, 3, 6); /* NOTE(review): value is not read again in this function */
+ ps_ctxt->i4_cu_qp_mod6 = (i4_frm_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) % 6;
+
+ ps_ctxt->i4_chrm_cu_qp_div6 =
+ (ps_ctxt->i4_chrm_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) / 6;
+ ps_ctxt->i4_chrm_cu_qp_mod6 =
+ (ps_ctxt->i4_chrm_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) % 6;
+
+#define INTER_RND_QP_BY_6
+#ifdef INTER_RND_QP_BY_6
+
+ { /*1/6 rounding for 8 bit b frames*/
+ ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] = 85
+ /*((1 << QUANT_ROUND_FACTOR_Q) / 6)*/;
+ }
+#else
+ /* NOTE(review): compiled out (INTER_RND_QP_BY_6 is defined just above); divides by 3, not 6 */
+ ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] = (1 << QUANT_ROUND_FACTOR_Q) / 3;
+#endif
+
+ if(ISLICE == i1_slice_type)
+ {
+ /* quant factor without RDOQ is 1/3rd of shift for intra : like in H264 */
+ ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] = 171
+ /* NOTE(review): presumably (1 << QUANT_ROUND_FACTOR_Q) / 3, given the 1/3rd above - confirm */;
+ }
+ else
+ {
+ /* quant factor without RDOQ is 1/6th of shift for intra in inter pic */
+ ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] =
+ ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER];
+ /* (1 << QUANT_ROUND_FACTOR_Q) / 6; */
+ }
+
+ ps_ctxt->i1_strong_intra_smoothing_enable_flag = i1_strong_intra_smoothing_enable_flag;
+
+ ps_ctxt->i1_slice_type = i1_slice_type;
+
+ /* initialize the inter pred (MC) context at frame level */
+ ps_ctxt->s_mc_ctxt.ps_ref_list = aps_ref_list;
+ ps_ctxt->s_mc_ctxt.i1_weighted_pred_flag = i1_weighted_pred_flag;
+ ps_ctxt->s_mc_ctxt.i1_weighted_bipred_flag = i1_weighted_bipred_flag;
+ ps_ctxt->s_mc_ctxt.i4_log2_luma_wght_denom = log2_luma_wght_denom;
+ ps_ctxt->s_mc_ctxt.i4_log2_chroma_wght_denom = log2_chroma_wght_denom;
+
+ /* initialize the MV pred context at frame level */
+ ps_ctxt->s_mv_pred_ctxt.ps_ref_list = aps_ref_list;
+ ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr = ps_slice_hdr;
+ ps_ctxt->s_mv_pred_ctxt.ps_sps = ps_sps;
+ ps_ctxt->s_mv_pred_ctxt.i4_log2_parallel_merge_level_minus2 =
+ ps_pps->i1_log2_parallel_merge_level - 2;
+
+#if ADAPT_COLOCATED_FROM_L0_FLAG
+ if(ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_slice_temporal_mvp_enable_flag)
+ {
+ if((ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_num_ref_idx_l1_active > 0) &&
+ (ps_ctxt->s_mv_pred_ctxt.ps_ref_list[1][0]->i4_frame_qp <
+ ps_ctxt->s_mv_pred_ctxt.ps_ref_list[0][0]->i4_frame_qp))
+ {
+ ps_ctxt->s_mv_pred_ctxt.ps_slice_hdr->i1_collocated_from_l0_flag = 1;
+ }
+ }
+#endif
+ /* Initialization of deblocking params */
+ ps_ctxt->s_deblk_prms.i4_beta_offset_div2 = ps_slice_hdr->i1_beta_offset_div2;
+ ps_ctxt->s_deblk_prms.i4_tc_offset_div2 = ps_slice_hdr->i1_tc_offset_div2;
+
+ ps_ctxt->s_deblk_prms.i4_cb_qp_indx_offset = ps_pps->i1_pic_cb_qp_offset;
+
+ ps_ctxt->s_deblk_prms.i4_cr_qp_indx_offset = ps_pps->i1_pic_cr_qp_offset;
+ /* init frame level stat accumulation parameters */
+ ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
+ ->u4_frame_sad_acc = 0;
+ ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
+ ->u4_frame_intra_sad_acc = 0;
+ ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
+ ->u4_frame_open_loop_intra_sad = 0;
+ ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
+ ->i8_frame_open_loop_ssd = 0;
+ ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
+ ->u4_frame_inter_sad_acc = 0;
+
+ ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
+ ->i8_frame_cost_acc = 0;
+ ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
+ ->i8_frame_intra_cost_acc = 0;
+ ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
+ ->i8_frame_inter_cost_acc = 0;
+
+ ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
+ ->u4_frame_intra_sad = 0;
+ ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
+ ->u4_frame_rdopt_bits = 0;
+ ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
+ ->u4_frame_rdopt_header_bits = 0;
+ ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
+ ->i4_qp_normalized_8x8_cu_sum[0] = 0;
+ ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
+ ->i4_qp_normalized_8x8_cu_sum[1] = 0;
+ ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
+ ->i4_8x8_cu_sum[0] = 0;
+ ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
+ ->i4_8x8_cu_sum[1] = 0;
+ ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
+ ->i8_sad_by_qscale[0] = 0;
+ ps_ctxt->aaps_enc_loop_rc_params[ps_ctxt->i4_enc_frm_id][i4_bitrate_instance_num]
+ ->i8_sad_by_qscale[1] = 0;
+ /* Compute the frame_qstep */
+ GET_FRAME_QSTEP_FROM_QP(ps_ctxt->i4_frame_qp, ps_ctxt->i4_frame_qstep);
+
+ ps_ctxt->u1_max_tr_depth = ps_sps->i1_max_transform_hierarchy_depth_inter;
+
+ ps_ctxt->ps_rc_quant_ctxt = &ps_enc_ctxt->s_rc_quant;
+ /* initialize the cabac rdopt context at frame level */
+ ihevce_entropy_rdo_frame_init(
+ &ps_ctxt->s_rdopt_entropy_ctxt,
+ ps_slice_hdr,
+ ps_pps,
+ ps_sps,
+ ps_vps,
+ ps_master_ctxt->au1_cu_skip_top_row,
+ &ps_enc_ctxt->s_rc_quant);
+
+ /* register the dep mngr instance for forward ME sync */
+ ps_ctxt->pv_dep_mngr_encloop_dep_me = pv_dep_mngr_encloop_dep_me;
+ }
+}
+/*!
+******************************************************************************
+* \if Function name : ihevce_enc_loop_get_frame_rc_prms \endif
+*
+* \brief
+*    Sums the frame level RC statistics of all EncLoop threads into ps_rc_prms
+*
+* \param[in] pv_enc_loop_ctxt : pointer to encode loop context
+* \param[out] ps_rc_prms : ptr to frame level info structure
+* \param[in] i4_br_id : bitrate instance id
+* \param[in] i4_enc_frm_id : frame id (forced to 0 when only one frame is encoded at a time)
+*
+* \return
+*    None
+*
+* \author
+*  Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_enc_loop_get_frame_rc_prms(
+    void *pv_enc_loop_ctxt,
+    rc_bits_sad_t *ps_rc_prms,
+    WORD32 i4_br_id, //bitrate instance id
+    WORD32 i4_enc_frm_id) // frame id
+{
+    ihevce_enc_loop_master_ctxt_t *ps_master = (ihevce_enc_loop_master_ctxt_t *)pv_enc_loop_ctxt;
+    ihevce_enc_loop_ctxt_t *ps_thrd;
+    WORD32 thrd = 0, k;
+
+    /* Frame totals gathered across the threads */
+    UWORD32 u4_sad_sum = 0, u4_intra_sad_sum = 0, u4_ol_intra_sad_sum = 0;
+    UWORD32 u4_rdopt_bits_sum = 0, u4_rdopt_hdr_bits_sum = 0;
+    LWORD64 i8_ol_ssd_sum = 0;
+    WORD32 ai4_qp_norm_cu_sum[2] = { 0, 0 }, ai4_cu_sum[2] = { 0, 0 };
+    LWORD64 ai8_sad_by_qs[2] = { 0, 0 };
+
+    /* The frame id is forced to 0 when only one frame is encoded at a time */
+    if(1 == ps_master->i4_num_enc_loop_frm_pllel)
+    {
+        i4_enc_frm_id = 0;
+    }
+
+    /* Visit the ENC_LOOP state structure of every processing thread */
+    while(thrd < ps_master->i4_num_proc_thrds)
+    {
+        ps_thrd = ps_master->aps_enc_loop_thrd_ctxt[thrd];
+
+        /* SAD / SSD figures */
+        u4_sad_sum +=
+            ps_thrd->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_sad_acc;
+        u4_intra_sad_sum +=
+            ps_thrd->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_intra_sad;
+        u4_ol_intra_sad_sum +=
+            ps_thrd->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_open_loop_intra_sad;
+        i8_ol_ssd_sum +=
+            ps_thrd->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_frame_open_loop_ssd;
+        /* RDOPT bit counts (total and header part) */
+        u4_rdopt_bits_sum +=
+            ps_thrd->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_rdopt_bits;
+        u4_rdopt_hdr_bits_sum +=
+            ps_thrd->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->u4_frame_rdopt_header_bits;
+        /* Statistics kept as two-entry arrays */
+        for(k = 0; k < 2; k++)
+        {
+            ai4_qp_norm_cu_sum[k] += ps_thrd->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]
+                                         ->i4_qp_normalized_8x8_cu_sum[k];
+            ai4_cu_sum[k] +=
+                ps_thrd->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i4_8x8_cu_sum[k];
+            ai8_sad_by_qs[k] +=
+                ps_thrd->aaps_enc_loop_rc_params[i4_enc_frm_id][i4_br_id]->i8_sad_by_qscale[k];
+        }
+        thrd++;
+    }
+
+    /* Publish the totals; texture bits are the RDOPT bits less the header bits */
+    ps_rc_prms->u4_total_sad = u4_sad_sum;
+    /* NOTE: this accumulation of intra frame sad is marked as not intact (temporary change) */
+    ps_rc_prms->u4_total_intra_sad = u4_intra_sad_sum;
+    ps_rc_prms->u4_open_loop_intra_sad = u4_ol_intra_sad_sum;
+    ps_rc_prms->i8_total_ssd_frame = i8_ol_ssd_sum;
+    ps_rc_prms->u4_total_texture_bits = u4_rdopt_bits_sum - u4_rdopt_hdr_bits_sum;
+    ps_rc_prms->u4_total_header_bits = u4_rdopt_hdr_bits_sum;
+    for(k = 0; k < 2; k++)
+    {
+        ps_rc_prms->i4_qp_normalized_8x8_cu_sum[k] = ai4_qp_norm_cu_sum[k];
+        ps_rc_prms->i4_8x8_cu_sum[k] = ai4_cu_sum[k];
+        ps_rc_prms->i8_sad_by_qscale[k] = ai8_sad_by_qs[k];
+    }
+}
diff --git a/encoder/ihevce_enc_loop_pass.h b/encoder/ihevce_enc_loop_pass.h
new file mode 100644
index 0000000..e58e0f8
--- /dev/null
+++ b/encoder/ihevce_enc_loop_pass.h
@@ -0,0 +1,149 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_enc_loop_pass.h
+*
+* \brief
+* This file contains interface definition of Encode loop pass function
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_ENC_LOOP_PASS_H_
+#define _IHEVCE_ENC_LOOP_PASS_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Variable Declarations */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+WORD32 ihevce_enc_loop_get_num_mem_recs(WORD32 i4_num_bitrate_inst, WORD32 i4_num_enc_frm_parallel);
+/* NOTE(review): presumably fills ps_mem_tab with the EncLoop memory requirements - confirm in .c */
+WORD32 ihevce_enc_loop_get_mem_recs(
+    iv_mem_rec_t *ps_mem_tab,
+    ihevce_static_cfg_params_t *ps_init_prms,
+    WORD32 i4_num_proc_thrds,
+    WORD32 i4_num_bitrate_inst,
+    WORD32 i4_num_enc_frm_parallel,
+    WORD32 i4_mem_space,
+    WORD32 i4_resolution_id);
+/* Initialises the EncLoop module; the returned void * is the EncLoop master context handle */
+void *ihevce_enc_loop_init(
+    iv_mem_rec_t *ps_mem_tab,
+    ihevce_static_cfg_params_t *ps_init_prms,
+    WORD32 i4_num_proc_thrds,
+    void *pv_osal_handle,
+    func_selector_t *ps_func_selector,
+    rc_quant_t *ps_rc_quant_ctxt,
+    ihevce_tile_params_t *ps_tile_params_base,
+    WORD32 i4_resolution_id,
+    WORD32 i4_num_enc_loop_frm_pllel,
+    UWORD8 u1_is_popcnt_available);
+/* Registers the semaphore handles with the Deblock and Top-Right CU sync Dep Mngrs */
+void ihevce_enc_loop_reg_sem_hdls(
+    void *pv_enc_loop_ctxt, void **ppv_sem_hdls, WORD32 i4_num_proc_thrds);
+/* Frame level reset of the Deblock and Top-Right CU sync Dep Mngrs */
+void ihevce_enc_loop_dep_mngr_frame_reset(void *pv_enc_loop_ctxt, WORD32 enc_frm_id);
+/* Deletes the Deblock and Top-Right CU sync Dep Mngrs of every frame / bitrate instance */
+void ihevce_enc_loop_delete(void *pv_enc_loop_ctxt);
+/* Frame level init of the EncLoop context of thread i4_thrd_id */
+void ihevce_enc_loop_frame_init(
+    void *pv_enc_loop_ctxt,
+    WORD32 i4_frm_qp,
+    recon_pic_buf_t *(*aps_ref_list)[HEVCE_MAX_REF_PICS * 2],
+    recon_pic_buf_t *ps_frm_recon,
+    slice_header_t *ps_slice_hdr,
+    pps_t *ps_pps,
+    sps_t *ps_sps,
+    vps_t *ps_vps,
+    WORD8 i1_weighted_pred_flag,
+    WORD8 i1_weighted_bipred_flag,
+    WORD32 log2_luma_wght_denom,
+    WORD32 log2_chroma_wght_denom,
+    WORD32 cur_poc,
+    WORD32 i4_display_num,
+    enc_ctxt_t *ps_enc_ctxt,
+    me_enc_rdopt_ctxt_t *ps_curr_inp_prms,
+    WORD32 i4_bitrate_instance_num,
+    WORD32 i4_thrd_id,
+    WORD32 i4_enc_frm_id,
+    WORD32 i4_num_bitrates,
+    WORD32 i4_quality_preset,
+    void *pv_dep_mngr_encloop_dep_me);
+/* NOTE(review): presumably the per-thread (thrd_id) EncLoop processing entry point - confirm in .c */
+void ihevce_enc_loop_process(
+    void *pv_ctxt,
+    ihevce_lap_enc_buf_t *ps_curr_inp,
+    ctb_analyse_t *ps_ctb_in,
+    ipe_l0_ctb_analyse_for_me_t *ps_ipe_analyse,
+    recon_pic_buf_t *ps_frm_recon,
+    cur_ctb_cu_tree_t *ps_cu_tree_out,
+    ctb_enc_loop_out_t *ps_ctb_out,
+    cu_enc_loop_out_t *ps_cu_out,
+    tu_enc_loop_out_t *ps_tu_out,
+    pu_t *ps_pu_out,
+    UWORD8 *pu1_frm_ecd_data,
+    frm_ctb_ctxt_t *ps_frm_ctb_prms,
+    frm_lambda_ctxt_t *ps_frm_lamda,
+    multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
+    WORD32 thrd_id,
+    WORD32 i4_enc_frm_id,
+    WORD32 i4_pass);
+/* NOTE(review): presumably returns the cost of the CU mode that was decided - confirm in .c */
+LWORD64 ihevce_cu_mode_decide(
+    ihevce_enc_loop_ctxt_t *ps_ctxt,
+    enc_loop_cu_prms_t *ps_cu_prms,
+    cu_analyse_t *ps_cu_analyse,
+    final_mode_state_t *ps_final_mode_state,
+    UWORD8 *pu1_ecd_data,
+    pu_col_mv_t *ps_col_pu,
+    UWORD8 *pu1_col_pu_map,
+    WORD32 col_start_pu_idx);
+
+#endif /* _IHEVCE_ENC_LOOP_PASS_H_ */
diff --git a/encoder/ihevce_enc_loop_structs.h b/encoder/ihevce_enc_loop_structs.h
new file mode 100644
index 0000000..ce6f0cb
--- /dev/null
+++ b/encoder/ihevce_enc_loop_structs.h
@@ -0,0 +1,3198 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_enc_loop_structs.h
+*
+* \brief
+* This file contains structures of enc_loop pass
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_ENC_LOOP_STRUCTS_H_
+#define _IHEVCE_ENC_LOOP_STRUCTS_H_
+
+#include "ihevc_macros.h"
+
+extern UWORD16 gau2_ihevce_cabac_bin_to_bits[64 * 2];
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+/** \brief 4x4 DST, 4x4, 8x8, 16x16, 32x32 */
+#define NUM_TRANS_TYPES 5
+#define INTRA_PLANAR 0
+#define INTRA_DC 1
+#define NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD 2
+#define MAX_TU_IN_TU_EQ_DIV_2 4
+#define MAX_MVP_LIST_CAND 2
+#define MAX_COST 0x7ffffff /* NOTE(review): this is 2^27 - 1, one hex digit short of 0x7fffffff - confirm intended */
+#define MAX_COST_64 0x7ffffffffffffff /* NOTE(review): also shorter than 0x7fffffffffffffff - confirm intended */
+#define NUM_32CU_AND_64CU_IN_CTB 5 /* 4 - 32x32 + 1 64x64*/
+#define PING_PONG 2
+#define MAX_SAO_RD_CAND 10
+#define SCRATCH_BUF_STRIDE 80
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+#define INTRA_ANGULAR(x) (x)
+
+/** @brief max 30bit value */
+#define MAX30 ((1 << 30) - 1)
+
+/* @brief macro to clip a data to max of 30bits (assuming unsigned); note that x is evaluated twice */
+#define CLIP30(x) ((x) > MAX30 ? MAX30 : (x))
+
+/* @brief compute ((lambda * rate) >> qshift) clipped to 30 bits; r is widened to 64 bits BEFORE the multiply so the product is not computed in a narrower type */
+#define COMPUTE_RATE_COST_CLIP30(r, l, qshift) ((WORD32)CLIP30((ULWORD64)((ULWORD64)(r) * (l)) >> (qshift)))
+
+#define IHEVCE_INV_WT_PRED(inp, wt, off, shift) /* (((inp - off) << shift) * wt + 2^14) >> 15; every argument is parenthesized so expression arguments bind correctly */ \
+ (((((inp) - (off)) << (shift)) * (wt) + (1 << 14)) >> 15)
+
+#define POPULATE_PU_STRUCT(ps_pu, mvx, mvy, offset_x, offset_y, wd, ht, ref_idx, pred_lx) \
+ { /* fills *ps_pu as a non-intra PU: position and size are stored in units of 4 pels (size minus 1); pred_lx != 0 writes the L1 mv/ref and sets the L0 ref idx to -1, otherwise the reverse */ \
+ (ps_pu)->b4_pos_x = (offset_x) >> 2; \
+ (ps_pu)->b4_pos_y = (offset_y) >> 2; \
+ (ps_pu)->b4_wd = ((wd) >> 2) - 1; \
+ (ps_pu)->b4_ht = ((ht) >> 2) - 1; \
+ (ps_pu)->b1_intra_flag = 0; \
+ (ps_pu)->b2_pred_mode = pred_lx; \
+ if(pred_lx) \
+ { \
+ (ps_pu)->mv.i1_l0_ref_idx = -1; \
+ (ps_pu)->mv.i1_l1_ref_idx = ref_idx; \
+ (ps_pu)->mv.s_l1_mv.i2_mvx = mvx; \
+ (ps_pu)->mv.s_l1_mv.i2_mvy = mvy; \
+ } \
+ else \
+ { \
+ (ps_pu)->mv.i1_l0_ref_idx = ref_idx; \
+ (ps_pu)->mv.i1_l1_ref_idx = -1; \
+ (ps_pu)->mv.s_l0_mv.i2_mvx = mvx; \
+ (ps_pu)->mv.s_l0_mv.i2_mvy = mvy; \
+ } \
+ }
+
+#define GET_FRAME_QSTEP_FROM_QP(frame_qp, frame_qstep) \
+ { \
+ double q_steps[6] = { 0.625, 0.703, 0.79, 0.889, 1.0, 1.125 }; \
+ /* frame_qstep = 2^(frame_qp / 6) * q_steps[frame_qp % 6], truncated to WORD32 */ \
+ frame_qstep = (WORD32)((1 << ((frame_qp) / 6)) * q_steps[(frame_qp) % 6]); \
+ }
+
+#define INITIALISE_MERGE_RESULT_STRUCT(ps_merge_data, pas_pu_results) \
+ { \
+ WORD32 i, j, k; \
+ /* zero the per-part L0/L1 result counts, point aps_pu_results at pas_pu_results, and reset every result to MAX_COST with both ref indices set to -1 */ \
+ for(i = 0; i < TOT_NUM_PARTS; i++) \
+ { \
+ (ps_merge_data)->s_pu_results.u1_num_results_per_part_l0[i] = 0; \
+ (ps_merge_data)->s_pu_results.u1_num_results_per_part_l1[i] = 0; \
+ } \
+ for(i = 0; i < 2; i++) \
+ { \
+ for(j = 0; j < TOT_NUM_PARTS; j++) \
+ { \
+ (ps_merge_data)->s_pu_results.aps_pu_results[i][j] = pas_pu_results[i][j]; \
+ for(k = 0; k < MAX_NUM_RESULTS_PER_PART_LIST; k++) \
+ { \
+ pas_pu_results[i][j][k].i4_tot_cost = MAX_COST; \
+ pas_pu_results[i][j][k].pu.mv.i1_l0_ref_idx = -1; \
+ pas_pu_results[i][j][k].pu.mv.i1_l1_ref_idx = -1; \
+ } \
+ } \
+ } \
+ }
+
+#define POPULATE_CTB_PARAMS( \
+ ps_common_frm_prms, \
+ wt_inp, \
+ ctb_x_off, \
+ ctb_y_off, \
+ pred, \
+ cu_size, \
+ ref_stride, \
+ bidir_enabled, \
+ num_refs, \
+ rec_list_l0, \
+ rec_list_l1, \
+ non_wt_inp, \
+ lambda, \
+ lambda_q_shift, \
+ log_wdc) \
+ { /* copies the arguments into *ps_common_frm_prms; the '(' follows the macro name directly so this is a function-like macro, and parameter names differ from member names so substitution cannot rewrite the member accesses */ \
+ WORD32 i, j; \
+ (ps_common_frm_prms)->i4_bidir_enabled = (bidir_enabled); \
+ (ps_common_frm_prms)->i4_ctb_x_off = (ctb_x_off); \
+ (ps_common_frm_prms)->i4_ctb_y_off = (ctb_y_off); \
+ (ps_common_frm_prms)->i4_inp_stride = (cu_size); \
+ (ps_common_frm_prms)->i4_lamda = (lambda); \
+ (ps_common_frm_prms)->i4_pred_stride = (cu_size); \
+ (ps_common_frm_prms)->i4_rec_stride = (ref_stride); \
+ (ps_common_frm_prms)->pps_rec_list_l0 = (rec_list_l0); \
+ (ps_common_frm_prms)->pps_rec_list_l1 = (rec_list_l1); \
+ (ps_common_frm_prms)->ppu1_pred = (pred); \
+ (ps_common_frm_prms)->pu1_non_wt_inp = (non_wt_inp); \
+ (ps_common_frm_prms)->pu1_wkg_mem = NULL; \
+ (ps_common_frm_prms)->u1_lamda_qshift = (lambda_q_shift); \
+ (ps_common_frm_prms)->u1_num_ref = (num_refs); \
+ (ps_common_frm_prms)->wpred_log_wdc = (log_wdc); \
+ for(i = 0; i < 2; i++) \
+ { \
+ for(j = 0; j < MAX_NUM_REF; j++) \
+ { \
+ /* NOTE(review): the same member is overwritten on every iteration; a per-[i][j] destination may have been intended - confirm against the struct definition */ (ps_common_frm_prms)->apu1_wt_inp = (wt_inp)[i][j]; \
+ } \
+ } \
+ }
+
+#define COMPUTE_MERGE_IDX_COST(merge_idx_0_model, merge_idx, max_merge_cand, lambda, cost) \
+ { \
+ WORD32 cab_bits_q12 = 0; \
+ \
+ /* sanity checks */ \
+ ASSERT(((merge_idx) >= 0) && ((merge_idx) < (max_merge_cand))); \
+ \
+ /* encode the merge idx only if required */ \
+ if((max_merge_cand) > 1) \
+ { \
+ WORD32 bin = ((merge_idx) > 0); \
+ \
+ /* bits for the context modelled first bin */ \
+ cab_bits_q12 += gau2_ihevce_cabac_bin_to_bits[(merge_idx_0_model) ^ bin]; \
+ \
+ /* bits for larger merge idx coded as bypass truncated unary */ \
+ if(((max_merge_cand) > 2) && ((merge_idx) > 0)) \
+ { \
+ cab_bits_q12 += (MIN((merge_idx), ((max_merge_cand) - 2))) << CABAC_FRAC_BITS_Q; \
+ } \
+ \
+ cost = COMPUTE_RATE_COST_CLIP30( \
+ cab_bits_q12, (lambda), (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q)); \
+ } \
+ else \
+ { \
+ cost = 0; \
+ } \
+ }
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+
+typedef FT_CALC_HAD_SATD_8BIT *pf_res_trans_luma_had_chroma;
+
+/** \brief function pointer prototype for residue and transform enc_loop */
+typedef UWORD32 (*pf_res_trans_chroma)(
+ UWORD8 *pu1_src,
+ UWORD8 *pu1_pred,
+ WORD32 *pi4_tmp,
+ WORD16 *pi2_dst,
+ WORD32 src_strd,
+ WORD32 pred_strd,
+ WORD32 dst_strd_chr_flag);
+
+/** \brief function pointer prototype for quantization and inv Quant for ssd
+calc. for all transform sizes */
+typedef WORD32 (*pf_quant_iquant_ssd)(
+ WORD16 *pi2_coeffs,
+ WORD16 *pi2_quant_coeff,
+ WORD16 *pi2_q_dst,
+ WORD16 *pi2_iq_dst,
+ WORD32 trans_size,
+ WORD32 qp_div, /* qpscaled / 6 */
+ WORD32 qp_rem, /* qpscaled % 6 */
+ WORD32 q_add,
+ WORD32 *pi4_quant_round_factor_0_1,
+ WORD32 *pi4_quant_round_factor_1_2,
+ WORD32 src_strd,
+ WORD32 dst_q_strd,
+ WORD32 dst_iq_strd,
+ UWORD8 *csbf,
+ WORD32 csbf_strd,
+ WORD32 *zero_col,
+ WORD32 *zero_row,
+ WORD16 *pi2_dequant_coeff,
+ LWORD64 *pi8_cost);
+
+/** \brief function pointer prototype for quantization and inv Quant for ssd
+calc. for all transform sizes (in case of RDOQ + SBH) */
+typedef WORD32 (*pf_quant_iquant_ssd_sbh)(
+ WORD16 *pi2_coeffs,
+ WORD16 *pi2_quant_coeff,
+ WORD16 *pi2_q_dst,
+ WORD16 *pi2_iq_dst,
+ WORD32 trans_size,
+ WORD32 qp_div, /* qpscaled / 6 */
+ WORD32 qp_rem, /* qpscaled % 6 */
+ WORD32 q_add,
+ WORD32 src_strd,
+ WORD32 dst_q_strd,
+ WORD32 dst_iq_strd,
+ UWORD8 *csbf,
+ WORD32 csbf_strd,
+ WORD32 *zero_col,
+ WORD32 *zero_row,
+ WORD16 *pi2_dequant_coeff,
+ WORD32 *pi4_cost,
+ WORD32 i4_scan_idx,
+ WORD32 i4_perform_rdoq);
+
+/** \brief function pointer prototype for inverse transform and recon
+for all transform sizes : Luma */
+typedef void (*pf_it_recon)(
+ WORD16 *pi2_src,
+ WORD16 *pi2_tmp,
+ UWORD8 *pu1_pred,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd,
+ WORD32 pred_strd,
+ WORD32 dst_strd,
+ WORD32 zero_cols,
+ WORD32 zero_rows);
+
+/** \brief function pointer prototype for inverse transform and recon
+for all transform sizes : Chroma */
+typedef void (*pf_it_recon_chroma)(
+ WORD16 *pi2_src,
+ WORD16 *pi2_tmp,
+ UWORD8 *pu1_pred,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd,
+ WORD32 pred_strd,
+ WORD32 dst_strd,
+ WORD32 zero_cols,
+ WORD32 zero_rows);
+
+/** \brief function pointer prototype for luma sao. */
+typedef void (*pf_sao_luma)(
+ UWORD8 *pu1_src,
+ WORD32 src_strd,
+ UWORD8 *pu1_src_left,
+ UWORD8 *pu1_src_top,
+ UWORD8 *pu1_src_top_left,
+ UWORD8 *pu1_src_top_right,
+ UWORD8 *pu1_src_bot_left,
+ UWORD8 *pu1_avail,
+ WORD8 *pi1_sao_offset,
+ WORD32 wd,
+ WORD32 ht);
+
+/** \brief function pointer prototype for chroma sao. */
+typedef void (*pf_sao_chroma)(
+ UWORD8 *pu1_src,
+ WORD32 src_strd,
+ UWORD8 *pu1_src_left,
+ UWORD8 *pu1_src_top,
+ UWORD8 *pu1_src_top_left,
+ UWORD8 *pu1_src_top_right,
+ UWORD8 *pu1_src_bot_left,
+ UWORD8 *pu1_avail,
+ WORD8 *pi1_sao_offset_u,
+ WORD8 *pi1_sao_offset_v,
+ WORD32 wd,
+ WORD32 ht);
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+
+typedef enum
+{
+ IP_FUNC_MODE_0 = 0,
+ IP_FUNC_MODE_1,
+ IP_FUNC_MODE_2,
+ IP_FUNC_MODE_3TO9,
+ IP_FUNC_MODE_10,
+ IP_FUNC_MODE_11TO17,
+ IP_FUNC_MODE_18_34,
+ IP_FUNC_MODE_19TO25,
+ IP_FUNC_MODE_26,
+ IP_FUNC_MODE_27TO33,
+
+ NUM_IP_FUNCS
+
+} IP_FUNCS_T;
+
+typedef enum
+{
+ /* currently only cu and cu/2 modes are supported */
+ TU_EQ_CU = 0,
+ TU_EQ_CU_DIV2,
+ TU_EQ_SUBCU, /* only applicable for NXN mode at mincusize */
+
+ /* support for below modes needs to be added */
+ TU_EQ_CU_DIV4,
+ TU_EQ_CU_DIV8,
+ TU_EQ_CU_DIV16,
+
+ NUM_TU_WRT_CU,
+
+} TU_SIZE_WRT_CU_T;
+
+typedef enum
+{
+ RDOPT_MODE = 0,
+ RDOPT_SKIP_MODE = 1,
+
+ NUM_CORE_CALL_MODES,
+
+} CORE_FUNC_CALL_MODE_T;
+
+typedef enum
+{
+ ENC_LOOP_CTXT = 0,
+ ENC_LOOP_THRDS_CTXT,
+ ENC_LOOP_SCALE_MAT,
+ ENC_LOOP_RESCALE_MAT,
+ ENC_LOOP_TOP_LUMA,
+ ENC_LOOP_TOP_CHROMA,
+ ENC_LOOP_TOP_NBR4X4,
+ ENC_LOOP_RC_PARAMS, /* memory to dump rate control parameters by each thread for each bit-rate instance */
+ ENC_LOOP_QP_TOP_4X4,
+ ENC_LOOP_DEBLOCKING,
+ ENC_LOOP_422_CHROMA_INTRA_PRED,
+ ENC_LOOP_INTER_PRED,
+ ENC_LOOP_CHROMA_PRED_INTRA,
+ ENC_LOOP_REF_SUB_OUT,
+ ENC_LOOP_REF_FILT_OUT,
+ ENC_LOOP_CU_RECUR_LUMA_RECON,
+ ENC_LOOP_CU_RECUR_CHROMA_RECON,
+ ENC_LOOP_CU_RECUR_LUMA_PRED,
+ ENC_LOOP_CU_RECUR_CHROMA_PRED,
+ ENC_LOOP_LEFT_LUMA_DATA,
+ ENC_LOOP_LEFT_CHROMA_DATA,
+ ENC_LOOP_SAO,
+ ENC_LOOP_CU_COEFF_DATA,
+ ENC_LOOP_CU_RECUR_COEFF_DATA,
+ ENC_LOOP_CU_DEQUANT_DATA,
+ ENC_LOOP_RECON_DATA_STORE,
+ /* should always be the last entry */
+ NUM_ENC_LOOP_MEM_RECS
+
+} ENC_LOOP_MEM_TABS_T;
+
+/** Indices used for assigning the pred buffers: luma (2, ping-pong) and
+chroma (1) */
+typedef enum
+{
+ CU_ME_INTRA_PRED_LUMA_IDX0 = 0,
+ CU_ME_INTRA_PRED_LUMA_IDX1,
+ CU_ME_INTRA_PRED_CHROMA_IDX,
+
+ /* should be always the last entry; equals the number of indices above */
+ NUM_CU_ME_INTRA_PRED_IDX
+
+} CU_ME_INTRA_PRED_IDX_T;
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+* @brief Structure to store TU prms req. for enc_loop only
+******************************************************************************
+*/
+typedef struct
+{
+ /** Zero_col info. for the current TU Luma */
+ UWORD32 u4_luma_zero_col;
+ /** Zero_row info. for the current TU Luma */
+ UWORD32 u4_luma_zero_row;
+
+ /** Zero_col info. for the current TU Chroma Cb */
+ UWORD32 au4_cb_zero_col[2];
+ /** Zero_row info. for the current TU Chroma Cb */
+ UWORD32 au4_cb_zero_row[2];
+ /** Zero_col info. for the current TU Chroma Cr */
+ UWORD32 au4_cr_zero_col[2];
+ /** Zero_row info. for the current TU Chroma Cr */
+ UWORD32 au4_cr_zero_row[2];
+
+ /** bytes consumed by the luma ecd data */
+ WORD16 i2_luma_bytes_consumed;
+ /** bytes consumed by the Cb ecd data */
+ WORD16 ai2_cb_bytes_consumed[2];
+ /** bytes consumed by the Cr ecd data */
+ WORD16 ai2_cr_bytes_consumed[2];
+
+ /** flag to re-evaluate IQ and Coeff data of luma in the final_recon
+ function. If zero, uses the data from RDOPT cand. */
+ UWORD16 b1_eval_luma_iq_and_coeff_data : 1;
+ /** flag to re-evaluate IQ and Coeff data of chroma in the final_recon
+ function. If zero, uses the data from RDOPT cand. */
+ UWORD16 b1_eval_chroma_iq_and_coeff_data : 1;
+
+ /* TODO : No support now, need to add. Always compare ZERO_CBF cost */
+ /** Luma ZERO_CBF cost is compared with residue coding cost only if this
+ flag is enabled */
+ UWORD16 b1_eval_luma_zero_cbf_cost : 1;
+ /** Chroma ZERO_CBF cost is compared with residue coding cost only if this
+ flag is enabled */
+ UWORD16 b1_eval_chroma_zero_cbf_cost : 1;
+
+ /** Reserved to make WORD32 alignment */
+ UWORD16 b12_reserved : 12;
+
+} tu_enc_loop_temp_prms_t;
+
+typedef struct recon_datastore_t
+{
+ /* 2 to store current and best */
+ void *apv_luma_recon_bufs[2];
+
+ /* 0 to store cur chroma mode recon */
+ /* 1 to store winning independent chroma mode with a single TU's recon */
+ /* 2 to store winning independent chroma mode with 4 TUs' recon */
+ void *apv_chroma_recon_bufs[3];
+
+ /* The following two arrays are used to store the ID's of the buffers */
+ /* where the winning recon is being stored */
+ /* For Luma buffers, the permissible values are 0, 1 and UCHAR_MAX */
+ /* For Chroma buffers, the permissible values are 0, 1, 2 and UCHAR_MAX */
+ /* The value 'UCHAR_MAX' indicates the absence of Recon for that particular TU */
+ UWORD8 au1_bufId_with_winning_LumaRecon[MAX_TU_IN_CTB_ROW * MAX_TU_IN_CTB_ROW];
+
+ /* 2 - 2 Chroma planes */
+ /* 2 - 2 possible subTU's */
+ UWORD8 au1_bufId_with_winning_ChromaRecon[2][MAX_TU_IN_CTB_ROW * MAX_TU_IN_CTB_ROW][2];
+
+ WORD32 i4_lumaRecon_stride;
+
+ WORD32 i4_chromaRecon_stride;
+
+ UWORD8 au1_is_chromaRecon_available[3];
+
+ UWORD8 u1_is_lumaRecon_available;
+
+} recon_datastore_t;
+
+typedef struct enc_loop_cu_final_prms_t
+{
+ recon_datastore_t s_recon_datastore;
+
+ /**
+ * Cu size of the current cu being processed
+ */
+ UWORD8 u1_cu_size;
+ /**
+ * flags to indicate the final cu prediction mode
+ */
+ UWORD8 u1_intra_flag;
+
+ /**
+ * flags to indicate Skip mode for CU
+ */
+ UWORD8 u1_skip_flag;
+
+ /**
+ * number of tu in current cu for a given mode
+ * if skip then this value should be 1
+ */
+ UWORD16 u2_num_tus_in_cu;
+
+ /**
+ * number of pu in current cu for a given mode
+ * if skip then this value should be 1
+ */
+ UWORD16 u2_num_pus_in_cu;
+
+ /**
+ * total bytes produced in ECD data buffer
+ * if skip then this value should be 0
+ */
+ WORD32 i4_num_bytes_ecd_data;
+
+ /**
+ * Partition mode of the best candidate
+ * if skip then this value should be SIZE_2Nx2N
+ * @sa PART_SIZE_E
+ */
+ UWORD8 u1_part_mode;
+
+ /**
+ * indicates if inter cu has coded coeffs 1: coded, 0: not coded
+ * if skip then this value should be ignored
+ */
+ UWORD8 u1_is_cu_coded;
+
+ /**
+ * Chroma pred mode as signalled in bitstream
+ */
+ UWORD8 u1_chroma_intra_pred_mode;
+
+ /**
+ * To store the best chroma mode for TU. Will be same for NxN case.
+ * Actual Chroma pred
+ */
+ UWORD8 u1_chroma_intra_pred_actual_mode;
+
+ /**
+ * sad accumulated over all Tus of given CU
+ */
+ UWORD32 u4_cu_sad;
+
+ /**
+ * ssd (per the field name) accumulated over all Tus of given CU
+ */
+ LWORD64 i8_cu_ssd;
+
+ /**
+ * open loop intra sad
+ */
+ UWORD32 u4_cu_open_intra_sad;
+
+ /**
+ * header bits of cu estimated during RDO evaluation.
+ * Includes tu splits flags excludes cbf flags
+ */
+ UWORD32 u4_cu_hdr_bits;
+ /**
+ * luma residual bits of a cu estimated during RDO evaluation.
+ */
+ UWORD32 u4_cu_luma_res_bits;
+
+ /**
+ * chroma residual bits of a cu estimated during RDO evaluation.
+ */
+ UWORD32 u4_cu_chroma_res_bits;
+
+ /**
+ * cbf bits of a cu estimated during RDO evaluation (considered as part of texture bits later)
+ */
+ UWORD32 u4_cu_cbf_bits;
+
+ /**
+ * array of PU for current CU
+ * For Inter PUs this will contain the following
+ * - merge flag
+ * - (MVD and reference indices) or (Merge Index)
+ * - (if Cu is skipped then Merge index for skip
+ * will be in 1st PU entry in array)
+ * for intra PU only intra flag will be set to 1
+ *
+ */
+ pu_t as_pu_enc_loop[NUM_PU_PARTS];
+
+ /**
+ * array of PU for chroma usage
+ * in case of Merge MVs and reference idx of the final candidate
+ * used by luma need to be stored
+ * for intra PU this will not be used
+ */
+ pu_t as_pu_chrm_proc[NUM_PU_PARTS];
+
+ /**
+ * array of colocated PU for current CU
+ * MV and Ref pic id should be stored in this
+ * for intra PU only intra flag will be set to 1
+ */
+ pu_col_mv_t as_col_pu_enc_loop[NUM_INTER_PU_PARTS];
+
+ /** array to store the intra mode pred related params
+ * if nxn mode then all 4 locations will be used
+ */
+ intra_prev_rem_flags_t as_intra_prev_rem[NUM_PU_PARTS];
+
+ /**
+ * array to store TU properties of each tu in a CU
+ */
+ tu_enc_loop_out_t as_tu_enc_loop[MAX_TU_IN_CTB_ROW * MAX_TU_IN_CTB_ROW];
+
+ /**
+ * array to store TU properties (req. for enc_loop only and not for
+ * entropy) of each tu in a CU
+ */
+ tu_enc_loop_temp_prms_t as_tu_enc_loop_temp_prms[MAX_TU_IN_CTB_ROW * MAX_TU_IN_CTB_ROW];
+
+ /**
+ * Neighbour flags stored for chroma reuse
+ */
+ UWORD32 au4_nbr_flags[MAX_TU_IN_CTB_ROW * MAX_TU_IN_CTB_ROW];
+
+ /**
+ * intra pred modes stored for chroma reuse
+ */
+ UWORD8 au1_intra_pred_mode[4];
+
+ /**
+ * array for storing coeffs during RD opt stage at CU level.
+ * Luma and chroma together
+ */
+ UWORD8 *pu1_cu_coeffs;
+
+ /**
+ * Chroma coeffs start point, presumably within the pu1_cu_coeffs buffer (TODO confirm).
+ */
+ WORD32 i4_chrm_cu_coeff_strt_idx;
+
+ /**
+ * array for storing dequantized vals. during RD opt stage at CU level
+ * Luma and chroma together.
+ * Stride is assumed to be cu_size
+ * u-v interleaved storing is at TU level
+ */
+ WORD16 *pi2_cu_deq_coeffs;
+
+ /**
+ * Chroma deq_coeffs start point in the pi2_cu_deq_coeffs buffer.
+ */
+ WORD32 i4_chrm_deq_coeff_strt_idx;
+
+ /**
+ * The total RDOPT cost of the CU for the best mode
+ */
+ LWORD64 i8_best_rdopt_cost;
+
+ /**
+ * The current running RDOPT cost for the current mode
+ */
+ LWORD64 i8_curr_rdopt_cost;
+
+ LWORD64 i8_best_distortion;
+
+} enc_loop_cu_final_prms_t;
+
+typedef struct
+{
+ /** Current Cu chroma recon pointer in pic buffer */
+ UWORD8 *pu1_final_recon;
+
+ UWORD16 *pu2_final_recon;
+
+ /** Current Cu chroma source pointer in pic buffer */
+ UWORD8 *pu1_curr_src;
+
+ UWORD16 *pu2_curr_src;
+
+ /** Current CU chroma recon buffer stride */
+ WORD32 i4_chrm_recon_stride;
+
+ /** Current CU chroma source buffer stride */
+ WORD32 i4_chrm_src_stride;
+
+ /** Current Cu chroma Left pointer for intra pred */
+ UWORD8 *pu1_cu_left;
+
+ UWORD16 *pu2_cu_left;
+
+ /** Left buffer stride */
+ WORD32 i4_cu_left_stride;
+
+ /** Current Cu chroma top pointer for intra pred */
+ UWORD8 *pu1_cu_top;
+
+ UWORD16 *pu2_cu_top;
+
+ /** Current Cu chroma top left pointer for intra pred */
+ UWORD8 *pu1_cu_top_left;
+
+ UWORD16 *pu2_cu_top_left;
+
+} enc_loop_chrm_cu_buf_prms_t;
+
+typedef struct
+{
+ /** cost of the current satd cand */
+ WORD32 i4_cost;
+
+ /** tu size w.r.t. cu of the current satd cand (stored as WORD8 despite the i4_ prefix)
+ * @sa TU_SIZE_WRT_CU_T
+ */
+ WORD8 i4_tu_depth;
+
+ /**
+ * access valid number of entries in this array based on u1_part_mode
+ */
+ UWORD8 au1_intra_luma_modes[NUM_PU_PARTS];
+
+ /** @remarks u1_part_mode is 2Nx2N or NxN */
+ UWORD8 u1_part_mode; /* @sa: PART_SIZE_E */
+
+ /** Flag to indicate whether current candidate needs to be evaluated */
+ UWORD8 u1_eval_flag;
+
+} cu_intra_satd_out_t;
+
+/** \brief cu level parameters for SATD / RDOPT function */
+
+typedef struct
+{
+ /** pointer to source luma pointer
+ * pointer will be pointing to CTB start location
+ * At CU level based on the CU position this pointer
+ * has to appropriately incremented
+ */
+ UWORD8 *pu1_luma_src;
+
+ UWORD16 *pu2_luma_src;
+
+ /** pointer to source chroma pointer
+ * pointer will be pointing to CTB start location
+ * At CU level based on the CU position this pointer
+ * has to appropriately incremented
+ */
+ UWORD8 *pu1_chrm_src;
+
+ UWORD16 *pu2_chrm_src;
+
+ /** pointer to recon luma pointer
+ * pointer will be pointing to CTB start location
+ * At CU level based on the CU position this pointer
+ * has to appropriately incremented
+ */
+ UWORD8 *pu1_luma_recon;
+
+ UWORD16 *pu2_luma_recon;
+
+ /** pointer to recon chroma pointer
+ * pointer will be pointing to CTB start location
+ * At CU level based on the CU position this pointer
+ * has to appropriately incremented
+ */
+ UWORD8 *pu1_chrm_recon;
+
+ UWORD16 *pu2_chrm_recon;
+
+ /*1st pass parallel dpb buffer pointers similar to the above*/
+ UWORD8 *pu1_luma_recon_src;
+
+ UWORD16 *pu2_luma_recon_src;
+
+ UWORD8 *pu1_chrm_recon_src;
+
+ UWORD16 *pu2_chrm_recon_src;
+
+ /** Pointer to Subpel Plane Buffer */
+ UWORD8 *pu1_sbpel_hxfy;
+
+ /** Pointer to Subpel Plane Buffer */
+ UWORD8 *pu1_sbpel_fxhy;
+
+ /** Pointer to Subpel Plane Buffer */
+ UWORD8 *pu1_sbpel_hxhy;
+
+ /** Luma source stride */
+ WORD32 i4_luma_src_stride;
+
+ /** chroma source stride */
+ WORD32 i4_chrm_src_stride;
+
+ /** Luma recon stride */
+ WORD32 i4_luma_recon_stride;
+
+ /** chroma recon stride */
+ WORD32 i4_chrm_recon_stride;
+
+ /** ctb size */
+ WORD32 i4_ctb_size;
+
+ /** current ctb position horz */
+ WORD32 i4_ctb_pos;
+
+ /** number of PU finalized for curr CU */
+ WORD32 i4_num_pus_in_cu;
+
+ /** number of bytes consumed for current in ecd data buf */
+ WORD32 i4_num_bytes_cons;
+
+ UWORD8 u1_is_cu_noisy;
+
+ UWORD8 *pu1_is_8x8Blk_noisy;
+
+} enc_loop_cu_prms_t;
+
+/**
+******************************************************************************
+* @brief Pad inter pred recon context
+******************************************************************************
+*/
+typedef struct
+{
+ /** Pointer to Subpel Plane Buffer */
+ UWORD8 *pu1_sbpel_hxfy;
+
+ /** Pointer to Subpel Plane Buffer */
+ UWORD8 *pu1_sbpel_fxhy;
+
+ /** Pointer to Subpel Plane Buffer */
+ UWORD8 *pu1_sbpel_hxhy;
+
+ /** pointer to recon luma pointer
+ * pointer will be pointing to CTB start location
+ * At CU level based on the CU position this pointer
+ * has to appropriately incremented
+ */
+ UWORD8 *pu1_luma_recon;
+
+ /** pointer to recon chroma pointer
+ * pointer will be pointing to CTB start location
+ * At CU level based on the CU position this pointer
+ * has to appropriately incremented
+ */
+ UWORD8 *pu1_chrm_recon;
+
+ /*For recon source 1st pass starts*/
+
+ UWORD8 *pu1_luma_recon_src;
+
+ /** pointer to recon chroma pointer
+ * pointer will be pointing to CTB start location
+ * At CU level based on the CU position this pointer
+ * has to appropriately incremented
+ */
+ UWORD8 *pu1_chrm_recon_src;
+ /*For recon source 1st pass ends */
+ /** Luma recon stride */
+ WORD32 i4_luma_recon_stride;
+
+ /** chroma recon stride */
+ WORD32 i4_chrm_recon_stride;
+
+ /** ctb size */
+ WORD32 i4_ctb_size;
+
+ /* 0 - 400; 1 - 420; 2 - 422; 3 - 444 */
+ UWORD8 u1_chroma_array_type;
+
+} pad_interp_recon_frm_t;
+
+/**
+******************************************************************************
+* @brief inter prediction (MC) context for enc loop
+******************************************************************************
+*/
+/*IMPORTANT please keep inter_pred_ctxt_t and inter_pred_me_ctxt_t as identical*/
+typedef struct
+{
+ /** pointer to reference lists */
+ recon_pic_buf_t *(*ps_ref_list)[HEVCE_MAX_REF_PICS * 2];
+
+ /** scratch buffer for horizontal interpolation destination */
+ WORD16 MEM_ALIGN16 ai2_horz_scratch[MAX_CTB_SIZE * (MAX_CTB_SIZE + 8)];
+
+ /** scratch 16 bit buffer for interpolation in l0 direction */
+ WORD16 MEM_ALIGN16 ai2_scratch_buf_l0[MAX_CTB_SIZE * MAX_CTB_SIZE];
+
+ /** scratch 16 bit buffer for interpolation in l1 direction */
+ WORD16 MEM_ALIGN16 ai2_scratch_buf_l1[MAX_CTB_SIZE * MAX_CTB_SIZE];
+
+ /** Pointer to struct containing function pointers to
+ functions in the 'common' library */
+ func_selector_t *ps_func_selector;
+
+ /** common denominator used for luma weights */
+ WORD32 i4_log2_luma_wght_denom;
+
+ /** common denominator used for chroma weights */
+ WORD32 i4_log2_chroma_wght_denom;
+
+ /** offset w.r.t frame start in horz direction (pels) */
+ WORD32 i4_ctb_frm_pos_x;
+
+ /** offset w.r.t frame start in vert direction (pels) */
+ WORD32 i4_ctb_frm_pos_y;
+
+ /* Bit Depth of Input */
+ WORD32 i4_bit_depth;
+
+ /* 0 - 400; 1 - 420; 2 - 422; 3 - 444 */
+ UWORD8 u1_chroma_array_type;
+
+ /** weighted_pred_flag */
+ WORD8 i1_weighted_pred_flag;
+
+ /** weighted_bipred_flag */
+ WORD8 i1_weighted_bipred_flag;
+
+ /** Structure to describe extra CTBs around frame due to search
+ range associated with distributed-mode. Entries are top, left,
+ right and bottom */
+ WORD32 ai4_tile_xtra_pel[4];
+
+} inter_pred_ctxt_t;
+/*IMPORTANT please keep inter_pred_ctxt_t and inter_pred_me_ctxt_t as identical*/
+
+typedef IV_API_CALL_STATUS_T (*PF_LUMA_INTER_PRED_PU)(
+ void *pv_inter_pred_ctxt,
+ pu_t *ps_pu,
+ void *pv_dst_buf,
+ WORD32 dst_stride,
+ WORD32 i4_flag_inter_pred_source);
+
+/**
+******************************************************************************
+* @brief Motion predictor context structure
+******************************************************************************
+*/
+typedef struct
+{
+ /** pointer to reference lists */
+ recon_pic_buf_t *(*ps_ref_list)[HEVCE_MAX_REF_PICS * 2];
+
+ /** pointer to the slice header */
+ slice_header_t *ps_slice_hdr;
+
+ /** pointer to SPS */
+ sps_t *ps_sps;
+
+ /** CTB x. In CTB unit*/
+ WORD32 i4_ctb_x;
+
+ /** CTB y. In CTB unit */
+ WORD32 i4_ctb_y;
+
+ /** Log2 Parallel Merge Level - 2 */
+ WORD32 i4_log2_parallel_merge_level_minus2;
+
+ /* Number of extra CTBs external to tile due to fetched search-range around Tile */
+ /* TOP, left, right and bottom */
+ WORD32 ai4_tile_xtra_ctb[4];
+
+} mv_pred_ctxt_t;
+
+/**
+******************************************************************************
+* @brief Deblocking and Boundary strength CTB level structure
+******************************************************************************
+*/
+typedef struct
+{
+ /** Array to store the packed BS values in horizontal direction */
+ UWORD32 au4_horz_bs[(MAX_CTB_SIZE >> 3) + 1];
+
+ /** Array to store the packed BS values in vertical direction */
+ UWORD32 au4_vert_bs[(MAX_CTB_SIZE >> 3) + 1];
+
+ /** CTB neighbour availability flags for deblocking */
+ UWORD8 u1_not_first_ctb_col_of_frame;
+ UWORD8 u1_not_first_ctb_row_of_frame;
+
+} deblk_bs_ctb_ctxt_t;
+
+/**
+******************************************************************************
+* @brief Deblocking and CTB level structure
+******************************************************************************
+*/
+typedef struct
+{
+ /**
+ * BS of the last vertical 4x4 column of previous CTB
+ */
+ UWORD8 au1_prev_bs[MAX_CTB_SIZE >> 3];
+
+ /**
+ * BS of the last vertical 4x4 column of previous CTB (presumably the chroma/UV counterpart, per the field name)
+ */
+ UWORD8 au1_prev_bs_uv[MAX_CTB_SIZE >> 3];
+
+ /** pointer to top 4x4 ctb nbr structure; for accessing qp */
+ nbr_4x4_t *ps_top_ctb_nbr_4x4;
+
+ /** pointer to left 4x4 ctb nbr structure; for accessing qp */
+ nbr_4x4_t *ps_left_ctb_nbr_4x4;
+
+ /** pointer to current 4x4 ctb nbr structure; for accessing qp */
+ nbr_4x4_t *ps_cur_ctb_4x4;
+
+ /** max of 8 such contiguous bs to be computed for 64x64 ctb */
+ UWORD32 *pu4_bs_horz;
+
+ /** max of 8 such contiguous bs to be computed for 64x64 ctb */
+ UWORD32 *pu4_bs_vert;
+
+ /** ptr to current ctb luma pel in frame */
+ UWORD8 *pu1_ctb_y;
+
+ UWORD16 *pu2_ctb_y;
+
+ /** ptr to current ctb sp interleaved chroma pel in frame */
+ UWORD8 *pu1_ctb_uv;
+
+ UWORD16 *pu2_ctb_uv;
+
+ func_selector_t *ps_func_selector;
+
+ /** left nbr buffer stride in terms of 4x4 units */
+ WORD32 i4_left_nbr_4x4_strd;
+
+ /** current buffer stride in terms of 4x4 units */
+ WORD32 i4_cur_4x4_strd;
+
+ /** size in pels 16 / 32 /64 */
+ WORD32 i4_ctb_size;
+
+ /** stride for luma */
+ WORD32 i4_luma_pic_stride;
+
+ /** stride for chroma */
+ WORD32 i4_chroma_pic_stride;
+
+ /** boolean indicating if left ctb edge is to be deblocked or not */
+ WORD32 i4_deblock_left_ctb_edge;
+
+ /** boolean indicating if top ctb edge is to be deblocked or not */
+ WORD32 i4_deblock_top_ctb_edge;
+
+ /** beta offset index */
+ WORD32 i4_beta_offset_div2;
+
+ /** tc offset index */
+ WORD32 i4_tc_offset_div2;
+
+ /** chroma cb qp offset index */
+ WORD32 i4_cb_qp_indx_offset;
+
+ /** chroma cr qp offset index */
+ WORD32 i4_cr_qp_indx_offset;
+
+ WORD32 i4_bit_depth;
+
+ /* 0 - 400; 1 - 420; 2 - 422; 3 - 444 */
+ UWORD8 u1_chroma_array_type;
+
+} deblk_ctb_params_t;
+
+/**
+******************************************************************************
+* @brief Stores the BS and Qp of a CTB row. For CTB-row level deblocking
+******************************************************************************
+*/
+typedef struct deblk_ctbrow_prms
+{
+ /**
+ * Refer to ihevce_enc_loop_get_mem_recs() and
+ * ihevce_enc_loop_init() for more info
+ * regarding memory allocation to each one below.
+ */
+
+ /**
+ * Stores the vertical boundary strength of a CTB row.
+ */
+ UWORD32 *pu4_ctb_row_bs_vert;
+
+ /**
+ * Storage is same as above. Contains horizontal BS.
+ */
+ UWORD32 *pu4_ctb_row_bs_horz;
+
+ /**
+ * Pointer to the CTB row's Qp storage
+ */
+ WORD8 *pi1_ctb_row_qp;
+
+ /**
+ * Stride of the Qp buffer above (pi1_ctb_row_qp) in WORD32 unit; declared WORD32 despite the u4_ prefix
+ */
+ WORD32 u4_qp_buffer_stride;
+
+ /*
+ * Pointers (MAX_NUM_ENC_LOOP_PARALLEL entries) to the memory which contains
+ * the Qp of top4x4 neighbour blocks for each CTB row.
+ * This memory is at frame level.
+ */
+ WORD8 *api1_qp_top_4x4_ctb_row[MAX_NUM_ENC_LOOP_PARALLEL];
+
+ /*
+ * Stride of the above memory location.
+ * Values in one stride correspond to one CTB row.
+ */
+ WORD32 u4_qp_top_4x4_buf_strd;
+
+ /* size of frm level qp buffer */
+ WORD32 u4_qp_top_4x4_buf_size;
+
+} deblk_ctbrow_prms_t;
+
+/**
+******************************************************************************
+* @brief Entropy rd opt context for cabac bit estimation and RDO
+******************************************************************************
+*/
+typedef struct rdopt_entropy_ctxt
+{
+ /**
+ * array for entropy contexts during RD opt stage at CU level
+ * one best and one current is required
+ */
+ entropy_context_t as_cu_entropy_ctxt[2];
+
+ /**
+ * init state of entropy context models during CU RD opt stage,
+ * required for saving and restoring the cabac states
+ */
+ UWORD8 au1_init_cabac_ctxt_states[IHEVC_CAB_CTXT_END];
+
+ /*
+ * ptr to top row cu skip flags (1 bit per 8x8CU)
+ */
+ UWORD8 *pu1_cu_skip_top_row;
+
+ /**
+ * Current entropy ctxt idx
+ */
+ WORD32 i4_curr_buf_idx;
+
+} rdopt_entropy_ctxt_t;
+
+/**
+******************************************************************************
+* @brief Structure to carry predicted data from the Inter SATD stage to the Inter RD opt stage
+******************************************************************************
+*/
+typedef struct
+{
+ /* Buffers holding the predicted data after motion compensation for merge and
+ * skip candidates.
+ * Only [2] buffers are kept although motion compensation is done for 5 merge
+ * candidates: the first 2 candidates fill the buffers, and from the 3rd candidate
+ * onwards the buffer holding the data with the higher SATD cost is overwritten.
+ */
+ void *apv_pred_data[2];
+
+ /** Stride of the stored predicted data
+ */
+ WORD32 i4_pred_data_stride;
+
+} merge_skip_pred_data_t;
+/**
+******************************************************************************
+* @brief Structure to hold Rate control related parameters
+* for each bit-rate instance and each thread
+******************************************************************************
+*/
+typedef struct
+{
+ /**
+ * frame level open loop ssd
+ * (64-bit, unlike the 32-bit sad accumulators below)
+ */
+ LWORD64 i8_frame_open_loop_ssd;
+
+ /**
+ * frame level open loop intra sad
+ *
+ */
+ UWORD32 u4_frame_open_loop_intra_sad;
+ /**
+ * frame level intra sad
+ */
+ UWORD32 u4_frame_intra_sad;
+
+ /**
+ * frame level sad accumulator
+ */
+ UWORD32 u4_frame_sad_acc;
+
+ /**
+ * frame level inter sad accumulator
+ */
+ UWORD32 u4_frame_inter_sad_acc;
+
+ /**
+ * frame level intra sad accumulator
+ */
+ UWORD32 u4_frame_intra_sad_acc;
+
+ /**
+ * frame level cost accumulator
+ */
+ LWORD64 i8_frame_cost_acc;
+
+ /**
+ * frame level inter cost accumulator
+ */
+ LWORD64 i8_frame_inter_cost_acc;
+
+ /**
+ * frame level intra cost accumulator
+ */
+ LWORD64 i8_frame_intra_cost_acc;
+
+ /**
+ * frame level rdopt bits accumulator
+ */
+ UWORD32 u4_frame_rdopt_bits;
+
+ /**
+ * frame level rdopt header bits accumulator
+ */
+ UWORD32 u4_frame_rdopt_header_bits;
+
+ /* Sum of the Qps of each 8*8 block in a CU
+ * 8*8 is used because the min CU size possible as per the standard is 8
+ * 0 corresponds to INTER and 1 corresponds to INTRA
+ */
+ WORD32 i4_qp_normalized_8x8_cu_sum[2];
+
+ /* Count of the number of 8x8 blocks in each CU type (INTER/INTRA)
+ * 0 corresponds to INTER and 1 corresponds to INTRA
+ */
+ WORD32 i4_8x8_cu_sum[2];
+
+ /* SAD/Qscale accumulated over all CUs. CU size is inherently
+ * taken care of in SAD (two entries; presumably 0 = INTER, 1 = INTRA as above - TODO confirm)
+ */
+ LWORD64 i8_sad_by_qscale[2];
+
+} enc_loop_rc_params_t;
+/**
+******************************************************************************
+* @brief CU information structure. This is to store the
+* final CU output after recursion
+******************************************************************************
+*/
+typedef struct ihevce_enc_cu_node_ctxt_t
+{
+ /* CU params */
+ /** CU X position in terms of min CU (8x8) units */
+ UWORD8 b3_cu_pos_x : 3;
+
+ /** CU Y position in terms of min CU (8x8) units */
+ UWORD8 b3_cu_pos_y : 3;
+
+ /** reserved bits (2-bit field) */
+ UWORD8 b2_reserved : 2;
+
+ /** CU size 2N (width or height) in pixels */
+ UWORD8 u1_cu_size;
+
+ /**
+ * CU level final params for a given mode, stored by value
+ * (a single instance, not a best/current pair)
+ */
+ enc_loop_cu_final_prms_t s_cu_prms;
+
+ /**
+ * Pointer to CU level final params for a given mode
+ * (NOTE(review): what it points at is not visible here - confirm against users)
+ */
+ enc_loop_cu_final_prms_t *ps_cu_prms;
+
+ /* flag to indicate if current CU is the first
+ CU of the Quantisation group */
+ UWORD32 b1_first_cu_in_qg : 1;
+
+ /** Qp used for the CU
+ * @remarks :
+ */
+ WORD8 i1_cu_qp;
+
+} ihevce_enc_cu_node_ctxt_t;
+
+typedef struct /* One block merge node; element type of block_merge_data_t.as_nodes */
+{
+ WORD32 i4_sad; /* presumably the SAD of this node - TODO confirm */
+
+ WORD32 i4_mv_cost; /* presumably the cost of coding s_mv - TODO confirm */
+
+ WORD32 i4_tot_cost; /* presumably i4_sad + i4_mv_cost - TODO confirm */
+
+ WORD8 i1_ref_idx; /* presumably the reference index that s_mv refers to - TODO confirm */
+
+ mv_t s_mv;
+
+} block_merge_nodes_t;
+
+/**
+******************************************************************************
+* @brief This struct is used for storing the output of block merge
+******************************************************************************
+*/
+typedef struct
+{
+ block_merge_nodes_t *aps_best_results[MAX_NUM_PARTS]; /* one node pointer per partition */
+
+ /* Contains the best uni dir for each partition (MAX_NUM_PARTS entries) */
+ WORD32 ai4_best_uni_dir[MAX_NUM_PARTS];
+
+ /* Contains the best pred dir for each partition (MAX_NUM_PARTS entries) */
+ WORD32 ai4_best_pred_dir[MAX_NUM_PARTS];
+
+ WORD32 i4_tot_cost; /* presumably the total cost of this result - TODO confirm */
+
+ PART_TYPE_T e_part_type; /* partition type of this result */
+} block_merge_results_t;
+
+/**
+******************************************************************************
+* @brief This struct is used for storing the output of block merge and also
+* all of the intermediate results required
+******************************************************************************
+*/
+typedef struct
+{
+ block_merge_results_t as_best_results[3 + 1][NUM_BEST_ME_OUTPUTS]; /* NOTE(review): meaning of the 3 + 1 first dimension is not visible here - confirm */
+
+ block_merge_nodes_t as_nodes[3][TOT_NUM_PARTS][NUM_BEST_ME_OUTPUTS]; /* node storage; first dimension is 3, one less than as_best_results */
+
+ WORD32 part_mask;
+
+ WORD32 num_results_per_part;
+
+ WORD32 num_best_results;
+
+ /**
+ * Overall best CU cost. While other entries store CU costs
+ * in a single direction, this is the best CU cost, where each
+ * partition cost is evaluated as the best of uni/bi
+ */
+ WORD32 best_cu_cost;
+
+} block_merge_data_t;
+/**
+******************************************************************************
+* @brief CU nbr information structure. This is to store the
+* neighbour information for the final reconstruction function
+******************************************************************************
+*/
+typedef struct
+{
+ /* Pointer to top-left nbr */
+ nbr_4x4_t *ps_topleft_nbr_4x4;
+ /* Pointer to left nbr */
+ nbr_4x4_t *ps_left_nbr_4x4;
+ /* Pointer to top nbr */
+ nbr_4x4_t *ps_top_nbr_4x4;
+ /* Stride of ps_left_nbr_4x4 */
+ WORD32 nbr_4x4_left_strd;
+
+ /* Pointer to CU top */
+ UWORD8 *pu1_cu_top;
+
+ UWORD16 *pu2_cu_top; /* UWORD16 variant of pu1_cu_top (presumably for bit depths above 8 - TODO confirm) */
+
+ /* Pointer to CU top-left */
+ UWORD8 *pu1_cu_top_left;
+
+ UWORD16 *pu2_cu_top_left; /* UWORD16 variant of pu1_cu_top_left */
+
+ /* Pointer to CU left */
+ UWORD8 *pu1_cu_left;
+
+ UWORD16 *pu2_cu_left; /* UWORD16 variant of pu1_cu_left */
+
+ /* Stride of the CU left pointer */
+ WORD32 cu_left_stride;
+} cu_nbr_prms_t;
+
+/** Structure to save the flags required for the Final mode Reconstruction
+function. These flags are set based on the quality presets and
+the bit-rate we are working on */
+typedef struct
+{
+ /** Flag to indicate whether Luma pred data needs to be recomputed in the
+ final_recon function. Now disabled for all modes */
+ UWORD8 u1_eval_luma_pred_data;
+
+ /** Flag to indicate whether Chroma pred data needs to be recomputed in the
+ final_recon function. Now disabled for MedSpeed only */
+ UWORD8 u1_eval_chroma_pred_data;
+
+ /** Flag to indicate whether header data needs to be recomputed in the
+ final_recon function. Now disabled for all modes */
+ UWORD8 u1_eval_header_data;
+
+ UWORD8 u1_eval_recon_data; /* presumably the same kind of flag for recon data - TODO confirm */
+} cu_final_recon_flags_t;
+
+/**
+******************************************************************************
+* @brief Structure to save pred data of ME cand: one ping-pong pair to store
+* the best and current luma cand, and one buffer to store the best chroma pred
+******************************************************************************
+*/
+typedef struct
+{
+ /** Pointers to store luma pred data of me/intra cand.(2) and chroma(1) */
+ UWORD8 *pu1_pred_data[NUM_CU_ME_INTRA_PRED_IDX];
+
+ UWORD16 *pu2_pred_data[NUM_CU_ME_INTRA_PRED_IDX]; /* UWORD16 variant of pu1_pred_data */
+
+ /** Strides of the predicted data of me/intra cand.(2) and chroma(1) */
+ WORD32 ai4_pred_data_stride[NUM_CU_ME_INTRA_PRED_IDX];
+ /** Counter saying how many pointers are assigned */
+ WORD32 i4_pointer_count;
+
+} cu_me_intra_pred_prms_t;
+
+/**
+******************************************************************************
+* @brief Chroma intra SATD context structure (element type of as_chr_intra_satd_ctxt)
+******************************************************************************
+*/
+typedef struct
+{
+ /** Storing the inverse quantized data (cb) for the special modes */
+ WORD16 ai2_iq_data_cb[(MAX_TU_SIZE * MAX_TU_SIZE) << 1];
+
+ /** Storing the inverse quantized data (cr) for the special modes */
+ WORD16 ai2_iq_data_cr[(MAX_TU_SIZE * MAX_TU_SIZE) << 1];
+
+ /** Storing the scan coeffs (cb) for the special modes */
+ UWORD8 au1_scan_coeff_cb[2][(MAX_TU_IN_CTB >> 1) * MAX_SCAN_COEFFS_BYTES_4x4];
+
+ /** Storing the scan coeffs (cr) for the special modes */
+ UWORD8 au1_scan_coeff_cr[2][(MAX_TU_IN_CTB >> 1) * MAX_SCAN_COEFFS_BYTES_4x4];
+
+ /** Max number of bytes filled in scan coeff data (cb) per TU */
+ WORD32 ai4_num_bytes_scan_coeff_cb_per_tu[2][MAX_TU_IN_TU_EQ_DIV_2];
+
+ /** Max number of bytes filled in scan coeff data (cr) per TU */
+ WORD32 ai4_num_bytes_scan_coeff_cr_per_tu[2][MAX_TU_IN_TU_EQ_DIV_2];
+
+ /** Stride of the iq buffer */
+ WORD32 i4_iq_buff_stride;
+
+ /** Pointer to the pred data
+ The predicted data is always interleaved. Therefore the size of the buffer will be
+ (((MAX_TU_SIZE * MAX_TU_SIZE) >> 2) * 2) */
+ void *pv_pred_data;
+
+ /** Predicted data stride */
+ WORD32 i4_pred_stride;
+
+ /** Storing the cbfs for each tu
+ For 1 tu case, only the 0th element will be valid */
+ UWORD8 au1_cbf_cb[2][MAX_TU_IN_TU_EQ_DIV_2];
+
+ /** Storing the cbfs for each tu
+ For 1 tu case, only the 0th element will be valid */
+ UWORD8 au1_cbf_cr[2][MAX_TU_IN_TU_EQ_DIV_2];
+
+ /** To store the cabac ctxt models updated by the RDOPT of the best chroma mode.
+ NOTE(review): "[0] : 1 TU case, [1] : 4 TU case" presumably refers to the array of this struct - confirm */
+ UWORD8 au1_chrm_satd_updated_ctxt_models[IHEVC_CAB_CTXT_END];
+
+ /** Best SATD chroma mode, [0] : for 1 TU case (TU_EQ_CU) , [1] : for 4 TU case
+ Values : 0(PLANAR), 1(VERT), 2(HOR), 3(DC) chroma mode per each TU */
+ UWORD8 u1_best_cr_mode;
+
+ /** Best SATD chroma mode's RDOPT cost, [0] : for 1 TU case, [1] : for 4 TU case */
+ LWORD64 i8_chroma_best_rdopt;
+
+ /* Account for coding b3_chroma_intra_pred_mode prefix and suffix bins */
+ /* This is done by adding the bits for signalling chroma mode (0-3) */
+ /* and subtracting the bits for chroma mode same as luma mode (4) */
+ LWORD64 i8_cost_to_encode_chroma_mode;
+
+ /** Best SATD chroma mode's tu bits, [0] : for 1 TU case, [1] : for 4 TU case */
+ WORD32 i4_chrm_tu_bits;
+
+ /** Storing the zero col values for each TU for cb */
+ WORD32 ai4_zero_col_cb[2][MAX_TU_IN_TU_EQ_DIV_2];
+
+ /** Storing the zero col values for each TU for cr */
+ WORD32 ai4_zero_col_cr[2][MAX_TU_IN_TU_EQ_DIV_2];
+
+ /** Storing the zero row values for each TU for cb */
+ WORD32 ai4_zero_row_cb[2][MAX_TU_IN_TU_EQ_DIV_2];
+
+ /** Storing the zero row values for each TU for cr */
+ WORD32 ai4_zero_row_cr[2][MAX_TU_IN_TU_EQ_DIV_2];
+} chroma_intra_satd_ctxt_t;
+
+/**
+******************************************************************************
+* @brief Chroma RDOPT context structure
+******************************************************************************
+*/
+typedef struct
+{
+ /** Chroma SATD context structures. It is an array of two to account for the TU_EQ_CU candidate
+ and the TU_EQ_CU_DIV2 candidate */
+ chroma_intra_satd_ctxt_t as_chr_intra_satd_ctxt[NUM_POSSIBLE_TU_SIZES_CHR_INTRA_SATD];
+
+ /** Chroma SATD has to be evaluated only for the HIGH QUALITY preset */
+ UWORD8 u1_eval_chrm_satd;
+
+ /** Chroma RDOPT has to be evaluated only for the HIGH QUALITY / MEDIUM SPEED preset */
+ UWORD8 u1_eval_chrm_rdopt;
+
+} ihevce_chroma_rdopt_ctxt_t;
+
+typedef struct /* Pairs an inter_cu_results_t with an inter_pu_results_t (presumably the block merge output - TODO confirm) */
+{
+ inter_cu_results_t s_cu_results;
+
+ inter_pu_results_t s_pu_results;
+} block_merge_output_t;
+
+/**
+******************************************************************************
+* @brief Structure to store the Merge/Skip Cand. for EncLoop
+******************************************************************************
+*/
+typedef struct
+{
+ /** List of all merge/skip candidates to be evaluated (SATD/RDOPT) for
+ * this CU
+ */
+ cu_inter_cand_t as_cu_inter_merge_skip_cand[MAX_NUM_CU_MERGE_SKIP_CAND];
+
+ /** number of merge candidates
+ */
+ UWORD8 u1_num_merge_cands;
+
+ UWORD8 u1_num_skip_cands; /* number of skip candidates */
+
+ UWORD8 u1_num_merge_skip_cands; /* presumably merge + skip candidates in the list above - TODO confirm */
+
+} cu_inter_merge_skip_t;
+
+/** Structure to store the Mixed mode Cand. for EncLoop */
+typedef struct
+{
+ cu_inter_cand_t as_cu_data[MAX_NUM_MIXED_MODE_INTER_RDO_CANDS]; /* mixed mode candidate storage */
+
+ UWORD8 u1_num_mixed_mode_type0_cands; /* number of type0 cands; NOTE(review): type0/type1 are not defined in this view - confirm */
+
+ UWORD8 u1_num_mixed_mode_type1_cands; /* number of type1 cands */
+
+} cu_mixed_mode_inter_t;
+
+typedef struct /* Pool of inter pred buffers with an in-use bit mask and a common stride */
+{
+ /* Extra buffers are required for storing both cur and best during merge eval. */
+ /* NOTE(review): this comment used to say '+2' but the array below uses '+ 4' - confirm */
+ void *apv_inter_pred_data[MAX_NUM_INTER_RDO_CANDS + 4];
+
+ /* Bit field used to determine the indices of free bufs in 'apv_inter_pred_data' buf array */
+ UWORD32 u4_is_buf_in_use;
+
+ /* Assumption is that the same stride is used for the */
+ /* entire set of buffers above and is equal to the */
+ /* CU size */
+ WORD32 i4_pred_stride;
+
+} ihevce_inter_pred_buf_data_t;
+/** Structure to store the Inter Cand. info in EncLoop */
+typedef struct
+{
+ cu_inter_cand_t *aps_cu_data[MAX_NUM_INTER_RDO_CANDS]; /* candidate pointers, one per inter RDO cand slot */
+
+ UWORD32 au4_cost[MAX_NUM_INTER_RDO_CANDS]; /* presumably the cost of the matching aps_cu_data entry - TODO confirm */
+
+ UWORD8 au1_pred_buf_idx[MAX_NUM_INTER_RDO_CANDS]; /* presumably index of each cand's pred buffer - TODO confirm */
+
+ UWORD32 u4_src_variance;
+
+ UWORD8 u1_idx_of_worst_cost_in_cost_array; /* presumably an index into au4_cost - TODO confirm */
+
+ UWORD8 u1_idx_of_worst_cost_in_pred_buf_array; /* presumably an index into au1_pred_buf_idx - TODO confirm */
+
+ UWORD8 u1_num_inter_cands; /* number of inter candidates */
+
+} inter_cu_mode_info_t;
+typedef struct /* SAO context: buffers, strides and parameters used for SAO and its RD optimisation */
+{
+ /* Frame level base pointers of the buffers that store, for each ctb row, the top pixels
+ * and top left pixel for the next ctb row. These buffers are common across all threads
+ */
+ UWORD8 *apu1_sao_src_frm_top_luma[MAX_NUM_ENC_LOOP_PARALLEL];
+ /* Ctb level pointer to the buffer that stores the top pixels
+ * and top left pixel for the next ctb row. These buffers are common across all threads
+ */
+ UWORD8 *pu1_curr_sao_src_top_luma;
+ /* Buffer to store the left boundary before
+ * doing sao on current ctb for the next ctb in the current row
+ */
+ UWORD8 au1_sao_src_left_luma[MAX_CTB_SIZE];
+ /* Frame level base pointers of the buffers that store, for each ctb row, the top chroma pixels
+ * and top left pixel for the next ctb row. These buffers are common across all threads
+ */
+ UWORD8 *apu1_sao_src_frm_top_chroma[MAX_NUM_ENC_LOOP_PARALLEL];
+
+ WORD32 i4_frm_top_chroma_buf_stride;
+
+ /* Ctb level pointer to the buffer that stores the top chroma pixels
+ * and top left pixel for the next ctb row. These buffers are common across all threads
+ */
+ UWORD8 *pu1_curr_sao_src_top_chroma;
+
+ /* Scratch buffer to store the left boundary before
+ * doing sao on current ctb for the next ctb in the current row
+ */
+ UWORD8 au1_sao_src_left_chroma[MAX_CTB_SIZE * 2];
+
+ /**
+ * Luma recon buffer
+ */
+ UWORD8 *pu1_frm_luma_recon_buf;
+ /**
+ * Chroma recon buffer
+ */
+ UWORD8 *pu1_frm_chroma_recon_buf;
+ /**
+ * Luma recon buffer for curr ctb
+ */
+ UWORD8 *pu1_cur_luma_recon_buf;
+ /**
+ * Chroma recon buffer for curr ctb
+ */
+ UWORD8 *pu1_cur_chroma_recon_buf;
+ /**
+ * Luma src buffer
+ */
+ UWORD8 *pu1_frm_luma_src_buf;
+ /**
+ * Chroma src buffer
+ */
+ UWORD8 *pu1_frm_chroma_src_buf;
+ /**
+ * Luma src(input yuv) buffer for curr ctb
+ */
+ UWORD8 *pu1_cur_luma_src_buf;
+ /**
+ * Chroma src buffer for curr ctb
+ */
+ UWORD8 *pu1_cur_chroma_src_buf;
+ /* Left luma scratch buffer required for sao RD optimisation */
+ UWORD8 au1_left_luma_scratch[MAX_CTB_SIZE];
+
+ /* Left chroma scratch buffer required for sao RD optimisation */
+ /* Min size required = MAX_CTB_SIZE/2 * 2
+ * Multiplied by 2 because the size required is MAX_CTB_SIZE/2 each for U and V
+ */
+ UWORD8 au1_left_chroma_scratch[MAX_CTB_SIZE * 2];
+
+ /* Top luma scratch buffer required for sao RD optimisation */
+ UWORD8 au1_top_luma_scratch[MAX_CTB_SIZE + 2]; // +1 for top left pixel and +1 for top right
+
+ /* Top chroma scratch buffer required for sao RD optimisation */
+ UWORD8 au1_top_chroma_scratch[MAX_CTB_SIZE + 4]; // +2 for top left pixel and +2 for top right
+
+ /* Scratch buffer to store the sao'ed luma output during sao RD optimisation */
+ /* One extra row (bot pixels) is copied to the scratch buf, but the 2d buf copy func copies multiples of 4, hence
+ MAX_CTB_SIZE + 4 */
+ UWORD8 au1_sao_luma_scratch[PING_PONG][SCRATCH_BUF_STRIDE * (MAX_CTB_SIZE + 4)];
+
+ /* Scratch buffer to store the sao'ed chroma output during sao RD optimisation */
+ /* One extra row (bot pixels) is copied to the scratch buf, but the 2d buf copy func copies multiples of 4, hence
+ MAX_CTB_SIZE + 4 */
+ UWORD8 au1_sao_chroma_scratch[PING_PONG][SCRATCH_BUF_STRIDE * (MAX_CTB_SIZE + 4)];
+
+ /**
+ * CTB size
+ */
+ WORD32 i4_ctb_size;
+ /**
+ * Luma recon buffer stride
+ */
+ WORD32 i4_frm_luma_recon_stride;
+ /**
+ * Chroma recon buffer stride
+ */
+ WORD32 i4_frm_chroma_recon_stride;
+ /**
+ * Luma recon buffer stride for curr ctb
+ */
+ WORD32 i4_cur_luma_recon_stride;
+ /**
+ * Chroma recon buffer stride for curr ctb
+ */
+ WORD32 i4_cur_chroma_recon_stride;
+ /**
+ * Luma src buffer stride
+ */
+ WORD32 i4_frm_luma_src_stride;
+ /**
+ * Chroma src buffer stride
+ */
+ WORD32 i4_frm_chroma_src_stride;
+
+ WORD32 i4_frm_top_luma_buf_stride;
+ /**
+ * Luma src buffer stride for curr ctb
+ */
+ WORD32 i4_cur_luma_src_stride;
+ /**
+ * Chroma src buffer stride for curr ctb
+ */
+ WORD32 i4_cur_chroma_src_stride;
+
+ /* Top luma buffer size */
+ WORD32 i4_top_luma_buf_size;
+
+ /* Top Chroma buffer size */
+ WORD32 i4_top_chroma_buf_size;
+
+ /*** Number of CTB units **/
+ WORD32 i4_num_ctb_units;
+
+ /**
+ * CTB x pos
+ */
+ WORD32 i4_ctb_x;
+ /**
+ * CTB y pos
+ */
+ WORD32 i4_ctb_y;
+ /* SAO block width */
+ WORD32 i4_sao_blk_wd;
+
+ /* SAO block height */
+ WORD32 i4_sao_blk_ht;
+
+ /* Last ctb row flag */
+ WORD32 i4_is_last_ctb_row;
+
+ /* Last ctb col flag */
+ WORD32 i4_is_last_ctb_col;
+
+ /* CTB aligned width */
+ UWORD32 u4_ctb_aligned_wd;
+
+ /* Number of ctbs in a row */
+ UWORD32 u4_num_ctbs_horz;
+
+ UWORD32 u4_num_ctbs_vert;
+ /**
+ * Closed loop SSD Lambda
+ * This is multiplied with bits for RD cost computations in SSD mode
+ * This is represented in q format with shift of LAMBDA_Q_SHIFT
+ */
+ LWORD64 i8_cl_ssd_lambda_qf;
+
+ /**
+ * Closed loop SSD Lambda for chroma (chroma qp is different from luma qp)
+ * This is multiplied with bits for RD cost computations in SSD mode
+ * This is represented in q format with shift of LAMBDA_Q_SHIFT
+ */
+ LWORD64 i8_cl_ssd_lambda_chroma_qf;
+ /**
+ * Pointer to current PPS
+ */
+ pps_t *ps_pps; //not used currently
+ /**
+ * Pointer to current SPS
+ */
+ sps_t *ps_sps;
+
+ /**
+ * Pointer to current slice header structure
+ */
+ slice_header_t *ps_slice_hdr;
+ /**
+ * Pointer to current frame ctb out array of structures
+ */
+ ctb_enc_loop_out_t *ps_ctb_out;
+ /**
+ * Pointer to the context for cabac bit estimation used during rdopt stage
+ */
+ rdopt_entropy_ctxt_t *ps_rdopt_entropy_ctxt;
+ /**
+ * Pointer to sao_enc_t for the current ctb
+ */
+ sao_enc_t *ps_sao;
+ /*
+ * Pointers to arrays that store the sao information of the top ctb
+ * This is required to decide top merge
+ */
+ sao_enc_t *aps_frm_top_ctb_sao[MAX_NUM_ENC_LOOP_PARALLEL];
+
+ /*
+ * Pointer to structure to store the sao parameters of (x,y)th ctb
+ * for top merge of (x,y+1)th ctb
+ */
+ sao_enc_t *ps_top_ctb_sao;
+
+ /* structure to store the sao parameters of (x,y)th ctb for
+ * the left merge of (x+1,y)th ctb
+ */
+ sao_enc_t s_left_ctb_sao;
+
+ /* Array of structures for SAO RDO candidates */
+ sao_enc_t as_sao_rd_cand[MAX_SAO_RD_CAND];
+
+ /** array of function pointers for luma sao */
+ pf_sao_luma apf_sao_luma[4];
+
+ /** array of function pointers for chroma sao */
+ pf_sao_chroma apf_sao_chroma[4];
+
+ /* Flags to do SAO luma and chroma filtering */
+ WORD8 i1_slice_sao_luma_flag;
+
+ WORD8 i1_slice_sao_chroma_flag;
+
+#if DISABLE_SAO_WHEN_NOISY
+ ctb_analyse_t *ps_ctb_data;
+
+ WORD32 i4_ctb_data_stride;
+#endif
+
+ ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list;
+
+} sao_ctxt_t;
+
+/**
+******************************************************************************
+* @brief Encode loop module context structure
+******************************************************************************
+*/
+typedef struct
+{
+#if ENABLE_TU_TREE_DETERMINATION_IN_RDOPT
+ void *pv_err_func_selector;
+#endif
+
+ /**
+ * Quality preset for controlling the number of RD opt cand
+ * @sa : IHEVCE_QUALITY_CONFIG_T
+ */
+ WORD32 i4_quality_preset;
+ /**
+ *
+ *
+ */
+ WORD32 i4_rc_pass;
+ /**
+ * Lamda to be mulitplied with bits for SATD
+ * should be equal to Lamda*Qp
+ */
+ WORD32 i4_satd_lamda;
+
+ /**
+ * Lamda to be mulitplied with bits for SAD
+ * should be equal to Lamda*Qp
+ */
+ WORD32 i4_sad_lamda;
+
+ /**
+ * Closed loop SSD Lambda
+ * This is multiplied with bits for RD cost computations in SSD mode
+ * This is represented in q format with shift of LAMBDA_Q_SHIFT
+ */
+ LWORD64 i8_cl_ssd_lambda_qf;
+
+ /**
+ * Closed loop SSD Lambda for chroma (chroma qp is different from luma qp)
+ * This is multiplied with bits for RD cost computations in SSD mode
+ * This is represented in q format with shift of LAMBDA_Q_SHIFT
+ */
+ LWORD64 i8_cl_ssd_lambda_chroma_qf;
+
+ /**
+ * Ratio of Closed loop SSD Lambda and Closed loop SSD Lambda for chroma
+ * This is multiplied with (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT)
+ * to keep the precision of the ratio
+ */
+ UWORD32 u4_chroma_cost_weighing_factor;
+ /**
+ * Frame level QP to be used
+ */
+ WORD32 i4_frame_qp;
+
+ WORD32 i4_frame_mod_qp;
+
+ WORD32 i4_frame_qstep;
+
+ UWORD8 u1_max_tr_depth;
+
+ /**
+ * CU level Qp
+ */
+ WORD32 i4_cu_qp;
+
+ /**
+ * CU level Qp / 6
+ */
+ WORD32 i4_cu_qp_div6;
+
+ /**
+ * CU level Qp % 6
+ */
+ WORD32 i4_cu_qp_mod6;
+
+ /**
+ * CU level QP to be used
+ */
+ WORD32 i4_chrm_cu_qp;
+
+ /**
+ * CU level Qp / 6
+ */
+ WORD32 i4_chrm_cu_qp_div6;
+
+ /**
+ * CU level Qp % 6
+ */
+ WORD32 i4_chrm_cu_qp_mod6;
+
+ /** previous cu qp
+ * @remarks : This needs to be remembered to handle skip cases in deblocking.
+ */
+ WORD32 i4_prev_cu_qp;
+
+ /** chroma qp offset
+ * @remarks : Used to calculate chroma qp and other qp related parameter at CU level
+ */
+ WORD32 i4_chroma_qp_offset;
+
+ /**
+ * Buffer Pointer to populate the scale matrix for all transform size
+ */
+ WORD16 *pi2_scal_mat;
+
+ /**
+ * Buffer Pointer to populate the rescale matrix for all transform size
+ */
+ WORD16 *pi2_rescal_mat;
+
+ /** array of pointer to store the scaling matrices for
+ * all transform sizes and qp % 6 (pre computed)
+ */
+ WORD16 *api2_scal_mat[NUM_TRANS_TYPES * 2];
+
+ /** array of pointer to store the re-scaling matrices for
+ * all transform sizes and qp % 6 (pre computed)
+ */
+ WORD16 *api2_rescal_mat[NUM_TRANS_TYPES * 2];
+
+ /** array of function pointers for residual and
+ * forward transform for all transform sizes
+ */
+ pf_res_trans_luma apf_resd_trns[NUM_TRANS_TYPES];
+
+ /** array of function pointers for residual and
+ * forward HAD transform for all transform sizes
+ */
+ pf_res_trans_luma_had_chroma apf_chrm_resd_trns_had[NUM_TRANS_TYPES - 2];
+
+ /** array of function pointers for residual and
+ * forward transform for all transform sizes
+ * for chroma
+ */
+ pf_res_trans_chroma apf_chrm_resd_trns[NUM_TRANS_TYPES - 2];
+
+ /** array of function pointers for qunatization and
+ * inv Quant for ssd calc. for all transform sizes
+ */
+ pf_quant_iquant_ssd apf_quant_iquant_ssd[4];
+
+ /** array of function pointers for inv.transform and
+ * recon for all transform sizes
+ */
+ pf_it_recon apf_it_recon[NUM_TRANS_TYPES];
+
+ /** array of function pointers for inverse transform
+ * and recon for all transform sizes for chroma
+ */
+ pf_it_recon_chroma apf_chrm_it_recon[NUM_TRANS_TYPES - 2];
+
+ /** array of luma intra prediction function pointers */
+ pf_intra_pred apf_lum_ip[NUM_IP_FUNCS];
+
+ /** array of chroma intra prediction function pointers */
+ pf_intra_pred apf_chrm_ip[NUM_IP_FUNCS];
+
+ /* - Function pointer to cu_mode_decide function */
+ /* - The 'void *' is used since one of the parameters of */
+ /* this class of functions is the current structure */
+ /* - This function pointer is used to choose the */
+ /* appropriate function depending on whether bit_depth is */
+ /* chosen as 8 bits or greater */
+ /* - This function pointer's type is defined at the end */
+ /* of this file */
+ void *pv_cu_mode_decide;
+
+ /* Infer from the comment for the variable 'pv_cu_mode_decide' */
+ void *pv_inter_rdopt_cu_mc_mvp;
+
+ /* Infer from the comment for the variable 'pv_cu_mode_decide' */
+ void *pv_inter_rdopt_cu_ntu;
+
+ /* Infer from the comment for the variable 'pv_cu_mode_decide' */
+ void *pv_intra_chroma_pred_mode_selector;
+
+ /* Infer from the comment for the variable 'pv_cu_mode_decide' */
+ void *pv_intra_rdopt_cu_ntu;
+
+ /* Infer from the comment for the variable 'pv_cu_mode_decide' */
+ void *pv_final_rdopt_mode_prcs;
+
+ /* Infer from the comment for the variable 'pv_cu_mode_decide' */
+ void *pv_store_cu_results;
+
+ /* Infer from the comment for the variable 'pv_cu_mode_decide' */
+ void *pv_enc_loop_cu_bot_copy;
+
+ /* Infer from the comment for the variable 'pv_cu_mode_decide' */
+ void *pv_final_mode_reevaluation_with_modified_cu_qp;
+
+ /* Infer from the comment for the variable 'pv_cu_mode_decide' */
+ void *pv_enc_loop_ctb_left_copy;
+
+ /** Qunatization rounding factor for inter and intra CUs */
+ WORD32 i4_quant_rnd_factor[2];
+
+ /**
+ * Frame Buffer Pointer to store the top row luma data.
+ * one pixel row in every ctb row
+ */
+ void *apv_frm_top_row_luma[MAX_NUM_ENC_LOOP_PARALLEL];
+
+ /**
+ * One CTB row size of Top row luma data buffer
+ */
+ WORD32 i4_top_row_luma_stride;
+
+ /**
+ * One frm of Top row luma data buffer
+ */
+ WORD32 i4_frm_top_row_luma_size;
+
+ /**
+ * Current luma row bottom data store pointer
+ */
+ void *pv_bot_row_luma;
+
+ /**
+ * Top luma row top data access pointer
+ */
+ void *pv_top_row_luma;
+
+ /**
+ * Frame Buffer Pointer to store the top row chroma data (Cb Cr pixel interleaved )
+ * one pixel row in every ctb row
+ */
+ void *apv_frm_top_row_chroma[MAX_NUM_ENC_LOOP_PARALLEL];
+
+ /**
+ * One CTB row size of Top row chroma data buffer (Cb Cr pixel interleaved )
+ */
+ WORD32 i4_top_row_chroma_stride;
+
+ /**
+ * One frm size of Top row chroma data buffer (Cb Cr pixel interleaved )
+ */
+ WORD32 i4_frm_top_row_chroma_size;
+
+ /**
+ * Current chroma row bottom data store pointer
+ */
+ void *pv_bot_row_chroma;
+
+ /**
+ * Top chroma row top data access pointer
+ */
+ void *pv_top_row_chroma;
+
+ /**
+ * Frame Buffer Pointer to store the top row neighbour modes stored at 4x4 level
+ * one 4x4 row in every ctb row
+ */
+ nbr_4x4_t *aps_frm_top_row_nbr[MAX_NUM_ENC_LOOP_PARALLEL];
+
+ /**
+ * One CTB row size of Top row nbr 4x4 params buffer
+ */
+ WORD32 i4_top_row_nbr_stride;
+
+ /**
+ * One frm size of Top row nbr 4x4 params buffer
+ */
+ WORD32 i4_frm_top_row_nbr_size;
+
+ /**
+ * Current row nbr prms bottom data store pointer
+ */
+ nbr_4x4_t *ps_bot_row_nbr;
+
+ /**
+ * Top row nbr prms top data access pointer
+ */
+ nbr_4x4_t *ps_top_row_nbr;
+
+ /**
+ * Pointer to (1,1) location in au1_nbr_ctb_map
+ */
+ UWORD8 *pu1_ctb_nbr_map;
+
+ /**
+ * neigbour map buffer stride;
+ */
+ WORD32 i4_nbr_map_strd;
+
+ /**
+ * Array at ctb level to store the neighour map
+ * its size is 25x25 for ctb size of 64x64
+ */
+ UWORD8 au1_nbr_ctb_map[MAX_PU_IN_CTB_ROW + 1 + 8][MAX_PU_IN_CTB_ROW + 1 + 8];
+
+ /**
+ * Array to store left ctb data for luma
+ * some padding is added to take care of unconditional access
+ */
+ void *pv_left_luma_data;
+
+ /**
+ * Array to store left ctb data for chroma (cb abd cr pixel interleaved
+ * some padding is added to take care of unconditional access
+ */
+ void *pv_left_chrm_data;
+
+ /**
+ * Array to store the left neighbour modes at 4x4 level
+ */
+ nbr_4x4_t as_left_col_nbr[MAX_PU_IN_CTB_ROW];
+
+ /**
+ * Array to store currrent CTb pred modes at a 4x4 level
+ * used for prediction inside ctb
+ */
+ nbr_4x4_t as_ctb_nbr_arr[MAX_PU_IN_CTB_ROW * MAX_PU_IN_CTB_ROW];
+
+ /**
+ * array for storing csbf during RD opt stage at CU level
+ * one best and one current is required
+ */
+ UWORD8 au1_cu_csbf[MAX_TU_IN_CTB_ROW * MAX_TU_IN_CTB_ROW];
+
+ /**
+ * Stride of csbf buffer. will be useful for scanning access
+ * if stored in a 2D order. right now set to max tx size >> 4;
+ */
+ WORD32 i4_cu_csbf_strd;
+
+ /**
+ * Array to store pred modes during SATD and RD opt stage at CU level
+ * one best and one current is required
+ */
+ nbr_4x4_t as_cu_nbr[2][MAX_PU_IN_CTB_ROW * MAX_PU_IN_CTB_ROW];
+
+ /**
+ * array to store the output of reference substitution process output
+ * for intra CUs
+ * TOP (32 x 2) + Left (32 x 2) + Top left (1) + Alignment (3)
+ */
+ void *pv_ref_sub_out;
+
+ /**
+ * array to store the filtered reference samples for intra CUs
+ * TOP (32 x 2) + Left (32 x 2) + Top left (1) + Alignment (3)
+ */
+ void *pv_ref_filt_out;
+
+ /**
+ * Used for 3 purposes
+ *
+ * 1. MC Intermediate buffer
+ * array for storing intermediate 16-bit value for hxhy subpel
+ * generation at CTB level (+ 16) for subpel planes boundary
+ * +4 is for horizontal 4pels
+ *
+ * 2. Temprory scratch buffer for transform and coeffs storage
+ * MAX_TRANS_SIZE *2 for trans_scratch(32bit) and MAX_TRANS_SIZE *1 for trans_values
+ * The first part i.e. from 0 to MAX_TRANS_SIZE is then reused for storing the quant coeffs
+ * Max of both are used
+ *
+ * 3. MC Intermediate buffer
+ * buffer for storing intermediate 16 bit values prior to conversion to 8bit in HBD
+ *
+ */
+ MEM_ALIGN16 WORD16 ai2_scratch[(MAX_CTB_SIZE + 8 + 8) * (MAX_CTB_SIZE + 8 + 8 + 8) * 2];
+
+ /**
+ * array for storing cu level final params for a given mode
+ * one best and one current is required
+ */
+ enc_loop_cu_final_prms_t as_cu_prms[2];
+
+ /**
+ * Scan index to be used for any given transform
+ * this is a scratch variable used to communicate
+ * scan idx at every transform level
+ */
+ WORD32 i4_scan_idx;
+
+ /**
+ * Buffer index in ping pong buffers
+ * to be used SATD mode evaluations
+ */
+ WORD32 i4_satd_buf_idx;
+
+ /**
+ * Motion Compensation module context structre
+ */
+ inter_pred_ctxt_t s_mc_ctxt;
+
+ /**
+ * MV pred module context structre
+ */
+ mv_pred_ctxt_t s_mv_pred_ctxt;
+
+ /**
+ * Deblock BS ctb structure
+ */
+ deblk_bs_ctb_ctxt_t s_deblk_bs_prms;
+
+ /**
+ * Deblocking ctb structure
+ */
+ deblk_ctb_params_t s_deblk_prms;
+
+ /**
+ * Deblocking structure. For ctb-row level
+ */
+ deblk_ctbrow_prms_t s_deblk_ctbrow_prms;
+
+ /**
+ * Deblocking enable flag
+ */
+ WORD32 i4_deblock_type;
+
+ /**
+ * context for cabac bit estimation used during rdopt stage
+ */
+ rdopt_entropy_ctxt_t s_rdopt_entropy_ctxt;
+
+ /**
+ * Context models stored for RDopt store and restore purpose
+ */
+ UWORD8 au1_rdopt_init_ctxt_models[IHEVC_CAB_CTXT_END];
+
+ /**
+ * current picture slice type
+ */
+ WORD8 i1_slice_type;
+
+ /**
+ * strong_intra_smoothing_enable_flag
+ */
+ WORD8 i1_strong_intra_smoothing_enable_flag;
+
+ /** Pointer to Dep Mngr for controlling Top-Right CU dependency */
+ void *pv_dep_mngr_enc_loop_cu_top_right;
+
+ /** Pointer to Dep Mngr for controlling Deblocking Top dependency */
+ void *pv_dep_mngr_enc_loop_dblk;
+
+ /** pointer to store the cabac states at end of second CTB in current row */
+ UWORD8 *pu1_curr_row_cabac_state;
+
+ /** pointer to copy the cabac states at start of first CTB in current row */
+ UWORD8 *pu1_top_rt_cabac_state;
+ /** flag to indicate rate control mode.
+ * @remarks : To enable CU level qp modulation only when required.
+ */
+ WORD8 i1_cu_qp_delta_enable;
+
+ /** flag to indicate whether entropy coding sync is enabled.
+ * @remarks : Entropy sync enable flag
+ */
+ WORD8 i1_entropy_coding_sync_enabled_flag;
+
+ /** Use SATD or SAD for best merge candidate evaluation */
+ WORD32 i4_use_satd_for_merge_eval;
+
+ UWORD8 u1_use_early_cbf_data;
+
+ /** Use SATD or SAD for best CU merge candidate evaluation */
+ WORD32 i4_use_satd_for_cu_merge;
+
+ /** Maximum number of merge candidates to be evaluated */
+ WORD32 i4_max_merge_candidates;
+
+ /** Flag to indicate whether current pictute needs to be deblocked,
+ padded and hpel planes need to be generated.
+ These are turned off typically in non referecne pictures when psnr
+ and recon dump is disabled
+ */
+ WORD32 i4_deblk_pad_hpel_cur_pic;
+
+ /* Array of structures for storing mc predicted data for
+ * merge and skip modes
+ */
+ merge_skip_pred_data_t as_merge_skip_pred_data[MAX_NUM_CU_MERGE_SKIP_CAND];
+
+ /* Lambda and chroma cost weighing factor arrays,
+ * each with MAX_HEVC_QP_12bit + 1 entries (presumably indexed by Qp - TODO confirm).
+ * NOTE(review): the earlier comment here ("Sum the Qps of each 8*8 block in CU") did not describe these fields
+ */
+ LWORD64 i8_cl_ssd_lambda_qf_array[MAX_HEVC_QP_12bit + 1];
+ UWORD32 au4_chroma_cost_weighing_factor_array[MAX_HEVC_QP_12bit + 1];
+ LWORD64 i8_cl_ssd_lambda_chroma_qf_array[MAX_HEVC_QP_12bit + 1];
+ WORD32 i4_satd_lamda_array[MAX_HEVC_QP_12bit + 1];
+ WORD32 i4_sad_lamda_array[MAX_HEVC_QP_12bit + 1];
+
+ /************************************************************************/
+ /* The fields with the string 'type2' in their names are required */
+ /* when both 8bit and hbd lambdas are needed. The lambdas corresponding */
+ /* to the bit_depth != internal_bit_depth are stored in these fields */
+ /************************************************************************/
+ LWORD64 i8_cl_ssd_type2_lambda_qf_array[MAX_HEVC_QP_12bit + 1];
+ LWORD64 i8_cl_ssd_type2_lambda_chroma_qf_array[MAX_HEVC_QP_12bit + 1];
+ WORD32 i4_satd_type2_lamda_array[MAX_HEVC_QP_12bit + 1];
+ WORD32 i4_sad_type2_lamda_array[MAX_HEVC_QP_12bit + 1];
+
+ /* Lokesh: Added to find if the CU is the first to be coded in the group */
+ WORD32 i4_is_first_cu_qg_coded;
+
+ /* Chroma RDOPT related parameters */
+ ihevce_chroma_rdopt_ctxt_t s_chroma_rdopt_ctxt;
+
+ /* Structure to save pred data of ME/Intra cand */
+ cu_me_intra_pred_prms_t s_cu_me_intra_pred_prms;
+
+ /* Structure to save the flags required for Final mode Reconstruction
+ function. These flags are set based on quality presets and bit-rate
+ we are working on */
+ cu_final_recon_flags_t s_cu_final_recon_flags;
+
+ /* Parameter indicating the level at which RDOQ is applied:
+ 0 - RDOQ disabled; 1 - RDOQ enabled during RDOPT for all candidates;
+ 2 - RDOQ enabled only for the final candidate.
+ NOTE(review): RDOQ_LEVELS_T orders these as BEST_CAND_RDOQ = 1, ALL_CAND_RDOQ = 2 - confirm */
+ WORD32 i4_rdoq_level;
+
+ /* Parameter indicating the level at which Quant rounding factors are computed:
+ FIXED_QUANT_ROUNDING : Fixed Quant rounding values are used
+ NCTB_LEVEL_QUANT_ROUNDING : NCTB level Computed Quant rounding values are used
+ CTB_LEVEL_QUANT_ROUNDING : CTB level Computed Quant rounding values are used
+ CU_LEVEL_QUANT_ROUNDING : CU level Computed Quant rounding values are used
+ TU_LEVEL_QUANT_ROUNDING : TU level Computed Quant rounding values are used*/
+ WORD32 i4_quant_rounding_level;
+
+ /* Parameter to how at which level Quant rounding factors are computed:
+ CHROMA_QUANT_ROUNDING : Chroma Quant rounding values are used for chroma */
+ WORD32 i4_chroma_quant_rounding_level;
+
+ /* Parameter indicating the level at which SBH is applied:
+ 0 - SBH disabled; 1 - SBH enabled during RDOPT for all candidates;
+ 2 - SBH enabled only for the final candidate.
+ NOTE(review): SBH_LEVELS_T orders these as BEST_CAND_SBH = 1, ALL_CAND_SBH = 2 - confirm */
+ WORD32 i4_sbh_level;
+
+ /* Parameter indicating the level at which ZERO CBF RDO is applied:
+ 0 - ZCBF disabled
+ 1 - ZCBF enabled during RDOPT for all candidates
+ 2 - ZCBF enabled only for the final candidate
+ NOTE(review): ZCBF_LEVELS_T defines only NO_ZCBF and ZCBF_ENABLE - confirm level 2 is used */
+ WORD32 i4_zcbf_rdo_level;
+
+ /*RDOQ-SBH context structure*/
+ rdoq_sbh_ctxt_t s_rdoq_sbh_ctxt;
+
+ /** Structure to store the Merge/Skip Cand. for EncLoop */
+ cu_inter_merge_skip_t s_cu_inter_merge_skip;
+ /** Structure to store the Mixed mode Cand. for EncLoop */
+ cu_mixed_mode_inter_t s_mixed_mode_inter_cu;
+
+ ihevce_inter_pred_buf_data_t s_pred_buf_data;
+
+ void *pv_422_chroma_intra_pred_buf;
+
+ WORD32 i4_max_num_inter_rdopt_cands;
+
+ /* Output Struct per each CU during recursions */
+ ihevce_enc_cu_node_ctxt_t as_enc_cu_ctxt[MAX_CU_IN_CTB + 1];
+
+ /* Used to store best inter candidate. Used only when */
+ /* 'CU modulated QP override' is enabled */
+ cu_inter_cand_t as_best_cand[MAX_CU_IN_CTB + 1];
+
+ cu_inter_cand_t *ps_best_cand;
+
+ UWORD8 au1_cu_init_cabac_state_a_priori[MAX_CU_IN_CTB + 1][IHEVC_CAB_CTXT_END];
+
+ UWORD8 (*pau1_curr_cu_a_priori_cabac_state)[IHEVC_CAB_CTXT_END];
+
+ /* Used to store pred data of each CU in the CTB. */
+ /* Used only when 'CU modulated QP override' is enabled */
+ void *pv_CTB_pred_luma;
+
+ void *pv_CTB_pred_chroma;
+
+ /**
+ * array for storing recon during SATD and RD opt stage at CU level
+ * one best and one current is required.Luma and chroma together
+ */
+ void *pv_cu_luma_recon;
+
+ /**
+ * array for storing recon during SATD and RD opt stage at CU level
+ * one best and one current is required.Luma and chroma together
+ */
+ void *pv_cu_chrma_recon;
+
+ /**
+ * Array to store pred modes during SATD and RD opt stage at CU level
+ * one best and one current is required
+ */
+ nbr_4x4_t as_cu_recur_nbr[MAX_PU_IN_CTB_ROW * MAX_PU_IN_CTB_ROW];
+
+ /**
+ * Pointer to Array to store pred modes during SATD and RD opt stage at CU level
+ * one best and one current is required
+ */
+ nbr_4x4_t *ps_cu_recur_nbr;
+
+ /**
+ * Context models stored for CU recursion parent evaluation
+ */
+ UWORD8 au1_rdopt_recur_ctxt_models[4][IHEVC_CAB_CTXT_END];
+
+ ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt;
+
+ /**
+ * array for storing coeffs during RD opt stage at CU level
+ * one best and one current is required. Luma and chroma together
+ */
+ /*UWORD8 au1_cu_recur_coeffs[MAX_LUMA_COEFFS_CTB + MAX_CHRM_COEFFS_CTB];*/
+
+ UWORD8 *pu1_cu_recur_coeffs;
+
+ UWORD8 *apu1_cu_level_pingpong_coeff_buf_addr[2];
+
+ WORD16 *api2_cu_level_pingpong_deq_buf_addr[2];
+
+ UWORD8 *pu1_ecd_data;
+
+ /* OPT: flag to skip parent CU=4TU eval during recursion */
+ UWORD8 is_parent_cu_rdopt;
+
+ /**
+ * NOTE(review): the text previously here described an array of block merge
+ * structs (64x64 CU at index 0, four 32x32 CU's at 1 - 4); the field below is a single UWORD8 flag
+ */
+ UWORD8 u1_cabac_states_next_row_copied_flag;
+
+ UWORD8 u1_cabac_states_first_cu_copied_flag;
+
+ UWORD32 u4_cur_ctb_wd;
+
+ UWORD32 u4_cur_ctb_ht;
+
+ /* thread id of the current context */
+ WORD32 thrd_id;
+
+ /** Number of processing threads created run time */
+ WORD32 i4_num_proc_thrds;
+
+ /* Instance number of bit-rate for multiple bit-rate encode */
+ WORD32 i4_bitrate_instance_num;
+
+ WORD32 i4_num_bitrates;
+
+ WORD32 i4_enc_frm_id;
+
+ /* Flag to indicate if chroma needs to be considered for cost calculation */
+ WORD32 i4_consider_chroma_cost;
+
+ /* Number of modes to be evaluated for intra */
+ WORD32 i4_num_modes_to_evaluate_intra;
+
+ /* Number of modes to be evaluated for inter */
+ WORD32 i4_num_modes_to_evaluate_inter;
+ /*pointers for struct to hold RC parameters for each bit-rate instance */
+ enc_loop_rc_params_t
+ *aaps_enc_loop_rc_params[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
+
+ /** Pointer to structure containing function pointers of common*/
+ func_selector_t *ps_func_selector;
+
+ /* Flag to control Top Right Sync during Merge */
+ UWORD8 u1_use_top_at_ctb_boundary;
+
+ UWORD8 u1_is_input_data_hbd;
+
+ UWORD8 u1_bit_depth;
+
+ /* 0 - 400; 1 - 420; 2 - 422; 3 - 444 */
+ UWORD8 u1_chroma_array_type;
+
+ rc_quant_t *ps_rc_quant_ctxt;
+
+ sao_ctxt_t s_sao_ctxt_t;
+
+ /* Offset to get the Qp for the last CU of upper CTB-row.
+ This offset is from the current tile top row QP map start.
+ This will only be consumed by the first CU of current CTB-row
+ iff [it is skip && entropy sync is off] */
+ WORD32 *pi4_offset_for_last_cu_qp;
+
+ double i4_lamda_modifier;
+ double i4_uv_lamda_modifier;
+ WORD32 i4_temporal_layer_id;
+
+ UWORD8 u1_disable_intra_eval;
+
+ WORD32 i4_quant_round_tu[2][32 * 32];
+
+ WORD32 *pi4_quant_round_factor_tu_0_1[5];
+ WORD32 *pi4_quant_round_factor_tu_1_2[5];
+
+ WORD32 i4_quant_round_4x4[2][4 * 4];
+ WORD32 i4_quant_round_8x8[2][8 * 8];
+ WORD32 i4_quant_round_16x16[2][16 * 16];
+ WORD32 i4_quant_round_32x32[2][32 * 32];
+
+ WORD32 *pi4_quant_round_factor_cu_ctb_0_1[5];
+ WORD32 *pi4_quant_round_factor_cu_ctb_1_2[5];
+
+ WORD32 i4_quant_round_cr_4x4[2][4 * 4];
+ WORD32 i4_quant_round_cr_8x8[2][8 * 8];
+ WORD32 i4_quant_round_cr_16x16[2][16 * 16];
+
+ WORD32 *pi4_quant_round_factor_cr_cu_ctb_0_1[3];
+ WORD32 *pi4_quant_round_factor_cr_cu_ctb_1_2[3];
+ /* cost for not coding cu residue i.e forcing no residue syntax as 1 */
+ LWORD64 i8_cu_not_coded_cost;
+
+ /* dependency manager for forward ME sync */
+ void *pv_dep_mngr_encloop_dep_me;
+
+ LWORD64 ai4_source_satd_8x8[64];
+
+ LWORD64 ai4_source_chroma_satd[256];
+
+ UWORD8 u1_is_refPic;
+
+ WORD32 i4_qp_mod;
+
+ WORD32 i4_is_ref_pic;
+
+ WORD32 i4_chroma_format;
+
+ WORD32 i4_temporal_layer;
+
+ WORD32 i4_use_const_lamda_modifier;
+
+ double f_i_pic_lamda_modifier;
+
+ LWORD64 i8_distortion;
+
+ WORD32 i4_use_ctb_level_lamda;
+
+ float f_str_ratio;
+
+ /* Flag to indicate if current frame is to be shared with other clients.
+ Used only in distributed-encoding */
+ WORD32 i4_share_flag;
+
+ /* Pointer to the current recon being processed.
+ Needed for enabling TMVP in dist-encoding */
+ void *pv_frm_recon;
+
+ ihevce_cmn_opt_func_t s_cmn_opt_func;
+
+ /* The ME analogue to the struct above was not included since */
+ /* that would have entailed inclusion of all ME specific */
+ /* header files */
+ /*FT_SAD_EVALUATOR **/
+
+ /*FT_SAD_EVALUATOR **/
+ void *pv_evalsad_pt_npu_mxn_8bit;
+ UWORD8 u1_enable_psyRDOPT;
+
+ UWORD8 u1_is_stasino_enabled;
+
+ UWORD32 u4_psy_strength;
+ /*Sub PIC rc context */
+
+ WORD32 i4_sub_pic_level_rc;
+ WORD32 i4_num_ctb_for_out_scale;
+
+ /**
+ * Accumulated bits of all cu for required CTBS estimated during RDO evaluation.
+ * Required for sub pic level RC. Reset when required CU/CTB count is reached.
+ */
+ UWORD32 u4_total_cu_bits;
+
+ UWORD32 u4_total_cu_bits_mul_qs;
+
+ UWORD32 u4_total_cu_hdr_bits;
+
+ UWORD32 u4_cu_tot_bits_into_qscale;
+
+ UWORD32 u4_cu_tot_bits;
+
+ /*Scale added to the current qscale, output from sub pic rc*/
+ WORD32 i4_cu_qp_sub_pic_rc;
+
+ /*Frame level L1 IPE sad*/
+ LWORD64 i8_frame_l1_ipe_sad;
+
+ /*Frame level L0 IPE satd*/
+ LWORD64 i8_frame_l0_ipe_satd;
+
+ /*Frame level L1 ME sad*/
+ LWORD64 i8_frame_l1_me_sad;
+
+ /*Frame level L1 activity factor*/
+ LWORD64 i8_frame_l1_activity_fact;
+ /*bits estimated for frame calculated for sub pic rc bit control */
+ WORD32 ai4_frame_bits_estimated[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
+ /** I Scene cut */
+ WORD32 i4_is_I_scenecut;
+
+ /** Non Scene cut */
+ WORD32 i4_is_non_I_scenecut;
+
+ /** Frames for which online/offline model is not valid */
+ WORD32 i4_is_model_valid;
+
+ /** Steady State Frame */
+ //WORD32 i4_is_steady_state;
+
+ WORD32 i4_is_first_query;
+
+ /* Pointer to Tile params base */
+ void *pv_tile_params_base;
+
+ /** The index of column tile for which it is working */
+ WORD32 i4_tile_col_idx;
+
+ WORD32 i4_max_search_range_horizontal;
+
+ WORD32 i4_max_search_range_vertical;
+
+ WORD32 i4_is_ctb_qp_modified;
+
+ WORD32 i4_display_num;
+
+ WORD32 i4_pred_qp;
+
+ /*assumption of qg size is 8x8 block size*/
+ WORD32 ai4_qp_qg[8 * 8];
+
+ WORD32 i4_last_cu_qp_from_prev_ctb;
+
+ WORD32 i4_prev_QP;
+
+ UWORD8 u1_max_inter_tr_depth;
+
+ UWORD8 u1_max_intra_tr_depth;
+
+} ihevce_enc_loop_ctxt_t;
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+
+/** @brief RDOQ_LEVELS_T: This enumeration specifies the RDOQ mode of operation
+*
+* NO_RDOQ : RDOQ is not performed
+* BEST_CAND_RDOQ : RDOQ for final candidate only
+* ALL_CAND_RDOQ : RDOQ for all candidates
+*/
+typedef enum
+{
+ NO_RDOQ, /* 0 */
+ BEST_CAND_RDOQ, /* 1 */
+ ALL_CAND_RDOQ, /* 2 */
+} RDOQ_LEVELS_T;
+
+/** @brief QUANT_ROUNDING_COEFF_LEVELS_T: This enumeration specifies the Coef level RDOQ mode of operation
+*
+* FIXED_QUANT_ROUNDING : Fixed Quant rounding values are used
+* NCTB_LEVEL_QUANT_ROUNDING : NCTB level Computed Quant rounding values are used
+* CTB_LEVEL_QUANT_ROUNDING : CTB level Computed Quant rounding values are used
+* CU_LEVEL_QUANT_ROUNDING : CU level Computed Quant rounding values are used
+* TU_LEVEL_QUANT_ROUNDING : TU level Computed Quant rounding values are used
+* Default for all candidates, based on RDOQ_LEVELS_T choose to best candidate
+*/
+typedef enum
+{
+ FIXED_QUANT_ROUNDING,
+ NCTB_LEVEL_QUANT_ROUNDING,
+ CTB_LEVEL_QUANT_ROUNDING,
+ CU_LEVEL_QUANT_ROUNDING,
+ TU_LEVEL_QUANT_ROUNDING,
+ CHROMA_QUANT_ROUNDING /* Chroma Quant rounding values are used for chroma */
+} QUANT_ROUNDING_COEFF_LEVELS_T;
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+
+/** @brief SBH_LEVELS_T: This enumeration specifies the SBH mode of operation
+*
+* NO_SBH : SBH is not performed
+* BEST_CAND_SBH : SBH for final candidate only
+* ALL_CAND_SBH : SBH for all candidates
+*/
+typedef enum
+{
+ NO_SBH, /* 0 */
+ BEST_CAND_SBH, /* 1 */
+ ALL_CAND_SBH, /* 2 */
+} SBH_LEVELS_T;
+
+/** @brief ZCBF_LEVELS_T: This enumeration specifies the ZeroCBF RDO mode of operation
+*
+* NO_ZCBF : ZCBF RDO is not performed
+* ZCBF_ENABLE : ZCBF RDO for all candidates
+*/
+typedef enum
+{
+ NO_ZCBF,
+ ZCBF_ENABLE,
+} ZCBF_LEVELS_T;
+
+/**
+******************************************************************************
+* @brief Encode loop master context structure
+******************************************************************************
+*/
+typedef struct
+{
+ /** Array of pointers to the encode loop context of each processing thread */
+ ihevce_enc_loop_ctxt_t *aps_enc_loop_thrd_ctxt[MAX_NUM_FRM_PROC_THRDS_ENC];
+
+ /** Number of processing threads created run time */
+ WORD32 i4_num_proc_thrds;
+
+ /**
+ * Array of top row cu skip flags (1 bit per 8x8CU)
+ */
+ UWORD8 au1_cu_skip_top_row[HEVCE_MAX_WIDTH >> 6];
+
+ /** Context models (stored at the end of second CTB in a row)
+ * stored in packed form pState[bits6-1] | MPS[bit0]
+ * for each CTB row
+ * using entropy sync model in RD opt
+ */
+ UWORD8 au1_ctxt_models[MAX_NUM_CTB_ROWS_FRM][IHEVC_CAB_CTXT_END];
+
+ /** Dependency manager for controlling EncLoop Top-Right CU dependency
+ * One per each bit-rate and one per each frame in parallel
+ */
+ void *aapv_dep_mngr_enc_loop_cu_top_right[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
+
+ /** Dependency manager for controlling Deblocking Top dependency
+ * One per each bit-rate and one per each frame in parallel
+ */
+ void *aapv_dep_mngr_enc_loop_dblk[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
+
+ /** number of bit-rate instances running */
+ WORD32 i4_num_bitrates;
+
+ /** number of enc frames running in parallel */
+ WORD32 i4_num_enc_loop_frm_pllel;
+
+ /* Pointer to Tile params base */
+ void *pv_tile_params_base;
+ /* Offset to get the Qp for the last CU of upper CTB-row.
+ This offset is from the current tile top row QP map start.
+
+ This will only be consumed by the first CU of current CTB-row
+ iff [it is skip && entropy sync is off]
+ There is one entry for every tile-column because the offset remains constant
+ for all tiles lying in a tile-column */
+ WORD32 ai4_offset_for_last_cu_qp[MAX_TILE_COLUMNS];
+} ihevce_enc_loop_master_ctxt_t;
+
+/**
+******************************************************************************
+* @brief This struct is used for storing data required by the block merge
+* function
+******************************************************************************
+*/
+typedef struct
+{
+ block_data_8x8_t *ps_8x8_data;
+
+ block_data_16x16_t *ps_16x16_data;
+
+ block_data_32x32_t *ps_32x32_data;
+
+ block_data_64x64_t *ps_64x64_data;
+
+ part_type_results_t **ps_32x32_results;
+
+ cur_ctb_cu_tree_t *ps_cu_tree;
+
+ ipe_l0_ctb_analyse_for_me_t *ps_cur_ipe_ctb;
+
+ mv_pred_ctxt_t *ps_mv_pred_ctxt;
+
+ recon_pic_buf_t *(*aps_ref_list)[HEVCE_MAX_REF_PICS * 2];
+
+ nbr_4x4_t *ps_top_nbr_4x4;
+
+ nbr_4x4_t *ps_left_nbr_4x4;
+
+ nbr_4x4_t *ps_curr_nbr_4x4;
+
+ UWORD8 *pu1_inp;
+
+ UWORD8 *pu1_ctb_nbr_map;
+
+ WORD32 i4_nbr_map_strd;
+
+ WORD32 inp_stride;
+
+ WORD32 i4_ctb_x_off;
+
+ WORD32 i4_ctb_y_off;
+
+ WORD32 use_satd_for_err_calc;
+
+ WORD32 lambda;
+
+ WORD32 lambda_q_shift;
+
+ WORD32 frm_qstep;
+
+ WORD32 num_4x4_in_ctb;
+
+ UWORD8 *pu1_wkg_mem;
+
+ UWORD8 **ppu1_pred;
+
+ UWORD8 u1_bidir_enabled;
+
+ UWORD8 u1_max_tr_depth;
+
+ WORD32 i4_ctb_pos;
+
+ WORD32 i4_ctb_size;
+
+ UWORD8 *apu1_wt_inp[MAX_REFS_SEARCHABLE + 1];
+
+ /** Pointer of Dep Mngr for EncLoop Top-Right CU dependency */
+ void *pv_dep_mngr_enc_loop_cu_top_right;
+ /** The current cu row no. for Dep Manager to Check */
+ WORD32 i4_dep_mngr_cur_cu_row_no;
+ /** The Top cu row no. for Dep Manager to Check */
+ WORD32 i4_dep_mngr_top_cu_row_no;
+
+ WORD8 i1_quality_preset;
+
+ /* Flag to control Top Right Sync during Merge */
+ UWORD8 u1_use_top_at_ctb_boundary;
+
+} block_merge_input_t;
+
+/* Structure which stores the info regarding the TU's present in the CU*/
+typedef struct tu_prms_t
+{
+ UWORD8 u1_tu_size; /* TU size; ihevce_tu_tree_update reads the root TU size from here on entry */
+
+ UWORD8 u1_x_off; /* x offset of the TU, in the same units as u1_tu_size */
+
+ UWORD8 u1_y_off; /* y offset of the TU, in the same units as u1_tu_size */
+
+ WORD32 i4_tu_cost;
+
+ WORD32 i4_early_cbf; /* early CBF flag; ihevce_tu_tree_update forces 1 for the 4x4 TUs of a split 8x8 */
+
+} tu_prms_t;
+
+typedef struct
+{
+ cu_enc_loop_out_t **pps_cu_final;
+
+ pu_t **pps_row_pu;
+
+ tu_enc_loop_out_t **pps_row_tu;
+
+ UWORD8 **ppu1_row_ecd_data;
+
+ WORD32 *pi4_num_pus_in_ctb;
+
+ WORD32 *pi4_last_cu_pos_in_ctb;
+
+ WORD32 *pi4_last_cu_size;
+
+ UWORD8 *pu1_num_cus_in_ctb_out;
+
+} cu_final_update_prms;
+
+typedef struct
+{
+ cu_nbr_prms_t *ps_cu_nbr_prms;
+
+ cu_inter_cand_t *ps_best_inter_cand;
+
+ enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms;
+
+ WORD32 packed_pred_mode;
+
+ WORD32 rd_opt_best_idx;
+
+ void *pv_src;
+
+ WORD32 src_strd;
+
+ void *pv_pred;
+
+ WORD32 pred_strd;
+
+ void *pv_pred_chrm;
+
+ WORD32 pred_chrm_strd;
+
+ UWORD8 *pu1_final_ecd_data;
+
+ UWORD8 *pu1_csbf_buf;
+
+ WORD32 csbf_strd;
+
+ void *pv_luma_recon;
+
+ WORD32 recon_luma_strd;
+
+ void *pv_chrm_recon;
+
+ WORD32 recon_chrma_strd;
+
+ UWORD8 u1_cu_pos_x;
+
+ UWORD8 u1_cu_pos_y;
+
+ UWORD8 u1_cu_size;
+
+ WORD8 i1_cu_qp;
+
+ UWORD8 u1_will_cabac_state_change;
+
+ UWORD8 u1_recompute_sbh_and_rdoq;
+
+ UWORD8 u1_is_first_pass;
+
+#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ UWORD8 u1_is_cu_noisy;
+#endif
+
+} final_mode_process_prms_t;
+
+typedef struct
+{
+ cu_inter_cand_t s_best_cand;
+
+ /* The size is twice of what is required to ensure availability */
+ /* of adequate space for 'HBD' case */
+ UWORD8 au1_pred_luma[MAX_CU_SIZE * MAX_CU_SIZE * 2];
+
+ /* The size is twice of what is required to ensure availability */
+ /* of adequate space for 422 case */
+ UWORD8 au1_pred_chroma[MAX_CU_SIZE * MAX_CU_SIZE * 2];
+} final_mode_state_t;
+
+typedef struct
+{
+ cu_mixed_mode_inter_t *ps_mixed_modes_datastore;
+
+ cu_inter_cand_t *ps_me_cands;
+
+ cu_inter_cand_t *ps_merge_cands;
+
+ mv_pred_ctxt_t *ps_mv_pred_ctxt;
+
+ inter_pred_ctxt_t *ps_mc_ctxt;
+
+ UWORD8 *pu1_ctb_nbr_map;
+
+ void *pv_src;
+
+ nbr_4x4_t *ps_cu_nbr_buf;
+
+ nbr_4x4_t *ps_left_nbr_4x4;
+
+ nbr_4x4_t *ps_top_nbr_4x4;
+
+ nbr_4x4_t *ps_topleft_nbr_4x4;
+
+ WORD32 i4_ctb_nbr_map_stride;
+
+ WORD32 i4_src_strd;
+
+ WORD32 i4_nbr_4x4_left_strd;
+
+ UWORD8 u1_cu_size;
+
+ UWORD8 u1_cu_pos_x;
+
+ UWORD8 u1_cu_pos_y;
+
+ UWORD8 u1_num_me_cands;
+
+ UWORD8 u1_num_merge_cands;
+
+ UWORD8 u1_max_num_mixed_mode_cands_to_select;
+
+ UWORD8 u1_max_merge_candidates;
+
+ UWORD8 u1_use_satd_for_merge_eval;
+
+} ihevce_mixed_inter_modes_selector_prms_t;
+
+typedef struct
+{
+ LWORD64 i8_ssd;
+
+ LWORD64 i8_cost;
+
+#if ENABLE_INTER_ZCU_COST
+ LWORD64 i8_not_coded_cost;
+#endif
+
+ UWORD32 u4_sad;
+
+ WORD32 i4_bits;
+
+ WORD32 i4_num_bytes_used_for_ecd;
+
+ WORD32 i4_zero_col;
+
+ WORD32 i4_zero_row;
+
+ UWORD8 u1_cbf;
+
+ UWORD8 u1_reconBufId;
+
+ UWORD8 u1_is_valid_node;
+
+ UWORD8 u1_size;
+
+ UWORD8 u1_posx;
+
+ UWORD8 u1_posy;
+} tu_node_data_t;
+
+typedef struct tu_tree_node_t
+{
+ struct tu_tree_node_t *ps_child_node_tl; /* presumably the top-left quadrant child - confirm */
+
+ struct tu_tree_node_t *ps_child_node_tr; /* presumably the top-right quadrant child - confirm */
+
+ struct tu_tree_node_t *ps_child_node_bl; /* presumably the bottom-left quadrant child - confirm */
+
+ struct tu_tree_node_t *ps_child_node_br; /* presumably the bottom-right quadrant child - confirm */
+
+ tu_node_data_t s_luma_data;
+
+ /* 2 because of the 2 subTU's when input is 422 */
+ tu_node_data_t as_cb_data[2];
+
+ tu_node_data_t as_cr_data[2];
+
+ UWORD8 u1_is_valid_node;
+
+} tu_tree_node_t;
+
+/*****************************************************************************/
+/* Extern Variable Declarations */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+typedef LWORD64 (*pf_cu_mode_decide)(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ enc_loop_cu_prms_t *ps_cu_prms,
+ cu_analyse_t *ps_cu_analyse,
+ final_mode_state_t *ps_final_mode_state,
+ UWORD8 *pu1_ecd_data,
+ pu_col_mv_t *ps_col_pu,
+ UWORD8 *pu1_col_pu_map,
+ WORD32 col_start_pu_idx);
+
+typedef LWORD64 (*pf_inter_rdopt_cu_mc_mvp)(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ cu_inter_cand_t *ps_inter_cand,
+ WORD32 cu_size,
+ WORD32 cu_pos_x,
+ WORD32 cu_pos_y,
+ nbr_4x4_t *ps_left_nbr_4x4,
+ nbr_4x4_t *ps_top_nbr_4x4,
+ nbr_4x4_t *ps_topleft_nbr_4x4,
+ WORD32 nbr_4x4_left_strd,
+ WORD32 curr_buf_idx);
+
+typedef LWORD64 (*pf_inter_rdopt_cu_ntu)(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ enc_loop_cu_prms_t *ps_cu_prms,
+ void *pv_src,
+ WORD32 cu_size,
+ WORD32 cu_pos_x,
+ WORD32 cu_pos_y,
+ WORD32 curr_buf_idx,
+ enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
+ cu_inter_cand_t *ps_inter_cand,
+ cu_analyse_t *ps_cu_analyse,
+ WORD32 i4_alpha_stim_multiplier);
+
+typedef void (*pf_intra_chroma_pred_mode_selector)(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
+ cu_analyse_t *ps_cu_analyse,
+ WORD32 rd_opt_curr_idx,
+ WORD32 tu_mode,
+ WORD32 i4_alpha_stim_multiplier,
+ UWORD8 u1_is_cu_noisy);
+
+typedef LWORD64 (*pf_intra_rdopt_cu_ntu)(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ enc_loop_cu_prms_t *ps_cu_prms,
+ void *pv_pred_org,
+ WORD32 pred_strd_org,
+ enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
+ UWORD8 *pu1_luma_mode,
+ cu_analyse_t *ps_cu_analyse,
+ void *pv_curr_src,
+ void *pv_cu_left,
+ void *pv_cu_top,
+ void *pv_cu_top_left,
+ nbr_4x4_t *ps_left_nbr_4x4,
+ nbr_4x4_t *ps_top_nbr_4x4,
+ WORD32 nbr_4x4_left_strd,
+ WORD32 cu_left_stride,
+ WORD32 curr_buf_idx,
+ WORD32 func_proc_mode,
+ WORD32 i4_alpha_stim_multiplier);
+
+typedef void (*pf_final_rdopt_mode_prcs)(
+ ihevce_enc_loop_ctxt_t *ps_ctxt, final_mode_process_prms_t *ps_prms);
+
+typedef void (*pf_store_cu_results)(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ enc_loop_cu_prms_t *ps_cu_prms,
+ final_mode_state_t *ps_final_state);
+
+typedef void (*pf_enc_loop_cu_bot_copy)(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ enc_loop_cu_prms_t *ps_cu_prms,
+ ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt,
+ WORD32 curr_cu_pos_in_row,
+ WORD32 curr_cu_pos_in_ctb);
+
+typedef void (*pf_enc_loop_ctb_left_copy)(
+ ihevce_enc_loop_ctxt_t *ps_ctxt, enc_loop_cu_prms_t *ps_cu_prms);
+
+#endif /* _IHEVCE_ENC_LOOP_STRUCTS_H_ */
diff --git a/encoder/ihevce_enc_loop_utils.c b/encoder/ihevce_enc_loop_utils.c
new file mode 100644
index 0000000..7b67405
--- /dev/null
+++ b/encoder/ihevce_enc_loop_utils.c
@@ -0,0 +1,11278 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*!
+******************************************************************************
+* \file ihevce_enc_loop_utils.c
+*
+* \brief
+* This file contains utility functions of Encode loop
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+*
+* List of Functions
+*
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+#include <limits.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_macros.h"
+#include "ihevc_debug.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+#include "ihevc_common_tables.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_hle_interface.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_multi_thrd_funcs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_entropy_structs.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+#include "ihevce_ipe_instr_set_router.h"
+#include "ihevce_decomp_pre_intra_structs.h"
+#include "ihevce_decomp_pre_intra_pass.h"
+#include "ihevce_enc_loop_structs.h"
+#include "ihevce_nbr_avail.h"
+#include "ihevce_enc_loop_utils.h"
+#include "ihevce_sub_pic_rc.h"
+#include "ihevce_global_tables.h"
+#include "ihevce_bs_compute_ctb.h"
+#include "ihevce_cabac_rdo.h"
+#include "ihevce_deblk.h"
+#include "ihevce_frame_process.h"
+#include "ihevce_rc_enc_structs.h"
+#include "hme_datatype.h"
+#include "hme_interface.h"
+#include "hme_common_defs.h"
+#include "hme_defs.h"
+#include "hme_common_utils.h"
+#include "ihevce_me_instr_set_router.h"
+#include "ihevce_enc_subpel_gen.h"
+#include "ihevce_inter_pred.h"
+#include "ihevce_mv_pred.h"
+#include "ihevce_mv_pred_merge.h"
+#include "ihevce_enc_loop_inter_mode_sifter.h"
+#include "ihevce_enc_cu_recursion.h"
+#include "ihevce_enc_loop_pass.h"
+#include "ihevce_common_utils.h"
+#include "ihevce_dep_mngr_interface.h"
+#include "ihevce_sao.h"
+#include "ihevce_tile_interface.h"
+#include "ihevce_profile.h"
+#include "ihevce_stasino_helpers.h"
+#include "ihevce_tu_tree_selector.h"
+
+/*****************************************************************************/
+/* Globals */
+/*****************************************************************************/
+
+extern UWORD16 gau2_ihevce_cabac_bin_to_bits[64 * 2];
+extern const UWORD8 gu1_hevce_scan4x4[3][16];
+extern const UWORD8 gu1_hevce_sigcoeff_ctxtinc[4][16];
+extern const UWORD8 gu1_hevce_sigcoeff_ctxtinc_tr4[16];
+extern const UWORD8 gu1_hevce_sigcoeff_ctxtinc_00[16];
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+#define ENABLE_ZERO_CBF 1
+#define DISABLE_RDOQ_INTRA 0
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+void *ihevce_tu_tree_update(
+ tu_prms_t *ps_tu_prms,
+ WORD32 *pnum_tu_in_cu,
+ WORD32 depth,
+ WORD32 tu_split_flag,
+ WORD32 tu_early_cbf,
+ WORD32 i4_x_off,
+ WORD32 i4_y_off)
+{
+ /* Recursively expands the packed TU split flags into one ps_tu_prms entry per leaf TU */
+ WORD32 p_tu_split_flag[4];
+ WORD32 p_tu_early_cbf[4];
+ /* Root TU size is read from the current entry; leaf calls seed the next entry with it */
+ WORD32 tu_size = ps_tu_prms->u1_tu_size;
+ /* Nodes of size >= 16 with the split bit (bit 0) set recurse into their four quadrants */
+ if(((tu_size >> depth) >= 16) && (tu_split_flag & 0x1))
+ {
+ if((tu_size >> depth) == 32)
+ {
+ /* Get the individual TU split flags: 5 bits per child, bit 0 is this node's own flag */
+ p_tu_split_flag[0] = (tu_split_flag >> 16) & 0x1F;
+ p_tu_split_flag[1] = (tu_split_flag >> 11) & 0x1F;
+ p_tu_split_flag[2] = (tu_split_flag >> 6) & 0x1F;
+ p_tu_split_flag[3] = (tu_split_flag >> 1) & 0x1F;
+
+ /* Get the early CBF flags (same 5 bits per child layout) */
+ p_tu_early_cbf[0] = (tu_early_cbf >> 16) & 0x1F;
+ p_tu_early_cbf[1] = (tu_early_cbf >> 11) & 0x1F;
+ p_tu_early_cbf[2] = (tu_early_cbf >> 6) & 0x1F;
+ p_tu_early_cbf[3] = (tu_early_cbf >> 1) & 0x1F;
+ }
+ else
+ {
+ /* Get the individual TU split flags: 1 bit per child, bit 0 is this node's own flag */
+ p_tu_split_flag[0] = ((tu_split_flag >> 4) & 0x1);
+ p_tu_split_flag[1] = ((tu_split_flag >> 3) & 0x1);
+ p_tu_split_flag[2] = ((tu_split_flag >> 2) & 0x1);
+ p_tu_split_flag[3] = ((tu_split_flag >> 1) & 0x1);
+
+ /* Get the early CBF flags (same 1 bit per child layout) */
+ p_tu_early_cbf[0] = ((tu_early_cbf >> 4) & 0x1);
+ p_tu_early_cbf[1] = ((tu_early_cbf >> 3) & 0x1);
+ p_tu_early_cbf[2] = ((tu_early_cbf >> 2) & 0x1);
+ p_tu_early_cbf[3] = ((tu_early_cbf >> 1) & 0x1);
+ }
+
+ ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update(
+ ps_tu_prms,
+ pnum_tu_in_cu,
+ depth + 1,
+ p_tu_split_flag[0],
+ p_tu_early_cbf[0],
+ i4_x_off,
+ i4_y_off);
+
+ ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update(
+ ps_tu_prms,
+ pnum_tu_in_cu,
+ depth + 1,
+ p_tu_split_flag[1],
+ p_tu_early_cbf[1],
+ (i4_x_off + (tu_size >> (depth + 1))),
+ i4_y_off);
+
+ ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update(
+ ps_tu_prms,
+ pnum_tu_in_cu,
+ depth + 1,
+ p_tu_split_flag[2],
+ p_tu_early_cbf[2],
+ i4_x_off,
+ (i4_y_off + (tu_size >> (depth + 1))));
+
+ ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update(
+ ps_tu_prms,
+ pnum_tu_in_cu,
+ depth + 1,
+ p_tu_split_flag[3],
+ p_tu_early_cbf[3],
+ (i4_x_off + (tu_size >> (depth + 1))),
+ (i4_y_off + (tu_size >> (depth + 1))));
+ }
+ else
+ {
+ if(tu_split_flag & 0x1)
+ {
+ /* This piece of code will be entered for the 8x8, if it is split
+ Update the 4 child TU's accordingly. */
+
+ (*pnum_tu_in_cu) += 4;
+
+ /* TL TU update */
+ ps_tu_prms->u1_tu_size = tu_size >> (depth + 1);
+
+ ps_tu_prms->u1_x_off = i4_x_off;
+
+ ps_tu_prms->u1_y_off = i4_y_off;
+
+ /* Early CBF is not done for 4x4 transforms */
+ ps_tu_prms->i4_early_cbf = 1;
+
+ ps_tu_prms++;
+
+ /* TR TU update */
+ ps_tu_prms->u1_tu_size = tu_size >> (depth + 1);
+
+ ps_tu_prms->u1_x_off = i4_x_off + (tu_size >> (depth + 1));
+
+ ps_tu_prms->u1_y_off = i4_y_off;
+
+ /* Early CBF is not done for 4x4 transforms */
+ ps_tu_prms->i4_early_cbf = 1;
+
+ ps_tu_prms++;
+
+ /* BL TU update */
+ ps_tu_prms->u1_tu_size = tu_size >> (depth + 1);
+
+ ps_tu_prms->u1_x_off = i4_x_off;
+
+ ps_tu_prms->u1_y_off = i4_y_off + (tu_size >> (depth + 1));
+
+ /* Early CBF is not done for 4x4 transforms */
+ ps_tu_prms->i4_early_cbf = 1;
+
+ ps_tu_prms++;
+
+ /* BR TU update */
+ ps_tu_prms->u1_tu_size = tu_size >> (depth + 1);
+
+ ps_tu_prms->u1_x_off = i4_x_off + (tu_size >> (depth + 1));
+
+ ps_tu_prms->u1_y_off = i4_y_off + (tu_size >> (depth + 1));
+
+ /* Early CBF is not done for 4x4 transforms */
+ ps_tu_prms->i4_early_cbf = 1;
+ }
+ else
+ {
+ /* Update the TU params */
+ ps_tu_prms->u1_tu_size = tu_size >> depth;
+
+ ps_tu_prms->u1_x_off = i4_x_off;
+
+ ps_tu_prms->u1_y_off = i4_y_off;
+
+ (*pnum_tu_in_cu)++;
+
+ /* Early CBF update for current TU */
+ ps_tu_prms->i4_early_cbf = tu_early_cbf & 0x1;
+ }
+ if((*pnum_tu_in_cu) < MAX_TU_IN_CTB)
+ {
+ ps_tu_prms++;
+ /* Seed the next entry with the root size; the next call reads it back as tu_size */
+ ps_tu_prms->u1_tu_size = tu_size;
+ }
+ }
+ /* Next entry to fill, or the last one written once MAX_TU_IN_CTB entries are counted */
+ return ps_tu_prms;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_compute_quant_rel_param \endif
+*
+* \brief
+* This function updates quantization related parameters like qp_mod_6 etc in
+* context according to new qp
+*
+* \param[in,out] ps_ctxt : enc loop context; QP derived fields are written
+* \param[in] i1_cu_qp : new CU QP
+*
+* \date
+* 08/01/2013
+*
+* \author
+* Ittiam
+*
+* \return
+* None
+******************************************************************************
+*/
+void ihevce_compute_quant_rel_param(ihevce_enc_loop_ctxt_t *ps_ctxt, WORD8 i1_cu_qp)
+{
+ /* Chroma QP: chroma array type 2 caps (QP + chroma offset) at 51; the other */
+ /* types map it through gai1_ihevc_chroma_qp_scale, biased by MAX_QP_BD_OFFSET */
+ ps_ctxt->i4_chrm_cu_qp =
+ (ps_ctxt->u1_chroma_array_type == 2)
+ ? MIN(i1_cu_qp + ps_ctxt->i4_chroma_qp_offset, 51)
+ : gai1_ihevc_chroma_qp_scale[i1_cu_qp + ps_ctxt->i4_chroma_qp_offset + MAX_QP_BD_OFFSET];
+ /* Luma and chroma QPs are split into div-6 / mod-6 parts after adding */
+ /* 6 * (bit depth - 8) to the QP */
+ ps_ctxt->i4_cu_qp_div6 = (i1_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) / 6;
+ ps_ctxt->i4_cu_qp_mod6 = (i1_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) % 6;
+ ps_ctxt->i4_chrm_cu_qp_div6 = (ps_ctxt->i4_chrm_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) / 6;
+ ps_ctxt->i4_chrm_cu_qp_mod6 = (ps_ctxt->i4_chrm_cu_qp + (6 * (ps_ctxt->u1_bit_depth - 8))) % 6;
+
+#define INTER_RND_QP_BY_6
+#ifdef INTER_RND_QP_BY_6
+ /* quant factor without RDOQ is 1/6th of shift for inter : like in H264 */
+ {
+ ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] =
+ (WORD32)(((1 << QUANT_ROUND_FACTOR_Q) / (float)6) + 0.5f);
+ }
+#else
+ /* quant factor without RDOQ is 1/3rd of shift for inter (truncating integer divide) */
+ ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER] = (1 << QUANT_ROUND_FACTOR_Q) / 3;
+#endif
+
+ if(ISLICE == ps_ctxt->i1_slice_type)
+ {
+ /* quant factor without RDOQ is 1/3rd of shift for intra : like in H264 */
+ ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] =
+ (WORD32)(((1 << QUANT_ROUND_FACTOR_Q) / (float)3) + 0.5f);
+ }
+ else
+ {
+ if(0) /*TRAQO_EXT_ENABLE_ONE_THIRD_RND*/
+ {
+ /* quant factor without RDOQ is 1/3rd of shift for intra : like in H264 */
+ ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] =
+ (WORD32)(((1 << QUANT_ROUND_FACTOR_Q) / (float)3) + 0.5f);
+ }
+ else
+ {
+ /* quant factor without RDOQ is 1/6th of shift for intra in inter pic */
+ ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTRA] =
+ ps_ctxt->i4_quant_rnd_factor[PRED_MODE_INTER];
+ /* (1 << QUANT_ROUND_FACTOR_Q) / 6; */
+ }
+ }
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_populate_cl_cu_lambda_prms \endif
+*
+* \brief
+* Function which populates the per-QP lambda tables (SSD, SATD, SAD, chroma weight)
+*
+* \param[in,out] ps_ctxt : enc loop ctxt; tables are filled for i2_min_qp..i2_max_qp
+* \param[in] ps_frm_lamda : frame level lambda modifiers (luma and chroma)
+* \param[in] i4_slice_type : slice type; BSLICE with non-zero layer id scales the modifiers
+* \param[in] i4_temporal_lyr_id : Current picture layer id
+* \param[in] i4_lambda_type : 0, 1 or 2; selects how the lambda tables are derived
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_populate_cl_cu_lambda_prms(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ frm_lambda_ctxt_t *ps_frm_lamda,
+ WORD32 i4_slice_type,
+ WORD32 i4_temporal_lyr_id,
+ WORD32 i4_lambda_type)
+{
+ WORD32 i4_curr_cu_qp, i4_curr_cu_qp_offset;
+ double lambda_modifier;
+ double lambda_uv_modifier;
+ double lambda;
+ double lambda_uv;
+
+ WORD32 i4_qp_bdoffset = 6 * (ps_ctxt->u1_bit_depth - 8);
+
+ /* Store the frame level lambda modifiers and temporal layer id in the context */
+ ps_ctxt->i4_lamda_modifier = ps_frm_lamda->lambda_modifier;
+ ps_ctxt->i4_uv_lamda_modifier = ps_frm_lamda->lambda_uv_modifier;
+ ps_ctxt->i4_temporal_layer_id = i4_temporal_lyr_id;
+
+ for(i4_curr_cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp;
+ i4_curr_cu_qp <= ps_ctxt->ps_rc_quant_ctxt->i2_max_qp;
+ i4_curr_cu_qp++)
+ {
+ WORD32 chroma_qp = (ps_ctxt->i4_chroma_format == IV_YUV_422SP_UV)
+ ? MIN(i4_curr_cu_qp, 51)
+ : gai1_ihevc_chroma_qp_scale[i4_curr_cu_qp + MAX_QP_BD_OFFSET];
+
+ i4_curr_cu_qp_offset = i4_curr_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset; /* index into all tables */
+
+ lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0)); /* recomputed in cases 0, 1 and 2 */
+ lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0));
+
+ if((BSLICE == i4_slice_type) && (i4_temporal_lyr_id))
+ {
+ lambda_modifier = ps_frm_lamda->lambda_modifier *
+ CLIP3((((double)(i4_curr_cu_qp - 12)) / 6.0), 2.00, 4.00);
+ lambda_uv_modifier = ps_frm_lamda->lambda_uv_modifier *
+ CLIP3((((double)(chroma_qp - 12)) / 6.0), 2.00, 4.00);
+ }
+ else
+ {
+ lambda_modifier = ps_frm_lamda->lambda_modifier;
+ lambda_uv_modifier = ps_frm_lamda->lambda_uv_modifier;
+ }
+ if(ps_ctxt->i4_use_const_lamda_modifier) /* overrides the modifiers chosen above */
+ {
+ if(ISLICE == ps_ctxt->i1_slice_type)
+ {
+ lambda_modifier = ps_ctxt->f_i_pic_lamda_modifier;
+ lambda_uv_modifier = ps_ctxt->f_i_pic_lamda_modifier;
+ }
+ else
+ {
+ lambda_modifier = CONST_LAMDA_MOD_VAL;
+ lambda_uv_modifier = CONST_LAMDA_MOD_VAL;
+ }
+ }
+ switch(i4_lambda_type)
+ {
+ case 0: /* lambdas without bit depth offset; type2 tables are copies of the main tables */
+ {
+ i4_qp_bdoffset = 0; /* NOTE: function-scope variable, so it remains 0 for later iterations */
+
+ lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0));
+ lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0));
+
+ lambda *= lambda_modifier;
+ lambda_uv *= lambda_uv_modifier;
+
+ ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] =
+ (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT));
+
+ ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset] =
+ (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT));
+
+ ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
+ (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT));
+ if(ps_ctxt->i4_use_const_lamda_modifier)
+ {
+ ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
+ (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
+ }
+ else
+ {
+ ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
+ (WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT));
+ }
+
+ ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset] =
+ (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
+
+ ps_ctxt->i8_cl_ssd_type2_lambda_qf_array[i4_curr_cu_qp_offset] =
+ ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset];
+
+ ps_ctxt->i8_cl_ssd_type2_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
+ ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset];
+
+ ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] =
+ ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset];
+
+ ps_ctxt->i4_sad_type2_lamda_array[i4_curr_cu_qp_offset] =
+ ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset];
+
+ break;
+ }
+ case 1: /* lambdas include i4_qp_bdoffset; type2 tables are copies of the main tables */
+ {
+ lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0));
+ lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0));
+
+ lambda *= lambda_modifier;
+ lambda_uv *= lambda_uv_modifier;
+
+ ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] =
+ (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT));
+
+ ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset] =
+ (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT));
+
+ ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
+ (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT));
+ if(ps_ctxt->i4_use_const_lamda_modifier)
+ {
+ ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
+ (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
+ }
+ else
+ {
+ ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
+ (WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT));
+ }
+ ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset] =
+ (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
+
+ ps_ctxt->i8_cl_ssd_type2_lambda_qf_array[i4_curr_cu_qp_offset] =
+ ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset];
+
+ ps_ctxt->i8_cl_ssd_type2_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
+ ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset];
+
+ ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] =
+ ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset];
+
+ ps_ctxt->i4_sad_type2_lamda_array[i4_curr_cu_qp_offset] =
+ ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset];
+
+ break;
+ }
+ case 2: /* main tables include i4_qp_bdoffset; type2 tables are computed without it */
+ {
+ lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bdoffset - 12)) / 3.0));
+ lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0));
+
+ lambda *= lambda_modifier;
+ lambda_uv *= lambda_uv_modifier;
+
+ ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] =
+ (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT));
+
+ ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_curr_cu_qp_offset] =
+ (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT));
+
+ ps_ctxt->i8_cl_ssd_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
+ (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT));
+
+ if(ps_ctxt->i4_use_const_lamda_modifier)
+ {
+ ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
+ (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
+ }
+ else
+ {
+ ps_ctxt->i4_satd_lamda_array[i4_curr_cu_qp_offset] =
+ (WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT));
+ }
+ ps_ctxt->i4_sad_lamda_array[i4_curr_cu_qp_offset] =
+ (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
+
+ /* lambda corresponding to 8-bit, for metrics based on 8-bit (example: 8-bit SAD in encloop) */
+ lambda = pow(2.0, (((double)(i4_curr_cu_qp - 12)) / 3.0));
+ lambda_uv = pow(2.0, (((double)(chroma_qp - 12)) / 3.0));
+
+ lambda *= lambda_modifier;
+ lambda_uv *= lambda_uv_modifier;
+
+ ps_ctxt->au4_chroma_cost_weighing_factor_array[i4_curr_cu_qp_offset] = /* second store to this entry in case 2 */
+ (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT));
+
+ ps_ctxt->i8_cl_ssd_type2_lambda_qf_array[i4_curr_cu_qp_offset] =
+ (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT));
+
+ ps_ctxt->i8_cl_ssd_type2_lambda_chroma_qf_array[i4_curr_cu_qp_offset] =
+ (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT));
+ if(ps_ctxt->i4_use_const_lamda_modifier)
+ {
+ ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] =
+ (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
+ }
+ else
+ {
+ ps_ctxt->i4_satd_type2_lamda_array[i4_curr_cu_qp_offset] =
+ (WORD32)(sqrt(lambda * 1.9) * (1 << LAMBDA_Q_SHIFT));
+ }
+
+ ps_ctxt->i4_sad_type2_lamda_array[i4_curr_cu_qp_offset] =
+ (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
+
+ break;
+ }
+ default:
+ {
+ /* Intended to be a barren wasteland! (any other i4_lambda_type populates nothing) */
+ ASSERT(0);
+ }
+ }
+ }
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_get_cl_cu_lambda_prms \endif
+*
+* \brief
+* Function which fetches the lambda params for a CU QP from the per-QP lambda tables
+*
+* \param[in,out] ps_ctxt : enc loop ctxt; the current lambda fields are updated
+* \param[in] i4_cur_cu_qp : CU QP used to index the lambda tables
+* Luma SSD, SATD and SAD lambdas are indexed by i4_cur_cu_qp + i1_qp_offset;
+* the chroma SSD lambda and the chroma cost weighing factor are indexed by the
+* derived chroma QP + i1_qp_offset
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_get_cl_cu_lambda_prms(ihevce_enc_loop_ctxt_t *ps_ctxt, WORD32 i4_cur_cu_qp)
+{
+ WORD32 chroma_qp = (ps_ctxt->u1_chroma_array_type == 2)
+ ? MIN(i4_cur_cu_qp + ps_ctxt->i4_chroma_qp_offset, 51)
+ : gai1_ihevc_chroma_qp_scale
+ [i4_cur_cu_qp + ps_ctxt->i4_chroma_qp_offset + MAX_QP_BD_OFFSET];
+
+ /* closed loop ssd lambda is same as final lambda */
+ ps_ctxt->i8_cl_ssd_lambda_qf =
+ ps_ctxt->i8_cl_ssd_lambda_qf_array[i4_cur_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset];
+ ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
+ ps_ctxt
+ ->i8_cl_ssd_lambda_chroma_qf_array[chroma_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset];
+ ps_ctxt->u4_chroma_cost_weighing_factor =
+ ps_ctxt->au4_chroma_cost_weighing_factor_array
+ [chroma_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset];
+ /* --- Initialized the lambda for SATD computations --- */
+ /* --- 0.95 is the multiplication factor as per HM --- */
+ /* --- 1.9 is the multiplication factor for Hadamard Transform --- */
+ ps_ctxt->i4_satd_lamda =
+ ps_ctxt->i4_satd_lamda_array[i4_cur_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset];
+ ps_ctxt->i4_sad_lamda = /* note: SAD lambda is read from the type2 table, SATD from the main table */
+ ps_ctxt->i4_sad_type2_lamda_array[i4_cur_cu_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset];
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_update_pred_qp \endif
+*
+* \brief
+* Computes pred qp for the given CU and stores it in ps_ctxt->i4_pred_qp
+*
+* \param[in,out] ps_ctxt : enc loop ctxt (i4_prev_QP and ai4_qp_qg read, i4_pred_qp written)
+* \param[in] cu_pos_x : CU x position, column index into ai4_qp_qg
+* \param[in] cu_pos_y : CU y position, row index into ai4_qp_qg (row stride of 8)
+* \return None. At (0,0) pred qp is i4_prev_QP; otherwise it is the rounded average
+* of the left and top QPs, where i4_prev_QP stands in for a neighbour at x == 0 or y == 0
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_update_pred_qp(ihevce_enc_loop_ctxt_t *ps_ctxt, WORD32 cu_pos_x, WORD32 cu_pos_y)
+{
+ WORD32 i4_pred_qp = 0x7FFFFFFF; /* placeholder; always overwritten below */
+ WORD32 i4_top, i4_left;
+ if(cu_pos_x == 0 && cu_pos_y == 0) /*CTB start*/
+ {
+ i4_pred_qp = ps_ctxt->i4_prev_QP;
+ }
+ else
+ {
+ if(cu_pos_y == 0) /*CTB boundary*/
+ {
+ i4_top = ps_ctxt->i4_prev_QP;
+ }
+ else /*within CTB*/
+ {
+ i4_top = ps_ctxt->ai4_qp_qg[(cu_pos_y - 1) * 8 + (cu_pos_x)];
+ }
+ if(cu_pos_x == 0) /*CTB boundary*/
+ {
+ i4_left = ps_ctxt->i4_prev_QP;
+ }
+ else /*within CTB*/
+ {
+ i4_left = ps_ctxt->ai4_qp_qg[(cu_pos_y)*8 + (cu_pos_x - 1)];
+ }
+ i4_pred_qp = (i4_left + i4_top + 1) >> 1; /* average of left and top, rounded up */
+ }
+ ps_ctxt->i4_pred_qp = i4_pred_qp;
+ return;
+}
+/*!
+******************************************************************************
+* \if Function name : ihevce_compute_cu_level_QP \endif
+*
+* \brief
+* Computes cu level QP with Traqo,Spatial Mod and In-frame RC
+*
+* \param[in,out] ps_ctxt : enc loop ctxt (frame QP, pred QP and RC quant tables are read)
+* \param[in] i4_activity_for_qp : scale applied to the frame qscale when i4_qp_mod is set; -1 skips the QP update
+* \param[in] i4_activity_for_lamda : same for the lambda derivation; -1 skips the lambda update
+* \param[in] i4_reduce_qp : when 1, the derived QP is decremented if it is greater than 1
+* \return None; i4_cu_qp, quant related params and lambda params of ps_ctxt are updated
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_compute_cu_level_QP(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ WORD32 i4_activity_for_qp,
+ WORD32 i4_activity_for_lamda,
+ WORD32 i4_reduce_qp)
+{
+ /*modify quant related param in ctxt based on current cu qp*/
+ WORD32 i4_input_QP = ps_ctxt->i4_frame_mod_qp;
+ WORD32 cu_qp = i4_input_QP + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset;
+
+ WORD32 i4_max_qp_allowed;
+ WORD32 i4_min_qp_allowed;
+ WORD32 i4_pred_qp;
+
+ i4_pred_qp = ps_ctxt->i4_pred_qp;
+
+ if(ps_ctxt->i4_sub_pic_level_rc) /* limits are relative to pred QP here, to the frame QP otherwise */
+ {
+ i4_max_qp_allowed = (i4_pred_qp + (25 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 2)));
+ i4_min_qp_allowed = (i4_pred_qp - (26 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 2)));
+ }
+ else
+ {
+ i4_max_qp_allowed = (i4_input_QP + (7 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 4)));
+ i4_min_qp_allowed = (i4_input_QP - (18 + (ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset / 4)));
+ }
+ if((ps_ctxt->i1_slice_type == BSLICE) && (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6))
+ return; /* no CU level QP or lambda update for B slices at quality preset P6 */
+
+#if LAMDA_BASED_ON_QUANT
+ i4_activity_for_lamda = i4_activity_for_qp;
+#endif
+
+ if(i4_activity_for_qp != -1)
+ {
+ cu_qp = (ps_ctxt->ps_rc_quant_ctxt
+ ->pi4_qp_to_qscale[i4_input_QP + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]);
+ if(ps_ctxt->i4_qp_mod)
+ {
+ /*Recompute the Qp as per enc thread's frame level Qp*/
+ ASSERT(i4_activity_for_qp > 0);
+ cu_qp = ((cu_qp * i4_activity_for_qp) + (1 << (QP_LEVEL_MOD_ACT_FACTOR - 1))) >>
+ QP_LEVEL_MOD_ACT_FACTOR;
+ }
+
+ // To avoid access of uninitialised Qscale to qp conversion table
+ if(cu_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale)
+ cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale;
+ else if(cu_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale)
+ cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale;
+
+ cu_qp = ps_ctxt->ps_rc_quant_ctxt->pi4_qscale_to_qp[cu_qp]; /* cu_qp held a qscale until this line */
+
+ if((1 == i4_reduce_qp) && (cu_qp > 1))
+ cu_qp--;
+
+ /*CLIP the delta to obey standard allowed QP variation of (-26 + offset/2) to (25 + offset/2)*/
+ if(cu_qp > i4_max_qp_allowed)
+ cu_qp = i4_max_qp_allowed;
+ else if(cu_qp < i4_min_qp_allowed)
+ cu_qp = i4_min_qp_allowed;
+
+ /* CLIP to maintain Qp between user configured and min and max Qp values*/
+ if(cu_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qp)
+ cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qp;
+ else if(cu_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qp)
+ cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp;
+
+ /*cu qp must be populated in cu_analyse_t struct*/
+ ps_ctxt->i4_cu_qp = cu_qp;
+ /*recompute quant related param at every cu level*/
+ ihevce_compute_quant_rel_param(ps_ctxt, cu_qp);
+ }
+
+ /*Decoupling qp and lamda calculation */
+ if(i4_activity_for_lamda != -1)
+ {
+ cu_qp = (ps_ctxt->ps_rc_quant_ctxt
+ ->pi4_qp_to_qscale[i4_input_QP + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]);
+
+ if(ps_ctxt->i4_qp_mod)
+ {
+#if MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON
+ /*Recompute the Qp as per enc thread's frame level Qp*/
+ ASSERT(i4_activity_for_lamda > 0);
+ cu_qp = ((cu_qp * i4_activity_for_lamda) + (1 << (QP_LEVEL_MOD_ACT_FACTOR - 1))) >>
+ QP_LEVEL_MOD_ACT_FACTOR;
+#endif
+ }
+ if(cu_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale)
+ cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale;
+ else if(cu_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale)
+ cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale;
+
+ cu_qp = ps_ctxt->ps_rc_quant_ctxt->pi4_qscale_to_qp[cu_qp];
+
+ /*CLIP the delta to obey standard allowed QP variation of (-26 + offset/2) to (25 + offset/2)*/
+ if(cu_qp > i4_max_qp_allowed)
+ cu_qp = i4_max_qp_allowed;
+ else if(cu_qp < i4_min_qp_allowed)
+ cu_qp = i4_min_qp_allowed;
+
+ /* CLIP to maintain Qp between user configured and min and max Qp values*/
+ if(cu_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qp)
+ cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qp;
+ else if(cu_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qp)
+ cu_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp;
+ /* fetch lambda params for the modulated QP, or for the frame QP when lambda modulation is off */
+ ihevce_get_cl_cu_lambda_prms(
+ ps_ctxt, MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON ? cu_qp : ps_ctxt->i4_frame_qp);
+ }
+}
+
+/**
+*******************************************************************************
+* \if Function name : ihevce_scan_coeffs \endif
+*
+* @brief * Computes the coeff buffer for a coded TU for entropy coding
+*
+* @par Description
+* Computes the coeff buffer for a coded TU for entropy coding
+*
+* \param[in] pi2_quant_coeffs Quantized coefficients, read with a row stride of trans_size
+* \param[in] pi4_subBlock2csbfId_map Maps a 4x4 sub-block index to its index in pu1_csbf_buf
+* \param[in] scan_idx Scan index specifying the scan order
+*
+* \param[in] trans_size Transform unit size
+*
+* \param[inout] pu1_out_data output coeff buffer for a coded TU for entropy coding
+*
+* \param[in] pu1_csbf_buf csb flag buffer
+* \param[in] i4_csbf_stride Unused in this function
+* @returns num_bytes
+* Number of bytes written to pu1_out_data
+* (-1 when trans_size is not one of 32, 16, 8 or 4)
+* @remarks
+* When no coded sub-block flag is set, nothing is written and 0 is returned
+* \author
+* Ittiam
+*
+*******************************************************************************
+*/
+
+WORD32 ihevce_scan_coeffs(
+ WORD16 *pi2_quant_coeffs,
+ WORD32 *pi4_subBlock2csbfId_map,
+ WORD32 scan_idx,
+ WORD32 trans_size,
+ UWORD8 *pu1_out_data,
+ UWORD8 *pu1_csbf_buf,
+ WORD32 i4_csbf_stride)
+{
+ WORD32 i, trans_unit_idx, num_gt1_flag;
+ UWORD16 u2_csbf0flags;
+ WORD32 num_bytes = 0;
+ UWORD8 *pu1_trans_table;
+ UWORD8 *pu1_csb_table;
+ WORD32 shift_value, mask_value;
+ UWORD16 u2_sig_coeff_abs_gt0_flags = 0, u2_sig_coeff_abs_gt1_flags = 0;
+ UWORD16 u2_sign_flags;
+ UWORD16 u2_abs_coeff_remaining[16];
+ WORD32 blk_row, blk_col;
+
+ UWORD8 *pu1_out_data_header;
+ UWORD16 *pu2_out_data_coeff;
+
+ WORD32 x_pos, y_pos;
+ WORD32 quant_coeff;
+
+ WORD32 num_gt0_flag;
+ (void)i4_csbf_stride;
+ pu1_out_data_header = pu1_out_data;
+ /* Need only last 3 bits, rest are reserved for debugging and making */
+ /* WORD alignment */
+ u2_csbf0flags = 0xBAD0;
+
+ /* Select proper order for your transform unit and csb based on scan_idx*/
+ /* and the trans_size */
+
+ /* scan order inside a csb */
+ pu1_csb_table = (UWORD8 *)&(g_u1_scan_table_4x4[scan_idx][0]);
+ /* GETRANGE will give the log_2 of trans_size to shift_value */
+ GETRANGE(shift_value, trans_size);
+ shift_value = shift_value - 3; /* for finding. row no. from scan index */
+ mask_value = (trans_size / 4) - 1; /*for finding the col. no. from scan index*/
+ switch(trans_size)
+ {
+ case 32:
+ pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_8x8[scan_idx][0]);
+ break;
+ case 16:
+ pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_4x4[scan_idx][0]);
+ break;
+ case 8:
+ pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_2x2[scan_idx][0]);
+ break;
+ case 4:
+ pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_1x1[0]);
+ break;
+ default:
+ DBG_PRINTF("Invalid Trans Size\n");
+ return -1;
+ break;
+ }
+
+ /*go through each csb in the scan order for first non-zero coded sub-block*/
+ for(trans_unit_idx = (trans_size * trans_size / 16) - 1; trans_unit_idx >= 0; trans_unit_idx--)
+ {
+ /* check for the first csb flag in our scan order */
+ if(pu1_csbf_buf[pi4_subBlock2csbfId_map[pu1_trans_table[trans_unit_idx]]])
+ {
+ UWORD8 u1_last_x, u1_last_y;
+ /* row of csb */
+ blk_row = pu1_trans_table[trans_unit_idx] >> shift_value;
+ /* col of csb */
+ blk_col = pu1_trans_table[trans_unit_idx] & mask_value;
+
+ /*check for the 1st non-0 values inside the csb in our scan order*/
+ for(i = 15; i >= 0; i--)
+ {
+ x_pos = (pu1_csb_table[i] & 0x3) + blk_col * 4;
+ y_pos = (pu1_csb_table[i] >> 2) + blk_row * 4;
+
+ quant_coeff = pi2_quant_coeffs[x_pos + (y_pos * trans_size)];
+
+ if(quant_coeff != 0)
+ break;
+ }
+
+ ASSERT(i >= 0); /* a sub-block flagged as coded is expected to hold a non-zero coeff */
+
+ u1_last_x = x_pos;
+ u1_last_y = y_pos;
+
+ /* storing last_x and last_y */
+ *pu1_out_data_header = u1_last_x;
+ pu1_out_data_header++;
+ num_bytes++;
+ *pu1_out_data_header = u1_last_y;
+ pu1_out_data_header++;
+ num_bytes++;
+
+ /* storing the scan order */
+ *pu1_out_data_header = scan_idx;
+ pu1_out_data_header++;
+ num_bytes++;
+ /* storing last_sub_block pos. in scan order count */
+ *pu1_out_data_header = trans_unit_idx;
+ pu1_out_data_header++;
+ num_bytes++;
+
+ /*stored the first 4 bytes, now all are word16. So word16 pointer*/
+ pu2_out_data_coeff = (UWORD16 *)pu1_out_data_header;
+
+ /* u2_csbf0flags word */
+ u2_csbf0flags = 0xBAD0 | 1; /*since right&bottom csbf is 0*/
+ /* storing u2_csbf0flags word */
+ *pu2_out_data_coeff = u2_csbf0flags;
+ pu2_out_data_coeff++;
+ num_bytes += 2;
+
+ num_gt0_flag = 1;
+ num_gt1_flag = 0;
+ u2_sign_flags = 0;
+
+ /* set the i th bit of u2_sig_coeff_abs_gt0_flags */
+ u2_sig_coeff_abs_gt0_flags = u2_sig_coeff_abs_gt0_flags | (1 << i);
+ if(abs(quant_coeff) > 1)
+ {
+ /* set the i th bit of u2_sig_coeff_abs_gt1_flags */
+ u2_sig_coeff_abs_gt1_flags = u2_sig_coeff_abs_gt1_flags | (1 << i);
+ /* update u2_abs_coeff_remaining */
+ u2_abs_coeff_remaining[num_gt1_flag] = (UWORD16)abs(quant_coeff) - 1;
+
+ num_gt1_flag++;
+ }
+
+ if(quant_coeff < 0)
+ {
+ /* set the i th bit of u2_sign_flags */
+ u2_sign_flags = u2_sign_flags | (1 << i);
+ }
+
+ /* Test remaining elements in our scan order */
+ /* Can optimize further by CLZ macro */
+ for(i = i - 1; i >= 0; i--)
+ {
+ x_pos = (pu1_csb_table[i] & 0x3) + blk_col * 4;
+ y_pos = (pu1_csb_table[i] >> 2) + blk_row * 4;
+
+ quant_coeff = pi2_quant_coeffs[x_pos + (y_pos * trans_size)];
+
+ if(quant_coeff != 0)
+ {
+ /* set the i th bit of u2_sig_coeff_abs_gt0_flags */
+ u2_sig_coeff_abs_gt0_flags |= (1 << i);
+
+ if((abs(quant_coeff) > 1) || (num_gt0_flag >= MAX_GT_ONE))
+ {
+ /* set the i th bit of u2_sig_coeff_abs_gt1_flags */
+ u2_sig_coeff_abs_gt1_flags |= (1 << i);
+
+ /* update u2_abs_coeff_remaining */
+ u2_abs_coeff_remaining[num_gt1_flag] = (UWORD16)abs(quant_coeff) - 1;
+
+ num_gt1_flag++; /* no. of ones in sig_coeff_abs_gt1_flag */
+ }
+
+ if(quant_coeff < 0)
+ {
+ /* set the i th bit of u2_sign_flags */
+ u2_sign_flags |= (1 << i);
+ }
+
+ num_gt0_flag++;
+ }
+ }
+
+ /* storing u2_sig_coeff_abs_gt0_flags 2 bytes */
+ *pu2_out_data_coeff = u2_sig_coeff_abs_gt0_flags;
+ pu2_out_data_coeff++;
+ num_bytes += 2;
+ /* storing u2_sig_coeff_abs_gt1_flags 2 bytes */
+ *pu2_out_data_coeff = u2_sig_coeff_abs_gt1_flags;
+ pu2_out_data_coeff++;
+ num_bytes += 2;
+ /* storing u2_sign_flags 2 bytes */
+ *pu2_out_data_coeff = u2_sign_flags;
+ pu2_out_data_coeff++;
+ num_bytes += 2;
+
+ /* Store the u2_abs_coeff_remaining[] */
+ for(i = 0; i < num_gt1_flag; i++)
+ {
+ /* storing u2_abs_coeff_remaining[i] 2 bytes */
+ *pu2_out_data_coeff = u2_abs_coeff_remaining[i];
+ pu2_out_data_coeff++;
+ num_bytes += 2;
+ }
+
+ break; /*We just need this loop for finding 1st non-zero csb only*/
+ }
+ }
+
+ /* go through remaining csb in the scan order (loop does not run if no coded csb was found above) */
+ for(trans_unit_idx = trans_unit_idx - 1; trans_unit_idx >= 0; trans_unit_idx--)
+ {
+ blk_row = pu1_trans_table[trans_unit_idx] >> shift_value; /*row of csb*/
+ blk_col = pu1_trans_table[trans_unit_idx] & mask_value; /*col of csb*/
+
+ /* u2_csbf0flags word */
+ u2_csbf0flags = 0xBAD0 | /* assuming csbf_buf has only 0 or 1 values */
+ (pu1_csbf_buf[pi4_subBlock2csbfId_map[pu1_trans_table[trans_unit_idx]]]);
+
+ /********************************************************************/
+ /* Minor hack: As per HEVC spec csbf in not signalled in stream for */
+ /* block0, instead sig coeff map is directly signalled. This is */
+ /* taken care by forcing csbf for block0 to be 1 even if it is 0 */
+ /********************************************************************/
+ if(0 == trans_unit_idx)
+ {
+ u2_csbf0flags |= 1;
+ }
+
+ if((blk_col + 1 < trans_size / 4)) /* checking right boundary */
+ {
+ if(pu1_csbf_buf[pi4_subBlock2csbfId_map[blk_row * trans_size / 4 + blk_col + 1]])
+ {
+ /* set the 2nd bit of u2_csbf0flags for right csbf */
+ u2_csbf0flags = u2_csbf0flags | (1 << 1);
+ }
+ }
+ if((blk_row + 1 < trans_size / 4)) /* checking bottom boundary */
+ {
+ if(pu1_csbf_buf[pi4_subBlock2csbfId_map[(blk_row + 1) * trans_size / 4 + blk_col]])
+ {
+ /* set the 3rd bit of u2_csbf0flags for bottom csbf */
+ u2_csbf0flags = u2_csbf0flags | (1 << 2);
+ }
+ }
+
+ /* storing u2_csbf0flags word */
+ *pu2_out_data_coeff = u2_csbf0flags;
+ pu2_out_data_coeff++;
+ num_bytes += 2;
+
+ /* check for the csb flag in our scan order */
+ if(u2_csbf0flags & 0x1)
+ {
+ u2_sig_coeff_abs_gt0_flags = 0;
+ u2_sig_coeff_abs_gt1_flags = 0;
+ u2_sign_flags = 0;
+
+ num_gt0_flag = 0;
+ num_gt1_flag = 0;
+ /* check for the non-0 values inside the csb in our scan order */
+ /* Can optimize further by CLZ macro */
+ for(i = 15; i >= 0; i--)
+ {
+ x_pos = (pu1_csb_table[i] & 0x3) + blk_col * 4;
+ y_pos = (pu1_csb_table[i] >> 2) + blk_row * 4;
+
+ quant_coeff = pi2_quant_coeffs[x_pos + (y_pos * trans_size)];
+
+ if(quant_coeff != 0)
+ {
+ /* set the i th bit of u2_sig_coeff_abs_gt0_flags */
+ u2_sig_coeff_abs_gt0_flags |= (1 << i);
+
+ if((abs(quant_coeff) > 1) || (num_gt0_flag >= MAX_GT_ONE))
+ {
+ /* set the i th bit of u2_sig_coeff_abs_gt1_flags */
+ u2_sig_coeff_abs_gt1_flags |= (1 << i);
+
+ /* update u2_abs_coeff_remaining */
+ u2_abs_coeff_remaining[num_gt1_flag] = (UWORD16)abs(quant_coeff) - 1;
+
+ num_gt1_flag++;
+ }
+
+ if(quant_coeff < 0)
+ {
+ /* set the i th bit of u2_sign_flags */
+ u2_sign_flags = u2_sign_flags | (1 << i);
+ }
+
+ num_gt0_flag++;
+ }
+ }
+
+ /* storing u2_sig_coeff_abs_gt0_flags 2 bytes */
+ *pu2_out_data_coeff = u2_sig_coeff_abs_gt0_flags;
+ pu2_out_data_coeff++;
+ num_bytes += 2;
+
+ /* storing u2_sig_coeff_abs_gt1_flags 2 bytes */
+ *pu2_out_data_coeff = u2_sig_coeff_abs_gt1_flags;
+ pu2_out_data_coeff++;
+ num_bytes += 2;
+
+ /* storing u2_sign_flags 2 bytes */
+ *pu2_out_data_coeff = u2_sign_flags;
+ pu2_out_data_coeff++;
+ num_bytes += 2;
+
+ /* Store the u2_abs_coeff_remaining[] */
+ for(i = 0; i < num_gt1_flag; i++)
+ {
+ /* storing u2_abs_coeff_remaining[i] 2 bytes */
+ *pu2_out_data_coeff = u2_abs_coeff_remaining[i];
+ pu2_out_data_coeff++;
+ num_bytes += 2;
+ }
+ }
+ }
+
+ return num_bytes; /* Return the number of bytes written to out_data */
+}
+
+/**
+*******************************************************************************
+* \if Function name : ihevce_populate_intra_pred_mode \endif
+*
+* \brief * populates the list of three candidate intra pred modes for a CU
+* based on the intra modes of the neighbouring (top and left) CUs
+*
+* \par Description
+* Derives the top and left candidate modes (INTRA_DC when the neighbour is not
+* available, and for top also when cu_pos_y is 0) and fills ps_cand_mode_list[0..2]
+*
+* \param[in] top_intra_mode Top intra mode
+* \param[in] left_intra_mode Left intra mode
+* \param[in] available_top Top availability flag
+* \param[in] available_left Left availability flag
+* \param[in] cu_pos_y CU 'y' position
+* \param[out] ps_cand_mode_list pointer to populate candidate list (3 entries written)
+*
+* \returns none
+*
+* \author
+* Ittiam
+*
+*******************************************************************************
+*/
+
+void ihevce_populate_intra_pred_mode(
+ WORD32 top_intra_mode,
+ WORD32 left_intra_mode,
+ WORD32 available_top,
+ WORD32 available_left,
+ WORD32 cu_pos_y,
+ WORD32 *ps_cand_mode_list)
+{
+ /* local variables */
+ WORD32 cand_intra_pred_mode_left, cand_intra_pred_mode_top;
+
+ /* Calculate cand_intra_pred_mode_N as per sec. 8.4.2 in JCTVC-J1003_d7 */
+ /* N = top */
+ if(0 == available_top)
+ {
+ cand_intra_pred_mode_top = INTRA_DC;
+ }
+ /* for neighbour != INTRA, setting DC is done outside */
+ else if(0 == cu_pos_y) /* It's on the CTB boundary */
+ {
+ cand_intra_pred_mode_top = INTRA_DC;
+ }
+ else
+ {
+ cand_intra_pred_mode_top = top_intra_mode;
+ }
+
+ /* N = left */
+ if(0 == available_left)
+ {
+ cand_intra_pred_mode_left = INTRA_DC;
+ }
+ /* for neighbour != INTRA, setting DC is done outside */
+ else
+ {
+ cand_intra_pred_mode_left = left_intra_mode;
+ }
+
+ /* Calculate cand_mode_list as per sec. 8.4.2 in JCTVC-J1003_d7 */
+ if(cand_intra_pred_mode_left == cand_intra_pred_mode_top)
+ {
+ if(cand_intra_pred_mode_left < 2) /* both candidates are the same mode below 2 */
+ {
+ ps_cand_mode_list[0] = INTRA_PLANAR;
+ ps_cand_mode_list[1] = INTRA_DC;
+ ps_cand_mode_list[2] = INTRA_ANGULAR(26); /* angular 26 = Vertical */
+ }
+ else
+ {
+ ps_cand_mode_list[0] = cand_intra_pred_mode_left;
+ ps_cand_mode_list[1] = 2 + ((cand_intra_pred_mode_left + 29) % 32); /* mode - 1, wrapped within 2..33 */
+ ps_cand_mode_list[2] = 2 + ((cand_intra_pred_mode_left - 2 + 1) % 32); /* mode + 1, wrapped within 2..33 */
+ }
+ }
+ else
+ {
+ ps_cand_mode_list[0] = cand_intra_pred_mode_left;
+ ps_cand_mode_list[1] = cand_intra_pred_mode_top;
+
+ if((cand_intra_pred_mode_left != INTRA_PLANAR) &&
+ (cand_intra_pred_mode_top != INTRA_PLANAR))
+ {
+ ps_cand_mode_list[2] = INTRA_PLANAR;
+ }
+ else if((cand_intra_pred_mode_left != INTRA_DC) && (cand_intra_pred_mode_top != INTRA_DC))
+ {
+ ps_cand_mode_list[2] = INTRA_DC;
+ }
+ else
+ {
+ ps_cand_mode_list[2] = INTRA_ANGULAR(26);
+ }
+ }
+}
+/**
+*******************************************************************************
+* \if Function name : ihevce_intra_pred_mode_signaling \endif
+*
+* \brief * Computes the b1_prev_intra_luma_pred_flag, b2_mpm_idx &
+* b5_rem_intra_pred_mode for a CU
+*
+* \par Description
+* Builds the three entry candidate mode list from the top and left modes, then
+* signals either an index into that list or the remaining mode
+*
+* \param[in] top_intra_mode Top intra mode
+* \param[in] left_intra_mode Left intra mode
+* \param[in] available_top Top availability flag
+* \param[in] available_left Left availability flag
+* \param[in] cu_pos_y CU 'y' position
+* \param[in] luma_intra_pred_mode_current the intra_pred_mode of current block
+* \param[inout] ps_intra_pred_mode_current
+* Pointer to structure having b1_prev_intra_luma_pred_flag, b2_mpm_idx and
+* b5_rem_intra_pred_mode
+*
+* \returns none
+*
+* \author
+* Ittiam
+*
+*******************************************************************************
+*/
+
+void ihevce_intra_pred_mode_signaling(
+ WORD32 top_intra_mode,
+ WORD32 left_intra_mode,
+ WORD32 available_top,
+ WORD32 available_left,
+ WORD32 cu_pos_y,
+ WORD32 luma_intra_pred_mode_current,
+ intra_prev_rem_flags_t *ps_intra_pred_mode_current)
+{
+ /* local variables */
+ WORD32 cand_intra_pred_mode_left, cand_intra_pred_mode_top;
+ WORD32 cand_mode_list[3];
+
+ ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 0;
+ ps_intra_pred_mode_current->b2_mpm_idx = 0; // for safety purpose
+ ps_intra_pred_mode_current->b5_rem_intra_pred_mode = 0;
+
+ /* Calculate cand_intra_pred_mode_N as per sec. 8.4.2 in JCTVC-J1003_d7 */
+ /* N = top */
+ if(0 == available_top)
+ {
+ cand_intra_pred_mode_top = INTRA_DC;
+ }
+ /* for neighbour != INTRA, setting DC is done outside */
+ else if(0 == cu_pos_y) /* It's on the CTB boundary */
+ {
+ cand_intra_pred_mode_top = INTRA_DC;
+ }
+ else
+ {
+ cand_intra_pred_mode_top = top_intra_mode;
+ }
+
+ /* N = left */
+ if(0 == available_left)
+ {
+ cand_intra_pred_mode_left = INTRA_DC;
+ }
+ /* for neighbour != INTRA, setting DC is done outside */
+ else
+ {
+ cand_intra_pred_mode_left = left_intra_mode;
+ }
+
+ /* Calculate cand_mode_list as per sec. 8.4.2 in JCTVC-J1003_d7 */
+ if(cand_intra_pred_mode_left == cand_intra_pred_mode_top)
+ {
+ if(cand_intra_pred_mode_left < 2)
+ {
+ cand_mode_list[0] = INTRA_PLANAR;
+ cand_mode_list[1] = INTRA_DC;
+ cand_mode_list[2] = INTRA_ANGULAR(26); /* angular 26 = Vertical */
+ }
+ else
+ {
+ cand_mode_list[0] = cand_intra_pred_mode_left;
+ cand_mode_list[1] = 2 + ((cand_intra_pred_mode_left + 29) % 32); /* mode - 1, wrapped within 2..33 */
+ cand_mode_list[2] = 2 + ((cand_intra_pred_mode_left - 2 + 1) % 32); /* mode + 1, wrapped within 2..33 */
+ }
+ }
+ else
+ {
+ cand_mode_list[0] = cand_intra_pred_mode_left;
+ cand_mode_list[1] = cand_intra_pred_mode_top;
+
+ if((cand_intra_pred_mode_left != INTRA_PLANAR) &&
+ (cand_intra_pred_mode_top != INTRA_PLANAR))
+ {
+ cand_mode_list[2] = INTRA_PLANAR;
+ }
+ else if((cand_intra_pred_mode_left != INTRA_DC) && (cand_intra_pred_mode_top != INTRA_DC))
+ {
+ cand_mode_list[2] = INTRA_DC;
+ }
+ else
+ {
+ cand_mode_list[2] = INTRA_ANGULAR(26);
+ }
+ }
+
+ /* Signal Generation */
+
+ /* Flag & mpm_index generation: current mode matches one of the candidates */
+ if(cand_mode_list[0] == luma_intra_pred_mode_current)
+ {
+ ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 1;
+ ps_intra_pred_mode_current->b2_mpm_idx = 0;
+ }
+ else if(cand_mode_list[1] == luma_intra_pred_mode_current)
+ {
+ ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 1;
+ ps_intra_pred_mode_current->b2_mpm_idx = 1;
+ }
+ else if(cand_mode_list[2] == luma_intra_pred_mode_current)
+ {
+ ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 1;
+ ps_intra_pred_mode_current->b2_mpm_idx = 2;
+ }
+ /* Flag & b5_rem_intra_pred_mode generation */
+ else
+ {
+ WORD32 rem_mode;
+
+ ps_intra_pred_mode_current->b1_prev_intra_luma_pred_flag = 0;
+
+ /* sorting cand_mode_list in ascending order */
+ if(cand_mode_list[0] > cand_mode_list[1])
+ {
+ SWAP(cand_mode_list[0], cand_mode_list[1]);
+ }
+ if(cand_mode_list[0] > cand_mode_list[2])
+ {
+ SWAP(cand_mode_list[0], cand_mode_list[2]);
+ }
+ if(cand_mode_list[1] > cand_mode_list[2])
+ {
+ SWAP(cand_mode_list[1], cand_mode_list[2]);
+ }
+
+ rem_mode = luma_intra_pred_mode_current; /* decremented once per candidate it is >= to, largest first */
+
+ if((rem_mode) >= cand_mode_list[2])
+ {
+ (rem_mode)--;
+ }
+ if((rem_mode) >= cand_mode_list[1])
+ {
+ (rem_mode)--;
+ }
+ if((rem_mode) >= cand_mode_list[0])
+ {
+ (rem_mode)--;
+ }
+ ps_intra_pred_mode_current->b5_rem_intra_pred_mode = rem_mode;
+ }
+}
+
+void ihevce_quant_rounding_factor_gen(
+ WORD32 i4_trans_size,
+ WORD32 is_luma,
+ rdopt_entropy_ctxt_t *ps_rdopt_entropy_ctxt,
+ WORD32 *pi4_quant_round_0_1,
+ WORD32 *pi4_quant_round_1_2,
+ double i4_lamda_modifier,
+ UWORD8 i4_is_tu_level_quant_rounding)
+{
+ /* Fills pi4_quant_round_0_1 and pi4_quant_round_1_2 (one entry per coefficient position of the i4_trans_size x i4_trans_size block) with values produced by QUANT_ROUND_FACTOR from CABAC bit estimates and the scaled lambda. No scan index is used here (see the commented-out gu1_hevce_scan4x4 lookups below). */
+ UWORD8 *pu1_ctxt_model;
+ WORD32 scan_pos;
+ WORD32 sig_coeff_base_ctxt; /* cabac context for sig coeff flag */
+ WORD32 abs_gt1_base_ctxt;
+ WORD32 log2_tr_size, i;
+ UWORD16 u4_bits_estimated_r0, u4_bits_estimated_r1, u4_bits_estimated_r2; /* NOTE(review): 'u4' prefix but declared UWORD16 - confirm the sums below cannot exceed 16 bits */
+ UWORD16 u4_bits_estimated_r1_temp;
+ WORD32 j = 0;
+ WORD32 k = 0;
+ WORD32 temp2;
+
+ double i4_lamda_mod = i4_lamda_modifier * pow(2.0, (-8.0 / 3.0));
+ LWORD64 lamda_mod = (LWORD64)(i4_lamda_mod * (1 << LAMDA_Q_SHIFT_FACT)); /* fixed-point form of the scaled modifier */
+ /* transform size to log2transform size */
+ GETRANGE(log2_tr_size, i4_trans_size);
+ log2_tr_size -= 1;
+
+ if(1 == i4_is_tu_level_quant_rounding) /* TU level: use the context models of the current CU entropy buffer */
+ {
+ entropy_context_t *ps_cur_tu_entropy;
+ cab_ctxt_t *ps_cabac;
+ WORD32 curr_buf_idx = ps_rdopt_entropy_ctxt->i4_curr_buf_idx;
+ ps_cur_tu_entropy = &ps_rdopt_entropy_ctxt->as_cu_entropy_ctxt[curr_buf_idx];
+
+ ps_cabac = &ps_cur_tu_entropy->s_cabac_ctxt;
+
+ pu1_ctxt_model = &ps_cabac->au1_ctxt_models[0];
+ }
+ else /* otherwise use the initial cabac context states */
+ {
+ pu1_ctxt_model = &ps_rdopt_entropy_ctxt->au1_init_cabac_ctxt_states[0];
+ }
+ /* Select the base context offsets for the sig coeff flag and the abs-greater-than-1 flag (luma vs chroma, then by transform size) */
+ if(is_luma)
+ {
+ sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG;
+ abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG;
+
+ if(3 == log2_tr_size)
+ {
+ /* 8x8 transform size */
+ /* Assuming diagonal scan idx for now */
+ sig_coeff_base_ctxt += 9;
+ }
+ else if(3 < log2_tr_size)
+ {
+ /* larger transform sizes */
+ sig_coeff_base_ctxt += 21;
+ }
+ }
+ else
+ {
+ /* chroma context initializations */
+ sig_coeff_base_ctxt = IHEVC_CAB_COEFF_FLAG + 27;
+ abs_gt1_base_ctxt = IHEVC_CAB_COEFABS_GRTR1_FLAG + 16;
+
+ if(3 == log2_tr_size)
+ {
+ /* 8x8 transform size */
+ sig_coeff_base_ctxt += 9;
+ }
+ else if(3 < log2_tr_size)
+ {
+ /* larger transform sizes */
+ sig_coeff_base_ctxt += 12;
+ }
+ }
+
+ /*Transform size of 4x4 will have only a single CSB */
+ /* derive the context inc as per section 9.3.3.1.4 */
+
+ if(2 == log2_tr_size)
+ {
+ UWORD8 sig_ctxinc;
+ WORD32 state_mps;
+ WORD32 gt1_ctxt = 0;
+ WORD32 ctxt_set = 0;
+ WORD32 ctxt_idx = 0;
+
+ /* context set based on luma subblock pos */
+
+ /* Encode the abs level gt1 bins */
+ /* Currently calculating trade off between mps(2) and mps(1)*/
+ /* The estimation has to be further done for mps(11) and mps(111)*/
+ /*ctxt_set = 0 as transform 4x4 has only one csb with DC */
+ /* gt1_ctxt = 0 for the co-ef value to be 2 */
+
+ ctxt_set = gt1_ctxt = 0;
+ ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
+
+ state_mps = pu1_ctxt_model[ctxt_idx];
+
+ u4_bits_estimated_r2 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1];
+
+ u4_bits_estimated_r1_temp = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
+
+ QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r2, u4_bits_estimated_r1_temp, lamda_mod);
+ for(scan_pos = 0; scan_pos < 16; scan_pos++) /* the 1-to-2 factor is the same for all 16 positions of a 4x4 */
+ {
+ *(pi4_quant_round_1_2 + scan_pos) = temp2;
+ }
+
+ for(scan_pos = 0; scan_pos < 16; scan_pos++) /* the 0-to-1 factor depends on the per-position sig coeff context */
+ {
+ //UWORD8 nbr_csbf = 1;
+ /* derive the x,y pos */
+ UWORD8 y_pos_x_pos = scan_pos; //gu1_hevce_scan4x4[i4_scan_idx][scan_pos];
+
+ /* 4x4 transform size increment uses lookup */
+ sig_ctxinc = gu1_hevce_sigcoeff_ctxtinc_tr4[y_pos_x_pos];
+
+ /*Get the mps state based on ctxt modes */
+ state_mps = pu1_ctxt_model[sig_ctxinc + sig_coeff_base_ctxt];
+
+ /* Bits taken to encode sig co-ef flag as 0 */
+ u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
+
+ /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
+ //
+ u4_bits_estimated_r1 =
+ (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000));
+
+ /* Add the abs-greater-than-1 flag estimate computed above (table entry at state_mps ^ 0 of the gt1 context) */
+ u4_bits_estimated_r1 += u4_bits_estimated_r1_temp;
+
+ QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod);
+ *(pi4_quant_round_0_1 + scan_pos) = temp2;
+ }
+ }
+ else
+ {
+ UWORD8 *pu1_hevce_sigcoeff_ctxtinc;
+ WORD32 is_nbr_csb_state_mps;
+
+ WORD32 state_mps;
+ WORD32 gt1_ctxt = 0;
+ WORD32 ctxt_set = 0;
+ WORD32 ctxt_idx;
+ /*1to2 rounding factor is same for all sub blocks except for sub-block = 0*/
+ /*Hence will write all the sub-block with i >=1 coeff, and then overwrite for i = 0*/
+
+ /*ctxt_set = 0 DC subblock, the previous state did not have 2
+ ctxt_set = 1 DC subblock, the previous state did have >= 2
+ ctxt_set = 2 AC subblock, the previous state did not have 2
+ ctxt_set = 3 AC subblock, the previous state did have >= 2*/
+ i = 1;
+ ctxt_set = (i && is_luma) ? 2 : 0;
+
+ ctxt_set++;
+
+ /*0th position indicates the probability of 2 */
+ /*1th position indicates the probability of 1 */
+ /*2th position indicates the probability of 11 */
+ /*3th position indicates the probability of 111 */
+
+ gt1_ctxt = 0;
+ ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
+
+ state_mps = pu1_ctxt_model[ctxt_idx];
+
+ u4_bits_estimated_r2 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1];
+
+ u4_bits_estimated_r1 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
+ QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r2, u4_bits_estimated_r1, lamda_mod);
+
+ for(scan_pos = 0; scan_pos < (16 * (i4_trans_size * i4_trans_size >> 4)); scan_pos++) /* linear fill of the whole block */
+ {
+ *(pi4_quant_round_1_2 + scan_pos) = temp2;
+ }
+
+ i = 0;
+ ctxt_set = (i && is_luma) ? 2 : 0;
+ ctxt_set++;
+
+ /*0th position indicates the probability of 2 */
+ /*1th position indicates the probability of 1 */
+ /*2th position indicates the probability of 11 */
+ /*3th position indicates the probability of 111 */
+
+ gt1_ctxt = 0;
+ ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
+
+ state_mps = pu1_ctxt_model[ctxt_idx];
+
+ u4_bits_estimated_r2 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1];
+
+ u4_bits_estimated_r1 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
+ QUANT_ROUND_FACTOR(temp2, u4_bits_estimated_r2, u4_bits_estimated_r1, lamda_mod);
+
+ for(scan_pos = 0; scan_pos < 16; scan_pos++) /* overwrite the top-left 4x4 (row stride = i4_trans_size) */
+ {
+ *(pi4_quant_round_1_2 + ((scan_pos % 4) + ((scan_pos >> 2) * i4_trans_size))) = temp2;
+ }
+
+ {
+ WORD32 ctxt_idx;
+
+ WORD32 nbr_csbf_0, nbr_csbf_1;
+ WORD32 state_mps_0, state_mps_1;
+ ctxt_idx = IHEVC_CAB_CODED_SUBLK_IDX;
+ ctxt_idx += is_luma ? 0 : 2;
+
+ /* ctxt based on right / bottom avail csbf, section 9.3.3.1.3 */
+ /* if neighbour not available, ctxt idx = 0*/
+ nbr_csbf_0 = 0;
+ ctxt_idx += nbr_csbf_0 ? 1 : 0;
+ state_mps_0 = pu1_ctxt_model[ctxt_idx];
+
+ nbr_csbf_1 = 1;
+ ctxt_idx += nbr_csbf_1 ? 1 : 0;
+ state_mps_1 = pu1_ctxt_model[ctxt_idx];
+
+ is_nbr_csb_state_mps = ((state_mps_0 % 2) == 1) && ((state_mps_1 % 2) == 1); /* 1 only when both looked-up context values are odd */
+ }
+
+ if(1 == is_nbr_csb_state_mps)
+ {
+ for(i = 0; i < (i4_trans_size * i4_trans_size >> 4); i++) /* one iteration per 4x4 sub-block, raster order */
+ {
+ UWORD8 sig_ctxinc;
+ WORD32 state_mps;
+ WORD32 gt1_ctxt = 0;
+ WORD32 ctxt_set = 0;
+
+ WORD32 ctxt_idx;
+
+ /* Pick the ctxt-inc table by sub-block position: first sub-block, rest of first row, first column, or interior */
+
+ if(i == 0)
+ pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[3][0];
+ else if(i < (i4_trans_size >> 2))
+ pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[1][0];
+ else if((i % (i4_trans_size >> 2)) == 0)
+ pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[2][0];
+ else
+ pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[0][0];
+
+ if(((i % (i4_trans_size >> 2)) == 0) && (i != 0)) /* k counts completed rows of sub-blocks */
+ k++;
+
+ j = ((i4_trans_size * 4) * k) + ((i % (i4_trans_size >> 2)) * 4); /* offset of this sub-block's top-left coefficient */
+ /*ctxt_set = 0 DC subblock, the previous state did not have 2
+ ctxt_set = 1 DC subblock, the previous state did have >= 2
+ ctxt_set = 2 AC subblock, the previous state did not have 2
+ ctxt_set = 3 AC subblock, the previous state did have >= 2*/
+
+ ctxt_set = (i && is_luma) ? 2 : 0;
+
+ /* NOTE(review): this comment originally read "gt1_ctxt = 1 for the co-ef value to be 1", but the code uses gt1_ctxt = 0 - confirm intent */
+ gt1_ctxt = 0;
+ ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
+
+ state_mps = pu1_ctxt_model[ctxt_idx];
+
+ /* abs-greater-than-1 flag estimate (table entry at state_mps ^ 0), added to every r1 below */
+ u4_bits_estimated_r1_temp = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
+
+ for(scan_pos = 0; scan_pos < 16; scan_pos++)
+ {
+ UWORD8 y_pos_x_pos;
+
+ if(scan_pos || i)
+ {
+ y_pos_x_pos = scan_pos; // gu1_hevce_scan4x4[i4_scan_idx][scan_pos];
+ /* ctxt for AC coeff depends on curpos and neighbour csbf */
+ sig_ctxinc = pu1_hevce_sigcoeff_ctxtinc[y_pos_x_pos];
+
+ /* based on luma subblock pos */
+ sig_ctxinc += (i && is_luma) ? 3 : 0;
+
+ sig_ctxinc += sig_coeff_base_ctxt;
+ }
+ else
+ {
+ /*MAM : both scan pos and i 0 implies the DC coef of 1st block only */
+ /* DC coeff has fixed context for luma and chroma */
+ sig_ctxinc = is_luma ? IHEVC_CAB_COEFF_FLAG : IHEVC_CAB_COEFF_FLAG + 27;
+ }
+
+ /*Get the mps state based on ctxt modes */
+ state_mps = pu1_ctxt_model[sig_ctxinc];
+
+ /* Bits taken to encode sig co-ef flag as 0 */
+ u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
+
+ u4_bits_estimated_r1 =
+ (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000));
+
+ /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
+ u4_bits_estimated_r1 += u4_bits_estimated_r1_temp;
+ {
+ QUANT_ROUND_FACTOR(
+ temp2, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod);
+ *(pi4_quant_round_0_1 +
+ ((scan_pos % 4) + ((scan_pos >> 2) * i4_trans_size)) + j) = temp2;
+ }
+ }
+ }
+ }
+ else
+ {
+ /*If Both nbr csbfs are 0, then all the coef in sub-blocks will have same value except for 1st subblock,
+ Hence will write the same value to all sub block, and overwrite for the 1st one */
+ i = 1;
+ {
+ UWORD8 sig_ctxinc;
+ UWORD8 y_pos_x_pos;
+ WORD32 quant_rounding_0_1;
+
+ pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc_00[0];
+
+ scan_pos = 0;
+ y_pos_x_pos = scan_pos; // gu1_hevce_scan4x4[i4_scan_idx][scan_pos];
+ /* ctxt for AC coeff depends on curpos and neighbour csbf */
+ sig_ctxinc = pu1_hevce_sigcoeff_ctxtinc[y_pos_x_pos];
+
+ /* based on luma subblock pos */
+ sig_ctxinc += (is_luma) ? 3 : 0;
+
+ sig_ctxinc += sig_coeff_base_ctxt;
+
+ /*Get the mps state based on ctxt modes */
+ state_mps = pu1_ctxt_model[sig_ctxinc];
+
+ /* Bits taken to encode sig co-ef flag as 0 */
+ u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
+
+ u4_bits_estimated_r1 =
+ (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000));
+
+ /*ctxt_set = 0 DC subblock, the previous state did not have 2
+ ctxt_set = 1 DC subblock, the previous state did have >= 2
+ ctxt_set = 2 AC subblock, the previous state did not have 2
+ ctxt_set = 3 AC subblock, the previous state did have >= 2*/
+
+ ctxt_set = (i && is_luma) ? 2 : 0;
+
+ /* NOTE(review): this comment originally read "gt1_ctxt = 1 for the co-ef value to be 1", but the code uses gt1_ctxt = 0 - confirm intent */
+ gt1_ctxt = 0;
+ ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
+
+ state_mps = pu1_ctxt_model[ctxt_idx];
+
+ /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
+ u4_bits_estimated_r1 += gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
+
+ QUANT_ROUND_FACTOR(
+ quant_rounding_0_1, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod);
+
+ for(scan_pos = 0; scan_pos < (16 * (i4_trans_size * i4_trans_size >> 4));
+ scan_pos++)
+ {
+ *(pi4_quant_round_0_1 + scan_pos) = quant_rounding_0_1;
+ }
+ }
+
+ /*First Subblock*/
+ i = 0;
+
+ {
+ UWORD8 sig_ctxinc;
+ WORD32 state_mps;
+ WORD32 gt1_ctxt = 0;
+ WORD32 ctxt_set = 0;
+
+ WORD32 ctxt_idx;
+
+ /* First sub-block always uses ctxt-inc table 0 in this branch */
+
+ {
+ pu1_hevce_sigcoeff_ctxtinc = (UWORD8 *)&gu1_hevce_sigcoeff_ctxtinc[0][0];
+
+ /*ctxt_set = 0 DC subblock, the previous state did not have 2
+ ctxt_set = 1 DC subblock, the previous state did have >= 2
+ ctxt_set = 2 AC subblock, the previous state did not have 2
+ ctxt_set = 3 AC subblock, the previous state did have >= 2*/
+ ctxt_set = (i && is_luma) ? 2 : 0;
+
+ /* NOTE(review): this comment originally read "gt1_ctxt = 1 for the co-ef value to be 1", but the code uses gt1_ctxt = 0 - confirm intent */
+ gt1_ctxt = 0;
+ ctxt_idx = (ctxt_set * 4) + abs_gt1_base_ctxt + gt1_ctxt;
+
+ state_mps = pu1_ctxt_model[ctxt_idx];
+
+ /* abs-greater-than-1 flag estimate (table entry at state_mps ^ 0), added to every r1 below */
+ u4_bits_estimated_r1_temp = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
+
+ for(scan_pos = 0; scan_pos < 16; scan_pos++)
+ {
+ UWORD8 y_pos_x_pos;
+
+ if(scan_pos)
+ {
+ y_pos_x_pos = scan_pos; // gu1_hevce_scan4x4[i4_scan_idx][scan_pos];
+ /* ctxt for AC coeff depends on curpos and neighbour csbf */
+ sig_ctxinc = pu1_hevce_sigcoeff_ctxtinc[y_pos_x_pos];
+
+ /* based on luma subblock pos */
+ sig_ctxinc += (i && is_luma) ? 3 : 0;
+
+ sig_ctxinc += sig_coeff_base_ctxt;
+ }
+ else
+ {
+ /*MAM : both scan pos and i 0 implies the DC coef of 1st block only */
+ /* DC coeff has fixed context for luma and chroma */
+ sig_ctxinc = is_luma ? IHEVC_CAB_COEFF_FLAG : IHEVC_CAB_COEFF_FLAG + 27;
+ }
+
+ /*Get the mps state based on ctxt modes */
+ state_mps = pu1_ctxt_model[sig_ctxinc];
+
+ /* Bits taken to encode sig co-ef flag as 0 */
+ u4_bits_estimated_r0 = gau2_ihevce_cabac_bin_to_bits[state_mps ^ 0];
+
+ u4_bits_estimated_r1 =
+ (gau2_ihevce_cabac_bin_to_bits[state_mps ^ 1] + ROUND_Q12(1.000000000));
+
+ /* Bits taken to encode sig co-ef flag as 1, also account for sign bit worst case */
+ u4_bits_estimated_r1 += u4_bits_estimated_r1_temp;
+ {
+ QUANT_ROUND_FACTOR(
+ temp2, u4_bits_estimated_r1, u4_bits_estimated_r0, lamda_mod);
+ *(pi4_quant_round_0_1 +
+ ((scan_pos % 4) + ((scan_pos >> 2) * i4_trans_size))) = temp2;
+ }
+ }
+ }
+ }
+ }
+ }
+ return;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_t_q_iq_ssd_scan_fxn \endif
+*
+* \brief
+* Transform unit level (Luma) enc_loop function
+*
+* \param[in] ps_ctxt enc_loop module ctxt pointer
+* \param[in] pu1_pred pointer to predicted data buffer
+* \param[in] pred_strd predicted buffer stride
+* \param[in] pu1_src pointer to source data buffer
+* \param[in] src_strd source buffer stride
+* \param[in] pi2_deq_data pointer to store iq data
+* \param[in] deq_data_strd iq data buffer stride
+* \param[out] pu1_ecd_data pointer coeff output buffer (input to ent cod)
+* \param[out] pu1_csbf_buf pointer to store the csbf for all 4x4 in a current
+* block
+* \param[out] csbf_strd csbf buffer stride
+* \param[in] trans_size transform size (4, 8, 16,32)
+* \param[in] packed_pred_mode 0:Inter 1:Intra 2:Skip
+* \param[out] pi8_cost pointer to store the cost
+* \param[out] pi4_coeff_off pointer to store the number of bytes produced in
+* coeff buffer
+* \param[out] pi4_tu_bits pointer to store the best TU bits required to encode
+* the current TU in RDopt Mode
+* \param[out] pu4_blk_sad pointer to store the block sad for RC
+* \param[out] pi4_zero_col pointer to store the zero_col info for the TU
+* \param[out] pi4_zero_row pointer to store the zero_row info for the TU
+* \param[in] i4_perform_rdoq Indicates if RDOQ should be performed or not
+* \param[in] i4_perform_sbh Indicates if SBH should be performed or not
+*
+* \return
+* CBF of the current block
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+
+WORD32 ihevce_t_q_iq_ssd_scan_fxn(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ UWORD8 *pu1_pred,
+ WORD32 pred_strd,
+ UWORD8 *pu1_src,
+ WORD32 src_strd,
+ WORD16 *pi2_deq_data,
+ WORD32 deq_data_strd,
+ UWORD8 *pu1_recon,
+ WORD32 i4_recon_stride,
+ UWORD8 *pu1_ecd_data,
+ UWORD8 *pu1_csbf_buf,
+ WORD32 csbf_strd,
+ WORD32 trans_size,
+ WORD32 packed_pred_mode,
+ LWORD64 *pi8_cost,
+ WORD32 *pi4_coeff_off,
+ WORD32 *pi4_tu_bits,
+ UWORD32 *pu4_blk_sad,
+ WORD32 *pi4_zero_col,
+ WORD32 *pi4_zero_row,
+ UWORD8 *pu1_is_recon_available,
+ WORD32 i4_perform_rdoq,
+ WORD32 i4_perform_sbh,
+#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ WORD32 i4_alpha_stim_multiplier,
+ UWORD8 u1_is_cu_noisy,
+#endif
+ SSD_TYPE_T e_ssd_type,
+ WORD32 early_cbf)
+{
+ WORD32 cbf = 0; /* coded block flag; this is the value returned to the caller */
+ WORD32 trans_idx;
+ WORD32 quant_scale_mat_offset;
+ WORD32 *pi4_trans_scratch;
+ WORD16 *pi2_trans_values;
+ WORD16 *pi2_quant_coeffs;
+ WORD32 *pi4_subBlock2csbfId_map = NULL;
+
+#if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
+ WORD32 ai4_quant_rounding_factors[3][MAX_TU_SIZE * MAX_TU_SIZE], i;
+#endif
+
+ rdoq_sbh_ctxt_t *ps_rdoq_sbh_ctxt = &ps_ctxt->s_rdoq_sbh_ctxt;
+
+ WORD32 i4_perform_zcbf = (ENABLE_INTER_ZCU_COST && (PRED_MODE_INTRA != packed_pred_mode)) ||
+ (ps_ctxt->i4_zcbf_rdo_level == ZCBF_ENABLE);
+ WORD32 i4_perform_coeff_level_rdoq = (ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING);
+ WORD8 intra_flag = 0;
+ ASSERT(csbf_strd == MAX_TU_IN_CTB_ROW);
+
+ *pi4_tu_bits = 0;
+ *pi4_coeff_off = 0;
+ pu1_is_recon_available[0] = 0;
+
+ if((PRED_MODE_SKIP == packed_pred_mode) || (0 == early_cbf)) /* nothing is coded: only the prediction cost is reported */
+ {
+ if(e_ssd_type != NULL_TYPE)
+ {
+ /* SSD cost is stored to the pointer */
+ pi8_cost[0] =
+
+ ps_ctxt->s_cmn_opt_func.pf_ssd_and_sad_calculator(
+ pu1_pred, pred_strd, pu1_src, src_strd, trans_size, pu4_blk_sad);
+
+#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
+ {
+ pi8_cost[0] = ihevce_inject_stim_into_distortion(
+ pu1_src,
+ src_strd,
+ pu1_pred,
+ pred_strd,
+ pi8_cost[0],
+ !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
+ : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
+ (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
+ 100.0,
+ trans_size,
+ 0,
+ ps_ctxt->u1_enable_psyRDOPT,
+ NULL_PLANE);
+ }
+#endif
+
+ /* copy pred to recon for skip mode */
+ if(SPATIAL_DOMAIN_SSD == e_ssd_type)
+ {
+ ps_ctxt->s_cmn_opt_func.pf_copy_2d(
+ pu1_recon, i4_recon_stride, pu1_pred, pred_strd, trans_size, trans_size);
+ pu1_is_recon_available[0] = 1;
+ }
+ else
+ {
+ pu1_is_recon_available[0] = 0;
+ }
+
+#if ENABLE_INTER_ZCU_COST
+ ps_ctxt->i8_cu_not_coded_cost += pi8_cost[0];
+#endif
+ }
+ else
+ {
+ pi8_cost[0] = UINT_MAX; /* no SSD requested (NULL_TYPE): a large placeholder cost is stored */
+ }
+
+ /* cbf is returned as 0 */
+ return (0);
+ }
+
+ /* derive context variables */
+ pi4_trans_scratch = (WORD32 *)&ps_ctxt->ai2_scratch[0]; /* starts at the same address as pi2_quant_coeffs */
+ pi2_quant_coeffs = &ps_ctxt->ai2_scratch[0];
+ pi2_trans_values = &ps_ctxt->ai2_scratch[0] + (MAX_TRANS_SIZE * 2);
+
+ /* initial index: 4x4 -> 1, 8x8 -> 2; intra 4x4, 16x16 and 32x32 are remapped below */
+ trans_idx = trans_size >> 2;
+
+ if(PRED_MODE_INTRA == packed_pred_mode)
+ {
+ quant_scale_mat_offset = 0;
+ intra_flag = 1;
+#if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
+ ai4_quant_rounding_factors[0][0] =
+ MAX(ps_ctxt->i4_quant_rnd_factor[intra_flag], (1 << QUANT_ROUND_FACTOR_Q) / 3);
+
+ for(i = 0; i < trans_size * trans_size; i++)
+ {
+ ai4_quant_rounding_factors[1][i] =
+ MAX(ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3][i],
+ (1 << QUANT_ROUND_FACTOR_Q) / 3);
+ ai4_quant_rounding_factors[2][i] =
+ MAX(ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3][i],
+ (1 << QUANT_ROUND_FACTOR_Q) / 3);
+ }
+#endif
+ }
+ else
+ {
+ quant_scale_mat_offset = NUM_TRANS_TYPES;
+ }
+ /* for intra 4x4 DST transform should be used */
+ if((1 == trans_idx) && (1 == intra_flag))
+ {
+ trans_idx = 0;
+ }
+ /* for 16x16 cases */
+ else if(16 == trans_size)
+ {
+ trans_idx = 3;
+ }
+ /* for 32x32 cases */
+ else if(32 == trans_size)
+ {
+ trans_idx = 4;
+ }
+
+ switch(trans_size) /* no default case: the map pointer stays NULL for any other size */
+ {
+ case 4:
+ {
+ pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map4x4TU;
+
+ break;
+ }
+ case 8:
+ {
+ pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map8x8TU;
+
+ break;
+ }
+ case 16:
+ {
+ pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map16x16TU;
+
+ break;
+ }
+ case 32:
+ {
+ pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map32x32TU;
+
+ break;
+ }
+ }
+
+ /* Do not call the FT and Quant functions if early_cbf is 0 (that case has already returned above, so only values other than 1 skip this) */
+ if(1 == early_cbf)
+ {
+ /* ---------- call residue and transform block ------- */
+ *pu4_blk_sad = ps_ctxt->apf_resd_trns[trans_idx](
+ pu1_src,
+ pu1_pred,
+ pi4_trans_scratch,
+ pi2_trans_values,
+ src_strd,
+ pred_strd,
+ ((trans_size << 16) + 0)); /* dst strd and chroma flag are packed together */
+
+ cbf = ps_ctxt->apf_quant_iquant_ssd
+ [i4_perform_coeff_level_rdoq + (e_ssd_type != FREQUENCY_DOMAIN_SSD) * 2](
+ pi2_trans_values,
+ ps_ctxt->api2_rescal_mat[trans_idx + quant_scale_mat_offset],
+ pi2_quant_coeffs,
+ pi2_deq_data,
+ trans_size,
+ ps_ctxt->i4_cu_qp_div6,
+ ps_ctxt->i4_cu_qp_mod6,
+#if !PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
+ ps_ctxt->i4_quant_rnd_factor[intra_flag],
+ ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
+ ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
+#else
+ intra_flag ? ai4_quant_rounding_factors[0][0]
+ : ps_ctxt->i4_quant_rnd_factor[intra_flag],
+ intra_flag ? ai4_quant_rounding_factors[1]
+ : ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
+ intra_flag ? ai4_quant_rounding_factors[2]
+ : ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
+#endif
+ trans_size,
+ trans_size,
+ deq_data_strd,
+ pu1_csbf_buf,
+ csbf_strd,
+ pi4_zero_col,
+ pi4_zero_row,
+ ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset],
+ pi8_cost);
+
+ if(e_ssd_type != FREQUENCY_DOMAIN_SSD)
+ {
+ pi8_cost[0] = UINT_MAX; /* placeholder; for SPATIAL_DOMAIN_SSD the cost is recomputed from recon further below */
+ }
+ }
+
+ if(0 != cbf)
+ {
+ if(i4_perform_sbh || i4_perform_rdoq)
+ {
+ ps_rdoq_sbh_ctxt->i4_iq_data_strd = deq_data_strd;
+ ps_rdoq_sbh_ctxt->i4_q_data_strd = trans_size;
+ ps_rdoq_sbh_ctxt->pi4_subBlock2csbfId_map = pi4_subBlock2csbfId_map;
+
+ ps_rdoq_sbh_ctxt->i4_qp_div = ps_ctxt->i4_cu_qp_div6;
+ ps_rdoq_sbh_ctxt->i2_qp_rem = ps_ctxt->i4_cu_qp_mod6;
+ ps_rdoq_sbh_ctxt->i4_scan_idx = ps_ctxt->i4_scan_idx;
+ ps_rdoq_sbh_ctxt->i8_ssd_cost = *pi8_cost;
+ ps_rdoq_sbh_ctxt->i4_trans_size = trans_size;
+
+ ps_rdoq_sbh_ctxt->pi2_dequant_coeff =
+ ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset];
+ ps_rdoq_sbh_ctxt->pi2_iquant_coeffs = pi2_deq_data;
+ ps_rdoq_sbh_ctxt->pi2_quant_coeffs = pi2_quant_coeffs;
+ ps_rdoq_sbh_ctxt->pi2_trans_values = pi2_trans_values;
+ ps_rdoq_sbh_ctxt->pu1_csbf_buf = pu1_csbf_buf;
+
+ /* Without RDOQ, sign data hiding runs here; with RDOQ it is deferred to the RDOQ path further below */
+ if((!i4_perform_rdoq))
+ {
+ ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt);
+
+ pi8_cost[0] = ps_rdoq_sbh_ctxt->i8_ssd_cost;
+ }
+ }
+
+ *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs(
+ pi2_quant_coeffs,
+ pi4_subBlock2csbfId_map,
+ ps_ctxt->i4_scan_idx,
+ trans_size,
+ pu1_ecd_data,
+ pu1_csbf_buf,
+ csbf_strd);
+ }
+ *pi8_cost >>= ga_trans_shift[trans_idx]; /* NOTE(review): the RDOQ path below shifts *pi8_cost again - presumably ihevce_entropy_rdo_encode_tu_rdoq rewrites it unshifted; confirm */
+
+#if RDOPT_ZERO_CBF_ENABLE
+ /* compare null cbf cost with encode tu rd-cost */
+ if(cbf != 0)
+ {
+ WORD32 tu_bits;
+ LWORD64 tu_rd_cost;
+
+ LWORD64 zero_cbf_cost = 0;
+
+ /*Populating the fields of rdoq_ctxt structure*/
+ if(i4_perform_rdoq)
+ {
+ /* transform size to log2transform size */
+ GETRANGE(ps_rdoq_sbh_ctxt->i4_log2_trans_size, trans_size);
+ ps_rdoq_sbh_ctxt->i4_log2_trans_size -= 1;
+ ps_rdoq_sbh_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->i8_cl_ssd_lambda_qf;
+ ps_rdoq_sbh_ctxt->i4_is_luma = 1;
+ ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td = ga_trans_shift[trans_idx];
+ ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td =
+ (1 << ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td) / 2;
+ ps_rdoq_sbh_ctxt->i1_tu_is_coded = 0;
+ ps_rdoq_sbh_ctxt->pi4_zero_col = pi4_zero_col;
+ ps_rdoq_sbh_ctxt->pi4_zero_row = pi4_zero_row;
+ }
+ else if(i4_perform_zcbf)
+ {
+ zero_cbf_cost =
+
+ ps_ctxt->s_cmn_opt_func.pf_ssd_calculator(
+ pu1_src, pu1_pred, src_strd, pred_strd, trans_size, trans_size);
+ }
+
+ /************************************************************************/
+ /* call the entropy rdo encode to get the bit estimate for current tu */
+ /* note that tu includes only residual coding bits and does not include */
+ /* tu split, cbf and qp delta encoding bits for a TU */
+ /************************************************************************/
+ if(i4_perform_rdoq)
+ {
+ tu_bits = ihevce_entropy_rdo_encode_tu_rdoq(
+ &ps_ctxt->s_rdopt_entropy_ctxt,
+ (pu1_ecd_data),
+ trans_size,
+ 1,
+ ps_rdoq_sbh_ctxt,
+ pi8_cost,
+ &zero_cbf_cost,
+ 0);
+
+ if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 0) /* RDOQ left the TU uncoded */
+ {
+ cbf = 0;
+ *pi4_coeff_off = 0;
+ }
+
+ if((i4_perform_sbh) && (0 != cbf))
+ {
+ ps_rdoq_sbh_ctxt->i8_ssd_cost = *pi8_cost;
+ ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt);
+ *pi8_cost = ps_rdoq_sbh_ctxt->i8_ssd_cost;
+ }
+
+ /*Add round value before normalizing*/
+ *pi8_cost += ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td;
+ *pi8_cost >>= ga_trans_shift[trans_idx];
+
+ if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 1) /* rescan the coefficients after RDOQ */
+ {
+ pi2_quant_coeffs = &ps_ctxt->ai2_scratch[0];
+ *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs(
+ pi2_quant_coeffs,
+ pi4_subBlock2csbfId_map,
+ ps_ctxt->i4_scan_idx,
+ trans_size,
+ pu1_ecd_data,
+ pu1_csbf_buf,
+ csbf_strd);
+ }
+ }
+ else
+ {
+ tu_bits = ihevce_entropy_rdo_encode_tu(
+ &ps_ctxt->s_rdopt_entropy_ctxt, pu1_ecd_data, trans_size, 1, i4_perform_sbh);
+ }
+
+ *pi4_tu_bits = tu_bits;
+
+ if(e_ssd_type == SPATIAL_DOMAIN_SSD)
+ {
+ *pi8_cost = ihevce_it_recon_ssd(
+ ps_ctxt,
+ pu1_src,
+ src_strd,
+ pu1_pred,
+ pred_strd,
+ pi2_deq_data,
+ deq_data_strd,
+ pu1_recon,
+ i4_recon_stride,
+ pu1_ecd_data,
+ trans_size,
+ packed_pred_mode,
+ cbf,
+ *pi4_zero_col,
+ *pi4_zero_row,
+ NULL_PLANE);
+
+ pu1_is_recon_available[0] = 1;
+ }
+
+#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier)
+ {
+ pi8_cost[0] = ihevce_inject_stim_into_distortion(
+ pu1_src,
+ src_strd,
+ pu1_recon,
+ i4_recon_stride,
+ pi8_cost[0],
+ i4_alpha_stim_multiplier,
+ trans_size,
+ 0,
+ ps_ctxt->u1_enable_psyRDOPT,
+ NULL_PLANE);
+ }
+ else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier)
+ {
+ pi8_cost[0] = ihevce_inject_stim_into_distortion(
+ pu1_src,
+ src_strd,
+ pu1_pred,
+ pred_strd,
+ pi8_cost[0],
+ i4_alpha_stim_multiplier,
+ trans_size,
+ 0,
+ ps_ctxt->u1_enable_psyRDOPT,
+ NULL_PLANE);
+ }
+#endif
+
+ /* add the SSD cost to bits estimate given by ECD */
+ tu_rd_cost = *pi8_cost + COMPUTE_RATE_COST_CLIP30(
+ tu_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
+
+ if(i4_perform_zcbf)
+ {
+#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
+ {
+ zero_cbf_cost = ihevce_inject_stim_into_distortion(
+ pu1_src,
+ src_strd,
+ pu1_pred,
+ pred_strd,
+ zero_cbf_cost,
+ !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
+ : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
+ (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
+ 100.0,
+ trans_size,
+ 0,
+ ps_ctxt->u1_enable_psyRDOPT,
+ NULL_PLANE);
+ }
+#endif
+
+ /* force the tu as zero cbf if zero_cbf_cost is lower */
+ if(zero_cbf_cost < tu_rd_cost)
+ {
+ /* num bytes is set to 0 */
+ *pi4_coeff_off = 0;
+
+ /* cbf is returned as 0 */
+ cbf = 0;
+
+ /* cost is returned as 0 cbf cost */
+ *pi8_cost = zero_cbf_cost;
+
+ /* TU bits is set to 0 */
+ *pi4_tu_bits = 0;
+ pu1_is_recon_available[0] = 0;
+
+ if(SPATIAL_DOMAIN_SSD == e_ssd_type)
+ {
+ /* copy pred to recon for zcbf mode */
+
+ ps_ctxt->s_cmn_opt_func.pf_copy_2d(
+ pu1_recon, i4_recon_stride, pu1_pred, pred_strd, trans_size, trans_size);
+
+ pu1_is_recon_available[0] = 1;
+ }
+ }
+ /* accumulate cu not coded cost with zcbf cost */
+#if ENABLE_INTER_ZCU_COST
+ ps_ctxt->i8_cu_not_coded_cost += zero_cbf_cost;
+#endif
+ }
+ }
+ else
+ {
+ /* cbf = 0, accumulate cu not coded cost */
+ if(e_ssd_type == SPATIAL_DOMAIN_SSD)
+ {
+ *pi8_cost = ihevce_it_recon_ssd(
+ ps_ctxt,
+ pu1_src,
+ src_strd,
+ pu1_pred,
+ pred_strd,
+ pi2_deq_data,
+ deq_data_strd,
+ pu1_recon,
+ i4_recon_stride,
+ pu1_ecd_data,
+ trans_size,
+ packed_pred_mode,
+ cbf,
+ *pi4_zero_col,
+ *pi4_zero_row,
+ NULL_PLANE);
+
+ pu1_is_recon_available[0] = 1;
+ }
+
+#if ENABLE_INTER_ZCU_COST
+ {
+#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier)
+ {
+ pi8_cost[0] = ihevce_inject_stim_into_distortion(
+ pu1_src,
+ src_strd,
+ pu1_recon,
+ i4_recon_stride,
+ pi8_cost[0],
+ !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
+ : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
+ (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
+ 100.0,
+ trans_size,
+ 0,
+ ps_ctxt->u1_enable_psyRDOPT,
+ NULL_PLANE);
+ }
+ else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier)
+ {
+ pi8_cost[0] = ihevce_inject_stim_into_distortion(
+ pu1_src,
+ src_strd,
+ pu1_pred,
+ pred_strd,
+ pi8_cost[0],
+ !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
+ : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
+ (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
+ 100.0,
+ trans_size,
+ 0,
+ ps_ctxt->u1_enable_psyRDOPT,
+ NULL_PLANE);
+ }
+#endif
+
+ ps_ctxt->i8_cu_not_coded_cost += *pi8_cost;
+ }
+#endif /* ENABLE_INTER_ZCU_COST */
+ }
+#endif
+
+ return (cbf);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_it_recon_fxn \endif
+*
+* \brief
+* Transform unit level (Luma) IT Recon function
+*
+* \param[in] ps_ctxt enc_loop module ctxt pointer
+* \param[in] pi2_deq_data pointer to iq data
+* \param[in] deq_dat_strd iq data buffer stride
+* \param[in] pu1_pred pointer to predicted data buffer
+* \param[in] pred_strd predicted buffer stride
+* \param[in] pu1_recon pointer to recon buffer
+* \param[in] recon_strd recon buffer stride
+* \param[out] pu1_ecd_data pointer coeff output buffer (input to ent cod)
+* \param[in] trans_size transform size (4, 8, 16,32)
+* \param[in] packed_pred_mode 0:Inter 1:Intra 2:Skip
+* \param[in] cbf CBF of the current block
+* \param[in] zero_cols zero_cols of the current block
+* \param[in] zero_rows zero_rows of the current block
+*
+* \return
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+
+void ihevce_it_recon_fxn(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ WORD16 *pi2_deq_data,
+ WORD32 deq_dat_strd,
+ UWORD8 *pu1_pred,
+ WORD32 pred_strd,
+ UWORD8 *pu1_recon,
+ WORD32 recon_strd,
+ UWORD8 *pu1_ecd_data,
+ WORD32 trans_size,
+ WORD32 packed_pred_mode,
+ WORD32 cbf,
+ WORD32 zero_cols,
+ WORD32 zero_rows)
+{
+ WORD32 dc_add_flag = 0; /* set to 1 when the first two bytes of pu1_ecd_data are both 0 */
+ WORD32 trans_idx;
+
+ /* initial index: 4x4 -> 1, 8x8 -> 2; intra 4x4, 16x16 and 32x32 are remapped below */
+ trans_idx = trans_size >> 2;
+
+ /* if SKIP mode needs to be evaluated the pred is copied to recon */
+ if(PRED_MODE_SKIP == packed_pred_mode)
+ {
+ UWORD8 *pu1_curr_recon, *pu1_curr_pred;
+
+ pu1_curr_pred = pu1_pred;
+ pu1_curr_recon = pu1_recon;
+
+ /* 2D copy of data */
+
+ ps_ctxt->s_cmn_opt_func.pf_2d_square_copy(
+ pu1_curr_recon, recon_strd, pu1_curr_pred, pred_strd, trans_size, sizeof(UWORD8));
+
+ return;
+ }
+
+ /* for intra 4x4 DST transform should be used */
+ if((1 == trans_idx) && (PRED_MODE_INTRA == packed_pred_mode))
+ {
+ trans_idx = 0;
+ }
+ /* for 16x16 cases */
+ else if(16 == trans_size)
+ {
+ trans_idx = 3;
+ }
+ /* for 32x32 cases */
+ else if(32 == trans_size)
+ {
+ trans_idx = 4;
+ }
+
+ /*if (lastx == 0 && lasty == 0) , ie only 1 coefficient. NOTE(review): pu1_ecd_data is read even when cbf == 0 - confirm it is always valid to read here */
+ if((0 == pu1_ecd_data[0]) && (0 == pu1_ecd_data[1]))
+ {
+ dc_add_flag = 1;
+ }
+
+ if(0 == cbf)
+ {
+ /* no coded residue: recon is a copy of pred */
+ ps_ctxt->s_cmn_opt_func.pf_2d_square_copy(
+ pu1_recon, recon_strd, pu1_pred, pred_strd, trans_size, 1);
+ }
+ else if((1 == dc_add_flag) && (0 != trans_idx)) /* DC-only shortcut is not taken for trans_idx 0 (intra 4x4) */
+ {
+ /* dc add */
+ ps_ctxt->s_cmn_opt_func.pf_itrans_recon_dc(
+ pu1_pred,
+ pred_strd,
+ pu1_recon,
+ recon_strd,
+ trans_size,
+ pi2_deq_data[0],
+ NULL_PLANE /* luma */
+ );
+ }
+ else
+ {
+ ps_ctxt->apf_it_recon[trans_idx]( /* inverse transform + recon routine selected by trans_idx */
+ pi2_deq_data,
+ &ps_ctxt->ai2_scratch[0],
+ pu1_pred,
+ pu1_recon,
+ deq_dat_strd,
+ pred_strd,
+ recon_strd,
+ zero_cols,
+ zero_rows);
+ }
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_chroma_it_recon_fxn \endif
+*
+* \brief
+*    Transform unit level (Chroma) IT Recon function
+*
+* \par Description
+*    Writes the reconstruction of one chroma TU (one plane, selected by
+*    e_chroma_plane) through one of three paths:
+*      - cbf is 0 : pf_chroma_interleave_2d_copy is called on pred / recon
+*      - pu1_ecd_data[0] and pu1_ecd_data[1] are both 0 :
+*        pf_itrans_recon_dc is called with pi2_deq_data[0]
+*      - otherwise : the apf_chrm_it_recon[] entry for the transform size
+*        is called with pred / recon pointers offset by e_chroma_plane
+*    A trans_size of 2 is treated as 4.
+*
+* \param[in] ps_ctxt enc_loop module ctxt pointer
+* \param[in] pi2_deq_data pointer to iq data
+* \param[in] deq_dat_strd iq data buffer stride
+* \param[in] pu1_pred pointer to predicted data buffer
+* \param[in] pred_strd predicted buffer stride
+* \param[out] pu1_recon pointer to recon buffer
+* \param[in] recon_strd recon buffer stride
+* \param[in] pu1_ecd_data pointer to coeff buffer (input to ent cod)
+* \param[in] trans_size transform size (4, 8, 16)
+* \param[in] cbf CBF of the current block
+* \param[in] zero_cols zero_cols of the current block
+* \param[in] zero_rows zero_rows of the current block
+* \param[in] e_chroma_plane chroma plane to process (U_PLANE or V_PLANE)
+*
+* \return
+*    None
+*
+* \author
+*  Ittiam
+*
+*****************************************************************************
+*/
+
+void ihevce_chroma_it_recon_fxn(
+    ihevce_enc_loop_ctxt_t *ps_ctxt,
+    WORD16 *pi2_deq_data,
+    WORD32 deq_dat_strd,
+    UWORD8 *pu1_pred,
+    WORD32 pred_strd,
+    UWORD8 *pu1_recon,
+    WORD32 recon_strd,
+    UWORD8 *pu1_ecd_data,
+    WORD32 trans_size,
+    WORD32 cbf,
+    WORD32 zero_cols,
+    WORD32 zero_rows,
+    CHROMA_PLANE_ID_T e_chroma_plane)
+{
+    WORD32 it_recon_fn_idx;
+    UWORD8 *pu1_plane_pred;
+    UWORD8 *pu1_plane_recon;
+
+    ASSERT((e_chroma_plane == U_PLANE) || (e_chroma_plane == V_PLANE));
+
+    /* 2x2 transform is not allowed for chroma, 4x4 is used instead */
+    trans_size = (2 == trans_size) ? 4 : trans_size;
+
+    /* nothing coded : recon is a copy of pred for the requested plane */
+    if(0 == cbf)
+    {
+        ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
+            pu1_pred, pred_strd, pu1_recon, recon_strd, trans_size, trans_size, e_chroma_plane);
+
+        return;
+    }
+
+    /* first two bytes of the coeff buffer are both zero : dc add */
+    if((0 == pu1_ecd_data[0]) && (0 == pu1_ecd_data[1]))
+    {
+        ps_ctxt->s_cmn_opt_func.pf_itrans_recon_dc(
+            pu1_pred,
+            pred_strd,
+            pu1_recon,
+            recon_strd,
+            trans_size,
+            pi2_deq_data[0],
+            e_chroma_plane /* chroma plane */
+        );
+
+        return;
+    }
+
+    /* function table index : 16x16 maps to 2, every other size maps to */
+    /* (trans_size / 4) - 1, i.e. 4x4 -> 0 and 8x8 -> 1                  */
+    it_recon_fn_idx = (16 == trans_size) ? 2 : ((trans_size >> 2) - 1);
+
+    /* pred and recon pointers are advanced by the plane id */
+    pu1_plane_pred = pu1_pred + (WORD32)e_chroma_plane;
+    pu1_plane_recon = pu1_recon + (WORD32)e_chroma_plane;
+
+    ps_ctxt->apf_chrm_it_recon[it_recon_fn_idx](
+        pi2_deq_data,
+        &ps_ctxt->ai2_scratch[0],
+        pu1_plane_pred,
+        pu1_plane_recon,
+        deq_dat_strd,
+        pred_strd,
+        recon_strd,
+        zero_cols,
+        zero_rows);
+}
+
+/**
+*******************************************************************************
+* \if Function name : ihevce_mpm_idx_based_filter_RDOPT_cand \endif
+*
+* \brief
+*    Filters the RDOPT candidates based on mpm_idx
+*
+* \par Description
+*    Builds a three entry candidate mode list for the CU with
+*    ihevce_populate_intra_pred_mode (from the top / left neighbour luma modes
+*    and their availability) and then walks the luma mode list, which is
+*    terminated by the value 255. Every candidate other than the first one
+*    whose mode is not in the three entry list gets its evaluation mark
+*    cleared to 0. Marks of the first candidate and of candidates found in
+*    the list are left untouched.
+*
+* \param[in] ps_ctxt : ptr to enc loop context
+* \param[in] ps_cu_analyse : ptr to CU analyse structure
+* \param[in] ps_left_nbr_4x4 : left 4x4 neighbour pointer
+* \param[in] ps_top_nbr_4x4 : top 4x4 neighbour pointer
+* \param[in] pu1_luma_mode : list of candidate luma modes, terminated by 255
+* \param[in,out] pu1_eval_mark : per candidate evaluation marks, indexed like
+*                pu1_luma_mode
+*
+* \returns none
+*
+* \author
+*  Ittiam
+*
+*******************************************************************************
+*/
+
+void ihevce_mpm_idx_based_filter_RDOPT_cand(
+    ihevce_enc_loop_ctxt_t *ps_ctxt,
+    cu_analyse_t *ps_cu_analyse,
+    nbr_4x4_t *ps_left_nbr_4x4,
+    nbr_4x4_t *ps_top_nbr_4x4,
+    UWORD8 *pu1_luma_mode,
+    UWORD8 *pu1_eval_mark)
+{
+    nbr_avail_flags_t s_nbr_avail;
+    WORD32 ai4_mpm_modes[3];
+    WORD32 cand_idx;
+
+    /* CU position is doubled before use (units of 4 assumed from the */
+    /* 4x4 neighbour helpers it is passed to - TODO confirm)          */
+    WORD32 pos_x = ps_cu_analyse->b3_cu_pos_x << 1;
+    WORD32 pos_y = ps_cu_analyse->b3_cu_pos_y << 1;
+    WORD32 cu_size = ps_cu_analyse->u1_cu_size;
+
+    /* get the neighbour availability flags; only s_nbr_avail is used, */
+    /* the returned flag word is not needed here                       */
+    (void)ihevce_get_nbr_intra(
+        &s_nbr_avail,
+        ps_ctxt->pu1_ctb_nbr_map,
+        ps_ctxt->i4_nbr_map_strd,
+        pos_x,
+        pos_y,
+        cu_size >> 2);
+
+    /* Populate the luma intra pred mode list for TU=CU. The same list is */
+    /* used for TU=CU/2 as well since the modes are same in both cases.   */
+    ihevce_populate_intra_pred_mode(
+        ps_top_nbr_4x4->b6_luma_intra_mode,
+        ps_left_nbr_4x4->b6_luma_intra_mode,
+        s_nbr_avail.u1_top_avail,
+        s_nbr_avail.u1_left_avail,
+        pos_y,
+        &ai4_mpm_modes[0]);
+
+    /* empty candidate list : nothing to filter */
+    if(255 == pu1_luma_mode[0])
+    {
+        return;
+    }
+
+    /* The 1st candidate must go through the RDOPT stage irrespective of */
+    /* whether it is present in the list or not, hence the walk starts   */
+    /* from the 2nd candidate.                                           */
+    for(cand_idx = 1; 255 != pu1_luma_mode[cand_idx]; cand_idx++)
+    {
+        WORD32 cand_mode = pu1_luma_mode[cand_idx];
+        WORD32 is_in_list = (cand_mode == ai4_mpm_modes[0]) ||
+                            (cand_mode == ai4_mpm_modes[1]) ||
+                            (cand_mode == ai4_mpm_modes[2]);
+
+        /* candidate not present in the list : do not evaluate it */
+        if(!is_in_list)
+        {
+            pu1_eval_mark[cand_idx] = 0;
+        }
+    }
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_intra_rdopt_cu_ntu \endif
+*
+* \brief
+* Intra Coding unit function for RD opt mode
+*
+* \param[in] ps_ctxt enc_loop module ctxt pointer
+* \param[in] ps_chrm_cu_buf_prms pointer to chroma buffer pointers structure
+* \param[in] pu1_luma_mode : pointer to luma mode
+* \param[in] ps_cu_analyse pointer to cu analyse pointer
+* \param[in] pu1_src pointer to source data buffer
+* \param[in] src_strd source buffer stride
+* \param[in] pu1_cu_left pointer to left recon data buffer
+* \param[in] pu1_cu_top pointer to top recon data buffer
+* \param[in] pu1_cu_top_left pointer to top left recon data buffer
+* \param[in] ps_left_nbr_4x4 : left 4x4 neighbour pointer
+* \param[in] ps_top_nbr_4x4 : top 4x4 neighbour pointer
+* \param[in] nbr_4x4_left_strd left nbr4x4 stride
+* \param[in] cu_left_stride left recon buffer stride
+* \param[in] curr_buf_idx RD opt buffer index for current usage
+* \param[in] func_proc_mode : function processing mode @sa TU_SIZE_WRT_CU_T
+*
+* \return
+* RDopt cost
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+LWORD64 ihevce_intra_rdopt_cu_ntu(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ enc_loop_cu_prms_t *ps_cu_prms,
+ void *pv_pred_org,
+ WORD32 pred_strd_org,
+ enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
+ UWORD8 *pu1_luma_mode,
+ cu_analyse_t *ps_cu_analyse,
+ void *pv_curr_src,
+ void *pv_cu_left,
+ void *pv_cu_top,
+ void *pv_cu_top_left,
+ nbr_4x4_t *ps_left_nbr_4x4,
+ nbr_4x4_t *ps_top_nbr_4x4,
+ WORD32 nbr_4x4_left_strd,
+ WORD32 cu_left_stride,
+ WORD32 curr_buf_idx,
+ WORD32 func_proc_mode,
+ WORD32 i4_alpha_stim_multiplier)
+{
+ enc_loop_cu_final_prms_t *ps_final_prms;
+ nbr_avail_flags_t s_nbr;
+ nbr_4x4_t *ps_nbr_4x4;
+ nbr_4x4_t *ps_tmp_lt_4x4;
+ recon_datastore_t *ps_recon_datastore;
+
+ ihevc_intra_pred_luma_ref_substitution_ft *ihevc_intra_pred_luma_ref_substitution_fptr;
+
+ UWORD32 *pu4_nbr_flags;
+ UWORD8 *pu1_intra_pred_mode;
+ WORD32 cu_pos_x;
+ WORD32 cu_pos_y;
+ WORD32 trans_size = 0;
+ UWORD8 *pu1_left;
+ UWORD8 *pu1_top;
+ UWORD8 *pu1_top_left;
+ UWORD8 *pu1_recon;
+ UWORD8 *pu1_csbf_buf;
+ UWORD8 *pu1_ecd_data;
+ WORD16 *pi2_deq_data;
+ WORD32 deq_data_strd;
+ LWORD64 total_rdopt_cost;
+ WORD32 ctr;
+ WORD32 left_strd;
+ WORD32 i4_recon_stride;
+ WORD32 csbf_strd;
+ WORD32 ecd_data_bytes_cons;
+ WORD32 num_4x4_in_tu;
+ WORD32 num_4x4_in_cu;
+ WORD32 chrm_present_flag;
+ WORD32 tx_size;
+ WORD32 cu_bits;
+ WORD32 num_cu_parts = 0;
+ WORD32 num_cands = 0;
+ WORD32 cu_pos_x_8pelunits;
+ WORD32 cu_pos_y_8pelunits;
+ WORD32 i4_perform_rdoq;
+ WORD32 i4_perform_sbh;
+ UWORD8 u1_compute_spatial_ssd;
+ UWORD8 u1_compute_recon;
+ UWORD8 au1_intra_nxn_rdopt_ctxt_models[2][IHEVC_CAB_CTXT_END];
+
+ UWORD16 u2_num_tus_in_cu = 0;
+ WORD32 is_sub_pu_in_hq = 0;
+ /* Get the RDOPT cost of the best CU mode for early_exit */
+ LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!curr_buf_idx].i8_best_rdopt_cost;
+ /* cabac context of prev intra luma pred flag */
+ UWORD8 u1_prev_flag_cabac_ctxt =
+ ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_INTRA_LUMA_PRED_FLAG];
+ WORD32 src_strd = ps_cu_prms->i4_luma_src_stride;
+
+ UWORD8 u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy && !DISABLE_INTRA_WHEN_NOISY;
+
+ total_rdopt_cost = 0;
+ ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx];
+ ps_recon_datastore = &ps_final_prms->s_recon_datastore;
+ i4_recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
+ csbf_strd = ps_ctxt->i4_cu_csbf_strd;
+ pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0];
+ pu1_ecd_data = &ps_final_prms->pu1_cu_coeffs[0];
+ pi2_deq_data = &ps_final_prms->pi2_cu_deq_coeffs[0];
+ deq_data_strd = ps_cu_analyse->u1_cu_size; /* deq_data stride is cu size */
+ ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0];
+ ps_tmp_lt_4x4 = ps_left_nbr_4x4;
+ pu4_nbr_flags = &ps_final_prms->au4_nbr_flags[0];
+ pu1_intra_pred_mode = &ps_final_prms->au1_intra_pred_mode[0];
+ cu_pos_x = ps_cu_analyse->b3_cu_pos_x;
+ cu_pos_y = ps_cu_analyse->b3_cu_pos_y;
+ cu_pos_x_8pelunits = cu_pos_x;
+ cu_pos_y_8pelunits = cu_pos_y;
+
+ /* reset cu not coded cost */
+ ps_ctxt->i8_cu_not_coded_cost = 0;
+
+ /* based on the Processng mode */
+ if(TU_EQ_CU == func_proc_mode)
+ {
+ ps_final_prms->u1_part_mode = SIZE_2Nx2N;
+ trans_size = ps_cu_analyse->u1_cu_size;
+ num_cu_parts = 1;
+ num_cands = 1;
+ u2_num_tus_in_cu = 1;
+ }
+ else if(TU_EQ_CU_DIV2 == func_proc_mode)
+ {
+ ps_final_prms->u1_part_mode = SIZE_2Nx2N;
+ trans_size = ps_cu_analyse->u1_cu_size >> 1;
+ num_cu_parts = 4;
+ num_cands = 1;
+ u2_num_tus_in_cu = 4;
+ }
+ else if(TU_EQ_SUBCU == func_proc_mode)
+ {
+ ps_final_prms->u1_part_mode = SIZE_NxN;
+ trans_size = ps_cu_analyse->u1_cu_size >> 1;
+ num_cu_parts = 4;
+ /*In HQ for TU = SUBPU, all 35 modes used for RDOPT instead of 3 modes */
+ if(IHEVCE_QUALITY_P3 > ps_ctxt->i4_quality_preset)
+ {
+ if(ps_ctxt->i1_slice_type != BSLICE)
+ {
+ num_cands = (4 * MAX_INTRA_CU_CANDIDATES) + 2;
+ }
+ else
+ {
+ num_cands = (2 * MAX_INTRA_CU_CANDIDATES);
+ }
+ }
+ else
+ {
+ num_cands = MAX_INTRA_CU_CANDIDATES;
+ }
+ u2_num_tus_in_cu = 4;
+ }
+ else
+ {
+ /* should not enter here */
+ ASSERT(0);
+ }
+
+ if(ps_ctxt->i1_cu_qp_delta_enable)
+ {
+ WORD32 i4_act_counter = 0, i4_act_counter_lamda = 0;
+ if(ps_cu_analyse->u1_cu_size == 64)
+ {
+ ASSERT(
+ (trans_size == 32) || (trans_size == 16) || (trans_size == 8) || (trans_size == 4));
+ i4_act_counter = (trans_size == 16) + 2 * ((trans_size == 8) || (trans_size == 4));
+ i4_act_counter_lamda = 3;
+ }
+ else if(ps_cu_analyse->u1_cu_size == 32)
+ {
+ ASSERT(
+ (trans_size == 32) || (trans_size == 16) || (trans_size == 8) || (trans_size == 4));
+ i4_act_counter = (trans_size == 16) + 2 * ((trans_size == 8) || (trans_size == 4));
+ i4_act_counter_lamda = 0;
+ }
+ else if(ps_cu_analyse->u1_cu_size == 16)
+ {
+ ASSERT((trans_size == 16) || (trans_size == 8) || (trans_size == 4));
+ i4_act_counter = (trans_size == 8) || (trans_size == 4);
+ i4_act_counter_lamda = 0;
+ }
+ else if(ps_cu_analyse->u1_cu_size == 8)
+ {
+ ASSERT((trans_size == 8) || (trans_size == 4));
+ i4_act_counter = 1;
+ i4_act_counter_lamda = 0;
+ }
+ else
+ {
+ ASSERT(0);
+ }
+ if(ps_ctxt->i4_use_ctb_level_lamda)
+ {
+ ihevce_compute_cu_level_QP(
+ ps_ctxt, ps_cu_analyse->i4_act_factor[i4_act_counter][1], -1, 0);
+ }
+ else
+ {
+ ihevce_compute_cu_level_QP(
+ ps_ctxt,
+ ps_cu_analyse->i4_act_factor[i4_act_counter][1],
+ ps_cu_analyse->i4_act_factor[i4_act_counter_lamda][1],
+ 0);
+ }
+
+ ps_cu_analyse->i1_cu_qp = ps_ctxt->i4_cu_qp;
+ }
+ if(u1_is_cu_noisy && !ps_ctxt->u1_enable_psyRDOPT)
+ {
+ ps_ctxt->i8_cl_ssd_lambda_qf =
+ ((float)ps_ctxt->i8_cl_ssd_lambda_qf * (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) /
+ 100.0f);
+ ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
+ ((float)ps_ctxt->i8_cl_ssd_lambda_chroma_qf *
+ (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
+ }
+
+ u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
+ (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
+ CONVERT_SSDS_TO_SPATIAL_DOMAIN;
+
+ if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
+ {
+ u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
+ CONVERT_SSDS_TO_SPATIAL_DOMAIN;
+ }
+
+ /* populate the neigbours */
+ pu1_left = (UWORD8 *)pv_cu_left;
+ pu1_top = (UWORD8 *)pv_cu_top;
+ pu1_top_left = (UWORD8 *)pv_cu_top_left;
+ left_strd = cu_left_stride;
+ num_4x4_in_tu = (trans_size >> 2);
+ num_4x4_in_cu = (ps_cu_analyse->u1_cu_size >> 2);
+ chrm_present_flag = 1;
+ ecd_data_bytes_cons = 0;
+ cu_bits = 0;
+
+ /* get the 4x4 level postion of current cu */
+ cu_pos_x = cu_pos_x << 1;
+ cu_pos_y = cu_pos_y << 1;
+
+ /* pouplate cu level params knowing that current is intra */
+ ps_final_prms->u1_skip_flag = 0;
+ ps_final_prms->u1_intra_flag = PRED_MODE_INTRA;
+ ps_final_prms->u2_num_pus_in_cu = 1;
+ /*init the is_cu_coded flag*/
+ ps_final_prms->u1_is_cu_coded = 0;
+ ps_final_prms->u4_cu_sad = 0;
+
+ ps_final_prms->as_pu_enc_loop[0].b1_intra_flag = PRED_MODE_INTRA;
+ ps_final_prms->as_pu_enc_loop[0].b4_wd = (trans_size >> 1) - 1;
+ ps_final_prms->as_pu_enc_loop[0].b4_ht = (trans_size >> 1) - 1;
+ ps_final_prms->as_pu_enc_loop[0].b4_pos_x = cu_pos_x;
+ ps_final_prms->as_pu_enc_loop[0].b4_pos_y = cu_pos_y;
+ ps_final_prms->as_pu_enc_loop[0].b1_merge_flag = 0;
+
+ ps_final_prms->as_col_pu_enc_loop[0].b1_intra_flag = 1;
+
+ /*copy qp directly as intra cant be skip*/
+ ps_nbr_4x4->b8_qp = ps_ctxt->i4_cu_qp;
+ ps_nbr_4x4->mv.s_l0_mv.i2_mvx = 0;
+ ps_nbr_4x4->mv.s_l0_mv.i2_mvy = 0;
+ ps_nbr_4x4->mv.s_l1_mv.i2_mvx = 0;
+ ps_nbr_4x4->mv.s_l1_mv.i2_mvy = 0;
+ ps_nbr_4x4->mv.i1_l0_ref_pic_buf_id = -1;
+ ps_nbr_4x4->mv.i1_l1_ref_pic_buf_id = -1;
+ ps_nbr_4x4->mv.i1_l0_ref_idx = -1;
+ ps_nbr_4x4->mv.i1_l1_ref_idx = -1;
+
+ /* RDOPT copy States : TU init (best until prev TU) to current */
+ memcpy(
+ &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
+ .s_cabac_ctxt.au1_ctxt_models[0],
+ &ps_ctxt->au1_rdopt_init_ctxt_models[0],
+ IHEVC_CAB_COEFFX_PREFIX);
+
+ /* RDOPT copy States :update to init state if 0 cbf */
+ memcpy(
+ &au1_intra_nxn_rdopt_ctxt_models[0][0],
+ &ps_ctxt->au1_rdopt_init_ctxt_models[0],
+ IHEVC_CAB_COEFFX_PREFIX);
+ memcpy(
+ &au1_intra_nxn_rdopt_ctxt_models[1][0],
+ &ps_ctxt->au1_rdopt_init_ctxt_models[0],
+ IHEVC_CAB_COEFFX_PREFIX);
+
+ /* loop for all partitions in CU blocks */
+ for(ctr = 0; ctr < num_cu_parts; ctr++)
+ {
+ UWORD8 *pu1_curr_mode;
+ WORD32 cand_ctr;
+ WORD32 nbr_flags;
+
+ /* for NxN case to track the best mode */
+ /* for other cases zeroth index will be used */
+ intra_prev_rem_flags_t as_intra_prev_rem[2];
+ LWORD64 ai8_cand_rdopt_cost[2];
+ UWORD32 au4_tu_sad[2];
+ WORD32 ai4_tu_bits[2];
+ WORD32 ai4_cbf[2];
+ WORD32 ai4_curr_bytes[2];
+ WORD32 ai4_zero_col[2];
+ WORD32 ai4_zero_row[2];
+ /* To store the pred, coeff and dequant for TU_EQ_SUBCU case (since mul.
+ cand. are there) ping-pong buffer to store the best and current */
+ UWORD8 au1_cur_pred_data[2][MIN_TU_SIZE * MIN_TU_SIZE];
+ UWORD8 au1_intra_coeffs[2][MAX_SCAN_COEFFS_BYTES_4x4];
+ WORD16 ai2_intra_deq_coeffs[2][MIN_TU_SIZE * MIN_TU_SIZE];
+ /* Context models stored for RDopt store and restore purpose */
+
+ UWORD8 au1_recon_availability[2];
+
+ WORD32 best_cand_idx = 0;
+ LWORD64 best_cand_cost = MAX_COST_64;
+ /* counters to toggle b/w best and current */
+ WORD32 best_intra_buf_idx = 1;
+ WORD32 curr_intra_buf_idx = 0;
+
+ /* copy the mode pointer to be used in inner loop */
+ pu1_curr_mode = pu1_luma_mode;
+
+ /* get the neighbour availability flags */
+ nbr_flags = ihevce_get_nbr_intra(
+ &s_nbr,
+ ps_ctxt->pu1_ctb_nbr_map,
+ ps_ctxt->i4_nbr_map_strd,
+ cu_pos_x,
+ cu_pos_y,
+ num_4x4_in_tu);
+
+ /* copy the nbr flags for chroma reuse */
+ if(4 != trans_size)
+ {
+ *pu4_nbr_flags = nbr_flags;
+ }
+ else if(1 == chrm_present_flag)
+ {
+ /* compute the avail flags assuming luma trans is 8x8 */
+ /* get the neighbour availability flags */
+ *pu4_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
+ ps_ctxt->pu1_ctb_nbr_map,
+ ps_ctxt->i4_nbr_map_strd,
+ cu_pos_x,
+ cu_pos_y,
+ (num_4x4_in_tu << 1),
+ (num_4x4_in_tu << 1));
+ }
+
+ u1_compute_recon = !u1_compute_spatial_ssd && ((num_cu_parts > 1) && (ctr < 3));
+
+ if(!ctr && (u1_compute_spatial_ssd || u1_compute_recon))
+ {
+ ps_recon_datastore->u1_is_lumaRecon_available = 1;
+ }
+ else if(!ctr)
+ {
+ ps_recon_datastore->u1_is_lumaRecon_available = 0;
+ }
+
+ ihevc_intra_pred_luma_ref_substitution_fptr =
+ ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr;
+
+ /* call reference array substitution */
+ ihevc_intra_pred_luma_ref_substitution_fptr(
+ pu1_top_left,
+ pu1_top,
+ pu1_left,
+ left_strd,
+ trans_size,
+ nbr_flags,
+ (UWORD8 *)ps_ctxt->pv_ref_sub_out,
+ 1);
+
+ /* Intra Mode gating based on MPM cand list and encoder quality preset */
+ if((ps_ctxt->i1_slice_type != ISLICE) && (TU_EQ_SUBCU == func_proc_mode) &&
+ (ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3))
+ {
+ ihevce_mpm_idx_based_filter_RDOPT_cand(
+ ps_ctxt,
+ ps_cu_analyse,
+ ps_left_nbr_4x4,
+ ps_top_nbr_4x4,
+ pu1_luma_mode,
+ &ps_cu_analyse->s_cu_intra_cand.au1_nxn_eval_mark[ctr][0]);
+ }
+
+ if((TU_EQ_SUBCU == func_proc_mode) && (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
+ (ps_cu_analyse->s_cu_intra_cand.au1_num_modes_added[ctr] >= MAX_INTRA_CU_CANDIDATES))
+ {
+ WORD32 ai4_mpm_mode_list[3];
+ WORD32 i;
+
+ WORD32 i4_curr_index = ps_cu_analyse->s_cu_intra_cand.au1_num_modes_added[ctr];
+
+ ihevce_populate_intra_pred_mode(
+ ps_top_nbr_4x4->b6_luma_intra_mode,
+ ps_tmp_lt_4x4->b6_luma_intra_mode,
+ s_nbr.u1_top_avail,
+ s_nbr.u1_left_avail,
+ cu_pos_y,
+ &ai4_mpm_mode_list[0]);
+
+ for(i = 0; i < 3; i++)
+ {
+ if(ps_cu_analyse->s_cu_intra_cand
+ .au1_intra_luma_mode_nxn_hash[ctr][ai4_mpm_mode_list[i]] == 0)
+ {
+ ASSERT(ai4_mpm_mode_list[i] < 35);
+
+ ps_cu_analyse->s_cu_intra_cand
+ .au1_intra_luma_mode_nxn_hash[ctr][ai4_mpm_mode_list[i]] = 1;
+ pu1_luma_mode[i4_curr_index] = ai4_mpm_mode_list[i];
+ ps_cu_analyse->s_cu_intra_cand.au1_num_modes_added[ctr]++;
+ i4_curr_index++;
+ }
+ }
+
+ pu1_luma_mode[i4_curr_index] = 255;
+ }
+
+ /* loop over candidates for each partition */
+ for(cand_ctr = 0; cand_ctr < num_cands; cand_ctr++)
+ {
+ WORD32 curr_pred_mode;
+ WORD32 bits = 0;
+ LWORD64 curr_cost;
+ WORD32 luma_pred_func_idx;
+ UWORD8 *pu1_curr_ecd_data;
+ WORD16 *pi2_curr_deq_data;
+ WORD32 curr_deq_data_strd;
+ WORD32 pred_strd;
+ UWORD8 *pu1_pred;
+
+ /* if NXN case the recon and ecd data is stored in temp buffers */
+ if(TU_EQ_SUBCU == func_proc_mode)
+ {
+ pu1_pred = &au1_cur_pred_data[curr_intra_buf_idx][0];
+ pred_strd = trans_size;
+ pu1_curr_ecd_data = &au1_intra_coeffs[curr_intra_buf_idx][0];
+ pi2_curr_deq_data = &ai2_intra_deq_coeffs[curr_intra_buf_idx][0];
+ curr_deq_data_strd = trans_size;
+
+ ASSERT(trans_size == MIN_TU_SIZE);
+ }
+ else
+ {
+ pu1_pred = (UWORD8 *)pv_pred_org;
+ pred_strd = pred_strd_org;
+ pu1_curr_ecd_data = pu1_ecd_data;
+ pi2_curr_deq_data = pi2_deq_data;
+ curr_deq_data_strd = deq_data_strd;
+ }
+
+ pu1_recon = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs[curr_intra_buf_idx]) +
+ (ctr & 1) * trans_size + (ctr > 1) * trans_size * i4_recon_stride;
+
+ if(is_sub_pu_in_hq == 1)
+ {
+ curr_pred_mode = cand_ctr;
+ }
+ else
+ {
+ curr_pred_mode = pu1_curr_mode[cand_ctr];
+ }
+
+ /* If the candidate mode is 255, then break */
+ if(255 == curr_pred_mode)
+ {
+ break;
+ }
+ else if(250 == curr_pred_mode)
+ {
+ continue;
+ }
+
+ /* check if this mode needs to be evaluated or not. For 2nx2n cases, this */
+ /* function will be called once per candidate, so this check has been done */
+ /* outside this function call. For NxN case, this function will be called */
+ /* only once, and all the candidates will be evaluated here. */
+ if(ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3)
+ {
+ if((TU_EQ_SUBCU == func_proc_mode) &&
+ (0 == ps_cu_analyse->s_cu_intra_cand.au1_nxn_eval_mark[ctr][cand_ctr]))
+ {
+ continue;
+ }
+ }
+
+ /* call reference filtering */
+ ps_ctxt->ps_func_selector->ihevc_intra_pred_ref_filtering_fptr(
+ (UWORD8 *)ps_ctxt->pv_ref_sub_out,
+ trans_size,
+ (UWORD8 *)ps_ctxt->pv_ref_filt_out,
+ curr_pred_mode,
+ ps_ctxt->i1_strong_intra_smoothing_enable_flag);
+
+ /* use the look up to get the function idx */
+ luma_pred_func_idx = g_i4_ip_funcs[curr_pred_mode];
+
+ /* call the intra prediction function */
+ ps_ctxt->apf_lum_ip[luma_pred_func_idx](
+ (UWORD8 *)ps_ctxt->pv_ref_filt_out,
+ 1,
+ pu1_pred,
+ pred_strd,
+ trans_size,
+ curr_pred_mode);
+
+ /* populate the coeffs scan idx */
+ ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
+
+ /* for luma 4x4 and 8x8 transforms based on intra pred mode scan is choosen*/
+ if(trans_size < 16)
+ {
+ /* for modes from 22 upto 30 horizontal scan is used */
+ if((curr_pred_mode > 21) && (curr_pred_mode < 31))
+ {
+ ps_ctxt->i4_scan_idx = SCAN_HORZ;
+ }
+ /* for modes from 6 upto 14 horizontal scan is used */
+ else if((curr_pred_mode > 5) && (curr_pred_mode < 15))
+ {
+ ps_ctxt->i4_scan_idx = SCAN_VERT;
+ }
+ }
+
+ /* RDOPT copy States : TU init (best until prev TU) to current */
+ COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
+ &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
+ .s_cabac_ctxt.au1_ctxt_models[0] +
+ IHEVC_CAB_COEFFX_PREFIX,
+ &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
+ IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
+
+ i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq;
+ i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh;
+
+#if DISABLE_RDOQ_INTRA
+ i4_perform_rdoq = 0;
+#endif
+
+ /*2 Multi- dimensinal array based on trans size of rounding factor to be added here */
+ /* arrays are for rounding factor corr. to 0-1 decision and 1-2 decision */
+ /* Currently the complete array will contain only single value*/
+ /*The rounding factor is calculated with the formula
+ Deadzone val = (((R1 - R0) * (2^(-8/3)) * lamMod) + 1)/2
+ rounding factor = (1 - DeadZone Val)
+
+ Assumption: Cabac states of All the sub-blocks in the TU are considered independent
+ */
+ if((ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING))
+ {
+ if((ps_ctxt->i4_quant_rounding_level == TU_LEVEL_QUANT_ROUNDING) && (ctr != 0))
+ {
+ double i4_lamda_modifier;
+
+ if((BSLICE == ps_ctxt->i1_slice_type) && (ps_ctxt->i4_temporal_layer_id))
+ {
+ i4_lamda_modifier =
+ ps_ctxt->i4_lamda_modifier *
+ CLIP3((((double)(ps_ctxt->i4_cu_qp - 12)) / 6.0), 2.00, 4.00);
+ }
+ else
+ {
+ i4_lamda_modifier = ps_ctxt->i4_lamda_modifier;
+ }
+ if(ps_ctxt->i4_use_const_lamda_modifier)
+ {
+ if(ISLICE == ps_ctxt->i1_slice_type)
+ {
+ i4_lamda_modifier = ps_ctxt->f_i_pic_lamda_modifier;
+ }
+ else
+ {
+ i4_lamda_modifier = CONST_LAMDA_MOD_VAL;
+ }
+ }
+
+ ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] =
+ &ps_ctxt->i4_quant_round_tu[0][0];
+ ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] =
+ &ps_ctxt->i4_quant_round_tu[1][0];
+
+ memset(
+ ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
+ 0,
+ trans_size * trans_size * sizeof(WORD32));
+ memset(
+ ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
+ 0,
+ trans_size * trans_size * sizeof(WORD32));
+
+ ihevce_quant_rounding_factor_gen(
+ trans_size,
+ 1,
+ &ps_ctxt->s_rdopt_entropy_ctxt,
+ ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
+ ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
+ i4_lamda_modifier,
+ 1);
+ }
+ else
+ {
+ ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] =
+ ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[trans_size >> 3];
+ ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] =
+ ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[trans_size >> 3];
+ }
+ }
+
+ /* call T Q IT IQ and recon function */
+ ai4_cbf[curr_intra_buf_idx] = ihevce_t_q_iq_ssd_scan_fxn(
+ ps_ctxt,
+ pu1_pred,
+ pred_strd,
+ (UWORD8 *)pv_curr_src,
+ src_strd,
+ pi2_curr_deq_data,
+ curr_deq_data_strd,
+ pu1_recon,
+ i4_recon_stride,
+ pu1_curr_ecd_data,
+ pu1_csbf_buf,
+ csbf_strd,
+ trans_size,
+ PRED_MODE_INTRA,
+ &ai8_cand_rdopt_cost[curr_intra_buf_idx],
+ &ai4_curr_bytes[curr_intra_buf_idx],
+ &ai4_tu_bits[curr_intra_buf_idx],
+ &au4_tu_sad[curr_intra_buf_idx],
+ &ai4_zero_col[curr_intra_buf_idx],
+ &ai4_zero_row[curr_intra_buf_idx],
+ &au1_recon_availability[curr_intra_buf_idx],
+ i4_perform_rdoq,
+ i4_perform_sbh,
+#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ i4_alpha_stim_multiplier,
+ u1_is_cu_noisy,
+#endif
+ u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
+ 1 /*early_cbf */
+ );
+
+#if COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL && !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
+ {
+#if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
+ ai8_cand_rdopt_cost[curr_intra_buf_idx] = ihevce_inject_stim_into_distortion(
+ pv_curr_src,
+ src_strd,
+ pu1_pred,
+ pred_strd,
+ ai8_cand_rdopt_cost[curr_intra_buf_idx],
+ i4_alpha_stim_multiplier,
+ trans_size,
+ 0,
+ ps_ctxt->u1_enable_psyRDOPT,
+ NULL_PLANE);
+#else
+ if(u1_compute_spatial_ssd && au1_recon_availability[curr_intra_buf_idx])
+ {
+ ai8_cand_rdopt_cost[curr_intra_buf_idx] = ihevce_inject_stim_into_distortion(
+ pv_curr_src,
+ src_strd,
+ pu1_recon,
+ i4_recon_stride,
+ ai8_cand_rdopt_cost[curr_intra_buf_idx],
+ i4_alpha_stim_multiplier,
+ trans_size,
+ 0,
+ ps_ctxt->u1_enable_psyRDOPT,
+ NULL_PLANE);
+ }
+ else
+ {
+ ai8_cand_rdopt_cost[curr_intra_buf_idx] = ihevce_inject_stim_into_distortion(
+ pv_curr_src,
+ src_strd,
+ pu1_pred,
+ pred_strd,
+ ai8_cand_rdopt_cost[curr_intra_buf_idx],
+ i4_alpha_stim_multiplier,
+ trans_size,
+ 0,
+ ps_ctxt->u1_enable_psyRDOPT,
+ NULL_PLANE);
+ }
+#endif
+ }
+#endif
+
+ if(TU_EQ_SUBCU == func_proc_mode)
+ {
+ ASSERT(ai4_curr_bytes[curr_intra_buf_idx] < MAX_SCAN_COEFFS_BYTES_4x4);
+ }
+
+ /* based on CBF/No CBF copy the corresponding state */
+ if(0 == ai4_cbf[curr_intra_buf_idx])
+ {
+ /* RDOPT copy States :update to init state if 0 cbf */
+ COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
+ &au1_intra_nxn_rdopt_ctxt_models[curr_intra_buf_idx][0] +
+ IHEVC_CAB_COEFFX_PREFIX,
+ &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
+ IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
+ }
+ else
+ {
+ /* RDOPT copy States :update to new state only if CBF is non zero */
+ COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
+ &au1_intra_nxn_rdopt_ctxt_models[curr_intra_buf_idx][0] +
+ IHEVC_CAB_COEFFX_PREFIX,
+ &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
+ .s_cabac_ctxt.au1_ctxt_models[0] +
+ IHEVC_CAB_COEFFX_PREFIX,
+ IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
+ }
+
+ /* call the function which perform intra mode prediction */
+ ihevce_intra_pred_mode_signaling(
+ ps_top_nbr_4x4->b6_luma_intra_mode,
+ ps_tmp_lt_4x4->b6_luma_intra_mode,
+ s_nbr.u1_top_avail,
+ s_nbr.u1_left_avail,
+ cu_pos_y,
+ curr_pred_mode,
+ &as_intra_prev_rem[curr_intra_buf_idx]);
+ /******************************************************************/
+ /* PREV INTRA LUMA FLAG, MPM MODE and REM INTRA MODE bits for I_NxN
+ The bits for these are evaluated for every RDO mode of current subcu
+ as they can significantly contribute to RDO cost. Note that these
+ bits are not accounted for here (ai8_cand_rdopt_cost) as they
+ are accounted for in encode_cu call later */
+
+ /******************************************************************/
+ /* PREV INTRA LUMA FLAG, MPM MODE and REM INTRA MODE bits for I_NxN
+ The bits for these are evaluated for every RDO mode of current subcu
+ as they can significantly contribute to RDO cost. Note that these
+ bits are not accounted for here (ai8_cand_rdopt_cost) as they
+ are accounted for in encode_cu call later */
+
+ /* Estimate bits to encode prev rem flag for NXN mode */
+ {
+ WORD32 bits_frac = gau2_ihevce_cabac_bin_to_bits
+ [u1_prev_flag_cabac_ctxt ^
+ as_intra_prev_rem[curr_intra_buf_idx].b1_prev_intra_luma_pred_flag];
+
+ /* rounding the fractional bits to nearest integer */
+ bits = ((bits_frac + (1 << (CABAC_FRAC_BITS_Q - 1))) >> CABAC_FRAC_BITS_Q);
+ }
+
+ /* based on prev flag all the mpmidx bits and rem bits */
+ if(1 == as_intra_prev_rem[curr_intra_buf_idx].b1_prev_intra_luma_pred_flag)
+ {
+ /* mpm_idx */
+ bits += as_intra_prev_rem[curr_intra_buf_idx].b2_mpm_idx ? 2 : 1;
+ }
+ else
+ {
+ /* rem intra mode */
+ bits += 5;
+ }
+
+ bits += ai4_tu_bits[curr_intra_buf_idx];
+
+ /* compute the total cost for current candidate */
+ curr_cost = ai8_cand_rdopt_cost[curr_intra_buf_idx];
+
+ /* get the final ssd cost */
+ curr_cost +=
+ COMPUTE_RATE_COST_CLIP30(bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
+
+ /* check of the best candidate cost */
+ if(curr_cost < best_cand_cost)
+ {
+ best_cand_cost = curr_cost;
+ best_cand_idx = cand_ctr;
+ best_intra_buf_idx = curr_intra_buf_idx;
+ curr_intra_buf_idx = !curr_intra_buf_idx;
+ }
+ }
+
+ /*************** For TU_EQ_SUBCU case *****************/
+ /* Copy the pred for best cand. to the final pred array */
+ /* Copy the iq-coeff for best cand. to the final array */
+ /* copy the best coeffs data to final buffer */
+ if(TU_EQ_SUBCU == func_proc_mode)
+ {
+ /* Copy the pred for best cand. to the final pred array */
+
+ ps_ctxt->s_cmn_opt_func.pf_copy_2d(
+ (UWORD8 *)pv_pred_org,
+ pred_strd_org,
+ &au1_cur_pred_data[best_intra_buf_idx][0],
+ trans_size,
+ trans_size,
+ trans_size);
+
+ /* Copy the deq-coeff for best cand. to the final array */
+
+ ps_ctxt->s_cmn_opt_func.pf_copy_2d(
+ (UWORD8 *)pi2_deq_data,
+ deq_data_strd << 1,
+ (UWORD8 *)&ai2_intra_deq_coeffs[best_intra_buf_idx][0],
+ trans_size << 1,
+ trans_size << 1,
+ trans_size);
+ /* copy the coeffs to final cu ecd bytes buffer */
+ memcpy(
+ pu1_ecd_data,
+ &au1_intra_coeffs[best_intra_buf_idx][0],
+ ai4_curr_bytes[best_intra_buf_idx]);
+
+ pu1_recon = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs[best_intra_buf_idx]) +
+ (ctr & 1) * trans_size + (ctr > 1) * trans_size * i4_recon_stride;
+ }
+
+ /*---------- Calculate Recon for the best INTRA mode ---------*/
+ /* TU_EQ_CU case : No need for recon, otherwise recon is required */
+ /* Compute recon only for the best mode for TU_EQ_SUBCU case */
+ if(u1_compute_recon)
+ {
+ ihevce_it_recon_fxn(
+ ps_ctxt,
+ pi2_deq_data,
+ deq_data_strd,
+ (UWORD8 *)pv_pred_org,
+ pred_strd_org,
+ pu1_recon,
+ i4_recon_stride,
+ pu1_ecd_data,
+ trans_size,
+ PRED_MODE_INTRA,
+ ai4_cbf[best_intra_buf_idx],
+ ai4_zero_col[best_intra_buf_idx],
+ ai4_zero_row[best_intra_buf_idx]);
+
+ ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = best_intra_buf_idx;
+ }
+ else if(u1_compute_spatial_ssd && au1_recon_availability[best_intra_buf_idx])
+ {
+ ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = best_intra_buf_idx;
+ }
+ else
+ {
+ ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = UCHAR_MAX;
+ }
+
+ /* RDOPT copy States :update to best modes state */
+ COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
+ &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
+ &au1_intra_nxn_rdopt_ctxt_models[best_intra_buf_idx][0] + IHEVC_CAB_COEFFX_PREFIX,
+ IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
+
+ /* copy the prev,mpm_idx and rem modes from best cand */
+ ps_final_prms->as_intra_prev_rem[ctr] = as_intra_prev_rem[best_intra_buf_idx];
+
+ /* update the cabac context of prev intra pred mode flag */
+ u1_prev_flag_cabac_ctxt = gau1_ihevc_next_state
+ [(u1_prev_flag_cabac_ctxt << 1) |
+ as_intra_prev_rem[best_intra_buf_idx].b1_prev_intra_luma_pred_flag];
+
+ /* accumulate the TU bits into cu bits */
+ cu_bits += ai4_tu_bits[best_intra_buf_idx];
+
+ /* copy the intra pred mode for chroma reuse */
+ if(is_sub_pu_in_hq == 0)
+ {
+ *pu1_intra_pred_mode = pu1_curr_mode[best_cand_idx];
+ }
+ else
+ {
+ *pu1_intra_pred_mode = best_cand_idx;
+ }
+
+ /* Store luma mode as chroma mode. If chroma prcs happens, and
+ if a diff. mode wins, it should update this!! */
+ if(1 == chrm_present_flag)
+ {
+ if(is_sub_pu_in_hq == 0)
+ {
+ ps_final_prms->u1_chroma_intra_pred_actual_mode =
+ ((ps_ctxt->u1_chroma_array_type == 2)
+ ? gau1_chroma422_intra_angle_mapping[pu1_curr_mode[best_cand_idx]]
+ : pu1_curr_mode[best_cand_idx]);
+ }
+ else
+ {
+ ps_final_prms->u1_chroma_intra_pred_actual_mode =
+ ((ps_ctxt->u1_chroma_array_type == 2)
+ ? gau1_chroma422_intra_angle_mapping[best_cand_idx]
+ : best_cand_idx);
+ }
+
+ ps_final_prms->u1_chroma_intra_pred_mode = 4;
+ }
+
+ /*remember the cbf flag to replicate qp for 4x4 neighbour*/
+ ps_final_prms->u1_is_cu_coded |= ai4_cbf[best_intra_buf_idx];
+
+ /*accumulate ssd over all TU of intra CU*/
+ ps_final_prms->u4_cu_sad += au4_tu_sad[best_intra_buf_idx];
+
+ /* update the bytes */
+ ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons;
+ ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed =
+ ai4_curr_bytes[best_intra_buf_idx];
+ /* update the zero_row and col info for the final mode */
+ ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_col =
+ ai4_zero_col[best_intra_buf_idx];
+ ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_row =
+ ai4_zero_row[best_intra_buf_idx];
+
+ ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons;
+
+ /* update the total bytes cons */
+ ecd_data_bytes_cons += ai4_curr_bytes[best_intra_buf_idx];
+ pu1_ecd_data += ai4_curr_bytes[best_intra_buf_idx];
+
+ ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = ai4_cbf[best_intra_buf_idx];
+ ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0;
+ ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0;
+ ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0;
+ ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0;
+ ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_chroma_intra_mode_idx = chrm_present_flag;
+ ps_final_prms->as_tu_enc_loop[ctr].s_tu.b7_qp = ps_ctxt->i4_cu_qp;
+ ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_first_tu_in_cu = 0;
+ ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_transquant_bypass = 0;
+ GETRANGE(tx_size, trans_size);
+ ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3;
+ ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x;
+ ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y;
+
+ /* repiclate the nbr 4x4 structure for all 4x4 blocks current TU */
+ ps_nbr_4x4->b1_skip_flag = 0;
+ ps_nbr_4x4->b1_intra_flag = 1;
+ ps_nbr_4x4->b1_pred_l0_flag = 0;
+ ps_nbr_4x4->b1_pred_l1_flag = 0;
+
+ if(is_sub_pu_in_hq == 0)
+ {
+ ps_nbr_4x4->b6_luma_intra_mode = pu1_curr_mode[best_cand_idx];
+ }
+ else
+ {
+ ps_nbr_4x4->b6_luma_intra_mode = best_cand_idx;
+ }
+
+ ps_nbr_4x4->b1_y_cbf = ai4_cbf[best_intra_buf_idx];
+
+ /* since tu size can be less than cusize, replication is done with strd */
+ {
+ WORD32 i, j;
+ nbr_4x4_t *ps_tmp_4x4;
+
+ ps_tmp_4x4 = ps_nbr_4x4;
+
+ for(i = 0; i < num_4x4_in_tu; i++)
+ {
+ for(j = 0; j < num_4x4_in_tu; j++)
+ {
+ ps_tmp_4x4[j] = *ps_nbr_4x4;
+ }
+ /* row level update*/
+ ps_tmp_4x4 += num_4x4_in_cu;
+ }
+ }
+
+ if(TU_EQ_SUBCU == func_proc_mode)
+ {
+ pu1_luma_mode += ((MAX_INTRA_CU_CANDIDATES * 4) + 2 + 1);
+ }
+
+ if((num_cu_parts > 1) && (ctr < 3))
+ {
+ /* set the neighbour map to 1 */
+ ihevce_set_nbr_map(
+ ps_ctxt->pu1_ctb_nbr_map,
+ ps_ctxt->i4_nbr_map_strd,
+ cu_pos_x,
+ cu_pos_y,
+ trans_size >> 2,
+ 1);
+
+ /* block level updates block number (1 & 3 )*/
+ pv_curr_src = (UWORD8 *)pv_curr_src + trans_size;
+ pv_pred_org = (UWORD8 *)pv_pred_org + trans_size;
+ pi2_deq_data += trans_size;
+
+ switch(ctr)
+ {
+ case 0:
+ {
+ pu1_left = pu1_recon + trans_size - 1;
+ pu1_top += trans_size;
+ pu1_top_left = pu1_top - 1;
+ left_strd = i4_recon_stride;
+
+ break;
+ }
+ case 1:
+ {
+ ASSERT(
+ (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0] == 0) ||
+ (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0] == 1));
+
+ /* Since the 'lumaRefSubstitution' function expects both Top and */
+ /* TopRight recon pixels to be present in the same buffer */
+ if(ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0] !=
+ ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1])
+ {
+ UWORD8 *pu1_src =
+ ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
+ [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1]]) +
+ trans_size;
+ UWORD8 *pu1_dst =
+ ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
+ [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0]]) +
+ trans_size;
+
+ ps_ctxt->s_cmn_opt_func.pf_copy_2d(
+ pu1_dst, i4_recon_stride, pu1_src, i4_recon_stride, trans_size, trans_size);
+
+ ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1] =
+ ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0];
+ }
+
+ pu1_left = (UWORD8 *)pv_cu_left + trans_size * cu_left_stride;
+ pu1_top = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
+ [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0]]) +
+ (trans_size - 1) * i4_recon_stride;
+ pu1_top_left = pu1_left - cu_left_stride;
+ left_strd = cu_left_stride;
+
+ break;
+ }
+ case 2:
+ {
+ ASSERT(
+ (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1] == 0) ||
+ (ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1] == 1));
+
+ pu1_left = pu1_recon + trans_size - 1;
+ pu1_top = ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
+ [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[1]]) +
+ (trans_size - 1) * i4_recon_stride + trans_size;
+ pu1_top_left = pu1_top - 1;
+ left_strd = i4_recon_stride;
+
+ break;
+ }
+ }
+
+ pu1_csbf_buf += num_4x4_in_tu;
+ cu_pos_x += num_4x4_in_tu;
+ ps_nbr_4x4 += num_4x4_in_tu;
+ ps_top_nbr_4x4 += num_4x4_in_tu;
+ ps_tmp_lt_4x4 = ps_nbr_4x4 - 1;
+
+ pu1_intra_pred_mode++;
+
+ /* after 2 blocks increment the pointers to bottom blocks */
+ if(1 == ctr)
+ {
+ pv_curr_src = (UWORD8 *)pv_curr_src - (trans_size << 1);
+ pv_curr_src = (UWORD8 *)pv_curr_src + (trans_size * src_strd);
+
+ pv_pred_org = (UWORD8 *)pv_pred_org - (trans_size << 1);
+ pv_pred_org = (UWORD8 *)pv_pred_org + (trans_size * pred_strd_org);
+ pi2_deq_data -= (trans_size << 1);
+ pi2_deq_data += (trans_size * deq_data_strd);
+
+ pu1_csbf_buf -= (num_4x4_in_tu << 1);
+ pu1_csbf_buf += (num_4x4_in_tu * csbf_strd);
+
+ ps_nbr_4x4 -= (num_4x4_in_tu << 1);
+ ps_nbr_4x4 += (num_4x4_in_tu * num_4x4_in_cu);
+ ps_top_nbr_4x4 = ps_nbr_4x4 - num_4x4_in_cu;
+ ps_tmp_lt_4x4 = ps_left_nbr_4x4 + (num_4x4_in_tu * nbr_4x4_left_strd);
+
+ /* decrement pos x to start */
+ cu_pos_x -= (num_4x4_in_tu << 1);
+ cu_pos_y += num_4x4_in_tu;
+ }
+ }
+
+#if RDOPT_ENABLE
+ /* compute the RDOPT cost for the current TU */
+ ai8_cand_rdopt_cost[best_intra_buf_idx] += COMPUTE_RATE_COST_CLIP30(
+ ai4_tu_bits[best_intra_buf_idx], ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
+#endif
+
+ /* accumulate the costs */
+ total_rdopt_cost += ai8_cand_rdopt_cost[best_intra_buf_idx];
+
+ if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
+ {
+ /* Early exit : If the current running cost exceeds
+ the prev. best mode cost, break */
+ if(total_rdopt_cost > prev_best_rdopt_cost)
+ {
+ return (total_rdopt_cost);
+ }
+ }
+
+ /* if transfrom size is 4x4 then only first luma 4x4 will have chroma*/
+ chrm_present_flag = (4 != trans_size) ? 1 : INTRA_PRED_CHROMA_IDX_NONE;
+
+ pu4_nbr_flags++;
+ }
+ /* Modify the cost function for this CU. */
+ /* loop in for 8x8 blocks */
+ if(ps_ctxt->u1_enable_psyRDOPT)
+ {
+ UWORD8 *pu1_recon_cu;
+ WORD32 recon_stride;
+ WORD32 curr_pos_x;
+ WORD32 curr_pos_y;
+ WORD32 start_index;
+ WORD32 num_horz_cu_in_ctb;
+ WORD32 cu_size;
+ WORD32 had_block_size;
+
+ /* tODO: sreenivasa ctb size has to be used appropriately */
+ had_block_size = 8;
+ cu_size = ps_cu_analyse->u1_cu_size; /* todo */
+ num_horz_cu_in_ctb = 64 / had_block_size;
+
+ curr_pos_x = ps_cu_analyse->b3_cu_pos_x << 3; /* pel units */
+ curr_pos_y = ps_cu_analyse->b3_cu_pos_y << 3; /* pel units */
+ recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
+ pu1_recon_cu =
+ ((UWORD8 *)ps_final_prms->s_recon_datastore
+ .apv_luma_recon_bufs[ps_recon_datastore->au1_bufId_with_winning_LumaRecon[0]]);
+ /* + \ curr_pos_x + curr_pos_y * recon_stride; */
+
+ /* start index to index the source satd of curr cu int he current ctb*/
+ start_index =
+ (curr_pos_x / had_block_size) + (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
+
+ {
+ total_rdopt_cost += ihevce_psy_rd_cost(
+ ps_ctxt->ai4_source_satd_8x8,
+ pu1_recon_cu,
+ recon_stride,
+ 1, //
+ cu_size,
+ 0, // pic type
+ 0, //layer id
+ ps_ctxt->i4_satd_lamda, // lambda
+ start_index,
+ ps_ctxt->u1_is_input_data_hbd,
+ ps_ctxt->u4_psy_strength,
+ &ps_ctxt->s_cmn_opt_func
+
+ ); // 8 bit
+ }
+ }
+
+#if !FORCE_INTRA_TU_DEPTH_TO_0 //RATIONALISE_NUM_RDO_MODES_IN_PQ_AND_HQ
+ if(TU_EQ_SUBCU == func_proc_mode)
+ {
+ UWORD8 au1_tu_eq_cu_div2_modes[4];
+ UWORD8 au1_freq_of_mode[4];
+
+ WORD32 i4_num_clusters = ihevce_find_num_clusters_of_identical_points_1D(
+ ps_final_prms->au1_intra_pred_mode, au1_tu_eq_cu_div2_modes, au1_freq_of_mode, 4);
+
+ if(1 == i4_num_clusters)
+ {
+ ps_final_prms->u2_num_pus_in_cu = 1;
+ ps_final_prms->u1_part_mode = SIZE_2Nx2N;
+ }
+ }
+#endif
+
+ /* store the num TUs*/
+ ps_final_prms->u2_num_tus_in_cu = u2_num_tus_in_cu;
+
+ /* update the bytes consumed */
+ ps_final_prms->i4_num_bytes_ecd_data = ecd_data_bytes_cons;
+
+ /* store the current cu size to final prms */
+ ps_final_prms->u1_cu_size = ps_cu_analyse->u1_cu_size;
+
+ /* cu bits will be having luma residual bits till this point */
+ /* if zero_cbf eval is disabled then cu bits will be zero */
+ ps_final_prms->u4_cu_luma_res_bits = cu_bits;
+
+ /* ------------- Chroma processing -------------- */
+ /* Chroma rdopt eval for each luma candidate only for HIGH QUALITY/MEDIUM SPEDD preset*/
+ if(1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt)
+ {
+ LWORD64 chrm_rdopt_cost;
+ WORD32 chrm_rdopt_tu_bits;
+
+ /* Store the current RDOPT cost to enable early exit in chrom_prcs */
+ ps_ctxt->as_cu_prms[curr_buf_idx].i8_curr_rdopt_cost = total_rdopt_cost;
+
+ chrm_rdopt_cost = ihevce_chroma_cu_prcs_rdopt(
+ ps_ctxt,
+ curr_buf_idx,
+ func_proc_mode,
+ ps_chrm_cu_buf_prms->pu1_curr_src,
+ ps_chrm_cu_buf_prms->i4_chrm_src_stride,
+ ps_chrm_cu_buf_prms->pu1_cu_left,
+ ps_chrm_cu_buf_prms->pu1_cu_top,
+ ps_chrm_cu_buf_prms->pu1_cu_top_left,
+ ps_chrm_cu_buf_prms->i4_cu_left_stride,
+ cu_pos_x_8pelunits,
+ cu_pos_y_8pelunits,
+ &chrm_rdopt_tu_bits,
+ i4_alpha_stim_multiplier,
+ u1_is_cu_noisy);
+
+#if WEIGH_CHROMA_COST
+ chrm_rdopt_cost = (LWORD64)(
+ (chrm_rdopt_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
+ (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
+ CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
+#endif
+
+#if CHROMA_RDOPT_ENABLE
+ total_rdopt_cost += chrm_rdopt_cost;
+#endif
+ cu_bits += chrm_rdopt_tu_bits;
+
+ /* cu bits for chroma residual if chroma rdopt is on */
+ /* if zero_cbf eval is disabled then cu bits will be zero */
+ ps_final_prms->u4_cu_chroma_res_bits = chrm_rdopt_tu_bits;
+
+ if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
+ {
+ /* Early exit : If the current running cost exceeds
+ the prev. best mode cost, break */
+ if(total_rdopt_cost > prev_best_rdopt_cost)
+ {
+ return (total_rdopt_cost);
+ }
+ }
+ }
+ else
+ {}
+
+ /* RDOPT copy States : Best after all luma TUs to current */
+ COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
+ &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
+ .s_cabac_ctxt.au1_ctxt_models[0] +
+ IHEVC_CAB_COEFFX_PREFIX,
+ &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
+ IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
+
+ /* get the neighbour availability flags for current cu */
+ ihevce_get_only_nbr_flag(
+ &s_nbr,
+ ps_ctxt->pu1_ctb_nbr_map,
+ ps_ctxt->i4_nbr_map_strd,
+ (cu_pos_x_8pelunits << 1),
+ (cu_pos_y_8pelunits << 1),
+ (trans_size << 1),
+ (trans_size << 1));
+
+ /* call the entropy rdo encode to get the bit estimate for current cu */
+ /*if ZERO_CBF eval is enabled then this function will return only CU header bits */
+ {
+ /*cbf_bits will account for both texture and cbf bits when zero cbf eval flag is 0*/
+ WORD32 cbf_bits, header_bits;
+
+ header_bits = ihevce_entropy_rdo_encode_cu(
+ &ps_ctxt->s_rdopt_entropy_ctxt,
+ ps_final_prms,
+ cu_pos_x_8pelunits,
+ cu_pos_y_8pelunits,
+ ps_cu_analyse->u1_cu_size,
+ s_nbr.u1_top_avail,
+ s_nbr.u1_left_avail,
+ &ps_final_prms->pu1_cu_coeffs[0],
+ &cbf_bits);
+
+ cu_bits += header_bits;
+
+ /* cbf bits are excluded from header bits, instead considered as texture bits */
+ /* incase if zero cbf eval is disabled then texture bits gets added here */
+ ps_final_prms->u4_cu_hdr_bits = (header_bits - cbf_bits);
+ ps_final_prms->u4_cu_cbf_bits = cbf_bits;
+
+#if RDOPT_ENABLE
+ /* add the cost of coding the cu bits */
+ total_rdopt_cost +=
+ COMPUTE_RATE_COST_CLIP30(header_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
+#endif
+ }
+ return (total_rdopt_cost);
+}
+/*!
+******************************************************************************
+* \if Function name : ihevce_inter_rdopt_cu_ntu \endif
+*
+* \brief
+* Inter Coding unit function which performs the TQ IT IQ recon for luma
+*
+* \param[in] ps_ctxt enc_loop module ctxt pointer
+* \param[in] ps_cu_prms pointer to CU params (luma source stride and noisy-CU flag are read from it)
+* \param[in] pv_src pointer to source data buffer
+* \param[in] cu_size Current CU size
+* \param[in] cu_pos_x cu position x w.r.t to ctb
+* \param[in] cu_pos_y cu position y w.r.t to ctb
+* \param[in] curr_buf_idx buffer index for current output storage
+* \param[in] ps_chrm_cu_buf_prms pointer to chroma buffer pointers structure
+* \param[in] ps_inter_cand pointer to inter candidate structure
+* \param[in] ps_cu_analyse pointer to cu analyse structure of the current CU
+* \param[in] i4_alpha_stim_multiplier multiplier forwarded to the stim (noise term) distortion evaluation
+*
+* \return
+* Rdopt cost
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+LWORD64 ihevce_inter_rdopt_cu_ntu(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ enc_loop_cu_prms_t *ps_cu_prms,
+ void *pv_src,
+ WORD32 cu_size,
+ WORD32 cu_pos_x,
+ WORD32 cu_pos_y,
+ WORD32 curr_buf_idx,
+ enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
+ cu_inter_cand_t *ps_inter_cand,
+ cu_analyse_t *ps_cu_analyse,
+ WORD32 i4_alpha_stim_multiplier)
+{
+ enc_loop_cu_final_prms_t *ps_final_prms;
+ nbr_4x4_t *ps_nbr_4x4;
+ tu_prms_t s_tu_prms[64 * 4];
+ tu_prms_t *ps_tu_prms;
+
+ WORD32 i4_perform_rdoq;
+ WORD32 i4_perform_sbh;
+ WORD32 ai4_tu_split_flags[4];
+ WORD32 ai4_tu_early_cbf[4];
+ WORD32 num_split_flags = 1;
+ WORD32 i;
+ UWORD8 u1_tu_size;
+ UWORD8 *pu1_pred;
+ UWORD8 *pu1_ecd_data;
+ WORD16 *pi2_deq_data;
+ UWORD8 *pu1_csbf_buf;
+ UWORD8 *pu1_tu_sz_sft;
+ UWORD8 *pu1_tu_posx;
+ UWORD8 *pu1_tu_posy;
+ LWORD64 total_rdopt_cost;
+ WORD32 ctr;
+ WORD32 chrm_ctr;
+ WORD32 num_tu_in_cu = 0;
+ WORD32 pred_stride;
+ WORD32 recon_stride;
+ WORD32 trans_size = ps_cu_analyse->u1_cu_size;
+ WORD32 csbf_strd;
+ WORD32 chrm_present_flag;
+ WORD32 ecd_data_bytes_cons;
+ WORD32 num_4x4_in_cu;
+ WORD32 num_4x4_in_tu;
+ WORD32 recon_func_mode;
+ WORD32 cu_bits;
+ UWORD8 u1_compute_spatial_ssd;
+
+ /* min_trans_size is initialized to some huge number than usual TU sizes */
+ WORD32 i4_min_trans_size = 256;
+ /* Get the RDOPT cost of the best CU mode for early_exit */
+ LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!curr_buf_idx].i8_best_rdopt_cost;
+ WORD32 src_strd = ps_cu_prms->i4_luma_src_stride;
+
+ /* model for no residue syntax qt root cbf flag */
+ UWORD8 u1_qtroot_cbf_cabac_model = ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_NORES_IDX];
+
+ /* backup copy of cabac states for restoration if zero cu reside rdo wins later */
+ UWORD8 au1_rdopt_init_ctxt_models[IHEVC_CAB_CTXT_END];
+
+ /* for skip cases tables are not reqquired */
+ UWORD8 u1_skip_tu_sz_sft = 0;
+ UWORD8 u1_skip_tu_posx = 0;
+ UWORD8 u1_skip_tu_posy = 0;
+ UWORD8 u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy;
+
+ /* get the pointers based on curbuf idx */
+ ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx];
+ ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0];
+ pu1_ecd_data = &ps_final_prms->pu1_cu_coeffs[0];
+ pi2_deq_data = &ps_final_prms->pi2_cu_deq_coeffs[0];
+ csbf_strd = ps_ctxt->i4_cu_csbf_strd;
+ pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0];
+
+ pred_stride = ps_inter_cand->i4_pred_data_stride;
+ recon_stride = cu_size;
+ pu1_pred = ps_inter_cand->pu1_pred_data;
+ chrm_ctr = 0;
+ ecd_data_bytes_cons = 0;
+ total_rdopt_cost = 0;
+ num_4x4_in_cu = cu_size >> 2;
+ recon_func_mode = PRED_MODE_INTER;
+ cu_bits = 0;
+
+ /* get the 4x4 level postion of current cu */
+ cu_pos_x = cu_pos_x << 1;
+ cu_pos_y = cu_pos_y << 1;
+
+ /* default value for cu coded flag */
+ ps_final_prms->u1_is_cu_coded = 0;
+
+ /*init of ssd of CU accuumulated over all TU*/
+ ps_final_prms->u4_cu_sad = 0;
+
+ /* populate the coeffs scan idx */
+ ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
+
+#if ENABLE_INTER_ZCU_COST
+ /* reset cu not coded cost */
+ ps_ctxt->i8_cu_not_coded_cost = 0;
+
+ /* backup copy of cabac states for restoration if zero cu reside rdo wins later */
+ memcpy(au1_rdopt_init_ctxt_models, &ps_ctxt->au1_rdopt_init_ctxt_models[0], IHEVC_CAB_CTXT_END);
+#endif
+
+ if(ps_cu_analyse->u1_cu_size == 64)
+ {
+ num_split_flags = 4;
+ u1_tu_size = 32;
+ }
+ else
+ {
+ num_split_flags = 1;
+ u1_tu_size = ps_cu_analyse->u1_cu_size;
+ }
+
+ /* ckeck for skip mode */
+ if(1 == ps_final_prms->u1_skip_flag)
+ {
+ if(64 == cu_size)
+ {
+ /* TU = CU/2 is set but no trnaform is evaluated */
+ num_tu_in_cu = 4;
+ pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0];
+ pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0];
+ pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0];
+ }
+ else
+ {
+ /* TU = CU is set but no trnaform is evaluated */
+ num_tu_in_cu = 1;
+ pu1_tu_sz_sft = &u1_skip_tu_sz_sft;
+ pu1_tu_posx = &u1_skip_tu_posx;
+ pu1_tu_posy = &u1_skip_tu_posy;
+ }
+
+ recon_func_mode = PRED_MODE_SKIP;
+ }
+ /* check for PU part mode being AMP or No AMP */
+ else if(ps_final_prms->u1_part_mode < SIZE_2NxnU)
+ {
+ if((SIZE_2Nx2N == ps_final_prms->u1_part_mode) && (cu_size < 64))
+ {
+ /* TU= CU is evaluated 2Nx2N inter case */
+ num_tu_in_cu = 1;
+ pu1_tu_sz_sft = &u1_skip_tu_sz_sft;
+ pu1_tu_posx = &u1_skip_tu_posx;
+ pu1_tu_posy = &u1_skip_tu_posy;
+ }
+ else
+ {
+ /* currently TU= CU/2 is evaluated for all inter case */
+ num_tu_in_cu = 4;
+ pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0];
+ pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0];
+ pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0];
+ }
+ }
+ else
+ {
+ /* for AMP cases one level of TU recurssion is done */
+ /* based on oreintation of the partitions */
+ num_tu_in_cu = 10;
+ pu1_tu_sz_sft = &gau1_inter_tu_shft_amt_amp[ps_final_prms->u1_part_mode - 4][0];
+ pu1_tu_posx = &gau1_inter_tu_posx_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0];
+ pu1_tu_posy = &gau1_inter_tu_posy_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0];
+ }
+
+ ps_tu_prms = &s_tu_prms[0];
+ num_tu_in_cu = 0;
+
+ for(i = 0; i < num_split_flags; i++)
+ {
+ WORD32 i4_x_off = 0, i4_y_off = 0;
+
+ if(i == 1 || i == 3)
+ {
+ i4_x_off = 32;
+ }
+
+ if(i == 2 || i == 3)
+ {
+ i4_y_off = 32;
+ }
+
+ if(1 == ps_final_prms->u1_skip_flag)
+ {
+ ai4_tu_split_flags[0] = 0;
+ ps_inter_cand->ai4_tu_split_flag[i] = 0;
+
+ ai4_tu_early_cbf[0] = 0;
+ }
+ else
+ {
+ ai4_tu_split_flags[0] = ps_inter_cand->ai4_tu_split_flag[i];
+ ai4_tu_early_cbf[0] = ps_inter_cand->ai4_tu_early_cbf[i];
+ }
+
+ ps_tu_prms->u1_tu_size = u1_tu_size;
+
+ ps_tu_prms = (tu_prms_t *)ihevce_tu_tree_update(
+ ps_tu_prms,
+ &num_tu_in_cu,
+ 0,
+ ai4_tu_split_flags[0],
+ ai4_tu_early_cbf[0],
+ i4_x_off,
+ i4_y_off);
+ }
+
+ /* loop for all tu blocks in current cu */
+ ps_tu_prms = &s_tu_prms[0];
+ for(ctr = 0; ctr < num_tu_in_cu; ctr++)
+ {
+ trans_size = ps_tu_prms->u1_tu_size;
+
+ if(i4_min_trans_size > trans_size)
+ {
+ i4_min_trans_size = trans_size;
+ }
+ ps_tu_prms++;
+ }
+
+ if(ps_ctxt->i1_cu_qp_delta_enable)
+ {
+ WORD32 i4_act_counter = 0, i4_act_counter_lamda = 0;
+
+ if(ps_cu_analyse->u1_cu_size == 64)
+ {
+ ASSERT(
+ (i4_min_trans_size == 32) || (i4_min_trans_size == 16) ||
+ (i4_min_trans_size == 8) || (i4_min_trans_size == 4));
+ i4_act_counter = (i4_min_trans_size == 16) +
+ 2 * ((i4_min_trans_size == 8) || (i4_min_trans_size == 4));
+ i4_act_counter_lamda = 3;
+ }
+ else if(ps_cu_analyse->u1_cu_size == 32)
+ {
+ ASSERT(
+ (i4_min_trans_size == 32) || (i4_min_trans_size == 16) ||
+ (i4_min_trans_size == 8) || (i4_min_trans_size == 4));
+ i4_act_counter = (i4_min_trans_size == 16) +
+ 2 * ((i4_min_trans_size == 8) || (i4_min_trans_size == 4));
+ i4_act_counter_lamda = 0;
+ }
+ else if(ps_cu_analyse->u1_cu_size == 16)
+ {
+ ASSERT(
+ (i4_min_trans_size == 16) || (i4_min_trans_size == 8) || (i4_min_trans_size == 4));
+ i4_act_counter = (i4_min_trans_size == 8) || (i4_min_trans_size == 4);
+ i4_act_counter_lamda = 0;
+ }
+ else if(ps_cu_analyse->u1_cu_size == 8)
+ {
+ ASSERT((i4_min_trans_size == 8) || (i4_min_trans_size == 4));
+ i4_act_counter = 1;
+ i4_act_counter_lamda = 0;
+ }
+ else
+ {
+ ASSERT(0);
+ }
+ if(ps_ctxt->i4_use_ctb_level_lamda)
+ {
+ ihevce_compute_cu_level_QP(
+ ps_ctxt, ps_cu_analyse->i4_act_factor[i4_act_counter][0], -1, 0);
+ }
+ else
+ {
+ ihevce_compute_cu_level_QP(
+ ps_ctxt,
+ ps_cu_analyse->i4_act_factor[i4_act_counter][0],
+ ps_cu_analyse->i4_act_factor[i4_act_counter_lamda][0],
+ 0);
+ }
+
+ ps_cu_analyse->i1_cu_qp = ps_ctxt->i4_cu_qp;
+ }
+ if(u1_is_cu_noisy && !ps_ctxt->u1_enable_psyRDOPT)
+ {
+ ps_ctxt->i8_cl_ssd_lambda_qf =
+ ((float)ps_ctxt->i8_cl_ssd_lambda_qf * (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) /
+ 100.0f);
+ ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
+ ((float)ps_ctxt->i8_cl_ssd_lambda_chroma_qf *
+ (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
+ }
+
+ u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
+ (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
+ CONVERT_SSDS_TO_SPATIAL_DOMAIN;
+
+ if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
+ {
+ u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
+ CONVERT_SSDS_TO_SPATIAL_DOMAIN;
+ }
+
+ if(!u1_compute_spatial_ssd)
+ {
+ ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0;
+ ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
+ }
+ else
+ {
+ ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 1;
+ }
+
+ ps_tu_prms = &s_tu_prms[0];
+
+ ASSERT(num_tu_in_cu <= 256);
+
+ /* RDOPT copy States : TU init (best until prev TU) to current */
+ memcpy(
+ &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
+ .s_cabac_ctxt.au1_ctxt_models[0],
+ &ps_ctxt->au1_rdopt_init_ctxt_models[0],
+ IHEVC_CAB_COEFFX_PREFIX);
+
+ for(ctr = 0; ctr < num_tu_in_cu; ctr++)
+ {
+ WORD32 curr_bytes;
+ WORD32 tx_size;
+ WORD32 cbf, zero_col, zero_row;
+ LWORD64 rdopt_cost;
+ UWORD8 u1_is_recon_available;
+
+ WORD32 curr_pos_x;
+ WORD32 curr_pos_y;
+ nbr_4x4_t *ps_cur_nbr_4x4;
+ UWORD8 *pu1_cur_pred;
+ UWORD8 *pu1_cur_src;
+ UWORD8 *pu1_cur_recon;
+ WORD16 *pi2_cur_deq_data;
+ UWORD32 u4_tu_sad;
+ WORD32 tu_bits;
+
+ WORD32 i4_recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
+
+ trans_size = ps_tu_prms->u1_tu_size;
+ /* get the current pos x and pos y in pixels */
+ curr_pos_x = ps_tu_prms->u1_x_off; //((cu_size >> 2) * pu1_tu_posx[ctr]);
+ curr_pos_y = ps_tu_prms->u1_y_off; //((cu_size >> 2) * pu1_tu_posy[ctr]);
+
+ num_4x4_in_tu = trans_size >> 2;
+
+#if FORCE_8x8_TFR
+ if(cu_size == 64)
+ {
+ curr_pos_x = ((cu_size >> 3) * pu1_tu_posx[ctr]);
+ curr_pos_y = ((cu_size >> 3) * pu1_tu_posy[ctr]);
+ }
+#endif
+
+ /* increment the pointers to start of current TU */
+ pu1_cur_src = ((UWORD8 *)pv_src + curr_pos_x);
+ pu1_cur_src += (curr_pos_y * src_strd);
+ pu1_cur_pred = (pu1_pred + curr_pos_x);
+ pu1_cur_pred += (curr_pos_y * pred_stride);
+ pi2_cur_deq_data = pi2_deq_data + curr_pos_x;
+ pi2_cur_deq_data += (curr_pos_y * cu_size);
+ pu1_cur_recon = ((UWORD8 *)ps_final_prms->s_recon_datastore.apv_luma_recon_bufs[0]) +
+ curr_pos_x + curr_pos_y * i4_recon_stride;
+
+ ps_cur_nbr_4x4 = (ps_nbr_4x4 + (curr_pos_x >> 2));
+ ps_cur_nbr_4x4 += ((curr_pos_y >> 2) * num_4x4_in_cu);
+
+ /* RDOPT copy States : TU init (best until prev TU) to current */
+ COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
+ &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
+ .s_cabac_ctxt.au1_ctxt_models[0] +
+ IHEVC_CAB_COEFFX_PREFIX,
+ &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
+ IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
+
+ i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq;
+ i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh;
+
+ /*2 Multi- dimensinal array based on trans size of rounding factor to be added here */
+ /* arrays are for rounding factor corr. to 0-1 decision and 1-2 decision */
+ /* Currently the complete array will contain only single value*/
+ /*The rounding factor is calculated with the formula
+ Deadzone val = (((R1 - R0) * (2^(-8/3)) * lamMod) + 1)/2
+ rounding factor = (1 - DeadZone Val)
+
+ Assumption: Cabac states of All the sub-blocks in the TU are considered independent
+ */
+ if((ps_ctxt->i4_quant_rounding_level == TU_LEVEL_QUANT_ROUNDING) && (ctr != 0))
+ {
+ double i4_lamda_modifier;
+
+ if((BSLICE == ps_ctxt->i1_slice_type) && (ps_ctxt->i4_temporal_layer_id))
+ {
+ i4_lamda_modifier = ps_ctxt->i4_lamda_modifier *
+ CLIP3((((double)(ps_ctxt->i4_cu_qp - 12)) / 6.0), 2.00, 4.00);
+ }
+ else
+ {
+ i4_lamda_modifier = ps_ctxt->i4_lamda_modifier;
+ }
+ if(ps_ctxt->i4_use_const_lamda_modifier)
+ {
+ if(ISLICE == ps_ctxt->i1_slice_type)
+ {
+ i4_lamda_modifier = ps_ctxt->f_i_pic_lamda_modifier;
+ }
+ else
+ {
+ i4_lamda_modifier = CONST_LAMDA_MOD_VAL;
+ }
+ }
+ ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] =
+ &ps_ctxt->i4_quant_round_tu[0][0];
+ ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] =
+ &ps_ctxt->i4_quant_round_tu[1][0];
+
+ memset(
+ ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
+ 0,
+ trans_size * trans_size * sizeof(WORD32));
+ memset(
+ ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
+ 0,
+ trans_size * trans_size * sizeof(WORD32));
+
+ ihevce_quant_rounding_factor_gen(
+ trans_size,
+ 1,
+ &ps_ctxt->s_rdopt_entropy_ctxt,
+ ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
+ ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
+ i4_lamda_modifier,
+ 1);
+ }
+ else
+ {
+ ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] =
+ ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[trans_size >> 3];
+ ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] =
+ ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[trans_size >> 3];
+ }
+
+ /* call T Q IT IQ and recon function */
+ cbf = ihevce_t_q_iq_ssd_scan_fxn(
+ ps_ctxt,
+ pu1_cur_pred,
+ pred_stride,
+ pu1_cur_src,
+ src_strd,
+ pi2_cur_deq_data,
+ cu_size,
+ pu1_cur_recon,
+ i4_recon_stride,
+ pu1_ecd_data,
+ pu1_csbf_buf,
+ csbf_strd,
+ trans_size,
+ recon_func_mode,
+ &rdopt_cost,
+ &curr_bytes,
+ &tu_bits,
+ &u4_tu_sad,
+ &zero_col,
+ &zero_row,
+ &u1_is_recon_available,
+ i4_perform_rdoq,
+ i4_perform_sbh,
+#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ i4_alpha_stim_multiplier,
+ u1_is_cu_noisy,
+#endif
+ u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
+ ps_ctxt->u1_use_early_cbf_data ? ps_tu_prms->i4_early_cbf : 1);
+
+#if COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL && !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
+ {
+#if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
+ rdopt_cost = ihevce_inject_stim_into_distortion(
+ pu1_cur_src,
+ src_strd,
+ pu1_cur_pred,
+ pred_stride,
+ rdopt_cost,
+ i4_alpha_stim_multiplier,
+ trans_size,
+ 0,
+ ps_ctxt->u1_enable_psyRDOPT,
+ NULL_PLANE);
+#else
+ if(u1_compute_spatial_ssd && u1_is_recon_available)
+ {
+ rdopt_cost = ihevce_inject_stim_into_distortion(
+ pu1_cur_src,
+ src_strd,
+ pu1_cur_recon,
+ i4_recon_stride,
+ rdopt_cost,
+ i4_alpha_stim_multiplier,
+ trans_size,
+ 0,
+ NULL_PLANE);
+ }
+ else
+ {
+ rdopt_cost = ihevce_inject_stim_into_distortion(
+ pu1_cur_src,
+ src_strd,
+ pu1_cur_pred,
+ pred_stride,
+ rdopt_cost,
+ i4_alpha_stim_multiplier,
+ trans_size,
+ 0,
+ ps_ctxt->u1_enable_psyRDOPT,
+ NULL_PLANE);
+ }
+#endif
+ }
+#endif
+
+ if(u1_compute_spatial_ssd && u1_is_recon_available)
+ {
+ ps_final_prms->s_recon_datastore.au1_bufId_with_winning_LumaRecon[ctr] = 0;
+ }
+ else
+ {
+ ps_final_prms->s_recon_datastore.au1_bufId_with_winning_LumaRecon[ctr] = UCHAR_MAX;
+ }
+
+ /* accumulate the TU sad into cu sad */
+ ps_final_prms->u4_cu_sad += u4_tu_sad;
+
+ /* accumulate the TU bits into cu bits */
+ cu_bits += tu_bits;
+
+ /* inter cu is coded if any of the tu is coded in it */
+ ps_final_prms->u1_is_cu_coded |= cbf;
+
+ /* call the entropy function to get the bits */
+ /* add that to rd opt cost(SSD) */
+
+ /* update the bytes */
+ ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons;
+ ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed = curr_bytes;
+ /* update the zero_row and col info for the final mode */
+ ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_col = zero_col;
+ ps_final_prms->as_tu_enc_loop_temp_prms[ctr].u4_luma_zero_row = zero_row;
+
+ /* update the bytes */
+ ps_final_prms->as_tu_enc_loop[ctr].i4_luma_coeff_offset = ecd_data_bytes_cons;
+
+ /* update the total bytes cons */
+ ecd_data_bytes_cons += curr_bytes;
+ pu1_ecd_data += curr_bytes;
+
+ /* RDOPT copy States : New updated after curr TU to TU init */
+ if(0 != cbf)
+ {
+ /* update to new state only if CBF is non zero */
+ COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
+ &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
+ &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
+ .s_cabac_ctxt.au1_ctxt_models[0] +
+ IHEVC_CAB_COEFFX_PREFIX,
+ IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
+ }
+
+ /* by default chroma present is set to 1*/
+ chrm_present_flag = 1;
+ if(4 == trans_size)
+ {
+ /* if tusize is 4x4 then only first luma 4x4 will have chroma*/
+ if(0 != chrm_ctr)
+ {
+ chrm_present_flag = INTRA_PRED_CHROMA_IDX_NONE;
+ }
+
+ /* increment the chrm ctr unconditionally */
+ chrm_ctr++;
+
+ /* after ctr reached 4 reset it */
+ if(4 == chrm_ctr)
+ {
+ chrm_ctr = 0;
+ }
+ }
+
+ ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = cbf;
+ ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0;
+ ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0;
+ ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0;
+ ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0;
+ ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_chroma_intra_mode_idx = chrm_present_flag;
+ ps_final_prms->as_tu_enc_loop[ctr].s_tu.b7_qp = ps_ctxt->i4_cu_qp;
+ ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_first_tu_in_cu = 0;
+ ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_transquant_bypass = 0;
+ GETRANGE(tx_size, trans_size);
+ ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3;
+ ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x + (curr_pos_x >> 2);
+ ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y + (curr_pos_y >> 2);
+
+ /* replicate the nbr 4x4 structure for all 4x4 blocks in the current TU */
+ ps_cur_nbr_4x4->b1_y_cbf = cbf;
+ /*copy the cu qp. This will be overwritten by qp calculated based on skip flag at final stage of cu mode decide*/
+ ps_cur_nbr_4x4->b8_qp = ps_ctxt->i4_cu_qp;
+
+ /* Qp and cbf are stored for the all 4x4 in TU */
+ {
+ WORD32 i, j;
+ nbr_4x4_t *ps_tmp_4x4;
+ ps_tmp_4x4 = ps_cur_nbr_4x4;
+
+ for(i = 0; i < num_4x4_in_tu; i++)
+ {
+ for(j = 0; j < num_4x4_in_tu; j++)
+ {
+ ps_tmp_4x4[j].b8_qp = ps_ctxt->i4_cu_qp;
+ ps_tmp_4x4[j].b1_y_cbf = cbf;
+ }
+ /* row level update*/
+ ps_tmp_4x4 += num_4x4_in_cu;
+ }
+ }
+
+#if RDOPT_ENABLE
+ /* compute the rdopt cost */
+ rdopt_cost +=
+ COMPUTE_RATE_COST_CLIP30(tu_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
+#endif
+ /* accumulate the costs */
+ total_rdopt_cost += rdopt_cost;
+
+ ps_tu_prms++;
+
+ if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
+ {
+ /* Early exit : If the current running cost exceeds
+ the prev. best mode cost, break */
+ if(total_rdopt_cost > prev_best_rdopt_cost)
+ {
+ return (total_rdopt_cost);
+ }
+ }
+ }
+
+ /* Modify the cost function for this CU. */
+ /* loop in for 8x8 blocks */
+ if(ps_ctxt->u1_enable_psyRDOPT)
+ {
+ UWORD8 *pu1_recon_cu;
+ WORD32 recon_stride;
+ WORD32 curr_pos_x;
+ WORD32 curr_pos_y;
+ WORD32 start_index;
+ WORD32 num_horz_cu_in_ctb;
+ WORD32 had_block_size;
+
+ /* tODO: sreenivasa ctb size has to be used appropriately */
+ had_block_size = 8;
+ num_horz_cu_in_ctb = 64 / had_block_size;
+
+ curr_pos_x = cu_pos_x << 2; /* pel units */
+ curr_pos_y = cu_pos_y << 2; /* pel units */
+ recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
+ pu1_recon_cu = ((UWORD8 *)ps_final_prms->s_recon_datastore
+ .apv_luma_recon_bufs[0]); // already pointing to the current CU recon
+ //+ \curr_pos_x + curr_pos_y * recon_stride;
+
+ /* start index to index the source satd of curr cu in the current ctb */
+ start_index =
+ (curr_pos_x / had_block_size) + (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
+
+ {
+ total_rdopt_cost += ihevce_psy_rd_cost(
+ ps_ctxt->ai4_source_satd_8x8,
+ pu1_recon_cu,
+ recon_stride,
+ 1, //howz stride
+ cu_size,
+ 0, // pic type
+ 0, //layer id
+ ps_ctxt->i4_satd_lamda, // lambda
+ start_index,
+ ps_ctxt->u1_is_input_data_hbd,
+ ps_ctxt->u4_psy_strength,
+ &ps_ctxt->s_cmn_opt_func); // 8 bit
+ }
+ }
+
+ /* store the num TUs*/
+ ps_final_prms->u2_num_tus_in_cu = num_tu_in_cu;
+
+ /* update the bytes consumed */
+ ps_final_prms->i4_num_bytes_ecd_data = ecd_data_bytes_cons;
+
+ /* store the current cu size to final prms */
+ ps_final_prms->u1_cu_size = cu_size;
+
+ /* cu bits will be having luma residual bits till this point */
+ /* if zero_cbf eval is disabled then cu bits will be zero */
+ ps_final_prms->u4_cu_luma_res_bits = cu_bits;
+
+ /* ------------- Chroma processing -------------- */
+ /* Chroma rdopt eval for each luma candidate only for HIGH QUALITY/MEDIUM SPEED preset */
+ if(1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt)
+ {
+ LWORD64 chrm_rdopt_cost;
+ WORD32 chrm_rdopt_tu_bits;
+
+ /* Store the current RDOPT cost to enable early exit in chrom_prcs */
+ ps_ctxt->as_cu_prms[curr_buf_idx].i8_curr_rdopt_cost = total_rdopt_cost;
+
+ chrm_rdopt_cost = ihevce_chroma_cu_prcs_rdopt(
+ ps_ctxt,
+ curr_buf_idx,
+ 0, /* TU mode : Don't care in Inter patrh */
+ ps_chrm_cu_buf_prms->pu1_curr_src,
+ ps_chrm_cu_buf_prms->i4_chrm_src_stride,
+ ps_chrm_cu_buf_prms->pu1_cu_left,
+ ps_chrm_cu_buf_prms->pu1_cu_top,
+ ps_chrm_cu_buf_prms->pu1_cu_top_left,
+ ps_chrm_cu_buf_prms->i4_cu_left_stride,
+ (cu_pos_x >> 1),
+ (cu_pos_y >> 1),
+ &chrm_rdopt_tu_bits,
+ i4_alpha_stim_multiplier,
+ u1_is_cu_noisy);
+
+#if WEIGH_CHROMA_COST
+ chrm_rdopt_cost = (LWORD64)(
+ (chrm_rdopt_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
+ (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
+ CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
+#endif
+
+#if CHROMA_RDOPT_ENABLE
+ total_rdopt_cost += chrm_rdopt_cost;
+#endif
+ cu_bits += chrm_rdopt_tu_bits;
+
+ /* during chroma evaluation if skip decision was overwritten */
+ /* then the current skip candidate is set to a non skip candidate */
+ ps_inter_cand->b1_skip_flag = ps_final_prms->u1_skip_flag;
+
+ /* cu bits for chroma residual if chroma rdopt is on */
+ /* if zero_cbf eval is disabled then cu bits will be zero */
+ ps_final_prms->u4_cu_chroma_res_bits = chrm_rdopt_tu_bits;
+
+ if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
+ {
+ /* Early exit : If the current running cost exceeds
+ the prev. best mode cost, break */
+ if(total_rdopt_cost > prev_best_rdopt_cost)
+ {
+ return (total_rdopt_cost);
+ }
+ }
+ }
+ else
+ {}
+
+#if SHRINK_INTER_TUTREE
+ /* ------------- Quadtree TU split optimization ------------ */
+ if(ps_final_prms->u1_is_cu_coded)
+ {
+ ps_final_prms->u2_num_tus_in_cu = ihevce_shrink_inter_tu_tree(
+ &ps_final_prms->as_tu_enc_loop[0],
+ &ps_final_prms->as_tu_enc_loop_temp_prms[0],
+ &ps_final_prms->s_recon_datastore,
+ num_tu_in_cu,
+ (ps_ctxt->u1_chroma_array_type == 2));
+ }
+#endif
+
+ /* RDOPT copy States : Best after all luma TUs (and chroma,if enabled)to current */
+ COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
+ &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
+ .s_cabac_ctxt.au1_ctxt_models[0] +
+ IHEVC_CAB_COEFFX_PREFIX,
+ &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
+ IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
+
+ /* -------- Bit estimate for RD opt -------------- */
+ {
+ nbr_avail_flags_t s_nbr;
+ /*cbf_bits will account for both texture and cbf bits when zero cbf eval flag is 0*/
+ WORD32 cbf_bits, header_bits;
+
+ /* get the neighbour availability flags for current cu */
+ ihevce_get_only_nbr_flag(
+ &s_nbr,
+ ps_ctxt->pu1_ctb_nbr_map,
+ ps_ctxt->i4_nbr_map_strd,
+ cu_pos_x,
+ cu_pos_y,
+ (cu_size >> 2),
+ (cu_size >> 2));
+
+ /* call the entropy rdo encode to get the bit estimate for current cu */
+ header_bits = ihevce_entropy_rdo_encode_cu(
+ &ps_ctxt->s_rdopt_entropy_ctxt,
+ ps_final_prms,
+ (cu_pos_x >> 1), /* back to 8x8 pel units */
+ (cu_pos_y >> 1), /* back to 8x8 pel units */
+ cu_size,
+ ps_ctxt->u1_disable_intra_eval ? !DISABLE_TOP_SYNC && s_nbr.u1_top_avail
+ : s_nbr.u1_top_avail,
+ s_nbr.u1_left_avail,
+ &ps_final_prms->pu1_cu_coeffs[0],
+ &cbf_bits);
+
+ cu_bits += header_bits;
+
+ /* cbf bits are excluded from header bits, instead considered as texture bits */
+ /* incase if zero cbf eval is disabled then texture bits gets added here */
+ ps_final_prms->u4_cu_hdr_bits = (header_bits - cbf_bits);
+ ps_final_prms->u4_cu_cbf_bits = cbf_bits;
+
+#if RDOPT_ENABLE
+ /* add the cost of coding the header bits */
+ total_rdopt_cost +=
+ COMPUTE_RATE_COST_CLIP30(header_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
+
+#if ENABLE_INTER_ZCU_COST
+ /* If cu is coded, Evaluate not coded cost and check if it improves over coded cost */
+ if(ps_final_prms->u1_is_cu_coded && (ZCBF_ENABLE == ps_ctxt->i4_zcbf_rdo_level))
+ {
+ LWORD64 i8_cu_not_coded_cost = ps_ctxt->i8_cu_not_coded_cost;
+
+ WORD32 is_2nx2n_mergecu = (SIZE_2Nx2N == ps_final_prms->u1_part_mode) &&
+ (1 == ps_final_prms->as_pu_enc_loop[0].b1_merge_flag);
+
+ cab_ctxt_t *ps_cab_ctxt =
+ &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx].s_cabac_ctxt;
+
+ /* Read header bits generated after ihevce_entropy_rdo_encode_cu() call */
+ UWORD32 u4_cu_hdr_bits_q12 = ps_cab_ctxt->u4_header_bits_estimated_q12;
+
+ /* account for coding qt_root_cbf = 0 */
+ /* Add the cost of coding qt_root_cbf as 0, then subtract the cost of coding it as 1 (already part of header bits), clamping at 0 */
+ u4_cu_hdr_bits_q12 += gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 0];
+ if(u4_cu_hdr_bits_q12 < gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1])
+ u4_cu_hdr_bits_q12 = 0;
+ else
+ u4_cu_hdr_bits_q12 -= gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1];
+
+ /* add the cost of coding the header bits */
+ i8_cu_not_coded_cost += COMPUTE_RATE_COST_CLIP30(
+ u4_cu_hdr_bits_q12 /* ps_final_prms->u4_cu_hdr_bits */,
+ ps_ctxt->i8_cl_ssd_lambda_qf,
+ (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
+
+ if(ps_ctxt->u1_enable_psyRDOPT)
+ {
+ i8_cu_not_coded_cost = total_rdopt_cost + 1;
+ }
+
+ /* Evaluate qtroot cbf rdo; exclude 2Nx2N Merge as skip cu is explicitly evaluated */
+ if((i8_cu_not_coded_cost <= total_rdopt_cost) && (!is_2nx2n_mergecu))
+ {
+ WORD32 tx_size;
+
+ /* force cu as not coded and update the cost */
+ ps_final_prms->u1_is_cu_coded = 0;
+ ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
+ ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0;
+
+ total_rdopt_cost = i8_cu_not_coded_cost;
+
+ /* reset num TUs to 1 unless cu size is 64 (then 4 TUs of size 32) */
+ ps_final_prms->u2_num_tus_in_cu = (64 == cu_size) ? 4 : 1;
+ trans_size = (64 == cu_size) ? 32 : cu_size;
+ GETRANGE(tx_size, trans_size);
+
+ /* reset the bytes consumed */
+ ps_final_prms->i4_num_bytes_ecd_data = 0;
+
+ /* reset texture related bits and roll back header bits*/
+ ps_final_prms->u4_cu_cbf_bits = 0;
+ ps_final_prms->u4_cu_luma_res_bits = 0;
+ ps_final_prms->u4_cu_chroma_res_bits = 0;
+ ps_final_prms->u4_cu_hdr_bits =
+ (u4_cu_hdr_bits_q12 + (1 << (CABAC_FRAC_BITS_Q - 1))) >> CABAC_FRAC_BITS_Q;
+
+ /* update cabac model with qtroot cbf = 0 decision */
+ ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_NORES_IDX] =
+ gau1_ihevc_next_state[u1_qtroot_cbf_cabac_model << 1];
+
+ /* restore untouched cabac models for, tusplit, cbfs, texture etc */
+ memcpy(
+ &ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_SPLIT_TFM],
+ &au1_rdopt_init_ctxt_models[IHEVC_CAB_SPLIT_TFM],
+ (IHEVC_CAB_CTXT_END - IHEVC_CAB_SPLIT_TFM));
+
+ /* mark all tus as not coded for final eval */
+ for(ctr = 0; ctr < ps_final_prms->u2_num_tus_in_cu; ctr++)
+ {
+ WORD32 curr_pos_x = (ctr & 0x1) ? (trans_size >> 2) : 0;
+ WORD32 curr_pos_y = (ctr & 0x2) ? (trans_size >> 2) : 0;
+
+ nbr_4x4_t *ps_cur_nbr_4x4 =
+ ps_nbr_4x4 + curr_pos_x + (curr_pos_y * num_4x4_in_cu);
+
+ num_4x4_in_tu = trans_size >> 2;
+
+ ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed = 0;
+ ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cb_bytes_consumed[0] = 0;
+ ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cr_bytes_consumed[0] = 0;
+
+ ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = 0;
+ ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0;
+ ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0;
+
+ ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0;
+ ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0;
+
+ ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3;
+ ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x + curr_pos_x;
+ ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y + curr_pos_y;
+
+ /* reset cbf for the all 4x4 in TU */
+ {
+ WORD32 i, j;
+ nbr_4x4_t *ps_tmp_4x4;
+ ps_tmp_4x4 = ps_cur_nbr_4x4;
+
+ for(i = 0; i < num_4x4_in_tu; i++)
+ {
+ for(j = 0; j < num_4x4_in_tu; j++)
+ {
+ ps_tmp_4x4[j].b1_y_cbf = 0;
+ }
+ /* row level update*/
+ ps_tmp_4x4 += num_4x4_in_cu;
+ }
+ }
+ }
+ }
+ }
+#endif /* ENABLE_INTER_ZCU_COST */
+
+#endif /* RDOPT_ENABLE */
+ }
+
+ return (total_rdopt_cost);
+}
+
+#if ENABLE_RDO_BASED_TU_RECURSION
+LWORD64 ihevce_inter_tu_tree_selector_and_rdopt_cost_computer(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ enc_loop_cu_prms_t *ps_cu_prms,
+ void *pv_src,
+ WORD32 cu_size,
+ WORD32 cu_pos_x,
+ WORD32 cu_pos_y,
+ WORD32 curr_buf_idx,
+ enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
+ cu_inter_cand_t *ps_inter_cand,
+ cu_analyse_t *ps_cu_analyse,
+ WORD32 i4_alpha_stim_multiplier)
+{
+ tu_tree_node_t as_tu_nodes[256 + 64 + 16 + 4 + 1];
+ buffer_data_for_tu_t s_buffer_data_for_tu;
+ enc_loop_cu_final_prms_t *ps_final_prms;
+ nbr_4x4_t *ps_nbr_4x4;
+
+ WORD32 num_split_flags = 1;
+ UWORD8 u1_tu_size;
+ UWORD8 *pu1_pred;
+ UWORD8 *pu1_ecd_data;
+ WORD16 *pi2_deq_data;
+ UWORD8 *pu1_csbf_buf;
+ UWORD8 *pu1_tu_sz_sft;
+ UWORD8 *pu1_tu_posx;
+ UWORD8 *pu1_tu_posy;
+ LWORD64 total_rdopt_cost;
+ WORD32 ctr;
+ WORD32 chrm_ctr;
+ WORD32 pred_stride;
+ WORD32 recon_stride;
+ WORD32 trans_size = ps_cu_analyse->u1_cu_size;
+ WORD32 csbf_strd;
+ WORD32 ecd_data_bytes_cons;
+ WORD32 num_4x4_in_cu;
+ WORD32 num_4x4_in_tu;
+ WORD32 recon_func_mode;
+ WORD32 cu_bits;
+ UWORD8 u1_compute_spatial_ssd;
+ /* backup copy of cabac states for restoration if zero cu residue rdo wins later */
+ UWORD8 au1_rdopt_init_ctxt_models[IHEVC_CAB_CTXT_END];
+
+ WORD32 i4_min_trans_size = 256;
+ LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!curr_buf_idx].i8_best_rdopt_cost;
+ WORD32 src_strd = ps_cu_prms->i4_luma_src_stride;
+ /* model for no residue syntax qt root cbf flag */
+ UWORD8 u1_qtroot_cbf_cabac_model = ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_NORES_IDX];
+ UWORD8 u1_skip_tu_sz_sft = 0;
+ UWORD8 u1_skip_tu_posx = 0;
+ UWORD8 u1_skip_tu_posy = 0;
+ UWORD8 u1_is_cu_noisy = ps_cu_prms->u1_is_cu_noisy;
+
+ ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx];
+ ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0];
+ pu1_ecd_data = &ps_final_prms->pu1_cu_coeffs[0];
+ pi2_deq_data = &ps_final_prms->pi2_cu_deq_coeffs[0];
+ csbf_strd = ps_ctxt->i4_cu_csbf_strd;
+ pu1_csbf_buf = &ps_ctxt->au1_cu_csbf[0];
+ pred_stride = ps_inter_cand->i4_pred_data_stride;
+ recon_stride = cu_size;
+ pu1_pred = ps_inter_cand->pu1_pred_data;
+ chrm_ctr = 0;
+ ecd_data_bytes_cons = 0;
+ total_rdopt_cost = 0;
+ num_4x4_in_cu = cu_size >> 2;
+ recon_func_mode = PRED_MODE_INTER;
+ cu_bits = 0;
+
+ /* get the 4x4 level position of current cu */
+ cu_pos_x = cu_pos_x << 1;
+ cu_pos_y = cu_pos_y << 1;
+
+ ps_final_prms->u1_is_cu_coded = 0;
+ ps_final_prms->u4_cu_sad = 0;
+
+ /* populate the coeffs scan idx */
+ ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
+
+#if ENABLE_INTER_ZCU_COST
+ /* reset cu not coded cost */
+ ps_ctxt->i8_cu_not_coded_cost = 0;
+
+ /* backup copy of cabac states for restoration if zero cu residue rdo wins later */
+ memcpy(au1_rdopt_init_ctxt_models, &ps_ctxt->au1_rdopt_init_ctxt_models[0], IHEVC_CAB_CTXT_END);
+#endif
+
+ if(ps_cu_analyse->u1_cu_size == 64)
+ {
+ num_split_flags = 4;
+ u1_tu_size = 32;
+ }
+ else
+ {
+ num_split_flags = 1;
+ u1_tu_size = ps_cu_analyse->u1_cu_size;
+ }
+
+ if(1 == ps_final_prms->u1_skip_flag)
+ {
+ if(64 == cu_size)
+ {
+ /* TU = CU/2 is set but no transform is evaluated */
+ pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0];
+ pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0];
+ pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0];
+ }
+ else
+ {
+ /* TU = CU is set but no transform is evaluated */
+ pu1_tu_sz_sft = &u1_skip_tu_sz_sft;
+ pu1_tu_posx = &u1_skip_tu_posx;
+ pu1_tu_posy = &u1_skip_tu_posy;
+ }
+
+ recon_func_mode = PRED_MODE_SKIP;
+ }
+ /* check for PU part mode being AMP or No AMP */
+ else if(ps_final_prms->u1_part_mode < SIZE_2NxnU)
+ {
+ if((SIZE_2Nx2N == ps_final_prms->u1_part_mode) && (cu_size < 64))
+ {
+ /* TU= CU is evaluated 2Nx2N inter case */
+ pu1_tu_sz_sft = &u1_skip_tu_sz_sft;
+ pu1_tu_posx = &u1_skip_tu_posx;
+ pu1_tu_posy = &u1_skip_tu_posy;
+ }
+ else
+ {
+ /* currently TU= CU/2 is evaluated for all inter case */
+ pu1_tu_sz_sft = &gau1_inter_tu_shft_amt[0];
+ pu1_tu_posx = &gau1_inter_tu_posx_scl_amt[0];
+ pu1_tu_posy = &gau1_inter_tu_posy_scl_amt[0];
+ }
+ }
+ else
+ {
+ /* for AMP cases one level of TU recursion is done */
+ /* based on orientation of the partitions */
+ pu1_tu_sz_sft = &gau1_inter_tu_shft_amt_amp[ps_final_prms->u1_part_mode - 4][0];
+ pu1_tu_posx = &gau1_inter_tu_posx_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0];
+ pu1_tu_posy = &gau1_inter_tu_posy_scl_amt_amp[ps_final_prms->u1_part_mode - 4][0];
+ }
+
+ i4_min_trans_size = 4;
+
+ if(ps_ctxt->i1_cu_qp_delta_enable)
+ {
+ WORD32 i4_act_counter = 0, i4_act_counter_lamda = 0;
+ if(ps_cu_analyse->u1_cu_size == 64)
+ {
+ ASSERT(
+ (i4_min_trans_size == 32) || (i4_min_trans_size == 16) ||
+ (i4_min_trans_size == 8) || (i4_min_trans_size == 4));
+ i4_act_counter = (i4_min_trans_size == 16) +
+ 2 * ((i4_min_trans_size == 8) || (i4_min_trans_size == 4));
+ i4_act_counter_lamda = 3;
+ }
+ else if(ps_cu_analyse->u1_cu_size == 32)
+ {
+ ASSERT(
+ (i4_min_trans_size == 32) || (i4_min_trans_size == 16) ||
+ (i4_min_trans_size == 8) || (i4_min_trans_size == 4));
+ i4_act_counter = (i4_min_trans_size == 16) +
+ 2 * ((i4_min_trans_size == 8) || (i4_min_trans_size == 4));
+ i4_act_counter_lamda = 0;
+ }
+ else if(ps_cu_analyse->u1_cu_size == 16)
+ {
+ ASSERT(
+ (i4_min_trans_size == 16) || (i4_min_trans_size == 8) || (i4_min_trans_size == 4));
+ i4_act_counter = (i4_min_trans_size == 8) || (i4_min_trans_size == 4);
+ i4_act_counter_lamda = 0;
+ }
+ else if(ps_cu_analyse->u1_cu_size == 8)
+ {
+ ASSERT((i4_min_trans_size == 8) || (i4_min_trans_size == 4));
+ i4_act_counter = 1;
+ i4_act_counter_lamda = 0;
+ }
+ else
+ {
+ ASSERT(0);
+ }
+ if(ps_ctxt->i4_use_ctb_level_lamda)
+ {
+ ihevce_compute_cu_level_QP(
+ ps_ctxt, ps_cu_analyse->i4_act_factor[i4_act_counter][0], -1, 0);
+ }
+ else
+ {
+ ihevce_compute_cu_level_QP(
+ ps_ctxt,
+ ps_cu_analyse->i4_act_factor[i4_act_counter][0],
+ ps_cu_analyse->i4_act_factor[i4_act_counter_lamda][0],
+ 0);
+ }
+
+ ps_cu_analyse->i1_cu_qp = ps_ctxt->i4_cu_qp;
+ }
+
+ if(u1_is_cu_noisy && !ps_ctxt->u1_enable_psyRDOPT)
+ {
+ ps_ctxt->i8_cl_ssd_lambda_qf =
+ ((float)ps_ctxt->i8_cl_ssd_lambda_qf * (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) /
+ 100.0f);
+ ps_ctxt->i8_cl_ssd_lambda_chroma_qf =
+ ((float)ps_ctxt->i8_cl_ssd_lambda_chroma_qf *
+ (100.0f - RDOPT_LAMBDA_DISCOUNT_WHEN_NOISY) / 100.0f);
+ }
+
+ u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
+ (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
+ CONVERT_SSDS_TO_SPATIAL_DOMAIN;
+
+ if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
+ {
+ u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
+ CONVERT_SSDS_TO_SPATIAL_DOMAIN;
+ }
+
+ if(!u1_compute_spatial_ssd)
+ {
+ ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0;
+ ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
+ }
+ else
+ {
+ ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 1;
+
+ if(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0))
+ {
+ ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 1;
+ }
+ }
+
+ /* RDOPT copy States : TU init (best until prev TU) to current */
+ memcpy(
+ &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
+ .s_cabac_ctxt.au1_ctxt_models[0],
+ &ps_ctxt->au1_rdopt_init_ctxt_models[0],
+ IHEVC_CAB_COEFFX_PREFIX);
+
+ ihevce_tu_tree_init(
+ as_tu_nodes,
+ cu_size,
+ (cu_size == 64) ? !ps_inter_cand->b1_skip_flag : 0,
+ ps_inter_cand->b1_skip_flag ? 0 : ps_ctxt->u1_max_inter_tr_depth,
+ INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0),
+ ps_ctxt->u1_chroma_array_type == 2);
+
+ if(!ps_inter_cand->b1_skip_flag && (ps_ctxt->i4_quality_preset >= IHEVCE_QUALITY_P3))
+ {
+ ihevce_tuSplitArray_to_tuTree_mapper(
+ as_tu_nodes,
+ ps_inter_cand->ai4_tu_split_flag,
+ cu_size,
+ cu_size,
+ MAX(MIN_TU_SIZE, (cu_size >> ps_ctxt->u1_max_inter_tr_depth)),
+ MIN(MAX_TU_SIZE, cu_size),
+ ps_inter_cand->b1_skip_flag);
+ }
+
+ ASSERT(ihevce_tu_tree_coverage_in_cu(as_tu_nodes) == cu_size * cu_size);
+
+#if ENABLE_INTER_ZCU_COST
+ ps_ctxt->i8_cu_not_coded_cost = 0;
+#endif
+
+ s_buffer_data_for_tu.s_src_pred_rec_buf_luma.pv_src = pv_src;
+ s_buffer_data_for_tu.s_src_pred_rec_buf_luma.pv_pred = pu1_pred;
+ s_buffer_data_for_tu.s_src_pred_rec_buf_luma.pv_recon =
+ ps_final_prms->s_recon_datastore.apv_luma_recon_bufs[0];
+ s_buffer_data_for_tu.s_src_pred_rec_buf_luma.i4_src_stride = src_strd;
+ s_buffer_data_for_tu.s_src_pred_rec_buf_luma.i4_pred_stride = pred_stride;
+ s_buffer_data_for_tu.s_src_pred_rec_buf_luma.i4_recon_stride =
+ ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
+ s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_src = ps_chrm_cu_buf_prms->pu1_curr_src;
+ s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_pred =
+ ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[CU_ME_INTRA_PRED_CHROMA_IDX] +
+ curr_buf_idx * ((MAX_CTB_SIZE * MAX_CTB_SIZE >> 1) + ((ps_ctxt->u1_chroma_array_type == 2) *
+ (MAX_CTB_SIZE * MAX_CTB_SIZE >> 1)));
+ s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_recon =
+ ps_final_prms->s_recon_datastore.apv_chroma_recon_bufs[0];
+ s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_src_stride =
+ ps_chrm_cu_buf_prms->i4_chrm_src_stride;
+ s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_pred_stride =
+ ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[CU_ME_INTRA_PRED_CHROMA_IDX];
+ s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_recon_stride =
+ ps_final_prms->s_recon_datastore.i4_chromaRecon_stride;
+ s_buffer_data_for_tu.ps_nbr_data_buf = ps_nbr_4x4;
+ s_buffer_data_for_tu.pi2_deq_data = pi2_deq_data;
+ s_buffer_data_for_tu.pi2_deq_data_chroma =
+ pi2_deq_data + ps_final_prms->i4_chrm_deq_coeff_strt_idx;
+ s_buffer_data_for_tu.i4_nbr_data_buf_stride = num_4x4_in_cu;
+ s_buffer_data_for_tu.i4_deq_data_stride = cu_size;
+ s_buffer_data_for_tu.i4_deq_data_stride_chroma = cu_size;
+ s_buffer_data_for_tu.ppu1_ecd = &pu1_ecd_data;
+
+ if(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0))
+ {
+ UWORD8 i;
+
+ UWORD8 *pu1_pred = (UWORD8 *)s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.pv_pred;
+
+ for(i = 0; i < (!!ps_inter_cand->b3_part_size) + 1; i++)
+ {
+ pu_t *ps_pu;
+
+ WORD32 inter_pu_wd;
+ WORD32 inter_pu_ht;
+
+ ps_pu = ps_inter_cand->as_inter_pu + i;
+
+ inter_pu_wd = (ps_pu->b4_wd + 1) << 2; /* cb and cr pixel interleaved */
+ inter_pu_ht = ((ps_pu->b4_ht + 1) << 2) >> 1;
+ inter_pu_ht <<= (ps_ctxt->u1_chroma_array_type == 2);
+ ihevce_chroma_inter_pred_pu(
+ &ps_ctxt->s_mc_ctxt,
+ ps_pu,
+ pu1_pred,
+ s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_pred_stride);
+ if(!!ps_inter_cand->b3_part_size)
+ {
+ /* 2Nx__ partition case */
+ if(inter_pu_wd == cu_size)
+ {
+ pu1_pred +=
+ (inter_pu_ht *
+ s_buffer_data_for_tu.s_src_pred_rec_buf_chroma.i4_pred_stride);
+ }
+
+ /* __x2N partition case */
+ if(inter_pu_ht == (cu_size >> !(ps_ctxt->u1_chroma_array_type == 2)))
+ {
+ pu1_pred += inter_pu_wd;
+ }
+ }
+ }
+ }
+
+#if !ENABLE_TOP_DOWN_TU_RECURSION
+ total_rdopt_cost = ihevce_tu_tree_selector(
+ ps_ctxt,
+ as_tu_nodes,
+ &s_buffer_data_for_tu,
+ &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
+ .s_cabac_ctxt.au1_ctxt_models[0],
+ recon_func_mode,
+#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ i4_alpha_stim_multiplier,
+ u1_is_cu_noisy,
+#endif
+ 0,
+ ps_ctxt->u1_max_inter_tr_depth,
+ ps_inter_cand->b3_part_size,
+ u1_compute_spatial_ssd);
+#else
+ total_rdopt_cost = ihevce_topDown_tu_tree_selector(
+ ps_ctxt,
+ as_tu_nodes,
+ &s_buffer_data_for_tu,
+ &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
+ .s_cabac_ctxt.au1_ctxt_models[0],
+ recon_func_mode,
+#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ i4_alpha_stim_multiplier,
+ u1_is_cu_noisy,
+#endif
+ 0,
+ ps_ctxt->u1_max_inter_tr_depth,
+ ps_inter_cand->b3_part_size,
+ INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0),
+ u1_compute_spatial_ssd);
+#endif
+
+ ps_final_prms->u2_num_tus_in_cu = 0;
+ ps_final_prms->u4_cu_luma_res_bits = 0;
+ ps_final_prms->u4_cu_sad = 0;
+ total_rdopt_cost = 0;
+ ecd_data_bytes_cons = 0;
+ cu_bits = 0;
+#if ENABLE_INTER_ZCU_COST
+ ps_ctxt->i8_cu_not_coded_cost = 0;
+#endif
+ ps_final_prms->u1_is_cu_coded = 0;
+ ps_final_prms->u1_cu_size = cu_size;
+
+ ihevce_tu_selector_debriefer(
+ as_tu_nodes,
+ ps_final_prms,
+ &total_rdopt_cost,
+#if ENABLE_INTER_ZCU_COST
+ &ps_ctxt->i8_cu_not_coded_cost,
+#endif
+ &ecd_data_bytes_cons,
+ &cu_bits,
+ &ps_final_prms->u2_num_tus_in_cu,
+ ps_ctxt->i4_cu_qp,
+ cu_pos_x * 4,
+ cu_pos_y * 4,
+ INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0),
+ (ps_ctxt->u1_chroma_array_type == 2),
+ POS_TL);
+
+ if(!(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0)))
+ {
+ ps_final_prms->i4_chrm_cu_coeff_strt_idx = ecd_data_bytes_cons;
+ }
+
+ /* Modify the cost function for this CU. */
+ /* loop in for 8x8 blocks */
+ if(ps_ctxt->u1_enable_psyRDOPT)
+ {
+ UWORD8 *pu1_recon_cu;
+ WORD32 recon_stride;
+ WORD32 curr_pos_x;
+ WORD32 curr_pos_y;
+ WORD32 start_index;
+ WORD32 num_horz_cu_in_ctb;
+ WORD32 had_block_size;
+
+ /* tODO: sreenivasa ctb size has to be used appropriately */
+ had_block_size = 8;
+ num_horz_cu_in_ctb = 64 / had_block_size;
+
+ curr_pos_x = cu_pos_x << 2; /* pel units */
+ curr_pos_y = cu_pos_y << 2; /* pel units */
+ recon_stride = ps_final_prms->s_recon_datastore.i4_lumaRecon_stride;
+ pu1_recon_cu = ((UWORD8 *)ps_final_prms->s_recon_datastore
+ .apv_luma_recon_bufs[0]); // already pointing to the current CU recon
+ //+ \curr_pos_x + curr_pos_y * recon_stride;
+
+ /* start index to index the source satd of curr cu in the current ctb */
+ start_index =
+ (curr_pos_x / had_block_size) + (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
+
+ {
+ total_rdopt_cost += ihevce_psy_rd_cost(
+ ps_ctxt->ai4_source_satd_8x8,
+ pu1_recon_cu,
+ recon_stride,
+ 1, //howz stride
+ cu_size,
+ 0, // pic type
+ 0, //layer id
+ ps_ctxt->i4_satd_lamda, // lambda
+ start_index,
+ ps_ctxt->u1_is_input_data_hbd,
+ ps_ctxt->u4_psy_strength,
+ &ps_ctxt->s_cmn_opt_func); // 8 bit
+ }
+ }
+
+ ps_final_prms->u1_chroma_intra_pred_mode = 4;
+
+ /* update the bytes consumed */
+ ps_final_prms->i4_num_bytes_ecd_data = ecd_data_bytes_cons;
+
+ /* store the current cu size to final prms */
+ ps_final_prms->u1_cu_size = cu_size;
+ /* ------------- Chroma processing -------------- */
+ /* Chroma rdopt eval for each luma candidate only for HIGH QUALITY/MEDIUM SPEED preset */
+ if(ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt &&
+ !(INCLUDE_CHROMA_DURING_TU_RECURSION && (ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P0)))
+ {
+ LWORD64 chrm_rdopt_cost;
+ WORD32 chrm_rdopt_tu_bits;
+
+ /* Store the current RDOPT cost to enable early exit in chrom_prcs */
+ ps_ctxt->as_cu_prms[curr_buf_idx].i8_curr_rdopt_cost = total_rdopt_cost;
+
+ chrm_rdopt_cost = ihevce_chroma_cu_prcs_rdopt(
+ ps_ctxt,
+ curr_buf_idx,
+ 0, /* TU mode : Don't care in Inter patrh */
+ ps_chrm_cu_buf_prms->pu1_curr_src,
+ ps_chrm_cu_buf_prms->i4_chrm_src_stride,
+ ps_chrm_cu_buf_prms->pu1_cu_left,
+ ps_chrm_cu_buf_prms->pu1_cu_top,
+ ps_chrm_cu_buf_prms->pu1_cu_top_left,
+ ps_chrm_cu_buf_prms->i4_cu_left_stride,
+ (cu_pos_x >> 1),
+ (cu_pos_y >> 1),
+ &chrm_rdopt_tu_bits,
+ i4_alpha_stim_multiplier,
+ u1_is_cu_noisy);
+
+#if WEIGH_CHROMA_COST
+ chrm_rdopt_cost = (LWORD64)(
+ (chrm_rdopt_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
+ (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
+ CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
+#endif
+
+#if CHROMA_RDOPT_ENABLE
+ total_rdopt_cost += chrm_rdopt_cost;
+#endif
+ cu_bits += chrm_rdopt_tu_bits;
+
+ /* during chroma evaluation if skip decision was over written */
+ /* then the current skip candidate is set to a non skip candidate */
+ ps_inter_cand->b1_skip_flag = ps_final_prms->u1_skip_flag;
+
+ /* cu bits for chroma residual if chroma rdopt is on */
+ /* if zero_cbf eval is disabled then cu bits will be zero */
+ ps_final_prms->u4_cu_chroma_res_bits = chrm_rdopt_tu_bits;
+
+ if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
+ {
+ /* Early exit : If the current running cost exceeds
+ the prev. best mode cost, break */
+ if(total_rdopt_cost > prev_best_rdopt_cost)
+ {
+ return (total_rdopt_cost);
+ }
+ }
+ }
+ else
+ {}
+
+#if SHRINK_INTER_TUTREE
+ /* ------------- Quadtree TU split optimization ------------ */
+ if(ps_final_prms->u1_is_cu_coded)
+ {
+ ps_final_prms->u2_num_tus_in_cu = ihevce_shrink_inter_tu_tree(
+ &ps_final_prms->as_tu_enc_loop[0],
+ &ps_final_prms->as_tu_enc_loop_temp_prms[0],
+ &ps_final_prms->s_recon_datastore,
+ ps_final_prms->u2_num_tus_in_cu,
+ (ps_ctxt->u1_chroma_array_type == 2));
+ }
+#endif
+
+ /* RDOPT copy States : Best after all luma TUs (and chroma,if enabled)to current */
+ COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
+ &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx]
+ .s_cabac_ctxt.au1_ctxt_models[0] +
+ IHEVC_CAB_COEFFX_PREFIX,
+ &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
+ IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
+
+ /* -------- Bit estimate for RD opt -------------- */
+ {
+ nbr_avail_flags_t s_nbr;
+ /*cbf_bits will account for both texture and cbf bits when zero cbf eval flag is 0*/
+ WORD32 cbf_bits, header_bits;
+
+ /* get the neighbour availability flags for current cu */
+ ihevce_get_only_nbr_flag(
+ &s_nbr,
+ ps_ctxt->pu1_ctb_nbr_map,
+ ps_ctxt->i4_nbr_map_strd,
+ cu_pos_x,
+ cu_pos_y,
+ (cu_size >> 2),
+ (cu_size >> 2));
+
+ /* call the entropy rdo encode to get the bit estimate for current cu */
+ header_bits = ihevce_entropy_rdo_encode_cu(
+ &ps_ctxt->s_rdopt_entropy_ctxt,
+ ps_final_prms,
+ (cu_pos_x >> 1), /* back to 8x8 pel units */
+ (cu_pos_y >> 1), /* back to 8x8 pel units */
+ cu_size,
+ ps_ctxt->u1_disable_intra_eval ? !DISABLE_TOP_SYNC && s_nbr.u1_top_avail
+ : s_nbr.u1_top_avail,
+ s_nbr.u1_left_avail,
+ &ps_final_prms->pu1_cu_coeffs[0],
+ &cbf_bits);
+
+ cu_bits += header_bits;
+
+ /* cbf bits are excluded from header bits, instead considered as texture bits */
+ /* incase if zero cbf eval is disabled then texture bits gets added here */
+ ps_final_prms->u4_cu_hdr_bits = (header_bits - cbf_bits);
+ ps_final_prms->u4_cu_cbf_bits = cbf_bits;
+
+#if RDOPT_ENABLE
+ /* add the cost of coding the header bits */
+ total_rdopt_cost +=
+ COMPUTE_RATE_COST_CLIP30(header_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
+
+#if ENABLE_INTER_ZCU_COST
+ /* If cu is coded, Evaluate not coded cost and check if it improves over coded cost */
+ if(ps_final_prms->u1_is_cu_coded && (ZCBF_ENABLE == ps_ctxt->i4_zcbf_rdo_level))
+ {
+ LWORD64 i8_cu_not_coded_cost = ps_ctxt->i8_cu_not_coded_cost;
+
+ WORD32 is_2nx2n_mergecu = (SIZE_2Nx2N == ps_final_prms->u1_part_mode) &&
+ (1 == ps_final_prms->as_pu_enc_loop[0].b1_merge_flag);
+
+ cab_ctxt_t *ps_cab_ctxt =
+ &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[curr_buf_idx].s_cabac_ctxt;
+
+ /* Read header bits generated after ihevce_entropy_rdo_encode_cu() call */
+ UWORD32 u4_cu_hdr_bits_q12 = ps_cab_ctxt->u4_header_bits_estimated_q12;
+
+ /* account for coding qt_root_cbf = 0 */
+ /* Add the cost of coding qt_root_cbf as 0, then subtract the cost of coding it as 1 (already part of header bits), clamping at 0 */
+ u4_cu_hdr_bits_q12 += gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 0];
+ if(u4_cu_hdr_bits_q12 < gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1])
+ u4_cu_hdr_bits_q12 = 0;
+ else
+ u4_cu_hdr_bits_q12 -= gau2_ihevce_cabac_bin_to_bits[u1_qtroot_cbf_cabac_model ^ 1];
+
+ /* add the cost of coding the header bits */
+ i8_cu_not_coded_cost += COMPUTE_RATE_COST_CLIP30(
+ u4_cu_hdr_bits_q12 /* ps_final_prms->u4_cu_hdr_bits */,
+ ps_ctxt->i8_cl_ssd_lambda_qf,
+ (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
+
+ if(ps_ctxt->u1_enable_psyRDOPT)
+ {
+ i8_cu_not_coded_cost = total_rdopt_cost + 1;
+ }
+
+ /* Evaluate qtroot cbf rdo; exclude 2Nx2N Merge as skip cu is explicitly evaluated */
+ if((i8_cu_not_coded_cost <= total_rdopt_cost) && (!is_2nx2n_mergecu))
+ {
+ WORD32 tx_size;
+
+ /* force cu as not coded and update the cost */
+ ps_final_prms->u1_is_cu_coded = 0;
+ ps_final_prms->s_recon_datastore.au1_is_chromaRecon_available[0] = 0;
+ ps_final_prms->s_recon_datastore.u1_is_lumaRecon_available = 0;
+
+ total_rdopt_cost = i8_cu_not_coded_cost;
+
+ /* reset num TUs to 1 unless cu size is 64 (then 4 TUs of size 32) */
+ ps_final_prms->u2_num_tus_in_cu = (64 == cu_size) ? 4 : 1;
+ trans_size = (64 == cu_size) ? 32 : cu_size;
+ GETRANGE(tx_size, trans_size);
+
+ /* reset the bytes consumed */
+ ps_final_prms->i4_num_bytes_ecd_data = 0;
+
+ /* reset texture related bits and roll back header bits*/
+ ps_final_prms->u4_cu_cbf_bits = 0;
+ ps_final_prms->u4_cu_luma_res_bits = 0;
+ ps_final_prms->u4_cu_chroma_res_bits = 0;
+ ps_final_prms->u4_cu_hdr_bits =
+ (u4_cu_hdr_bits_q12 + (1 << (CABAC_FRAC_BITS_Q - 1))) >> CABAC_FRAC_BITS_Q;
+
+ /* update cabac model with qtroot cbf = 0 decision */
+ ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_NORES_IDX] =
+ gau1_ihevc_next_state[u1_qtroot_cbf_cabac_model << 1];
+
+ /* restore untouched cabac models for, tusplit, cbfs, texture etc */
+ memcpy(
+ &ps_cab_ctxt->au1_ctxt_models[IHEVC_CAB_SPLIT_TFM],
+ &au1_rdopt_init_ctxt_models[IHEVC_CAB_SPLIT_TFM],
+ (IHEVC_CAB_CTXT_END - IHEVC_CAB_SPLIT_TFM));
+
+ /* mark all tus as not coded for final eval */
+ for(ctr = 0; ctr < ps_final_prms->u2_num_tus_in_cu; ctr++)
+ {
+ WORD32 curr_pos_x = (ctr & 0x1) ? (trans_size >> 2) : 0;
+ WORD32 curr_pos_y = (ctr & 0x2) ? (trans_size >> 2) : 0;
+
+ nbr_4x4_t *ps_cur_nbr_4x4 =
+ ps_nbr_4x4 + curr_pos_x + (curr_pos_y * num_4x4_in_cu);
+
+ num_4x4_in_tu = trans_size >> 2;
+
+ ps_final_prms->as_tu_enc_loop_temp_prms[ctr].i2_luma_bytes_consumed = 0;
+ ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cb_bytes_consumed[0] = 0;
+ ps_final_prms->as_tu_enc_loop_temp_prms[ctr].ai2_cr_bytes_consumed[0] = 0;
+
+ ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_y_cbf = 0;
+ ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf = 0;
+ ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf = 0;
+
+ ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1 = 0;
+ ps_final_prms->as_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1 = 0;
+
+ ps_final_prms->as_tu_enc_loop[ctr].s_tu.b3_size = tx_size - 3;
+ ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_x = cu_pos_x + curr_pos_x;
+ ps_final_prms->as_tu_enc_loop[ctr].s_tu.b4_pos_y = cu_pos_y + curr_pos_y;
+
+ /* reset cbf for the all 4x4 in TU */
+ {
+ WORD32 i, j;
+ nbr_4x4_t *ps_tmp_4x4;
+ ps_tmp_4x4 = ps_cur_nbr_4x4;
+
+ for(i = 0; i < num_4x4_in_tu; i++)
+ {
+ for(j = 0; j < num_4x4_in_tu; j++)
+ {
+ ps_tmp_4x4[j].b1_y_cbf = 0;
+ }
+ /* row level update*/
+ ps_tmp_4x4 += num_4x4_in_cu;
+ }
+ }
+ }
+ }
+ }
+#endif /* ENABLE_INTER_ZCU_COST */
+
+#endif /* RDOPT_ENABLE */
+ }
+
+ return (total_rdopt_cost);
+}
+#endif
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_inter_rdopt_cu_mc_mvp \endif
+*
+* \brief
+* Inter Coding unit function which performs MC and MVP calc for RD opt mode.
+* For every PU it fills ps_ctxt->as_cu_prms[curr_buf_idx] and ps_ctxt->as_cu_nbr[curr_buf_idx] and marks the PU in the ctb neighbour map
+* \param[in] ps_ctxt enc_loop module ctxt pointer
+* \param[in,out] ps_inter_cand pointer to inter candidate structure (ref pic buf ids of its PUs are written here)
+* \param[in] cu_size Current CU size
+* \param[in] cu_pos_x cu position x w.r.t to ctb (doubled internally to get the 4x4 level position)
+* \param[in] cu_pos_y cu position y w.r.t to ctb (doubled internally to get the 4x4 level position)
+* \param[in] ps_left_nbr_4x4 Left neighbour 4x4 structure pointer
+* \param[in] ps_top_nbr_4x4 top neighbour 4x4 structure pointer
+* \param[in] ps_topleft_nbr_4x4 top left neighbour 4x4 structure pointer
+* \param[in] nbr_4x4_left_strd left neighbour 4x4 buffer stride
+* \param[in] curr_buf_idx Current Buffer index (selects the as_cu_nbr / as_cu_prms entry that is filled)
+*
+* \return
+* Always 0: RD opt cost computation is part of the cu_ntu function, so rdopt_cost is set to 0 for every partition
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+LWORD64 ihevce_inter_rdopt_cu_mc_mvp(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ cu_inter_cand_t *ps_inter_cand,
+ WORD32 cu_size,
+ WORD32 cu_pos_x,
+ WORD32 cu_pos_y,
+ nbr_4x4_t *ps_left_nbr_4x4,
+ nbr_4x4_t *ps_top_nbr_4x4,
+ nbr_4x4_t *ps_topleft_nbr_4x4,
+ WORD32 nbr_4x4_left_strd,
+ WORD32 curr_buf_idx)
+{
+ /* local variables */
+ enc_loop_cu_final_prms_t *ps_final_prms;
+ nbr_avail_flags_t s_nbr;
+ nbr_4x4_t *ps_nbr_4x4;
+
+ UWORD8 au1_is_top_used[2][MAX_MVP_LIST_CAND];
+ UWORD8 *pu1_pred;
+ WORD32 rdopt_cost;
+ WORD32 ctr;
+ WORD32 num_cu_part;
+ WORD32 inter_pu_wd;
+ WORD32 inter_pu_ht;
+ WORD32 pred_stride;
+
+ /* get the cu neighbour array, final params and pred buffer based on curbuf idx / candidate */
+ ps_nbr_4x4 = &ps_ctxt->as_cu_nbr[curr_buf_idx][0];
+ ps_final_prms = &ps_ctxt->as_cu_prms[curr_buf_idx];
+ pu1_pred = ps_inter_cand->pu1_pred_data;
+
+ pred_stride = ps_inter_cand->i4_pred_data_stride;
+
+ /* store the partition mode in final prms */
+ ps_final_prms->u1_part_mode = ps_inter_cand->b3_part_size;
+
+ /* since encoder does not support NXN part type */
+ /* num parts can be either 1 (2Nx2N) or 2 (any other part size) only */
+ ASSERT(SIZE_NxN != ps_inter_cand->b3_part_size);
+
+ num_cu_part = (SIZE_2Nx2N != ps_inter_cand->b3_part_size) + 1;
+
+ /* get the 4x4 level position of current cu (incoming position is doubled) */
+ cu_pos_x = cu_pos_x << 1;
+ cu_pos_y = cu_pos_y << 1;
+
+ /* populate cu level params */
+ ps_final_prms->u1_intra_flag = PRED_MODE_INTER;
+ ps_final_prms->u2_num_pus_in_cu = num_cu_part;
+
+ /* run a loop over all the partitions in cu */
+ for(ctr = 0; ctr < num_cu_part; ctr++)
+ {
+ pu_mv_t as_pred_mv[MAX_MVP_LIST_CAND];
+ pu_t *ps_pu;
+ WORD32 skip_or_merge_flag;
+ UWORD8 u1_use_mvp_from_top_row;
+
+ ps_pu = &ps_inter_cand->as_inter_pu[ctr];
+
+ /* with AMP each partition can have a different wd/ht; (b4_wd + 1) * 4 is in the same units as cu_size */
+ inter_pu_wd = (ps_pu->b4_wd + 1) << 2;
+ inter_pu_ht = (ps_pu->b4_ht + 1) << 2;
+
+ /* populate reference pic buf id for bs compute */
+
+ /* L0 (a ref idx of -1 means the list is not used) */
+ if(-1 != ps_pu->mv.i1_l0_ref_idx)
+ {
+ ps_pu->mv.i1_l0_ref_pic_buf_id =
+ ps_ctxt->s_mv_pred_ctxt.ps_ref_list[0][ps_pu->mv.i1_l0_ref_idx]->i4_buf_id;
+ }
+
+ /* L1 (a ref idx of -1 means the list is not used) */
+ if(-1 != ps_pu->mv.i1_l1_ref_idx)
+ {
+ ps_pu->mv.i1_l1_ref_pic_buf_id =
+ ps_ctxt->s_mv_pred_ctxt.ps_ref_list[1][ps_pu->mv.i1_l1_ref_idx]->i4_buf_id;
+ }
+
+ /* SKIP or merge check for every part: skip is a candidate level flag, merge is per PU */
+ skip_or_merge_flag = ps_inter_cand->b1_skip_flag | ps_pu->b1_merge_flag;
+
+ /* ----------- MV Prediction (only for PUs that are neither skip nor merge) ----------------- */
+ if(0 == skip_or_merge_flag)
+ {
+ /* get the neighbour availability flags */
+ ihevce_get_only_nbr_flag(
+ &s_nbr,
+ ps_ctxt->pu1_ctb_nbr_map,
+ ps_ctxt->i4_nbr_map_strd,
+ cu_pos_x,
+ cu_pos_y,
+ inter_pu_wd >> 2,
+ inter_pu_ht >> 2);
+ /* top row MVPs start disabled only if intra eval is disabled, DISABLE_TOP_SYNC is set and ps_pu->b4_pos_y is 0 */
+ if(ps_ctxt->u1_disable_intra_eval && DISABLE_TOP_SYNC && (ps_pu->b4_pos_y == 0))
+ {
+ u1_use_mvp_from_top_row = 0;
+ }
+ else
+ {
+ u1_use_mvp_from_top_row = 1;
+ }
+ /* if disabled: no top nbr -> re-enable; top nbr but no left/bot-left -> sync on top right and re-enable; otherwise clear the top availability flags */
+ if(!u1_use_mvp_from_top_row)
+ {
+ if(s_nbr.u1_top_avail || s_nbr.u1_top_lt_avail || s_nbr.u1_top_rt_avail)
+ {
+ if(!s_nbr.u1_left_avail && !s_nbr.u1_bot_lt_avail)
+ {
+ WORD32 curr_cu_pos_in_row, cu_top_right_offset, cu_top_right_dep_pos;
+
+ /* Ensure Top Right Sync (skipped when u1_use_top_at_ctb_boundary is set) */
+ if(!ps_ctxt->u1_use_top_at_ctb_boundary)
+ {
+ curr_cu_pos_in_row =
+ ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_x + (cu_pos_x << 2);
+
+ if(ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y == 0)
+ {
+ /* No wait for 1st row (this first value is overwritten by the tile based offset just below) */
+ cu_top_right_offset = -(MAX_CTB_SIZE);
+ {
+ ihevce_tile_params_t *ps_col_tile_params =
+ ((ihevce_tile_params_t *)ps_ctxt->pv_tile_params_base +
+ ps_ctxt->i4_tile_col_idx);
+
+ /* No wait for 1st row: offset also subtracts i4_first_sample_x of the current tile column */
+ cu_top_right_offset =
+ -(ps_col_tile_params->i4_first_sample_x + (MAX_CTB_SIZE));
+ }
+ cu_top_right_dep_pos = 0;
+ }
+ else
+ {
+ cu_top_right_offset = (cu_size) + 4;
+ cu_top_right_dep_pos =
+ (ps_ctxt->s_mc_ctxt.i4_ctb_frm_pos_y >> 6) - 1;
+ }
+
+ ihevce_dmgr_chk_row_row_sync(
+ ps_ctxt->pv_dep_mngr_enc_loop_cu_top_right,
+ curr_cu_pos_in_row,
+ cu_top_right_offset,
+ cu_top_right_dep_pos,
+ ps_ctxt->i4_tile_col_idx, /* Col Tile No. */
+ ps_ctxt->thrd_id);
+ }
+
+ u1_use_mvp_from_top_row = 1;
+ }
+ else
+ {
+ s_nbr.u1_top_avail = 0;
+ s_nbr.u1_top_lt_avail = 0;
+ s_nbr.u1_top_rt_avail = 0;
+ }
+ }
+ else
+ {
+ u1_use_mvp_from_top_row = 1;
+ }
+ }
+ /* Call the MV prediction module to get the MVP candidates in as_pred_mv */
+ ihevce_mv_pred(
+ &ps_ctxt->s_mv_pred_ctxt,
+ ps_top_nbr_4x4,
+ ps_left_nbr_4x4,
+ ps_topleft_nbr_4x4,
+ nbr_4x4_left_strd,
+ &s_nbr,
+ NULL, /* colocated MV */
+ ps_pu,
+ &as_pred_mv[0],
+ au1_is_top_used);
+ }
+
+ /* store the nbr 4x4 structure (pred l0/l1 flags are set further below) */
+ ps_nbr_4x4->b1_skip_flag = ps_inter_cand->b1_skip_flag;
+ ps_nbr_4x4->b1_intra_flag = 0;
+ ps_nbr_4x4->b1_pred_l0_flag = 0;
+ ps_nbr_4x4->b1_pred_l1_flag = 0;
+
+ /* DC is default mode for inter cu, required for intra mode signalling */
+ ps_nbr_4x4->b6_luma_intra_mode = 1;
+
+ /* copy the motion vectors to neighbour structure */
+ ps_nbr_4x4->mv = ps_pu->mv;
+
+ /* copy the PU to final out pu */
+ ps_final_prms->as_pu_enc_loop[ctr] = *ps_pu;
+
+ /* copy the PU to chroma */
+ ps_final_prms->as_pu_chrm_proc[ctr] = *ps_pu;
+
+ /* store the skip flag to final prms */
+ ps_final_prms->u1_skip_flag = ps_inter_cand->b1_skip_flag;
+
+ /* MVP index & MVD calc is gated on skip/merge flag */
+ if(0 == skip_or_merge_flag)
+ {
+ /* calculate the MVDs and populate the MVP idx for L0 */
+ if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L0 == ps_pu->b2_pred_mode))
+ {
+ WORD32 idx0_cost, idx1_cost;
+
+ /* calculate the ABS mvd for cand 0 */
+ idx0_cost = abs(ps_pu->mv.s_l0_mv.i2_mvx - as_pred_mv[0].s_l0_mv.i2_mvx);
+ idx0_cost += abs(ps_pu->mv.s_l0_mv.i2_mvy - as_pred_mv[0].s_l0_mv.i2_mvy);
+
+ /* calculate the ABS mvd for cand 1; INT_MAX when top row MVPs are not used, so idx 0 is always chosen */
+ if(u1_use_mvp_from_top_row)
+ {
+ idx1_cost = abs(ps_pu->mv.s_l0_mv.i2_mvx - as_pred_mv[1].s_l0_mv.i2_mvx);
+ idx1_cost += abs(ps_pu->mv.s_l0_mv.i2_mvy - as_pred_mv[1].s_l0_mv.i2_mvy);
+ }
+ else
+ {
+ idx1_cost = INT_MAX;
+ }
+
+ /* choose the mvp idx with the least cost (ties go to idx 0) and turn the final PU's L0 mv into an MVD */
+ if(idx0_cost <= idx1_cost)
+ {
+ ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvx -=
+ as_pred_mv[0].s_l0_mv.i2_mvx;
+ ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvy -=
+ as_pred_mv[0].s_l0_mv.i2_mvy;
+
+ ps_final_prms->as_pu_enc_loop[ctr].b1_l0_mvp_idx = 0;
+ }
+ else
+ {
+ ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvx -=
+ as_pred_mv[1].s_l0_mv.i2_mvx;
+ ps_final_prms->as_pu_enc_loop[ctr].mv.s_l0_mv.i2_mvy -=
+ as_pred_mv[1].s_l0_mv.i2_mvy;
+
+ ps_final_prms->as_pu_enc_loop[ctr].b1_l0_mvp_idx = 1;
+ }
+
+ /* set the pred l0 flag for neighbour storage */
+ ps_nbr_4x4->b1_pred_l0_flag = 1;
+ }
+ /* calculate the MVDs and populate the MVP idx for L1 */
+ if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L1 == ps_pu->b2_pred_mode))
+ {
+ WORD32 idx0_cost, idx1_cost;
+
+ /* calculate the ABS mvd for cand 0 */
+ idx0_cost = abs(ps_pu->mv.s_l1_mv.i2_mvx - as_pred_mv[0].s_l1_mv.i2_mvx);
+ idx0_cost += abs(ps_pu->mv.s_l1_mv.i2_mvy - as_pred_mv[0].s_l1_mv.i2_mvy);
+
+ /* calculate the ABS mvd for cand 1; INT_MAX when top row MVPs are not used, so idx 0 is always chosen */
+ if(u1_use_mvp_from_top_row)
+ {
+ idx1_cost = abs(ps_pu->mv.s_l1_mv.i2_mvx - as_pred_mv[1].s_l1_mv.i2_mvx);
+ idx1_cost += abs(ps_pu->mv.s_l1_mv.i2_mvy - as_pred_mv[1].s_l1_mv.i2_mvy);
+ }
+ else
+ {
+ idx1_cost = INT_MAX;
+ }
+
+ /* choose the mvp idx with the least cost (ties go to idx 0) and turn the final PU's L1 mv into an MVD */
+ if(idx0_cost <= idx1_cost)
+ {
+ ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvx -=
+ as_pred_mv[0].s_l1_mv.i2_mvx;
+ ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvy -=
+ as_pred_mv[0].s_l1_mv.i2_mvy;
+
+ ps_final_prms->as_pu_enc_loop[ctr].b1_l1_mvp_idx = 0;
+ }
+ else
+ {
+ ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvx -=
+ as_pred_mv[1].s_l1_mv.i2_mvx;
+ ps_final_prms->as_pu_enc_loop[ctr].mv.s_l1_mv.i2_mvy -=
+ as_pred_mv[1].s_l1_mv.i2_mvy;
+
+ ps_final_prms->as_pu_enc_loop[ctr].b1_l1_mvp_idx = 1;
+ }
+
+ /* set the pred l1 flag for neighbour storage */
+ ps_nbr_4x4->b1_pred_l1_flag = 1;
+ }
+
+ /* set the merge flag to 0 */
+ ps_final_prms->as_pu_enc_loop[ctr].b1_merge_flag = 0;
+ ps_final_prms->as_pu_enc_loop[ctr].b3_merge_idx = 0;
+ }
+ else
+ {
+ /* skip/merge: copy the merge flag and merge index from candidate; no MVD or MVP idx is derived */
+ ps_final_prms->as_pu_enc_loop[ctr].b1_merge_flag = ps_pu->b1_merge_flag;
+
+ ps_final_prms->as_pu_enc_loop[ctr].b3_merge_idx = ps_pu->b3_merge_idx;
+
+ if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L0 == ps_pu->b2_pred_mode))
+ {
+ /* set the pred l0 flag for neighbour storage */
+ ps_nbr_4x4->b1_pred_l0_flag = 1;
+ }
+
+ /* only mark L1 usage here (no MVD / MVP idx for skip or merge) */
+ if((PRED_BI == ps_pu->b2_pred_mode) || (PRED_L1 == ps_pu->b2_pred_mode))
+ {
+ /* set the pred l1 flag for neighbour storage */
+ ps_nbr_4x4->b1_pred_l1_flag = 1;
+ }
+ }
+
+ /* RD opt cost computation is part of cu_ntu func hence here it is set to 0 */
+ rdopt_cost = 0;
+
+ /* copy the MV to colocated Mv structure */
+ ps_final_prms->as_col_pu_enc_loop[ctr].s_l0_mv = ps_pu->mv.s_l0_mv;
+ ps_final_prms->as_col_pu_enc_loop[ctr].s_l1_mv = ps_pu->mv.s_l1_mv;
+ ps_final_prms->as_col_pu_enc_loop[ctr].i1_l0_ref_idx = ps_pu->mv.i1_l0_ref_idx;
+ ps_final_prms->as_col_pu_enc_loop[ctr].i1_l1_ref_idx = ps_pu->mv.i1_l1_ref_idx;
+ ps_final_prms->as_col_pu_enc_loop[ctr].b2_pred_mode = ps_pu->b2_pred_mode;
+ ps_final_prms->as_col_pu_enc_loop[ctr].b1_intra_flag = 0;
+
+ /* replicate neighbour 4x4 structure for entire partition (row stride of the cu array is cu_size >> 2) */
+ {
+ WORD32 i, j;
+ nbr_4x4_t *ps_tmp_4x4;
+
+ ps_tmp_4x4 = ps_nbr_4x4;
+
+ for(i = 0; i < (inter_pu_ht >> 2); i++)
+ {
+ for(j = 0; j < (inter_pu_wd >> 2); j++)
+ {
+ ps_tmp_4x4[j] = *ps_nbr_4x4;
+ }
+ /* row level update*/
+ ps_tmp_4x4 += (cu_size >> 2);
+ }
+ }
+ /* set the neighbour map to 1 for the area covered by this PU */
+ ihevce_set_inter_nbr_map(
+ ps_ctxt->pu1_ctb_nbr_map,
+ ps_ctxt->i4_nbr_map_strd,
+ cu_pos_x,
+ cu_pos_y,
+ (inter_pu_wd >> 2),
+ (inter_pu_ht >> 2),
+ 1);
+ /* ----------- Motion Compensation for Luma (compiled only when ENABLE_MIXED_INTER_MODE_EVAL is 0) ----------- */
+#if !ENABLE_MIXED_INTER_MODE_EVAL
+ {
+ IV_API_CALL_STATUS_T valid_mv_cand;
+
+ /* Run luma MC if the inter candidate is neither merge cand nor skip cand,
+ or unconditionally when u1_high_speed_cu_dec_on is set */
+ if(0 == skip_or_merge_flag || (ps_ctxt->u1_high_speed_cu_dec_on))
+ {
+ valid_mv_cand =
+ ihevce_luma_inter_pred_pu(&ps_ctxt->s_mc_ctxt, ps_pu, pu1_pred, pred_stride, 0);
+
+ /* MC is expected to succeed, i.e. it must have been given a valid mv candidate */
+ ASSERT(valid_mv_cand == IV_SUCCESS);
+ }
+ }
+#endif
+ if((2 == num_cu_part) && (0 == ctr))
+ {
+ /* 2Nx__ partition case: 2nd PU is below the 1st, so move position, pred and nbr pointers down by the 1st PU height */
+ if(inter_pu_wd == cu_size)
+ {
+ cu_pos_y += (inter_pu_ht >> 2);
+ pu1_pred += (inter_pu_ht * pred_stride);
+ ps_nbr_4x4 += (inter_pu_ht >> 2) * (cu_size >> 2);
+ ps_left_nbr_4x4 += (inter_pu_ht >> 2) * nbr_4x4_left_strd;
+ ps_top_nbr_4x4 = ps_nbr_4x4 - (cu_size >> 2);
+ ps_topleft_nbr_4x4 = ps_left_nbr_4x4 - nbr_4x4_left_strd;
+ }
+
+ /* __x2N partition case: 2nd PU is to the right; its left nbr is the 1st PU inside the cu nbr array, so the left stride becomes cu_size >> 2 */
+ if(inter_pu_ht == cu_size)
+ {
+ cu_pos_x += (inter_pu_wd >> 2);
+ pu1_pred += inter_pu_wd;
+ ps_nbr_4x4 += (inter_pu_wd >> 2);
+ ps_left_nbr_4x4 = ps_nbr_4x4 - 1;
+ ps_top_nbr_4x4 += (inter_pu_wd >> 2);
+ ps_topleft_nbr_4x4 = ps_top_nbr_4x4 - 1;
+ nbr_4x4_left_strd = (cu_size >> 2);
+ }
+ }
+ }
+
+ return (rdopt_cost);
+}
+
+/*!
+******************************************************************************
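+* \if Function name : ihevce_intra_chroma_pred_mode_selector \endif
+* (this header describes ihevce_intra_chroma_pred_mode_selector(), which is defined after the ihevce_distortion_based_intra_chroma_mode_selector() helper that immediately follows)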
+* \brief
+* Coding unit processing function for chroma special modes (Non-Luma modes)
+*
+* \param[in] ps_ctxt enc_loop module ctxt pointer
+* \param[in] ps_chrm_cu_buf_prms ctxt having chroma related prms
+* \param[in] ps_cu_analyse pointer to cu analyse
+* \param[in] rd_opt_curr_idx index in the array of RDopt params
+* \param[in] tu_mode TU_EQ_CU or other case
+*
+* \return
+* Stores the best SATD mode, its RDOPT cost, CABAC state, TU bits
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+UWORD8 ihevce_distortion_based_intra_chroma_mode_selector(
+ cu_analyse_t *ps_cu_analyse,
+ ihevc_intra_pred_chroma_ref_substitution_ft *pf_ref_substitution,
+ pf_intra_pred *ppf_chroma_ip,
+ pf_res_trans_luma_had_chroma *ppf_resd_trns_had,
+ UWORD8 *pu1_src,
+ WORD32 i4_src_stride,
+ UWORD8 *pu1_pred,
+ WORD32 i4_pred_stride,
+ UWORD8 *pu1_ctb_nbr_map,
+ WORD32 i4_nbr_map_strd,
+ UWORD8 *pu1_ref_sub_out,
+ WORD32 i4_alpha_stim_multiplier,
+ UWORD8 u1_is_cu_noisy,
+ UWORD8 u1_trans_size,
+ UWORD8 u1_trans_idx,
+ UWORD8 u1_num_tus_in_cu,
+ UWORD8 u1_num_4x4_luma_blks_in_tu,
+ UWORD8 u1_enable_psyRDOPT,
+ UWORD8 u1_is_422)
+{
+ UWORD8 u1_chrm_mode;
+ UWORD8 ctr;
+ WORD32 i4_subtu_idx;
+ /* Overview: predicts chroma with each mode of u1_chrm_modes, accumulates the Cb + Cr Hadamard SATD over all TUs / sub-TUs and returns the mode with the least total */
+ WORD32 i = 0;
+ UWORD8 u1_chrm_modes[4] = { 0, 1, 10, 26 };
+ WORD32 i4_satd_had[4] = { 0 };
+ WORD32 i4_best_satd_had = INT_MAX;
+ UWORD8 u1_cu_pos_x = (ps_cu_analyse->b3_cu_pos_x << 1);
+ UWORD8 u1_cu_pos_y = (ps_cu_analyse->b3_cu_pos_y << 1);
+ WORD32 i4_num_sub_tus = u1_is_422 + 1;
+ UWORD8 u1_best_chrm_mode = 0;
+ /* candidate modes 0, 1, 10, 26 are planar, DC, horizontal and vertical in H.265 intra mode numbering; 4:2:2 evaluates two sub-TUs per TU */
+ /* Get the best satd among all possible modes */
+ for(i = 0; i < 4; i++)
+ {
+ WORD32 left_strd = i4_src_stride;
+
+ u1_chrm_mode = (u1_is_422 == 1) ? gau1_chroma422_intra_angle_mapping[u1_chrm_modes[i]]
+ : u1_chrm_modes[i];
+
+ /* loop based on num tus in a cu: (ctr & 1) selects the right column, (ctr > 1) the bottom row */
+ for(ctr = 0; ctr < u1_num_tus_in_cu; ctr++)
+ {
+ WORD32 luma_nbr_flags;
+ WORD32 chrm_pred_func_idx;
+ /* horizontal TU step is 2 * u1_trans_size since Cb and Cr are interleaved (Cr is read at +1 below); vertical step is doubled for 4:2:2 */
+ WORD32 i4_trans_size_m2 = u1_trans_size << 1;
+ UWORD8 *pu1_tu_src = pu1_src + ((ctr & 1) * i4_trans_size_m2) +
+ (((ctr > 1) * u1_trans_size * i4_src_stride) << u1_is_422);
+ UWORD8 *pu1_tu_pred = pu1_pred + ((ctr & 1) * i4_trans_size_m2) +
+ (((ctr > 1) * u1_trans_size * i4_pred_stride) << u1_is_422);
+ WORD32 i4_curr_tu_pos_x = u1_cu_pos_x + ((ctr & 1) * u1_num_4x4_luma_blks_in_tu);
+ WORD32 i4_curr_tu_pos_y = u1_cu_pos_y + ((ctr > 1) * u1_num_4x4_luma_blks_in_tu);
+ /* neighbour availability is read from the ctb neighbour map at luma 4x4 granularity */
+ luma_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
+ pu1_ctb_nbr_map,
+ i4_nbr_map_strd,
+ i4_curr_tu_pos_x,
+ i4_curr_tu_pos_y,
+ u1_num_4x4_luma_blks_in_tu,
+ u1_num_4x4_luma_blks_in_tu);
+
+ for(i4_subtu_idx = 0; i4_subtu_idx < i4_num_sub_tus; i4_subtu_idx++)
+ {
+ WORD32 nbr_flags;
+ /* the second sub-TU is u1_trans_size rows below the first; left / top / top-left references are taken from the source buffer itself */
+ UWORD8 *pu1_cur_src =
+ pu1_tu_src + ((i4_subtu_idx == 1) * u1_trans_size * i4_src_stride);
+ UWORD8 *pu1_cur_pred =
+ pu1_tu_pred + ((i4_subtu_idx == 1) * u1_trans_size * i4_pred_stride);
+ UWORD8 *pu1_left = pu1_cur_src - 2;
+ UWORD8 *pu1_top = pu1_cur_src - i4_src_stride;
+ UWORD8 *pu1_top_left = pu1_top - 2;
+
+ nbr_flags = ihevce_get_intra_chroma_tu_nbr(
+ luma_nbr_flags, i4_subtu_idx, u1_trans_size, u1_is_422);
+
+ /* call the chroma reference array substitution; output goes to pu1_ref_sub_out */
+ pf_ref_substitution(
+ pu1_top_left,
+ pu1_top,
+ pu1_left,
+ left_strd,
+ u1_trans_size,
+ nbr_flags,
+ pu1_ref_sub_out,
+ 1);
+
+ /* use the look up to get the function idx */
+ chrm_pred_func_idx = g_i4_ip_funcs[u1_chrm_mode];
+
+ /* call the intra prediction function; prediction is written to pu1_cur_pred */
+ ppf_chroma_ip[chrm_pred_func_idx](
+ pu1_ref_sub_out, 1, pu1_cur_pred, i4_pred_stride, u1_trans_size, u1_chrm_mode);
+ /* plain SATD if the cu is not noisy or the stim multiplier is 0; else each plane's SATD goes through ihevce_inject_stim_into_distortion */
+ if(!u1_is_cu_noisy || !i4_alpha_stim_multiplier)
+ {
+ /* compute Hadamard-transform satd : Cb */
+ i4_satd_had[i] += ppf_resd_trns_had[u1_trans_idx - 1](
+ pu1_cur_src, i4_src_stride, pu1_cur_pred, i4_pred_stride, NULL, 0);
+
+ /* compute Hadamard-transform satd : Cr */
+ i4_satd_had[i] += ppf_resd_trns_had[u1_trans_idx - 1](
+ pu1_cur_src + 1, i4_src_stride, pu1_cur_pred + 1, i4_pred_stride, NULL, 0);
+ }
+ else
+ {
+ WORD32 i4_satd;
+
+ /* compute Hadamard-transform satd : Cb */
+ i4_satd = ppf_resd_trns_had[u1_trans_idx - 1](
+ pu1_cur_src, i4_src_stride, pu1_cur_pred, i4_pred_stride, NULL, 0);
+
+ i4_satd = ihevce_inject_stim_into_distortion(
+ pu1_cur_src,
+ i4_src_stride,
+ pu1_cur_pred,
+ i4_pred_stride,
+ i4_satd,
+ i4_alpha_stim_multiplier,
+ u1_trans_size,
+ 0,
+ u1_enable_psyRDOPT,
+ U_PLANE);
+
+ i4_satd_had[i] += i4_satd;
+
+ /* compute Hadamard-transform satd : Cr */
+ i4_satd = ppf_resd_trns_had[u1_trans_idx - 1](
+ pu1_cur_src + 1, i4_src_stride, pu1_cur_pred + 1, i4_pred_stride, NULL, 0);
+ /* NOTE(review): unlike the SATD call above, the un-offset src / pred pointers are passed with V_PLANE - presumably the plane argument selects Cr internally; confirm */
+ i4_satd = ihevce_inject_stim_into_distortion(
+ pu1_cur_src,
+ i4_src_stride,
+ pu1_cur_pred,
+ i4_pred_stride,
+ i4_satd,
+ i4_alpha_stim_multiplier,
+ u1_trans_size,
+ 0,
+ u1_enable_psyRDOPT,
+ V_PLANE);
+
+ i4_satd_had[i] += i4_satd;
+ }
+ }
+
+ /* set the neighbour map to 1 for this TU before the next TU is evaluated */
+ ihevce_set_nbr_map(
+ pu1_ctb_nbr_map,
+ i4_nbr_map_strd,
+ i4_curr_tu_pos_x,
+ i4_curr_tu_pos_y,
+ u1_num_4x4_luma_blks_in_tu,
+ 1);
+ }
+
+ /* set the neighbour map back to 0 for the whole cu before the next mode is tried */
+ ihevce_set_nbr_map(
+ pu1_ctb_nbr_map,
+ i4_nbr_map_strd,
+ (ps_cu_analyse->b3_cu_pos_x << 1),
+ (ps_cu_analyse->b3_cu_pos_y << 1),
+ (ps_cu_analyse->u1_cu_size >> 2),
+ 0);
+
+ /* Get the least SATD and corresponding mode (strict '>' keeps the earliest mode on ties; for 4:2:2 the stored mode is the mapped one) */
+ if(i4_best_satd_had > i4_satd_had[i])
+ {
+ i4_best_satd_had = i4_satd_had[i];
+ u1_best_chrm_mode = u1_chrm_mode;
+ }
+ }
+
+ return u1_best_chrm_mode;
+}
+
+void ihevce_intra_chroma_pred_mode_selector(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
+ cu_analyse_t *ps_cu_analyse,
+ WORD32 rd_opt_curr_idx,
+ WORD32 tu_mode,
+ WORD32 i4_alpha_stim_multiplier,
+ UWORD8 u1_is_cu_noisy)
+{
+ chroma_intra_satd_ctxt_t *ps_chr_intra_satd_ctxt;
+
+ ihevc_intra_pred_chroma_ref_substitution_ft *ihevc_intra_pred_chroma_ref_substitution_fptr;
+
+ UWORD8 *pu1_pred;
+ WORD32 trans_size;
+ WORD32 num_tus_in_cu;
+ WORD32 pred_strd;
+ WORD32 ctr;
+ WORD32 i4_subtu_idx;
+ WORD32 i4_num_sub_tus;
+ WORD32 trans_idx;
+ WORD32 scan_idx;
+ WORD32 num_4x4_luma_in_tu;
+ WORD32 cu_pos_x;
+ WORD32 cu_pos_y;
+
+ recon_datastore_t *aps_recon_datastore[2] = { &ps_ctxt->as_cu_prms[0].s_recon_datastore,
+ &ps_ctxt->as_cu_prms[1].s_recon_datastore };
+
+ LWORD64 chrm_cod_cost = 0;
+ WORD32 chrm_tu_bits = 0;
+ WORD32 best_chrm_mode = DM_CHROMA_IDX;
+ UWORD8 *pu1_chrm_src = ps_chrm_cu_buf_prms->pu1_curr_src;
+ WORD32 chrm_src_stride = ps_chrm_cu_buf_prms->i4_chrm_src_stride;
+ UWORD8 *pu1_cu_left = ps_chrm_cu_buf_prms->pu1_cu_left;
+ UWORD8 *pu1_cu_top = ps_chrm_cu_buf_prms->pu1_cu_top;
+ UWORD8 *pu1_cu_top_left = ps_chrm_cu_buf_prms->pu1_cu_top_left;
+ WORD32 cu_left_stride = ps_chrm_cu_buf_prms->i4_cu_left_stride;
+ WORD32 cu_size = ps_cu_analyse->u1_cu_size;
+ WORD32 i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq;
+ WORD32 i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh;
+ UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
+
+ ihevc_intra_pred_chroma_ref_substitution_fptr =
+ ps_ctxt->ps_func_selector->ihevc_intra_pred_chroma_ref_substitution_fptr;
+ i4_num_sub_tus = (u1_is_422 == 1) + 1;
+
+#if DISABLE_RDOQ_INTRA
+ i4_perform_rdoq = 0;
+#endif
+
+ if(TU_EQ_CU == tu_mode)
+ {
+ num_tus_in_cu = 1;
+ trans_size = cu_size >> 1;
+ num_4x4_luma_in_tu = trans_size >> 1; /*at luma level*/
+ ps_chr_intra_satd_ctxt = &ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[tu_mode];
+ }
+ else
+ {
+ num_tus_in_cu = 4;
+ trans_size = cu_size >> 2;
+ num_4x4_luma_in_tu = trans_size >> 1; /*at luma level*/
+
+ /* For 8x8 CU only one TU */
+ if(MIN_TU_SIZE > trans_size)
+ {
+ trans_size = MIN_TU_SIZE;
+ num_tus_in_cu = 1;
+ /* chroma nbr avail. is derived based on luma.
+ for 4x4 chrm use 8x8 luma's size */
+ num_4x4_luma_in_tu = num_4x4_luma_in_tu << 1;
+ }
+
+ ps_chr_intra_satd_ctxt = &ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[tu_mode];
+ }
+
+ /* Can't be TU_EQ_SUBCU case */
+ ASSERT(TU_EQ_SUBCU != tu_mode);
+
+ /* translate the transform size to index */
+ trans_idx = trans_size >> 2;
+
+ pu1_pred = (UWORD8 *)ps_chr_intra_satd_ctxt->pv_pred_data;
+
+ pred_strd = ps_chr_intra_satd_ctxt->i4_pred_stride;
+
+ /* for 16x16 cases */
+ if(16 == trans_size)
+ {
+ trans_idx = 3;
+ }
+
+ best_chrm_mode = ihevce_distortion_based_intra_chroma_mode_selector(
+ ps_cu_analyse,
+ ihevc_intra_pred_chroma_ref_substitution_fptr,
+ ps_ctxt->apf_chrm_ip,
+ ps_ctxt->apf_chrm_resd_trns_had,
+ pu1_chrm_src,
+ chrm_src_stride,
+ pu1_pred,
+ pred_strd,
+ ps_ctxt->pu1_ctb_nbr_map,
+ ps_ctxt->i4_nbr_map_strd,
+ (UWORD8 *)ps_ctxt->pv_ref_sub_out,
+ i4_alpha_stim_multiplier,
+ u1_is_cu_noisy,
+ trans_size,
+ trans_idx,
+ num_tus_in_cu,
+ num_4x4_luma_in_tu,
+ ps_ctxt->u1_enable_psyRDOPT,
+ u1_is_422);
+
+ /* Store the best chroma mode */
+ ps_chr_intra_satd_ctxt->u1_best_cr_mode = best_chrm_mode;
+
+ /* evaluate RDOPT cost for the Best mode */
+ {
+ WORD32 i4_subtu_pos_x;
+ WORD32 i4_subtu_pos_y;
+ UWORD8 u1_compute_spatial_ssd;
+
+ WORD32 ai4_total_bytes_offset_cb[2] = { 0, 0 };
+ WORD32 ai4_total_bytes_offset_cr[2] = { 0, 0 };
+ /* State for prefix bin of chroma intra pred mode before CU encode */
+ UWORD8 u1_chroma_intra_mode_prefix_state =
+ ps_ctxt->au1_rdopt_init_ctxt_models[IHEVC_CAB_CHROMA_PRED_MODE];
+ WORD32 luma_trans_size = trans_size << 1;
+ WORD32 calc_recon = 0;
+ UWORD8 *pu1_left = pu1_cu_left;
+ UWORD8 *pu1_top = pu1_cu_top;
+ UWORD8 *pu1_top_left = pu1_cu_top_left;
+ WORD32 left_strd = cu_left_stride;
+
+ if(ps_ctxt->i1_cu_qp_delta_enable)
+ {
+ WORD32 i4_act_counter = 0, i4_act_counter_lamda = 0;
+ if(ps_cu_analyse->u1_cu_size == 64)
+ {
+ ASSERT(
+ (luma_trans_size == 32) || (luma_trans_size == 16) || (luma_trans_size == 8) ||
+ (luma_trans_size == 4));
+ i4_act_counter = (luma_trans_size == 16) +
+ 2 * ((luma_trans_size == 8) || (luma_trans_size == 4));
+ i4_act_counter_lamda = 3;
+ }
+ else if(ps_cu_analyse->u1_cu_size == 32)
+ {
+ ASSERT(
+ (luma_trans_size == 32) || (luma_trans_size == 16) || (luma_trans_size == 8) ||
+ (luma_trans_size == 4));
+ i4_act_counter = (luma_trans_size == 16) +
+ 2 * ((luma_trans_size == 8) || (luma_trans_size == 4));
+ i4_act_counter_lamda = 0;
+ }
+ else if(ps_cu_analyse->u1_cu_size == 16)
+ {
+ ASSERT((luma_trans_size == 16) || (luma_trans_size == 8) || (luma_trans_size == 4));
+ i4_act_counter = (luma_trans_size == 8) || (luma_trans_size == 4);
+ i4_act_counter_lamda = 0;
+ }
+ else if(ps_cu_analyse->u1_cu_size == 8)
+ {
+ ASSERT((luma_trans_size == 8) || (luma_trans_size == 4));
+ i4_act_counter = 1;
+ i4_act_counter_lamda = 0;
+ }
+ else
+ {
+ ASSERT(0);
+ }
+ /*assumption is that control comes here for intras*/
+ if(ps_ctxt->i4_use_ctb_level_lamda)
+ {
+ ihevce_compute_cu_level_QP(
+ ps_ctxt, ps_cu_analyse->i4_act_factor[i4_act_counter][1], -1, 0);
+ }
+ else
+ {
+ ihevce_compute_cu_level_QP(
+ ps_ctxt,
+ ps_cu_analyse->i4_act_factor[i4_act_counter][1],
+ ps_cu_analyse->i4_act_factor[i4_act_counter_lamda][1],
+ 0);
+ }
+
+ ps_cu_analyse->i1_cu_qp = ps_ctxt->i4_cu_qp;
+ }
+
+ u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
+ (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
+ CONVERT_SSDS_TO_SPATIAL_DOMAIN;
+
+ if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
+ {
+ u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
+ CONVERT_SSDS_TO_SPATIAL_DOMAIN;
+ }
+
+ /* get the 4x4 level postion of current cu */
+ cu_pos_x = (ps_cu_analyse->b3_cu_pos_x << 1);
+ cu_pos_y = (ps_cu_analyse->b3_cu_pos_y << 1);
+
+ calc_recon = !u1_compute_spatial_ssd && ((4 == num_tus_in_cu) || (u1_is_422 == 1));
+
+ if(calc_recon || u1_compute_spatial_ssd)
+ {
+ aps_recon_datastore[0]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 1;
+ aps_recon_datastore[1]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 1;
+ }
+ else
+ {
+ aps_recon_datastore[0]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 0;
+ aps_recon_datastore[1]->au1_is_chromaRecon_available[1 + (num_tus_in_cu > 1)] = 0;
+ }
+
+ /* loop based on num tus in a cu */
+ for(ctr = 0; ctr < num_tus_in_cu; ctr++)
+ {
+ WORD16 *pi2_cur_deq_data_cb;
+ WORD16 *pi2_cur_deq_data_cr;
+
+ WORD32 deq_data_strd = ps_chr_intra_satd_ctxt->i4_iq_buff_stride;
+ WORD32 luma_nbr_flags = 0;
+
+ luma_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
+ ps_ctxt->pu1_ctb_nbr_map,
+ ps_ctxt->i4_nbr_map_strd,
+ (ctr & 1) * (luma_trans_size >> 2) + cu_pos_x,
+ (ctr > 1) * (luma_trans_size >> 2) + cu_pos_y,
+ (luma_trans_size >> 2),
+ (luma_trans_size >> 2));
+
+ for(i4_subtu_idx = 0; i4_subtu_idx < i4_num_sub_tus; i4_subtu_idx++)
+ {
+ WORD32 cbf, num_bytes;
+ LWORD64 trans_ssd_u, trans_ssd_v;
+ UWORD8 u1_is_recon_available;
+
+ WORD32 trans_size_m2 = trans_size << 1;
+ UWORD8 *pu1_cur_src = pu1_chrm_src + ((ctr & 1) * trans_size_m2) +
+ (((ctr > 1) * trans_size * chrm_src_stride) << u1_is_422) +
+ (i4_subtu_idx * trans_size * chrm_src_stride);
+ UWORD8 *pu1_cur_pred = pu1_pred + ((ctr & 1) * trans_size_m2) +
+ (((ctr > 1) * trans_size * pred_strd) << u1_is_422) +
+ (i4_subtu_idx * trans_size * pred_strd);
+ WORD32 i4_recon_stride = aps_recon_datastore[0]->i4_chromaRecon_stride;
+ UWORD8 *pu1_cur_recon = ((UWORD8 *)aps_recon_datastore[0]
+ ->apv_chroma_recon_bufs[1 + (num_tus_in_cu > 1)]) +
+ ((ctr & 1) * trans_size_m2) +
+ (((ctr > 1) * trans_size * i4_recon_stride) << u1_is_422) +
+ (i4_subtu_idx * trans_size * i4_recon_stride);
+
+ /* Use Chroma coeff/iq buf of the cur. intra cand. Not rememb.
+ chroma coeff/iq for high quality intra SATD special modes. Will
+ be over written by coeff of luma mode in chroma_rdopt call */
+ UWORD8 *pu1_ecd_data_cb =
+ &ps_chr_intra_satd_ctxt->au1_scan_coeff_cb[i4_subtu_idx][0];
+ UWORD8 *pu1_ecd_data_cr =
+ &ps_chr_intra_satd_ctxt->au1_scan_coeff_cr[i4_subtu_idx][0];
+
+ WORD32 chrm_pred_func_idx = 0;
+ LWORD64 curr_cb_cod_cost = 0;
+ LWORD64 curr_cr_cod_cost = 0;
+ WORD32 nbr_flags = 0;
+
+ i4_subtu_pos_x = (((ctr & 1) * trans_size_m2) >> 2);
+ i4_subtu_pos_y = (((ctr > 1) * trans_size) >> (!u1_is_422 + 1)) +
+ ((i4_subtu_idx * trans_size) >> 2);
+ pi2_cur_deq_data_cb = &ps_chr_intra_satd_ctxt->ai2_iq_data_cb[0] +
+ ((ctr & 1) * trans_size) +
+ (((ctr > 1) * trans_size * deq_data_strd) << u1_is_422) +
+ (i4_subtu_idx * trans_size * deq_data_strd);
+ pi2_cur_deq_data_cr = &ps_chr_intra_satd_ctxt->ai2_iq_data_cr[0] +
+ ((ctr & 1) * trans_size) +
+ (((ctr > 1) * trans_size * deq_data_strd) << u1_is_422) +
+ (i4_subtu_idx * trans_size * deq_data_strd);
+
+ /* left cu boundary */
+ if(0 == i4_subtu_pos_x)
+ {
+ left_strd = cu_left_stride;
+ pu1_left = pu1_cu_left + (i4_subtu_pos_y << 2) * left_strd;
+ }
+ else
+ {
+ pu1_left = pu1_cur_recon - 2;
+ left_strd = i4_recon_stride;
+ }
+
+ /* top cu boundary */
+ if(0 == i4_subtu_pos_y)
+ {
+ pu1_top = pu1_cu_top + (i4_subtu_pos_x << 2);
+ }
+ else
+ {
+ pu1_top = pu1_cur_recon - i4_recon_stride;
+ }
+
+ /* by default top left is set to cu top left */
+ pu1_top_left = pu1_cu_top_left;
+
+ /* top left based on position */
+ if((0 != i4_subtu_pos_y) && (0 == i4_subtu_pos_x))
+ {
+ pu1_top_left = pu1_left - left_strd;
+ }
+ else if(0 != i4_subtu_pos_x)
+ {
+ pu1_top_left = pu1_top - 2;
+ }
+
+ /* populate the coeffs scan idx */
+ scan_idx = SCAN_DIAG_UPRIGHT;
+
+ /* RDOPT copy States : TU init (best until prev TU) to current */
+ COPY_CABAC_STATES(
+ &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
+ .s_cabac_ctxt.au1_ctxt_models[0],
+ &ps_ctxt->au1_rdopt_init_ctxt_models[0],
+ IHEVC_CAB_CTXT_END);
+
+ /* for 4x4 transforms based on intra pred mode scan is choosen*/
+ if(4 == trans_size)
+ {
+ /* for modes from 22 upto 30 horizontal scan is used */
+ if((best_chrm_mode > 21) && (best_chrm_mode < 31))
+ {
+ scan_idx = SCAN_HORZ;
+ }
+ /* for modes from 6 upto 14 horizontal scan is used */
+ else if((best_chrm_mode > 5) && (best_chrm_mode < 15))
+ {
+ scan_idx = SCAN_VERT;
+ }
+ }
+
+ nbr_flags = ihevce_get_intra_chroma_tu_nbr(
+ luma_nbr_flags, i4_subtu_idx, trans_size, u1_is_422);
+
+ /* call the chroma reference array substitution */
+ ihevc_intra_pred_chroma_ref_substitution_fptr(
+ pu1_top_left,
+ pu1_top,
+ pu1_left,
+ left_strd,
+ trans_size,
+ nbr_flags,
+ (UWORD8 *)ps_ctxt->pv_ref_sub_out,
+ 1);
+
+ /* use the look up to get the function idx */
+ chrm_pred_func_idx = g_i4_ip_funcs[best_chrm_mode];
+
+ /* call the intra prediction function */
+ ps_ctxt->apf_chrm_ip[chrm_pred_func_idx](
+ (UWORD8 *)ps_ctxt->pv_ref_sub_out,
+ 1,
+ pu1_cur_pred,
+ pred_strd,
+ trans_size,
+ best_chrm_mode);
+
+ /* UPLANE RDOPT Loop */
+ {
+ WORD32 tu_bits;
+
+ cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
+ ps_ctxt,
+ pu1_cur_pred,
+ pred_strd,
+ pu1_cur_src,
+ chrm_src_stride,
+ pi2_cur_deq_data_cb,
+ deq_data_strd,
+ pu1_cur_recon,
+ i4_recon_stride,
+ pu1_ecd_data_cb + ai4_total_bytes_offset_cb[i4_subtu_idx],
+ ps_ctxt->au1_cu_csbf,
+ ps_ctxt->i4_cu_csbf_strd,
+ trans_size,
+ scan_idx,
+ 1,
+ &num_bytes,
+ &tu_bits,
+ &ps_chr_intra_satd_ctxt->ai4_zero_col_cb[i4_subtu_idx][ctr],
+ &ps_chr_intra_satd_ctxt->ai4_zero_row_cb[i4_subtu_idx][ctr],
+ &u1_is_recon_available,
+ i4_perform_sbh,
+ i4_perform_rdoq,
+ &trans_ssd_u,
+#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ i4_alpha_stim_multiplier,
+ u1_is_cu_noisy,
+#endif
+ 0,
+ u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
+ U_PLANE);
+
+#if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS && COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL
+ if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
+ {
+#if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
+ trans_ssd_u = ihevce_inject_stim_into_distortion(
+ pu1_cur_src,
+ chrm_src_stride,
+ pu1_cur_pred,
+ pred_strd,
+ trans_ssd_u,
+ i4_alpha_stim_multiplier,
+ trans_size,
+ 0,
+ ps_ctxt->u1_enable_psyRDOPT,
+ U_PLANE);
+#else
+ if(u1_compute_spatial_ssd && u1_is_recon_available)
+ {
+ trans_ssd_u = ihevce_inject_stim_into_distortion(
+ pu1_cur_src,
+ chrm_src_stride,
+ pu1_cur_recon,
+ i4_recon_stride,
+ trans_ssd_u,
+ i4_alpha_stim_multiplier,
+ trans_size,
+ 0,
+ ps_ctxt->u1_enable_psyRDOPT,
+ U_PLANE);
+ }
+ else
+ {
+ trans_ssd_u = ihevce_inject_stim_into_distortion(
+ pu1_cur_src,
+ chrm_src_stride,
+ pu1_cur_pred,
+ pred_strd,
+ trans_ssd_u,
+ i4_alpha_stim_multiplier,
+ trans_size,
+ 0,
+ ps_ctxt->u1_enable_psyRDOPT,
+ U_PLANE);
+ }
+#endif
+ }
+#endif
+
+ /* RDOPT copy States : New updated after curr TU to TU init */
+ if(0 != cbf)
+ {
+ memcpy(
+ &ps_ctxt->au1_rdopt_init_ctxt_models[0],
+ &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
+ .s_cabac_ctxt.au1_ctxt_models[0],
+ IHEVC_CAB_CTXT_END);
+ }
+ /* RDOPT copy States : Restoring back the Cb init state to Cr */
+ else
+ {
+ memcpy(
+ &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
+ .s_cabac_ctxt.au1_ctxt_models[0],
+ &ps_ctxt->au1_rdopt_init_ctxt_models[0],
+ IHEVC_CAB_CTXT_END);
+ }
+
+ if(calc_recon || (!u1_is_recon_available && u1_compute_spatial_ssd))
+ {
+ ihevce_chroma_it_recon_fxn(
+ ps_ctxt,
+ pi2_cur_deq_data_cb,
+ deq_data_strd,
+ pu1_cur_pred,
+ pred_strd,
+ pu1_cur_recon,
+ i4_recon_stride,
+ (pu1_ecd_data_cb + ai4_total_bytes_offset_cb[i4_subtu_idx]),
+ trans_size,
+ cbf,
+ ps_chr_intra_satd_ctxt->ai4_zero_col_cb[i4_subtu_idx][ctr],
+ ps_chr_intra_satd_ctxt->ai4_zero_row_cb[i4_subtu_idx][ctr],
+ U_PLANE);
+ }
+
+ ps_chr_intra_satd_ctxt->au1_cbf_cb[i4_subtu_idx][ctr] = cbf;
+ curr_cb_cod_cost =
+ trans_ssd_u +
+ COMPUTE_RATE_COST_CLIP30(
+ tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
+ chrm_tu_bits += tu_bits;
+ ai4_total_bytes_offset_cb[i4_subtu_idx] += num_bytes;
+ ps_chr_intra_satd_ctxt->ai4_num_bytes_scan_coeff_cb_per_tu[i4_subtu_idx][ctr] =
+ num_bytes;
+ }
+
+ /* VPLANE RDOPT Loop */
+ {
+ WORD32 tu_bits;
+
+ cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
+ ps_ctxt,
+ pu1_cur_pred,
+ pred_strd,
+ pu1_cur_src,
+ chrm_src_stride,
+ pi2_cur_deq_data_cr,
+ deq_data_strd,
+ pu1_cur_recon,
+ i4_recon_stride,
+ pu1_ecd_data_cr + ai4_total_bytes_offset_cr[i4_subtu_idx],
+ ps_ctxt->au1_cu_csbf,
+ ps_ctxt->i4_cu_csbf_strd,
+ trans_size,
+ scan_idx,
+ 1,
+ &num_bytes,
+ &tu_bits,
+ &ps_chr_intra_satd_ctxt->ai4_zero_col_cr[i4_subtu_idx][ctr],
+ &ps_chr_intra_satd_ctxt->ai4_zero_row_cr[i4_subtu_idx][ctr],
+ &u1_is_recon_available,
+ i4_perform_sbh,
+ i4_perform_rdoq,
+ &trans_ssd_v,
+#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ i4_alpha_stim_multiplier,
+ u1_is_cu_noisy,
+#endif
+ 0,
+ u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
+ V_PLANE);
+
+#if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS && COMPUTE_NOISE_TERM_AT_THE_TU_LEVEL
+ if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
+ {
+#if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
+ trans_ssd_v = ihevce_inject_stim_into_distortion(
+ pu1_cur_src,
+ chrm_src_stride,
+ pu1_cur_pred,
+ pred_strd,
+ trans_ssd_v,
+ i4_alpha_stim_multiplier,
+ trans_size,
+ 0,
+ ps_ctxt->u1_enable_psyRDOPT,
+ V_PLANE);
+#else
+ if(u1_compute_spatial_ssd && u1_is_recon_available)
+ {
+ trans_ssd_v = ihevce_inject_stim_into_distortion(
+ pu1_cur_src,
+ chrm_src_stride,
+ pu1_cur_recon,
+ i4_recon_stride,
+ trans_ssd_v,
+ i4_alpha_stim_multiplier,
+ trans_size,
+ 0,
+ ps_ctxt->u1_enable_psyRDOPT,
+ V_PLANE);
+ }
+ else
+ {
+ trans_ssd_v = ihevce_inject_stim_into_distortion(
+ pu1_cur_src,
+ chrm_src_stride,
+ pu1_cur_pred,
+ pred_strd,
+ trans_ssd_v,
+ i4_alpha_stim_multiplier,
+ trans_size,
+ 0,
+ ps_ctxt->u1_enable_psyRDOPT,
+ V_PLANE);
+ }
+#endif
+ }
+#endif
+
+ /* RDOPT copy States : New updated after curr TU to TU init */
+ if(0 != cbf)
+ {
+ COPY_CABAC_STATES(
+ &ps_ctxt->au1_rdopt_init_ctxt_models[0],
+ &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
+ .s_cabac_ctxt.au1_ctxt_models[0],
+ IHEVC_CAB_CTXT_END);
+ }
+ /* RDOPT copy States : Restoring back the Cb init state to Cr */
+ else
+ {
+ COPY_CABAC_STATES(
+ &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
+ .s_cabac_ctxt.au1_ctxt_models[0],
+ &ps_ctxt->au1_rdopt_init_ctxt_models[0],
+ IHEVC_CAB_CTXT_END);
+ }
+
+ if(calc_recon || (!u1_is_recon_available && u1_compute_spatial_ssd))
+ {
+ ihevce_chroma_it_recon_fxn(
+ ps_ctxt,
+ pi2_cur_deq_data_cr,
+ deq_data_strd,
+ pu1_cur_pred,
+ pred_strd,
+ pu1_cur_recon,
+ i4_recon_stride,
+ (pu1_ecd_data_cr + ai4_total_bytes_offset_cr[i4_subtu_idx]),
+ trans_size,
+ cbf,
+ ps_chr_intra_satd_ctxt->ai4_zero_col_cr[i4_subtu_idx][ctr],
+ ps_chr_intra_satd_ctxt->ai4_zero_row_cr[i4_subtu_idx][ctr],
+ V_PLANE);
+ }
+
+ ps_chr_intra_satd_ctxt->au1_cbf_cr[i4_subtu_idx][ctr] = cbf;
+ curr_cr_cod_cost =
+ trans_ssd_v +
+ COMPUTE_RATE_COST_CLIP30(
+ tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
+ chrm_tu_bits += tu_bits;
+ ai4_total_bytes_offset_cr[i4_subtu_idx] += num_bytes;
+ ps_chr_intra_satd_ctxt->ai4_num_bytes_scan_coeff_cr_per_tu[i4_subtu_idx][ctr] =
+ num_bytes;
+ }
+
+ chrm_cod_cost += curr_cb_cod_cost;
+ chrm_cod_cost += curr_cr_cod_cost;
+ }
+
+ /* set the neighbour map to 1 */
+ ihevce_set_nbr_map(
+ ps_ctxt->pu1_ctb_nbr_map,
+ ps_ctxt->i4_nbr_map_strd,
+ (ctr & 1) * (luma_trans_size >> 2) + cu_pos_x,
+ (ctr > 1) * (luma_trans_size >> 2) + cu_pos_y,
+ (luma_trans_size >> 2),
+ 1);
+ }
+
+ /* set the neighbour map to 0 */
+ ihevce_set_nbr_map(
+ ps_ctxt->pu1_ctb_nbr_map,
+ ps_ctxt->i4_nbr_map_strd,
+ (ps_cu_analyse->b3_cu_pos_x << 1),
+ (ps_cu_analyse->b3_cu_pos_y << 1),
+ (ps_cu_analyse->u1_cu_size >> 2),
+ 0);
+
+ /* Account for coding b3_chroma_intra_pred_mode prefix and suffix bins */
+ /* This is done by adding the bits for signalling chroma mode (0-3) */
+ /* and subtracting the bits for chroma mode same as luma mode (4) */
+#if CHROMA_RDOPT_ENABLE
+ {
+ /* Estimate bits to encode prefix bin as 1 for b3_chroma_intra_pred_mode */
+ WORD32 bits_frac_1 =
+ gau2_ihevce_cabac_bin_to_bits[u1_chroma_intra_mode_prefix_state ^ 1];
+
+ WORD32 bits_for_mode_0to3 = (2 << CABAC_FRAC_BITS_Q) + bits_frac_1;
+
+ /* Estimate bits to encode prefix bin as 0 for b3_chroma_intra_pred_mode */
+ WORD32 bits_for_mode4 =
+ gau2_ihevce_cabac_bin_to_bits[u1_chroma_intra_mode_prefix_state ^ 0];
+
+ /* accumulate into final rd cost for chroma */
+ ps_chr_intra_satd_ctxt->i8_cost_to_encode_chroma_mode = COMPUTE_RATE_COST_CLIP30(
+ (bits_for_mode_0to3 - bits_for_mode4),
+ ps_ctxt->i8_cl_ssd_lambda_chroma_qf,
+ (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
+
+ chrm_cod_cost += ps_chr_intra_satd_ctxt->i8_cost_to_encode_chroma_mode;
+ }
+#endif
+
+ if(ps_ctxt->u1_enable_psyRDOPT)
+ {
+ UWORD8 *pu1_recon_cu;
+ WORD32 recon_stride;
+ WORD32 curr_pos_x;
+ WORD32 curr_pos_y;
+ WORD32 start_index;
+ WORD32 num_horz_cu_in_ctb;
+ WORD32 had_block_size;
+
+ /* tODO: sreenivasa ctb size has to be used appropriately */
+ had_block_size = 8;
+ num_horz_cu_in_ctb = 2 * 64 / had_block_size;
+ curr_pos_x = ps_cu_analyse->b3_cu_pos_x << 3; /* pel units */
+ curr_pos_y = ps_cu_analyse->b3_cu_pos_x << 3; /* pel units */
+ recon_stride = aps_recon_datastore[0]->i4_chromaRecon_stride;
+ pu1_recon_cu =
+ aps_recon_datastore[0]->apv_chroma_recon_bufs[1 + (num_tus_in_cu > 1)]; //
+
+ /* start index to index the source satd of curr cu in the current ctb */
+ start_index = 2 * (curr_pos_x / had_block_size) +
+ (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
+
+ {
+ chrm_cod_cost += ihevce_psy_rd_cost_croma(
+ ps_ctxt->ai4_source_chroma_satd,
+ pu1_recon_cu,
+ recon_stride,
+ 1, //
+ cu_size,
+ 0, // pic type
+ 0, //layer id
+ ps_ctxt->i4_satd_lamda, // lambda
+ start_index,
+ ps_ctxt->u1_is_input_data_hbd, // 8 bit
+ ps_ctxt->u1_chroma_array_type,
+ &ps_ctxt->s_cmn_opt_func
+
+ ); // chroma subsampling 420
+ }
+ }
+
+ ps_chr_intra_satd_ctxt->i8_chroma_best_rdopt = chrm_cod_cost;
+ ps_chr_intra_satd_ctxt->i4_chrm_tu_bits = chrm_tu_bits;
+
+ memcpy(
+ &ps_chr_intra_satd_ctxt->au1_chrm_satd_updated_ctxt_models[0],
+ &ps_ctxt->au1_rdopt_init_ctxt_models[0],
+ IHEVC_CAB_CTXT_END);
+ }
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_chroma_cu_prcs_rdopt \endif
+*
+* \brief
+* Coding unit processing function for chroma
+*
+* \param[in] ps_ctxt enc_loop module ctxt pointer
+* \param[in] rd_opt_curr_idx index in the array of RDopt params
+* \param[in] func_proc_mode TU_EQ_CU or other case
+* \param[in] pu1_chrm_src pointer to source data buffer
+* \param[in] chrm_src_stride source buffer stride
+* \param[in] pu1_cu_left pointer to left recon data buffer
+* \param[in] pu1_cu_top pointer to top recon data buffer
+* \param[in] pu1_cu_top_left pointer to top left recon data buffer
+* \param[in] cu_left_stride left recon buffer stride
+* \param[in] cu_pos_x position x of current CU in CTB
+* \param[in] cu_pos_y position y of current CU in CTB
+* \param[out] pi4_chrm_tu_bits pointer to store the total chroma bits
+*
+* \return
+* Chroma coding cost (Cb and Cr included)
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+LWORD64 ihevce_chroma_cu_prcs_rdopt(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ WORD32 rd_opt_curr_idx,
+ WORD32 func_proc_mode,
+ UWORD8 *pu1_chrm_src,
+ WORD32 chrm_src_stride,
+ UWORD8 *pu1_cu_left,
+ UWORD8 *pu1_cu_top,
+ UWORD8 *pu1_cu_top_left,
+ WORD32 cu_left_stride,
+ WORD32 cu_pos_x,
+ WORD32 cu_pos_y,
+ WORD32 *pi4_chrm_tu_bits,
+ WORD32 i4_alpha_stim_multiplier,
+ UWORD8 u1_is_cu_noisy)
+{
+ tu_enc_loop_out_t *ps_tu;
+ tu_enc_loop_temp_prms_t *ps_tu_temp_prms;
+
+ ihevc_intra_pred_chroma_ref_substitution_ft *ihevc_intra_pred_chroma_ref_substitution_fptr;
+
+ UWORD8 *pu1_pred;
+ UWORD8 *pu1_recon;
+ WORD32 i4_recon_stride;
+ WORD32 cu_size, trans_size = 0;
+ WORD32 pred_strd;
+ WORD32 ctr, i4_subtu_idx;
+ WORD32 scan_idx;
+ WORD32 u1_is_cu_coded_old;
+ WORD32 init_bytes_offset;
+
+ enc_loop_cu_final_prms_t *ps_best_cu_prms = &ps_ctxt->as_cu_prms[rd_opt_curr_idx];
+ recon_datastore_t *ps_recon_datastore = &ps_best_cu_prms->s_recon_datastore;
+
+ WORD32 total_bytes_offset = 0;
+ LWORD64 chrm_cod_cost = 0;
+ WORD32 chrm_tu_bits = 0;
+ WORD32 chrm_pred_mode = DM_CHROMA_IDX, luma_pred_mode = 35;
+ LWORD64 i8_ssd_cb = 0;
+ WORD32 i4_bits_cb = 0;
+ LWORD64 i8_ssd_cr = 0;
+ WORD32 i4_bits_cr = 0;
+ UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
+ UWORD8 u1_num_tus =
+ /* NumChromaTU's = 1, if TUSize = 4 and CUSize = 8 */
+ (!ps_best_cu_prms->as_tu_enc_loop[0].s_tu.b3_size && ps_best_cu_prms->u1_intra_flag)
+ ? 1
+ : ps_best_cu_prms->u2_num_tus_in_cu;
+ UWORD8 u1_num_subtus_in_tu = u1_is_422 + 1;
+ UWORD8 u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_QP_WHERE_SPATIAL_SSD_ENABLED) &&
+ (ps_ctxt->i4_quality_preset < IHEVCE_QUALITY_P3) &&
+ CONVERT_SSDS_TO_SPATIAL_DOMAIN;
+ /* Get the RDOPT cost of the best CU mode for early_exit */
+ LWORD64 prev_best_rdopt_cost = ps_ctxt->as_cu_prms[!rd_opt_curr_idx].i8_best_rdopt_cost;
+ /* Get the current running RDOPT (Luma RDOPT) for early_exit */
+ LWORD64 curr_rdopt_cost = ps_ctxt->as_cu_prms[rd_opt_curr_idx].i8_curr_rdopt_cost;
+ WORD32 i4_perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq;
+ WORD32 i4_perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh;
+
+ ihevc_intra_pred_chroma_ref_substitution_fptr =
+ ps_ctxt->ps_func_selector->ihevc_intra_pred_chroma_ref_substitution_fptr;
+
+ if(u1_is_cu_noisy || ps_ctxt->u1_enable_psyRDOPT)
+ {
+ u1_compute_spatial_ssd = (ps_ctxt->i4_cu_qp <= MAX_HEVC_QP) &&
+ CONVERT_SSDS_TO_SPATIAL_DOMAIN;
+ }
+
+ /* Store the init bytes offset from luma */
+ init_bytes_offset = ps_best_cu_prms->i4_num_bytes_ecd_data;
+
+ /* Unused pred buffer in merge_skip_pred_data_t structure is used as
+ Chroma pred storage buf. for final_recon function.
+ The buffer is split into two and used as a ping-pong buffer */
+ pu1_pred = ps_ctxt->s_cu_me_intra_pred_prms.pu1_pred_data[CU_ME_INTRA_PRED_CHROMA_IDX] +
+ rd_opt_curr_idx * ((MAX_CTB_SIZE * MAX_CTB_SIZE >> 1) +
+ (u1_is_422 * (MAX_CTB_SIZE * MAX_CTB_SIZE >> 1)));
+
+ pred_strd = ps_ctxt->s_cu_me_intra_pred_prms.ai4_pred_data_stride[CU_ME_INTRA_PRED_CHROMA_IDX];
+
+ pu1_recon = (UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs[0];
+ i4_recon_stride = ps_recon_datastore->i4_chromaRecon_stride;
+ cu_size = ps_best_cu_prms->u1_cu_size;
+ chrm_tu_bits = 0;
+
+ /* get the first TU pointer */
+ ps_tu = &ps_best_cu_prms->as_tu_enc_loop[0];
+ /* get the first TU enc_loop temp prms pointer */
+ ps_tu_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
+
+ if(PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag)
+ {
+ /* Mode signalled by intra prediction for luma */
+ luma_pred_mode = ps_best_cu_prms->au1_intra_pred_mode[0];
+
+#if DISABLE_RDOQ_INTRA
+ i4_perform_rdoq = 0;
+#endif
+ }
+
+ else
+ {
+ UWORD8 *pu1_pred_org = pu1_pred;
+
+ /* ------ Motion Compensation for Chroma -------- */
+ for(ctr = 0; ctr < ps_best_cu_prms->u2_num_pus_in_cu; ctr++)
+ {
+ pu_t *ps_pu;
+ WORD32 inter_pu_wd;
+ WORD32 inter_pu_ht;
+
+ ps_pu = &ps_best_cu_prms->as_pu_chrm_proc[ctr];
+
+ inter_pu_wd = (ps_pu->b4_wd + 1) << 2; /* cb and cr pixel interleaved */
+ inter_pu_ht = ((ps_pu->b4_ht + 1) << 2) >> 1;
+ inter_pu_ht <<= u1_is_422;
+
+ ihevce_chroma_inter_pred_pu(&ps_ctxt->s_mc_ctxt, ps_pu, pu1_pred, pred_strd);
+
+ if(2 == ps_best_cu_prms->u2_num_pus_in_cu)
+ {
+ /* 2Nx__ partion case */
+ if(inter_pu_wd == cu_size)
+ {
+ pu1_pred += (inter_pu_ht * pred_strd);
+ }
+
+ /* __x2N partion case */
+ if(inter_pu_ht == (cu_size >> (u1_is_422 == 0)))
+ {
+ pu1_pred += inter_pu_wd;
+ }
+ }
+ }
+
+ /* restore the pred pointer to start for transform loop */
+ pu1_pred = pu1_pred_org;
+ }
+
+ /* Used to store back only the luma based info. if SATD based chorma
+ mode also comes */
+ u1_is_cu_coded_old = ps_best_cu_prms->u1_is_cu_coded;
+
+ /* evaluate chroma candidates (same as luma) and
+ if INTRA & HIGH_QUALITY compare with best SATD mode */
+ {
+ WORD32 calc_recon = 0, deq_data_strd;
+ WORD16 *pi2_deq_data;
+ UWORD8 *pu1_ecd_data;
+ UWORD8 u1_is_mode_eq_chroma_satd_mode = 0;
+
+ pi2_deq_data = &ps_best_cu_prms->pi2_cu_deq_coeffs[0];
+ pi2_deq_data += ps_best_cu_prms->i4_chrm_deq_coeff_strt_idx;
+ deq_data_strd = cu_size;
+ /* update ecd buffer for storing coeff. */
+ pu1_ecd_data = &ps_best_cu_prms->pu1_cu_coeffs[0];
+ pu1_ecd_data += init_bytes_offset;
+ /* store chroma starting index */
+ ps_best_cu_prms->i4_chrm_cu_coeff_strt_idx = init_bytes_offset;
+
+ /* get the first TU pointer */
+ ps_tu = &ps_best_cu_prms->as_tu_enc_loop[0];
+ ps_tu_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
+
+ /* Reset total_bytes_offset for each candidate */
+ chrm_pred_mode = (u1_is_422 == 1) ? gau1_chroma422_intra_angle_mapping[luma_pred_mode]
+ : luma_pred_mode;
+
+ total_bytes_offset = 0;
+
+ if(TU_EQ_SUBCU == func_proc_mode)
+ {
+ func_proc_mode = TU_EQ_CU_DIV2;
+ }
+
+ /* For cu_size=8 case, chroma cost will be same for TU_EQ_CU and
+ TU_EQ_CU_DIV2 and TU_EQ_SUBCU case */
+ if(8 == cu_size)
+ {
+ func_proc_mode = TU_EQ_CU;
+ }
+
+ /* loop based on num tus in a cu */
+ if(!ps_best_cu_prms->u1_intra_flag || !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd ||
+ (ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd &&
+ (chrm_pred_mode !=
+ ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[func_proc_mode].u1_best_cr_mode)))
+ {
+ /* loop based on num tus in a cu */
+ for(ctr = 0; ctr < u1_num_tus; ctr++)
+ {
+ WORD32 num_bytes = 0;
+ LWORD64 curr_cb_cod_cost = 0;
+ LWORD64 curr_cr_cod_cost = 0;
+ WORD32 chrm_pred_func_idx = 0;
+ UWORD8 u1_is_early_exit_condition_satisfied = 0;
+
+ /* Default cb and cr offset initializatio for b3_chroma_intra_mode_idx=7 */
+ /* FIX for TU tree shrinkage caused by ecd data copies in final mode recon */
+ ps_tu->s_tu.b1_cb_cbf = ps_tu->s_tu.b1_cr_cbf = 0;
+ ps_tu->s_tu.b1_cb_cbf_subtu1 = ps_tu->s_tu.b1_cr_cbf_subtu1 = 0;
+ ps_tu->ai4_cb_coeff_offset[0] = total_bytes_offset + init_bytes_offset;
+ ps_tu->ai4_cr_coeff_offset[0] = total_bytes_offset + init_bytes_offset;
+ ps_tu->ai4_cb_coeff_offset[1] = total_bytes_offset + init_bytes_offset;
+ ps_tu->ai4_cr_coeff_offset[1] = total_bytes_offset + init_bytes_offset;
+ ps_tu_temp_prms->ai2_cb_bytes_consumed[0] = 0;
+ ps_tu_temp_prms->ai2_cr_bytes_consumed[0] = 0;
+ ps_tu_temp_prms->ai2_cb_bytes_consumed[1] = 0;
+ ps_tu_temp_prms->ai2_cr_bytes_consumed[1] = 0;
+
+ /* TU level inits */
+ /* check if chroma present flag is set */
+ if(1 == ps_tu->s_tu.b3_chroma_intra_mode_idx)
+ {
+ /* RDOPT copy States : TU init (best until prev TU) to current */
+ COPY_CABAC_STATES(
+ &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_curr_idx]
+ .s_cabac_ctxt.au1_ctxt_models[0],
+ &ps_ctxt->au1_rdopt_init_ctxt_models[0],
+ IHEVC_CAB_CTXT_END);
+
+ /* get the current transform size */
+ trans_size = ps_tu->s_tu.b3_size;
+ trans_size = (1 << (trans_size + 1)); /* in chroma units */
+
+ /* since 2x2 transform is not allowed for chroma*/
+ if(2 == trans_size)
+ {
+ trans_size = 4;
+ }
+ }
+
+ for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus_in_tu; i4_subtu_idx++)
+ {
+ WORD32 cbf;
+ UWORD8 u1_is_recon_available;
+
+ WORD32 nbr_flags = 0;
+ WORD32 zero_cols = 0;
+ WORD32 zero_rows = 0;
+
+ /* check if chroma present flag is set */
+ if(1 == ps_tu->s_tu.b3_chroma_intra_mode_idx)
+ {
+ UWORD8 *pu1_cur_pred;
+ UWORD8 *pu1_cur_recon;
+ UWORD8 *pu1_cur_src;
+ WORD16 *pi2_cur_deq_data;
+ WORD32 curr_pos_x, curr_pos_y;
+ LWORD64 trans_ssd_u, trans_ssd_v;
+
+ /* get the current sub-tu posx and posy w.r.t to cu */
+ curr_pos_x = (ps_tu->s_tu.b4_pos_x << 2) - (cu_pos_x << 3);
+ curr_pos_y = (ps_tu->s_tu.b4_pos_y << 2) - (cu_pos_y << 3) +
+ (i4_subtu_idx * trans_size);
+
+ /* 420sp case only vertical height will be half */
+ if(u1_is_422 == 0)
+ {
+ curr_pos_y >>= 1;
+ }
+
+ /* increment the pointers to start of current Sub-TU */
+ pu1_cur_recon = (pu1_recon + curr_pos_x);
+ pu1_cur_recon += (curr_pos_y * i4_recon_stride);
+ pu1_cur_src = (pu1_chrm_src + curr_pos_x);
+ pu1_cur_src += (curr_pos_y * chrm_src_stride);
+ pu1_cur_pred = (pu1_pred + curr_pos_x);
+ pu1_cur_pred += (curr_pos_y * pred_strd);
+ pi2_cur_deq_data = pi2_deq_data + curr_pos_x;
+ pi2_cur_deq_data += (curr_pos_y * deq_data_strd);
+
+ /* populate the coeffs scan idx */
+ scan_idx = SCAN_DIAG_UPRIGHT;
+
+ /* perform intra prediction only for Intra case */
+ if(PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag)
+ {
+ UWORD8 *pu1_top_left;
+ UWORD8 *pu1_top;
+ UWORD8 *pu1_left;
+ WORD32 left_strd;
+
+ calc_recon = !u1_compute_spatial_ssd &&
+ ((4 == u1_num_tus) || (u1_is_422 == 1)) &&
+ (((u1_num_tus == 1) && (0 == i4_subtu_idx)) ||
+ ((ctr == 3) && (0 == i4_subtu_idx) && (u1_is_422 == 1)) ||
+ ((u1_num_tus == 4) && (ctr < 3)));
+
+ /* left cu boundary */
+ if(0 == curr_pos_x)
+ {
+ pu1_left = pu1_cu_left + curr_pos_y * cu_left_stride;
+ left_strd = cu_left_stride;
+ }
+ else
+ {
+ pu1_left = pu1_cur_recon - 2;
+ left_strd = i4_recon_stride;
+ }
+
+ /* top cu boundary */
+ if(0 == curr_pos_y)
+ {
+ pu1_top = pu1_cu_top + curr_pos_x;
+ }
+ else
+ {
+ pu1_top = pu1_cur_recon - i4_recon_stride;
+ }
+
+ /* by default top left is set to cu top left */
+ pu1_top_left = pu1_cu_top_left;
+
+ /* top left based on position */
+ if((0 != curr_pos_y) && (0 == curr_pos_x))
+ {
+ pu1_top_left = pu1_left - cu_left_stride;
+ }
+ else if(0 != curr_pos_x)
+ {
+ pu1_top_left = pu1_top - 2;
+ }
+
+ /* for 4x4 transforms based on intra pred mode scan is choosen*/
+ if(4 == trans_size)
+ {
+ /* for modes from 22 upto 30 horizontal scan is used */
+ if((chrm_pred_mode > 21) && (chrm_pred_mode < 31))
+ {
+ scan_idx = SCAN_HORZ;
+ }
+ /* for modes from 6 upto 14 vertical scan is used */
+ else if((chrm_pred_mode > 5) && (chrm_pred_mode < 15))
+ {
+ scan_idx = SCAN_VERT;
+ }
+ }
+
+ nbr_flags = ihevce_get_intra_chroma_tu_nbr(
+ ps_best_cu_prms->au4_nbr_flags[ctr],
+ i4_subtu_idx,
+ trans_size,
+ u1_is_422);
+
+ /* call the chroma reference array substitution */
+ ihevc_intra_pred_chroma_ref_substitution_fptr(
+ pu1_top_left,
+ pu1_top,
+ pu1_left,
+ left_strd,
+ trans_size,
+ nbr_flags,
+ (UWORD8 *)ps_ctxt->pv_ref_sub_out,
+ 1);
+
+ /* use the look up to get the function idx */
+ chrm_pred_func_idx = g_i4_ip_funcs[chrm_pred_mode];
+
+ /* call the intra prediction function */
+ ps_ctxt->apf_chrm_ip[chrm_pred_func_idx](
+ (UWORD8 *)ps_ctxt->pv_ref_sub_out,
+ 1,
+ pu1_cur_pred,
+ pred_strd,
+ trans_size,
+ chrm_pred_mode);
+ }
+
+ if(!ctr && !i4_subtu_idx && (u1_compute_spatial_ssd || calc_recon))
+ {
+ ps_recon_datastore->au1_is_chromaRecon_available[0] =
+ !ps_best_cu_prms->u1_skip_flag;
+ }
+ else if(!ctr && !i4_subtu_idx)
+ {
+ ps_recon_datastore->au1_is_chromaRecon_available[0] = 0;
+ }
+ /************************************************************/
+ /* recon loop is done for all cases including skip cu */
+ /* This is because skipping chroma reisdual based on luma */
+ /* skip decision can lead to chroma artifacts */
+ /************************************************************/
+ /************************************************************/
+ /*In the high quality and medium speed modes, wherein chroma*/
+ /*and luma costs are included in the total cost calculation */
+ /*the cost is just a ssd cost, and not that obtained through*/
+ /*iq_it path */
+ /************************************************************/
+ if(ps_best_cu_prms->u1_skip_flag == 0)
+ {
+ WORD32 tu_bits;
+
+ cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
+ ps_ctxt,
+ pu1_cur_pred,
+ pred_strd,
+ pu1_cur_src,
+ chrm_src_stride,
+ pi2_cur_deq_data,
+ deq_data_strd,
+ pu1_cur_recon,
+ i4_recon_stride,
+ pu1_ecd_data + total_bytes_offset,
+ ps_ctxt->au1_cu_csbf,
+ ps_ctxt->i4_cu_csbf_strd,
+ trans_size,
+ scan_idx,
+ PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag,
+ &num_bytes,
+ &tu_bits,
+ &zero_cols,
+ &zero_rows,
+ &u1_is_recon_available,
+ i4_perform_sbh,
+ i4_perform_rdoq,
+ &trans_ssd_u,
+#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ i4_alpha_stim_multiplier,
+ u1_is_cu_noisy,
+#endif
+ ps_best_cu_prms->u1_skip_flag,
+ u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
+ U_PLANE);
+
+ if(u1_compute_spatial_ssd && u1_is_recon_available)
+ {
+ ps_recon_datastore
+ ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
+ [i4_subtu_idx] = 0;
+ }
+ else
+ {
+ ps_recon_datastore
+ ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
+ [i4_subtu_idx] = UCHAR_MAX;
+ }
+
+#if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
+ {
+#if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
+ trans_ssd_u = ihevce_inject_stim_into_distortion(
+ pu1_cur_src,
+ chrm_src_stride,
+ pu1_cur_pred,
+ pred_strd,
+ trans_ssd_u,
+ i4_alpha_stim_multiplier,
+ trans_size,
+ 0,
+ ps_ctxt->u1_enable_psyRDOPT,
+ U_PLANE);
+#else
+ if(u1_compute_spatial_ssd && u1_is_recon_available)
+ {
+ trans_ssd_u = ihevce_inject_stim_into_distortion(
+ pu1_cur_src,
+ chrm_src_stride,
+ pu1_cur_recon,
+ i4_recon_stride,
+ trans_ssd_u,
+ i4_alpha_stim_multiplier,
+ trans_size,
+ 0,
+ ps_ctxt->u1_enable_psyRDOPT,
+ U_PLANE);
+ }
+ else
+ {
+ trans_ssd_u = ihevce_inject_stim_into_distortion(
+ pu1_cur_src,
+ chrm_src_stride,
+ pu1_cur_pred,
+ pred_strd,
+ trans_ssd_u,
+ i4_alpha_stim_multiplier,
+ trans_size,
+ 0,
+ ps_ctxt->u1_enable_psyRDOPT,
+ U_PLANE);
+ }
+#endif
+ }
+#endif
+
+ curr_cb_cod_cost =
+ trans_ssd_u +
+ COMPUTE_RATE_COST_CLIP30(
+ tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
+
+ chrm_tu_bits += tu_bits;
+ i4_bits_cb += tu_bits;
+
+ /* RDOPT copy States : New updated after curr TU to TU init */
+ if(0 != cbf)
+ {
+ COPY_CABAC_STATES(
+ &ps_ctxt->au1_rdopt_init_ctxt_models[0],
+ &ps_ctxt->s_rdopt_entropy_ctxt
+ .as_cu_entropy_ctxt[rd_opt_curr_idx]
+ .s_cabac_ctxt.au1_ctxt_models[0],
+ IHEVC_CAB_CTXT_END);
+ }
+ /* RDOPT copy States : Restoring back the Cb init state to Cr */
+ else
+ {
+ COPY_CABAC_STATES(
+ &ps_ctxt->s_rdopt_entropy_ctxt
+ .as_cu_entropy_ctxt[rd_opt_curr_idx]
+ .s_cabac_ctxt.au1_ctxt_models[0],
+ &ps_ctxt->au1_rdopt_init_ctxt_models[0],
+ IHEVC_CAB_CTXT_END);
+ }
+
+ /* If Intra and TU=CU/2, need recon for next TUs */
+ if(calc_recon)
+ {
+ ihevce_chroma_it_recon_fxn(
+ ps_ctxt,
+ pi2_cur_deq_data,
+ deq_data_strd,
+ pu1_cur_pred,
+ pred_strd,
+ pu1_cur_recon,
+ i4_recon_stride,
+ (pu1_ecd_data + total_bytes_offset),
+ trans_size,
+ cbf,
+ zero_cols,
+ zero_rows,
+ U_PLANE);
+
+ ps_recon_datastore
+ ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
+ [i4_subtu_idx] = 0;
+ }
+ else
+ {
+ ps_recon_datastore
+ ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
+ [i4_subtu_idx] = UCHAR_MAX;
+ }
+ }
+ else
+ {
+ /* num bytes is set to 0 */
+ num_bytes = 0;
+
+ /* cbf is returned as 0 */
+ cbf = 0;
+
+ curr_cb_cod_cost = trans_ssd_u =
+
+ ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
+ pu1_cur_pred,
+ pu1_cur_src,
+ pred_strd,
+ chrm_src_stride,
+ trans_size,
+ trans_size);
+
+ if(u1_compute_spatial_ssd)
+ {
+ /* buffer copy fromp pred to recon */
+
+ ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
+ pu1_cur_pred,
+ pred_strd,
+ pu1_cur_recon,
+ i4_recon_stride,
+ trans_size,
+ trans_size,
+ U_PLANE);
+
+ ps_recon_datastore
+ ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
+ [i4_subtu_idx] = 0;
+ }
+
+ if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
+ {
+ trans_ssd_u = ihevce_inject_stim_into_distortion(
+ pu1_cur_src,
+ chrm_src_stride,
+ pu1_cur_pred,
+ pred_strd,
+ trans_ssd_u,
+ i4_alpha_stim_multiplier,
+ trans_size,
+ 0,
+ ps_ctxt->u1_enable_psyRDOPT,
+ U_PLANE);
+ }
+
+#if ENABLE_INTER_ZCU_COST
+#if !WEIGH_CHROMA_COST
+ /* cbf = 0, accumulate cu not coded cost */
+ ps_ctxt->i8_cu_not_coded_cost += curr_cb_cod_cost;
+#else
+ /* cbf = 0, accumulate cu not coded cost */
+
+ ps_ctxt->i8_cu_not_coded_cost += (LWORD64)(
+ (curr_cb_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
+ (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
+ CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
+#endif
+#endif
+ }
+
+#if !WEIGH_CHROMA_COST
+ curr_rdopt_cost += curr_cb_cod_cost;
+#else
+ curr_rdopt_cost +=
+ ((curr_cb_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
+ (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
+ CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
+#endif
+ chrm_cod_cost += curr_cb_cod_cost;
+ i8_ssd_cb += trans_ssd_u;
+
+ if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
+ {
+ /* Early exit : If the current running cost exceeds
+ the prev. best mode cost, break */
+ if(curr_rdopt_cost > prev_best_rdopt_cost)
+ {
+ u1_is_early_exit_condition_satisfied = 1;
+ break;
+ }
+ }
+
+ /* inter cu is coded if any of the tu is coded in it */
+ ps_best_cu_prms->u1_is_cu_coded |= cbf;
+
+ /* update CB related params */
+ ps_tu->ai4_cb_coeff_offset[i4_subtu_idx] =
+ total_bytes_offset + init_bytes_offset;
+
+ if(0 == i4_subtu_idx)
+ {
+ ps_tu->s_tu.b1_cb_cbf = cbf;
+ }
+ else
+ {
+ ps_tu->s_tu.b1_cb_cbf_subtu1 = cbf;
+ }
+
+ total_bytes_offset += num_bytes;
+
+ ps_tu_temp_prms->au4_cb_zero_col[i4_subtu_idx] = zero_cols;
+ ps_tu_temp_prms->au4_cb_zero_row[i4_subtu_idx] = zero_rows;
+ ps_tu_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx] = num_bytes;
+
+ /* recon loop is done for non skip cases */
+ if(ps_best_cu_prms->u1_skip_flag == 0)
+ {
+ WORD32 tu_bits;
+
+ cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
+ ps_ctxt,
+ pu1_cur_pred,
+ pred_strd,
+ pu1_cur_src,
+ chrm_src_stride,
+ pi2_cur_deq_data + trans_size,
+ deq_data_strd,
+ pu1_cur_recon,
+ i4_recon_stride,
+ pu1_ecd_data + total_bytes_offset,
+ ps_ctxt->au1_cu_csbf,
+ ps_ctxt->i4_cu_csbf_strd,
+ trans_size,
+ scan_idx,
+ PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag,
+ &num_bytes,
+ &tu_bits,
+ &zero_cols,
+ &zero_rows,
+ &u1_is_recon_available,
+ i4_perform_sbh,
+ i4_perform_rdoq,
+ &trans_ssd_v,
+#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ i4_alpha_stim_multiplier,
+ u1_is_cu_noisy,
+#endif
+ ps_best_cu_prms->u1_skip_flag,
+ u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
+ V_PLANE);
+
+ if(u1_compute_spatial_ssd && u1_is_recon_available)
+ {
+ ps_recon_datastore
+ ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
+ [i4_subtu_idx] = 0;
+ }
+ else
+ {
+ ps_recon_datastore
+ ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
+ [i4_subtu_idx] = UCHAR_MAX;
+ }
+
+#if !USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
+ {
+#if !USE_RECON_TO_EVALUATE_STIM_IN_RDOPT
+ trans_ssd_v = ihevce_inject_stim_into_distortion(
+ pu1_cur_src,
+ chrm_src_stride,
+ pu1_cur_pred,
+ pred_strd,
+ trans_ssd_v,
+ i4_alpha_stim_multiplier,
+ trans_size,
+ 0,
+ ps_ctxt->u1_enable_psyRDOPT,
+ V_PLANE);
+#else
+ if(u1_compute_spatial_ssd && u1_is_recon_available)
+ {
+ trans_ssd_v = ihevce_inject_stim_into_distortion(
+ pu1_cur_src,
+ chrm_src_stride,
+ pu1_cur_recon,
+ i4_recon_stride,
+ trans_ssd_v,
+ i4_alpha_stim_multiplier,
+ trans_size,
+ 0,
+ ps_ctxt->u1_enable_psyRDOPT,
+ V_PLANE);
+ }
+ else
+ {
+ trans_ssd_v = ihevce_inject_stim_into_distortion(
+ pu1_cur_src,
+ chrm_src_stride,
+ pu1_cur_pred,
+ pred_strd,
+ trans_ssd_v,
+ i4_alpha_stim_multiplier,
+ trans_size,
+ 0,
+ ps_ctxt->u1_enable_psyRDOPT,
+ V_PLANE);
+ }
+#endif
+ }
+#endif
+
+ curr_cr_cod_cost =
+ trans_ssd_v +
+ COMPUTE_RATE_COST_CLIP30(
+ tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
+ chrm_tu_bits += tu_bits;
+ i4_bits_cr += tu_bits;
+
+ /* RDOPT copy States : New updated after curr TU to TU init */
+ if(0 != cbf)
+ {
+ COPY_CABAC_STATES(
+ &ps_ctxt->au1_rdopt_init_ctxt_models[0],
+ &ps_ctxt->s_rdopt_entropy_ctxt
+ .as_cu_entropy_ctxt[rd_opt_curr_idx]
+ .s_cabac_ctxt.au1_ctxt_models[0],
+ IHEVC_CAB_CTXT_END);
+ }
+ /* RDOPT copy States : Restoring back the Cb init state to Cr */
+ else
+ {
+ COPY_CABAC_STATES(
+ &ps_ctxt->s_rdopt_entropy_ctxt
+ .as_cu_entropy_ctxt[rd_opt_curr_idx]
+ .s_cabac_ctxt.au1_ctxt_models[0],
+ &ps_ctxt->au1_rdopt_init_ctxt_models[0],
+ IHEVC_CAB_CTXT_END);
+ }
+
+ /* If Intra and TU=CU/2, need recon for next TUs */
+ if(calc_recon)
+ {
+ ihevce_chroma_it_recon_fxn(
+ ps_ctxt,
+ (pi2_cur_deq_data + trans_size),
+ deq_data_strd,
+ pu1_cur_pred,
+ pred_strd,
+ pu1_cur_recon,
+ i4_recon_stride,
+ (pu1_ecd_data + total_bytes_offset),
+ trans_size,
+ cbf,
+ zero_cols,
+ zero_rows,
+ V_PLANE);
+
+ ps_recon_datastore
+ ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
+ [i4_subtu_idx] = 0;
+ }
+ else
+ {
+ ps_recon_datastore
+ ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
+ [i4_subtu_idx] = UCHAR_MAX;
+ }
+ }
+ else
+ {
+ /* num bytes is set to 0 */
+ num_bytes = 0;
+
+ /* cbf is returned as 0 */
+ cbf = 0;
+
+ curr_cr_cod_cost = trans_ssd_v =
+
+ ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
+ pu1_cur_pred + 1,
+ pu1_cur_src + 1,
+ pred_strd,
+ chrm_src_stride,
+ trans_size,
+ trans_size);
+
+ if(u1_compute_spatial_ssd)
+ {
+ /* buffer copy fromp pred to recon */
+ ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
+ pu1_cur_pred,
+ pred_strd,
+ pu1_cur_recon,
+ i4_recon_stride,
+ trans_size,
+ trans_size,
+ V_PLANE);
+
+ ps_recon_datastore
+ ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
+ [i4_subtu_idx] = 0;
+ }
+
+ if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
+ {
+ trans_ssd_v = ihevce_inject_stim_into_distortion(
+ pu1_cur_src,
+ chrm_src_stride,
+ pu1_cur_pred,
+ pred_strd,
+ trans_ssd_v,
+ i4_alpha_stim_multiplier,
+ trans_size,
+ 0,
+ ps_ctxt->u1_enable_psyRDOPT,
+ V_PLANE);
+ }
+
+#if ENABLE_INTER_ZCU_COST
+#if !WEIGH_CHROMA_COST
+ /* cbf = 0, accumulate cu not coded cost */
+ ps_ctxt->i8_cu_not_coded_cost += curr_cr_cod_cost;
+#else
+ /* cbf = 0, accumulate cu not coded cost */
+
+ ps_ctxt->i8_cu_not_coded_cost += (LWORD64)(
+ (curr_cr_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
+ (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
+ CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
+#endif
+#endif
+ }
+
+#if !WEIGH_CHROMA_COST
+ curr_rdopt_cost += curr_cr_cod_cost;
+#else
+ curr_rdopt_cost +=
+ ((curr_cr_cod_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
+ (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
+ CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
+#endif
+
+ chrm_cod_cost += curr_cr_cod_cost;
+ i8_ssd_cr += trans_ssd_v;
+
+ if(ps_ctxt->i4_bitrate_instance_num || ps_ctxt->i4_num_bitrates == 1)
+ {
+ /* Early exit : If the current running cost exceeds
+ the prev. best mode cost, break */
+ if(curr_rdopt_cost > prev_best_rdopt_cost)
+ {
+ u1_is_early_exit_condition_satisfied = 1;
+ break;
+ }
+ }
+
+ /* inter cu is coded if any of the tu is coded in it */
+ ps_best_cu_prms->u1_is_cu_coded |= cbf;
+
+ /* update CR related params */
+ ps_tu->ai4_cr_coeff_offset[i4_subtu_idx] =
+ total_bytes_offset + init_bytes_offset;
+
+ if(0 == i4_subtu_idx)
+ {
+ ps_tu->s_tu.b1_cr_cbf = cbf;
+ }
+ else
+ {
+ ps_tu->s_tu.b1_cr_cbf_subtu1 = cbf;
+ }
+
+ total_bytes_offset += num_bytes;
+
+ ps_tu_temp_prms->au4_cr_zero_col[i4_subtu_idx] = zero_cols;
+ ps_tu_temp_prms->au4_cr_zero_row[i4_subtu_idx] = zero_rows;
+ ps_tu_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx] = num_bytes;
+ }
+ else
+ {
+ ps_recon_datastore
+ ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx] =
+ UCHAR_MAX;
+ ps_recon_datastore
+ ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx] =
+ UCHAR_MAX;
+ }
+ }
+
+ if(u1_is_early_exit_condition_satisfied)
+ {
+ break;
+ }
+
+ /* loop increments */
+ ps_tu++;
+ ps_tu_temp_prms++;
+ }
+
+ /* Signal as luma mode. HIGH_QUALITY may update it */
+ ps_best_cu_prms->u1_chroma_intra_pred_mode = 4;
+
+ /* modify the cost chrm_cod_cost */
+ if(ps_ctxt->u1_enable_psyRDOPT)
+ {
+ UWORD8 *pu1_recon_cu;
+ WORD32 recon_stride;
+ WORD32 curr_pos_x;
+ WORD32 curr_pos_y;
+ WORD32 start_index;
+ WORD32 num_horz_cu_in_ctb;
+ WORD32 had_block_size;
+ /* tODO: sreenivasa ctb size has to be used appropriately */
+ had_block_size = 8;
+ num_horz_cu_in_ctb = 2 * 64 / had_block_size;
+
+ curr_pos_x = cu_pos_x << 3; /* pel units */
+ curr_pos_y = cu_pos_y << 3; /* pel units */
+ recon_stride = i4_recon_stride;
+ pu1_recon_cu = pu1_recon;
+
+ /* start index to index the source satd of curr cu in the current ctb */
+ start_index = 2 * (curr_pos_x / had_block_size) +
+ (curr_pos_y / had_block_size) * num_horz_cu_in_ctb;
+
+ {
+ chrm_cod_cost += ihevce_psy_rd_cost_croma(
+ ps_ctxt->ai4_source_chroma_satd,
+ pu1_recon,
+ recon_stride,
+ 1, //
+ cu_size,
+ 0, // pic type
+ 0, //layer id
+ ps_ctxt->i4_satd_lamda, // lambda
+ start_index,
+ ps_ctxt->u1_is_input_data_hbd, // 8 bit
+ ps_ctxt->u1_chroma_array_type,
+ &ps_ctxt->s_cmn_opt_func
+
+ ); // chroma subsampling 420
+ }
+ }
+ }
+ else
+ {
+ u1_is_mode_eq_chroma_satd_mode = 1;
+ chrm_cod_cost = MAX_COST_64;
+ }
+
+ /* If Intra Block and preset is HIGH QUALITY, then compare with best SATD mode */
+ if((PRED_MODE_INTRA == ps_best_cu_prms->u1_intra_flag) &&
+ (1 == ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_satd))
+ {
+ if(64 == cu_size)
+ {
+ ASSERT(TU_EQ_CU != func_proc_mode);
+ }
+
+ if(ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[func_proc_mode]
+ .i8_chroma_best_rdopt < chrm_cod_cost)
+ {
+ UWORD8 *pu1_src;
+ UWORD8 *pu1_ecd_data_src_cb;
+ UWORD8 *pu1_ecd_data_src_cr;
+
+ chroma_intra_satd_ctxt_t *ps_chr_intra_satd_ctxt =
+ &ps_ctxt->s_chroma_rdopt_ctxt.as_chr_intra_satd_ctxt[func_proc_mode];
+
+ UWORD8 *pu1_dst = &ps_ctxt->au1_rdopt_init_ctxt_models[0];
+ WORD32 ai4_ecd_data_cb_offset[2] = { 0, 0 };
+ WORD32 ai4_ecd_data_cr_offset[2] = { 0, 0 };
+
+ pu1_src = &ps_chr_intra_satd_ctxt->au1_chrm_satd_updated_ctxt_models[0];
+ chrm_cod_cost = ps_chr_intra_satd_ctxt->i8_chroma_best_rdopt;
+ chrm_pred_mode = ps_chr_intra_satd_ctxt->u1_best_cr_mode;
+ chrm_tu_bits = ps_chr_intra_satd_ctxt->i4_chrm_tu_bits;
+
+ if(u1_is_mode_eq_chroma_satd_mode)
+ {
+ chrm_cod_cost -= ps_chr_intra_satd_ctxt->i8_cost_to_encode_chroma_mode;
+ }
+
+ /*Resetting total_num_bytes_to 0*/
+ total_bytes_offset = 0;
+
+ /* Update the CABAC state corresponding to chroma only */
+ /* Chroma Cbf */
+ memcpy(pu1_dst + IHEVC_CAB_CBCR_IDX, pu1_src + IHEVC_CAB_CBCR_IDX, 2);
+ /* Chroma transform skip */
+ memcpy(pu1_dst + IHEVC_CAB_TFM_SKIP12, pu1_src + IHEVC_CAB_TFM_SKIP12, 1);
+ /* Chroma last coeff x prefix */
+ memcpy(
+ pu1_dst + IHEVC_CAB_COEFFX_PREFIX + 15,
+ pu1_src + IHEVC_CAB_COEFFX_PREFIX + 15,
+ 3);
+ /* Chroma last coeff y prefix */
+ memcpy(
+ pu1_dst + IHEVC_CAB_COEFFY_PREFIX + 15,
+ pu1_src + IHEVC_CAB_COEFFY_PREFIX + 15,
+ 3);
+ /* Chroma csbf */
+ memcpy(
+ pu1_dst + IHEVC_CAB_CODED_SUBLK_IDX + 2,
+ pu1_src + IHEVC_CAB_CODED_SUBLK_IDX + 2,
+ 2);
+ /* Chroma sig coeff flags */
+ memcpy(
+ pu1_dst + IHEVC_CAB_COEFF_FLAG + 27, pu1_src + IHEVC_CAB_COEFF_FLAG + 27, 15);
+ /* Chroma absgt1 flags */
+ memcpy(
+ pu1_dst + IHEVC_CAB_COEFABS_GRTR1_FLAG + 16,
+ pu1_src + IHEVC_CAB_COEFABS_GRTR1_FLAG + 16,
+ 8);
+ /* Chroma absgt2 flags */
+ memcpy(
+ pu1_dst + IHEVC_CAB_COEFABS_GRTR2_FLAG + 4,
+ pu1_src + IHEVC_CAB_COEFABS_GRTR2_FLAG + 4,
+ 2);
+
+ ps_tu = &ps_best_cu_prms->as_tu_enc_loop[0];
+ ps_tu_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
+
+ /* update to luma decision as we update chroma in final mode */
+ ps_best_cu_prms->u1_is_cu_coded = u1_is_cu_coded_old;
+
+ for(ctr = 0; ctr < u1_num_tus; ctr++)
+ {
+ for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus_in_tu; i4_subtu_idx++)
+ {
+ WORD32 cbf;
+ WORD32 num_bytes;
+
+ pu1_ecd_data_src_cb =
+ &ps_chr_intra_satd_ctxt->au1_scan_coeff_cb[i4_subtu_idx][0];
+ pu1_ecd_data_src_cr =
+ &ps_chr_intra_satd_ctxt->au1_scan_coeff_cr[i4_subtu_idx][0];
+
+ /* check if chroma present flag is set */
+ if(1 == ps_tu->s_tu.b3_chroma_intra_mode_idx)
+ {
+ UWORD8 *pu1_cur_pred_dest;
+ UWORD8 *pu1_cur_pred_src;
+ WORD32 pred_src_strd;
+ WORD16 *pi2_cur_deq_data_dest;
+ WORD16 *pi2_cur_deq_data_src_cb;
+ WORD16 *pi2_cur_deq_data_src_cr;
+ WORD32 deq_src_strd;
+
+ WORD32 curr_pos_x, curr_pos_y;
+
+ trans_size = ps_tu->s_tu.b3_size;
+ trans_size = (1 << (trans_size + 1)); /* in chroma units */
+
+ /*Deriving stride values*/
+ pred_src_strd = ps_chr_intra_satd_ctxt->i4_pred_stride;
+ deq_src_strd = ps_chr_intra_satd_ctxt->i4_iq_buff_stride;
+
+ /* since 2x2 transform is not allowed for chroma*/
+ if(2 == trans_size)
+ {
+ trans_size = 4;
+ }
+
+ /* get the current tu posx and posy w.r.t to cu */
+ curr_pos_x = (ps_tu->s_tu.b4_pos_x << 2) - (cu_pos_x << 3);
+ curr_pos_y = (ps_tu->s_tu.b4_pos_y << 2) - (cu_pos_y << 3) +
+ (i4_subtu_idx * trans_size);
+
+ /* 420sp case only vertical height will be half */
+ if(0 == u1_is_422)
+ {
+ curr_pos_y >>= 1;
+ }
+
+ /* increment the pointers to start of current TU */
+ pu1_cur_pred_src =
+ ((UWORD8 *)ps_chr_intra_satd_ctxt->pv_pred_data + curr_pos_x);
+ pu1_cur_pred_src += (curr_pos_y * pred_src_strd);
+ pu1_cur_pred_dest = (pu1_pred + curr_pos_x);
+ pu1_cur_pred_dest += (curr_pos_y * pred_strd);
+
+ pi2_cur_deq_data_src_cb =
+ &ps_chr_intra_satd_ctxt->ai2_iq_data_cb[0] + (curr_pos_x >> 1);
+ pi2_cur_deq_data_src_cr =
+ &ps_chr_intra_satd_ctxt->ai2_iq_data_cr[0] + (curr_pos_x >> 1);
+ pi2_cur_deq_data_src_cb += (curr_pos_y * deq_src_strd);
+ pi2_cur_deq_data_src_cr += (curr_pos_y * deq_src_strd);
+ pi2_cur_deq_data_dest = pi2_deq_data + curr_pos_x;
+ pi2_cur_deq_data_dest += (curr_pos_y * deq_data_strd);
+
+ /*Overwriting deq data with that belonging to the winning special mode
+ (luma mode != chroma mode)
+ ihevce_copy_2d takes source and dest arguments as UWORD8 *. We have to
+ correspondingly manipulate to copy WORD16 data*/
+
+ ps_ctxt->s_cmn_opt_func.pf_copy_2d(
+ (UWORD8 *)pi2_cur_deq_data_dest,
+ (deq_data_strd << 1),
+ (UWORD8 *)pi2_cur_deq_data_src_cb,
+ (deq_src_strd << 1),
+ (trans_size << 1),
+ trans_size);
+
+ ps_ctxt->s_cmn_opt_func.pf_copy_2d(
+ (UWORD8 *)(pi2_cur_deq_data_dest + trans_size),
+ (deq_data_strd << 1),
+ (UWORD8 *)pi2_cur_deq_data_src_cr,
+ (deq_src_strd << 1),
+ (trans_size << 1),
+ trans_size);
+
+ /*Overwriting pred data with that belonging to the winning special mode
+ (luma mode != chroma mode)*/
+
+ ps_ctxt->s_cmn_opt_func.pf_copy_2d(
+ pu1_cur_pred_dest,
+ pred_strd,
+ pu1_cur_pred_src,
+ pred_src_strd,
+ (trans_size << 1),
+ trans_size);
+
+ num_bytes = ps_chr_intra_satd_ctxt
+ ->ai4_num_bytes_scan_coeff_cb_per_tu[i4_subtu_idx][ctr];
+ cbf = ps_chr_intra_satd_ctxt->au1_cbf_cb[i4_subtu_idx][ctr];
+ /* inter cu is coded if any of the tu is coded in it */
+ ps_best_cu_prms->u1_is_cu_coded |= cbf;
+
+ /* update CB related params */
+ ps_tu->ai4_cb_coeff_offset[i4_subtu_idx] =
+ total_bytes_offset + init_bytes_offset;
+
+ if(0 == i4_subtu_idx)
+ {
+ ps_tu->s_tu.b1_cb_cbf = cbf;
+ }
+ else
+ {
+ ps_tu->s_tu.b1_cb_cbf_subtu1 = cbf;
+ }
+
+ /*Overwriting the cb ecd data corresponding to the special mode*/
+ if(0 != num_bytes)
+ {
+ memcpy(
+ (pu1_ecd_data + total_bytes_offset),
+ pu1_ecd_data_src_cb + ai4_ecd_data_cb_offset[i4_subtu_idx],
+ num_bytes);
+ }
+
+ total_bytes_offset += num_bytes;
+ ai4_ecd_data_cb_offset[i4_subtu_idx] += num_bytes;
+ ps_tu_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx] = num_bytes;
+
+ num_bytes = ps_chr_intra_satd_ctxt
+ ->ai4_num_bytes_scan_coeff_cr_per_tu[i4_subtu_idx][ctr];
+ cbf = ps_chr_intra_satd_ctxt->au1_cbf_cr[i4_subtu_idx][ctr];
+ /* inter cu is coded if any of the tu is coded in it */
+ ps_best_cu_prms->u1_is_cu_coded |= cbf;
+
+ /*Overwriting the cr ecd data corresponding to the special mode*/
+ if(0 != num_bytes)
+ {
+ memcpy(
+ (pu1_ecd_data + total_bytes_offset),
+ pu1_ecd_data_src_cr + ai4_ecd_data_cr_offset[i4_subtu_idx],
+ num_bytes);
+ }
+
+ /* update CR related params */
+ ps_tu->ai4_cr_coeff_offset[i4_subtu_idx] =
+ total_bytes_offset + init_bytes_offset;
+
+ if(0 == i4_subtu_idx)
+ {
+ ps_tu->s_tu.b1_cr_cbf = cbf;
+ }
+ else
+ {
+ ps_tu->s_tu.b1_cr_cbf_subtu1 = cbf;
+ }
+
+ total_bytes_offset += num_bytes;
+ ai4_ecd_data_cr_offset[i4_subtu_idx] += num_bytes;
+
+ /*Updating zero rows and zero cols*/
+ ps_tu_temp_prms->au4_cb_zero_col[i4_subtu_idx] =
+ ps_chr_intra_satd_ctxt->ai4_zero_col_cb[i4_subtu_idx][ctr];
+ ps_tu_temp_prms->au4_cb_zero_row[i4_subtu_idx] =
+ ps_chr_intra_satd_ctxt->ai4_zero_row_cb[i4_subtu_idx][ctr];
+ ps_tu_temp_prms->au4_cr_zero_col[i4_subtu_idx] =
+ ps_chr_intra_satd_ctxt->ai4_zero_col_cr[i4_subtu_idx][ctr];
+ ps_tu_temp_prms->au4_cr_zero_row[i4_subtu_idx] =
+ ps_chr_intra_satd_ctxt->ai4_zero_row_cr[i4_subtu_idx][ctr];
+
+ ps_tu_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx] = num_bytes;
+
+ if((u1_num_tus > 1) &&
+ ps_recon_datastore->au1_is_chromaRecon_available[2])
+ {
+ ps_recon_datastore
+ ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
+ [i4_subtu_idx] = 2;
+ ps_recon_datastore
+ ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
+ [i4_subtu_idx] = 2;
+ }
+ else if(
+ (1 == u1_num_tus) &&
+ ps_recon_datastore->au1_is_chromaRecon_available[1])
+ {
+ ps_recon_datastore
+ ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
+ [i4_subtu_idx] = 1;
+ ps_recon_datastore
+ ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
+ [i4_subtu_idx] = 1;
+ }
+ else
+ {
+ ps_recon_datastore
+ ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr]
+ [i4_subtu_idx] = UCHAR_MAX;
+ ps_recon_datastore
+ ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr]
+ [i4_subtu_idx] = UCHAR_MAX;
+ }
+ }
+ }
+
+ /* loop increments */
+ ps_tu++;
+ ps_tu_temp_prms++;
+ }
+ }
+
+ if(!u1_is_422)
+ {
+ if(chrm_pred_mode == luma_pred_mode)
+ {
+ ps_best_cu_prms->u1_chroma_intra_pred_mode = 4;
+ }
+ else if(chrm_pred_mode == 0)
+ {
+ ps_best_cu_prms->u1_chroma_intra_pred_mode = 0;
+ }
+ else if(chrm_pred_mode == 1)
+ {
+ ps_best_cu_prms->u1_chroma_intra_pred_mode = 3;
+ }
+ else if(chrm_pred_mode == 10)
+ {
+ ps_best_cu_prms->u1_chroma_intra_pred_mode = 2;
+ }
+ else if(chrm_pred_mode == 26)
+ {
+ ps_best_cu_prms->u1_chroma_intra_pred_mode = 1;
+ }
+ else
+ {
+ ASSERT(0); /*Should not come here*/
+ }
+ }
+ else
+ {
+ if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[luma_pred_mode])
+ {
+ ps_best_cu_prms->u1_chroma_intra_pred_mode = 4;
+ }
+ else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[0])
+ {
+ ps_best_cu_prms->u1_chroma_intra_pred_mode = 0;
+ }
+ else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[1])
+ {
+ ps_best_cu_prms->u1_chroma_intra_pred_mode = 3;
+ }
+ else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[10])
+ {
+ ps_best_cu_prms->u1_chroma_intra_pred_mode = 2;
+ }
+ else if(chrm_pred_mode == gau1_chroma422_intra_angle_mapping[26])
+ {
+ ps_best_cu_prms->u1_chroma_intra_pred_mode = 1;
+ }
+ else
+ {
+ ASSERT(0); /*Should not come here*/
+ }
+ }
+ }
+
+ /* Store the actual chroma mode */
+ ps_best_cu_prms->u1_chroma_intra_pred_actual_mode = chrm_pred_mode;
+ }
+
+ /* update the total bytes produced */
+ ps_best_cu_prms->i4_num_bytes_ecd_data = total_bytes_offset + init_bytes_offset;
+
+ /* store the final chrm bits accumulated */
+ *pi4_chrm_tu_bits = chrm_tu_bits;
+
+ return (chrm_cod_cost);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_final_rdopt_mode_prcs \endif
+*
+* \brief
+* Final RDOPT mode process function. Performs Recon computation for the
+* final mode. Re-use or Compute pred, iq-data, coeff based on the flags.
+*
+* \param[in] ps_ctxt : pointer to enc_loop module
+* \param[in] ps_prms : pointer to struct containing requisite parameters
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_final_rdopt_mode_prcs(
+ ihevce_enc_loop_ctxt_t *ps_ctxt, final_mode_process_prms_t *ps_prms)
+{
+ enc_loop_cu_final_prms_t *ps_best_cu_prms;
+ tu_enc_loop_out_t *ps_tu_enc_loop;
+ tu_enc_loop_temp_prms_t *ps_tu_enc_loop_temp_prms;
+ nbr_avail_flags_t s_nbr;
+ recon_datastore_t *ps_recon_datastore;
+
+ ihevc_intra_pred_luma_ref_substitution_ft *ihevc_intra_pred_luma_ref_substitution_fptr;
+ ihevc_intra_pred_chroma_ref_substitution_ft *ihevc_intra_pred_chroma_ref_substitution_fptr;
+ ihevc_intra_pred_ref_filtering_ft *ihevc_intra_pred_ref_filtering_fptr;
+
+ WORD32 num_tu_in_cu;
+ LWORD64 rd_opt_cost;
+ WORD32 ctr;
+ WORD32 i4_subtu_idx;
+ WORD32 cu_size;
+ WORD32 cu_pos_x, cu_pos_y;
+ WORD32 chrm_present_flag = 1;
+ WORD32 num_bytes, total_bytes = 0;
+ WORD32 chrm_ctr = 0;
+ WORD32 u1_is_cu_coded;
+ UWORD8 *pu1_old_ecd_data;
+ UWORD8 *pu1_chrm_old_ecd_data;
+ UWORD8 *pu1_cur_pred;
+ WORD16 *pi2_deq_data;
+ WORD16 *pi2_chrm_deq_data;
+ WORD16 *pi2_cur_deq_data;
+ WORD16 *pi2_cur_deq_data_chrm;
+ UWORD8 *pu1_cur_luma_recon;
+ UWORD8 *pu1_cur_chroma_recon;
+ UWORD8 *pu1_cur_src;
+ UWORD8 *pu1_cur_src_chrm;
+ UWORD8 *pu1_cur_pred_chrm;
+ UWORD8 *pu1_intra_pred_mode;
+ UWORD32 *pu4_nbr_flags;
+ LWORD64 i8_ssd;
+
+ cu_nbr_prms_t *ps_cu_nbr_prms = ps_prms->ps_cu_nbr_prms;
+ cu_inter_cand_t *ps_best_inter_cand = ps_prms->ps_best_inter_cand;
+ enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms = ps_prms->ps_chrm_cu_buf_prms;
+
+ WORD32 packed_pred_mode = ps_prms->packed_pred_mode;
+ WORD32 rd_opt_best_idx = ps_prms->rd_opt_best_idx;
+ UWORD8 *pu1_src = (UWORD8 *)ps_prms->pv_src;
+ WORD32 src_strd = ps_prms->src_strd;
+ UWORD8 *pu1_pred = (UWORD8 *)ps_prms->pv_pred;
+ WORD32 pred_strd = ps_prms->pred_strd;
+ UWORD8 *pu1_pred_chrm = (UWORD8 *)ps_prms->pv_pred_chrm;
+ WORD32 pred_chrm_strd = ps_prms->pred_chrm_strd;
+ UWORD8 *pu1_final_ecd_data = ps_prms->pu1_final_ecd_data;
+ UWORD8 *pu1_csbf_buf = ps_prms->pu1_csbf_buf;
+ WORD32 csbf_strd = ps_prms->csbf_strd;
+ UWORD8 *pu1_luma_recon = (UWORD8 *)ps_prms->pv_luma_recon;
+ WORD32 recon_luma_strd = ps_prms->recon_luma_strd;
+ UWORD8 *pu1_chrm_recon = (UWORD8 *)ps_prms->pv_chrm_recon;
+ WORD32 recon_chrma_strd = ps_prms->recon_chrma_strd;
+ UWORD8 u1_cu_pos_x = ps_prms->u1_cu_pos_x;
+ UWORD8 u1_cu_pos_y = ps_prms->u1_cu_pos_y;
+ UWORD8 u1_cu_size = ps_prms->u1_cu_size;
+ WORD8 i1_cu_qp = ps_prms->i1_cu_qp;
+ UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
+ UWORD8 u1_num_subtus = (u1_is_422 == 1) + 1;
+ /* Get the Chroma pointer and parameters */
+ UWORD8 *pu1_src_chrm = ps_chrm_cu_buf_prms->pu1_curr_src;
+ WORD32 src_chrm_strd = ps_chrm_cu_buf_prms->i4_chrm_src_stride;
+ UWORD8 u1_compute_spatial_ssd_luma = 0;
+ UWORD8 u1_compute_spatial_ssd_chroma = 0;
+ /* Get the pointer for function selector */
+ ihevc_intra_pred_luma_ref_substitution_fptr =
+ ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr;
+
+ ihevc_intra_pred_ref_filtering_fptr =
+ ps_ctxt->ps_func_selector->ihevc_intra_pred_ref_filtering_fptr;
+
+ ihevc_intra_pred_chroma_ref_substitution_fptr =
+ ps_ctxt->ps_func_selector->ihevc_intra_pred_chroma_ref_substitution_fptr;
+
+ /* Get the best CU parameters */
+ ps_best_cu_prms = &ps_ctxt->as_cu_prms[rd_opt_best_idx];
+ num_tu_in_cu = ps_best_cu_prms->u2_num_tus_in_cu;
+ cu_size = ps_best_cu_prms->u1_cu_size;
+ cu_pos_x = u1_cu_pos_x;
+ cu_pos_y = u1_cu_pos_y;
+ pu1_intra_pred_mode = &ps_best_cu_prms->au1_intra_pred_mode[0];
+ pu4_nbr_flags = &ps_best_cu_prms->au4_nbr_flags[0];
+ ps_recon_datastore = &ps_best_cu_prms->s_recon_datastore;
+
+ /* get the first TU pointer */
+ ps_tu_enc_loop = &ps_best_cu_prms->as_tu_enc_loop[0];
+ /* get the first TU only enc_loop prms pointer */
+ ps_tu_enc_loop_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
+ /*modify quant related param in ctxt based on current cu qp*/
+ if((ps_ctxt->i1_cu_qp_delta_enable))
+ {
+ /*recompute quant related param at every cu level*/
+ ihevce_compute_quant_rel_param(ps_ctxt, i1_cu_qp);
+
+ /* get frame level lambda params */
+ ihevce_get_cl_cu_lambda_prms(
+ ps_ctxt, MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON ? i1_cu_qp : ps_ctxt->i4_frame_qp);
+ }
+
+ ps_best_cu_prms->i8_cu_ssd = 0;
+ ps_best_cu_prms->u4_cu_open_intra_sad = 0;
+
+ /* For skip case : Set TU_size = CU_size and make cbf = 0
+ so that same TU loop can be used for all modes */
+ if(PRED_MODE_SKIP == packed_pred_mode)
+ {
+ for(ctr = 0; ctr < num_tu_in_cu; ctr++)
+ {
+ ps_tu_enc_loop->s_tu.b1_y_cbf = 0;
+
+ ps_tu_enc_loop_temp_prms->i2_luma_bytes_consumed = 0;
+
+ ps_tu_enc_loop++;
+ ps_tu_enc_loop_temp_prms++;
+ }
+
+ /* go back to the first TU pointer */
+ ps_tu_enc_loop = &ps_best_cu_prms->as_tu_enc_loop[0];
+ ps_tu_enc_loop_temp_prms = &ps_best_cu_prms->as_tu_enc_loop_temp_prms[0];
+ }
+ /** For inter case, pred calculation is outside the loop **/
+ if(PRED_MODE_INTRA != packed_pred_mode)
+ {
+ /**------------- Compute pred data if required --------------**/
+ if((1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data))
+ {
+ nbr_4x4_t *ps_topleft_nbr_4x4;
+ nbr_4x4_t *ps_left_nbr_4x4;
+ nbr_4x4_t *ps_top_nbr_4x4;
+ WORD32 nbr_4x4_left_strd;
+
+ ps_best_inter_cand->pu1_pred_data = pu1_pred;
+ ps_best_inter_cand->i4_pred_data_stride = pred_strd;
+
+ /* Get the CU nbr information */
+ ps_topleft_nbr_4x4 = ps_cu_nbr_prms->ps_topleft_nbr_4x4;
+ ps_left_nbr_4x4 = ps_cu_nbr_prms->ps_left_nbr_4x4;
+ ps_top_nbr_4x4 = ps_cu_nbr_prms->ps_top_nbr_4x4;
+ nbr_4x4_left_strd = ps_cu_nbr_prms->nbr_4x4_left_strd;
+
+ /* MVP ,MVD calc and Motion compensation */
+ rd_opt_cost = ((pf_inter_rdopt_cu_mc_mvp)ps_ctxt->pv_inter_rdopt_cu_mc_mvp)(
+ ps_ctxt,
+ ps_best_inter_cand,
+ u1_cu_size,
+ cu_pos_x,
+ cu_pos_y,
+ ps_left_nbr_4x4,
+ ps_top_nbr_4x4,
+ ps_topleft_nbr_4x4,
+ nbr_4x4_left_strd,
+ rd_opt_best_idx);
+ }
+
+ /** ------ Motion Compensation for Chroma -------- **/
+ if(1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data)
+ {
+ UWORD8 *pu1_cur_pred;
+ pu1_cur_pred = pu1_pred_chrm;
+
+ /* run a loop over all the partitons in cu */
+ for(ctr = 0; ctr < ps_best_cu_prms->u2_num_pus_in_cu; ctr++)
+ {
+ pu_t *ps_pu;
+ WORD32 inter_pu_wd, inter_pu_ht;
+
+ ps_pu = &ps_best_cu_prms->as_pu_chrm_proc[ctr];
+
+ /* IF AMP then each partitions can have diff wd ht */
+ inter_pu_wd = (ps_pu->b4_wd + 1) << 2; /* cb and cr pixel interleaved */
+ inter_pu_ht = ((ps_pu->b4_ht + 1) << 2) >> 1;
+ inter_pu_ht <<= u1_is_422;
+ /* chroma mc func */
+ ihevce_chroma_inter_pred_pu(
+ &ps_ctxt->s_mc_ctxt, ps_pu, pu1_cur_pred, pred_chrm_strd);
+ if(2 == ps_best_cu_prms->u2_num_pus_in_cu)
+ {
+ /* 2Nx__ partion case */
+ if(inter_pu_wd == ps_best_cu_prms->u1_cu_size)
+ {
+ pu1_cur_pred += (inter_pu_ht * pred_chrm_strd);
+ }
+ /* __x2N partion case */
+ if(inter_pu_ht == (ps_best_cu_prms->u1_cu_size >> (u1_is_422 == 0)))
+ {
+ pu1_cur_pred += inter_pu_wd;
+ }
+ }
+ }
+ }
+ }
+ pi2_deq_data = &ps_best_cu_prms->pi2_cu_deq_coeffs[0];
+ pi2_chrm_deq_data =
+ &ps_best_cu_prms->pi2_cu_deq_coeffs[0] + ps_best_cu_prms->i4_chrm_deq_coeff_strt_idx;
+ pu1_old_ecd_data = &ps_best_cu_prms->pu1_cu_coeffs[0];
+ pu1_chrm_old_ecd_data =
+ &ps_best_cu_prms->pu1_cu_coeffs[0] + ps_best_cu_prms->i4_chrm_cu_coeff_strt_idx;
+
+ /* default value for cu coded flag */
+ u1_is_cu_coded = 0;
+
+ /* If we are re-computing coeff, set sad to 0 and start accumulating */
+ /* else use the best cand. sad from RDOPT stage */
+ if(1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data)
+ {
+ /*init of ssd of CU accuumulated over all TU*/
+ ps_best_cu_prms->u4_cu_sad = 0;
+
+ /* reset the luma residual bits */
+ ps_best_cu_prms->u4_cu_luma_res_bits = 0;
+ }
+
+ if(1 == ps_tu_enc_loop_temp_prms->b1_eval_chroma_iq_and_coeff_data)
+ {
+ /* reset the chroma residual bits */
+ ps_best_cu_prms->u4_cu_chroma_res_bits = 0;
+ }
+
+ if((1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data) ||
+ (1 == ps_tu_enc_loop_temp_prms->b1_eval_chroma_iq_and_coeff_data))
+ {
+ /*Header bits have to be reevaluated if luma and chroma reevaluation is done, as
+ the quantized coefficients might be changed.
+ We are copying only those states which correspond to the header from the cabac state
+ of the previous CU, because the header is going to be recomputed for this condition*/
+ ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 1;
+ memcpy(
+ &ps_ctxt->au1_rdopt_init_ctxt_models[0],
+ &ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0],
+ IHEVC_CAB_COEFFX_PREFIX);
+
+ if((1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data))
+ {
+ COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
+ (&ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX),
+ (&ps_ctxt->s_rdopt_entropy_ctxt.au1_init_cabac_ctxt_states[0] +
+ IHEVC_CAB_COEFFX_PREFIX),
+ (IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX));
+ }
+ else
+ {
+ COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
+ (&ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX),
+ (&ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
+ .s_cabac_ctxt.au1_ctxt_models[0] +
+ IHEVC_CAB_COEFFX_PREFIX),
+ (IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX));
+ }
+ ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx = rd_opt_best_idx;
+ }
+ else
+ {
+ ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 0;
+ }
+
+ /* Zero cbf tool is disabled for intra CUs */
+ if(PRED_MODE_INTRA == packed_pred_mode)
+ {
+#if ENABLE_ZERO_CBF_IN_INTRA
+ ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
+#else
+ ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF;
+#endif
+ }
+ else
+ {
+#if DISABLE_ZERO_ZBF_IN_INTER
+ ps_ctxt->i4_zcbf_rdo_level = NO_ZCBF;
+#else
+ ps_ctxt->i4_zcbf_rdo_level = ZCBF_ENABLE;
+#endif
+ }
+
+ /** Loop for all tu blocks in current cu and do reconstruction **/
+ for(ctr = 0; ctr < num_tu_in_cu; ctr++)
+ {
+ tu_t *ps_tu;
+ WORD32 trans_size, num_4x4_in_tu;
+ WORD32 cbf, zero_rows, zero_cols;
+ WORD32 cu_pos_x_in_4x4, cu_pos_y_in_4x4;
+ WORD32 cu_pos_x_in_pix, cu_pos_y_in_pix;
+ WORD32 luma_pred_mode, chroma_pred_mode = 0;
+ UWORD8 au1_is_recon_available[2];
+
+ ps_tu = &(ps_tu_enc_loop->s_tu); /* Points to the TU property ctxt */
+
+ u1_compute_spatial_ssd_luma = 0;
+ u1_compute_spatial_ssd_chroma = 0;
+
+ trans_size = 1 << (ps_tu->b3_size + 2);
+ num_4x4_in_tu = (trans_size >> 2);
+ cu_pos_x_in_4x4 = ps_tu->b4_pos_x;
+ cu_pos_y_in_4x4 = ps_tu->b4_pos_y;
+
+ /* populate the coeffs scan idx */
+ ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
+
+ /* get the current pos x and pos y in pixels */
+ cu_pos_x_in_pix = (cu_pos_x_in_4x4 << 2) - (cu_pos_x << 3);
+ cu_pos_y_in_pix = (cu_pos_y_in_4x4 << 2) - (cu_pos_y << 3);
+
+ /* Update pointers based on the location */
+ pu1_cur_src = pu1_src + cu_pos_x_in_pix;
+ pu1_cur_src += (cu_pos_y_in_pix * src_strd);
+ pu1_cur_pred = pu1_pred + cu_pos_x_in_pix;
+ pu1_cur_pred += (cu_pos_y_in_pix * pred_strd);
+
+ pu1_cur_luma_recon = pu1_luma_recon + cu_pos_x_in_pix;
+ pu1_cur_luma_recon += (cu_pos_y_in_pix * recon_luma_strd);
+
+ pi2_cur_deq_data = pi2_deq_data + cu_pos_x_in_pix;
+ pi2_cur_deq_data += cu_pos_y_in_pix * cu_size;
+
+ pu1_cur_src_chrm = pu1_src_chrm + cu_pos_x_in_pix;
+ pu1_cur_src_chrm += ((cu_pos_y_in_pix >> 1) * src_chrm_strd) +
+ (u1_is_422 * ((cu_pos_y_in_pix >> 1) * src_chrm_strd));
+
+ pu1_cur_pred_chrm = pu1_pred_chrm + cu_pos_x_in_pix;
+ pu1_cur_pred_chrm += ((cu_pos_y_in_pix >> 1) * pred_chrm_strd) +
+ (u1_is_422 * ((cu_pos_y_in_pix >> 1) * pred_chrm_strd));
+
+ pu1_cur_chroma_recon = pu1_chrm_recon + cu_pos_x_in_pix;
+ pu1_cur_chroma_recon += ((cu_pos_y_in_pix >> 1) * recon_chrma_strd) +
+ (u1_is_422 * ((cu_pos_y_in_pix >> 1) * recon_chrma_strd));
+
+ pi2_cur_deq_data_chrm = pi2_chrm_deq_data + cu_pos_x_in_pix;
+ pi2_cur_deq_data_chrm +=
+ ((cu_pos_y_in_pix >> 1) * cu_size) + (u1_is_422 * ((cu_pos_y_in_pix >> 1) * cu_size));
+
+ /* if transfrom size is 4x4 then only first luma 4x4 will have chroma*/
+ chrm_present_flag = 1; /* by default chroma present is set to 1*/
+
+ if(4 == trans_size)
+ {
+ /* if tusize is 4x4 then only first luma 4x4 will have chroma*/
+ if(0 != chrm_ctr)
+ {
+ chrm_present_flag = INTRA_PRED_CHROMA_IDX_NONE;
+ }
+
+ /* increment the chrm ctr unconditionally */
+ chrm_ctr++;
+ /* after ctr reached 4 reset it */
+ if(4 == chrm_ctr)
+ {
+ chrm_ctr = 0;
+ }
+ }
+
+ /**------------- Compute pred data if required --------------**/
+ if(PRED_MODE_INTRA == packed_pred_mode) /* Inter pred calc. is done outside loop */
+ {
+ /* Get the pred mode for scan idx calculation, even if pred is not required */
+ luma_pred_mode = *pu1_intra_pred_mode;
+
+ if((ps_ctxt->i4_rc_pass == 1) ||
+ (1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data))
+ {
+ WORD32 nbr_flags;
+ WORD32 luma_pred_func_idx;
+ UWORD8 *pu1_left;
+ UWORD8 *pu1_top;
+ UWORD8 *pu1_top_left;
+ WORD32 left_strd;
+
+ /* left cu boundary */
+ if(0 == cu_pos_x_in_pix)
+ {
+ left_strd = ps_cu_nbr_prms->cu_left_stride;
+ pu1_left = ps_cu_nbr_prms->pu1_cu_left + cu_pos_y_in_pix * left_strd;
+ }
+ else
+ {
+ pu1_left = pu1_cur_luma_recon - 1;
+ left_strd = recon_luma_strd;
+ }
+
+ /* top cu boundary */
+ if(0 == cu_pos_y_in_pix)
+ {
+ pu1_top = ps_cu_nbr_prms->pu1_cu_top + cu_pos_x_in_pix;
+ }
+ else
+ {
+ pu1_top = pu1_cur_luma_recon - recon_luma_strd;
+ }
+
+ /* by default top left is set to cu top left */
+ pu1_top_left = ps_cu_nbr_prms->pu1_cu_top_left;
+
+ /* top left based on position */
+ if((0 != cu_pos_y_in_pix) && (0 == cu_pos_x_in_pix))
+ {
+ pu1_top_left = pu1_left - left_strd;
+ }
+ else if(0 != cu_pos_x_in_pix)
+ {
+ pu1_top_left = pu1_top - 1;
+ }
+
+ /* get the neighbour availability flags */
+ nbr_flags = ihevce_get_nbr_intra(
+ &s_nbr,
+ ps_ctxt->pu1_ctb_nbr_map,
+ ps_ctxt->i4_nbr_map_strd,
+ cu_pos_x_in_4x4,
+ cu_pos_y_in_4x4,
+ num_4x4_in_tu);
+
+ if(1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data)
+ {
+ /* copy the nbr flags for chroma reuse */
+ if(4 != trans_size)
+ {
+ *pu4_nbr_flags = nbr_flags;
+ }
+ else if(1 == chrm_present_flag)
+ {
+ /* compute the avail flags assuming luma trans is 8x8 */
+ /* get the neighbour availability flags */
+ *pu4_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
+ ps_ctxt->pu1_ctb_nbr_map,
+ ps_ctxt->i4_nbr_map_strd,
+ cu_pos_x_in_4x4,
+ cu_pos_y_in_4x4,
+ (num_4x4_in_tu << 1),
+ (num_4x4_in_tu << 1));
+ }
+
+ /* call reference array substitution */
+ ihevc_intra_pred_luma_ref_substitution_fptr(
+ pu1_top_left,
+ pu1_top,
+ pu1_left,
+ left_strd,
+ trans_size,
+ nbr_flags,
+ (UWORD8 *)ps_ctxt->pv_ref_sub_out,
+ 1);
+
+ /* call reference filtering */
+ ihevc_intra_pred_ref_filtering_fptr(
+ (UWORD8 *)ps_ctxt->pv_ref_sub_out,
+ trans_size,
+ (UWORD8 *)ps_ctxt->pv_ref_filt_out,
+ luma_pred_mode,
+ ps_ctxt->i1_strong_intra_smoothing_enable_flag);
+
+ /* use the look up to get the function idx */
+ luma_pred_func_idx = g_i4_ip_funcs[luma_pred_mode];
+
+ /* call the intra prediction function */
+ ps_ctxt->apf_lum_ip[luma_pred_func_idx](
+ (UWORD8 *)ps_ctxt->pv_ref_filt_out,
+ 1,
+ pu1_cur_pred,
+ pred_strd,
+ trans_size,
+ luma_pred_mode);
+ }
+ }
+ else if(
+ (1 == chrm_present_flag) &&
+ (1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data))
+ {
+ WORD32 temp_num_4x4_in_tu = num_4x4_in_tu;
+
+ if(4 == trans_size) /* compute the avail flags assuming luma trans is 8x8 */
+ {
+ temp_num_4x4_in_tu = num_4x4_in_tu << 1;
+ }
+
+ *pu4_nbr_flags = ihevce_get_nbr_intra_mxn_tu(
+ ps_ctxt->pu1_ctb_nbr_map,
+ ps_ctxt->i4_nbr_map_strd,
+ cu_pos_x_in_4x4,
+ cu_pos_y_in_4x4,
+ temp_num_4x4_in_tu,
+ temp_num_4x4_in_tu);
+ }
+
+ /* Get the pred mode for scan idx calculation, even if pred is not required */
+ chroma_pred_mode = ps_best_cu_prms->u1_chroma_intra_pred_actual_mode;
+ }
+
+ if(1 == ps_tu_enc_loop_temp_prms->b1_eval_luma_iq_and_coeff_data)
+ {
+ WORD32 temp_bits;
+ LWORD64 temp_cost;
+ UWORD32 u4_tu_sad;
+ WORD32 perform_sbh, perform_rdoq;
+
+ if(PRED_MODE_INTRA == packed_pred_mode)
+ {
+ /* for luma 4x4 and 8x8 transforms based on intra pred mode scan is choosen*/
+ if(trans_size < 16)
+ {
+ /* for modes from 22 upto 30 horizontal scan is used */
+ if((luma_pred_mode > 21) && (luma_pred_mode < 31))
+ {
+ ps_ctxt->i4_scan_idx = SCAN_HORZ;
+ }
+ /* for modes from 6 upto 14 vertical scan is used */
+ else if((luma_pred_mode > 5) && (luma_pred_mode < 15))
+ {
+ ps_ctxt->i4_scan_idx = SCAN_VERT;
+ }
+ }
+ }
+
+ /* RDOPT copy States : TU init (best until prev TU) to current */
+ COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
+ &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
+ .s_cabac_ctxt.au1_ctxt_models[0] +
+ IHEVC_CAB_COEFFX_PREFIX,
+ &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
+ IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
+
+ if(ps_prms->u1_recompute_sbh_and_rdoq)
+ {
+ perform_sbh = (ps_ctxt->i4_sbh_level != NO_SBH);
+ perform_rdoq = (ps_ctxt->i4_rdoq_level != NO_RDOQ);
+ }
+ else
+ {
+ /* RDOQ will change the coefficients. If coefficients are changed, we will have to do sbh again*/
+ perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh;
+ /* To do SBH we need the quant and iquant data. This would mean we need to do quantization again, which would mean
+ we would have to do RDOQ again.*/
+ perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq;
+ }
+
+#if DISABLE_RDOQ_INTRA
+ if(PRED_MODE_INTRA == packed_pred_mode)
+ {
+ perform_rdoq = 0;
+ }
+#endif
+ /*If BEST candidate RDOQ is enabled, Eithe no coef level rdoq or CU level rdoq has to be enabled
+ so that all candidates and best candidate are quantized with same rounding factor */
+ if(1 == perform_rdoq)
+ {
+ ASSERT(ps_ctxt->i4_quant_rounding_level != TU_LEVEL_QUANT_ROUNDING);
+ }
+
+ cbf = ihevce_t_q_iq_ssd_scan_fxn(
+ ps_ctxt,
+ pu1_cur_pred,
+ pred_strd,
+ pu1_cur_src,
+ src_strd,
+ pi2_cur_deq_data,
+ cu_size, /*deq_data stride is cu_size*/
+ pu1_cur_luma_recon,
+ recon_luma_strd,
+ pu1_final_ecd_data,
+ pu1_csbf_buf,
+ csbf_strd,
+ trans_size,
+ packed_pred_mode,
+ &temp_cost,
+ &num_bytes,
+ &temp_bits,
+ &u4_tu_sad,
+ &zero_cols,
+ &zero_rows,
+ &au1_is_recon_available[0],
+ perform_rdoq, //(BEST_CAND_RDOQ == ps_ctxt->i4_rdoq_level),
+ perform_sbh,
+#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ !ps_ctxt->u1_is_refPic ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
+ : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
+ (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
+ 100.0,
+ ps_prms->u1_is_cu_noisy,
+#endif
+ u1_compute_spatial_ssd_luma ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
+ 1 /*early cbf*/
+ ); //(BEST_CAND_SBH == ps_ctxt->i4_sbh_level));
+
+ /* Accumulate luma residual bits */
+ ps_best_cu_prms->u4_cu_luma_res_bits += temp_bits;
+
+ /* RDOPT copy States : New updated after curr TU to TU init */
+ if(0 != cbf)
+ {
+ /* update to new state only if CBF is non zero */
+ COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
+ &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
+ &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
+ .s_cabac_ctxt.au1_ctxt_models[0] +
+ IHEVC_CAB_COEFFX_PREFIX,
+ IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
+ }
+
+ /* accumulate the TU sad into cu sad */
+ ps_best_cu_prms->u4_cu_sad += u4_tu_sad;
+ ps_tu->b1_y_cbf = cbf;
+ ps_tu_enc_loop_temp_prms->i2_luma_bytes_consumed = num_bytes;
+
+ /* If somebody updates cbf (RDOQ or SBH), update in nbr str. for BS */
+ if((ps_prms->u1_will_cabac_state_change) && (!ps_prms->u1_is_first_pass))
+ {
+ WORD32 num_4x4_in_cu = u1_cu_size >> 2;
+ nbr_4x4_t *ps_cur_nbr_4x4 = &ps_ctxt->as_cu_nbr[rd_opt_best_idx][0];
+ ps_cur_nbr_4x4 = (ps_cur_nbr_4x4 + (cu_pos_x_in_pix >> 2));
+ ps_cur_nbr_4x4 += ((cu_pos_y_in_pix >> 2) * num_4x4_in_cu);
+ /* replicate the nbr 4x4 structure for all 4x4 blocks of the current TU */
+ ps_cur_nbr_4x4->b1_y_cbf = cbf;
+ /*copy the cu qp. This will be overwritten by qp calculated based on skip flag at final stage of cu mode decide*/
+ ps_cur_nbr_4x4->b8_qp = ps_ctxt->i4_cu_qp;
+ /* Qp and cbf are stored for the all 4x4 in TU */
+ {
+ WORD32 i, j;
+ nbr_4x4_t *ps_tmp_4x4;
+ ps_tmp_4x4 = ps_cur_nbr_4x4;
+
+ for(i = 0; i < num_4x4_in_tu; i++)
+ {
+ for(j = 0; j < num_4x4_in_tu; j++)
+ {
+ ps_tmp_4x4[j].b8_qp = ps_ctxt->i4_cu_qp;
+ ps_tmp_4x4[j].b1_y_cbf = cbf;
+ }
+ /* row level update*/
+ ps_tmp_4x4 += num_4x4_in_cu;
+ }
+ }
+ }
+ }
+ else
+ {
+ zero_cols = ps_tu_enc_loop_temp_prms->u4_luma_zero_col;
+ zero_rows = ps_tu_enc_loop_temp_prms->u4_luma_zero_row;
+
+ if(ps_prms->u1_will_cabac_state_change)
+ {
+ num_bytes = ps_tu_enc_loop_temp_prms->i2_luma_bytes_consumed;
+ }
+ else
+ {
+ num_bytes = 0;
+ }
+
+ /* copy luma ecd data to final buffer */
+ memcpy(pu1_final_ecd_data, pu1_old_ecd_data, num_bytes);
+
+ pu1_old_ecd_data += num_bytes;
+
+ au1_is_recon_available[0] = 0;
+ }
+
+ /**-------- Compute Recon data (Do IT & Recon) : Luma -----------**/
+ if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
+ (!u1_compute_spatial_ssd_luma ||
+ (!au1_is_recon_available[0] && u1_compute_spatial_ssd_luma)))
+ {
+ if(!ps_recon_datastore->u1_is_lumaRecon_available ||
+ (ps_recon_datastore->u1_is_lumaRecon_available &&
+ (UCHAR_MAX == ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr])))
+ {
+ ihevce_it_recon_fxn(
+ ps_ctxt,
+ pi2_cur_deq_data,
+ cu_size,
+ pu1_cur_pred,
+ pred_strd,
+ pu1_cur_luma_recon,
+ recon_luma_strd,
+ pu1_final_ecd_data,
+ trans_size,
+ packed_pred_mode,
+ ps_tu->b1_y_cbf,
+ zero_cols,
+ zero_rows);
+ }
+ else if(
+ ps_recon_datastore->u1_is_lumaRecon_available &&
+ (UCHAR_MAX != ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr]))
+ {
+ UWORD8 *pu1_recon_src =
+ ((UWORD8 *)ps_recon_datastore->apv_luma_recon_bufs
+ [ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr]]) +
+ cu_pos_x_in_pix + cu_pos_y_in_pix * ps_recon_datastore->i4_lumaRecon_stride;
+
+ ps_ctxt->s_cmn_opt_func.pf_copy_2d(
+ pu1_cur_luma_recon,
+ recon_luma_strd,
+ pu1_recon_src,
+ ps_recon_datastore->i4_lumaRecon_stride,
+ trans_size,
+ trans_size);
+ }
+ }
+
+ if(ps_prms->u1_will_cabac_state_change)
+ {
+ ps_tu_enc_loop->i4_luma_coeff_offset = total_bytes;
+ }
+
+ pu1_final_ecd_data += num_bytes;
+ /* update total bytes consumed */
+ total_bytes += num_bytes;
+
+ u1_is_cu_coded |= ps_tu->b1_y_cbf;
+
+ /***************** Compute T,Q,IQ,IT & Recon for Chroma ********************/
+ if(1 == chrm_present_flag)
+ {
+ pu1_cur_src_chrm = pu1_src_chrm + cu_pos_x_in_pix;
+ pu1_cur_src_chrm += ((cu_pos_y_in_pix >> 1) * src_chrm_strd) +
+ (u1_is_422 * ((cu_pos_y_in_pix >> 1) * src_chrm_strd));
+
+ pu1_cur_pred_chrm = pu1_pred_chrm + cu_pos_x_in_pix;
+ pu1_cur_pred_chrm += ((cu_pos_y_in_pix >> 1) * pred_chrm_strd) +
+ (u1_is_422 * ((cu_pos_y_in_pix >> 1) * pred_chrm_strd));
+
+ pu1_cur_chroma_recon = pu1_chrm_recon + cu_pos_x_in_pix;
+ pu1_cur_chroma_recon += ((cu_pos_y_in_pix >> 1) * recon_chrma_strd) +
+ (u1_is_422 * ((cu_pos_y_in_pix >> 1) * recon_chrma_strd));
+
+ pi2_cur_deq_data_chrm = pi2_chrm_deq_data + cu_pos_x_in_pix;
+ pi2_cur_deq_data_chrm += ((cu_pos_y_in_pix >> 1) * cu_size) +
+ (u1_is_422 * ((cu_pos_y_in_pix >> 1) * cu_size));
+
+ if(INCLUDE_CHROMA_DURING_TU_RECURSION &&
+ (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P0) &&
+ (PRED_MODE_INTRA != packed_pred_mode))
+ {
+ WORD32 i4_num_bytes;
+ UWORD8 *pu1_chroma_pred;
+ UWORD8 *pu1_chroma_recon;
+ WORD16 *pi2_chroma_deq;
+ UWORD32 u4_zero_col;
+ UWORD32 u4_zero_row;
+
+ for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus; i4_subtu_idx++)
+ {
+ WORD32 chroma_trans_size = MAX(4, trans_size >> 1);
+ WORD32 i4_subtu_pos_x = cu_pos_x_in_pix;
+ WORD32 i4_subtu_pos_y = cu_pos_y_in_pix + (i4_subtu_idx * chroma_trans_size);
+
+ if(0 == u1_is_422)
+ {
+ i4_subtu_pos_y >>= 1;
+ }
+
+ pu1_chroma_pred =
+ pu1_cur_pred_chrm + (i4_subtu_idx * chroma_trans_size * pred_chrm_strd);
+ pu1_chroma_recon = pu1_cur_chroma_recon +
+ (i4_subtu_idx * chroma_trans_size * recon_chrma_strd);
+ pi2_chroma_deq =
+ pi2_cur_deq_data_chrm + (i4_subtu_idx * chroma_trans_size * cu_size);
+
+ u4_zero_col = ps_tu_enc_loop_temp_prms->au4_cb_zero_col[i4_subtu_idx];
+ u4_zero_row = ps_tu_enc_loop_temp_prms->au4_cb_zero_row[i4_subtu_idx];
+
+ if(ps_prms->u1_will_cabac_state_change)
+ {
+ i4_num_bytes =
+ ps_tu_enc_loop_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx];
+ }
+ else
+ {
+ i4_num_bytes = 0;
+ }
+
+ memcpy(pu1_final_ecd_data, pu1_old_ecd_data, i4_num_bytes);
+
+ pu1_old_ecd_data += i4_num_bytes;
+
+ au1_is_recon_available[U_PLANE] = 0;
+
+ if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
+ (!u1_compute_spatial_ssd_chroma ||
+ (!au1_is_recon_available[U_PLANE] && u1_compute_spatial_ssd_chroma)))
+ {
+ if(!ps_recon_datastore->au1_is_chromaRecon_available[0] ||
+ (ps_recon_datastore->au1_is_chromaRecon_available[0] &&
+ (UCHAR_MAX ==
+ ps_recon_datastore
+ ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx])))
+ {
+ ihevce_chroma_it_recon_fxn(
+ ps_ctxt,
+ pi2_chroma_deq,
+ cu_size,
+ pu1_chroma_pred,
+ pred_chrm_strd,
+ pu1_chroma_recon,
+ recon_chrma_strd,
+ pu1_final_ecd_data,
+ chroma_trans_size,
+ (i4_subtu_idx == 0) ? ps_tu->b1_cb_cbf : ps_tu->b1_cb_cbf_subtu1,
+ u4_zero_col,
+ u4_zero_row,
+ U_PLANE);
+ }
+ else if(
+ ps_recon_datastore->au1_is_chromaRecon_available[0] &&
+ (UCHAR_MAX !=
+ ps_recon_datastore
+ ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx]))
+ {
+ UWORD8 *pu1_recon_src =
+ ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs
+ [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon
+ [U_PLANE][ctr][i4_subtu_idx]]) +
+ i4_subtu_pos_x +
+ i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride;
+
+ ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
+ pu1_recon_src,
+ ps_recon_datastore->i4_lumaRecon_stride,
+ pu1_chroma_recon,
+ recon_chrma_strd,
+ chroma_trans_size,
+ chroma_trans_size,
+ U_PLANE);
+ }
+ }
+
+ u1_is_cu_coded |=
+ ((1 == i4_subtu_idx) ? ps_tu->b1_cb_cbf_subtu1 : ps_tu->b1_cb_cbf);
+
+ pu1_final_ecd_data += i4_num_bytes;
+ total_bytes += i4_num_bytes;
+ }
+
+ for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus; i4_subtu_idx++)
+ {
+ WORD32 chroma_trans_size = MAX(4, trans_size >> 1);
+ WORD32 i4_subtu_pos_x = cu_pos_x_in_pix;
+ WORD32 i4_subtu_pos_y = cu_pos_y_in_pix + (i4_subtu_idx * chroma_trans_size);
+
+ if(0 == u1_is_422)
+ {
+ i4_subtu_pos_y >>= 1;
+ }
+
+ pu1_chroma_pred =
+ pu1_cur_pred_chrm + (i4_subtu_idx * chroma_trans_size * pred_chrm_strd);
+ pu1_chroma_recon = pu1_cur_chroma_recon +
+ (i4_subtu_idx * chroma_trans_size * recon_chrma_strd);
+ pi2_chroma_deq = pi2_cur_deq_data_chrm +
+ (i4_subtu_idx * chroma_trans_size * cu_size) +
+ chroma_trans_size;
+
+ u4_zero_col = ps_tu_enc_loop_temp_prms->au4_cr_zero_col[i4_subtu_idx];
+ u4_zero_row = ps_tu_enc_loop_temp_prms->au4_cr_zero_row[i4_subtu_idx];
+
+ if(ps_prms->u1_will_cabac_state_change)
+ {
+ i4_num_bytes =
+ ps_tu_enc_loop_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx];
+ }
+ else
+ {
+ i4_num_bytes = 0;
+ }
+
+ memcpy(pu1_final_ecd_data, pu1_old_ecd_data, i4_num_bytes);
+
+ pu1_old_ecd_data += i4_num_bytes;
+
+ au1_is_recon_available[V_PLANE] = 0;
+
+ if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
+ (!u1_compute_spatial_ssd_chroma ||
+ (!au1_is_recon_available[V_PLANE] && u1_compute_spatial_ssd_chroma)))
+ {
+ if(!ps_recon_datastore->au1_is_chromaRecon_available[0] ||
+ (ps_recon_datastore->au1_is_chromaRecon_available[0] &&
+ (UCHAR_MAX ==
+ ps_recon_datastore
+ ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx])))
+ {
+ ihevce_chroma_it_recon_fxn(
+ ps_ctxt,
+ pi2_chroma_deq,
+ cu_size,
+ pu1_chroma_pred,
+ pred_chrm_strd,
+ pu1_chroma_recon,
+ recon_chrma_strd,
+ pu1_final_ecd_data,
+ chroma_trans_size,
+ (i4_subtu_idx == 0) ? ps_tu->b1_cr_cbf : ps_tu->b1_cr_cbf_subtu1,
+ u4_zero_col,
+ u4_zero_row,
+ V_PLANE);
+ }
+ else if(
+ ps_recon_datastore->au1_is_chromaRecon_available[0] &&
+ (UCHAR_MAX !=
+ ps_recon_datastore
+ ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx]))
+ {
+ UWORD8 *pu1_recon_src =
+ ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs
+ [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon
+ [V_PLANE][ctr][i4_subtu_idx]]) +
+ i4_subtu_pos_x +
+ i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride;
+
+ ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
+ pu1_recon_src,
+ ps_recon_datastore->i4_lumaRecon_stride,
+ pu1_chroma_recon,
+ recon_chrma_strd,
+ chroma_trans_size,
+ chroma_trans_size,
+ V_PLANE);
+ }
+ }
+
+ u1_is_cu_coded |=
+ ((1 == i4_subtu_idx) ? ps_tu->b1_cr_cbf_subtu1 : ps_tu->b1_cr_cbf);
+
+ pu1_final_ecd_data += i4_num_bytes;
+ total_bytes += i4_num_bytes;
+ }
+ }
+ else
+ {
+ WORD32 cb_zero_col, cb_zero_row, cr_zero_col, cr_zero_row;
+
+ for(i4_subtu_idx = 0; i4_subtu_idx < u1_num_subtus; i4_subtu_idx++)
+ {
+ WORD32 cb_cbf, cr_cbf;
+ WORD32 cb_num_bytes, cr_num_bytes;
+
+ WORD32 chroma_trans_size = MAX(4, trans_size >> 1);
+
+ WORD32 i4_subtu_pos_x = cu_pos_x_in_pix;
+ WORD32 i4_subtu_pos_y = cu_pos_y_in_pix + (i4_subtu_idx * chroma_trans_size);
+
+ if(0 == u1_is_422)
+ {
+ i4_subtu_pos_y >>= 1;
+ }
+
+ pu1_cur_src_chrm += (i4_subtu_idx * chroma_trans_size * src_chrm_strd);
+ pu1_cur_pred_chrm += (i4_subtu_idx * chroma_trans_size * pred_chrm_strd);
+ pu1_cur_chroma_recon += (i4_subtu_idx * chroma_trans_size * recon_chrma_strd);
+ pi2_cur_deq_data_chrm += (i4_subtu_idx * chroma_trans_size * cu_size);
+
+ if((PRED_MODE_INTRA == packed_pred_mode) &&
+ (1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data))
+ {
+ WORD32 nbr_flags, left_strd_chrm, chrm_pred_func_idx;
+ UWORD8 *pu1_left_chrm;
+ UWORD8 *pu1_top_chrm;
+ UWORD8 *pu1_top_left_chrm;
+
+ nbr_flags = ihevce_get_intra_chroma_tu_nbr(
+ *pu4_nbr_flags, i4_subtu_idx, chroma_trans_size, u1_is_422);
+
+ /* left cu boundary */
+ if(0 == i4_subtu_pos_x)
+ {
+ left_strd_chrm = ps_chrm_cu_buf_prms->i4_cu_left_stride;
+ pu1_left_chrm =
+ ps_chrm_cu_buf_prms->pu1_cu_left + i4_subtu_pos_y * left_strd_chrm;
+ }
+ else
+ {
+ pu1_left_chrm = pu1_cur_chroma_recon - 2;
+ left_strd_chrm = recon_chrma_strd;
+ }
+
+ /* top cu boundary */
+ if(0 == i4_subtu_pos_y)
+ {
+ pu1_top_chrm = ps_chrm_cu_buf_prms->pu1_cu_top + i4_subtu_pos_x;
+ }
+ else
+ {
+ pu1_top_chrm = pu1_cur_chroma_recon - recon_chrma_strd;
+ }
+
+ /* by default top left is set to cu top left */
+ pu1_top_left_chrm = ps_chrm_cu_buf_prms->pu1_cu_top_left;
+
+ /* top left based on position */
+ if((0 != i4_subtu_pos_y) && (0 == i4_subtu_pos_x))
+ {
+ pu1_top_left_chrm = pu1_left_chrm - left_strd_chrm;
+ }
+ else if(0 != i4_subtu_pos_x)
+ {
+ pu1_top_left_chrm = pu1_top_chrm - 2;
+ }
+
+ /* call the chroma reference array substitution */
+ ihevc_intra_pred_chroma_ref_substitution_fptr(
+ pu1_top_left_chrm,
+ pu1_top_chrm,
+ pu1_left_chrm,
+ left_strd_chrm,
+ chroma_trans_size,
+ nbr_flags,
+ (UWORD8 *)ps_ctxt->pv_ref_sub_out,
+ 1);
+
+ /* use the look up to get the function idx */
+ chrm_pred_func_idx = g_i4_ip_funcs[chroma_pred_mode];
+
+ /* call the intra prediction function */
+ ps_ctxt->apf_chrm_ip[chrm_pred_func_idx](
+ (UWORD8 *)ps_ctxt->pv_ref_sub_out,
+ 1,
+ pu1_cur_pred_chrm,
+ pred_chrm_strd,
+ chroma_trans_size,
+ chroma_pred_mode);
+ }
+
+ /**---------- Compute iq&coeff data if required : Chroma ------------**/
+ if(1 == ps_tu_enc_loop_temp_prms->b1_eval_chroma_iq_and_coeff_data)
+ {
+ WORD32 perform_sbh, perform_rdoq, temp_bits;
+
+ if(ps_prms->u1_recompute_sbh_and_rdoq)
+ {
+ perform_sbh = (ps_ctxt->i4_sbh_level != NO_SBH);
+ perform_rdoq = (ps_ctxt->i4_rdoq_level != NO_RDOQ);
+ }
+ else
+ {
+ /* RDOQ will change the coefficients. If coefficients are changed, we will have to do sbh again*/
+ perform_sbh = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh;
+ /* To do SBH we need the quant and iquant data. This would mean we need to do quantization again, which would mean
+ we would have to do RDOQ again.*/
+ perform_rdoq = ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq;
+ }
+
+ /* populate the coeffs scan idx */
+ ps_ctxt->i4_scan_idx = SCAN_DIAG_UPRIGHT;
+
+ if(PRED_MODE_INTRA == packed_pred_mode)
+ {
+ /* for 4x4 transforms based on intra pred mode scan is chosen*/
+ if(4 == chroma_trans_size)
+ {
+ /* for modes from 22 upto 30 horizontal scan is used */
+ if((chroma_pred_mode > 21) && (chroma_pred_mode < 31))
+ {
+ ps_ctxt->i4_scan_idx = SCAN_HORZ;
+ }
+ /* for modes from 6 upto 14 vertical scan is used */
+ else if((chroma_pred_mode > 5) && (chroma_pred_mode < 15))
+ {
+ ps_ctxt->i4_scan_idx = SCAN_VERT;
+ }
+ }
+ }
+
+#if DISABLE_RDOQ_INTRA
+ if(PRED_MODE_INTRA == packed_pred_mode)
+ {
+ perform_rdoq = 0;
+ }
+#endif
+
+ /* RDOPT copy States : TU init (best until prev TU) to current */
+ COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
+ &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
+ .s_cabac_ctxt.au1_ctxt_models[0] +
+ IHEVC_CAB_COEFFX_PREFIX,
+ &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
+ IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
+
+ ASSERT(rd_opt_best_idx == ps_ctxt->s_rdopt_entropy_ctxt.i4_curr_buf_idx);
+ /*If BEST candidate RDOQ is enabled, Either no coef level rdoq or CU level rdoq has to be enabled
+ so that all candidates and best candidate are quantized with same rounding factor */
+ if(1 == perform_rdoq)
+ {
+ ASSERT(ps_ctxt->i4_quant_rounding_level != TU_LEVEL_QUANT_ROUNDING);
+ }
+
+ if(!ps_best_cu_prms->u1_skip_flag ||
+ !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt)
+ {
+ /* Cb */
+ cb_cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
+ ps_ctxt,
+ pu1_cur_pred_chrm,
+ pred_chrm_strd,
+ pu1_cur_src_chrm,
+ src_chrm_strd,
+ pi2_cur_deq_data_chrm,
+ cu_size,
+ pu1_chrm_recon,
+ recon_chrma_strd,
+ pu1_final_ecd_data,
+ pu1_csbf_buf,
+ csbf_strd,
+ chroma_trans_size,
+ ps_ctxt->i4_scan_idx,
+ (PRED_MODE_INTRA == packed_pred_mode),
+ &cb_num_bytes,
+ &temp_bits,
+ &cb_zero_col,
+ &cb_zero_row,
+ &au1_is_recon_available[U_PLANE],
+ perform_sbh,
+ perform_rdoq,
+ &i8_ssd,
+#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ !ps_ctxt->u1_is_refPic
+ ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
+ : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
+ (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
+ 100.0,
+ ps_prms->u1_is_cu_noisy,
+#endif
+ ps_best_cu_prms->u1_skip_flag &&
+ ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt,
+ u1_compute_spatial_ssd_chroma ? SPATIAL_DOMAIN_SSD
+ : FREQUENCY_DOMAIN_SSD,
+ U_PLANE);
+ }
+ else
+ {
+ cb_cbf = 0;
+ temp_bits = 0;
+ cb_num_bytes = 0;
+ au1_is_recon_available[U_PLANE] = 0;
+ cb_zero_col = 0;
+ cb_zero_row = 0;
+ }
+
+ /* Accumulate chroma residual bits */
+ ps_best_cu_prms->u4_cu_chroma_res_bits += temp_bits;
+
+ /* RDOPT copy States : New updated after curr TU to TU init */
+ if(0 != cb_cbf)
+ {
+ COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
+ &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
+ &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
+ .s_cabac_ctxt.au1_ctxt_models[0] +
+ IHEVC_CAB_COEFFX_PREFIX,
+ IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
+ }
+ /* RDOPT copy States : Restoring back the Cb init state to Cr */
+ else
+ {
+ COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
+ &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
+ .s_cabac_ctxt.au1_ctxt_models[0] +
+ IHEVC_CAB_COEFFX_PREFIX,
+ &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
+ IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
+ }
+
+ if(!ps_best_cu_prms->u1_skip_flag ||
+ !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt)
+ {
+ /* Cr */
+ cr_cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
+ ps_ctxt,
+ pu1_cur_pred_chrm,
+ pred_chrm_strd,
+ pu1_cur_src_chrm,
+ src_chrm_strd,
+ pi2_cur_deq_data_chrm + chroma_trans_size,
+ cu_size,
+ pu1_chrm_recon,
+ recon_chrma_strd,
+ pu1_final_ecd_data + cb_num_bytes,
+ pu1_csbf_buf,
+ csbf_strd,
+ chroma_trans_size,
+ ps_ctxt->i4_scan_idx,
+ (PRED_MODE_INTRA == packed_pred_mode),
+ &cr_num_bytes,
+ &temp_bits,
+ &cr_zero_col,
+ &cr_zero_row,
+ &au1_is_recon_available[V_PLANE],
+ perform_sbh,
+ perform_rdoq,
+ &i8_ssd,
+#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ !ps_ctxt->u1_is_refPic
+ ? ALPHA_FOR_NOISE_TERM_IN_RDOPT
+ : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
+ (double)ALPHA_FOR_NOISE_TERM_IN_RDOPT) /
+ 100.0,
+ ps_prms->u1_is_cu_noisy,
+#endif
+ ps_best_cu_prms->u1_skip_flag &&
+ ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt,
+ u1_compute_spatial_ssd_chroma ? SPATIAL_DOMAIN_SSD
+ : FREQUENCY_DOMAIN_SSD,
+ V_PLANE);
+ }
+ else
+ {
+ cr_cbf = 0;
+ temp_bits = 0;
+ cr_num_bytes = 0;
+ au1_is_recon_available[V_PLANE] = 0;
+ cr_zero_col = 0;
+ cr_zero_row = 0;
+ }
+
+ /* Accumulate chroma residual bits */
+ ps_best_cu_prms->u4_cu_chroma_res_bits += temp_bits;
+
+ /* RDOPT copy States : New updated after curr TU to TU init */
+ if(0 != cr_cbf)
+ {
+ COPY_CABAC_STATES_FRM_CAB_COEFFX_PREFIX(
+ &ps_ctxt->au1_rdopt_init_ctxt_models[0] + IHEVC_CAB_COEFFX_PREFIX,
+ &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
+ .s_cabac_ctxt.au1_ctxt_models[0] +
+ IHEVC_CAB_COEFFX_PREFIX,
+ IHEVC_CAB_CTXT_END - IHEVC_CAB_COEFFX_PREFIX);
+ }
+
+ if(0 == i4_subtu_idx)
+ {
+ ps_tu->b1_cb_cbf = cb_cbf;
+ ps_tu->b1_cr_cbf = cr_cbf;
+ }
+ else
+ {
+ ps_tu->b1_cb_cbf_subtu1 = cb_cbf;
+ ps_tu->b1_cr_cbf_subtu1 = cr_cbf;
+ }
+ }
+ else
+ {
+ cb_zero_col = ps_tu_enc_loop_temp_prms->au4_cb_zero_col[i4_subtu_idx];
+ cb_zero_row = ps_tu_enc_loop_temp_prms->au4_cb_zero_row[i4_subtu_idx];
+ cr_zero_col = ps_tu_enc_loop_temp_prms->au4_cr_zero_col[i4_subtu_idx];
+ cr_zero_row = ps_tu_enc_loop_temp_prms->au4_cr_zero_row[i4_subtu_idx];
+
+ if(ps_prms->u1_will_cabac_state_change)
+ {
+ cb_num_bytes =
+ ps_tu_enc_loop_temp_prms->ai2_cb_bytes_consumed[i4_subtu_idx];
+ }
+ else
+ {
+ cb_num_bytes = 0;
+ }
+
+ if(ps_prms->u1_will_cabac_state_change)
+ {
+ cr_num_bytes =
+ ps_tu_enc_loop_temp_prms->ai2_cr_bytes_consumed[i4_subtu_idx];
+ }
+ else
+ {
+ cr_num_bytes = 0;
+ }
+
+ /* copy cb ecd data to final buffer */
+ memcpy(pu1_final_ecd_data, pu1_chrm_old_ecd_data, cb_num_bytes);
+
+ pu1_chrm_old_ecd_data += cb_num_bytes;
+
+ /* copy cr ecd data to final buffer */
+ memcpy(
+ (pu1_final_ecd_data + cb_num_bytes),
+ pu1_chrm_old_ecd_data,
+ cr_num_bytes);
+
+ pu1_chrm_old_ecd_data += cr_num_bytes;
+
+ au1_is_recon_available[U_PLANE] = 0;
+ au1_is_recon_available[V_PLANE] = 0;
+ }
+
+ /**-------- Compute Recon data (Do IT & Recon) : Chroma -----------**/
+ if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
+ (!u1_compute_spatial_ssd_chroma ||
+ (!au1_is_recon_available[U_PLANE] && u1_compute_spatial_ssd_chroma)))
+ {
+ if(!ps_recon_datastore->au1_is_chromaRecon_available[0] ||
+ (ps_recon_datastore->au1_is_chromaRecon_available[0] &&
+ (UCHAR_MAX ==
+ ps_recon_datastore
+ ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx])))
+ {
+ ihevce_chroma_it_recon_fxn(
+ ps_ctxt,
+ pi2_cur_deq_data_chrm,
+ cu_size,
+ pu1_cur_pred_chrm,
+ pred_chrm_strd,
+ pu1_cur_chroma_recon,
+ recon_chrma_strd,
+ pu1_final_ecd_data,
+ chroma_trans_size,
+ (i4_subtu_idx == 0) ? ps_tu->b1_cb_cbf : ps_tu->b1_cb_cbf_subtu1,
+ cb_zero_col,
+ cb_zero_row,
+ U_PLANE);
+ }
+ else if(
+ ps_recon_datastore->au1_is_chromaRecon_available[0] &&
+ (UCHAR_MAX !=
+ ps_recon_datastore
+ ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][i4_subtu_idx]))
+ {
+ UWORD8 *pu1_recon_src =
+ ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs
+ [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon
+ [U_PLANE][ctr][i4_subtu_idx]]) +
+ i4_subtu_pos_x +
+ i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride;
+
+ ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
+ pu1_recon_src,
+ ps_recon_datastore->i4_lumaRecon_stride,
+ pu1_cur_chroma_recon,
+ recon_chrma_strd,
+ chroma_trans_size,
+ chroma_trans_size,
+ U_PLANE);
+ }
+ }
+
+ u1_is_cu_coded |=
+ ((1 == i4_subtu_idx) ? ps_tu->b1_cb_cbf_subtu1 : ps_tu->b1_cb_cbf);
+
+ if(ps_prms->u1_will_cabac_state_change)
+ {
+ ps_tu_enc_loop->ai4_cb_coeff_offset[i4_subtu_idx] = total_bytes;
+ }
+
+ pu1_final_ecd_data += cb_num_bytes;
+ /* update total bytes consumed */
+ total_bytes += cb_num_bytes;
+
+ if(ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data &&
+ (!u1_compute_spatial_ssd_chroma ||
+ (!au1_is_recon_available[V_PLANE] && u1_compute_spatial_ssd_chroma)))
+ {
+ if(!ps_recon_datastore->au1_is_chromaRecon_available[0] ||
+ (ps_recon_datastore->au1_is_chromaRecon_available[0] &&
+ (UCHAR_MAX ==
+ ps_recon_datastore
+ ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx])))
+ {
+ ihevce_chroma_it_recon_fxn(
+ ps_ctxt,
+ pi2_cur_deq_data_chrm + chroma_trans_size,
+ cu_size,
+ pu1_cur_pred_chrm,
+ pred_chrm_strd,
+ pu1_cur_chroma_recon,
+ recon_chrma_strd,
+ pu1_final_ecd_data,
+ chroma_trans_size,
+ (i4_subtu_idx == 0) ? ps_tu->b1_cr_cbf : ps_tu->b1_cr_cbf_subtu1,
+ cr_zero_col,
+ cr_zero_row,
+ V_PLANE);
+ }
+ else if(
+ ps_recon_datastore->au1_is_chromaRecon_available[0] &&
+ (UCHAR_MAX !=
+ ps_recon_datastore
+ ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][i4_subtu_idx]))
+ {
+ UWORD8 *pu1_recon_src =
+ ((UWORD8 *)ps_recon_datastore->apv_chroma_recon_bufs
+ [ps_recon_datastore->au1_bufId_with_winning_ChromaRecon
+ [V_PLANE][ctr][i4_subtu_idx]]) +
+ i4_subtu_pos_x +
+ i4_subtu_pos_y * ps_recon_datastore->i4_chromaRecon_stride;
+
+ ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
+ pu1_recon_src,
+ ps_recon_datastore->i4_lumaRecon_stride,
+ pu1_cur_chroma_recon,
+ recon_chrma_strd,
+ chroma_trans_size,
+ chroma_trans_size,
+ V_PLANE);
+ }
+ }
+
+ u1_is_cu_coded |=
+ ((1 == i4_subtu_idx) ? ps_tu->b1_cr_cbf_subtu1 : ps_tu->b1_cr_cbf);
+
+ if(ps_prms->u1_will_cabac_state_change)
+ {
+ ps_tu_enc_loop->ai4_cr_coeff_offset[i4_subtu_idx] = total_bytes;
+ }
+
+ pu1_final_ecd_data += cr_num_bytes;
+ /* update total bytes consumed */
+ total_bytes += cr_num_bytes;
+ }
+ }
+ }
+ else
+ {
+ ps_tu_enc_loop->ai4_cb_coeff_offset[0] = total_bytes;
+ ps_tu_enc_loop->ai4_cr_coeff_offset[0] = total_bytes;
+ ps_tu_enc_loop->ai4_cb_coeff_offset[1] = total_bytes;
+ ps_tu_enc_loop->ai4_cr_coeff_offset[1] = total_bytes;
+ ps_tu->b1_cb_cbf = 0;
+ ps_tu->b1_cr_cbf = 0;
+ ps_tu->b1_cb_cbf_subtu1 = 0;
+ ps_tu->b1_cr_cbf_subtu1 = 0;
+ }
+
+ /* Update to next TU */
+ ps_tu_enc_loop++;
+ ps_tu_enc_loop_temp_prms++;
+
+ pu4_nbr_flags++;
+ pu1_intra_pred_mode++;
+
+ /*Do not set the nbr map for last tu in cu */
+ if((num_tu_in_cu - 1) != ctr)
+ {
+ /* set the neighbour map to 1 */
+ ihevce_set_nbr_map(
+ ps_ctxt->pu1_ctb_nbr_map,
+ ps_ctxt->i4_nbr_map_strd,
+ cu_pos_x_in_4x4,
+ cu_pos_y_in_4x4,
+ (trans_size >> 2),
+ 1);
+ }
+ }
+
+ if(ps_prms->u1_will_cabac_state_change)
+ {
+ ps_best_cu_prms->u1_is_cu_coded = u1_is_cu_coded;
+
+ /* Modify skip flag, if luma is skipped & Chroma is coded */
+ if((1 == u1_is_cu_coded) && (PRED_MODE_SKIP == packed_pred_mode))
+ {
+ ps_best_cu_prms->u1_skip_flag = 0;
+ }
+ }
+
+ /* during chroma evaluation if skip decision was over written */
+ /* then the current skip candidate is set to a non skip candidate */
+ if(PRED_MODE_INTRA != packed_pred_mode)
+ {
+ ps_best_inter_cand->b1_skip_flag = ps_best_cu_prms->u1_skip_flag;
+ }
+
+ /**------------- Compute header data if required --------------**/
+ if(1 == ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data)
+ {
+ WORD32 cbf_bits;
+ WORD32 cu_bits;
+ WORD32 unit_4x4_size = cu_size >> 2;
+
+ /*Restoring the running reference into the best rdopt_ctxt cabac states which will then
+ be copied as the base reference for the next cu
+ Assumption : We are ensuring that the u1_eval_header_data flag is set to 1 only if either
+ luma and chroma are being reevaluated*/
+ COPY_CABAC_STATES(
+ &ps_ctxt->s_rdopt_entropy_ctxt.as_cu_entropy_ctxt[rd_opt_best_idx]
+ .s_cabac_ctxt.au1_ctxt_models[0],
+ &ps_ctxt->au1_rdopt_init_ctxt_models[0],
+ IHEVC_CAB_CTXT_END);
+
+ /* get the neighbour availability flags for current cu */
+ ihevce_get_only_nbr_flag(
+ &s_nbr,
+ ps_ctxt->pu1_ctb_nbr_map,
+ ps_ctxt->i4_nbr_map_strd,
+ (cu_pos_x << 1),
+ (cu_pos_y << 1),
+ unit_4x4_size,
+ unit_4x4_size);
+
+ cu_bits = ihevce_entropy_rdo_encode_cu(
+ &ps_ctxt->s_rdopt_entropy_ctxt,
+ ps_best_cu_prms,
+ cu_pos_x,
+ cu_pos_y,
+ cu_size,
+ ps_ctxt->u1_disable_intra_eval ? !DISABLE_TOP_SYNC && s_nbr.u1_top_avail
+ : s_nbr.u1_top_avail,
+ s_nbr.u1_left_avail,
+ (pu1_final_ecd_data - total_bytes),
+ &cbf_bits);
+
+ /* cbf bits are excluded from header bits, instead considered as texture bits */
+ ps_best_cu_prms->u4_cu_hdr_bits = cu_bits - cbf_bits;
+ ps_best_cu_prms->u4_cu_cbf_bits = cbf_bits;
+ }
+
+ if(ps_prms->u1_will_cabac_state_change)
+ {
+ ps_best_cu_prms->i4_num_bytes_ecd_data = total_bytes;
+ }
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_set_eval_flags \endif
+*
+* \brief
+* Sets the CU level final recon eval flags in ps_ctxt and the per-TU luma and
+* chroma iq/coeff eval flags, based on best-candidate RDOQ/SBH and quality preset
+*
+* \param[in,out] ps_ctxt : encoder ctxt pointer (s_cu_final_recon_flags is written)
+* \param[in,out] ps_enc_loop_bestprms : pointer to final cu params (per-TU flags written)
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_set_eval_flags(
+ ihevce_enc_loop_ctxt_t *ps_ctxt, enc_loop_cu_final_prms_t *ps_enc_loop_bestprms)
+{
+ WORD32 count = 0;
+
+ ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data = 0; /* default; may be set to 1 below */
+
+ ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data =
+ !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
+
+ if(ps_ctxt->u1_disable_intra_eval && (!(ps_ctxt->i4_deblk_pad_hpel_cur_pic & 0x1)))
+ {
+ ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data = 0; /* only case where recon eval is off */
+ }
+ else
+ {
+ ps_ctxt->s_cu_final_recon_flags.u1_eval_recon_data = 1;
+ }
+
+ if((1 == ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_rdoq) ||
+ (1 == ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_best_cand_sbh))
+ {
+ /* When rdoq is enabled only for the best candidate, in case of Intra nTU (more than one TU),
+ RDOQ might have altered the coeffs of the neighbour CU. As a result, the pred
+ for the current CU will change. Therefore, we need to reevaluate the pred data*/
+ if((ps_enc_loop_bestprms->u2_num_tus_in_cu > 1) &&
+ (ps_enc_loop_bestprms->u1_intra_flag == 1))
+ {
+ ps_ctxt->s_cu_final_recon_flags.u1_eval_luma_pred_data = 1;
+ ps_ctxt->s_cu_final_recon_flags.u1_eval_chroma_pred_data = 1;
+ }
+ if(ps_enc_loop_bestprms->u1_skip_flag == 1) /* skip CU: clear both per-TU eval flags */
+ {
+ for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
+ {
+ ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
+ .b1_eval_luma_iq_and_coeff_data = 0;
+ ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
+ .b1_eval_chroma_iq_and_coeff_data = 0;
+ }
+ }
+ else /* non-skip CU: set both per-TU eval flags */
+ {
+ for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
+ {
+ ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
+ .b1_eval_luma_iq_and_coeff_data = 1;
+ ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
+ .b1_eval_chroma_iq_and_coeff_data = 1;
+ }
+ }
+ }
+ else /* neither best-candidate RDOQ nor SBH: per-TU flags depend on the quality preset */
+ {
+ switch(ps_ctxt->i4_quality_preset)
+ {
+ case IHEVCE_QUALITY_P0:
+ case IHEVCE_QUALITY_P2:
+ case IHEVCE_QUALITY_P3:
+ {
+ for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
+ {
+ ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
+ .b1_eval_luma_iq_and_coeff_data = 0;
+ ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
+ .b1_eval_chroma_iq_and_coeff_data =
+ !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
+ }
+
+ break;
+ }
+ case IHEVCE_QUALITY_P4:
+ case IHEVCE_QUALITY_P5: /* same settings as the P0/P2/P3 case above */
+ {
+ for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
+ {
+ ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
+ .b1_eval_luma_iq_and_coeff_data = 0;
+ ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
+ .b1_eval_chroma_iq_and_coeff_data =
+ !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
+ }
+
+ break;
+ }
+ case IHEVCE_QUALITY_P6:
+ {
+ for(count = 0; count < ps_enc_loop_bestprms->u2_num_tus_in_cu; count++)
+ {
+ ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
+ .b1_eval_luma_iq_and_coeff_data = 0;
+#if !ENABLE_CHROMA_TRACKING_OF_LUMA_CBF_IN_XS25
+ ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
+ .b1_eval_chroma_iq_and_coeff_data =
+ !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
+#else
+ if((ps_ctxt->i1_slice_type == BSLICE) && (ps_ctxt->i4_temporal_layer_id > 1) &&
+ (ps_enc_loop_bestprms->as_tu_enc_loop[count].s_tu.b3_size >= 2))
+ { /* chroma eval flag tracks this TU's luma cbf */
+ ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
+ .b1_eval_chroma_iq_and_coeff_data =
+ ps_enc_loop_bestprms->as_tu_enc_loop[count].s_tu.b1_y_cbf;
+ }
+ else
+ {
+ ps_enc_loop_bestprms->as_tu_enc_loop_temp_prms[count]
+ .b1_eval_chroma_iq_and_coeff_data =
+ !ps_ctxt->s_chroma_rdopt_ctxt.u1_eval_chrm_rdopt;
+ }
+#endif
+ }
+
+ break;
+ }
+ default: /* any other preset: per-TU eval flags are left untouched */
+ {
+ break;
+ }
+ }
+ }
+
+ /* Header data eval flag is set unconditionally, for every preset */
+ ps_ctxt->s_cu_final_recon_flags.u1_eval_header_data = 1;
+}
+
+/**
+******************************************************************************
+*
+* @brief Shrinks the TU tree of inter CUs by merging redundant child nodes
+* (not coded children) into a parent node(not coded).
+*
+* @par Description
+* This is required post RDO evaluation as TU decisions are
+* pre-determined(pre RDO) based on recursive SATD,
+* while the quad children TU's can be skipped during RDO
+*
+* The shrink process is applied iteratively till there are no
+* more modes to shrink
+*
+* @param[inout] ps_tu_enc_loop
+* pointer to tu enc loop params of inter cu
+*
+* @param[inout] ps_tu_enc_loop_temp_prms
+* pointer to temp tu enc loop params of inter cu
+*
+* @param[in] num_tu_in_cu
+* number of tus in cu
+*
+* @return modified number of tus in cu
+*
+******************************************************************************
+*/
+WORD32 ihevce_shrink_inter_tu_tree(
+    tu_enc_loop_out_t *ps_tu_enc_loop,
+    tu_enc_loop_temp_prms_t *ps_tu_enc_loop_temp_prms,
+    recon_datastore_t *ps_recon_datastore,
+    WORD32 num_tu_in_cu,
+    UWORD8 u1_is_422)
+{
+    WORD32 recurse = 1;
+    WORD32 ctr;
+
+    /* ------------- Quadtree TU Split Transform flag optimization ------------ */
+    /* Post RDO, if all 4 child nodes are not coded, the overheads of split TU  */
+    /* flags and cbf flags are saved by merging them into the parent node and   */
+    /* marking the parent TU as not coded:                                      */
+    /*                                                                          */
+    /*                          ParentTUSplit=1                                 */
+    /*                                |                                         */
+    /*     ---------------------------------------------------------            */
+    /*     |C0(Not coded) | C1(Not coded) | C2(Not coded) | C3(Not coded)       */
+    /*                               ||                                         */
+    /*                               \/                                         */
+    /*                    ParentTUSplit=0 (Not Coded)                           */
+    /*                                                                          */
+    /* Each pass rescans the list; a pass that merges anything triggers another */
+    /* ------------------------------------------------------------------------ */
+    while((num_tu_in_cu > 4) && recurse)
+    {
+        recurse = 0;
+
+        /* Inter-CU validation is not possible here: b1_intra_flag is no longer */
+        /* a member of the tu structure.                                        */
+
+        /* loop for all tu blocks in current cu */
+        for(ctr = 0; ctr < num_tu_in_cu;)
+        {
+            /* Get current tu posx, posy and size */
+            WORD32 curr_pos_x = ps_tu_enc_loop[ctr].s_tu.b4_pos_x << 2;
+            WORD32 curr_pos_y = ps_tu_enc_loop[ctr].s_tu.b4_pos_y << 2;
+            /* +1 is for parents size */
+            WORD32 parent_tu_size = 1 << (ps_tu_enc_loop[ctr].s_tu.b3_size + 2 + 1);
+
+            /* A merge is only evaluated when the first tu sits at its parent's position */
+            WORD32 eval_merge = ((curr_pos_x & (parent_tu_size - 1)) == 0);
+            eval_merge &= ((curr_pos_y & (parent_tu_size - 1)) == 0);
+            eval_merge &= ((ctr + 3) < num_tu_in_cu); /* ctr+1..ctr+3 must be valid entries */
+            /* As TUs are published in encode order (Z SCAN), four consecutive TUs of the */
+            /* same size imply leaf nodes. eval_merge is tested first so that entries     */
+            /* beyond num_tu_in_cu are never read.                                        */
+            if(eval_merge &&
+               ((ps_tu_enc_loop[ctr].s_tu.b3_size) == (ps_tu_enc_loop[ctr + 1].s_tu.b3_size)) &&
+               ((ps_tu_enc_loop[ctr].s_tu.b3_size) == (ps_tu_enc_loop[ctr + 2].s_tu.b3_size)) &&
+               ((ps_tu_enc_loop[ctr].s_tu.b3_size) == (ps_tu_enc_loop[ctr + 3].s_tu.b3_size)))
+            {
+                WORD32 merge_parent = 1;
+
+                /* If any leaf node is coded, it cannot be merged to parent */
+                if((ps_tu_enc_loop[ctr].s_tu.b1_y_cbf) || (ps_tu_enc_loop[ctr].s_tu.b1_cb_cbf) ||
+                   (ps_tu_enc_loop[ctr].s_tu.b1_cr_cbf) ||
+
+                   (ps_tu_enc_loop[ctr + 1].s_tu.b1_y_cbf) ||
+                   (ps_tu_enc_loop[ctr + 1].s_tu.b1_cb_cbf) ||
+                   (ps_tu_enc_loop[ctr + 1].s_tu.b1_cr_cbf) ||
+
+                   (ps_tu_enc_loop[ctr + 2].s_tu.b1_y_cbf) ||
+                   (ps_tu_enc_loop[ctr + 2].s_tu.b1_cb_cbf) ||
+                   (ps_tu_enc_loop[ctr + 2].s_tu.b1_cr_cbf) ||
+
+                   (ps_tu_enc_loop[ctr + 3].s_tu.b1_y_cbf) ||
+                   (ps_tu_enc_loop[ctr + 3].s_tu.b1_cb_cbf) ||
+                   (ps_tu_enc_loop[ctr + 3].s_tu.b1_cr_cbf))
+                {
+                    merge_parent = 0;
+                }
+
+                if(u1_is_422)
+                {
+                    if((ps_tu_enc_loop[ctr].s_tu.b1_cb_cbf_subtu1) ||
+                       (ps_tu_enc_loop[ctr].s_tu.b1_cr_cbf_subtu1) ||
+
+                       (ps_tu_enc_loop[ctr + 1].s_tu.b1_cb_cbf_subtu1) ||
+                       (ps_tu_enc_loop[ctr + 1].s_tu.b1_cr_cbf_subtu1) ||
+
+                       (ps_tu_enc_loop[ctr + 2].s_tu.b1_cb_cbf_subtu1) ||
+                       (ps_tu_enc_loop[ctr + 2].s_tu.b1_cr_cbf_subtu1) ||
+
+                       (ps_tu_enc_loop[ctr + 3].s_tu.b1_cb_cbf_subtu1) ||
+                       (ps_tu_enc_loop[ctr + 3].s_tu.b1_cr_cbf_subtu1))
+                    {
+                        merge_parent = 0;
+                    }
+                }
+
+                if(merge_parent)
+                {
+                    /* Merge all the children (ctr,ctr+1,ctr+2,ctr+3) to parent (ctr) */
+
+                    if(ps_recon_datastore->u1_is_lumaRecon_available)
+                    {
+                        ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr] = UCHAR_MAX;
+
+                        memmove(
+                            &ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr + 1],
+                            &ps_recon_datastore->au1_bufId_with_winning_LumaRecon[ctr + 4],
+                            (num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
+                    }
+
+                    if(ps_recon_datastore->au1_is_chromaRecon_available[0])
+                    {
+                        ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][0] =
+                            UCHAR_MAX;
+                        ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][0] =
+                            UCHAR_MAX;
+                        /* NOTE(review): byte counts vs. [plane][tu][subtu] indexing - confirm whole TU entries move */
+                        memmove(
+                            &ps_recon_datastore
+                                 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 1][0],
+                            &ps_recon_datastore
+                                 ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 4][0],
+                            (num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
+
+                        memmove(
+                            &ps_recon_datastore
+                                 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 1][0],
+                            &ps_recon_datastore
+                                 ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 4][0],
+                            (num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
+
+                        if(u1_is_422)
+                        {
+                            ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr][1] =
+                                UCHAR_MAX;
+                            ps_recon_datastore->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr][1] =
+                                UCHAR_MAX;
+
+                            memmove(
+                                &ps_recon_datastore
+                                     ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 1][1],
+                                &ps_recon_datastore
+                                     ->au1_bufId_with_winning_ChromaRecon[U_PLANE][ctr + 4][1],
+                                (num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
+
+                            memmove(
+                                &ps_recon_datastore
+                                     ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 1][1],
+                                &ps_recon_datastore
+                                     ->au1_bufId_with_winning_ChromaRecon[V_PLANE][ctr + 4][1],
+                                (num_tu_in_cu - ctr - 4) * sizeof(UWORD8));
+                        }
+                    }
+
+                    /* Parent node size is one more than that of child */
+                    ps_tu_enc_loop[ctr].s_tu.b3_size++;
+
+                    ctr++;
+
+                    /* close the gap left by the three merged children (guaranteed non-negative count) */
+                    ASSERT(num_tu_in_cu >= (ctr + 3));
+                    memmove(
+                        (void *)(ps_tu_enc_loop + ctr),
+                        (void *)(ps_tu_enc_loop + ctr + 3),
+                        (num_tu_in_cu - ctr - 3) * sizeof(tu_enc_loop_out_t));
+
+                    /* Also memmove the temp TU params */
+                    memmove(
+                        (void *)(ps_tu_enc_loop_temp_prms + ctr),
+                        (void *)(ps_tu_enc_loop_temp_prms + ctr + 3),
+                        (num_tu_in_cu - ctr - 3) * sizeof(tu_enc_loop_temp_prms_t));
+
+                    /* Number of TUs in CU are now less by 3 */
+                    num_tu_in_cu -= 3;
+
+                    /* Run another pass, as the new parent may itself be mergeable later */
+                    recurse = 1;
+                }
+                else
+                {
+                    /* Go to next set of leaf nodes */
+                    ctr += 4;
+                }
+            }
+            else
+            {
+                ctr++;
+            }
+        }
+    }
+
+    /* return the modified num TUs */
+    ASSERT(num_tu_in_cu > 0);
+    return (num_tu_in_cu);
+}
+
+UWORD8 ihevce_intra_mode_nxn_hash_updater(
+    UWORD8 *pu1_mode_array, UWORD8 *pu1_hash_table, UWORD8 u1_num_ipe_modes)
+{
+    /* Appends modes not yet flagged in pu1_hash_table: mode - 1 / mode + 1 of the first */
+    /* MAX_INTRA_CU_CANDIDATES entries, then INTRA_PLANAR and INTRA_DC. Returns the count. */
+    WORD32 ai4_fixed_modes[2];
+    WORD32 cand_idx = 0;
+    WORD32 fixed_idx;
+
+    while(cand_idx < MAX_INTRA_CU_CANDIDATES)
+    {
+        /* always re-read via the pointer: an append may hit this very slot */
+        UWORD8 *pu1_cand = pu1_mode_array + cand_idx++;
+        WORD32 i4_nbr_mode;
+        if(*pu1_cand >= 35)
+        {
+            continue;
+        }
+
+        if(0 != *pu1_cand)
+        {
+            i4_nbr_mode = *pu1_cand - 1;
+            if(0 == pu1_hash_table[i4_nbr_mode])
+            {
+                pu1_hash_table[i4_nbr_mode] = 1;
+                pu1_mode_array[u1_num_ipe_modes++] = i4_nbr_mode;
+            }
+        }
+
+        if(34 != *pu1_cand)
+        {
+            i4_nbr_mode = *pu1_cand + 1;
+            if(0 == pu1_hash_table[i4_nbr_mode])
+            {
+                pu1_hash_table[i4_nbr_mode] = 1;
+                pu1_mode_array[u1_num_ipe_modes++] = i4_nbr_mode;
+            }
+        }
+    }
+
+    ai4_fixed_modes[0] = INTRA_PLANAR;
+    ai4_fixed_modes[1] = INTRA_DC;
+    for(fixed_idx = 0; fixed_idx < 2; fixed_idx++)
+    {
+        if(0 == pu1_hash_table[ai4_fixed_modes[fixed_idx]])
+        {
+            pu1_hash_table[ai4_fixed_modes[fixed_idx]] = 1;
+            pu1_mode_array[u1_num_ipe_modes++] = ai4_fixed_modes[fixed_idx];
+        }
+    }
+    return u1_num_ipe_modes;
+}
+
+#if ENABLE_TU_TREE_DETERMINATION_IN_RDOPT
+WORD32 ihevce_determine_tu_tree_distribution(
+    cu_inter_cand_t *ps_cu_data,
+    me_func_selector_t *ps_func_selector,
+    WORD16 *pi2_scratch_mem,
+    UWORD8 *pu1_inp,
+    WORD32 i4_inp_stride,
+    WORD32 i4_lambda,
+    UWORD8 u1_lambda_q_shift,
+    UWORD8 u1_cu_size,
+    UWORD8 u1_max_tr_depth)
+{
+    /* Calls the hme_evalsatd_pt_pu_NxN_tu_rec routine that matches u1_cu_size */
+    /* on the CU's input and prediction, handing it ai4_tu_split_flag of       */
+    /* ps_cu_data as the split-flag buffer, and returns that routine's result. */
+    PF_SAD_FXN_TU_REC apf_satd_tu_rec[4];
+    err_prms_t s_satd_prms;
+    WORD32 i4_cu_satd;
+
+    apf_satd_tu_rec[CU_64x64] = hme_evalsatd_pt_pu_64x64_tu_rec;
+    apf_satd_tu_rec[CU_32x32] = hme_evalsatd_pt_pu_32x32_tu_rec;
+    apf_satd_tu_rec[CU_16x16] = hme_evalsatd_pt_pu_16x16_tu_rec;
+    apf_satd_tu_rec[CU_8x8] = hme_evalsatd_pt_pu_8x8_tu_rec;
+
+    /* source block and its prediction */
+    s_satd_prms.pu1_inp = pu1_inp;
+    s_satd_prms.i4_inp_stride = i4_inp_stride;
+    s_satd_prms.pu1_ref = ps_cu_data->pu1_pred_data;
+    s_satd_prms.i4_ref_stride = ps_cu_data->i4_pred_data_stride;
+
+    /* result pointer, split-flag buffer and scratch memory */
+    s_satd_prms.pi4_sad_grid = &i4_cu_satd;
+    s_satd_prms.pi4_tu_split_flags = ps_cu_data->ai4_tu_split_flag;
+    s_satd_prms.pu1_wkg_mem = (UWORD8 *)pi2_scratch_mem;
+
+    /* a 64x64 CU gets a transform depth of at most 1 */
+    s_satd_prms.u1_max_tr_depth =
+        (64 == u1_cu_size) ? (MIN(1, u1_max_tr_depth)) : u1_max_tr_depth;
+
+    i4_cu_satd = apf_satd_tu_rec[hme_get_range(u1_cu_size) - 4](
+        &s_satd_prms, i4_lambda, u1_lambda_q_shift, 0, ps_func_selector);
+
+    /* split flag forced to 1 for a partitioned, non-64x64 CU at depth 0 */
+    if((64 != u1_cu_size) && (0 != ps_cu_data->b3_part_size) && (0 == u1_max_tr_depth))
+    {
+        ps_cu_data->ai4_tu_split_flag[0] = 1;
+    }
+
+    return i4_cu_satd;
+}
+#endif
+
+void ihevce_populate_nbr_4x4_with_pu_data(
+    nbr_4x4_t *ps_nbr_4x4, pu_t *ps_pu, WORD32 i4_nbr_buf_stride)
+{
+    /* Stamps the PU's inter motion data on the first 4x4 entry, then copies */
+    /* that entry over a (b4_wd + 1) x (b4_ht + 1) grid of entries.          */
+    WORD32 rows_left = (ps_pu->b4_ht + 1);
+    WORD32 num_cols = (ps_pu->b4_wd + 1);
+    nbr_4x4_t *ps_row = ps_nbr_4x4;
+    WORD32 col;
+
+    ps_nbr_4x4->mv = ps_pu->mv;
+    ps_nbr_4x4->b1_pred_l1_flag = (ps_pu->b2_pred_mode > PRED_L0);
+    ps_nbr_4x4->b1_pred_l0_flag = !(ps_pu->b2_pred_mode & 1);
+    ps_nbr_4x4->b1_intra_flag = 0;
+
+    while(rows_left-- > 0)
+    {
+        for(col = 0; col < num_cols; col++)
+        {
+            ps_row[col] = *ps_nbr_4x4;
+        }
+
+        ps_row += i4_nbr_buf_stride;
+    }
+}
+
+void ihevce_call_luma_inter_pred_rdopt_pass1(
+    ihevce_enc_loop_ctxt_t *ps_ctxt, cu_inter_cand_t *ps_inter_cand, WORD32 cu_size)
+{
+    /* Runs luma inter prediction for each PU of the candidate, writing into */
+    /* the candidate's pu1_pred_data_scr buffer.                             */
+    UWORD8 *pu1_pu_pred = ps_inter_cand->pu1_pred_data_scr;
+    WORD32 i4_stride = ps_inter_cand->i4_pred_data_stride;
+    WORD32 i4_num_parts = (SIZE_2Nx2N != ps_inter_cand->b3_part_size) ? 2 : 1;
+    WORD32 skip_or_merge_flag = 0;
+    WORD32 part_idx;
+
+    for(part_idx = 0; part_idx < i4_num_parts; part_idx++)
+    {
+        pu_t *ps_cur_pu = &ps_inter_cand->as_inter_pu[part_idx];
+        /* with AMP the two partitions can differ in width / height */
+        WORD32 i4_pu_wd = (ps_cur_pu->b4_wd + 1) << 2;
+        WORD32 i4_pu_ht = (ps_cur_pu->b4_ht + 1) << 2;
+
+        /* computed but not consulted: prediction also runs for skip / merge */
+        skip_or_merge_flag = ps_inter_cand->b1_skip_flag | ps_cur_pu->b1_merge_flag;
+
+        ihevce_luma_inter_pred_pu(&ps_ctxt->s_mc_ctxt, ps_cur_pu, pu1_pu_pred, i4_stride, 1);
+
+        /* the buffer pointer only moves between the first and second partition */
+        if((0 != part_idx) || (2 != i4_num_parts))
+        {
+            continue;
+        }
+
+        /* full-width first PU: second PU starts i4_pu_ht rows further down */
+        if(cu_size == i4_pu_wd)
+        {
+            pu1_pu_pred += (i4_pu_ht * i4_stride);
+        }
+        /* full-height first PU: second PU starts i4_pu_wd samples to the right */
+        if(cu_size == i4_pu_ht)
+        {
+            pu1_pu_pred += i4_pu_wd;
+        }
+    }
+}
+
+LWORD64 ihevce_it_recon_ssd(
+    ihevce_enc_loop_ctxt_t *ps_ctxt,
+    UWORD8 *pu1_src,
+    WORD32 i4_src_strd,
+    UWORD8 *pu1_pred,
+    WORD32 i4_pred_strd,
+    WORD16 *pi2_deq_data,
+    WORD32 i4_deq_data_strd,
+    UWORD8 *pu1_recon,
+    WORD32 i4_recon_stride,
+    UWORD8 *pu1_ecd_data,
+    UWORD8 u1_trans_size,
+    UWORD8 u1_pred_mode,
+    WORD32 i4_cbf,
+    WORD32 i4_zero_col,
+    WORD32 i4_zero_row,
+    CHROMA_PLANE_ID_T e_chroma_plane)
+{
+    /* Chroma plane requested: ihevce_chroma_it_recon_fxn() builds the recon, then the   */
+    /* chroma SSD routine compares recon and source, both offset by one byte for V_PLANE */
+    if(NULL_PLANE != e_chroma_plane)
+    {
+        WORD32 i4_plane_off = (e_chroma_plane == V_PLANE);
+        ihevce_chroma_it_recon_fxn(
+            ps_ctxt,
+            pi2_deq_data,
+            i4_deq_data_strd,
+            pu1_pred,
+            i4_pred_strd,
+            pu1_recon,
+            i4_recon_stride,
+            pu1_ecd_data,
+            u1_trans_size,
+            i4_cbf,
+            i4_zero_col,
+            i4_zero_row,
+            e_chroma_plane);
+        return ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
+            pu1_recon + i4_plane_off,
+            pu1_src + i4_plane_off,
+            i4_recon_stride,
+            i4_src_strd,
+            u1_trans_size,
+            u1_trans_size);
+    }
+
+    /* NULL_PLANE (luma): ihevce_it_recon_fxn() builds the recon, then SSD against source */
+    ihevce_it_recon_fxn(
+        ps_ctxt,
+        pi2_deq_data,
+        i4_deq_data_strd,
+        pu1_pred,
+        i4_pred_strd,
+        pu1_recon,
+        i4_recon_stride,
+        pu1_ecd_data,
+        u1_trans_size,
+        u1_pred_mode,
+        i4_cbf,
+        i4_zero_col,
+        i4_zero_row);
+    return ps_ctxt->s_cmn_opt_func.pf_ssd_calculator(
+        pu1_recon, pu1_src, i4_recon_stride, i4_src_strd, u1_trans_size, u1_trans_size);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_chroma_t_q_iq_ssd_scan_fxn \endif
+*
+* \brief
+* Transform unit level (Chroma) enc_loop function
+*
+* \param[in] ps_ctxt enc_loop module ctxt pointer
+* \param[in] pu1_pred pointer to predicted data buffer
+* \param[in] pred_strd predicted buffer stride
+* \param[in] pu1_src pointer to source data buffer
+* \param[in] src_strd source buffer stride
+* \param[in] pi2_deq_data pointer to store iq data
+* \param[in] deq_data_strd iq data buffer stride
+* \param[out] pu1_ecd_data pointer coeff output buffer (input to ent cod)
+* \param[out] pu1_csbf_buf pointer to store the csbf for all 4x4 in a current
+* block
+* \param[out] csbf_strd csbf buffer stride
+* \param[in] trans_size transform size (4, 8, 16)
+* \param[in] intra_flag 0:Inter/Skip 1:Intra
+* \param[out] pi4_coeff_off pointer to store the number of bytes produced in
+* coeff buffer for
+* the current TU in RDopt Mode
+* \param[out] pi4_zero_col pointer to store the zero_col info for the TU
+* \param[out] pi4_zero_row pointer to store the zero_row info for the TU
+*
+* \return
+* CBF of the current block
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+WORD32 ihevce_chroma_t_q_iq_ssd_scan_fxn(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ UWORD8 *pu1_pred,
+ WORD32 pred_strd,
+ UWORD8 *pu1_src,
+ WORD32 src_strd,
+ WORD16 *pi2_deq_data,
+ WORD32 deq_data_strd,
+ UWORD8 *pu1_recon,
+ WORD32 i4_recon_stride,
+ UWORD8 *pu1_ecd_data,
+ UWORD8 *pu1_csbf_buf,
+ WORD32 csbf_strd,
+ WORD32 trans_size,
+ WORD32 i4_scan_idx,
+ WORD32 intra_flag,
+ WORD32 *pi4_coeff_off,
+ WORD32 *pi4_tu_bits,
+ WORD32 *pi4_zero_col,
+ WORD32 *pi4_zero_row,
+ UWORD8 *pu1_is_recon_available,
+ WORD32 i4_perform_sbh,
+ WORD32 i4_perform_rdoq,
+ LWORD64 *pi8_cost,
+#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ WORD32 i4_alpha_stim_multiplier,
+ UWORD8 u1_is_cu_noisy,
+#endif
+ UWORD8 u1_is_skip,
+ SSD_TYPE_T e_ssd_type,
+ CHROMA_PLANE_ID_T e_chroma_plane)
+{
+ WORD32 trans_idx, cbf, u4_blk_sad;
+ WORD16 *pi2_quant_coeffs;
+ WORD16 *pi2_trans_values;
+ WORD32 quant_scale_mat_offset;
+ WORD32 *pi4_trans_scratch;
+ WORD32 *pi4_subBlock2csbfId_map = NULL;
+
+#if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
+ WORD32 ai4_quant_rounding_factors[3][MAX_TU_SIZE * MAX_TU_SIZE], i;
+#endif
+
+ rdoq_sbh_ctxt_t *ps_rdoq_sbh_ctxt = &ps_ctxt->s_rdoq_sbh_ctxt;
+
+ WORD32 i4_perform_zcbf = (ps_ctxt->i4_zcbf_rdo_level == ZCBF_ENABLE) ||
+ (!intra_flag && ENABLE_INTER_ZCU_COST);
+ WORD32 i4_perform_coeff_level_rdoq =
+ (ps_ctxt->i4_quant_rounding_level != FIXED_QUANT_ROUNDING) &&
+ (ps_ctxt->i4_chroma_quant_rounding_level == CHROMA_QUANT_ROUNDING);
+
+ ASSERT((e_chroma_plane == U_PLANE) || (e_chroma_plane == V_PLANE));
+ ASSERT(csbf_strd == MAX_TU_IN_CTB_ROW);
+
+ *pi4_coeff_off = 0;
+ *pi4_tu_bits = 0;
+ pu1_is_recon_available[0] = 0;
+
+ pi4_trans_scratch = (WORD32 *)&ps_ctxt->ai2_scratch[0];
+ pi2_quant_coeffs = &ps_ctxt->ai2_scratch[0];
+ pi2_trans_values = &ps_ctxt->ai2_scratch[0] + (MAX_TRANS_SIZE * 2);
+
+ if(2 == trans_size)
+ {
+ trans_size = 4; /* a size of 2 is processed as a 4x4 transform */
+ }
+
+ /* translate the transform size to index: 4 -> 1, 8 -> 2, 16 -> 3 (special-cased below) */
+ trans_idx = trans_size >> 2; /* NOTE(review): 32 would give 8 - presumably never requested here; confirm */
+
+ if(16 == trans_size)
+ {
+ trans_idx = 3; /* 16 >> 2 would give 4 */
+ }
+
+ if(u1_is_skip)
+ {
+ pi8_cost[0] = ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
+ pu1_pred + e_chroma_plane, /* NOTE(review): other calls here offset by (e_chroma_plane == V_PLANE) - confirm the enum values make these equal */
+ pu1_src + e_chroma_plane,
+ pred_strd,
+ src_strd,
+ trans_size,
+ trans_size);
+
+ if(e_ssd_type == SPATIAL_DOMAIN_SSD)
+ {
+ /* buffer copy from pred to recon */
+ ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
+ pu1_pred,
+ pred_strd,
+ pu1_recon,
+ i4_recon_stride,
+ trans_size,
+ trans_size,
+ e_chroma_plane);
+
+ pu1_is_recon_available[0] = 1;
+ }
+
+#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
+ {
+ pi8_cost[0] = ihevce_inject_stim_into_distortion(
+ pu1_src,
+ src_strd,
+ pu1_pred,
+ pred_strd,
+ pi8_cost[0],
+ i4_alpha_stim_multiplier,
+ trans_size,
+ 0,
+ ps_ctxt->u1_enable_psyRDOPT,
+ e_chroma_plane);
+ }
+#endif
+
+#if ENABLE_INTER_ZCU_COST
+#if !WEIGH_CHROMA_COST
+ /* cbf = 0, accumulate cu not coded cost */
+ ps_ctxt->i8_cu_not_coded_cost += pi8_cost[0];
+#else
+ ps_ctxt->i8_cu_not_coded_cost += (pi8_cost[0] * ps_ctxt->u4_chroma_cost_weighing_factor +
+ (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
+ CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT;
+#endif
+#endif
+
+ return 0; /* skip: no residual is coded, so the returned cbf is 0 */
+ }
+
+ if(intra_flag == 1)
+ {
+ quant_scale_mat_offset = 0;
+
+#if PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
+ ai4_quant_rounding_factors[0][0] =
+ MAX(ps_ctxt->i4_quant_rnd_factor[intra_flag], (1 << QUANT_ROUND_FACTOR_Q) / 3);
+
+ for(i = 0; i < trans_size * trans_size; i++)
+ {
+ ai4_quant_rounding_factors[1][i] =
+ MAX(ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size >> 3][i],
+ (1 << QUANT_ROUND_FACTOR_Q) / 3);
+ ai4_quant_rounding_factors[2][i] =
+ MAX(ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size >> 3][i],
+ (1 << QUANT_ROUND_FACTOR_Q) / 3);
+ }
+#endif
+ }
+ else
+ {
+ quant_scale_mat_offset = NUM_TRANS_TYPES;
+ }
+
+ switch(trans_size)
+ {
+ case 4:
+ {
+ pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map4x4TU;
+
+ break;
+ }
+ case 8:
+ {
+ pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map8x8TU;
+
+ break;
+ }
+ case 16:
+ {
+ pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map16x16TU;
+
+ break;
+ }
+ case 32:
+ {
+ pi4_subBlock2csbfId_map = gai4_subBlock2csbfId_map32x32TU;
+
+ break;
+ }
+ } /* no default: any other size leaves the map pointer NULL */
+
+ /* ---------- call residue and transform block ------- */
+ u4_blk_sad = ps_ctxt->apf_chrm_resd_trns[trans_idx - 1](
+ pu1_src + (e_chroma_plane == V_PLANE),
+ pu1_pred + (e_chroma_plane == V_PLANE),
+ pi4_trans_scratch,
+ pi2_trans_values,
+ src_strd,
+ pred_strd,
+ ((trans_size << 16) + 1)); /* dst strd and chroma flag are packed together */
+ (void)u4_blk_sad; /* value returned by the residue/transform call is not used */
+ /* -------- quant / inverse quant and SSD calculation in Transform Domain ------ */
+
+ cbf = ps_ctxt->apf_quant_iquant_ssd
+ [i4_perform_coeff_level_rdoq + (e_ssd_type != FREQUENCY_DOMAIN_SSD) * 2]
+
+ (pi2_trans_values,
+ ps_ctxt->api2_rescal_mat[trans_idx + quant_scale_mat_offset],
+ pi2_quant_coeffs,
+ pi2_deq_data,
+ trans_size,
+ ps_ctxt->i4_chrm_cu_qp_div6,
+ ps_ctxt->i4_chrm_cu_qp_mod6,
+#if !PROHIBIT_INTRA_QUANT_ROUNDING_FACTOR_TO_DROP_BELOW_1BY3
+ ps_ctxt->i4_quant_rnd_factor[intra_flag],
+ ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size >> 3],
+ ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size >> 3],
+#else
+ intra_flag ? ai4_quant_rounding_factors[0][0] : ps_ctxt->i4_quant_rnd_factor[intra_flag],
+ intra_flag ? ai4_quant_rounding_factors[1]
+ : ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_0_1[trans_size >> 3],
+ intra_flag ? ai4_quant_rounding_factors[2]
+ : ps_ctxt->pi4_quant_round_factor_cr_cu_ctb_1_2[trans_size >> 3],
+#endif
+ trans_size,
+ trans_size,
+ deq_data_strd,
+ pu1_csbf_buf,
+ csbf_strd,
+ pi4_zero_col,
+ pi4_zero_row,
+ ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset],
+ pi8_cost);
+
+ if(e_ssd_type != FREQUENCY_DOMAIN_SSD)
+ {
+ pi8_cost[0] = UINT_MAX; /* placeholder; for SPATIAL_DOMAIN_SSD it is overwritten by ihevce_it_recon_ssd() in the RDOPT_ZERO_CBF_ENABLE section */
+ }
+
+ if(0 != cbf)
+ {
+ if(i4_perform_sbh || i4_perform_rdoq)
+ {
+ ps_rdoq_sbh_ctxt->i4_iq_data_strd = deq_data_strd;
+ ps_rdoq_sbh_ctxt->i4_q_data_strd = trans_size;
+
+ ps_rdoq_sbh_ctxt->i4_qp_div = ps_ctxt->i4_chrm_cu_qp_div6;
+ ps_rdoq_sbh_ctxt->i2_qp_rem = ps_ctxt->i4_chrm_cu_qp_mod6;
+ ps_rdoq_sbh_ctxt->i4_scan_idx = i4_scan_idx;
+ ps_rdoq_sbh_ctxt->i8_ssd_cost = *pi8_cost;
+ ps_rdoq_sbh_ctxt->i4_trans_size = trans_size;
+
+ ps_rdoq_sbh_ctxt->pi2_dequant_coeff =
+ ps_ctxt->api2_scal_mat[trans_idx + quant_scale_mat_offset];
+ ps_rdoq_sbh_ctxt->pi2_iquant_coeffs = pi2_deq_data;
+ ps_rdoq_sbh_ctxt->pi2_quant_coeffs = pi2_quant_coeffs;
+ ps_rdoq_sbh_ctxt->pi2_trans_values = pi2_trans_values;
+ ps_rdoq_sbh_ctxt->pu1_csbf_buf = pu1_csbf_buf;
+ ps_rdoq_sbh_ctxt->pi4_subBlock2csbfId_map = pi4_subBlock2csbfId_map;
+
+ if((!i4_perform_rdoq))
+ {
+ ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt);
+
+ pi8_cost[0] = ps_rdoq_sbh_ctxt->i8_ssd_cost;
+ }
+ }
+
+ /* ------- call coeffs scan function ------- */
+ *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs(
+ pi2_quant_coeffs,
+ pi4_subBlock2csbfId_map,
+ i4_scan_idx,
+ trans_size,
+ pu1_ecd_data,
+ pu1_csbf_buf,
+ csbf_strd);
+ }
+
+ /* Normalize Cost. Note : trans_idx, not (trans_idx-1) */
+ pi8_cost[0] >>= ga_trans_shift[trans_idx];
+
+#if RDOPT_ZERO_CBF_ENABLE
+ if((0 != cbf))
+ {
+ WORD32 tu_bits;
+ LWORD64 zero_cbf_cost_u, curr_cb_cod_cost;
+
+ zero_cbf_cost_u = 0;
+
+ /* Populating the fields of rdoq_ctxt structure */
+ if(i4_perform_rdoq)
+ {
+ //memset(ps_rdoq_sbh_ctxt,0,sizeof(rdoq_sbh_ctxt_t));
+ /* transform size to log2transform size */
+ GETRANGE(ps_rdoq_sbh_ctxt->i4_log2_trans_size, trans_size);
+ ps_rdoq_sbh_ctxt->i4_log2_trans_size -= 1;
+
+ ps_rdoq_sbh_ctxt->i8_cl_ssd_lambda_qf = ps_ctxt->i8_cl_ssd_lambda_chroma_qf;
+ ps_rdoq_sbh_ctxt->i4_is_luma = 0;
+ ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td = ga_trans_shift[trans_idx];
+ ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td =
+ (1 << (ps_rdoq_sbh_ctxt->i4_shift_val_ssd_in_td - 1));
+ ps_rdoq_sbh_ctxt->i1_tu_is_coded = 0;
+ ps_rdoq_sbh_ctxt->pi4_zero_col = pi4_zero_col;
+ ps_rdoq_sbh_ctxt->pi4_zero_row = pi4_zero_row;
+ }
+ else if(i4_perform_zcbf)
+ {
+ /* cost of zero cbf encoding */
+ zero_cbf_cost_u =
+
+ ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_ssd_calculator(
+ pu1_pred + (e_chroma_plane == V_PLANE),
+ pu1_src + (e_chroma_plane == V_PLANE),
+ pred_strd,
+ src_strd,
+ trans_size,
+ trans_size);
+ }
+
+ /************************************************************************/
+ /* call the entropy rdo encode to get the bit estimate for current tu */
+ /* note that tu includes only residual coding bits and does not include */
+ /* tu split, cbf and qp delta encoding bits for a TU */
+ /************************************************************************/
+ if(i4_perform_rdoq)
+ {
+ tu_bits = ihevce_entropy_rdo_encode_tu_rdoq(
+ &ps_ctxt->s_rdopt_entropy_ctxt,
+ pu1_ecd_data,
+ trans_size,
+ 0,
+ ps_rdoq_sbh_ctxt,
+ pi8_cost,
+ &zero_cbf_cost_u,
+ 0);
+ //Currently, we are not accounting for sign bit in RDOPT bits calculation when RDOQ is turned on
+
+ if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 0)
+ {
+ cbf = 0;
+
+ /* num bytes is set to 0 */
+ *pi4_coeff_off = 0;
+ }
+
+ (*pi4_tu_bits) += tu_bits;
+
+ if((i4_perform_sbh) && (0 != cbf))
+ {
+ ps_rdoq_sbh_ctxt->i8_ssd_cost = pi8_cost[0];
+
+ ihevce_sign_data_hiding(ps_rdoq_sbh_ctxt);
+
+ pi8_cost[0] = ps_rdoq_sbh_ctxt->i8_ssd_cost;
+ }
+
+ /* Add round value before normalizing. NOTE(review): pi8_cost[0] was already shifted once above; presumably the RDOQ call rewrites it un-normalized - confirm */
+ pi8_cost[0] += ps_rdoq_sbh_ctxt->i4_round_val_ssd_in_td;
+ pi8_cost[0] >>= ga_trans_shift[trans_idx];
+
+ if(ps_rdoq_sbh_ctxt->i1_tu_is_coded == 1)
+ {
+ *pi4_coeff_off = ps_ctxt->s_cmn_opt_func.pf_scan_coeffs(
+ pi2_quant_coeffs,
+ pi4_subBlock2csbfId_map,
+ i4_scan_idx,
+ trans_size,
+ pu1_ecd_data,
+ ps_rdoq_sbh_ctxt->pu1_csbf_buf,
+ csbf_strd);
+ }
+ }
+ else
+ {
+ /************************************************************************/
+ /* call the entropy rdo encode to get the bit estimate for current tu */
+ /* note that tu includes only residual coding bits and does not include */
+ /* tu split, cbf and qp delta encoding bits for a TU */
+ /************************************************************************/
+ tu_bits = ihevce_entropy_rdo_encode_tu(
+ &ps_ctxt->s_rdopt_entropy_ctxt, pu1_ecd_data, trans_size, 0, i4_perform_sbh);
+
+ (*pi4_tu_bits) += tu_bits;
+ }
+
+ if(e_ssd_type == SPATIAL_DOMAIN_SSD)
+ {
+ pi8_cost[0] = ihevce_it_recon_ssd(
+ ps_ctxt,
+ pu1_src,
+ src_strd,
+ pu1_pred,
+ pred_strd,
+ pi2_deq_data,
+ deq_data_strd,
+ pu1_recon,
+ i4_recon_stride,
+ pu1_ecd_data,
+ trans_size,
+ PRED_MODE_INTRA, /* not forwarded on the chroma path of ihevce_it_recon_ssd() */
+ cbf,
+ pi4_zero_col[0],
+ pi4_zero_row[0],
+ e_chroma_plane);
+
+ pu1_is_recon_available[0] = 1;
+ }
+
+#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier)
+ {
+ pi8_cost[0] = ihevce_inject_stim_into_distortion(
+ pu1_src,
+ src_strd,
+ pu1_recon,
+ i4_recon_stride,
+ pi8_cost[0],
+ i4_alpha_stim_multiplier,
+ trans_size,
+ 0,
+ ps_ctxt->u1_enable_psyRDOPT,
+ e_chroma_plane);
+ }
+ else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier)
+ {
+ pi8_cost[0] = ihevce_inject_stim_into_distortion(
+ pu1_src,
+ src_strd,
+ pu1_pred,
+ pred_strd,
+ pi8_cost[0],
+ i4_alpha_stim_multiplier,
+ trans_size,
+ 0,
+ ps_ctxt->u1_enable_psyRDOPT,
+ e_chroma_plane);
+ }
+#endif
+
+ curr_cb_cod_cost = pi8_cost[0];
+
+ /* add the SSD cost to bits estimate given by ECD */
+ curr_cb_cod_cost +=
+ COMPUTE_RATE_COST_CLIP30(tu_bits, ps_ctxt->i8_cl_ssd_lambda_chroma_qf, LAMBDA_Q_SHIFT);
+
+ if(i4_perform_zcbf)
+ {
+#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ if(u1_is_cu_noisy && i4_alpha_stim_multiplier)
+ {
+ zero_cbf_cost_u = ihevce_inject_stim_into_distortion(
+ pu1_src,
+ src_strd,
+ pu1_pred,
+ pred_strd,
+ zero_cbf_cost_u,
+ !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
+ : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
+ (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
+ 100.0,
+ trans_size,
+ 0,
+ ps_ctxt->u1_enable_psyRDOPT,
+ e_chroma_plane);
+ }
+#endif
+ /* force the tu as zero cbf if zero_cbf_cost is lower */
+ if(zero_cbf_cost_u < curr_cb_cod_cost)
+ {
+ *pi4_coeff_off = 0;
+ cbf = 0;
+ (*pi4_tu_bits) = 0;
+ pi8_cost[0] = zero_cbf_cost_u;
+
+ pu1_is_recon_available[0] = 0;
+
+ if(e_ssd_type == SPATIAL_DOMAIN_SSD)
+ {
+ ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy(
+ pu1_pred,
+ pred_strd,
+ pu1_recon,
+ i4_recon_stride,
+ trans_size,
+ trans_size,
+ e_chroma_plane);
+
+ pu1_is_recon_available[0] = 1;
+ }
+ }
+
+#if ENABLE_INTER_ZCU_COST
+ if(!intra_flag)
+ {
+#if !WEIGH_CHROMA_COST
+ ps_ctxt->i8_cu_not_coded_cost += zero_cbf_cost_u;
+#else
+ ps_ctxt->i8_cu_not_coded_cost += (LWORD64)(
+ (zero_cbf_cost_u * ps_ctxt->u4_chroma_cost_weighing_factor +
+ (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
+ CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
+#endif
+ }
+#endif
+ }
+ }
+ else
+ {
+ if(e_ssd_type == SPATIAL_DOMAIN_SSD)
+ {
+ pi8_cost[0] = ihevce_it_recon_ssd(
+ ps_ctxt,
+ pu1_src,
+ src_strd,
+ pu1_pred,
+ pred_strd,
+ pi2_deq_data,
+ deq_data_strd,
+ pu1_recon,
+ i4_recon_stride,
+ pu1_ecd_data,
+ trans_size,
+ PRED_MODE_INTRA, /* not forwarded on the chroma path of ihevce_it_recon_ssd() */
+ cbf,
+ pi4_zero_col[0],
+ pi4_zero_row[0],
+ e_chroma_plane);
+
+ pu1_is_recon_available[0] = 1;
+ }
+
+#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ if(u1_is_cu_noisy && (e_ssd_type == SPATIAL_DOMAIN_SSD) && i4_alpha_stim_multiplier)
+ {
+ pi8_cost[0] = ihevce_inject_stim_into_distortion(
+ pu1_src,
+ src_strd,
+ pu1_recon,
+ i4_recon_stride,
+ pi8_cost[0],
+ !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
+ : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
+ (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
+ 100.0,
+ trans_size,
+ 0,
+ ps_ctxt->u1_enable_psyRDOPT,
+ e_chroma_plane);
+ }
+ else if(u1_is_cu_noisy && (e_ssd_type == FREQUENCY_DOMAIN_SSD) && i4_alpha_stim_multiplier)
+ {
+ pi8_cost[0] = ihevce_inject_stim_into_distortion(
+ pu1_src,
+ src_strd,
+ pu1_pred,
+ pred_strd,
+ pi8_cost[0],
+ !ps_ctxt->u1_is_refPic ? ALPHA_FOR_ZERO_CODING_DECISIONS
+ : ((100 - ALPHA_DISCOUNT_IN_REF_PICS_IN_RDOPT) *
+ (double)ALPHA_FOR_ZERO_CODING_DECISIONS) /
+ 100.0,
+ trans_size,
+ 0,
+ ps_ctxt->u1_enable_psyRDOPT,
+ e_chroma_plane);
+ }
+#endif
+
+#if ENABLE_INTER_ZCU_COST
+ if(!intra_flag)
+ {
+#if !WEIGH_CHROMA_COST
+ /* cbf = 0, accumulate cu not coded cost */
+ ps_ctxt->i8_cu_not_coded_cost += pi8_cost[0];
+#else
+ /* cbf = 0, accumulate cu not coded cost */
+
+ ps_ctxt->i8_cu_not_coded_cost += (LWORD64)(
+ (pi8_cost[0] * ps_ctxt->u4_chroma_cost_weighing_factor +
+ (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
+ CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT);
+#endif
+ }
+#endif
+ }
+#endif /* RDOPT_ZERO_CBF_ENABLE */
+
+ return (cbf);
+}
diff --git a/encoder/ihevce_enc_loop_utils.h b/encoder/ihevce_enc_loop_utils.h
new file mode 100644
index 0000000..6c874bd
--- /dev/null
+++ b/encoder/ihevce_enc_loop_utils.h
@@ -0,0 +1,425 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_enc_loop_utils.h
+*
+* \brief
+* This file contains interface definition of frame processing pass
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_ENC_LOOP_UTILS_H_
+#define _IHEVCE_ENC_LOOP_UTILS_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+#define INTRA_ENC_DBG_L0 1 // Frame Level
+#define INTRA_ENC_DBG_L1 1 // CTB Row Level
+#define INTRA_ENC_DBG_L2 0 // CTB/CU Level
+#define INTRA_ENC_DBG_L3 0 // PU/TU Level
+#define INTRA_ENC_DBG_L4 0 // Pixel Level
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+#define CABAC_FRAC_BITS_Q_SHIFT (1 << CABAC_FRAC_BITS_Q)
+#define LAMDA_Q_SHIFT_FACT 20
+/* out = rounding factor in Q(QUANT_ROUND_FACTOR_Q) derived from (r1 - r0) * lambda */
+#define QUANT_ROUND_FACTOR(out, r1, r0, lambda) \
+    { \
+        LWORD64 temp3_m; \
+        LWORD64 temp; \
+        temp3_m = ((((r1) - (r0)) * (lambda))); /* args parenthesized: safe for expressions */ \
+        temp = (CLIP3( \
+            ((CABAC_FRAC_BITS_Q_SHIFT - \
+              ((((LWORD64)(temp3_m) + ((LWORD64)CABAC_FRAC_BITS_Q_SHIFT << LAMDA_Q_SHIFT_FACT)) / \
+                2) >> \
+               LAMDA_Q_SHIFT_FACT))), \
+            0, \
+            (CABAC_FRAC_BITS_Q_SHIFT >> 1))); \
+        (out) = ((WORD32)(temp * (1 << QUANT_ROUND_FACTOR_Q))) >> CABAC_FRAC_BITS_Q; \
+    }
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Variable Declarations */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+void ihevce_get_cl_cu_lambda_prms(ihevce_enc_loop_ctxt_t *ps_ctxt, WORD32 i4_cur_cu_qp);
+
+void ihevce_populate_cl_cu_lambda_prms(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ frm_lambda_ctxt_t *ps_frm_lamda,
+ WORD32 i4_slice_type,
+ WORD32 i4_temporal_lyr_id,
+ WORD32 i4_lambda_type);
+
+void ihevce_compute_quant_rel_param(ihevce_enc_loop_ctxt_t *ps_ctxt, WORD8 i1_cu_qp);
+
+void ihevce_compute_cu_level_QP(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ WORD32 i4_activity_for_qp,
+ WORD32 i4_activity_for_lamda,
+ WORD32 i4_reduce_qp);
+
+WORD32 ihevce_scan_coeffs(
+ WORD16 *pi2_quant_coeffs,
+ WORD32 *pi4_subBlock2csbfId_map,
+ WORD32 scan_idx,
+ WORD32 trans_size,
+ UWORD8 *pu1_out_data,
+ UWORD8 *pu1_csbf_buf,
+ WORD32 i4_csbf_stride);
+
+void ihevce_populate_intra_pred_mode(
+ WORD32 top_intra_mode,
+ WORD32 left_intra_mode,
+ WORD32 available_top,
+ WORD32 available_left,
+ WORD32 cu_pos_y,
+ WORD32 *ps_cand_mode_list);
+
+void ihevce_intra_pred_mode_signaling(
+ WORD32 top_intra_mode,
+ WORD32 left_intra_mode,
+ WORD32 available_top,
+ WORD32 available_left,
+ WORD32 cu_pos_y,
+ WORD32 luma_intra_pred_mode_current,
+ intra_prev_rem_flags_t *ps_intra_pred_mode_current);
+void ihevce_chroma_interleave_2d_copy(
+ UWORD8 *pu1_uv_src_bp,
+ WORD32 src_strd,
+ UWORD8 *pu1_uv_dst_bp,
+ WORD32 dst_strd,
+ WORD32 w,
+ WORD32 h,
+ CHROMA_PLANE_ID_T e_chroma_plane);
+
+WORD32 ihevce_t_q_iq_ssd_scan_fxn(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ UWORD8 *pu1_pred,
+ WORD32 pred_strd,
+ UWORD8 *pu1_src,
+ WORD32 src_strd,
+ WORD16 *pi2_deq_data,
+ WORD32 deq_data_strd,
+ UWORD8 *pu1_recon,
+ WORD32 i4_recon_stride,
+ UWORD8 *pu1_ecd_data,
+ UWORD8 *pu1_csbf_buf,
+ WORD32 csbf_strd,
+ WORD32 trans_size,
+ WORD32 packed_pred_mode,
+ LWORD64 *pi8_cost,
+ WORD32 *pi4_coeff_off,
+ WORD32 *pi4_tu_bits,
+ UWORD32 *pu4_blk_sad,
+ WORD32 *pi4_zero_col,
+ WORD32 *pi4_zero_row,
+ UWORD8 *pu1_is_recon_available,
+ WORD32 i4_perform_rdoq,
+ WORD32 i4_perform_sbh,
+#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ WORD32 i4_alpha_stim_multiplier,
+ UWORD8 u1_is_cu_noisy,
+#endif
+ SSD_TYPE_T e_ssd_type,
+ WORD32 early_cbf);
+
+void ihevce_quant_rounding_factor_gen(
+ WORD32 i4_trans_size,
+ WORD32 is_luma,
+ rdopt_entropy_ctxt_t *ps_rdopt_entropy_ctxt,
+ WORD32 *pi4_quant_round_0_1,
+ WORD32 *pi4_quant_round_1_2,
+ double i4_lamda_modifier,
+ UWORD8 i4_is_tu_level_quant_rounding);
+
+void ihevce_it_recon_fxn(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ WORD16 *pi2_deq_data,
+ WORD32 deq_dat_strd,
+ UWORD8 *pu1_pred,
+ WORD32 pred_strd,
+ UWORD8 *pu1_recon,
+ WORD32 recon_strd,
+ UWORD8 *pu1_ecd_data,
+ WORD32 trans_size,
+ WORD32 packed_pred_mode,
+ WORD32 cbf,
+ WORD32 zero_cols,
+ WORD32 zero_rows);
+
+void ihevce_chroma_it_recon_fxn(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ WORD16 *pi2_deq_data,
+ WORD32 deq_dat_strd,
+ UWORD8 *pu1_pred,
+ WORD32 pred_strd,
+ UWORD8 *pu1_recon,
+ WORD32 recon_strd,
+ UWORD8 *pu1_ecd_data,
+ WORD32 trans_size,
+ WORD32 cbf,
+ WORD32 zero_cols,
+ WORD32 zero_rows,
+ CHROMA_PLANE_ID_T e_chroma_plane);
+
+void ihevce_mpm_idx_based_filter_RDOPT_cand(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ cu_analyse_t *ps_cu_analyse,
+ nbr_4x4_t *ps_left_nbr_4x4,
+ nbr_4x4_t *ps_top_nbr_4x4,
+ UWORD8 *pu1_luma_mode,
+ UWORD8 *pu1_eval_mark);
+
+LWORD64 ihevce_intra_rdopt_cu_ntu(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ enc_loop_cu_prms_t *ps_cu_prms,
+ void *pv_pred_org,
+ WORD32 pred_strd_org,
+ enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
+ UWORD8 *pu1_luma_mode,
+ cu_analyse_t *ps_cu_analyse,
+ void *pv_curr_src,
+ void *pv_cu_left,
+ void *pv_cu_top,
+ void *pv_cu_top_left,
+ nbr_4x4_t *ps_left_nbr_4x4,
+ nbr_4x4_t *ps_top_nbr_4x4,
+ WORD32 nbr_4x4_left_strd,
+ WORD32 cu_left_stride,
+ WORD32 curr_buf_idx,
+ WORD32 func_proc_mode,
+ WORD32 i4_alpha_stim_multiplier);
+LWORD64 ihevce_inter_rdopt_cu_ntu(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ enc_loop_cu_prms_t *ps_cu_prms,
+ void *pv_src,
+ WORD32 cu_size,
+ WORD32 cu_pos_x,
+ WORD32 cu_pos_y,
+ WORD32 curr_buf_idx,
+ enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
+ cu_inter_cand_t *ps_inter_cand,
+ cu_analyse_t *ps_cu_analyse,
+ WORD32 i4_alpha_stim_multiplier);
+
+LWORD64 ihevce_inter_tu_tree_selector_and_rdopt_cost_computer(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ enc_loop_cu_prms_t *ps_cu_prms,
+ void *pv_src,
+ WORD32 cu_size,
+ WORD32 cu_pos_x,
+ WORD32 cu_pos_y,
+ WORD32 curr_buf_idx,
+ enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
+ cu_inter_cand_t *ps_inter_cand,
+ cu_analyse_t *ps_cu_analyse,
+ WORD32 i4_alpha_stim_multiplier);
+
+LWORD64 ihevce_inter_rdopt_cu_mc_mvp(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ cu_inter_cand_t *ps_inter_cand,
+ WORD32 cu_size,
+ WORD32 cu_pos_x,
+ WORD32 cu_pos_y,
+ nbr_4x4_t *ps_left_nbr_4x4,
+ nbr_4x4_t *ps_top_nbr_4x4,
+ nbr_4x4_t *ps_topleft_nbr_4x4,
+ WORD32 nbr_4x4_left_strd,
+ WORD32 curr_buf_idx);
+void ihevce_intra_chroma_pred_mode_selector(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ enc_loop_chrm_cu_buf_prms_t *ps_chrm_cu_buf_prms,
+ cu_analyse_t *ps_cu_analyse,
+ WORD32 rd_opt_curr_idx,
+ WORD32 tu_mode,
+ WORD32 i4_alpha_stim_multiplier,
+ UWORD8 u1_is_cu_noisy);
+
+LWORD64 ihevce_chroma_cu_prcs_rdopt(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ WORD32 rd_opt_curr_idx,
+ WORD32 func_proc_mode,
+ UWORD8 *pu1_chrm_src,
+ WORD32 chrm_src_stride,
+ UWORD8 *pu1_cu_left,
+ UWORD8 *pu1_cu_top,
+ UWORD8 *pu1_cu_top_left,
+ WORD32 cu_left_stride,
+ WORD32 cu_pos_x,
+ WORD32 cu_pos_y,
+ WORD32 *pi4_chrm_tu_bits,
+ WORD32 i4_alpha_stim_multiplier,
+ UWORD8 u1_is_cu_noisy);
+
+void ihevce_set_eval_flags(
+ ihevce_enc_loop_ctxt_t *ps_ctxt, enc_loop_cu_final_prms_t *ps_enc_loop_bestprms);
+
+void ihevce_final_rdopt_mode_prcs(
+ ihevce_enc_loop_ctxt_t *ps_ctxt, final_mode_process_prms_t *ps_prms);
+
+WORD32 ihevce_set_flags_to_regulate_reevaluation(
+ cu_final_recon_flags_t *ps_cu_recon_flags,
+ ihevce_enc_cu_node_ctxt_t *ps_enc_out_ctxt,
+ UWORD8 *pu1_deviant_cu_regions,
+ WORD32 i4_num_deviant_cus,
+ WORD8 i1_qp_past,
+ WORD8 i1_qp_present,
+ UWORD8 u1_is_422);
+
+void ihevce_err_compute(
+ UWORD8 *pu1_inp,
+ UWORD8 *pu1_interp_out_buf,
+ WORD32 *pi4_sad_grid,
+ WORD32 *pi4_tu_split_flags,
+ WORD32 inp_stride,
+ WORD32 out_stride,
+ WORD32 blk_size,
+ WORD32 part_mask,
+ WORD32 use_satd_for_err_calc);
+void ihevce_determine_children_cost_of_32x32_cu(
+ block_merge_input_t *ps_merge_in,
+ WORD32 *pi4_cost_children,
+ WORD32 idx_of_tl_child,
+ WORD32 cu_pos_x,
+ WORD32 cu_pos_y);
+
+WORD32 ihevce_determine_children_cost_of_cu_from_me_results(
+ block_merge_input_t *ps_merge_in,
+ cur_ctb_cu_tree_t *ps_cu_tree_root,
+ WORD32 *pi4_ref_bits,
+ WORD32 *pi4_cost_children,
+ WORD32 idx_of_tl_child,
+ CU_SIZE_T e_cu_size_parent);
+
+void *ihevce_tu_tree_update(
+ tu_prms_t *ps_tu_prms,
+ WORD32 *pnum_tu_in_cu,
+ WORD32 depth,
+ WORD32 tu_split_flag,
+ WORD32 tu_early_cbf,
+ WORD32 i4_x_off,
+ WORD32 i4_y_off);
+WORD32 ihevce_shrink_inter_tu_tree(
+ tu_enc_loop_out_t *ps_tu_enc_loop,
+ tu_enc_loop_temp_prms_t *ps_tu_enc_loop_temp_prms,
+ recon_datastore_t *ps_recon_datastore,
+ WORD32 num_tu_in_cu,
+ UWORD8 u1_is_422);
+UWORD8 ihevce_intra_mode_nxn_hash_updater(
+ UWORD8 *pu1_mode_array, UWORD8 *pu1_hash_table, UWORD8 u1_num_ipe_modes);
+
+#if ENABLE_TU_TREE_DETERMINATION_IN_RDOPT
+WORD32 ihevce_determine_tu_tree_distribution(
+ cu_inter_cand_t *ps_cu_data,
+ me_func_selector_t *ps_func_selector,
+ WORD16 *pi2_scratch_mem,
+ UWORD8 *pu1_inp,
+ WORD32 i4_inp_stride,
+ WORD32 i4_lambda,
+ UWORD8 u1_lambda_q_shift,
+ UWORD8 u1_cu_size,
+ UWORD8 u1_max_tr_depth);
+#endif
+
+void ihevce_populate_nbr_4x4_with_pu_data(
+ nbr_4x4_t *ps_nbr_4x4, pu_t *ps_pu, WORD32 i4_nbr_buf_stride);
+
+void ihevce_call_luma_inter_pred_rdopt_pass1(
+ ihevce_enc_loop_ctxt_t *ps_ctxt, cu_inter_cand_t *ps_inter_cand, WORD32 cu_size);
+
+LWORD64 ihevce_it_recon_ssd(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ UWORD8 *pu1_src,
+ WORD32 i4_src_strd,
+ UWORD8 *pu1_pred,
+ WORD32 i4_pred_strd,
+ WORD16 *pi2_deq_data,
+ WORD32 i4_deq_data_strd,
+ UWORD8 *pu1_recon,
+ WORD32 i4_recon_stride,
+ UWORD8 *pu1_ecd_data,
+ UWORD8 u1_trans_size,
+ UWORD8 u1_pred_mode,
+ WORD32 i4_cbf,
+ WORD32 i4_zero_col,
+ WORD32 i4_zero_row,
+ CHROMA_PLANE_ID_T e_chroma_plane);
+
+WORD32 ihevce_chroma_t_q_iq_ssd_scan_fxn(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ UWORD8 *pu1_pred,
+ WORD32 pred_strd,
+ UWORD8 *pu1_src,
+ WORD32 src_strd,
+ WORD16 *pi2_deq_data,
+ WORD32 deq_data_strd,
+ UWORD8 *pu1_recon,
+ WORD32 i4_recon_stride,
+ UWORD8 *pu1_ecd_data,
+ UWORD8 *pu1_csbf_buf,
+ WORD32 csbf_strd,
+ WORD32 trans_size,
+ WORD32 i4_scan_idx,
+ WORD32 intra_flag,
+ WORD32 *pi4_coeff_off,
+ WORD32 *pi4_tu_bits,
+ WORD32 *pi4_zero_col,
+ WORD32 *pi4_zero_row,
+ UWORD8 *pu1_is_recon_available,
+ WORD32 i4_perform_sbh,
+ WORD32 i4_perform_rdoq,
+ LWORD64 *pi8_cost,
+#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ WORD32 i4_alpha_stim_multiplier,
+ UWORD8 u1_is_cu_noisy,
+#endif
+ UWORD8 u1_is_skip,
+ SSD_TYPE_T e_ssd_type,
+ CHROMA_PLANE_ID_T e_chroma_plane);
+void ihevce_update_pred_qp(ihevce_enc_loop_ctxt_t *ps_ctxt, WORD32 cu_pos_x, WORD32 cu_pos_y);
+#endif /* _IHEVCE_ENC_LOOP_UTILS_H_ */
diff --git a/encoder/ihevce_enc_sbh_funcs.c b/encoder/ihevce_enc_sbh_funcs.c
new file mode 100644
index 0000000..ae7e69f
--- /dev/null
+++ b/encoder/ihevce_enc_sbh_funcs.c
@@ -0,0 +1,328 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*!
+******************************************************************************
+* \file ihevce_enc_sbh_funcs.c
+*
+* \brief
+* This file contains utility functions for sbh
+*
+* \date
+* 31/08/2012
+*
+* \author
+* Ittiam
+*
+* List of Functions
+* ihevce_sign_data_hiding()
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+#include "ihevc_trans_tables.h"
+#include "ihevc_trans_macros.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_multi_thrd_funcs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_global_tables.h"
+#include "ihevce_enc_sbh_utils.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+*  Applies sign bit hiding (SBH): for every coded 4x4 sub block, finds the
+*  coefficient to adjust so that the parity of the level sum matches the sign
+*  of the first non-zero coefficient, and adjusts it by +/-1 if required
+*
+* @par Description:
+*  A sub block is considered only when its first and last non-zero scan
+*  positions are at least 4 apart. The adjusted coefficient is re-dequantized
+*  and the SSD cost is corrected for the change in reconstruction error
+*
+* @param[inout] ps_rdoq_sbh_params
+*  SBH context; quantized/dequantized coefficients and i8_ssd_cost are updated
+*
+* @returns None
+********************************************************************************
+*/
+void ihevce_sign_data_hiding(rdoq_sbh_ctxt_t *ps_rdoq_sbh_params)
+{
+    WORD32 i, trans_unit_idx;
+    UWORD8 *pu1_trans_table = NULL;
+    UWORD8 *pu1_csb_table;
+    WORD32 shift_value, mask_value;
+    WORD32 blk_row, blk_col;
+
+    WORD32 x_pos, y_pos;
+    WORD16 i2_quant_coeff;
+    WORD32 best_pos = -1;
+
+    WORD16 *pi2_quant_coeffs = ps_rdoq_sbh_params->pi2_quant_coeffs;
+    WORD16 *pi2_iquant_data = ps_rdoq_sbh_params->pi2_iquant_coeffs;
+    WORD16 *pi2_tr_coeffs = ps_rdoq_sbh_params->pi2_trans_values;
+    WORD32 *pi4_subBlock2csbfId_map = ps_rdoq_sbh_params->pi4_subBlock2csbfId_map;
+    WORD16 *pi2_dequant_coeff = ps_rdoq_sbh_params->pi2_dequant_coeff;
+    UWORD8 *pu1_csbf_buf = ps_rdoq_sbh_params->pu1_csbf_buf;
+    WORD32 dst_iq_strd = ps_rdoq_sbh_params->i4_iq_data_strd;
+    WORD32 dst_q_strd = ps_rdoq_sbh_params->i4_q_data_strd;
+
+    WORD32 scan_idx = ps_rdoq_sbh_params->i4_scan_idx;
+    WORD32 qp_div = ps_rdoq_sbh_params->i4_qp_div;
+    WORD32 trans_size = ps_rdoq_sbh_params->i4_trans_size;
+    WORD32 qp_rem = ps_rdoq_sbh_params->i2_qp_rem;
+    LWORD64 ssd_cost = ps_rdoq_sbh_params->i8_ssd_cost;
+    /* -1: no coded csb met yet, 1: inside the first coded csb met in reverse scan, 0: after it */
+    WORD32 last_cg = -1;
+
+    WORD32 log2_size, bit_depth, shift_iq;
+
+    GETRANGE(log2_size, trans_size);
+    log2_size -= 1;
+    bit_depth = ps_rdoq_sbh_params->i4_bit_depth;
+    shift_iq = bit_depth + log2_size - 5;
+
+    /* Select proper order for the transform unit and csb based on scan_idx */
+    /* and the trans_size */
+
+    /* scan order inside a csb */
+    pu1_csb_table = (UWORD8 *)&(g_u1_scan_table_4x4[scan_idx][0]);
+
+    /* GETRANGE presumably yields log2(trans_size) + 1, so shift_value = log2(trans_size / 4) */
+    GETRANGE(shift_value, trans_size);
+    shift_value = shift_value - 3; /* for finding row no. from scan index */
+    mask_value = (trans_size / 4) - 1; /* for finding the col. no. from scan index */
+    switch(trans_size)
+    {
+    case 32:
+        pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_8x8[scan_idx][0]);
+        break;
+    case 16:
+        pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_4x4[scan_idx][0]);
+        break;
+    case 8:
+        pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_2x2[scan_idx][0]);
+        break;
+    case 4:
+        pu1_trans_table = (UWORD8 *)&(g_u1_scan_table_1x1[0]);
+        break;
+    default:
+        ASSERT(0);
+        return; /* unsupported size: no scan table, so leave the block untouched */
+    }
+    for(trans_unit_idx = (trans_size * trans_size / 16) - 1; trans_unit_idx >= 0; trans_unit_idx--)
+    {
+        WORD32 last_scan_pos = -1, first_scan_pos = 16, sign_first_coeff, sum_abs_level = 0,
+               quant_coeff_first;
+
+        if(pu1_csbf_buf[pi4_subBlock2csbfId_map[pu1_trans_table[trans_unit_idx]]])
+        {
+            /* row of csb */
+            blk_row = (pu1_trans_table[trans_unit_idx] >> shift_value) * 4;
+            /* col of csb */
+            blk_col = (pu1_trans_table[trans_unit_idx] & mask_value) * 4;
+
+            if(last_cg == -1)
+            {
+                last_cg = 1;
+            }
+            /* find first/last non-zero scan positions and the sum of absolute levels */
+            for(i = 15; i >= 0; i--)
+            {
+                x_pos = (pu1_csb_table[i] & 0x3) + blk_col;
+                y_pos = (pu1_csb_table[i] >> 2) + blk_row;
+
+                i2_quant_coeff = pi2_quant_coeffs[x_pos + (y_pos * trans_size)];
+
+                if(i2_quant_coeff)
+                {
+                    first_scan_pos = i;
+                    if(-1 == last_scan_pos)
+                    {
+                        last_scan_pos = i;
+                    }
+
+                    sum_abs_level += abs(i2_quant_coeff);
+                }
+            }
+
+            if((last_scan_pos - first_scan_pos) >= 4)
+            {
+                x_pos = (pu1_csb_table[first_scan_pos] & 0x3) + blk_col;
+                y_pos = (pu1_csb_table[first_scan_pos] >> 2) + blk_row;
+
+                quant_coeff_first = pi2_quant_coeffs[x_pos + (y_pos * trans_size)];
+
+                sign_first_coeff = (quant_coeff_first > 0) ? 0 : 1;
+                /* parity of the level sum must equal the sign bit; otherwise adjust one level */
+                if(sign_first_coeff != (sum_abs_level & 0x1))
+                {
+                    WORD32 q_err;
+                    WORD32 min_cost = MAX_INT;
+                    WORD32 final_change = 0, cur_cost = 0, cur_change = 0;
+                    WORD16 i2_tr_coeff;
+                    WORD16 i2_iquant_coeff;
+                    /* pick the scan position whose +/-1 change has the lowest cost */
+                    for(i = (last_cg == 1) ? last_scan_pos : 15; i >= 0; i--)
+                    {
+                        x_pos = (pu1_csb_table[i] & 0x3) + blk_col;
+                        y_pos = (pu1_csb_table[i] >> 2) + blk_row;
+
+                        i2_quant_coeff = pi2_quant_coeffs[x_pos + (y_pos * trans_size)];
+                        i2_tr_coeff = pi2_tr_coeffs[x_pos + (y_pos * trans_size)];
+                        i2_iquant_coeff = pi2_iquant_data[x_pos + (y_pos * dst_iq_strd)];
+
+                        q_err = abs(i2_tr_coeff) - abs(i2_iquant_coeff);
+
+                        if(i2_quant_coeff != 0)
+                        {
+                            cur_cost = -1 * SIGN(q_err) * q_err;
+
+                            if(q_err <= 0)
+                            {
+                                if(i == first_scan_pos && abs(i2_quant_coeff) == 1)
+                                {
+                                    cur_cost = MAX_INT; /* -1 would zero the first coefficient */
+                                }
+                            }
+                        }
+                        else
+                        {
+                            cur_cost = -q_err;
+                            if(i < first_scan_pos)
+                            {
+                                WORD32 sign_bit = (i2_tr_coeff >= 0 ? 0 : 1);
+
+                                if(sign_first_coeff != sign_bit)
+                                {
+                                    cur_cost = MAX_INT; /* sign differs from current first coeff */
+                                }
+                            }
+                        }
+
+                        cur_change = (i2_quant_coeff == 0) ? 1 : (q_err > 0 ? 1 : -1);
+
+                        if(cur_cost < min_cost)
+                        {
+                            min_cost = cur_cost;
+                            final_change = cur_change;
+                            best_pos = i;
+                        }
+                    }
+                    /* re-read the coefficient selected for modification, not the last one scanned */
+                    x_pos = (pu1_csb_table[best_pos] & 0x3) + blk_col;
+                    y_pos = (pu1_csb_table[best_pos] >> 2) + blk_row;
+                    i2_quant_coeff = pi2_quant_coeffs[x_pos + (y_pos * trans_size)];
+                    i2_iquant_coeff = pi2_iquant_data[x_pos + (y_pos * dst_iq_strd)];
+                    i2_tr_coeff = pi2_tr_coeffs[x_pos + (y_pos * trans_size)];
+
+                    /* a level already at the WORD16 limit can only be reduced in magnitude */
+                    if((i2_quant_coeff == 32767) || (i2_quant_coeff == -32768))
+                    {
+                        final_change = -1;
+                    }
+
+                    if(i2_tr_coeff >= 0)
+                    {
+                        pi2_quant_coeffs[x_pos + (y_pos * trans_size)] += final_change;
+                    }
+                    else
+                    {
+                        pi2_quant_coeffs[x_pos + (y_pos * trans_size)] -= final_change;
+                    }
+                    /* NOTE(review): levels use trans_size above but dst_q_strd below - confirm equal */
+                    {
+                        LWORD64 i8_err_old, i8_err_new;
+                        /* Inverse Quantization */
+                        IQUANT(
+                            pi2_iquant_data[y_pos * dst_iq_strd + x_pos],
+                            pi2_quant_coeffs[y_pos * dst_q_strd + x_pos],
+                            pi2_dequant_coeff[y_pos * trans_size + x_pos] *
+                                g_ihevc_iquant_scales[qp_rem],
+                            shift_iq,
+                            qp_div);
+
+                        /* swap the old squared error for the new one; 64-bit squares cannot overflow */
+                        i8_err_old = (i2_tr_coeff - i2_iquant_coeff);
+                        i8_err_new = (i2_tr_coeff - pi2_iquant_data[y_pos * dst_iq_strd + x_pos]);
+                        ssd_cost += (i8_err_new * i8_err_new) - (i8_err_old * i8_err_old);
+                    }
+                }
+            }
+            if(last_cg == 1)
+            {
+                last_cg = 0;
+            }
+        }
+    }
+
+    ps_rdoq_sbh_params->i8_ssd_cost = ssd_cost;
+}
diff --git a/encoder/ihevce_enc_sbh_utils.h b/encoder/ihevce_enc_sbh_utils.h
new file mode 100644
index 0000000..85cfbfb
--- /dev/null
+++ b/encoder/ihevce_enc_sbh_utils.h
@@ -0,0 +1,43 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+*
+* @file ihevce_enc_sbh_utils.h
+*
+* @brief
+* Contains the declarations of utils for SBH toolset
+*
+* @author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_ENC_SBH_UTILS_H_
+#define _IHEVCE_ENC_SBH_UTILS_H_
+
+/*****************************************************************************/
+/* Function Declarations */
+/*****************************************************************************/
+void ihevce_sign_data_hiding(rdoq_sbh_ctxt_t *ps_rdoq_sbh_params);
+
+#endif /*_IHEVCE_ENC_SBH_UTILS_H_*/
diff --git a/encoder/ihevce_enc_structs.h b/encoder/ihevce_enc_structs.h
new file mode 100644
index 0000000..b0ec5c6
--- /dev/null
+++ b/encoder/ihevce_enc_structs.h
@@ -0,0 +1,3347 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_enc_structs.h
+*
+* \brief
+* This file contains structure definitions of Encoder
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_ENC_STRUCTS_H_
+#define _IHEVCE_ENC_STRUCTS_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+#define HEVCE_MAX_WIDTH 1920
+#define HEVCE_MAX_HEIGHT 1088
+
+#define HEVCE_MIN_WIDTH 64
+#define HEVCE_MIN_HEIGHT 64
+
+#define MAX_CTBS_IN_FRAME ((HEVCE_MAX_WIDTH * HEVCE_MAX_HEIGHT) / (MIN_CTB_SIZE * MIN_CTB_SIZE))
+#define MAX_NUM_CTB_ROWS_FRM ((HEVCE_MAX_HEIGHT) / (MIN_CTB_SIZE))
+/* Quotient macros are wrapped in outer parentheses so they expand as a single operand */
+#define MIN_VERT_PROC_UNIT (8)
+#define MAX_NUM_VERT_UNITS_FRM ((HEVCE_MAX_HEIGHT) / (MIN_VERT_PROC_UNIT))
+
+#define HEVCE_MAX_REF_PICS 8
+#define HEVCE_MAX_DPB_PICS (HEVCE_MAX_REF_PICS + 1)
+
+#define PAD_HORZ 80
+#define PAD_VERT 80
+
+#define DEFAULT_MAX_REFERENCE_PICS 4
+
+#define BLU_RAY_SUPPORT 231457
+
+/** @brief max number of parts in minCU : max 4 for NxN */
+#define NUM_PU_PARTS 4
+/** @brief max number of parts in Inter CU */
+#define NUM_INTER_PU_PARTS (MAX_NUM_INTER_PARTS)
+#define SEND_BI_RDOPT
+#ifdef SEND_BI_RDOPT
+/** @brief */
+#define MAX_INTER_CU_CANDIDATES 4
+#else
+/** @brief */
+#define MAX_INTER_CU_CANDIDATES 3
+#endif
+/** @brief */
+#define MAX_INTRA_CU_CANDIDATES 3
+
+#define MAX_INTRA_CANDIDATES 35
+
+/** For each resolution & bit-rate instance, one entropy thread is created */
+#define NUM_ENTROPY_THREADS (IHEVCE_MAX_NUM_RESOLUTIONS * IHEVCE_MAX_NUM_BITRATES)
+
+/* Number of buffers between Decomp and HME layers 1 : Seq mode >1 parallel mode */
+#define NUM_BUFS_DECOMP_HME 1
+
+/** Macro to indicate pre me and L0 ipe stagger in pre enc*/
+/** Implies MAX_PRE_ENC_STAGGER - 1 max stagger*/
+#define MAX_PRE_ENC_STAGGER (NUM_LAP2_LOOK_AHEAD + 1 + MIN_L1_L0_STAGGER_NON_SEQ)
+
+#define NUM_ME_ENC_BUFS (MAX_NUM_ENC_LOOP_PARALLEL)
+
+#define MIN_L0_IPE_ENC_STAGGER 1
+
+/*stagger between L0 IPE and enc*/
+#define MAX_L0_IPE_ENC_STAGGER (NUM_ME_ENC_BUFS + (MIN_L0_IPE_ENC_STAGGER))
+
+#define MAX_PRE_ENC_RC_DELAY (MAX_L0_IPE_ENC_STAGGER + 1 + NUM_BUFS_DECOMP_HME)
+
+#define MIN_PRE_ENC_RC_DELAY (MIN_L0_IPE_ENC_STAGGER + 1 + NUM_BUFS_DECOMP_HME)
+
+/** @brief number of contexts buffers maintained at frame level b/w pre-encode : encode */
+/* Explanation for minus 1: e.g. MAX_PRE_ENC_STAGGER = 31 and MAX_L0_IPE_ENC_STAGGER = 5. In this case L1 produces 30 buffers,
+ L0 will start off with the 30th buffer and enc will work on the 33rd and 34th frames. */
+/* NUM_BUFS_DECOMP_HME is added to take care of pipeline between Decomp-preintra and HME */
+#define MAX_NUM_PREENC_ENC_BUFS \
+ (MAX_PRE_ENC_STAGGER + MAX_L0_IPE_ENC_STAGGER + NUM_BUFS_DECOMP_HME - 1) //22//5
+
+#define MIN_NUM_PREENC_ENC_BUFS \
+ (MAX_PRE_ENC_STAGGER + MIN_L0_IPE_ENC_STAGGER + NUM_BUFS_DECOMP_HME - 1)
+
+/** @brief number of ctb contexts maintained at frame level b/w encode : entropy */
+#define NUM_FRMPROC_ENTCOD_BUFS 8
+
+/** @brief number of extra recon buffs required for stagger design*/
+#define NUM_EXTRA_RECON_BUFS 0
+
+/** recon picture buffer size need to be increased to support EncLoop Parallelism **/
+#define NUM_EXTRA_RECON_BUFS_FOR_ELP 0
+
+/** @brief maximum number of bytes in 4x4 after scanning */
+#define MAX_SCAN_COEFFS_BYTES_4x4 (48)
+
+/** @brief maximum number of luma coeffs bytes after scan at CTB level */
+#define MAX_LUMA_COEFFS_CTB ((MAX_SCAN_COEFFS_BYTES_4x4) * (MAX_TU_IN_CTB)*4)
+
+/** @brief maximum number of chroma coeffs bytes after scan at CTB level */
+#define MAX_CHRM_COEFFS_CTB ((MAX_SCAN_COEFFS_BYTES_4x4) * ((MAX_TU_IN_CTB >> 1)) * 4)
+
+/** @brief maximum number of coeffs bytes after scan at CTB level */
+#define MAX_SCAN_COEFFS_CTB ((MAX_LUMA_COEFFS_CTB) + (MAX_CHRM_COEFFS_CTB))
+
+/** @brief PU map CTB buffer bytes for neighbour availability */
+#define MUN_PU_MAP_BYTES_PER_CTB (MAX_PU_IN_CTB_ROW * MAX_PU_IN_CTB_ROW)
+
+/** @brief total system memory records */
+#define TOTAL_SYSTEM_MEM_RECS 120
+
+/** @brief number of input async command buffers */
+#define NUM_AYSNC_CMD_BUFS 4
+
+/** @brief Command buffer size */
+#define ENC_COMMAND_BUFF_SIZE 512 /* 512 bytes */
+
+/** @brief Number of output buffers */
+#define NUM_OUTPUT_BUFS 4
+
+/** @brief Lamda for SATD cost estimation */
+#define LAMDA_SATD 1
+
+/** @brief Maximum number of 1s in u2_sig_coeff_abs_gt1_flags */
+#define MAX_GT_ONE 8
+
+/** Max number of intra pred modes */
+#define MAX_NUM_IP_MODES 35
+
+/** Number of best intra modes used for intra mode refinement */
+#define NUM_BEST_MODES 3
+
+/** Maximum number of parallel frame processing threads in pre-encode group */
+#define MAX_NUM_FRM_PROC_THRDS_PRE_ENC MAX_NUM_CORES
+
+/** Maximum number of parallel frame processing threads in encode group */
+#define MAX_NUM_FRM_PROC_THRDS_ENC MAX_NUM_CORES
+
+/** Macro to indicate the PING_PONG buffers for stagger */
+#define PING_PONG_BUF 2
+
+/** Max number of layers in Motion estimation
+ * should be greater than or equal to MAX_NUM_LAYERS defined in hme_interface.h
+ */
+
+#define MAX_NUM_HME_LAYERS 5
+/**
+******************************************************************************
+ * @brief Maximum number of layers allowed
+******************************************************************************
+ */
+#define MAX_NUM_LAYERS 4
+
+#define NUM_RC_PIC_TYPE 9
+
+#define MAX_NUM_NODES_CU_TREE (85)
+
+/* macros to control Dynamic load balance */
+#define DYN_LOAD_BAL_UPPER_LIMIT 0.80
+
+#define DYN_LOAD_BAL_LOWER_LIMIT 0.20
+
+#define NUM_SUB_GOP_DYN_BAL 1
+
+#define MIN_NUM_FRMS_DYN_BAL 4
+
+#define CORES_SRES_OR_MRES 2
+
+#define HME_HIGH_SAD_BLK_THRESH 35
+
+/* Enable to compare cabac states of final entropy thread with enc loop states */
+#define VERIFY_ENCLOOP_CABAC_STATES 0
+
+#define MAX_NUM_BLKS_IN_MAX_CU 64 /* max cu size is 64x64 */
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+typedef void (*pf_iq_it_rec)(
+ WORD16 *pi2_src,
+ WORD16 *pi2_tmp,
+ UWORD8 *pu1_pred,
+ WORD16 *pi2_dequant_coeff,
+ UWORD8 *pu1_dst,
+ WORD32 qp_div, /* qpscaled / 6 */
+ WORD32 qp_rem, /* qpscaled % 6 */
+ WORD32 src_strd,
+ WORD32 pred_strd,
+ WORD32 dst_strd,
+ WORD32 zero_cols,
+ WORD32 zero_rows);
+
+typedef void (*pf_intra_pred)(
+ UWORD8 *pu1_ref, WORD32 src_strd, UWORD8 *pu1_dst, WORD32 dst_strd, WORD32 nt, WORD32 mode);
+
+typedef UWORD32 (*pf_res_trans_luma)(
+ UWORD8 *pu1_src,
+ UWORD8 *pu1_pred,
+ WORD32 *pi4_tmp,
+ WORD16 *pi2_dst,
+ WORD32 src_strd,
+ WORD32 pred_strd,
+ WORD32 dst_strd_chr_flag);
+
+typedef WORD32 (*pf_quant)(
+ WORD16 *pi2_coeffs,
+ WORD16 *pi2_quant_coeff,
+ WORD16 *pi2_dst,
+ WORD32 qp_div, /* qpscaled / 6 */
+ WORD32 qp_rem, /* qpscaled % 6 */
+ WORD32 q_add,
+ WORD32 src_strd,
+ WORD32 dst_strd,
+ UWORD8 *pu1_csbf_buf,
+ WORD32 csbf_strd,
+ WORD32 *zero_cols,
+ WORD32 *zero_row);
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+/// supported partition shape
+typedef enum
+{
+ SIZE_2Nx2N = 0, ///< symmetric motion partition, 2Nx2N
+ SIZE_2NxN = 1, ///< symmetric motion partition, 2Nx N
+ SIZE_Nx2N = 2, ///< symmetric motion partition, Nx2N
+ SIZE_NxN = 3, ///< symmetric motion partition, Nx N
+ SIZE_2NxnU = 4, ///< asymmetric motion partition, 2Nx( N/2) + 2Nx(3N/2)
+ SIZE_2NxnD = 5, ///< asymmetric motion partition, 2Nx(3N/2) + 2Nx( N/2)
+ SIZE_nLx2N = 6, ///< asymmetric motion partition, ( N/2)x2N + (3N/2)x2N
+ SIZE_nRx2N = 7 ///< asymmetric motion partition, (3N/2)x2N + ( N/2)x2N
+} PART_SIZE_E;
+
+/** @brief Interface level Queues of Encoder */
+
+typedef enum
+{
+ IHEVCE_INPUT_DATA_CTRL_Q = 0,
+ IHEVCE_ENC_INPUT_Q,
+ IHEVCE_INPUT_ASYNCH_CTRL_Q,
+ IHEVCE_OUTPUT_DATA_Q,
+ IHEVCE_OUTPUT_STATUS_Q,
+ IHEVCE_RECON_DATA_Q, /* queue for holding recon buffer */
+
+ IHEVCE_FRM_PRS_ENT_COD_Q, /* queue for holding output buffer of enc_loop | input buffer of entropy */
+
+ IHEVCE_PRE_ENC_ME_Q, /* queue for holding input buffer to ME | output of pre-enc */
+
+ IHEVCE_ME_ENC_RDOPT_Q, /* queue for holding output buffer of ME or input buffer of Enc-RDopt */
+
+ IHEVCE_L0_IPE_ENC_Q, /* queue for holding L0 ipe data to enc loop */
+
+ /* should be last entry; doubles as the number of queues */
+ IHEVCE_MAX_NUM_QUEUES
+
+} IHEVCE_Q_DESC_T;
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+
+/**
+RC_QP_QSCALE conversion structures
+**/
+typedef struct
+{
+ WORD16 i2_min_qp;
+
+ WORD16 i2_max_qp;
+
+ WORD16 i2_min_qscale;
+
+ WORD16 i2_max_qscale;
+
+ WORD32 *pi4_qscale_to_qp;
+
+ WORD32 *pi4_qp_to_qscale_q_factor;
+
+ WORD32 *pi4_qp_to_qscale;
+
+ WORD8 i1_qp_offset;
+
+} rc_quant_t;
+
+/**
+******************************************************************************
+ * @brief 4x4 level structure which contains all the parameters
+ * for neighbour prediction purpose
+******************************************************************************
+ */
+typedef struct
+{
+ /** PU motion vectors */
+ pu_mv_t mv;
+ /** Intra or Inter flag for each partition - 0 or 1 */
+ UWORD16 b1_intra_flag : 1;
+ /** CU skip flag - 0 or 1 */
+ UWORD16 b1_skip_flag : 1;
+ /** CU depth in CTB tree (0-3) */
+ UWORD16 b2_cu_depth : 2;
+
+ /** Y Qp for loop filter (signed 8-bit field, unlike its unsigned neighbours) */
+ WORD16 b8_qp : 8;
+
+ /** Luma Intra Mode 0 - 34 */
+ UWORD16 b6_luma_intra_mode : 6;
+
+ /** Y CBF for BS compute */
+ UWORD16 b1_y_cbf : 1;
+ /** Pred L0 flag of current 4x4 */
+ UWORD16 b1_pred_l0_flag : 1;
+
+ /** Pred L1 flag of current 4x4 */
+ UWORD16 b1_pred_l1_flag : 1;
+} nbr_4x4_t;
+
+typedef struct
+{
+ /** Bottom Left availability flag */
+ UWORD8 u1_bot_lt_avail;
+
+ /** Left availability flag */
+ UWORD8 u1_left_avail;
+
+ /** Top availability flag */
+ UWORD8 u1_top_avail;
+
+ /** Top Right availability flag */
+ UWORD8 u1_top_rt_avail;
+
+ /** Top Left availability flag */
+ UWORD8 u1_top_lt_avail;
+
+} nbr_avail_flags_t;
+
+typedef struct
+{
+ /** prev intra luma pred flag (1 bit) */
+ UWORD8 b1_prev_intra_luma_pred_flag : 1;
+
+ /** mpm_idx (2 bits) */
+ UWORD8 b2_mpm_idx : 2;
+
+ /** remainder intra pred mode (5 bits) */
+ UWORD8 b5_rem_intra_pred_mode : 5;
+
+} intra_prev_rem_flags_t;
+
+/**
+******************************************************************************
+ * @brief calc (T+Q+RDOQ) output TU structure; entropy input TU structure
+******************************************************************************
+ */
+typedef struct
+{
+ /** base tu structure */
+ tu_t s_tu;
+
+ /** offset of luma data in ecd buffer */
+ WORD32 i4_luma_coeff_offset;
+
+ /** offset of cb data in ecd buffer */
+ WORD32 ai4_cb_coeff_offset[2];
+
+ /** offset of cr data in ecd buffer */
+ WORD32 ai4_cr_coeff_offset[2];
+
+} tu_enc_loop_out_t;
+
+typedef struct
+{
+ /* L0 Motion Vector */
+ mv_t s_l0_mv;
+
+ /* L1 Motion Vector */
+ mv_t s_l1_mv;
+
+ /* L0 Ref index */
+ WORD8 i1_l0_ref_idx;
+
+ /* L1 Ref index */
+ WORD8 i1_l1_ref_idx;
+
+ /* L0 Ref Pic Buf ID */
+ WORD8 i1_l0_pic_buf_id;
+
+ /* L1 Ref Pic Buf ID */
+ WORD8 i1_l1_pic_buf_id;
+
+ /** intra flag */
+ UWORD8 b1_intra_flag : 1;
+
+ /* Pred mode */
+ UWORD8 b2_pred_mode : 2;
+
+ /* reserved flag can be used for something later */
+ UWORD8 u1_reserved;
+
+} pu_col_mv_t;
+
+/*****************************************************************************/
+/* Encoder uses same structure as pu_t for prediction unit */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ * @brief Encode loop (T+Q+RDOQ) output CU structure; entropy input CU structure
+******************************************************************************
+ */
+typedef struct
+{
+ /* CU X position in terms of min CU (8x8) units */
+ UWORD32 b3_cu_pos_x : 3;
+
+ /* CU Y position in terms of min CU (8x8) units */
+ UWORD32 b3_cu_pos_y : 3;
+
+ /** CU size in terms of min CU (8x8) units */
+ UWORD32 b4_cu_size : 4;
+
+ /** transquant bypass flag ; 0 for this encoder */
+ UWORD32 b1_tq_bypass_flag : 1;
+
+ /** cu skip flag */
+ UWORD32 b1_skip_flag : 1;
+
+ /** intra / inter CU flag */
+ UWORD32 b1_pred_mode_flag : 1;
+
+ /** indicates partition information for CU
+ * For intra 0 : for 2Nx2N / 1 for NxN iff CU=minCBsize
+ * For inter 0 : @sa PART_SIZE_E
+ */
+ UWORD32 b3_part_mode : 3;
+
+ /** 0 for this encoder */
+ UWORD32 b1_pcm_flag : 1;
+
+ /** only applicable for intra cu */
+ UWORD32 b3_chroma_intra_pred_mode : 3;
+
+ /** no residue flag for cu */
+ UWORD32 b1_no_residual_syntax_flag : 1;
+
+ /* flag to indicate if current CU is the first
+ CU of the Quantisation group*/
+ UWORD32 b1_first_cu_in_qg : 1;
+
+ /** Intra prev and remainder flags
+ * if part is NxN the entries 1,2,3 will be valid
+ * otherwise only entry 0 will be set.
+ */
+ intra_prev_rem_flags_t as_prev_rem[NUM_PU_PARTS];
+
+ /**
+ * Access valid number of pus in this array based on b3_part_mode
+ * Motion vector differentials and reference idx should be
+ * populated in this structure
+ * @remarks shall be accessed only for inter pus
+ */
+ pu_t *ps_pu;
+
+ /**
+ * pointer to first tu of this cu. Each TU need to be populated
+ * in TU order by calc. Total TUs in CU is given by u2_num_tus_in_cu
+ */
+ tu_enc_loop_out_t *ps_enc_tu;
+
+ /** total TUs in this CU; shall be 0 if b1_no_residual_syntax_flag = 1 */
+ UWORD16 u2_num_tus_in_cu;
+
+ /** Coeff buffer pointer */
+ /* Pointer to transform coeff data */
+ /*************************************************************************/
+ /* Following format is repeated for every coded TU */
+ /* Luma Block */
+ /* num_coeffs : 16 bits */
+ /* zero_cols : 8 bits ( 1 bit per 4 columns) */
+ /* sig_coeff_map : ((TU Size * TU Size) + 31) >> 5 number of WORD32s */
+ /* coeff_data : Non zero coefficients */
+ /* Cb Block (only for last TU in 4x4 case else for every luma TU) */
+ /* num_coeffs : 16 bits */
+ /* zero_cols : 8 bits ( 1 bit per 4 columns) */
+ /* sig_coeff_map : ((TU Size * TU Size) + 31) >> 5 number of WORD32s */
+ /* coeff_data : Non zero coefficients */
+ /* Cr Block (only for last TU in 4x4 case else for every luma TU) */
+ /* num_coeffs : 16 bits */
+ /* zero_cols : 8 bits ( 1 bit per 4 columns) */
+ /* sig_coeff_map : ((TU Size * TU Size) + 31) >> 5 number of WORD32s */
+ /* coeff_data : Non zero coefficients */
+ /*************************************************************************/
+ void *pv_coeff;
+
+ /** qp used for this CU
+ * @remarks :
+ */
+ WORD8 i1_cu_qp;
+
+} cu_enc_loop_out_t;
+
+/**
+ * SAO parameters (type, band position, offsets and merge flags)
+ */
+typedef struct
+{
+ /**
+ * sao_type_idx_luma
+ */
+ UWORD32 b3_y_type_idx : 3;
+
+ /**
+ * luma sao_band_position
+ */
+ UWORD32 b5_y_band_pos : 5;
+
+ /**
+ * sao_type_idx_chroma (cb copy)
+ */
+ UWORD32 b3_cb_type_idx : 3;
+
+ /**
+ * cb sao_band_position
+ */
+ UWORD32 b5_cb_band_pos : 5;
+
+ /**
+ * sao_type_idx_chroma (cr copy)
+ */
+ UWORD32 b3_cr_type_idx : 3;
+
+ /**
+ * cr sao_band_position
+ */
+ UWORD32 b5_cr_band_pos : 5;
+
+ /*SAO Offsets
+ * In all these offsets, 0th element is not used
+ */
+ /**
+ * luma SaoOffsetVal[i] (signed WORD8 despite the u1_ prefix)
+ */
+ WORD8 u1_y_offset[5];
+
+ /**
+ * chroma cb SaoOffsetVal[i] (signed WORD8 despite the u1_ prefix)
+ */
+ WORD8 u1_cb_offset[5];
+
+ /**
+ * chroma cr SaoOffsetVal[i] (signed WORD8 despite the u1_ prefix)
+ */
+ WORD8 u1_cr_offset[5];
+
+ /**
+ * sao_merge_left_flag common for y,cb,cr
+ */
+ UWORD32 b1_sao_merge_left_flag : 1;
+
+ /**
+ * sao_merge_up_flag common for y,cb,cr
+ */
+ UWORD32 b1_sao_merge_up_flag : 1;
+
+} sao_enc_t;
+
+/**
+******************************************************************************
+ * @brief ctb output structure; output of Encode loop, input to entropy
+******************************************************************************
+ */
+typedef struct
+{
+ /**
+ * bit0 : depth0 split flag, (64x64 splits)
+ * bits 1-3 : not used
+ * bits 4-7 : depth1 split flags; valid iff depth0 split=1 (32x32 splits)
+ * bits 8-23: depth2 split flags; (if 0 16x16 is cu else 8x8 min cu)
+
+ * if a split flag of n is set for depth 1, check the following split flags
+ * of [(8 + 4*(n-4)): (8 + 4*(n-4)+ 3)] for depth 2:
+ *
+ */
+ UWORD32 u4_cu_split_flags;
+
+ /***************************************************************
+ * For any given CU position CU_posx, CU_posy access the entry of
+ * au4_packed_tu_split_flags_cu selected by (CU_posx >> 5), (CU_posy >> 5); NOTE(review): confirm exact index mapping
+ * Note : For CTB size smaller than 64x64 only use au4_packed_tu_split_flags_cu[0]
+ ****************************************************************/
+
+ /**
+ * access bits corresponding to actual CU size till leaf nodes
+ * bit0 : (32x32 TU split flag)
+ * bits 1-3 : not used
+ * bits 4-7 : (16x16 TUsplit flags)
+ * bits 8-23: (8x8 TU split flags)
+
+ * if a split flag of n is set for depth 1, check the following split flags
+ * of [(8 + 4*(n-4)): (8 + 4*(n-4)+ 3)] for depth 2:
+ *
+ * @remarks As tu sizes are relative to CU sizes the producer has to
+ * make sure the correctness of au4_packed_tu_split_flags_cu.
+ *
+ * @remarks au4_packed_tu_split_flags_cu[1]/[2]/[3] to be used only
+ * for 64x64 ctb.
+ */
+ UWORD32 au4_packed_tu_split_flags_cu[4];
+
+ /**
+ * pointer to first CU of CTB. Each CU need to be populated
+ * in CU order by calc. Total CUs in CTB is given by u1_num_cus_in_ctb
+ */
+ cu_enc_loop_out_t *ps_enc_cu;
+
+ /** total CUs in this CTB (number of valid entries reachable from ps_enc_cu) */
+ UWORD8 u1_num_cus_in_ctb;
+
+ /** CTB neighbour availability flags */
+ nbr_avail_flags_t s_ctb_nbr_avail_flags;
+
+ /* SAO parameters of the CTB */
+ sao_enc_t s_sao;
+
+} ctb_enc_loop_out_t;
+
+/**
+******************************************************************************
+ * @brief cu inter candidate for encoder
+******************************************************************************
+ */
+typedef struct
+{
+ /** base pu structure
+ * access valid number of entries in this array based on b3_part_size
+ */
+ pu_t as_inter_pu[NUM_INTER_PU_PARTS];
+
+ /* TU split flag : tu_split_flag[0] represents the transform splits
+ * for CU size <= 32, for 64x64 each ai4_tu_split_flag corresponds
+ * to respective 32x32 */
+ /* For a 8x8 TU - 1 bit used to indicate split */
+ /* For a 16x16 TU - LSB used to indicate winner between 16 and 8 TU's. 4 other bits used to indicate split in each 8x8 quadrant */
+ /* For a 32x32 TU - See above */
+ WORD32 ai4_tu_split_flag[4];
+
+ /* Early CBF flags; same array shape as ai4_tu_split_flag above.
+ * NOTE(review): the bit layout of this field is not described
+ * anywhere in this header. Presumably it mirrors the packing of
+ * ai4_tu_split_flag (entry 0 for CU size <= 32, one entry per
+ * 32x32 block for a 64x64 CU) - TODO confirm against the code
+ * that populates it. */
+ WORD32 ai4_tu_early_cbf[4];
+
+ /**Pointer to the buffer having predicted data after mc in SATD stage
+ * Since we have 2 buffers for each candidate pred data for best merge candidate
+ * can be in one of the 2 buffers.
+ */
+ UWORD8 *pu1_pred_data;
+
+ UWORD16 *pu2_pred_data;
+
+ UWORD8 *pu1_pred_data_scr;
+
+ UWORD16 *pu2_pred_data_src;
+
+ /* Total cost: SATD cost + MV cost */
+ WORD32 i4_total_cost;
+
+ /** Stride for predicted data*/
+ WORD32 i4_pred_data_stride;
+
+ /** @remarks b3_part_size can be non square only for Inter */
+ UWORD8 b3_part_size : 3; /* @sa: PART_SIZE_E */
+
+ /** evaluate transform for cusize iff this flag is 1 */
+ /** this flag should be set 0 if CU is 64x64 */
+ UWORD8 b1_eval_tx_cusize : 1;
+
+ /** evaluate transform for cusize/2 iff this flag is 1 */
+ UWORD8 b1_eval_tx_cusize_by2 : 1;
+
+ /** Skip Flag : ME should always set this 0 for the candidates */
+ UWORD8 b1_skip_flag : 1;
+
+ UWORD8 b1_intra_has_won : 1;
+
+ /* used to mark if this mode needs to be evaluated in auxiliary mode */
+ /* if 1, this mode will be evaluated otherwise not.*/
+ UWORD8 b1_eval_mark : 1;
+
+} cu_inter_cand_t;
+
+/**
+******************************************************************************
+ * @brief cu intra candidate for encoder
+******************************************************************************
+ */
+typedef struct
+{
+ UWORD8 au1_intra_luma_mode_nxn_hash[NUM_PU_PARTS][MAX_INTRA_CANDIDATES];
+
+ /**
+ * List of NxN PU candidates in CU for each partition
+ * valid only if current cusize = mincusize
+ * +1 to signal the last flag invalid value of 255 needs to be stored
+ */
+ UWORD8 au1_intra_luma_modes_nxn[NUM_PU_PARTS][(MAX_INTRA_CU_CANDIDATES * (4)) + 2 + 1];
+
+ /* used to mark if this mode needs to be evaluated in auxiliary mode */
+ /* if 1, this mode will be evaluated otherwise not.*/
+ UWORD8 au1_nxn_eval_mark[NUM_PU_PARTS][MAX_INTRA_CU_CANDIDATES + 1];
+
+ /**
+ * List of 2Nx2N PU candidates in CU (tu_eq_cu variant)
+ * +1 to signal the last flag invalid value of 255 needs to be stored
+ */
+ UWORD8 au1_intra_luma_modes_2nx2n_tu_eq_cu[MAX_INTRA_CU_CANDIDATES + 1];
+
+ /**
+ * List of 2Nx2N PU candidates in CU (tu_eq_cu_by_2 variant)
+ * +1 to signal the last flag invalid value of 255 needs to be stored
+ */
+ UWORD8 au1_intra_luma_modes_2nx2n_tu_eq_cu_by_2[MAX_INTRA_CU_CANDIDATES + 1];
+
+ /* used to mark if this mode needs to be evaluated in auxiliary mode */
+ /* if 1, this mode will be evaluated otherwise not.*/
+ UWORD8 au1_2nx2n_tu_eq_cu_eval_mark[MAX_INTRA_CU_CANDIDATES + 1];
+
+ /* used to mark if this mode needs to be evaluated in auxiliary mode */
+ /* if 1, this mode will be evaluated otherwise not.*/
+ UWORD8 au1_2nx2n_tu_eq_cu_by_2_eval_mark[MAX_INTRA_CU_CANDIDATES + 1];
+
+ UWORD8 au1_num_modes_added[NUM_PU_PARTS];
+
+ /** evaluate transform for cusize iff this flag is 1 */
+ /** this flag should be set 0 if CU is 64x64 */
+ UWORD8 b1_eval_tx_cusize : 1;
+
+ /** evaluate transform for cusize/2 iff this flag is 1 */
+ UWORD8 b1_eval_tx_cusize_by2 : 1;
+
+ /** number of intra candidates for SATD evaluation (6-bit field) */
+ UWORD8 b6_num_intra_cands : 6;
+
+} cu_intra_cand_t;
+
+/**
+******************************************************************************
+ * @brief cu structure for mode analysis/evaluation
+******************************************************************************
+ */
+typedef struct
+{
+ /** CU X position in terms of min CU (8x8) units */
+ UWORD8 b3_cu_pos_x : 3;
+
+ /** CU Y position in terms of min CU (8x8) units */
+ UWORD8 b3_cu_pos_y : 3;
+
+ /** reserved bits */
+ UWORD8 b2_reserved : 2;
+
+ /** CU size 2N (width or height) in pixels */
+ UWORD8 u1_cu_size;
+
+ /** Intra CU candidates after FAST CU decision (output of IPE)
+ * 8421 algo along with transform size evaluation will
+ * be done for these modes in Encode loop pass.
+ */
+ cu_intra_cand_t s_cu_intra_cand;
+
+ /** indicates the angular mode (0 - 34) for chroma,
+ * Note : No provision currently to take chroma through RDOPT or SATD
+ */
+ UWORD8 u1_chroma_intra_pred_mode;
+
+ /** number of inter candidates in as_cu_inter_cand[]
+ * shall be 0 for intra frames.
+ * These inters are evaluated for RDOPT apart from merge/skip candidates
+ */
+ UWORD8 u1_num_inter_cands;
+
+ /** List of candidates to be evaluated (SATD/RDOPT) for this CU
+ * @remarks : all merge/skip candidates not a part of this list
+ */
+ cu_inter_cand_t as_cu_inter_cand[MAX_INTER_CU_CANDIDATES];
+
+ WORD32 ai4_mv_cost[MAX_INTER_CU_CANDIDATES][NUM_INTER_PU_PARTS];
+
+#if REUSE_ME_COMPUTED_ERROR_FOR_INTER_CAND_SIFTING
+ WORD32 ai4_err_metric[MAX_INTER_CU_CANDIDATES][NUM_INTER_PU_PARTS];
+#endif
+
+ /* Flag to convey if Intra or Inter is the best candidate among the
+ candidates populated
+ 0: If inter is the winner and 1: if Intra is winner*/
+ UWORD8 u1_best_is_intra;
+
+ /** number of intra rdopt candidates
+ * @remarks : shall be <= s_cu_intra_cand.b6_num_intra_cands
+ */
+ UWORD8 u1_num_intra_rdopt_cands;
+ /** qp used for this CU
+ * @remarks :
+ */
+ WORD8 i1_cu_qp;
+ /** Activity factor used in pre enc thread for deriving the Qp
+ * @remarks : This is in Q format
+ */
+ WORD32 i4_act_factor[4][2];
+
+} cu_analyse_t;
+
+/**
+******************************************************************************
+ * @brief Structure for CU recursion
+******************************************************************************
+ */
+typedef struct cur_ctb_cu_tree_t
+{
+ /** CU X position in terms of min CU (8x8) units */
+ UWORD8 b3_cu_pos_x : 3;
+
+ /** CU Y position in terms of min CU (8x8) units */
+ UWORD8 b3_cu_pos_y : 3;
+
+ /** reserved bits */
+ UWORD8 b2_reserved : 2;
+
+ UWORD8 u1_cu_size;
+
+ UWORD8 u1_intra_eval_enable;
+
+ UWORD8 u1_inter_eval_enable;
+
+ /* Flag that indicates whether to evaluate this node */
+ /* during RDOPT evaluation. This does not mean that */
+ /* evaluation of the children need to be abandoned */
+ UWORD8 is_node_valid;
+
+ LWORD64 i8_best_rdopt_cost;
+
+ struct cur_ctb_cu_tree_t *ps_child_node_tl;
+
+ struct cur_ctb_cu_tree_t *ps_child_node_tr;
+
+ struct cur_ctb_cu_tree_t *ps_child_node_bl;
+
+ struct cur_ctb_cu_tree_t *ps_child_node_br;
+
+} cur_ctb_cu_tree_t;
+
+typedef struct
+{
+ WORD32 num_best_results;
+
+ part_type_results_t as_best_results[NUM_BEST_ME_OUTPUTS];
+
+} block_data_32x32_t;
+
+/**
+******************************************************************************
+ * @brief Structure for storing data about the 64x64
+ * block in a 64x64 CTB (same layout as block_data_32x32_t)
+******************************************************************************
+ */
+typedef block_data_32x32_t block_data_64x64_t;
+
+/**
+******************************************************************************
+ * @brief Structure for storing data about all 16 16x16
+ * blocks in a 64x64 CTB and each of their partitions
+******************************************************************************
+ */
+typedef struct
+{
+ WORD32 num_best_results;
+
+ /**
+ * mask of active partitions, Totally 17 bits. For a given partition
+ * id, as per PART_ID_T enum the corresponding bit position is 1/0
+ * indicating that partition is active or inactive
+ */
+ /*WORD32 i4_part_mask;*/
+
+ part_type_results_t as_best_results[NUM_BEST_ME_OUTPUTS];
+
+} block_data_16x16_t;
+
+typedef struct
+{
+ WORD32 num_best_results;
+
+ part_type_results_t as_best_results[NUM_BEST_ME_OUTPUTS];
+} block_data_8x8_t;
+
+/**
+******************************************************************************
+ * @brief Structure for data export from ME to Enc_Loop
+******************************************************************************
+ */
+typedef struct
+{
+ block_data_8x8_t as_8x8_block_data[64];
+
+ block_data_16x16_t as_block_data[16];
+
+ block_data_32x32_t as_32x32_block_data[4];
+
+ block_data_64x64_t s_64x64_block_data;
+
+} me_ctb_data_t;
+
+/**
+******************************************************************************
+ * @brief noise detection related structure
+ *
+******************************************************************************
+ */
+
+typedef struct
+{
+ WORD32 i4_noise_present;
+
+ UWORD8 au1_is_8x8Blk_noisy[MAX_CU_IN_CTB];
+
+ UWORD32 au4_variance_src_16x16[MAX_CU_IN_CTB];
+} ihevce_ctb_noise_params;
+
+/**
+******************************************************************************
+ * @brief ctb structure for mode analysis/evaluation
+******************************************************************************
+ */
+typedef struct
+{
+ /**
+ * CU decision in a ctb is frozen by ME/IPE and populated in
+ * u4_packed_cu_split_flags.
+ * @remarks
+ * TODO:review comment
+ * bit0 : 64x64 split flag, (depth0 flag for 64x64 ctb unused for smaller ctb)
+ * bits 1-3 : not used
+ * bits 4-7 : 32x32 split flags; (depth1 flags for 64x64ctb / only bit4 used for 32x32ctb)
+ * bits 8-23: 16x16 split flags; (depth2 flags for 64x64 / depth1[bits8-11] for 32x32 [bit8 for ctb 16x16] )
+
+ * if a split flag of n is set for depth 1, check the following split flags
+ * of [(8 + 4*(n-4)): (8 + 4*(n-4)+ 3)] for depth 2:
+ *
+ */
+ UWORD32 u4_cu_split_flags;
+
+ UWORD8 u1_num_cus_in_ctb;
+
+ cur_ctb_cu_tree_t *ps_cu_tree;
+
+ me_ctb_data_t *ps_me_ctb_data;
+
+ ihevce_ctb_noise_params s_ctb_noise_params;
+
+} ctb_analyse_t;
+/**
+******************************************************************************
+ * @brief Structures for tapping ssd and bit-estimate information for all CUs
+******************************************************************************
+ */
+
+typedef struct
+{
+ LWORD64 i8_cost;
+ WORD32 i4_idx;
+} cost_idx_t;
+
+/**
+******************************************************************************
+ * @brief reference/non reference pic context for encoder
+******************************************************************************
+ */
+typedef struct
+
+{
+ /**
+ * YUV buffer descriptor for the recon
+ * Allocation per frame for Y = ((ALIGN(frame width, MAX_CTB_SIZE)) + 2 * PAD_HORZ)*
+ * ((ALIGN(frame height, MAX_CTB_SIZE)) + 2 * PAD_VERT)
+ */
+ iv_enc_yuv_buf_t s_yuv_buf_desc;
+
+ iv_enc_yuv_buf_src_t s_yuv_buf_desc_src;
+
+ /* Pointer to Luma (Y) sub plane buffers Horz/ Vert / HV grid */
+ /* When (L0ME_IN_OPENLOOP_MODE == 1), additional buffer required to store */
+ /* the fullpel plane for use as reference */
+ UWORD8 *apu1_y_sub_pel_planes[3 + L0ME_IN_OPENLOOP_MODE];
+
+ /**
+ * frm level pointer to pu bank for colocated mv access
+ * Allocation per frame = (ALIGN(frame width, MAX_CTB_SIZE) / MIN_PU_SIZE) *
+ * (ALIGN(frame height, MAX_CTB_SIZE) / MIN_PU_SIZE)
+ */
+ pu_col_mv_t *ps_frm_col_mv;
+ /**
+ ************************************************************************
+ * Pointer to a PU map stored at frame level,
+ * It contains a 7 bit pu index in encoder order w.r.t to a ctb at a min
+ * granularity of MIN_PU_SIZE size.
+ ************************************************************************
+ */
+ UWORD8 *pu1_frm_pu_map;
+
+ /** CTB level frame buffer to store the accumulated sum of
+ * number of PUs for every row */
+ UWORD16 *pu2_num_pu_map;
+
+ /** Offsets in the PU buffer at every CTB level */
+ UWORD32 *pu4_pu_off;
+
+ /** Collocated POC for reference list 0
+ * ToDo: Change the array size when multiple slices are to be supported */
+ WORD32 ai4_col_l0_poc[HEVCE_MAX_REF_PICS];
+
+ /** Collocated POC for reference list 1 */
+ WORD32 ai4_col_l1_poc[HEVCE_MAX_REF_PICS];
+
+ /** 0 = top field, 1 = bottom field */
+ WORD32 i4_bottom_field;
+
+ /** top field first input in case of interlaced case */
+ WORD32 i4_topfield_first;
+
+ /** picture order count (POC) of this picture -- NOTE(review): inferred from the field name; confirm */
+ WORD32 i4_poc;
+
+ /** unique buffer id */
+ WORD32 i4_buf_id;
+
+ /** is this reference frame or not */
+ WORD32 i4_is_reference;
+
+ /** Picture type of current picture */
+ WORD32 i4_pic_type;
+
+ /** Flag to indicate whether current picture is free or in use */
+ WORD32 i4_is_free;
+
+ /** Bit0 - of this Flag to indicate whether current picture needs to be deblocked,
+ padded and hpel planes need to be generated.
+ These are turned off typically in non reference pictures when psnr
+ and recon dump is disabled.
+
+ Bit1 - of this flag set to 1 if sao is enabled. This is to enable deblocking when sao is enabled
+ */
+ WORD32 i4_deblk_pad_hpel_cur_pic;
+
+ /**
+ * weight and offset for this ref pic. To be initialized for every pic
+ * based on the lap output
+ */
+ ihevce_wght_offst_t s_weight_offset;
+
+ /**
+ * Reciprocal of the lumaweight in q15 format
+ */
+ WORD32 i4_inv_luma_wt;
+
+ /**
+ * Log to base 2 of the common denominator used for luma weights across all ref pics
+ */
+ WORD32 i4_log2_wt_denom;
+
+ /**
+ * Used as Reference for encoding current picture flag
+ */
+ WORD32 i4_used_by_cur_pic_flag;
+
+#if ADAPT_COLOCATED_FROM_L0_FLAG
+ WORD32 i4_frame_qp;
+#endif
+ /*
+ * IDR GOP number
+ */
+
+ WORD32 i4_idr_gop_num;
+
+ /*
+ * non-ref-free_flag
+ */
+ WORD32 i4_non_ref_free_flag;
+ /**
+ * Dependency manager instance for ME - Prev recon dep
+ */
+ void *pv_dep_mngr_recon;
+
+ /*display num*/
+ WORD32 i4_display_num;
+} recon_pic_buf_t;
+
+/**
+******************************************************************************
+ * @brief Lambda values used for various cost computations
+******************************************************************************
+ */
+typedef struct
+{
+ /************************************************************************/
+ /* The fields with the string 'type2' in their names are required */
+ /* when both 8bit and hbd lambdas are needed. The lambdas corresponding */
+ /* to the bit_depth != internal_bit_depth are stored in these fields */
+ /************************************************************************/
+
+ /**
+ * Closed loop SSD Lambda
+ * This is multiplied with bits for RD cost computations in SSD mode
+ * This is represented in q format with shift of LAMBDA_Q_SHIFT
+ */
+ LWORD64 i8_cl_ssd_lambda_qf;
+
+ LWORD64 i8_cl_ssd_type2_lambda_qf;
+
+ /**
+ * Closed loop SSD Lambda for chroma residue (chroma qp is different from luma qp)
+ * This is multiplied with bits for RD cost computations in SSD mode
+ * This is represented in q format with shift of LAMBDA_Q_SHIFT
+ */
+ LWORD64 i8_cl_ssd_lambda_chroma_qf;
+
+ LWORD64 i8_cl_ssd_type2_lambda_chroma_qf;
+
+ /**
+ * Closed loop SAD Lambda
+ * This is multiplied with bits for RD cost computations in SAD mode
+ * This is represented in q format with shift of LAMBDA_Q_SHIFT
+ */
+ WORD32 i4_cl_sad_lambda_qf;
+
+ WORD32 i4_cl_sad_type2_lambda_qf;
+
+ /**
+ * Open loop SAD Lambda
+ * This is multiplied with bits for RD cost computations in SAD mode
+ * This is represented in q format with shift of LAMBDA_Q_SHIFT
+ */
+ WORD32 i4_ol_sad_lambda_qf;
+
+ WORD32 i4_ol_sad_type2_lambda_qf;
+
+ /**
+ * Closed loop SATD Lambda
+ * This is multiplied with bits for RD cost computations in SATD mode
+ * This is represented in q format with shift of LAMBDA_Q_SHIFT
+ */
+ WORD32 i4_cl_satd_lambda_qf;
+
+ WORD32 i4_cl_satd_type2_lambda_qf;
+
+ /**
+ * Open loop SATD Lambda
+ * This is multiplied with bits for RD cost computations in SATD mode
+ * This is represented in q format with shift of LAMBDA_Q_SHIFT
+ */
+ WORD32 i4_ol_satd_lambda_qf;
+
+ WORD32 i4_ol_satd_type2_lambda_qf;
+
+ double lambda_modifier;
+
+ double lambda_uv_modifier;
+
+ UWORD32 u4_chroma_cost_weighing_factor;
+
+} frm_lambda_ctxt_t;
+/**
+******************************************************************************
+* @brief Mode attributes for 4x4 block populated by early decision
+******************************************************************************
+ */
+typedef struct
+{
+ /* If best mode is present or not */
+ UWORD8 mode_present;
+
+ /** Best mode for the current 4x4 prediction block */
+ UWORD8 best_mode;
+
+ /** sad for the best mode for the current 4x4 prediction block */
+ UWORD16 sad;
+
+ /** cost for the best mode for the current 4x4 prediction block */
+ UWORD16 sad_cost;
+
+} ihevce_ed_mode_attr_t; //early decision
+/**
+******************************************************************************
+ * @brief Structure at 8x8 block level which has parameters such as cur satd
+ * for QP mod @ L0 level
+******************************************************************************
+ */
+typedef struct
+{
+ /*Store SATD of current data at 8*8 level for current layer (L0)*/
+ WORD32 i4_8x8_cur_satd;
+} ihevce_8x8_L0_satd_t;
+/**
+******************************************************************************
+ * @brief Structure at 8x8 block level mean for MEAN based QP mod
+******************************************************************************
+ */
+typedef struct
+{
+ /*Store mean of current data at 8*8 level for current layer (L0)*/
+ WORD16 i2_8x8_cur_mean;
+} ihevce_8x8_L0_mean_t;
+
+//#define DEBUG_ED_CTB_POS
+/**
+******************************************************************************
+ * @brief Structure at 4x4 block level which has parameters about early
+ * intra or inter decision
+******************************************************************************
+ */
+typedef struct
+{
+ /**
+ * Final parameter of Intra-Inter early decision for the current 4x4.
+ * 0 - invalid decision
+ * 1 - eval intra only
+ * 2 - eval inter only
+ * 3 - eval both intra and inter
+ */
+ UWORD8 intra_or_inter : 2;
+
+ UWORD8 merge_success : 1;
+
+ /** Best mode for the current 4x4 prediction block */
+ UWORD8 best_mode;
+
+ /* sad cost for the best prediction mode */
+ //UWORD16 best_sad_cost;
+
+ /** Best merge mode for the current 4x4 prediction block -- NOTE(review): inferred from the field name; confirm */
+ UWORD8 best_merge_mode;
+
+ /*Store SATD at 4*4 level for current layer (L1)*/
+ WORD32 i4_4x4_satd;
+
+ /*Store SATD of current data at 4*4 level for current layer (L1)*/
+ WORD32 i4_4x4_cur_satd;
+
+} ihevce_ed_blk_t; //early decision
+
+/* l1 ipe ctb analyze structure */
+/* Contains cu level qp mod related information for all possible cu
+sizes (16,32,64 in L0) in a CTB*/
+typedef struct
+{
+ WORD32 i4_sum_4x4_satd[16];
+ WORD32 i4_min_4x4_satd[16];
+
+ /*satd for L1_8x8 blocks in L1_32x32
+ 16 - num L1_8x8 in L1_32x32
+ 2 =>
+ 0 - sum of L1_4x4 @ L1_8x8
+ - equivalent to transform size of 16x16 @ L0
+ 1 - min/median of L1_4x4 @ L1_8x8
+ - equivalent to transform size of 8x8 @ L0
+ */
+ WORD32 i4_8x8_satd[16][2];
+
+ /*satd for L1_16x16 blocks in L1_32x32
+ 4 - num L1_16x16 in L1_32x32
+ 3 =>
+ 0 - sum of (sum of L1_4x4 @ L1_8x8) @ L1_16x16
+ - equivalent to transform size of 32x32 @ L0
+ 1 - min/median of (sum of L1_4x4 @ L1_8x8) @ L1_16x16
+ - equivalent to transform size of 16x16 @ L0
+ 2 - min/median of (min/median of L1_4x4 @ L1_8x8) @ L1_16x16
+ - equivalent to transform size of 8x8 @ L0
+ */
+ WORD32 i4_16x16_satd[4][3];
+
+ /*satd for 32x32 block in L1*/
+ /*Please note that i4_32x32_satd[0][3] contains sum of all 32x32 */
+ /*satd for L1_32x32 blocks in L1_32x32
+ 1 - num L1_32x32 in L1_32x32
+ 4 =>
+ 0 - min/median of (sum of (sum of L1_4x4 @ L1_8x8) @ L1_16x16) @ L1_32x32
+ - equivalent to transform size of 32x32 @ L0
+ 1 - min/median of (sum of L1_4x4 @ L1_8x8) @ L1_32x32
+ - equivalent to transform size of 16x16 @ L0
+ 2 - min/median of (min/median of L1_4x4 @ L1_8x8) @ L1_32x32
+ - equivalent to transform size of 8x8 @ L0
+ 3 - sum of (sum of (sum of L1_4x4 @ L1_8x8) @ L1_16x16) @ L1_32x32
+ */
+ WORD32 i4_32x32_satd[1][4];
+
+ /*Store SATD at 8x8 level for current layer (L1)*/
+ WORD32 i4_best_satd_8x8[16];
+
+ /* EIID: This will be used for early inter intra decisions */
+
+ /*Cost based on sad at 8x8 level for current layer (l1) */
+ WORD32 i4_best_sad_cost_8x8_l1_ipe[16];
+ /*SAD at 8x8 level for current layer (l1) */
+ WORD32 i4_best_sad_8x8_l1_ipe[16];
+ /* SAD cost at 8x8 level for ME. All other cost are IPE cost */
+ WORD32 i4_best_sad_cost_8x8_l1_me[16];
+
+ /* SAD cost at 8x8 level for ME. for given reference */
+ WORD32 i4_sad_cost_me_for_ref[16];
+
+ /* SAD at 8x8 level for ME. for given reference */
+ WORD32 i4_sad_me_for_ref[16];
+
+ /* SAD at 8x8 level for ME (SAD counterpart of i4_best_sad_cost_8x8_l1_me) */
+ WORD32 i4_best_sad_8x8_l1_me[16];
+
+ WORD32 i4_best_sad_8x8_l1_me_for_decide[16];
+
+ /*Mean @ L0 16x16*/
+ WORD32 ai4_16x16_mean[16];
+
+ /*Mean @ L0 32x32*/
+ WORD32 ai4_32x32_mean[4];
+
+ /*Mean @ L0 64x64*/
+ WORD32 i4_64x64_mean;
+
+} ihevce_ed_ctb_l1_t; //early decision
+
+/**
+******************************************************************************
+ * @brief 8x8 intra analysis structure (modes, cost and flags for one 8x8 CU)
+******************************************************************************
+ */
+typedef struct
+{
+ /** Best intra modes for 8x8 transform.
+ * The list is terminated with 255 to limit the number of modes (hence the + 1)
+ */
+ UWORD8 au1_best_modes_8x8_tu[MAX_INTRA_CU_CANDIDATES + 1];
+
+ /** Best 8x8 intra modes for 4x4 transform.
+ * The list is terminated with 255 to limit the number of modes (hence the + 1)
+ */
+ UWORD8 au1_best_modes_4x4_tu[MAX_INTRA_CU_CANDIDATES + 1];
+
+ /** Best 4x4 intra modes, one list per each of the 4 entries of the first dimension.
+ * Each list is terminated with 255 to limit the number of modes (hence the + 1)
+ */
+ UWORD8 au1_4x4_best_modes[4][MAX_INTRA_CU_CANDIDATES + 1];
+
+ /** Best 8x8 intra SAD/SATD cost */
+ WORD32 i4_best_intra_cost;
+
+ /** Flag to indicate if NxN PU mode (different PU at 4x4 level) is enabled */
+ UWORD8 b1_enable_nxn : 1;
+
+ /** Valid CU flag : required for incomplete CTBs at frame boundaries */
+ UWORD8 b1_valid_cu : 1;
+
+ /** Reserved (dummy) bits: 1 + 1 + 6 adds up to the 8 bits of a UWORD8 */
+ UWORD8 b6_reserved : 6;
+
+} intra8_analyse_t;
+
+/**
+******************************************************************************
+ * @brief 16x16 intra analysis structure (embeds its four 8x8 children)
+******************************************************************************
+ */
+typedef struct
+{
+ /** Best intra modes for 16x16 transform.
+ * The list is terminated with 255 to limit the number of modes (hence the + 1)
+ */
+ UWORD8 au1_best_modes_16x16_tu[MAX_INTRA_CU_CANDIDATES + 1];
+
+ /** Best 16x16 intra modes for 8x8 transform.
+ * The list is terminated with 255 to limit the number of modes (hence the + 1)
+ */
+ UWORD8 au1_best_modes_8x8_tu[MAX_INTRA_CU_CANDIDATES + 1];
+
+ /** 8x8 children intra analysis for this 16x16 (4 children) */
+ intra8_analyse_t as_intra8_analyse[4];
+
+ /* Best 16x16 intra SAD/SATD cost */
+ WORD32 i4_best_intra_cost;
+
+ /* Indicates if 16x16 is the best CU or the 8x8 CUs are */
+ UWORD8 b1_split_flag : 1;
+
+ /* Indicates if 8x8 vs 16x16 RDO evaluation is needed */
+ /* or only the 8x8's RDO evaluation is needed */
+ UWORD8 b1_merge_flag : 1;
+
+ /**
+ * Valid CU flag : required for incomplete CTBs at frame boundaries
+ * or if CTB size is lower than 32
+ */
+ UWORD8 b1_valid_cu : 1;
+
+ /** Reserved (dummy) bits. NOTE: named b6_ but is 5 bits wide (1 + 1 + 1 + 5 = 8) */
+ UWORD8 b6_reserved : 5;
+
+} intra16_analyse_t;
+
+/**
+******************************************************************************
+ * @brief 32x32 intra analysis structure (embeds its four 16x16 children)
+******************************************************************************
+ */
+typedef struct
+{
+ /** Best intra modes for 32x32 transform.
+ * The list is terminated with 255 to limit the number of modes (hence the + 1)
+ */
+ UWORD8 au1_best_modes_32x32_tu[MAX_INTRA_CU_CANDIDATES + 1];
+
+ /** Best 32x32 intra modes for 16x16 transform.
+ * The list is terminated with 255 to limit the number of modes (hence the + 1)
+ */
+ UWORD8 au1_best_modes_16x16_tu[MAX_INTRA_CU_CANDIDATES + 1];
+
+ /** 16x16 children intra analysis for this 32x32 (4 children) */
+ intra16_analyse_t as_intra16_analyse[4];
+
+ /* Best 32x32 intra SAD/SATD cost */
+ WORD32 i4_best_intra_cost;
+
+ /* Indicates if 32x32 is the best CU or the 16x16 CUs are */
+ UWORD8 b1_split_flag : 1;
+
+ /* Indicates if 32x32 vs 16x16 RDO evaluation is needed */
+ /* or 16x16 vs 8x8 evaluation is needed */
+ UWORD8 b1_merge_flag : 1;
+
+ /**
+ * Valid CU flag : required for incomplete CTBs at frame boundaries
+ * or if CTB size is lower than 64
+ */
+ UWORD8 b1_valid_cu : 1;
+
+ /** Reserved (dummy) bits. NOTE: named b6_ but is 5 bits wide (1 + 1 + 1 + 5 = 8) */
+ UWORD8 b6_reserved : 5;
+
+} intra32_analyse_t;
+
+/**
+******************************************************************************
+ * @brief IPE L0 analysis structure for L0 ME to do intra/inter CU decisions.
+ * This is a CTB level structure encapsulating IPE modes and costs at all
+ * levels. IPE also recommends max intra CU sizes, which are required
+ * by ME for CU size determination in intra dominant CTBs
+******************************************************************************
+ */
+typedef struct
+{
+ /** Best 64x64 intra modes for 32x32 transform.
+ * The list is terminated with 255 to limit the number of modes (hence the + 1)
+ */
+ UWORD8 au1_best_modes_32x32_tu[MAX_INTRA_CU_CANDIDATES + 1];
+
+ /** 32x32 children intra analysis for this 64x64 (4 children) */
+ intra32_analyse_t as_intra32_analyse[4];
+
+ /* Indicates if 64x64 is the best CU or the 32x32 CUs are */
+ UWORD8 u1_split_flag;
+
+ /* CTB level best 8x8 intra costs */
+ WORD32 ai4_best8x8_intra_cost[MAX_CU_IN_CTB];
+
+ /* CTB level best 16x16 intra costs (a quarter of the 8x8 entry count) */
+ WORD32 ai4_best16x16_intra_cost[MAX_CU_IN_CTB >> 2];
+
+ /* CTB level best 32x32 intra costs (a sixteenth of the 8x8 entry count) */
+ WORD32 ai4_best32x32_intra_cost[MAX_CU_IN_CTB >> 4];
+
+ /* Best 64x64 intra cost */
+ WORD32 i4_best64x64_intra_cost;
+
+ /**
+ * CTB level early intra / inter decision at 8x8 block level
+ * 0 - invalid decision
+ * 1 - eval intra only
+ * 2 - eval inter only
+ * 3 - eval both intra and inter
+ */
+ /* Entries are stored in Z scan order */
+ WORD8 ai1_early_intra_inter_decision[MAX_CU_IN_CTB];
+
+ /*
+ @L0 level
+ first dimension, 4 => 0 - 32x32 TU in 64x64 CU
+ 1 - 16x16 TU in 64x64 CU
+ 2 - 8x8 TU in 64x64 CU
+ 3 - 64x64 CU
+ second dimension, 2 => Intra/Inter */
+ WORD32 i4_64x64_act_factor[4][2];
+
+ /*
+ @L0 level
+ first dimension, 4 => num 32x32 in CTB
+ second dimension, 3 => 0 - 32x32 TU in 64x64 CU
+ 1 - 16x16 TU in 64x64 CU
+ 2 - 8x8 TU in 64x64 CU
+ third dimension, 2 => Intra/Inter */
+ WORD32 i4_32x32_act_factor[4][3][2];
+
+ /*
+ @L0 level
+ first dimension, 16 => num 16x16 in CTB
+ second dimension, 2 => 0 - 16x16 TU in 64x64 CU
+ 1 - 8x8 TU in 64x64 CU
+ third dimension, 2 => Intra/Inter */
+ WORD32 i4_16x16_act_factor[16][2][2];
+
+ WORD32 nodes_created_in_cu_tree; /* presumably the node count of the tree below - TODO confirm */
+
+ cur_ctb_cu_tree_t *ps_cu_tree_root; /* pointer to the root of a cur_ctb_cu_tree_t tree */
+
+ WORD32 ai4_8x8_act_factor[16];
+ WORD32 ai4_best_sad_8x8_l1_me[MAX_CU_IN_CTB];
+ WORD32 ai4_best_sad_8x8_l1_ipe[MAX_CU_IN_CTB];
+ WORD32 ai4_best_sad_cost_8x8_l1_me[MAX_CU_IN_CTB];
+ WORD32 ai4_best_sad_cost_8x8_l1_ipe[MAX_CU_IN_CTB];
+
+ /* CTB level accumulated SATD */
+ WORD32 i4_ctb_acc_satd;
+
+ /* CTB level accumulated MPM bits */
+ WORD32 i4_ctb_acc_mpm_bits;
+
+} ipe_l0_ctb_analyse_for_me_t;
+
+typedef struct /* pair of 16-bit motion vector components; used as s_global_mv[MAX_NUM_REF] in pre_enc_me_ctxt_t */
+{
+ WORD16 i2_mv_x; /* x component (presumably horizontal - TODO confirm units) */
+ WORD16 i2_mv_y; /* y component (presumably vertical - TODO confirm units) */
+} global_mv_t;
+
+/**
+******************************************************************************
+ * @brief Pre Encode pass and ME pass shared variables and buffers
+******************************************************************************
+ */
+typedef struct
+{
+ /**
+ * Buffer id
+ */
+ WORD32 i4_buf_id;
+
+ /**
+ * Flag will be set to 1 by frame processing thread after receiving flush
+ * command from application
+ */
+ WORD32 i4_end_flag;
+
+ /** frame level ctb analyse buffer pointer */
+ ctb_analyse_t *ps_ctb_analyse;
+
+ /** frame level cu analyse buffer pointer for IPE (member currently commented out) */
+ //cu_analyse_t *ps_cu_analyse;
+
+ /** current input pointer */
+ ihevce_lap_enc_buf_t *ps_curr_inp;
+
+ /** current inp buffer id */
+ WORD32 curr_inp_buf_id;
+
+ /** Slice header parameters */
+ slice_header_t s_slice_hdr;
+
+ /** sps parameters activated by current slice */
+ sps_t *ps_sps;
+
+ /** pps parameters activated by current slice */
+ pps_t *ps_pps;
+
+ /** vps parameters activated by current slice */
+ vps_t *ps_vps;
+ /** Pointer to Penultimate Layer context memory; internally has MV bank buff and related params */
+ void *pv_me_lyr_ctxt;
+
+ /** Pointer to Penultimate Layer bank context memory (original said "NV bank"; presumably MV bank - TODO confirm) */
+ void *pv_me_lyr_bnk_ctxt;
+
+ /** Pointer to Penultimate Layer MV bank buff */
+ void *pv_me_mv_bank;
+
+ /** Pointer to Penultimate Layer reference idx buffer */
+ void *pv_me_ref_idx;
+ /**
+ * Array to store 8x8 cost (partial 8x8 sad + level adjusted cost)
+ * The order of storing is raster scan order within CTB and
+ * CTB order is raster scan within frame.
+ */
+ double *plf_intra_8x8_cost;
+
+ /**
+ * L0 layer ctb analyse frame level buffer.
+ * IPE will populate the cost and best modes at all levels in this buffer
+ * for every CTB in a frame
+ */
+ // moved to shorter buffer queue
+ //ipe_l0_ctb_analyse_for_me_t *ps_ipe_analyse_ctb;
+
+ /** Layer L1 buffer pointer */
+ ihevce_ed_blk_t *ps_layer1_buf;
+
+ /** Layer L2 buffer pointer */
+ ihevce_ed_blk_t *ps_layer2_buf;
+
+ /* ME reverse map info */
+ UWORD8 *pu1_me_reverse_map_info;
+
+ /** Buffer pointer for CTB level information in pre intra pass */
+ ihevce_ed_ctb_l1_t *ps_ed_ctb_l1;
+
+ /* L0 cur 8x8 satd for QP mod */
+ ihevce_8x8_L0_satd_t *ps_layer0_cur_satd;
+
+ /* L0 cur 8x8 mean for QP mod */
+ ihevce_8x8_L0_mean_t *ps_layer0_cur_mean;
+
+ /** SEI parameters (sei_params_t); the original comment said "vps parameters", apparently a copy-paste slip */
+ sei_params_t s_sei;
+
+ /** nal_type for the slice to be encoded */
+ WORD32 i4_slice_nal_type;
+
+ /** input time stamp in terms of ticks: lower 32 */
+ WORD32 i4_inp_timestamp_low;
+
+ /** input time stamp in terms of ticks: higher 32 */
+ WORD32 i4_inp_timestamp_high;
+
+ /** input frame ctxt of app to be returned in output buffer */
+ void *pv_app_frm_ctxt;
+
+ /** current frm valid flag :
+ * will be 1 if valid input was processed by frame proc thrd
+ */
+ WORD32 i4_frm_proc_valid_flag;
+
+ /**
+ * Qp to be used for current frame
+ */
+ WORD32 i4_curr_frm_qp;
+
+ /**
+ * Frame level Lambda parameters (one entry per bit-rate instance)
+ */
+ frm_lambda_ctxt_t as_lambda_prms[IHEVCE_MAX_NUM_BITRATES];
+
+ /** Frame-level SATD cost accumulator */
+ LWORD64 i8_frame_acc_satd_cost;
+
+ /** Frame - L1 coarse me cost accumulated */
+ LWORD64 i8_acc_frame_coarse_me_cost;
+ /** Frame - L1 coarse me cost accumulated for a reference (member currently commented out) */
+ //LWORD64 i8_acc_frame_coarse_me_cost_for_ref;
+
+ /** Frame - L1 coarse me sad accumulated */
+ LWORD64 i8_acc_frame_coarse_me_sad;
+
+ /* Average activity of 4x4 blocks from previous frame
+ * If L1, maps to 8*8 in L0
+ */
+ WORD32 i4_curr_frame_4x4_avg_act;
+
+ WORD32 ai4_mod_factor_derived_by_variance[2];
+
+ float f_strength;
+
+ /* Average activity of 8x8 blocks from previous frame
+ * If L1, maps to 16*16 in L0
+ */
+
+ long double ld_curr_frame_8x8_log_avg[2];
+
+ LWORD64 i8_curr_frame_8x8_avg_act[2];
+
+ LWORD64 i8_curr_frame_8x8_sum_act[2];
+
+ WORD32 i4_curr_frame_8x8_sum_act_for_strength[2];
+
+ ULWORD64 u8_curr_frame_8x8_sum_act_sqr;
+
+ WORD32 i4_curr_frame_8x8_num_blks[2];
+
+ LWORD64 i8_acc_frame_8x8_sum_act[2];
+ LWORD64 i8_acc_frame_8x8_sum_act_sqr;
+ WORD32 i4_acc_frame_8x8_num_blks[2];
+ LWORD64 i8_acc_frame_8x8_sum_act_for_strength;
+ LWORD64 i8_curr_frame_8x8_sum_act_for_strength;
+
+ /* Average activity of 16x16 blocks from previous frame
+ * If L1, maps to 32*32 in L0
+ */
+
+ long double ld_curr_frame_16x16_log_avg[3];
+
+ LWORD64 i8_curr_frame_16x16_avg_act[3];
+
+ LWORD64 i8_curr_frame_16x16_sum_act[3];
+
+ WORD32 i4_curr_frame_16x16_num_blks[3];
+
+ LWORD64 i8_acc_frame_16x16_sum_act[3];
+ WORD32 i4_acc_frame_16x16_num_blks[3];
+
+ /* Average activity of 32x32 blocks from previous frame
+ * If L1, maps to 64*64 in L0
+ */
+
+ long double ld_curr_frame_32x32_log_avg[3];
+
+ LWORD64 i8_curr_frame_32x32_avg_act[3];
+
+ global_mv_t s_global_mv[MAX_NUM_REF]; /* one global_mv_t per reference (MAX_NUM_REF entries) */
+ LWORD64 i8_curr_frame_32x32_sum_act[3];
+
+ WORD32 i4_curr_frame_32x32_num_blks[3];
+
+ LWORD64 i8_acc_frame_32x32_sum_act[3];
+ WORD32 i4_acc_frame_32x32_num_blks[3];
+
+ LWORD64 i8_acc_num_blks_high_sad;
+
+ LWORD64 i8_total_blks;
+
+ WORD32 i4_complexity_percentage;
+
+ WORD32 i4_is_high_complex_region;
+
+ WORD32 i4_avg_noise_thrshld_4x4;
+
+ LWORD64 i8_curr_frame_mean_sum;
+ WORD32 i4_curr_frame_mean_num_blks;
+ LWORD64 i8_curr_frame_avg_mean_act;
+
+} pre_enc_me_ctxt_t;
+
+/**
+******************************************************************************
+ * @brief Buffers passed from L0 IPE to ME and enc loop
+******************************************************************************
+ */
+typedef struct
+{
+ WORD32 i4_size; /* NOTE(review): presumably the size of this structure or of the buffer below - confirm */
+
+ ipe_l0_ctb_analyse_for_me_t *ps_ipe_analyse_ctb; /* pointer to IPE L0 CTB analysis data */
+} pre_enc_L0_ipe_encloop_ctxt_t;
+/**
+******************************************************************************
+ * @brief Picture level accumulated info; embedded as s_pic_level_info in frm_proc_ent_cod_ctxt_t
+******************************************************************************
+ */
+
+typedef struct
+{
+ /* PIC level info. NOTE: the i8_total_* counters below are declared ULWORD64 despite the i8_ prefix */
+ ULWORD64 i8_total_cu;
+ ULWORD64 i8_total_cu_min_8x8;
+ ULWORD64 i8_total_pu;
+ ULWORD64 i8_total_intra_cu;
+ ULWORD64 i8_total_inter_cu;
+ ULWORD64 i8_total_skip_cu;
+ ULWORD64 i8_total_cu_based_on_size[4];
+
+ ULWORD64 i8_total_intra_pu;
+ ULWORD64 i8_total_merge_pu;
+ ULWORD64 i8_total_non_skipped_inter_pu;
+
+ ULWORD64 i8_total_2nx2n_intra_pu[4];
+ ULWORD64 i8_total_nxn_intra_pu;
+ ULWORD64 i8_total_2nx2n_inter_pu[4];
+ ULWORD64 i8_total_smp_inter_pu[4];
+ ULWORD64 i8_total_amp_inter_pu[3];
+ ULWORD64 i8_total_nxn_inter_pu[3];
+
+ ULWORD64 i8_total_L0_mode;
+ ULWORD64 i8_total_L1_mode;
+ ULWORD64 i8_total_BI_mode;
+
+ ULWORD64 i8_total_L0_ref_idx[MAX_DPB_SIZE];
+ ULWORD64 i8_total_L1_ref_idx[MAX_DPB_SIZE];
+
+ ULWORD64 i8_total_tu;
+ ULWORD64 i8_total_non_coded_tu;
+ ULWORD64 i8_total_inter_coded_tu;
+ ULWORD64 i8_total_intra_coded_tu;
+
+ ULWORD64 i8_total_tu_based_on_size[4];
+ ULWORD64 i8_total_tu_cu64[4];
+ ULWORD64 i8_total_tu_cu32[4];
+ ULWORD64 i8_total_tu_cu16[3];
+ ULWORD64 i8_total_tu_cu8[2];
+
+ LWORD64 i8_total_qp;
+ LWORD64 i8_total_qp_min_cu;
+ WORD32 i4_min_qp;
+ WORD32 i4_max_qp;
+ LWORD64 i8_sum_squared_frame_qp;
+ LWORD64 i8_total_frame_qp;
+ WORD32 i4_max_frame_qp;
+ float f_total_buffer_underflow;
+ float f_total_buffer_overflow;
+ float f_max_buffer_underflow;
+ float f_max_buffer_overflow;
+
+ UWORD8 i1_num_ref_idx_l0_active; /* NOTE: unsigned (UWORD8) despite the i1_ prefix */
+ UWORD8 i1_num_ref_idx_l1_active; /* NOTE: unsigned (UWORD8) despite the i1_ prefix */
+
+ WORD32 i4_ref_poc_l0[MAX_DPB_SIZE];
+ WORD32 i4_ref_poc_l1[MAX_DPB_SIZE];
+
+ WORD8 i1_list_entry_l0[MAX_DPB_SIZE];
+ DOUBLE i2_luma_weight_l0[MAX_DPB_SIZE]; /* NOTE: DOUBLE despite the i2_ prefix */
+ WORD16 i2_luma_offset_l0[MAX_DPB_SIZE];
+ WORD8 i1_list_entry_l1[MAX_DPB_SIZE];
+ DOUBLE i2_luma_weight_l1[MAX_DPB_SIZE]; /* NOTE: DOUBLE despite the i2_ prefix */
+ WORD16 i2_luma_offset_l1[MAX_DPB_SIZE];
+
+ ULWORD64 u8_bits_estimated_intra;
+ ULWORD64 u8_bits_estimated_inter;
+ ULWORD64 u8_bits_estimated_slice_header;
+ ULWORD64 u8_bits_estimated_sao;
+ ULWORD64 u8_bits_estimated_split_cu_flag;
+ ULWORD64 u8_bits_estimated_cu_hdr_bits;
+ ULWORD64 u8_bits_estimated_split_tu_flag;
+ ULWORD64 u8_bits_estimated_qp_delta_bits;
+ ULWORD64 u8_bits_estimated_cbf_luma_bits;
+ ULWORD64 u8_bits_estimated_cbf_chroma_bits;
+
+ ULWORD64 u8_bits_estimated_res_luma_bits;
+ ULWORD64 u8_bits_estimated_res_chroma_bits;
+
+ ULWORD64 u8_bits_estimated_ref_id;
+ ULWORD64 u8_bits_estimated_mvd;
+ ULWORD64 u8_bits_estimated_merge_flag;
+ ULWORD64 u8_bits_estimated_mpm_luma;
+ ULWORD64 u8_bits_estimated_mpm_chroma;
+
+ ULWORD64 u8_total_bits_generated;
+ ULWORD64 u8_total_bits_vbv;
+
+ ULWORD64 u8_total_I_bits_generated;
+ ULWORD64 u8_total_P_bits_generated;
+ ULWORD64 u8_total_B_bits_generated;
+
+ UWORD32 u4_frame_sad;
+ UWORD32 u4_frame_intra_sad;
+ UWORD32 u4_frame_inter_sad;
+
+ ULWORD64 i8_frame_cost; /* NOTE: ULWORD64 despite the i8_ prefix */
+ ULWORD64 i8_frame_intra_cost; /* NOTE: ULWORD64 despite the i8_ prefix */
+ ULWORD64 i8_frame_inter_cost; /* NOTE: ULWORD64 despite the i8_ prefix */
+} s_pic_level_acc_info_t;
+
+typedef struct /* three 32-bit SEI-related values; names suggest they are consumed by entropy coding - TODO confirm */
+{
+ UWORD32 u4_target_bit_rate_sei_entropy; /* presumably target bit rate signalled via SEI - units not visible here */
+ UWORD32 u4_buffer_size_sei_entropy; /* presumably buffer size signalled via SEI - units not visible here */
+ UWORD32 u4_dbf_entropy; /* NOTE(review): meaning of "dbf" is not established in this file - confirm */
+
+} s_pic_level_sei_info_t;
+/**
+******************************************************************************
+* @brief ME pass and Main encode pass shared variables and buffers
+******************************************************************************
+*/
+typedef struct
+{
+ /**
+ * Buffer id
+ */
+ WORD32 i4_buf_id;
+
+ /**
+ * Flag will be set to 1 by frame processing thread after receiving flush
+ * command from application
+ */
+ WORD32 i4_end_flag;
+
+ /** current input pointer */
+ ihevce_lap_enc_buf_t *ps_curr_inp;
+
+ /** current inp buffer id */
+ WORD32 curr_inp_buf_id;
+
+ /** current input buffers from ME */
+ pre_enc_me_ctxt_t *ps_curr_inp_from_me_prms;
+
+ /** current inp buffer id from ME */
+ WORD32 curr_inp_from_me_buf_id;
+
+ /** current input buffers from L0 IPE */
+ pre_enc_L0_ipe_encloop_ctxt_t *ps_curr_inp_from_l0_ipe_prms;
+
+ /** current inp buffer id from L0 IPE */
+ WORD32 curr_inp_from_l0_ipe_buf_id;
+
+ /** Slice header parameters */
+ slice_header_t s_slice_hdr;
+
+ /** current frm valid flag :
+ * will be 1 if valid input was processed by frame proc thrd
+ */
+ WORD32 i4_frm_proc_valid_flag;
+
+ /**
+ * Array of reference picture list entries (structures), per bit-rate instance
+ * 2=> ref_pic_list0 and ref_pic_list1
+ */
+ recon_pic_buf_t as_ref_list[IHEVCE_MAX_NUM_BITRATES][2][HEVCE_MAX_REF_PICS * 2];
+
+ /**
+ * Array of reference picture list pointers, same dimensions as as_ref_list
+ * 2=> ref_pic_list0 and ref_pic_list1
+ */
+ recon_pic_buf_t *aps_ref_list[IHEVCE_MAX_NUM_BITRATES][2][HEVCE_MAX_REF_PICS * 2];
+
+ /** Job Queue Memory encode */
+ job_queue_t *ps_job_q_enc;
+
+ /** Array of Job Queue handles of enc group (NUM_ENC_JOBS_QUES entries) */
+ job_queue_handle_t as_job_que_enc_hdls[NUM_ENC_JOBS_QUES];
+
+ /** Array of Job Queue handles of enc group for re-encode */
+ job_queue_handle_t as_job_que_enc_hdls_reenc[NUM_ENC_JOBS_QUES];
+ /** frame level me_ctb_data_t buffer pointer
+ */
+ me_ctb_data_t *ps_cur_ctb_me_data;
+
+ /** frame level cur_ctb_cu_tree_t buffer pointer for ME
+ */
+ cur_ctb_cu_tree_t *ps_cur_ctb_cu_tree;
+
+ /** Pointer to Dep. Mngr for CTBs processed in every row of a frame.
+ * ME is the producer, EncLoop is the consumer
+ */
+ void *pv_dep_mngr_encloop_dep_me;
+
+} me_enc_rdopt_ctxt_t;
+
+typedef struct /* one SEI payload descriptor; used as as_sei_payload[MAX_NUMBER_OF_SEI_PAYLOAD] in frm_proc_ent_cod_ctxt_t */
+{
+ UWORD32 u4_payload_type; /* SEI payload type */
+ UWORD32 u4_payload_length; /* SEI payload length (presumably in bytes - TODO confirm) */
+ UWORD8 *pu1_sei_payload; /* pointer to the payload data; ownership is not visible here */
+} sei_payload_t;
+
+typedef struct /* frame processing -> entropy coding shared context (per the frm_proc_ent_cod name) */
+{
+ /**
+ * Flag will be set to 1 by frame processing thread after receiving flush
+ * command from application
+ */
+ WORD32 i4_end_flag;
+
+ /** frame level ctb allocation for ctb after aligning to max cu size */
+ ctb_enc_loop_out_t *ps_frm_ctb_data;
+
+ /** frame level cu allocation for ctb after aligning to max cu size */
+ cu_enc_loop_out_t *ps_frm_cu_data;
+
+ /** frame level tu allocation for ctb after aligning to max cu size */
+ tu_enc_loop_out_t *ps_frm_tu_data;
+
+ /** frame level pu allocation for ctb after aligning to max cu size */
+ pu_t *ps_frm_pu_data;
+
+ /** frame level coeff allocation for ctb after aligning to max cu size */
+ void *pv_coeff_data;
+
+ /** Slice header parameters */
+ slice_header_t s_slice_hdr;
+
+ /** sps parameters activated by current slice */
+ sps_t *ps_sps;
+
+ /** pps parameters activated by current slice */
+ pps_t *ps_pps;
+
+ /** vps parameters activated by current slice */
+ vps_t *ps_vps;
+
+ /** SEI parameters (sei_params_t); the original comment said "vps parameters", apparently a copy-paste slip */
+ sei_params_t s_sei;
+
+ /* Flag to indicate if AUD NAL is present */
+ WORD8 i1_aud_present_flag;
+
+ /* Flag to indicate if EOS NAL is present */
+ WORD8 i1_eos_present_flag;
+
+ /** nal_type for the slice to be encoded */
+ WORD32 i4_slice_nal_type;
+
+ /** input time stamp in terms of ticks: lower 32 */
+ WORD32 i4_inp_timestamp_low;
+
+ /** input time stamp in terms of ticks: higher 32 */
+ WORD32 i4_inp_timestamp_high;
+
+ /** input frame ctxt of app to be returned in output buffer */
+ void *pv_app_frm_ctxt;
+
+ /** current frm valid flag :
+ * will be 1 if valid input was processed by frame proc thrd
+ */
+ WORD32 i4_frm_proc_valid_flag;
+
+ /** To support entropy sync, the bitstream offset of each CTB row
+ * is populated in this array and put in the slice header at the end
+ */
+ WORD32 ai4_entry_point_offset[MAX_NUM_CTB_ROWS_FRM];
+
+ /** RDopt estimate of the amount generated, based on which the rc update happens.
+ * NOTE(review): original comment says "bytes" while the name says "bits" - confirm
+ */
+ WORD32 i4_rdopt_bits_generated_estimate;
+
+ /* These params are passed from enc-threads to entropy thread for
+ passing params needed for PSNR calculation and encoding
+ summary prints */
+ DOUBLE lf_luma_mse;
+ DOUBLE lf_cb_mse;
+ DOUBLE lf_cr_mse;
+
+ DOUBLE lf_luma_ssim;
+ DOUBLE lf_cb_ssim;
+ DOUBLE lf_cr_ssim;
+
+ WORD32 i4_qp;
+ WORD32 i4_poc;
+ WORD32 i4_display_num;
+ WORD32 i4_pic_type;
+
+ /** I-only SCD */
+ WORD32 i4_is_I_scenecut;
+
+ WORD32 i4_is_non_I_scenecut;
+ WORD32 i4_sub_pic_level_rc;
+
+ WORD32 ai4_frame_bits_estimated; /* NOTE: scalar despite the ai4_ (array) prefix */
+ s_pic_level_acc_info_t s_pic_level_info; /* picture level accumulated statistics */
+
+ LWORD64 i8_buf_level_bitrate_change;
+
+ WORD32 i4_is_end_of_idr_gop;
+
+ sei_payload_t as_sei_payload[MAX_NUMBER_OF_SEI_PAYLOAD]; /* SEI payload descriptors */
+
+ UWORD32 u4_num_sei_payload; /* presumably the number of valid entries in as_sei_payload - TODO confirm */
+ /* Flag used only in mres single output case to flush out one res and start with next */
+ WORD32 i4_out_flush_flag;
+
+} frm_proc_ent_cod_ctxt_t;
+
+/**
+******************************************************************************
+* @brief Sub-picture rate control context (original brief was copied from me_enc_rdopt_ctxt_t)
+******************************************************************************
+*/
+typedef struct
+{
+ /* Bit-rate ID */
+ WORD32 i4_br_id;
+
+ /* Frame ID */
+ WORD32 i4_frm_id;
+
+ /* Number of CTBs after which data is populated */
+ WORD32 i4_ctb_count_in_data;
+
+ /* Number of CTBs after which scale is computed */
+ WORD32 i4_ctb_count_out_scale;
+
+ /* Bits estimated for the frame */
+ /* For NON-I SCD max buf bits */
+ LWORD64 i8_frame_bits_estimated;
+
+ /* Bits consumed till the nctb */
+ LWORD64 i8_nctb_bits_consumed;
+
+ /* Accumulated bits consumed (original comment duplicated the one above - TODO confirm exact meaning) */
+ LWORD64 i8_acc_bits_consumed;
+
+ /* Frame level best of IPE and ME SAD */
+ LWORD64 i8_frame_l1_me_sad;
+
+ /* SAD accumulated till NCTB */
+ LWORD64 i8_nctb_l1_me_sad;
+
+ /* Frame level IPE SAD */
+ LWORD64 i8_frame_l1_ipe_sad;
+
+ /* SAD accumulated till NCTB */
+ LWORD64 i8_nctb_l1_ipe_sad;
+
+ /* Frame level L0 IPE SATD */
+ LWORD64 i8_frame_l0_ipe_satd;
+
+ /* L0 SATD accumulated till NCTB */
+ LWORD64 i8_nctb_l0_ipe_satd;
+
+ /* Frame level activity factor accumulated at 8x8 level */
+ LWORD64 i8_frame_l1_activity_fact;
+
+ /* NCTB activity factor accumulated at 8x8 level */
+ LWORD64 i8_nctb_l1_activity_fact;
+
+ /* L0 MPM bits accumulated till NCTB */
+ LWORD64 i8_nctb_l0_mpm_bits;
+
+ /* Encoder header bits accumulated till NCTB */
+ LWORD64 i8_nctb_hdr_bits_consumed;
+
+} ihevce_sub_pic_rc_ctxt_t;
+
+/**
+******************************************************************************
+ * @brief Memory manager context (stores the memory tables allocated)
+******************************************************************************
+ */
+typedef struct
+{
+ /**
+ * Total number of memtabs (Modules and system)
+ * during create time
+ */
+ WORD32 i4_num_create_memtabs;
+
+ /**
+ * Pointer to the mem tabs
+ * of create time
+ */
+ iv_mem_rec_t *ps_create_memtab;
+
+ /**
+ * Total number of memtabs of Data and control Ques
+ * during Ques create time
+ */
+ WORD32 i4_num_q_memtabs;
+
+ /**
+ * Pointer to the mem tabs
+ * of Ques create time
+ */
+ iv_mem_rec_t *ps_q_memtab;
+
+} enc_mem_mngr_ctxt;
+
+/**
+******************************************************************************
+ * @brief Encoder Interface Queues Context
+******************************************************************************
+ */
+typedef struct
+{
+ /** Number of Queues at interface context level */
+ WORD32 i4_num_queues;
+
+ /** Array of Queue handles (IHEVCE_MAX_NUM_QUEUES entries) */
+ void *apv_q_hdl[IHEVCE_MAX_NUM_QUEUES];
+
+ /** Mutex for ensuring thread safety of the access of the queues */
+ void *pv_q_mutex_hdl;
+
+} enc_q_ctxt_t;
+
+/**
+******************************************************************************
+ * @brief Module contexts of the different modules in the encoder (opaque pointers)
+******************************************************************************
+ */
+
+typedef struct
+{
+ /** Motion estimation context pointer */
+ void *pv_me_ctxt;
+ /** Coarse Motion estimation context pointer */
+ void *pv_coarse_me_ctxt;
+
+ /** Intra Prediction context pointer */
+ void *pv_ipe_ctxt;
+
+ /** Encode Loop context pointer */
+ void *pv_enc_loop_ctxt;
+
+ /** Entropy Coding context pointers, one per bit-rate instance */
+ void *apv_ent_cod_ctxt[IHEVCE_MAX_NUM_BITRATES];
+
+ /** Look Ahead Processing context pointer */
+ void *pv_lap_ctxt;
+ /** Rate control context pointers, one per bit-rate instance */
+ void *apv_rc_ctxt[IHEVCE_MAX_NUM_BITRATES];
+ /** Decomposition pre intra context pointer */
+ void *pv_decomp_pre_intra_ctxt;
+
+} module_ctxt_t;
+
+/**
+******************************************************************************
+ * @brief Threads semaphore handles
+******************************************************************************
+ */
+typedef struct
+{
+ /** LAP semaphore handle */
+ void *pv_lap_sem_handle;
+
+ /** Encode frame Process semaphore handle */
+ void *pv_enc_frm_proc_sem_handle;
+
+ /** Pre Encode frame Process semaphore handle */
+ void *pv_pre_enc_frm_proc_sem_handle;
+
+ /** Entropy coding semaphore handle
+ One semaphore for each entropy thread, i.e. for each bit-rate instance */
+ void *apv_ent_cod_sem_handle[IHEVCE_MAX_NUM_BITRATES];
+
+ /**
+ * Semaphore handle corresponding to get free inp frame buff
+ * function call from app if called in blocking mode
+ */
+ void *pv_inp_data_sem_handle;
+
+ /**
+ * Semaphore handle corresponding to get free inp control command buff
+ * function call from app if called in blocking mode
+ */
+ void *pv_inp_ctrl_sem_handle;
+
+ /**
+ * Semaphore handles corresponding to get filled out bitstream buff
+ * function call from app if called in blocking mode (one per bit-rate instance)
+ */
+ void *apv_out_strm_sem_handle[IHEVCE_MAX_NUM_BITRATES];
+
+ /**
+ * Semaphore handles corresponding to get filled out recon buff
+ * function call from app if called in blocking mode (one per bit-rate instance)
+ */
+ void *apv_out_recon_sem_handle[IHEVCE_MAX_NUM_BITRATES];
+
+ /**
+ * Semaphore handle corresponding to get filled out control status buff
+ * function call from app if called in blocking mode
+ */
+ void *pv_out_ctrl_sem_handle;
+
+ /**
+ * NOTE(review): original comment was a copy of the control status one above;
+ * by its name this is presumably the LAP input data semaphore - confirm
+ */
+ void *pv_lap_inp_data_sem_hdl;
+
+ /**
+ * NOTE(review): original comment was a copy of the control status one above;
+ * by its name this is presumably the pre-enc input data semaphore - confirm
+ */
+ void *pv_preenc_inp_data_sem_hdl;
+
+ /**
+ * Semaphore handles corresponding to Multi Res Single output case
+ */
+ void *pv_ent_common_mres_sem_hdl;
+ void *pv_out_common_mres_sem_hdl;
+
+} thrd_que_sem_hdl_t;
+
+/**
+******************************************************************************
+ * @brief Frame level structure which has parameters about CTBs
+******************************************************************************
+ */
+typedef struct
+{
+ /** CTB size of all CTBs in a frame, in pixels.
+ * This is a create time value;
+ * run time change in this value is not supported
+ */
+ WORD32 i4_ctb_size;
+
+ /** Minimum CU size of CTBs in a frame, in pixels.
+ * This is a create time value;
+ * run time change in this value is not supported
+ */
+ WORD32 i4_min_cu_size;
+
+ /** Worst case num CUs in CTB based on i4_ctb_size */
+ WORD32 i4_num_cus_in_ctb;
+
+ /** Worst case num PUs in CTB based on i4_ctb_size */
+ WORD32 i4_num_pus_in_ctb;
+
+ /** Worst case num TUs in CTB based on i4_ctb_size */
+ WORD32 i4_num_tus_in_ctb;
+
+ /** Number of CTBs in horizontal direction;
+ * this is based on run time source width and i4_ctb_size
+ */
+ WORD32 i4_num_ctbs_horz;
+
+ /** Number of CTBs in vertical direction;
+ * this is based on run time source height and i4_ctb_size
+ */
+ WORD32 i4_num_ctbs_vert;
+
+ /** MAX CUs in horizontal direction;
+ * this is based on run time source width, i4_ctb_size and i4_num_cus_in_ctb
+ */
+ WORD32 i4_max_cus_in_row;
+
+ /** MAX PUs in horizontal direction;
+ * this is based on run time source width, i4_ctb_size and i4_num_pus_in_ctb
+ */
+ WORD32 i4_max_pus_in_row;
+
+ /** MAX TUs in horizontal direction;
+ * this is based on run time source width, i4_ctb_size and i4_num_tus_in_ctb
+ */
+ WORD32 i4_max_tus_in_row;
+
+ /**
+ * CU aligned picture width (currently aligned to MAX CU size);
+ * should be modified to be aligned to MIN CU size
+ */
+
+ WORD32 i4_cu_aligned_pic_wd;
+
+ /**
+ * CU aligned picture height (currently aligned to MAX CU size);
+ * should be modified to be aligned to MIN CU size
+ */
+
+ WORD32 i4_cu_aligned_pic_ht;
+
+ /* Pointer to a frame level memory,
+ Stride is = 1 + (num ctbs in a ctb-row) + 1
+ Height is = 1 + (num ctbs in a ctb-col)
+ Contains tile-id of each ctb */
+ WORD32 *pi4_tile_id_map;
+
+ /* Stride of the tile-id map, in units of ctb */
+ WORD32 i4_tile_id_ctb_map_stride;
+
+} frm_ctb_ctxt_t;
+
+/**
+******************************************************************************
+ * @brief ME Job Queue descriptor (input / output dependencies of one job)
+******************************************************************************
+ */
+typedef struct
+{
+ /** Number of output dependencies which need to be set after
+ * the current job is complete;
+ * should be less than or equal to MAX_OUT_DEP defined in
+ * ihevce_multi_thrd_structs.h
+ */
+ WORD32 i4_num_output_dep;
+
+ /** Array of offsets from the start of the output dependent layer's Job Ques
+ * which depend on the current Job being complete
+ */
+ WORD32 ai4_out_dep_unit_off[MAX_OUT_DEP];
+
+ /** Number of input dependencies to be resolved for the current job to start;
+ * this many jobs in the lower layer should be complete to
+ * start the current JOB
+ */
+ WORD32 i4_num_inp_dep;
+
+} multi_thrd_me_job_q_prms_t;
+
+/**
+ * @brief Structure in which recon data
+ * and related parameters are sent from the Encoder
+ */
+typedef struct
+{
+ /** Kept for maintaining backwards compatibility in future */
+ WORD32 i4_size;
+
+ /** Buffer id for the current buffer */
+ WORD32 i4_buf_id;
+
+ /** POC of the current buffer */
+ WORD32 i4_poc;
+
+ /** End flag to communicate this is the last frame output from the encoder */
+ WORD32 i4_end_flag;
+
+ /** End flag to communicate to the encoder that this is the last buffer from the application
+ 1 - Last buf, 0 - Not last buffer. No other values are supported.
+ Application has to set the appropriate value before queuing in the encoder queue */
+
+ WORD32 i4_is_last_buf;
+
+ /** Recon luma buffer pointer */
+ void *pv_y_buf;
+
+ /** Recon cb buffer pointer */
+ void *pv_cb_buf;
+
+ /** Recon cr buffer pointer */
+ void *pv_cr_buf;
+
+ /** Luma size (in pixels, per the field name) **/
+ WORD32 i4_y_pixels;
+
+ /** Chroma size (in pixels, per the field name) **/
+ WORD32 i4_uv_pixels;
+
+} iv_enc_recon_data_buffs_t;
+
+/**
+******************************************************************************
+ * @brief Multi Thread context structure
+******************************************************************************
+ */
+typedef struct
+{
+ /* Flag to indicate to enc and pre-enc thrds that app has sent force end cmd*/
+ WORD32 i4_force_end_flag;
+
+ /** Force all active threads flag
+ * This flag will be set to 1 if all Number of cores givento the encoder
+ * is less than or Equal to MAX_NUM_CORES_SEQ_EXEC. In this mode
+ * All pre enc threads and enc threads will run of the same cores with
+ * time sharing ar frame level
+ */
+ WORD32 i4_all_thrds_active_flag;
+
+ /** Flag to indicate that core manager has been configured to enable
+ * sequential execution
+ */
+ WORD32 i4_seq_mode_enabled_flag;
+ /*-----------------------------------------------------------------------*/
+ /*--------- Params related to encode group -----------------------------*/
+ /*-----------------------------------------------------------------------*/
+
+ /** Number of processing threads created runtime in encode group */
+ WORD32 i4_num_enc_proc_thrds;
+
+ /** Number of processing threads active for a given frame
+ * This value will be monitored at frame level, so as to
+ * have provsion for increasing / decreasing threads
+ * based on Load balance b/w stage in encoder
+ */
+ WORD32 i4_num_active_enc_thrds;
+ /** Job Queue Memory encode */
+ job_queue_t *ps_job_q_enc[PING_PONG_BUF];
+
+ /** Array of Job Queue handles of enc group for ping and pong instance*/
+ job_queue_handle_t as_job_que_enc_hdls[NUM_ENC_JOBS_QUES][PING_PONG_BUF];
+
+ /** Mutex for ensuring thread safety of the access of Job queues in encode group */
+ void *pv_job_q_mutex_hdl_enc_grp_me;
+
+ /** Mutex for ensuring thread safety of the access of Job queues in encode group */
+ void *pv_job_q_mutex_hdl_enc_grp_enc_loop;
+
+ /** Array of Semaphore handles (for each frame processing threads ) */
+ void *apv_enc_thrd_sem_handle[MAX_NUM_FRM_PROC_THRDS_ENC];
+
+ /** Array for communcating start processing from master thread to indivisual
+ * threads in Enocde group of threads
+ * till 0 : wait
+ * 1 : start
+ * After reading the start signal, corresponding thread hould reset it to 0
+ */
+ WORD32 ai4_enc_frm_proc_start[MAX_NUM_FRM_PROC_THRDS_ENC];
+
+ /** Note: For Enc loop pass similar memory is used whihc is part of frm_proc_ent_cod_ctxt_t
+ * for Row level Sync hence not explicitly declared here
+ */
+
+ /** Array for ME to export the Job que dependency for all layers */
+ multi_thrd_me_job_q_prms_t as_me_job_q_prms[MAX_NUM_HME_LAYERS][MAX_NUM_VERT_UNITS_FRM];
+
+ /* pointer to the mutex handle*/
+ void *apv_mutex_handle[MAX_NUM_ME_PARALLEL];
+
+ /* pointer to the mutex handle for frame init*/
+ void *apv_mutex_handle_me_end[MAX_NUM_ME_PARALLEL];
+
+ /* pointer to the mutex handle for frame init*/
+ void *apv_mutex_handle_frame_init[MAX_NUM_ENC_LOOP_PARALLEL];
+
+ /*pointer to the mutex handle*/
+ void *apv_post_enc_mutex_handle[MAX_NUM_ENC_LOOP_PARALLEL];
+
+ /* Flag to indicate that master has done ME init*/
+ WORD32 ai4_me_master_done_flag[MAX_NUM_ME_PARALLEL];
+
+ /* Counter to keep track of me num of thrds exiting critical section*/
+ WORD32 me_num_thrds_exited[MAX_NUM_ME_PARALLEL];
+
+ /* Flag to indicate that master has done the frame init*/
+ WORD32 enc_master_done_frame_init[MAX_NUM_ENC_LOOP_PARALLEL];
+
+ /* Counter to keep track of num of thrds exiting critical section*/
+ WORD32 num_thrds_exited[MAX_NUM_ENC_LOOP_PARALLEL];
+
+ /* Counter to keep track of num of thrds exiting critical section for re-encode*/
+ WORD32 num_thrds_exited_for_reenc;
+
+ /* Array to store the curr qp for ping and pong instance*/
+ WORD32 cur_qp[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
+
+ /* Pointers to store output buffers for ping and pong instance*/
+ frm_proc_ent_cod_ctxt_t *ps_curr_out_enc_grp[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
+
+ /* Pointer to store input buffers for me*/
+ pre_enc_me_ctxt_t *aps_cur_inp_me_prms[MAX_NUM_ME_PARALLEL];
+
+ /*pointers to store output buffers from me */
+ me_enc_rdopt_ctxt_t *aps_cur_out_me_prms[NUM_ME_ENC_BUFS];
+
+ /*pointers to store input buffers to enc-rdopt */
+ me_enc_rdopt_ctxt_t *aps_cur_inp_enc_prms[NUM_ME_ENC_BUFS];
+
+ /*Shared memory for Sub Pic rc */
+ /*Qscale calculated by sub pic rc bit control for Intra Pic*/
+ WORD32 ai4_curr_qp_estimated[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
+
+ /*Header bits error by sub pic rc bit control*/
+ float af_acc_hdr_bits_scale_err[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
+
+ /*Accumalated ME SAD for NCTB*/
+ LWORD64 ai8_nctb_me_sad[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
+ [MAX_NUM_FRM_PROC_THRDS_ENC];
+
+ /*Accumalated IPE SAD for NCTB*/
+ LWORD64 ai8_nctb_ipe_sad[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
+ [MAX_NUM_FRM_PROC_THRDS_ENC];
+
+ /*Accumalated L0 IPE SAD for NCTB*/
+ LWORD64 ai8_nctb_l0_ipe_sad[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
+ [MAX_NUM_FRM_PROC_THRDS_ENC];
+
+ /*Accumalated Activity Factor for NCTB*/
+ LWORD64 ai8_nctb_act_factor[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
+ [MAX_NUM_FRM_PROC_THRDS_ENC];
+
+ /*Accumalated Ctb counter across all threads*/
+ WORD32 ai4_ctb_ctr[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
+
+ /*Bits threshold reached for across all threads*/
+ WORD32 ai4_threshold_reached[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
+
+ /*To hold the Previous In-frame RC chunk QP*/
+ WORD32 ai4_prev_chunk_qp[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
+
+ /*Accumalated Ctb counter across all threads*/
+ WORD32 ai4_acc_ctb_ctr[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
+
+ /*Flag to check if thread is initialized */
+ WORD32 ai4_thrd_id_valid_flag[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
+ [MAX_NUM_FRM_PROC_THRDS_ENC];
+
+ /*Accumalated Ctb counter across all threads*/
+ //WORD32 ai4_acc_qp[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES][MAX_NUM_FRM_PROC_THRDS_ENC];
+
+ /*Accumalated bits consumed for nctbs across all threads*/
+ LWORD64 ai8_nctb_bits_consumed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
+ [MAX_NUM_FRM_PROC_THRDS_ENC];
+
+ /*Accumalated hdr bits consumed for nctbs across all threads*/
+ LWORD64 ai8_nctb_hdr_bits_consumed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
+ [MAX_NUM_FRM_PROC_THRDS_ENC];
+
+ /*Accumalated l0 mpm bits consumed for nctbs across all threads*/
+ LWORD64 ai8_nctb_mpm_bits_consumed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
+ [MAX_NUM_FRM_PROC_THRDS_ENC];
+
+ /*Accumalated bits consumed for total ctbs across all threads*/
+ LWORD64 ai8_acc_bits_consumed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
+ [MAX_NUM_FRM_PROC_THRDS_ENC];
+
+ /*Accumalated bits consumed for total ctbs across all threads*/
+ LWORD64 ai8_acc_bits_mul_qs_consumed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]
+ [MAX_NUM_FRM_PROC_THRDS_ENC];
+
+ /*Qscale calculated by sub pic rc bit control */
+ WORD32 ai4_curr_qp_acc[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
+ /* End of Sub pic rc variables */
+
+ /* Pointers to store input (only L0 IPE)*/
+ pre_enc_L0_ipe_encloop_ctxt_t *aps_cur_L0_ipe_inp_prms[MAX_NUM_ME_PARALLEL];
+
+ /** Slice header parameters */
+ /** temporarily store the slice header parameters in enc-loop thread
+ which will be copied to curr_out when buffer is acquired */
+ //slice_header_t as_slice_hdr[PING_PONG_BUF];
+
+ /* Array to store input buffer ids for ping and pong instances*/
+ //WORD32 in_buf_id[PING_PONG_BUF];
+
+ /* Array to store L0 IPE input buf ids*/
+ WORD32 ai4_in_frm_l0_ipe_id[MAX_NUM_ME_PARALLEL];
+
+ /* Array to store output buffer ids for ping and pong instances*/
+ WORD32 out_buf_id[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; //[PING_PONG_BUF];
+
+ /* Variable to indicate ping and pong instance for each thread*/
+ WORD32 ping_pong[MAX_NUM_FRM_PROC_THRDS_ENC];
+
+ /* Array of pointers to store the recon buf pointers*/
+ iv_enc_recon_data_buffs_t
+ *ps_recon_out[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; //[PING_PONG_BUF];
+
+ /* Array of pointers to frame recon for ping and pong instances*/
+ recon_pic_buf_t *ps_frm_recon[NUM_ME_ENC_BUFS][IHEVCE_MAX_NUM_BITRATES];
+
+ /* Array of recon buffer ids for ping and pong instance*/
+ WORD32 recon_buf_id[NUM_ME_ENC_BUFS][IHEVCE_MAX_NUM_BITRATES]; //[PING_PONG_BUF];
+
+ /* End flag to signal end of all the frames in me*/
+ WORD32 me_end_flag;
+
+ /* End flag to signal end of all the frames in enc*/
+ WORD32 enc_end_flag;
+
+ /* Counter to keep track of num thrds done*/
+ WORD32 num_thrds_done;
+
+ /* Flags to keep track of dumped ping pong recon buffer*/
+ WORD32 is_recon_dumped[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; //[PING_PONG_BUF];
+
+ /* Flags to keep track of dumped ping pong output buffer*/
+ WORD32 is_out_buf_freed[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; //[PING_PONG_BUF];
+
+ /* flag to produce output buffer by the thread who ever is finishing
+ enc-loop processing first, so that the entropy thread can start processing */
+ WORD32 ai4_produce_outbuf[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES];
+
+ /* Flags to keep track of dumped ping pong input buffer*/
+ WORD32 is_in_buf_freed[MAX_NUM_ENC_LOOP_PARALLEL];
+
+ /* Flags to keep track of dumped ping pong L0 IPE to enc buffer*/
+ WORD32 is_L0_ipe_in_buf_freed[MAX_NUM_ENC_LOOP_PARALLEL];
+
+ /** Dependency manager for checking whether prev. EncLoop done before
+ current frame EncLoop starts */
+ void *apv_dep_mngr_prev_frame_done[MAX_NUM_ENC_LOOP_PARALLEL];
+
+ /** Dependency manager for checking whether prev. EncLoop done before
+ re-encode of the current frame */
+ void *pv_dep_mngr_prev_frame_enc_done_for_reenc;
+
+ /** Dependency manager for checking whether prev. me done before
+ current frame me starts */
+ void *apv_dep_mngr_prev_frame_me_done[MAX_NUM_ME_PARALLEL];
+
+ /** ME coarsest layer JOB queue type */
+ WORD32 i4_me_coarsest_lyr_type;
+
+ /** number of encloop frames running in parallel */
+ WORD32 i4_num_enc_loop_frm_pllel;
+
+ /** number of me frames running in parallel */
+ WORD32 i4_num_me_frm_pllel;
+
+ /*-----------------------------------------------------------------------*/
+ /*--------- Params related to pre-enc stage -----------------------------*/
+ /*-----------------------------------------------------------------------*/
+
+ /** Number of processing threads created runtime in pre encode group */
+ WORD32 i4_num_pre_enc_proc_thrds;
+
+ /** Number of processing threads active for a given frame
+ * This value will be monitored at frame level, so as to
+ * have provision for increasing / decreasing threads
+ * based on load balance between stages in encoder
+ */
+ WORD32 i4_num_active_pre_enc_thrds;
+ /** number of threads that have done processing the current frame
+ Use to find out the last thread that is coming out of pre-enc processing
+ so that the last thread can do de-init of pre-enc stage */
+ WORD32 ai4_num_thrds_processed_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
+
+ /** number of threads that have done processing the current frame
+ Used to find out the first and the last thread in order to do the qp query. As the query
+ is not read-only, the query should be done only once by the thread that comes first
+ and other threads should get the same value*/
+ WORD32 ai4_num_thrds_processed_L0_ipe_qp_init[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
+
+ /** number of threads that have done processing decomp_intra
+ Used to find out the last thread that is coming out so that
+ the last thread can set flag for decomp_pre_intra_finish */
+ WORD32 ai4_num_thrds_processed_decomp[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
+
+ /** number of threads that have done processing coarse_me
+ Used to find out the last thread that is coming out so that
+ the last thread can set flag for coarse_me_finish */
+ WORD32 ai4_num_thrds_processed_coarse_me[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
+
+ /*Flag to indicate if current instance (frame)'s Decomp_pre_intra and Coarse_ME is done.
+ Used to check if previous frame is done processing decomp_pre_intra and coarse_me */
+ WORD32 ai4_decomp_coarse_me_complete_flag[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
+
+ /** Dependency manager for checking whether prev. frame decomp_intra
+ done before current frame decomp_intra starts */
+ void *pv_dep_mngr_prev_frame_pre_enc_l1;
+
+ /** Dependency manager for checking whether prev. frame L0 IPE done before
+ current frame L0 IPE starts */
+ void *pv_dep_mngr_prev_frame_pre_enc_l0;
+
+ /** Dependency manager for checking whether prev. frame coarse_me done before
+ current frame coarse_me starts */
+ void *pv_dep_mngr_prev_frame_pre_enc_coarse_me;
+
+ /** flag to indicate if pre_enc_init is done for current frame */
+ WORD32 ai4_pre_enc_init_done[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
+
+ /** flag to indicate if pre_enc_hme_init is done for current frame */
+ WORD32 ai4_pre_enc_hme_init_done[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
+
+ /** flag to indicate if pre_enc_deinit is done for current frame */
+ WORD32 ai4_pre_enc_deinit_done[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
+
+ /** Flag to indicate the end of processing when all the frames are done processing */
+ WORD32 ai4_end_flag_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
+
+ /** Flag to indicate the control blocking mode indicating input command to pre-enc
+ group should be blocking or unblocking */
+ WORD32 i4_ctrl_blocking_mode;
+
+ /** Current input pointer */
+ ihevce_lap_enc_buf_t *aps_curr_inp_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
+
+ WORD32 i4_last_inp_buf;
+
+ /* buffer id for input buffer */
+ WORD32 ai4_in_buf_id_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
+
+ /** Current output pointer */
+ pre_enc_me_ctxt_t *aps_curr_out_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
+
+ /*Current L0 IPE to enc output pointer */
+ pre_enc_L0_ipe_encloop_ctxt_t *ps_L0_IPE_curr_out_pre_enc;
+
+ /** buffer id for output buffer */
+ WORD32 ai4_out_buf_id_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
+
+ /** buffer id for L0 IPE enc buffer*/
+ WORD32 i4_L0_IPE_out_buf_id;
+
+ /** current frame recon pointer */
+ recon_pic_buf_t *aps_frm_recon_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
+
+ /** Current picture Qp */
+ WORD32 ai4_cur_frame_qp_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
+
+ /** Decomp layer buffer indices */
+ WORD32 ai4_decomp_lyr_buf_idx[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
+
+ /*since it is guaranteed that cur frame ipe will not start unless prev frame ipe is completely done,
+ an array of MAX_PRE_ENC_STAGGER might not be required*/
+ WORD32 i4_qp_update_l0_ipe;
+
+ /** Current picture encoded is the last picture to be encoded flag */
+ WORD32 i4_last_pic_flag;
+
+ /** Mutex for ensuring thread safety of the access of Job queues in decomp stage */
+ void *pv_job_q_mutex_hdl_pre_enc_decomp;
+
+ /** Mutex for ensuring thread safety of the access of Job queues in HME group */
+ void *pv_job_q_mutex_hdl_pre_enc_hme;
+
+ /** Mutex for ensuring thread safety of the access of Job queues in l0 ipe stage */
+ void *pv_job_q_mutex_hdl_pre_enc_l0ipe;
+
+ /** mutex handle for pre-enc init */
+ void *pv_mutex_hdl_pre_enc_init;
+
+ /** mutex handle for pre-enc decomp deinit */
+ void *pv_mutex_hdl_pre_enc_decomp_deinit;
+
+ /** mutex handle for pre enc hme init */
+ void *pv_mutex_hdl_pre_enc_hme_init;
+
+ /** mutex handle for pre-enc hme deinit */
+ void *pv_mutex_hdl_pre_enc_hme_deinit;
+
+ /*qp query before l0 ipe is done by multiple frames*/
+ /** mutex handle for L0 ipe(pre-enc init)*/
+ void *pv_mutex_hdl_l0_ipe_init;
+
+ /** mutex handle for pre-enc deinit */
+ void *pv_mutex_hdl_pre_enc_deinit;
+
+ /** Array of Semaphore handles (for each frame processing threads ) */
+ void *apv_pre_enc_thrd_sem_handle[MAX_NUM_FRM_PROC_THRDS_ENC];
+ /** array which will tell the number of CTB processed in each row,
+ * used for Row level sync in IPE pass
+ */
+ WORD32 ai4_ctbs_in_row_proc_ipe_pass[MAX_NUM_CTB_ROWS_FRM];
+
+ /** Job Queue Memory pre encode */
+ job_queue_t *aps_job_q_pre_enc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME];
+
+ /** Array of Job Queue handles enc group */
+ job_queue_handle_t as_job_que_preenc_hdls[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]
+ [NUM_PRE_ENC_JOBS_QUES];
+
+ /* accumulate intra sad across all thread to get qp before L0 IPE*/
+ WORD32 ai4_intra_satd_acc[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]
+ [MAX_NUM_FRM_PROC_THRDS_PRE_ENC];
+
+ WORD32 i4_delay_pre_me_btw_l0_ipe;
+
+ /*** This variable has the maximum delay between hme and l0ipe ***/
+ /*** This is used for wrapping around L0IPE index ***/
+ WORD32 i4_max_delay_pre_me_btw_l0_ipe;
+
+ /* This is to register the handles of Dep Mngr b/w EncLoop and ME */
+ /* This is used to delete the Mngr at the end */
+ void *apv_dep_mngr_encloop_dep_me[NUM_ME_ENC_BUFS];
+ /*flag to track buffer in me/enc que is produced or not*/
+ WORD32 ai4_me_enc_buff_prod_flag[NUM_ME_ENC_BUFS];
+
+ /*out buf que id for me */
+ WORD32 ai4_me_out_buf_id[NUM_ME_ENC_BUFS];
+
+ /*in buf que id for enc from me*/
+ WORD32 i4_enc_in_buf_id[NUM_ME_ENC_BUFS];
+
+ /* This is used to tell whether the free of recon buffers are done or not */
+ WORD32 i4_is_recon_free_done;
+
+ /* index for DVSR population */
+ WORD32 i4_idx_dvsr_p;
+ WORD32 aai4_l1_pre_intra_done[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME]
+ [(HEVCE_MAX_HEIGHT >> 1) / 8];
+
+ WORD32 i4_rc_l0_qp;
+
+ /* Used for mres single out cases. Checks whether a particular resolution is active or passive */
+ /* Only one resolution should be active for mres_single_out case */
+ WORD32 *pi4_active_res_id;
+
+ /**
+ * Sub Pic bit control mutex lock handle
+ */
+ void *pv_sub_pic_rc_mutex_lock_hdl;
+
+ void *pv_sub_pic_rc_for_qp_update_mutex_lock_hdl;
+
+ WORD32 i4_encode;
+ WORD32 i4_in_frame_rc_enabled;
+ WORD32 i4_num_re_enc;
+
+} multi_thrd_ctxt_t;
+
+/**
+ * @brief Structure describing the tile configuration and the current tile
+ */
+typedef struct
+{
+ /* flag to indicate tile encoding enabled/disabled */
+ WORD32 i4_tiles_enabled_flag;
+
+ /* flag to indicate uniform spacing of tiles */
+ WORD32 i4_uniform_spacing_flag;
+
+ /* number of tile columns and number of tile rows */
+ WORD32 i4_num_tile_cols;
+ WORD32 i4_num_tile_rows;
+
+ /* Current tile width and height */
+ WORD32 i4_curr_tile_width;
+ WORD32 i4_curr_tile_height;
+
+ /* Current tile width and height in CTB units */
+ WORD32 i4_curr_tile_wd_in_ctb_unit;
+ WORD32 i4_curr_tile_ht_in_ctb_unit;
+
+ /* frame resolution */
+ //WORD32 i4_frame_width; /* encode-width */
+ //WORD32 i4_frame_height; /* encode-height */
+
+ /* total number of tiles "in frame" */
+ WORD32 i4_num_tiles;
+
+ /* Current tile id. Assigned by raster scan order in a frame */
+ WORD32 i4_curr_tile_id;
+
+ /* x-pos of first ctb of the slice in CTB units */
+ /* y-pos of first ctb of the slice in CTB units */
+ WORD32 i4_first_ctb_x;
+ WORD32 i4_first_ctb_y;
+
+ /* x-pos of first ctb of the slice in samples */
+ /* y-pos of first ctb of the slice in samples */
+ WORD32 i4_first_sample_x;
+ WORD32 i4_first_sample_y;
+
+} ihevce_tile_params_t;
+
+/**
+******************************************************************************
+ * @brief Encoder context structure
+******************************************************************************
+ */
+
+typedef struct
+{
+ /**
+ * VPS parameters (one entry per bitrate instance)
+ */
+ vps_t as_vps[IHEVCE_MAX_NUM_BITRATES];
+
+ /**
+ * SPS parameters (one entry per bitrate instance)
+ */
+ sps_t as_sps[IHEVCE_MAX_NUM_BITRATES];
+
+ /**
+ * PPS parameters
+ * Required for each bitrate separately, mainly because
+ * init qp etc parameters need to be different for each instance
+ */
+ pps_t as_pps[IHEVCE_MAX_NUM_BITRATES];
+
+ /**
+ * Rate control mutex lock handle
+ */
+ void *pv_rc_mutex_lock_hdl;
+
+ /** frame level cu analyse buffer pointer for ME
+ * ME will get ps_ctb_analyse structure populated with ps_cu pointers
+ * pointing to ps_cu_analyse buffer from IPE.
+ */
+ //cu_analyse_t *ps_cu_analyse_inter[PING_PONG_BUF];
+
+ /**
+ * CTB frame context between encoder (producer) and entropy (consumer)
+ */
+ enc_q_ctxt_t s_enc_ques;
+
+ /**
+ * Encoder memory manager ctxt
+ */
+ enc_mem_mngr_ctxt s_mem_mngr;
+
+ /**
+ * Semaphores of all the threads created in HLE
+ * and Que handle for buffers between frame process and entropy
+ */
+ thrd_que_sem_hdl_t s_thrd_sem_ctxt;
+
+ /**
+ * Reference / recon buffer Que pointer (one per bitrate instance)
+ */
+ recon_pic_buf_t **pps_recon_buf_q[IHEVCE_MAX_NUM_BITRATES];
+
+ /**
+ * Number of buffers in Recon buffer queue (one per bitrate instance)
+ */
+ WORD32 ai4_num_buf_recon_q[IHEVCE_MAX_NUM_BITRATES];
+
+ /**
+ * Reference / recon buffer Que pointer for Pre Encode group
+ * this will be just a container and no buffers will be allocated
+ */
+ recon_pic_buf_t **pps_pre_enc_recon_buf_q;
+
+ /**
+ * Number of buffers in the Pre Encode group Recon buffer queue
+ */
+ WORD32 i4_pre_enc_num_buf_recon_q;
+
+ /**
+ * frame level CTB parameters and worst PU CU and TU in a CTB row
+ */
+ frm_ctb_ctxt_t s_frm_ctb_prms;
+
+ /*
+ * Module ctxt pointers of all modules
+ */
+ module_ctxt_t s_module_ctxt;
+
+ /*
+ * LAP static parameters
+ */
+ ihevce_lap_static_params_t s_lap_stat_prms;
+
+ /*
+ * Run time dynamic source params
+ */
+
+ ihevce_src_params_t s_runtime_src_prms;
+
+ /*
+ * Target params
+ */
+ ihevce_tgt_params_t s_runtime_tgt_params;
+
+ /*
+ * Run time dynamic coding params
+ */
+ ihevce_coding_params_t s_runtime_coding_prms;
+
+ /**
+ * Pointer to static config params
+ */
+ ihevce_static_cfg_params_t *ps_stat_prms;
+
+ /**
+ * the following structure members are used for copying recon buf info
+ * in case of duplicate pics
+ */
+
+ /**
+ * Array of reference picture lists for pre enc group
+ * Separate list for ping_pong instances
+ * 2=> ref_pic_list0 and ref_pic_list1
+ */
+ recon_pic_buf_t as_pre_enc_ref_lists[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME][2]
+ [HEVCE_MAX_REF_PICS * 2];
+
+ /**
+ * Array of reference picture list pointers for pre enc group
+ * Separate list for ping_pong instances
+ * 2=> ref_pic_list0 and ref_pic_list1
+ */
+ recon_pic_buf_t *aps_pre_enc_ref_lists[MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME][2]
+ [HEVCE_MAX_REF_PICS * 2];
+
+ /**
+ * Number of input frames per input queue
+ */
+ WORD32 i4_num_input_buf_per_queue;
+
+ /**
+ * POC of the Clean Random Access (CRA) I pic
+ */
+ WORD32 i4_cra_poc;
+
+ /** Number of ref pics in list 0 for any given frame */
+ WORD32 i4_num_ref_l0;
+
+ /** Number of ref pics in list 1 for any given frame */
+ WORD32 i4_num_ref_l1;
+
+ /** Number of active ref pics in list 0 for cur frame */
+ WORD32 i4_num_ref_l0_active;
+
+ /** Number of active ref pics in list 1 for cur frame */
+ WORD32 i4_num_ref_l1_active;
+
+ /** Number of ref pics in list 0 for any given frame pre encode stage */
+ WORD32 i4_pre_enc_num_ref_l0;
+
+ /** Number of ref pics in list 1 for any given frame pre encode stage */
+ WORD32 i4_pre_enc_num_ref_l1;
+
+ /** Number of active ref pics in list 0 for cur frame pre encode stage */
+ WORD32 i4_pre_enc_num_ref_l0_active;
+
+ /** Number of active ref pics in list 1 for cur frame pre encode stage */
+ WORD32 i4_pre_enc_num_ref_l1_active;
+
+ /**
+ * working mem to be used for frm level activities
+ * One example is interpolation at frame level. This requires memory
+ * of (max width + 16) * (max_height + 7 + 16 ) * 2 bytes.
+ * This is so since we generate interp output for max_width + 16 x
+ * max_height + 16, and then the intermediate output is 16 bit and
+ * is max_height + 16 + 7 rows
+ */
+ UWORD8 *pu1_frm_lvl_wkg_mem;
+
+ /**
+ * Multi thread processing context
+ * This memory contains the variables and pointers shared across threads
+ * in enc-group and pre-enc-group
+ */
+ multi_thrd_ctxt_t s_multi_thrd;
+
+ /** I/O Queues created status */
+ WORD32 i4_io_queues_created;
+
+ WORD32 i4_end_flag;
+
+ /** number of bit-rate instances running */
+ WORD32 i4_num_bitrates;
+
+ /** number of enc frames running in parallel */
+ WORD32 i4_num_enc_loop_frm_pllel;
+
+ /* ref bitrate id */
+ WORD32 i4_ref_mbr_id;
+
+ /* Flag to indicate to the app that end of processing has been reached */
+ WORD32 i4_frame_limit_reached;
+
+ /* Structure to store the function selector
+ * pointers for common and encoder */
+ func_selector_t s_func_selector;
+
+ /* ref resolution id */
+ WORD32 i4_resolution_id;
+
+ /* hle context */
+ void *pv_hle_ctxt;
+
+ rc_quant_t s_rc_quant;
+ /*ME cost of P pic stored for the next ref B pic*/
+ //LWORD64 i8_acc_me_cost_of_p_pic_for_b_pic[2];
+
+ UWORD32 u4_cur_pic_encode_cnt;
+ UWORD32 u4_cur_pic_encode_cnt_dbp;
+ /* past 2 p pics high complexity status */
+ WORD32 ai4_is_past_pic_complex[2];
+
+ WORD32 i4_is_I_reset_done;
+ WORD32 i4_past_RC_reset_count;
+
+ WORD32 i4_future_RC_reset;
+
+ WORD32 i4_past_RC_scd_reset_count;
+
+ WORD32 i4_future_RC_scd_reset;
+ WORD32 i4_poc_reset_values;
+
+ /* Place holder to store the length of LAP in first pass */
+ /** Number of frames to look-ahead for RC by -
+ * counts 2 fields as one frame for interlaced
+ */
+ WORD32 i4_look_ahead_frames_in_first_pass;
+
+ WORD32 ai4_mod_factor_derived_by_variance[2];
+ float f_strength;
+
+ /* for B frames use the avg activity
+ from the layer 0 (I or P) which is the average over
+ Lap2 window */
+ LWORD64 ai8_lap2_8x8_avg_act_from_T0[2];
+
+ LWORD64 ai8_lap2_16x16_avg_act_from_T0[3];
+
+ LWORD64 ai8_lap2_32x32_avg_act_from_T0[3];
+
+ /* for B frames use the log of avg activity
+ from the layer 0 (I or P) which is the average over
+ Lap2 window */
+ long double ald_lap2_8x8_log_avg_act_from_T0[2];
+
+ long double ald_lap2_16x16_log_avg_act_from_T0[3];
+
+ long double ald_lap2_32x32_log_avg_act_from_T0[3];
+
+ ihevce_tile_params_t *ps_tile_params_base;
+
+ WORD32 ai4_column_width_array[MAX_TILE_COLUMNS];
+
+ WORD32 ai4_row_height_array[MAX_TILE_ROWS];
+
+ /* Architecture */
+ IV_ARCH_T e_arch_type;
+
+ UWORD8 u1_is_popcnt_available;
+
+ WORD32 i4_active_scene_num;
+
+ WORD32 i4_max_fr_enc_loop_parallel_rc;
+ WORD32 ai4_rc_query[IHEVCE_MAX_NUM_BITRATES];
+ WORD32 i4_active_enc_frame_id;
+
+ /**
+ * LAP interface ctxt pointer
+ */
+ void *pv_lap_interface_ctxt;
+
+ /* If enabled, enables blu ray compatibility of op */
+ WORD32 i4_blu_ray_spec;
+
+} enc_ctxt_t;
+
+/**
+******************************************************************************
+* @brief This struct contains the inter CTB params needed for the decision
+* of the best inter CU results
+******************************************************************************
+*/
+typedef struct
+{
+ hme_pred_buf_mngr_t s_pred_buf_mngr;
+
+ /** X and Y offset of ctb w.r.t. start of pic */
+ WORD32 i4_ctb_x_off;
+ WORD32 i4_ctb_y_off;
+
+ /**
+ * Pred buffer ptr, updated inside subpel refinement process. This
+ * location is passed to the leaf fxn for copying the winner pred buf
+ */
+ UWORD8 **ppu1_pred;
+
+ /** Working mem passed to leaf fxns */
+ UWORD8 *pu1_wkg_mem;
+
+ /** prediction buffer stride for leaf fxns to copy the pred winner buf */
+ WORD32 i4_pred_stride;
+
+ /** Stride of input buf, updated inside subpel fxn */
+ WORD32 i4_inp_stride;
+
+ /** stride of recon buffer */
+ WORD32 i4_rec_stride;
+
+ /** Indicates if bi dir is enabled or not */
+ WORD32 i4_bidir_enabled;
+
+ /**
+ * Total number of references of current picture which is encoded
+ */
+ UWORD8 u1_num_ref;
+
+ /** Recon Pic buffer pointers for L0 list */
+ recon_pic_buf_t **pps_rec_list_l0;
+
+ /** Recon Pic buffer pointers for L1 list */
+ recon_pic_buf_t **pps_rec_list_l1;
+
+ /**
+ * These pointers point to modified input, one each for one ref idx.
+ * Instead of weighting the reference, we weight the input with inverse
+ * wt and offset for list 0 and list 1.
+ */
+ UWORD8 *apu1_wt_inp[2][MAX_NUM_REF];
+
+ /* Since ME uses weighted inputs, we use reciprocal of the actual weights */
+ /* that are signaled in the bitstream */
+ WORD32 *pi4_inv_wt;
+ WORD32 *pi4_inv_wt_shift_val;
+
+ /* Map between L0 Reference indices and LC indices */
+ WORD8 *pi1_past_list;
+
+ /* Map between L1 Reference indices and LC indices */
+ WORD8 *pi1_future_list;
+
+ /**
+ * Points to the non-weighted input data for the current CTB
+ */
+ UWORD8 *pu1_non_wt_inp;
+
+ /**
+ * Store the pred lambda and lambda qshift for all the reference indices
+ */
+ WORD32 i4_lamda;
+
+ UWORD8 u1_lamda_qshift;
+
+ WORD32 wpred_log_wdc;
+
+ /**
+ * Number of active references in l0
+ */
+ UWORD8 u1_num_active_ref_l0;
+
+ /**
+ * Number of active references in l1
+ */
+ UWORD8 u1_num_active_ref_l1;
+
+ /** The max_depth for inter tu_tree */
+ UWORD8 u1_max_tr_depth;
+
+ /** Quality Preset */
+ WORD8 i1_quality_preset;
+
+ /** SATD or SAD */
+ UWORD8 u1_use_satd;
+
+ /* Frame level QP */
+ WORD32 i4_qstep_ls8;
+
+ /* Pointer to an array of PU level src variances */
+ UWORD32 *pu4_src_variance;
+
+ WORD32 i4_alpha_stim_multiplier;
+
+ UWORD8 u1_is_cu_noisy;
+
+ ULWORD64 *pu8_part_src_sigmaX;
+
+ ULWORD64 *pu8_part_src_sigmaXSquared;
+
+ UWORD8 u1_max_2nx2n_tu_recur_cands;
+
+} inter_ctb_prms_t;
+
+/*****************************************************************************/
+/* Extern Variable Declarations */
+/*****************************************************************************/
+extern const double lamda_modifier_for_I_pic[8];
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+#endif /* _IHEVCE_ENC_STRUCTS_H_ */
diff --git a/encoder/ihevce_enc_subpel_gen.c b/encoder/ihevce_enc_subpel_gen.c
new file mode 100644
index 0000000..10d7bef
--- /dev/null
+++ b/encoder/ihevce_enc_subpel_gen.c
@@ -0,0 +1,720 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*!
+******************************************************************************
+* \file ihevce_enc_subpel_gen.c
+*
+* \brief
+* This file contains Padding and Subpel plane generation functions
+* at CTB level
+*
+* \date
+* 29/12/2012
+*
+* \author
+* Ittiam
+*
+*
+* List of Functions
+* - ihevce_subpel_padding()
+* - ihevce_pad_interp_recon_ctb()
+*
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_debug.h"
+#include "ihevc_macros.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+#include "ihevc_trans_tables.h"
+#include "ihevc_trans_macros.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_multi_thrd_funcs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_global_tables.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+#include "ihevce_entropy_structs.h"
+#include "ihevce_enc_loop_structs.h"
+#include "ihevce_enc_loop_utils.h"
+#include "ihevce_inter_pred.h"
+#include "ihevce_common_utils.h"
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_subpel_padding \endif
+*
+* \brief
+*    Pads one block of a subpel plane on the picture edges its ctb touches
+*
+* \param[in] pu1_dst : pointer to the block inside the subpel plane
+*            stride : subpel plane stride same as recon stride
+*            tot_wd : width of the block in subpel plane
+*            tot_ht : height of the block in subpel plane
+*            pad_subpel_x : horizontal pad size handed to the pad macros
+*            pad_subpel_y : vertical pad size handed to the pad macros
+*            ctb_ctr : ctb horizontal position
+*            vert_ctr : ctb vertical position
+*            i4_num_ctbs_horz : number of ctbs in one ctb row
+*            i4_num_ctbs_vert : number of ctb rows
+*            ps_func_selector : supplies the pad function pointers
+* \return
+*    None
+*
+* \author
+*  Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_subpel_padding(
+    UWORD8 *pu1_dst,
+    WORD32 stride,
+    WORD32 tot_wd,
+    WORD32 tot_ht,
+    WORD32 pad_subpel_x,
+    WORD32 pad_subpel_y,
+    WORD32 ctb_ctr,
+    WORD32 vert_ctr,
+    WORD32 i4_num_ctbs_horz,
+    WORD32 i4_num_ctbs_vert,
+    func_selector_t *ps_func_selector)
+{
+    ihevc_pad_left_luma_ft *pf_left = ps_func_selector->ihevc_pad_left_luma_fptr;
+    ihevc_pad_right_luma_ft *pf_right = ps_func_selector->ihevc_pad_right_luma_fptr;
+    ihevc_pad_top_ft *pf_top = ps_func_selector->ihevc_pad_top_fptr;
+    ihevc_pad_bottom_ft *pf_bottom = ps_func_selector->ihevc_pad_bottom_fptr;
+
+    /* Picture-boundary tests for the current ctb */
+    const WORD32 is_top_row = (0 == vert_ctr);
+    const WORD32 is_bottom_row = ((i4_num_ctbs_vert - 1) == vert_ctr);
+    const WORD32 is_left_col = (0 == ctb_ctr);
+    const WORD32 is_right_col = ((i4_num_ctbs_horz - 1) == ctb_ctr);
+
+    /* First row and number of rows handed to the left / right padding */
+    UWORD8 *pu1_horz_start = pu1_dst;
+    WORD32 horz_pad_ht = tot_ht;
+
+    if(is_top_row)
+    {
+        PAD_BUF_VER(pu1_dst, stride, tot_wd, pad_subpel_x, pad_subpel_y, pf_top);
+        /* first ctb row: side padding starts pad_subpel_y rows above the block */
+        pu1_horz_start = pu1_dst - (pad_subpel_y * stride);
+        horz_pad_ht += pad_subpel_y;
+    }
+    if(is_bottom_row)
+    {
+        PAD_BUF_VER(
+            (pu1_dst + (tot_ht * stride)), stride, tot_wd, pad_subpel_x, pad_subpel_y, pf_bottom);
+        /* last ctb row: side padding runs pad_subpel_y rows past the block */
+        horz_pad_ht += pad_subpel_y;
+    }
+    if(is_left_col)
+    {
+        PAD_BUF_HOR(
+            pu1_horz_start, stride, horz_pad_ht, pad_subpel_x, pad_subpel_y, pf_left);
+    }
+    if(is_right_col)
+    {
+        PAD_BUF_HOR(
+            pu1_horz_start + tot_wd, stride, horz_pad_ht, pad_subpel_x, pad_subpel_y, pf_right);
+    }
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_pad_interp_recon_ctb \endif
+*
+* \brief
+* Ctb level Subpel Plane generation and padding function.
+* Recon luma and chroma are padded on every picture edge the ctb touches;
+* the hxfy, fxhy and hxhy planes are then generated and padded for the ctb.
+*
+* \param[in]
+* ps_pad_interp_recon : recon luma/chroma planes, the three subpel planes,
+*                       their strides, ctb size and chroma array type
+* ctb_ctr : ctb horizontal position
+* vert_ctr : ctb vertical position
+* quality_preset : not referenced inside this function
+* ps_frm_ctb_prms : CTB characteristics parameters (ctb size, ctb counts,
+*                   cu aligned picture width and height)
+* pi2_hxhy_interm : 16 bit intermediate buffer used for the hxhy plane; it is
+*                   written and read with a row pitch of tot_wd samples
+* i4_bitrate_instance_id : subpel planes are generated only when this is 0;
+*                          the recon padding is done for every value
+* ps_func_selector : supplies the padding and interpolation function pointers
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_pad_interp_recon_ctb(
+ pad_interp_recon_frm_t *ps_pad_interp_recon,
+ WORD32 ctb_ctr,
+ WORD32 vert_ctr,
+ WORD32 quality_preset,
+ frm_ctb_ctxt_t *ps_frm_ctb_prms,
+ WORD16 *pi2_hxhy_interm,
+ WORD32 i4_bitrate_instance_id,
+ func_selector_t *ps_func_selector)
+{
+ UWORD8 *pu1_src, *pu1_src_uv, *pu1_buf_y, *pu1_buf_uv;
+ WORD32 stride, stride_uv, wd, ht, wd_uv, ht_uv, pad_x, pad_y, pad_subpel_x, pad_subpel_y;
+ WORD32 tot_wd, tot_ht, offset, cpy_ht_y, cpy_ht_uv;
+ WORD32 i4_chroma_vert_pad_default;
+ WORD32 top_extra_pix = 0, left_extra_pix = 0;
+
+ WORD32 ctb_size = ps_frm_ctb_prms->i4_ctb_size;
+ UWORD8 *pu1_dst_hxfy = ps_pad_interp_recon->pu1_sbpel_hxfy +
+ (vert_ctr * ctb_size * ps_pad_interp_recon->i4_luma_recon_stride) +
+ (ctb_ctr * ctb_size);
+ UWORD8 *pu1_dst_fxhy = ps_pad_interp_recon->pu1_sbpel_fxhy +
+ (vert_ctr * ctb_size * ps_pad_interp_recon->i4_luma_recon_stride) +
+ (ctb_ctr * ctb_size);
+ UWORD8 *pu1_dst_hxhy = ps_pad_interp_recon->pu1_sbpel_hxhy +
+ (vert_ctr * ctb_size * ps_pad_interp_recon->i4_luma_recon_stride) +
+ (ctb_ctr * ctb_size);
+ UWORD8 u1_is_422 = (ps_pad_interp_recon->u1_chroma_array_type == 2);
+
+ ihevc_pad_top_ft *pf_pad_top = ps_func_selector->ihevc_pad_top_fptr;
+ ihevc_pad_bottom_ft *pf_pad_bottom = ps_func_selector->ihevc_pad_bottom_fptr;
+ ihevc_pad_left_luma_ft *pf_pad_left_luma = ps_func_selector->ihevc_pad_left_luma_fptr;
+ ihevc_pad_left_chroma_ft *pf_pad_left_chroma = ps_func_selector->ihevc_pad_left_chroma_fptr;
+ ihevc_pad_right_luma_ft *pf_pad_right_luma = ps_func_selector->ihevc_pad_right_luma_fptr;
+ ihevc_pad_right_chroma_ft *pf_pad_right_chroma = ps_func_selector->ihevc_pad_right_chroma_fptr;
+
+ ihevc_inter_pred_ft *pf_inter_pred_luma_horz =
+ ps_func_selector->ihevc_inter_pred_luma_horz_fptr;
+ ihevc_inter_pred_ft *pf_inter_pred_luma_vert =
+ ps_func_selector->ihevc_inter_pred_luma_vert_fptr;
+ ihevc_inter_pred_w16out_ft *pf_inter_pred_luma_horz_w16out =
+ ps_func_selector->ihevc_inter_pred_luma_horz_w16out_fptr;
+ ihevc_inter_pred_w16inp_ft *pf_inter_pred_luma_vert_w16inp =
+ ps_func_selector->ihevc_inter_pred_luma_vert_w16inp_fptr;
+ stride = ps_pad_interp_recon->i4_luma_recon_stride;
+ wd = ps_pad_interp_recon->i4_ctb_size;
+ ht = ps_pad_interp_recon->i4_ctb_size;
+
+ pu1_src = (UWORD8 *)ps_pad_interp_recon->pu1_luma_recon + (vert_ctr * ctb_size * stride) +
+ (ctb_ctr * ctb_size);
+
+ stride_uv = ps_pad_interp_recon->i4_chrm_recon_stride;
+ wd_uv = ps_pad_interp_recon->i4_ctb_size;
+ ht_uv = ps_pad_interp_recon->i4_ctb_size >> (0 == u1_is_422); /* halved unless u1_is_422 is set */
+
+ pu1_src_uv = (UWORD8 *)ps_pad_interp_recon->pu1_chrm_recon +
+ (vert_ctr * (ctb_size >> (0 == u1_is_422)) * stride_uv) + (ctb_ctr * ctb_size);
+
+ pad_x = ALIGN8(NTAPS_LUMA);
+ pad_y = ALIGN8(NTAPS_LUMA);
+ pad_subpel_x = PAD_HORZ - pad_x;
+ pad_subpel_y = PAD_VERT - pad_y;
+
+ offset = pad_x + (pad_y * stride);
+
+ tot_wd = wd + (pad_x << 1); /* reassigned below before it is read */
+ tot_ht = ht + (pad_y << 1); /* reassigned below before it is read */
+
+ i4_chroma_vert_pad_default = PAD_VERT >> (0 == u1_is_422);
+
+ if(ctb_ctr == (ps_frm_ctb_prms->i4_num_ctbs_horz - 1))
+ {
+ WORD32 last_ctb_x =
+ ps_frm_ctb_prms->i4_cu_aligned_pic_wd -
+ ((ps_frm_ctb_prms->i4_num_ctbs_horz - 1) * ps_pad_interp_recon->i4_ctb_size);
+ wd = last_ctb_x; /* width left over for the last ctb column */
+ wd_uv = last_ctb_x;
+ }
+ if(vert_ctr == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1))
+ {
+ WORD32 last_ctb_y =
+ ps_frm_ctb_prms->i4_cu_aligned_pic_ht -
+ ((ps_frm_ctb_prms->i4_num_ctbs_vert - 1) * ps_pad_interp_recon->i4_ctb_size);
+ ht = last_ctb_y; /* height left over for the last ctb row */
+ ht_uv = last_ctb_y >> (0 == u1_is_422);
+ }
+ tot_ht = ht;
+ tot_wd = wd;
+ pu1_buf_y = pu1_src;
+ pu1_buf_uv = pu1_src_uv;
+ cpy_ht_y = ht;
+ cpy_ht_uv = ht_uv;
+ if(vert_ctr > 0)
+ {
+ top_extra_pix = 8; /* left/right padding then starts 8 luma rows above this ctb */
+ }
+ if(ctb_ctr > 0)
+ {
+ left_extra_pix = 8; /* top/bottom padding then starts 8 columns left of this ctb */
+ }
+ /*top padding*/
+ if(vert_ctr == 0)
+ {
+ PAD_BUF_VER(
+ pu1_src - left_extra_pix, stride, wd + left_extra_pix, PAD_HORZ, PAD_VERT, pf_pad_top);
+ PAD_BUF_VER(
+ pu1_src_uv - left_extra_pix,
+ stride_uv,
+ wd_uv + left_extra_pix,
+ PAD_HORZ,
+ i4_chroma_vert_pad_default,
+ pf_pad_top);
+ tot_ht = pad_y + ht - 8;
+ /* first ctb row: the left/right padding below starts PAD_VERT luma rows
+ * (i4_chroma_vert_pad_default chroma rows) above the ctb and is that much taller */
+ pu1_buf_y = pu1_src - PAD_VERT * stride;
+ pu1_buf_uv = pu1_src_uv - i4_chroma_vert_pad_default * stride_uv;
+ cpy_ht_y += PAD_VERT;
+ cpy_ht_uv += i4_chroma_vert_pad_default;
+ }
+ /*bottom padding*/
+ if(vert_ctr == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1))
+ {
+ PAD_BUF_VER(
+ (pu1_src - left_extra_pix + (ht * stride)),
+ stride,
+ wd + left_extra_pix,
+ PAD_HORZ,
+ PAD_VERT,
+ pf_pad_bottom);
+ PAD_BUF_VER(
+ (pu1_src_uv - left_extra_pix + (ht_uv * stride_uv)),
+ stride_uv,
+ wd_uv + left_extra_pix,
+ PAD_HORZ,
+ i4_chroma_vert_pad_default,
+ pf_pad_bottom);
+ tot_ht = pad_y + ht + 8;
+ /* last ctb row: only the height of the left/right padding grows, the start
+ * pointers are left as they are */
+ cpy_ht_y += PAD_VERT;
+ cpy_ht_uv += i4_chroma_vert_pad_default;
+ }
+
+ /*Left padding*/
+ if(ctb_ctr == 0)
+ {
+ PAD_BUF_HOR(
+ (pu1_buf_y - top_extra_pix * stride),
+ stride,
+ cpy_ht_y + top_extra_pix,
+ PAD_HORZ,
+ PAD_VERT,
+ pf_pad_left_luma);
+ PAD_BUF_HOR(
+ pu1_buf_uv - (top_extra_pix >> 1) * (u1_is_422 + 1) * stride_uv,
+ stride_uv,
+ cpy_ht_uv + (top_extra_pix >> 1) * (u1_is_422 + 1),
+ PAD_HORZ,
+ i4_chroma_vert_pad_default,
+ pf_pad_left_chroma);
+ tot_wd = pad_x + wd - 8;
+ }
+ /*right padding*/
+ if(ctb_ctr == (ps_frm_ctb_prms->i4_num_ctbs_horz - 1))
+ {
+ PAD_BUF_HOR(
+ (pu1_buf_y - (top_extra_pix * stride) + wd),
+ stride,
+ cpy_ht_y + top_extra_pix,
+ PAD_HORZ,
+ PAD_VERT,
+ pf_pad_right_luma);
+ PAD_BUF_HOR(
+ (pu1_buf_uv - ((top_extra_pix >> 1) * (u1_is_422 + 1) * stride_uv) + wd_uv),
+ stride_uv,
+ cpy_ht_uv + (top_extra_pix >> 1) * (u1_is_422 + 1),
+ PAD_HORZ,
+ i4_chroma_vert_pad_default,
+ pf_pad_right_chroma);
+ tot_wd = pad_x + wd + 8;
+ }
+
+ pu1_src -= offset; /* step back by pad_x columns and pad_y rows */
+ pu1_dst_hxhy -= offset;
+ pu1_dst_hxfy -= offset;
+ pu1_dst_fxhy -= offset;
+
+ {
+ tot_wd = ALIGN16(tot_wd);
+ if(0 ==
+ i4_bitrate_instance_id) //do the following subpel calculations for reference bit-rate instance only
+ {
+ /* HxFY plane */
+ pf_inter_pred_luma_horz(
+ pu1_src,
+ pu1_dst_hxfy,
+ stride,
+ stride,
+ (WORD8 *)gai1_hevc_luma_filter_taps[2],
+ tot_ht,
+ tot_wd);
+
+ pf_inter_pred_luma_vert( /* FxHY plane */
+ pu1_src,
+ pu1_dst_fxhy,
+ stride,
+ stride,
+ (WORD8 *)gai1_hevc_luma_filter_taps[2],
+ tot_ht,
+ tot_wd);
+
+ pf_inter_pred_luma_horz_w16out( /* HxHY plane, first pass into pi2_hxhy_interm */
+ pu1_src - 3 * stride,
+ pi2_hxhy_interm,
+ stride,
+ tot_wd,
+ (WORD8 *)gai1_hevc_luma_filter_taps[2],
+ (tot_ht + 7),
+ tot_wd);
+
+ /* "Stride" of intermediate buffer in pixels,equals tot_wd */
+ pf_inter_pred_luma_vert_w16inp( /* HxHY plane, second pass out of pi2_hxhy_interm */
+ pi2_hxhy_interm + (3 * tot_wd),
+ pu1_dst_hxhy,
+ tot_wd,
+ stride,
+ (WORD8 *)gai1_hevc_luma_filter_taps[2],
+ tot_ht,
+ tot_wd);
+
+ ihevce_subpel_padding(
+ pu1_dst_fxhy,
+ stride,
+ tot_wd,
+ tot_ht,
+ pad_subpel_x,
+ pad_subpel_y,
+ ctb_ctr,
+ vert_ctr,
+ ps_frm_ctb_prms->i4_num_ctbs_horz,
+ ps_frm_ctb_prms->i4_num_ctbs_vert,
+ ps_func_selector);
+
+ ihevce_subpel_padding(
+ pu1_dst_hxfy,
+ stride,
+ tot_wd,
+ tot_ht,
+ pad_subpel_x,
+ pad_subpel_y,
+ ctb_ctr,
+ vert_ctr,
+ ps_frm_ctb_prms->i4_num_ctbs_horz,
+ ps_frm_ctb_prms->i4_num_ctbs_vert,
+ ps_func_selector);
+
+ ihevce_subpel_padding(
+ pu1_dst_hxhy,
+ stride,
+ tot_wd,
+ tot_ht,
+ pad_subpel_x,
+ pad_subpel_y,
+ ctb_ctr,
+ vert_ctr,
+ ps_frm_ctb_prms->i4_num_ctbs_horz,
+ ps_frm_ctb_prms->i4_num_ctbs_vert,
+ ps_func_selector);
+ }
+ }
+}
+/*!
+******************************************************************************
+* \if Function name : ihevce_pad_interp_recon_src_ctb \endif
+*
+* \brief
+* Pads the source-recon luma plane (pu1_luma_recon_src) and, when
+* is_chroma_needs_padding is non-zero, the source-recon chroma plane
+* (pu1_chrm_recon_src) around a ctb that lies on a picture edge.
+* Ctbs that touch no picture edge fall through every branch untouched.
+*
+* \param[in]
+* ps_pad_interp_recon : plane pointers, strides, ctb size, chroma array type
+* ctb_ctr : ctb horizontal position
+* vert_ctr : ctb vertical position
+* ps_frm_ctb_prms : ctb size, ctb counts, cu aligned picture width / height
+* i4_bitrate_instance_id : not referenced inside this function
+* ps_func_selector : supplies the padding function pointers
+* is_chroma_needs_padding : non-zero to pad chroma as well as luma
+*
+* \return
+* None
+*
+* NOTE(review): the edge tests form one if / else-if chain. With a single ctb
+* column the ctb_ctr == 0 branch is taken and the right-edge branch is never
+* reached; likewise with a single ctb row the vert_ctr == 0 branches win over
+* the bottom-edge ones. Confirm such picture sizes cannot occur here.
+*
+*****************************************************************************
+*/
+void ihevce_pad_interp_recon_src_ctb(
+ pad_interp_recon_frm_t *ps_pad_interp_recon,
+ WORD32 ctb_ctr,
+ WORD32 vert_ctr,
+ frm_ctb_ctxt_t *ps_frm_ctb_prms,
+ WORD32 i4_bitrate_instance_id,
+ func_selector_t *ps_func_selector,
+ WORD32 is_chroma_needs_padding)
+{
+ UWORD8 *pu1_src, *pu1_src_uv;
+ WORD32 stride, stride_uv, wd, ht, wd_uv, ht_uv, pad_x, pad_y;
+ WORD32 tot_wd, tot_ht;
+ WORD32 i4_chroma_vert_pad_default;
+
+ WORD32 ctb_size = ps_frm_ctb_prms->i4_ctb_size;
+ UWORD8 u1_is_422 = (ps_pad_interp_recon->u1_chroma_array_type == 2);
+
+ ihevc_pad_top_ft *pf_pad_top = ps_func_selector->ihevc_pad_top_fptr;
+ ihevc_pad_bottom_ft *pf_pad_bottom = ps_func_selector->ihevc_pad_bottom_fptr;
+ ihevc_pad_left_luma_ft *pf_pad_left_luma = ps_func_selector->ihevc_pad_left_luma_fptr;
+ ihevc_pad_left_chroma_ft *pf_pad_left_chroma = ps_func_selector->ihevc_pad_left_chroma_fptr;
+ ihevc_pad_right_luma_ft *pf_pad_right_luma = ps_func_selector->ihevc_pad_right_luma_fptr;
+ ihevc_pad_right_chroma_ft *pf_pad_right_chroma = ps_func_selector->ihevc_pad_right_chroma_fptr;
+
+ /* Luma: start of the current ctb in the source-recon plane, stride and size */
+ pu1_src = (UWORD8 *)ps_pad_interp_recon->pu1_luma_recon_src +
+ (vert_ctr * ctb_size * ps_pad_interp_recon->i4_luma_recon_stride) +
+ (ctb_ctr * ctb_size);
+
+ stride = ps_pad_interp_recon->i4_luma_recon_stride;
+ wd = ps_pad_interp_recon->i4_ctb_size;
+ ht = ps_pad_interp_recon->i4_ctb_size;
+
+ pu1_src_uv =
+ (UWORD8 *)ps_pad_interp_recon->pu1_chrm_recon_src +
+ (vert_ctr * (ctb_size >> (0 == u1_is_422)) * ps_pad_interp_recon->i4_chrm_recon_stride) +
+ (ctb_ctr * ctb_size);
+
+ stride_uv = ps_pad_interp_recon->i4_chrm_recon_stride;
+ wd_uv = ps_pad_interp_recon->i4_ctb_size;
+ ht_uv = ps_pad_interp_recon->i4_ctb_size >> (0 == u1_is_422); /* halved unless u1_is_422 is set */
+
+ pad_x = ALIGN8(NTAPS_LUMA); /* only feeds tot_wd below */
+ pad_y = ALIGN8(NTAPS_LUMA); /* only feeds tot_ht below */
+
+ tot_wd = wd + (pad_x << 1); /* not read again in this function */
+ tot_ht = ht + (pad_y << 1); /* not read again in this function */
+
+ i4_chroma_vert_pad_default = PAD_VERT >> (0 == u1_is_422);
+
+ if(ctb_ctr == (ps_frm_ctb_prms->i4_num_ctbs_horz - 1))
+ {
+ WORD32 last_ctb_x =
+ ps_frm_ctb_prms->i4_cu_aligned_pic_wd -
+ ((ps_frm_ctb_prms->i4_num_ctbs_horz - 1) * ps_pad_interp_recon->i4_ctb_size);
+ wd = last_ctb_x; /* width left over for the last ctb column */
+ wd_uv = last_ctb_x;
+ }
+ if(vert_ctr == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1))
+ {
+ WORD32 last_ctb_y =
+ ps_frm_ctb_prms->i4_cu_aligned_pic_ht -
+ ((ps_frm_ctb_prms->i4_num_ctbs_vert - 1) * ps_pad_interp_recon->i4_ctb_size);
+ ht = last_ctb_y; /* height left over for the last ctb row */
+ ht_uv = last_ctb_y >> (0 == u1_is_422);
+ }
+
+ if(ctb_ctr == 0) /* first ctb column: left padding, plus top / bottom at the corners */
+ {
+ if(vert_ctr == 0)
+ {
+ PAD_BUF_HOR(pu1_src, stride, ht, PAD_HORZ, PAD_VERT, pf_pad_left_luma);
+ PAD_BUF_VER(pu1_src - PAD_HORZ, stride, PAD_HORZ + wd, PAD_HORZ, PAD_VERT, pf_pad_top); /* starts PAD_HORZ columns left of the ctb */
+ if(is_chroma_needs_padding)
+ {
+ PAD_BUF_HOR(
+ pu1_src_uv,
+ stride_uv,
+ ht_uv,
+ PAD_HORZ,
+ i4_chroma_vert_pad_default,
+ pf_pad_left_chroma);
+ PAD_BUF_VER(
+ pu1_src_uv - PAD_HORZ,
+ stride_uv,
+ PAD_HORZ + wd_uv,
+ PAD_HORZ,
+ i4_chroma_vert_pad_default,
+ pf_pad_top);
+ }
+ }
+ else if(vert_ctr == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1))
+ {
+ PAD_BUF_HOR(pu1_src - 8 * stride, stride, ht + 8, PAD_HORZ, PAD_VERT, pf_pad_left_luma); /* starts 8 rows above the ctb */
+ PAD_BUF_VER(
+ (pu1_src - PAD_HORZ + (ht * stride)),
+ stride,
+ PAD_HORZ + wd,
+ PAD_HORZ,
+ PAD_VERT,
+ pf_pad_bottom);
+ if(is_chroma_needs_padding)
+ {
+ PAD_BUF_HOR(
+ pu1_src_uv - 4 * (u1_is_422 + 1) * stride_uv,
+ stride_uv,
+ ht_uv + 4 * (u1_is_422 + 1),
+ PAD_HORZ,
+ i4_chroma_vert_pad_default,
+ pf_pad_left_chroma);
+ PAD_BUF_VER(
+ (pu1_src_uv - PAD_HORZ + (ht_uv * stride_uv)),
+ stride_uv,
+ PAD_HORZ + wd_uv,
+ PAD_HORZ,
+ i4_chroma_vert_pad_default,
+ pf_pad_bottom);
+ }
+ }
+ else
+ {
+ PAD_BUF_HOR(pu1_src - 8 * stride, stride, ht + 8, PAD_HORZ, PAD_VERT, pf_pad_left_luma); /* starts 8 rows above the ctb */
+ if(is_chroma_needs_padding)
+ {
+ PAD_BUF_HOR(
+ pu1_src_uv - 4 * (u1_is_422 + 1) * stride_uv,
+ stride_uv,
+ ht_uv + 4 * (u1_is_422 + 1),
+ PAD_HORZ,
+ i4_chroma_vert_pad_default,
+ pf_pad_left_chroma);
+ }
+ }
+ }
+ else if(ctb_ctr == (ps_frm_ctb_prms->i4_num_ctbs_horz - 1)) /* last ctb column: right padding, plus top / bottom at the corners */
+ {
+ if(vert_ctr == 0)
+ {
+ PAD_BUF_HOR(pu1_src + wd, stride, ht, PAD_HORZ, PAD_VERT, pf_pad_right_luma);
+ PAD_BUF_VER(pu1_src - 8, stride, PAD_HORZ + (wd + 8), PAD_HORZ, PAD_VERT, pf_pad_top); /* starts 8 columns left of the ctb */
+ if(is_chroma_needs_padding)
+ {
+ PAD_BUF_HOR(
+ pu1_src_uv + wd_uv,
+ stride_uv,
+ ht_uv,
+ PAD_HORZ,
+ i4_chroma_vert_pad_default,
+ pf_pad_right_chroma);
+ PAD_BUF_VER(
+ pu1_src_uv - 8,
+ stride_uv,
+ PAD_HORZ + (wd_uv + 8),
+ PAD_HORZ,
+ i4_chroma_vert_pad_default,
+ pf_pad_top);
+ }
+ }
+ else if(vert_ctr == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1))
+ {
+ PAD_BUF_HOR(
+ (pu1_src - (8 * stride) + wd),
+ stride,
+ ht + 8,
+ PAD_HORZ,
+ PAD_VERT,
+ pf_pad_right_luma);
+ PAD_BUF_VER(
+ (pu1_src - 8 + (ht * stride)),
+ stride,
+ PAD_HORZ + (wd + 8),
+ PAD_HORZ,
+ PAD_VERT,
+ pf_pad_bottom);
+ if(is_chroma_needs_padding)
+ {
+ PAD_BUF_HOR(
+ (pu1_src_uv - (4 * (u1_is_422 + 1) * stride_uv) + wd_uv),
+ stride_uv,
+ ht_uv + 4 * (u1_is_422 + 1),
+ PAD_HORZ,
+ i4_chroma_vert_pad_default,
+ pf_pad_right_chroma);
+ PAD_BUF_VER(
+ (pu1_src_uv - 8 + (ht_uv * stride_uv)),
+ stride_uv,
+ PAD_HORZ + (wd_uv + 8),
+ PAD_HORZ,
+ i4_chroma_vert_pad_default,
+ pf_pad_bottom);
+ }
+ }
+ else
+ {
+ PAD_BUF_HOR(
+ (pu1_src - (8 * stride) + wd),
+ stride,
+ ht + 8,
+ PAD_HORZ,
+ PAD_VERT,
+ pf_pad_right_luma);
+ if(is_chroma_needs_padding)
+ {
+ PAD_BUF_HOR(
+ (pu1_src_uv - (4 * (u1_is_422 + 1) * stride_uv) + wd_uv),
+ stride_uv,
+ ht_uv + 4 * (u1_is_422 + 1),
+ PAD_HORZ,
+ i4_chroma_vert_pad_default,
+ pf_pad_right_chroma);
+ }
+ }
+ }
+ else if(vert_ctr == 0) /* interior column of the first ctb row: top padding only */
+ {
+ PAD_BUF_VER(pu1_src - 8, stride, (wd + 8), PAD_HORZ, PAD_VERT, pf_pad_top);
+ if(is_chroma_needs_padding)
+ {
+ PAD_BUF_VER(
+ pu1_src_uv - 8,
+ stride_uv,
+ (wd_uv + 8),
+ PAD_HORZ,
+ i4_chroma_vert_pad_default,
+ pf_pad_top);
+ }
+ }
+ else if(vert_ctr == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1)) /* interior column of the last ctb row: bottom padding only */
+ {
+ PAD_BUF_VER(
+ (pu1_src - 8 + (ht * stride)), stride, (wd + 8), PAD_HORZ, PAD_VERT, pf_pad_bottom);
+ if(is_chroma_needs_padding)
+ {
+ PAD_BUF_VER(
+ (pu1_src_uv - 8 + (ht_uv * stride_uv)),
+ stride_uv,
+ (wd_uv + 8),
+ PAD_HORZ,
+ i4_chroma_vert_pad_default,
+ pf_pad_bottom);
+ }
+ }
+}
diff --git a/encoder/ihevce_enc_subpel_gen.h b/encoder/ihevce_enc_subpel_gen.h
new file mode 100644
index 0000000..c58fa0b
--- /dev/null
+++ b/encoder/ihevce_enc_subpel_gen.h
@@ -0,0 +1,98 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_enc_subpel_gen.h
+*
+* \brief
+* This file contains interface definition of Subpel Plane generation
+* function
+*
+* \date
+* 29/12/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_ENC_SUBPEL_GEN_H_
+#define _IHEVCE_ENC_SUBPEL_GEN_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Variable Declarations */
+/*****************************************************************************/
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+void ihevce_subpel_padding(
+ UWORD8 *pu1_dst,
+ WORD32 stride,
+ WORD32 tot_wd,
+ WORD32 tot_ht,
+ WORD32 pad_subpel_x,
+ WORD32 pad_subpel_y,
+ WORD32 ctb_ctr,
+ WORD32 vert_ctr,
+ frm_ctb_ctxt_t *ps_frm_ctb_prms,
+ func_selector_t *ps_func_selector);
+
+void ihevce_pad_interp_recon_ctb(
+ pad_interp_recon_frm_t *ps_pad_interp_recon,
+ WORD32 ctb_ctr,
+ WORD32 vert_ctr,
+ WORD32 quality_preset,
+ frm_ctb_ctxt_t *ps_frm_ctb_prms,
+ WORD16 *pi2_hxhy_interm,
+ WORD32 i4_bitrate_instance,
+ func_selector_t *ps_func_selector);
+
+void ihevce_pad_interp_recon_src_ctb(
+ pad_interp_recon_frm_t *ps_pad_interp_recon,
+ WORD32 ctb_ctr,
+ WORD32 vert_ctr,
+ frm_ctb_ctxt_t *ps_frm_ctb_prms,
+ WORD32 i4_bitrate_instance_id,
+ func_selector_t *ps_func_selector,
+ WORD32 is_chroma_needs_padding);
+
+#endif /* _IHEVCE_ENC_SUBPEL_GEN_H_ */
diff --git a/encoder/ihevce_encode_header.c b/encoder/ihevce_encode_header.c
new file mode 100644
index 0000000..5dfcbec
--- /dev/null
+++ b/encoder/ihevce_encode_header.c
@@ -0,0 +1,3317 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file ihevce_encode_header.c
+*
+* @brief
+* This file contains function definitions related to header encoding
+*
+* @author
+* Ittiam
+*
+* List of Functions
+* ihevce_generate_nal_unit_header
+* ihevce_generate_when_profile_present
+* ihevce_generate_profile_tier_level
+* ihevce_short_term_ref_pic_set
+* ihevce_generate_bit_rate_pic_rate_info
+* ihevce_generate_aud
+* ihevce_generate_eos
+* ihevce_generate_vps
+* ihevce_generate_sps
+* ihevce_generate_pps
+* ihevce_generate_slice_header
+* ihevce_populate_vps
+* ihevce_populate_sps
+* ihevce_populate_pps
+* ihevce_populate_slice_header
+* ihevce_insert_entry_offset_slice_header
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_macros.h"
+#include "ihevc_debug.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+#include "ihevc_trans_tables.h"
+#include "ihevc_trans_macros.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_multi_thrd_funcs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_error_checks.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_global_tables.h"
+#include "ihevce_encode_header.h"
+#include "ihevce_encode_header_sei_vui.h"
+#include "ihevce_trace.h"
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+#define CU_LEVEL_QP_LIMIT_8x8 3
+#define CU_LEVEL_QP_LIMIT_16x16 2
+#define CU_LEVEL_QP_LIMIT_32x32 1
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+*  @brief Writes the nal unit header into the stream as per section 7.3.1.2
+*
+*  @par   Description
+*  Puts forbidden_zero_bit together with nal_unit_type (7 bits in one go),
+*  then nuh_reserved_zero_6bits (6 bits, always 0) and finally
+*  nuh_temporal_id_plus1 (3 bits), i.e. 16 bits in total
+*
+*  @param[inout]   ps_bitstrm
+*  pointer to bitstream context (handle)
+*
+*  @param[in]   nal_unit_type
+*  nal type to be inserted; asserted to be in the range 0 to 63
+*
+*  @param[in]   nuh_temporal_id
+*  temporal id to be inserted; asserted to be in the range 0 to 6
+*
+*  @return      success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ihevce_generate_nal_unit_header(
+    bitstrm_t *ps_bitstrm, WORD32 nal_unit_type, WORD32 nuh_temporal_id)
+{
+    /* the stream carries the temporal id incremented by one */
+    const WORD32 temporal_id_plus1 = nuh_temporal_id + 1;
+    WORD32 status = IHEVCE_SUCCESS;
+
+    /* sanity checks */
+    ASSERT((nal_unit_type >= 0) && (nal_unit_type < 64));
+    ASSERT((nuh_temporal_id >= 0) && (nuh_temporal_id < 7));
+
+    /* 1 bit forbidden_zero_bit + 6 bits nal_unit_type: a type below 64 */
+    /* written as 7 bits leaves the leading bit at zero                 */
+    PUT_BITS(ps_bitstrm, nal_unit_type, (1 + 6), status);
+
+    /* nuh_reserved_zero_6bits */
+    PUT_BITS(ps_bitstrm, 0, 6, status);
+
+    /* nuh_temporal_id_plus1 */
+    PUT_BITS(ps_bitstrm, temporal_id_plus1, 3, status);
+
+    return status;
+}
+
+/**
+******************************************************************************
+*
+*  @brief Writes the profile related fields of Profile, Tier and Level data
+*
+*  @par   Description
+*  Emits, in this order: profile space (2 bits), tier flag (1 bit), profile
+*  idc (5 bits), MAX_PROFILE_COMPATBLTY compatibility flags, thirteen single
+*  bit source / constraint flags and 35 zero bits (written as 16 + 16 + 3).
+*  Called when profile_present flag is 1
+*
+*  @param[in]   ps_bitstrm
+*  pointer to bitstream context (handle)
+*
+*  @param[in]   ps_ptl
+*  pointer to structure containing Profile, Tier and Level data
+*
+*  @return      success or failure error code
+*
+******************************************************************************
+*/
+static WORD32
+    ihevce_generate_when_profile_present(bitstrm_t *ps_bitstrm, profile_tier_lvl_t *ps_ptl)
+{
+    WORD32 status = IHEVCE_SUCCESS;
+    WORD32 compat_idx;
+
+    /* ---- profile space, tier and profile idc ---- */
+    PUT_BITS(ps_bitstrm, ps_ptl->i1_profile_space, 2, status);
+    ENTROPY_TRACE("XXX_profile_space[]", ps_ptl->i1_profile_space);
+
+    PUT_BITS(ps_bitstrm, ps_ptl->i1_tier_flag, 1, status);
+    ENTROPY_TRACE("XXX_tier_flag[]", ps_ptl->i1_tier_flag);
+
+    PUT_BITS(ps_bitstrm, ps_ptl->i1_profile_idc, 5, status);
+    ENTROPY_TRACE("XXX_profile_idc[]", ps_ptl->i1_profile_idc);
+
+    /* ---- one compatibility flag per profile ---- */
+    for(compat_idx = 0; compat_idx < MAX_PROFILE_COMPATBLTY; compat_idx++)
+    {
+        PUT_BITS(ps_bitstrm, ps_ptl->ai1_profile_compatibility_flag[compat_idx], 1, status);
+        ENTROPY_TRACE(
+            "XXX_profile_compatibility_flag[][j]",
+            ps_ptl->ai1_profile_compatibility_flag[compat_idx]);
+    }
+
+    /* ---- source description flags ---- */
+    PUT_BITS(ps_bitstrm, ps_ptl->i1_general_progressive_source_flag, 1, status);
+    ENTROPY_TRACE("XXX_progressive_source_flag[]", ps_ptl->i1_general_progressive_source_flag);
+
+    PUT_BITS(ps_bitstrm, ps_ptl->i1_general_interlaced_source_flag, 1, status);
+    ENTROPY_TRACE("XXX_interlaced_source_flag[]", ps_ptl->i1_general_interlaced_source_flag);
+
+    PUT_BITS(ps_bitstrm, ps_ptl->i1_general_non_packed_constraint_flag, 1, status);
+    ENTROPY_TRACE(
+        "XXX_non_packed_constraint_flag[]", ps_ptl->i1_general_non_packed_constraint_flag);
+
+    PUT_BITS(ps_bitstrm, ps_ptl->i1_frame_only_constraint_flag, 1, status);
+    ENTROPY_TRACE("XXX_frame_only_constraint_flag[]", ps_ptl->i1_frame_only_constraint_flag);
+
+    /* ---- bit depth constraint flags ---- */
+    PUT_BITS(ps_bitstrm, ps_ptl->i1_general_max_12bit_constraint_flag, 1, status);
+    ENTROPY_TRACE(
+        "XXX_general_max_12bit_constraint_flag[]", ps_ptl->i1_general_max_12bit_constraint_flag);
+
+    PUT_BITS(ps_bitstrm, ps_ptl->i1_general_max_10bit_constraint_flag, 1, status);
+    ENTROPY_TRACE(
+        "XXX_general_max_10bit_constraint_flag[]", ps_ptl->i1_general_max_10bit_constraint_flag);
+
+    PUT_BITS(ps_bitstrm, ps_ptl->i1_general_max_8bit_constraint_flag, 1, status);
+    ENTROPY_TRACE(
+        "XXX_general_max_8bit_constraint_flag[]", ps_ptl->i1_general_max_8bit_constraint_flag);
+
+    /* ---- chroma format constraint flags ---- */
+    PUT_BITS(ps_bitstrm, ps_ptl->i1_general_max_422chroma_constraint_flag, 1, status);
+    ENTROPY_TRACE(
+        "XXX_general_max_422chroma_constraint_flag[]",
+        ps_ptl->i1_general_max_422chroma_constraint_flag);
+
+    PUT_BITS(ps_bitstrm, ps_ptl->i1_general_max_420chroma_constraint_flag, 1, status);
+    ENTROPY_TRACE(
+        "XXX_general_max_420chroma_constraint_flag[]",
+        ps_ptl->i1_general_max_420chroma_constraint_flag);
+
+    PUT_BITS(ps_bitstrm, ps_ptl->i1_general_max_monochrome_constraint_flag, 1, status);
+    ENTROPY_TRACE(
+        "XXX_general_max_monochrome_constraint_flag[]",
+        ps_ptl->i1_general_max_monochrome_constraint_flag);
+
+    /* ---- remaining constraint flags ---- */
+    PUT_BITS(ps_bitstrm, ps_ptl->i1_general_intra_constraint_flag, 1, status);
+    ENTROPY_TRACE("XXX_general_intra_constraint_flag[]", ps_ptl->i1_general_intra_constraint_flag);
+
+    PUT_BITS(ps_bitstrm, ps_ptl->i1_general_one_picture_only_constraint_flag, 1, status);
+    ENTROPY_TRACE(
+        "XXX_general_one_picture_only_constraint_flag[]",
+        ps_ptl->i1_general_one_picture_only_constraint_flag);
+
+    PUT_BITS(ps_bitstrm, ps_ptl->i1_general_lower_bit_rate_constraint_flag, 1, status);
+    ENTROPY_TRACE(
+        "XXX_general_lower_bit_rate_constraint_flag[]",
+        ps_ptl->i1_general_lower_bit_rate_constraint_flag);
+
+    /* ---- 35 reserved zero bits, written in three pieces of 16, 16 and 3 ---- */
+    PUT_BITS(ps_bitstrm, 0, 16, status);
+    PUT_BITS(ps_bitstrm, 0, 16, status);
+    PUT_BITS(ps_bitstrm, 0, 3, status);
+    ENTROPY_TRACE("XXX_reserved_zero_35bits[]", 0);
+
+    return status;
+}
+
+/**
+******************************************************************************
+*
+*  @brief Generates Profile, Tier and Level data
+*
+*  @par   Description
+*  Generates Profile, Tier and Level data as per Section 7.3.3. The general
+*  profile fields are written only when i1_profile_present_flag is set; the
+*  general level is always written. These are followed by the per-sub-layer
+*  present flags, the reserved 2-bit fields (only when at least one sub-layer
+*  is signalled) and the optional per-sub-layer profile / level data.
+*
+*  @param[in]   ps_bitstrm
+*  pointer to bitstream context (handle)
+*
+*  @param[in]   ps_ptl
+*  pointer to structure containing Profile, Tier and Level data
+*
+*  @param[in]   i1_profile_present_flag
+*  flag that indicates whether profile-related data is present
+*
+*  @param[in]   i1_max_sub_layers_minus1
+*  (Maximum number of sub_layers present) minus 1
+*
+*  @return      success or failure error code (status of every write,
+*               including the nested profile helpers, is accumulated)
+*
+******************************************************************************
+*/
+static WORD32 ihevce_generate_profile_tier_level(
+    bitstrm_t *ps_bitstrm,
+    profile_tier_lvl_info_t *ps_ptl,
+    WORD8 i1_profile_present_flag,
+    WORD8 i1_max_sub_layers_minus1)
+{
+    WORD32 i;
+    WORD32 return_status = IHEVCE_SUCCESS;
+
+    if(i1_profile_present_flag)
+    {
+        /* Keep the helper's status so a failed write is reported to the caller */
+        return_status |= ihevce_generate_when_profile_present(ps_bitstrm, &ps_ptl->s_ptl_gen);
+    }
+
+    /* general_level_idc */
+    PUT_BITS(ps_bitstrm, ps_ptl->s_ptl_gen.u1_level_idc, 8, return_status);
+    ENTROPY_TRACE("general_level_idc", ps_ptl->s_ptl_gen.u1_level_idc);
+
+    for(i = 0; i < i1_max_sub_layers_minus1; i++)
+    {
+        /* sub_layer_profile_present_flag[i] */
+        PUT_BITS(ps_bitstrm, ps_ptl->ai1_sub_layer_profile_present_flag[i], 1, return_status);
+        ENTROPY_TRACE(
+            "sub_layer_profile_present_flag[i]", ps_ptl->ai1_sub_layer_profile_present_flag[i]);
+
+        /* sub_layer_level_present_flag[i] */
+        PUT_BITS(ps_bitstrm, ps_ptl->ai1_sub_layer_level_present_flag[i], 1, return_status);
+        ENTROPY_TRACE(
+            "sub_layer_level_present_flag[i]", ps_ptl->ai1_sub_layer_level_present_flag[i]);
+    }
+
+    /* The reserved 2-bit fields fill the remaining slots up to index 7, */
+    /* and are emitted only when at least one sub-layer entry was written */
+    if(i1_max_sub_layers_minus1 > 0)
+    {
+        for(i = i1_max_sub_layers_minus1; i < 8; i++)
+        {
+            /* reserved_zero_2bits[i] */
+            PUT_BITS(ps_bitstrm, 0, 2, return_status);
+            ENTROPY_TRACE("reserved_zero_2bits[i]", 0);
+        }
+    }
+
+    for(i = 0; i < i1_max_sub_layers_minus1; i++)
+    {
+        if(ps_ptl->ai1_sub_layer_profile_present_flag[i])
+        {
+            /* Sub-layer profile data reuses the general-profile writer */
+            return_status |=
+                ihevce_generate_when_profile_present(ps_bitstrm, &ps_ptl->as_ptl_sub[i]);
+        }
+
+        /* Level is gated by its own flag, independent of the profile flag */
+        if(ps_ptl->ai1_sub_layer_level_present_flag[i])
+        {
+            /* sub_layer_level_idc[i] */
+            PUT_BITS(ps_bitstrm, ps_ptl->as_ptl_sub[i].u1_level_idc, 8, return_status);
+            ENTROPY_TRACE("sub_layer_level_idc[i]", ps_ptl->as_ptl_sub[i].u1_level_idc);
+        }
+    }
+
+    return return_status;
+}
+
+/**
+*******************************************************************************
+*
+*  @brief
+*  Generates short term reference picture set
+*
+*  @par   Description
+*  Writes one short term reference picture set into the bitstream as per
+*  section 7.3.5.2. Only the explicitly coded form is supported: the
+*  inter_ref_pic_set_prediction_flag of the selected set is asserted to be 0.
+*  For every entry whose "used by current picture" flag is set, the counter
+*  pointed to by pi4_NumPocTotalCurr is incremented.
+*
+*  @param[in]   ps_bitstrm
+*  Pointer to bitstream structure
+*
+*  @param[in]   ps_stref_picset_base
+*  Pointer to first short term ref pic set structure
+*
+*  @param[in]   num_short_term_ref_pic_sets
+*  Number of short term reference pic sets (currently unused)
+*
+*  @param[in]   idx
+*  Current short term ref pic set id (index into ps_stref_picset_base)
+*
+*  @param[in,out] pi4_NumPocTotalCurr
+*  Counter incremented once per picture flagged as used by the current picture
+*
+*  @returns     success or failure error code
+*
+*******************************************************************************
+*/
+static WORD32 ihevce_short_term_ref_pic_set(
+    bitstrm_t *ps_bitstrm,
+    stref_picset_t *ps_stref_picset_base,
+    WORD32 num_short_term_ref_pic_sets,
+    WORD32 idx,
+    WORD32 *pi4_NumPocTotalCurr)
+{
+    WORD32 return_status = IHEVCE_SUCCESS;
+    WORD32 pic_idx = 0;
+    stref_picset_t *ps_rps = &ps_stref_picset_base[idx];
+
+    /* Not needed while inter RPS prediction is unsupported */
+    (void)num_short_term_ref_pic_sets;
+
+    /* The prediction flag is only signalled for sets other than the first */
+    if(idx > 0)
+    {
+        /* inter_ref_pic_set_prediction_flag */
+        PUT_BITS(ps_bitstrm, ps_rps->i1_inter_ref_pic_set_prediction_flag, 1, return_status);
+        ENTROPY_TRACE(
+            "inter_ref_pic_set_prediction_flag", ps_rps->i1_inter_ref_pic_set_prediction_flag);
+    }
+
+    /* This flag is assumed to be 0 for now */
+    ASSERT(0 == ps_rps->i1_inter_ref_pic_set_prediction_flag);
+
+    /* num_negative_pics */
+    PUT_BITS_UEV(ps_bitstrm, ps_rps->i1_num_neg_pics, return_status);
+    ENTROPY_TRACE("num_negative_pics", ps_rps->i1_num_neg_pics);
+
+    /* num_positive_pics */
+    PUT_BITS_UEV(ps_bitstrm, ps_rps->i1_num_pos_pics, return_status);
+    ENTROPY_TRACE("num_positive_pics", ps_rps->i1_num_pos_pics);
+
+    /* Negative pictures occupy the first i1_num_neg_pics array entries */
+    while(pic_idx < ps_rps->i1_num_neg_pics)
+    {
+        /* delta_poc_s0_minus1 */
+        PUT_BITS_UEV(ps_bitstrm, ps_rps->ai2_delta_poc[pic_idx] - 1, return_status);
+        ENTROPY_TRACE("delta_poc_s0_minus1", ps_rps->ai2_delta_poc[pic_idx] - 1);
+
+        /* used_by_curr_pic_s0_flag */
+        PUT_BITS(ps_bitstrm, ps_rps->ai1_used[pic_idx], 1, return_status);
+        ENTROPY_TRACE("used_by_curr_pic_s0_flag", ps_rps->ai1_used[pic_idx]);
+
+        /* Count the pictures used by the current picture */
+        if(0 != ps_rps->ai1_used[pic_idx])
+        {
+            (*pi4_NumPocTotalCurr)++;
+        }
+
+        pic_idx++;
+    }
+
+    /* Positive pictures follow directly after the negative ones */
+    while(pic_idx < (ps_rps->i1_num_pos_pics + ps_rps->i1_num_neg_pics))
+    {
+        /* delta_poc_s1_minus1 */
+        PUT_BITS_UEV(ps_bitstrm, ps_rps->ai2_delta_poc[pic_idx] - 1, return_status);
+        ENTROPY_TRACE("delta_poc_s1_minus1", ps_rps->ai2_delta_poc[pic_idx] - 1);
+
+        /* used_by_curr_pic_s1_flag */
+        PUT_BITS(ps_bitstrm, ps_rps->ai1_used[pic_idx], 1, return_status);
+        ENTROPY_TRACE("used_by_curr_pic_s1_flag", ps_rps->ai1_used[pic_idx]);
+
+        /* Count the pictures used by the current picture */
+        if(0 != ps_rps->ai1_used[pic_idx])
+        {
+            (*pi4_NumPocTotalCurr)++;
+        }
+
+        pic_idx++;
+    }
+
+    return return_status;
+}
+
+/**
+******************************************************************************
+*
+*  @brief Generates ref pic list modification
+*
+*  @par   Description
+*  Generate ref pic list modification syntax as per Section 7.3.6.2. The L0
+*  flag (and its entries, when set) is always written; the L1 flag and
+*  entries are written only for B slices. Every list entry is coded with
+*  32 - CLZ(i4_NumPocTotalCurr - 1) bits.
+*
+*  @param[in]   ps_bitstrm
+*  pointer to bitstream context (handle)
+*
+*  @param[in]   ps_slice_hdr
+*  pointer to structure containing slice header
+*
+*  @param[in]   i4_NumPocTotalCurr
+*  value from which the bit width of each list entry is derived
+*
+*  @return      success or failure error code
+*
+******************************************************************************
+*/
+static WORD32 ref_pic_list_modification(
+    bitstrm_t *ps_bitstrm, slice_header_t *ps_slice_hdr, WORD32 i4_NumPocTotalCurr)
+{
+    WORD32 return_status = IHEVCE_SUCCESS;
+    WORD32 entry_idx;
+
+    /* ref_pic_list_modification_flag_l0 */
+    PUT_BITS(
+        ps_bitstrm, ps_slice_hdr->s_rplm.i1_ref_pic_list_modification_flag_l0, 1, return_status);
+    ENTROPY_TRACE(
+        "ref_pic_list_modification_flag_l0",
+        ps_slice_hdr->s_rplm.i1_ref_pic_list_modification_flag_l0);
+
+    if(ps_slice_hdr->s_rplm.i1_ref_pic_list_modification_flag_l0)
+    {
+        for(entry_idx = 0; entry_idx < ps_slice_hdr->i1_num_ref_idx_l0_active; entry_idx++)
+        {
+            WORD32 num_bits = 32 - CLZ(i4_NumPocTotalCurr - 1);
+
+            /* list_entry_l0[ i ] */
+            PUT_BITS(
+                ps_bitstrm,
+                ps_slice_hdr->s_rplm.i1_list_entry_l0[entry_idx],
+                num_bits,
+                return_status);
+            ENTROPY_TRACE("list_entry_l0", ps_slice_hdr->s_rplm.i1_list_entry_l0[entry_idx]);
+        }
+    }
+
+    /* List 1 syntax exists only for B slices */
+    if(BSLICE != ps_slice_hdr->i1_slice_type)
+    {
+        return return_status;
+    }
+
+    /* ref_pic_list_modification_flag_l1 */
+    PUT_BITS(
+        ps_bitstrm, ps_slice_hdr->s_rplm.i1_ref_pic_list_modification_flag_l1, 1, return_status);
+    ENTROPY_TRACE(
+        "ref_pic_list_modification_flag_l1",
+        ps_slice_hdr->s_rplm.i1_ref_pic_list_modification_flag_l1);
+
+    if(ps_slice_hdr->s_rplm.i1_ref_pic_list_modification_flag_l1)
+    {
+        for(entry_idx = 0; entry_idx < ps_slice_hdr->i1_num_ref_idx_l1_active; entry_idx++)
+        {
+            WORD32 num_bits = 32 - CLZ(i4_NumPocTotalCurr - 1);
+
+            /* list_entry_l1[ i ] */
+            PUT_BITS(
+                ps_bitstrm,
+                ps_slice_hdr->s_rplm.i1_list_entry_l1[entry_idx],
+                num_bits,
+                return_status);
+            ENTROPY_TRACE("list_entry_l1", ps_slice_hdr->s_rplm.i1_list_entry_l1[entry_idx]);
+        }
+    }
+
+    return return_status;
+}
+
+/**
+******************************************************************************
+*
+*  @brief Generates Pred Weight Table
+*
+*  @par   Description
+*  Generate Pred Weight Table as per Section 7.3.5.4. Writes the luma and
+*  (for non-monochrome content) chroma weight denominators, the per-reference
+*  luma / chroma weight flags, and, for every flagged reference, the weight
+*  and offset deltas. The list 1 portion is written only for B slices.
+*
+*  A running count (luma flag = 1, chroma flag = 2, over both lists) is
+*  asserted to stay within 24 weighted entries.
+*
+*  @param[in]   ps_bitstrm
+*  pointer to bitstream context (handle)
+*
+*  @param[in]   ps_sps
+*  pointer to structure containing SPS data (chroma_format_idc is read)
+*
+*  @param[in]   ps_pps
+*  pointer to structure containing PPS data (currently unused)
+*
+*  @param[in]   ps_slice_hdr
+*  pointer to structure containing slice header
+*
+*  @return      success or failure error code
+*
+******************************************************************************
+*/
+static WORD32 ihevce_generate_pred_weight_table(
+    bitstrm_t *ps_bitstrm, sps_t *ps_sps, pps_t *ps_pps, slice_header_t *ps_slice_hdr)
+{
+    WORD32 i;
+    WORD32 delta_luma_weight;
+    WORD32 delta_chroma_weight;
+    WORD32 return_status = IHEVCE_SUCCESS;
+    pred_wt_ofst_t *ps_wt_ofst = &ps_slice_hdr->s_wt_ofst;
+    UWORD32 u4_luma_log2_weight_denom = ps_wt_ofst->i1_luma_log2_weight_denom;
+    WORD32 chroma_log2_weight_denom = (ps_wt_ofst->i1_chroma_log2_weight_denom);
+    /* The delta is a signed syntax element: do the subtraction in signed */
+    /* arithmetic so a chroma denom below the luma denom stays negative   */
+    WORD32 i4_delta_chroma_log2_weight_denom =
+        chroma_log2_weight_denom - (WORD32)u4_luma_log2_weight_denom;
+    WORD32 i4_wght_count = 0;
+
+    (void)ps_pps;
+    /* luma_log2_weight_denom */
+    PUT_BITS_UEV(ps_bitstrm, u4_luma_log2_weight_denom, return_status);
+    ENTROPY_TRACE("luma_log2_weight_denom", u4_luma_log2_weight_denom);
+
+    if(ps_sps->i1_chroma_format_idc != 0)
+    {
+        /* delta_chroma_log2_weight_denom */
+        PUT_BITS_SEV(ps_bitstrm, i4_delta_chroma_log2_weight_denom, return_status);
+        ENTROPY_TRACE("delta_chroma_log2_weight_denom", i4_delta_chroma_log2_weight_denom);
+    }
+
+    for(i = 0; i < ps_slice_hdr->i1_num_ref_idx_l0_active; i++)
+    {
+        /* luma_weight_l0_flag[ i ] */
+        PUT_BITS(ps_bitstrm, ps_wt_ofst->i1_luma_weight_l0_flag[i], 1, return_status);
+        i4_wght_count += ps_wt_ofst->i1_luma_weight_l0_flag[i];
+        assert(i4_wght_count <= 24);
+        ENTROPY_TRACE("luma_weight_l0_flag[ i ]", ps_wt_ofst->i1_luma_weight_l0_flag[i]);
+    }
+
+    if(ps_sps->i1_chroma_format_idc != 0)
+    {
+        for(i = 0; i < ps_slice_hdr->i1_num_ref_idx_l0_active; i++)
+        {
+            /* chroma_weight_l0_flag[ i ] */
+            PUT_BITS(ps_bitstrm, ps_wt_ofst->i1_chroma_weight_l0_flag[i], 1, return_status);
+            /* A chroma flag covers both Cb and Cr, hence it counts twice */
+            i4_wght_count += 2 * ps_wt_ofst->i1_chroma_weight_l0_flag[i];
+            assert(i4_wght_count <= 24);
+            ENTROPY_TRACE("chroma_weight_l0_flag[ i ]", ps_wt_ofst->i1_chroma_weight_l0_flag[i]);
+        }
+    }
+
+    /* Default weights (1 << denom); the coded deltas are relative to these */
+    delta_luma_weight = (1 << u4_luma_log2_weight_denom);
+    delta_chroma_weight = (1 << chroma_log2_weight_denom);
+
+    for(i = 0; i < ps_slice_hdr->i1_num_ref_idx_l0_active; i++)
+    {
+        if(ps_wt_ofst->i1_luma_weight_l0_flag[i])
+        {
+            /* delta_luma_weight_l0[ i ] */
+            PUT_BITS_SEV(
+                ps_bitstrm, ps_wt_ofst->i2_luma_weight_l0[i] - delta_luma_weight, return_status);
+            ENTROPY_TRACE(
+                "delta_luma_weight_l0[ i ]", ps_wt_ofst->i2_luma_weight_l0[i] - delta_luma_weight);
+
+            /* luma_offset_l0[ i ] */
+            PUT_BITS_SEV(ps_bitstrm, ps_wt_ofst->i2_luma_offset_l0[i], return_status);
+            ENTROPY_TRACE("luma_offset_l0[ i ]", ps_wt_ofst->i2_luma_offset_l0[i]);
+        }
+
+        if(ps_wt_ofst->i1_chroma_weight_l0_flag[i])
+        {
+            WORD32 shift = (1 << (BIT_DEPTH_CHROMA - 1));
+            WORD32 delta_chroma_weight_l0[2];
+            WORD32 delta_chroma_offset_l0[2];
+
+            delta_chroma_weight_l0[0] = ps_wt_ofst->i2_chroma_weight_l0_cb[i] - delta_chroma_weight;
+            delta_chroma_weight_l0[1] = ps_wt_ofst->i2_chroma_weight_l0_cr[i] - delta_chroma_weight;
+
+            /* Coded offset delta = offset + ((shift * weight) >> denom) - shift */
+            delta_chroma_offset_l0[0] =
+                ps_wt_ofst->i2_chroma_offset_l0_cb[i] +
+                ((shift * ps_wt_ofst->i2_chroma_weight_l0_cb[i]) >> chroma_log2_weight_denom) -
+                shift;
+            delta_chroma_offset_l0[1] =
+                ps_wt_ofst->i2_chroma_offset_l0_cr[i] +
+                ((shift * ps_wt_ofst->i2_chroma_weight_l0_cr[i]) >> chroma_log2_weight_denom) -
+                shift;
+
+            /* delta_chroma_weight_l0[ i ][j] */
+            PUT_BITS_SEV(ps_bitstrm, delta_chroma_weight_l0[0], return_status);
+            ENTROPY_TRACE("delta_chroma_weight_l0[ i ]", delta_chroma_weight_l0[0]);
+
+            /* delta_chroma_offset_l0[ i ][j] */
+            PUT_BITS_SEV(ps_bitstrm, delta_chroma_offset_l0[0], return_status);
+            ENTROPY_TRACE("delta_chroma_offset_l0[ i ]", delta_chroma_offset_l0[0]);
+
+            /* delta_chroma_weight_l0[ i ][j] */
+            PUT_BITS_SEV(ps_bitstrm, delta_chroma_weight_l0[1], return_status);
+            ENTROPY_TRACE("delta_chroma_weight_l0[ i ]", delta_chroma_weight_l0[1]);
+
+            /* delta_chroma_offset_l0[ i ][j] */
+            PUT_BITS_SEV(ps_bitstrm, delta_chroma_offset_l0[1], return_status);
+            ENTROPY_TRACE("delta_chroma_offset_l0[ i ]", delta_chroma_offset_l0[1]);
+        }
+    }
+
+    if(BSLICE == ps_slice_hdr->i1_slice_type)
+    {
+        for(i = 0; i < ps_slice_hdr->i1_num_ref_idx_l1_active; i++)
+        {
+            /* luma_weight_l1_flag[ i ] */
+            PUT_BITS(ps_bitstrm, ps_wt_ofst->i1_luma_weight_l1_flag[i], 1, return_status);
+            i4_wght_count += ps_wt_ofst->i1_luma_weight_l1_flag[i];
+            assert(i4_wght_count <= 24);
+            ENTROPY_TRACE("luma_weight_l1_flag[ i ]", ps_wt_ofst->i1_luma_weight_l1_flag[i]);
+        }
+
+        if(ps_sps->i1_chroma_format_idc != 0)
+        {
+            for(i = 0; i < ps_slice_hdr->i1_num_ref_idx_l1_active; i++)
+            {
+                /* chroma_weight_l1_flag[ i ] */
+                PUT_BITS(ps_bitstrm, ps_wt_ofst->i1_chroma_weight_l1_flag[i], 1, return_status);
+                /* Count chroma twice (Cb and Cr), matching the list 0 accounting */
+                i4_wght_count += 2 * ps_wt_ofst->i1_chroma_weight_l1_flag[i];
+                assert(i4_wght_count <= 24);
+                ENTROPY_TRACE(
+                    "chroma_weight_l1_flag[ i ]", ps_wt_ofst->i1_chroma_weight_l1_flag[i]);
+            }
+        }
+
+        for(i = 0; i < ps_slice_hdr->i1_num_ref_idx_l1_active; i++)
+        {
+            if(ps_wt_ofst->i1_luma_weight_l1_flag[i])
+            {
+                /* delta_luma_weight_l1[ i ] */
+                PUT_BITS_SEV(
+                    ps_bitstrm,
+                    ps_wt_ofst->i2_luma_weight_l1[i] - delta_luma_weight,
+                    return_status);
+                ENTROPY_TRACE(
+                    "delta_luma_weight_l1[ i ]",
+                    ps_wt_ofst->i2_luma_weight_l1[i] - delta_luma_weight);
+
+                /* luma_offset_l1[ i ] */
+                PUT_BITS_SEV(ps_bitstrm, ps_wt_ofst->i2_luma_offset_l1[i], return_status);
+                ENTROPY_TRACE("luma_offset_l1[ i ]", ps_wt_ofst->i2_luma_offset_l1[i]);
+            }
+
+            if(ps_wt_ofst->i1_chroma_weight_l1_flag[i])
+            {
+                WORD32 shift = (1 << (BIT_DEPTH_CHROMA - 1));
+                WORD32 delta_chroma_weight_l1[2];
+                WORD32 delta_chroma_offset_l1[2];
+
+                delta_chroma_weight_l1[0] =
+                    ps_wt_ofst->i2_chroma_weight_l1_cb[i] - delta_chroma_weight;
+                delta_chroma_weight_l1[1] =
+                    ps_wt_ofst->i2_chroma_weight_l1_cr[i] - delta_chroma_weight;
+
+                /* Same offset-delta derivation as for list 0 */
+                delta_chroma_offset_l1[0] =
+                    ps_wt_ofst->i2_chroma_offset_l1_cb[i] +
+                    ((shift * ps_wt_ofst->i2_chroma_weight_l1_cb[i]) >> chroma_log2_weight_denom) -
+                    shift;
+                delta_chroma_offset_l1[1] =
+                    ps_wt_ofst->i2_chroma_offset_l1_cr[i] +
+                    ((shift * ps_wt_ofst->i2_chroma_weight_l1_cr[i]) >> chroma_log2_weight_denom) -
+                    shift;
+
+                /* delta_chroma_weight_l1[ i ][j] */
+                PUT_BITS_SEV(ps_bitstrm, delta_chroma_weight_l1[0], return_status);
+                ENTROPY_TRACE("delta_chroma_weight_l1[ i ]", delta_chroma_weight_l1[0]);
+
+                /* delta_chroma_offset_l1[ i ][j] */
+                PUT_BITS_SEV(ps_bitstrm, delta_chroma_offset_l1[0], return_status);
+                ENTROPY_TRACE("delta_chroma_offset_l1[ i ]", delta_chroma_offset_l1[0]);
+
+                /* delta_chroma_weight_l1[ i ][j] */
+                PUT_BITS_SEV(ps_bitstrm, delta_chroma_weight_l1[1], return_status);
+                ENTROPY_TRACE("delta_chroma_weight_l1[ i ]", delta_chroma_weight_l1[1]);
+
+                /* delta_chroma_offset_l1[ i ][j] */
+                PUT_BITS_SEV(ps_bitstrm, delta_chroma_offset_l1[1], return_status);
+                ENTROPY_TRACE("delta_chroma_offset_l1[ i ]", delta_chroma_offset_l1[1]);
+            }
+        }
+    }
+
+    return return_status;
+}
+
+/**
+******************************************************************************
+*
+*  @brief Generates AUD (Access Unit Delimiter)
+*
+*  @par   Description
+*  Generate Access Unit Delimiter as per Section 7.3.2.5: NAL start code
+*  prefix, NAL unit header of type NAL_AUD, the 3-bit pic_type field and the
+*  RBSP trailing bits.
+*
+*  @param[in]   ps_bitstrm
+*  pointer to bitstream context (handle)
+*
+*  @param[in]   pic_type
+*  picture type (written as a 3-bit field)
+*
+*  @return      success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ihevce_generate_aud(bitstrm_t *ps_bitstrm, WORD32 pic_type)
+{
+    /* The start code prefix is emitted first; its status seeds the result */
+    WORD32 return_status = ihevce_put_nal_start_code_prefix(ps_bitstrm, 1);
+
+    /* NAL unit header identifying the payload as an AUD */
+    return_status |= ihevce_generate_nal_unit_header(ps_bitstrm, NAL_AUD, 0);
+
+    /* pic_type */
+    PUT_BITS(ps_bitstrm, pic_type, 3, return_status);
+    ENTROPY_TRACE("pic type", pic_type);
+
+    /* Close the NAL payload */
+    ihevce_put_rbsp_trailing_bits(ps_bitstrm);
+
+    return return_status;
+}
+
+/**
+******************************************************************************
+*
+*  @brief Generates EOS (End of Sequence)
+*
+*  @par   Description
+*  Generate End of sequence as per Section 7.3.2.6: NAL start code prefix,
+*  NAL unit header of type NAL_EOS and the RBSP trailing bits.
+*
+*  @param[in]   ps_bitstrm
+*  pointer to bitstream context (handle)
+*
+*  @return      success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ihevce_generate_eos(bitstrm_t *ps_bitstrm)
+{
+    /* The start code prefix is emitted first; its status seeds the result */
+    WORD32 return_status = ihevce_put_nal_start_code_prefix(ps_bitstrm, 1);
+
+    /* NAL unit header identifying the payload as an EOS */
+    return_status |= ihevce_generate_nal_unit_header(ps_bitstrm, NAL_EOS, 0);
+
+    /* Close the NAL payload */
+    ihevce_put_rbsp_trailing_bits(ps_bitstrm);
+
+    return return_status;
+}
+
+/**
+******************************************************************************
+*
+*  @brief Generates VPS (Video Parameter Set)
+*
+*  @par   Description
+*  Generate Video Parameter Set as per Section 7.3.2.1. Several fields are
+*  written as fixed values rather than taken from ps_vps:
+*  vps_max_layers_minus1 (0), vps_num_layer_sets_minus1 (0),
+*  vps_timing_info_present_flag (0) and vps_extension_flag (0).
+*
+*  @param[in]   ps_bitstrm
+*  pointer to bitstream context (handle)
+*
+*  @param[in]   ps_vps
+*  pointer to structure containing VPS data
+*
+*  @return      success or failure error code (status of the start code,
+*               NAL header, profile_tier_level and all field writes is
+*               accumulated)
+*
+******************************************************************************
+*/
+WORD32 ihevce_generate_vps(bitstrm_t *ps_bitstrm, vps_t *ps_vps)
+{
+    WORD32 i;
+    WORD8 i1_vps_max_sub_layers_minus1 = ps_vps->i1_vps_max_sub_layers - 1;
+    WORD32 return_status = IHEVCE_SUCCESS;
+
+    /* Insert Start Code */
+    return_status = ihevce_put_nal_start_code_prefix(ps_bitstrm, 1);
+
+    /* Insert Nal Unit Header */
+    return_status |= ihevce_generate_nal_unit_header(ps_bitstrm, NAL_VPS, 0);
+
+    /* video_parameter_set_id */
+    PUT_BITS(ps_bitstrm, ps_vps->i1_vps_id, 4, return_status);
+    ENTROPY_TRACE("video_parameter_set_id", ps_vps->i1_vps_id);
+
+    /* vps_reserved_three_2bits */
+    PUT_BITS(ps_bitstrm, 3, 2, return_status);
+    ENTROPY_TRACE("vps_reserved_three_2bits", 3);
+
+    /* vps_max_layers_minus1 (the traced value must match the 0 written) */
+    PUT_BITS(ps_bitstrm, 0, 6, return_status);
+    ENTROPY_TRACE("vps_max_layers_minus1 ", 0);
+
+    /* vps_max_sub_layers_minus1 */
+    PUT_BITS(ps_bitstrm, i1_vps_max_sub_layers_minus1, 3, return_status);
+    ENTROPY_TRACE("vps_max_sub_layers_minus1", i1_vps_max_sub_layers_minus1);
+
+    /* vps_temporal_id_nesting_flag */
+    PUT_BITS(ps_bitstrm, ps_vps->i1_vps_temporal_id_nesting_flag, 1, return_status);
+    ENTROPY_TRACE("vps_temporal_id_nesting_flag", ps_vps->i1_vps_temporal_id_nesting_flag);
+
+    /* vps_reserved_0xffff_16bits */
+    PUT_BITS(ps_bitstrm, 0xffff, 16, return_status);
+    ENTROPY_TRACE("vps_reserved_0xffff_16bits", 0xffff);
+
+    /* profile-tier and level info */
+    return_status |= ihevce_generate_profile_tier_level(
+        ps_bitstrm, &ps_vps->s_ptl, 1, i1_vps_max_sub_layers_minus1);
+
+    /* vps_sub_layer_ordering_info_present_flag */
+    PUT_BITS(ps_bitstrm, ps_vps->i1_sub_layer_ordering_info_present_flag, 1, return_status);
+    ENTROPY_TRACE(
+        "vps_sub_layer_ordering_info_present_flag",
+        ps_vps->i1_sub_layer_ordering_info_present_flag);
+
+    /* Without per-sub-layer ordering info only the highest sub-layer's */
+    /* entry is written; otherwise every sub-layer gets one              */
+    i = ps_vps->i1_sub_layer_ordering_info_present_flag ? 0 : i1_vps_max_sub_layers_minus1;
+
+    for(; i <= i1_vps_max_sub_layers_minus1; i++)
+    {
+        /* vps_max_dec_pic_buffering[i] */
+        PUT_BITS_UEV(ps_bitstrm, ps_vps->ai1_vps_max_dec_pic_buffering[i], return_status);
+        ENTROPY_TRACE(
+            "vps_max_dec_pic_buffering_minus1[i]", ps_vps->ai1_vps_max_dec_pic_buffering[i]);
+
+        /* vps_num_reorder_pics[i] */
+        PUT_BITS_UEV(ps_bitstrm, ps_vps->ai1_vps_max_num_reorder_pics[i], return_status);
+        ENTROPY_TRACE("ai1_vps_max_num_reorder_pics[i]", ps_vps->ai1_vps_max_num_reorder_pics[i]);
+
+        /* vps_max_latency_increase[i] */
+        PUT_BITS_UEV(ps_bitstrm, ps_vps->ai1_vps_max_latency_increase[i], return_status);
+        ENTROPY_TRACE("ai1_vps_max_latency_increase[i]", ps_vps->ai1_vps_max_latency_increase[i]);
+    }
+
+    /* vps_max_layer_id */
+    PUT_BITS(ps_bitstrm, ps_vps->i1_vps_max_nuh_reserved_zero_layer_id, 6, return_status);
+    ENTROPY_TRACE("vps_max_layer_id", ps_vps->i1_vps_max_nuh_reserved_zero_layer_id);
+
+    /* vps_num_layer_sets_minus1 */
+    PUT_BITS_UEV(ps_bitstrm, 0, return_status);
+    ENTROPY_TRACE("vps_num_layer_sets_minus1", 0);
+
+    /* vps_timing_info_present_flag */
+    PUT_BITS(ps_bitstrm, 0, 1, return_status);
+    ENTROPY_TRACE("vps_timing_info_present_flag", 0);
+
+    /* vps_extension_flag */
+    PUT_BITS(ps_bitstrm, 0, 1, return_status);
+    ENTROPY_TRACE("vps_extension_flag", 0);
+
+    /* rbsp trailing bits */
+    ihevce_put_rbsp_trailing_bits(ps_bitstrm);
+
+    return return_status;
+}
+
+/**
+******************************************************************************
+*
+*  @brief Generates SPS (Sequence Parameter Set)
+*
+*  @par   Description
+*  Generate Sequence Parameter Set as per Section 7.3.2.2
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle)
+*
+* @param[in] ps_sps
+* pointer to structure containing SPS data
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ihevce_generate_sps(bitstrm_t *ps_bitstrm, sps_t *ps_sps)
+{
+ WORD32 i;
+ WORD32 return_status = IHEVCE_SUCCESS;
+ WORD8 i1_max_sub_layers_minus1 = ps_sps->i1_sps_max_sub_layers - 1;
+
+ UWORD32 u4_log2_max_pic_order_cnt_lsb = (UWORD32)(ps_sps->i1_log2_max_pic_order_cnt_lsb);
+
+ UWORD32 u4_log2_min_coding_block_size_minus3 =
+ (UWORD32)(ps_sps->i1_log2_min_coding_block_size) - 3;
+
+ UWORD32 u4_log2_diff_max_min_coding_block_size =
+ (UWORD32)(ps_sps->i1_log2_diff_max_min_coding_block_size);
+
+ UWORD32 u4_log2_min_transform_block_size_minus2 =
+ (UWORD32)(ps_sps->i1_log2_min_transform_block_size) - 2;
+
+ UWORD32 u4_log2_diff_max_min_transform_block_size =
+ (UWORD32)(ps_sps->i1_log2_diff_max_min_transform_block_size);
+
+ /* Insert Start Code */
+ return_status = ihevce_put_nal_start_code_prefix(ps_bitstrm, 1);
+
+ /* Insert Nal Unit Header */
+ return_status |= ihevce_generate_nal_unit_header(ps_bitstrm, NAL_SPS, 0);
+
+ /* video_parameter_set_id */
+ PUT_BITS(ps_bitstrm, ps_sps->i1_vps_id, 4, return_status);
+ ENTROPY_TRACE("video_parameter_set_id", ps_sps->i1_vps_id);
+
+ /* sps_max_sub_layers_minus1 */
+ PUT_BITS(ps_bitstrm, i1_max_sub_layers_minus1, 3, return_status);
+ ENTROPY_TRACE("sps_max_sub_layers_minus1", i1_max_sub_layers_minus1);
+
+ /* sps_temporal_id_nesting_flag */
+ PUT_BITS(ps_bitstrm, ps_sps->i1_sps_temporal_id_nesting_flag, 1, return_status);
+ ENTROPY_TRACE("sps_temporal_id_nesting_flag", ps_sps->i1_sps_temporal_id_nesting_flag);
+
+ /* profile-tier and level info */
+ ihevce_generate_profile_tier_level(ps_bitstrm, &ps_sps->s_ptl, 1, i1_max_sub_layers_minus1);
+
+ /* seq_parameter_set_id */
+ PUT_BITS_UEV(ps_bitstrm, ps_sps->i1_sps_id, return_status);
+ ENTROPY_TRACE("seq_parameter_set_id", ps_sps->i1_sps_id);
+
+ /* chroma_format_idc */
+ PUT_BITS_UEV(ps_bitstrm, ps_sps->i1_chroma_format_idc, return_status);
+ ENTROPY_TRACE("chroma_format_idc", ps_sps->i1_chroma_format_idc);
+
+ if(CHROMA_FMT_IDC_YUV444 == ps_sps->i1_chroma_format_idc)
+ {
+ /* separate_colour_plane_flag */
+ PUT_BITS(ps_bitstrm, 1, 1, return_status);
+ ENTROPY_TRACE("separate_colour_plane_flag", 1);
+ }
+
+ /* pic_width_in_luma_samples */
+ PUT_BITS_UEV(ps_bitstrm, ps_sps->i2_pic_width_in_luma_samples, return_status);
+ ENTROPY_TRACE("pic_width_in_luma_samples", ps_sps->i2_pic_width_in_luma_samples);
+
+ /* pic_height_in_luma_samples */
+ PUT_BITS_UEV(ps_bitstrm, ps_sps->i2_pic_height_in_luma_samples, return_status);
+ ENTROPY_TRACE("pic_height_in_luma_samples", ps_sps->i2_pic_height_in_luma_samples);
+
+ /* pic_cropping_flag */
+ PUT_BITS(ps_bitstrm, ps_sps->i1_pic_cropping_flag, 1, return_status);
+ ENTROPY_TRACE("pic_cropping_flag", ps_sps->i1_pic_cropping_flag);
+
+ if(ps_sps->i1_pic_cropping_flag)
+ {
+ /* pic_crop_left_offset */
+ PUT_BITS_UEV(ps_bitstrm, ps_sps->i2_pic_crop_left_offset, return_status);
+ ENTROPY_TRACE("pic_crop_left_offset", ps_sps->i2_pic_crop_left_offset);
+
+ /* pic_crop_right_offset */
+ PUT_BITS_UEV(ps_bitstrm, ps_sps->i2_pic_crop_right_offset, return_status);
+ ENTROPY_TRACE("pic_crop_right_offset", ps_sps->i2_pic_crop_right_offset);
+
+ /* pic_crop_top_offset */
+ PUT_BITS_UEV(ps_bitstrm, ps_sps->i2_pic_crop_top_offset, return_status);
+ ENTROPY_TRACE("pic_crop_top_offset", ps_sps->i2_pic_crop_top_offset);
+
+ /* pic_crop_bottom_offset */
+ PUT_BITS_UEV(ps_bitstrm, ps_sps->i2_pic_crop_bottom_offset, return_status);
+ ENTROPY_TRACE("pic_crop_bottom_offset", ps_sps->i2_pic_crop_bottom_offset);
+ }
+
+ /* bit_depth_luma_minus8 */
+ PUT_BITS_UEV(ps_bitstrm, ps_sps->i1_bit_depth_luma_minus8, return_status);
+ ENTROPY_TRACE("bit_depth_luma_minus8", ps_sps->i1_bit_depth_luma_minus8);
+
+ /* bit_depth_chroma_minus8 */
+ PUT_BITS_UEV(ps_bitstrm, ps_sps->i1_bit_depth_chroma_minus8, return_status);
+ ENTROPY_TRACE("i1_bit_depth_chroma_minus8", ps_sps->i1_bit_depth_chroma_minus8);
+
+ /* log2_max_pic_order_cnt_lsb_minus4 */
+ PUT_BITS_UEV(ps_bitstrm, u4_log2_max_pic_order_cnt_lsb - 4, return_status);
+ ENTROPY_TRACE("log2_max_pic_order_cnt_lsb_minus4", u4_log2_max_pic_order_cnt_lsb - 4);
+
+ /* sps_sub_layer_ordering_info_present_flag */
+ PUT_BITS(ps_bitstrm, ps_sps->i1_sps_sub_layer_ordering_info_present_flag, 1, return_status);
+ ENTROPY_TRACE(
+ "sps_sub_layer_ordering_info_present_flag",
+ ps_sps->i1_sps_sub_layer_ordering_info_present_flag);
+
+ i = ps_sps->i1_sps_sub_layer_ordering_info_present_flag ? 0 : i1_max_sub_layers_minus1;
+
+ for(; i <= i1_max_sub_layers_minus1; i++)
+ {
+ /* max_dec_pic_buffering */
+ PUT_BITS_UEV(ps_bitstrm, ps_sps->ai1_sps_max_dec_pic_buffering[i], return_status);
+ ENTROPY_TRACE("max_dec_pic_buffering_minus1", ps_sps->ai1_sps_max_dec_pic_buffering[i]);
+
+ /* num_reorder_pics */
+ PUT_BITS_UEV(ps_bitstrm, ps_sps->ai1_sps_max_num_reorder_pics[i], return_status);
+ ENTROPY_TRACE("num_reorder_pics", ps_sps->ai1_sps_max_num_reorder_pics[i]);
+
+ /* max_latency_increase */
+ PUT_BITS_UEV(ps_bitstrm, ps_sps->ai1_sps_max_latency_increase[i], return_status);
+ ENTROPY_TRACE("max_latency_increase", ps_sps->ai1_sps_max_latency_increase[i]);
+ }
+
+ /* log2_min_coding_block_size_minus3 */
+ PUT_BITS_UEV(ps_bitstrm, u4_log2_min_coding_block_size_minus3, return_status);
+ ENTROPY_TRACE("log2_min_coding_block_size_minus3", u4_log2_min_coding_block_size_minus3);
+
+ /* log2_diff_max_min_coding_block_size */
+ PUT_BITS_UEV(ps_bitstrm, u4_log2_diff_max_min_coding_block_size, return_status);
+ ENTROPY_TRACE("log2_diff_max_min_coding_block_size", u4_log2_diff_max_min_coding_block_size);
+
+ /* log2_min_transform_block_size_minus2 */
+ PUT_BITS_UEV(ps_bitstrm, u4_log2_min_transform_block_size_minus2, return_status);
+ ENTROPY_TRACE("log2_min_transform_block_size_minus2", u4_log2_min_transform_block_size_minus2);
+
+ /* log2_diff_max_min_transform_block_size */
+ PUT_BITS_UEV(ps_bitstrm, u4_log2_diff_max_min_transform_block_size, return_status);
+ ENTROPY_TRACE(
+ "log2_diff_max_min_transform_block_size", u4_log2_diff_max_min_transform_block_size);
+
+ /* max_transform_hierarchy_depth_inter */
+ PUT_BITS_UEV(ps_bitstrm, ps_sps->i1_max_transform_hierarchy_depth_inter, return_status);
+ ENTROPY_TRACE(
+ "max_transform_hierarchy_depth_inter", ps_sps->i1_max_transform_hierarchy_depth_inter);
+
+ /* max_transform_hierarchy_depth_intra */
+ PUT_BITS_UEV(ps_bitstrm, ps_sps->i1_max_transform_hierarchy_depth_intra, return_status);
+ ENTROPY_TRACE(
+ "max_transform_hierarchy_depth_intra", ps_sps->i1_max_transform_hierarchy_depth_intra);
+
+ /* scaling_list_enabled_flag */
+ PUT_BITS(ps_bitstrm, ps_sps->i1_scaling_list_enable_flag, 1, return_status);
+ ENTROPY_TRACE("scaling_list_enabled_flag", ps_sps->i1_scaling_list_enable_flag);
+
+ if(ps_sps->i1_scaling_list_enable_flag)
+ {
+ /* sps_scaling_list_data_present_flag */
+ PUT_BITS(ps_bitstrm, ps_sps->i1_sps_scaling_list_data_present_flag, 1, return_status);
+ ENTROPY_TRACE(
+ "sps_scaling_list_data_present_flag", ps_sps->i1_sps_scaling_list_data_present_flag);
+
+#if 0 /* TODO: Will be enabled once scaling list support is added */
+ if(ps_sps->i1_sps_scaling_list_data_present_flag)
+ {
+ //TODO
+ ihevce_generate_scaling_list_data(ps_bitstrm);
+ }
+#endif
+ }
+
+ /* asymmetric_motion_partitions_enabled_flag */
+ PUT_BITS(ps_bitstrm, ps_sps->i1_amp_enabled_flag, 1, return_status);
+ ENTROPY_TRACE("asymmetric_motion_partitions_enabled_flag", ps_sps->i1_amp_enabled_flag);
+
+ /* sample_adaptive_offset_enabled_flag */
+ PUT_BITS(ps_bitstrm, ps_sps->i1_sample_adaptive_offset_enabled_flag, 1, return_status);
+ ENTROPY_TRACE(
+ "sample_adaptive_offset_enabled_flag", ps_sps->i1_sample_adaptive_offset_enabled_flag);
+
+ /* pcm_enabled_flag */
+ PUT_BITS(ps_bitstrm, ps_sps->i1_pcm_enabled_flag, 1, return_status);
+ ENTROPY_TRACE("pcm_enabled_flag", ps_sps->i1_pcm_enabled_flag);
+ if(ps_sps->i1_pcm_enabled_flag)
+ {
+ UWORD32 u4_log2_min_pcm_coding_block_size = (ps_sps->i1_log2_min_pcm_coding_block_size);
+ UWORD32 u4_log2_diff_max_min_pcm_coding_block_size =
+ (ps_sps->i1_log2_diff_max_min_pcm_coding_block_size);
+
+ /* pcm_sample_bit_depth_luma_minus1 */
+ PUT_BITS(ps_bitstrm, ps_sps->i1_pcm_sample_bit_depth_luma - 1, 4, return_status);
+ ENTROPY_TRACE("pcm_sample_bit_depth_luma", ps_sps->i1_pcm_sample_bit_depth_luma - 1);
+
+ /* pcm_sample_bit_depth_chroma_minus1 */
+ PUT_BITS(ps_bitstrm, ps_sps->i1_pcm_sample_bit_depth_chroma - 1, 4, return_status);
+ ENTROPY_TRACE("pcm_sample_bit_depth_chroma", ps_sps->i1_pcm_sample_bit_depth_chroma - 1);
+
+ /* log2_min_pcm_coding_block_size_minus3 */
+ PUT_BITS_UEV(ps_bitstrm, u4_log2_min_pcm_coding_block_size - 3, return_status);
+ ENTROPY_TRACE(
+ "log2_min_pcm_coding_block_size_minus3", u4_log2_min_pcm_coding_block_size - 3);
+
+ /* log2_diff_max_min_pcm_coding_block_size */
+ PUT_BITS_UEV(ps_bitstrm, u4_log2_diff_max_min_pcm_coding_block_size, return_status);
+ ENTROPY_TRACE(
+ "log2_diff_max_min_pcm_coding_block_size", u4_log2_diff_max_min_pcm_coding_block_size);
+
+ /* pcm_loop_filter_disable_flag */
+ PUT_BITS(ps_bitstrm, ps_sps->i1_pcm_loop_filter_disable_flag, 1, return_status);
+ ENTROPY_TRACE("pcm_loop_filter_disable_flag", ps_sps->i1_pcm_loop_filter_disable_flag);
+ }
+
+ /* num_short_term_ref_pic_sets */
+ PUT_BITS_UEV(ps_bitstrm, ps_sps->i1_num_short_term_ref_pic_sets, return_status);
+ ENTROPY_TRACE("num_short_term_ref_pic_sets", ps_sps->i1_num_short_term_ref_pic_sets);
+
+ for(i = 0; i < ps_sps->i1_num_short_term_ref_pic_sets; i++)
+ {
+ WORD32 i4_NumPocTotalCurr = 0;
+ ihevce_short_term_ref_pic_set(
+ ps_bitstrm,
+ &ps_sps->as_stref_picset[0],
+ ps_sps->i1_num_short_term_ref_pic_sets,
+ i,
+ &i4_NumPocTotalCurr);
+ }
+
+ /* long_term_ref_pics_present_flag */
+ PUT_BITS(ps_bitstrm, ps_sps->i1_long_term_ref_pics_present_flag, 1, return_status);
+ ENTROPY_TRACE("long_term_ref_pics_present_flag", ps_sps->i1_long_term_ref_pics_present_flag);
+
+ if(ps_sps->i1_long_term_ref_pics_present_flag)
+ {
+ /* num_long_term_ref_pics_sps */
+ PUT_BITS_UEV(ps_bitstrm, ps_sps->i1_num_long_term_ref_pics_sps, return_status);
+ ENTROPY_TRACE("num_long_term_ref_pics_sps", ps_sps->i1_num_long_term_ref_pics_sps);
+
+ for(i = 0; i < ps_sps->i1_num_long_term_ref_pics_sps; i++)
+ {
+ /* lt_ref_pic_poc_lsb_sps[i] */
+ PUT_BITS(
+ ps_bitstrm,
+ ps_sps->au2_lt_ref_pic_poc_lsb_sps[i],
+ u4_log2_max_pic_order_cnt_lsb,
+ return_status);
+ ENTROPY_TRACE("lt_ref_pic_poc_lsb_sps[i]", ps_sps->au2_lt_ref_pic_poc_lsb_sps[i]);
+
+ /* used_by_curr_pic_lt_sps_flag[i] */
+ PUT_BITS(ps_bitstrm, ps_sps->ai1_used_by_curr_pic_lt_sps_flag[i], 1, return_status);
+ ENTROPY_TRACE(
+ "used_by_curr_pic_lt_sps_flag[i]", ps_sps->ai1_used_by_curr_pic_lt_sps_flag[i]);
+ }
+ }
+
+ /* sps_temporal_mvp_enable_flag */
+ PUT_BITS(ps_bitstrm, ps_sps->i1_sps_temporal_mvp_enable_flag, 1, return_status);
+ ENTROPY_TRACE("sps_temporal_mvp_enable_flag", ps_sps->i1_sps_temporal_mvp_enable_flag);
+
+#if !HM_8DOT1_SYNTAX
+ /* strong_intra_smoothing_enable_flag */
+ PUT_BITS(ps_bitstrm, ps_sps->i1_strong_intra_smoothing_enable_flag, 1, return_status);
+ ENTROPY_TRACE(
+ "sps_strong_intra_smoothing_enable_flag", ps_sps->i1_strong_intra_smoothing_enable_flag);
+#endif
+
+ /* vui_parameters_present_flag */
+ PUT_BITS(ps_bitstrm, ps_sps->i1_vui_parameters_present_flag, 1, return_status);
+ ENTROPY_TRACE("vui_parameters_present_flag", ps_sps->i1_vui_parameters_present_flag);
+
+ ENTROPY_TRACE("----------- vui_parameters -----------", 0);
+
+ if(ps_sps->i1_vui_parameters_present_flag)
+ {
+ /* Add vui parameters to the bitstream */
+ ihevce_generate_vui(ps_bitstrm, ps_sps, ps_sps->s_vui_parameters);
+ }
+
+ /* sps_extension_flag */
+ PUT_BITS(ps_bitstrm, 0, 1, return_status);
+ ENTROPY_TRACE("sps_extension_flag", 0);
+
+ /* rbsp trailing bits */
+ ihevce_put_rbsp_trailing_bits(ps_bitstrm);
+
+ return return_status;
+}
+
+/**
+******************************************************************************
+* @brief Generates PPS (Picture Parameter Set)
+*
+* @par Description
+* Generate Picture Parameter Set as per Section 7.3.2.3: writes the NAL start
+* code prefix, the NAL_PPS unit header, the PPS fields in the order coded below
+* and finally the rbsp trailing bits. Scaling list data is not written (#if 0).
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle); handed to every put-bits call below
+*
+* @param[in] ps_pps
+* pointer to structure containing PPS data
+*
+* @return success or failure error code (return_status, also passed to each PUT_BITS* macro)
+******************************************************************************
+*/
+WORD32 ihevce_generate_pps(bitstrm_t *ps_bitstrm, pps_t *ps_pps)
+{
+ WORD32 i;
+ WORD32 return_status = IHEVCE_SUCCESS;
+
+ /* Insert the NAL start code */
+ return_status = ihevce_put_nal_start_code_prefix(ps_bitstrm, 1);
+
+ /* Insert Nal Unit Header; its status is OR-ed into the start code status */
+ return_status |= ihevce_generate_nal_unit_header(ps_bitstrm, NAL_PPS, 0);
+
+ /* pic_parameter_set_id */
+ PUT_BITS_UEV(ps_bitstrm, ps_pps->i1_pps_id, return_status);
+ ENTROPY_TRACE("pic_parameter_set_id", ps_pps->i1_pps_id);
+
+ /* seq_parameter_set_id */
+ PUT_BITS_UEV(ps_bitstrm, ps_pps->i1_sps_id, return_status);
+ ENTROPY_TRACE("seq_parameter_set_id", ps_pps->i1_sps_id);
+
+ /* dependent_slices_enabled_flag */
+ PUT_BITS(ps_bitstrm, ps_pps->i1_dependent_slice_enabled_flag, 1, return_status);
+ ENTROPY_TRACE("dependent_slices_enabled_flag", ps_pps->i1_dependent_slice_enabled_flag);
+
+ /* output_flag_present_flag */
+ PUT_BITS(ps_bitstrm, ps_pps->i1_output_flag_present_flag, 1, return_status);
+ ENTROPY_TRACE("output_flag_present_flag", ps_pps->i1_output_flag_present_flag);
+
+ /* num_extra_slice_header_bits (3 bits) */
+ PUT_BITS(ps_bitstrm, ps_pps->i1_num_extra_slice_header_bits, 3, return_status);
+ ENTROPY_TRACE("num_extra_slice_header_bits", ps_pps->i1_num_extra_slice_header_bits);
+
+ /* sign_data_hiding_flag */
+ PUT_BITS(ps_bitstrm, ps_pps->i1_sign_data_hiding_flag, 1, return_status);
+ ENTROPY_TRACE("sign_data_hiding_flag", ps_pps->i1_sign_data_hiding_flag);
+
+ /* cabac_init_present_flag */
+ PUT_BITS(ps_bitstrm, ps_pps->i1_cabac_init_present_flag, 1, return_status);
+ ENTROPY_TRACE("cabac_init_present_flag", ps_pps->i1_cabac_init_present_flag);
+
+ /* num_ref_idx_l0_default_active_minus1: the pps field holds the count, so 1 is subtracted */
+ PUT_BITS_UEV(ps_bitstrm, ps_pps->i1_num_ref_idx_l0_default_active - 1, return_status);
+ ENTROPY_TRACE(
+ "num_ref_idx_l0_default_active_minus1", ps_pps->i1_num_ref_idx_l0_default_active - 1);
+
+ /* num_ref_idx_l1_default_active_minus1: the pps field holds the count, so 1 is subtracted */
+ PUT_BITS_UEV(ps_bitstrm, ps_pps->i1_num_ref_idx_l1_default_active - 1, return_status);
+ ENTROPY_TRACE(
+ "num_ref_idx_l1_default_active_minus1", ps_pps->i1_num_ref_idx_l1_default_active - 1);
+
+ /* pic_init_qp_minus26: the pps field holds the qp itself, so 26 is subtracted */
+ PUT_BITS_SEV(ps_bitstrm, ps_pps->i1_pic_init_qp - 26, return_status);
+ ENTROPY_TRACE("pic_init_qp_minus26", ps_pps->i1_pic_init_qp - 26);
+
+ /* constrained_intra_pred_flag */
+ PUT_BITS(ps_bitstrm, ps_pps->i1_constrained_intra_pred_flag, 1, return_status);
+ ENTROPY_TRACE("constrained_intra_pred_flag", ps_pps->i1_constrained_intra_pred_flag);
+
+ /* transform_skip_enabled_flag */
+ PUT_BITS(ps_bitstrm, ps_pps->i1_transform_skip_enabled_flag, 1, return_status);
+ ENTROPY_TRACE("transform_skip_enabled_flag", ps_pps->i1_transform_skip_enabled_flag);
+
+ /* cu_qp_delta_enabled_flag */
+ PUT_BITS(ps_bitstrm, ps_pps->i1_cu_qp_delta_enabled_flag, 1, return_status);
+ ENTROPY_TRACE("cu_qp_delta_enabled_flag", ps_pps->i1_cu_qp_delta_enabled_flag);
+
+ if(ps_pps->i1_cu_qp_delta_enabled_flag)
+ {
+ /* diff_cu_qp_delta_depth: only written when cu qp delta is enabled */
+ PUT_BITS_UEV(ps_bitstrm, ps_pps->i1_diff_cu_qp_delta_depth, return_status);
+ ENTROPY_TRACE("diff_cu_qp_delta_depth", ps_pps->i1_diff_cu_qp_delta_depth);
+ }
+
+ /* cb_qp_offset */
+ PUT_BITS_SEV(ps_bitstrm, ps_pps->i1_pic_cb_qp_offset, return_status);
+ ENTROPY_TRACE("cb_qp_offset", ps_pps->i1_pic_cb_qp_offset);
+
+ /* cr_qp_offset */
+ PUT_BITS_SEV(ps_bitstrm, ps_pps->i1_pic_cr_qp_offset, return_status);
+ ENTROPY_TRACE("cr_qp_offset", ps_pps->i1_pic_cr_qp_offset);
+
+ /* slicelevel_chroma_qp_flag */
+ PUT_BITS(
+ ps_bitstrm, ps_pps->i1_pic_slice_level_chroma_qp_offsets_present_flag, 1, return_status);
+ ENTROPY_TRACE(
+ "slicelevel_chroma_qp_flag", ps_pps->i1_pic_slice_level_chroma_qp_offsets_present_flag);
+
+ /* weighted_pred_flag */
+ PUT_BITS(ps_bitstrm, ps_pps->i1_weighted_pred_flag, 1, return_status);
+ ENTROPY_TRACE("weighted_pred_flag", ps_pps->i1_weighted_pred_flag);
+
+ /* weighted_bipred_flag */
+ PUT_BITS(ps_bitstrm, ps_pps->i1_weighted_bipred_flag, 1, return_status);
+ ENTROPY_TRACE("weighted_bipred_flag", ps_pps->i1_weighted_bipred_flag);
+
+ /* transquant_bypass_enable_flag */
+ PUT_BITS(ps_bitstrm, ps_pps->i1_transquant_bypass_enable_flag, 1, return_status);
+ ENTROPY_TRACE("transquant_bypass_enable_flag", ps_pps->i1_transquant_bypass_enable_flag);
+
+ /* tiles_enabled_flag */
+ PUT_BITS(ps_bitstrm, ps_pps->i1_tiles_enabled_flag, 1, return_status);
+ ENTROPY_TRACE("tiles_enabled_flag", ps_pps->i1_tiles_enabled_flag);
+
+ /* entropy_coding_sync_enabled_flag */
+ PUT_BITS(ps_bitstrm, ps_pps->i1_entropy_coding_sync_enabled_flag, 1, return_status);
+ ENTROPY_TRACE("entropy_coding_sync_enabled_flag", ps_pps->i1_entropy_coding_sync_enabled_flag);
+
+ if(ps_pps->i1_tiles_enabled_flag)
+ {
+ /* num_tile_columns_minus1: the pps field holds the column count */
+ PUT_BITS_UEV(ps_bitstrm, ps_pps->i1_num_tile_columns - 1, return_status);
+ ENTROPY_TRACE("num_tile_columns_minus1", ps_pps->i1_num_tile_columns - 1);
+
+ /* num_tile_rows_minus1: the pps field holds the row count */
+ PUT_BITS_UEV(ps_bitstrm, ps_pps->i1_num_tile_rows - 1, return_status);
+ ENTROPY_TRACE("num_tile_rows_minus1", ps_pps->i1_num_tile_rows - 1);
+
+ /* uniform_spacing_flag */
+ PUT_BITS(ps_bitstrm, ps_pps->i1_uniform_spacing_flag, 1, return_status);
+ ENTROPY_TRACE("uniform_spacing_flag", ps_pps->i1_uniform_spacing_flag);
+
+ if(!ps_pps->i1_uniform_spacing_flag)
+ {
+ for(i = 0; i < ps_pps->i1_num_tile_columns - 1; i++)
+ {
+ /* column_width_minus1[i]; the last column is not signalled (loop stops at count - 1) */
+ PUT_BITS_UEV(ps_bitstrm, ps_pps->ps_tile[i].u2_wd - 1, return_status);
+ ENTROPY_TRACE("column_width_minus1[i]", ps_pps->ps_tile[i].u2_wd - 1);
+ }
+ for(i = 0; i < ps_pps->i1_num_tile_rows - 1; i++)
+ {
+ /* row_height_minus1[i]; NOTE(review): ps_tile[i] is indexed by row number exactly like the column loop - confirm the ps_tile layout makes entry i the i-th row */
+ PUT_BITS_UEV(ps_bitstrm, ps_pps->ps_tile[i].u2_ht - 1, return_status);
+ ENTROPY_TRACE("row_height_minus1[i]", ps_pps->ps_tile[i].u2_ht - 1);
+ }
+ }
+
+ /* loop_filter_across_tiles_enabled_flag */
+ PUT_BITS(ps_bitstrm, ps_pps->i1_loop_filter_across_tiles_enabled_flag, 1, return_status);
+ ENTROPY_TRACE(
+ "loop_filter_across_tiles_enabled_flag",
+ ps_pps->i1_loop_filter_across_tiles_enabled_flag);
+ }
+
+ /* loop_filter_across_slices_enabled_flag */
+ PUT_BITS(ps_bitstrm, ps_pps->i1_loop_filter_across_slices_enabled_flag, 1, return_status);
+ ENTROPY_TRACE(
+ "loop_filter_across_slices_enabled_flag",
+ ps_pps->i1_loop_filter_across_slices_enabled_flag);
+
+ /* deblocking_filter_control_present_flag */
+ PUT_BITS(ps_bitstrm, ps_pps->i1_deblocking_filter_control_present_flag, 1, return_status);
+ ENTROPY_TRACE(
+ "deblocking_filter_control_present_flag",
+ ps_pps->i1_deblocking_filter_control_present_flag);
+
+ if(ps_pps->i1_deblocking_filter_control_present_flag)
+ {
+ /* deblocking_filter_override_enabled_flag */
+ PUT_BITS(ps_bitstrm, ps_pps->i1_deblocking_filter_override_enabled_flag, 1, return_status);
+ ENTROPY_TRACE(
+ "deblocking_filter_override_enabled_flag",
+ ps_pps->i1_deblocking_filter_override_enabled_flag);
+
+ /* pic_disable_deblocking_filter_flag */
+ PUT_BITS(ps_bitstrm, ps_pps->i1_pic_disable_deblocking_filter_flag, 1, return_status);
+ ENTROPY_TRACE(
+ "pic_disable_deblocking_filter_flag", ps_pps->i1_pic_disable_deblocking_filter_flag);
+
+ if(!ps_pps->i1_pic_disable_deblocking_filter_flag)
+ {
+ /* beta_offset_div2; NOTE(review): the *_div2 field is halved again by >> 1 - confirm it stores the undivided offset */
+ PUT_BITS_SEV(ps_bitstrm, ps_pps->i1_beta_offset_div2 >> 1, return_status);
+ ENTROPY_TRACE("beta_offset_div2", ps_pps->i1_beta_offset_div2 >> 1);
+
+ /* tc_offset_div2; NOTE(review): same >> 1 on a *_div2 field as above - confirm */
+ PUT_BITS_SEV(ps_bitstrm, ps_pps->i1_tc_offset_div2 >> 1, return_status);
+ ENTROPY_TRACE("tc_offset_div2", ps_pps->i1_tc_offset_div2 >> 1);
+ }
+ }
+
+ /* pps_scaling_list_data_present_flag; NOTE(review): the flag is written as stored, but the scaling list payload below is compiled out - presumably the flag must stay 0 */
+ PUT_BITS(ps_bitstrm, ps_pps->i1_pps_scaling_list_data_present_flag, 1, return_status);
+ ENTROPY_TRACE(
+ "pps_scaling_list_data_present_flag", ps_pps->i1_pps_scaling_list_data_present_flag);
+
+#if 0 /* TODO: Will be enabled once scaling list support is added */
+ if(ps_pps->i1_pps_scaling_list_data_present_flag )
+ {
+ //TODO
+ ihevce_scaling_list_data();
+ }
+#endif
+
+ /* lists_modification_present_flag */
+ PUT_BITS(ps_bitstrm, ps_pps->i1_lists_modification_present_flag, 1, return_status);
+ ENTROPY_TRACE("lists_modification_present_flag", ps_pps->i1_lists_modification_present_flag);
+
+ {
+ UWORD32 u4_log2_parallel_merge_level_minus2 = ps_pps->i1_log2_parallel_merge_level;
+
+ u4_log2_parallel_merge_level_minus2 -= 2;
+
+ /* log2_parallel_merge_level_minus2: stored level with 2 subtracted just above */
+ PUT_BITS_UEV(ps_bitstrm, u4_log2_parallel_merge_level_minus2, return_status);
+ ENTROPY_TRACE("log2_parallel_merge_level_minus2", u4_log2_parallel_merge_level_minus2);
+ }
+
+ /* slice_header_extension_present_flag */
+ PUT_BITS(ps_bitstrm, ps_pps->i1_slice_header_extension_present_flag, 1, return_status);
+ ENTROPY_TRACE(
+ "slice_header_extension_present_flag", ps_pps->i1_slice_header_extension_present_flag);
+
+ /* pps_extension_flag: always written as 0 */
+ PUT_BITS(ps_bitstrm, 0, 1, return_status);
+ ENTROPY_TRACE("pps_extension_flag", 0);
+
+ ihevce_put_rbsp_trailing_bits(ps_bitstrm);
+
+ return return_status;
+}
+
+/**
+******************************************************************************
+*  @brief Generates Slice Header
+*
+*  @par   Description
+*  Generate Slice Header as per Section 7.3.5.1. Before anything is written,
+*  i2_slice_address, i1_first_slice_in_pic_flag and i1_dependent_slice_flag of
+*  ps_slice_hdr are recomputed here from the tile / slice segment position.
+*
+*  @param[inout] ps_bitstrm        bitstream context the header is written to
+*  @param[in]    i1_nal_unit_type  nal unit type
+*  @param[inout] ps_slice_hdr      slice header params; the three fields named
+*                above and pu4_entry_point_offset[0] are overwritten
+*  @param[in]    ps_pps            pps params referred by slice
+*  @param[in]    ps_sps            sps params referred by slice
+*  @param[out]   ps_dup_bit_strm_ent_offset  receives a copy of *ps_bitstrm taken
+*                right after num_entry_point_offsets (tiles or entropy sync on)
+*  @param[out]   pu4_first_slice_start_offset  [0] receives the estimated slice
+*                data start offset when entry point offsets are signalled
+*  @param[in]    ps_tile_params    tile params; first CTB x/y used if tiles enabled
+*  @param[in]    i4_next_slice_seg_x  CTB x of the slice segment (tiles disabled)
+*  @param[in]    i4_next_slice_seg_y  CTB y of the slice segment (tiles disabled)
+*
+*  @return      success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ihevce_generate_slice_header(
+    bitstrm_t *ps_bitstrm,
+    WORD8 i1_nal_unit_type,
+    slice_header_t *ps_slice_hdr,
+    pps_t *ps_pps,
+    sps_t *ps_sps,
+    bitstrm_t *ps_dup_bit_strm_ent_offset,
+    UWORD32 *pu4_first_slice_start_offset,
+    ihevce_tile_params_t *ps_tile_params,
+    WORD32 i4_next_slice_seg_x,
+    WORD32 i4_next_slice_seg_y)
+{
+    WORD32 i;
+    WORD32 return_status = IHEVCE_SUCCESS;
+
+    WORD32 RapPicFlag = (i1_nal_unit_type >= NAL_BLA_W_LP) &&
+                        (i1_nal_unit_type <= NAL_RSV_RAP_VCL23);
+    WORD32 idr_pic_flag = (NAL_IDR_W_LP == i1_nal_unit_type) || (NAL_IDR_N_LP == i1_nal_unit_type);
+
+    WORD32 disable_deblocking_filter_flag;
+    /* only filled by ihevce_short_term_ref_pic_set() when the RPS is coded in this header */
+    WORD32 i4_NumPocTotalCurr = 0;
+    /* Initialize the pic width and pic height from sps parameters */
+    WORD32 pic_width = ps_sps->i2_pic_width_in_luma_samples;
+    WORD32 pic_height = ps_sps->i2_pic_height_in_luma_samples;
+
+    /* Initialize the CTB size from sps parameters */
+    WORD32 log2_ctb_size =
+        ps_sps->i1_log2_min_coding_block_size + ps_sps->i1_log2_diff_max_min_coding_block_size;
+    WORD32 ctb_size = (1 << log2_ctb_size);
+
+    /* Picture width in CTBs (rounded up); used to turn a CTB x/y into a slice address */
+    WORD32 num_ctb_in_row = (pic_width + ctb_size - 1) >> log2_ctb_size;
+
+    /* Overwrite i2_slice_address here as pre-enc did not have the tile structure
+       available in its scope. Otherwise i2_slice_address would be set in
+       populate_slice_header() itself */
+    if(1 == ps_tile_params->i4_tiles_enabled_flag)
+    {
+        ps_slice_hdr->i2_slice_address =
+            ps_tile_params->i4_first_ctb_y * num_ctb_in_row + ps_tile_params->i4_first_ctb_x;
+    }
+    else
+    {
+        ps_slice_hdr->i2_slice_address = i4_next_slice_seg_x + i4_next_slice_seg_y * num_ctb_in_row;
+    }
+
+    /* Overwrite i1_first_slice_in_pic_flag here as pre-enc did not have the tile structure
+       available in its scope. Otherwise i1_first_slice_in_pic_flag would be set in
+       populate_slice_header() itself */
+    ps_slice_hdr->i1_first_slice_in_pic_flag = (ps_slice_hdr->i2_slice_address == 0);
+
+    /* Currently if dependent slices are enabled, then all slices
+       after first slice of picture, are made dependent slices */
+    if((1 == ps_pps->i1_dependent_slice_enabled_flag) &&
+       (0 == ps_slice_hdr->i1_first_slice_in_pic_flag))
+    {
+        ps_slice_hdr->i1_dependent_slice_flag = 1;
+    }
+    else
+    {
+        ps_slice_hdr->i1_dependent_slice_flag = 0;
+    }
+
+    /* Insert start code */
+    return_status |= ihevce_put_nal_start_code_prefix(ps_bitstrm, 1);
+
+    /* Insert Nal Unit Header */
+    return_status |= ihevce_generate_nal_unit_header(
+        ps_bitstrm,
+        i1_nal_unit_type,
+        ps_slice_hdr->u4_nuh_temporal_id);  //TEMPORALA_SCALABILITY CHANGES
+
+    /* first_slice_in_pic_flag */
+    PUT_BITS(ps_bitstrm, ps_slice_hdr->i1_first_slice_in_pic_flag, 1, return_status);
+    ENTROPY_TRACE("first_slice_in_pic_flag", ps_slice_hdr->i1_first_slice_in_pic_flag);
+
+    if(RapPicFlag)
+    {
+        /* no_output_of_prior_pics_flag */
+        PUT_BITS(ps_bitstrm, ps_slice_hdr->i1_no_output_of_prior_pics_flag, 1, return_status);
+        ENTROPY_TRACE(
+            "no_output_of_prior_pics_flag", ps_slice_hdr->i1_no_output_of_prior_pics_flag);
+    }
+
+    /* pic_parameter_set_id */
+    PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->i1_pps_id, return_status);
+    ENTROPY_TRACE("pic_parameter_set_id", ps_slice_hdr->i1_pps_id);
+
+    /* If ps_pps->i1_dependent_slice_enabled_flag is enabled and
+       current slice is not the first slice of picture then put
+       i1_dependent_slice_flag into the bitstream */
+    if((ps_pps->i1_dependent_slice_enabled_flag) && (!ps_slice_hdr->i1_first_slice_in_pic_flag))
+    {
+        /* dependent_slice_flag */
+        PUT_BITS(ps_bitstrm, ps_slice_hdr->i1_dependent_slice_flag, 1, return_status);
+        ENTROPY_TRACE("dependent_slice_flag", ps_slice_hdr->i1_dependent_slice_flag);
+    }
+
+    if(!ps_slice_hdr->i1_first_slice_in_pic_flag)
+    {
+        WORD32 num_bits;
+        WORD32 num_ctb_in_pic;
+
+        /* ctbs in frame ceiled for width / height not multiple of ctb size */
+        num_ctb_in_pic = ((pic_width + (ctb_size - 1)) >> log2_ctb_size) *
+                         ((pic_height + (ctb_size - 1)) >> log2_ctb_size);
+
+        /* Use CLZ to compute Ceil( Log2( PicSizeInCtbsY ) ) */
+        num_bits = 32 - CLZ(num_ctb_in_pic - 1);
+
+        /* slice_address */
+        PUT_BITS(ps_bitstrm, ps_slice_hdr->i2_slice_address, num_bits, return_status);
+        ENTROPY_TRACE("slice_address", ps_slice_hdr->i2_slice_address);
+    }
+
+    if(!ps_slice_hdr->i1_dependent_slice_flag)
+    {
+        for(i = 0; i < ps_pps->i1_num_extra_slice_header_bits; i++)
+        {
+            /* slice_reserved_undetermined_flag */
+            PUT_BITS(ps_bitstrm, 0, 1, return_status);
+            ENTROPY_TRACE("slice_reserved_undetermined_flag", 0);
+        }
+        /* slice_type */
+        PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->i1_slice_type, return_status);
+        ENTROPY_TRACE("slice_type", ps_slice_hdr->i1_slice_type);
+
+        if(ps_pps->i1_output_flag_present_flag)
+        {
+            /* pic_output_flag */
+            PUT_BITS(ps_bitstrm, ps_slice_hdr->i1_pic_output_flag, 1, return_status);
+            ENTROPY_TRACE("pic_output_flag", ps_slice_hdr->i1_pic_output_flag);
+        }
+
+        if(!idr_pic_flag)
+        {
+            /* pic_order_cnt_lsb */
+            PUT_BITS(
+                ps_bitstrm,
+                ps_slice_hdr->i4_pic_order_cnt_lsb,
+                ps_sps->i1_log2_max_pic_order_cnt_lsb,
+                return_status);
+            ENTROPY_TRACE("pic_order_cnt_lsb", ps_slice_hdr->i4_pic_order_cnt_lsb);
+
+            /* short_term_ref_pic_set_sps_flag */
+            PUT_BITS(
+                ps_bitstrm, ps_slice_hdr->i1_short_term_ref_pic_set_sps_flag, 1, return_status);
+            ENTROPY_TRACE(
+                "short_term_ref_pic_set_sps_flag",
+                ps_slice_hdr->i1_short_term_ref_pic_set_sps_flag);
+
+            if(!ps_slice_hdr->i1_short_term_ref_pic_set_sps_flag)
+            {
+                ihevce_short_term_ref_pic_set(
+                    ps_bitstrm, &ps_slice_hdr->s_stref_picset, 1, 0, &i4_NumPocTotalCurr);
+            }
+            else if(ps_sps->i1_num_short_term_ref_pic_sets > 1)
+            {
+                /* short_term_ref_pic_set_idx takes Ceil(Log2(num_short_term_ref_pic_sets))
+                   bits: CLZ(n - 1) yields the ceiling, and with one set nothing is coded */
+                WORD32 num_bits = 32 - CLZ(ps_sps->i1_num_short_term_ref_pic_sets - 1);
+                PUT_BITS(
+                    ps_bitstrm,
+                    ps_slice_hdr->i1_short_term_ref_pic_set_idx,
+                    num_bits,
+                    return_status);
+                ENTROPY_TRACE(
+                    "short_term_ref_pic_set_idx", ps_slice_hdr->i1_short_term_ref_pic_set_idx);
+            }
+
+            if(ps_sps->i1_long_term_ref_pics_present_flag)
+            {
+                if(ps_sps->i1_num_long_term_ref_pics_sps > 0)
+                {
+                    /* num_long_term_sps */
+                    PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->i1_num_long_term_sps, return_status);
+                    ENTROPY_TRACE("num_long_term_sps", ps_slice_hdr->i1_num_long_term_sps);
+                }
+
+                /* num_long_term_pics */
+                PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->i1_num_long_term_pics, return_status);
+                ENTROPY_TRACE("num_long_term_pics", ps_slice_hdr->i1_num_long_term_pics);
+
+                for(i = 0;
+                    i < (ps_slice_hdr->i1_num_long_term_sps + ps_slice_hdr->i1_num_long_term_pics);
+                    i++)
+                {
+                    if(i < ps_slice_hdr->i1_num_long_term_sps)
+                    {
+                        /* lt_idx_sps[i] is Ceil(Log2(num_long_term_ref_pics_sps)) bits wide */
+                        if(ps_sps->i1_num_long_term_ref_pics_sps > 1)
+                        {
+                            WORD32 num_bits = 32 - CLZ(ps_sps->i1_num_long_term_ref_pics_sps - 1);
+                            PUT_BITS(
+                                ps_bitstrm,
+                                ps_slice_hdr->ai1_lt_idx_sps[i],
+                                num_bits,
+                                return_status);
+                            ENTROPY_TRACE("lt_idx_sps[i]", ps_slice_hdr->ai1_lt_idx_sps[i]);
+                        }
+                    }
+                    else
+                    {
+                        /* poc_lsb_lt[i] */
+                        PUT_BITS(
+                            ps_bitstrm,
+                            ps_slice_hdr->ai4_poc_lsb_lt[i],
+                            ps_sps->i1_log2_max_pic_order_cnt_lsb,
+                            return_status);
+                        ENTROPY_TRACE("poc_lsb_lt[i]", ps_slice_hdr->ai4_poc_lsb_lt[i]);
+
+                        /* used_by_curr_pic_lt_flag[i] */
+                        PUT_BITS(
+                            ps_bitstrm,
+                            ps_slice_hdr->ai1_used_by_curr_pic_lt_flag[i],
+                            1,
+                            return_status);
+                        ENTROPY_TRACE(
+                            "used_by_curr_pic_lt_flag[i]",
+                            ps_slice_hdr->ai1_used_by_curr_pic_lt_flag[i]);
+                    }
+
+                    /* delta_poc_msb_present_flag[i] */
+                    PUT_BITS(
+                        ps_bitstrm,
+                        ps_slice_hdr->ai1_delta_poc_msb_present_flag[i],
+                        1,
+                        return_status);
+                    ENTROPY_TRACE(
+                        "delta_poc_msb_present_flag[i]",
+                        ps_slice_hdr->ai1_delta_poc_msb_present_flag[i]);
+
+                    if(ps_slice_hdr->ai1_delta_poc_msb_present_flag[i])
+                    {
+                        /* delta_poc_msb_cycle_lt[i] */
+                        PUT_BITS_UEV(
+                            ps_bitstrm, ps_slice_hdr->ai1_delta_poc_msb_cycle_lt[i], return_status);
+                        ENTROPY_TRACE(
+                            "delata_poc_msb_cycle_lt", ps_slice_hdr->ai1_delta_poc_msb_cycle_lt[i]);
+                    }
+                }
+            }
+
+            if(ps_sps->i1_sps_temporal_mvp_enable_flag)
+            {
+                /* slice_temporal_mvp_enable_flag */
+                PUT_BITS(
+                    ps_bitstrm, ps_slice_hdr->i1_slice_temporal_mvp_enable_flag, 1, return_status);
+                ENTROPY_TRACE(
+                    "slice_temporal_mvp_enable_flag",
+                    ps_slice_hdr->i1_slice_temporal_mvp_enable_flag);
+            }
+        }
+
+        if(ps_sps->i1_sample_adaptive_offset_enabled_flag)
+        {
+            /* slice_sao_luma_flag */
+            PUT_BITS(ps_bitstrm, ps_slice_hdr->i1_slice_sao_luma_flag, 1, return_status);
+            ENTROPY_TRACE("slice_sao_luma_flag", ps_slice_hdr->i1_slice_sao_luma_flag);
+
+            /* slice_sao_chroma_flag */
+            PUT_BITS(ps_bitstrm, ps_slice_hdr->i1_slice_sao_chroma_flag, 1, return_status);
+            ENTROPY_TRACE("slice_sao_chroma_flag", ps_slice_hdr->i1_slice_sao_chroma_flag);
+        }
+        if((PSLICE == ps_slice_hdr->i1_slice_type) || (BSLICE == ps_slice_hdr->i1_slice_type))
+        {
+            /* num_ref_idx_active_override_flag */
+            PUT_BITS(
+                ps_bitstrm, ps_slice_hdr->i1_num_ref_idx_active_override_flag, 1, return_status);
+            ENTROPY_TRACE(
+                "num_ref_idx_active_override_flag",
+                ps_slice_hdr->i1_num_ref_idx_active_override_flag);
+
+            if(ps_slice_hdr->i1_num_ref_idx_active_override_flag)
+            {
+                /* i1_num_ref_idx_l0_active_minus1 */
+                PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->i1_num_ref_idx_l0_active - 1, return_status);
+                ENTROPY_TRACE(
+                    "i1_num_ref_idx_l0_active_minus1", ps_slice_hdr->i1_num_ref_idx_l0_active - 1);
+
+                if(BSLICE == ps_slice_hdr->i1_slice_type)
+                {
+                    /* i1_num_ref_idx_l1_active_minus1 */
+                    PUT_BITS_UEV(
+                        ps_bitstrm, ps_slice_hdr->i1_num_ref_idx_l1_active - 1, return_status);
+                    ENTROPY_TRACE(
+                        "i1_num_ref_idx_l1_active", ps_slice_hdr->i1_num_ref_idx_l1_active - 1);
+                }
+            }
+
+            if(ps_pps->i1_lists_modification_present_flag && i4_NumPocTotalCurr > 1)
+            {
+                ref_pic_list_modification(ps_bitstrm, ps_slice_hdr, i4_NumPocTotalCurr);
+            }
+
+            if(BSLICE == ps_slice_hdr->i1_slice_type)
+            {
+                /* mvd_l1_zero_flag */
+                PUT_BITS(ps_bitstrm, ps_slice_hdr->i1_mvd_l1_zero_flag, 1, return_status);
+                ENTROPY_TRACE("mvd_l1_zero_flag", ps_slice_hdr->i1_mvd_l1_zero_flag);
+            }
+
+            if(ps_pps->i1_cabac_init_present_flag)
+            {
+                /* cabac_init_flag */
+                PUT_BITS(ps_bitstrm, ps_slice_hdr->i1_cabac_init_flag, 1, return_status);
+                ENTROPY_TRACE("cabac_init_flag", ps_slice_hdr->i1_cabac_init_flag);
+            }
+
+            if(ps_slice_hdr->i1_slice_temporal_mvp_enable_flag)
+            {
+                if(BSLICE == ps_slice_hdr->i1_slice_type)
+                {
+                    /* collocated_from_l0_flag */
+                    PUT_BITS(
+                        ps_bitstrm, ps_slice_hdr->i1_collocated_from_l0_flag, 1, return_status);
+                    ENTROPY_TRACE(
+                        "collocated_from_l0_flag", ps_slice_hdr->i1_collocated_from_l0_flag);
+                }
+                if((ps_slice_hdr->i1_collocated_from_l0_flag &&
+                    (ps_slice_hdr->i1_num_ref_idx_l0_active > 1)) ||
+                   (!ps_slice_hdr->i1_collocated_from_l0_flag &&
+                    (ps_slice_hdr->i1_num_ref_idx_l1_active > 1)))
+                {
+                    /* collocated_ref_idx */
+                    PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->i1_collocated_ref_idx, return_status);
+                    ENTROPY_TRACE("collocated_ref_idx", ps_slice_hdr->i1_collocated_ref_idx);
+                }
+            }
+
+            if((ps_pps->i1_weighted_pred_flag && (PSLICE == ps_slice_hdr->i1_slice_type)) ||
+               (ps_pps->i1_weighted_bipred_flag && (BSLICE == ps_slice_hdr->i1_slice_type)))
+            {
+                ihevce_generate_pred_weight_table(ps_bitstrm, ps_sps, ps_pps, ps_slice_hdr);
+            }
+
+#if !HM_8DOT1_SYNTAX
+            /* five_minus_max_num_merge_cand */
+            PUT_BITS_UEV(ps_bitstrm, 5 - ps_slice_hdr->i1_max_num_merge_cand, return_status);
+            ENTROPY_TRACE("five_minus_max_num_merge_cand", 5 - ps_slice_hdr->i1_max_num_merge_cand);
+#endif
+        }
+#if HM_8DOT1_SYNTAX
+        /* five_minus_max_num_merge_cand */
+        PUT_BITS_UEV(ps_bitstrm, 5 - ps_slice_hdr->i1_max_num_merge_cand, return_status);
+        ENTROPY_TRACE("five_minus_max_num_merge_cand", 5 - ps_slice_hdr->i1_max_num_merge_cand);
+#endif
+
+        /* slice_qp_delta */
+        PUT_BITS_SEV(ps_bitstrm, ps_slice_hdr->i1_slice_qp_delta, return_status);
+        ENTROPY_TRACE("slice_qp_delta", ps_slice_hdr->i1_slice_qp_delta);
+
+        if(ps_pps->i1_pic_slice_level_chroma_qp_offsets_present_flag)
+        {
+            /* slice_cb_qp_offset */
+            PUT_BITS_SEV(ps_bitstrm, ps_slice_hdr->i1_slice_cb_qp_offset, return_status);
+            ENTROPY_TRACE("slice_cb_qp_offset", ps_slice_hdr->i1_slice_cb_qp_offset);
+
+            /* slice_cr_qp_offset */
+            PUT_BITS_SEV(ps_bitstrm, ps_slice_hdr->i1_slice_cr_qp_offset, return_status);
+            ENTROPY_TRACE("slice_cr_qp_offset", ps_slice_hdr->i1_slice_cr_qp_offset);
+        }
+
+        if(ps_pps->i1_deblocking_filter_control_present_flag)
+        {
+            if(ps_pps->i1_deblocking_filter_override_enabled_flag)
+            {
+                /* deblocking_filter_override_flag */
+                PUT_BITS(
+                    ps_bitstrm, ps_slice_hdr->i1_deblocking_filter_override_flag, 1, return_status);
+                ENTROPY_TRACE(
+                    "deblocking_filter_override_flag",
+                    ps_slice_hdr->i1_deblocking_filter_override_flag);
+            }
+
+            if(ps_slice_hdr->i1_deblocking_filter_override_flag)
+            {
+                /* slice_disable_deblocking_filter_flag */
+                PUT_BITS(
+                    ps_bitstrm,
+                    ps_slice_hdr->i1_slice_disable_deblocking_filter_flag,
+                    1,
+                    return_status);
+                ENTROPY_TRACE(
+                    "slice_disable_deblocking_filter_flag",
+                    ps_slice_hdr->i1_slice_disable_deblocking_filter_flag);
+
+                if(!ps_slice_hdr->i1_slice_disable_deblocking_filter_flag)
+                {
+                    /* beta_offset_div2 */
+                    PUT_BITS_SEV(ps_bitstrm, ps_slice_hdr->i1_beta_offset_div2 >> 1, return_status);
+                    ENTROPY_TRACE("beta_offset_div2", ps_slice_hdr->i1_beta_offset_div2 >> 1);
+
+                    /* tc_offset_div2 */
+                    PUT_BITS_SEV(ps_bitstrm, ps_slice_hdr->i1_tc_offset_div2 >> 1, return_status);
+                    ENTROPY_TRACE("tc_offset_div2", ps_slice_hdr->i1_tc_offset_div2 >> 1);
+                }
+            }
+        }
+
+        disable_deblocking_filter_flag = ps_slice_hdr->i1_slice_disable_deblocking_filter_flag |
+                                         ps_pps->i1_pic_disable_deblocking_filter_flag;
+
+        if(ps_pps->i1_loop_filter_across_slices_enabled_flag &&
+           (ps_slice_hdr->i1_slice_sao_luma_flag || ps_slice_hdr->i1_slice_sao_chroma_flag ||
+            !disable_deblocking_filter_flag))
+        {
+            /* slice_loop_filter_across_slices_enabled_flag */
+            PUT_BITS(
+                ps_bitstrm,
+                ps_slice_hdr->i1_slice_loop_filter_across_slices_enabled_flag,
+                1,
+                return_status);
+            ENTROPY_TRACE(
+                "slice_loop_filter_across_slices_enabled_flag",
+                ps_slice_hdr->i1_slice_loop_filter_across_slices_enabled_flag);
+        }
+    }
+
+    if((ps_pps->i1_tiles_enabled_flag) || (ps_pps->i1_entropy_coding_sync_enabled_flag))
+    {
+        /* num_entry_point_offsets */
+        PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->i4_num_entry_point_offsets, return_status);
+        ENTROPY_TRACE("num_entry_point_offsets", ps_slice_hdr->i4_num_entry_point_offsets);
+
+        /* snapshot the bitstream state; the copy is used later to write the final offsets */
+        memcpy(ps_dup_bit_strm_ent_offset, ps_bitstrm, sizeof(bitstrm_t));
+
+        if(ps_slice_hdr->i4_num_entry_point_offsets > 0)
+        {
+            /* offset_len_minus1 */
+            PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->i1_offset_len - 1, return_status);
+            ENTROPY_TRACE("offset_len_minus1", ps_slice_hdr->i1_offset_len - 1);
+
+            /* Reserve room for the entry point offsets: offset bits rounded down to bytes,
+               one emulation prevention byte per offset, 4 bytes for the incomplete word yet
+               to be flushed and 4 bytes for the offset length. Per the original notes, actual
+               offsets are patched at end of frame and spare bytes are filled with 0xFF. */
+            pu4_first_slice_start_offset[0] =
+                ps_bitstrm->u4_strm_buf_offset +
+                ((ps_slice_hdr->i4_num_entry_point_offsets * ps_slice_hdr->i1_offset_len) >> 3) +
+                ps_slice_hdr->i4_num_entry_point_offsets + 4 + 4;
+
+            ps_slice_hdr->pu4_entry_point_offset[0] = (*pu4_first_slice_start_offset);
+
+            for(i = 0; i < ps_slice_hdr->i4_num_entry_point_offsets; i++)
+            {
+                /* entry_point_offset[i] */
+                PUT_BITS(
+                    ps_bitstrm,
+                    ps_slice_hdr->pu4_entry_point_offset[i],
+                    ps_slice_hdr->i1_offset_len,
+                    return_status);
+                ENTROPY_TRACE("entry_point_offset[i]", ps_slice_hdr->pu4_entry_point_offset[i]);
+            }
+        }
+    }
+
+    if(ps_pps->i1_slice_header_extension_present_flag)
+    {
+        /* slice_header_extension_length */
+        PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->i2_slice_header_extension_length, return_status);
+        ENTROPY_TRACE(
+            "slice_header_extension_length", ps_slice_hdr->i2_slice_header_extension_length);
+
+        for(i = 0; i < ps_slice_hdr->i2_slice_header_extension_length; i++)
+        {
+            /* slice_header_extension_data_byte[i] */
+            PUT_BITS(ps_bitstrm, 0, 8, return_status);
+            ENTROPY_TRACE("slice_header_extension_data_byte[i]", 0);
+        }
+    }
+
+    BYTE_ALIGNMENT(ps_bitstrm);
+
+    return return_status;
+}
+
+/**
+******************************************************************************
+*
+* @brief Populates vps structure
+*
+* @par Description
+* All the parameters in vps are currently hard coded
+*
+* @param[out] ps_vps
+* pointer to vps params that needs to be populated
+*
+* @param[in] ps_src_params
+* pointer to source config params; resolution, frame rate etc
+*
+* @param[in] ps_out_strm_params
+* pointer to output stream config params
+*
+* @param[in] ps_coding_params
+* pointer to coding params; to enable/disable various toolsets in pps
+*
+* @param[in] ps_config_prms
+* pointer to configuration params like bitrate, HRD buffer sizes, cu, tu sizes
+*
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ihevce_populate_vps(
+ enc_ctxt_t *ps_enc_ctxt,
+ vps_t *ps_vps,
+ ihevce_src_params_t *ps_src_params,
+ ihevce_out_strm_params_t *ps_out_strm_params,
+ ihevce_coding_params_t *ps_coding_params,
+ ihevce_config_prms_t *ps_config_prms,
+ ihevce_static_cfg_params_t *ps_stat_cfg_prms,
+ WORD32 i4_resolution_id)
+{
+ WORD8 *pi1_profile_compatiblity_flags;
+ WORD32 i;
+ WORD32 i4_field_pic = ps_src_params->i4_field_pic;
+ WORD32 i4_codec_level_index;
+ ps_vps->i1_vps_id = DEFAULT_VPS_ID;
+
+ (void)ps_config_prms;
+ /* default sub layers is 1 */
+ ps_vps->i1_vps_max_sub_layers = 1;
+ if(1 == ps_stat_cfg_prms->s_tgt_lyr_prms.i4_enable_temporal_scalability)
+ {
+ ps_vps->i1_vps_max_sub_layers = 2;
+ }
+
+ for(i = 0; i < ps_vps->i1_vps_max_sub_layers; i++)
+ {
+ /* currently bit rate and pic rate signalling is disabled */
+ ps_vps->ai1_bit_rate_info_present_flag[i] = 0;
+ ps_vps->ai1_pic_rate_info_present_flag[i] = 0;
+
+ if(ps_vps->ai1_bit_rate_info_present_flag[i])
+ {
+ /* TODO: Add support for bitrate and max bitrate */
+ ps_vps->au2_avg_bit_rate[i] = 0;
+ ps_vps->au2_max_bit_rate[i] = 0;
+ }
+
+ if(ps_vps->ai1_pic_rate_info_present_flag[i])
+ {
+ /* TODO: Add support for pic rate idc and avg pic rate */
+ }
+ }
+
+ /* default sub layer ordering info present flag */
+ ps_vps->i1_sub_layer_ordering_info_present_flag = VPS_SUB_LAYER_ORDERING_INFO_ABSENT;
+
+ /* hrd and temporal id nesting not supported for now */
+ ps_vps->i1_vps_num_hrd_parameters = 0;
+
+ if(ps_vps->i1_vps_max_sub_layers == 1)
+ {
+ ps_vps->i1_vps_temporal_id_nesting_flag = 1;
+ }
+ else
+ {
+ ps_vps->i1_vps_temporal_id_nesting_flag = 0;
+ }
+
+ /* populate the general profile, tier and level information */
+ ps_vps->s_ptl.s_ptl_gen.i1_profile_space = 0; // BLU_RAY specific change is default
+
+ /* set the profile according to user input */
+ ps_vps->s_ptl.s_ptl_gen.i1_profile_idc = ps_out_strm_params->i4_codec_profile;
+
+ /***************************************************************/
+ /* set the profile compatibility flag for current profile to 1 */
+ /* the rest of the flags are set to 0 */
+ /***************************************************************/
+ pi1_profile_compatiblity_flags = &ps_vps->s_ptl.s_ptl_gen.ai1_profile_compatibility_flag[0];
+
+ for(i = 0; i < ps_vps->i1_vps_max_sub_layers; i++) //TEMPORALA_SCALABILITY CHANGES
+ {
+ ps_vps->ai1_vps_max_dec_pic_buffering[i] =
+ ps_coding_params->i4_max_reference_frames + (2 << i4_field_pic) - 1;
+
+ ps_vps->ai1_vps_max_num_reorder_pics[i] = ps_coding_params->i4_max_temporal_layers
+ << i4_field_pic;
+
+ ps_vps->ai1_vps_max_latency_increase[i] = 0;
+
+ ps_vps->s_ptl.ai1_sub_layer_level_present_flag[i] = 1; //TEMPORALA_SCALABILITY CHANGES
+
+ ps_vps->s_ptl.ai1_sub_layer_profile_present_flag[i] = 0; //TEMPORALA_SCALABILITY CHANGES
+
+ ps_vps->s_ptl.as_ptl_sub[i].i1_profile_space = 0; // BLU_RAY specific change is default
+
+ ps_vps->s_ptl.as_ptl_sub[i].i1_profile_idc = ps_out_strm_params->i4_codec_profile;
+
+ memset(
+ ps_vps->s_ptl.as_ptl_sub[i].ai1_profile_compatibility_flag,
+ 0,
+ MAX_PROFILE_COMPATBLTY * sizeof(WORD8));
+
+ ps_vps->s_ptl.as_ptl_sub[i]
+ .ai1_profile_compatibility_flag[ps_out_strm_params->i4_codec_profile] = 1;
+
+ ps_vps->s_ptl.as_ptl_sub[i].u1_level_idc =
+ ps_stat_cfg_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_codec_level;
+
+ if(0 == i) // Only one level temporal scalability suport has been added.
+ {
+ i4_codec_level_index = ihevce_get_level_index(
+ ps_stat_cfg_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_codec_level);
+
+ if(i4_codec_level_index)
+ i4_codec_level_index -= 1;
+
+ ps_vps->s_ptl.as_ptl_sub[i].u1_level_idc =
+ (WORD32)g_as_level_data[i4_codec_level_index].e_level;
+ }
+
+ ps_vps->s_ptl.as_ptl_sub[i].i1_tier_flag = ps_out_strm_params->i4_codec_tier;
+
+ if(ps_src_params->i4_field_pic == IV_PROGRESSIVE)
+ {
+ ps_vps->s_ptl.as_ptl_sub[i].i1_general_progressive_source_flag = 1;
+
+ ps_vps->s_ptl.as_ptl_sub[i].i1_general_interlaced_source_flag = 0;
+ }
+ else if(ps_src_params->i4_field_pic == IV_INTERLACED)
+ {
+ ps_vps->s_ptl.as_ptl_sub[i].i1_general_progressive_source_flag = 0;
+
+ ps_vps->s_ptl.as_ptl_sub[i].i1_general_interlaced_source_flag = 1;
+ }
+ else if(ps_src_params->i4_field_pic == IV_CONTENTTYPE_NA)
+ {
+ ps_vps->s_ptl.as_ptl_sub[i].i1_general_progressive_source_flag = 0;
+
+ ps_vps->s_ptl.as_ptl_sub[i].i1_general_interlaced_source_flag = 0;
+ }
+
+ ps_vps->s_ptl.as_ptl_sub[i].i1_general_non_packed_constraint_flag =
+ DEFAULT_NON_PACKED_CONSTRAINT_FLAG;
+
+ if(ps_enc_ctxt->i4_blu_ray_spec == 1)
+ {
+ ps_vps->s_ptl.as_ptl_sub[i].i1_frame_only_constraint_flag = 1;
+ }
+ else
+ {
+ ps_vps->s_ptl.as_ptl_sub[i].i1_frame_only_constraint_flag =
+ DEFAULT_FRAME_ONLY_CONSTRAINT_FLAG;
+ }
+ }
+
+ memset(pi1_profile_compatiblity_flags, 0, MAX_PROFILE_COMPATBLTY);
+ pi1_profile_compatiblity_flags[ps_out_strm_params->i4_codec_profile] = 1;
+
+ /* set the level idc according to user input */
+ ps_vps->s_ptl.s_ptl_gen.u1_level_idc =
+ ps_stat_cfg_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_codec_level;
+
+ ps_vps->s_ptl.s_ptl_gen.i1_tier_flag = ps_out_strm_params->i4_codec_tier;
+
+ if(ps_src_params->i4_field_pic == IV_PROGRESSIVE)
+ {
+ ps_vps->s_ptl.s_ptl_gen.i1_general_progressive_source_flag = 1;
+
+ ps_vps->s_ptl.s_ptl_gen.i1_general_interlaced_source_flag = 0;
+ }
+ else if(ps_src_params->i4_field_pic == IV_INTERLACED)
+ {
+ ps_vps->s_ptl.s_ptl_gen.i1_general_progressive_source_flag = 0;
+
+ ps_vps->s_ptl.s_ptl_gen.i1_general_interlaced_source_flag = 1;
+ }
+ else if(ps_src_params->i4_field_pic == IV_CONTENTTYPE_NA)
+ {
+ ps_vps->s_ptl.s_ptl_gen.i1_general_progressive_source_flag = 0;
+
+ ps_vps->s_ptl.s_ptl_gen.i1_general_interlaced_source_flag = 0;
+ }
+
+ ps_vps->s_ptl.s_ptl_gen.i1_general_non_packed_constraint_flag =
+ DEFAULT_NON_PACKED_CONSTRAINT_FLAG;
+
+ if(ps_enc_ctxt->i4_blu_ray_spec == 1)
+ {
+ ps_vps->s_ptl.s_ptl_gen.i1_frame_only_constraint_flag = 1;
+ }
+ else
+ {
+ ps_vps->s_ptl.s_ptl_gen.i1_frame_only_constraint_flag = DEFAULT_FRAME_ONLY_CONSTRAINT_FLAG;
+ }
+ if((ps_out_strm_params->i4_codec_profile == 4) &&
+ (ps_src_params->i4_chr_format == IV_YUV_420SP_UV))
+ {
+ ps_vps->s_ptl.s_ptl_gen.i1_general_max_12bit_constraint_flag = 1;
+
+ ps_vps->s_ptl.s_ptl_gen.i1_general_max_10bit_constraint_flag = 0;
+
+ ps_vps->s_ptl.s_ptl_gen.i1_general_max_8bit_constraint_flag = 0;
+
+ ps_vps->s_ptl.s_ptl_gen.i1_general_max_422chroma_constraint_flag = 1;
+
+ ps_vps->s_ptl.s_ptl_gen.i1_general_max_420chroma_constraint_flag = 1;
+
+ ps_vps->s_ptl.s_ptl_gen.i1_general_max_monochrome_constraint_flag = 0;
+
+ ps_vps->s_ptl.s_ptl_gen.i1_general_intra_constraint_flag = 0;
+
+ ps_vps->s_ptl.s_ptl_gen.i1_general_one_picture_only_constraint_flag = 0;
+
+ ps_vps->s_ptl.s_ptl_gen.i1_general_lower_bit_rate_constraint_flag = 1;
+ }
+ else if(
+ (ps_out_strm_params->i4_codec_profile == 4) &&
+ (ps_src_params->i4_chr_format == IV_YUV_422SP_UV))
+ {
+ ps_vps->s_ptl.s_ptl_gen.i1_general_max_12bit_constraint_flag = 1;
+
+ ps_vps->s_ptl.s_ptl_gen.i1_general_max_10bit_constraint_flag = 0;
+
+ ps_vps->s_ptl.s_ptl_gen.i1_general_max_8bit_constraint_flag = 0;
+
+ ps_vps->s_ptl.s_ptl_gen.i1_general_max_422chroma_constraint_flag = 1;
+
+ ps_vps->s_ptl.s_ptl_gen.i1_general_max_420chroma_constraint_flag = 0;
+
+ ps_vps->s_ptl.s_ptl_gen.i1_general_max_monochrome_constraint_flag = 0;
+
+ ps_vps->s_ptl.s_ptl_gen.i1_general_intra_constraint_flag = 0;
+
+ ps_vps->s_ptl.s_ptl_gen.i1_general_one_picture_only_constraint_flag = 0;
+
+ ps_vps->s_ptl.s_ptl_gen.i1_general_lower_bit_rate_constraint_flag = 1;
+ }
+ else
+ {
+ ps_vps->s_ptl.s_ptl_gen.i1_general_max_12bit_constraint_flag = 0;
+
+ ps_vps->s_ptl.s_ptl_gen.i1_general_max_10bit_constraint_flag = 0;
+
+ ps_vps->s_ptl.s_ptl_gen.i1_general_max_8bit_constraint_flag = 0;
+
+ ps_vps->s_ptl.s_ptl_gen.i1_general_max_422chroma_constraint_flag = 0;
+
+ ps_vps->s_ptl.s_ptl_gen.i1_general_max_420chroma_constraint_flag = 0;
+
+ ps_vps->s_ptl.s_ptl_gen.i1_general_max_monochrome_constraint_flag = 0;
+
+ ps_vps->s_ptl.s_ptl_gen.i1_general_intra_constraint_flag = 0;
+
+ ps_vps->s_ptl.s_ptl_gen.i1_general_one_picture_only_constraint_flag = 0;
+
+ ps_vps->s_ptl.s_ptl_gen.i1_general_lower_bit_rate_constraint_flag = 0;
+ }
+
+ ps_vps->i1_vps_max_nuh_reserved_zero_layer_id = 0;
+
+ return IHEVCE_SUCCESS;
+}
+
+/**
+******************************************************************************
+*
+*  @brief Fills the sps structure from the encoder configuration
+*
+*  @par   Description
+*  Writes the sequence level fields of ps_sps (ids, picture dimensions, block
+*  size limits, tool enable flags, cropping offsets and the profile / tier /
+*  level data of the general layer and of every sub layer) so that the
+*  structure can be used for header generation
+*
+*  @param[in]   ps_enc_ctxt
+*  pointer to encoder context; only i4_blu_ray_spec is read here
+*
+*  @param[out]  ps_sps
+*  pointer to sps params that needs to be populated
+*
+*  @param[in]   ps_vps
+*  pointer to vps params referred to by the sps; supplies the vps id
+*
+*  @param[in]   ps_src_params
+*  pointer to source config params; field / frame type and chroma format
+*
+*  @param[in]   ps_out_strm_params
+*  pointer to output stream config params; profile, tier and vui enable
+*
+*  @param[in]   ps_coding_params
+*  pointer to coding params; cropping mode, default scaling matrix usage,
+*  max reference frames and max temporal layers
+*
+*  @param[in]   ps_config_prms
+*  pointer to configuration params; cu / tu size limits and tree depths
+*
+*  @param[in]   ps_frm_ctb_prms
+*  pointer to frame ctb params; cu aligned picture size and ctb counts
+*
+*  @param[in]   ps_stat_cfg_prms
+*  pointer to static config params; target layer params and original
+*  source dimensions
+*
+*  @param[in]   i4_resolution_id
+*  index into as_tgt_params; also used as sps id when i4_mres_single_out is 1
+*
+*  @return      IHEVCE_SUCCESS (no failure path in this function)
+*
+******************************************************************************
+*/
+WORD32 ihevce_populate_sps(
+    enc_ctxt_t *ps_enc_ctxt,
+    sps_t *ps_sps,
+    vps_t *ps_vps,
+    ihevce_src_params_t *ps_src_params,
+    ihevce_out_strm_params_t *ps_out_strm_params,
+    ihevce_coding_params_t *ps_coding_params,
+    ihevce_config_prms_t *ps_config_prms,
+    frm_ctb_ctxt_t *ps_frm_ctb_prms,
+    ihevce_static_cfg_params_t *ps_stat_cfg_prms,
+    WORD32 i4_resolution_id)
+{
+    WORD32 i4_sub_lyr;
+    WORD32 i4_level_idx;
+    WORD32 i4_field_pic = ps_src_params->i4_field_pic;
+    WORD32 i4_profile = ps_out_strm_params->i4_codec_profile;
+    WORD32 i4_tier = ps_out_strm_params->i4_codec_tier;
+    WORD32 i4_level =
+        ps_stat_cfg_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_codec_level;
+    WORD32 i4_preset =
+        ps_stat_cfg_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;
+    WORD32 i4_bit_depth_minus8 = ps_stat_cfg_prms->s_tgt_lyr_prms.i4_internal_bit_depth - 8;
+
+    /* source scan type flags; left untouched in the ptl structures when the */
+    /* content type is none of the three values checked below                */
+    WORD32 i4_src_type_known = (IV_PROGRESSIVE == ps_src_params->i4_field_pic) ||
+                               (IV_INTERLACED == ps_src_params->i4_field_pic) ||
+                               (IV_CONTENTTYPE_NA == ps_src_params->i4_field_pic);
+    WORD32 i4_progressive_src = (IV_PROGRESSIVE == ps_src_params->i4_field_pic);
+    WORD32 i4_interlaced_src = (IV_PROGRESSIVE != ps_src_params->i4_field_pic) &&
+                               (IV_INTERLACED == ps_src_params->i4_field_pic);
+
+    /* blu ray spec forces the frame only constraint flag to 1 */
+    WORD32 i4_frame_only =
+        (1 == ps_enc_ctxt->i4_blu_ray_spec) ? 1 : DEFAULT_FRAME_ONLY_CONSTRAINT_FLAG;
+
+    /* range extension constraint selectors, derived once chroma format idc is set */
+    WORD32 i4_rext_420;
+    WORD32 i4_rext_422;
+    WORD32 i4_rext;
+
+    /* P7 is treated as P6 for the preset based decisions in this function */
+    if(IHEVCE_QUALITY_P7 == i4_preset)
+    {
+        i4_preset = IHEVCE_QUALITY_P6;
+    }
+
+    /*------------------------- parameter set ids --------------------------*/
+    ps_sps->i1_vps_id = ps_vps->i1_vps_id;
+
+    ps_sps->i1_sps_id = (1 == ps_stat_cfg_prms->s_tgt_lyr_prms.i4_mres_single_out)
+                            ? i4_resolution_id
+                            : DEFAULT_SPS_ID;
+
+    /*------------------------- picture dimensions -------------------------*/
+    ps_sps->i2_pic_width_in_luma_samples = ps_frm_ctb_prms->i4_cu_aligned_pic_wd;
+
+    ps_sps->i2_pic_height_in_luma_samples = ps_frm_ctb_prms->i4_cu_aligned_pic_ht;
+
+    /* ctb counts are required in generation of slice header */
+    ps_sps->i2_pic_wd_in_ctb = ps_frm_ctb_prms->i4_num_ctbs_horz;
+
+    ps_sps->i2_pic_ht_in_ctb = ps_frm_ctb_prms->i4_num_ctbs_vert;
+
+    /*--------------------- chroma format and bit depth --------------------*/
+    ps_sps->i1_chroma_format_idc = (IV_YUV_422SP_UV == ps_src_params->i4_chr_format) ? 2 : 1;
+
+    ps_sps->i1_separate_colour_plane_flag = 0;
+
+    ps_sps->i1_bit_depth_luma_minus8 = i4_bit_depth_minus8;
+
+    ps_sps->i1_bit_depth_chroma_minus8 = i4_bit_depth_minus8;
+
+    /*------------------- coding / transform block sizes -------------------*/
+    ps_sps->i1_log2_min_coding_block_size = ps_config_prms->i4_min_log2_cu_size;
+
+    ps_sps->i1_log2_diff_max_min_coding_block_size =
+        ps_config_prms->i4_max_log2_cu_size - ps_config_prms->i4_min_log2_cu_size;
+
+    ps_sps->i1_log2_ctb_size =
+        ps_sps->i1_log2_min_coding_block_size + ps_sps->i1_log2_diff_max_min_coding_block_size;
+
+    ps_sps->i1_log2_min_transform_block_size = ps_config_prms->i4_min_log2_tu_size;
+
+    ps_sps->i1_log2_diff_max_min_transform_block_size =
+        ps_config_prms->i4_max_log2_tu_size - ps_config_prms->i4_min_log2_tu_size;
+
+    ps_sps->i1_max_transform_hierarchy_depth_inter = ps_config_prms->i4_max_tr_tree_depth_nI;
+
+    ps_sps->i1_max_transform_hierarchy_depth_intra = ps_config_prms->i4_max_tr_tree_depth_I;
+
+    /*--------------------------- tool enables -----------------------------*/
+    ps_sps->i1_amp_enabled_flag = AMP_ENABLED;
+
+    ps_sps->i1_pcm_enabled_flag = PCM_DISABLED;
+
+    ps_sps->i1_pcm_loop_filter_disable_flag = PCM_LOOP_FILTER_DISABLED;
+
+    ps_sps->i1_sps_temporal_mvp_enable_flag = !DISABLE_TMVP;
+
+    ps_sps->i1_strong_intra_smoothing_enable_flag = STRONG_INTRA_SMOOTHING_FLAG_ENABLE;
+
+    ps_sps->i1_vui_parameters_present_flag = ps_out_strm_params->i4_vui_enable;
+
+    /* SAO is enabled only for presets below P4 (PQ, HQ, MS) */
+    ps_sps->i1_sample_adaptive_offset_enabled_flag =
+        (i4_preset < IHEVCE_QUALITY_P4) ? SAO_ENABLED : SAO_DISABLED;
+#if DISABLE_SAO
+    ps_sps->i1_sample_adaptive_offset_enabled_flag = SAO_DISABLED;
+#endif
+
+    ps_sps->i1_scaling_list_enable_flag = (1 == ps_coding_params->i4_use_default_sc_mtx)
+                                              ? SCALING_LIST_ENABLED
+                                              : SCALING_LIST_DISABLED;
+
+    ps_sps->i1_sps_scaling_list_data_present_flag = SCALING_LIST_DATA_ABSENT;
+
+    /*----------------------------- sub layers -----------------------------*/
+    /* one extra sub layer is signalled when temporal scalability is enabled */
+    ps_sps->i1_sps_max_sub_layers =
+        (1 == ps_stat_cfg_prms->s_tgt_lyr_prms.i4_enable_temporal_scalability)
+            ? (DEFAULT_SPS_MAX_SUB_LAYERS + 1)
+            : DEFAULT_SPS_MAX_SUB_LAYERS;
+
+    ps_sps->i1_sps_sub_layer_ordering_info_present_flag = SPS_SUB_LAYER_ORDERING_INFO_ABSENT;
+
+    /* temporal id nesting is signalled only for a single sub layer */
+    ps_sps->i1_sps_temporal_id_nesting_flag = (1 == ps_sps->i1_sps_max_sub_layers);
+
+    /*------------------------ reference pictures --------------------------*/
+    /* short term and long term ref pic set not signalled in sps */
+    ps_sps->i1_num_short_term_ref_pic_sets = 0;
+
+    ps_sps->i1_long_term_ref_pics_present_flag = 0;
+
+    ps_sps->i1_num_long_term_ref_pics_sps = 0;
+
+    ps_sps->i1_log2_max_pic_order_cnt_lsb = DEFAULT_LOG2_MAX_POC_LSB;
+
+    /*------------------------------ cropping ------------------------------*/
+    ps_sps->i1_pic_cropping_flag = !!ps_coding_params->i4_cropping_mode;
+
+    if(ps_sps->i1_pic_cropping_flag)
+    {
+        ps_sps->i2_pic_crop_top_offset = DEFAULT_PIC_CROP_TOP_OFFSET;
+
+        ps_sps->i2_pic_crop_left_offset = DEFAULT_PIC_CROP_LEFT_OFFSET;
+
+        /* padded rows / columns (coded size minus original source size) are  */
+        /* halved so that the offsets are signalled in terms of chroma unit   */
+        /* NOTE(review): the same >> 1 is applied for chroma format idc 2;    */
+        /* confirm the vertical scaling against the header writer for 4:2:2   */
+        ps_sps->i2_pic_crop_bottom_offset = (ps_sps->i2_pic_height_in_luma_samples -
+                                             ps_stat_cfg_prms->s_src_prms.i4_orig_height) >>
+                                            1;
+
+        ps_sps->i2_pic_crop_right_offset = (ps_sps->i2_pic_width_in_luma_samples -
+                                            ps_stat_cfg_prms->s_src_prms.i4_orig_width) >>
+                                           1;
+    }
+
+    /*------------- range extension constraint flag selectors --------------*/
+    /* profile 4 with chroma format idc 1 or 2 sets the 12 bit, 4:2:2 and    */
+    /* lower bit rate constraints; the 4:2:0 constraint only for idc 1       */
+    i4_rext_420 = (4 == i4_profile) && (1 == ps_sps->i1_chroma_format_idc);
+    i4_rext_422 = (4 == i4_profile) && (2 == ps_sps->i1_chroma_format_idc);
+    i4_rext = i4_rext_420 || i4_rext_422;
+
+    /*------------------- per sub layer ordering and ptl -------------------*/
+    for(i4_sub_lyr = 0; i4_sub_lyr < (ps_sps->i1_sps_max_sub_layers); i4_sub_lyr++)
+    {
+        ps_sps->ai1_sps_max_dec_pic_buffering[i4_sub_lyr] =
+            ps_coding_params->i4_max_reference_frames + (2 << i4_field_pic) - 1;
+
+        ps_sps->ai1_sps_max_num_reorder_pics[i4_sub_lyr] =
+            ps_coding_params->i4_max_temporal_layers << i4_field_pic;
+
+        ps_sps->ai1_sps_max_latency_increase[i4_sub_lyr] = 0;
+
+        /* temporal scalability: sub layer level is signalled, profile is not */
+        ps_sps->s_ptl.ai1_sub_layer_level_present_flag[i4_sub_lyr] = 1;
+
+        ps_sps->s_ptl.ai1_sub_layer_profile_present_flag[i4_sub_lyr] = 0;
+
+        /* profile space 0 is the default (blu ray specific change) */
+        ps_sps->s_ptl.as_ptl_sub[i4_sub_lyr].i1_profile_space = 0;
+
+        ps_sps->s_ptl.as_ptl_sub[i4_sub_lyr].i1_profile_idc = i4_profile;
+
+        /* only the compatibility flag of the current profile is set */
+        memset(
+            ps_sps->s_ptl.as_ptl_sub[i4_sub_lyr].ai1_profile_compatibility_flag,
+            0,
+            MAX_PROFILE_COMPATBLTY * sizeof(WORD8));
+
+        ps_sps->s_ptl.as_ptl_sub[i4_sub_lyr].ai1_profile_compatibility_flag[i4_profile] = 1;
+
+        if(0 == i4_sub_lyr)
+        {
+            /* only one level of temporal scalability is supported: sub layer */
+            /* 0 takes the level one entry before the configured one in       */
+            /* g_as_level_data (same entry when the index is already 0)       */
+            i4_level_idx = ihevce_get_level_index(i4_level);
+
+            if(0 != i4_level_idx)
+            {
+                i4_level_idx--;
+            }
+
+            ps_sps->s_ptl.as_ptl_sub[i4_sub_lyr].u1_level_idc =
+                (WORD32)g_as_level_data[i4_level_idx].e_level;
+        }
+        else
+        {
+            ps_sps->s_ptl.as_ptl_sub[i4_sub_lyr].u1_level_idc = i4_level;
+        }
+
+        ps_sps->s_ptl.as_ptl_sub[i4_sub_lyr].i1_tier_flag = i4_tier;
+
+        if(i4_src_type_known)
+        {
+            ps_sps->s_ptl.as_ptl_sub[i4_sub_lyr].i1_general_progressive_source_flag =
+                i4_progressive_src;
+
+            ps_sps->s_ptl.as_ptl_sub[i4_sub_lyr].i1_general_interlaced_source_flag =
+                i4_interlaced_src;
+        }
+
+        ps_sps->s_ptl.as_ptl_sub[i4_sub_lyr].i1_general_non_packed_constraint_flag =
+            DEFAULT_NON_PACKED_CONSTRAINT_FLAG;
+
+        ps_sps->s_ptl.as_ptl_sub[i4_sub_lyr].i1_frame_only_constraint_flag = i4_frame_only;
+
+        ps_sps->s_ptl.as_ptl_sub[i4_sub_lyr].i1_general_max_12bit_constraint_flag = i4_rext;
+
+        ps_sps->s_ptl.as_ptl_sub[i4_sub_lyr].i1_general_max_10bit_constraint_flag = 0;
+
+        ps_sps->s_ptl.as_ptl_sub[i4_sub_lyr].i1_general_max_8bit_constraint_flag = 0;
+
+        ps_sps->s_ptl.as_ptl_sub[i4_sub_lyr].i1_general_max_422chroma_constraint_flag = i4_rext;
+
+        ps_sps->s_ptl.as_ptl_sub[i4_sub_lyr].i1_general_max_420chroma_constraint_flag =
+            i4_rext_420;
+
+        ps_sps->s_ptl.as_ptl_sub[i4_sub_lyr].i1_general_max_monochrome_constraint_flag = 0;
+
+        ps_sps->s_ptl.as_ptl_sub[i4_sub_lyr].i1_general_intra_constraint_flag = 0;
+
+        ps_sps->s_ptl.as_ptl_sub[i4_sub_lyr].i1_general_one_picture_only_constraint_flag = 0;
+
+        ps_sps->s_ptl.as_ptl_sub[i4_sub_lyr].i1_general_lower_bit_rate_constraint_flag = i4_rext;
+    }
+
+    /*------------- general profile, tier and level information ------------*/
+    /* profile space 0 is the default (blu ray specific change) */
+    ps_sps->s_ptl.s_ptl_gen.i1_profile_space = 0;
+
+    ps_sps->s_ptl.s_ptl_gen.i1_profile_idc = i4_profile;
+
+    /* only the compatibility flag of the current profile is set */
+    memset(
+        ps_sps->s_ptl.s_ptl_gen.ai1_profile_compatibility_flag,
+        0,
+        MAX_PROFILE_COMPATBLTY * sizeof(WORD8));
+
+    ps_sps->s_ptl.s_ptl_gen.ai1_profile_compatibility_flag[i4_profile] = 1;
+
+    /* general level is the configured codec level as is */
+    ps_sps->s_ptl.s_ptl_gen.u1_level_idc = i4_level;
+
+    ps_sps->s_ptl.s_ptl_gen.i1_tier_flag = i4_tier;
+
+    if(i4_src_type_known)
+    {
+        ps_sps->s_ptl.s_ptl_gen.i1_general_progressive_source_flag = i4_progressive_src;
+
+        ps_sps->s_ptl.s_ptl_gen.i1_general_interlaced_source_flag = i4_interlaced_src;
+    }
+
+    ps_sps->s_ptl.s_ptl_gen.i1_general_non_packed_constraint_flag =
+        DEFAULT_NON_PACKED_CONSTRAINT_FLAG;
+
+    ps_sps->s_ptl.s_ptl_gen.i1_frame_only_constraint_flag = i4_frame_only;
+
+    ps_sps->s_ptl.s_ptl_gen.i1_general_max_12bit_constraint_flag = i4_rext;
+
+    ps_sps->s_ptl.s_ptl_gen.i1_general_max_10bit_constraint_flag = 0;
+
+    ps_sps->s_ptl.s_ptl_gen.i1_general_max_8bit_constraint_flag = 0;
+
+    ps_sps->s_ptl.s_ptl_gen.i1_general_max_422chroma_constraint_flag = i4_rext;
+
+    ps_sps->s_ptl.s_ptl_gen.i1_general_max_420chroma_constraint_flag = i4_rext_420;
+
+    ps_sps->s_ptl.s_ptl_gen.i1_general_max_monochrome_constraint_flag = 0;
+
+    ps_sps->s_ptl.s_ptl_gen.i1_general_intra_constraint_flag = 0;
+
+    ps_sps->s_ptl.s_ptl_gen.i1_general_one_picture_only_constraint_flag = 0;
+
+    ps_sps->s_ptl.s_ptl_gen.i1_general_lower_bit_rate_constraint_flag = i4_rext;
+
+    return IHEVCE_SUCCESS;
+}
+
+/**
+******************************************************************************
+*
+*  @brief Fills the pps structure from the encoder configuration
+*
+*  @par   Description
+*  Writes the picture level fields of ps_pps (ids, initial qp, deblocking,
+*  tile layout, weighted prediction and other tool flags) so that the
+*  structure can be used for header generation
+*
+*  @param[out]  ps_pps
+*  pointer to pps params structure which needs to be populated
+*
+*  @param[in]   ps_sps
+*  pointer to sps params referred to by the pps; supplies the sps id
+*
+*  @param[in]   ps_src_params
+*  pointer to source config params; not used by this function
+*
+*  @param[in]   ps_out_strm_params
+*  pointer to output stream config params; not used by this function
+*
+*  @param[in]   ps_coding_params
+*  pointer to coding params; deblocking type, entropy sync, weighted
+*  prediction and vq toggles
+*
+*  @param[in]   ps_config_prms
+*  pointer to configuration params; cu level rc, frame qp limits and max
+*  log2 cu size
+*
+*  @param[in]   ps_stat_cfg_prms
+*  pointer to static config params; slice segment mode and target layer
+*  params (frame qp, quality preset, mres single out)
+*
+*  @param[in]   i4_bitrate_instance_id
+*  index into ai4_frame_qp of the selected resolution, used for init qp
+*
+*  @param[in]   i4_resolution_id
+*  index into as_tgt_params; also used as pps id when i4_mres_single_out is 1
+*
+*  @param[in]   ps_tile_params_base
+*  pointer to tile params; tiles enabled flag, tile counts and spacing flag
+*
+*  @param[in]   pi4_column_width_array
+*  tile column widths; read only for tiles with non uniform spacing and
+*  stored after a right shift by i4_max_log2_cu_size
+*
+*  @param[in]   pi4_row_height_array
+*  tile row heights; read only for tiles with non uniform spacing and
+*  stored after a right shift by i4_max_log2_cu_size
+*
+*  @return      IHEVCE_SUCCESS (no failure path in this function)
+*
+******************************************************************************
+*/
+WORD32 ihevce_populate_pps(
+    pps_t *ps_pps,
+    sps_t *ps_sps,
+    ihevce_src_params_t *ps_src_params,
+    ihevce_out_strm_params_t *ps_out_strm_params,
+    ihevce_coding_params_t *ps_coding_params,
+    ihevce_config_prms_t *ps_config_prms,
+    ihevce_static_cfg_params_t *ps_stat_cfg_prms,
+    WORD32 i4_bitrate_instance_id,
+    WORD32 i4_resolution_id,
+    ihevce_tile_params_t *ps_tile_params_base,
+    WORD32 *pi4_column_width_array,
+    WORD32 *pi4_row_height_array)
+{
+    WORD32 i4_vqet = ps_coding_params->i4_vqet;
+    WORD32 i4_wt_pred = ps_coding_params->i4_weighted_pred_enable;
+    WORD32 i4_preset =
+        ps_stat_cfg_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;
+    WORD32 i4_frame_qp = ps_stat_cfg_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id]
+                             .ai4_frame_qp[i4_bitrate_instance_id];
+
+    /* vq toggles (noise preservation or any psy rd opt) that need chroma qp offsets */
+    WORD32 i4_chroma_qp_offset_mask =
+        (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_NOISE_PRESERVATION) |
+        (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_1) |
+        (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_2) |
+        (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_3);
+
+    (void)ps_src_params;
+    (void)ps_out_strm_params;
+
+    /*------------------------- parameter set ids --------------------------*/
+    ps_pps->i1_sps_id = ps_sps->i1_sps_id;
+
+    ps_pps->i1_pps_id = (1 == ps_stat_cfg_prms->s_tgt_lyr_prms.i4_mres_single_out)
+                            ? i4_resolution_id
+                            : DEFAULT_PPS_ID;
+
+    /*--------------------------------- qp ---------------------------------*/
+    /* init qp is different for each bit-rate instance; clipped to frame qp limits */
+    ps_pps->i1_pic_init_qp =
+        CLIP3(i4_frame_qp, ps_config_prms->i4_min_frame_qp, ps_config_prms->i4_max_frame_qp);
+
+    /* delta qp can be disabled for constant qp mode to save on qp signalling bits */
+    ps_pps->i1_cu_qp_delta_enabled_flag = ps_config_prms->i4_cu_level_rc;
+
+    /* diff_cu_qp_delta_depth of 3,2,1 limits cu level qp modulation to */
+    /* 8x8, 16x16, 32x32 respectively                                   */
+    ps_pps->i1_diff_cu_qp_delta_depth = CU_LEVEL_QP_LIMIT_8x8;
+
+    ps_pps->i1_pic_cb_qp_offset = DEFAULT_PIC_CB_QP_OFFSET;
+
+    ps_pps->i1_pic_cr_qp_offset = DEFAULT_PIC_CR_QP_OFFSET;
+
+    /* slice level chroma qp offsets only if the control toggler is set */
+    /* together with stasino (noise preservation) or psy rd             */
+    if((i4_vqet & (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER)) &&
+       (i4_vqet & i4_chroma_qp_offset_mask))
+    {
+        ps_pps->i1_pic_slice_level_chroma_qp_offsets_present_flag =
+            SLICE_LEVEL_CHROMA_QP_OFFSETS_PRESENT;
+    }
+    else
+    {
+        ps_pps->i1_pic_slice_level_chroma_qp_offsets_present_flag =
+            SLICE_LEVEL_CHROMA_QP_OFFSETS_ABSENT;
+    }
+
+    /*----------------------------- deblocking -----------------------------*/
+    ps_pps->i1_deblocking_filter_control_present_flag = DEBLOCKING_FILTER_CONTROL_PRESENT;
+
+    ps_pps->i1_deblocking_filter_override_enabled_flag = DEBLOCKING_FILTER_OVERRIDE_DISABLED;
+
+    ps_pps->i1_pic_disable_deblocking_filter_flag = ps_coding_params->i4_deblocking_type;
+
+    ps_pps->i1_beta_offset_div2 = DEFAULT_BETA_OFFSET;
+
+    ps_pps->i1_tc_offset_div2 = DEFAULT_TC_OFFSET;
+
+    /* SAO_note_01: Currently SAO is implemented in such a way that the
+       loop-filter has to be enabled across syntactical-tiles and slices.
+       Search for <SAO_note_01> in workspace to know more */
+    ps_pps->i1_loop_filter_across_slices_enabled_flag = LF_ACROSS_SLICES_ENABLED;
+
+    /*------------------------ slices and entropy --------------------------*/
+    ps_pps->i1_dependent_slice_enabled_flag =
+        (0 != ps_stat_cfg_prms->s_slice_params.i4_slice_segment_mode) ? DEPENDENT_SLICE_ENABLED
+                                                                      : DEPENDENT_SLICE_DISABLED;
+
+    ps_pps->i1_entropy_coding_sync_enabled_flag = (1 == ps_coding_params->i4_enable_entropy_sync)
+                                                      ? ENTROPY_CODING_SYNC_ENABLED
+                                                      : ENTROPY_CODING_SYNC_DISABLED;
+
+    ps_pps->i1_entropy_slice_enabled_flag = ENTROPY_SLICE_DISABLED;
+
+    ps_pps->i1_cabac_init_present_flag = CABAC_INIT_ABSENT;
+
+    ps_pps->i1_num_extra_slice_header_bits = 0;
+
+    ps_pps->i1_output_flag_present_flag = OUTPUT_FLAG_ABSENT;
+
+    ps_pps->i1_slice_extension_present_flag = SLICE_EXTENSION_ABSENT;
+
+    ps_pps->i1_slice_header_extension_present_flag = SLICE_HEADER_EXTENSION_ABSENT;
+
+    /*------------------------------- tiles --------------------------------*/
+    if(0 != ps_tile_params_base->i4_tiles_enabled_flag)
+    {
+        WORD32 i4_num_cols = ps_tile_params_base->i4_num_tile_cols;
+        WORD32 i4_num_rows = ps_tile_params_base->i4_num_tile_rows;
+
+        ps_pps->i1_tiles_enabled_flag = TILES_ENABLED;
+
+        ps_pps->i1_num_tile_columns = i4_num_cols;
+
+        ps_pps->i1_num_tile_rows = i4_num_rows;
+
+        ps_pps->i1_uniform_spacing_flag = ps_tile_params_base->i4_uniform_spacing_flag;
+
+        /* loop filter across tiles is switched off whenever tiles are in use */
+        ps_pps->i1_loop_filter_across_tiles_enabled_flag = 0;
+
+        if(SPACING_IS_NONUNIFORM == ps_pps->i1_uniform_spacing_flag)
+        {
+            WORD32 i4_log2_max_cu = ps_config_prms->i4_max_log2_cu_size;
+            WORD32 i4_idx;
+
+            /* column widths go into u2_wd of the first i4_num_cols tile entries */
+            for(i4_idx = 0; i4_idx < i4_num_cols; i4_idx++)
+            {
+                ps_pps->ps_tile[i4_idx].u2_wd = pi4_column_width_array[i4_idx] >> i4_log2_max_cu;
+            }
+
+            /* row heights go into u2_ht of the first i4_num_rows tile entries */
+            for(i4_idx = 0; i4_idx < i4_num_rows; i4_idx++)
+            {
+                ps_pps->ps_tile[i4_idx].u2_ht = pi4_row_height_array[i4_idx] >> i4_log2_max_cu;
+            }
+        }
+    }
+    else
+    {
+        ps_pps->i1_tiles_enabled_flag = TILES_DISABLED;
+
+        ps_pps->i1_num_tile_columns = NUM_TILES_COLS;
+
+        ps_pps->i1_num_tile_rows = NUM_TILES_ROWS;
+
+        ps_pps->i1_uniform_spacing_flag = SPACING_IS_UNIFORM;
+
+        ps_pps->i1_loop_filter_across_tiles_enabled_flag = 1;
+    }
+
+    /*----------------------------- prediction -----------------------------*/
+    ps_pps->i1_constrained_intra_pred_flag = CONSTR_IPRED_DISABLED;
+
+    ps_pps->i1_log2_parallel_merge_level = DEFAULT_PARALLEL_MERGE_LEVEL;
+
+    ps_pps->i1_num_ref_idx_l0_default_active = DEFAULT_NUM_REF_IDX_L0_DEFAULT_ACTIVE;
+
+    ps_pps->i1_num_ref_idx_l1_default_active = DEFAULT_NUM_REF_IDX_L1_DEFAULT_ACTIVE;
+
+    /* list modification and both weighted prediction flags follow the */
+    /* weighted pred enable setting                                    */
+    ps_pps->i1_lists_modification_present_flag = i4_wt_pred;
+
+    ps_pps->i1_weighted_pred_flag = i4_wt_pred;
+
+    ps_pps->i1_weighted_bipred_flag = i4_wt_pred;
+
+    /*------------------------ transform / residual ------------------------*/
+    ps_pps->i1_pps_scaling_list_data_present_flag = SCALING_LIST_DATA_ABSENT;
+
+    ps_pps->i1_transform_skip_enabled_flag = TRANSFORM_SKIP_DISABLED;
+
+    ps_pps->i1_transquant_bypass_enable_flag = TRANSFORM_BYPASS_DISABLED;
+
+    /* sign data hiding only for presets below P3 */
+    ps_pps->i1_sign_data_hiding_flag =
+        (i4_preset < IHEVCE_QUALITY_P3) ? SIGN_DATA_HIDDEN : SIGN_DATA_UNHIDDEN;
+
+#if DISABLE_SBH
+    ps_pps->i1_sign_data_hiding_flag = SIGN_DATA_UNHIDDEN;
+#endif
+
+    return IHEVCE_SUCCESS;
+}
+
+/**
+******************************************************************************
+*
+* @brief Populates slice header structure
+*
+* @par Description
+* Populates slice header structure for its use in header generation
+*
+* @param[out] ps_slice_hdr
+* pointer to slice header structure that needs to be populated
+*
+* @param[in] ps_pps
+* pointer to pps params structure referred to by the slice
+*
+* @param[in] ps_sps
+* pointer to sps params referred to by the pps
+*
+* @param[in] nal_unit_type
+* nal unit type for current slice
+*
+* @param[in] slice_type
+* current slice type
+*
+* @param[in] ctb_x
+* x offset of first ctb in current slice (ctb units)
+*
+* @param[in] ctb_y
+* y offset of first ctb in current slice (ctb units)
+*
+* @param[in] poc
+* pic order count for current slice (shall be 0 for IDR pics)
+*
+* @param[in] cur_slice_qp
+* qp for the current slice
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ihevce_populate_slice_header(
+ slice_header_t *ps_slice_hdr,
+ pps_t *ps_pps,
+ sps_t *ps_sps,
+ WORD32 nal_unit_type,
+ WORD32 slice_type,
+ WORD32 ctb_x,
+ WORD32 ctb_y,
+ WORD32 poc,
+ WORD32 cur_slice_qp,
+ WORD32 max_merge_candidates,
+ WORD32 i4_rc_pass_num,
+ WORD32 i4_quality_preset,
+ WORD32 stasino_enabled)
+{
+ WORD32 i;
+ WORD32 return_status = IHEVCE_SUCCESS;
+ WORD32 RapPicFlag = (nal_unit_type >= NAL_BLA_W_LP) && (nal_unit_type <= NAL_RSV_RAP_VCL23);
+
+ WORD32 idr_pic_flag = (NAL_IDR_W_LP == nal_unit_type) || (NAL_IDR_N_LP == nal_unit_type);
+
+ WORD32 disable_deblocking_filter_flag;
+
+ (void)ctb_x;
+ (void)ctb_y;
+ /* first_slice_in_pic_flag */
+ if(i4_quality_preset == IHEVCE_QUALITY_P7)
+ {
+ i4_quality_preset = IHEVCE_QUALITY_P6;
+ }
+
+ if(RapPicFlag)
+ {
+ /* no_output_of_prior_pics_flag */ /* TODO:revisit this */
+ ps_slice_hdr->i1_no_output_of_prior_pics_flag = 0; //BLU_RAY specific already done
+ }
+
+ /* pic_parameter_set_id */
+ ps_slice_hdr->i1_pps_id = ps_pps->i1_pps_id;
+
+ {
+ /* This i1_dependent_slice_flag will further be updated in generate_slice_header() function */
+ ps_slice_hdr->i1_dependent_slice_flag = 0;
+ }
+
+ if(!ps_slice_hdr->i1_dependent_slice_flag)
+ {
+ /* slice_type */
+ ps_slice_hdr->i1_slice_type = (WORD8)slice_type;
+
+ if(ps_pps->i1_output_flag_present_flag)
+ {
+ /* pic_output_flag */ /* TODO:revisit this */
+ ps_slice_hdr->i1_pic_output_flag = 0;
+ }
+
+ /* separate colour plane flag not supported in this encoder */
+ ASSERT(0 == ps_sps->i1_separate_colour_plane_flag);
+
+ if(!idr_pic_flag)
+ {
+ WORD32 log2_max_poc_lsb = ps_sps->i1_log2_max_pic_order_cnt_lsb;
+
+ /* pic_order_cnt_lsb */
+ ps_slice_hdr->i4_pic_order_cnt_lsb = poc & ((1 << log2_max_poc_lsb) - 1);
+
+ /* short_term_ref_pic_set_sps_flag */
+ /* TODO : revisit this */
+ ps_slice_hdr->i1_short_term_ref_pic_set_sps_flag = 0;
+
+ if(!ps_slice_hdr->i1_short_term_ref_pic_set_sps_flag)
+ {
+ /* TODO: To populate short term ref pic set for this slice */
+ }
+
+ /* long term ref pic flag not supported */
+ ASSERT(0 == ps_sps->i1_long_term_ref_pics_present_flag);
+ if(ps_sps->i1_long_term_ref_pics_present_flag)
+ {
+ /* TODO : not supported */
+ }
+ }
+
+ //ASSERT(0 == ps_sps->i1_sample_adaptive_offset_enabled_flag);
+ if(ps_sps->i1_sample_adaptive_offset_enabled_flag)
+ {
+ /* slice_sao_luma_flag */
+ ps_slice_hdr->i1_slice_sao_luma_flag = 1;
+ ps_slice_hdr->i1_slice_sao_chroma_flag = 1;
+ }
+
+#if DISABLE_LUMA_SAO
+ ps_slice_hdr->i1_slice_sao_luma_flag = 0;
+#endif
+
+#if DISABLE_CHROMA_SAO
+ ps_slice_hdr->i1_slice_sao_chroma_flag = 0;
+#endif
+ if((PSLICE == ps_slice_hdr->i1_slice_type) || (BSLICE == ps_slice_hdr->i1_slice_type))
+ {
+ /* TODO: currently temporal mvp disabled, need to enable later */
+ if(1 == ps_sps->i1_sps_temporal_mvp_enable_flag)
+ {
+ ps_slice_hdr->i1_slice_temporal_mvp_enable_flag = 1;
+ }
+ else
+ {
+ ps_slice_hdr->i1_slice_temporal_mvp_enable_flag = 0;
+ }
+
+ /* num_ref_idx_active_override_flag */
+ ps_slice_hdr->i1_num_ref_idx_active_override_flag = 0;
+
+ if(ps_slice_hdr->i1_num_ref_idx_active_override_flag)
+ {
+ /* TODO revisit this*/
+ /* i1_num_ref_idx_l0_active_minus1 */
+ ps_slice_hdr->i1_num_ref_idx_l0_active = 1;
+
+ if(BSLICE == ps_slice_hdr->i1_slice_type)
+ {
+ /* i1_num_ref_idx_l1_active */
+ /* TODO revisit this*/
+ ps_slice_hdr->i1_num_ref_idx_l1_active = 1;
+ }
+ }
+
+ if(BSLICE == ps_slice_hdr->i1_slice_type)
+ {
+ /* mvd_l1_zero_flag */
+ ps_slice_hdr->i1_mvd_l1_zero_flag = 0;
+ }
+
+ {
+ /* cabac_init_flag curently set to 0 */
+ ps_slice_hdr->i1_cabac_init_flag = ps_pps->i1_cabac_init_present_flag ? 1 : 0;
+ }
+
+ if(ps_slice_hdr->i1_slice_temporal_mvp_enable_flag)
+ {
+ if(BSLICE == ps_slice_hdr->i1_slice_type)
+ {
+ /* collocated_from_l0_flag */
+ ps_slice_hdr->i1_collocated_from_l0_flag = 0;
+ }
+ else if(PSLICE == ps_slice_hdr->i1_slice_type)
+ {
+ ps_slice_hdr->i1_collocated_from_l0_flag = 1;
+ }
+
+ if((ps_slice_hdr->i1_collocated_from_l0_flag &&
+ (ps_slice_hdr->i1_num_ref_idx_l0_active > 1)) ||
+ (!ps_slice_hdr->i1_collocated_from_l0_flag &&
+ (ps_slice_hdr->i1_num_ref_idx_l1_active > 1)))
+ {
+ /* collocated_ref_idx */
+ /* TODO revisit this*/
+ ps_slice_hdr->i1_collocated_ref_idx = 0;
+ //ps_slice_hdr->i1_num_ref_idx_l1_active - 1;
+ }
+ }
+ }
+ ps_slice_hdr->i1_max_num_merge_cand = max_merge_candidates;
+
+ /* TODO : revisit this */
+ ps_slice_hdr->i1_slice_qp_delta = (WORD8)cur_slice_qp - ps_pps->i1_pic_init_qp;
+
+ if(!ps_pps->i1_pic_slice_level_chroma_qp_offsets_present_flag || !stasino_enabled)
+ {
+ /* slice_cb_qp_offset */
+ ps_slice_hdr->i1_slice_cb_qp_offset = 0;
+
+ /* slice_cr_qp_offset */
+ ps_slice_hdr->i1_slice_cr_qp_offset = 0;
+ }
+ else /* only noisy regions have lower Chroma QP rating */
+ {
+ ps_slice_hdr->i1_slice_cb_qp_offset = -2;
+ ps_slice_hdr->i1_slice_cr_qp_offset = -2;
+ }
+
+ if(ps_pps->i1_deblocking_filter_control_present_flag)
+ {
+ ps_slice_hdr->i1_deblocking_filter_override_flag = 0;
+
+ if(ps_pps->i1_deblocking_filter_override_enabled_flag)
+ {
+ /* deblocking_filter_override_flag */
+ ps_slice_hdr->i1_deblocking_filter_override_flag = 0;
+ }
+
+ if(ps_slice_hdr->i1_deblocking_filter_override_flag)
+ {
+ /* slice_disable_deblocking_filter_flag */
+ ps_slice_hdr->i1_slice_disable_deblocking_filter_flag = DISABLE_DEBLOCKING_FLAG;
+
+ if(!ps_slice_hdr->i1_slice_disable_deblocking_filter_flag)
+ {
+ /* beta_offset_div2 */
+ ps_slice_hdr->i1_beta_offset_div2 = 0;
+
+ /* tc_offset_div2 */
+ ps_slice_hdr->i1_tc_offset_div2 = 0;
+ }
+ }
+ }
+
+ disable_deblocking_filter_flag = ps_slice_hdr->i1_slice_disable_deblocking_filter_flag |
+ ps_pps->i1_pic_disable_deblocking_filter_flag;
+
+ if(ps_pps->i1_loop_filter_across_slices_enabled_flag &&
+ (ps_slice_hdr->i1_slice_sao_luma_flag || ps_slice_hdr->i1_slice_sao_chroma_flag ||
+ !disable_deblocking_filter_flag))
+ {
+ /* slice_loop_filter_across_slices_enabled_flag */
+ ps_slice_hdr->i1_slice_loop_filter_across_slices_enabled_flag = 1;
+ }
+ }
+
+ if(1 == ps_pps->i1_entropy_coding_sync_enabled_flag)
+ {
+ /* num_entry_point_offsets, same as NUM of ctb rows to enable entropy sync at start of every CTB */
+ ps_slice_hdr->i4_num_entry_point_offsets = ps_sps->i2_pic_ht_in_ctb - 1;
+
+ if(ps_slice_hdr->i4_num_entry_point_offsets > 0)
+ {
+ /* generate offset_len here */
+ /* fixing the offset lenght assuming 4kx2k is log2(w * h / num_ctb_row) = 20*/
+ ps_slice_hdr->i1_offset_len = 24;
+ }
+ }
+ else
+ {
+ ps_slice_hdr->i4_num_entry_point_offsets = 0;
+ ps_slice_hdr->i1_offset_len = 0;
+ }
+
+ /* slice_header_extension_present_flag not supported */
+ //if(ps_pps->i1_slice_header_extension_present_flag)
+ {
+ /* slice_header_extension_length */
+ ps_slice_hdr->i2_slice_header_extension_length = 0;
+
+ for(i = 0; i < ps_slice_hdr->i2_slice_header_extension_length; i++)
+ {
+ /* slice_header_extension_data_byte[i] */
+ }
+ }
+
+ /* TODO : hard coding ref pix set for now */
+ /* Need to update this once the ref pics are known from lap output */
+
+ /* NOTE */
+ /* inter ref pic prediction is too much of logic for few bit savings*/
+ /* at slice header level this is not supported by the encoder */
+ ps_slice_hdr->s_stref_picset.i1_inter_ref_pic_set_prediction_flag = 0;
+
+ /* hardcoding 1 ref pic for now ..... will be updated base on lap output */
+ ps_slice_hdr->s_stref_picset.i1_num_delta_pocs = 1;
+ ps_slice_hdr->s_stref_picset.i1_num_neg_pics = 1;
+ ps_slice_hdr->s_stref_picset.i1_num_pos_pics = 0;
+
+ memset(
+ ps_slice_hdr->s_stref_picset.ai2_delta_poc,
+ 0,
+ MAX_DPB_SIZE * sizeof(*ps_slice_hdr->s_stref_picset.ai2_delta_poc));
+ ps_slice_hdr->s_stref_picset.ai2_delta_poc[0] = 1;
+
+ return return_status;
+}
+
+/**
+******************************************************************************
+*
+* @brief Insert entry point offsets into an already generated slice header
+*
+* @par Description
+* Called after frame processing is done (7.3.5.1): converts the absolute
+* offsets in pu4_entry_point_offset to per-substream sizes, writes them
+* (plus any slice header extension) and moves the slice data up behind them
+*
+* @param[inout] ps_bitstrm
+* pointer to bitstream context for generating slice header
+*
+* @param[inout] ps_slice_hdr
+* pointer to slice header params; pu4_entry_point_offset[] and i1_offset_len
+* are overwritten
+*
+* @param[in] ps_pps
+* pointer to pps params referred by slice
+*
+* @param[in] u4_first_slice_start_offset
+* byte offset in the stream buffer where the slice data currently starts
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ihevce_insert_entry_offset_slice_header(
+    bitstrm_t *ps_bitstrm,
+    slice_header_t *ps_slice_hdr,
+    pps_t *ps_pps,
+    UWORD32 u4_first_slice_start_offset)
+{
+    WORD32 i;
+    WORD32 return_status = IHEVCE_SUCCESS;
+    UWORD32 max_offset = 0, offset_len = 1, num_bytes_shift = 0;
+    /* entire slice data has to be shifted; its size is the last table entry minus the first */
+    num_bytes_shift =
+        ps_slice_hdr->pu4_entry_point_offset[ps_slice_hdr->i4_num_entry_point_offsets + 1] -
+        ps_slice_hdr->pu4_entry_point_offset[0];
+    /* generate relative offsets in place and remember the largest one */
+    for(i = 0; i < ps_slice_hdr->i4_num_entry_point_offsets; i++)
+    {
+        ps_slice_hdr->pu4_entry_point_offset[i] =
+            ps_slice_hdr->pu4_entry_point_offset[i + 1] - ps_slice_hdr->pu4_entry_point_offset[i];
+        if(ps_slice_hdr->pu4_entry_point_offset[i] > max_offset)
+        {
+            max_offset = ps_slice_hdr->pu4_entry_point_offset[i];
+        }
+    }
+
+    /* offset_len = number of bits needed to code max_offset, and at least 1 so  */
+    /* that offset_len_minus1 is never negative. This loop always terminates;   */
+    /* scanning upwards for the MSB with while(1) would spin forever when       */
+    /* max_offset is 0 (e.g. when there are no entry points)                    */
+    while(max_offset > 1)
+    {
+        max_offset >>= 1;
+        offset_len++;
+    }
+    ps_slice_hdr->i1_offset_len = offset_len;
+
+    if(ps_slice_hdr->i4_num_entry_point_offsets > 0)
+    {
+        /* offset_len_minus1 */
+        PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->i1_offset_len - 1, return_status);
+        ENTROPY_TRACE("offset_len_minus1", ps_slice_hdr->i1_offset_len - 1);
+    }
+
+    for(i = 0; i < ps_slice_hdr->i4_num_entry_point_offsets; i++)
+    {
+        /* entry_point_offset[i] */
+        /* the syntax element carries the offset minus 1 */
+        PUT_BITS(
+            ps_bitstrm,
+            ps_slice_hdr->pu4_entry_point_offset[i] - 1,
+            ps_slice_hdr->i1_offset_len,
+            return_status);
+        ENTROPY_TRACE("entry_point_offset[i]", ps_slice_hdr->pu4_entry_point_offset[i]);
+    }
+
+    if(ps_pps->i1_slice_header_extension_present_flag)
+    {
+        /* slice_header_extension_length */
+        PUT_BITS_UEV(ps_bitstrm, ps_slice_hdr->i2_slice_header_extension_length, return_status);
+        ENTROPY_TRACE(
+            "slice_header_extension_length", ps_slice_hdr->i2_slice_header_extension_length);
+        /* extension bytes, when any, are written as 0xFF filler */
+
+        for(i = 0; i < ps_slice_hdr->i2_slice_header_extension_length; i++)
+        {
+            /* slice_header_extension_data_byte[i] */
+            PUT_BITS(ps_bitstrm, 0xFF, 8, return_status);
+            ENTROPY_TRACE("slice_header_extension_data_byte[i]", 0);
+        }
+    }
+
+    BYTE_ALIGNMENT(ps_bitstrm);
+
+    ASSERT(num_bytes_shift > 0);
+    /* move the slice data to the point where the header data has ended */
+    memmove(
+        (UWORD8 *)(ps_bitstrm->pu1_strm_buffer + ps_bitstrm->u4_strm_buf_offset),
+        (UWORD8 *)(ps_bitstrm->pu1_strm_buffer + u4_first_slice_start_offset),
+        num_bytes_shift);
+
+    /* send feedback of actual bytes generated */
+    ps_bitstrm->u4_strm_buf_offset += num_bytes_shift;
+
+    //ASSERT(ps_bitstrm->u4_strm_buf_offset == u4_first_slice_start_offset);
+    return return_status;
+}
diff --git a/encoder/ihevce_encode_header.h b/encoder/ihevce_encode_header.h
new file mode 100644
index 0000000..2c2f22b
--- /dev/null
+++ b/encoder/ihevce_encode_header.h
@@ -0,0 +1,157 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+*
+* @file ihevce_encode_header.h
+*
+* @brief
+* This file contains structures and interface prototypes for header encoding
+*
+* @author
+* ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_ENCODE_HEADER_H_
+#define _IHEVCE_ENCODE_HEADER_H_
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ * @brief Macro to put a code with specified number of bits into the
+ *        bitstream; the status of the call is OR-ed into ret_val
+******************************************************************************
+ */
+#define PUT_BITS(ps_bitstrm, code_val, code_len, ret_val)                      \
+    ((ret_val) |= ihevce_put_bits((ps_bitstrm), (code_val), (code_len)))
+
+/**
+******************************************************************************
+ * @brief Macro to put a code into the bitstream using 0th order exponential
+ *        Golomb encoding for unsigned numbers (ihevce_put_uev); the status
+ *        of the call is OR-ed into ret_val
+******************************************************************************
+ */
+#define PUT_BITS_UEV(ps_bitstrm, code_val, ret_val)                            \
+    ((ret_val) |= ihevce_put_uev((ps_bitstrm), (code_val)))
+
+/**
+******************************************************************************
+ * @brief Macro to put a code into the bitstream using 0th order exponential
+ *        Golomb encoding for signed numbers (ihevce_put_sev); the status
+ *        of the call is OR-ed into ret_val
+******************************************************************************
+ */
+#define PUT_BITS_SEV(ps_bitstrm, code_val, ret_val)                            \
+    ((ret_val) |= ihevce_put_sev((ps_bitstrm), (code_val)))
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+WORD32 ihevce_generate_nal_unit_header(
+ bitstrm_t *ps_bitstrm, WORD32 nal_unit_type, WORD32 nuh_temporal_id);
+
+WORD32 ihevce_generate_aud(bitstrm_t *ps_bitstrm, WORD32 pic_type);
+
+WORD32 ihevce_generate_eos(bitstrm_t *ps_bitstrm);
+
+WORD32 ihevce_generate_vps(bitstrm_t *ps_bitstrm, vps_t *ps_vps);
+
+WORD32 ihevce_generate_sps(bitstrm_t *ps_bitstrm, sps_t *ps_sps);
+
+WORD32 ihevce_generate_pps(bitstrm_t *ps_bitstrm, pps_t *ps_pps);
+
+WORD32 ihevce_generate_slice_header(
+ bitstrm_t *ps_bitstrm,
+ WORD8 i1_nal_unit_type,
+ slice_header_t *ps_slice_hdr,
+ pps_t *ps_pps,
+ sps_t *ps_sps,
+ bitstrm_t *ps_dup_bit_strm_ent_offset,
+ UWORD32 *pu4_first_slice_start_offset,
+ ihevce_tile_params_t *ps_tile_params,
+ WORD32 i4_next_slice_seg_x,
+ WORD32 i4_next_slice_seg_y);
+
+WORD32 ihevce_populate_vps(
+ enc_ctxt_t *ps_enc_ctxt,
+ vps_t *ps_vps,
+ ihevce_src_params_t *ps_src_params,
+ ihevce_out_strm_params_t *ps_out_strm_params,
+ ihevce_coding_params_t *ps_coding_params,
+ ihevce_config_prms_t *ps_config_prms,
+ ihevce_static_cfg_params_t *ps_stat_cfg_prms,
+ WORD32 i4_resolution_id);
+
+WORD32 ihevce_populate_sps(
+ enc_ctxt_t *ps_enc_ctxt,
+ sps_t *ps_sps,
+ vps_t *ps_vps,
+ ihevce_src_params_t *ps_src_params,
+ ihevce_out_strm_params_t *ps_out_strm_params,
+ ihevce_coding_params_t *ps_coding_params,
+ ihevce_config_prms_t *ps_config_prms,
+ frm_ctb_ctxt_t *ps_frm_ctb_prms,
+ ihevce_static_cfg_params_t *ps_stat_cfg_prms,
+ WORD32 i4_resolution_id);
+
+WORD32 ihevce_populate_pps(
+ pps_t *ps_pps,
+ sps_t *ps_sps,
+ ihevce_src_params_t *ps_src_params,
+ ihevce_out_strm_params_t *ps_out_strm_params,
+ ihevce_coding_params_t *ps_coding_params,
+ ihevce_config_prms_t *ps_config_prms,
+ ihevce_static_cfg_params_t *ps_stat_cfg_prms,
+ WORD32 i4_bitrate_instance_id,
+ WORD32 i4_resolution_id,
+ ihevce_tile_params_t *ps_tile_params_base,
+ WORD32 *pi4_column_width_array,
+ WORD32 *pi4_row_height_array);
+
+WORD32 ihevce_populate_slice_header(
+ slice_header_t *ps_slice_hdr,
+ pps_t *ps_pps,
+ sps_t *ps_sps,
+ WORD32 nal_unit_type,
+ WORD32 slice_type,
+ WORD32 ctb_x,
+ WORD32 ctb_y,
+ WORD32 poc, /* for non-IDR pictures its low bits become i4_pic_order_cnt_lsb */
+ WORD32 cur_slice_qp, /* i1_slice_qp_delta is set to cur_slice_qp - i1_pic_init_qp */
+ WORD32 max_merge_candidates, /* stored as i1_max_num_merge_cand */
+ WORD32 i4_rc_pass_num,
+ WORD32 i4_quality_preset,
+ WORD32 stasino_enabled); /* non-zero: cb/cr qp offsets of -2 when the PPS allows slice-level offsets */
+
+WORD32 ihevce_insert_entry_offset_slice_header(
+ bitstrm_t *ps_bitstrm,
+ slice_header_t *ps_slice_hdr, /* pu4_entry_point_offset[] and i1_offset_len are rewritten */
+ pps_t *ps_pps,
+ UWORD32 u4_first_slice_start_offset); /* stream buffer offset the slice data is moved from */
+
+#endif // _IHEVCE_ENCODE_HEADER_H_
diff --git a/encoder/ihevce_encode_header_sei_vui.c b/encoder/ihevce_encode_header_sei_vui.c
new file mode 100644
index 0000000..df48389
--- /dev/null
+++ b/encoder/ihevce_encode_header_sei_vui.c
@@ -0,0 +1,2729 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file ihevce_encode_header_sei_vui.c
+*
+* @brief
+* This file contains function definitions related to header vui/sei encoding
+*
+* @author
+* ittiam
+*
+* List of Functions
+* ihevce_generate_sub_layer_hrd_params()
+* ihevce_generate_hrd_params()
+* ihevce_generate_vui()
+* ihevce_put_buf_period_sei_params()
+* ihevce_put_active_parameter_set_sei_params()
+* ihevce_put_mastering_disp_col_vol_sei_params()
+* ihevce_put_mastering_disp_col_vol_sei_params()
+* ihevce_put_sei_params()
+* ihevce_put_cll_info_sei_params()
+* ihevce_put_recovery_point_sei_params()
+* ihevce_put_pic_timing_sei_params()
+* ihevce_put_hash_sei_params()
+* ihevce_put_sei_msg()
+* ihevce_generate_sei()
+* ihevce_populate_mastering_disp_col_vol_sei()
+* ihevce_populate_recovery_point_sei()
+* ihevce_populate_picture_timing_sei()
+* ihevce_populate_buffering_period_sei()
+* ihevce_populate_active_parameter_set_sei()
+* ihevce_calc_CRC()
+* ihevce_calc_checksum()
+* ihevce_populate_hash_sei()
+* ihevce_populate_vui()
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_macros.h"
+#include "ihevc_debug.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+#include "ihevc_trans_tables.h"
+#include "ihevc_trans_macros.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_multi_thrd_funcs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_error_checks.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_global_tables.h"
+#include "ihevce_encode_header.h"
+#include "ihevce_encode_header_sei_vui.h"
+#include "ihevce_trace.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief Generates sub-layer hrd parameters of VUI for one sub-layer
+*
+* @par Description
+* Writes the per-CPB bit rate / CPB size values and cbr_flag (ANNEX E.2)
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle)
+*
+* @param[in] ps_sub_lyr_hrd_params, ps_hrd_params, cpb_cnt_minus1
+* values to write; hrd params holding the sub-pic flag; last CPB index
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ihevce_generate_sub_layer_hrd_params(
+    bitstrm_t *ps_bitstrm,
+    sub_lyr_hrd_params_t *ps_sub_lyr_hrd_params,
+    hrd_params_t *ps_hrd_params,
+    WORD32 cpb_cnt_minus1)
+{
+    sub_lyr_hrd_params_t *ps_lyr = ps_sub_lyr_hrd_params;
+    WORD32 status = IHEVCE_SUCCESS;
+    WORD32 cpb_idx = 0;
+
+    /* one set of syntax elements per CPB index 0..cpb_cnt_minus1 */
+    while(cpb_idx <= cpb_cnt_minus1)
+    {
+        /* bit_rate_value_minus1[cpb_idx] */
+        PUT_BITS_UEV(ps_bitstrm, ps_lyr->au4_bit_rate_value_minus1[cpb_idx], status);
+        ENTROPY_TRACE("bit_rate_value_minus1", ps_lyr->au4_bit_rate_value_minus1[cpb_idx]);
+
+        /* cpb_size_value_minus1[cpb_idx] */
+        PUT_BITS_UEV(ps_bitstrm, ps_lyr->au4_cpb_size_value_minus1[cpb_idx], status);
+        ENTROPY_TRACE("cpb_size_value_minus1", ps_lyr->au4_cpb_size_value_minus1[cpb_idx]);
+
+        /* the *_du_* values are written only when sub-pic CPB params are present */
+        if(ps_hrd_params->u1_sub_pic_cpb_params_present_flag)
+        {
+            PUT_BITS_UEV(ps_bitstrm, ps_lyr->au4_cpb_size_du_value_minus1[cpb_idx], status);
+            ENTROPY_TRACE(
+                "cpb_size_du_value_minus1", ps_lyr->au4_cpb_size_du_value_minus1[cpb_idx]);
+
+            PUT_BITS_UEV(ps_bitstrm, ps_lyr->au4_bit_rate_du_value_minus1[cpb_idx], status);
+            ENTROPY_TRACE(
+                "bit_rate_du_value_minus1", ps_lyr->au4_bit_rate_du_value_minus1[cpb_idx]);
+        }
+
+        /* cbr_flag[cpb_idx] */
+        PUT_BITS(ps_bitstrm, ps_lyr->au1_cbr_flag[cpb_idx], 1, status);
+        ENTROPY_TRACE("cbr_flag", ps_lyr->au1_cbr_flag[cpb_idx]);
+
+        cpb_idx++;
+    }
+
+    return status;
+}
+
+/**
+******************************************************************************
+*
+* @brief Generates hrd parameters of VUI (Video Usability Info Set)
+*
+* @par Description
+* Writes hrd_parameters() as per ANNEX E.2. Common information comes first:
+*   nal_hrd_parameters_present_flag                    1 bit
+*   vcl_hrd_parameters_present_flag                    1 bit
+* and, only when at least one of the two flags above is set:
+*   sub_pic_cpb_params_present_flag                    1 bit
+*   tick_divisor_minus2                                8 bits (*)
+*   du_cpb_removal_delay_increment_length_minus1       5 bits (*)
+*   sub_pic_cpb_params_in_pic_timing_sei_flag          1 bit  (*)
+*   dpb_output_delay_du_length_minus1                  5 bits (*)
+*   bit_rate_scale                                     4 bits
+*   cpb_size_scale                                     4 bits
+*   cpb_size_du_scale                                  4 bits (*)
+*   initial_cpb_removal_delay_length_minus1            5 bits
+*   au_cpb_removal_delay_length_minus1                 5 bits
+*   dpb_output_delay_length_minus1                     5 bits
+* (*) written only if sub_pic_cpb_params_present_flag is set
+*
+* Then, for each of the i1_sps_max_sub_layers sub-layers:
+*   fixed_pic_rate_general_flag, fixed_pic_rate_within_cvs_flag (if the former
+*   is 0), elemental_duration_in_tc_minus1 or else low_delay_hrd_flag,
+*   cpb_cnt_minus1 (if low_delay_hrd_flag is 0) and one set of sub-layer hrd
+*   params for each of the NAL / VCL present flags that is set
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle)
+*
+* @param[in] ps_hrd_params
+* pointer to structure containing hrd params of VUI data
+*
+* @param[in] ps_sps
+* pointer to structure containing SPS data (i1_sps_max_sub_layers is read)
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ihevce_generate_hrd_params(bitstrm_t *ps_bitstrm, hrd_params_t *ps_hrd_params, sps_t *ps_sps)
+{
+    /* short alias: every syntax element below is read from this structure */
+    hrd_params_t *ps_hrd = ps_hrd_params;
+    WORD32 status = IHEVCE_SUCCESS;
+    WORD32 lyr;
+
+    /* hard-coded to 1, so the common information is always written */
+    UWORD8 u1_common_info_present_flag = 1;
+
+    if(u1_common_info_present_flag)
+    {
+        /* nal_hrd_parameters_present_flag */
+        PUT_BITS(ps_bitstrm, ps_hrd->u1_nal_hrd_parameters_present_flag, 1, status);
+        ENTROPY_TRACE(
+            "nal_hrd_parameters_present_flag", ps_hrd->u1_nal_hrd_parameters_present_flag);
+
+        /* vcl_hrd_parameters_present_flag */
+        PUT_BITS(ps_bitstrm, ps_hrd->u1_vcl_hrd_parameters_present_flag, 1, status);
+        ENTROPY_TRACE(
+            "vcl_hrd_parameters_present_flag", ps_hrd->u1_vcl_hrd_parameters_present_flag);
+    }
+
+    /* the remaining common fields are written only when one of the two flags is set */
+    if(u1_common_info_present_flag && (ps_hrd->u1_nal_hrd_parameters_present_flag ||
+                                       ps_hrd->u1_vcl_hrd_parameters_present_flag))
+    {
+        /* sub_pic_cpb_params_present_flag */
+        PUT_BITS(ps_bitstrm, ps_hrd->u1_sub_pic_cpb_params_present_flag, 1, status);
+        ENTROPY_TRACE(
+            "sub_pic_Cpb_params_present_flag", ps_hrd->u1_sub_pic_cpb_params_present_flag);
+
+        if(ps_hrd->u1_sub_pic_cpb_params_present_flag)
+        {
+            /* tick_divisor_minus2 */
+            PUT_BITS(ps_bitstrm, ps_hrd->u1_tick_divisor_minus2, 8, status);
+            ENTROPY_TRACE("tick_divisor_minus2", ps_hrd->u1_tick_divisor_minus2);
+
+            /* du_cpb_removal_delay_increment_length_minus1 */
+            PUT_BITS(
+                ps_bitstrm, ps_hrd->u1_du_cpb_removal_delay_increment_length_minus1, 5, status);
+            ENTROPY_TRACE(
+                "du_cpb_removal_delay_increment_length_minus1",
+                ps_hrd->u1_du_cpb_removal_delay_increment_length_minus1);
+
+            /* sub_pic_cpb_params_in_pic_timing_sei_flag */
+            PUT_BITS(
+                ps_bitstrm, ps_hrd->u1_sub_pic_cpb_params_in_pic_timing_sei_flag, 1, status);
+            ENTROPY_TRACE(
+                "sub_pic_cpb_params_in_pic_timing_sei_flag",
+                ps_hrd->u1_sub_pic_cpb_params_in_pic_timing_sei_flag);
+
+            /* dpb_output_delay_du_length_minus1 */
+            PUT_BITS(ps_bitstrm, ps_hrd->u1_dpb_output_delay_du_length_minus1, 5, status);
+            ENTROPY_TRACE(
+                "dpb_output_delay_du_length_minus1",
+                ps_hrd->u1_dpb_output_delay_du_length_minus1);
+        }
+
+        /* bit_rate_scale */
+        PUT_BITS(ps_bitstrm, ps_hrd->u4_bit_rate_scale, 4, status);
+        ENTROPY_TRACE("bit_rate_scale", ps_hrd->u4_bit_rate_scale);
+
+        /* cpb_size_scale */
+        PUT_BITS(ps_bitstrm, ps_hrd->u4_cpb_size_scale, 4, status);
+        ENTROPY_TRACE("cpb_size_scale", ps_hrd->u4_cpb_size_scale);
+
+        if(ps_hrd->u1_sub_pic_cpb_params_present_flag)
+        {
+            /* cpb_size_du_scale */
+            PUT_BITS(ps_bitstrm, ps_hrd->u4_cpb_size_du_scale, 4, status);
+            ENTROPY_TRACE("cpb_size_du_scale", ps_hrd->u4_cpb_size_du_scale);
+        }
+
+        /* initial_cpb_removal_delay_length_minus1 */
+        PUT_BITS(ps_bitstrm, ps_hrd->u1_initial_cpb_removal_delay_length_minus1, 5, status);
+        ENTROPY_TRACE(
+            "initial_cpb_removal_delay_length_minus1",
+            ps_hrd->u1_initial_cpb_removal_delay_length_minus1);
+
+        /* au_cpb_removal_delay_length_minus1 */
+        PUT_BITS(ps_bitstrm, ps_hrd->u1_au_cpb_removal_delay_length_minus1, 5, status);
+        ENTROPY_TRACE(
+            "cpb_removal_delay_length_minus1", ps_hrd->u1_au_cpb_removal_delay_length_minus1);
+
+        /* dpb_output_delay_length_minus1 */
+        PUT_BITS(ps_bitstrm, ps_hrd->u1_dpb_output_delay_length_minus1, 5, status);
+        ENTROPY_TRACE(
+            "dpb_output_delay_length_minus1", ps_hrd->u1_dpb_output_delay_length_minus1);
+    }
+
+    /* per sub-layer part, one iteration for each of i1_sps_max_sub_layers */
+    for(lyr = 0; lyr < (ps_sps->i1_sps_max_sub_layers); lyr++)
+    {
+        /* fixed_pic_rate_general_flag */
+        PUT_BITS(ps_bitstrm, ps_hrd->au1_fixed_pic_rate_general_flag[lyr], 1, status);
+        ENTROPY_TRACE("fixed_pic_rate_general_flag", ps_hrd->au1_fixed_pic_rate_general_flag[lyr]);
+
+        if(!ps_hrd->au1_fixed_pic_rate_general_flag[lyr])
+        {
+            /* fixed_pic_rate_within_cvs_flag */
+            PUT_BITS(ps_bitstrm, ps_hrd->au1_fixed_pic_rate_within_cvs_flag[lyr], 1, status);
+            ENTROPY_TRACE(
+                "fixed_pic_rate_within_cvs_flag", ps_hrd->au1_fixed_pic_rate_within_cvs_flag[lyr]);
+        }
+
+        if(ps_hrd->au1_fixed_pic_rate_within_cvs_flag[lyr])
+        {
+            /* elemental_duration_in_tc_minus1 */
+            PUT_BITS_UEV(ps_bitstrm, ps_hrd->au2_elemental_duration_in_tc_minus1[lyr], status);
+            ENTROPY_TRACE(
+                "elemental_duration_in_tc_minus1",
+                ps_hrd->au2_elemental_duration_in_tc_minus1[lyr]);
+        }
+        else
+        {
+            /* low_delay_hrd_flag */
+            PUT_BITS(ps_bitstrm, ps_hrd->au1_low_delay_hrd_flag[lyr], 1, status);
+            ENTROPY_TRACE("low_delay_hrd_flag", ps_hrd->au1_low_delay_hrd_flag[lyr]);
+        }
+
+        if(!ps_hrd->au1_low_delay_hrd_flag[lyr])
+        {
+            /* cpb_cnt_minus1 */
+            PUT_BITS_UEV(ps_bitstrm, ps_hrd->au1_cpb_cnt_minus1[lyr], status);
+            ENTROPY_TRACE("cpb_cnt_minus1", ps_hrd->au1_cpb_cnt_minus1[lyr]);
+        }
+
+        if(ps_hrd->u1_nal_hrd_parameters_present_flag)
+        {
+            status |= ihevce_generate_sub_layer_hrd_params(
+                ps_bitstrm,
+                &ps_hrd->as_sub_layer_hrd_params[lyr],
+                ps_hrd,
+                ps_hrd->au1_cpb_cnt_minus1[lyr]);
+        }
+
+        if(ps_hrd->u1_vcl_hrd_parameters_present_flag)
+        {
+            status |= ihevce_generate_sub_layer_hrd_params(
+                ps_bitstrm,
+                &ps_hrd->as_sub_layer_hrd_params[lyr],
+                ps_hrd,
+                ps_hrd->au1_cpb_cnt_minus1[lyr]);
+        }
+    }
+
+    return status;
+}
+
+/**
+******************************************************************************
+*
+* @brief Generates VUI (Video Usability Info Set)
+*
+* @par Description
+* Parse Video Usability Info as per ANNEX E.2
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle)
+*
+* @param[in] ps_sps
+* pointer to structure containing SPS data
+*
+* @param[in] ps_vui
+* pointer to structure containing VUI data
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ihevce_generate_vui(bitstrm_t *ps_bitstrm, sps_t *ps_sps, vui_t s_vui)
+{
+ WORD32 return_status = IHEVCE_SUCCESS;
+
+ /* aspect_ratio_info_present_flag */
+ PUT_BITS(ps_bitstrm, s_vui.u1_aspect_ratio_info_present_flag, 1, return_status);
+ ENTROPY_TRACE("aspect_ratio_info_present_flag", s_vui.u1_aspect_ratio_info_present_flag);
+
+ if(s_vui.u1_aspect_ratio_info_present_flag)
+ {
+ /* aspect_ratio_idc */
+ PUT_BITS(ps_bitstrm, s_vui.u1_aspect_ratio_idc, 8, return_status);
+ ENTROPY_TRACE("aspect_ratio_idc", s_vui.u1_aspect_ratio_idc);
+ if(s_vui.u1_aspect_ratio_idc == IHEVCE_EXTENDED_SAR)
+ {
+ /* SAR_width */
+ PUT_BITS(ps_bitstrm, s_vui.u2_sar_width, 16, return_status);
+ ENTROPY_TRACE("sar_width", s_vui.u2_sar_width);
+
+ /* SAR_hieght */
+ PUT_BITS(ps_bitstrm, s_vui.u2_sar_height, 16, return_status);
+ ENTROPY_TRACE("sar_height", s_vui.u2_sar_height);
+ }
+ }
+
+ /* overscan_info_present_flag */
+ PUT_BITS(ps_bitstrm, s_vui.u1_overscan_info_present_flag, 1, return_status);
+ ENTROPY_TRACE("overscan_info_present_flag", s_vui.u1_overscan_info_present_flag);
+
+ if(s_vui.u1_overscan_info_present_flag)
+ {
+ /* u1_overscan_appropriate_flag */
+ PUT_BITS(ps_bitstrm, s_vui.u1_overscan_appropriate_flag, 1, return_status);
+ ENTROPY_TRACE("overscan_appropriate_flag", s_vui.u1_overscan_appropriate_flag);
+ }
+
+ /* video_signal_type_present_flag */
+ PUT_BITS(ps_bitstrm, s_vui.u1_video_signal_type_present_flag, 1, return_status);
+ ENTROPY_TRACE("video_signal_type_present_flag", s_vui.u1_video_signal_type_present_flag);
+
+ if(s_vui.u1_video_signal_type_present_flag)
+ {
+ /* u1_video_format */
+ PUT_BITS(ps_bitstrm, s_vui.u1_video_format, 3, return_status);
+ ENTROPY_TRACE("video_format", s_vui.u1_video_format);
+
+ /* u1_video_full_range_flag */
+ PUT_BITS(ps_bitstrm, s_vui.u1_video_full_range_flag, 1, return_status);
+ ENTROPY_TRACE("video_full_range_flag", s_vui.u1_video_full_range_flag);
+
+ /* u1_colour_description_present_flag */
+ PUT_BITS(ps_bitstrm, s_vui.u1_colour_description_present_flag, 1, return_status);
+ ENTROPY_TRACE("colour_description_present_flag", s_vui.u1_colour_description_present_flag);
+
+ if(s_vui.u1_colour_description_present_flag)
+ {
+ /* u1_colour_primaries */
+ PUT_BITS(ps_bitstrm, s_vui.u1_colour_primaries, 8, return_status);
+ ENTROPY_TRACE("colour_primaries", s_vui.u1_colour_primaries);
+
+ /* u1_transfer_characteristics */
+ PUT_BITS(ps_bitstrm, s_vui.u1_transfer_characteristics, 8, return_status);
+ ENTROPY_TRACE("transfer_characteristics", s_vui.u1_transfer_characteristics);
+
+ /* u1_matrix_coefficients */
+ PUT_BITS(ps_bitstrm, s_vui.u1_matrix_coefficients, 8, return_status);
+ ENTROPY_TRACE("matrix_coefficients", s_vui.u1_matrix_coefficients);
+ }
+ }
+
+ /* u1_chroma_loc_info_present_flag */
+ PUT_BITS(ps_bitstrm, s_vui.u1_chroma_loc_info_present_flag, 1, return_status);
+ ENTROPY_TRACE("chroma_loc_info_present_flag", s_vui.u1_chroma_loc_info_present_flag);
+
+ if(s_vui.u1_chroma_loc_info_present_flag)
+ {
+ /* u1_chroma_sample_loc_type_top_field */
+ PUT_BITS_UEV(ps_bitstrm, s_vui.u1_chroma_sample_loc_type_top_field, return_status);
+ ENTROPY_TRACE(
+ "chroma_sample_loc_type_top_field", s_vui.u1_chroma_sample_loc_type_top_field);
+
+ /* u1_chroma_sample_loc_type_bottom_field */
+ PUT_BITS_UEV(ps_bitstrm, s_vui.u1_chroma_sample_loc_type_bottom_field, return_status);
+ ENTROPY_TRACE(
+ "chroma_sample_loc_type_bottom_field", s_vui.u1_chroma_sample_loc_type_bottom_field);
+ }
+
+ /* u1_neutral_chroma_indication_flag */
+ PUT_BITS(ps_bitstrm, s_vui.u1_neutral_chroma_indication_flag, 1, return_status);
+ ENTROPY_TRACE("neutral_chroma_indication_flag", s_vui.u1_neutral_chroma_indication_flag);
+
+ /* u1_field_seq_flag */
+ PUT_BITS(ps_bitstrm, s_vui.u1_field_seq_flag, 1, return_status);
+ ENTROPY_TRACE("field_seq_flag", s_vui.u1_field_seq_flag);
+
+ /* HM CURRENTLY NOT SUPPOSTED */
+ /* u1_frame_field_info_present_flag */
+ PUT_BITS(ps_bitstrm, s_vui.u1_frame_field_info_present_flag, 1, return_status);
+ ENTROPY_TRACE("frame_field_info_present_flag", s_vui.u1_frame_field_info_present_flag);
+
+ /* u1_default_display_window_flag */
+ PUT_BITS(ps_bitstrm, s_vui.u1_default_display_window_flag, 1, return_status);
+ ENTROPY_TRACE("default_display_window_flag", s_vui.u1_default_display_window_flag);
+
+ if(s_vui.u1_default_display_window_flag)
+ {
+ /* u4_def_disp_win_left_offset */
+ PUT_BITS_UEV(ps_bitstrm, s_vui.u4_def_disp_win_left_offset, return_status);
+ ENTROPY_TRACE("def_disp_win_left_offset", s_vui.u4_def_disp_win_left_offset);
+
+ /* u4_def_disp_win_right_offset */
+ PUT_BITS_UEV(ps_bitstrm, s_vui.u4_def_disp_win_right_offset, return_status);
+ ENTROPY_TRACE("def_disp_win_right_offset", s_vui.u4_def_disp_win_right_offset);
+
+ /* u4_def_disp_win_top_offset */
+ PUT_BITS_UEV(ps_bitstrm, s_vui.u4_def_disp_win_top_offset, return_status);
+ ENTROPY_TRACE("def_disp_win_top_offset", s_vui.u4_def_disp_win_top_offset);
+
+ /* u4_def_disp_win_bottom_offset */
+ PUT_BITS_UEV(ps_bitstrm, s_vui.u4_def_disp_win_bottom_offset, return_status);
+ ENTROPY_TRACE("def_disp_win_bottom_offset", s_vui.u4_def_disp_win_bottom_offset);
+ }
+
+ /* u1_vui_timing_info_present_flag */
+ PUT_BITS(ps_bitstrm, s_vui.u1_vui_timing_info_present_flag, 1, return_status);
+ ENTROPY_TRACE("vui_timing_info_present_flag", s_vui.u1_vui_timing_info_present_flag);
+
+ if(s_vui.u1_vui_timing_info_present_flag)
+ {
+ /* u4_num_units_in_tick */
+ PUT_BITS(ps_bitstrm, s_vui.u4_vui_num_units_in_tick, 32, return_status);
+ ENTROPY_TRACE("num_units_in_tick", s_vui.u4_vui_num_units_in_tick);
+
+ /* u4_time_scale */
+ PUT_BITS(ps_bitstrm, s_vui.u4_vui_time_scale, 32, return_status);
+ ENTROPY_TRACE("time_scale", s_vui.u4_vui_time_scale);
+
+ /* u1_poc_proportional_to_timing_flag */
+ PUT_BITS(ps_bitstrm, s_vui.u1_poc_proportional_to_timing_flag, 1, return_status);
+ ENTROPY_TRACE("poc_proportional_to_timing_flag", s_vui.u1_poc_proportional_to_timing_flag);
+
+ /* u1_num_ticks_poc_diff_one_minus1 */
+ if(s_vui.u1_poc_proportional_to_timing_flag)
+ {
+ PUT_BITS_UEV(ps_bitstrm, s_vui.u4_num_ticks_poc_diff_one_minus1, return_status);
+ ENTROPY_TRACE("num_ticks_poc_diff_one_minus1", s_vui.u4_num_ticks_poc_diff_one_minus1);
+ }
+
+ /* u1_vui_hrd_parameters_present_flag */
+ PUT_BITS(ps_bitstrm, s_vui.u1_vui_hrd_parameters_present_flag, 1, return_status);
+ ENTROPY_TRACE("vui_hrd_parameters_present_flag", s_vui.u1_vui_hrd_parameters_present_flag);
+
+ if(s_vui.u1_vui_hrd_parameters_present_flag)
+ {
+ return_status |=
+ ihevce_generate_hrd_params(ps_bitstrm, &(s_vui.s_vui_hrd_parameters), ps_sps);
+ }
+ }
+
+ /* u1_bitstream_restriction_flag */
+ PUT_BITS(ps_bitstrm, s_vui.u1_bitstream_restriction_flag, 1, return_status);
+ ENTROPY_TRACE("bitstream_restriction_flag", s_vui.u1_bitstream_restriction_flag);
+
+ if(s_vui.u1_bitstream_restriction_flag)
+ {
+ /* u1_tiles_fixed_structure_flag */
+ PUT_BITS(ps_bitstrm, s_vui.u1_tiles_fixed_structure_flag, 1, return_status);
+ ENTROPY_TRACE("tiles_fixed_structure_flag", s_vui.u1_tiles_fixed_structure_flag);
+
+ /* u1_motion_vectors_over_pic_boundaries_flag */
+ PUT_BITS(ps_bitstrm, s_vui.u1_motion_vectors_over_pic_boundaries_flag, 1, return_status);
+ ENTROPY_TRACE(
+ "motion_vectors_over_pic_boundaries_flag",
+ s_vui.u1_motion_vectors_over_pic_boundaries_flag);
+
+ /* u1_restricted_ref_pic_lists_flag */
+ PUT_BITS(ps_bitstrm, s_vui.u1_restricted_ref_pic_lists_flag, 1, return_status);
+ ENTROPY_TRACE("restricted_ref_pic_lists_flag", s_vui.u1_restricted_ref_pic_lists_flag);
+
+ /* u4_min_spatial_segmentation_idc */
+ PUT_BITS_UEV(ps_bitstrm, s_vui.u4_min_spatial_segmentation_idc, return_status);
+ ENTROPY_TRACE("min_spatial_segmentation_idc", s_vui.u4_min_spatial_segmentation_idc);
+
+ /* u1_max_bytes_per_pic_denom */
+ PUT_BITS_UEV(ps_bitstrm, s_vui.u1_max_bytes_per_pic_denom, return_status);
+ ENTROPY_TRACE("max_bytes_per_pic_denom", s_vui.u1_max_bytes_per_pic_denom);
+
+ /* u1_max_bits_per_mincu_denom */
+ PUT_BITS_UEV(ps_bitstrm, s_vui.u1_max_bits_per_mincu_denom, return_status);
+ ENTROPY_TRACE("max_bits_per_mincu_denom", s_vui.u1_max_bits_per_mincu_denom);
+
+ /* u1_log2_max_mv_length_horizontal */
+ PUT_BITS_UEV(ps_bitstrm, s_vui.u1_log2_max_mv_length_horizontal, return_status);
+ ENTROPY_TRACE("log2_max_mv_length_horizontal", s_vui.u1_log2_max_mv_length_horizontal);
+
+ /* u1_log2_max_mv_length_vertical */
+ PUT_BITS_UEV(ps_bitstrm, s_vui.u1_log2_max_mv_length_vertical, return_status);
+ ENTROPY_TRACE("log2_max_mv_length_vertical", s_vui.u1_log2_max_mv_length_vertical);
+ }
+ return return_status;
+}
+
+/**
+******************************************************************************
+*
+* @brief Generates Buffering Period (Supplemental Enhancement Information )
+*
+* @par Description
+* Generate Supplemental Enhancement Information as per Section 7.3.2.4
+*
+* @param[in,out] ps_bp_sei
+* pointer to structure containing buffering period SEI data
+*
+* @param[in] ps_vui_params
+* pointer to structure containing VUI data
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle)
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ihevce_put_buf_period_sei_params(
+ buf_period_sei_params_t *ps_bp_sei, vui_t *ps_vui_params, bitstrm_t *ps_bitstrm)
+{
+ UWORD32 i;
+ WORD32 return_status = IHEVCE_SUCCESS;
+ UWORD8 u1_payload_size = 0;
+ UWORD8 u1_sub_pic_cpb_params_present_flag =
+ ps_vui_params->s_vui_hrd_parameters.u1_sub_pic_cpb_params_present_flag;
+
+ {
+ /* Payload size is accumulated in bits here and rounded up to bytes further down */
+ /* bp_seq_parameter_set_id: counted as 1 bit, the ue(v) length computation is disabled */
+ u1_payload_size += 1;
+ if(!u1_sub_pic_cpb_params_present_flag)
+ u1_payload_size += 1; /* rap_cpb_params_present_flag */
+ if(ps_bp_sei->u1_rap_cpb_params_present_flag) /* cpb_delay_offset + dpb_delay_offset */
+ u1_payload_size +=
+ ps_vui_params->s_vui_hrd_parameters.u1_au_cpb_removal_delay_length_minus1 + 1 +
+ ps_vui_params->s_vui_hrd_parameters.u1_dpb_output_delay_length_minus1 + 1;
+ u1_payload_size += 1; /* concatenation_flag */
+ u1_payload_size +=
+ ps_vui_params->s_vui_hrd_parameters.u1_au_cpb_removal_delay_length_minus1 + 1;
+ if(1 == ps_vui_params->s_vui_hrd_parameters.u1_nal_hrd_parameters_present_flag)
+ {
+ for(i = 0; i <= ps_bp_sei->u4_cpb_cnt; i++)
+ {
+ u1_payload_size += ps_bp_sei->u4_initial_cpb_removal_delay_length << 1;
+ if(u1_sub_pic_cpb_params_present_flag || ps_bp_sei->u1_rap_cpb_params_present_flag)
+ u1_payload_size += ps_bp_sei->u4_initial_cpb_removal_delay_length << 1;
+ }
+ }
+ if(1 == ps_vui_params->s_vui_hrd_parameters.u1_vcl_hrd_parameters_present_flag)
+ {
+ for(i = 0; i <= ps_bp_sei->u4_cpb_cnt; i++)
+ {
+ u1_payload_size += ps_bp_sei->u4_initial_cpb_removal_delay_length << 1;
+ if(u1_sub_pic_cpb_params_present_flag || ps_bp_sei->u1_rap_cpb_params_present_flag)
+ u1_payload_size += ps_bp_sei->u4_initial_cpb_removal_delay_length << 1;
+ }
+ }
+ }
+
+ u1_payload_size = (u1_payload_size + 7) >> 3;
+
+ /************************************************************************************************/
+ /* Calculating the cpb removal delay and cpb removal delay offset based on the */
+ /* buffer level information from Rate control */
+ /* NOTE : Buffer fullness level for Rate control is updated using Approximate */
+ /* number of bits from RDOPT stage rather than from accurate number of bits from ENTROPY coding */
+ /************************************************************************************************/
+
+ {
+ ULWORD64 u8_temp;
+ UWORD32 u4_buffer_size, u4_dbf;
+ /* NOTE(review): u4_target_bit_rate_sei is used as a divisor below and is not checked for 0 */
+ u4_buffer_size = ps_bp_sei->u4_buffer_size_sei;
+ u4_dbf = ps_bp_sei->u4_dbf_sei;
+ for(i = 0; i <= ps_bp_sei->u4_cpb_cnt; i++)
+ {
+ u8_temp = ((ULWORD64)(u4_dbf)*90000);
+ u8_temp = u8_temp / ps_bp_sei->u4_target_bit_rate_sei;
+ ps_bp_sei->au4_nal_initial_cpb_removal_delay[i] = (UWORD32)u8_temp;
+
+ u8_temp = ((ULWORD64)(u4_buffer_size - u4_dbf) * 90000);
+ u8_temp = u8_temp / ps_bp_sei->u4_target_bit_rate_sei;
+ ps_bp_sei->au4_nal_initial_cpb_removal_delay_offset[i] = (UWORD32)u8_temp;
+
+ if(ps_bp_sei->au4_nal_initial_cpb_removal_delay[i] < 1)
+ ps_bp_sei->au4_nal_initial_cpb_removal_delay[i] =
+ 1; /* ps_bp_sei->au4_nal_initial_cpb_removal_delay[i] should be always greater than 0 */
+ }
+ }
+
+ /************************************************************************/
+ /* PayloadSize : This is the size of the payload in bytes */
+ /************************************************************************/
+ PUT_BITS(ps_bitstrm, u1_payload_size, 8, return_status);
+ ENTROPY_TRACE("payload_size", u1_payload_size);
+
+ /************************************************************************/
+ /* Put the buffering period SEI parameters into the bitstream. For */
+ /* details refer to section D.1.1 of the standard */
+ /************************************************************************/
+
+ /* seq_parameter_set_id */
+ PUT_BITS_UEV(ps_bitstrm, ps_bp_sei->u1_bp_seq_parameter_set_id, return_status);
+ ENTROPY_TRACE("seq_parameter_set_id", ps_bp_sei->u1_bp_seq_parameter_set_id);
+
+ //PUT_BITS(ps_bitstrm, u1_sub_pic_cpb_params_present_flag, 1, return_status);
+ //ENTROPY_TRACE("sub_pic_cpb_params_present_flag", u1_sub_pic_cpb_params_present_flag);
+
+ if(!u1_sub_pic_cpb_params_present_flag)
+ {
+ /* u1_rap_cpb_params_present_flag */
+ PUT_BITS(ps_bitstrm, ps_bp_sei->u1_rap_cpb_params_present_flag, 1, return_status);
+ ENTROPY_TRACE("rap_cpb_params_present_flag", ps_bp_sei->u1_rap_cpb_params_present_flag);
+ }
+
+ if(ps_bp_sei->u1_rap_cpb_params_present_flag)
+ {
+ PUT_BITS(
+ ps_bitstrm,
+ ps_bp_sei->u4_cpb_delay_offset,
+ (ps_vui_params->s_vui_hrd_parameters.u1_au_cpb_removal_delay_length_minus1 + 1),
+ return_status);
+ ENTROPY_TRACE("cpb_delay_offset", ps_bp_sei->u4_cpb_delay_offset);
+
+ PUT_BITS(
+ ps_bitstrm,
+ ps_bp_sei->u4_dpb_delay_offset,
+ (ps_vui_params->s_vui_hrd_parameters.u1_dpb_output_delay_length_minus1 + 1),
+ return_status);
+ ENTROPY_TRACE("dpb_delay_offset", ps_bp_sei->u4_dpb_delay_offset);
+ }
+
+ PUT_BITS(ps_bitstrm, ps_bp_sei->u1_concatenation_flag, 1, return_status);
+ ENTROPY_TRACE("concatenation_flag", ps_bp_sei->u1_concatenation_flag);
+
+ PUT_BITS(
+ ps_bitstrm,
+ ps_bp_sei->u4_au_cpb_removal_delay_delta_minus1,
+ (ps_vui_params->s_vui_hrd_parameters.u1_au_cpb_removal_delay_length_minus1 + 1),
+ return_status);
+
+ ENTROPY_TRACE(
+ "au_cpb_removal_delay_delta_minus1", ps_bp_sei->u4_au_cpb_removal_delay_delta_minus1);
+
+ if(1 == ps_vui_params->s_vui_hrd_parameters.u1_nal_hrd_parameters_present_flag)
+ {
+ for(i = 0; i <= ps_bp_sei->u4_cpb_cnt; i++)
+ {
+ PUT_BITS(
+ ps_bitstrm,
+ ps_bp_sei->au4_nal_initial_cpb_removal_delay[i],
+ ps_bp_sei->u4_initial_cpb_removal_delay_length,
+ return_status);
+ ENTROPY_TRACE(
+ "nal_initial_cpb_removal_delay", ps_bp_sei->au4_nal_initial_cpb_removal_delay[i]);
+
+ PUT_BITS(
+ ps_bitstrm,
+ ps_bp_sei->au4_nal_initial_cpb_removal_delay_offset[i],
+ ps_bp_sei->u4_initial_cpb_removal_delay_length,
+ return_status);
+ ENTROPY_TRACE(
+ "nal_initial_cpb_removal_delay_offset",
+ ps_bp_sei->au4_nal_initial_cpb_removal_delay_offset[i]);
+
+ if(u1_sub_pic_cpb_params_present_flag || ps_bp_sei->u1_rap_cpb_params_present_flag)
+ {
+ PUT_BITS(
+ ps_bitstrm,
+ ps_bp_sei->au4_nal_initial_alt_cpb_removal_delay[i],
+ ps_bp_sei->u4_initial_cpb_removal_delay_length,
+ return_status);
+ ENTROPY_TRACE(
+ "nal_initial_alt_cpb_removal_delay",
+ ps_bp_sei->au4_nal_initial_alt_cpb_removal_delay[i]);
+
+ PUT_BITS(
+ ps_bitstrm,
+ ps_bp_sei->au4_nal_initial_alt_cpb_removal_delay_offset[i],
+ ps_bp_sei->u4_initial_cpb_removal_delay_length,
+ return_status);
+ ENTROPY_TRACE(
+ "nal_initial_alt_cpb_removal_delay_offset",
+ ps_bp_sei->au4_nal_initial_alt_cpb_removal_delay_offset[i]);
+ }
+ }
+ }
+
+ if(1 == ps_vui_params->s_vui_hrd_parameters.u1_vcl_hrd_parameters_present_flag)
+ {
+ for(i = 0; i <= ps_bp_sei->u4_cpb_cnt; i++)
+ {
+ PUT_BITS(
+ ps_bitstrm,
+ ps_bp_sei->au4_vcl_initial_cpb_removal_delay[i],
+ ps_bp_sei->u4_initial_cpb_removal_delay_length,
+ return_status);
+ ENTROPY_TRACE(
+ "vcl_initial_cpb_removal_delay", ps_bp_sei->au4_vcl_initial_cpb_removal_delay[i]);
+
+ PUT_BITS(
+ ps_bitstrm,
+ ps_bp_sei->au4_vcl_initial_cpb_removal_delay_offset[i],
+ ps_bp_sei->u4_initial_cpb_removal_delay_length,
+ return_status);
+ ENTROPY_TRACE(
+ "vcl_initial_cpb_removal_delay_offset",
+ ps_bp_sei->au4_vcl_initial_cpb_removal_delay_offset[i]);
+
+ if(u1_sub_pic_cpb_params_present_flag || ps_bp_sei->u1_rap_cpb_params_present_flag)
+ {
+ PUT_BITS(
+ ps_bitstrm,
+ ps_bp_sei->au4_vcl_initial_alt_cpb_removal_delay[i],
+ ps_bp_sei->u4_initial_cpb_removal_delay_length,
+ return_status);
+ ENTROPY_TRACE(
+ "vcl_initial_alt_cpb_removal_delay",
+ ps_bp_sei->au4_vcl_initial_alt_cpb_removal_delay[i]);
+
+ PUT_BITS(
+ ps_bitstrm,
+ ps_bp_sei->au4_vcl_initial_alt_cpb_removal_delay_offset[i],
+ ps_bp_sei->u4_initial_cpb_removal_delay_length,
+ return_status);
+ ENTROPY_TRACE(
+ "vcl_initial_alt_cpb_removal_delay_offset",
+ ps_bp_sei->au4_vcl_initial_alt_cpb_removal_delay_offset[i]);
+ }
+ }
+ }
+
+ return (return_status);
+}
+
+/**
+******************************************************************************
+*
+* @brief Generates active parameter set (Supplemental Enhancement Information )
+*
+* @par Description
+* Generate Supplemental Enhancement Information as per Section 7.3.2.4
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle)
+*
+* @param[in] ps_act_sei
+* pointer to structure containing active parameter set SEI data
+*
+* @return success or failure error code
+*
+*****************************************************************************
+*/
+WORD32 ihevce_put_active_parameter_set_sei_params(
+ active_parameter_set_sei_param_t *ps_act_sei, bitstrm_t *ps_bitstrm)
+
+{
+ UWORD8 u1_payload_size = 0, i;
+ UWORD8 u1_range = 0, u1_act_sps_id;
+ WORD32 return_status = IHEVCE_SUCCESS;
+ /* Payload size is accumulated in bits and rounded up to bytes below */
+ u1_payload_size += 4; /* active_video_parameter_set_id */
+ u1_payload_size += 1; /* self_contained_cvs_flag */
+ u1_payload_size += 1; /* no_parameter_set_update_flag */
+ u1_payload_size += 1; // num_sps_ids_minus1: counted as 1 bit, i.e. assumed to be zero
+
+ u1_act_sps_id = ps_act_sei->au1_active_seq_parameter_set_id[0];
+ /* ue(v) length of the one SPS id covered by the estimate (the VPS id is the fixed 4 bits above) */
+ GETRANGE(u1_range, u1_act_sps_id);
+
+ u1_payload_size += (((u1_range - 1) << 1) + 1);
+
+ u1_payload_size = (u1_payload_size + 7) >> 3;
+
+ PUT_BITS(ps_bitstrm, u1_payload_size, 8, return_status);
+ ENTROPY_TRACE("payload_size", u1_payload_size);
+
+ PUT_BITS(ps_bitstrm, ps_act_sei->u1_active_video_parameter_set_id, 4, return_status);
+ ENTROPY_TRACE("active_video_parameter_set_id", ps_act_sei->u1_active_video_parameter_set_id);
+
+ PUT_BITS(ps_bitstrm, ps_act_sei->u1_self_contained_cvs_flag, 1, return_status);
+ ENTROPY_TRACE("self_contained_cvs_flag", ps_act_sei->u1_self_contained_cvs_flag);
+
+ PUT_BITS(ps_bitstrm, ps_act_sei->u1_no_parameter_set_update_flag, 1, return_status);
+ ENTROPY_TRACE("no_parameter_set_update_flag", ps_act_sei->u1_no_parameter_set_update_flag);
+
+ PUT_BITS_UEV(ps_bitstrm, ps_act_sei->u1_num_sps_ids_minus1, return_status);
+
+ ENTROPY_TRACE("num_sps_ids_minus1", ps_act_sei->u1_num_sps_ids_minus1);
+ /* NOTE(review): the size estimate above covers one SPS id only; more ids are still written here */
+ for(i = 0; i <= ps_act_sei->u1_num_sps_ids_minus1; i++)
+ {
+ PUT_BITS_UEV(ps_bitstrm, ps_act_sei->au1_active_seq_parameter_set_id[i], return_status);
+ ENTROPY_TRACE(
+ "active_seq_parameter_set_id", ps_act_sei->au1_active_seq_parameter_set_id[i]);
+ }
+ return (return_status);
+}
+
+/**
+******************************************************************************
+*
+* @brief Generates Mastering Display Colour Volume (Supplemental Enhancement Information )
+*
+* @par Description
+* Generate Supplemental Enhancement Information as per Section 7.3.2.4
+*
+* @param[in] ps_mdcl_sei
+* pointer to structure containing mastering display colour volume SEI data
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle)
+*
+* @par Payload
+* 24 bytes: 3 x (x, y) primaries, white point (x, y), max and min luminance
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ihevce_put_mastering_disp_col_vol_sei_params(
+ mastering_dis_col_vol_sei_params_t *ps_mdcl_sei, bitstrm_t *ps_bitstrm)
+{
+ WORD32 return_status = IHEVCE_SUCCESS;
+ UWORD8 u1_payload_size = 0;
+ WORD32 c;
+ /* Every field has a fixed width, so the size is summed directly in bytes */
+ u1_payload_size += 6; /* display primaries x */
+ u1_payload_size += 6; /* display primaries y */
+ u1_payload_size += 2; /* white point x */
+ u1_payload_size += 2; /* white point y */
+ u1_payload_size += 4; /* max display mastering luminance */
+ u1_payload_size += 4; /* min display mastering luminance */
+
+ /************************************************************************/
+ /* PayloadSize : This is the size of the payload in bytes */
+ /************************************************************************/
+ PUT_BITS(ps_bitstrm, u1_payload_size, 8, return_status);
+ ENTROPY_TRACE("u1_payload_size", u1_payload_size);
+
+ ASSERT(ps_mdcl_sei->u2_white_point_x <= 50000);
+
+ ASSERT(ps_mdcl_sei->u2_white_point_y <= 50000);
+
+ ASSERT(
+ ps_mdcl_sei->u4_max_display_mastering_luminance >
+ ps_mdcl_sei->u4_min_display_mastering_luminance);
+
+ /*******************************************************************************/
+ /* Put the mastering display colour volume SEI parameters into the bitstream. */
+ /* For details refer to section D.1.1 of the standard */
+ /*******************************************************************************/
+
+ /* display primaries: x then y for each of the three components */
+ for(c = 0; c < 3; c++)
+ {
+ ASSERT(ps_mdcl_sei->au2_display_primaries_x[c] <= 50000);
+
+ PUT_BITS(ps_bitstrm, ps_mdcl_sei->au2_display_primaries_x[c], 16, return_status);
+ ENTROPY_TRACE("u2_display_primaries_x", ps_mdcl_sei->au2_display_primaries_x[c]);
+
+ ASSERT(ps_mdcl_sei->au2_display_primaries_y[c] <= 50000);
+
+ PUT_BITS(ps_bitstrm, ps_mdcl_sei->au2_display_primaries_y[c], 16, return_status);
+ ENTROPY_TRACE("u2_display_primaries_y", ps_mdcl_sei->au2_display_primaries_y[c]);
+ }
+
+ /* white point x */
+ PUT_BITS(ps_bitstrm, ps_mdcl_sei->u2_white_point_x, 16, return_status);
+ ENTROPY_TRACE("u2_white point x", ps_mdcl_sei->u2_white_point_x);
+
+ /* white point y */
+ PUT_BITS(ps_bitstrm, ps_mdcl_sei->u2_white_point_y, 16, return_status);
+ ENTROPY_TRACE("u2_white point y", ps_mdcl_sei->u2_white_point_y);
+
+ /* max display mastering luminance */
+ PUT_BITS(ps_bitstrm, ps_mdcl_sei->u4_max_display_mastering_luminance, 32, return_status);
+ ENTROPY_TRACE(
+ "u4_max_display_mastering_luminance", ps_mdcl_sei->u4_max_display_mastering_luminance);
+
+ /* min display mastering luminance */
+ PUT_BITS(ps_bitstrm, ps_mdcl_sei->u4_min_display_mastering_luminance, 32, return_status);
+ ENTROPY_TRACE(
+ "u4_min_display_mastering_luminance", ps_mdcl_sei->u4_min_display_mastering_luminance);
+
+ return (return_status);
+}
+
+/**
+******************************************************************************
+*
+* @brief Stores user data in bitstream
+*
+* @par Description
+* Parse Supplemental Enhancement Information as per Section 7.3.2.4
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle)
+*
+* @param[in] u4_sei_payload_length
+* SEI Payload Length
+*
+* @param[in] pu1_sei_payload
+* pointer to SEI Payload
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ihevce_put_sei_params(
+ UWORD32 u4_sei_payload_length, UWORD8 *pu1_sei_payload, bitstrm_t *ps_bitstrm)
+{
+ WORD32 return_status = IHEVCE_SUCCESS;
+ UWORD32 u4_byte_idx;
+ UWORD32 u4_size_left = u4_sei_payload_length;
+
+ /* Payload size: one 0xFF byte per full 255 bytes, then the remainder byte */
+ for(; u4_size_left >= 0xFF; u4_size_left -= 0xFF)
+ {
+ PUT_BITS(ps_bitstrm, (UWORD32)0xFF, 8, return_status);
+ }
+ PUT_BITS(ps_bitstrm, (UWORD32)u4_size_left, 8, return_status);
+
+ /* Payload bytes, written one at a time in their original order */
+ for(u4_byte_idx = 0; u4_byte_idx < u4_sei_payload_length; u4_byte_idx++)
+ {
+ PUT_BITS(ps_bitstrm, (UWORD32)pu1_sei_payload[u4_byte_idx], 8, return_status);
+ }
+
+ return (return_status);
+}
+
+/**
+******************************************************************************
+*
+* @brief Stores content light level info in bitstream
+*
+* @par Description
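+* Generate Supplemental Enhancement Information as per Section 7.3.2.4
+*
+* @param[in] u2_avg_cll
+* 16-bit avg_cll value to be written into the payload
+*
+* @param[in] u2_max_cll
+* 16-bit max_cll value to be written into the payload
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle)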
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ihevce_put_cll_info_sei_params(UWORD16 u2_avg_cll, UWORD16 u2_max_cll, bitstrm_t *ps_bitstrm)
+{
+ WORD32 return_status = IHEVCE_SUCCESS;
+ UWORD8 u1_payload_size;
+
+ u1_payload_size = 4;
+ /************************************************************************/
+ /* PayloadSize : This is the size of the payload in bytes */
+ /************************************************************************/
+ PUT_BITS(ps_bitstrm, u1_payload_size, 8, return_status);
+ ENTROPY_TRACE("u1_payload_size", u1_payload_size);
+ /* SEI syntax order: max_content_light_level first, then max_pic_average_light_level */
+ PUT_BITS(ps_bitstrm, (UWORD32)u2_max_cll, 16, return_status);
+ PUT_BITS(ps_bitstrm, (UWORD32)u2_avg_cll, 16, return_status);
+
+ return (return_status);
+}
+
+/**
+******************************************************************************
+*
+* @brief Generates Recovery Point (Supplemental Enhancement Information )
+*
+* @par Description
+* Generate Supplemental Enhancement Information as per Section 7.3.2.4
+*
+* @param[in] ps_rp_sei
+* pointer to structure containing recovery point SEI data
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle)
+*
+* @par Payload
+* payload size byte, recovery_poc_cnt, exact_match_flag, broken_link_flag
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ihevce_put_recovery_point_sei_params(
+ recovery_point_sei_params_t *ps_rp_sei, bitstrm_t *ps_bitstrm)
+{
+ WORD32 return_status = IHEVCE_SUCCESS;
+ UWORD8 u1_payload_size = 0;
+ /* Estimate the payload length in bits; it is rounded up to bytes after this block */
+ {
+ UWORD32 range, val;
+ WORD32 recov_point = ps_rp_sei->i4_recovery_poc_cnt;
+ val = 0;
+ range = 0;
+ /* Map the signed count to an unsigned value: v <= 0 gives 2 * (-v), v > 0 gives 2 * v - 1 */
+ if(recov_point <= 0)
+ val = ((-recov_point) << 1);
+ else
+ val = (recov_point << 1) - 1;
+ /* NOTE(review): GETRANGE is defined elsewhere; confirm whether val + 1 is needed for an exact length */
+ GETRANGE(range, val);
+
+ u1_payload_size += (((range - 1) << 1) + 1);
+ /* one bit each for exact_match_flag and broken_link_flag */
+ u1_payload_size += 1;
+ u1_payload_size += 1;
+ }
+
+ u1_payload_size = (u1_payload_size + 7) >> 3;
+ /************************************************************************/
+ /* PayloadSize : This is the size of the payload in bytes */
+ /************************************************************************/
+ PUT_BITS(ps_bitstrm, u1_payload_size, 8, return_status);
+ ENTROPY_TRACE("u1_payload_size", u1_payload_size);
+ /************************************************************************/
+ /* Put the recovery point SEI parameters into the bitstream. For */
+ /* details refer to section D.1.1 of the standard */
+ /************************************************************************/
+
+ /* i4_recovery_poc_cnt */
+ PUT_BITS_SEV(ps_bitstrm, ps_rp_sei->i4_recovery_poc_cnt, return_status);
+ ENTROPY_TRACE("i4_recovery_poc_cnt", ps_rp_sei->i4_recovery_poc_cnt);
+
+ /* u1_exact_match_flag */
+ PUT_BITS(ps_bitstrm, ps_rp_sei->u1_exact_match_flag, 1, return_status);
+ ENTROPY_TRACE("exact_match_flag", ps_rp_sei->u1_exact_match_flag);
+
+ /* u1_broken_link_flag */
+ PUT_BITS(ps_bitstrm, ps_rp_sei->u1_broken_link_flag, 1, return_status);
+ ENTROPY_TRACE("broken_link_flag", ps_rp_sei->u1_broken_link_flag);
+
+ return (return_status);
+}
+
+/**
+******************************************************************************
+*
+* @brief Generates Picture timing (Supplemental Enhancement Information )
+*
+* @par Description
+* Generate Supplemental Enhancement Information as per Section 7.3.2.4
+*
+* @param[in] ps_pt_sei
+* pointer to structure containing picture timing SEI data
+*
+* @param[in] ps_vui_params
+* pointer to structure containing VUI data
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle)
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ihevce_put_pic_timing_sei_params(
+ pic_timing_sei_params_t *ps_pt_sei, vui_t *ps_vui_params, bitstrm_t *ps_bitstrm)
+{
+ UWORD32 i;
+ UWORD8 u1_payload_size = 0;
+ WORD32 return_status = IHEVCE_SUCCESS;
+ UWORD8 u1_au_cpb_removal_delay_length =
+ ps_vui_params->s_vui_hrd_parameters.u1_au_cpb_removal_delay_length_minus1 + 1;
+
+ UWORD8 u1_dpb_output_delay_length =
+ ps_vui_params->s_vui_hrd_parameters.u1_dpb_output_delay_length_minus1 + 1;
+
+ UWORD8 u1_du_cpb_removal_delay_increment_length =
+ ps_vui_params->s_vui_hrd_parameters.u1_du_cpb_removal_delay_increment_length_minus1 + 1;
+
+ UWORD8 u1_sub_pic_cpb_params_present_flag =
+ ps_vui_params->s_vui_hrd_parameters.u1_sub_pic_cpb_params_present_flag;
+
+ UWORD8 u1_sub_pic_cpb_params_in_pt_sei_flag =
+ ps_vui_params->s_vui_hrd_parameters.u1_sub_pic_cpb_params_in_pic_timing_sei_flag;
+ /* Payload size is accumulated in bits here and rounded up to bytes after the block */
+ {
+ if(1 == ps_vui_params->u1_frame_field_info_present_flag)
+ {
+ u1_payload_size += 4;
+ u1_payload_size += 2;
+ u1_payload_size += 1;
+ }
+
+ if(ps_vui_params->s_vui_hrd_parameters.u1_nal_hrd_parameters_present_flag ||
+ ps_vui_params->s_vui_hrd_parameters.u1_vcl_hrd_parameters_present_flag)
+ {
+ u1_payload_size += u1_au_cpb_removal_delay_length + u1_dpb_output_delay_length;
+ if(u1_sub_pic_cpb_params_present_flag) /* pic_dpb_output_du_delay */
+ u1_payload_size += ps_vui_params->s_vui_hrd_parameters.u1_dpb_output_delay_du_length_minus1 + 1;
+ }
+
+ if(u1_sub_pic_cpb_params_in_pt_sei_flag && u1_sub_pic_cpb_params_present_flag)
+ {
+ UWORD32 range, val = ps_pt_sei->u4_num_decoding_units_minus1;
+ range = 0;
+
+ GETRANGE(range, val);
+ u1_payload_size += (((range - 1) << 1) + 1);
+
+ u1_payload_size += 1;
+ if(1 == ps_pt_sei->u1_du_common_cpb_removal_delay_flag)
+ {
+ u1_payload_size += u1_du_cpb_removal_delay_increment_length;
+ }
+ for(i = 0; i <= ps_pt_sei->u4_num_decoding_units_minus1; i++)
+ {
+ val = ps_pt_sei->au4_num_nalus_in_du_minus1[i];
+ range = 0;
+
+ GETRANGE(range, val);
+ u1_payload_size += (((range - 1) << 1) + 1);
+
+ if((1 != ps_pt_sei->u1_du_common_cpb_removal_delay_flag) &&
+ (i < ps_pt_sei->u4_num_decoding_units_minus1))
+ {
+ u1_payload_size += u1_du_cpb_removal_delay_increment_length;
+ }
+ }
+ }
+ }
+
+ ASSERT((ps_pt_sei->u4_au_cpb_removal_delay_minus1 < ((ULWORD64)1 << u1_au_cpb_removal_delay_length)));
+
+ u1_payload_size = (u1_payload_size + 7) >> 3;
+ /************************************************************************/
+ /* PayloadSize : This is the size of the payload in bytes */
+ /************************************************************************/
+ PUT_BITS(ps_bitstrm, u1_payload_size, 8, return_status);
+ ENTROPY_TRACE("u1_payload_size", u1_payload_size);
+
+ /************************************************************************/
+ /* Put the picture timing SEI parameters into the bitstream. For */
+ /* details refer to section D.1.2 of the standard */
+ /************************************************************************/
+
+ if(1 == ps_vui_params->u1_frame_field_info_present_flag)
+ {
+ PUT_BITS(ps_bitstrm, ps_pt_sei->u4_pic_struct, 4, return_status);
+ ENTROPY_TRACE("pic_struct", ps_pt_sei->u4_pic_struct);
+
+ PUT_BITS(ps_bitstrm, ps_pt_sei->u4_source_scan_type, 2, return_status);
+ ENTROPY_TRACE("source_scan_type", ps_pt_sei->u4_source_scan_type);
+
+ PUT_BITS(ps_bitstrm, ps_pt_sei->u1_duplicate_flag, 1, return_status);
+ ENTROPY_TRACE("duplicate_flag", ps_pt_sei->u1_duplicate_flag);
+ }
+
+ if(ps_vui_params->s_vui_hrd_parameters.u1_nal_hrd_parameters_present_flag ||
+ ps_vui_params->s_vui_hrd_parameters
+ .u1_vcl_hrd_parameters_present_flag) // condition from standard when CpbDpbDelaysPresentFlag flag will be present
+ {
+ PUT_BITS(
+ ps_bitstrm,
+ ps_pt_sei->u4_au_cpb_removal_delay_minus1,
+ u1_au_cpb_removal_delay_length,
+ return_status);
+ ENTROPY_TRACE("cpb_removal_delay_minus1", ps_pt_sei->u4_au_cpb_removal_delay_minus1);
+
+ PUT_BITS(
+ ps_bitstrm,
+ ps_pt_sei->u4_pic_dpb_output_delay,
+ u1_dpb_output_delay_length,
+ return_status);
+ ENTROPY_TRACE("pic_dpb_output_delay", ps_pt_sei->u4_pic_dpb_output_delay);
+
+ if(u1_sub_pic_cpb_params_present_flag)
+ {
+ PUT_BITS(
+ ps_bitstrm,
+ ps_pt_sei->u4_pic_dpb_output_du_delay,
+ (ps_vui_params->s_vui_hrd_parameters.u1_dpb_output_delay_du_length_minus1 + 1),
+ return_status);
+ ENTROPY_TRACE("pic_dpb_output_du_delay", ps_pt_sei->u4_pic_dpb_output_du_delay);
+ }
+
+ if(u1_sub_pic_cpb_params_in_pt_sei_flag && u1_sub_pic_cpb_params_present_flag)
+ {
+ PUT_BITS_UEV(ps_bitstrm, ps_pt_sei->u4_num_decoding_units_minus1, return_status);
+ ENTROPY_TRACE("num_decoding_units_minus1", ps_pt_sei->u4_num_decoding_units_minus1);
+
+ PUT_BITS(ps_bitstrm, ps_pt_sei->u1_du_common_cpb_removal_delay_flag, 1, return_status);
+ ENTROPY_TRACE(
+ "du_common_cpb_removal_delay_flag", ps_pt_sei->u1_du_common_cpb_removal_delay_flag);
+
+ if(1 == ps_pt_sei->u1_du_common_cpb_removal_delay_flag)
+ {
+ PUT_BITS(
+ ps_bitstrm,
+ ps_pt_sei->u4_du_common_cpb_removal_delay_increment_minus1,
+ u1_du_cpb_removal_delay_increment_length,
+ return_status);
+ ENTROPY_TRACE(
+ "du_common_cpb_removal_delay_increment_minus1",
+ ps_pt_sei->u4_du_common_cpb_removal_delay_increment_minus1);
+ }
+ /* NOTE(review): per-DU arrays are indexed by i; assumes num_decoding_units_minus1 + 1 entries */
+ for(i = 0; i <= ps_pt_sei->u4_num_decoding_units_minus1; i++)
+ {
+ PUT_BITS_UEV(ps_bitstrm, ps_pt_sei->au4_num_nalus_in_du_minus1[i], return_status);
+ ENTROPY_TRACE("num_nalus_in_du_minus1", ps_pt_sei->au4_num_nalus_in_du_minus1[i]);
+
+ if((1 != ps_pt_sei->u1_du_common_cpb_removal_delay_flag) &&
+ (i < ps_pt_sei->u4_num_decoding_units_minus1))
+ {
+ PUT_BITS(
+ ps_bitstrm,
+ ps_pt_sei->au4_du_cpb_removal_delay_increment_minus1[i],
+ u1_du_cpb_removal_delay_increment_length,
+ return_status);
+ ENTROPY_TRACE(
+ "du_cpb_removal_delay_increment_minus1",
+ ps_pt_sei->au4_du_cpb_removal_delay_increment_minus1[i]);
+ }
+ }
+ }
+ }
+
+ return (return_status);
+}
+
+/**
+******************************************************************************
+*
+* @brief Generates Hash (Supplemental Enhancement Information )
+*
+* @par Description
+* Put hash sei params
+*
+* @param[in] ps_hash_sei_params
+* pointer to structure containing hash SEI data
+*
+* @param[in] i1_decoded_pic_hash_sei_flag
+* flag saying the hash SEI type
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle)
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ihevce_put_hash_sei_params(
+ hash_sei_param_t *ps_hash_sei_params, WORD8 i1_decoded_pic_hash_sei_flag, bitstrm_t *ps_bitstrm)
+{
+ UWORD32 i, c_idx, val;
+ WORD32 return_status = IHEVCE_SUCCESS;
+ UWORD8 u1_payload_size = 0;
+ /* Payload size in bytes: one hash_type byte plus the per-plane hash values */
+ u1_payload_size += 1; /* hash_type */
+
+ if(1 == i1_decoded_pic_hash_sei_flag)
+ {
+ /* MD5 : 3 color planes * 16 byte values */
+ u1_payload_size += (16 * 3);
+ }
+ else if(2 == i1_decoded_pic_hash_sei_flag)
+ {
+ /* CRC : 3 color planes * 2 byte values */
+ u1_payload_size += (2 * 3);
+ }
+ else if(3 == i1_decoded_pic_hash_sei_flag)
+ {
+ /* Checksum : 3 color planes * 4 byte values */
+ u1_payload_size += (4 * 3);
+ }
+ else
+ {
+ ASSERT(0);
+ }
+
+ /************************************************************************/
+ /* PayloadSize : This is the size of the payload in bytes */
+ /************************************************************************/
+ PUT_BITS(ps_bitstrm, u1_payload_size, 8, return_status);
+ ENTROPY_TRACE("payload_size", u1_payload_size);
+
+ /************************************************************************/
+ /* Put the hash SEI parameters into the bitstream. For */
+ /* details refer to section D.2.19 of the standard */
+ /************************************************************************/
+
+ PUT_BITS(ps_bitstrm, (i1_decoded_pic_hash_sei_flag - 1), 8, return_status);
+ ENTROPY_TRACE("hash_type", (i1_decoded_pic_hash_sei_flag - 1));
+
+ if(1 == i1_decoded_pic_hash_sei_flag)
+ {
+ for(c_idx = 0; c_idx < 3; c_idx++)
+ {
+ for(i = 0; i < 16; i++)
+ {
+ PUT_BITS(ps_bitstrm, ps_hash_sei_params->au1_sei_hash[c_idx][i], 8, return_status);
+ ENTROPY_TRACE("picture_md5", ps_hash_sei_params->au1_sei_hash[c_idx][i]);
+ }
+ }
+ }
+ else if(2 == i1_decoded_pic_hash_sei_flag)
+ {
+ for(c_idx = 0; c_idx < 3; c_idx++)
+ {
+ val = (ps_hash_sei_params->au1_sei_hash[c_idx][0] << 8) +
+ ps_hash_sei_params->au1_sei_hash[c_idx][1];
+ PUT_BITS(ps_bitstrm, val, 16, return_status);
+ ENTROPY_TRACE("picture_crc", val);
+ }
+ }
+ else if(3 == i1_decoded_pic_hash_sei_flag)
+ {
+ for(c_idx = 0; c_idx < 3; c_idx++)
+ {
+ val = ((UWORD32)ps_hash_sei_params->au1_sei_hash[c_idx][0] << 24) +
+ (ps_hash_sei_params->au1_sei_hash[c_idx][1] << 16) +
+ (ps_hash_sei_params->au1_sei_hash[c_idx][2] << 8) +
+ (ps_hash_sei_params->au1_sei_hash[c_idx][3]);
+ /* the top byte is cast to UWORD32 so the 24-bit shift is done on an unsigned value */
+ PUT_BITS(ps_bitstrm, val, 32, return_status);
+ ENTROPY_TRACE("picture_checksum", val);
+ }
+ }
+ else
+ {
+ ASSERT(0);
+ }
+
+ return (return_status);
+}
+
+/**
+******************************************************************************
+*
+* @brief Writes one SEI message (payload type and payload) into the bitstream
+*
+* @par Description
+* Generates a single SEI message. payloadType is coded as a run of 0xFF bytes
+* (each standing for 255) followed by one final byte that is below 0xFF. The
+* payload is then written by the type specific function (which, as noted in
+* the code, also sends the payload size). On success
+* ihevce_put_rbsp_trailing_bits() is called when
+* ps_bitstrm->i4_bits_left_in_cw is not a multiple of 8.
+*
+* @param[in] e_payload_type
+* Determines the type of SEI msg. Types without a dedicated case are handed
+* to ihevce_put_sei_params() together with the caller supplied payload
+*
+* @param[in] ps_sei_params
+* pointer to structure containing SEI data
+* buffer period, recovery point, picture timing
+*
+* @param[in] ps_vui_params
+* pointer to structure containing VUI data
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle)
+*
+* @param[in] i4_sei_payload_length
+* length of the caller supplied payload; used only by the default case
+*
+* @param[in] pu1_sei_payload
+* caller supplied payload buffer; used only by the default case
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ihevce_put_sei_msg(
+    IHEVCE_SEI_TYPE e_payload_type,
+    sei_params_t *ps_sei_params,
+    vui_t *ps_vui_params,
+    bitstrm_t *ps_bitstrm,
+    UWORD32 i4_sei_payload_length,
+    UWORD8 *pu1_sei_payload)
+{
+    WORD32 return_status = IHEVCE_SUCCESS;
+    /************************************************************************/
+    /* PayloadType : Send in the SEI type in the stream                     */
+    /* One 0xFF byte is sent per 255, followed by the remainder. The last   */
+    /* byte has to be below 0xFF because a decoder treats every 0xFF as a   */
+    /* further escape byte; hence the loop runs while the value is >= 0xFF  */
+    /* so that exact multiples of 255 are terminated by a 0x00 byte.        */
+    /************************************************************************/
+    UWORD32 u4_payload_type = e_payload_type;
+    while(u4_payload_type >= 0xFF)
+    {
+        PUT_BITS(ps_bitstrm, 0xFF, 8, return_status);
+        u4_payload_type -= 0xFF;
+    }
+    PUT_BITS(ps_bitstrm, (UWORD32)u4_payload_type, 8, return_status);
+    ENTROPY_TRACE("e_payload_type", e_payload_type);
+
+    /************************************************************************/
+    /* PayloadSize : This is sent from within the type specific functions   */
+    /************************************************************************/
+
+    switch(e_payload_type)
+    {
+    case IHEVCE_SEI_BUF_PERIOD_T:
+        return_status |= ihevce_put_buf_period_sei_params(
+            &(ps_sei_params->s_buf_period_sei_params), ps_vui_params, ps_bitstrm);
+        break;
+
+    case IHEVCE_SEI_PIC_TIMING_T:
+        return_status |= ihevce_put_pic_timing_sei_params(
+            &(ps_sei_params->s_pic_timing_sei_params), ps_vui_params, ps_bitstrm);
+        break;
+
+    case IHEVCE_SEI_RECOVERY_POINT_T:
+        return_status |= ihevce_put_recovery_point_sei_params(
+            &(ps_sei_params->s_recovery_point_params), ps_bitstrm);
+        break;
+
+    case IHEVCE_SEI_ACTIVE_PARAMETER_SETS_T:
+        return_status |= ihevce_put_active_parameter_set_sei_params(
+            &(ps_sei_params->s_active_parameter_set_sei_params), ps_bitstrm);
+        break;
+
+    case IHEVCE_SEI_DECODED_PICTURE_HASH_T:
+        return_status |= ihevce_put_hash_sei_params(
+            &(ps_sei_params->s_hash_sei_params),
+            ps_sei_params->i1_decoded_pic_hash_sei_flag,
+            ps_bitstrm);
+        break;
+
+    case IHEVCE_SEI_MASTERING_DISP_COL_VOL_T:
+        return_status |= ihevce_put_mastering_disp_col_vol_sei_params(
+            &(ps_sei_params->s_mastering_dis_col_vol_sei_params), ps_bitstrm);
+        break;
+
+    case IHEVCE_SEI_CONTENT_LIGHT_LEVEL_DATA_T:
+        return_status |= ihevce_put_cll_info_sei_params(
+            ps_sei_params->s_cll_info_sei_params.u2_sei_avg_cll,
+            ps_sei_params->s_cll_info_sei_params.u2_sei_max_cll,
+            ps_bitstrm);
+        break;
+
+    default:
+        /* Any payload type without a dedicated case above ends up here and */
+        /* is added to the bitstream from the caller supplied buffer        */
+        return_status |= ihevce_put_sei_params(i4_sei_payload_length, pu1_sei_payload, ps_bitstrm);
+        break;
+    }
+
+    ASSERT(IHEVCE_SUCCESS == return_status);
+
+    /* trailing bits, only when the payload did not end on a byte boundary */
+    if((IHEVCE_SUCCESS == return_status) && (ps_bitstrm->i4_bits_left_in_cw & 0x7))
+        ihevce_put_rbsp_trailing_bits(ps_bitstrm);
+
+    return (return_status);
+}
+
+/**
+******************************************************************************
+*
+* @brief Generates a prefix or suffix SEI (Supplemental Enhancement
+* Information) NAL unit
+*
+* @par Description
+* Writes the start code and NAL unit header, then every SEI message that is
+* enabled in ps_sei_params for the given NAL type, and finally the rbsp
+* trailing bits. When the buffering period SEI is enabled, the active
+* parameter set SEI is emitted in a NAL unit of its own ahead of it.
+* The status of every step is OR-ed into the returned value.
+*
+* @param[in] ps_bitstrm
+* pointer to bitstream context (handle)
+*
+* @param[in] ps_sei_params
+* pointer to structure containing SEI data
+* buffer period, recovery point, picture timing
+*
+* @param[in] ps_vui_params
+* pointer to structure containing VUI data
+*
+* @param[in] insert_per_cra
+* accepted but not used by this function
+*
+* @param[in] nal_unit_header
+* NAL_PREFIX_SEI / NAL_SUFFIX_SEI
+*
+* @param[in] u4_num_sei_payloads
+* Number of SEI payloads
+*
+* @param[in] ps_sei_payload
+* pointer to structure containing SEI payload data
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ihevce_generate_sei(
+    bitstrm_t *ps_bitstrm,
+    sei_params_t *ps_sei_params,
+    vui_t *ps_vui_params,
+    WORD32 insert_per_cra,
+    WORD32 nal_unit_header,
+    UWORD32 u4_num_sei_payloads,
+    sei_payload_t *ps_sei_payload)
+{
+    UWORD32 u4_i;
+    WORD32 return_status = IHEVCE_SUCCESS;
+
+    (void)insert_per_cra;
+    /* Insert Start Code */
+    return_status = ihevce_put_nal_start_code_prefix(ps_bitstrm, 1);
+
+    ASSERT((NAL_PREFIX_SEI == nal_unit_header) || (NAL_SUFFIX_SEI == nal_unit_header));
+    /* Insert Nal Unit Header */
+    return_status |= ihevce_generate_nal_unit_header(ps_bitstrm, nal_unit_header, 0);
+
+    if(NAL_PREFIX_SEI == nal_unit_header)
+    {
+        /* Active Parameter and Buffering period insertion */
+
+        if(ps_sei_params->i1_buf_period_params_present_flag)
+        {
+            /* insert active_parameter_set SEI required if buffering period SEI messages are inserted */
+            return_status |= ihevce_put_sei_msg(
+                IHEVCE_SEI_ACTIVE_PARAMETER_SETS_T,
+                ps_sei_params,
+                ps_vui_params,
+                ps_bitstrm,
+                0,
+                NULL);
+
+            /*************************************************************************************************/
+            /* NOTE: Need to terminate and start new SEI message after active parameter set SEI              */
+            /* Buffering period/pic timing SEI referring to active SPS cannot be embedded in same SEI message*/
+            /* This is because SPS is activated in HM decoder after completely parsing full SEI message.     */
+            /*************************************************************************************************/
+            ihevce_put_rbsp_trailing_bits(ps_bitstrm);
+
+            /* Insert Next SEI Start Code. The status is accumulated with |= so */
+            /* that a failure reported by the steps above is not overwritten.   */
+            return_status |= ihevce_put_nal_start_code_prefix(ps_bitstrm, 1);
+
+            /* Insert Next SEI Nal Unit Header */
+            return_status |= ihevce_generate_nal_unit_header(ps_bitstrm, nal_unit_header, 0);
+
+            /* Buffering Period SEI message for all IDR, CRA pics */
+            return_status |= ihevce_put_sei_msg(
+                IHEVCE_SEI_BUF_PERIOD_T, ps_sei_params, ps_vui_params, ps_bitstrm, 0, NULL);
+        }
+
+        /* Pic timing SEI message for non IDR, non CRA pics */
+        if(ps_sei_params->i1_pic_timing_params_present_flag)
+        {
+            return_status |= ihevce_put_sei_msg(
+                IHEVCE_SEI_PIC_TIMING_T, ps_sei_params, ps_vui_params, ps_bitstrm, 0, NULL);
+        }
+
+        /* Recovery point SEI message for all IDR, CRA pics */
+        if(ps_sei_params->i1_recovery_point_params_present_flag)
+        {
+            return_status |= ihevce_put_sei_msg(
+                IHEVCE_SEI_RECOVERY_POINT_T, ps_sei_params, ps_vui_params, ps_bitstrm, 0, NULL);
+        }
+
+        /* Mastering Display Colour SEI for all IDR, CRA pics */
+        if(ps_sei_params->i4_sei_mastering_disp_colour_vol_params_present_flags)
+        {
+            return_status |= ihevce_put_sei_msg(
+                IHEVCE_SEI_MASTERING_DISP_COL_VOL_T,
+                ps_sei_params,
+                ps_vui_params,
+                ps_bitstrm,
+                0,
+                NULL);
+        }
+
+        /* Registered User Data: one SEI message per caller supplied payload */
+        for(u4_i = 0; u4_i < u4_num_sei_payloads; u4_i++)
+        {
+            return_status |= ihevce_put_sei_msg(
+                (IHEVCE_SEI_TYPE)(ps_sei_payload[u4_i].u4_payload_type),
+                ps_sei_params,
+                ps_vui_params,
+                ps_bitstrm,
+                ps_sei_payload[u4_i].u4_payload_length,
+                ps_sei_payload[u4_i].pu1_sei_payload);
+        }
+
+        /* Content Light Level Information */
+        if(ps_sei_params->i1_sei_cll_enable)
+        {
+            return_status |= ihevce_put_sei_msg(
+                IHEVCE_SEI_CONTENT_LIGHT_LEVEL_DATA_T,
+                ps_sei_params,
+                ps_vui_params,
+                ps_bitstrm,
+                0,
+                NULL);
+        }
+    }
+    else if(NAL_SUFFIX_SEI == nal_unit_header)
+    {
+        /* Insert hash SEI */
+        if(ps_sei_params->i1_decoded_pic_hash_sei_flag)
+        {
+            return_status |= ihevce_put_sei_msg(
+                IHEVCE_SEI_DECODED_PICTURE_HASH_T,
+                ps_sei_params,
+                ps_vui_params,
+                ps_bitstrm,
+                0,
+                NULL);
+        }
+    }
+
+    /* put trailing bits to indicate end of sei */
+    ihevce_put_rbsp_trailing_bits(ps_bitstrm);
+
+    return return_status;
+}
+
+/**
+******************************************************************************
+*
+* @brief Populates the mastering display colour volume params of SEI structure
+*
+* @par Description
+* Copies the three display primaries (x and y), the white point (x and y) and
+* the max / min display mastering luminance from the output stream params
+* into ps_sei->s_mastering_dis_col_vol_sei_params
+*
+* @param[out] ps_sei
+* pointer to sei params that needs to be populated
+*
+* @param[in] ps_out_strm_prms
+* pointer to output stream params
+*
+* @return IHEVCE_SUCCESS
+*
+******************************************************************************
+*/
+WORD32 ihevce_populate_mastering_disp_col_vol_sei(
+    sei_params_t *ps_sei, ihevce_out_strm_params_t *ps_out_strm_prms)
+{
+    mastering_dis_col_vol_sei_params_t *ps_dst = &ps_sei->s_mastering_dis_col_vol_sei_params;
+    WORD32 prim_idx = 0;
+
+    /* min / max display mastering luminance */
+    ps_dst->u4_min_display_mastering_luminance =
+        ps_out_strm_prms->u4_min_display_mastering_luminance;
+    ps_dst->u4_max_display_mastering_luminance =
+        ps_out_strm_prms->u4_max_display_mastering_luminance;
+
+    /* white point */
+    ps_dst->u2_white_point_y = ps_out_strm_prms->u2_white_point_y;
+    ps_dst->u2_white_point_x = ps_out_strm_prms->u2_white_point_x;
+
+    /* x and y of each of the three display primaries */
+    while(prim_idx < 3)
+    {
+        ps_dst->au2_display_primaries_x[prim_idx] =
+            ps_out_strm_prms->au2_display_primaries_x[prim_idx];
+        ps_dst->au2_display_primaries_y[prim_idx] =
+            ps_out_strm_prms->au2_display_primaries_y[prim_idx];
+        prim_idx++;
+    }
+
+    return IHEVCE_SUCCESS;
+}
+
+/**
+******************************************************************************
+*
+* @brief Populates the recovery point params of SEI structure
+*
+* @par Description
+* Writes fixed values into ps_sei->s_recovery_point_params:
+* i4_recovery_poc_cnt = 0, u1_broken_link_flag = 0, u1_exact_match_flag = 1
+*
+* @param[out] ps_sei
+* pointer to sei params that needs to be populated
+*
+* @param[in] ps_vui_sei_prms
+* pointer to application vui / sei params; accepted but not read
+*
+* @return IHEVCE_SUCCESS
+*
+******************************************************************************
+*/
+WORD32 ihevce_populate_recovery_point_sei(
+    sei_params_t *ps_sei, ihevce_vui_sei_params_t *ps_vui_sei_prms)
+{
+    /* The application params are not consulted */
+    (void)ps_vui_sei_prms;
+
+    ps_sei->s_recovery_point_params.u1_exact_match_flag = 1;
+    ps_sei->s_recovery_point_params.u1_broken_link_flag = 0;
+    ps_sei->s_recovery_point_params.i4_recovery_poc_cnt = 0;
+
+    return IHEVCE_SUCCESS;
+}
+
+/**
+******************************************************************************
+*
+* @brief Populates picture timing of SEI structure
+*
+* @par Description
+* Fills ps_sei->s_pic_timing_sei_params. pic_struct, source_scan_type and
+* duplicate_flag are written only when ps_vui signals
+* u1_frame_field_info_present_flag; the decoding unit related fields are
+* always written with fixed values
+*
+* @param[out] ps_sei
+* pointer to sei params that needs to be populated
+*
+* @param[in] ps_vui
+* pointer to vui params referred by sei; u1_field_seq_flag and
+* u1_frame_field_info_present_flag are read
+*
+* @param[in] ps_src_params
+* pointer to source config params; only i4_field_pic is read
+*
+* @param[in] u4_bottom_field_flag
+* Used only for interlaced field coding. 0:top field, 1:bottom field
+*
+* @return IHEVCE_SUCCESS
+*
+******************************************************************************
+*/
+WORD32 ihevce_populate_picture_timing_sei(
+    sei_params_t *ps_sei,
+    vui_t *ps_vui,
+    ihevce_src_params_t *ps_src_params,
+    WORD32 u4_bottom_field_flag)
+{
+    pic_timing_sei_params_t *ps_pt = &ps_sei->s_pic_timing_sei_params;
+
+    /* Hard coded for now: top field first, first field never repeated */
+    UWORD8 u1_tff = 1;
+    UWORD8 u1_rff = 0;
+    UWORD8 u1_progressive = !ps_src_params->i4_field_pic;
+    WORD32 i4_field_seq = ps_vui->u1_field_seq_flag;
+
+    if(ps_vui->u1_frame_field_info_present_flag)
+    {
+        /* pic_struct selection, refer Table D-1 */
+        if(u1_progressive)
+        {
+            /* FRAME_PICTURE                                   */
+            /* 0     => frame picture (no repeat)              */
+            /* 7 / 8 => frame picture (repeat once / twice)    */
+            if(u1_rff)
+            {
+                ps_pt->u4_pic_struct = 7 + u1_tff;
+            }
+            else
+            {
+                ps_pt->u4_pic_struct = 0;
+            }
+        }
+        else if(i4_field_seq)
+        {
+            ASSERT((u4_bottom_field_flag == 0) || (u4_bottom_field_flag == 1));
+
+            /* 1 => top field pic, 2 => bottom field pic */
+            ps_pt->u4_pic_struct = 1 + u4_bottom_field_flag;
+        }
+        else if(u1_rff)
+        {
+            /* FIELD_PICTURE with repeat first field           */
+            /* 5 => top - bottom - top                         */
+            /* 6 => bottom - top - bottom                      */
+            ps_pt->u4_pic_struct = 6 - u1_tff;
+        }
+        else
+        {
+            /* FIELD_PICTURE without repeat first field        */
+            /* 3 => top - bottom field pic                     */
+            /* 4 => bottom - top                               */
+            ps_pt->u4_pic_struct = 4 - u1_tff;
+        }
+
+        /* Progressive frame - 1 ; Interlace - 0 */
+        ps_pt->u4_source_scan_type = !ps_src_params->i4_field_pic;
+
+        ps_pt->u1_duplicate_flag = 0;
+    }
+
+    /* Decoding unit related fields: fixed values, written unconditionally */
+    ps_pt->u4_pic_dpb_output_du_delay = 0;
+    ps_pt->u4_num_decoding_units_minus1 = 1;
+    ps_pt->u1_du_common_cpb_removal_delay_flag = 1;
+    ps_pt->u4_du_common_cpb_removal_delay_increment_minus1 = 1;
+    ps_pt->au4_num_nalus_in_du_minus1[0] = 1;
+    ps_pt->au4_du_cpb_removal_delay_increment_minus1[0] = 1;
+
+    return IHEVCE_SUCCESS;
+}
+
+/**
+******************************************************************************
+*
+* @brief Computes the initial CPB removal delay of one CPB
+*
+* @par Description
+* Derives cpb_size and bit_rate of CPB i from the sub layer HRD params (the
+* "du" values when u1_sub_pic_cpb_params_present_flag is 1) and returns
+* 90000 * cpb_size / bit_rate. The product is formed in 64 bit before the
+* division so that the fractional part of cpb_size / bit_rate is not lost.
+*
+* @param[in] ps_hrd
+* pointer to the HRD params holding the scale values
+*
+* @param[in] ps_sub_layer_hrd_params
+* pointer to the sub layer HRD params holding the per CPB values
+*
+* @param[in] i
+* CPB index
+*
+* @return initial CPB removal delay, truncated to 32 bit
+*
+******************************************************************************
+*/
+static UWORD32 ihevce_calc_initial_cpb_removal_delay(
+    hrd_params_t *ps_hrd, sub_lyr_hrd_params_t *ps_sub_layer_hrd_params, WORD32 i)
+{
+    WORD32 cpb_size, bit_rate;
+    ULWORD64 u8_temp;
+
+    if(1 == ps_hrd->u1_sub_pic_cpb_params_present_flag)
+    {
+        cpb_size = (ps_sub_layer_hrd_params->au4_cpb_size_du_value_minus1[i] + 1) *
+                   (1 << (4 + ps_hrd->u4_cpb_size_du_scale));
+
+        bit_rate = (ps_sub_layer_hrd_params->au4_bit_rate_du_value_minus1[i] + 1) *
+                   (1 << (6 + ps_hrd->u4_bit_rate_scale));
+    }
+    else
+    {
+        cpb_size = (ps_sub_layer_hrd_params->au4_cpb_size_value_minus1[i] + 1) *
+                   (1 << (4 + ps_hrd->u4_cpb_size_scale));
+
+        bit_rate = (ps_sub_layer_hrd_params->au4_bit_rate_value_minus1[i] + 1) *
+                   (1 << (6 + ps_hrd->u4_bit_rate_scale));
+    }
+
+    /* scale first, divide afterwards */
+    u8_temp = (ULWORD64)(90000 * (ULWORD64)cpb_size);
+
+    return (UWORD32)(u8_temp / bit_rate);
+}
+
+/**
+******************************************************************************
+*
+* @brief Populates buffer period of sei structure
+*
+* @par Description
+* Fills ps_sei->s_buf_period_sei_params from the SPS id and from the VUI HRD
+* parameters of sub layer (i1_sps_max_sub_layers - 1). The initial CPB removal
+* delays are computed for CPB 0..cpb_cnt_minus1 of the NAL and / or VCL HRD,
+* depending on which of the two is flagged as present in the VUI.
+*
+* @param[out] ps_sei
+* pointer to sei params that needs to be populated
+*
+* @param[in] ps_vui
+* pointer to vui params referred by sei
+*
+* @param[in] ps_sps
+* pointer to sps params referred by sei
+*
+* @param[in] ps_vui_sei_prms
+* pointer to application vui / sei params; accepted but not read
+*
+* @return IHEVCE_SUCCESS
+*
+******************************************************************************
+*/
+WORD32 ihevce_populate_buffering_period_sei(
+    sei_params_t *ps_sei, vui_t *ps_vui, sps_t *ps_sps, ihevce_vui_sei_params_t *ps_vui_sei_prms)
+{
+    WORD32 i;
+
+    buf_period_sei_params_t *ps_bp_sei = &ps_sei->s_buf_period_sei_params;
+
+    WORD32 i1_sps_max_sub_layersminus1 = ps_sps->i1_sps_max_sub_layers - 1;
+
+    hrd_params_t *ps_vui_hrd_parameters = &ps_vui->s_vui_hrd_parameters;
+
+    sub_lyr_hrd_params_t *ps_sub_layer_hrd_params =
+        &ps_vui_hrd_parameters->as_sub_layer_hrd_params[i1_sps_max_sub_layersminus1];
+
+    WORD32 cpb_cnt = ps_vui_hrd_parameters->au1_cpb_cnt_minus1[i1_sps_max_sub_layersminus1];
+
+    (void)ps_vui_sei_prms;
+    ps_bp_sei->u1_bp_seq_parameter_set_id = ps_sps->i1_sps_id;
+
+    ps_bp_sei->u4_initial_cpb_removal_delay_length =
+        ps_vui->s_vui_hrd_parameters.u1_initial_cpb_removal_delay_length_minus1 + 1;
+
+    ps_bp_sei->u1_sub_pic_cpb_params_present_flag =
+        ps_vui_hrd_parameters->u1_sub_pic_cpb_params_present_flag;
+
+    ps_bp_sei->u1_rap_cpb_params_present_flag = 0; //DEFAULT value
+
+    ps_bp_sei->u4_cpb_delay_offset = 0; //DEFAULT value
+    ps_bp_sei->u4_dpb_delay_offset = 0; //DEFAULT value
+
+    ps_bp_sei->u1_concatenation_flag = 0; //DEFAULT value ???
+    ps_bp_sei->u4_au_cpb_removal_delay_delta_minus1 = 0; //DEFAULT value ???
+
+    /* Note: this is the "minus1" count; the loops below run 0..cpb_cnt inclusive */
+    ps_bp_sei->u4_cpb_cnt = cpb_cnt;
+
+    if(ps_vui->s_vui_hrd_parameters.u1_nal_hrd_parameters_present_flag)
+    {
+        for(i = 0; i <= cpb_cnt; i++)
+        {
+            UWORD32 u4_delay = ihevce_calc_initial_cpb_removal_delay(
+                ps_vui_hrd_parameters, ps_sub_layer_hrd_params, i);
+
+            ps_bp_sei->au4_nal_initial_cpb_removal_delay[i] = u4_delay;
+
+            ps_bp_sei->au4_nal_initial_cpb_removal_delay_offset[i] = 0;
+
+            if(ps_bp_sei->u1_rap_cpb_params_present_flag ||
+               ps_vui->s_vui_hrd_parameters.u1_sub_pic_cpb_params_present_flag)
+            {
+                ps_bp_sei->au4_nal_initial_alt_cpb_removal_delay[i] = u4_delay;
+                /* NOTE(review): this clears the non-alt offset a second time; an */
+                /* "alt" offset field was possibly intended - confirm against     */
+                /* buf_period_sei_params_t before changing                        */
+                ps_bp_sei->au4_nal_initial_cpb_removal_delay_offset[i] = 0;
+            }
+        }
+    }
+
+    if(ps_vui->s_vui_hrd_parameters.u1_vcl_hrd_parameters_present_flag)
+    {
+        for(i = 0; i <= cpb_cnt; i++)
+        {
+            /* Same 64 bit computation as the NAL branch; dividing before the */
+            /* multiplication by 90000 would drop the fractional part         */
+            UWORD32 u4_delay = ihevce_calc_initial_cpb_removal_delay(
+                ps_vui_hrd_parameters, ps_sub_layer_hrd_params, i);
+
+            ps_bp_sei->au4_vcl_initial_cpb_removal_delay[i] = u4_delay;
+
+            ps_bp_sei->au4_vcl_initial_cpb_removal_delay_offset[i] = 0;
+
+            if(ps_bp_sei->u1_rap_cpb_params_present_flag ||
+               ps_vui->s_vui_hrd_parameters.u1_sub_pic_cpb_params_present_flag)
+            {
+                ps_bp_sei->au4_vcl_initial_alt_cpb_removal_delay[i] = u4_delay;
+                /* NOTE(review): non-alt offset cleared again, see the NAL branch */
+                ps_bp_sei->au4_vcl_initial_cpb_removal_delay_offset[i] = 0;
+            }
+        }
+    }
+
+    return IHEVCE_SUCCESS;
+}
+
+/**
+******************************************************************************
+*
+* @brief Populates active parameter set sei structure
+*
+* @par Description
+* Records the id of ps_vps as the active VPS, sets the self contained CVS
+* flag to 0 and the no parameter set update flag to 1, and signals a single
+* active SPS id whose value is written as 0
+*
+* @param[out] ps_sei
+* pointer to sei params that needs to be populated
+*
+* @param[in] ps_vps
+* pointer to configuration vps_t.
+*
+* @param[in] ps_sps
+* pointer to sps_t; accepted but not read
+*
+* @return IHEVCE_SUCCESS
+*
+*****************************************************************************
+*/
+WORD32 ihevce_populate_active_parameter_set_sei(sei_params_t *ps_sei, vps_t *ps_vps, sps_t *ps_sps)
+{
+    active_parameter_set_sei_param_t *ps_aps_sei = &ps_sei->s_active_parameter_set_sei_params;
+    UWORD8 u1_sps_idx = 0;
+
+    (void)ps_sps;
+
+    ps_aps_sei->u1_num_sps_ids_minus1 = 0;
+    ps_aps_sei->u1_no_parameter_set_update_flag = 1;
+    ps_aps_sei->u1_self_contained_cvs_flag = 0;
+    ps_aps_sei->u1_active_video_parameter_set_id = ps_vps->i1_vps_id;
+
+    /* Every signalled active SPS id is written as 0 */
+    while(u1_sps_idx <= ps_aps_sei->u1_num_sps_ids_minus1)
+    {
+        ps_aps_sei->au1_active_seq_parameter_set_id[u1_sps_idx] = 0;
+        u1_sps_idx++;
+    }
+
+    return IHEVCE_SUCCESS;
+}
+
+/**
+******************************************************************************
+*
+* @brief Computes the 16 bit CRC hash of luma, Cb and Cr and stores it in the
+* hash SEI params
+*
+* @par Description
+* Each CRC starts at 0xffff. Every sample byte is fed through
+* CALC_CRC_BIT_LEVEL (defined outside this function) for bit_idx 0..7, then
+* 16 further shift / XOR steps with 0x1021 are applied. The result is stored
+* high byte first in au1_sei_hash[c][0..1] with c = 0 (luma), 1 (Cb), 2 (Cr).
+*
+* @param[out] ps_hash_sei_params
+* pointer to hash sei params that needs to be populated
+*
+* @param[in] bit_depth
+* i4_internal_bit_depth value. Assume same for Luma & Chroma.
+* Above 8 every sample is read as two bytes, low byte first
+*
+* @param[in] pv_y_buf
+* Pointer to decoded/recon Luma buffer
+*
+* @param[in] y_wd
+* pic width in luma samples
+*
+* @param[in] y_ht
+* pic height in luma samples
+*
+* @param[in] y_strd
+* Stride of luma buffer (in samples; scaled by the bytes per sample here)
+*
+* @param[in] pv_u_buf
+* Pointer to decoded/recon Chroma buffer; read as interleaved Cb, Cr samples
+*
+* @param[in] uv_wd
+* number of interleaved chroma samples (Cb + Cr) processed per row; the
+* column loop advances by 2
+*
+* @param[in] uv_ht
+* number of chroma rows processed
+*
+* @param[in] uv_strd
+* Stride of chroma buffer (in samples; scaled by the bytes per sample here)
+*
+* @return None
+*
+*****************************************************************************
+*/
+static void ihevce_calc_CRC(
+ hash_sei_param_t *ps_hash_sei_params,
+ WORD32 bit_depth,
+ void *pv_y_buf,
+ WORD32 y_wd,
+ WORD32 y_ht,
+ WORD32 y_strd,
+ void *pv_u_buf,
+ WORD32 uv_wd,
+ WORD32 uv_ht,
+ WORD32 uv_strd)
+{
+ WORD32 x, y, bit_idx, is_gt8bit = 0, gt8bit_mul;
+ UWORD8 *pu_buf;
+
+ /* For this to work, assumes little endian in case of HBD */
+ /* gt8bit_mul is the number of bytes per sample: 2 above 8 bit, else 1 */
+ if(bit_depth > 8)
+ is_gt8bit = 1;
+ gt8bit_mul = 1 + is_gt8bit;
+
+ /* Luma CRC */
+ {
+ UWORD32 u4_crc_val = 0xffff;
+
+ pu_buf = (UWORD8 *)pv_y_buf;
+
+ for(y = 0; y < y_ht; y++)
+ {
+ for(x = 0; x < y_wd; x++)
+ {
+ // take CRC of first pictureData byte
+ for(bit_idx = 0; bit_idx < 8; bit_idx++)
+ {
+ CALC_CRC_BIT_LEVEL(
+ u4_crc_val, pu_buf[y * (y_strd * gt8bit_mul) + (x * gt8bit_mul)], bit_idx);
+ }
+ // take CRC of second pictureData byte if bit depth is greater than 8-bits
+ if(bit_depth > 8)
+ {
+ for(bit_idx = 0; bit_idx < 8; bit_idx++)
+ {
+ CALC_CRC_BIT_LEVEL(
+ u4_crc_val,
+ pu_buf[y * (y_strd * gt8bit_mul) + (x * gt8bit_mul + 1)],
+ bit_idx);
+ }
+ }
+ }
+ }
+
+ /* 16 closing steps: shift left by one, XOR with 0x1021 when the bit shifted out was set */
+ for(bit_idx = 0; bit_idx < 16; bit_idx++)
+ {
+ UWORD32 u4_crc_msb = (u4_crc_val >> 15) & 1;
+ u4_crc_val = ((u4_crc_val << 1) & 0xffff) ^ (u4_crc_msb * 0x1021);
+ }
+
+ /* luma CRC, high byte first */
+ ps_hash_sei_params->au1_sei_hash[0][0] = (u4_crc_val >> 8) & 0xff;
+ ps_hash_sei_params->au1_sei_hash[0][1] = u4_crc_val & 0xff;
+ }
+
+ /* Cb & Cr CRC */
+ {
+ UWORD32 u4_crc_val_u = 0xffff, u4_crc_val_v = 0xffff;
+
+ pu_buf = (UWORD8 *)pv_u_buf;
+
+ for(y = 0; y < uv_ht; y++)
+ {
+ /* sample x feeds the Cb (u) CRC, sample x + 1 feeds the Cr (v) CRC */
+ for(x = 0; x < uv_wd; x += 2)
+ {
+ // take CRC of first pictureData byte
+ for(bit_idx = 0; bit_idx < 8; bit_idx++)
+ {
+ CALC_CRC_BIT_LEVEL(
+ u4_crc_val_u,
+ pu_buf[y * (uv_strd * gt8bit_mul) + (x * gt8bit_mul)],
+ bit_idx);
+ CALC_CRC_BIT_LEVEL(
+ u4_crc_val_v,
+ pu_buf[y * (uv_strd * gt8bit_mul) + ((x + 1) * gt8bit_mul)],
+ bit_idx);
+ }
+ // take CRC of second pictureData byte if bit depth is greater than 8-bits
+ if(bit_depth > 8)
+ {
+ for(bit_idx = 0; bit_idx < 8; bit_idx++)
+ {
+ CALC_CRC_BIT_LEVEL(
+ u4_crc_val_u,
+ pu_buf[y * (uv_strd * gt8bit_mul) + (x * gt8bit_mul) + 1],
+ bit_idx);
+ CALC_CRC_BIT_LEVEL(
+ u4_crc_val_v,
+ pu_buf[y * (uv_strd * gt8bit_mul) + ((x + 1) * gt8bit_mul) + 1],
+ bit_idx);
+ }
+ }
+ }
+ }
+
+ /* same 16 closing steps as for luma, applied to both chroma CRCs */
+ for(bit_idx = 0; bit_idx < 16; bit_idx++)
+ {
+ UWORD32 u4_crc_msb = (u4_crc_val_u >> 15) & 1;
+ u4_crc_val_u = ((u4_crc_val_u << 1) & 0xffff) ^ (u4_crc_msb * 0x1021);
+
+ u4_crc_msb = (u4_crc_val_v >> 15) & 1;
+ u4_crc_val_v = ((u4_crc_val_v << 1) & 0xffff) ^ (u4_crc_msb * 0x1021);
+ }
+
+ /* Cb CRC, high byte first */
+ ps_hash_sei_params->au1_sei_hash[1][0] = (u4_crc_val_u >> 8) & 0xff;
+ ps_hash_sei_params->au1_sei_hash[1][1] = u4_crc_val_u & 0xff;
+
+ /* Cr CRC, high byte first */
+ ps_hash_sei_params->au1_sei_hash[2][0] = (u4_crc_val_v >> 8) & 0xff;
+ ps_hash_sei_params->au1_sei_hash[2][1] = u4_crc_val_v & 0xff;
+ }
+}
+
+/**
+******************************************************************************
+*
+* @brief Computes the 32 bit checksum hash of luma, Cb and Cr and stores it
+* in the hash SEI params
+*
+* @par Description
+* For every sample byte, (byte ^ mask) is added to the sum of its component,
+* where mask = (x & 0xFF) ^ (y & 0xFF) ^ (x >> 8) ^ (y >> 8) and (x, y) is the
+* sample position offset by the frame position. Each sum is stored most
+* significant byte first in au1_sei_hash[c][0..3] with c = 0 (luma), 1 (Cb),
+* 2 (Cr).
+*
+* @param[out] ps_hash_sei_params
+* pointer to hash sei params that needs to be populated
+*
+* @param[in] bit_depth
+* i4_internal_bit_depth value. Assume same for Luma & Chroma.
+* Above 8 every sample contributes two consecutive bytes
+*
+* @param[in] pv_y_buf
+* Pointer to decoded/recon Luma buffer
+*
+* @param[in] y_wd
+* pic width in luma samples
+*
+* @param[in] y_ht
+* pic height in luma samples
+*
+* @param[in] y_strd
+* Stride of luma buffer (in samples)
+*
+* @param[in] pv_u_buf
+* Pointer to decoded/recon Chroma buffer; read as interleaved Cb, Cr samples
+*
+* @param[in] uv_wd
+* number of interleaved chroma samples (Cb + Cr) processed per row; the
+* column index advances by 2
+*
+* @param[in] uv_ht
+* number of chroma rows processed
+*
+* @param[in] uv_strd
+* Stride of chroma buffer (in samples)
+*
+* @param[in] i4_frame_pos_x
+* x position used for the first column when forming the mask (halved for
+* chroma)
+*
+* @param[in] i4_frame_pos_y
+* y position used for the first row when forming the mask (halved for chroma)
+*
+* @return None
+*
+*****************************************************************************
+*/
+static void ihevce_calc_checksum(
+    hash_sei_param_t *ps_hash_sei_params,
+    WORD32 bit_depth,
+    void *pv_y_buf,
+    WORD32 y_wd,
+    WORD32 y_ht,
+    WORD32 y_strd,
+    void *pv_u_buf,
+    WORD32 uv_wd,
+    WORD32 uv_ht,
+    WORD32 uv_strd,
+    WORD32 i4_frame_pos_x,
+    WORD32 i4_frame_pos_y)
+{
+    /* Bytes per sample: 2 when the bit depth exceeds 8, otherwise 1 */
+    UWORD32 u4_bytes_per_sample = (bit_depth > 8) ? 2 : 1;
+    /* Running sums: [0] luma, [1] Cb, [2] Cr */
+    UWORD32 au4_sum[3] = { 0, 0, 0 };
+    UWORD8 *pu1_src;
+    WORD32 pos_x, pos_y, row, col, comp, byte_idx;
+
+    /* Luma checksum */
+    pu1_src = (UWORD8 *)pv_y_buf;
+    for(row = 0, pos_y = i4_frame_pos_y; row < y_ht; row++, pos_y++)
+    {
+        for(col = 0, pos_x = i4_frame_pos_x; col < y_wd; col++, pos_x++)
+        {
+            UWORD32 u4_mask = (pos_x & 0xFF) ^ (pos_y & 0xFF) ^ (pos_x >> 8) ^ (pos_y >> 8);
+            UWORD32 u4_ofst = (row * y_strd + col) * u4_bytes_per_sample;
+
+            /* first byte of the sample */
+            au4_sum[0] = (au4_sum[0] + ((pu1_src[u4_ofst]) ^ u4_mask)) & 0xFFFFFFFF;
+
+            /* second byte of the sample, only above 8 bit */
+            if(bit_depth > 8)
+            {
+                au4_sum[0] = (au4_sum[0] + ((pu1_src[u4_ofst + 1]) ^ u4_mask)) & 0xFFFFFFFF;
+            }
+        }
+    }
+
+    /* Cb & Cr checksum: column col is Cb, column col + 1 is Cr */
+    pu1_src = (UWORD8 *)pv_u_buf;
+    for(row = 0, pos_y = (i4_frame_pos_y / 2); row < uv_ht; row++, pos_y++)
+    {
+        for(col = 0, pos_x = (i4_frame_pos_x / 2); col < uv_wd; col += 2, pos_x++)
+        {
+            UWORD32 u4_mask = (pos_x & 0xFF) ^ (pos_y & 0xFF) ^ (pos_x >> 8) ^ (pos_y >> 8);
+            UWORD32 u4_cb_ofst = (row * uv_strd + (col)) * u4_bytes_per_sample;
+            UWORD32 u4_cr_ofst = (row * uv_strd + (col + 1)) * u4_bytes_per_sample;
+
+            /* first byte of each sample */
+            au4_sum[1] = (au4_sum[1] + ((pu1_src[u4_cb_ofst]) ^ u4_mask)) & 0xFFFFFFFF;
+            au4_sum[2] = (au4_sum[2] + ((pu1_src[u4_cr_ofst]) ^ u4_mask)) & 0xFFFFFFFF;
+
+            /* second byte of each sample, only above 8 bit */
+            if(bit_depth > 8)
+            {
+                au4_sum[1] = (au4_sum[1] + ((pu1_src[u4_cb_ofst + 1]) ^ u4_mask)) & 0xFFFFFFFF;
+                au4_sum[2] = (au4_sum[2] + ((pu1_src[u4_cr_ofst + 1]) ^ u4_mask)) & 0xFFFFFFFF;
+            }
+        }
+    }
+
+    /* Store every sum most significant byte first */
+    for(comp = 0; comp < 3; comp++)
+    {
+        for(byte_idx = 0; byte_idx < 4; byte_idx++)
+        {
+            ps_hash_sei_params->au1_sei_hash[comp][byte_idx] =
+                (au4_sum[comp] >> (24 - 8 * byte_idx)) & 0xff;
+        }
+    }
+}
+
+/**
+******************************************************************************
+*
+* @brief Populates Hash SEI values
+*
+* @par Description
+* Selects the hash method from ps_sei->i1_decoded_pic_hash_sei_flag:
+* 2 => CRC (ihevce_calc_CRC), 3 => checksum (ihevce_calc_checksum).
+* 1 is not supported now and, like any other value, hits ASSERT(0).
+*
+* @param[out] ps_sei
+* pointer to sei params that needs to be populated
+*
+* @param[in] bit_depth
+* i4_internal_bit_depth value. Assume same for Luma & Chroma
+*
+* @param[in] pv_y_buf
+* Pointer to decoded/recon Luma buffer
+*
+* @param[in] y_wd
+* pic width in luma samples
+*
+* @param[in] y_ht
+* pic height in luma samples
+*
+* @param[in] y_strd
+* Stride of luma buffer
+*
+* @param[in] pv_u_buf
+* Pointer to decoded/recon Chroma buffer
+*
+* @param[in] uv_wd
+* chroma width, forwarded unchanged to the hash function
+*
+* @param[in] uv_ht
+* chroma height, forwarded unchanged to the hash function
+*
+* @param[in] uv_strd
+* Stride of chroma buffer
+*
+* @param[in] i4_frame_pos_x
+* x position of the buffer within the frame; used by the checksum only
+*
+* @param[in] i4_frame_pos_y
+* y position of the buffer within the frame; used by the checksum only
+*
+* @return IHEVCE_SUCCESS
+*
+*****************************************************************************
+*/
+WORD32 ihevce_populate_hash_sei(
+    sei_params_t *ps_sei,
+    WORD32 bit_depth,
+    void *pv_y_buf,
+    WORD32 y_wd,
+    WORD32 y_ht,
+    WORD32 y_strd,
+    void *pv_u_buf,
+    WORD32 uv_wd,
+    WORD32 uv_ht,
+    WORD32 uv_strd,
+    WORD32 i4_frame_pos_x,
+    WORD32 i4_frame_pos_y)
+{
+    switch(ps_sei->i1_decoded_pic_hash_sei_flag)
+    {
+    case 2:
+        /* calculate CRC for entire reconstructed picture */
+        ihevce_calc_CRC(
+            &ps_sei->s_hash_sei_params,
+            bit_depth,
+            pv_y_buf,
+            y_wd,
+            y_ht,
+            y_strd,
+            pv_u_buf,
+            uv_wd,
+            uv_ht,
+            uv_strd);
+        break;
+
+    case 3:
+        /* calculate Checksum for entire reconstructed picture */
+        ihevce_calc_checksum(
+            &ps_sei->s_hash_sei_params,
+            bit_depth,
+            pv_y_buf,
+            y_wd,
+            y_ht,
+            y_strd,
+            pv_u_buf,
+            uv_wd,
+            uv_ht,
+            uv_strd,
+            i4_frame_pos_x,
+            i4_frame_pos_y);
+        break;
+
+    case 1: /* Not supported now! */
+    default:
+        ASSERT(0);
+        break;
+    }
+
+    return IHEVCE_SUCCESS;
+}
+
+/**
+******************************************************************************
+*
+* @brief Populates vui structure
+*
+* @par Description
+* Populates vui structure for its use in header generation
+*
+* @param[out] ps_vui
+* pointer to vui params that needs to be populated
+*
+* @param[out] ps_sps
+* pointer to sps params referred by vui
+*
+* @param[in] ps_src_params
+* pointer to source config params; resolution, frame rate etc
+*
+* @param[out] ps_vui_sei_prms
+* pointer to sps params referred by application
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ihevce_populate_vui(
+ vui_t *ps_vui,
+ sps_t *ps_sps,
+ ihevce_src_params_t *ps_src_params,
+ ihevce_vui_sei_params_t *ps_vui_sei_prms,
+ WORD32 i4_resolution_id,
+ ihevce_tgt_params_t *ps_tgt_params,
+ ihevce_static_cfg_params_t *ps_stat_prms,
+ WORD32 i4_bitrate_instance_id)
+{
+ WORD32 i, j, i4_range_idr, i4_range_cdr;
+ ULWORD64 max_vbv_size;
+
+ ps_vui->u1_aspect_ratio_info_present_flag = ps_vui_sei_prms->u1_aspect_ratio_info_present_flag;
+
+ ps_vui->u1_aspect_ratio_idc = ps_vui_sei_prms->au1_aspect_ratio_idc[i4_resolution_id];
+
+ ps_vui->u2_sar_height = ps_vui_sei_prms->au2_sar_height[i4_resolution_id];
+
+ ps_vui->u2_sar_width = ps_vui_sei_prms->au2_sar_width[i4_resolution_id];
+
+ ps_vui->u1_overscan_info_present_flag = ps_vui_sei_prms->u1_overscan_info_present_flag;
+
+ ps_vui->u1_overscan_appropriate_flag = ps_vui_sei_prms->u1_overscan_appropriate_flag;
+
+ ps_vui->u1_video_signal_type_present_flag = ps_vui_sei_prms->u1_video_signal_type_present_flag;
+
+ ps_vui->u1_video_format = ps_vui_sei_prms->u1_video_format;
+
+ ps_vui->u1_video_full_range_flag = ps_vui_sei_prms->u1_video_full_range_flag;
+
+ ps_vui->u1_colour_description_present_flag =
+ ps_vui_sei_prms->u1_colour_description_present_flag;
+
+ ps_vui->u1_colour_primaries = ps_vui_sei_prms->u1_colour_primaries;
+
+ ps_vui->u1_transfer_characteristics = ps_vui_sei_prms->u1_transfer_characteristics;
+
+ ps_vui->u1_matrix_coefficients = ps_vui_sei_prms->u1_matrix_coefficients;
+
+ ps_vui->u1_chroma_loc_info_present_flag = ps_vui_sei_prms->u1_chroma_loc_info_present_flag;
+
+ ps_vui->u1_chroma_sample_loc_type_top_field =
+ ps_vui_sei_prms->u1_chroma_sample_loc_type_top_field;
+
+ ps_vui->u1_chroma_sample_loc_type_bottom_field =
+ ps_vui_sei_prms->u1_chroma_sample_loc_type_bottom_field;
+
+ ps_vui->u1_neutral_chroma_indication_flag = 0;
+
+ ps_vui->u1_default_display_window_flag = 0;
+
+ /* Default Values for display offset added */
+ if(ps_vui->u1_default_display_window_flag)
+ {
+ ps_vui->u4_def_disp_win_bottom_offset = 0;
+
+ ps_vui->u4_def_disp_win_left_offset = 0;
+
+ ps_vui->u4_def_disp_win_right_offset = 0;
+
+ ps_vui->u4_def_disp_win_top_offset = 0;
+ }
+
+ ps_vui->u1_vui_hrd_parameters_present_flag =
+ ps_vui_sei_prms->u1_vui_hrd_parameters_present_flag;
+
+ ps_vui->u1_field_seq_flag = ps_src_params->i4_field_pic;
+
+ ps_vui->u1_frame_field_info_present_flag = 1;
+
+ ps_vui->u1_vui_timing_info_present_flag = ps_vui_sei_prms->u1_timing_info_present_flag;
+
+ //if(ps_vui->u1_vui_timing_info_present_flag)
+ {
+ /* NumUnits in tick is same as the frame rate denominator assuming delta poc as 1 */
+ ps_vui->u4_vui_num_units_in_tick = ps_src_params->i4_frm_rate_denom;
+
+ /* TimeScale is the same as the frame rate numerator assuming delta poc as 1 */
+ ps_vui->u4_vui_time_scale =
+ (ps_src_params->i4_frm_rate_num / ps_tgt_params->i4_frm_rate_scale_factor);
+ }
+
+ ps_vui->u1_poc_proportional_to_timing_flag = 1;
+
+ if(ps_vui->u1_poc_proportional_to_timing_flag && ps_vui->u1_vui_timing_info_present_flag)
+ ps_vui->u4_num_ticks_poc_diff_one_minus1 = 0;
+
+ //if (ps_vui->u1_vui_hrd_parameters_present_flag)
+ {
+ ps_vui->s_vui_hrd_parameters.u1_initial_cpb_removal_delay_length_minus1 = 23;
+ ps_vui->s_vui_hrd_parameters.u1_au_cpb_removal_delay_length_minus1 = 23; /* Default value */
+
+ ps_vui->s_vui_hrd_parameters.u1_dpb_output_delay_length_minus1 =
+ 4; // max num of B pics are 7. So the max delay can go up to 5 and a maximun 10 is allowed for initial removal dalay.
+
+ ps_vui->s_vui_hrd_parameters.u1_nal_hrd_parameters_present_flag =
+ ps_vui_sei_prms->u1_nal_hrd_parameters_present_flag;
+
+ ps_vui->s_vui_hrd_parameters.u1_vcl_hrd_parameters_present_flag =
+ 0; //ps_vui_sei_prms->u1_vcl_hrd_parameters_present_flag;
+ ps_vui->s_vui_hrd_parameters.u1_sub_pic_cpb_params_present_flag = 0;
+
+ if(ps_vui->s_vui_hrd_parameters.u1_nal_hrd_parameters_present_flag ||
+ ps_vui->s_vui_hrd_parameters.u1_vcl_hrd_parameters_present_flag)
+ {
+ /* Initialize u1_au_cpb_removal_delay_length_minus1 based on configured intra periods */
+ ps_vui->s_vui_hrd_parameters.u1_au_cpb_removal_delay_length_minus1 =
+ 8; /* Default value when HRD params are enabled */
+ if(ps_stat_prms->s_coding_tools_prms.i4_max_cra_open_gop_period ||
+ ps_stat_prms->s_coding_tools_prms.i4_max_closed_gop_period)
+ {
+ GETRANGE(
+ i4_range_cdr, ps_stat_prms->s_coding_tools_prms.i4_max_cra_open_gop_period);
+
+ GETRANGE(i4_range_idr, ps_stat_prms->s_coding_tools_prms.i4_max_closed_gop_period);
+
+ ps_vui->s_vui_hrd_parameters.u1_au_cpb_removal_delay_length_minus1 =
+ MAX(i4_range_cdr, i4_range_idr);
+ }
+ /*BLU_RAY Default set to 0 */
+ ps_vui->s_vui_hrd_parameters.u1_sub_pic_cpb_params_present_flag = 0;
+ if(ps_vui->s_vui_hrd_parameters.u1_sub_pic_cpb_params_present_flag)
+ {
+ ps_vui->s_vui_hrd_parameters.u1_tick_divisor_minus2 = 1;
+ ps_vui->s_vui_hrd_parameters.u1_du_cpb_removal_delay_increment_length_minus1 = 23;
+ ps_vui->s_vui_hrd_parameters.u1_sub_pic_cpb_params_in_pic_timing_sei_flag = 1;
+ ps_vui->s_vui_hrd_parameters.u1_dpb_output_delay_du_length_minus1 = 0;
+ }
+ }
+
+ ps_vui->s_vui_hrd_parameters.u4_bit_rate_scale = VUI_BIT_RATE_SCALE;
+ ps_vui->s_vui_hrd_parameters.u4_cpb_size_scale = VUI_CPB_SIZE_SCALE;
+ if(ps_vui->s_vui_hrd_parameters.u1_sub_pic_cpb_params_present_flag)
+ {
+ ps_vui->s_vui_hrd_parameters.u4_cpb_size_du_scale = 0;
+ }
+
+ for(i = 0; i <= (ps_sps->i1_sps_max_sub_layers - 1); i++)
+ {
+ ps_vui->s_vui_hrd_parameters.au1_fixed_pic_rate_general_flag[i] =
+ 1; /*BLU_RAY specific change already done */
+ ps_vui->s_vui_hrd_parameters.au1_fixed_pic_rate_within_cvs_flag[i] = 1;
+ ps_vui->s_vui_hrd_parameters.au2_elemental_duration_in_tc_minus1[i] = 0;
+
+ /*BLU_RAY low_delay_hrd_flag is always set to 0*/
+ ps_vui->s_vui_hrd_parameters.au1_low_delay_hrd_flag[i] = 0;
+
+ /************************************************************************/
+ /* cpb_cnt_minus1 is set to zero because we assume that the decoder */
+ /* can work with just one CPB specification */
+ /************************************************************************/
+ ps_vui->s_vui_hrd_parameters.au1_cpb_cnt_minus1[i] = 0;
+
+ max_vbv_size = ps_stat_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id]
+ .ai4_max_vbv_buffer_size[i4_bitrate_instance_id];
+ for(j = 0; j <= ps_vui->s_vui_hrd_parameters.au1_cpb_cnt_minus1[i]; j++)
+ {
+ ULWORD64 u8_bit_rate_val =
+ (ULWORD64)ps_stat_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id]
+ .ai4_tgt_bitrate[i4_bitrate_instance_id];
+ ULWORD64 u8_max_cpb_size;
+ if((ps_stat_prms->s_config_prms.i4_rate_control_mode == 2) ||
+ (ps_stat_prms->s_config_prms.i4_rate_control_mode ==
+ 1)) // VBR/Capped VBR rate control mode
+ u8_bit_rate_val =
+ (ULWORD64)(ps_stat_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id]
+ .ai4_peak_bitrate[i4_bitrate_instance_id]);
+ u8_max_cpb_size =
+ max_vbv_size; //((ULWORD64)(max_vbv_size * u8_bit_rate_val)/1000);
+
+ if(3 == ps_stat_prms->s_config_prms.i4_rate_control_mode)
+ {
+ /* For CQP mode, assume Level specified max rate and buffer size */
+ WORD32 codec_level_index = ihevce_get_level_index(
+ ps_stat_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_codec_level);
+ WORD32 codec_tier = ps_stat_prms->s_out_strm_prms.i4_codec_tier;
+
+ /* Bitrate as per level and tier limits */
+ u8_bit_rate_val =
+ g_as_level_data[codec_level_index].i4_max_bit_rate[codec_tier];
+ u8_max_cpb_size = g_as_level_data[codec_level_index].i4_max_cpb[codec_tier];
+ }
+
+ u8_bit_rate_val >>= (6 + ps_vui->s_vui_hrd_parameters.u4_bit_rate_scale);
+ u8_max_cpb_size >>= (4 + ps_vui->s_vui_hrd_parameters.u4_cpb_size_scale);
+
+ ps_vui->s_vui_hrd_parameters.as_sub_layer_hrd_params[i]
+ .au4_bit_rate_value_minus1[j] = (UWORD32)(u8_bit_rate_val - 1);
+ ps_vui->s_vui_hrd_parameters.as_sub_layer_hrd_params[i]
+ .au4_cpb_size_value_minus1[j] = (UWORD32)(u8_max_cpb_size - 1);
+
+ if(ps_vui->s_vui_hrd_parameters.u1_sub_pic_cpb_params_present_flag)
+ {
+ ps_vui->s_vui_hrd_parameters.as_sub_layer_hrd_params[i]
+ .au4_cpb_size_value_minus1[j] = 0;
+ }
+
+ /************************************************************************/
+ /* CBR flag is set as per the RATE_CONTROL macro */
+ /************************************************************************/
+
+ /* Default cbr flag setting. Will discard decoder buffer overflows (no stuffing required) */
+
+ ps_vui->s_vui_hrd_parameters.as_sub_layer_hrd_params[i].au1_cbr_flag[j] = 0;
+ }
+ }
+ }
+
+ ps_vui->u1_bitstream_restriction_flag = 0;
+
+ if(ps_vui->u1_bitstream_restriction_flag)
+ {
+ ps_vui->u1_tiles_fixed_structure_flag = 1;
+
+ ps_vui->u1_motion_vectors_over_pic_boundaries_flag = 1;
+
+ ps_vui->u4_min_spatial_segmentation_idc = 0;
+
+ ps_vui->u1_restricted_ref_pic_lists_flag = 0;
+
+ ps_vui->u1_max_bytes_per_pic_denom = 2;
+
+ ps_vui->u1_max_bits_per_mincu_denom = 1;
+
+ ps_vui->u1_log2_max_mv_length_horizontal = 15;
+
+ ps_vui->u1_log2_max_mv_length_vertical = 15;
+ }
+
+ return IHEVCE_SUCCESS;
+}
diff --git a/encoder/ihevce_encode_header_sei_vui.h b/encoder/ihevce_encode_header_sei_vui.h
new file mode 100644
index 0000000..cb69456
--- /dev/null
+++ b/encoder/ihevce_encode_header_sei_vui.h
@@ -0,0 +1,177 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+*
+* @file ihevce_encode_header_sei_vui.h
+*
+* @brief
+* This file contains structures and interface prototypes for header vui/sei
+* encoding
+*
+* @author
+* ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_ENCODE_HEADER_SEI_VUI_H_
+#define _IHEVCE_ENCODE_HEADER_SEI_VUI_H_
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/**
+ * @brief Folds bit (7 - bit_idx) of u1_cur_val into the 16-bit CRC u4_crc_val.
+ * u4_crc_val must be a modifiable lvalue: it is shifted left, the new bit is added,
+ * and 0x1021 is XOR-ed in when its old bit 15 was set. It is evaluated several times.
+ */
+#define CALC_CRC_BIT_LEVEL(u4_crc_val, u1_cur_val, bit_idx) \
+    { \
+        UWORD32 u4_bit_val, u4_crc_msb; \
+        u4_crc_msb = ((u4_crc_val) >> 15) & 1; \
+        u4_bit_val = ((u1_cur_val) >> (7 - (bit_idx))) & 1; \
+        (u4_crc_val) = ((((u4_crc_val) << 1) + u4_bit_val) & 0xffff) ^ (u4_crc_msb * 0x1021); \
+    }
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+typedef enum
+{
+    /* Prefix SEI payload types */
+    IHEVCE_SEI_BUF_PERIOD_T = 0,
+    IHEVCE_SEI_PIC_TIMING_T = 1,
+    IHEVCE_SEI_PAN_SCAN_RECT_T = 2,
+    IHEVCE_SEI_FILLER_PAYLOAD_T = 3,
+    IHEVCE_SEI_USER_DATA_REGISTERED_ITU_T_T35_T = 4,
+    IHEVCE_SEI_USER_DATA_UNREGISTERED_T = 5,
+    IHEVCE_SEI_RECOVERY_POINT_T = 6,
+    IHEVCE_SEI_SCENE_INFO_T = 9,
+    IHEVCE_SEI_FULL_FRAME_SNAPSHOT_T = 15,
+    IHEVCE_SEI_PROGRESSIVE_REFINEMENT_SEGMENT_START_T = 16,
+    IHEVCE_SEI_PROGRESSIVE_REFINEMENT_SEGMENT_END_T = 17,
+    IHEVCE_SEI_FILM_GRAIN_CHARACTERISTICS_T = 19,
+    IHEVCE_SEI_POST_FILTER_HINT_T = 22,
+    IHEVCE_SEI_TONE_MAPPING_INFO_T = 23,
+    IHEVCE_SEI_FRAME_PACKING_ARRANGEMENT_T = 45,
+    IHEVCE_SEI_DISPLAY_ORIENTATION_T = 47,
+    IHEVCE_SEI_SOP_DESCRIPTION_T = 128,
+    IHEVCE_SEI_ACTIVE_PARAMETER_SETS_T = 129,
+    IHEVCE_SEI_DECODING_UNIT_INFO_T = 130,
+    IHEVCE_SEI_TL0_INDEX_T = 131,
+    IHEVCE_SEI_DECODED_PICTURE_HASH_T = 132, /* Suffix SEI */
+    IHEVCE_SEI_SCALABLE_NESTING_T = 133,
+    IHEVCE_SEI_REGION_REFRESH_INFO_T = 134,
+    IHEVCE_SEI_MASTERING_DISP_COL_VOL_T = 137,
+    IHEVCE_SEI_CONTENT_LIGHT_LEVEL_DATA_T = 144,
+
+    /* Catch-all for the rest of the suffix/prefix SEI messages */
+    IHEVCE_SEI_RESERVED_SEI_MESSAGE_T = 145
+} IHEVCE_SEI_TYPE;
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+WORD32 ihevce_generate_sub_layer_hrd_params(
+ bitstrm_t *ps_bitstrm,
+ sub_lyr_hrd_params_t *ps_sub_lyr_hrd_params,
+ hrd_params_t *ps_hrd_params,
+ WORD32 cpb_cnt_minus1);
+
+WORD32
+ ihevce_generate_hrd_params(bitstrm_t *ps_bitstrm, hrd_params_t *ps_hrd_params, sps_t *ps_sps);
+
+WORD32 ihevce_generate_vui(bitstrm_t *ps_bitstrm, sps_t *ps_sps, vui_t s_vui); /* NOTE(review): s_vui is passed by value; every other declaration here takes vui_t by pointer */
+
+WORD32 ihevce_put_buf_period_sei_params(
+ buf_period_sei_params_t *ps_bp_sei, vui_t *ps_vui_params, bitstrm_t *ps_bitstrm);
+
+WORD32 ihevce_put_active_parameter_set_sei_params(
+ active_parameter_set_sei_param_t *ps_act_sei, bitstrm_t *ps_bitstrm);
+
+WORD32 ihevce_put_recovery_point_sei_params(
+ recovery_point_sei_params_t *ps_rp_sei, bitstrm_t *ps_bitstrm);
+
+WORD32 ihevce_put_pic_timing_sei_params(
+ pic_timing_sei_params_t *ps_pt_sei, vui_t *ps_vui_params, bitstrm_t *ps_bitstrm);
+
+WORD32 ihevce_put_sei_msg(
+ IHEVCE_SEI_TYPE e_payload_type,
+ sei_params_t *ps_sei_params,
+ vui_t *ps_vui_params,
+ bitstrm_t *ps_bitstrm,
+ UWORD32 i4_registered_user_data_length, /* NOTE(review): UWORD32 with an i4_ name prefix - confirm intended signedness */
+ UWORD8 *pu1_user_data_registered);
+
+WORD32 ihevce_generate_sei(
+ bitstrm_t *ps_bitstrm,
+ sei_params_t *ps_sei_params,
+ vui_t *ps_vui_params,
+ WORD32 insert_per_cra,
+ WORD32 nal_unit_header,
+ UWORD32 u4_num_sei_payloads,
+ sei_payload_t *ps_sei_payload);
+
+WORD32 ihevce_populate_recovery_point_sei(
+ sei_params_t *ps_sei, ihevce_vui_sei_params_t *ps_vui_sei_prms);
+
+WORD32 ihevce_populate_mastering_disp_col_vol_sei(
+ sei_params_t *ps_sei, ihevce_out_strm_params_t *ps_out_strm_prms);
+
+WORD32 ihevce_populate_picture_timing_sei(
+ sei_params_t *ps_sei,
+ vui_t *ps_vui,
+ ihevce_src_params_t *ps_src_params,
+ WORD32 u4_bottom_field_flag); /* NOTE(review): WORD32 with a u4_ name prefix - confirm intended signedness */
+
+WORD32 ihevce_populate_buffering_period_sei(
+ sei_params_t *ps_sei, vui_t *ps_vui, sps_t *ps_sps, ihevce_vui_sei_params_t *ps_vui_sei_prms);
+
+WORD32 ihevce_populate_active_parameter_set_sei(sei_params_t *ps_sei, vps_t *ps_vps, sps_t *ps_sps);
+
+WORD32 ihevce_populate_hash_sei(
+ sei_params_t *ps_sei,
+ WORD32 bit_depth,
+ void *pv_y_buf,
+ WORD32 y_wd,
+ WORD32 y_ht,
+ WORD32 y_strd,
+ void *pv_u_buf,
+ WORD32 uv_wd,
+ WORD32 uv_ht,
+ WORD32 uv_strd,
+ WORD32 i4_frame_pos_x,
+ WORD32 i4_frame_pos_y);
+
+WORD32 ihevce_populate_vui(
+ vui_t *ps_vui,
+ sps_t *ps_sps,
+ ihevce_src_params_t *ps_src_params,
+ ihevce_vui_sei_params_t *ps_vui_sei_prms,
+ WORD32 i4_resolution_id,
+ ihevce_tgt_params_t *ps_tgt_params,
+ ihevce_static_cfg_params_t *ps_stat_prms,
+ WORD32 i4_bitrate_instance_id);
+
+#endif // _IHEVCE_ENCODE_HEADER_SEI_VUI_H_
diff --git a/encoder/ihevce_entropy_cod.c b/encoder/ihevce_entropy_cod.c
new file mode 100644
index 0000000..378d4f9
--- /dev/null
+++ b/encoder/ihevce_entropy_cod.c
@@ -0,0 +1,809 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*!
+ ******************************************************************************
+ * \file ihevce_entropy_cod.c
+ *
+ * \brief
+ * This file contains interface function definitions related to Entropy coding
+ *
+ * \date
+ * 18/09/2012
+ *
+ * \author
+ * Ittiam
+ *
+ * List of Functions
+ *
+ *
+ ******************************************************************************
+ */
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_macros.h"
+#include "ihevc_debug.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+#include "ihevc_trans_tables.h"
+#include "ihevc_trans_macros.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_hle_interface.h"
+#include "ihevce_hle_q_func.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_multi_thrd_funcs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_error_checks.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_global_tables.h"
+#include "ihevce_entropy_structs.h"
+#include "ihevce_entropy_interface.h"
+#include "ihevce_rc_enc_structs.h"
+#include "ihevce_rc_interface.h"
+#include "ihevce_encode_header.h"
+#include "ihevce_encode_header_sei_vui.h"
+#include "ihevce_trace.h"
+
+#include "cast_types.h"
+#include "osal.h"
+#include "osal_defaults.h"
+
+/*****************************************************************************/
+/* Extern variables */
+/*****************************************************************************/
+UWORD8 gau1_pic_type_string[5][11] = { /* five slice-type label strings, each stored in an 11-byte row */
+ { "I-SLICE " }, { "P-SLICE " }, { "B-SLICE " }, { "IDR-SLICE" }, { "b-SLICE " }
+};
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+#define PSNR_FROM_MSE(x, bit_depth) /* 20*log10((2^bit_depth - 1)/sqrt(x)); x == 0 gives 99.999999 */ \
+    (((x) == 0) ? 99.999999 : (20 * log10(((1 << (bit_depth)) - 1) / sqrt(x))))
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/*!
+ ******************************************************************************
+ * \if Function name : ihevce_ent_coding_thrd \endif
+ *
+ * \brief
+ * Entropy coding thread interface function
+ *
+ * \param[in] Frame process pointer
+ *
+ * \return
+ * None
+ *
+ * \author
+ * Ittiam
+ *
+ *****************************************************************************
+ */
+WORD32 ihevce_ent_coding_thrd(void *pv_frm_proc_thrd_ctxt)
+{
+ /* local variables */
+ frm_proc_thrd_ctxt_t *ps_thrd_ctxt;
+ enc_ctxt_t *ps_enc_ctxt;
+ WORD32 i4_thrd_id;
+ ihevce_hle_ctxt_t *ps_hle_ctxt;
+ WORD32 end_flag;
+ WORD32 out_buf_id;
+ WORD32 inp_buf_id;
+ WORD32 entropy_error = 0;
+ WORD32 i4_bitrate_instance_num, i4_resolution_id, i4_out_res_id;
+ WORD32 i4_bufque_id;
+ UWORD32 u4_encode_frm_num = 0;
+ UWORD32 u4_au_cpb_removal_delay_minus1 = 0;
+ WORD32 i4_no_output = 0;
+ WORD32 i4_do_entr_last = 1;
+ WORD32 i4_use_dummy_buffer = 0;
+ void *pv_entropy_hdl;
+ entropy_context_t *ps_entropy_ctxt;
+
+ iv_output_data_buffs_t *ps_curr_out = NULL;
+ frm_proc_ent_cod_ctxt_t *ps_curr_inp = NULL;
+ iv_output_data_buffs_t s_curr_out_dummy;
+
+ /* derive local variables */
+ ps_thrd_ctxt = (frm_proc_thrd_ctxt_t *)pv_frm_proc_thrd_ctxt;
+ i4_thrd_id = ps_thrd_ctxt->i4_thrd_id;
+ ps_hle_ctxt = (ihevce_hle_ctxt_t *)ps_thrd_ctxt->ps_hle_ctxt;
+ ps_enc_ctxt = (enc_ctxt_t *)ps_thrd_ctxt->pv_enc_ctxt;
+ end_flag = 0;
+ i4_bitrate_instance_num = i4_thrd_id;
+ i4_bufque_id = i4_thrd_id;
+ i4_resolution_id = ps_enc_ctxt->i4_resolution_id;
+ i4_out_res_id = i4_resolution_id;
+
+ /* Swap buf_id between the 0th and the reference bitrate locations: the encoder
+ always assumes the 0th location holds the reference bitrate, but the app must
+ receive the outputs in the configured order */
+ i4_bufque_id = i4_bitrate_instance_num;
+ if(i4_bitrate_instance_num == 0)
+ {
+ i4_bufque_id = ps_enc_ctxt->i4_ref_mbr_id;
+ }
+ else if(i4_bitrate_instance_num == ps_enc_ctxt->i4_ref_mbr_id)
+ {
+ i4_bufque_id = 0;
+ }
+
+ if(1 == ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms.i4_mres_single_out)
+ {
+ i4_bufque_id = 0;
+ i4_out_res_id = 0;
+ }
+ pv_entropy_hdl = ps_enc_ctxt->s_module_ctxt.apv_ent_cod_ctxt[i4_bitrate_instance_num];
+ ps_entropy_ctxt = (entropy_context_t *)pv_entropy_hdl;
+ /* ---------- Processing Loop until end command is received --------- */
+ while(0 == end_flag)
+ {
+ /*Get a buffer pointer*/
+ /* ------- get next input buffer from Frame buffer que ---------- */
+ ps_curr_inp = (frm_proc_ent_cod_ctxt_t *)ihevce_q_get_filled_buff(
+ (void *)ps_enc_ctxt,
+ (IHEVCE_FRM_PRS_ENT_COD_Q + i4_bitrate_instance_num),
+ &inp_buf_id,
+ BUFF_QUE_BLOCKING_MODE);
+ if(1 == ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms.i4_mres_single_out)
+ {
+ if(1 == ps_curr_inp->i4_out_flush_flag)
+ {
+ if(1 == ps_enc_ctxt->s_multi_thrd.pi4_active_res_id[i4_resolution_id])
+ ps_enc_ctxt->s_multi_thrd.pi4_active_res_id[i4_resolution_id] = 0;
+ else
+ ASSERT(0);
+ }
+ else
+ {
+ if(0 == ps_enc_ctxt->s_multi_thrd.pi4_active_res_id[i4_resolution_id])
+ {
+ /* During change in resolution check whether prev res is active before starting to dump new resolution */
+ WORD32 other_res_active = 1;
+ WORD32 ctr;
+ volatile WORD32 *pi4_active_res_check;
+ pi4_active_res_check = ps_enc_ctxt->s_multi_thrd.pi4_active_res_id;
+ while(other_res_active)
+ {
+ /* Continue in polling mode until all the other resolutions are in passive mode */
+ other_res_active = 0;
+ for(ctr = 0;
+ ctr < ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms.i4_num_res_layers;
+ ctr++)
+ {
+ if(ctr != i4_resolution_id)
+ {
+ /* Check whether any resolution other than current resolution is active */
+ /* If its active it means that previous resolution has not finished entropy */
+ /* Wait for it to finish entropy*/
+ other_res_active |= pi4_active_res_check[ctr];
+ }
+ }
+ if(1 == ps_curr_inp->i4_end_flag)
+ {
+ i4_no_output = 1;
+ }
+ }
+
+ if(0 == ps_curr_inp->i4_end_flag)
+ {
+ ps_enc_ctxt->s_multi_thrd.pi4_active_res_id[i4_resolution_id] = 1;
+ }
+ }
+ }
+ }
+ if(0 == ps_curr_inp->i4_out_flush_flag)
+ {
+ if(1 == i4_no_output)
+ {
+ ps_curr_out = NULL;
+ }
+ else
+ {
+ if(!i4_use_dummy_buffer)
+ {
+ /* ------- get a filled descriptor from output Que ------------ */
+ ps_curr_out = (iv_output_data_buffs_t *)ihevce_q_get_filled_buff(
+ (void *)ps_enc_ctxt,
+ (IHEVCE_OUTPUT_DATA_Q + i4_bufque_id),
+ &out_buf_id,
+ BUFF_QUE_BLOCKING_MODE);
+ }
+ else
+ {
+ ps_curr_out = &s_curr_out_dummy;
+ out_buf_id = 0;
+ ps_curr_out->i4_bitstream_buf_size = ps_entropy_ctxt->i4_bitstream_buf_size;
+ ps_curr_out->pv_bitstream_bufs = ps_entropy_ctxt->pv_dummy_out_buf;
+ }
+ }
+ }
+
+ PROFILE_START(
+ &ps_hle_ctxt->profile_entropy[ps_enc_ctxt->i4_resolution_id][i4_bitrate_instance_num]);
+ /* Content Light Level Information */
+ {
+ ps_curr_inp->s_sei.i1_sei_cll_enable =
+ (WORD8)ps_enc_ctxt->ps_stat_prms->s_out_strm_prms.i4_sei_cll_enable;
+ ps_curr_inp->s_sei.s_cll_info_sei_params.u2_sei_max_cll =
+ ps_enc_ctxt->ps_stat_prms->s_out_strm_prms.u2_sei_max_cll;
+ ps_curr_inp->s_sei.s_cll_info_sei_params.u2_sei_avg_cll =
+ ps_enc_ctxt->ps_stat_prms->s_out_strm_prms.u2_sei_avg_cll;
+ }
+ if((NULL != ps_curr_out) && (NULL != ps_curr_inp))
+
+ {
+ WORD32 i;
+
+ /*PIC_INFO: reset the pic-level info flags*/
+ ps_curr_inp->s_pic_level_info.i8_total_cu = 0;
+ ps_curr_inp->s_pic_level_info.i8_total_intra_cu = 0;
+ ps_curr_inp->s_pic_level_info.i8_total_inter_cu = 0;
+ ps_curr_inp->s_pic_level_info.i8_total_skip_cu = 0;
+ ps_curr_inp->s_pic_level_info.i8_total_pu = 0;
+ ps_curr_inp->s_pic_level_info.i8_total_intra_pu = 0;
+ ps_curr_inp->s_pic_level_info.i8_total_non_skipped_inter_pu = 0;
+ ps_curr_inp->s_pic_level_info.i8_total_merge_pu = 0;
+ for(i = 0; i < 4; i++)
+ {
+ ps_curr_inp->s_pic_level_info.i8_total_cu_based_on_size[i] = 0;
+ ps_curr_inp->s_pic_level_info.i8_total_2nx2n_intra_pu[i] = 0;
+ ps_curr_inp->s_pic_level_info.i8_total_2nx2n_inter_pu[i] = 0;
+ ps_curr_inp->s_pic_level_info.i8_total_tu_based_on_size[i] = 0;
+ ps_curr_inp->s_pic_level_info.i8_total_smp_inter_pu[i] = 0;
+ if(i != 3)
+ {
+ ps_curr_inp->s_pic_level_info.i8_total_amp_inter_pu[i] = 0;
+ ps_curr_inp->s_pic_level_info.i8_total_nxn_inter_pu[i] = 0;
+ }
+ }
+
+ ps_curr_inp->s_pic_level_info.i8_total_nxn_intra_pu = 0;
+ ps_curr_inp->s_pic_level_info.i8_total_L0_mode = 0;
+ ps_curr_inp->s_pic_level_info.i8_total_L1_mode = 0;
+ ps_curr_inp->s_pic_level_info.i8_total_BI_mode = 0;
+ //ps_curr_inp->s_pic_level_info.u4_frame_intra_sad = ps_enc_ctxt->u4
+ for(i = 0; i < MAX_DPB_SIZE; i++)
+ {
+ ps_curr_inp->s_pic_level_info.i8_total_L0_ref_idx[i] = 0;
+ ps_curr_inp->s_pic_level_info.i8_total_L1_ref_idx[i] = 0;
+ }
+
+ ps_curr_inp->s_pic_level_info.i8_total_tu = 0;
+ ps_curr_inp->s_pic_level_info.i8_total_non_coded_tu = 0;
+ ps_curr_inp->s_pic_level_info.i8_total_intra_coded_tu = 0;
+ ps_curr_inp->s_pic_level_info.i8_total_inter_coded_tu = 0;
+
+ ps_curr_inp->s_pic_level_info.i8_total_qp = 0;
+ ps_curr_inp->s_pic_level_info.i8_total_qp_min_cu = 0;
+ ps_curr_inp->s_pic_level_info.i4_min_qp = 100;
+ ps_curr_inp->s_pic_level_info.i4_max_qp = 0;
+ ps_curr_inp->s_pic_level_info.i4_max_frame_qp = 0;
+
+ ps_curr_inp->s_pic_level_info.i8_sum_squared_frame_qp = 0;
+ ps_curr_inp->s_pic_level_info.i8_total_frame_qp = 0;
+ ps_curr_inp->s_pic_level_info.f_total_buffer_underflow = 0;
+ ps_curr_inp->s_pic_level_info.f_total_buffer_overflow = 0;
+ ps_curr_inp->s_pic_level_info.f_max_buffer_underflow = 0;
+ ps_curr_inp->s_pic_level_info.f_max_buffer_overflow = 0;
+
+ ps_curr_inp->s_pic_level_info.u8_bits_estimated_intra = 0;
+ ps_curr_inp->s_pic_level_info.u8_bits_estimated_inter = 0;
+ ps_curr_inp->s_pic_level_info.u8_bits_estimated_slice_header = 0;
+ ps_curr_inp->s_pic_level_info.u8_bits_estimated_sao = 0;
+ ps_curr_inp->s_pic_level_info.u8_bits_estimated_split_cu_flag = 0;
+ ps_curr_inp->s_pic_level_info.u8_bits_estimated_cu_hdr_bits = 0;
+ ps_curr_inp->s_pic_level_info.u8_bits_estimated_split_tu_flag = 0;
+ ps_curr_inp->s_pic_level_info.u8_bits_estimated_qp_delta_bits = 0;
+ ps_curr_inp->s_pic_level_info.u8_bits_estimated_cbf_luma_bits = 0;
+ ps_curr_inp->s_pic_level_info.u8_bits_estimated_cbf_chroma_bits = 0;
+
+ ps_curr_inp->s_pic_level_info.u8_bits_estimated_res_luma_bits = 0;
+ ps_curr_inp->s_pic_level_info.u8_bits_estimated_res_chroma_bits = 0;
+
+ ps_curr_inp->s_pic_level_info.u8_bits_estimated_ref_id = 0;
+ ps_curr_inp->s_pic_level_info.u8_bits_estimated_mvd = 0;
+ ps_curr_inp->s_pic_level_info.u8_bits_estimated_merge_flag = 0;
+ ps_curr_inp->s_pic_level_info.u8_bits_estimated_mpm_luma = 0;
+ ps_curr_inp->s_pic_level_info.u8_bits_estimated_mpm_chroma = 0;
+
+ if(1 == ps_curr_inp->i4_frm_proc_valid_flag)
+ {
+ /* --- Init of buffering period and pic timing SEI related params ----*/
+ {
+ UWORD32 i4_dbf, i4_buffersize, i4_trgt_bit_rate;
+ if(ps_enc_ctxt->ps_stat_prms->s_config_prms.i4_rate_control_mode != 3)
+ {
+ ihevce_get_dbf_buffer_size(
+ (void *)ps_enc_ctxt->s_module_ctxt.apv_rc_ctxt[i4_bitrate_instance_num],
+ &i4_buffersize,
+ &i4_dbf,
+ &i4_trgt_bit_rate);
+ }
+ else
+ {
+ /* Default initializations in CQP mode */
+ WORD32 codec_level =
+ ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms.as_tgt_params[0]
+ .i4_codec_level;
+ WORD32 codec_level_index = ihevce_get_level_index(codec_level);
+
+ i4_buffersize =
+ (UWORD32)g_as_level_data[codec_level_index].i4_max_cpb[HIGH_TIER];
+ i4_trgt_bit_rate =
+ (UWORD32)g_as_level_data[codec_level_index].i4_max_bit_rate[HIGH_TIER];
+ i4_dbf = i4_buffersize;
+ }
+
+ ps_curr_inp->s_sei.s_buf_period_sei_params.u4_buffer_size_sei = i4_buffersize;
+ ps_curr_inp->s_sei.s_buf_period_sei_params.u4_dbf_sei = i4_dbf;
+ ps_curr_inp->s_sei.s_buf_period_sei_params.u4_target_bit_rate_sei =
+ i4_trgt_bit_rate;
+
+ /* ----------------- Derivation of u4_au_cpb_removal_delay_minus1 --------------------------------*/
+ ps_curr_inp->s_sei.s_pic_timing_sei_params.u4_au_cpb_removal_delay_minus1 =
+ u4_au_cpb_removal_delay_minus1;
+
+ /* ----------------- Derivation of u4_pic_dpb_output_delay --------------------------------*/
+ ps_curr_inp->s_sei.s_pic_timing_sei_params.u4_pic_dpb_output_delay =
+ ps_curr_inp->ps_sps->ai1_sps_max_num_reorder_pics[0] +
+ ps_curr_inp->i4_display_num - u4_encode_frm_num;
+ }
+ /* call the core entropy coding entry point function */
+ entropy_error = ihevce_entropy_encode_frame(
+ pv_entropy_hdl, ps_curr_out, ps_curr_inp, ps_curr_out->i4_bitstream_buf_size);
+
+ /* ----------------- Derivation of u4_au_cpb_removal_delay_minus1 --------------------------------*/
+ if(ps_curr_inp->s_sei.i1_buf_period_params_present_flag)
+ {
+ /* Reset u4_au_cpb_removal_delay_minus1 after every buffering period as subsequent pictiming is w.r.t new buffering period SEI */
+ u4_au_cpb_removal_delay_minus1 = 0;
+ }
+ else
+ {
+ /* cpb delay is circularly incremented with wrap around based on max length signalled in VUI */
+ UWORD8 u1_au_cpb_removal_delay_length =
+ ps_curr_inp->ps_sps->s_vui_parameters.s_vui_hrd_parameters
+ .u1_au_cpb_removal_delay_length_minus1 +
+ 1;
+
+ UWORD32 u4_max_cpb_removal_delay_val =
+ (1 << u1_au_cpb_removal_delay_length) - 1;
+
+ u4_au_cpb_removal_delay_minus1 = (u4_au_cpb_removal_delay_minus1 + 1) &
+ u4_max_cpb_removal_delay_val;
+ }
+ /* Debug prints for entropy error */
+ if(entropy_error)
+ {
+ DBG_PRINTF("Entropy encode error %x\n", entropy_error);
+ DEBUG("Entropy encode error %d\n", entropy_error);
+ }
+ if(ps_enc_ctxt->ps_stat_prms->s_config_prms.i4_rate_control_mode != 3)
+ {
+ /* acquire mutex lock for rate control calls */
+ osal_mutex_lock(ps_enc_ctxt->pv_rc_mutex_lock_hdl);
+
+ /* get frame rate/bit rate/max buffer size */
+ ihevce_vbv_compliance_frame_level_update(
+ ps_enc_ctxt->s_module_ctxt.apv_rc_ctxt[i4_bitrate_instance_num],
+ (ps_curr_out->i4_bytes_generated << 3),
+ i4_resolution_id,
+ i4_bitrate_instance_num,
+ ps_curr_inp->s_sei.s_pic_timing_sei_params.u4_au_cpb_removal_delay_minus1);
+ /* release mutex lock after rate control calls */
+ osal_mutex_unlock(ps_enc_ctxt->pv_rc_mutex_lock_hdl);
+ }
+
+ /*correct the mismatch between rdopt and entropy thread mismatch*/
+ {
+ /* acquire mutex lock for rate control calls */
+ osal_mutex_lock(ps_enc_ctxt->pv_rc_mutex_lock_hdl);
+
+ /*Set to -1 when no change in bitrate, other-wise set to encoder bufferfullness at that moment*/
+ ps_curr_out->i8_cur_vbv_level = ps_curr_inp->i8_buf_level_bitrate_change;
+ if(ps_curr_inp->i8_buf_level_bitrate_change != -1)
+ {
+ LWORD64 bitrate, buffer_size;
+ ASSERT(
+ i4_bitrate_instance_num ==
+ 0); //since dynamic change in bitrate is not supported in multi bitrate and resolution
+ get_avg_bitrate_bufsize(
+ ps_enc_ctxt->s_module_ctxt.apv_rc_ctxt[i4_bitrate_instance_num],
+ &bitrate,
+ &buffer_size);
+
+ change_bitrate_vbv_complaince(
+ ps_enc_ctxt->s_module_ctxt.apv_rc_ctxt[i4_bitrate_instance_num],
+ bitrate,
+ buffer_size);
+ /*Change bitrate in SEI-VUI related context as well*/
+ ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms
+ .as_tgt_params[ps_enc_ctxt->i4_resolution_id]
+ .ai4_tgt_bitrate[i4_bitrate_instance_num] = (WORD32)bitrate;
+ ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms
+ .as_tgt_params[ps_enc_ctxt->i4_resolution_id]
+ .ai4_max_vbv_buffer_size[i4_bitrate_instance_num] = (WORD32)buffer_size;
+ }
+ /*account for error to meet bitrate more precisely*/
+ ihevce_rc_rdopt_entropy_bit_correct(
+ ps_enc_ctxt->s_module_ctxt.apv_rc_ctxt[i4_bitrate_instance_num],
+ (ps_curr_out->i4_bytes_generated << 3),
+ inp_buf_id); //ps_curr_inp->i4_inp_timestamp_low
+
+ /* release mutex lock after rate control calls */
+ osal_mutex_unlock(ps_enc_ctxt->pv_rc_mutex_lock_hdl);
+ }
+ u4_encode_frm_num++;
+ }
+ else
+ {
+ ps_curr_out->i4_bytes_generated = 0;
+ ps_curr_out->i4_encoded_frame_type = IV_NA_FRAME;
+ }
+
+ ps_curr_out->i4_buf_id = out_buf_id;
+ end_flag = ps_curr_inp->i4_end_flag;
+ ps_curr_out->i4_end_flag = ps_curr_inp->i4_end_flag;
+
+ if(1 == ps_enc_ctxt->s_multi_thrd.i4_force_end_flag)
+ {
+ end_flag = 1;
+ ps_curr_out->i4_end_flag = 1;
+ }
+ if(!i4_use_dummy_buffer)
+ {
+ /* Call back to Apln. saying buffer is produced */
+ ps_hle_ctxt->ihevce_output_strm_fill_done(
+ ps_hle_ctxt->pv_out_cb_handle,
+ ps_curr_out,
+ i4_bufque_id, /* br intance */
+ i4_out_res_id /* res_instance*/);
+ }
+
+ if(ps_curr_inp->i4_frm_proc_valid_flag)
+ {
+ ps_curr_inp->s_pic_level_info.u8_total_bits_generated =
+ ps_curr_out->i4_bytes_generated * 8;
+ }
+
+ /* --- release the current output buffer ---- */
+ if(!i4_use_dummy_buffer)
+ {
+ ihevce_q_rel_buf(
+ (void *)ps_enc_ctxt, (IHEVCE_OUTPUT_DATA_Q + i4_bufque_id), out_buf_id);
+ }
+
+ /* release the input buffer*/
+ ihevce_q_rel_buf(
+ (void *)ps_enc_ctxt,
+ (IHEVCE_FRM_PRS_ENT_COD_Q + i4_bitrate_instance_num),
+ inp_buf_id);
+
+ /* reset the pointers to NULL */
+ ps_curr_inp = NULL;
+ ps_curr_out = NULL;
+ }
+ else
+ {
+ end_flag = ps_curr_inp->i4_end_flag;
+ if(NULL != ps_curr_inp)
+ {
+ /* release the input buffer*/
+ ihevce_q_rel_buf(
+ (void *)ps_enc_ctxt,
+ (IHEVCE_FRM_PRS_ENT_COD_Q + i4_bitrate_instance_num),
+ inp_buf_id);
+ }
+
+ // ASSERT(0);
+ }
+ PROFILE_STOP(
+ &ps_hle_ctxt->profile_entropy[ps_enc_ctxt->i4_resolution_id][i4_bitrate_instance_num],
+ NULL);
+ }
+
+ /* Release all the buffers the application might have queued in */
+ /* Do this only if its not a force end */
+ if(1 != ps_enc_ctxt->s_multi_thrd.i4_force_end_flag)
+ {
+ end_flag = 0;
+ }
+ if(1 == ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms.i4_mres_single_out)
+ {
+ ps_enc_ctxt->ps_stat_prms->i4_outbuf_buf_free_control = 1;
+ i4_do_entr_last = ps_enc_ctxt->s_multi_thrd.pi4_active_res_id[i4_resolution_id];
+ }
+ else
+ {
+ i4_do_entr_last = 1;
+ }
+
+ if((1 == i4_do_entr_last) && (!i4_use_dummy_buffer))
+ {
+ while(0 == end_flag)
+ {
+ if(1 == ps_enc_ctxt->ps_stat_prms->i4_outbuf_buf_free_control) //FFMPEG application
+ {
+ /* ------- get a filled descriptor from output Que ------------ */
+ ps_curr_out = (iv_output_data_buffs_t *)ihevce_q_get_filled_buff(
+ (void *)ps_enc_ctxt,
+ (IHEVCE_OUTPUT_DATA_Q + i4_bufque_id),
+ &out_buf_id,
+ BUFF_QUE_NON_BLOCKING_MODE);
+
+ /* Update the end_flag from application */
+ end_flag = (ps_curr_out == NULL);
+ }
+ else if(
+ 0 == ps_enc_ctxt->ps_stat_prms
+ ->i4_outbuf_buf_free_control) // process call control based application
+ {
+ ps_curr_out = (iv_output_data_buffs_t *)ihevce_q_get_filled_buff(
+ (void *)ps_enc_ctxt,
+ (IHEVCE_OUTPUT_DATA_Q + i4_bufque_id),
+ &out_buf_id,
+ BUFF_QUE_BLOCKING_MODE);
+ }
+ else
+ {
+ /* should not enter here */
+ ASSERT(0);
+ }
+
+ if(ps_curr_out)
+ {
+ end_flag = ps_curr_out->i4_is_last_buf;
+
+ /* Fill the min. necessory things */
+ ps_curr_out->i4_process_ret_sts = IV_SUCCESS;
+ ps_curr_out->i4_end_flag = 1;
+ ps_curr_out->i4_bytes_generated = 0;
+
+ /* Call back to Apln. saying buffer is produced */
+ ps_hle_ctxt->ihevce_output_strm_fill_done(
+ ps_hle_ctxt->pv_out_cb_handle,
+ ps_curr_out,
+ i4_bufque_id, /* br intance */
+ i4_out_res_id /* res_instance*/);
+
+ /* --- release the current output buffer ---- */
+ ihevce_q_rel_buf(
+ (void *)ps_enc_ctxt, (IHEVCE_OUTPUT_DATA_Q + i4_bufque_id), out_buf_id);
+ }
+ }
+ }
+ if(1 == ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms.i4_mres_single_out)
+ {
+ /* Mres single out usecase: Set active res_id to zero for curretn res so that other resolutions exit entropy */
+ ps_enc_ctxt->s_multi_thrd.pi4_active_res_id[i4_resolution_id] = 0;
+ }
+
+ return (0);
+}
+
+/**
+******************************************************************************
+*
+*  @brief Generates the VPS, SPS and PPS headers for one bitrate instance
+*
+*  @par   Description
+*  Fetches an output buffer from the output data queue of the requested
+*  bitrate instance, writes the VPS, SPS and PPS NAL units into it, fills in
+*  the non-VCL prefix NAL bookkeeping, hands the buffer to the application
+*  through the ihevce_output_strm_fill_done() callback and finally releases
+*  the buffer back to the queue.
+*
+*  @param[in]   ps_hle_ctxt
+*  pointer to high level interface context (handle)
+*
+*  @param[in]   i4_bitrate_instance_id
+*  bitrate id; must lie in [0, i4_num_bitrate_instances) of the resolution
+*
+*  @param[in]   i4_resolution_id
+*  resolution id; must lie in [0, i4_num_res_layers)
+*
+*  @return      IHEVCE_FAIL when the handle or the ids fail the sanity checks
+*               or when no output buffer could be obtained; otherwise the
+*               OR-ed status of the bitstream init and header generation
+*               calls (IHEVCE_SUCCESS when all of them succeed)
+*
+******************************************************************************
+*/
+WORD32 ihevce_entropy_encode_header(
+    ihevce_hle_ctxt_t *ps_hle_ctxt, WORD32 i4_bitrate_instance_id, WORD32 i4_resolution_id)
+{
+    WORD32 ret = IHEVCE_SUCCESS;
+    bitstrm_t s_bit_strm;
+    bitstrm_t *ps_bitstrm = &s_bit_strm;
+    enc_ctxt_t *ps_enc_ctxt;
+    ihevce_tgt_layer_params_t *ps_tgt_lyr_prms;
+    sps_t *ps_sps;
+    vps_t *ps_vps;
+    pps_t *ps_pps;
+    iv_output_data_buffs_t *ps_curr_out = NULL;
+    WORD32 out_buf_id;
+
+    /* sanity checks */
+    if((ps_hle_ctxt == NULL) || (ps_hle_ctxt->i4_size != sizeof(ihevce_hle_ctxt_t)) ||
+       ps_hle_ctxt->i4_hle_init_done != 1)
+        return IHEVCE_FAIL;
+
+    ps_tgt_lyr_prms = (ihevce_tgt_layer_params_t *)&ps_hle_ctxt->ps_static_cfg_prms->s_tgt_lyr_prms;
+
+    /* both ids are used as array indices below, so negative values are rejected too */
+    if((i4_resolution_id < 0) || (i4_resolution_id >= ps_tgt_lyr_prms->i4_num_res_layers))
+        return IHEVCE_FAIL;
+
+    if((i4_bitrate_instance_id < 0) ||
+       (i4_bitrate_instance_id >=
+        ps_tgt_lyr_prms->as_tgt_params[i4_resolution_id].i4_num_bitrate_instances))
+        return IHEVCE_FAIL;
+
+    ps_enc_ctxt = (enc_ctxt_t *)ps_hle_ctxt->apv_enc_hdl[i4_resolution_id];
+    ps_sps = &ps_enc_ctxt->as_sps[i4_bitrate_instance_id];
+    ps_vps = &ps_enc_ctxt->as_vps[i4_bitrate_instance_id];
+    ps_pps = &ps_enc_ctxt->as_pps[i4_bitrate_instance_id];
+
+    /* ------- get a filled descriptor from output Que ------------ */
+    ps_curr_out = (iv_output_data_buffs_t *)ihevce_q_get_filled_buff(
+        (void *)ps_enc_ctxt,
+        (IHEVCE_OUTPUT_DATA_Q + i4_bitrate_instance_id),
+        &out_buf_id,
+        BUFF_QUE_BLOCKING_MODE);
+
+    /* nothing can be written (or released) without an output descriptor */
+    if(NULL == ps_curr_out)
+        return IHEVCE_FAIL;
+
+    /* Initialize the bitstream engine */
+    ret |= ihevce_bitstrm_init(
+        ps_bitstrm, (UWORD8 *)ps_curr_out->pv_bitstream_bufs, ps_curr_out->i4_bitstream_buf_size);
+
+    /* Reset Bitstream NAL counter */
+    ps_bitstrm->i4_num_nal = 0;
+
+    /* generate vps */
+    ret |= ihevce_generate_vps(ps_bitstrm, ps_vps);
+
+    /* generate sps */
+    ret |= ihevce_generate_sps(ps_bitstrm, ps_sps);
+
+    /* generate pps */
+    ret |= ihevce_generate_pps(ps_bitstrm, ps_pps);
+
+    /* header-only buffer: there is no input frame, so the time stamp is zeroed */
+    ps_curr_out->i4_out_timestamp_low = 0;
+    ps_curr_out->i4_out_timestamp_high = 0;
+
+    /* no application frame context is associated with this buffer */
+    ps_curr_out->pv_app_frm_ctxt = NULL;
+
+    /* frame never skipped for now */
+    ps_curr_out->i4_frame_skipped = 0;
+
+    /* update error code */
+    ps_curr_out->i4_process_error_code = ret;
+
+    ps_curr_out->i4_bytes_generated = ps_bitstrm->u4_strm_buf_offset;
+
+    /* ------------------- Initialize non-VCL prefix NAL Size/offsets --------------------*/
+    {
+        WORD32 num_non_vcl_prefix_nals = ps_bitstrm->i4_num_nal;
+        WORD32 ctr = 0;
+
+        ASSERT(num_non_vcl_prefix_nals <= MAX_NUM_PREFIX_NALS_PER_AU);
+
+        ps_curr_out->i4_num_non_vcl_prefix_nals = num_non_vcl_prefix_nals;
+        for(ctr = 0; ctr < MIN(num_non_vcl_prefix_nals, MAX_NUM_PREFIX_NALS_PER_AU); ctr++)
+        {
+            /* NAL offset is derived by subtracting Bitstream base from NAL start pointer */
+            ULWORD64 u8_cur_nal_start = (ULWORD64)ps_bitstrm->apu1_nal_start[ctr];
+
+#if POPULATE_NAL_SIZE
+
+            /* ----------Populate NAL Size  -------------*/
+            if((ctr + 1) < num_non_vcl_prefix_nals)
+            {
+                /* size is the distance to the start of the next NAL */
+                ULWORD64 u8_next_nal_start = (ULWORD64)ps_bitstrm->apu1_nal_start[ctr + 1];
+                ps_curr_out->ai4_size_non_vcl_prefix_nals[ctr] =
+                    (UWORD32)(u8_next_nal_start - u8_cur_nal_start);
+            }
+            else
+            {
+                /* last NAL: size is the distance to the current end of the stream */
+                ULWORD64 u8_next_nal_start =
+                    (ULWORD64)ps_bitstrm->pu1_strm_buffer + ps_bitstrm->u4_strm_buf_offset;
+                ps_curr_out->ai4_size_non_vcl_prefix_nals[ctr] =
+                    (UWORD32)(u8_next_nal_start - u8_cur_nal_start);
+            }
+            ASSERT(ps_curr_out->ai4_size_non_vcl_prefix_nals[ctr] > 0);
+
+#elif POPULATE_NAL_OFFSET
+            /* base address of the output bitstream buffer; this was previously */
+            /* referenced without being declared in this function               */
+            ULWORD64 u8_bitstream_base = (ULWORD64)ps_curr_out->pv_bitstream_bufs;
+
+            /* ----------Populate NAL Offset  -------------*/
+
+            ASSERT(u8_cur_nal_start >= u8_bitstream_base);
+            ps_curr_out->ai4_off_non_vcl_prefix_nals[ctr] =
+                (UWORD32)(u8_cur_nal_start - u8_bitstream_base);
+
+            if(ctr)
+            {
+                /* sanity check on increasing NAL offsets */
+                ASSERT(
+                    ps_curr_out->ai4_off_non_vcl_prefix_nals[ctr] >
+                    ps_curr_out->ai4_off_non_vcl_prefix_nals[ctr - 1]);
+            }
+#endif /* POPULATE_NAL_SIZE */
+        }
+    }
+
+    ps_curr_out->i4_buf_id = out_buf_id;
+    ps_curr_out->i4_end_flag = 0;
+    if(IHEVCE_SUCCESS == ret)
+    {
+        ps_curr_out->i4_process_ret_sts = IV_SUCCESS;
+    }
+    else
+    {
+        ps_curr_out->i4_process_ret_sts = IV_FAIL;
+    }
+    ps_curr_out->i4_encoded_frame_type = IV_NA_FRAME;
+
+    /* Call back to Apln. saying buffer is produced */
+    ps_hle_ctxt->ihevce_output_strm_fill_done(
+        ps_hle_ctxt->pv_out_cb_handle, ps_curr_out, i4_bitrate_instance_id, i4_resolution_id);
+
+    /* release the output buffer back to its queue */
+    ihevce_q_rel_buf(
+        (void *)ps_enc_ctxt, (IHEVCE_OUTPUT_DATA_Q + i4_bitrate_instance_id), out_buf_id);
+
+    return ret;
+}
diff --git a/encoder/ihevce_entropy_cod.h b/encoder/ihevce_entropy_cod.h
new file mode 100644
index 0000000..e7bf73d
--- /dev/null
+++ b/encoder/ihevce_entropy_cod.h
@@ -0,0 +1,69 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_entropy_cod.h
+*
+* \brief
+* This file contains interface function declarations related to Entropy
+* coding
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_ENTROPY_COD_H_
+#define _IHEVCE_ENTROPY_COD_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Variable Declarations */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+/* Declaration only; the definition is not part of this header. */
+/* NOTE(review): going by the names, this is presumably the entropy coding */
+/* thread entry point and pv_hle_ctxt the high level encoder context passed */
+/* as an opaque pointer - confirm against the definition. */
+WORD32 ihevce_ent_coding_thrd(void *pv_hle_ctxt);
+
+#endif /* _IHEVCE_ENTROPY_COD_H_ */
diff --git a/encoder/ihevce_entropy_ctxt.h b/encoder/ihevce_entropy_ctxt.h
new file mode 100644
index 0000000..f662879
--- /dev/null
+++ b/encoder/ihevce_entropy_ctxt.h
@@ -0,0 +1,89 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file ihevce_entropy_ctxt.h
+*
+* @brief
+* This file contains structures and interface prototypes for header encoding
+*
+* @author
+* Ittiam
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_ENTROPY_CTXT_H_
+#define _IHEVCE_ENTROPY_CTXT_H_
+
+/*****************************************************************************/
+/* Structures */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ * @brief Data for the trace functionality
+ *
+ * Holds only pointers: to the VPS/SPS/PPS parameter sets, the slice header
+ * and the source, output stream, coding and config parameter structures.
+******************************************************************************
+ */
+typedef struct
+{
+ /**
+ * pointer to vps_t struct
+ */
+ vps_t *ps_vps;
+
+ /**
+ * pointer to sps_t struct
+ */
+ sps_t *ps_sps;
+
+ /**
+ * pointer to pps_t struct
+ */
+ pps_t *ps_pps;
+
+ /**
+ * pointer to slice_header_t struct
+ */
+ slice_header_t *ps_slice_hdr;
+
+ /**
+ * pointer to ihevce_src_params_t struct
+ */
+ ihevce_src_params_t *ps_src_params;
+
+ /**
+ * pointer to ihevce_out_strm_params_t struct
+ * (the "atrm" in the member name is kept as is: renaming would break users)
+ */
+ ihevce_out_strm_params_t *ps_out_atrm_params;
+
+ /**
+ * pointer to ihevce_coding_params_t struct
+ */
+ ihevce_coding_params_t *ps_coding_params;
+
+ /**
+ * pointer to ihevce_config_prms_t struct
+ */
+ ihevce_config_prms_t *ps_config_prms;
+
+} entropy_ctxt_t;
+
+#endif //_IHEVCE_ENTROPY_CTXT_H_
diff --git a/encoder/ihevce_entropy_interface.c b/encoder/ihevce_entropy_interface.c
new file mode 100644
index 0000000..729c8e4
--- /dev/null
+++ b/encoder/ihevce_entropy_interface.c
@@ -0,0 +1,837 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+******************************************************************************
+* @file ihevce_entropy_interface.c
+*
+* @brief
+* This file contains function definitions for entropy interface related to
+* memory init and process apis
+*
+* @author
+* Ittiam
+*
+* List of Functions
+* ihevce_entropy_get_num_mem_recs()
+* ihevce_entropy_size_of_out_buffer()
+* ihevce_entropy_get_mem_recs()
+* ihevce_entropy_init()
+* ihevce_entropy_encode_frame()
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_macros.h"
+#include "ihevc_debug.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+#include "ihevc_trans_tables.h"
+#include "ihevc_trans_macros.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_multi_thrd_funcs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_error_checks.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_global_tables.h"
+#include "ihevce_entropy_structs.h"
+#include "ihevce_entropy_interface.h"
+#include "ihevce_encode_header.h"
+#include "ihevce_encode_header_sei_vui.h"
+#include "ihevce_trace.h"
+
+#include "cast_types.h"
+#include "osal.h"
+#include "osal_defaults.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+*  @brief Reports how many memory records the entropy module requests
+*
+*  @par   Description
+*  The count is the value of NUM_ENTROPY_MEM_RECS.
+*
+*  @return      number of memory records
+*
+******************************************************************************
+*/
+WORD32 ihevce_entropy_get_num_mem_recs(void)
+{
+    const WORD32 i4_num_recs = NUM_ENTROPY_MEM_RECS;
+
+    return i4_num_recs;
+}
+
+/**
+******************************************************************************
+*
+*  @brief Estimates the bitstream buffer size from the picture dimensions
+*
+*  @par   Description
+*  The estimate is the product of the luma picture height and width read
+*  from the SPS attached to the entropy input context.
+*
+*  @param[in]   ps_curr_inp
+*  pointer to entropy input params context; its ps_sps member is read
+*
+*  @return      bitstream buffer size
+*
+******************************************************************************
+*/
+WORD32 ihevce_entropy_size_of_out_buffer(frm_proc_ent_cod_ctxt_t *ps_curr_inp)
+{
+    const sps_t *ps_seq_prms = ps_curr_inp->ps_sps;
+
+    /* height x width in luma samples */
+    return (WORD32)(
+        ps_seq_prms->i2_pic_height_in_luma_samples * ps_seq_prms->i2_pic_width_in_luma_samples);
+}
+
+/**
+******************************************************************************
+*
+*  @brief Fills in the memory requirements of the entropy module
+*
+*  @par   Description
+*  Four records are populated: the module context, the top row CU skip
+*  flags, the top row CU depths and a dummy output buffer. Sizes are derived
+*  from the target width and height of the given resolution, each aligned
+*  with ALIGN64. Every record is requested with 64 byte alignment in the
+*  memory space passed by the caller.
+*
+*  @param[inout]   ps_mem_tab
+*  pointer to memory descriptors table
+*
+*  @param[in]      ps_init_prms
+*  Create time static parameters
+*
+*  @param[in]      i4_mem_space
+*  memspace in which memory request should be done
+*
+*  @param[in]      i4_resolution_id
+*  index of the target resolution whose dimensions are used
+*
+*  @return      number of memory requirements filled
+*
+******************************************************************************
+*/
+WORD32 ihevce_entropy_get_mem_recs(
+    iv_mem_rec_t *ps_mem_tab,
+    ihevce_static_cfg_params_t *ps_init_prms,
+    WORD32 i4_mem_space,
+    WORD32 i4_resolution_id)
+{
+    /* records populated by this function */
+    static const WORD32 ai4_rec_ids[] = {
+        ENTROPY_CTXT, ENTROPY_TOP_SKIP_FLAGS, ENTROPY_TOP_CU_DEPTH, ENTROPY_DUMMY_OUT_BUF
+    };
+    const WORD32 i4_num_rec_ids = (WORD32)(sizeof(ai4_rec_ids) / sizeof(ai4_rec_ids[0]));
+    WORD32 i4_idx;
+
+    /* worst case dimensions of this resolution, rounded up with ALIGN64 */
+    WORD32 i4_wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
+    WORD32 i4_ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
+    WORD32 i4_aligned_wd = ALIGN64(i4_wd);
+    WORD32 i4_aligned_ht = ALIGN64(i4_ht);
+
+    /* memory space and alignment are common to all records */
+    for(i4_idx = 0; i4_idx < i4_num_rec_ids; i4_idx++)
+    {
+        iv_mem_rec_t *ps_rec = &ps_mem_tab[ai4_rec_ids[i4_idx]];
+
+        ps_rec->e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
+        ps_rec->i4_mem_alignment = 64;
+    }
+
+    /* Module context structure */
+    ps_mem_tab[ENTROPY_CTXT].i4_mem_size = sizeof(entropy_context_t);
+
+    /* top row cu skip flags (1 bit per 8x8CU) */
+    ps_mem_tab[ENTROPY_TOP_SKIP_FLAGS].i4_mem_size = i4_aligned_wd >> 6;
+
+    /* top row CU Depth (1 byte per 8x8CU) */
+    ps_mem_tab[ENTROPY_TOP_CU_DEPTH].i4_mem_size = (i4_aligned_wd >> 3);
+
+    /* Dummy_buffer to handle first pass MBR case */
+    ps_mem_tab[ENTROPY_DUMMY_OUT_BUF].i4_mem_size = (i4_aligned_wd * i4_aligned_ht * 2);
+
+    return (NUM_ENTROPY_MEM_RECS);
+}
+
+/**
+******************************************************************************
+*
+*  @brief Initialization of the entropy module
+*
+*  @par   Description
+*  The memory obtained for the requests made in ihevce_entropy_get_mem_recs()
+*  is wired into the entropy context, the one time settings are copied from
+*  the static parameters and the context is returned as the module handle.
+*
+*  @param[inout]   ps_mem_tab
+*  pointer to memory descriptors table
+*
+*  @param[in]      ps_init_prms
+*  Create time static parameters
+*
+*  @param[in]      pv_tile_params_base
+*  tile params base pointer; stored in the context as is
+*
+*  @param[in]      i4_res_id
+*  index of the target resolution whose codec level is registered
+*
+*  @return
+*  Handle of the entropy module returned as void ptr
+*
+******************************************************************************
+*/
+void *ihevce_entropy_init(
+    iv_mem_rec_t *ps_mem_tab,
+    ihevce_static_cfg_params_t *ps_init_prms,
+    void *pv_tile_params_base,
+    WORD32 i4_res_id)
+{
+    /* Entropy state structure, cleared before any member is set */
+    entropy_context_t *ps_ctxt = (entropy_context_t *)ps_mem_tab[ENTROPY_CTXT].pv_base;
+    WORD32 i4_idx;
+
+    memset(ps_ctxt, 0, sizeof(entropy_context_t));
+
+    /* hook up the remaining memory records */
+    ps_ctxt->pu1_skip_cu_top = (UWORD8 *)ps_mem_tab[ENTROPY_TOP_SKIP_FLAGS].pv_base;
+    ps_ctxt->pu1_cu_depth_top = (UWORD8 *)ps_mem_tab[ENTROPY_TOP_CU_DEPTH].pv_base;
+    ps_ctxt->pv_dummy_out_buf = ps_mem_tab[ENTROPY_DUMMY_OUT_BUF].pv_base;
+    ps_ctxt->i4_bitstream_buf_size = ps_mem_tab[ENTROPY_DUMMY_OUT_BUF].i4_mem_size;
+
+    /* perform all one time initialisation here */
+    /*************************************************************************/
+    /* Note pu1_cbf_cb, pu1_cbf_cr initialization are done with array idx 1  */
+    /* This is because these flags are accessed as pu1_cbf_cb[tfr_depth - 1] */
+    /* without checking for tfr_depth = 0                                    */
+    /*************************************************************************/
+    for(i4_idx = 0; i4_idx < 2; i4_idx++)
+    {
+        ps_ctxt->apu1_cbf_cb[i4_idx] = &ps_ctxt->au1_cbf_cb[i4_idx][1];
+        ps_ctxt->apu1_cbf_cr[i4_idx] = &ps_ctxt->au1_cbf_cr[i4_idx][1];
+    }
+
+    memset(ps_ctxt->au1_cbf_cb, 0, (MAX_TFR_DEPTH + 1) * 2 * sizeof(UWORD8));
+
+    /* register codec level */
+    ps_ctxt->i4_codec_level = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_res_id].i4_codec_level;
+
+    /* Flag to enable/disable insertion of SPS, VPS & PPS at every CRA frame */
+    ps_ctxt->i4_sps_at_cdr_enable = ps_init_prms->s_out_strm_prms.i4_sps_at_cdr_enable;
+
+    /* Store Tile params base into entropy context */
+    ps_ctxt->pv_tile_params_base = pv_tile_params_base;
+
+    ps_ctxt->pv_sys_api = (void *)&ps_init_prms->s_sys_api;
+
+    ps_ctxt->i4_slice_segment_mode = ps_init_prms->s_slice_params.i4_slice_segment_mode;
+
+    /* Slice segment length: taken from the slice argument in modes 1 and 2, else 0 */
+    ps_ctxt->i4_slice_segment_max_length =
+        ((1 == ps_ctxt->i4_slice_segment_mode) || (2 == ps_ctxt->i4_slice_segment_mode))
+            ? ps_init_prms->s_slice_params.i4_slice_segment_argument
+            : 0;
+
+    /* return the handle to caller */
+    return ((void *)ps_ctxt);
+}
+
+/**
+******************************************************************************
+*
+* @brief entry point for entropy coding of a frame
+*
+* @par Description
+* This function generates nal headers like SPS/PPS/slice header and call the
+* slice data entropy coding function
+*
+* @param[in] ps_enc_ctxt
+* pointer to encoder context (handle)
+*
+* @param[out] ps_curr_out
+* pointer to output data buffer context where bitstream is generated
+*
+* @param[out] ps_curr_inp
+* pointer to entropy input params context
+*
+* @return success or failure error code
+*
+******************************************************************************
+*/
+WORD32 ihevce_entropy_encode_frame(
+ void *pv_entropy_hdl,
+ iv_output_data_buffs_t *ps_curr_out,
+ frm_proc_ent_cod_ctxt_t *ps_curr_inp,
+ WORD32 i4_out_buf_size)
+{
+ WORD32 ret = IHEVCE_SUCCESS;
+ WORD32 tile_ctr, total_tiles = 1;
+ entropy_context_t *ps_entropy_ctxt = (entropy_context_t *)pv_entropy_hdl;
+
+ /* current frame slice type and nal type */
+ WORD32 slice_type = ps_curr_inp->s_slice_hdr.i1_slice_type;
+
+ /* current frame slice type and nal type */
+ WORD32 nal_type = ps_curr_inp->i4_slice_nal_type;
+
+ /* read vps, sps and pps from input params */
+ vps_t *ps_vps = ps_curr_inp->ps_vps;
+ sps_t *ps_sps = ps_curr_inp->ps_sps;
+ pps_t *ps_pps = ps_curr_inp->ps_pps;
+ sei_params_t *ps_sei = &ps_curr_inp->s_sei;
+ ihevce_tile_params_t *ps_tile_params_base;
+ WORD32 out_buf_size = i4_out_buf_size;
+
+ /* Headers are repeated once per IDR. Should be changed to every CRA */
+ WORD32 insert_vps_sps_pps =
+ ((slice_type == ISLICE) &&
+ (((NAL_IDR_N_LP == nal_type) || (NAL_CRA == nal_type)) || (NAL_IDR_W_LP == nal_type)));
+
+ WORD32 insert_per_cra =
+ ((slice_type == ISLICE) &&
+ (((NAL_IDR_N_LP == nal_type) || (NAL_CRA == nal_type)) || (NAL_IDR_W_LP == nal_type)));
+ bitstrm_t *ps_bitstrm = &ps_entropy_ctxt->s_bit_strm;
+
+ ULWORD64 u8_bits_slice_header_prev;
+
+ WORD32 i4_slice_segment_max_length_bckp;
+ WORD32 i4_max_num_slices;
+
+ ihevce_sys_api_t *ps_sys_api = (ihevce_sys_api_t *)ps_entropy_ctxt->pv_sys_api;
+
+#if POPULATE_NAL_OFFSET
+ ULWORD64 u8_bitstream_base = (ULWORD64)ps_curr_out->pv_bitstream_bufs;
+#endif
+ if(0 == ps_entropy_ctxt->i4_sps_at_cdr_enable)
+ {
+ insert_vps_sps_pps =
+ ((slice_type == ISLICE) && ((NAL_IDR_N_LP == nal_type) || (NAL_IDR_W_LP == nal_type)));
+ }
+ /* intialize vps, sps, pps, sei and slice header in entropy context */
+ ps_entropy_ctxt->ps_vps = ps_vps;
+ ps_entropy_ctxt->ps_sps = ps_sps;
+ ps_entropy_ctxt->ps_pps = ps_pps;
+ ps_entropy_ctxt->ps_sei = ps_sei;
+ ps_entropy_ctxt->ps_slice_hdr = &ps_curr_inp->s_slice_hdr;
+ ps_entropy_ctxt->i4_is_cu_cbf_zero = 1;
+
+ ps_entropy_ctxt->ps_pic_level_info = &ps_curr_inp->s_pic_level_info;
+
+ /* intialize the frame level ctb pointer for current slice */
+ ps_entropy_ctxt->ps_frm_ctb = ps_curr_inp->ps_frm_ctb_data;
+
+ /* Initiallizing to indicate the start of frame */
+ ps_entropy_ctxt->i4_next_slice_seg_x = 0;
+ ps_entropy_ctxt->i4_next_slice_seg_y = 0;
+
+ /* enable the residue encode flag */
+ ps_entropy_ctxt->i4_enable_res_encode = 1;
+
+ /* Initialize the bitstream engine */
+ ret |= ihevce_bitstrm_init(ps_bitstrm, (UWORD8 *)ps_curr_out->pv_bitstream_bufs, out_buf_size);
+
+ /* Reset Bitstream NAL counter */
+ ps_bitstrm->i4_num_nal = 0;
+
+ /*PIC INFO: Store the Bits before slice header is encoded*/
+ u8_bits_slice_header_prev = (ps_bitstrm->u4_strm_buf_offset * 8);
+
+ /* generate AUD if enabled from the application */
+ if(1 == ps_curr_inp->i1_aud_present_flag)
+ {
+ UWORD8 u1_pic_type;
+
+ switch(slice_type)
+ {
+ case ISLICE:
+ u1_pic_type = 0;
+ break;
+ case PSLICE:
+ u1_pic_type = 1;
+ break;
+ default:
+ u1_pic_type = 2;
+ break;
+ }
+
+ ret |= ihevce_generate_aud(ps_bitstrm, u1_pic_type);
+ }
+
+ if(insert_vps_sps_pps)
+ {
+ /* generate vps */
+ ret |= ihevce_generate_vps(ps_bitstrm, ps_entropy_ctxt->ps_vps);
+
+ /* generate sps */
+ ret |= ihevce_generate_sps(ps_bitstrm, ps_entropy_ctxt->ps_sps);
+
+ /* generate pps */
+ ret |= ihevce_generate_pps(ps_bitstrm, ps_entropy_ctxt->ps_pps);
+ }
+
+ /* generate sei */
+ if(1 == ps_entropy_ctxt->ps_sei->i1_sei_parameters_present_flag)
+ {
+ WORD32 i4_insert_prefix_sei =
+ ps_entropy_ctxt->ps_sei->i1_buf_period_params_present_flag ||
+ ps_entropy_ctxt->ps_sei->i1_pic_timing_params_present_flag ||
+ ps_entropy_ctxt->ps_sei->i1_recovery_point_params_present_flag ||
+ ps_entropy_ctxt->ps_sei->i4_sei_mastering_disp_colour_vol_params_present_flags ||
+ ps_curr_inp->u4_num_sei_payload || ps_curr_inp->s_sei.i1_sei_cll_enable;
+
+ if(i4_insert_prefix_sei)
+ {
+ ret |= ihevce_generate_sei(
+ ps_bitstrm,
+ ps_entropy_ctxt->ps_sei,
+ &ps_entropy_ctxt->ps_sps->s_vui_parameters,
+ insert_per_cra,
+ NAL_PREFIX_SEI,
+ ps_curr_inp->u4_num_sei_payload,
+ &ps_curr_inp->as_sei_payload[0]);
+ }
+ }
+
+ /*PIC INFO: Populate slice header bits */
+ ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_slice_header +=
+ (ps_bitstrm->u4_strm_buf_offset * 8) - u8_bits_slice_header_prev;
+
+ ps_tile_params_base = (ihevce_tile_params_t *)ps_entropy_ctxt->pv_tile_params_base;
+
+ ps_curr_out->i4_bytes_generated = 0; //Init
+
+ /* ------------------- Initialize non-VCL prefix NAL Size/offsets --------------------*/
+ {
+ WORD32 num_non_vcl_prefix_nals = ps_bitstrm->i4_num_nal;
+ WORD32 ctr = 0;
+
+ ASSERT(num_non_vcl_prefix_nals <= MAX_NUM_PREFIX_NALS_PER_AU);
+
+ ps_curr_out->i4_num_non_vcl_prefix_nals = num_non_vcl_prefix_nals;
+ for(ctr = 0; ctr < MIN(num_non_vcl_prefix_nals, MAX_NUM_PREFIX_NALS_PER_AU); ctr++)
+ {
+ /* NAL offset is derive by subtracting Bistream base from NAL start pointer */
+ ULWORD64 u8_cur_nal_start = (ULWORD64)ps_bitstrm->apu1_nal_start[ctr];
+
+#if POPULATE_NAL_SIZE
+
+ /* ----------Populate NAL Size -------------*/
+ if((ctr + 1) < num_non_vcl_prefix_nals)
+ {
+ ULWORD64 u8_next_nal_start = (ULWORD64)ps_bitstrm->apu1_nal_start[ctr + 1];
+ ps_curr_out->ai4_size_non_vcl_prefix_nals[ctr] =
+ (UWORD32)(u8_next_nal_start - u8_cur_nal_start);
+ }
+ else
+ {
+ ULWORD64 u8_next_nal_start =
+ (ULWORD64)ps_bitstrm->pu1_strm_buffer + ps_bitstrm->u4_strm_buf_offset;
+ ps_curr_out->ai4_size_non_vcl_prefix_nals[ctr] =
+ (UWORD32)(u8_next_nal_start - u8_cur_nal_start);
+ }
+ ASSERT(ps_curr_out->ai4_size_non_vcl_prefix_nals[ctr] > 0);
+
+#elif POPULATE_NAL_OFFSET
+
+ /* ----------Populate NAL Offset -------------*/
+
+ ASSERT(u8_cur_nal_start >= u8_bitstream_base);
+ ps_curr_out->ai4_off_non_vcl_prefix_nals[ctr] =
+ (UWORD32)(u8_cur_nal_start - u8_bitstream_base);
+
+ if(ctr)
+ {
+ /* sanity check on increasing NAL offsets */
+ ASSERT(
+ ps_curr_out->ai4_off_non_vcl_prefix_nals[ctr] >
+ ps_curr_out->ai4_off_non_vcl_prefix_nals[ctr - 1]);
+ }
+#endif /* POPULATE_NAL_SIZE */
+ }
+ }
+
+ total_tiles = ps_tile_params_base->i4_num_tiles;
+
+ /* frame level NUM slices related params initialisations */
+ {
+ WORD32 codec_level_index = ihevce_get_level_index(ps_entropy_ctxt->i4_codec_level);
+
+ i4_max_num_slices = g_as_level_data[codec_level_index].i4_max_slices_per_picture;
+ ps_entropy_ctxt->i4_num_slice_seg = 0;
+ }
+
+ /* back up slice arg length before pic encoding */
+ i4_slice_segment_max_length_bckp = ps_entropy_ctxt->i4_slice_segment_max_length;
+
+ for(tile_ctr = 0; tile_ctr < total_tiles; tile_ctr++)
+ {
+ WORD32 i4_end_of_slice = 0;
+
+ /* Loop over all the slice segments */
+ while(0 == i4_end_of_slice)
+ {
+ WORD32 i4_bytes_generated, i4_slice_header_bits;
+
+ /*PIC INFO: Store the Bits before slice header is encoded*/
+ u8_bits_slice_header_prev = (ps_bitstrm->u4_strm_buf_offset * 8);
+
+ /* generate slice header */
+ ret |= ihevce_generate_slice_header(
+ ps_bitstrm,
+ nal_type,
+ ps_entropy_ctxt->ps_slice_hdr,
+ ps_entropy_ctxt->ps_pps,
+ ps_entropy_ctxt->ps_sps,
+ &ps_entropy_ctxt->s_dup_bit_strm_ent_offset,
+ &ps_entropy_ctxt->s_cabac_ctxt.u4_first_slice_start_offset,
+ (ps_tile_params_base + tile_ctr),
+ ps_entropy_ctxt->i4_next_slice_seg_x,
+ ps_entropy_ctxt->i4_next_slice_seg_y);
+
+ i4_slice_header_bits =
+ (ps_bitstrm->u4_strm_buf_offset * 8) - (WORD32)u8_bits_slice_header_prev;
+
+ /* Update slice segment length with bytes in slice header */
+ if(2 == ps_entropy_ctxt->i4_slice_segment_mode)
+ {
+ ps_entropy_ctxt->i4_slice_seg_len = (i4_slice_header_bits / 8);
+ }
+ else //Initiallize to zero
+ {
+ ps_entropy_ctxt->i4_slice_seg_len = 0;
+ }
+
+ /*PIC INFO: Populate slice header bits */
+ ps_entropy_ctxt->ps_pic_level_info->u8_bits_estimated_slice_header +=
+ i4_slice_header_bits;
+
+ /* check if number of slices generated in is MAX -1 as per codec_level */
+ if(ps_entropy_ctxt->i4_num_slice_seg == (i4_max_num_slices - 1))
+ {
+ /* i4_slice_segment_max_length is set to a huge positive value */
+ /* so that remaining CTBS in the picture gets encoded as a single slice */
+ ps_entropy_ctxt->i4_slice_segment_max_length = 0x7FFFFFFF;
+ }
+
+ /* encode the slice data */
+ ret |= ihevce_encode_slice_data(
+ ps_entropy_ctxt, (ps_tile_params_base + tile_ctr), &i4_end_of_slice);
+
+ /* increment the number of slices generated */
+ ps_entropy_ctxt->i4_num_slice_seg++;
+
+ if(1 == ps_pps->i1_entropy_coding_sync_enabled_flag)
+ {
+ /*after encoding is done each slice offset is available. Enter these offset in slice header*/
+ ihevce_insert_entry_offset_slice_header(
+ &ps_entropy_ctxt->s_dup_bit_strm_ent_offset,
+ ps_entropy_ctxt->ps_slice_hdr,
+ ps_entropy_ctxt->ps_pps,
+ ps_entropy_ctxt->s_cabac_ctxt.u4_first_slice_start_offset);
+ }
+
+ /* compute the bytes generated and return */
+ if(1 == ps_pps->i1_entropy_coding_sync_enabled_flag)
+ {
+ i4_bytes_generated = ps_entropy_ctxt->s_dup_bit_strm_ent_offset.u4_strm_buf_offset;
+ }
+ else
+ {
+ i4_bytes_generated = ps_entropy_ctxt->s_cabac_ctxt.u4_strm_buf_offset;
+ }
+
+ /* Updating bytes generated and Updating strm_buffer pointer */
+ ps_curr_out->i4_bytes_generated += i4_bytes_generated;
+
+ /* Re-Initialize the bitstream engine after each tile or slice */
+ ihevce_bitstrm_init(
+ ps_bitstrm, (ps_bitstrm->pu1_strm_buffer + i4_bytes_generated), out_buf_size);
+ }
+ }
+
+ /* Max slices related warning prints based on last slice status */
+ if(ps_entropy_ctxt->i4_num_slice_seg == i4_max_num_slices)
+ {
+ if(ps_entropy_ctxt->i4_slice_seg_len >= i4_slice_segment_max_length_bckp)
+ {
+ if(1 == ps_entropy_ctxt->i4_slice_segment_mode)
+ {
+ ps_sys_api->ihevce_printf(
+ ps_sys_api->pv_cb_handle,
+ "IHEVCE_WARNING: Last slice contains %d CTBs exceeds %d (Max limit of CTBs "
+ "configured). As per codec_level max number of slices per frame is %d\n",
+ ps_entropy_ctxt->i4_slice_seg_len,
+ i4_slice_segment_max_length_bckp,
+ i4_max_num_slices);
+ }
+ else if(2 == ps_entropy_ctxt->i4_slice_segment_mode)
+ {
+ ps_sys_api->ihevce_printf(
+ ps_sys_api->pv_cb_handle,
+ "IHEVCE_WARNING: Last slice contains %d Bytes exceeds %d (Max limit of Bytes "
+ "configured). As per codec_level max number of slices per frame is %d\n",
+ ps_entropy_ctxt->i4_slice_seg_len,
+ i4_slice_segment_max_length_bckp,
+ i4_max_num_slices);
+ }
+ }
+ }
+
+ /* restore slice arg length after pic encoding */
+ ps_entropy_ctxt->i4_slice_segment_max_length = i4_slice_segment_max_length_bckp;
+
+ /* ---------------------- Initialize VCL NAL Size/offsets ---------------------------*/
+ {
+ WORD32 vcl_start = ps_curr_out->i4_num_non_vcl_prefix_nals;
+ WORD32 num_vcl_nals = ps_bitstrm->i4_num_nal - vcl_start;
+ WORD32 ctr = 0;
+
+ ASSERT(num_vcl_nals > 0);
+ ASSERT(num_vcl_nals <= MAX_NUM_VCL_NALS_PER_AU);
+
+ ps_curr_out->i4_num_vcl_nals = num_vcl_nals;
+ for(ctr = 0; ctr < MIN(num_vcl_nals, MAX_NUM_VCL_NALS_PER_AU); ctr++)
+ {
+ /* NAL offset is derive by subtracting Bistream base from NAL start pointer */
+ ULWORD64 u8_cur_nal_start = (ULWORD64)ps_bitstrm->apu1_nal_start[ctr + vcl_start];
+
+#if POPULATE_NAL_SIZE
+
+ /* ----------Populate NAL Size -------------*/
+ if((ctr + 1) < num_vcl_nals)
+ {
+ ULWORD64 u8_next_nal_start =
+ (ULWORD64)ps_bitstrm->apu1_nal_start[ctr + vcl_start + 1];
+ ps_curr_out->ai4_size_vcl_nals[ctr] =
+ (UWORD32)(u8_next_nal_start - u8_cur_nal_start);
+ }
+ else
+ {
+ ULWORD64 u8_next_nal_start =
+ (ULWORD64)ps_bitstrm->pu1_strm_buffer + ps_bitstrm->u4_strm_buf_offset;
+ ps_curr_out->ai4_size_vcl_nals[ctr] =
+ (UWORD32)(u8_next_nal_start - u8_cur_nal_start);
+ }
+ ASSERT(ps_curr_out->ai4_size_vcl_nals[ctr] > 0);
+
+#elif POPULATE_NAL_OFFSET
+
+ /* ----------Populate NAL Offset -------------*/
+
+ ASSERT(u8_cur_nal_start >= u8_bitstream_base);
+ ps_curr_out->ai4_off_vcl_nals[ctr] = (UWORD32)(u8_cur_nal_start - u8_bitstream_base);
+
+ if(ctr)
+ {
+ /* sanity check on increasing NAL offsets */
+ ASSERT(ps_curr_out->ai4_off_vcl_nals[ctr] > ps_curr_out->ai4_off_vcl_nals[ctr - 1]);
+ }
+#endif /* POPULATE_NAL_SIZE */
+ }
+ }
+
+ /* generate suffix sei */
+ if(1 == ps_entropy_ctxt->ps_sei->i1_sei_parameters_present_flag)
+ {
+ /* Insert hash SEI */
+ if(0 != ps_entropy_ctxt->ps_sei->i1_decoded_pic_hash_sei_flag)
+ {
+ ret |= ihevce_generate_sei(
+ ps_bitstrm,
+ ps_entropy_ctxt->ps_sei,
+ &ps_entropy_ctxt->ps_sps->s_vui_parameters,
+ insert_per_cra,
+ NAL_SUFFIX_SEI,
+ ps_curr_inp->u4_num_sei_payload,
+ &ps_curr_inp->as_sei_payload[0]);
+ }
+
+ /* Updating bytes generated */
+ ps_curr_out->i4_bytes_generated += ps_bitstrm->u4_strm_buf_offset;
+ }
+
+ /* generate end of sequence nal */
+ if((1 == ps_curr_inp->i1_eos_present_flag) && (ps_curr_inp->i4_is_end_of_idr_gop == 1))
+ {
+ ret |= ihevce_generate_eos(ps_bitstrm);
+ /* Updating bytes generated */
+ ps_curr_out->i4_bytes_generated += ps_bitstrm->u4_strm_buf_offset;
+ }
+
+ /* ------------------- Initialize non-VCL suffix NAL Size/offsets -----------------------*/
+ {
+ WORD32 non_vcl_suffix_start =
+ ps_curr_out->i4_num_non_vcl_prefix_nals + ps_curr_out->i4_num_vcl_nals;
+ WORD32 num_non_vcl_suffix_nals = ps_bitstrm->i4_num_nal - non_vcl_suffix_start;
+ WORD32 ctr = 0;
+
+ ASSERT(num_non_vcl_suffix_nals >= 0);
+ ASSERT(num_non_vcl_suffix_nals <= MAX_NUM_SUFFIX_NALS_PER_AU);
+
+ ps_curr_out->i4_num_non_vcl_suffix_nals = num_non_vcl_suffix_nals;
+ for(ctr = 0; ctr < MIN(num_non_vcl_suffix_nals, MAX_NUM_SUFFIX_NALS_PER_AU); ctr++)
+ {
+ /* NAL offset is derive by subtracting Bistream base from NAL start pointer */
+ ULWORD64 u8_cur_nal_start =
+ (ULWORD64)ps_bitstrm->apu1_nal_start[ctr + non_vcl_suffix_start];
+
+#if POPULATE_NAL_SIZE
+
+ /* ----------Populate NAL Size -------------*/
+ if((ctr + 1) < num_non_vcl_suffix_nals)
+ {
+ ULWORD64 u8_next_nal_start =
+ (ULWORD64)ps_bitstrm->apu1_nal_start[ctr + non_vcl_suffix_start + 1];
+ ps_curr_out->ai4_size_non_vcl_suffix_nals[ctr] =
+ (UWORD32)(u8_next_nal_start - u8_cur_nal_start);
+ }
+ else
+ {
+ ULWORD64 u8_next_nal_start =
+ (ULWORD64)ps_bitstrm->pu1_strm_buffer + ps_bitstrm->u4_strm_buf_offset;
+ ps_curr_out->ai4_size_non_vcl_suffix_nals[ctr] =
+ (UWORD32)(u8_next_nal_start - u8_cur_nal_start);
+ }
+ ASSERT(ps_curr_out->ai4_size_non_vcl_suffix_nals[ctr] > 0);
+
+#elif POPULATE_NAL_OFFSET
+
+ /* ----------Populate NAL Offset -------------*/
+
+ ASSERT(u8_cur_nal_start >= u8_bitstream_base);
+ ps_curr_out->ai4_off_non_vcl_suffix_nals[ctr] =
+ (UWORD32)(u8_cur_nal_start - u8_bitstream_base);
+
+ if(ctr)
+ {
+ /* sanity check on increasing NAL offsets */
+ ASSERT(
+ ps_curr_out->ai4_off_non_vcl_suffix_nals[ctr] >
+ ps_curr_out->ai4_off_non_vcl_suffix_nals[ctr - 1]);
+ }
+#endif /* POPULATE_NAL_SIZE */
+ }
+ }
+
+ /*PIC INFO: Populatinf Ref POC, weights and offset*/
+ {
+ WORD32 i;
+ ps_entropy_ctxt->ps_pic_level_info->i1_num_ref_idx_l0_active =
+ ps_entropy_ctxt->ps_slice_hdr->i1_num_ref_idx_l0_active;
+ ps_entropy_ctxt->ps_pic_level_info->i1_num_ref_idx_l1_active =
+ ps_entropy_ctxt->ps_slice_hdr->i1_num_ref_idx_l1_active;
+ for(i = 0; i < ps_entropy_ctxt->ps_slice_hdr->i1_num_ref_idx_l0_active; i++)
+ {
+ ps_entropy_ctxt->ps_pic_level_info->i4_ref_poc_l0[i] =
+ ps_entropy_ctxt->ps_slice_hdr->s_rplm.i4_ref_poc_l0[i];
+ ps_entropy_ctxt->ps_pic_level_info->i1_list_entry_l0[i] =
+ ps_entropy_ctxt->ps_slice_hdr->s_rplm.i1_list_entry_l0[i];
+ ps_entropy_ctxt->ps_pic_level_info->i2_luma_weight_l0[i] =
+ (DOUBLE)ps_entropy_ctxt->ps_slice_hdr->s_wt_ofst.i2_luma_weight_l0[i] /
+ (1 << ps_entropy_ctxt->ps_slice_hdr->s_wt_ofst.i1_luma_log2_weight_denom);
+ ps_entropy_ctxt->ps_pic_level_info->i2_luma_offset_l0[i] =
+ ps_entropy_ctxt->ps_slice_hdr->s_wt_ofst.i2_luma_offset_l0[i];
+ }
+ for(i = 0; i < ps_entropy_ctxt->ps_slice_hdr->i1_num_ref_idx_l1_active; i++)
+ {
+ ps_entropy_ctxt->ps_pic_level_info->i4_ref_poc_l1[i] =
+ ps_entropy_ctxt->ps_slice_hdr->s_rplm.i4_ref_poc_l1[i];
+ ps_entropy_ctxt->ps_pic_level_info->i1_list_entry_l1[i] =
+ ps_entropy_ctxt->ps_slice_hdr->s_rplm.i1_list_entry_l1[i];
+ ps_entropy_ctxt->ps_pic_level_info->i2_luma_weight_l1[i] =
+ (DOUBLE)ps_entropy_ctxt->ps_slice_hdr->s_wt_ofst.i2_luma_weight_l1[i] /
+ (1 << ps_entropy_ctxt->ps_slice_hdr->s_wt_ofst.i1_luma_log2_weight_denom);
+ ps_entropy_ctxt->ps_pic_level_info->i2_luma_offset_l1[i] =
+ ps_entropy_ctxt->ps_slice_hdr->s_wt_ofst.i2_luma_offset_l1[i];
+ }
+ }
+
+ /* attach the time stamp of the input to output */
+ ps_curr_out->i4_out_timestamp_low = ps_curr_inp->i4_inp_timestamp_low;
+
+ ps_curr_out->i4_out_timestamp_high = ps_curr_inp->i4_inp_timestamp_high;
+
+ /*attach the app frame info of this buffer */
+ ps_curr_out->pv_app_frm_ctxt = ps_curr_inp->pv_app_frm_ctxt;
+
+ /* frame never skipped for now */
+ ps_curr_out->i4_frame_skipped = 0;
+
+ /* update error code and return */
+ ps_curr_out->i4_process_error_code = ret;
+
+ switch(slice_type)
+ {
+ case ISLICE:
+ if((nal_type == NAL_IDR_N_LP) || (NAL_IDR_W_LP == nal_type))
+ {
+ ps_curr_out->i4_encoded_frame_type = IV_IDR_FRAME;
+ }
+ else
+ {
+ ps_curr_out->i4_encoded_frame_type = IV_I_FRAME;
+ }
+ break;
+ case PSLICE:
+ ps_curr_out->i4_encoded_frame_type = IV_P_FRAME;
+ break;
+ case BSLICE:
+ ps_curr_out->i4_encoded_frame_type = IV_B_FRAME;
+ break;
+ }
+
+ if(IHEVCE_SUCCESS == ret)
+ {
+ ps_curr_out->i4_process_ret_sts = IV_SUCCESS;
+ }
+ else
+ {
+ ps_curr_out->i4_process_ret_sts = IV_FAIL;
+ }
+
+ return (ret);
+}
diff --git a/encoder/ihevce_entropy_interface.h b/encoder/ihevce_entropy_interface.h
new file mode 100644
index 0000000..1e047c0
--- /dev/null
+++ b/encoder/ihevce_entropy_interface.h
@@ -0,0 +1,94 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_entropy_interface.h
+*
+* \brief
+* This file contains the interface definition of the HEVC entropy functions
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_ENTROPY_INTERFACE_H_
+#define _IHEVCE_ENTROPY_INTERFACE_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/** @brief Enable/Disable NAL size population in output buffer (tested first with #if) */
+#define POPULATE_NAL_SIZE 1
+
+/** @brief Enable/Disable NAL offset population in output buffer (tested with #elif, i.e. only when POPULATE_NAL_SIZE is 0) */
+#define POPULATE_NAL_OFFSET 0
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Variable Declarations */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+WORD32 ihevce_entropy_get_num_mem_recs(void);
+
+WORD32 ihevce_entropy_size_of_out_buffer(frm_proc_ent_cod_ctxt_t *ps_curr_inp);
+
+WORD32 ihevce_entropy_get_mem_recs(
+ iv_mem_rec_t *ps_mem_tab,
+ ihevce_static_cfg_params_t *ps_init_prms,
+ WORD32 i4_mem_space,
+ WORD32 i4_resolution_id);
+
+void *ihevce_entropy_init(
+ iv_mem_rec_t *ps_mem_tab,
+ ihevce_static_cfg_params_t *ps_init_prms,
+ void *pv_tile_params_base,
+ WORD32 i4_res_id);
+
+WORD32 ihevce_entropy_encode_frame(
+ void *pv_entropy_hdl,
+ iv_output_data_buffs_t *ps_curr_out,
+ frm_proc_ent_cod_ctxt_t *ps_curr_inp,
+ WORD32 i4_out_buf_size);
+
+#endif /* _IHEVCE_ENTROPY_INTERFACE_H_ */
diff --git a/encoder/ihevce_entropy_structs.h b/encoder/ihevce_entropy_structs.h
new file mode 100644
index 0000000..5e69c25
--- /dev/null
+++ b/encoder/ihevce_entropy_structs.h
@@ -0,0 +1,357 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file ihevce_entropy_structs.h
+*
+* @brief
+* This file contains encoder entropy context related structures and
+* interface prototypes
+*
+* @author
+* Ittiam
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_ENTROPY_STRUCTS_H_
+#define _IHEVCE_ENTROPY_STRUCTS_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+/**
+******************************************************************************
+ * @brief defines maximum transform depth in HEVC (32 to 4)
+******************************************************************************
+ */
+#define MAX_TFR_DEPTH 5
+
+/**
+******************************************************************************
+ * @brief defines maximum qp delta to be coded as truncated unary code
+******************************************************************************
+ */
+#define TU_MAX_QP_DELTA_ABS 5
+
+/**
+******************************************************************************
+ * @brief defines maximum value of context increment used for qp delta encode
+******************************************************************************
+ */
+#define CTXT_MAX_QP_DELTA_ABS 1
+
+/**
+******************************************************************************
+ * @brief header length in the compressed scan coeff buffer of a TU
+******************************************************************************
+ */
+#define COEFF_BUF_HEADER_LEN 4
+
+/**
+******************************************************************************
+ * @brief stores bit "bitpos" of input x in val; expands to a { } block, not an expression
+******************************************************************************
+ */
+#define EXTRACT_BIT(val, x, bitpos) \
+ { \
+ val = ((((x) >> (bitpos)) & 0x1)); \
+ }
+
+/**
+******************************************************************************
+ * @brief ORs bit y into position "bitpos" of input variable x (a bit that is already set is not cleared)
+******************************************************************************
+ */
+#define INSERT_BIT(x, bitpos, y) ((x) |= ((y) << (bitpos)))
+
+/**
+******************************************************************************
+ * @brief sets n bits starting from position "bitpos" of input variable x (mask is built from an int constant)
+******************************************************************************
+ */
+#define SET_BITS(x, bitpos, n) ((x) |= (((1 << (n)) - 1) << (bitpos)))
+
+/**
+******************************************************************************
+ * @brief clears n bits starting from position "bitpos" of input variable x (mask is built from an int constant)
+******************************************************************************
+ */
+#define CLEAR_BITS(x, bitpos, n) ((x) &= (~(((1 << (n)) - 1) << (bitpos))))
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ * @brief Enumeration for memory records requested by entropy module
+******************************************************************************
+ */
+typedef enum
+{
+    ENTROPY_CTXT = 0,
+    ENTROPY_TOP_SKIP_FLAGS = 1,
+    ENTROPY_TOP_CU_DEPTH = 2,
+    ENTROPY_DUMMY_OUT_BUF = 3,
+
+    /* number of records; must stay the final enumerator */
+    NUM_ENTROPY_MEM_RECS
+
+} IHEVCE_ENTROPY_MEM_TABS_T;
+
+/*****************************************************************************/
+/* Structures */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ * @brief Entropy context for encoder
+******************************************************************************
+ */
+typedef struct entropy_context
+{
+ /** cabac engine context */
+ cab_ctxt_t s_cabac_ctxt;
+
+ /** bitstream context */
+ bitstrm_t s_bit_strm;
+
+ /**
+ * duplicate bitstream to generate entry offset
+ * to support entropy sync
+ */
+ bitstrm_t s_dup_bit_strm_ent_offset;
+
+ /** pointer to top row cu skip flags (1 bit per 8x8CU) */
+ UWORD8 *pu1_skip_cu_top;
+
+ /** pointer to top row cu depth buffer (1 byte per 8x8CU) */
+ UWORD8 *pu1_cu_depth_top;
+
+ /** pointers to parent cb coded block flags based on transform depth */
+ UWORD8 *apu1_cbf_cb[2];
+
+ /** pointers to parent cr coded block flags based on transform depth */
+ UWORD8 *apu1_cbf_cr[2];
+
+ /** left cu skip flags (max of 8) (1 bit per 8x8) */
+ UWORD32 u4_skip_cu_left;
+
+ /** array of left cu depths (max of 8) (1 byte per 8x8); NOTE(review): text previously said "skip flags" - confirm */
+ UWORD8 au1_cu_depth_left[8];
+
+ /** scratch array of cb coded block flags for tu recursion */
+ UWORD8 au1_cbf_cb[2][MAX_TFR_DEPTH + 1];
+
+ /** scratch array of cr coded block flags for tu recursion */
+ UWORD8 au1_cbf_cr[2][MAX_TFR_DEPTH + 1];
+
+ /** current ctb x offset w.r.t frame start */
+ WORD32 i4_ctb_x;
+
+ /** current ctb y offset w.r.t frame start */
+ WORD32 i4_ctb_y;
+
+ //These values are never consumed apart from test-bench. Observed on June16 2014.
+ /** current slice first ctb x offset w.r.t frame start */
+ /** current slice first ctb y offset w.r.t frame start */
+ WORD32 i4_ctb_slice_x;
+ WORD32 i4_ctb_slice_y;
+
+ /** x address of first CTB of next slice segment, in ctb units */
+ WORD32 i4_next_slice_seg_x;
+
+ /** y address of first CTB of next slice segment, in ctb units */
+ WORD32 i4_next_slice_seg_y;
+
+ /** scratch place holder for cu index of a ctb in context */
+ WORD32 i4_cu_idx;
+
+ /** scratch place holder for tu index of a cu in context */
+ WORD32 i4_tu_idx;
+
+ /** pcm not supported currently; this parameter shall be 0 */
+ WORD8 i1_ctb_num_pcm_blks;
+
+ /** indicates if qp delta is to be coded in transform unit of a cu */
+ WORD8 i1_encode_qp_delta;
+
+ /** place holder for current qp of a cu */
+ WORD8 i1_cur_qp;
+
+ /** log2ctbsize indicated in SPS */
+ WORD8 i1_log2_ctb_size;
+
+ /**************************************************************************/
+ /* Following are shared structures with the encoder loop */
+ /* entropy context is not the owner of these and hence not allocated here */
+ /**************************************************************************/
+ /** pointer to current vps parameters */
+ vps_t *ps_vps;
+
+ /** pointer to current sps parameters */
+ sps_t *ps_sps;
+
+ /** pointer to current pps parameters */
+ pps_t *ps_pps;
+
+ /** pointer to current sei parameters */
+ sei_params_t *ps_sei;
+
+ /** pointer to current slice header parameters */
+ slice_header_t *ps_slice_hdr;
+
+ /** pointer to frame level ctb structures prepared by main encode loop */
+ ctb_enc_loop_out_t *ps_frm_ctb;
+
+ /**
+ * array to store cu level qp for entire 64x64 ctb
+ */
+ WORD32 ai4_8x8_cu_qp[64];
+
+ /**
+ * flag to check if cbf of all tus in a given cu is zero
+ */
+ WORD32 i4_is_cu_cbf_zero;
+
+ /**
+ * flag to enable / disable residue encoding (used for RD opt bits estimate mode)
+ */
+ WORD32 i4_enable_res_encode;
+
+ /* flag to enable/disable insertion of SPS, VPS, PPS at CRA pictures */
+ WORD32 i4_sps_at_cdr_enable;
+
+ /* quantization group position variables which store the aligned position */
+ WORD32 i4_qg_pos_x;
+
+ WORD32 i4_qg_pos_y;
+
+ void *pv_tile_params_base;
+
+ s_pic_level_acc_info_t *ps_pic_level_info;
+
+ void *pv_sys_api;
+
+ /* Flag to control dependent slices.
+ 0: Disable all slice segment limits
+ 1: Enforce max number of CTBs (not supported)
+ 2: Enforce max number of bytes */
+ WORD32 i4_slice_segment_mode;
+
+ /* Max number of CTBs/bytes in encoded slice. Will be used only when
+ i4_slice_mode_enable is set to 1 or 2 in configuration file. This parameter is
+ used for limiting the size of encoded slice under user-configured value */
+ WORD32 i4_slice_segment_max_length;
+
+ /* Accumulated number of CTBs/bytes in current slice */
+ WORD32 i4_slice_seg_len;
+
+ /** Number of slice segments generated per picture
+ this parameter is to track the number of slices generated
+ and compare against MAX NUM VCL Nals allowed at a given level */
+ WORD32 i4_num_slice_seg;
+
+ /** Codec Level */
+ WORD32 i4_codec_level;
+
+ /**
+ * number of neighbour cus coded as skips; Cannot exceed 2 (1 left, 1 top)
+ */
+ WORD32 i4_num_nbr_skip_cus;
+
+ void *pv_dummy_out_buf;
+
+ WORD32 i4_bitstream_buf_size;
+} entropy_context_t;
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+WORD32 ihevce_encode_transform_tree(
+ entropy_context_t *ps_entropy_ctxt,
+ WORD32 x0_ctb,
+ WORD32 y0_ctb,
+ WORD32 log2_tr_size,
+ WORD32 tr_depth,
+ WORD32 blk_num,
+ cu_enc_loop_out_t *ps_enc_cu);
+
+WORD32 ihevce_cabac_residue_encode(
+ entropy_context_t *ps_entropy_ctxt, void *pv_coeff, WORD32 log2_tr_size, WORD32 is_luma);
+
+WORD32 ihevce_cabac_residue_encode_rdopt(
+ entropy_context_t *ps_entropy_ctxt,
+ void *pv_coeff,
+ WORD32 log2_tr_size,
+ WORD32 is_luma,
+ WORD32 perform_rdoq);
+
+WORD32 ihevce_cabac_residue_encode_rdoq(
+ entropy_context_t *ps_entropy_ctxt,
+ void *pv_coeff,
+ WORD32 log2_tr_size,
+ WORD32 is_luma,
+ void *ps_rdoq_ctxt_1,
+ LWORD64 *pi8_tu_coded_dist,
+ LWORD64 *pi8_not_coded_dist,
+ WORD32 perform_sbh);
+
+WORD32 ihevce_find_new_last_csb(
+ WORD32 *pi4_subBlock2csbfId_map,
+ WORD32 cur_last_csb_pos,
+ void *pv_rdoq_ctxt,
+ UWORD8 *pu1_trans_table,
+ UWORD8 *pu1_csb_table,
+ WORD16 *pi2_coeffs,
+ WORD32 shift_value,
+ WORD32 mask_value,
+ UWORD8 **ppu1_addr);
+
+WORD32 ihevce_code_all_sig_coeffs_as_0_explicitly(
+ void *pv_rdoq_ctxt,
+ WORD32 i,
+ UWORD8 *pu1_trans_table,
+ WORD32 is_luma,
+ WORD32 scan_type,
+ WORD32 infer_coeff,
+ WORD32 nbr_csbf,
+ cab_ctxt_t *ps_cabac);
+
+void ihevce_copy_backup_ctxt(
+ void *pv_dest, void *pv_src, void *pv_backup_ctxt_dest, void *pv_backup_ctxt_src);
+
+WORD32 ihevce_cabac_encode_coding_unit(
+ entropy_context_t *ps_entropy_ctxt,
+ cu_enc_loop_out_t *ps_enc_cu,
+ WORD32 cu_depth,
+ WORD32 top_avail,
+ WORD32 left_avail);
+
+WORD32 ihevce_encode_slice_data(
+ entropy_context_t *ps_entropy_ctxt,
+ ihevce_tile_params_t *ps_tile_params,
+ WORD32 *pi4_end_of_slice_flag);
+
+WORD32 ihevce_cabac_encode_sao(
+ entropy_context_t *ps_entropy_ctxt, ctb_enc_loop_out_t *ps_ctb_enc_loop_out);
+
+#endif /* _IHEVCE_ENTROPY_STRUCTS_H_ */
diff --git a/encoder/ihevce_error_check.c b/encoder/ihevce_error_check.c
new file mode 100644
index 0000000..0652858
--- /dev/null
+++ b/encoder/ihevce_error_check.c
@@ -0,0 +1,1600 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*!
+******************************************************************************
+* \file ihevce_error_check.c
+*
+* \brief
+* This file contains all the functions which checks the validity of the
+* parameters passed to the encoder.
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+* List of Functions
+* ihevce_get_level_index()
+* ihevce_hle_validate_static_params()
+* ihevce_validate_tile_config_params()
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_macros.h"
+#include "ihevc_debug.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+#include "ihevc_trans_tables.h"
+#include "ihevc_trans_macros.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_hle_interface.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_multi_thrd_funcs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_error_checks.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_global_tables.h"
+#include "ihevce_trace.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_validate_tile_config_params \endif
+*
+* \brief
+* This function validates the static parameters related to tiles
+*
+* \param[in] Encoder static config prms pointer
+*
+* \return
+* IHEVCE_SUCCESS when tiles are disabled, else an unsupported-input error code
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+WORD32 ihevce_validate_tile_config_params(ihevce_static_cfg_params_t *ps_static_cfg_prms)
+{
+    ihevce_sys_api_t *ps_api = &ps_static_cfg_prms->s_sys_api;
+    void *pv_handle = ps_api->pv_cb_handle;
+    WORD32 status = IHEVCE_SUCCESS;
+
+    /* Tile encoding is not supported yet, so only a zero flag is accepted */
+    if(0 == ps_static_cfg_prms->s_app_tile_params.i4_tiles_enabled_flag)
+    {
+        return status;
+    }
+
+    status = IHEVCE_BAD_TILE_CONFIGURATION;
+    ps_api->ihevce_printf(
+        pv_handle, "IHEVCE ERROR: i4_tiles_enabled_flag should be set to 0 \n");
+    return IHEVCE_SETUNSUPPORTEDINPUT(status);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_hle_validate_static_params \endif
+*
+* \brief
+* This function validates the static parameters before creating the encoder
+* instance.
+*
+* \param[in] Encoder context pointer
+*
+* \return
+* Error code
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+WORD32 ihevce_hle_validate_static_params(ihevce_static_cfg_params_t *ps_static_cfg_prms)
+{
+ WORD32 error_code;
+ WORD32 i4_resolution_id;
+ WORD32 ai4_num_bitrate_instances[IHEVCE_MAX_NUM_RESOLUTIONS] = { 1 };
+ WORD32 i4_num_resolutions;
+ ihevce_sys_api_t *ps_sys_api = &ps_static_cfg_prms->s_sys_api;
+ void *pv_cb_handle = ps_sys_api->pv_cb_handle;
+
+ /* derive local variables */
+ i4_num_resolutions = ps_static_cfg_prms->s_tgt_lyr_prms.i4_num_res_layers;
+ for(i4_resolution_id = 0; i4_resolution_id < i4_num_resolutions; i4_resolution_id++)
+ {
+ ai4_num_bitrate_instances[i4_resolution_id] =
+ ps_static_cfg_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id]
+ .i4_num_bitrate_instances;
+ }
+ // clang-format off
+ if(0 != ps_static_cfg_prms->i4_log_dump_level)
+ {
+ /* Print all the config params */
+ if((0 == ps_static_cfg_prms->i4_res_id) && (0 == ps_static_cfg_prms->i4_br_id))
+ {
+ WORD32 i4_resolution_id_loop, i4_i;
+ WORD32 i4_num_res_layers = ps_static_cfg_prms->s_tgt_lyr_prms.i4_num_res_layers;
+
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "**********************************************\n");
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "*********** STATIC PARAMS CONFIG *************\n");
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "**********************************************\n");
+
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : ps_static_cfg_prms->s_src_prms \n");
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_width %d \n", ps_static_cfg_prms->s_src_prms.i4_width);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_height %d \n", ps_static_cfg_prms->s_src_prms.i4_height);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_frm_rate_num %d \n", ps_static_cfg_prms->s_src_prms.i4_frm_rate_num);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_frm_rate_denom %d \n", ps_static_cfg_prms->s_src_prms.i4_frm_rate_denom);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_field_pic %d \n", ps_static_cfg_prms->s_src_prms.i4_field_pic);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_chr_format %d \n", ps_static_cfg_prms->s_src_prms.i4_chr_format);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_input_bit_depth %d \n", ps_static_cfg_prms->s_src_prms.i4_input_bit_depth);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_topfield_first %d \n\n", ps_static_cfg_prms->s_src_prms.i4_topfield_first);
+
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : ps_static_cfg_prms->s_tgt_lyr_prms \n");
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_num_res_layers %d \n", i4_num_res_layers);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_multi_res_layer_reuse %d \n", ps_static_cfg_prms->s_tgt_lyr_prms.i4_multi_res_layer_reuse);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_mbr_quality_setting %d \n", ps_static_cfg_prms->s_tgt_lyr_prms.i4_mbr_quality_setting);
+
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : For Each resolution,");
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "\nIHEVCE : i4_target_width ");
+ for(i4_resolution_id_loop = 0; i4_resolution_id_loop < i4_num_res_layers; i4_resolution_id_loop++)
+ {
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "res_id %d %d ", i4_resolution_id_loop, ps_static_cfg_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id_loop].i4_width);
+ }
+
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "\nIHEVCE : i4_target_width ");
+ for(i4_resolution_id_loop = 0; i4_resolution_id_loop < i4_num_res_layers; i4_resolution_id_loop++)
+ {
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "res_id %d %d ", i4_resolution_id_loop, ps_static_cfg_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id_loop].i4_height);
+ }
+
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "\nIHEVCE : i4_frm_rate_scale_factor ");
+ for(i4_resolution_id_loop = 0; i4_resolution_id_loop < i4_num_res_layers; i4_resolution_id_loop++)
+ {
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "res_id %d %d ", i4_resolution_id_loop,
+ ps_static_cfg_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id_loop].i4_frm_rate_scale_factor);
+ }
+
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "\nIHEVCE : i4_codec_level ");
+ for(i4_resolution_id_loop = 0; i4_resolution_id_loop < i4_num_res_layers; i4_resolution_id_loop++)
+ {
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "res_id %d %d ", i4_resolution_id_loop, ps_static_cfg_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id_loop].i4_codec_level);
+ }
+
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "\nIHEVCE : i4_num_bitrate_instances ");
+ for(i4_resolution_id_loop = 0; i4_resolution_id_loop < i4_num_res_layers; i4_resolution_id_loop++)
+ {
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "res_id %d %d", i4_resolution_id_loop,
+ ps_static_cfg_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id_loop].i4_num_bitrate_instances);
+ }
+
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "\n");
+ for(i4_resolution_id_loop = 0; i4_resolution_id_loop < i4_num_res_layers; i4_resolution_id_loop++)
+ {
+ WORD32 i4_num_bitrate_instances, i4_br_loop;
+ i4_num_bitrate_instances = ps_static_cfg_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id_loop].i4_num_bitrate_instances;
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_tgt_bitrate res_id %d ", i4_resolution_id_loop);
+ for(i4_br_loop = 0; i4_br_loop < i4_num_bitrate_instances; i4_br_loop++)
+ {
+ PRINTF(
+ ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "br_id %d %d ", i4_br_loop, ps_static_cfg_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id_loop].ai4_tgt_bitrate[i4_br_loop]);
+ }
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "\n");
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_peak_bitrate res_id %d ", i4_resolution_id_loop);
+ for(i4_br_loop = 0; i4_br_loop < i4_num_bitrate_instances; i4_br_loop++)
+ {
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "br_id %d %d ", i4_br_loop,
+ ps_static_cfg_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id_loop].ai4_peak_bitrate[i4_br_loop]);
+ }
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "\n");
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : vbv_buffer_size res_id %d ", i4_resolution_id_loop);
+ for(i4_br_loop = 0; i4_br_loop < i4_num_bitrate_instances; i4_br_loop++)
+ {
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "br_id %d %d ", i4_br_loop,
+ ps_static_cfg_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id_loop].ai4_max_vbv_buffer_size[i4_br_loop]);
+ }
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "\n");
+ }
+
+ for(i4_resolution_id_loop = 0; i4_resolution_id_loop < i4_num_res_layers; i4_resolution_id_loop++)
+ {
+ WORD32 i4_num_bitrate_instances, i4_br_loop;
+
+ i4_num_bitrate_instances = ps_static_cfg_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id_loop].i4_num_bitrate_instances;
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_frame_qp res_id %d ", i4_resolution_id_loop);
+ for(i4_br_loop = 0; i4_br_loop < i4_num_bitrate_instances; i4_br_loop++)
+ {
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "br_id %d %d ", i4_br_loop, ps_static_cfg_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id_loop].ai4_frame_qp[i4_br_loop]);
+ }
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "\n");
+ }
+
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_internal_bit_depth %d \n", ps_static_cfg_prms->s_tgt_lyr_prms.i4_internal_bit_depth);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_enable_temporal_scalability %d \n", ps_static_cfg_prms->s_tgt_lyr_prms.i4_enable_temporal_scalability);
+
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_quality_preset ");
+ for(i4_resolution_id_loop = 0; i4_resolution_id_loop < i4_num_res_layers; i4_resolution_id_loop++)
+ {
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "res_id %d %d", i4_resolution_id_loop, ps_static_cfg_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id_loop].i4_quality_preset);
+ }
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "\n");
+
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "\nIHEVCE : ps_static_cfg_prms->s_coding_tools_prms \n");
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_max_idr_period %d \n", ps_static_cfg_prms->s_coding_tools_prms.i4_max_closed_gop_period);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_min_idr_period %d \n", ps_static_cfg_prms->s_coding_tools_prms.i4_min_closed_gop_period);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_max_cra_period %d \n", ps_static_cfg_prms->s_coding_tools_prms.i4_max_cra_open_gop_period);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_i_cra_period %d \n", ps_static_cfg_prms->s_coding_tools_prms.i4_max_i_open_gop_period);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_max_temporal_layers %d \n", ps_static_cfg_prms->s_coding_tools_prms.i4_max_temporal_layers);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_max_reference_frames %d \n", ps_static_cfg_prms->s_coding_tools_prms.i4_max_reference_frames);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_deblocking_type %d \n", ps_static_cfg_prms->s_coding_tools_prms.i4_deblocking_type);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_use_default_sc_mtx %d \n", ps_static_cfg_prms->s_coding_tools_prms.i4_use_default_sc_mtx);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_enable_entropy_sync %d \n", ps_static_cfg_prms->s_coding_tools_prms.i4_enable_entropy_sync);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_cropping_mode %d \n", ps_static_cfg_prms->s_coding_tools_prms.i4_cropping_mode);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_vqet %d \n", ps_static_cfg_prms->s_coding_tools_prms.i4_vqet);
+
+ switch(ps_static_cfg_prms->e_arch_type)
+ {
+ case ARCH_NA:
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : archType %d \n", 0);
+ break;
+#ifdef ARM
+ case ARCH_ARM_NONEON:
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : archType %d \n", 4);
+ break;
+#endif
+ default:
+ break;
+ }
+
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "\nIHEVCE : ps_static_cfg_prms->s_config_prms \n");
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_num_frms_to_encode %d \n", ps_static_cfg_prms->s_config_prms.i4_num_frms_to_encode);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_max_log2_cu_size %d \n", ps_static_cfg_prms->s_config_prms.i4_max_log2_cu_size);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_max_log2_cu_size %d \n", ps_static_cfg_prms->s_config_prms.i4_min_log2_cu_size);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_max_log2_cu_size %d \n", ps_static_cfg_prms->s_config_prms.i4_max_log2_tu_size);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_max_log2_cu_size %d \n", ps_static_cfg_prms->s_config_prms.i4_min_log2_cu_size);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_max_tr_tree_depth_I %d \n", ps_static_cfg_prms->s_config_prms.i4_max_tr_tree_depth_I);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_max_tr_tree_depth_nI %d \n", ps_static_cfg_prms->s_config_prms.i4_max_tr_tree_depth_nI);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_max_search_range_horz %d \n", ps_static_cfg_prms->s_config_prms.i4_max_search_range_horz);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_max_search_range_vert %d \n", ps_static_cfg_prms->s_config_prms.i4_max_search_range_vert);
+
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "\nIHEVCE : ps_static_cfg_prms->s_multi_thrd_prms \n");
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_max_num_cores %d \n", ps_static_cfg_prms->s_multi_thrd_prms.i4_max_num_cores);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_use_thrd_affinity %d \n", ps_static_cfg_prms->s_multi_thrd_prms.i4_use_thrd_affinity);
+
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "\nIHEVCE : rate control params \n");
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_rate_control_mode %d \n", ps_static_cfg_prms->s_config_prms.i4_rate_control_mode);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_cu_level_rc %d \n", ps_static_cfg_prms->s_config_prms.i4_cu_level_rc);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_pass %d \n", ps_static_cfg_prms->s_pass_prms.i4_pass);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_rate_factor %d \n", ps_static_cfg_prms->s_config_prms.i4_rate_factor);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_vbr_max_peak_rate_dur %d \n", ps_static_cfg_prms->s_config_prms.i4_vbr_max_peak_rate_dur);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_init_vbv_fullness %d \n", ps_static_cfg_prms->s_config_prms.i4_init_vbv_fullness);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_stuffing_enable %d \n", ps_static_cfg_prms->s_config_prms.i4_stuffing_enable);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_max_frame_qp %d \n", ps_static_cfg_prms->s_config_prms.i4_max_frame_qp);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_min_frame_qp %d \n", ps_static_cfg_prms->s_config_prms.i4_min_frame_qp);
+
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "\n");
+
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "\nIHEVCE : ps_static_cfg_prms->s_lap_prms\n");
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_rc_look_ahead_pics %d \n", ps_static_cfg_prms->s_lap_prms.i4_rc_look_ahead_pics);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_enable_wts_ofsts %d \n", ps_static_cfg_prms->s_lap_prms.i4_enable_wts_ofsts);
+
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "\nIHEVCE : ps_static_cfg_prms->s_out_strm_prms\n");
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_codec_type %d \n", ps_static_cfg_prms->s_out_strm_prms.i4_codec_type);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_codec_profile %d \n", ps_static_cfg_prms->s_out_strm_prms.i4_codec_profile);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_codec_tier %d \n", ps_static_cfg_prms->s_out_strm_prms.i4_codec_tier);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_aud_enable_flags %d \n", ps_static_cfg_prms->s_out_strm_prms.i4_aud_enable_flags);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_interop_flags %d \n", ps_static_cfg_prms->s_out_strm_prms.i4_interop_flags);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_sps_at_cdr_enable %d \n", ps_static_cfg_prms->s_out_strm_prms.i4_sps_at_cdr_enable);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_vui_enable %d \n", ps_static_cfg_prms->s_out_strm_prms.i4_vui_enable);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_sei_enable_flag %d \n", ps_static_cfg_prms->s_out_strm_prms.i4_sei_enable_flag);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_sei_payload_enable_flag %d \n", ps_static_cfg_prms->s_out_strm_prms.i4_sei_payload_enable_flag);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_sei_buffer_period_flags %d \n", ps_static_cfg_prms->s_out_strm_prms.i4_sei_buffer_period_flags);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_sei_pic_timing_flags %d \n", ps_static_cfg_prms->s_out_strm_prms.i4_sei_pic_timing_flags);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_sei_cll_enable %d \n", ps_static_cfg_prms->s_out_strm_prms.i4_sei_cll_enable);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : u2_sei_avg_cll %d \n", ps_static_cfg_prms->s_out_strm_prms.u2_sei_avg_cll);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : u2_sei_max_cll %d \n", ps_static_cfg_prms->s_out_strm_prms.u2_sei_max_cll);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_sei_recovery_point_flags %d \n", ps_static_cfg_prms->s_out_strm_prms.i4_sei_recovery_point_flags);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_sei_mastering_disp_colour_vol_flags %d \n", ps_static_cfg_prms->s_out_strm_prms.i4_sei_mastering_disp_colour_vol_flags);
+ for(i4_i = 0; i4_i < 3; i4_i++)
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : u2_display_primaries_x[i4_i] %d \n", ps_static_cfg_prms->s_out_strm_prms.au2_display_primaries_x[i4_i]);
+ for(i4_i = 0; i4_i < 3; i4_i++)
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : u2_display_primaries_y[i4_i] %d \n", ps_static_cfg_prms->s_out_strm_prms.au2_display_primaries_y[i4_i]);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : u2_white_point_x %d \n", ps_static_cfg_prms->s_out_strm_prms.u2_white_point_x);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : u2_white_point_y %d \n", ps_static_cfg_prms->s_out_strm_prms.u2_white_point_y);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : u4_max_display_mastering_luminance %d \n", ps_static_cfg_prms->s_out_strm_prms.u4_max_display_mastering_luminance);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : u4_min_display_mastering_luminance %d \n", ps_static_cfg_prms->s_out_strm_prms.u4_min_display_mastering_luminance);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_sei_hash_flags %d \n", ps_static_cfg_prms->s_out_strm_prms.i4_decoded_pic_hash_sei_flag);
+
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "\nIHEVCE : ps_static_cfg_prms->s_app_tile_params\n");
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_tiles_enabled_flag %d \n", ps_static_cfg_prms->s_app_tile_params.i4_tiles_enabled_flag);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_uniform_spacing_flag %d \n", ps_static_cfg_prms->s_app_tile_params.i4_uniform_spacing_flag);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_num_tile_cols %d \n", ps_static_cfg_prms->s_app_tile_params.i4_num_tile_cols);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_num_tile_rows %d \n", ps_static_cfg_prms->s_app_tile_params.i4_num_tile_rows);
+
+ for(i4_i = 0; i4_i < ps_static_cfg_prms->s_app_tile_params.i4_num_tile_cols; i4_i++)
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_column_width[i4_i] %d \n", ps_static_cfg_prms->s_app_tile_params.ai4_column_width[i4_i]);
+
+ for(i4_i = 0; i4_i < ps_static_cfg_prms->s_app_tile_params.i4_num_tile_rows; i4_i++)
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_row_height[i4_i] %d \n", ps_static_cfg_prms->s_app_tile_params.ai4_row_height[i4_i]);
+
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "\nIHEVCE : ps_static_cfg_prms->s_slice_params\n");
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_slice_segment_mode %d \n", ps_static_cfg_prms->s_slice_params.i4_slice_segment_mode);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_slice_segment_argument %d \n", ps_static_cfg_prms->s_slice_params.i4_slice_segment_argument);
+
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "\nIHEVCE : ps_static_cfg_prms->s_vui_sei_prms\n");
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : u1_aspect_ratio_info_present_flag %d \n", ps_static_cfg_prms->s_vui_sei_prms.u1_aspect_ratio_info_present_flag);
+
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : u1_aspect_ratio_idc ");
+ for(i4_resolution_id_loop = 0; i4_resolution_id_loop < i4_num_res_layers; i4_resolution_id_loop++)
+ {
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "res_id %d %d ", i4_resolution_id_loop, ps_static_cfg_prms->s_vui_sei_prms.au1_aspect_ratio_idc[i4_resolution_id_loop]);
+ }
+
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "\nIHEVCE : au2_sar_width ");
+ for(i4_resolution_id_loop = 0; i4_resolution_id_loop < i4_num_res_layers; i4_resolution_id_loop++)
+ {
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "res_id %d %d ", i4_resolution_id_loop, ps_static_cfg_prms->s_vui_sei_prms.au2_sar_width[i4_resolution_id_loop]);
+ }
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "\nIHEVCE : au2_sar_width ");
+ for(i4_resolution_id_loop = 0; i4_resolution_id_loop < i4_num_res_layers; i4_resolution_id_loop++)
+ {
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "res_id %d %d ", i4_resolution_id_loop, ps_static_cfg_prms->s_vui_sei_prms.au2_sar_height[i4_resolution_id_loop]);
+ }
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "\nIHEVCE : u1_overscan_info_present_flag %d \n", ps_static_cfg_prms->s_vui_sei_prms.u1_overscan_info_present_flag);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : u1_overscan_appropriate_flag %d \n", ps_static_cfg_prms->s_vui_sei_prms.u1_overscan_appropriate_flag);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : u1_video_signal_type_present_flag %d \n", ps_static_cfg_prms->s_vui_sei_prms.u1_video_signal_type_present_flag);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : u1_video_format %d \n", ps_static_cfg_prms->s_vui_sei_prms.u1_video_format);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : u1_video_full_range_flag %d \n", ps_static_cfg_prms->s_vui_sei_prms.u1_video_full_range_flag);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : u1_colour_description_present_flag %d \n", ps_static_cfg_prms->s_vui_sei_prms.u1_colour_description_present_flag);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : u1_colour_primaries %d \n", ps_static_cfg_prms->s_vui_sei_prms.u1_colour_primaries);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : u1_transfer_characteristics %d \n", ps_static_cfg_prms->s_vui_sei_prms.u1_transfer_characteristics);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : u1_matrix_coefficients %d \n", ps_static_cfg_prms->s_vui_sei_prms.u1_matrix_coefficients);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : u1_chroma_loc_info_present_flag %d \n", ps_static_cfg_prms->s_vui_sei_prms.u1_chroma_loc_info_present_flag);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : u1_chroma_sample_loc_type_top_field %d \n", ps_static_cfg_prms->s_vui_sei_prms.u1_chroma_sample_loc_type_top_field);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : u1_chroma_sample_loc_type_bottom_field %d \n", ps_static_cfg_prms->s_vui_sei_prms.u1_chroma_sample_loc_type_bottom_field);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : u1_timing_info_present_flag %d \n", ps_static_cfg_prms->s_vui_sei_prms.u1_timing_info_present_flag);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : u1_vui_hrd_parameters_present_flag %d \n", ps_static_cfg_prms->s_vui_sei_prms.u1_vui_hrd_parameters_present_flag);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : u1_nal_hrd_parameters_present_flag %d \n", ps_static_cfg_prms->s_vui_sei_prms.u1_nal_hrd_parameters_present_flag);
+ }
+
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "\nIHEVCE : ps_static_cfg_prms \n");
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_enable_logo %d \n", ps_static_cfg_prms->i4_enable_logo);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_log_dump_level %d \n", ps_static_cfg_prms->i4_log_dump_level);
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "IHEVCE : i4_save_recon %d \n", ps_static_cfg_prms->i4_save_recon);
+
+ PRINTF(ps_sys_api->pv_cb_handle, i4_res_id, i4_br_id, "**********************************************\n");
+ }
+ // clang-format on
+
+ if(ps_static_cfg_prms->s_multi_thrd_prms.i4_num_proc_groups > MAX_NUMBER_PROC_GRPS)
+ {
+ error_code = IHEVCE_UNSUPPORTED_PROC_CONFIG;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: Number of Processor Groups not supported \n");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+
+ /* Error check for system-api callback functions */
+ if(NULL == ps_sys_api->ihevce_printf)
+ {
+ error_code = IHEVCE_SYSTEM_APIS_NOT_INITIALLIZED;
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+ if(NULL == ps_sys_api->s_file_io_api.ihevce_fopen)
+ {
+ error_code = IHEVCE_SYSTEM_APIS_NOT_INITIALLIZED;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: ihevce_fopen callback function not initiallized\n");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+ if(NULL == ps_sys_api->s_file_io_api.ihevce_fclose)
+ {
+ error_code = IHEVCE_SYSTEM_APIS_NOT_INITIALLIZED;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: ihevce_fclose callback function not initiallized\n");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+ if(NULL == ps_sys_api->s_file_io_api.ihevce_fflush)
+ {
+ error_code = IHEVCE_SYSTEM_APIS_NOT_INITIALLIZED;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: ihevce_fflush callback function not initiallized\n");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+ if(NULL == ps_sys_api->s_file_io_api.ihevce_fseek)
+ {
+ error_code = IHEVCE_SYSTEM_APIS_NOT_INITIALLIZED;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: ihevce_fseek callback function not initiallized\n");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+ if(NULL == ps_sys_api->s_file_io_api.ihevce_fread)
+ {
+ error_code = IHEVCE_SYSTEM_APIS_NOT_INITIALLIZED;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: ihevce_fread callback function not initiallized\n");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+ if(NULL == ps_sys_api->s_file_io_api.ihevce_fscanf)
+ {
+ error_code = IHEVCE_SYSTEM_APIS_NOT_INITIALLIZED;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: ihevce_fscanf callback function not initiallized\n");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+ if(NULL == ps_sys_api->ihevce_sscanf)
+ {
+ error_code = IHEVCE_SYSTEM_APIS_NOT_INITIALLIZED;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: ihevce_sscanf callback function not initiallized\n");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+ if(NULL == ps_sys_api->s_file_io_api.ihevce_fprintf)
+ {
+ error_code = IHEVCE_SYSTEM_APIS_NOT_INITIALLIZED;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: ihevce_fprintf callback function not initiallized\n");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+ if(NULL == ps_sys_api->s_file_io_api.ihevce_fwrite)
+ {
+ error_code = IHEVCE_SYSTEM_APIS_NOT_INITIALLIZED;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: ihevce_fwrite callback function not initiallized\n");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+ if(NULL == ps_sys_api->ihevce_sprintf)
+ {
+ error_code = IHEVCE_SYSTEM_APIS_NOT_INITIALLIZED;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: ihevce_sprintf callback function not initiallized\n");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+
+ /* Error check for static source parameters */
+ if((ps_static_cfg_prms->s_src_prms.i4_orig_width > HEVCE_MAX_WIDTH) ||
+ (ps_static_cfg_prms->s_src_prms.i4_orig_width < 2))
+ {
+ error_code = IHEVCE_WIDTH_NOT_SUPPORTED;
+ ps_sys_api->ihevce_printf(pv_cb_handle, "IHEVCE ERROR: i4_src_width out of range \n");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+
+ if((ps_static_cfg_prms->s_src_prms.i4_orig_height > HEVCE_MAX_HEIGHT) ||
+ (ps_static_cfg_prms->s_src_prms.i4_orig_height < 2))
+ {
+ error_code = IHEVCE_HEIGHT_NOT_SUPPORTED;
+ ps_sys_api->ihevce_printf(pv_cb_handle, "IHEVCE ERROR: i4_src_height out of range \n");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+ /*check for odd resolution*/
+ if(0 != (ps_static_cfg_prms->s_src_prms.i4_width & 1))
+ {
+ error_code = IHEVCE_WIDTH_NOT_SUPPORTED;
+ ps_sys_api->ihevce_printf(pv_cb_handle, "IHEVCE ERROR: i4_src_width not supported \n");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+ if(0 != (ps_static_cfg_prms->s_src_prms.i4_height & 1))
+ {
+ error_code = IHEVCE_HEIGHT_NOT_SUPPORTED;
+ ps_sys_api->ihevce_printf(pv_cb_handle, "IHEVCE ERROR: i4_src_height not supported \n");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+
+ if((ps_static_cfg_prms->s_src_prms.i4_frm_rate_denom != 1000) &&
+ (ps_static_cfg_prms->s_src_prms.i4_frm_rate_denom != 1001))
+ {
+ error_code = IHEVCE_FRAME_RATE_NOT_SUPPORTED;
+ ps_sys_api->ihevce_printf(pv_cb_handle, "IHEVCE ERROR: frame rate denom not supported \n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+
+ if((((ps_static_cfg_prms->s_src_prms.i4_frm_rate_num * 1.0) /
+ ps_static_cfg_prms->s_src_prms.i4_frm_rate_denom) > MAX_FRAME_RATE) ||
+ (((ps_static_cfg_prms->s_src_prms.i4_frm_rate_num * 1.0) /
+ ps_static_cfg_prms->s_src_prms.i4_frm_rate_denom) < MIN_FRAME_RATE))
+ {
+ error_code = IHEVCE_FRAME_RATE_NOT_SUPPORTED;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle,
+ "IHEVCE ERROR: Frame rate (%d / %d) is out of range [%.1f - %.1f]\n",
+ ps_static_cfg_prms->s_src_prms.i4_frm_rate_num,
+ ps_static_cfg_prms->s_src_prms.i4_frm_rate_denom,
+ MIN_FRAME_RATE, MAX_FRAME_RATE);
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+
+ if(ps_static_cfg_prms->s_src_prms.i4_field_pic != 0)
+ {
+ error_code = IHEVCE_CONTENT_TYPE_NOT_SUPPORTED;
+ ps_sys_api->ihevce_printf(pv_cb_handle, "IHEVCE ERROR: Field encoding not supported \n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+
+ if(ps_static_cfg_prms->s_src_prms.inp_chr_format != IV_YUV_420SP_UV &&
+ ps_static_cfg_prms->s_src_prms.inp_chr_format != IV_YUV_420P)
+ {
+ error_code = IHEVCE_CHROMA_FORMAT_NOT_SUPPORTED;
+ ps_sys_api->ihevce_printf(pv_cb_handle, "IHEVCE ERROR: i4_input_chroma_format Invalid \n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+
+ if(ps_static_cfg_prms->s_src_prms.i4_chr_format != IV_YUV_420SP_UV)
+ {
+ error_code = IHEVCE_CHROMA_FORMAT_NOT_SUPPORTED;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: i4_internal_chroma_format Invalid \n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+
+ /* Check error for interoperability flags */
+ if(ps_static_cfg_prms->s_out_strm_prms.i4_interop_flags != 0)
+ {
+ error_code = IHEVCE_INTEROPERABILITY_FLAG_SUPPORTED;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: i4_interop_flags out of range, to be set to 0\n");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+
+ /* Error check for static output stream parameters */
+ if(ps_static_cfg_prms->s_out_strm_prms.i4_codec_type != 0)
+ {
+ error_code = IHEVCE_CODEC_NOT_SUPPORTED;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: i4_codec_type should be set to 0 \n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+
+ if(ps_static_cfg_prms->s_out_strm_prms.i4_codec_profile != 1)
+ {
+ error_code = IHEVCE_CODEC_PROFILE_NOT_SUPPORTED;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: i4_codec_profile should be set to 1 \n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+
+ if(ps_static_cfg_prms->s_tgt_lyr_prms.i4_internal_bit_depth != 8)
+ {
+ error_code = IHEVCE_OUTPUT_BIT_DEPTH_OUT_OF_RANGE;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle,
+ "IHEVCE ERROR: (output_bit_depth = %d) not supported \n",
+ ps_static_cfg_prms->s_tgt_lyr_prms.i4_internal_bit_depth);
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+
+ if(ps_static_cfg_prms->s_src_prms.i4_input_bit_depth != 8)
+ {
+ error_code = IHEVCE_INPUT_BIT_DEPTH_OUT_OF_RANGE;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: i4_input_bit_depth value not supported \n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+
+ if((ps_static_cfg_prms->s_out_strm_prms.i4_vui_enable > 1) ||
+ (ps_static_cfg_prms->s_out_strm_prms.i4_vui_enable < 0))
+ {
+ error_code = IHEVCE_VUI_ENABLE_OUT_OF_RANGE;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: i4_vui_enable should be set to 1 or 0 \n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+
+ if((ps_static_cfg_prms->s_out_strm_prms.i4_sei_enable_flag > 1) ||
+ (ps_static_cfg_prms->s_out_strm_prms.i4_sei_enable_flag < 0))
+ {
+ error_code = IHEVCE_SEI_ENABLE_OUT_OF_RANGE;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: i4_sei_enable_flags should be set to 1 or 0 \n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+
+ if((ps_static_cfg_prms->s_out_strm_prms.i4_sei_payload_enable_flag > 1) ||
+ (ps_static_cfg_prms->s_out_strm_prms.i4_sei_payload_enable_flag < 0))
+ {
+ error_code = IHEVCE_SEI_PAYLOAD_ENABLE_OUT_OF_RANGE;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: i4_sei_payload_enable_flag should be set to 1 or 0 \n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+ if((ps_static_cfg_prms->s_multi_thrd_prms.i4_max_num_cores > MAX_NUM_CORES) ||
+ (ps_static_cfg_prms->s_multi_thrd_prms.i4_max_num_cores < 1))
+ {
+ error_code = IHEVCE_INVALID_CORE_CONFIG;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: Invalid Number of Cores configured\n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+
+ if((ps_static_cfg_prms->e_arch_type != ARCH_NA) &&
+ (ps_static_cfg_prms->e_arch_type != ARCH_ARM_NONEON))
+ {
+ error_code = IHEVCE_ARCHITECTURE_TYPE_UNSUPPORTED;
+ ps_sys_api->ihevce_printf(pv_cb_handle, "IHEVCE ERROR: unsupported archType \n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+
+ if(ps_static_cfg_prms->s_coding_tools_prms.i4_vqet != 0)
+ {
+ error_code = IHEVCE_VISUAL_QUALITY_ENHANCEMENTS_TOGGLER_VALUE_UNSUPPORTED;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle,
+ "IHEVCE ERROR: visual_quality_enhancements_toggler should be set to 0 \n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+
+ if(ps_static_cfg_prms->s_coding_tools_prms.i4_max_temporal_layers < 0 ||
+ ps_static_cfg_prms->s_coding_tools_prms.i4_max_temporal_layers > 3)
+ {
+ error_code = IHEVCE_TEMPORAL_LAYERS_NOT_SUPPORTED;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: i4_max_temporal_layers out of range \n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+
+ if((ps_static_cfg_prms->s_coding_tools_prms.i4_max_closed_gop_period < 0) ||
+ (ps_static_cfg_prms->s_coding_tools_prms.i4_max_cra_open_gop_period < 0) ||
+ (ps_static_cfg_prms->s_coding_tools_prms.i4_max_i_open_gop_period < 0))
+ {
+ error_code = IHEVCE_INVALID_GOP_PERIOD;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle,
+ "IHEVCE ERROR: gop period is not valid for the configured temporal layers\n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+
+ {
+ WORD32 sub_gop_size = (1 << ps_static_cfg_prms->s_coding_tools_prms.i4_max_temporal_layers)
+ << ps_static_cfg_prms->s_src_prms.i4_field_pic;
+ WORD32 i4_max_idr_period, i4_min_idr_period, i4_max_cra_period, i4_max_i_period;
+ WORD32 i4_max_i_distance;
+ WORD32 i4_min_i_distance = 0, i4_non_zero_idr_period = 0x7FFFFFFF,
+ i4_non_zero_cra_period = 0x7FFFFFFF, i4_non_zero_i_period = 0x7FFFFFFF;
+ i4_max_idr_period = ps_static_cfg_prms->s_coding_tools_prms.i4_max_closed_gop_period;
+ i4_min_idr_period = ps_static_cfg_prms->s_coding_tools_prms.i4_min_closed_gop_period;
+ i4_max_cra_period = ps_static_cfg_prms->s_coding_tools_prms.i4_max_cra_open_gop_period;
+ i4_max_i_period = ps_static_cfg_prms->s_coding_tools_prms.i4_max_i_open_gop_period;
+ i4_max_i_distance = MAX(MAX(i4_max_idr_period, i4_max_cra_period), i4_max_i_period);
+
+ if(sub_gop_size > 1)
+ {
+ switch(sub_gop_size)
+ {
+ case 2:
+ ps_static_cfg_prms->s_coding_tools_prms.i4_max_closed_gop_period =
+ ALIGN2(i4_max_idr_period);
+
+ if(i4_max_idr_period > 1)
+ ps_static_cfg_prms->s_coding_tools_prms.i4_max_closed_gop_period =
+ ps_static_cfg_prms->s_coding_tools_prms.i4_max_closed_gop_period + 1;
+
+ ps_static_cfg_prms->s_coding_tools_prms.i4_max_cra_open_gop_period =
+ ALIGN2(i4_max_cra_period);
+ ps_static_cfg_prms->s_coding_tools_prms.i4_max_i_open_gop_period =
+ ALIGN2(i4_max_i_period);
+ break;
+ case 4:
+ ps_static_cfg_prms->s_coding_tools_prms.i4_max_closed_gop_period =
+ ALIGN4(i4_max_idr_period);
+
+ if(i4_max_idr_period > 1)
+ ps_static_cfg_prms->s_coding_tools_prms.i4_max_closed_gop_period =
+ ps_static_cfg_prms->s_coding_tools_prms.i4_max_closed_gop_period + 1;
+
+ ps_static_cfg_prms->s_coding_tools_prms.i4_max_cra_open_gop_period =
+ ALIGN4(i4_max_cra_period);
+ ps_static_cfg_prms->s_coding_tools_prms.i4_max_i_open_gop_period =
+ ALIGN4(i4_max_i_period);
+ break;
+ case 8:
+ ps_static_cfg_prms->s_coding_tools_prms.i4_max_closed_gop_period =
+ ALIGN8(i4_max_idr_period);
+
+ if(i4_max_idr_period > 1)
+ ps_static_cfg_prms->s_coding_tools_prms.i4_max_closed_gop_period =
+ ps_static_cfg_prms->s_coding_tools_prms.i4_max_closed_gop_period + 1;
+
+ ps_static_cfg_prms->s_coding_tools_prms.i4_max_cra_open_gop_period =
+ ALIGN8(i4_max_cra_period);
+ ps_static_cfg_prms->s_coding_tools_prms.i4_max_i_open_gop_period =
+ ALIGN8(i4_max_i_period);
+ break;
+ }
+ }
+
+ if(0 != i4_max_idr_period)
+ {
+ i4_non_zero_idr_period = i4_max_idr_period;
+ }
+ if(0 != i4_max_cra_period)
+ {
+ i4_non_zero_cra_period = i4_max_cra_period;
+ }
+ if(0 != i4_max_i_period)
+ {
+ i4_non_zero_i_period = i4_max_i_period;
+ }
+ i4_min_i_distance =
+ MIN(MIN(i4_non_zero_idr_period, i4_non_zero_cra_period), i4_non_zero_i_period);
+ if(i4_min_i_distance < sub_gop_size && i4_min_i_distance)
+ {
+ error_code = IHEVCE_INVALID_GOP_PERIOD;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle,
+ "IHEVCE ERROR: gop period is not valid for the configured temporal layers\n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+
+ if((i4_min_idr_period > i4_max_idr_period) || (i4_min_idr_period < 0))
+ {
+ error_code = IHEVCE_INVALID_GOP_PERIOD;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle,
+ "IHEVCE ERROR: gop period is not valid => min closed gop > max closed gop\n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+ if(ps_static_cfg_prms->s_coding_tools_prms.i4_max_temporal_layers && i4_max_i_distance == 1)
+ {
+ error_code = IHEVCE_TEMPORAL_LAYERS_NOT_SUPPORTED;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: Invalid max temporal layer for I only encoding\n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+ if((i4_max_idr_period < i4_max_cra_period || i4_max_idr_period < i4_max_i_period) &&
+ i4_max_idr_period)
+ {
+ error_code = IHEVCE_INVALID_GOP_PERIOD;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle,
+ "IHEVCE ERROR: MAX IDR period can't be less than Max CRA or I period\n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+ if((i4_max_cra_period < i4_max_i_period) && i4_max_cra_period)
+ {
+ error_code = IHEVCE_INVALID_GOP_PERIOD;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: MAX CRA period can't be less than Max I period\n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+ }
+ if(0 != ps_static_cfg_prms->s_tgt_lyr_prms.i4_enable_temporal_scalability)
+ {
+ error_code = IHEVCE_INVALID_TEMPORAL_SCALABILITY;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: Temporal scalability is not supported \n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+
+ if(ps_static_cfg_prms->s_coding_tools_prms.i4_max_reference_frames != -1)
+ {
+ error_code = IHEVCE_REF_FRAMES_NOT_SUPPORTED;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: only supported value for i4_max_reference_frames is -1\n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+
+ if(ps_static_cfg_prms->s_coding_tools_prms.i4_weighted_pred_enable != 0 &&
+ ps_static_cfg_prms->s_coding_tools_prms.i4_weighted_pred_enable != 1)
+ {
+ error_code = IHEVCE_INVALID_WEIGHTED_PREDICTION_INPUT;
+ ps_sys_api->ihevce_printf(pv_cb_handle, "IHEVCE ERROR: i4_weighted_pred_enable invalid \n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+
+ if(ps_static_cfg_prms->s_coding_tools_prms.i4_deblocking_type != 0 &&
+ ps_static_cfg_prms->s_coding_tools_prms.i4_deblocking_type != 1 &&
+ ps_static_cfg_prms->s_coding_tools_prms.i4_deblocking_type != 2)
+ {
+ error_code = IHEVCE_INVALID_DEBLOCKING_TYPE_INPUT;
+ ps_sys_api->ihevce_printf(pv_cb_handle, "IHEVCE ERROR: i4_deblocking_type invalid\n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+
+ if(ps_static_cfg_prms->s_coding_tools_prms.i4_use_default_sc_mtx != 0 &&
+ ps_static_cfg_prms->s_coding_tools_prms.i4_use_default_sc_mtx != 1)
+ {
+ error_code = IHEVCE_INVALID_DEFAULT_SC_MATRIX_ENABLE_INPUT;
+ ps_sys_api->ihevce_printf(pv_cb_handle, "IHEVCE ERROR: i4_use_default_sc_mtx invalid \n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+
+ if(ps_static_cfg_prms->s_coding_tools_prms.i4_cropping_mode != 0 &&
+ ps_static_cfg_prms->s_coding_tools_prms.i4_cropping_mode != 1)
+ {
+ error_code = IHEVCE_INVALID_CROPPING_MODE;
+ ps_sys_api->ihevce_printf(pv_cb_handle, "IHEVCE ERROR: i4_cropping_mode invalid \n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+
+ /* Error checks for Static Config Parameters */
+ if(ps_static_cfg_prms->s_config_prms.i4_min_log2_cu_size != 3)
+ {
+ error_code = IHEVCE_MIN_CU_SIZE_INPUT_NOT_SUPPORTED;
+ ps_sys_api->ihevce_printf(pv_cb_handle, "IHEVCE ERROR: i4_min_log2_cu_size invalid \n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+
+ if(ps_static_cfg_prms->s_config_prms.i4_min_log2_tu_size != 2)
+ {
+ error_code = IHEVCE_MIN_TU_SIZE_INPUT_NOT_SUPPORTED;
+ ps_sys_api->ihevce_printf(pv_cb_handle, "IHEVCE ERROR: i4_min_log2_tu_size invalid \n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+
+ if(ps_static_cfg_prms->s_config_prms.i4_max_log2_cu_size < 4 ||
+ ps_static_cfg_prms->s_config_prms.i4_max_log2_cu_size > 6)
+ {
+ error_code = IHEVCE_MAX_CU_SIZE_INPUT_NOT_SUPPORTED;
+ ps_sys_api->ihevce_printf(pv_cb_handle, "IHEVCE ERROR: i4_max_log2_cu_size invalid \n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+
+ if(ps_static_cfg_prms->s_config_prms.i4_max_log2_tu_size < 2 ||
+ ps_static_cfg_prms->s_config_prms.i4_max_log2_tu_size > 5)
+ {
+ error_code = IHEVCE_MAX_TU_SIZE_INPUT_NOT_SUPPORTED;
+ ps_sys_api->ihevce_printf(pv_cb_handle, "IHEVCE ERROR: i4_max_log2_tu_size invalid \n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+
+ if(ps_static_cfg_prms->s_config_prms.i4_min_log2_cu_size == 4 &&
+ ps_static_cfg_prms->s_config_prms.i4_max_log2_tu_size == 5)
+ {
+ /* Because tu size should always be lesser than the cu size */
+ error_code = IHEVCE_INVALID_MAX_TU_SIZE;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle,
+ "IHEVCE ERROR: Invalid combination of i4_min_log2_cu_size and i4_max_log2_tu_size\n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+
+ if(ps_static_cfg_prms->s_config_prms.i4_max_tr_tree_depth_I < 1 ||
+ ps_static_cfg_prms->s_config_prms.i4_max_tr_tree_depth_I > 3)
+ {
+ error_code = IHEVCE_INVALID_TR_TREE_DEPTH_FOR_I_FRAME;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: i4_max_tr_tree_depth_I out of range\n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+
+ if(ps_static_cfg_prms->s_config_prms.i4_max_tr_tree_depth_nI < 1 ||
+ ps_static_cfg_prms->s_config_prms.i4_max_tr_tree_depth_nI > 4)
+ {
+ error_code = IHEVCE_INVALID_TR_TREE_DEPTH;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: i4_max_tr_tree_depth_nI out of range\n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+
+ if(ps_static_cfg_prms->s_config_prms.i4_max_search_range_horz < 64 ||
+ ps_static_cfg_prms->s_config_prms.i4_max_search_range_horz > 512)
+ {
+ error_code = IHEVCE_UNSUPPORTED_HORIZONTAL_SEARCH_RANGE;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: i4_max_search_range_horz out of range\n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+
+ if(ps_static_cfg_prms->s_config_prms.i4_max_search_range_vert < 32 ||
+ ps_static_cfg_prms->s_config_prms.i4_max_search_range_vert > 256)
+ {
+ error_code = IHEVCE_UNSUPPORTED_VERTICAL_SEARCH_RANGE;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: i4_max_search_range_vert out of range\n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+
+ if(ps_static_cfg_prms->s_lap_prms.i4_rc_look_ahead_pics > NUM_LAP2_LOOK_AHEAD ||
+ ps_static_cfg_prms->s_lap_prms.i4_rc_look_ahead_pics < 0)
+ {
+ error_code = IHEVCE_UNSUPPORTED_LOOK_AHEAD;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle,
+ "IHEVCE ERROR: rc look ahead pc must be in range of 0 to NUM_LAP2_LOOK_AHEAD\n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+
+ /* Num res instances should be less than equal to IHEVCE_MAX_NUM_RESOLUTIONS */
+ if((i4_num_resolutions < 1) || (i4_num_resolutions > IHEVCE_MAX_NUM_RESOLUTIONS))
+ {
+ error_code = IHEVCE_NUM_MAX_RESOLUTIONS_NOT_SUPPORTED;
+ ps_sys_api->ihevce_printf(pv_cb_handle, "IHEVCE ERROR: invalid i4_num_resolutions \n");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+
+ if((ps_static_cfg_prms->i4_res_id < 0) || (ps_static_cfg_prms->i4_res_id >= i4_num_resolutions))
+ {
+ error_code = IHEVCE_NUM_MAX_RESOLUTIONS_NOT_SUPPORTED;
+ ps_sys_api->ihevce_printf(pv_cb_handle, "IHEVCE ERROR: invalid i4_num_resolutions \n");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+
+ if((ps_static_cfg_prms->s_tgt_lyr_prms.i4_mres_single_out < 0) ||
+ (ps_static_cfg_prms->s_tgt_lyr_prms.i4_mres_single_out > 1))
+ {
+ error_code = IHEVCE_INVALID_MRES_SINGLE_OUT;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: invalid i4_mres_single_out value \n");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+
+ if((ps_static_cfg_prms->i4_save_recon) &&
+ (1 == ps_static_cfg_prms->s_tgt_lyr_prms.i4_mres_single_out))
+ {
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle,
+ "IHEVCE WARNING: i4_save_recon not supported for mres single out case \n");
+ ps_static_cfg_prms->i4_save_recon = 0;
+ }
+
+ if((1 == i4_num_resolutions) && (1 == ps_static_cfg_prms->s_tgt_lyr_prms.i4_mres_single_out))
+ {
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle,
+ "IHEVCE WARNING: i4_mres_single_out value changed to 0 for single resolution case \n");
+ ps_static_cfg_prms->s_tgt_lyr_prms.i4_mres_single_out = 0;
+ }
+
+ if(ps_static_cfg_prms->s_tgt_lyr_prms.i4_mbr_quality_setting < 0 ||
+ ps_static_cfg_prms->s_tgt_lyr_prms.i4_mbr_quality_setting > 3)
+ {
+ error_code = IHEVCE_INVALID_MBR_QUALITY_SETTING;
+ ps_sys_api->ihevce_printf(pv_cb_handle, "IHEVCE ERROR: invalid mbr quality setting\n");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+
+ if(ps_static_cfg_prms->s_tgt_lyr_prms.i4_multi_res_layer_reuse != 0)
+ {
+ error_code = IHEVCE_MULTI_RES_LAYER_REUSE_NOT_SUPPORTED;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle,
+ "IHEVCE ERROR: reuse of info across resolution is not currently supported \n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+
+ for(i4_resolution_id = 0; i4_resolution_id < i4_num_resolutions; i4_resolution_id++)
+ {
+ WORD32 codec_level_index, quality_preset, height, width, frm_rate_scale_factor;
+ WORD32 br_ctr;
+ UWORD32 u4_luma_sample_rate;
+ WORD32 max_dpb_size;
+ WORD32 i4_field_pic = ps_static_cfg_prms->s_src_prms.i4_field_pic;
+
+ codec_level_index = ihevce_get_level_index(
+ ps_static_cfg_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_codec_level);
+ quality_preset =
+ ps_static_cfg_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;
+ height = ps_static_cfg_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
+ width = ps_static_cfg_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
+ frm_rate_scale_factor = ps_static_cfg_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id]
+ .i4_frm_rate_scale_factor;
+ /* Check error for max picture size(luma) for the given level */
+ if((width * height) > g_as_level_data[codec_level_index].i4_max_luma_picture_size)
+ {
+ error_code = IHEVCE_PIC_SIZE_NOT_SUPPORTED;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle,
+ "IHEVCE ERROR: (i4_tgt_width * i4_tgt_height) out of range for resolution number "
+ "'%d' codec level %d "
+ "\n",
+ i4_resolution_id,
+ codec_level_index);
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+
+ if((width * height) <= (g_as_level_data[codec_level_index].i4_max_luma_picture_size >> 2))
+ {
+ max_dpb_size = 16;
+ }
+ else if((width * height) <= (g_as_level_data[codec_level_index].i4_max_luma_picture_size >> 1))
+ {
+ max_dpb_size = 12;
+ }
+ else if(
+ (width * height) <=
+ (3 * g_as_level_data[codec_level_index].i4_max_luma_picture_size >> 2))
+ {
+ max_dpb_size = 8;
+ }
+ else
+ {
+ max_dpb_size = 6;
+ }
+
+ /* DPB check */
+ if((((DEFAULT_MAX_REFERENCE_PICS - i4_field_pic) /*max reference*/ + 2) << i4_field_pic) >
+ max_dpb_size)
+ {
+ error_code = IHEVCE_CODEC_LEVEL_NOT_SUPPORTED;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: i4_codec_level should be set correct \n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+
+ if((quality_preset > IHEVCE_QUALITY_P7) || (quality_preset < 0) || (quality_preset == 1))
+ {
+ error_code = IHEVCE_INVALID_QUALITY_PRESET_INPUT;
+ ps_sys_api->ihevce_printf(pv_cb_handle, "IHEVCE ERROR: i4_quality_preset invalid \n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+
+ /* Error checks for target width and height */
+ if((height > HEVCE_MAX_HEIGHT) || (height < HEVCE_MIN_HEIGHT) ||
+ (height != ps_static_cfg_prms->s_src_prms.i4_height))
+ {
+ error_code = IHEVCE_TGT_HEIGHT_NOT_SUPPORTED;
+ ps_sys_api->ihevce_printf(pv_cb_handle, "IHEVCE ERROR: Target height not supported\n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+
+ if((width > HEVCE_MAX_WIDTH) || (width < HEVCE_MIN_WIDTH) ||
+ (width != ps_static_cfg_prms->s_src_prms.i4_width))
+ {
+ error_code = IHEVCE_TGT_WIDTH_NOT_SUPPORTED;
+ ps_sys_api->ihevce_printf(pv_cb_handle, "IHEVCE ERROR: Target width not supported\n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+
+ /* Error checks for the codec level */
+ if(ps_static_cfg_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_codec_level >
+ LEVEL6)
+ {
+ error_code = IHEVCE_CODEC_LEVEL_NOT_SUPPORTED;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle,
+ "IHEVCE ERROR: i4_codec_level should be set to a max value of 153 \n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+
+ if(frm_rate_scale_factor != 1)
+ {
+ error_code = IHEVCE_TGT_FRAME_RATE_SCALING_NOT_SUPPORTED;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: Target frame rate scaler should be 1 \n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+
+ u4_luma_sample_rate = (UWORD32)(width * height);
+ u4_luma_sample_rate *= (UWORD32)(
+ ps_static_cfg_prms->s_src_prms.i4_frm_rate_num /
+ (ps_static_cfg_prms->s_src_prms.i4_frm_rate_denom * frm_rate_scale_factor));
+ /* Check error for max samples rate (frame rate * luma picture size) for the given level */
+ if(u4_luma_sample_rate > g_as_level_data[codec_level_index].u4_max_luma_sample_rate)
+ {
+ error_code = IHEVCE_LUMA_SAMPLE_RATE_NOT_SUPPORTED;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle,
+ "IHEVCE ERROR: input sample rate (i4_src_width * i4_src_height * i4_frm_rate_num / "
+ "i4_frm_rate_denom ) "
+ "exceeds u4_max_luma_sample_rate\n");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+
+ /* Num instances should be less than equal to IHEVCE_MAX_NUM_BITRATES */
+ if((ai4_num_bitrate_instances[i4_resolution_id] < 1) ||
+ (ai4_num_bitrate_instances[i4_resolution_id] > IHEVCE_MAX_NUM_BITRATES))
+ {
+ error_code = IHEVCE_INVALID_NUM_BR_INSTANCES;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: invalid i4_num_bitrate_instances \n");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+
+ /* check for codec tier */
+ if((ps_static_cfg_prms->s_out_strm_prms.i4_codec_tier > HIGH_TIER) ||
+ (ps_static_cfg_prms->s_out_strm_prms.i4_codec_tier < MAIN_TIER))
+ {
+ error_code = IHEVC_CODEC_TIER_NOT_SUPPORTED;
+ ps_sys_api->ihevce_printf(pv_cb_handle, "IHEVCE ERROR: Codec tier out of range\n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+
+ if((ps_static_cfg_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_codec_level <
+ 120) &&
+ (ps_static_cfg_prms->s_out_strm_prms.i4_codec_tier == HIGH_TIER))
+ {
+ error_code = IHEVC_CODEC_TIER_NOT_SUPPORTED;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: Codec tier = HIGH TIER Not supported below Level 4\n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+
+ /* Check error for max bitrate for the given level */
+ for(br_ctr = 0; br_ctr < ai4_num_bitrate_instances[i4_resolution_id]; br_ctr++)
+ {
+ WORD32 frame_qp = ps_static_cfg_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id]
+ .ai4_frame_qp[br_ctr];
+ WORD32 tgt_bitrate = ps_static_cfg_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id]
+ .ai4_tgt_bitrate[br_ctr];
+ WORD32 peak_bitrate;
+
+ if(frame_qp > 51 || frame_qp <= 0)
+ {
+ error_code = IHEVCE_UNSUPPORTED_FRAME_QP;
+ ps_sys_api->ihevce_printf(pv_cb_handle, "IHEVCE ERROR: i4_frame_qp out of range\n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+ if((frame_qp < ps_static_cfg_prms->s_config_prms.i4_min_frame_qp) ||
+ ((frame_qp + ps_static_cfg_prms->s_coding_tools_prms.i4_max_temporal_layers + 1) >
+ ps_static_cfg_prms->s_config_prms.i4_max_frame_qp))
+ {
+ error_code = IHEVCE_UNSUPPORTED_FRAME_QP;
+ ps_sys_api->ihevce_printf(pv_cb_handle, "IHEVCE ERROR: i4_frame_qp out of range\n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+
+ if(tgt_bitrate >
+ g_as_level_data[codec_level_index]
+ .i4_max_bit_rate[ps_static_cfg_prms->s_out_strm_prms.i4_codec_tier] *
+ 1000 ||
+ tgt_bitrate <= 0)
+ {
+ error_code = IHEVCE_BITRATE_NOT_SUPPORTED;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle,
+ "IHEVCE ERROR: i4_tgt_bitrate out of range for resoltuion number %d bitrate "
+ "number %d\n",
+ i4_resolution_id,
+ br_ctr);
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+
+ peak_bitrate = tgt_bitrate << 1;
+ peak_bitrate =
+ MIN(peak_bitrate,
+ g_as_level_data[codec_level_index]
+ .i4_max_bit_rate[ps_static_cfg_prms->s_out_strm_prms.i4_codec_tier] *
+ 1000);
+ ps_static_cfg_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id]
+ .ai4_peak_bitrate[br_ctr] = peak_bitrate;
+ ps_static_cfg_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id]
+ .ai4_max_vbv_buffer_size[br_ctr] = peak_bitrate;
+ }
+ }
+
+ if((ps_static_cfg_prms->i4_br_id < 0) ||
+ (ps_static_cfg_prms->i4_br_id >= ai4_num_bitrate_instances[ps_static_cfg_prms->i4_res_id]))
+ {
+ error_code = IHEVCE_INVALID_NUM_BR_INSTANCES;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: invalid i4_num_bitrate_instances \n");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+
+ /* Check error for rate control mode for the given level */
+ if(ps_static_cfg_prms->s_config_prms.i4_rate_control_mode != 2 &&
+ ps_static_cfg_prms->s_config_prms.i4_rate_control_mode != 3 &&
+ ps_static_cfg_prms->s_config_prms.i4_rate_control_mode != 5)
+ {
+ error_code = IHEVCE_RATE_CONTROL_MDOE_NOT_SUPPORTED;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: i4_rate_control_mode out of range\n");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+
+ /* Check error for pass number */
+ if(ps_static_cfg_prms->s_pass_prms.i4_pass != 0)
+ {
+ error_code = IHEVCE_RATE_CONTROL_PASS_INVALID;
+ ps_sys_api->ihevce_printf(pv_cb_handle, "IHEVCE ERROR: i4_pass out of range\n");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+
+ /* Check error for cu level qp modulation for the given level */
+ if(ps_static_cfg_prms->s_config_prms.i4_cu_level_rc != 0 &&
+ ps_static_cfg_prms->s_config_prms.i4_cu_level_rc != 1)
+ {
+ error_code = IHEVCE_RATE_CONTROL_MDOE_NOT_SUPPORTED;
+ ps_sys_api->ihevce_printf(pv_cb_handle, "IHEVCE ERROR: i4_cu_level_rc out of range\n");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+
+ /* size error checks for the api structures */
+ if(ps_static_cfg_prms->i4_size != sizeof(ihevce_static_cfg_params_t))
+ {
+ error_code = IHEVCE_INVALID_SIZE;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle,
+ "IHEVCE ERROR: Size element of ihevce_static_cfg_params_t is not matching with actual "
+ "size");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+ if(ps_static_cfg_prms->s_src_prms.i4_size != sizeof(ihevce_src_params_t))
+ {
+ error_code = IHEVCE_INVALID_SIZE;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle,
+ "IHEVCE ERROR: Size element of ihevce_src_params_t is not matching with actual size");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+ if(ps_static_cfg_prms->s_tgt_lyr_prms.i4_size != sizeof(ihevce_tgt_layer_params_t))
+ {
+ error_code = IHEVCE_INVALID_SIZE;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle,
+ "IHEVCE ERROR: Size element of ihevce_tgt_layer_params_t is not matching with actual "
+ "size");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+ if(ps_static_cfg_prms->s_out_strm_prms.i4_size != sizeof(ihevce_out_strm_params_t))
+ {
+ error_code = IHEVCE_INVALID_SIZE;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle,
+ "IHEVCE ERROR: Size element of ihevce_out_strm_params_t is not matching with actual "
+ "size");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+ if(ps_static_cfg_prms->s_coding_tools_prms.i4_size != sizeof(ihevce_coding_params_t))
+ {
+ error_code = IHEVCE_INVALID_SIZE;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle,
+ "IHEVCE ERROR: Size element of ihevce_coding_params_t is not matching with actual "
+ "size");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+ if(ps_static_cfg_prms->s_config_prms.i4_size != sizeof(ihevce_config_prms_t))
+ {
+ error_code = IHEVCE_INVALID_SIZE;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle,
+ "IHEVCE ERROR: Size element of ihevce_config_prms_t is not matching with actual size");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+ if(ps_static_cfg_prms->s_multi_thrd_prms.i4_size != sizeof(ihevce_static_multi_thread_params_t))
+ {
+ error_code = IHEVCE_INVALID_SIZE;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle,
+ "IHEVCE ERROR: Size element of ihevce_static_multi_thread_params_t is not matching "
+ "with actual size");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+ for(i4_resolution_id = 0; i4_resolution_id < i4_num_resolutions; i4_resolution_id++)
+ {
+ if(ps_static_cfg_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_size !=
+ sizeof(ihevce_tgt_params_t))
+ {
+ error_code = IHEVCE_INVALID_SIZE;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle,
+ "IHEVCE ERROR: Size element of ihevce_tgt_params_t is not matching with actual "
+ "size");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+ }
+
+ if(ps_static_cfg_prms->s_lap_prms.i4_size != sizeof(ihevce_lap_params_t))
+ {
+ error_code = IHEVCE_INVALID_SIZE;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle,
+ "IHEVCE ERROR: Size element of ihevce_lap_params_t is not matching with actual size");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+
+ for(i4_resolution_id = 0; i4_resolution_id < i4_num_resolutions; i4_resolution_id++)
+ {
+ if(ps_static_cfg_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_size !=
+ sizeof(ihevce_tgt_params_t))
+ {
+ error_code = IHEVCE_INVALID_SIZE;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle,
+ "IHEVCE ERROR: Size element of ihevce_tgt_params_t is not matching with actual "
+ "size");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+ }
+
+ /* Check SEI related error checks */
+ if(1 == ps_static_cfg_prms->s_out_strm_prms.i4_sei_enable_flag)
+ {
+ WORD32 i;
+ /* Check values for i4_sei_hash_flags */
+ if(!((ps_static_cfg_prms->s_out_strm_prms.i4_decoded_pic_hash_sei_flag == 2) ||
+ (ps_static_cfg_prms->s_out_strm_prms.i4_decoded_pic_hash_sei_flag == 3) ||
+ (ps_static_cfg_prms->s_out_strm_prms.i4_decoded_pic_hash_sei_flag == 0)))
+ {
+ error_code = IHEVCE_SEI_HASH_VALUE_NOT_SUPPORTED;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: i4_sei_hash_flags out of range\n");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+
+ /* Content Light Level Info error check */
+ if((ps_static_cfg_prms->s_out_strm_prms.i4_sei_cll_enable > 1) ||
+ (ps_static_cfg_prms->s_out_strm_prms.i4_sei_cll_enable < 0))
+ {
+ error_code = IHEVCE_SEI_CLL_ENABLE_OUT_OF_RANGE;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: i4_sei_cll_enable out of range\n");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+
+ if((ps_static_cfg_prms->s_out_strm_prms.i4_sei_buffer_period_flags ||
+ ps_static_cfg_prms->s_out_strm_prms.i4_sei_pic_timing_flags) &&
+ (!ps_static_cfg_prms->s_out_strm_prms.i4_vui_enable))
+ {
+ error_code = IHEVCE_SEI_ENABLED_VUI_DISABLED;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle,
+ "IHEVCE ERROR: Both SEI and VUI ought to be enabled when either "
+ "'i4_sei_buffer_period_flags' or "
+ "'i4_sei_pic_timing_flags' are enabled\n");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+
+ if((1 == ps_static_cfg_prms->s_out_strm_prms.i4_sei_buffer_period_flags) &&
+ (3 == ps_static_cfg_prms->s_config_prms.i4_rate_control_mode))
+ {
+ error_code = IHEVCE_SEI_MESSAGES_DEPENDENCY;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle,
+ "IHEVCE ERROR: i4_sei_buffer_period_flags should be disabled for CQP mode of Rate "
+ "control \n");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+
+ /* Check values for i4_sei_mastering_disp_colour_vol_flags */
+ if((ps_static_cfg_prms->s_out_strm_prms.i4_sei_mastering_disp_colour_vol_flags != 0) &&
+ (ps_static_cfg_prms->s_out_strm_prms.i4_sei_mastering_disp_colour_vol_flags != 1))
+ {
+ error_code = IHEVCE_MASTERING_DISP_COL_VOL_OUT_OF_RANGE;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle,
+ "IHEVCE ERROR: i4_sei_mastering_disp_colour_vol_flags out of range\n");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+
+ if(1 == ps_static_cfg_prms->s_out_strm_prms.i4_sei_mastering_disp_colour_vol_flags)
+ {
+ /* Check values for u2_display_primaries_x and u2_display_primaries_y */
+ for(i = 0; i < 3; i++)
+ {
+ if((ps_static_cfg_prms->s_out_strm_prms.au2_display_primaries_x[i] > 50000))
+ {
+ error_code = IHEVCE_DISPLAY_PRIMARY_X_OUT_OF_RANGE;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: au2_display_primaries_x out of range\n");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+
+ if((ps_static_cfg_prms->s_out_strm_prms.au2_display_primaries_y[i] > 50000))
+ {
+ error_code = IHEVCE_DISPLAY_PRIMARY_Y_OUT_OF_RANGE;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: au2_display_primaries_y out of range\n");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+ }
+
+ if((ps_static_cfg_prms->s_out_strm_prms.u2_white_point_x > 50000))
+ {
+ error_code = IHEVCE_WHITE_POINT_X_OUT_OF_RANGE;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: u2_white_point_x out of range\n");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+
+ if((ps_static_cfg_prms->s_out_strm_prms.u2_white_point_y > 50000))
+ {
+ error_code = IHEVCE_WHITE_POINT_Y_OUT_OF_RANGE;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: u2_white_point_y out of range\n");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+
+ if(ps_static_cfg_prms->s_out_strm_prms.u4_max_display_mastering_luminance <=
+ ps_static_cfg_prms->s_out_strm_prms.u4_min_display_mastering_luminance)
+ {
+ error_code = IHEVCE_MAX_DISP_MATERING_LUM_OUT_OF_RANGE;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle,
+ "IHEVCE ERROR: u4_max_display_mastering_luminance should be greater then "
+ "u4_min_display_mastering_luminance \n");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+ }
+ }
+
+ if(1 == ps_static_cfg_prms->s_out_strm_prms.i4_vui_enable)
+ {
+ /* validate static vui parameters */
+ if(((ps_static_cfg_prms->s_vui_sei_prms.u1_aspect_ratio_info_present_flag & 0xFE) > 0))
+ {
+ error_code = IHEVC_INVALID_ASPECT_RATIO_PARAMS;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: invalid aspect ratio parameters\n");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+
+ if(((ps_static_cfg_prms->s_vui_sei_prms.u1_overscan_info_present_flag & 0xFE) > 0) ||
+ ((ps_static_cfg_prms->s_vui_sei_prms.u1_overscan_appropriate_flag & 0xFE) > 0))
+ {
+ error_code = IHEVC_INVALID_OVERSCAN_PARAMS;
+ ps_sys_api->ihevce_printf(pv_cb_handle, "IHEVCE ERROR: invalid overscan parameters\n");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+
+ if(((ps_static_cfg_prms->s_vui_sei_prms.u1_video_signal_type_present_flag & 0xFE) > 0) ||
+ (ps_static_cfg_prms->s_vui_sei_prms.u1_video_format > 5) ||
+ ((ps_static_cfg_prms->s_vui_sei_prms.u1_video_full_range_flag & 0xFE) > 0))
+ {
+ error_code = IHEVC_INVALID_VIDEO_PARAMS;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: invalid video signal type parameters\n");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+
+ if(((ps_static_cfg_prms->s_vui_sei_prms.u1_colour_description_present_flag & 0xFE) > 0))
+ {
+ error_code = IHEVC_INVALID_COLOUR_PARAMS;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: invalid colour description parameters\n");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+
+ if(((ps_static_cfg_prms->s_vui_sei_prms.u1_chroma_loc_info_present_flag & 0xFE) > 0) ||
+ (ps_static_cfg_prms->s_vui_sei_prms.u1_chroma_sample_loc_type_top_field > 5) ||
+ (ps_static_cfg_prms->s_vui_sei_prms.u1_chroma_sample_loc_type_bottom_field > 5))
+ {
+ error_code = IHEVC_INVALID_CHROMA_PARAMS;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: invalid chroma info parameters\n");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+
+ if((ps_static_cfg_prms->s_vui_sei_prms.u1_timing_info_present_flag & 0xFE) > 0)
+ {
+ error_code = IHEVC_INVALID_TIMING_INFO_PARAM;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: invalid timing info present flag\n");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+
+ if(((ps_static_cfg_prms->s_vui_sei_prms.u1_vui_hrd_parameters_present_flag & 0xFE) > 0) ||
+ ((ps_static_cfg_prms->s_vui_sei_prms.u1_nal_hrd_parameters_present_flag & 0xFE) > 0))
+ {
+ error_code = IHEVC_INVALID_HRD_PRESENT_PARAMS;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: invalid vui or vcl hrd parameters present flag\n");
+ return (IHEVCE_SETUNSUPPORTEDINPUT(error_code));
+ }
+ }
+
+ error_code = ihevce_validate_tile_config_params(ps_static_cfg_prms);
+ if(IHEVCE_SUCCESS != error_code)
+ {
+ return error_code;
+ }
+
+ if(ps_static_cfg_prms->s_slice_params.i4_slice_segment_mode != 0)
+ {
+ error_code = IHEVCE_BAD_SLICE_PARAMS;
+ ps_sys_api->ihevce_printf(
+ pv_cb_handle, "IHEVCE ERROR: i4_slice_segment_mode should be 0 \n");
+ return IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ }
+
+ return IHEVCE_SUCCESS;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_get_level_index \endif
+*
+* \brief
+*    Maps a codec level value (one of the LEVEL* constants) to its position
+*    in the ordered list LEVEL1, LEVEL2, LEVEL2_1, ..., LEVEL6_1, LEVEL6_2
+*
+* \param[in] i4_codec_level : codec level value to look up
+*
+* \return
+*    Zero-based index (0 for LEVEL1 up to 12 for LEVEL6_2); 0 is also
+*    returned when the value matches none of the listed levels
+*
+* \author
+*  Ittiam
+*
+*****************************************************************************
+*/
+WORD32 ihevce_get_level_index(WORD32 i4_codec_level)
+{
+    /* Known levels, each stored at the index that is reported for it */
+    static const WORD32 ai4_level_order[] = {
+        LEVEL1,
+        LEVEL2,
+        LEVEL2_1,
+        LEVEL3,
+        LEVEL3_1,
+        LEVEL4,
+        LEVEL4_1,
+        LEVEL5,
+        LEVEL5_1,
+        LEVEL5_2,
+        LEVEL6,
+        LEVEL6_1,
+        LEVEL6_2,
+    };
+    const WORD32 i4_num_levels =
+        (WORD32)(sizeof(ai4_level_order) / sizeof(ai4_level_order[0]));
+    WORD32 i4_idx;
+
+    for(i4_idx = 0; i4_idx < i4_num_levels; i4_idx++)
+    {
+        if(ai4_level_order[i4_idx] == i4_codec_level)
+        {
+            return i4_idx;
+        }
+    }
+
+    /* Unrecognised level value: report index 0, the same index as LEVEL1 */
+    return 0;
+}
diff --git a/encoder/ihevce_error_checks.h b/encoder/ihevce_error_checks.h
new file mode 100644
index 0000000..4945349
--- /dev/null
+++ b/encoder/ihevce_error_checks.h
@@ -0,0 +1,70 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_error_checks.h
+*
+* \brief
+* This file contains interface declarations of Error Checking functions
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_ERROR_CHECKS_H_
+#define _IHEVCE_ERROR_CHECKS_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Variable Declarations */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+WORD32 ihevce_hle_validate_static_params(ihevce_static_cfg_params_t *ps_static_cfg_prms);
+/* Returns the index for a LEVEL_T codec level value; unrecognised values yield index 0 */
+WORD32 ihevce_get_level_index(WORD32 i4_codec_level);
+
+#endif /* _IHEVCE_ERROR_CHECKS_H_ */
diff --git a/encoder/ihevce_error_codes.h b/encoder/ihevce_error_codes.h
new file mode 100644
index 0000000..4bdd274
--- /dev/null
+++ b/encoder/ihevce_error_codes.h
@@ -0,0 +1,435 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file ihevce_error_codes.h
+*
+* @brief
+* This file contains important error codes returned by various modules of
+* HEVC encoder
+*
+* @author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_ERROR_CODES_H_
+#define _IHEVCE_ERROR_CODES_H_
+
+/**
+******************************************************************************
+ * @brief Start (base) codes of the HEVC encoder error classes; bases are spaced 0x100 apart
+******************************************************************************
+ */
+#define IHEVCE_BITSTREAM_ERROR_START 0x0000E100
+#define IHEVCE_RC_ERROR_START 0x0000E200
+#define IHEVCE_LAP_ERROR_START 0x0000E300
+#define IHEVCE_API_ERROR_START 0x0000E400
+#define IHEVCE_CABAC_ERROR_START 0x0000E500
+
+/**
+******************************************************************************
+ * @brief Extended error code for each error in HEVC encoder
+******************************************************************************
+ */
+typedef enum
+{
+ /* Apart from IHEVCE_SUCCESS and IHEVCE_FAIL, each code is a class start code plus an offset */
+ /** no error */
+ IHEVCE_SUCCESS = 0,
+
+ /** bitstream init failure, buffer ptr not aligned to WORD (32bits) */
+ IHEVCE_BITSTREAM_BUFPTR_ALIGN_FAIL = IHEVCE_BITSTREAM_ERROR_START + 0x01,
+
+ /** bitstream init failure, buf size not multiple of WORD size (32bits) */
+ IHEVCE_BITSTREAM_BUFSIZE_ALIGN_FAIL = IHEVCE_BITSTREAM_ERROR_START + 0x02,
+
+ /** bitstream runtime failure, buf size limit exceeded during encode */
+ IHEVCE_BITSTREAM_BUFFER_OVERFLOW = IHEVCE_BITSTREAM_ERROR_START + 0x03,
+
+ /* Error codes for static source parameters */
+ /**width not set within supported limit */
+ IHEVCE_WIDTH_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x01,
+
+ /**height not set within supported limit */
+ IHEVCE_HEIGHT_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x02,
+
+ /**frame rate not supported */
+ IHEVCE_FRAME_RATE_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x03,
+
+ /*content type not supported */
+ IHEVCE_CONTENT_TYPE_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x04,
+
+ /* chroma format not supported */
+ IHEVCE_CHROMA_FORMAT_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x05,
+
+ /* Error codes for static output stream parameters */
+ /* codec not supported */
+ IHEVCE_CODEC_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x06,
+
+ /* codec profile not supported */
+ IHEVCE_CODEC_PROFILE_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x07,
+
+ /*codec level not supported */
+ IHEVCE_CODEC_LEVEL_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x08,
+
+ IHEVCE_VUI_ENABLE_OUT_OF_RANGE = IHEVCE_API_ERROR_START + 0x09,
+
+ IHEVCE_SEI_ENABLE_OUT_OF_RANGE = IHEVCE_API_ERROR_START + 0x0A,
+
+ IHEVCE_SPS_AT_CDR_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x0B,
+
+ /* Error codes for static coding tools parameters */
+ /* Temporal layers not supported */
+ IHEVCE_TEMPORAL_LAYERS_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x0C,
+
+ IHEVCE_INVALID_GOP_PERIOD = IHEVCE_API_ERROR_START + 0x0D,
+
+ IHEVCE_IDR_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x0E,
+
+ IHEVCE_REF_FRAMES_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x0F,
+
+ IHEVCE_CONSECUTIVE_MAX_TID_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x10,
+
+ IHEVCE_CONSECUTIVE_TID_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x11,
+
+ IHEVCE_INVALID_MIN_TID = IHEVCE_API_ERROR_START + 0x12,
+
+ IHEVCE_INVALID_WEIGHTED_PREDICTION_INPUT = IHEVCE_API_ERROR_START + 0x13,
+
+ IHEVCE_INVALID_TEMPORAL_MVP_INPUT = IHEVCE_API_ERROR_START + 0x14,
+
+ IHEVCE_INVALID_QUALITY_PRESET_INPUT = IHEVCE_API_ERROR_START + 0x15,
+
+ IHEVCE_CHROMA_PREDICTION_FROM_LUMA_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x16,
+
+ IHEVCE_INVALID_CONSTRAINED_IPRED_INPUT = IHEVCE_API_ERROR_START + 0x17,
+
+ IHEVCE_INVALID_ASYMETRIC_MACROBLOCK_ENABLE_INPUT = IHEVCE_API_ERROR_START + 0x18,
+
+ IHEVCE_INVALID_DEBLOCKING_TYPE_INPUT = IHEVCE_API_ERROR_START + 0x19,
+
+ IHEVCE_INVALID_SAO_ENABLE_INPUT = IHEVCE_API_ERROR_START + 0x1A,
+
+ IHEVCE_INVALID_LF_ACROSS_TILES_ENABLE_INPUT = IHEVCE_API_ERROR_START + 0x1B,
+
+ IHEVCE_INVALID_DEFAULT_SC_MATRIX_ENABLE_INPUT = IHEVCE_API_ERROR_START + 0x1C,
+
+ IHEVCE_INVALID_CROPPING_MODE = IHEVCE_API_ERROR_START + 0x1D,
+
+ IHEVCE_INVALID_FRAME_SKIPS_INPUT = IHEVCE_API_ERROR_START + 0x1E,
+
+ IHEVCE_INVALID_SLICE_TYPE_INPUT = IHEVCE_API_ERROR_START + 0x1F,
+
+ IHEVCE_INVALID_REFRESH_TYPE_INPUT = IHEVCE_API_ERROR_START + 0x20,
+
+ IHEVCE_INVALID_FORCE_FRAME_INPUT = IHEVCE_API_ERROR_START + 0x21,
+
+ /*Error codes for static config parameters */
+ IHEVCE_NUMBER_OF_TILES_INPUT_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x22,
+
+ IHEVCE_INDEPENDENT_TILES_INPUT_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x23,
+
+ IHEVCE_NUMBER_OF_TILE_ROWS_INPUT_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x24,
+
+ IHEVCE_NUMBER_OF_TILE_COLS_INPUT_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x25,
+
+ IHEVCE_MIN_CU_SIZE_INPUT_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x26,
+
+ IHEVCE_MIN_TU_SIZE_INPUT_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x27,
+
+ IHEVCE_MAX_CU_SIZE_INPUT_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x28,
+
+ IHEVCE_MAX_TU_SIZE_INPUT_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x29,
+
+ IHEVCE_INVALID_MAX_TU_SIZE = IHEVCE_API_ERROR_START + 0x2A,
+
+ IHEVCE_INVALID_TR_TREE_DEPTH_FOR_I_FRAME = IHEVCE_API_ERROR_START + 0x2B,
+
+ IHEVCE_INVALID_TR_TREE_DEPTH = IHEVCE_API_ERROR_START + 0x2C,
+
+ IHEVCE_UNSUPPORTED_FRAME_QP = IHEVCE_API_ERROR_START + 0x2D,
+
+ IHEVCE_UNSUPPORTED_HORIZONTAL_SEARCH_RANGE = IHEVCE_API_ERROR_START + 0x2E,
+
+ IHEVCE_UNSUPPORTED_VERTICAL_SEARCH_RANGE = IHEVCE_API_ERROR_START + 0x2F,
+
+ /*Error codes for static look ahead parameters */
+ IHEVCE_UNSUPPORTED_LOOK_AHEAD = IHEVCE_API_ERROR_START + 0x30,
+
+ /*Error codes for dynamic source parameters */
+ IHEVCE_DYN_WIDTH_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x31,
+
+ IHEVCE_DYN_HEIGHT_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x32,
+
+ IHEVCE_DYN_FRAME_RATE_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x33,
+
+ IHEVCE_DYN_CONTENT_TYPE_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x34,
+
+ IHEVCE_DYN_CHROMA_FORMAT_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x35,
+
+ IHEVCE_INVALID_DYN_WIDTH = IHEVCE_API_ERROR_START + 0x36,
+
+ IHEVCE_INVALID_DYN_HEIGHT = IHEVCE_API_ERROR_START + 0x37,
+
+ IHEVCE_INVALID_DYN_FRAME_RATE = IHEVCE_API_ERROR_START + 0x38,
+
+ IHEVCE_INVALID_DYN_CONTENT_TYPE = IHEVCE_API_ERROR_START + 0x39,
+
+ IHEVCE_INVALID_DYN_CHROMA_FORMAT = IHEVCE_API_ERROR_START + 0x3a,
+
+ /*Error codes for dynamic coding parameters */
+ IHEVCE_DYN_TEMPORAL_LAYERS_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x3b,
+
+ IHEVCE_DYN_CDR_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x3c,
+
+ IHEVCE_DYN_IDR_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x3d,
+
+ IHEVCE_DYN_REF_FRAMES_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x3e,
+
+ IHEVCE_DYN_CONSECUTIVE_MAX_TID_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x3f,
+
+ IHEVCE_DYN_LT_FRAMES_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x40,
+
+ IHEVCE_DYN_MIN_TID_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x41,
+
+ IHEVCE_DYN_WEIGHTED_PREDICTION_INPUT_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x42,
+
+ IHEVCE_DYN_TEMPORAL_MVP_INPUT_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x43,
+
+ IHEVCE_DYN_QUALITY_PRESET_INPUT_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x44,
+
+ IHEVCE_DYN_CHROMA_PREDICTION_FROM_LUMA_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x45,
+
+ IHEVCE_DYN_CONSTRAINED_IPRED_INPUT_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x46,
+
+ IHEVCE_DYN_ASYMETRIC_MACROBLOCK_ENABLE_INPUT_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x47,
+
+ IHEVCE_DYN_DEBLOCKING_TYPE_INPUT_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x48,
+
+ IHEVCE_DYN_SAO_ENABLE_INPUT_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x49,
+
+ IHEVCE_DYN_LF_ACROSS_TILES_ENABLE_INPUT_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x4a,
+
+ IHEVCE_DYN_DEFAULT_SC_MATRIX_ENABLE_INPUT_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x4b,
+
+ IHEVCE_DYN_CROPPING_MODE_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x4c,
+
+ IHEVCE_DYN_FRAME_SKIPS_INPUT_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x4d,
+
+ IHEVCE_DYN_SLICE_TYPE_INPUT_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x4e,
+
+ IHEVCE_DYN_REFRESH_TYPE_INPUT_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x4f,
+
+ IHEVCE_DYN_FORCE_FRAME_INPUT_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x50,
+
+ IHEVCE_INVALID_DYN_TEMPORAL_LAYERS = IHEVCE_API_ERROR_START + 0x51,
+
+ IHEVCE_INVALID_DYN_CDR = IHEVCE_API_ERROR_START + 0x52,
+
+ IHEVCE_INVALID_DYN_IDR = IHEVCE_API_ERROR_START + 0x53,
+
+ IHEVCE_INVALID_DYN_REF_FRAMES = IHEVCE_API_ERROR_START + 0x54,
+
+ IHEVCE_INVALID_DYN_CONSECUTIVE_MAX_TID = IHEVCE_API_ERROR_START + 0x55,
+
+ IHEVCE_INVALID_DYN_LT_FRAMES = IHEVCE_API_ERROR_START + 0x56,
+
+ IHEVCE_INVALID_DYN_MIN_TID = IHEVCE_API_ERROR_START + 0x57,
+
+ IHEVCE_INVALID_DYN_WEIGHTED_PREDICTION_INPUT = IHEVCE_API_ERROR_START + 0x58,
+
+ IHEVCE_INVALID_DYN_TEMPORAL_MVP_INPUT = IHEVCE_API_ERROR_START + 0x59,
+
+ IHEVCE_INVALID_DYN_QUALITY_PRESET_INPUT = IHEVCE_API_ERROR_START + 0x5a,
+
+ IHEVCE_INVALID_DYN_CHROMA_PREDICTION_FROM_LUMA = IHEVCE_API_ERROR_START + 0x5b,
+
+ IHEVCE_INVALID_DYN_CONSTRAINED_IPRED_INPUT = IHEVCE_API_ERROR_START + 0x5c,
+
+ IHEVCE_INVALID_DYN_ASYMETRIC_MACROBLOCK_ENABLE_INPUT = IHEVCE_API_ERROR_START + 0x5d,
+
+ IHEVCE_INVALID_DYN_DEBLOCKING_TYPE_INPUT = IHEVCE_API_ERROR_START + 0x5e,
+
+ IHEVCE_INVALID_DYN_SAO_ENABLE_INPUT = IHEVCE_API_ERROR_START + 0x5f,
+
+ IHEVCE_INVALID_DYN_LF_ACROSS_TILES_ENABLE_INPUT = IHEVCE_API_ERROR_START + 0x60,
+
+ IHEVCE_INVALID_DYN_DEFAULT_SC_MATRIX_ENABLE_INPUT = IHEVCE_API_ERROR_START + 0x61,
+
+ IHEVCE_INVALID_DYN_CROPPING_MODE = IHEVCE_API_ERROR_START + 0x62,
+
+ IHEVCE_INVALID_DYN_FRAME_SKIPS_INPUT = IHEVCE_API_ERROR_START + 0x63,
+
+ IHEVCE_INVALID_DYN_SLICE_TYPE_INPUT = IHEVCE_API_ERROR_START + 0x64,
+
+ IHEVCE_INVALID_DYN_REFRESH_TYPE_INPUT = IHEVCE_API_ERROR_START + 0x65,
+
+ IHEVCE_INVALID_DYN_FORCE_FRAME_INPUT = IHEVCE_API_ERROR_START + 0x66,
+
+ IHEVCE_INVALID_CORE_CONFIG = IHEVCE_API_ERROR_START + 0x67,
+
+ IHEVCE_SEI_MESSAGES_DEPENDENCY = IHEVCE_API_ERROR_START + 0x68,
+
+ IHEVCE_VUI_DEPENDENCY = IHEVCE_API_ERROR_START + 0x69,
+
+ IHEVCE_SEI_ENABLED_VUI_DISABLED = IHEVCE_API_ERROR_START + 0x6A,
+
+ IHEVCE_SEI_HASH_VALUE_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x6B,
+
+ /* Level related error codes */
+ IHEVCE_PIC_SIZE_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x6C,
+
+ IHEVCE_BITRATE_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x6D,
+
+ IHEVCE_LUMA_SAMPLE_RATE_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x6E,
+
+ IHEVCE_INVALID_MEM_CTRL_FLAG = IHEVCE_API_ERROR_START + 0x6F,
+
+ IHEVCE_NUM_DECOMP_LYRS_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x70,
+
+ IHEVCE_RATE_CONTROL_MDOE_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x71,
+
+ IHEVCE_MAX_VBV_SIZE_OUT_OF_RANGE = IHEVCE_API_ERROR_START + 0x72,
+
+ IHEVCE_INVALID_NUM_BR_INSTANCES = IHEVCE_API_ERROR_START + 0x73,
+
+ IHEVCE_INVALID_MBR_QUALITY_SETTING = IHEVCE_API_ERROR_START + 0x74,
+
+ IHEVCE_NUM_MAX_RESOLUTIONS_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x75,
+
+ IHEVCE_MULTI_RES_LAYER_REUSE_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x76,
+
+ IHEVCE_TGT_WIDTH_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x77,
+
+ IHEVCE_TGT_HEIGHT_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x78,
+
+ IHEVCE_TGT_FRAME_RATE_SCALING_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x79,
+
+ IHEVCE_INVALID_SIZE = IHEVCE_API_ERROR_START + 0x7A,
+
+ IHEVCE_INPUT_BIT_DEPTH_OUT_OF_RANGE = IHEVCE_API_ERROR_START + 0x7B,
+
+ IHEVCE_OUTPUT_BIT_DEPTH_OUT_OF_RANGE = IHEVCE_API_ERROR_START + 0x7C,
+
+ /** VUI parameters error codes **/
+
+ IHEVC_INVALID_ASPECT_RATIO_PARAMS = IHEVCE_API_ERROR_START + 0x7D,
+
+ IHEVC_INVALID_OVERSCAN_PARAMS = IHEVCE_API_ERROR_START + 0x7E,
+
+ IHEVC_INVALID_VIDEO_PARAMS = IHEVCE_API_ERROR_START + 0x7F,
+
+ IHEVC_INVALID_COLOUR_PARAMS = IHEVCE_API_ERROR_START + 0x80,
+
+ IHEVC_INVALID_CHROMA_PARAMS = IHEVCE_API_ERROR_START + 0x81,
+
+ IHEVC_INVALID_TIMING_INFO_PARAM = IHEVCE_API_ERROR_START + 0x82,
+
+ IHEVC_INVALID_HRD_PRESENT_PARAMS = IHEVCE_API_ERROR_START + 0x83,
+
+ IHEVCE_SYSTEM_APIS_NOT_INITIALLIZED = IHEVCE_API_ERROR_START + 0x84,
+
+ /* Codec Tier related error codes */
+ IHEVC_CODEC_TIER_NOT_SUPPORTED = IHEVCE_API_ERROR_START + 0x85,
+
+ IHEVCE_CANNOT_ALLOCATE_MEMORY = IHEVCE_API_ERROR_START + 0x86,
+
+ IHEVCE_RATE_CONTROL_PASS_INVALID = IHEVCE_API_ERROR_START + 0x87,
+
+ IHEVCE_INVALID_TEMPORAL_SCALABILITY = IHEVCE_API_ERROR_START + 0x88,
+
+ IHEVCE_MULTI_PASS_INVALID_IDR_CDR = IHEVCE_API_ERROR_START + 0x89,
+
+ IHEVCE_BAD_TILE_CONFIGURATION = IHEVCE_API_ERROR_START + 0x8A,
+
+ IHEVCE_BAD_SLICE_PARAMS = IHEVCE_API_ERROR_START + 0x8B,
+
+ IHEVCE_SLICE_SEG_ARG_INVALID = IHEVCE_API_ERROR_START + 0x8C,
+
+ /* Error codes for mastering display */
+ IHEVCE_MASTERING_DISP_COL_VOL_OUT_OF_RANGE = IHEVCE_API_ERROR_START + 0x8D,
+
+ IHEVCE_DISPLAY_PRIMARY_X_OUT_OF_RANGE = IHEVCE_API_ERROR_START + 0x8E,
+
+ IHEVCE_DISPLAY_PRIMARY_Y_OUT_OF_RANGE = IHEVCE_API_ERROR_START + 0x8F,
+
+ IHEVCE_WHITE_POINT_X_OUT_OF_RANGE = IHEVCE_API_ERROR_START + 0x90,
+
+ IHEVCE_WHITE_POINT_Y_OUT_OF_RANGE = IHEVCE_API_ERROR_START + 0x91,
+
+ IHEVCE_MAX_DISP_MATERING_LUM_OUT_OF_RANGE = IHEVCE_API_ERROR_START + 0x92,
+
+ IHEVCE_INTEROPERABILITY_FLAG_SUPPORTED = IHEVCE_API_ERROR_START + 0x93,
+
+ IHEVCE_VISUAL_QUALITY_ENHANCEMENTS_TOGGLER_VALUE_UNSUPPORTED = IHEVCE_API_ERROR_START + 0x94,
+
+ IHEVCE_ARCHITECTURE_TYPE_UNSUPPORTED = IHEVCE_API_ERROR_START + 0x95,
+
+ IHEVCE_SEI_PAYLOAD_ENABLE_OUT_OF_RANGE = IHEVCE_API_ERROR_START + 0x96,
+
+ IHEVCE_BAD_DIST_CFG_PARAMETERS = IHEVCE_API_ERROR_START + 0x97,
+
+ IHEVCE_UNSUPPORTED_PROC_CONFIG = IHEVCE_API_ERROR_START + 0x98,
+
+ IHEVCE_INVALID_MRES_SINGLE_OUT = IHEVCE_API_ERROR_START + 0x99,
+
+ IHEVCE_SEI_CLL_ENABLE_OUT_OF_RANGE = IHEVCE_API_ERROR_START + 0x9A,
+
+ /** max failure error code to ensure enum is 32 bits wide */
+ IHEVCE_FAIL = 0xFFFFFFFF
+
+} IHEVCE_ERROR_T;
+
+typedef enum
+{
+ IHEVCE_UNSUPPORTEDINPUT = 31, /**
+ Bit 31 - Unsupported feature/parameter
+ (each value here is a bit position used as a shift count, not a mask) */
+
+ IHEVCE_UNSUPPORTEDPARAM = 30, /**
+ Bit 30 - Unsupported input parameter or
+ configuration.
+ */
+
+ IHEVCE_FATALERROR = 29 /**
+ Bit 29 - Fatal error (stop the codec).
+ */
+
+} IHEVCE_ErrorBit;
+
+/** Check for fatal error: yields bit IHEVCE_FATALERROR of x (0 or 1) */
+#define IHEVCE_ISFATALERROR(x) (((x) >> IHEVCE_FATALERROR) & 0x1)
+/** Check for unsupported parameter: yields bit IHEVCE_UNSUPPORTEDPARAM of x (0 or 1) */
+#define IHEVCE_ISUNSUPPORTEDPARAM(x) (((x) >> IHEVCE_UNSUPPORTEDPARAM) & 0x1)
+/** Check for unsupported input: yields bit IHEVCE_UNSUPPORTEDINPUT of x (0 or 1) */
+#define IHEVCE_ISUNSUPPORTEDINPUT(x) (((x) >> IHEVCE_UNSUPPORTEDINPUT) & 0x1)
+
+/* Each SET macro below ORs the bit into x in place (|=), so x must be a modifiable lvalue */
+/** Set fatal error bit */
+#define IHEVCE_SETFATALERROR(x) ((x) |= (0x1U << IHEVCE_FATALERROR))
+/** Set unsupported parameter bit */
+#define IHEVCE_SETUNSUPPORTEDPARAM(x) ((x) |= (0x1U << IHEVCE_UNSUPPORTEDPARAM))
+/** Set unsupported input bit */
+#define IHEVCE_SETUNSUPPORTEDINPUT(x) ((x) |= (0x1U << IHEVCE_UNSUPPORTEDINPUT))
+
+#endif /* _IHEVCE_ERROR_CODES_H_ */
diff --git a/encoder/ihevce_frame_process.c b/encoder/ihevce_frame_process.c
new file mode 100644
index 0000000..4b75a60
--- /dev/null
+++ b/encoder/ihevce_frame_process.c
@@ -0,0 +1,7108 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*!
+******************************************************************************
+* \file ihevce_frame_process.c
+*
+* \brief
+* This file contains top level functions related to frame processing
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+*
+* List of Functions
+*
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+#include <time.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_debug.h"
+#include "ihevc_macros.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+#include "ihevc_common_tables.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_buffer_que_interface.h"
+#include "ihevce_hle_interface.h"
+#include "ihevce_hle_q_func.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_lap_interface.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_multi_thrd_funcs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_checks.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_global_tables.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+#include "ihevce_ipe_instr_set_router.h"
+#include "ihevce_entropy_structs.h"
+#include "ihevce_enc_loop_structs.h"
+#include "ihevce_enc_loop_utils.h"
+#include "ihevce_inter_pred.h"
+#include "ihevce_common_utils.h"
+#include "ihevce_sub_pic_rc.h"
+#include "hme_datatype.h"
+#include "hme_interface.h"
+#include "hme_common_defs.h"
+#include "hme_defs.h"
+#include "ihevce_enc_loop_pass.h"
+#include "ihevce_trace.h"
+#include "ihevce_encode_header.h"
+#include "ihevce_encode_header_sei_vui.h"
+#include "ihevce_ipe_structs.h"
+#include "ihevce_ipe_pass.h"
+#include "ihevce_dep_mngr_interface.h"
+#include "ihevce_rc_enc_structs.h"
+#include "hme_globals.h"
+#include "ihevce_me_pass.h"
+#include "ihevce_coarse_me_pass.h"
+#include "ihevce_frame_process.h"
+#include "ihevce_rc_interface.h"
+#include "ihevce_profile.h"
+#include "ihevce_decomp_pre_intra_structs.h"
+#include "ihevce_decomp_pre_intra_pass.h"
+#include "ihevce_frame_process_utils.h"
+
+#include "cast_types.h"
+#include "osal.h"
+#include "osal_defaults.h"
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+#define REF_MOD_STRENGTH 1.0
+#define REF_MAX_STRENGTH 1.4f
+
+/*****************************************************************************/
+/* Extern variables */
+/*****************************************************************************/
+
+/**
+* @var QP2QUANT_MD[]
+*
+* @brief Direct Cost Comparison Table (52 entries, indexed by Qp)
+*
+* @param Comments: Direct cost is compared with 16 * QP2QUANT_MD[Qp]
+* If direct cost is less than 16 * QP2QUANT_MD[Qp]
+* then direct cost is assumed to be zero
+*/
+const WORD16 QP2QUANT_MD[52] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4,
+ 5, 6, 6, 7, 8, 9, 10, 11, 13, 14, 16, 18, 20,
+ 23, 25, 29, 32, 36, 40, 45, 51, 57, 64, 72, 81, 91 };
+
+/*
+Gaussian 11x11 window with a sigma of 1.5 - values multiplied by 2048 (1 << g_u1_win_q_shift)
+Window made into 9x9 window (g_u1_win_size, 81 stored values) as most entries were zero
+The center weight has been reduced by 1 after dropping first row/col and last row/col; sum is 2048
+*/
+UWORD8 g_u1_win_size = 9;
+UWORD8 g_u1_win_q_shift = 11;
+UWORD8 au1_g_win[81] = { 0, 1, 2, 3, 4, 3, 2, 1, 0, 1, 3, 8, 16, 20, 16, 8, 3,
+ 1, 2, 8, 24, 48, 60, 48, 24, 8, 2, 3, 16, 48, 93, 116, 93, 48,
+ 16, 3, 4, 20, 60, 116, 144, 116, 60, 20, 4, 3, 16, 48, 93, 116, 93,
+ 48, 16, 3, 2, 8, 24, 48, 60, 48, 24, 8, 2, 1, 3, 8, 16, 20,
+ 16, 8, 3, 1, 0, 1, 2, 3, 4, 3, 2, 1, 0 };
+
+/* Lagrange params: 8 lambda modifiers for I pictures (per the name), decreasing from 0.85 to 0.3705 */
+const double lamda_modifier_for_I_pic[8] = { 0.85, 0.7471, 0.6646, 0.5913,
+ 0.5261, 0.4680, 0.4164, 0.3705 };
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_mbr_quality_tool_set_configuration \endif
+*
+* \brief
+* Tool set selection for auxiliary bitrates. Currently only the number of intra and inter
+* candidates for auxiliary bitrates is controlled
+*
+* \param[in] ps_enc_loop_thrd_ctxt : enc ctxt
+* \param[in] ps_stat_prms: static parameters
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_mbr_quality_tool_set_configuration(
+    ihevce_enc_loop_ctxt_t *ps_enc_loop_thrd_ctxt, ihevce_static_cfg_params_t *ps_stat_prms)
+{
+    /* The configured MBR quality setting fixes how many intra and inter modes */
+    /* are evaluated; any setting not listed here only trips the assert below. */
+    if(IHEVCE_MBR_HIGH_QUALITY == ps_stat_prms->s_tgt_lyr_prms.i4_mbr_quality_setting)
+    {
+        ps_enc_loop_thrd_ctxt->i4_num_modes_to_evaluate_intra = 3;
+        ps_enc_loop_thrd_ctxt->i4_num_modes_to_evaluate_inter = 4;
+    }
+    else if(IHEVCE_MBR_MEDIUM_SPEED == ps_stat_prms->s_tgt_lyr_prms.i4_mbr_quality_setting)
+    {
+        ps_enc_loop_thrd_ctxt->i4_num_modes_to_evaluate_intra = 3;
+        ps_enc_loop_thrd_ctxt->i4_num_modes_to_evaluate_inter = 3;
+    }
+    else if(IHEVCE_MBR_HIGH_SPEED == ps_stat_prms->s_tgt_lyr_prms.i4_mbr_quality_setting)
+    {
+        ps_enc_loop_thrd_ctxt->i4_num_modes_to_evaluate_intra = 2;
+        ps_enc_loop_thrd_ctxt->i4_num_modes_to_evaluate_inter = 2;
+    }
+    else if(IHEVCE_MBR_EXTREME_SPEED == ps_stat_prms->s_tgt_lyr_prms.i4_mbr_quality_setting)
+    {
+        ps_enc_loop_thrd_ctxt->i4_num_modes_to_evaluate_intra = 1;
+        ps_enc_loop_thrd_ctxt->i4_num_modes_to_evaluate_inter = 1;
+    }
+    else
+    {
+        /* Unlisted setting: the context fields are left as they were */
+        assert(0);
+    }
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_find_free_indx \endif
+*
+* \brief
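+* Selects the recon buffer to release when none of the buffers in the queue is free
+*
+* \param[in] pps_recon_buf_q : recon picture buffer queue, i4_num_buf : number of entries
+*
+* \return
+* Index of the buffer to release, or -1 if a free buffer exists (or no entry qualifies)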
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+WORD32 ihevce_find_free_indx(recon_pic_buf_t **pps_recon_buf_q, WORD32 i4_num_buf)
+{
+    WORD32 i4_buf_idx;
+    WORD32 i4_min_gop_num = 0x7FFFFFFF;
+    WORD32 i4_min_poc = 0x7FFFFFFF;
+    WORD32 i4_victim_idx = -1;
+
+    /* Nothing has to be released while at least one buffer is still free */
+    for(i4_buf_idx = 0; i4_buf_idx < i4_num_buf; i4_buf_idx++)
+    {
+        if(1 == pps_recon_buf_q[i4_buf_idx]->i4_is_free)
+        {
+            return -1;
+        }
+    }
+
+    /* Queue is full: prefer the first non-reference picture whose */
+    /* i4_non_ref_free_flag is set                                 */
+    for(i4_buf_idx = 0; i4_buf_idx < i4_num_buf; i4_buf_idx++)
+    {
+        const recon_pic_buf_t *ps_buf = pps_recon_buf_q[i4_buf_idx];
+
+        if((0 == ps_buf->i4_is_reference) && (0 != ps_buf->i4_non_ref_free_flag))
+        {
+            return i4_buf_idx;
+        }
+    }
+
+    /* No such picture: find the smallest IDR GOP number held in the queue ... */
+    for(i4_buf_idx = 0; i4_buf_idx < i4_num_buf; i4_buf_idx++)
+    {
+        if(pps_recon_buf_q[i4_buf_idx]->i4_idr_gop_num < i4_min_gop_num)
+        {
+            i4_min_gop_num = pps_recon_buf_q[i4_buf_idx]->i4_idr_gop_num;
+        }
+    }
+
+    /* ... and, within that GOP, the picture with the smallest POC */
+    for(i4_buf_idx = 0; i4_buf_idx < i4_num_buf; i4_buf_idx++)
+    {
+        const recon_pic_buf_t *ps_buf = pps_recon_buf_q[i4_buf_idx];
+
+        if((ps_buf->i4_idr_gop_num == i4_min_gop_num) && (ps_buf->i4_poc < i4_min_poc))
+        {
+            i4_min_poc = ps_buf->i4_poc;
+            i4_victim_idx = i4_buf_idx;
+        }
+    }
+
+    /* Still -1 here when no entry passed the POC comparison above */
+    return i4_victim_idx;
+}
+
+/*!
+******************************************************************************
+* \if Function name : complexity_RC_reset_marking \endif
+*
+* \brief
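+* Checks the complexity variation around the current picture and sets the complexity
+* change flags (reset model / reset bits) used by rate control
+* \param[in] ps_enc_ctxt : encoder context
+* \param[in] i4_cur_ipe_idx : index of the current picture in the pre-enc input/output arrays
+* \param[in] i4_end_flag : when non-zero, the complexity based reset check is skipped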
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void complexity_RC_reset_marking(enc_ctxt_t *ps_enc_ctxt, WORD32 i4_cur_ipe_idx, WORD32 i4_end_flag)
+{
+ rc_lap_out_params_t *ps_cur_ipe_lap_out;
+ rc_lap_out_params_t *ps_lap_out_temp;
+ WORD32 i4_max_temporal_layers;
+
+ ps_cur_ipe_lap_out =
+ &ps_enc_ctxt->s_multi_thrd.aps_curr_inp_pre_enc[i4_cur_ipe_idx]->s_rc_lap_out;
+ ps_cur_ipe_lap_out->i4_is_cmplx_change_reset_model = 0;
+ ps_cur_ipe_lap_out->i4_is_cmplx_change_reset_bits = 0;
+
+ i4_max_temporal_layers = ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_max_temporal_layers;
+
+ /*reset the RC_reset counter at reset points*/
+ if(ps_cur_ipe_lap_out->i4_is_I_only_scd || ps_cur_ipe_lap_out->i4_is_non_I_scd ||
+ ps_cur_ipe_lap_out->i4_rc_scene_type == SCENE_TYPE_SCENE_CUT)
+ {
+ ps_enc_ctxt->i4_past_RC_reset_count = 0;
+ }
+
+ if(ps_cur_ipe_lap_out->i4_rc_scene_type == SCENE_TYPE_SCENE_CUT)
+ {
+ ps_enc_ctxt->i4_past_RC_scd_reset_count = 0;
+ }
+ ps_enc_ctxt->i4_past_RC_reset_count++;
+ ps_enc_ctxt->i4_past_RC_scd_reset_count++;
+
+ /*complexity based rate control reset */
+
+ if((ps_cur_ipe_lap_out->i4_rc_pic_type == IV_P_FRAME ||
+ ps_cur_ipe_lap_out->i4_rc_pic_type == IV_I_FRAME) &&
+ (i4_max_temporal_layers > 1) && (!i4_end_flag) &&
+ (ps_enc_ctxt->s_multi_thrd.i4_delay_pre_me_btw_l0_ipe > (2 * (1 << i4_max_temporal_layers))))
+ {
+ WORD32 i4_is_cur_pic_high_complex_region =
+ ps_enc_ctxt->s_multi_thrd.aps_curr_out_pre_enc[i4_cur_ipe_idx]
+ ->i4_is_high_complex_region;
+ WORD32 i4_next_ipe_idx;
+ WORD32 i4_next_next_ipe_idx;
+ WORD32 i4_temp_ipe_idx;
+ WORD32 i;
+
+ ps_enc_ctxt->i4_future_RC_reset = 0;
+ ps_enc_ctxt->i4_future_RC_scd_reset = 0;
+ ASSERT(i4_is_cur_pic_high_complex_region != -1);
+
+ /*get the next idx of p/i picture */
+ i4_next_ipe_idx =
+ (i4_cur_ipe_idx + 1) % (ps_enc_ctxt->s_multi_thrd.i4_max_delay_pre_me_btw_l0_ipe + 1);
+ i4_temp_ipe_idx =
+ (i4_cur_ipe_idx + 1) % (ps_enc_ctxt->s_multi_thrd.i4_max_delay_pre_me_btw_l0_ipe + 1);
+ for(i = 0; i < (1 << i4_max_temporal_layers); i++)
+ {
+ ps_lap_out_temp =
+ &ps_enc_ctxt->s_multi_thrd.aps_curr_inp_pre_enc[i4_next_ipe_idx]->s_rc_lap_out;
+
+ if(ps_lap_out_temp->i4_rc_pic_type == IV_P_FRAME ||
+ ps_lap_out_temp->i4_rc_pic_type == IV_I_FRAME)
+ {
+ break;
+ }
+ i4_next_ipe_idx = (i4_next_ipe_idx + 1) %
+ (ps_enc_ctxt->s_multi_thrd.i4_max_delay_pre_me_btw_l0_ipe + 1);
+ }
+ /* get the next idx of next p/i picture*/
+ i4_next_next_ipe_idx =
+ (i4_next_ipe_idx + 1) % (ps_enc_ctxt->s_multi_thrd.i4_max_delay_pre_me_btw_l0_ipe + 1);
+ for(i = 0; i < (1 << i4_max_temporal_layers); i++)
+ {
+ ps_lap_out_temp =
+ &ps_enc_ctxt->s_multi_thrd.aps_curr_inp_pre_enc[i4_next_next_ipe_idx]->s_rc_lap_out;
+
+ if(ps_lap_out_temp->i4_rc_pic_type == IV_P_FRAME ||
+ ps_lap_out_temp->i4_rc_pic_type == IV_I_FRAME)
+ {
+ break;
+ }
+ i4_next_next_ipe_idx = (i4_next_next_ipe_idx + 1) %
+ (ps_enc_ctxt->s_multi_thrd.i4_max_delay_pre_me_btw_l0_ipe + 1);
+ }
+
+ /*check for any possible RC reset in the future 8 frames*/
+ for(i = 0; i < 8; i++)
+ {
+ ps_lap_out_temp =
+ &ps_enc_ctxt->s_multi_thrd.aps_curr_inp_pre_enc[i4_temp_ipe_idx]->s_rc_lap_out;
+
+ if(ps_lap_out_temp->i4_is_I_only_scd || ps_lap_out_temp->i4_is_non_I_scd ||
+ ps_lap_out_temp->i4_rc_scene_type == SCENE_TYPE_SCENE_CUT)
+ {
+ ps_enc_ctxt->i4_future_RC_reset = 1;
+ }
+ if(ps_cur_ipe_lap_out->i4_rc_scene_type == SCENE_TYPE_SCENE_CUT)
+ {
+ ps_enc_ctxt->i4_future_RC_scd_reset = 1;
+ }
+ i4_temp_ipe_idx = (i4_temp_ipe_idx + 1) %
+ (ps_enc_ctxt->s_multi_thrd.i4_max_delay_pre_me_btw_l0_ipe + 1);
+ }
+
+ if((!ps_enc_ctxt->i4_future_RC_reset) && (ps_enc_ctxt->i4_past_RC_reset_count > 8))
+ {
+ /*if the prev two P/I pic is not in high complex region
+ then enable reset RC flag*/
+ if((!ps_enc_ctxt->ai4_is_past_pic_complex[0]) &&
+ (!ps_enc_ctxt->ai4_is_past_pic_complex[1]))
+ {
+ if(i4_is_cur_pic_high_complex_region)
+ {
+ ps_cur_ipe_lap_out->i4_is_cmplx_change_reset_model = 1;
+ ps_cur_ipe_lap_out->i4_is_cmplx_change_reset_bits = 1;
+ ps_enc_ctxt->i4_is_I_reset_done = 0;
+ }
+ }
+
+ /*if the next two P/I pic is not in high complex region
+ then enable reset RC flag*/
+ if((!ps_enc_ctxt->s_multi_thrd.aps_curr_out_pre_enc[i4_next_ipe_idx]
+ ->i4_is_high_complex_region) &&
+ (!ps_enc_ctxt->s_multi_thrd.aps_curr_out_pre_enc[i4_next_next_ipe_idx]
+ ->i4_is_high_complex_region))
+ {
+ if(i4_is_cur_pic_high_complex_region)
+ {
+ ps_cur_ipe_lap_out->i4_is_cmplx_change_reset_model = 1;
+ ps_cur_ipe_lap_out->i4_is_cmplx_change_reset_bits = 1;
+ ps_enc_ctxt->i4_is_I_reset_done = 0;
+ }
+ }
+ }
+ else if((!ps_enc_ctxt->i4_future_RC_scd_reset) && (ps_enc_ctxt->i4_past_RC_scd_reset_count > 8))
+ {
+ /*if the prev two P/I pic is not in high complex region
+ then enable reset RC flag*/
+ if((!ps_enc_ctxt->ai4_is_past_pic_complex[0]) &&
+ (!ps_enc_ctxt->ai4_is_past_pic_complex[1]))
+ {
+ if(i4_is_cur_pic_high_complex_region)
+ {
+ ps_cur_ipe_lap_out->i4_is_cmplx_change_reset_bits = 1;
+ }
+ }
+
+ /*if the next two P/I pic is not in high complex region
+ then enable reset RC flag*/
+ if((!ps_enc_ctxt->s_multi_thrd.aps_curr_out_pre_enc[i4_next_ipe_idx]
+ ->i4_is_high_complex_region) &&
+ (!ps_enc_ctxt->s_multi_thrd.aps_curr_out_pre_enc[i4_next_next_ipe_idx]
+ ->i4_is_high_complex_region))
+ {
+ if(i4_is_cur_pic_high_complex_region)
+ {
+ ps_cur_ipe_lap_out->i4_is_cmplx_change_reset_bits = 1;
+ }
+ }
+ }
+
+ /* forcing I frame reset after complexity change is disable as it gives gain, could be due to that
+ required i reset is already happening on pre Intra SAD*/
+ /*if(!ps_enc_ctxt->i4_is_I_reset_done && (ps_cur_ipe_lap_out->i4_pic_type
+ == IV_I_FRAME))
+ {
+ ps_cur_ipe_lap_out->i4_is_I_only_scd = 1;
+ ps_enc_ctxt->i4_is_I_reset_done = 1;
+ }*/
+
+ ps_enc_ctxt->ai4_is_past_pic_complex[0] = i4_is_cur_pic_high_complex_region;
+
+ ps_enc_ctxt->ai4_is_past_pic_complex[1] = ps_enc_ctxt->ai4_is_past_pic_complex[0];
+ }
+ return;
+}
+/*!
+******************************************************************************
+* \if Function name : ihevce_pre_enc_manage_ref_pics \endif
+*
+* \brief
+* Pre-encode reference picture management based on delta poc array given by LAP
+* Populates the pre-enc reference lists and frees the non used reference pictures
+* Updates the coarse ME DPB and stores the total and active reference counts
+*
+* \param[in] ps_enc_ctxt : encoder context pointer
+* \param[in] ps_curr_inp : current LAP Encoder buffer pointer
+* \param[in] ps_curr_out : current pre-enc ME buffer pointer (slice type is read from it)
+* \param[in] i4_ping_pong : index selecting the pre-enc reference list instance to fill
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_pre_enc_manage_ref_pics(
+ enc_ctxt_t *ps_enc_ctxt,
+ ihevce_lap_enc_buf_t *ps_curr_inp,
+ pre_enc_me_ctxt_t *ps_curr_out,
+ WORD32 i4_ping_pong)
+{
+ /* local variables */
+ WORD32 ctr;
+ WORD32 ref_pics;
+ WORD32 ai4_buf_status[HEVCE_MAX_DPB_PICS] = { 0 }; /* 1 => recon queue entry is referenced by the current pic */
+ WORD32 curr_poc;
+ WORD32 wp_flag = 0;
+ WORD32 num_ref_pics_list0 = 0;
+ WORD32 num_ref_pics_list1 = 0;
+ WORD32 cra_poc = ps_curr_inp->s_lap_out.i4_assoc_IRAP_poc; /* NOTE(review): not read again in this function */
+ WORD32 slice_type = ps_curr_out->s_slice_hdr.i1_slice_type;
+ recon_pic_buf_t *(*aps_pre_enc_ref_pic_list)[HEVCE_MAX_REF_PICS * 2];
+ WORD32 i4_inc_L1_active_ref_pic = 0; /* set to 1 when the first two L1 entries share a POC */
+ WORD32 i4_inc_L0_active_ref_pic = 0; /* set to 1 when the first two L0 entries share a POC */
+
+ (void)ps_curr_out; /* NOTE(review): looks redundant, ps_curr_out is already read above for slice_type */
+ curr_poc = ps_curr_inp->s_lap_out.i4_poc;
+
+ /* Number of reference pics given by LAP should not be greater than max */
+ ASSERT(HEVCE_MAX_REF_PICS >= ps_curr_inp->s_lap_out.i4_num_ref_pics);
+
+ /* select the pointer lists (L0 and L1) of the given ping_pong instance */
+ aps_pre_enc_ref_pic_list = ps_enc_ctxt->aps_pre_enc_ref_lists[i4_ping_pong];
+
+ /* derive the weighted prediction enable flag based on slice type */
+ if(BSLICE == slice_type)
+ {
+ wp_flag = ps_curr_inp->s_lap_out.i1_weighted_bipred_flag;
+ }
+ else if(PSLICE == slice_type)
+ {
+ wp_flag = ps_curr_inp->s_lap_out.i1_weighted_pred_flag;
+ }
+ else
+ {
+ wp_flag = 0;
+ }
+
+ /* to support duplicate pics: make every pointer-list slot point at its own storage entry */
+ {
+ WORD32 i, j;
+ for(i = 0; i < 2; i++)
+ {
+ for(j = 0; j < HEVCE_MAX_REF_PICS * 2; j++)
+ {
+ aps_pre_enc_ref_pic_list[i][j] =
+ &ps_enc_ctxt->as_pre_enc_ref_lists[i4_ping_pong][i][j];
+ }
+ }
+ }
+
+ /* run a loop over the number of reference pics given by LAP */
+ for(ref_pics = 0; ref_pics < ps_curr_inp->s_lap_out.i4_num_ref_pics; ref_pics++)
+ {
+ WORD32 ref_poc;
+ WORD32 i4_loop = 1;
+ WORD32 i4_temp_list;
+
+ ref_poc = curr_poc + ps_curr_inp->s_lap_out.as_ref_pics[ref_pics].i4_ref_pic_delta_poc;
+
+ /* run a loop to check the poc based on delta poc array */
+ for(ctr = 0; ctr < ps_enc_ctxt->i4_pre_enc_num_buf_recon_q; ctr++)
+ {
+ /* if the POC is matching with current ref picture and the buffer is in use */
+ if((ref_poc == ps_enc_ctxt->pps_pre_enc_recon_buf_q[ctr]->i4_poc) &&
+ (0 == ps_enc_ctxt->pps_pre_enc_recon_buf_q[ctr]->i4_is_free))
+ {
+ /* mark the buf status as used */
+ ai4_buf_status[ctr] = 1;
+
+ /* refs with POC below the current POC (or any ref when curr_poc is 0) go to list 0, others to list 1 */
+ if((ref_poc < curr_poc) || (0 == curr_poc))
+ {
+ /* list 0 */
+ memcpy(
+ &ps_enc_ctxt->as_pre_enc_ref_lists[i4_ping_pong][LIST_0][num_ref_pics_list0],
+ ps_enc_ctxt->pps_pre_enc_recon_buf_q[ctr],
+ sizeof(recon_pic_buf_t));
+ i4_temp_list = num_ref_pics_list0;
+
+ /* copy the same recon buffer into the next slots for duplicates; NOTE(review): exits only when i4_loop equals the count, assumes count >= 1 - confirm */
+ while(i4_loop != ps_curr_inp->s_lap_out.as_ref_pics[ref_pics]
+ .i4_num_duplicate_entries_in_ref_list)
+ {
+ /* list 0 */
+ i4_temp_list++;
+ memcpy(
+ &ps_enc_ctxt->as_pre_enc_ref_lists[i4_ping_pong][LIST_0][i4_temp_list],
+ ps_enc_ctxt->pps_pre_enc_recon_buf_q[ctr],
+ sizeof(recon_pic_buf_t));
+ i4_loop++;
+ }
+
+ /* populate weights and offsets corresponding to this ref pic */
+ memcpy(
+ &ps_enc_ctxt->as_pre_enc_ref_lists[i4_ping_pong][LIST_0][num_ref_pics_list0]
+ .s_weight_offset,
+ &ps_curr_inp->s_lap_out.as_ref_pics[ref_pics].as_wght_off[0],
+ sizeof(ihevce_wght_offst_t));
+
+ /* Store the used as ref for current pic flag */
+ ps_enc_ctxt->as_pre_enc_ref_lists[i4_ping_pong][LIST_0][num_ref_pics_list0]
+ .i4_used_by_cur_pic_flag =
+ ps_curr_inp->s_lap_out.as_ref_pics[ref_pics].i4_used_by_cur_pic_flag;
+
+ num_ref_pics_list0++;
+ i4_loop = 1;
+ /* fill weights/offsets and the used flag of every duplicate entry and count it in list 0 */
+ while(i4_loop != ps_curr_inp->s_lap_out.as_ref_pics[ref_pics]
+ .i4_num_duplicate_entries_in_ref_list)
+ {
+ /* populate weights and offsets corresponding to this duplicate (as_wght_off[i4_loop]) */
+ memcpy(
+ &ps_enc_ctxt
+ ->as_pre_enc_ref_lists[i4_ping_pong][LIST_0][num_ref_pics_list0]
+ .s_weight_offset,
+ &ps_curr_inp->s_lap_out.as_ref_pics[ref_pics].as_wght_off[i4_loop],
+ sizeof(ihevce_wght_offst_t));
+
+ /* Store the used as ref for current pic flag */
+ ps_enc_ctxt->as_pre_enc_ref_lists[i4_ping_pong][LIST_0][num_ref_pics_list0]
+ .i4_used_by_cur_pic_flag =
+ ps_curr_inp->s_lap_out.as_ref_pics[ref_pics].i4_used_by_cur_pic_flag;
+
+ num_ref_pics_list0++;
+ i4_loop++;
+ }
+ }
+ else
+ {
+ /* list 1 */
+ memcpy(
+ &ps_enc_ctxt->as_pre_enc_ref_lists[i4_ping_pong][LIST_1][num_ref_pics_list1],
+ ps_enc_ctxt->pps_pre_enc_recon_buf_q[ctr],
+ sizeof(recon_pic_buf_t));
+
+ i4_temp_list = num_ref_pics_list1;
+ /* copy the same recon buffer into the next slots for duplicates; NOTE(review): same count >= 1 assumption as list 0 */
+ while(i4_loop != ps_curr_inp->s_lap_out.as_ref_pics[ref_pics]
+ .i4_num_duplicate_entries_in_ref_list)
+ {
+ /* list 1 */
+ i4_temp_list++;
+ memcpy(
+ &ps_enc_ctxt->as_pre_enc_ref_lists[i4_ping_pong][LIST_1][i4_temp_list],
+ ps_enc_ctxt->pps_pre_enc_recon_buf_q[ctr],
+ sizeof(recon_pic_buf_t));
+ i4_loop++;
+ }
+
+ /* populate weights and offsets corresponding to this ref pic */
+ memcpy(
+ &ps_enc_ctxt->as_pre_enc_ref_lists[i4_ping_pong][LIST_1][num_ref_pics_list1]
+ .s_weight_offset,
+ &ps_curr_inp->s_lap_out.as_ref_pics[ref_pics].as_wght_off[0],
+ sizeof(ihevce_wght_offst_t));
+
+ /* Store the used as ref for current pic flag */
+ ps_enc_ctxt->as_pre_enc_ref_lists[i4_ping_pong][LIST_1][num_ref_pics_list1]
+ .i4_used_by_cur_pic_flag =
+ ps_curr_inp->s_lap_out.as_ref_pics[ref_pics].i4_used_by_cur_pic_flag;
+
+ num_ref_pics_list1++;
+ i4_loop = 1;
+ /* fill weights/offsets and the used flag of every duplicate entry and count it in list 1 */
+ while(i4_loop != ps_curr_inp->s_lap_out.as_ref_pics[ref_pics]
+ .i4_num_duplicate_entries_in_ref_list)
+ {
+ /* populate weights and offsets corresponding to this duplicate (as_wght_off[i4_loop]) */
+ memcpy(
+ &ps_enc_ctxt
+ ->as_pre_enc_ref_lists[i4_ping_pong][LIST_1][num_ref_pics_list1]
+ .s_weight_offset,
+ &ps_curr_inp->s_lap_out.as_ref_pics[ref_pics].as_wght_off[i4_loop],
+ sizeof(ihevce_wght_offst_t));
+
+ /* Store the used as ref for current pic flag */
+ ps_enc_ctxt->as_pre_enc_ref_lists[i4_ping_pong][LIST_1][num_ref_pics_list1]
+ .i4_used_by_cur_pic_flag =
+ ps_curr_inp->s_lap_out.as_ref_pics[ref_pics].i4_used_by_cur_pic_flag;
+
+ num_ref_pics_list1++;
+ i4_loop++;
+ }
+ }
+ break;
+ }
+ }
+
+ /* if the reference picture is not found (loop ran to the end without a break) then error */
+ ASSERT(ctr != ps_enc_ctxt->i4_pre_enc_num_buf_recon_q);
+ }
+ /* sort the reference pics in List0 in descending order POC (selection sort on the pointers) */
+ if(num_ref_pics_list0 > 1)
+ {
+ /* run a loop for num ref pics -1 */
+ for(ctr = 0; ctr < num_ref_pics_list0 - 1; ctr++)
+ {
+ WORD32 max_idx = ctr;
+ recon_pic_buf_t *ps_temp;
+ WORD32 i;
+
+ for(i = (ctr + 1); i < num_ref_pics_list0; i++)
+ {
+ /* check for poc greater than the current maximum poc */
+ if(aps_pre_enc_ref_pic_list[LIST_0][i]->i4_poc >
+ aps_pre_enc_ref_pic_list[LIST_0][max_idx]->i4_poc)
+ {
+ max_idx = i;
+ }
+ }
+
+ /* if max of remaining is not current, swap the pointers */
+ if(max_idx != ctr)
+ {
+ ps_temp = aps_pre_enc_ref_pic_list[LIST_0][max_idx];
+ aps_pre_enc_ref_pic_list[LIST_0][max_idx] = aps_pre_enc_ref_pic_list[LIST_0][ctr];
+ aps_pre_enc_ref_pic_list[LIST_0][ctr] = ps_temp;
+ }
+ }
+ }
+
+ /* sort the reference pics in List1 in ascending order POC (selection sort on the pointers) */
+ if(num_ref_pics_list1 > 1)
+ {
+ /* run a loop for num ref pics -1 */
+ for(ctr = 0; ctr < num_ref_pics_list1 - 1; ctr++)
+ {
+ WORD32 min_idx = ctr;
+ recon_pic_buf_t *ps_temp;
+ WORD32 i;
+
+ for(i = (ctr + 1); i < num_ref_pics_list1; i++)
+ {
+ /* check for poc less than the current minimum poc */
+ if(aps_pre_enc_ref_pic_list[LIST_1][i]->i4_poc <
+ aps_pre_enc_ref_pic_list[LIST_1][min_idx]->i4_poc)
+ {
+ min_idx = i;
+ }
+ }
+
+ /* if min of remaining is not current, swap the pointers */
+ if(min_idx != ctr)
+ {
+ ps_temp = aps_pre_enc_ref_pic_list[LIST_1][min_idx];
+ aps_pre_enc_ref_pic_list[LIST_1][min_idx] = aps_pre_enc_ref_pic_list[LIST_1][ctr];
+ aps_pre_enc_ref_pic_list[LIST_1][ctr] = ps_temp;
+ }
+ }
+ }
+
+ /* call the ME API to update the DPB of HME pyramids coarse layers (lists still include pics not used by the current pic) */
+ ihevce_coarse_me_frame_dpb_update(
+ ps_enc_ctxt->s_module_ctxt.pv_coarse_me_ctxt,
+ num_ref_pics_list0,
+ num_ref_pics_list1,
+ &aps_pre_enc_ref_pic_list[LIST_0][0],
+ &aps_pre_enc_ref_pic_list[LIST_1][0]);
+
+ /* Default list creation: keep only the entries whose used by current pic flag is 1 */
+ {
+ WORD32 num_ref_pics_list_final = 0;
+ WORD32 list_idx = 0;
+
+ /* LIST 0 */
+ /* run a loop for num ref pics in list 0 */
+ for(ctr = 0; ctr < num_ref_pics_list0; ctr++)
+ {
+ /* check for used as reference flag */
+ if(1 == aps_pre_enc_ref_pic_list[LIST_0][ctr]->i4_used_by_cur_pic_flag)
+ {
+ /* copy the pointer to the actual valid list idx */
+ aps_pre_enc_ref_pic_list[LIST_0][list_idx] = aps_pre_enc_ref_pic_list[LIST_0][ctr];
+
+ /* increment the valid pic counters and idx */
+ list_idx++;
+ num_ref_pics_list_final++;
+ }
+ }
+
+ /* finally store the number of pictures in List0 */
+ num_ref_pics_list0 = num_ref_pics_list_final;
+ /* LIST 1 */
+ num_ref_pics_list_final = 0;
+ list_idx = 0;
+
+ /* run a loop for num ref pics in list 1 */
+ for(ctr = 0; ctr < num_ref_pics_list1; ctr++)
+ {
+ /* check for used as reference flag */
+ if(1 == aps_pre_enc_ref_pic_list[LIST_1][ctr]->i4_used_by_cur_pic_flag)
+ {
+ /* copy the pointer to the actual valid list idx */
+ aps_pre_enc_ref_pic_list[LIST_1][list_idx] = aps_pre_enc_ref_pic_list[LIST_1][ctr];
+
+ /* increment the valid pic counters and idx */
+ list_idx++;
+ num_ref_pics_list_final++;
+ }
+ }
+
+ /* finally store the number of pictures in List1 */
+ num_ref_pics_list1 = num_ref_pics_list_final;
+ }
+ /* if the first two entries of a list carry the same POC (a duplicate entry), flag one extra
+ active ref for that list; presumably one copy is weighted and the other is not - TODO confirm */
+ if(ps_curr_inp->s_lap_out.i4_pic_type == IV_P_FRAME)
+ {
+ if(num_ref_pics_list0 > 2)
+ {
+ if(aps_pre_enc_ref_pic_list[LIST_0][0]->i4_poc ==
+ aps_pre_enc_ref_pic_list[LIST_0][1]->i4_poc)
+ {
+ i4_inc_L0_active_ref_pic = 1;
+ }
+ }
+ }
+ else
+ {
+ if(num_ref_pics_list0 >= 2 && num_ref_pics_list1 >= 2)
+ {
+ if(aps_pre_enc_ref_pic_list[LIST_0][0]->i4_poc ==
+ aps_pre_enc_ref_pic_list[LIST_0][1]->i4_poc)
+ {
+ i4_inc_L0_active_ref_pic = 1;
+ }
+ if(aps_pre_enc_ref_pic_list[LIST_1][0]->i4_poc ==
+ aps_pre_enc_ref_pic_list[LIST_1][1]->i4_poc)
+ {
+ i4_inc_L1_active_ref_pic = 1;
+ }
+ }
+ }
+
+ /* append the reference pics of List1 at the end of list0 */
+ for(ctr = 0; ctr < num_ref_pics_list1; ctr++)
+ {
+ aps_pre_enc_ref_pic_list[LIST_0][num_ref_pics_list0 + ctr] =
+ aps_pre_enc_ref_pic_list[LIST_1][ctr];
+ }
+
+ /* append the reference pics of List0 at the end of list1 */
+ for(ctr = 0; ctr < num_ref_pics_list0; ctr++)
+ {
+ aps_pre_enc_ref_pic_list[LIST_1][num_ref_pics_list1 + ctr] =
+ aps_pre_enc_ref_pic_list[LIST_0][ctr];
+ }
+
+ /* reference list modification for adding duplicate reference (placeholder: the block below is empty) */
+ {
+
+ }
+
+ /* populate the default weights and offsets for disabled cases */
+ {
+ WORD32 i;
+
+ /* populate the weights and offsets for all pics in L0 + L1 */
+ for(i = 0; i < (num_ref_pics_list0 + num_ref_pics_list1); i++)
+ {
+ /* populate the weights and offsets if weighted prediction is enabled */
+ if(1 == wp_flag)
+ {
+ /* if luma weighting is disabled for this pic then populate default values */
+ if(0 ==
+ aps_pre_enc_ref_pic_list[LIST_0][i]->s_weight_offset.u1_luma_weight_enable_flag)
+ {
+ /* set to default values: weight of (1 << log2 denom) and zero offset */
+ aps_pre_enc_ref_pic_list[LIST_0][i]->s_weight_offset.i2_luma_weight =
+ (1 << ps_curr_inp->s_lap_out.i4_log2_luma_wght_denom);
+
+ aps_pre_enc_ref_pic_list[LIST_0][i]->s_weight_offset.i2_luma_offset = 0;
+ }
+ }
+ }
+
+ for(i = 0; i < (num_ref_pics_list0 + num_ref_pics_list1); i++)
+ {
+ /* populate the weights and offsets if weighted prediction is enabled */
+ if(1 == wp_flag)
+ {
+ /* if luma weighting is disabled for this pic then populate default values */
+ if(0 ==
+ aps_pre_enc_ref_pic_list[LIST_1][i]->s_weight_offset.u1_luma_weight_enable_flag)
+ {
+ /* set to default values: weight of (1 << log2 denom) and zero offset */
+ aps_pre_enc_ref_pic_list[LIST_1][i]->s_weight_offset.i2_luma_weight =
+ (1 << ps_curr_inp->s_lap_out.i4_log2_luma_wght_denom);
+
+ aps_pre_enc_ref_pic_list[LIST_1][i]->s_weight_offset.i2_luma_offset = 0;
+ }
+ }
+ }
+ }
+
+ /* run a loop to free the non used reference pics */
+ for(ctr = 0; ctr < ps_enc_ctxt->i4_pre_enc_num_buf_recon_q; ctr++)
+ {
+ /* if not matched by any reference of the current pic */
+ if(0 == ai4_buf_status[ctr])
+ {
+ ps_enc_ctxt->pps_pre_enc_recon_buf_q[ctr]->i4_is_free = 1;
+ ps_enc_ctxt->pps_pre_enc_recon_buf_q[ctr]->i4_poc = -1;
+ }
+ }
+
+ /* store the number of reference pics in the list for ME/MC etc */
+ ps_enc_ctxt->i4_pre_enc_num_ref_l0 = num_ref_pics_list0;
+ ps_enc_ctxt->i4_pre_enc_num_ref_l1 = num_ref_pics_list1;
+
+#define HME_USE_ONLY_2REF
+#ifndef HME_USE_ONLY_2REF
+ ps_enc_ctxt->i4_pre_enc_num_ref_l0_active = num_ref_pics_list0;
+ ps_enc_ctxt->i4_pre_enc_num_ref_l1_active = num_ref_pics_list1;
+#else /* HME_USE_ONLY_2REF is defined just above, so this is the branch that gets compiled */
+#if MULTI_REF_ENABLE == 1
+ if(ps_curr_inp->s_lap_out.i4_quality_preset >= IHEVCE_QUALITY_P3)
+ {
+ if(ps_curr_inp->s_lap_out.i4_pic_type == IV_P_FRAME)
+ {
+ if(IHEVCE_QUALITY_P6 == ps_curr_inp->s_lap_out.i4_quality_preset)
+ {
+ if(1 == ps_enc_ctxt->s_runtime_src_prms.i4_field_pic)
+ {
+ ps_enc_ctxt->i4_pre_enc_num_ref_l0_active =
+ MIN(MAX_NUM_REFS_IN_PPICS_IN_XS25 + 1, num_ref_pics_list0);
+ }
+ else
+ {
+ ps_enc_ctxt->i4_pre_enc_num_ref_l0_active =
+ MIN(MAX_NUM_REFS_IN_PPICS_IN_XS25, num_ref_pics_list0);
+ ps_enc_ctxt->i4_pre_enc_num_ref_l0_active += i4_inc_L0_active_ref_pic; /* one more when the first two L0 entries share a POC */
+ }
+
+ ps_enc_ctxt->i4_pre_enc_num_ref_l1_active = 0; /* P pictures: no active refs in L1 */
+ }
+ else
+ {
+ if(1 == ps_enc_ctxt->s_runtime_src_prms.i4_field_pic)
+ {
+ ps_enc_ctxt->i4_pre_enc_num_ref_l0_active = MIN(3, num_ref_pics_list0);
+ }
+ else
+ {
+ ps_enc_ctxt->i4_pre_enc_num_ref_l0_active = MIN(2, num_ref_pics_list0);
+ ps_enc_ctxt->i4_pre_enc_num_ref_l0_active += i4_inc_L0_active_ref_pic;
+ }
+
+ ps_enc_ctxt->i4_pre_enc_num_ref_l1_active = 0; /* P pictures: no active refs in L1 */
+ }
+ }
+ else
+ {
+ if(1 == ps_enc_ctxt->s_runtime_src_prms.i4_field_pic)
+ {
+ ps_enc_ctxt->i4_pre_enc_num_ref_l0_active = MIN(2, num_ref_pics_list0);
+ ps_enc_ctxt->i4_pre_enc_num_ref_l1_active = MIN(1, num_ref_pics_list1);
+ ps_enc_ctxt->i4_pre_enc_num_ref_l1_active += i4_inc_L1_active_ref_pic;
+ }
+ else
+ {
+ ps_enc_ctxt->i4_pre_enc_num_ref_l0_active = MIN(1, num_ref_pics_list0);
+ ps_enc_ctxt->i4_pre_enc_num_ref_l1_active = MIN(1, num_ref_pics_list1);
+ ps_enc_ctxt->i4_pre_enc_num_ref_l1_active += i4_inc_L1_active_ref_pic;
+ ps_enc_ctxt->i4_pre_enc_num_ref_l0_active += i4_inc_L0_active_ref_pic;
+ }
+ }
+ }
+ else
+ {
+ if(ps_curr_inp->s_lap_out.i4_pic_type == IV_P_FRAME)
+ {
+ if(1 == ps_enc_ctxt->s_runtime_src_prms.i4_field_pic)
+ ps_enc_ctxt->i4_pre_enc_num_ref_l0_active = MIN(4, num_ref_pics_list0);
+ else
+ ps_enc_ctxt->i4_pre_enc_num_ref_l0_active = MIN(4, num_ref_pics_list0); /* NOTE(review): same value as the field-pic branch above */
+
+ ps_enc_ctxt->i4_pre_enc_num_ref_l1_active = 0;
+ }
+ else
+ {
+ if(1 == ps_enc_ctxt->s_runtime_src_prms.i4_field_pic)
+ {
+ ps_enc_ctxt->i4_pre_enc_num_ref_l0_active = MIN(4, num_ref_pics_list0);
+ ps_enc_ctxt->i4_pre_enc_num_ref_l1_active = MIN(4, num_ref_pics_list1);
+ }
+ else
+ {
+ ps_enc_ctxt->i4_pre_enc_num_ref_l0_active = MIN(4, num_ref_pics_list0);
+ ps_enc_ctxt->i4_pre_enc_num_ref_l1_active = MIN(4, num_ref_pics_list1);
+ }
+ }
+ }
+#else /* MULTI_REF_ENABLE != 1 */
+ {
+ if(ps_curr_inp->s_lap_out.i4_pic_type == IV_P_FRAME)
+ {
+ if(1 == ps_enc_ctxt->s_runtime_src_prms.i4_field_pic)
+ ps_enc_ctxt->i4_pre_enc_num_ref_l0_active = MIN(3, num_ref_pics_list0);
+ else
+ ps_enc_ctxt->i4_pre_enc_num_ref_l0_active = MIN(2, num_ref_pics_list0);
+
+ ps_enc_ctxt->i4_pre_enc_num_ref_l1_active = 0;
+ }
+ else
+ {
+ if(1 == ps_enc_ctxt->s_runtime_src_prms.i4_field_pic)
+ {
+ ps_enc_ctxt->i4_pre_enc_num_ref_l0_active = MIN(2, num_ref_pics_list0);
+ ps_enc_ctxt->i4_pre_enc_num_ref_l1_active = MIN(1, num_ref_pics_list1);
+ }
+ else
+ {
+ ps_enc_ctxt->i4_pre_enc_num_ref_l0_active = MIN(1, num_ref_pics_list0);
+ ps_enc_ctxt->i4_pre_enc_num_ref_l1_active = MIN(1, num_ref_pics_list1);
+ }
+ }
+ }
+#endif /* MULTI_REF_ENABLE == 1 */
+#endif /* HME_USE_ONLY_2REF */
+
+ return;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_manage_ref_pics \endif
+*
+* \brief
+* Reference picture management based on delta poc array given by LAP
+* Populates the reference list after removing non used reference pictures
+* populates the delta poc of reference pics to be signalled in slice header
+*
+* \param[in] encoder context pointer
+* \param[in] current LAP Encoder buffer pointer
+* \param[in] current frame process and entropy buffer pointer
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_manage_ref_pics(
+ enc_ctxt_t *ps_enc_ctxt,
+ ihevce_lap_enc_buf_t *ps_curr_inp,
+ slice_header_t *ps_slice_header,
+ WORD32 i4_me_frm_id,
+ WORD32 i4_thrd_id,
+ WORD32 i4_bitrate_instance_id)
+{
+ WORD32 ctr;
+ WORD32 ref_pics;
+ WORD32 curr_poc, curr_idr_gop_num;
+ WORD32 wp_flag;
+ WORD32 num_ref_pics_list0 = 0;
+ WORD32 num_ref_pics_list1 = 0;
+ WORD32 cra_poc = ps_curr_inp->s_lap_out.i4_assoc_IRAP_poc;
+ WORD32 slice_type = ps_slice_header->i1_slice_type;
+ recon_pic_buf_t *(*aps_ref_list)[HEVCE_MAX_REF_PICS * 2];
+ recon_pic_buf_t(*aps_ref_list_temp)[HEVCE_MAX_REF_PICS * 2];
+ WORD32 i4_num_rpics_l0_excl_dup;
+ WORD32 i4_num_rpics_l1_excl_dup;
+ WORD32 i4_inc_L1_active_ref_pic = 0;
+ WORD32 i4_inc_L0_active_ref_pic = 0;
+ WORD32 i4_bridx = i4_bitrate_instance_id; //bitrate instance index
+ WORD32 i4_resolution_id = ps_enc_ctxt->i4_resolution_id;
+ me_enc_rdopt_ctxt_t *ps_cur_out_me_prms;
+ recon_pic_buf_t ***ppps_recon_bufs = ps_enc_ctxt->pps_recon_buf_q;
+ WORD32 i4_num_recon_bufs = ps_enc_ctxt->ai4_num_buf_recon_q[i4_bridx];
+
+ ps_cur_out_me_prms = ps_enc_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_me_frm_id];
+
+ /*to support diplicate pics*/
+ {
+ WORD32 i, j;
+ for(i = 0; i < NUM_REF_LISTS; i++)
+ {
+ for(j = 0; j < HEVCE_MAX_REF_PICS * 2; j++)
+ {
+ ps_cur_out_me_prms->aps_ref_list[i4_bridx][i][j] =
+ &ps_cur_out_me_prms->as_ref_list[i4_bridx][i][j];
+ }
+ }
+ }
+
+ aps_ref_list = ps_cur_out_me_prms->aps_ref_list[i4_bridx];
+ aps_ref_list_temp = ps_cur_out_me_prms->as_ref_list[i4_bridx];
+
+ curr_poc = ps_curr_inp->s_lap_out.i4_poc;
+ curr_idr_gop_num = ps_curr_inp->s_lap_out.i4_idr_gop_num;
+
+ /* Number of reference pics given by LAP should not be greater than max */
+ ASSERT(HEVCE_MAX_REF_PICS >= ps_curr_inp->s_lap_out.i4_num_ref_pics);
+
+ /* derive the weighted prediction enable flag based on slice type */
+ if(BSLICE == slice_type)
+ {
+ wp_flag = ps_curr_inp->s_lap_out.i1_weighted_bipred_flag;
+ }
+ else if(PSLICE == slice_type)
+ {
+ wp_flag = ps_curr_inp->s_lap_out.i1_weighted_pred_flag;
+ }
+ else
+ {
+ wp_flag = 0;
+ }
+
+ ps_slice_header->s_rplm.i1_ref_pic_list_modification_flag_l0 = 0;
+ ps_slice_header->s_rplm.i1_ref_pic_list_modification_flag_l1 = 0;
+ ASSERT(curr_poc != INVALID_POC);
+
+ /* run a loop over the number of reference pics given by LAP */
+ for(ref_pics = 0; ref_pics < ps_curr_inp->s_lap_out.i4_num_ref_pics; ref_pics++)
+ {
+ WORD32 ref_poc;
+ WORD32 i4_loop = 1;
+ WORD32 i4_temp_list;
+
+ ref_poc = curr_poc + ps_curr_inp->s_lap_out.as_ref_pics[ref_pics].i4_ref_pic_delta_poc;
+ if((0 == curr_poc) && curr_idr_gop_num)
+ {
+ curr_idr_gop_num -= 1;
+ }
+ ASSERT(ref_poc != INVALID_POC);
+ /* run a loop to check the poc based on delta poc array */
+ for(ctr = 0; ctr < i4_num_recon_bufs; ctr++)
+ {
+ /* if the POC is matching with current ref picture*/
+ if((ref_poc == ppps_recon_bufs[i4_bridx][ctr]->i4_poc) &&
+ (0 == ppps_recon_bufs[i4_bridx][ctr]->i4_is_free) &&
+ (curr_idr_gop_num == ppps_recon_bufs[i4_bridx][ctr]->i4_idr_gop_num))
+ {
+ /* populate the reference lists based on delta poc array */
+ if((ref_poc < curr_poc) || (0 == curr_poc))
+ {
+ /* list 0 */
+ memcpy(
+ &aps_ref_list_temp[LIST_0][num_ref_pics_list0],
+ ppps_recon_bufs[i4_bridx][ctr],
+ sizeof(recon_pic_buf_t));
+
+ i4_temp_list = num_ref_pics_list0;
+
+ /*duplicate pics added to the list*/
+ while(i4_loop != ps_curr_inp->s_lap_out.as_ref_pics[ref_pics]
+ .i4_num_duplicate_entries_in_ref_list)
+ {
+ i4_temp_list++;
+ /* list 0 */
+ memcpy(
+ &aps_ref_list_temp[LIST_0][i4_temp_list],
+ ppps_recon_bufs[i4_bridx][ctr],
+ sizeof(recon_pic_buf_t));
+ i4_loop++;
+ }
+
+ /* populate weights and offsets corresponding to this ref pic */
+ memcpy(
+ &aps_ref_list_temp[LIST_0][num_ref_pics_list0].s_weight_offset,
+ &ps_curr_inp->s_lap_out.as_ref_pics[ref_pics].as_wght_off[0],
+ sizeof(ihevce_wght_offst_t));
+
+ /* Store the used as ref for current pic flag */
+ aps_ref_list_temp[LIST_0][num_ref_pics_list0].i4_used_by_cur_pic_flag =
+ ps_curr_inp->s_lap_out.as_ref_pics[ref_pics].i4_used_by_cur_pic_flag;
+
+ if(wp_flag)
+ {
+ WORD16 i2_luma_weight = (aps_ref_list[LIST_0][num_ref_pics_list0]
+ ->s_weight_offset.i2_luma_weight);
+
+ aps_ref_list[LIST_0][num_ref_pics_list0]->i4_inv_luma_wt =
+ ((1 << 15) + (i2_luma_weight >> 1)) / i2_luma_weight;
+
+ aps_ref_list[LIST_0][num_ref_pics_list0]->i4_log2_wt_denom =
+ ps_curr_inp->s_lap_out.i4_log2_luma_wght_denom;
+ }
+ else
+ {
+ WORD16 i2_luma_weight =
+ (1 << ps_curr_inp->s_lap_out.i4_log2_luma_wght_denom);
+
+ aps_ref_list[LIST_0][num_ref_pics_list0]->s_weight_offset.i2_luma_weight =
+ i2_luma_weight;
+
+ aps_ref_list[LIST_0][num_ref_pics_list0]->i4_inv_luma_wt =
+ ((1 << 15) + (i2_luma_weight >> 1)) / i2_luma_weight;
+
+ aps_ref_list[LIST_0][num_ref_pics_list0]->i4_log2_wt_denom =
+ ps_curr_inp->s_lap_out.i4_log2_luma_wght_denom;
+ }
+
+ num_ref_pics_list0++;
+ i4_loop = 1;
+
+ /*duplicate pics added to the list*/
+ while(i4_loop != ps_curr_inp->s_lap_out.as_ref_pics[ref_pics]
+ .i4_num_duplicate_entries_in_ref_list)
+ {
+ /* populate weights and offsets corresponding to this ref pic */
+ memcpy(
+ &aps_ref_list_temp[LIST_0][num_ref_pics_list0].s_weight_offset,
+ &ps_curr_inp->s_lap_out.as_ref_pics[ref_pics].as_wght_off[i4_loop],
+ sizeof(ihevce_wght_offst_t));
+
+ /* Store the used as ref for current pic flag */
+ aps_ref_list_temp[LIST_0][num_ref_pics_list0].i4_used_by_cur_pic_flag =
+ ps_curr_inp->s_lap_out.as_ref_pics[ref_pics].i4_used_by_cur_pic_flag;
+
+ if(wp_flag)
+ {
+ WORD16 i2_luma_weight = (aps_ref_list[LIST_0][num_ref_pics_list0]
+ ->s_weight_offset.i2_luma_weight);
+
+ aps_ref_list[LIST_0][num_ref_pics_list0]->i4_inv_luma_wt =
+ ((1 << 15) + (i2_luma_weight >> 1)) / i2_luma_weight;
+
+ aps_ref_list[LIST_0][num_ref_pics_list0]->i4_log2_wt_denom =
+ ps_curr_inp->s_lap_out.i4_log2_luma_wght_denom;
+ }
+ else
+ {
+ WORD16 i2_luma_weight =
+ (1 << ps_curr_inp->s_lap_out.i4_log2_luma_wght_denom);
+
+ aps_ref_list[LIST_0][num_ref_pics_list0]
+ ->s_weight_offset.i2_luma_weight = i2_luma_weight;
+
+ aps_ref_list[LIST_0][num_ref_pics_list0]->i4_inv_luma_wt =
+ ((1 << 15) + (i2_luma_weight >> 1)) / i2_luma_weight;
+
+ aps_ref_list[LIST_0][num_ref_pics_list0]->i4_log2_wt_denom =
+ ps_curr_inp->s_lap_out.i4_log2_luma_wght_denom;
+ }
+
+ num_ref_pics_list0++;
+ i4_loop++;
+ ps_slice_header->s_rplm.i1_ref_pic_list_modification_flag_l0 = 1;
+ ps_slice_header->s_rplm.i1_ref_pic_list_modification_flag_l1 = 1;
+ }
+ }
+ else
+ {
+ /* list 1 */
+ memcpy(
+ &aps_ref_list_temp[LIST_1][num_ref_pics_list1],
+ ppps_recon_bufs[i4_bridx][ctr],
+ sizeof(recon_pic_buf_t));
+ i4_temp_list = num_ref_pics_list1;
+ /*duplicate pics added to the list*/
+ while(i4_loop != ps_curr_inp->s_lap_out.as_ref_pics[ref_pics]
+ .i4_num_duplicate_entries_in_ref_list)
+ {
+ i4_temp_list++;
+ /* list 1 */
+ memcpy(
+ &aps_ref_list_temp[LIST_1][i4_temp_list],
+ ppps_recon_bufs[i4_bridx][ctr],
+ sizeof(recon_pic_buf_t));
+ i4_loop++;
+ }
+
+ /* populate weights and offsets corresponding to this ref pic */
+ memcpy(
+ &aps_ref_list_temp[LIST_1][num_ref_pics_list1].s_weight_offset,
+ &ps_curr_inp->s_lap_out.as_ref_pics[ref_pics].as_wght_off[0],
+ sizeof(ihevce_wght_offst_t));
+
+ /* Store the used as ref for current pic flag */
+ aps_ref_list_temp[LIST_1][num_ref_pics_list1].i4_used_by_cur_pic_flag =
+ ps_curr_inp->s_lap_out.as_ref_pics[ref_pics].i4_used_by_cur_pic_flag;
+
+ if(wp_flag)
+ {
+ WORD16 i2_luma_weight = (aps_ref_list[LIST_1][num_ref_pics_list1]
+ ->s_weight_offset.i2_luma_weight);
+
+ aps_ref_list[LIST_1][num_ref_pics_list1]->i4_inv_luma_wt =
+ ((1 << 15) + (i2_luma_weight >> 1)) / i2_luma_weight;
+
+ aps_ref_list[LIST_1][num_ref_pics_list1]->i4_log2_wt_denom =
+ ps_curr_inp->s_lap_out.i4_log2_luma_wght_denom;
+ }
+ else
+ {
+ WORD16 i2_luma_weight =
+ (1 << ps_curr_inp->s_lap_out.i4_log2_luma_wght_denom);
+
+ aps_ref_list[LIST_1][num_ref_pics_list1]->s_weight_offset.i2_luma_weight =
+ i2_luma_weight;
+
+ aps_ref_list[LIST_1][num_ref_pics_list1]->i4_inv_luma_wt =
+ ((1 << 15) + (i2_luma_weight >> 1)) / i2_luma_weight;
+
+ aps_ref_list[LIST_1][num_ref_pics_list1]->i4_log2_wt_denom =
+ ps_curr_inp->s_lap_out.i4_log2_luma_wght_denom;
+ }
+
+ num_ref_pics_list1++;
+ i4_loop = 1;
+ /*duplicate pics added to the list*/
+ while(i4_loop != ps_curr_inp->s_lap_out.as_ref_pics[ref_pics]
+ .i4_num_duplicate_entries_in_ref_list)
+ {
+ /* populate weights and offsets corresponding to this ref pic */
+ memcpy(
+ &aps_ref_list_temp[LIST_1][num_ref_pics_list1].s_weight_offset,
+ &ps_curr_inp->s_lap_out.as_ref_pics[ref_pics].as_wght_off[i4_loop],
+ sizeof(ihevce_wght_offst_t));
+
+ /* Store the used as ref for current pic flag */
+ aps_ref_list_temp[LIST_1][num_ref_pics_list1].i4_used_by_cur_pic_flag =
+ ps_curr_inp->s_lap_out.as_ref_pics[ref_pics].i4_used_by_cur_pic_flag;
+
+ if(wp_flag)
+ {
+ WORD16 i2_luma_weight = (aps_ref_list[LIST_1][num_ref_pics_list1]
+ ->s_weight_offset.i2_luma_weight);
+
+ aps_ref_list[LIST_1][num_ref_pics_list1]->i4_inv_luma_wt =
+ ((1 << 15) + (i2_luma_weight >> 1)) / i2_luma_weight;
+
+ aps_ref_list[LIST_1][num_ref_pics_list1]->i4_log2_wt_denom =
+ ps_curr_inp->s_lap_out.i4_log2_luma_wght_denom;
+ }
+ else
+ {
+ WORD16 i2_luma_weight =
+ (1 << ps_curr_inp->s_lap_out.i4_log2_luma_wght_denom);
+
+ aps_ref_list[LIST_1][num_ref_pics_list1]
+ ->s_weight_offset.i2_luma_weight = i2_luma_weight;
+
+ aps_ref_list[LIST_1][num_ref_pics_list1]->i4_inv_luma_wt =
+ ((1 << 15) + (i2_luma_weight >> 1)) / i2_luma_weight;
+
+ aps_ref_list[LIST_1][num_ref_pics_list1]->i4_log2_wt_denom =
+ ps_curr_inp->s_lap_out.i4_log2_luma_wght_denom;
+ }
+
+ num_ref_pics_list1++;
+ i4_loop++;
+ ps_slice_header->s_rplm.i1_ref_pic_list_modification_flag_l1 = 1;
+ ps_slice_header->s_rplm.i1_ref_pic_list_modification_flag_l0 = 1;
+ }
+ }
+ break;
+ }
+ }
+
+ /* if the reference picture is not found then error */
+ ASSERT(ctr != i4_num_recon_bufs);
+ }
+
+ i4_num_rpics_l0_excl_dup = num_ref_pics_list0;
+ i4_num_rpics_l1_excl_dup = num_ref_pics_list1;
+
+ /* sort the reference pics in List0 in descending order POC */
+ if(num_ref_pics_list0 > 1)
+ {
+ /* run a loop for num ref pics -1 */
+ for(ctr = 0; ctr < num_ref_pics_list0 - 1; ctr++)
+ {
+ WORD32 max_idx = ctr;
+ recon_pic_buf_t *ps_temp;
+ WORD32 i;
+
+ for(i = (ctr + 1); i < num_ref_pics_list0; i++)
+ {
+ /* check for poc greater than current ref poc */
+ if(aps_ref_list[LIST_0][i]->i4_poc > aps_ref_list[LIST_0][max_idx]->i4_poc)
+ {
+ max_idx = i;
+ }
+ }
+
+ /* if max of remaining is not current, swap the pointers */
+ if(max_idx != ctr)
+ {
+ ps_temp = aps_ref_list[LIST_0][max_idx];
+ aps_ref_list[LIST_0][max_idx] = aps_ref_list[LIST_0][ctr];
+ aps_ref_list[LIST_0][ctr] = ps_temp;
+ }
+ }
+ }
+
+ /* sort the reference pics in List1 in ascending order POC */
+ if(num_ref_pics_list1 > 1)
+ {
+ /* run a loop for num ref pics -1 */
+ for(ctr = 0; ctr < num_ref_pics_list1 - 1; ctr++)
+ {
+ WORD32 min_idx = ctr;
+ recon_pic_buf_t *ps_temp;
+ WORD32 i;
+
+ for(i = (ctr + 1); i < num_ref_pics_list1; i++)
+ {
+ /* check for p[oc less than current ref poc */
+ if(aps_ref_list[LIST_1][i]->i4_poc < aps_ref_list[LIST_1][min_idx]->i4_poc)
+ {
+ min_idx = i;
+ }
+ }
+
+ /* if min of remaining is not current, swap the pointers */
+ if(min_idx != ctr)
+ {
+ ps_temp = aps_ref_list[LIST_1][min_idx];
+ aps_ref_list[LIST_1][min_idx] = aps_ref_list[LIST_1][ctr];
+ aps_ref_list[LIST_1][ctr] = ps_temp;
+ }
+ }
+ }
+
+ /* popluate the slice header parameters to signal delta POCs and use flags */
+ {
+ WORD32 i;
+ WORD32 prev_poc = curr_poc;
+
+ ps_slice_header->s_stref_picset.i1_inter_ref_pic_set_prediction_flag = 0;
+
+ ps_slice_header->s_stref_picset.i1_num_neg_pics = num_ref_pics_list0;
+
+ ps_slice_header->s_stref_picset.i1_num_pos_pics = num_ref_pics_list1;
+
+ ps_slice_header->s_stref_picset.i1_num_ref_idc = -1;
+
+ /* populate the delta POCs of reference pics */
+ i = 0;
+
+ for(ctr = 0; ctr < i4_num_rpics_l0_excl_dup; ctr++)
+ {
+ WORD32 ref_poc_l0 = aps_ref_list[LIST_0][i]->i4_poc;
+
+ ps_slice_header->s_stref_picset.ai2_delta_poc[ctr] = prev_poc - ref_poc_l0;
+ ps_slice_header->s_stref_picset.ai1_used[ctr] =
+ aps_ref_list[LIST_0][i]->i4_used_by_cur_pic_flag;
+
+ /* check if this picture has to be used as reference */
+ if(1 == ps_slice_header->s_stref_picset.ai1_used[ctr])
+ {
+ /* check for CRA poc related use flag signalling */
+ ps_slice_header->s_stref_picset.ai1_used[ctr] =
+ (curr_poc > cra_poc) ? (ref_poc_l0 >= cra_poc) : (slice_type != ISLICE);
+ }
+ if(!(prev_poc - ref_poc_l0))
+ {
+ ctr -= 1;
+ i4_num_rpics_l0_excl_dup -= 1;
+ }
+ prev_poc = ref_poc_l0;
+
+ i++;
+ }
+
+ i = 0;
+ prev_poc = curr_poc;
+ for(; ctr < (i4_num_rpics_l0_excl_dup + i4_num_rpics_l1_excl_dup); ctr++)
+ {
+ WORD32 ref_poc_l1 = aps_ref_list[LIST_1][i]->i4_poc;
+
+ ps_slice_header->s_stref_picset.ai2_delta_poc[ctr] = ref_poc_l1 - prev_poc;
+
+ ps_slice_header->s_stref_picset.ai1_used[ctr] =
+ aps_ref_list[LIST_1][i]->i4_used_by_cur_pic_flag;
+
+ /* check if this picture has to be used as reference */
+ if(1 == ps_slice_header->s_stref_picset.ai1_used[ctr])
+ {
+ /* check for CRA poc related use flag signalling */
+ ps_slice_header->s_stref_picset.ai1_used[ctr] =
+ (curr_poc > cra_poc) ? (ref_poc_l1 >= cra_poc) : (slice_type != ISLICE);
+ /* (slice_type != ISLICE); */
+ }
+ if(!(ref_poc_l1 - prev_poc))
+ {
+ ctr -= 1;
+ i4_num_rpics_l1_excl_dup -= 1;
+ }
+ prev_poc = ref_poc_l1;
+ i++;
+ }
+ ps_slice_header->s_stref_picset.i1_num_neg_pics = i4_num_rpics_l0_excl_dup;
+
+ ps_slice_header->s_stref_picset.i1_num_pos_pics = i4_num_rpics_l1_excl_dup;
+
+ if(IV_IDR_FRAME == ps_curr_inp->s_lap_out.i4_pic_type)
+ {
+ ps_slice_header->s_stref_picset.i1_num_neg_pics = 0;
+ ps_slice_header->s_stref_picset.i1_num_pos_pics = 0;
+ }
+
+ /* not used so set to -1 */
+ memset(&ps_slice_header->s_stref_picset.ai1_ref_idc[0], -1, MAX_DPB_SIZE);
+ }
+ /* call the ME API to update the DPB of HME pyramids
+ Update list for reference bit-rate only */
+ if(0 == i4_bridx)
+ {
+ ihevce_me_frame_dpb_update(
+ ps_enc_ctxt->s_module_ctxt.pv_me_ctxt,
+ num_ref_pics_list0,
+ num_ref_pics_list1,
+ &aps_ref_list[LIST_0][0],
+ &aps_ref_list[LIST_1][0],
+ i4_thrd_id);
+ }
+
+ /* Default list creation based on uses as ref pic for current pic flag */
+ {
+ WORD32 num_ref_pics_list_final = 0;
+ WORD32 list_idx = 0;
+
+ /* LIST 0 */
+ /* run a loop for num ref pics in list 0 */
+ for(ctr = 0; ctr < num_ref_pics_list0; ctr++)
+ {
+ /* check for used as reference flag */
+ if(1 == aps_ref_list[LIST_0][ctr]->i4_used_by_cur_pic_flag)
+ {
+ /* copy the pointer to the actual valid list idx */
+ aps_ref_list[LIST_0][list_idx] = aps_ref_list[LIST_0][ctr];
+
+ /* increment the valid pic counters and idx */
+ list_idx++;
+ num_ref_pics_list_final++;
+ }
+ }
+
+ /* finally store the number of pictures in List0 */
+ num_ref_pics_list0 = num_ref_pics_list_final;
+
+ /* LIST 1 */
+ num_ref_pics_list_final = 0;
+ list_idx = 0;
+
+ /* run a loop for num ref pics in list 1 */
+ for(ctr = 0; ctr < num_ref_pics_list1; ctr++)
+ {
+ /* check for used as reference flag */
+ if(1 == aps_ref_list[LIST_1][ctr]->i4_used_by_cur_pic_flag)
+ {
+ /* copy the pointer to the actual valid list idx */
+ aps_ref_list[LIST_1][list_idx] = aps_ref_list[LIST_1][ctr];
+
+ /* increment the valid pic counters and idx */
+ list_idx++;
+ num_ref_pics_list_final++;
+ }
+ }
+
+ /* finally store the number of pictures in List1 */
+ num_ref_pics_list1 = num_ref_pics_list_final;
+ }
+ /*in case of single active ref picture on L0 and L1, then consider one of them weighted
+ and another non-weighted*/
+ if(ps_curr_inp->s_lap_out.i4_pic_type == IV_P_FRAME)
+ {
+ if(num_ref_pics_list0 > 2)
+ {
+ if(aps_ref_list[LIST_0][0]->i4_poc == aps_ref_list[LIST_0][1]->i4_poc)
+ {
+ i4_inc_L0_active_ref_pic = 1;
+ }
+ }
+ }
+ else
+ {
+ if(num_ref_pics_list0 >= 2 && num_ref_pics_list1 >= 2)
+ {
+ if(aps_ref_list[LIST_0][0]->i4_poc == aps_ref_list[LIST_0][1]->i4_poc)
+ {
+ i4_inc_L0_active_ref_pic = 1;
+ }
+
+ if(aps_ref_list[LIST_1][0]->i4_poc == aps_ref_list[LIST_1][1]->i4_poc)
+ {
+ i4_inc_L1_active_ref_pic = 1;
+ }
+ }
+ }
+ /* append the reference pics in List1 at the end of List0 */
+ for(ctr = 0; ctr < num_ref_pics_list1; ctr++)
+ {
+ aps_ref_list[LIST_0][num_ref_pics_list0 + ctr] = aps_ref_list[LIST_1][ctr];
+ }
+
+ /* append the reference pics in List0 at the end of List1 */
+ for(ctr = 0; ctr < num_ref_pics_list0; ctr++)
+ {
+ aps_ref_list[LIST_1][num_ref_pics_list1 + ctr] = aps_ref_list[LIST_0][ctr];
+ }
+
+ /* reference list modification for adding duplicate reference */
+ {
+ WORD32 i4_latest_idx = 0;
+ recon_pic_buf_t *ps_ref_list_cur;
+ recon_pic_buf_t *ps_ref_list_prev;
+ /*List 0*/
+ ps_ref_list_cur = aps_ref_list[LIST_0][0];
+ ps_ref_list_prev = ps_ref_list_cur;
+ for(ctr = 0; ctr < (num_ref_pics_list0 + num_ref_pics_list1); ctr++)
+ {
+ if(ps_ref_list_cur->i4_poc != ps_ref_list_prev->i4_poc)
+ {
+ i4_latest_idx++;
+ }
+ ps_ref_list_prev = ps_ref_list_cur;
+ ps_slice_header->s_rplm.i4_ref_poc_l0[ctr] = ps_ref_list_cur->i4_poc;
+ ps_slice_header->s_rplm.i1_list_entry_l0[ctr] = i4_latest_idx;
+ if((ctr + 1) < (num_ref_pics_list0 + num_ref_pics_list1))
+ {
+ ps_ref_list_cur = aps_ref_list[LIST_0][ctr + 1];
+ }
+ } /*end for*/
+
+ /*LIST 1*/
+ i4_latest_idx = 0;
+ ps_ref_list_cur = aps_ref_list[LIST_1][0];
+ ps_ref_list_prev = ps_ref_list_cur;
+ for(ctr = 0; ctr < (num_ref_pics_list0 + num_ref_pics_list1); ctr++)
+ {
+ if(ps_ref_list_cur->i4_poc != ps_ref_list_prev->i4_poc)
+ {
+ i4_latest_idx++;
+ }
+ ps_ref_list_prev = ps_ref_list_cur;
+ ps_slice_header->s_rplm.i4_ref_poc_l1[ctr] = ps_ref_list_cur->i4_poc;
+ ps_slice_header->s_rplm.i1_list_entry_l1[ctr] = i4_latest_idx;
+ if((ctr + 1) < (num_ref_pics_list0 + num_ref_pics_list1))
+ {
+ ps_ref_list_cur = aps_ref_list[LIST_1][ctr + 1];
+ }
+ } /*end for*/
+ }
+
+ /* set number of active references used for l0 and l1 in slice hdr */
+ ps_slice_header->i1_num_ref_idx_active_override_flag = 1;
+ ps_slice_header->i1_num_ref_idx_l0_active = num_ref_pics_list0 + num_ref_pics_list1;
+ if(BSLICE == slice_type)
+ {
+ /* i1_num_ref_idx_l1_active applicable only for B pics */
+ ps_slice_header->i1_num_ref_idx_l1_active = num_ref_pics_list0 + num_ref_pics_list1;
+ }
+ /* populate the slice header parameters with weights and offsets */
+ {
+ WORD32 i;
+
+ /* populate the log 2 weight denom if weighted prediction is enabled */
+ if(1 == wp_flag)
+ {
+ ps_slice_header->s_wt_ofst.i1_chroma_log2_weight_denom =
+ ps_curr_inp->s_lap_out.i4_log2_chroma_wght_denom;
+ ps_slice_header->s_wt_ofst.i1_luma_log2_weight_denom =
+ ps_curr_inp->s_lap_out.i4_log2_luma_wght_denom;
+ }
+
+ /* populate the weights and offsets for all pics in L0 + L1 */
+ for(i = 0; i < (num_ref_pics_list0 + num_ref_pics_list1); i++)
+ {
+ /* populate the weights and offsets if weighted prediction is enabled */
+ if(1 == wp_flag)
+ {
+ ps_slice_header->s_wt_ofst.i1_luma_weight_l0_flag[i] =
+ aps_ref_list[LIST_0][i]->s_weight_offset.u1_luma_weight_enable_flag;
+
+ /* if weights are enabled then copy to slice header */
+ if(1 == ps_slice_header->s_wt_ofst.i1_luma_weight_l0_flag[i])
+ {
+ ps_slice_header->s_wt_ofst.i2_luma_weight_l0[i] =
+ aps_ref_list[LIST_0][i]->s_weight_offset.i2_luma_weight;
+ ps_slice_header->s_wt_ofst.i2_luma_offset_l0[i] =
+ aps_ref_list[LIST_0][i]->s_weight_offset.i2_luma_offset;
+
+ {
+ WORD16 i2_luma_weight =
+ (aps_ref_list[LIST_0][i]->s_weight_offset.i2_luma_weight);
+
+ aps_ref_list[LIST_0][i]->i4_inv_luma_wt =
+ ((1 << 15) + (i2_luma_weight >> 1)) / i2_luma_weight;
+
+ aps_ref_list[LIST_0][i]->i4_log2_wt_denom =
+ ps_curr_inp->s_lap_out.i4_log2_luma_wght_denom;
+ }
+ }
+ else
+ {
+ WORD16 i2_luma_weight = (1 << ps_curr_inp->s_lap_out.i4_log2_luma_wght_denom);
+
+ /* set to default values */
+ aps_ref_list[LIST_0][i]->s_weight_offset.i2_luma_weight = (i2_luma_weight);
+
+ aps_ref_list[LIST_0][i]->s_weight_offset.i2_luma_offset = 0;
+
+ aps_ref_list[LIST_0][i]->i4_inv_luma_wt =
+ ((1 << 15) + (i2_luma_weight >> 1)) / i2_luma_weight;
+
+ aps_ref_list[LIST_0][i]->i4_log2_wt_denom =
+ ps_curr_inp->s_lap_out.i4_log2_luma_wght_denom;
+ }
+
+ ps_slice_header->s_wt_ofst.i1_chroma_weight_l0_flag[i] =
+ aps_ref_list[LIST_0][i]->s_weight_offset.u1_chroma_weight_enable_flag;
+
+ /* if weights are enabled then copy to slice header */
+ if(1 == ps_slice_header->s_wt_ofst.i1_chroma_weight_l0_flag[i])
+ {
+ ps_slice_header->s_wt_ofst.i2_chroma_weight_l0_cb[i] =
+ aps_ref_list[LIST_0][i]->s_weight_offset.i2_cb_weight;
+ ps_slice_header->s_wt_ofst.i2_chroma_offset_l0_cb[i] =
+ aps_ref_list[LIST_0][i]->s_weight_offset.i2_cb_offset;
+
+ ps_slice_header->s_wt_ofst.i2_chroma_weight_l0_cr[i] =
+ aps_ref_list[LIST_0][i]->s_weight_offset.i2_cr_weight;
+ ps_slice_header->s_wt_ofst.i2_chroma_offset_l0_cr[i] =
+ aps_ref_list[LIST_0][i]->s_weight_offset.i2_cr_offset;
+ }
+ else
+ {
+ /* set to default values */
+ aps_ref_list[LIST_0][i]->s_weight_offset.i2_cb_weight =
+ (1 << ps_curr_inp->s_lap_out.i4_log2_chroma_wght_denom);
+ aps_ref_list[LIST_0][i]->s_weight_offset.i2_cr_weight =
+ (1 << ps_curr_inp->s_lap_out.i4_log2_chroma_wght_denom);
+ aps_ref_list[LIST_0][i]->s_weight_offset.i2_cb_offset = 0;
+ aps_ref_list[LIST_0][i]->s_weight_offset.i2_cr_offset = 0;
+ }
+ }
+ }
+
+ for(i = 0; i < (num_ref_pics_list0 + num_ref_pics_list1); i++)
+ {
+ /* populate the weights and offsets if weighted prediction is enabled */
+ if(1 == wp_flag)
+ {
+ ps_slice_header->s_wt_ofst.i1_luma_weight_l1_flag[i] =
+ aps_ref_list[LIST_1][i]->s_weight_offset.u1_luma_weight_enable_flag;
+
+ /* if weights are enabled then copy to slice header */
+ if(1 == ps_slice_header->s_wt_ofst.i1_luma_weight_l1_flag[i])
+ {
+ ps_slice_header->s_wt_ofst.i2_luma_weight_l1[i] =
+ aps_ref_list[LIST_1][i]->s_weight_offset.i2_luma_weight;
+ ps_slice_header->s_wt_ofst.i2_luma_offset_l1[i] =
+ aps_ref_list[LIST_1][i]->s_weight_offset.i2_luma_offset;
+
+ {
+ WORD16 i2_luma_weight =
+ (aps_ref_list[LIST_1][i]->s_weight_offset.i2_luma_weight);
+
+ aps_ref_list[LIST_1][i]->i4_inv_luma_wt =
+ ((1 << 15) + (i2_luma_weight >> 1)) / i2_luma_weight;
+
+ aps_ref_list[LIST_1][i]->i4_log2_wt_denom =
+ ps_curr_inp->s_lap_out.i4_log2_luma_wght_denom;
+ }
+ }
+ else
+ {
+ WORD16 i2_luma_weight = (1 << ps_curr_inp->s_lap_out.i4_log2_luma_wght_denom);
+
+ /* set to default values */
+ aps_ref_list[LIST_1][i]->s_weight_offset.i2_luma_weight = (i2_luma_weight);
+
+ aps_ref_list[LIST_1][i]->s_weight_offset.i2_luma_offset = 0;
+
+ aps_ref_list[LIST_1][i]->i4_inv_luma_wt =
+ ((1 << 15) + (i2_luma_weight >> 1)) / i2_luma_weight;
+
+ aps_ref_list[LIST_1][i]->i4_log2_wt_denom =
+ ps_curr_inp->s_lap_out.i4_log2_luma_wght_denom;
+ }
+
+ ps_slice_header->s_wt_ofst.i1_chroma_weight_l1_flag[i] =
+ aps_ref_list[LIST_1][i]->s_weight_offset.u1_chroma_weight_enable_flag;
+
+ /* if weights are enabled then copy to slice header */
+ if(1 == ps_slice_header->s_wt_ofst.i1_chroma_weight_l1_flag[i])
+ {
+ ps_slice_header->s_wt_ofst.i2_chroma_weight_l1_cb[i] =
+ aps_ref_list[LIST_1][i]->s_weight_offset.i2_cb_weight;
+ ps_slice_header->s_wt_ofst.i2_chroma_offset_l1_cb[i] =
+ aps_ref_list[LIST_1][i]->s_weight_offset.i2_cb_offset;
+
+ ps_slice_header->s_wt_ofst.i2_chroma_weight_l1_cr[i] =
+ aps_ref_list[LIST_1][i]->s_weight_offset.i2_cr_weight;
+ ps_slice_header->s_wt_ofst.i2_chroma_offset_l1_cr[i] =
+ aps_ref_list[LIST_1][i]->s_weight_offset.i2_cr_offset;
+ }
+ else
+ {
+ /* set to default values */
+ aps_ref_list[LIST_1][i]->s_weight_offset.i2_cb_weight =
+ (1 << ps_curr_inp->s_lap_out.i4_log2_chroma_wght_denom);
+ aps_ref_list[LIST_1][i]->s_weight_offset.i2_cr_weight =
+ (1 << ps_curr_inp->s_lap_out.i4_log2_chroma_wght_denom);
+ aps_ref_list[LIST_1][i]->s_weight_offset.i2_cb_offset = 0;
+ aps_ref_list[LIST_1][i]->s_weight_offset.i2_cr_offset = 0;
+ }
+ }
+ }
+ }
+
+ /* store the number of reference pics in the list for ME/MC etc */
+ ps_enc_ctxt->i4_num_ref_l0 = num_ref_pics_list0;
+ ps_enc_ctxt->i4_num_ref_l1 = num_ref_pics_list1;
+
+#define HME_USE_ONLY_2REF
+#ifndef HME_USE_ONLY_2REF
+ ps_enc_ctxt->i4_num_ref_l0_active = num_ref_pics_list0;
+ ps_enc_ctxt->i4_num_ref_l1_active = num_ref_pics_list1;
+#else
+#if MULTI_REF_ENABLE == 1
+ if(ps_curr_inp->s_lap_out.i4_quality_preset >= IHEVCE_QUALITY_P3)
+ {
+ if(ps_curr_inp->s_lap_out.i4_pic_type == IV_P_FRAME)
+ {
+ if(ps_curr_inp->s_lap_out.i4_quality_preset == IHEVCE_QUALITY_P6)
+ {
+ if(1 == ps_enc_ctxt->s_runtime_src_prms.i4_field_pic)
+ {
+ ps_enc_ctxt->i4_num_ref_l0_active =
+ MIN(MAX_NUM_REFS_IN_PPICS_IN_XS25 + 1, num_ref_pics_list0);
+ }
+ else
+ {
+ ps_enc_ctxt->i4_num_ref_l0_active =
+ MIN(MAX_NUM_REFS_IN_PPICS_IN_XS25, num_ref_pics_list0);
+
+ ps_enc_ctxt->i4_num_ref_l0_active += i4_inc_L0_active_ref_pic;
+ }
+ }
+ else
+ {
+ if(1 == ps_enc_ctxt->s_runtime_src_prms.i4_field_pic)
+ {
+ ps_enc_ctxt->i4_num_ref_l0_active = MIN(3, num_ref_pics_list0);
+ }
+ else
+ {
+ ps_enc_ctxt->i4_num_ref_l0_active = MIN(2, num_ref_pics_list0);
+ ps_enc_ctxt->i4_num_ref_l0_active += i4_inc_L0_active_ref_pic;
+ }
+ }
+
+ ps_enc_ctxt->i4_num_ref_l1_active = 0;
+ }
+ else
+ {
+ if(1 == ps_enc_ctxt->s_runtime_src_prms.i4_field_pic)
+ {
+ ps_enc_ctxt->i4_num_ref_l0_active = MIN(2, num_ref_pics_list0);
+ ps_enc_ctxt->i4_num_ref_l1_active = MIN(1, num_ref_pics_list1);
+ ps_enc_ctxt->i4_num_ref_l1_active += i4_inc_L1_active_ref_pic;
+ }
+ else
+ {
+ ps_enc_ctxt->i4_num_ref_l0_active = MIN(1, num_ref_pics_list0);
+ ps_enc_ctxt->i4_num_ref_l1_active = MIN(1, num_ref_pics_list1);
+
+ ps_enc_ctxt->i4_num_ref_l1_active += i4_inc_L1_active_ref_pic;
+ ps_enc_ctxt->i4_num_ref_l0_active += i4_inc_L0_active_ref_pic;
+ }
+ }
+ }
+ else
+ {
+ if(ps_curr_inp->s_lap_out.i4_pic_type == IV_P_FRAME)
+ {
+ if(1 == ps_enc_ctxt->s_runtime_src_prms.i4_field_pic)
+ ps_enc_ctxt->i4_num_ref_l0_active = MIN(4, num_ref_pics_list0);
+ else
+ ps_enc_ctxt->i4_num_ref_l0_active = MIN(4, num_ref_pics_list0);
+
+ ps_enc_ctxt->i4_num_ref_l1_active = 0;
+ }
+ else
+ {
+ if(1 == ps_enc_ctxt->s_runtime_src_prms.i4_field_pic)
+ {
+ ps_enc_ctxt->i4_num_ref_l0_active = MIN(4, num_ref_pics_list0);
+ ps_enc_ctxt->i4_num_ref_l1_active = MIN(4, num_ref_pics_list1);
+ }
+ else
+ {
+ ps_enc_ctxt->i4_num_ref_l0_active = MIN(4, num_ref_pics_list0);
+ ps_enc_ctxt->i4_num_ref_l1_active = MIN(4, num_ref_pics_list1);
+ }
+ }
+ }
+#else
+ if(ps_curr_inp->s_lap_out.i4_pic_type == IV_P_FRAME)
+ {
+ if(1 == ps_enc_ctxt->s_runtime_src_prms.i4_field_pic)
+ ps_enc_ctxt->i4_num_ref_l0_active = MIN(3, num_ref_pics_list0);
+ else
+ ps_enc_ctxt->i4_num_ref_l0_active = MIN(2, num_ref_pics_list0);
+
+ ps_enc_ctxt->i4_num_ref_l1_active = 0;
+ }
+ else
+ {
+ if(1 == ps_enc_ctxt->s_runtime_src_prms.i4_field_pic)
+ {
+ ps_enc_ctxt->i4_num_ref_l0_active = MIN(2, num_ref_pics_list0);
+ ps_enc_ctxt->i4_num_ref_l1_active = MIN(1, num_ref_pics_list1);
+ }
+ else
+ {
+ ps_enc_ctxt->i4_num_ref_l0_active = MIN(1, num_ref_pics_list0);
+ ps_enc_ctxt->i4_num_ref_l1_active = MIN(1, num_ref_pics_list1);
+ }
+ }
+#endif
+
+#endif
+
+ ps_slice_header->i1_num_ref_idx_l0_active = MAX(1, ps_enc_ctxt->i4_num_ref_l0_active);
+ if(BSLICE == slice_type)
+ {
+ /* i1_num_ref_idx_l1_active applicable only for B pics */
+ ps_slice_header->i1_num_ref_idx_l1_active = MAX(1, ps_enc_ctxt->i4_num_ref_l1_active);
+ }
+ if(1 == ps_enc_ctxt->s_runtime_src_prms.i4_field_pic)
+ {
+ /* If interlaced field coding is enabled, a P field following a CRA I field should have only one ref frame */
+ WORD32 cra_second_poc = cra_poc + 1;
+
+ if(curr_poc == cra_second_poc)
+ {
+ /* set number of active references used for l0 and l1 for me */
+ ps_enc_ctxt->i4_num_ref_l0_active = 1;
+ ps_enc_ctxt->i4_num_ref_l1_active = 0;
+
+ /* set number of active references used for l0 and l1 in slice hdr */
+ ps_slice_header->i1_num_ref_idx_active_override_flag = 1;
+ ps_slice_header->i1_num_ref_idx_l0_active =
+ ps_enc_ctxt->i4_num_ref_l0 + ps_enc_ctxt->i4_num_ref_l1;
+ }
+ }
+ return;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_get_frame_lambda_prms \endif
+*
+* \brief
+* Function which calculates the Lambda params for current picture
+*
+* \details
+* A lambda modifier is chosen from the slice type (and, for B slices, from
+* i4_is_ref_pic and i4_temporal_lyr_id). The luma and chroma lambdas are then
+* computed as modifier * 2^((qp + bit-depth offset - 12) / 3) and written, in
+* Q format, to ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id] as SSD, SAD and
+* SATD lambdas together with their "type2" counterparts.
+* When the constant-lambda-modifier mode is active, the slice-type dependent
+* modifier is replaced by f_i_pic_lamda_modifier (I slices) or
+* CONST_LAMDA_MOD_VAL (all other slices).
+*
+* \param[in] ps_enc_ctxt : encoder ctxt pointer
+* \param[in,out] ps_cur_pic_ctxt : current pic ctxt; the slice type is read
+*                from s_slice_hdr and as_lambda_prms[i4_inst_id] is written
+* \param[in] i4_cur_frame_qp : current pic QP
+* \param[in] first_field : is first field flag (only read in the P slice
+*                branch, where both outcomes select the same modifier)
+* \param[in] i4_is_ref_pic : B slice modifier selector: 1 -> 0.3536,
+*                2 -> 0.45, any other value -> 0.68
+* \param[in] i4_temporal_lyr_id : Current picture layer id; a non-zero value
+*                additionally scales the B slice modifiers
+* \param[in] f_i_pic_lamda_modifier : modifier used for I slices when the
+*                constant-lambda-modifier mode is active
+* \param[in] i4_inst_id : index into ps_cur_pic_ctxt->as_lambda_prms[]
+* \param[in] i4_lambda_type : 0 -> bit-depth QP offset forced to 0, type2
+*                fields copied from the primary fields;
+*                1 -> bit-depth QP offset applied, type2 fields copied from
+*                the primary fields;
+*                2 -> primary fields use the bit-depth QP offset, type2
+*                fields are computed without it;
+*                any other value hits ASSERT(0)
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_get_frame_lambda_prms(
+ enc_ctxt_t *ps_enc_ctxt,
+ pre_enc_me_ctxt_t *ps_cur_pic_ctxt,
+ WORD32 i4_cur_frame_qp,
+ WORD32 first_field,
+ WORD32 i4_is_ref_pic,
+ WORD32 i4_temporal_lyr_id,
+ double f_i_pic_lamda_modifier,
+ WORD32 i4_inst_id,
+ WORD32 i4_lambda_type)
+{
+ double lambda_modifier = CONST_LAMDA_MOD_VAL;
+ double lambda_uv_modifier = CONST_LAMDA_MOD_VAL;
+ double lambda = 0;
+ double lambda_uv;
+ WORD32 i4_use_const_lamda_modifier;
+
+ /* initialize lambda based on frm qp, slice type, num b and temporal id */
+ /* This lambda calculation mimics the jctvc doc (TODO add doc number) */
+
+ WORD32 num_b_frms =
+ (1 << ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_max_temporal_layers) - 1; /* (2 ^ max temporal layers) - 1 */
+ WORD32 chroma_qp = (ps_enc_ctxt->ps_stat_prms->s_src_prms.i4_chr_format == IV_YUV_422SP_UV)
+ ? MIN(i4_cur_frame_qp, 51)
+ : gai1_ihevc_chroma_qp_scale[i4_cur_frame_qp + MAX_QP_BD_OFFSET]; /* 422SP_UV: frame QP capped at 51;
+ otherwise chroma QP comes from the lookup table */
+
+ WORD32 i4_qp_bdoffset =
+ 6 * (ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms.i4_internal_bit_depth - 8); /* 6 QP steps per bit of internal depth above 8 */
+ WORD32 slice_type = ps_cur_pic_ctxt->s_slice_hdr.i1_slice_type;
+
+ (void)first_field; /* these three casts are no-ops; all three parameters are still read further down */
+ (void)i4_is_ref_pic;
+ (void)i4_temporal_lyr_id;
+ i4_use_const_lamda_modifier = USE_CONSTANT_LAMBDA_MODIFIER; /* constant-modifier mode is active when this macro
+ is non-zero, or when the control-toggler bit of i4_vqet is set together with
+ at least one of the noise-preservation / psyrdopt 1-3 bits (tested below) */
+ i4_use_const_lamda_modifier = i4_use_const_lamda_modifier ||
+ ((ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
+ (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER)) &&
+ ((ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
+ (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_NOISE_PRESERVATION)) ||
+ (ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
+ (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_1)) ||
+ (ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
+ (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_2)) ||
+ (ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet &
+ (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_3))));
+
+ /* lambda modifier is dependent on slice type and temporal id.
+ Each branch below also records the modifier pair in
+ as_lambda_prms[i4_inst_id] (constant-mode value or slice-type value) */
+ if(ISLICE == slice_type)
+ {
+ double temporal_correction_islice = 1.0 - 0.05 * num_b_frms;
+ temporal_correction_islice = MAX(0.5, temporal_correction_islice); /* correction is floored at 0.5 */
+
+ lambda_modifier = 0.57 * temporal_correction_islice;
+ lambda_uv_modifier = lambda_modifier;
+ if(i4_use_const_lamda_modifier)
+ {
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].lambda_modifier = f_i_pic_lamda_modifier;
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].lambda_uv_modifier = f_i_pic_lamda_modifier;
+ }
+ else
+ {
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].lambda_modifier = lambda_modifier;
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].lambda_uv_modifier = lambda_uv_modifier;
+ }
+ }
+ else if(PSLICE == slice_type)
+ {
+ if(first_field) /* both outcomes currently assign the same value, so first_field has no effect */
+ lambda_modifier = 0.442; //0.442*0.8;
+ else
+ lambda_modifier = 0.442;
+ lambda_uv_modifier = lambda_modifier;
+ if(i4_use_const_lamda_modifier)
+ {
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].lambda_modifier = CONST_LAMDA_MOD_VAL;
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].lambda_uv_modifier = CONST_LAMDA_MOD_VAL;
+ }
+ else
+ {
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].lambda_modifier = lambda_modifier;
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].lambda_uv_modifier = lambda_uv_modifier;
+ }
+ }
+ else
+ {
+ /* BSLICE (every slice type other than ISLICE / PSLICE lands here) */
+ if(1 == i4_is_ref_pic)
+ {
+ lambda_modifier = 0.3536;
+ }
+ else if(2 == i4_is_ref_pic)
+ {
+ lambda_modifier = 0.45;
+ }
+ else
+ {
+ lambda_modifier = 0.68;
+ }
+ lambda_uv_modifier = lambda_modifier;
+ if(i4_use_const_lamda_modifier)
+ {
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].lambda_modifier = CONST_LAMDA_MOD_VAL;
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].lambda_uv_modifier = CONST_LAMDA_MOD_VAL;
+ }
+ else
+ {
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].lambda_modifier = lambda_modifier;
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].lambda_uv_modifier = lambda_uv_modifier;
+ }
+ /* TODO: Disable lambda modification for interlace encode to match HM runs */
+ //if(0 == ps_enc_ctxt->s_runtime_src_prms.i4_field_pic)
+ {
+ /* modify b lambda further based on temporal id.
+ The modifiers stored in as_lambda_prms above were recorded before this
+ scaling, so only the local modifiers carry it */
+ if(i4_temporal_lyr_id)
+ {
+ lambda_modifier *= CLIP3((((double)(i4_cur_frame_qp - 12)) / 6.0), 2.00, 4.00);
+ lambda_uv_modifier *= CLIP3((((double)(chroma_qp - 12)) / 6.0), 2.00, 4.00);
+ }
+ }
+ }
+ if(i4_use_const_lamda_modifier) /* constant-modifier mode replaces whatever was selected (and scaled) above */
+ {
+ if(ISLICE == slice_type)
+ {
+ lambda_modifier = f_i_pic_lamda_modifier;
+ lambda_uv_modifier = f_i_pic_lamda_modifier;
+ }
+ else
+ {
+ lambda_modifier = CONST_LAMDA_MOD_VAL;
+ lambda_uv_modifier = CONST_LAMDA_MOD_VAL;
+ }
+ }
+
+ switch(i4_lambda_type)
+ {
+ case 0: /* no bit-depth QP offset; type2 fields mirror the primary fields */
+ {
+ i4_qp_bdoffset = 0;
+
+ lambda = pow(2.0, (((double)(i4_cur_frame_qp + i4_qp_bdoffset - 12)) / 3.0));
+ lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0));
+
+ /* modify the base lambda according to lambda modifier */
+ lambda *= lambda_modifier;
+ lambda_uv *= lambda_uv_modifier;
+
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].u4_chroma_cost_weighing_factor =
+ (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT)); /* luma/chroma lambda ratio, up-shifted */
+
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i8_cl_ssd_lambda_qf =
+ (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT));
+
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i8_cl_ssd_lambda_chroma_qf =
+ (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT));
+
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_cl_sad_lambda_qf =
+ (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT)); /* SAD/SATD lambdas use sqrt(lambda) */
+ if(i4_use_const_lamda_modifier)
+ {
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_ol_sad_lambda_qf =
+ (WORD32)((sqrt(lambda)) * (1 << LAMBDA_Q_SHIFT));
+
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_cl_satd_lambda_qf =
+ (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
+
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_ol_satd_lambda_qf =
+ (WORD32)((sqrt(lambda)) * (1 << (LAMBDA_Q_SHIFT)));
+ }
+ else
+ {
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_ol_sad_lambda_qf =
+ (WORD32)((sqrt(lambda) / 1.5) * (1 << LAMBDA_Q_SHIFT));
+
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_cl_satd_lambda_qf =
+ (WORD32)(sqrt(lambda * 1.5) * (1 << LAMBDA_Q_SHIFT));
+
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_ol_satd_lambda_qf =
+ (WORD32)((sqrt(lambda * 1.5)) * (1 << (LAMBDA_Q_SHIFT)));
+ }
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i8_cl_ssd_type2_lambda_qf =
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i8_cl_ssd_lambda_qf;
+
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i8_cl_ssd_type2_lambda_chroma_qf =
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i8_cl_ssd_lambda_chroma_qf;
+
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_cl_sad_type2_lambda_qf =
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_cl_sad_lambda_qf;
+
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_ol_sad_type2_lambda_qf =
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_ol_sad_lambda_qf;
+
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_cl_satd_type2_lambda_qf =
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_cl_satd_lambda_qf;
+
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_ol_satd_type2_lambda_qf =
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_ol_satd_lambda_qf;
+
+ break;
+ }
+ case 1: /* bit-depth QP offset applied; type2 fields mirror the primary fields */
+ {
+ lambda = pow(2.0, (((double)(i4_cur_frame_qp + i4_qp_bdoffset - 12)) / 3.0));
+ lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0));
+
+ /* modify the base lambda according to lambda modifier */
+ lambda *= lambda_modifier;
+ lambda_uv *= lambda_uv_modifier;
+
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].u4_chroma_cost_weighing_factor =
+ (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT));
+
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i8_cl_ssd_lambda_qf =
+ (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT));
+
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i8_cl_ssd_lambda_chroma_qf =
+ (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT));
+
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_cl_sad_lambda_qf =
+ (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
+ if(i4_use_const_lamda_modifier)
+ {
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_ol_sad_lambda_qf =
+ (WORD32)((sqrt(lambda)) * (1 << LAMBDA_Q_SHIFT));
+
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_cl_satd_lambda_qf =
+ (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
+
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_ol_satd_lambda_qf =
+ (WORD32)((sqrt(lambda)) * (1 << (LAMBDA_Q_SHIFT)));
+ }
+ else
+ {
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_ol_sad_lambda_qf =
+ (WORD32)((sqrt(lambda) / 1.5) * (1 << LAMBDA_Q_SHIFT));
+
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_cl_satd_lambda_qf =
+ (WORD32)(sqrt(lambda * 1.5) * (1 << LAMBDA_Q_SHIFT));
+
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_ol_satd_lambda_qf =
+ (WORD32)((sqrt(lambda * 1.5)) * (1 << (LAMBDA_Q_SHIFT)));
+ }
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i8_cl_ssd_type2_lambda_qf =
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i8_cl_ssd_lambda_qf;
+
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i8_cl_ssd_type2_lambda_chroma_qf =
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i8_cl_ssd_lambda_chroma_qf;
+
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_cl_sad_type2_lambda_qf =
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_cl_sad_lambda_qf;
+
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_ol_sad_type2_lambda_qf =
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_ol_sad_lambda_qf;
+
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_cl_satd_type2_lambda_qf =
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_cl_satd_lambda_qf;
+
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_ol_satd_type2_lambda_qf =
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_ol_satd_lambda_qf;
+
+ break;
+ }
+ case 2: /* primary fields use the bit-depth QP offset; type2 fields are recomputed without it */
+ {
+ lambda = pow(2.0, (((double)(i4_cur_frame_qp + i4_qp_bdoffset - 12)) / 3.0));
+ lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bdoffset - 12)) / 3.0));
+
+ /* modify the base lambda according to lambda modifier */
+ lambda *= lambda_modifier;
+ lambda_uv *= lambda_uv_modifier;
+
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].u4_chroma_cost_weighing_factor =
+ (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT));
+
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i8_cl_ssd_lambda_qf =
+ (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT));
+
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i8_cl_ssd_lambda_chroma_qf =
+ (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT));
+
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_cl_sad_lambda_qf =
+ (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
+
+ if(i4_use_const_lamda_modifier)
+ {
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_ol_sad_lambda_qf =
+ (WORD32)((sqrt(lambda)) * (1 << LAMBDA_Q_SHIFT));
+
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_cl_satd_lambda_qf =
+ (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
+
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_ol_satd_lambda_qf =
+ (WORD32)((sqrt(lambda)) * (1 << (LAMBDA_Q_SHIFT)));
+ }
+ else
+ {
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_ol_sad_lambda_qf =
+ (WORD32)((sqrt(lambda) / 1.5) * (1 << LAMBDA_Q_SHIFT));
+
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_cl_satd_lambda_qf =
+ (WORD32)(sqrt(lambda * 1.5) * (1 << LAMBDA_Q_SHIFT));
+
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_ol_satd_lambda_qf =
+ (WORD32)((sqrt(lambda * 1.5)) * (1 << (LAMBDA_Q_SHIFT)));
+ }
+ /* lambda corresponding to 8-bit (no bit-depth QP offset), for metrics based on 8-bit (example: 8-bit SAD in encloop).
+ From here on `lambda` / `lambda_uv` hold the 8-bit values, including after the switch */
+
+ lambda = pow(2.0, (((double)(i4_cur_frame_qp - 12)) / 3.0));
+ lambda_uv = pow(2.0, (((double)(chroma_qp - 12)) / 3.0));
+
+ /* modify the base lambda according to lambda modifier */
+ lambda *= lambda_modifier;
+ lambda_uv *= lambda_uv_modifier;
+
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].u4_chroma_cost_weighing_factor =
+ (UWORD32)((lambda / lambda_uv) * (1 << CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT)); /* second store; replaces the value written earlier in this case */
+
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i8_cl_ssd_type2_lambda_qf =
+ (LWORD64)(lambda * (1 << LAMBDA_Q_SHIFT));
+
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i8_cl_ssd_type2_lambda_chroma_qf =
+ (LWORD64)(lambda_uv * (1 << LAMBDA_Q_SHIFT));
+
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_cl_sad_type2_lambda_qf =
+ (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
+ if(i4_use_const_lamda_modifier)
+ {
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_ol_sad_type2_lambda_qf =
+ (WORD32)((sqrt(lambda)) * (1 << LAMBDA_Q_SHIFT));
+
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_cl_satd_type2_lambda_qf =
+ (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
+
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_ol_satd_type2_lambda_qf =
+ (WORD32)((sqrt(lambda)) * (1 << (LAMBDA_Q_SHIFT)));
+ }
+ else
+ {
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_ol_sad_type2_lambda_qf =
+ (WORD32)((sqrt(lambda) / 1.5) * (1 << LAMBDA_Q_SHIFT));
+
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_cl_satd_type2_lambda_qf =
+ (WORD32)(sqrt(lambda * 1.5) * (1 << LAMBDA_Q_SHIFT));
+
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_ol_satd_type2_lambda_qf =
+ (WORD32)((sqrt(lambda * 1.5)) * (1 << (LAMBDA_Q_SHIFT)));
+ }
+
+ break;
+ }
+ default:
+ {
+ /* Intended to be a barren wasteland! (unsupported i4_lambda_type).
+ If execution continues past ASSERT(0), `lambda` still holds its initial 0 */
+ ASSERT(0);
+ }
+ }
+
+ /* Assign the final lambdas after up shifting to its q format */
+
+ /* closed loop ssd lambda is same as final lambda */
+
+ /* --- Initialized the lambda for SATD computations ---
+ These stores overwrite the i4_cl_satd_lambda_qf / i4_ol_satd_lambda_qf values
+ already written inside the switch, using whatever `lambda` the switch left behind.
+ For i4_lambda_type 0 and 1 the result is the same value again; for type 2
+ `lambda` is the 8-bit one, so the bit-depth adjusted SATD lambdas are replaced.
+ NOTE(review): confirm that the type 2 overwrite is intended. */
+ if(i4_use_const_lamda_modifier)
+ {
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_cl_satd_lambda_qf =
+ (WORD32)(sqrt(lambda) * (1 << LAMBDA_Q_SHIFT));
+
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_ol_satd_lambda_qf =
+ (WORD32)((sqrt(lambda)) * (1 << (LAMBDA_Q_SHIFT)));
+ }
+ else
+ {
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_cl_satd_lambda_qf =
+ (WORD32)(sqrt(lambda * 1.5) * (1 << LAMBDA_Q_SHIFT));
+
+ ps_cur_pic_ctxt->as_lambda_prms[i4_inst_id].i4_ol_satd_lambda_qf =
+ (WORD32)((sqrt(lambda * 1.5)) * (1 << (LAMBDA_Q_SHIFT)));
+ }
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_update_qp_L1_sad_based \endif
+*
+* \brief
+* Function which recalculates qp in case of scene cut based on L1 satd/act
+*
+* \param[in] ps_enc_ctxt : encoder ctxt pointer
+* \param ps_curr_inp : current input buffer (ihevce_lap_enc_buf_t)
+* \param ps_prev_inp : previous input buffer (ihevce_lap_enc_buf_t)
+* \param ps_curr_out : current pic pre-enc ME ctxt
+* \param[in] i4_is_last_thread : is last thread flag
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_update_qp_L1_sad_based(
+    enc_ctxt_t *ps_enc_ctxt, /* encoder context: module contexts, RC quant tables, RC mutex */
+    ihevce_lap_enc_buf_t *ps_curr_inp, /* current picture; its s_rc_lap_out is filled in here */
+    ihevce_lap_enc_buf_t *ps_prev_inp, /* previous picture; read only on the P6 temporal-layer path */
+    pre_enc_me_ctxt_t *ps_curr_out, /* supplies L1 buffers and frame qp; receives complexity stats */
+    WORD32 i4_is_last_thread) /* when zero, none of the updates below are performed */
+{
+    WORD32 i4_l1_ht, i4_l1_wd;
+    ihevce_ed_blk_t *ps_ed_4x4 = ps_curr_out->ps_layer1_buf;
+    WORD32 best_satd_16x16;
+    /* Locals without an initialiser are assigned and used only under if(i4_is_last_thread) */
+    LWORD64 acc_sad = 0; /*SAD accumulated to compare with coarse me sad*/
+    WORD32 i4_tot_4x4block_l1_x, i4_tot_4x4block_l1_y;
+    WORD32 i4_tot_ctb_l1_x, i4_tot_ctb_l1_y;
+    WORD32 i;
+    WORD32 i4_act_factor;
+    UWORD8 u1_cu_possible_qp;
+    WORD32 i4_q_scale_mod;
+    LWORD64 i8_best_satd_16x16;
+    LWORD64 i8_frame_satd_by_act_L1_accum;
+    LWORD64 i8_frame_acc_sadt_L1, i8_frame_acc_sadt_L1_squared;
+    WORD32 i4_new_frame_qp = 0, i4_qp_for_I_pic = 0;
+    LWORD64 pre_intra_satd_act_evaluated = 0;
+    ihevce_ed_ctb_l1_t *ps_ed_ctb_l1 = ps_curr_out->ps_ed_ctb_l1;
+    WORD32 i4_j;
+    double scale_factor_cmplx_change_detection;
+    WORD32 i4_cmplx_change_detection_thrsh;
+    long double ld_frame_avg_satd_L1;
+
+    if(i4_is_last_thread)
+    {
+        ihevce_decomp_pre_intra_master_ctxt_t *ps_master_ctxt =
+            (ihevce_decomp_pre_intra_master_ctxt_t *)
+                ps_enc_ctxt->s_module_ctxt.pv_decomp_pre_intra_ctxt;
+        ihevce_decomp_pre_intra_ctxt_t *ps_ctxt = ps_master_ctxt->aps_decomp_pre_intra_thrd_ctxt[0];
+        /* Layer 1 dimensions are taken from the first decomposition thread context */
+        i4_l1_wd = ps_ctxt->as_layers[1].i4_actual_wd;
+        i4_l1_ht = ps_ctxt->as_layers[1].i4_actual_ht;
+
+        if((ps_curr_inp->s_lap_out.i4_quality_preset == IHEVCE_QUALITY_P6) &&
+           (ps_curr_inp->s_lap_out.i4_temporal_lyr_id > TEMPORAL_LAYER_DISABLE))
+        {
+            i8_frame_acc_sadt_L1 = -1;
+        }
+        else
+        {
+            /* Frame level intra satd of layer 1. The callee is also handed the addresses of
+               i4_l1_wd and i4_l1_ht; whether it rewrites them is not visible from here. */
+            i8_frame_acc_sadt_L1 = ihevce_decomp_pre_intra_get_frame_satd(
+                ps_enc_ctxt->s_module_ctxt.pv_decomp_pre_intra_ctxt, &i4_l1_wd, &i4_l1_ht);
+        }
+
+#if USE_SQRT_AVG_OF_SATD_SQR
+        if((ps_curr_inp->s_lap_out.i4_quality_preset == IHEVCE_QUALITY_P6) &&
+           (ps_curr_inp->s_lap_out.i4_temporal_lyr_id > TEMPORAL_LAYER_DISABLE))
+        {
+            i8_frame_acc_sadt_L1_squared = 0x7fffffff;
+        }
+        else
+        {
+            i8_frame_acc_sadt_L1_squared = ihevce_decomp_pre_intra_get_frame_satd_squared(
+                ps_enc_ctxt->s_module_ctxt.pv_decomp_pre_intra_ctxt, &i4_l1_wd, &i4_l1_ht);
+        }
+#else
+        i8_frame_acc_sadt_L1_squared = i8_frame_acc_sadt_L1;
+#endif
+        if((i4_l1_wd * i4_l1_ht) > (245760 /*640 * 384*/))
+        {
+            scale_factor_cmplx_change_detection =
+                (double)0.12 * ((i4_l1_wd * i4_l1_ht) / (640.0 * 384.0));
+            i4_cmplx_change_detection_thrsh =
+                (WORD32)(HME_HIGH_SAD_BLK_THRESH * (1 - scale_factor_cmplx_change_detection));
+        }
+        else
+        {
+            scale_factor_cmplx_change_detection =
+                (double)0.12 * ((640.0 * 384.0) / (i4_l1_wd * i4_l1_ht));
+            i4_cmplx_change_detection_thrsh =
+                (WORD32)(HME_HIGH_SAD_BLK_THRESH * (1 + scale_factor_cmplx_change_detection));
+        }
+        i4_tot_4x4block_l1_x =
+            ((i4_l1_wd + ((MAX_CTB_SIZE >> 1) - 1)) & 0xFFFFFFE0) /
+            4; /* padded width is masked down to a multiple of 32, then counted in 4x4 blocks */
+        i4_tot_4x4block_l1_y =
+            ((i4_l1_ht + ((MAX_CTB_SIZE >> 1) - 1)) & 0xFFFFFFE0) /
+            4; /* same computation for the height */
+        ld_frame_avg_satd_L1 =
+            (WORD32)log(
+                1 + (long double)i8_frame_acc_sadt_L1_squared /
+                        ((long double)((i4_tot_4x4block_l1_x * i4_tot_4x4block_l1_y) >> 2))) /
+            log(2.0); /* NOTE(review): the WORD32 cast covers log(...) only, not the quotient - confirm */
+        /* L1 satd accumulated for computing qp */
+        i8_frame_satd_by_act_L1_accum = 0;
+        i4_tot_ctb_l1_x =
+            ((i4_l1_wd + ((MAX_CTB_SIZE >> 1) - 1)) & 0xFFFFFFE0) / (MAX_CTB_SIZE >> 1);
+        i4_tot_ctb_l1_y =
+            ((i4_l1_ht + ((MAX_CTB_SIZE >> 1) - 1)) & 0xFFFFFFE0) / (MAX_CTB_SIZE >> 1);
+        /* Walk every L1 CTB entry: 16 8x8 results each, ps_ed_4x4 advancing by 4 per result */
+        for(i = 0; i < (i4_tot_ctb_l1_x * i4_tot_ctb_l1_y); i += 1)
+        {
+            for(i4_j = 0; i4_j < 16; i4_j++)
+            {
+                if(ps_ed_ctb_l1->i4_best_satd_8x8[i4_j] != -1)
+                {
+                    ASSERT(ps_ed_ctb_l1->i4_best_satd_8x8[i4_j] >= 0);
+                    ASSERT(ps_ed_ctb_l1->i4_best_sad_8x8_l1_ipe[i4_j] >= 0);
+
+                    if((ps_curr_inp->s_lap_out.i4_quality_preset == IHEVCE_QUALITY_P6) &&
+                       (ps_curr_inp->s_lap_out.i4_temporal_lyr_id > TEMPORAL_LAYER_DISABLE))
+                    {
+                        best_satd_16x16 = 0;
+                    }
+                    else
+                    {
+                        best_satd_16x16 = ps_ed_ctb_l1->i4_best_satd_8x8[i4_j];
+                    }
+
+                    acc_sad += (WORD32)ps_ed_ctb_l1->i4_best_sad_8x8_l1_ipe[i4_j];
+                    /* Only i4_act_factor is consumed below; the returned qp and q-scale mod are unused */
+                    u1_cu_possible_qp = ihevce_cu_level_qp_mod(
+                        32,
+                        best_satd_16x16,
+                        ld_frame_avg_satd_L1,
+                        REF_MOD_STRENGTH, // To be changed later
+                        &i4_act_factor,
+                        &i4_q_scale_mod,
+                        &ps_enc_ctxt->s_rc_quant);
+                    /* Widen before shifting so the scaling is done in 64 bits, not in WORD32 */
+                    i8_best_satd_16x16 = (LWORD64)best_satd_16x16 << QP_LEVEL_MOD_ACT_FACTOR;
+                    if((ps_curr_inp->s_lap_out.i4_quality_preset == IHEVCE_QUALITY_P6) &&
+                       (ps_curr_inp->s_lap_out.i4_temporal_lyr_id > TEMPORAL_LAYER_DISABLE))
+                    {
+                        i4_act_factor = (1 << QP_LEVEL_MOD_ACT_FACTOR);
+                    }
+
+                    if(0 != i4_act_factor)
+                    {
+                        i8_frame_satd_by_act_L1_accum +=
+                            ((WORD32)(i8_best_satd_16x16 / i4_act_factor));
+                        /*Accumulate SAD for those regions which will undergo evaluation in L0 stage*/
+                        if(ps_ed_4x4->intra_or_inter != 2)
+                            pre_intra_satd_act_evaluated +=
+                                ((WORD32)(i8_best_satd_16x16 / i4_act_factor));
+                    }
+                }
+                ps_ed_4x4 += 4;
+            }
+            ps_ed_ctb_l1 += 1;
+        }
+        /** store the L1 satd in context struct
+        Note: this variable is common across all threads; all threads must write the same value*/
+        if((ps_curr_inp->s_lap_out.i4_quality_preset == IHEVCE_QUALITY_P6) &&
+           (ps_curr_inp->s_lap_out.i4_temporal_lyr_id > TEMPORAL_LAYER_DISABLE))
+        {
+            i8_frame_satd_by_act_L1_accum = ps_prev_inp->s_rc_lap_out.i8_frame_satd_by_act_L1_accum;
+            ps_curr_inp->s_rc_lap_out.i8_frame_satd_by_act_L1_accum = i8_frame_satd_by_act_L1_accum;
+            ps_curr_inp->s_rc_lap_out.i8_satd_by_act_L1_accum_evaluated = -1;
+        }
+        else
+        {
+            ps_curr_inp->s_rc_lap_out.i8_frame_satd_by_act_L1_accum = i8_frame_satd_by_act_L1_accum;
+            ps_curr_inp->s_rc_lap_out.i8_satd_by_act_L1_accum_evaluated =
+                pre_intra_satd_act_evaluated;
+        }
+
+        ps_curr_inp->s_rc_lap_out.i8_pre_intra_satd = i8_frame_acc_sadt_L1;
+        /*accumulate raw intra sad without subtracting non coded sad*/
+        ps_curr_inp->s_rc_lap_out.i8_raw_pre_intra_sad = acc_sad;
+    }
+    /*update pre-enc qp using data from L1 to use better qp in L0 in case of cbr mode*/
+    if(i4_is_last_thread)
+    {
+        /* acquire mutex lock for rate control calls */
+        osal_mutex_lock(ps_enc_ctxt->pv_rc_mutex_lock_hdl);
+        {
+            LWORD64 i8_est_L0_satd_by_act;
+            WORD32 i4_cur_q_scale;
+            if(ps_enc_ctxt->ps_stat_prms->s_config_prms.i4_rate_control_mode != CONST_QP)
+            {
+                /*RCTODO :This needs to be reviewed in the context of 10/12 bit encoding as the Qp seems to be sub-optimal*/
+                if(ps_enc_ctxt->ps_stat_prms->s_pass_prms.i4_pass != 2)
+                    i4_cur_q_scale =
+                        ps_enc_ctxt->s_rc_quant.pi4_qp_to_qscale
+                            [ps_curr_out->i4_curr_frm_qp]; /* no i1_qp_offset here, unlike the CONST_QP branch */
+                else
+                    i4_cur_q_scale = ps_enc_ctxt->s_rc_quant
+                                         .pi4_qp_to_qscale[MAX(ps_curr_out->i4_curr_frm_qp, 0)];
+            }
+            else
+                i4_cur_q_scale =
+                    ps_enc_ctxt->s_rc_quant.pi4_qp_to_qscale
+                        [ps_curr_out->i4_curr_frm_qp + ps_enc_ctxt->s_rc_quant.i1_qp_offset];
+
+            i4_cur_q_scale = (i4_cur_q_scale + (1 << (QSCALE_Q_FAC_3 - 1))) >> QSCALE_Q_FAC_3;
+
+            i8_est_L0_satd_by_act = ihevce_get_L0_satd_based_on_L1(
+                i8_frame_satd_by_act_L1_accum,
+                ps_curr_inp->s_rc_lap_out.i4_num_pels_in_frame_considered,
+                i4_cur_q_scale);
+            /*HEVC_RC query rate control for qp; NOTE(review): 3 is presumably CONST_QP - confirm*/
+            if(ps_enc_ctxt->ps_stat_prms->s_config_prms.i4_rate_control_mode != 3)
+            {
+                i4_new_frame_qp = ihevce_get_L0_est_satd_based_scd_qp(
+                    ps_enc_ctxt->s_module_ctxt.apv_rc_ctxt[0],
+                    &ps_curr_inp->s_rc_lap_out,
+                    i8_est_L0_satd_by_act,
+                    8.00);
+            }
+            else
+                i4_new_frame_qp = ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms
+                                      .as_tgt_params[ps_enc_ctxt->i4_resolution_id]
+                                      .ai4_frame_qp[0];
+            i4_new_frame_qp = CLIP3(i4_new_frame_qp, 1, 51);
+            i4_qp_for_I_pic = CLIP3(i4_qp_for_I_pic, 1, 51);
+            ps_curr_inp->s_rc_lap_out.i4_L1_qp = i4_new_frame_qp;
+            /*I frame qp = qp-3 due to effect of lambda modifier*/
+            i4_qp_for_I_pic = i4_new_frame_qp - 3;
+
+            /*use new qp get possible qp even for inter pictures assuming default offset*/
+            if(ps_curr_inp->s_lap_out.i4_pic_type != IV_IDR_FRAME &&
+               ps_curr_inp->s_lap_out.i4_pic_type != IV_I_FRAME)
+            {
+                i4_new_frame_qp += ps_curr_inp->s_lap_out.i4_temporal_lyr_id + 1;
+            }
+
+            /*accumulate the L1 ME sad using skip sad value based on qp*/
+            /*accumulate this only for last thread as it will be guaranteed that L1 ME sad is completely populated*/
+            /*The lambda modifier in encoder is tuned in such a way that the qp offsets according to lambda modifier are as follows
+            Note: These qp offset only account for lambda modifier, Hence this should be applied over qp offset that is already there due to picture type
+            relative lambda scale(these lambda diff are mapped into qp difference which is applied over and above the qp offset)
+            Qi = Iqp 1
+            Qp = Iqp 1
+            Qb = Iqp + 1.55 1.48
+            Qb1 = Iqp + 3.1 2.05
+            Qb2 = Iqp + 3.1 2.05*/
+
+            /* Offsets applied below: I/IDR -3, P -2, B at temporal layer 1/2/3 +2/+6/+7 */
+
+            if(ps_curr_inp->s_lap_out.i4_pic_type == IV_I_FRAME ||
+               ps_curr_inp->s_lap_out.i4_pic_type == IV_IDR_FRAME)
+            {
+                i4_new_frame_qp = i4_new_frame_qp - 3;
+            }
+            else if(ps_curr_inp->s_lap_out.i4_pic_type == IV_P_FRAME)
+            {
+                i4_new_frame_qp = i4_new_frame_qp - 2;
+            }
+            if(ps_curr_inp->s_lap_out.i4_pic_type == IV_B_FRAME &&
+               ps_curr_inp->s_lap_out.i4_temporal_lyr_id == 1)
+            {
+                i4_new_frame_qp = i4_new_frame_qp + 2;
+            }
+            else if(
+                ps_curr_inp->s_lap_out.i4_pic_type == IV_B_FRAME &&
+                ps_curr_inp->s_lap_out.i4_temporal_lyr_id == 2)
+            {
+                i4_new_frame_qp = i4_new_frame_qp + 6;
+            }
+            else if(
+                ps_curr_inp->s_lap_out.i4_pic_type == IV_B_FRAME &&
+                ps_curr_inp->s_lap_out.i4_temporal_lyr_id == 3)
+            {
+                i4_new_frame_qp = i4_new_frame_qp + 7;
+            }
+
+            i4_new_frame_qp = CLIP3(i4_new_frame_qp, 1, 51);
+            i4_qp_for_I_pic = CLIP3(i4_qp_for_I_pic, 1, 51);
+
+            {
+                calc_l1_level_hme_intra_sad_different_qp(
+                    ps_enc_ctxt, ps_curr_out, ps_curr_inp, i4_tot_ctb_l1_x, i4_tot_ctb_l1_y);
+
+                /** frame accumulated SAD over entire frame after accounting for dead zone SAD, this is least of intra or inter*/
+                /* Register the estimates with rate control, using the SAD entries indexed by the new qp */
+                ihevce_rc_register_L1_analysis_data(
+                    ps_enc_ctxt->s_module_ctxt.apv_rc_ctxt[0],
+                    &ps_curr_inp->s_rc_lap_out,
+                    i8_est_L0_satd_by_act,
+                    ps_curr_inp->s_rc_lap_out.ai8_pre_intra_sad
+                        [i4_new_frame_qp], //since the sad passed will be used to calc complexity it should be non coded sad subtracted sad
+                    ps_curr_inp->s_rc_lap_out.ai8_frame_acc_coarse_me_sad[i4_new_frame_qp]);
+
+                ihevce_coarse_me_get_rc_param(
+                    ps_enc_ctxt->s_module_ctxt.pv_coarse_me_ctxt,
+                    &ps_curr_out->i8_acc_frame_coarse_me_cost,
+                    &ps_curr_out->i8_acc_frame_coarse_me_sad,
+                    &ps_curr_out->i8_acc_num_blks_high_sad,
+                    &ps_curr_out->i8_total_blks,
+                    ps_curr_inp->s_lap_out.i4_is_prev_pic_in_Tid0_same_scene);
+
+                if(ps_curr_out->i8_total_blks)
+                {
+                    ps_curr_out->i4_complexity_percentage = (WORD32)(
+                        (ps_curr_out->i8_acc_num_blks_high_sad * 100) /
+                        (ps_curr_out->i8_total_blks));
+                }
+                /*not for Const QP mode*/
+                if(ps_enc_ctxt->ps_stat_prms->s_config_prms.i4_rate_control_mode != 3)
+                {
+                    if(ps_curr_inp->s_lap_out.i4_is_prev_pic_in_Tid0_same_scene &&
+                       ps_curr_out->i8_total_blks &&
+                       (((float)(ps_curr_out->i8_acc_num_blks_high_sad * 100) /
+                         (ps_curr_out->i8_total_blks)) > (i4_cmplx_change_detection_thrsh)))
+                    {
+                        ps_curr_out->i4_is_high_complex_region = 1;
+                    }
+                    else
+                    {
+                        ps_curr_out->i4_is_high_complex_region = 0;
+                    }
+                }
+                ps_curr_inp->s_rc_lap_out.i8_frame_acc_coarse_me_cost =
+                    ps_curr_out->i8_acc_frame_coarse_me_cost;
+                /*check for I only reset case and Non I SCD*/
+                ihevce_rc_check_non_lap_scd(
+                    ps_enc_ctxt->s_module_ctxt.apv_rc_ctxt[0], &ps_curr_inp->s_rc_lap_out);
+            }
+        }
+        /* release mutex lock after rate control calls */
+        osal_mutex_unlock(ps_enc_ctxt->pv_rc_mutex_lock_hdl);
+    }
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_frame_init \endif
+*
+* \brief
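+* Frame level init: derives the frame lambda params and, for valid frames, runs the ME frame
+* init, prepares the job queue for ME / enc_loop and resets the enc-loop row sync dep. manager
+* \param[in] ps_enc_ctxt : encoder ctxt pointer (remaining arguments are per-frame buffers and ids)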
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_frame_init(
+    enc_ctxt_t *ps_enc_ctxt,
+    pre_enc_me_ctxt_t *ps_curr_inp_prms,
+    me_enc_rdopt_ctxt_t *ps_cur_out_me_prms,
+    WORD32 i4_cur_frame_qp,
+    WORD32 i4_me_frm_id,
+    WORD32 i4_thrd_id)
+{
+    /* Frame lambda params are derived for every frame; ME frame init, job-queue preparation
+     * and the row-sync reset are done only when i4_frm_proc_valid_flag is 1. */
+    ihevce_lap_enc_buf_t *ps_lap_buf = ps_curr_inp_prms->ps_curr_inp;
+    me_master_ctxt_t *ps_me_master = (me_master_ctxt_t *)ps_enc_ctxt->s_module_ctxt.pv_me_ctxt;
+    WORD32 i4_first_field = 1;
+    UWORD8 u1_cu_qp_delta_on;
+
+    (void)i4_thrd_id;
+    (void)ps_cur_out_me_prms;
+
+    /* frame level lambda parameters */
+    ihevce_get_frame_lambda_prms(
+        ps_enc_ctxt,
+        ps_curr_inp_prms,
+        i4_cur_frame_qp,
+        i4_first_field,
+        ps_lap_buf->s_lap_out.i4_is_ref_pic,
+        ps_lap_buf->s_lap_out.i4_temporal_lyr_id,
+        ps_lap_buf->s_lap_out.f_i_pic_lamda_modifier,
+        0,
+        ENC_LAMBDA_TYPE);
+
+    if(1 != ps_curr_inp_prms->i4_frm_proc_valid_flag)
+    {
+        return; /* nothing further is set up unless the flag is exactly 1 */
+    }
+
+    /* cu-level qp delta flag is read from the static config (i4_cu_level_rc) */
+    u1_cu_qp_delta_on = ps_enc_ctxt->ps_stat_prms->s_config_prms.i4_cu_level_rc;
+
+    /* ME picture level init */
+    ihevce_me_frame_init(
+        ps_enc_ctxt->s_module_ctxt.pv_me_ctxt,
+        ps_cur_out_me_prms,
+        ps_enc_ctxt->ps_stat_prms,
+        &ps_enc_ctxt->s_frm_ctb_prms,
+        &ps_curr_inp_prms->as_lambda_prms[0],
+        ps_enc_ctxt->i4_num_ref_l0,
+        ps_enc_ctxt->i4_num_ref_l1,
+        ps_enc_ctxt->i4_num_ref_l0_active,
+        ps_enc_ctxt->i4_num_ref_l1_active,
+        &ps_cur_out_me_prms->aps_ref_list[0][LIST_0][0],
+        &ps_cur_out_me_prms->aps_ref_list[0][LIST_1][0],
+        ps_cur_out_me_prms->aps_ref_list[0],
+        &ps_enc_ctxt->s_func_selector,
+        ps_lap_buf,
+        ps_curr_inp_prms->pv_me_lyr_ctxt,
+        i4_me_frm_id,
+        i4_thrd_id,
+        i4_cur_frame_qp,
+        ps_lap_buf->s_lap_out.i4_temporal_lyr_id,
+        u1_cu_qp_delta_on,
+        ps_enc_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_me_frm_id]->pv_dep_mngr_encloop_dep_me);
+
+    /* Build the job queue for ME and each instance of enc_loop */
+    ihevce_prepare_job_queue(ps_enc_ctxt, ps_lap_buf, i4_me_frm_id);
+
+    /* Dep. Mngr : reset the per-row processed CTB count used for ENC sync */
+    ihevce_dmgr_rst_row_row_sync(
+        ps_enc_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_me_frm_id]->pv_dep_mngr_encloop_dep_me);
+}
+
+/****************************************************************************
+Function Name : ihevce_rc_close
+Description : closing the Rate control by passing the stored data in to the stat file for 2 pass encoding.
+Inputs :
+Globals :
+Processing :
+Outputs :
+Returns :
+Issues :
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+
+void ihevce_rc_close(
+    enc_ctxt_t *ps_enc_ctxt,
+    WORD32 i4_enc_frm_id_rc,
+    WORD32 i4_store_retrive,
+    WORD32 i4_update_cnt,
+    WORD32 i4_bit_rate_idx)
+{
+    /* Per-frame records filled by the retrieve call and then forwarded to the RC update */
+    rc_bits_sad_t s_stat;
+    ihevce_lap_output_params_t s_lap_prms;
+    rc_lap_out_params_t s_rc_lap_prms;
+    WORD32 i4_buf_id, i4_coding_type, i4_qp;
+    WORD32 i4_pending;
+
+    for(i4_pending = i4_update_cnt; i4_pending > 0; i4_pending--)
+    {
+        void *pv_rc_ctxt = (void *)ps_enc_ctxt->s_module_ctxt.apv_rc_ctxt[i4_bit_rate_idx];
+        /* Retrieve one stored record; the mode is the literal 2 (i4_store_retrive is not used) */
+        ihevce_rc_store_retrive_update_info(
+            pv_rc_ctxt,
+            &s_stat,
+            i4_enc_frm_id_rc,
+            i4_bit_rate_idx,
+            2,
+            &i4_buf_id,
+            &i4_coding_type,
+            &i4_qp,
+            (void *)&s_lap_prms,
+            (void *)&s_rc_lap_prms);
+        /* Feed the retrieved record to the rate control picture update */
+        ihevce_rc_update_pic_info(
+            pv_rc_ctxt,
+            (s_stat.u4_total_texture_bits + s_stat.u4_total_header_bits), /* total bits */
+            s_stat.u4_total_header_bits,
+            s_stat.u4_total_sad,
+            s_stat.u4_total_intra_sad,
+            (IV_PICTURE_CODING_TYPE_T)i4_coding_type,
+            i4_qp,
+            0,
+            s_stat.i4_qp_normalized_8x8_cu_sum,
+            s_stat.i4_8x8_cu_sum,
+            s_stat.i8_sad_by_qscale,
+            &s_lap_prms,
+            &s_rc_lap_prms,
+            i4_buf_id,
+            s_stat.u4_open_loop_intra_sad,
+            s_stat.i8_total_ssd_frame,
+            i4_enc_frm_id_rc);
+        i4_enc_frm_id_rc = (i4_enc_frm_id_rc + 1) % ps_enc_ctxt->i4_max_fr_enc_loop_parallel_rc;
+    }
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_enc_frm_proc_slave_thrd \endif
+*
+* \brief
+* Encode Frame processing slave thread entry point function
+*
+* \param[in] Frame processing thread context pointer
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+WORD32 ihevce_enc_frm_proc_slave_thrd(void *pv_frm_proc_thrd_ctxt)
+{
+ frm_proc_thrd_ctxt_t *ps_thrd_ctxt;
+ enc_ctxt_t *ps_enc_ctxt;
+ WORD32 i4_me_end_flag, i4_enc_end_flag;
+ WORD32 i4_thrd_id;
+ ihevce_hle_ctxt_t *ps_hle_ctxt;
+ WORD32 i4_num_bitrates; //number of bit-rates instances running
+ WORD32 i; //ctr
+ void *pv_dep_mngr_prev_frame_me_done;
+ void *pv_dep_mngr_prev_frame_done;
+ WORD32 i4_resolution_id;
+ WORD32 i4_enc_frm_id_rc = 0;
+ WORD32 i4_enc_frm_id = 0;
+ WORD32 i4_me_frm_id = 0;
+
+ ps_thrd_ctxt = (frm_proc_thrd_ctxt_t *)pv_frm_proc_thrd_ctxt;
+ ps_hle_ctxt = ps_thrd_ctxt->ps_hle_ctxt;
+ ps_enc_ctxt = (enc_ctxt_t *)ps_thrd_ctxt->pv_enc_ctxt; /*Changed for mres*/
+ i4_thrd_id = ps_thrd_ctxt->i4_thrd_id;
+ i4_me_end_flag = 0;
+ i4_enc_end_flag = 0;
+ i4_num_bitrates = ps_enc_ctxt->i4_num_bitrates;
+ i4_resolution_id = ps_enc_ctxt->i4_resolution_id;
+
+ /*pv_dep_mngr_prev_frame_me_done =
+ ps_enc_ctxt->s_multi_thrd.pv_dep_mngr_prev_frame_me_done;*/
+
+ while((0 == i4_me_end_flag) && (0 == i4_enc_end_flag))
+ {
+ WORD32 result;
+ WORD32 ai4_in_buf_id[MAX_NUM_ME_PARALLEL];
+ me_enc_rdopt_ctxt_t *ps_curr_out_me;
+
+ if(1 == ps_enc_ctxt->s_multi_thrd.i4_num_me_frm_pllel)
+ {
+ pv_dep_mngr_prev_frame_me_done =
+ ps_enc_ctxt->s_multi_thrd.apv_dep_mngr_prev_frame_me_done[0];
+ }
+ else
+ {
+ pv_dep_mngr_prev_frame_me_done =
+ ps_enc_ctxt->s_multi_thrd.apv_dep_mngr_prev_frame_me_done[i4_me_frm_id];
+ }
+
+ /* Wait till the previous frame ME is completly done*/
+ {
+ ihevce_dmgr_chk_frm_frm_sync(pv_dep_mngr_prev_frame_me_done, ps_thrd_ctxt->i4_thrd_id);
+ }
+
+ /****** Lock the critical section ******/
+ if(NULL != ps_enc_ctxt->s_multi_thrd.apv_mutex_handle[i4_me_frm_id])
+ {
+ result = osal_mutex_lock(ps_enc_ctxt->s_multi_thrd.apv_mutex_handle[i4_me_frm_id]);
+
+ if(OSAL_SUCCESS != result)
+ return 0;
+ }
+
+ {
+ /************************************/
+ /****** ENTER CRITICAL SECTION ******/
+ /************************************/
+
+ /* First slave getting the mutex lock will act as master and does ME init
+ * of current frame and other slaves skip it
+ */
+ if(ps_enc_ctxt->s_multi_thrd.ai4_me_master_done_flag[i4_me_frm_id] == 0)
+ {
+ WORD32 i4_ref_cur_qp; //current frame Qp for reference bit-rate instance
+ ihevce_lap_enc_buf_t *ps_curr_inp = NULL;
+
+ if(0 == i4_me_end_flag)
+ {
+ /* ------- get the input prms buffer from pre encode que ------------ */
+ ps_enc_ctxt->s_multi_thrd.aps_cur_inp_me_prms[i4_me_frm_id] =
+ (pre_enc_me_ctxt_t *)ihevce_q_get_filled_buff(
+ (void *)ps_enc_ctxt,
+ IHEVCE_PRE_ENC_ME_Q,
+ &ai4_in_buf_id[i4_me_frm_id],
+ BUFF_QUE_BLOCKING_MODE);
+ /*always buffer must be available*/
+ ASSERT(ps_enc_ctxt->s_multi_thrd.aps_cur_inp_me_prms[i4_me_frm_id] != NULL);
+
+ ps_enc_ctxt->s_multi_thrd.is_in_buf_freed[i4_enc_frm_id] = 0;
+
+ /* ------- get the input prms buffer from L0 IPE queue ------------ */
+ ps_enc_ctxt->s_multi_thrd.aps_cur_L0_ipe_inp_prms[i4_me_frm_id] =
+ (pre_enc_L0_ipe_encloop_ctxt_t *)ihevce_q_get_filled_buff(
+ (void *)ps_enc_ctxt,
+ IHEVCE_L0_IPE_ENC_Q,
+ &ps_enc_ctxt->s_multi_thrd.ai4_in_frm_l0_ipe_id[i4_me_frm_id],
+ BUFF_QUE_BLOCKING_MODE);
+
+ /*always buffer must be available*/
+ ASSERT(ps_enc_ctxt->s_multi_thrd.aps_cur_L0_ipe_inp_prms[i4_me_frm_id] != NULL);
+
+ /* ------- get the free buffer from me_enc que ------------ */
+ ps_enc_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_me_frm_id] =
+ (me_enc_rdopt_ctxt_t *)ihevce_q_get_free_buff(
+ ps_enc_ctxt,
+ IHEVCE_ME_ENC_RDOPT_Q,
+ &ps_enc_ctxt->s_multi_thrd.ai4_me_out_buf_id[i4_me_frm_id],
+ BUFF_QUE_BLOCKING_MODE);
+
+ /*always buffer must be available*/
+ ASSERT(ps_enc_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_me_frm_id] != NULL);
+ }
+ if(NULL != ps_enc_ctxt->s_multi_thrd.aps_cur_inp_me_prms[i4_me_frm_id] &&
+ NULL != ps_enc_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_me_frm_id] &&
+ NULL != ps_enc_ctxt->s_multi_thrd.aps_cur_L0_ipe_inp_prms[i4_me_frm_id])
+ {
+ ps_curr_inp =
+ ps_enc_ctxt->s_multi_thrd.aps_cur_inp_me_prms[i4_me_frm_id]->ps_curr_inp;
+
+ ps_curr_out_me = ps_enc_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_me_frm_id];
+
+ ps_curr_out_me->ps_curr_inp_from_l0_ipe_prms =
+ ps_enc_ctxt->s_multi_thrd.aps_cur_L0_ipe_inp_prms[i4_me_frm_id];
+
+ /*initialization of curr out me*/
+ ps_curr_out_me->ps_curr_inp_from_me_prms =
+ ps_enc_ctxt->s_multi_thrd.aps_cur_inp_me_prms[i4_me_frm_id];
+
+ ps_curr_out_me->curr_inp_from_me_buf_id = ai4_in_buf_id[i4_me_frm_id];
+
+ ps_curr_out_me->i4_buf_id =
+ ps_enc_ctxt->s_multi_thrd.ai4_me_out_buf_id[i4_me_frm_id];
+
+ ps_curr_out_me->ps_curr_inp =
+ ps_enc_ctxt->s_multi_thrd.aps_cur_inp_me_prms[i4_me_frm_id]->ps_curr_inp;
+
+ ps_curr_out_me->curr_inp_buf_id =
+ ps_enc_ctxt->s_multi_thrd.aps_cur_inp_me_prms[i4_me_frm_id]->curr_inp_buf_id;
+
+ ps_curr_out_me->curr_inp_from_l0_ipe_buf_id =
+ ps_enc_ctxt->s_multi_thrd.ai4_in_frm_l0_ipe_id[i4_me_frm_id];
+
+ ps_curr_out_me->i4_frm_proc_valid_flag =
+ ps_enc_ctxt->s_multi_thrd.aps_cur_inp_me_prms[i4_me_frm_id]
+ ->i4_frm_proc_valid_flag;
+
+ ps_curr_out_me->i4_end_flag =
+ ps_enc_ctxt->s_multi_thrd.aps_cur_inp_me_prms[i4_me_frm_id]->i4_end_flag;
+
+ /* set the parameters for sync b/w entropy thread */
+
+ ps_enc_ctxt->s_multi_thrd.me_end_flag =
+ ps_enc_ctxt->s_multi_thrd.aps_cur_inp_me_prms[i4_me_frm_id]->i4_end_flag;
+
+ /* do the processing if input frm data is valid */
+ if(1 == ps_curr_inp->s_input_buf.i4_inp_frm_data_valid_flag)
+ {
+ /* slice header will be populated in pre-enocde stage */
+ memcpy(
+ &ps_enc_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_me_frm_id]
+ ->s_slice_hdr,
+ &ps_enc_ctxt->s_multi_thrd.aps_cur_inp_me_prms[i4_me_frm_id]
+ ->s_slice_hdr,
+ sizeof(slice_header_t));
+
+ if(ps_enc_ctxt->s_multi_thrd.aps_cur_inp_me_prms[i4_me_frm_id]
+ ->i4_frm_proc_valid_flag)
+ {
+ WORD32 ctr;
+ recon_pic_buf_t *ps_frm_recon;
+ for(i = 0; i < i4_num_bitrates; i++)
+ {
+ /* run a loop to free the non used reference pics */
+ /* This is done here because its assured that recon buf
+ * between app and encode loop is set as produced
+ */
+ {
+ WORD32 i4_free_id;
+ i4_free_id = ihevce_find_free_indx(
+ ps_enc_ctxt->pps_recon_buf_q[i],
+ ps_enc_ctxt->ai4_num_buf_recon_q[i]);
+
+ if(i4_free_id != -1)
+ {
+ ps_enc_ctxt->pps_recon_buf_q[i][i4_free_id]->i4_is_free = 1;
+ ps_enc_ctxt->pps_recon_buf_q[i][i4_free_id]->i4_poc = -1;
+ }
+ }
+
+ ps_frm_recon = NULL;
+ for(ctr = 0; ctr < ps_enc_ctxt->ai4_num_buf_recon_q[i]; ctr++)
+ {
+ if(ps_enc_ctxt->pps_recon_buf_q[i][ctr]->i4_is_free)
+ {
+ ps_frm_recon = ps_enc_ctxt->pps_recon_buf_q[i][ctr];
+ break;
+ }
+ }
+ ASSERT(ps_frm_recon != NULL);
+
+ ps_frm_recon->i4_is_free = 0;
+ ps_frm_recon->i4_non_ref_free_flag = 0;
+ ps_frm_recon->i4_topfield_first =
+ ps_curr_inp->s_input_buf.i4_topfield_first;
+ ps_frm_recon->i4_poc = ps_curr_inp->s_lap_out.i4_poc;
+ ps_frm_recon->i4_pic_type = ps_curr_inp->s_lap_out.i4_pic_type;
+ ps_frm_recon->i4_display_num =
+ ps_curr_inp->s_lap_out.i4_display_num;
+ ps_frm_recon->i4_idr_gop_num =
+ ps_curr_inp->s_lap_out.i4_idr_gop_num;
+ ps_frm_recon->i4_bottom_field =
+ ps_curr_inp->s_input_buf.i4_bottom_field;
+ ps_frm_recon->i4_is_reference =
+ ps_curr_inp->s_lap_out.i4_is_ref_pic;
+
+ {
+ WORD32 sei_hash_enabled =
+ (ps_enc_ctxt->ps_stat_prms->s_out_strm_prms
+ .i4_sei_enable_flag == 1) &&
+ (ps_enc_ctxt->ps_stat_prms->s_out_strm_prms
+ .i4_decoded_pic_hash_sei_flag != 0);
+
+ /* Deblock a picture for all reference frames unconditionally. */
+ /* Deblock non ref if psnr compute or save recon is enabled */
+ ps_frm_recon->i4_deblk_pad_hpel_cur_pic =
+ ps_frm_recon->i4_is_reference ||
+ (ps_enc_ctxt->ps_stat_prms->i4_save_recon) ||
+ (1 == sei_hash_enabled);
+ }
+
+ ps_frm_recon->s_yuv_buf_desc.i4_y_ht =
+ ps_enc_ctxt->s_frm_ctb_prms.i4_cu_aligned_pic_ht;
+ ps_frm_recon->s_yuv_buf_desc.i4_uv_ht =
+ ps_enc_ctxt->s_frm_ctb_prms.i4_cu_aligned_pic_ht >>
+ ((ps_enc_ctxt->s_runtime_src_prms.i4_chr_format ==
+ IV_YUV_422SP_UV)
+ ? 0
+ : 1);
+ ps_frm_recon->s_yuv_buf_desc.i4_y_wd =
+ ps_enc_ctxt->s_frm_ctb_prms.i4_cu_aligned_pic_wd;
+ ps_frm_recon->s_yuv_buf_desc.i4_uv_wd =
+ ps_enc_ctxt->s_frm_ctb_prms.i4_cu_aligned_pic_wd;
+ ps_frm_recon->s_yuv_buf_desc.i4_y_strd =
+ ps_enc_ctxt->s_frm_ctb_prms.i4_cu_aligned_pic_wd +
+ (PAD_HORZ << 1);
+ ps_frm_recon->s_yuv_buf_desc.i4_uv_strd =
+ ps_enc_ctxt->s_frm_ctb_prms.i4_cu_aligned_pic_wd +
+ (PAD_HORZ << 1);
+
+ /* reset the row_frm dep mngr for ME reverse sync for reference bitrate */
+ if(i == 0)
+ {
+ ihevce_dmgr_map_rst_sync(ps_frm_recon->pv_dep_mngr_recon);
+ }
+
+ ps_enc_ctxt->s_multi_thrd.ps_frm_recon[i4_enc_frm_id][i] =
+ ps_frm_recon;
+ }
+ }
+ /* Reference buffer management and reference list creation */
+ /* This needs to be created for each bit-rate since the reconstructed output is
+ different for all bit-rates. ME uses only 0th instnace ref list */
+ for(i = i4_num_bitrates - 1; i >= 0; i--)
+ {
+ ihevce_manage_ref_pics(
+ ps_enc_ctxt,
+ ps_curr_inp,
+ &ps_enc_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_me_frm_id]
+ ->s_slice_hdr,
+ i4_me_frm_id,
+ i4_thrd_id,
+ i); /* bitrate instance ID */
+ }
+ /*query of qp to be moved just before encoding starts*/
+ i4_ref_cur_qp = ps_enc_ctxt->s_multi_thrd.aps_cur_inp_me_prms[i4_me_frm_id]
+ ->i4_curr_frm_qp;
+ /* The Qp populated in Pre enc stage needs to overwritten with Qp
+ queried from rate control*/
+ }
+ else
+ {
+ i4_ref_cur_qp = 0;
+ }
+
+ /* call the core encoding loop */
+ ihevce_frame_init(
+ ps_enc_ctxt,
+ ps_enc_ctxt->s_multi_thrd.aps_cur_inp_me_prms[i4_me_frm_id],
+ ps_enc_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_me_frm_id],
+ i4_ref_cur_qp,
+ i4_me_frm_id,
+ i4_thrd_id);
+ }
+
+ ps_enc_ctxt->s_multi_thrd.ai4_me_master_done_flag[i4_me_frm_id] = 1;
+ }
+ }
+
+ /************************************/
+ /****** EXIT CRITICAL SECTION ******/
+ /************************************/
+
+ /****** Unlock the critical section ******/
+ if(NULL != ps_enc_ctxt->s_multi_thrd.apv_mutex_handle[i4_me_frm_id])
+ {
+ result = osal_mutex_unlock(ps_enc_ctxt->s_multi_thrd.apv_mutex_handle[i4_me_frm_id]);
+ if(OSAL_SUCCESS != result)
+ return 0;
+ }
+
+ if((1 == ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms.i4_mres_single_out) &&
+ (1 == ps_enc_ctxt->s_multi_thrd.aps_cur_inp_me_prms[i4_me_frm_id]
+ ->ps_curr_inp->s_lap_out.i4_first_frm_new_res))
+ {
+ /* Reset the enc frame rc id whenver change in resolution happens */
+ i4_enc_frm_id_rc = 0;
+ }
+
+ /*update end flag for each thread */
+ i4_me_end_flag = ps_enc_ctxt->s_multi_thrd.aps_cur_inp_me_prms[i4_me_frm_id]->i4_end_flag;
+ if(NULL != ps_enc_ctxt->s_multi_thrd.aps_cur_inp_me_prms[i4_me_frm_id] &&
+ NULL != ps_enc_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_me_frm_id] &&
+ NULL != ps_enc_ctxt->s_multi_thrd.aps_cur_L0_ipe_inp_prms[i4_me_frm_id])
+ {
+ pre_enc_me_ctxt_t *ps_curr_inp_prms;
+ pre_enc_L0_ipe_encloop_ctxt_t *ps_curr_L0_IPE_inp_prms;
+ ihevce_lap_enc_buf_t *ps_curr_inp;
+
+ /* get the current buffer pointer */
+ ps_curr_inp_prms = ps_enc_ctxt->s_multi_thrd.aps_cur_inp_me_prms[i4_me_frm_id];
+ ps_curr_L0_IPE_inp_prms =
+ ps_enc_ctxt->s_multi_thrd.aps_cur_L0_ipe_inp_prms[i4_me_frm_id];
+ ps_curr_inp = ps_enc_ctxt->s_multi_thrd.aps_cur_inp_me_prms[i4_me_frm_id]->ps_curr_inp;
+
+ /* -------------------------------------------------- */
+ /* Motion estimation (enc layer) of entire frame */
+ /* -------------------------------------------------- */
+ if((i4_me_end_flag == 0) &&
+ (1 ==
+ ps_enc_ctxt->s_multi_thrd.aps_cur_inp_me_prms[i4_me_frm_id]->i4_frm_proc_valid_flag))
+ {
+ /* Init i4_is_prev_frame_reference for the next P-frame */
+ me_master_ctxt_t *ps_master_ctxt =
+ (me_master_ctxt_t *)ps_enc_ctxt->s_module_ctxt.pv_me_ctxt;
+
+ /* get the current thread ctxt pointer */
+ me_ctxt_t *ps_ctxt = ps_master_ctxt->aps_me_ctxt[i4_thrd_id];
+
+ me_frm_ctxt_t *ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i4_me_frm_id];
+
+ if(ISLICE != ps_enc_ctxt->s_multi_thrd.aps_cur_inp_me_prms[i4_me_frm_id]
+ ->s_slice_hdr.i1_slice_type)
+ {
+ ihevce_me_process(
+ ps_enc_ctxt->s_module_ctxt.pv_me_ctxt,
+ ps_curr_inp,
+ ps_curr_inp_prms->ps_ctb_analyse,
+ ps_enc_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_me_frm_id],
+ ps_curr_inp_prms->plf_intra_8x8_cost,
+ ps_curr_L0_IPE_inp_prms->ps_ipe_analyse_ctb,
+ ps_curr_L0_IPE_inp_prms,
+ ps_curr_inp_prms->pv_me_lyr_ctxt,
+ &ps_enc_ctxt->s_multi_thrd,
+ ((ps_enc_ctxt->s_multi_thrd.i4_num_me_frm_pllel == 1) ? 0 : 1),
+ i4_thrd_id,
+ i4_me_frm_id);
+ }
+ else
+
+ {
+ /* Init i4_is_prev_frame_reference for the next P-frame */
+ me_master_ctxt_t *ps_master_ctxt =
+ (me_master_ctxt_t *)ps_enc_ctxt->s_module_ctxt.pv_me_ctxt;
+
+ /* get the current thread ctxt pointer */
+ me_ctxt_t *ps_ctxt = ps_master_ctxt->aps_me_ctxt[i4_thrd_id];
+
+ me_frm_ctxt_t *ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i4_me_frm_id];
+
+ multi_thrd_ctxt_t *ps_multi_thrd_ctxt = &ps_enc_ctxt->s_multi_thrd;
+
+ if(ps_enc_ctxt->s_multi_thrd.i4_num_me_frm_pllel != 1)
+ {
+ ps_frm_ctxt->i4_is_prev_frame_reference = 0;
+ }
+ else
+ {
+ ps_frm_ctxt->i4_is_prev_frame_reference =
+ ps_multi_thrd_ctxt->aps_cur_inp_me_prms[i4_me_frm_id]
+ ->ps_curr_inp->s_lap_out.i4_is_ref_pic;
+ }
+ }
+ }
+ }
+ /************************************/
+ /****** ENTER CRITICAL SECTION *****/
+ /************************************/
+ {
+ WORD32 result_frame_init;
+ void *pv_mutex_handle_frame_init;
+
+ /* Create mutex for locking non-reentrant sections */
+ pv_mutex_handle_frame_init =
+ ps_enc_ctxt->s_multi_thrd.apv_mutex_handle_me_end[i4_me_frm_id];
+
+ /****** Lock the critical section ******/
+ if(NULL != pv_mutex_handle_frame_init)
+ {
+ result_frame_init = osal_mutex_lock(pv_mutex_handle_frame_init);
+
+ if(OSAL_SUCCESS != result_frame_init)
+ return 0;
+ }
+ }
+
+ if(0 == ps_enc_ctxt->s_multi_thrd.ai4_me_enc_buff_prod_flag[i4_me_frm_id])
+ {
+ /* ------- set buffer produced from me_enc que ------------ */
+ ihevce_q_set_buff_prod(
+ ps_enc_ctxt,
+ IHEVCE_ME_ENC_RDOPT_Q,
+ ps_enc_ctxt->s_multi_thrd.ai4_me_out_buf_id[i4_me_frm_id]);
+
+ ps_enc_ctxt->s_multi_thrd.ai4_me_enc_buff_prod_flag[i4_me_frm_id] = 1;
+ }
+ if(NULL != ps_enc_ctxt->s_multi_thrd.aps_cur_inp_me_prms[i4_me_frm_id] &&
+ NULL != ps_enc_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_me_frm_id])
+ {
+ ihevce_lap_enc_buf_t *ps_curr_inp;
+
+ WORD32 first_field = 1;
+
+ /* Increment the counter to keep track of no of threads exiting the current mutex*/
+ ps_enc_ctxt->s_multi_thrd.me_num_thrds_exited[i4_me_frm_id]++;
+
+ ps_curr_inp = ps_enc_ctxt->s_multi_thrd.aps_cur_inp_me_prms[i4_me_frm_id]->ps_curr_inp;
+ /* Last slave thread will reset the master done frame init flag and set the prev
+ * frame me done flag for curr frame
+ */
+ if(ps_enc_ctxt->s_multi_thrd.me_num_thrds_exited[i4_me_frm_id] ==
+ ps_enc_ctxt->s_multi_thrd.i4_num_enc_proc_thrds)
+ {
+ ps_enc_ctxt->s_multi_thrd.me_num_thrds_exited[i4_me_frm_id] = 0;
+
+ ps_enc_ctxt->s_multi_thrd.ai4_me_master_done_flag[i4_me_frm_id] = 0;
+
+ /* Update Dyn. Vert. Search prms for P Pic. */
+ if(IV_P_FRAME == ps_curr_inp->s_lap_out.i4_pic_type)
+ {
+ WORD32 i4_idx_dvsr_p = ps_enc_ctxt->s_multi_thrd.i4_idx_dvsr_p;
+ /* Sanity Check */
+ ASSERT(ps_curr_inp->s_lap_out.i4_pic_type < IV_IP_FRAME);
+
+ /* Frame END processing for Dynamic Vertival Search */
+ ihevce_l0_me_frame_end(
+ ps_enc_ctxt->s_module_ctxt.pv_me_ctxt,
+ i4_idx_dvsr_p,
+ ps_curr_inp->s_lap_out.i4_display_num,
+ i4_me_frm_id);
+
+ ps_enc_ctxt->s_multi_thrd.i4_idx_dvsr_p++;
+ if(ps_enc_ctxt->s_multi_thrd.i4_idx_dvsr_p == NUM_SG_INTERLEAVED)
+ {
+ ps_enc_ctxt->s_multi_thrd.i4_idx_dvsr_p = 0;
+ }
+ }
+ if(1 == ps_enc_ctxt->s_multi_thrd.aps_cur_inp_me_prms[i4_me_frm_id]
+ ->i4_frm_proc_valid_flag)
+ {
+ /* Init i4_is_prev_frame_reference for the next P-frame */
+ me_master_ctxt_t *ps_master_ctxt =
+ (me_master_ctxt_t *)ps_enc_ctxt->s_module_ctxt.pv_me_ctxt;
+
+ /* get the current thread ctxt pointer */
+ me_ctxt_t *ps_ctxt = ps_master_ctxt->aps_me_ctxt[i4_thrd_id];
+
+ me_frm_ctxt_t *ps_frm_ctxt = ps_ctxt->aps_me_frm_prms[i4_me_frm_id];
+
+ ps_frm_ctxt->ps_curr_descr->aps_layers[0]->i4_non_ref_free = 1;
+ }
+ ps_enc_ctxt->s_multi_thrd.aps_cur_inp_me_prms[i4_me_frm_id] = NULL;
+ ps_enc_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_me_frm_id] = NULL;
+ ps_enc_ctxt->s_multi_thrd.aps_cur_L0_ipe_inp_prms[i4_me_frm_id] = NULL;
+ ps_enc_ctxt->s_multi_thrd.ai4_me_enc_buff_prod_flag[i4_me_frm_id] = 0;
+ ps_enc_ctxt->s_multi_thrd.ai4_me_master_done_flag[i4_me_frm_id] = 0;
+
+ /* Set me processing done for curr frame in the dependency manager */
+ ihevce_dmgr_update_frm_frm_sync(pv_dep_mngr_prev_frame_me_done);
+ }
+ }
+ /************************************/
+ /****** EXIT CRITICAL SECTION ******/
+ /************************************/
+
+ {
+ void *pv_mutex_handle_frame_init;
+
+ /* Create mutex for locking non-reentrant sections */
+ pv_mutex_handle_frame_init =
+ ps_enc_ctxt->s_multi_thrd.apv_mutex_handle_me_end[i4_me_frm_id];
+ /****** Unlock the critical section ******/
+ if(NULL != pv_mutex_handle_frame_init)
+ {
+ result = osal_mutex_unlock(pv_mutex_handle_frame_init);
+ if(OSAL_SUCCESS != result)
+ return 0;
+ }
+ }
+ /* -------------------------------------------- */
+ /* Encode Loop of entire frame */
+ /* -------------------------------------------- */
+ ASSERT(ps_enc_ctxt->s_multi_thrd.i4_num_enc_loop_frm_pllel <= MAX_NUM_ENC_LOOP_PARALLEL);
+
+ if(1 == ps_enc_ctxt->s_multi_thrd.i4_num_enc_loop_frm_pllel)
+ {
+ pv_dep_mngr_prev_frame_done = ps_enc_ctxt->s_multi_thrd.apv_dep_mngr_prev_frame_done[0];
+ }
+ else
+ {
+ pv_dep_mngr_prev_frame_done =
+ ps_enc_ctxt->s_multi_thrd.apv_dep_mngr_prev_frame_done[i4_enc_frm_id];
+ }
+ /* Wait till the prev frame enc loop is completed*/
+ {
+ ihevce_dmgr_chk_frm_frm_sync(pv_dep_mngr_prev_frame_done, ps_thrd_ctxt->i4_thrd_id);
+ }
+
+ /************************************/
+ /****** ENTER CRITICAL SECTION ******/
+ /************************************/
+ {
+ WORD32 result_frame_init;
+ void *pv_mutex_handle_frame_init;
+
+ /* Create mutex for locking non-reentrant sections */
+ pv_mutex_handle_frame_init =
+ ps_enc_ctxt->s_multi_thrd.apv_mutex_handle_frame_init[i4_enc_frm_id];
+
+ /****** Lock the critical section ******/
+ if(NULL != pv_mutex_handle_frame_init)
+ {
+ result_frame_init = osal_mutex_lock(pv_mutex_handle_frame_init);
+
+ if(OSAL_SUCCESS != result_frame_init)
+ return 0;
+ }
+ }
+
+ {
+ ihevce_lap_enc_buf_t *ps_curr_inp = NULL;
+ pre_enc_me_ctxt_t *ps_curr_inp_from_me = NULL;
+ me_enc_rdopt_ctxt_t *ps_curr_inp_enc = NULL;
+ pre_enc_L0_ipe_encloop_ctxt_t *ps_curr_L0_IPE_inp_prms = NULL;
+ recon_pic_buf_t *(*aps_ref_list)[HEVCE_MAX_REF_PICS * 2];
+ WORD32 ai4_cur_qp[IHEVCE_MAX_NUM_BITRATES] = { 0 };
+ WORD32 i4_field_pic = ps_enc_ctxt->s_runtime_src_prms.i4_field_pic;
+ WORD32 first_field = 1;
+ WORD32 result_frame_init;
+ void *pv_mutex_handle_frame_init;
+
+ /* Create mutex for locking non-reentrant sections */
+ pv_mutex_handle_frame_init =
+ ps_enc_ctxt->s_multi_thrd.apv_mutex_handle_frame_init[i4_enc_frm_id];
+
+ //acquire and initialize -> output and recon buffers
+ if(ps_enc_ctxt->s_multi_thrd.enc_master_done_frame_init[i4_enc_frm_id] == 0)
+ {
+ WORD32
+ i4_bitrate_ctr; //bit-rate instance counter (for loop variable) [0->reference bit-rate, 1,2->auxiliarty bit-rates]
+ /* ------- get the input prms buffer from me que ------------ */
+ ps_enc_ctxt->s_multi_thrd.aps_cur_inp_enc_prms[i4_enc_frm_id] =
+ (me_enc_rdopt_ctxt_t *)ihevce_q_get_filled_buff(
+ ps_enc_ctxt,
+ IHEVCE_ME_ENC_RDOPT_Q,
+ &ps_enc_ctxt->s_multi_thrd.i4_enc_in_buf_id[i4_enc_frm_id],
+ BUFF_QUE_BLOCKING_MODE);
+ i4_enc_end_flag =
+ ps_enc_ctxt->s_multi_thrd.aps_cur_inp_enc_prms[i4_enc_frm_id]->i4_end_flag;
+
+ ASSERT(ps_enc_ctxt->s_multi_thrd.aps_cur_inp_enc_prms[i4_enc_frm_id] != NULL);
+
+ if(ps_enc_ctxt->s_multi_thrd.aps_cur_inp_enc_prms[i4_enc_frm_id] != NULL)
+ {
+ ps_curr_inp =
+ ps_enc_ctxt->s_multi_thrd.aps_cur_inp_enc_prms[i4_enc_frm_id]->ps_curr_inp;
+ ps_curr_inp_from_me =
+ ps_enc_ctxt->s_multi_thrd.aps_cur_inp_enc_prms[i4_enc_frm_id]
+ ->ps_curr_inp_from_me_prms;
+ ps_curr_inp_enc = ps_enc_ctxt->s_multi_thrd.aps_cur_inp_enc_prms[i4_enc_frm_id];
+ ps_curr_L0_IPE_inp_prms =
+ ps_enc_ctxt->s_multi_thrd.aps_cur_inp_enc_prms[i4_enc_frm_id]
+ ->ps_curr_inp_from_l0_ipe_prms;
+
+ for(i4_bitrate_ctr = 0; i4_bitrate_ctr < i4_num_bitrates; i4_bitrate_ctr++)
+ {
+ iv_enc_recon_data_buffs_t
+ *ps_recon_out[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES] = {
+ { NULL }
+ };
+ frm_proc_ent_cod_ctxt_t *ps_curr_out[MAX_NUM_ENC_LOOP_PARALLEL]
+ [IHEVCE_MAX_NUM_BITRATES] = { { NULL } };
+
+ /* ------- get free output buffer from Frame buffer que ---------- */
+ /* There is a separate queue for each bit-rate instance. The output
+ buffer is acquired from the corresponding queue based on the
+ bitrate instance */
+ ps_curr_out[i4_enc_frm_id][i4_bitrate_ctr] =
+ (frm_proc_ent_cod_ctxt_t *)ihevce_q_get_free_buff(
+ (void *)ps_enc_ctxt,
+ IHEVCE_FRM_PRS_ENT_COD_Q +
+ i4_bitrate_ctr, /*decides the buffer queue */
+ &ps_enc_ctxt->s_multi_thrd.out_buf_id[i4_enc_frm_id][i4_bitrate_ctr],
+ BUFF_QUE_BLOCKING_MODE);
+ ps_enc_ctxt->s_multi_thrd.is_out_buf_freed[i4_enc_frm_id][i4_bitrate_ctr] =
+ 0;
+ ps_enc_ctxt->s_multi_thrd
+ .ps_curr_out_enc_grp[i4_enc_frm_id][i4_bitrate_ctr] =
+ ps_curr_out[i4_enc_frm_id][i4_bitrate_ctr];
+ //ps_curr_out[i4_enc_frm_id][i4_bitrate_ctr]->i4_enc_order_num = ps_curr_inp->s_lap_out.i4_enc_order_num;
+ /*registered User Data Call*/
+ if(ps_enc_ctxt->ps_stat_prms->s_out_strm_prms.i4_sei_payload_enable_flag)
+ {
+ ihevce_fill_sei_payload(
+ ps_enc_ctxt,
+ ps_curr_inp,
+ ps_curr_out[i4_enc_frm_id][i4_bitrate_ctr]);
+ }
+
+ /*derive end flag and input valid flag in output buffer */
+ if(NULL != ps_enc_ctxt->s_multi_thrd.aps_cur_inp_enc_prms[i4_enc_frm_id])
+ {
+ ps_curr_out[i4_enc_frm_id][i4_bitrate_ctr]->i4_end_flag =
+ ps_enc_ctxt->s_multi_thrd.aps_cur_inp_enc_prms[i4_enc_frm_id]
+ ->i4_end_flag;
+ ps_curr_out[i4_enc_frm_id][i4_bitrate_ctr]->i4_frm_proc_valid_flag =
+ ps_enc_ctxt->s_multi_thrd.aps_cur_inp_enc_prms[i4_enc_frm_id]
+ ->i4_frm_proc_valid_flag;
+
+ ps_curr_out[i4_enc_frm_id][i4_bitrate_ctr]->i4_out_flush_flag =
+ ps_enc_ctxt->s_multi_thrd.aps_cur_inp_enc_prms[i4_enc_frm_id]
+ ->ps_curr_inp->s_lap_out.i4_out_flush_flag;
+ }
+
+ /*derive other parameters in output buffer */
+ if(NULL != ps_curr_out[i4_enc_frm_id][i4_bitrate_ctr] &&
+ (NULL != ps_curr_inp_from_me) &&
+ (1 == ps_curr_inp->s_input_buf.i4_inp_frm_data_valid_flag) &&
+ (i4_enc_end_flag == 0))
+ {
+ /* copy the time stamps from inp to entropy inp */
+ ps_curr_out[i4_enc_frm_id][i4_bitrate_ctr]->i4_inp_timestamp_low =
+ ps_curr_inp_from_me->i4_inp_timestamp_low;
+ ps_curr_out[i4_enc_frm_id][i4_bitrate_ctr]->i4_inp_timestamp_high =
+ ps_curr_inp_from_me->i4_inp_timestamp_high;
+ ps_curr_out[i4_enc_frm_id][i4_bitrate_ctr]->pv_app_frm_ctxt =
+ ps_curr_inp_from_me->pv_app_frm_ctxt;
+
+ /*copy slice header params from temp structure to output buffer */
+ memcpy(
+ &ps_curr_out[i4_enc_frm_id][i4_bitrate_ctr]->s_slice_hdr,
+ &ps_enc_ctxt->s_multi_thrd.aps_cur_inp_enc_prms[i4_enc_frm_id]
+ ->s_slice_hdr,
+ sizeof(slice_header_t));
+
+ ps_curr_out[i4_enc_frm_id][i4_bitrate_ctr]
+ ->s_slice_hdr.pu4_entry_point_offset =
+ &ps_curr_out[i4_enc_frm_id][i4_bitrate_ctr]
+ ->ai4_entry_point_offset[0];
+
+ ps_curr_out[i4_enc_frm_id][i4_bitrate_ctr]->i4_slice_nal_type =
+ ps_curr_inp_from_me->i4_slice_nal_type;
+
+ /* populate sps, vps and pps pointers for the entropy input params */
+ ps_curr_out[i4_enc_frm_id][i4_bitrate_ctr]->ps_pps =
+ &ps_enc_ctxt->as_pps[i4_bitrate_ctr];
+ ps_curr_out[i4_enc_frm_id][i4_bitrate_ctr]->ps_sps =
+ &ps_enc_ctxt->as_sps[i4_bitrate_ctr];
+ ps_curr_out[i4_enc_frm_id][i4_bitrate_ctr]->ps_vps =
+ &ps_enc_ctxt->as_vps[i4_bitrate_ctr];
+
+ /* SEI header will be populated in pre-encode stage */
+ memcpy(
+ &ps_curr_out[i4_enc_frm_id][i4_bitrate_ctr]->s_sei,
+ &ps_curr_inp_from_me->s_sei,
+ sizeof(sei_params_t));
+
+ /*AUD and EOS present flags are populated*/
+ ps_curr_out[i4_enc_frm_id][i4_bitrate_ctr]->i1_aud_present_flag =
+ ps_enc_ctxt->ps_stat_prms->s_out_strm_prms.i4_aud_enable_flags;
+
+ ps_curr_out[i4_enc_frm_id][i4_bitrate_ctr]->i1_eos_present_flag =
+ ps_enc_ctxt->ps_stat_prms->s_out_strm_prms.i4_eos_enable_flags;
+
+ /* Information required for SEI Picture timing info */
+ {
+ ps_curr_out[i4_enc_frm_id][i4_bitrate_ctr]->i4_display_num =
+ ps_curr_inp->s_lap_out.i4_display_num;
+ }
+
+ /* The Qp populated in Pre enc stage needs to be overwritten with Qp
+ queried from rate control*/
+ ps_curr_out[i4_enc_frm_id][i4_bitrate_ctr]
+ ->s_slice_hdr.i1_slice_qp_delta =
+ (WORD8)ps_curr_inp_from_me->i4_curr_frm_qp -
+ ps_enc_ctxt->as_pps[i4_bitrate_ctr].i1_pic_init_qp;
+ }
+
+ /* ------- get a filled descriptor from output Que ------------ */
+ if(/*(1 == ps_curr_inp->s_input_buf.i4_inp_frm_data_valid_flag) &&*/
+ (ps_enc_ctxt->ps_stat_prms->i4_save_recon != 0))
+ {
+ /*swapping of buf_id for 0th and reference bitrate location, as encoder
+ assumes always 0th loc for reference bitrate and app must receive in
+ the configured order*/
+ WORD32 i4_recon_buf_id = i4_bitrate_ctr;
+ if(i4_bitrate_ctr == 0)
+ {
+ i4_recon_buf_id = ps_enc_ctxt->i4_ref_mbr_id;
+ }
+ else if(i4_bitrate_ctr == ps_enc_ctxt->i4_ref_mbr_id)
+ {
+ i4_recon_buf_id = 0;
+ }
+
+ /* ------- get free Recon buffer from Frame buffer que ---------- */
+ /* There is a separate queue for each bit-rate instance. The recon
+ buffer is acquired from the corresponding queue based on the
+ bitrate instance */
+ ps_enc_ctxt->s_multi_thrd.ps_recon_out[i4_enc_frm_id][i4_bitrate_ctr] =
+ (iv_enc_recon_data_buffs_t *)ihevce_q_get_filled_buff(
+ (void *)ps_enc_ctxt,
+ IHEVCE_RECON_DATA_Q +
+ i4_recon_buf_id, /*decides the buffer queue */
+ &ps_enc_ctxt->s_multi_thrd
+ .recon_buf_id[i4_enc_frm_id][i4_bitrate_ctr],
+ BUFF_QUE_BLOCKING_MODE);
+
+ ps_enc_ctxt->s_multi_thrd
+ .is_recon_dumped[i4_enc_frm_id][i4_bitrate_ctr] = 0;
+ ps_recon_out[i4_enc_frm_id][i4_bitrate_ctr] =
+ ps_enc_ctxt->s_multi_thrd
+ .ps_recon_out[i4_enc_frm_id][i4_bitrate_ctr];
+
+ ps_recon_out[i4_enc_frm_id][i4_bitrate_ctr]->i4_end_flag =
+ ps_enc_ctxt->s_multi_thrd.aps_cur_inp_enc_prms[i4_enc_frm_id]
+ ->i4_end_flag;
+ }
+
+ } //bitrate ctr
+ }
+ }
+ if(ps_enc_ctxt->s_multi_thrd.aps_cur_inp_enc_prms[i4_enc_frm_id] != NULL)
+ {
+ ps_curr_inp =
+ ps_enc_ctxt->s_multi_thrd.aps_cur_inp_enc_prms[i4_enc_frm_id]->ps_curr_inp;
+ ps_curr_inp_from_me = ps_enc_ctxt->s_multi_thrd.aps_cur_inp_enc_prms[i4_enc_frm_id]
+ ->ps_curr_inp_from_me_prms;
+ ps_curr_inp_enc = ps_enc_ctxt->s_multi_thrd.aps_cur_inp_enc_prms[i4_enc_frm_id];
+ ps_curr_L0_IPE_inp_prms =
+ ps_enc_ctxt->s_multi_thrd.aps_cur_inp_enc_prms[i4_enc_frm_id]
+ ->ps_curr_inp_from_l0_ipe_prms;
+ }
+ if((NULL != ps_enc_ctxt->s_multi_thrd.aps_cur_inp_enc_prms[i4_enc_frm_id]) &&
+ ((1 == ps_curr_inp_enc->i4_frm_proc_valid_flag) &&
+ (ps_enc_ctxt->s_multi_thrd.enc_master_done_frame_init[i4_enc_frm_id] == 0)))
+ {
+ for(i = 0; i < i4_num_bitrates; i++)
+ {
+ aps_ref_list = ps_curr_inp_enc->aps_ref_list[i];
+ /* acquire mutex lock for rate control calls */
+ osal_mutex_lock(ps_enc_ctxt->pv_rc_mutex_lock_hdl);
+
+ /*utilize the satd data from pre enc stage to get more accurate estimate SAD for I pic*/
+ if(ps_curr_inp->s_lap_out.i4_pic_type == IV_I_FRAME ||
+ ps_curr_inp->s_lap_out.i4_pic_type == IV_IDR_FRAME)
+ {
+ ihevce_rc_update_cur_frm_intra_satd(
+ (void *)ps_enc_ctxt->s_module_ctxt.apv_rc_ctxt[i],
+ ps_curr_inp_from_me->i8_frame_acc_satd_cost,
+ ps_enc_ctxt->i4_active_enc_frame_id);
+ }
+
+ /*pels assuming satd/act is obtained for entire frame*/
+ ps_curr_inp->s_rc_lap_out.i4_num_pels_in_frame_considered =
+ ps_curr_inp->s_lap_out.s_input_buf.i4_y_ht *
+ ps_curr_inp->s_lap_out.s_input_buf.i4_y_wd;
+
+ /*Service pending request to change average bitrate if any*/
+ {
+ LWORD64 i8_new_bitrate =
+ ihevce_rc_get_new_bitrate(ps_enc_ctxt->s_module_ctxt.apv_rc_ctxt[0]);
+ LWORD64 i8_new_peak_bitrate = ihevce_rc_get_new_peak_bitrate(
+ ps_enc_ctxt->s_module_ctxt.apv_rc_ctxt[0]);
+ ps_enc_ctxt->s_multi_thrd.ps_curr_out_enc_grp[i4_enc_frm_id][i]
+ ->i8_buf_level_bitrate_change = -1;
+ if((i8_new_bitrate != -1) &&
+ (i8_new_peak_bitrate != -1)) /*-1 indicates no pending request*/
+ {
+ LWORD64 buffer_level = ihevce_rc_change_avg_bitrate(
+ ps_enc_ctxt->s_module_ctxt.apv_rc_ctxt[0]);
+ ps_enc_ctxt->s_multi_thrd.ps_curr_out_enc_grp[i4_enc_frm_id][i]
+ ->i8_buf_level_bitrate_change = buffer_level;
+ }
+ }
+
+ if((1 == ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms.i4_mres_single_out) &&
+ (1 == ps_curr_inp->s_lap_out.i4_first_frm_new_res))
+ {
+ /* Whenever a change in resolution happens, change the buffer level */
+ ps_enc_ctxt->s_multi_thrd.ps_curr_out_enc_grp[i4_enc_frm_id][i]
+ ->i8_buf_level_bitrate_change = 0;
+ }
+#if 1 //KISH ELP
+ {
+ rc_bits_sad_t as_rc_frame_stat[IHEVCE_MAX_NUM_BITRATES];
+
+ if(ps_enc_ctxt->ai4_rc_query[i] ==
+ ps_enc_ctxt->i4_max_fr_enc_loop_parallel_rc) //KISH
+ {
+ WORD32 out_buf_id[IHEVCE_MAX_NUM_BITRATES];
+ WORD32 i4_pic_type;
+ WORD32 cur_qp[IHEVCE_MAX_NUM_BITRATES];
+ ihevce_lap_output_params_t s_lap_out;
+
+ rc_lap_out_params_t s_rc_lap_out;
+ WORD32 i4_suppress_bpic_update;
+
+ ihevce_rc_store_retrive_update_info(
+ (void *)ps_enc_ctxt->s_module_ctxt.apv_rc_ctxt[i],
+ &as_rc_frame_stat[i],
+ ps_enc_ctxt->i4_active_enc_frame_id,
+ i,
+ 2,
+ &out_buf_id[i],
+ &i4_pic_type,
+ &cur_qp[i],
+ (void *)&s_lap_out,
+ (void *)&s_rc_lap_out);
+
+ i4_suppress_bpic_update =
+ (WORD32)(s_rc_lap_out.i4_rc_temporal_lyr_id > 1);
+ /*RC interface update before update to happen only for ELP disabled */
+ if(1 == ps_enc_ctxt->i4_max_fr_enc_loop_parallel_rc)
+ {
+ /* SGI & Enc Loop Parallelism related changes*/
+ ihevce_rc_interface_update(
+ (void *)ps_enc_ctxt->s_module_ctxt.apv_rc_ctxt[i],
+ (IV_PICTURE_CODING_TYPE_T)s_rc_lap_out.i4_rc_pic_type,
+ &s_rc_lap_out,
+ cur_qp[i],
+ i4_enc_frm_id_rc);
+ }
+
+ ihevce_rc_update_pic_info(
+ (void *)ps_enc_ctxt->s_module_ctxt.apv_rc_ctxt[i],
+ (as_rc_frame_stat[i].u4_total_texture_bits +
+ as_rc_frame_stat[i].u4_total_header_bits), //pass total bits
+ as_rc_frame_stat[i].u4_total_header_bits,
+ as_rc_frame_stat[i].u4_total_sad,
+ as_rc_frame_stat[i].u4_total_intra_sad,
+ (IV_PICTURE_CODING_TYPE_T)i4_pic_type,
+ cur_qp[i],
+ i4_suppress_bpic_update,
+ as_rc_frame_stat[i].i4_qp_normalized_8x8_cu_sum,
+ as_rc_frame_stat[i].i4_8x8_cu_sum,
+ as_rc_frame_stat[i].i8_sad_by_qscale,
+ &s_lap_out,
+ &s_rc_lap_out,
+ out_buf_id[i],
+ as_rc_frame_stat[i].u4_open_loop_intra_sad,
+ as_rc_frame_stat[i].i8_total_ssd_frame,
+ ps_enc_ctxt
+ ->i4_active_enc_frame_id); //ps_curr_out->i4_inp_timestamp_low)
+
+ //DBG_PRINTF("\n Sad = %d \t total bits = %d ", s_rc_frame_stat.u4_total_sad, (s_rc_frame_stat.u4_total_texture_bits + s_rc_frame_stat.u4_total_header_bits));
+ /*populate qp for pre enc*/
+
+ //g_count--;
+ ps_enc_ctxt->ai4_rc_query[i]--;
+
+ if(i == (i4_num_bitrates - 1))
+ {
+ ihevce_rc_cal_pre_enc_qp(
+ (void *)ps_enc_ctxt->s_module_ctxt.apv_rc_ctxt[0]);
+
+ ps_enc_ctxt->i4_active_enc_frame_id++;
+ ps_enc_ctxt->i4_active_enc_frame_id =
+ (ps_enc_ctxt->i4_active_enc_frame_id %
+ ps_enc_ctxt->i4_max_fr_enc_loop_parallel_rc);
+ }
+ }
+ }
+#endif
+ if(ps_enc_ctxt->ai4_rc_query[i] < ps_enc_ctxt->i4_max_fr_enc_loop_parallel_rc)
+ {
+ /*HEVC_RC query rate control for qp*/
+ ai4_cur_qp[i] = ihevce_rc_get_pic_quant(
+ (void *)ps_enc_ctxt->s_module_ctxt.apv_rc_ctxt[i],
+ &ps_curr_inp->s_rc_lap_out,
+ ENC_GET_QP,
+ i4_enc_frm_id_rc,
+ 0,
+ &ps_curr_inp->s_lap_out.ai4_frame_bits_estimated[i]);
+
+ ps_curr_inp->s_rc_lap_out.i4_orig_rc_qp = ai4_cur_qp[i];
+
+ ps_enc_ctxt->s_multi_thrd.i4_in_frame_rc_enabled = 0;
+ ps_enc_ctxt->s_multi_thrd.ps_curr_out_enc_grp[i4_enc_frm_id][i]
+ ->i4_sub_pic_level_rc = 0;
+ ps_enc_ctxt->s_multi_thrd.ps_curr_out_enc_grp[i4_enc_frm_id][i]
+ ->ai4_frame_bits_estimated =
+ ps_curr_inp->s_lap_out.ai4_frame_bits_estimated[i];
+
+ {
+ ps_enc_ctxt->ai4_rc_query[i]++;
+ }
+ }
+
+ /* SGI & Enc Loop Parallelism related changes*/
+ ihevce_rc_interface_update(
+ (void *)ps_enc_ctxt->s_module_ctxt.apv_rc_ctxt[i],
+ (IV_PICTURE_CODING_TYPE_T)ps_curr_inp->s_lap_out.i4_pic_type,
+ &ps_curr_inp->s_rc_lap_out,
+ ai4_cur_qp[i],
+ i4_enc_frm_id_rc);
+
+ //DBG_PRINTF("HEVC_QP = %d MPEG2_QP = %d\n",cur_qp,gu1_HEVCToMpeg2Quant[cur_qp]);//i_model_print
+
+ /* release mutex lock after rate control calls */
+ osal_mutex_unlock(ps_enc_ctxt->pv_rc_mutex_lock_hdl);
+
+ ps_enc_ctxt->s_multi_thrd.ps_curr_out_enc_grp[i4_enc_frm_id][i]
+ ->s_slice_hdr.i1_slice_qp_delta =
+ (WORD8)ai4_cur_qp[i] - ps_enc_ctxt->as_pps[i].i1_pic_init_qp;
+
+ ps_enc_ctxt->s_multi_thrd.cur_qp[i4_enc_frm_id][i] = ai4_cur_qp[i];
+
+ /* For interlace pictures, first_field depends on topfield_first and bottom field */
+ if(i4_field_pic)
+ {
+ first_field =
+ (ps_curr_inp->s_input_buf.i4_topfield_first ^
+ ps_curr_inp->s_input_buf.i4_bottom_field);
+ }
+ /* get frame level lambda params */
+ ihevce_get_frame_lambda_prms(
+ ps_enc_ctxt,
+ ps_curr_inp_from_me,
+ ai4_cur_qp[i],
+ first_field,
+ ps_curr_inp->s_lap_out.i4_is_ref_pic,
+ ps_curr_inp->s_lap_out.i4_temporal_lyr_id,
+ ps_curr_inp->s_lap_out.f_i_pic_lamda_modifier,
+ i,
+ ENC_LOOP_LAMBDA_TYPE);
+
+#if ADAPT_COLOCATED_FROM_L0_FLAG
+ ps_enc_ctxt->s_multi_thrd.ps_frm_recon[i4_enc_frm_id][i]->i4_frame_qp =
+ ai4_cur_qp[i];
+#endif
+ } //bitrate counter ends
+
+ /* Reset the Dependency Mngrs local to EncLoop., ie CU_TopRight and Dblk */
+ ihevce_enc_loop_dep_mngr_frame_reset(
+ ps_enc_ctxt->s_module_ctxt.pv_enc_loop_ctxt, i4_enc_frm_id);
+ }
+
+ {
+ /*Set the master done flag for frame init so that other
+ * threads can skip it
+ */
+ ps_enc_ctxt->s_multi_thrd.enc_master_done_frame_init[i4_enc_frm_id] = 1;
+ }
+
+ /************************************/
+ /****** EXIT CRITICAL SECTION ******/
+ /************************************/
+
+ /****** Unlock the critical section ******/
+ if(NULL != pv_mutex_handle_frame_init)
+ {
+ result_frame_init = osal_mutex_unlock(pv_mutex_handle_frame_init);
+ if(OSAL_SUCCESS != result_frame_init)
+ return 0;
+ }
+ ps_enc_ctxt->s_multi_thrd.i4_encode = 1;
+ ps_enc_ctxt->s_multi_thrd.i4_num_re_enc = 0;
+ /************************************/
+ /****** Do Enc loop process ******/
+ /************************************/
+ /* Each thread will run the enc-loop.
+ Each thread will initialize its own enc_loop context and do the processing.
+ Each thread will run all the bit-rate instances one after another */
+ if((i4_enc_end_flag == 0) &&
+ (NULL != ps_enc_ctxt->s_multi_thrd.aps_cur_inp_enc_prms[i4_enc_frm_id]) &&
+ (1 == ps_enc_ctxt->s_multi_thrd.aps_cur_inp_enc_prms[i4_enc_frm_id]
+ ->i4_frm_proc_valid_flag))
+ {
+ while(1)
+ {
+ ctb_enc_loop_out_t *ps_ctb_enc_loop_frm[IHEVCE_MAX_NUM_BITRATES];
+ cu_enc_loop_out_t *ps_cu_enc_loop_frm[IHEVCE_MAX_NUM_BITRATES];
+ tu_enc_loop_out_t *ps_tu_frm[IHEVCE_MAX_NUM_BITRATES];
+ pu_t *ps_pu_frm[IHEVCE_MAX_NUM_BITRATES];
+ UWORD8 *pu1_frm_coeffs[IHEVCE_MAX_NUM_BITRATES];
+ me_master_ctxt_t *ps_master_me_ctxt =
+ (me_master_ctxt_t *)ps_enc_ctxt->s_module_ctxt.pv_me_ctxt;
+ ihevce_enc_loop_master_ctxt_t *ps_master_ctxt =
+ (ihevce_enc_loop_master_ctxt_t *)ps_enc_ctxt->s_module_ctxt.pv_enc_loop_ctxt;
+
+ for(i = 0; i < i4_num_bitrates; i++)
+ {
+ if(i4_thrd_id == 0)
+ {
+ PROFILE_START(
+ &ps_hle_ctxt->profile_enc[ps_enc_ctxt->i4_resolution_id][i]);
+ }
+ if(NULL != ps_enc_ctxt->s_multi_thrd.ps_curr_out_enc_grp[i4_enc_frm_id])
+ {
+ ps_ctb_enc_loop_frm[i] =
+ ps_enc_ctxt->s_multi_thrd.ps_curr_out_enc_grp[i4_enc_frm_id][i]
+ ->ps_frm_ctb_data;
+ ps_cu_enc_loop_frm[i] =
+ ps_enc_ctxt->s_multi_thrd.ps_curr_out_enc_grp[i4_enc_frm_id][i]
+ ->ps_frm_cu_data;
+ ps_tu_frm[i] =
+ ps_enc_ctxt->s_multi_thrd.ps_curr_out_enc_grp[i4_enc_frm_id][i]
+ ->ps_frm_tu_data;
+ ps_pu_frm[i] =
+ ps_enc_ctxt->s_multi_thrd.ps_curr_out_enc_grp[i4_enc_frm_id][i]
+ ->ps_frm_pu_data;
+ pu1_frm_coeffs[i] = (UWORD8 *)ps_enc_ctxt->s_multi_thrd
+ .ps_curr_out_enc_grp[i4_enc_frm_id][i]
+ ->pv_coeff_data;
+ }
+ /*derive reference picture list based on ping or pong instance */
+ aps_ref_list = ps_curr_inp_enc->aps_ref_list[i];
+
+ /* Always consider chroma cost when computing cost for derived instance */
+ ps_master_ctxt->aps_enc_loop_thrd_ctxt[i4_thrd_id]->i4_consider_chroma_cost =
+ 1;
+
+ /*************************
+ * MULTI BITRATE CODE START
+ **************************/
+ if(i4_num_bitrates > 1)
+ {
+ ihevce_mbr_quality_tool_set_configuration(
+ ps_master_ctxt->aps_enc_loop_thrd_ctxt[i4_thrd_id],
+ ps_enc_ctxt->ps_stat_prms);
+ }
+ /************************
+ * MULTI BITRATE CODE END
+ *************************/
+ /* picture level init of Encode loop module */
+ ihevce_enc_loop_frame_init(
+ ps_enc_ctxt->s_module_ctxt.pv_enc_loop_ctxt,
+ ps_enc_ctxt->s_multi_thrd.cur_qp[i4_enc_frm_id][i],
+ aps_ref_list,
+ ps_enc_ctxt->s_multi_thrd.ps_frm_recon[i4_enc_frm_id][i],
+ &ps_enc_ctxt->s_multi_thrd.ps_curr_out_enc_grp[i4_enc_frm_id][i]
+ ->s_slice_hdr,
+ ps_enc_ctxt->s_multi_thrd.ps_curr_out_enc_grp[i4_enc_frm_id][i]->ps_pps,
+ ps_enc_ctxt->s_multi_thrd.ps_curr_out_enc_grp[i4_enc_frm_id][i]->ps_sps,
+ ps_enc_ctxt->s_multi_thrd.ps_curr_out_enc_grp[i4_enc_frm_id][i]->ps_vps,
+ ps_curr_inp_enc->ps_curr_inp->s_lap_out.i1_weighted_pred_flag,
+ ps_curr_inp_enc->ps_curr_inp->s_lap_out.i1_weighted_bipred_flag,
+ ps_curr_inp_enc->ps_curr_inp->s_lap_out.i4_log2_luma_wght_denom,
+ ps_curr_inp_enc->ps_curr_inp->s_lap_out.i4_log2_chroma_wght_denom,
+ ps_curr_inp_enc->ps_curr_inp->s_lap_out.i4_poc,
+ ps_curr_inp_enc->ps_curr_inp->s_lap_out.i4_display_num,
+ ps_enc_ctxt,
+ ps_curr_inp_enc,
+ i,
+ i4_thrd_id,
+ i4_enc_frm_id, // update this to enc_loop_ctxt struct
+ i4_num_bitrates,
+ ps_curr_inp_enc->ps_curr_inp->s_lap_out.i4_quality_preset,
+ ps_enc_ctxt->s_multi_thrd.aps_cur_inp_enc_prms[i4_enc_frm_id]
+ ->pv_dep_mngr_encloop_dep_me);
+
+ ihevce_enc_loop_process(
+ ps_enc_ctxt->s_module_ctxt.pv_enc_loop_ctxt,
+ ps_curr_inp,
+ ps_curr_inp_from_me->ps_ctb_analyse,
+ ps_curr_L0_IPE_inp_prms->ps_ipe_analyse_ctb,
+ ps_enc_ctxt->s_multi_thrd.ps_frm_recon[i4_enc_frm_id][i],
+ ps_curr_inp_enc->ps_cur_ctb_cu_tree,
+ ps_ctb_enc_loop_frm[i],
+ ps_cu_enc_loop_frm[i],
+ ps_tu_frm[i],
+ ps_pu_frm[i],
+ pu1_frm_coeffs[i],
+ &ps_enc_ctxt->s_frm_ctb_prms,
+ &ps_curr_inp_from_me->as_lambda_prms[i],
+ &ps_enc_ctxt->s_multi_thrd,
+ i4_thrd_id,
+ i4_enc_frm_id,
+ ps_enc_ctxt->ps_stat_prms->s_pass_prms.i4_pass);
+ if(i4_thrd_id == 0)
+ {
+ PROFILE_STOP(
+ &ps_hle_ctxt->profile_enc[ps_enc_ctxt->i4_resolution_id][i], NULL);
+ }
+ } //loop over bitrate ends
+ {
+ break;
+ }
+ } /*end of while(ps_enc_ctxt->s_multi_thrd.ai4_encode[i4_enc_frm_id] == 1)*/
+ }
+
+ /************************************/
+ /****** ENTER CRITICAL SECTION ******/
+ /************************************/
+
+ /****** Lock the critical section ******/
+ if(NULL != ps_enc_ctxt->s_multi_thrd.apv_post_enc_mutex_handle[i4_enc_frm_id])
+ {
+ result = osal_mutex_lock(
+ ps_enc_ctxt->s_multi_thrd.apv_post_enc_mutex_handle[i4_enc_frm_id]);
+
+ if(OSAL_SUCCESS != result)
+ return 0;
+ }
+ if(ps_enc_ctxt->s_multi_thrd.aps_cur_inp_enc_prms[i4_enc_frm_id] != NULL)
+ {
+ /* Increment the counter to keep track of no of threads exiting the current mutex*/
+ ps_enc_ctxt->s_multi_thrd.num_thrds_exited[i4_enc_frm_id]++;
+
+ /* If the end frame is reached force the last slave to enter the next critical section*/
+ if(i4_enc_end_flag == 1)
+ {
+ if(ps_enc_ctxt->s_multi_thrd.num_thrds_done ==
+ ps_enc_ctxt->s_multi_thrd.i4_num_enc_proc_thrds - 1)
+ {
+ ps_enc_ctxt->s_multi_thrd.num_thrds_exited[i4_enc_frm_id] =
+ ps_enc_ctxt->s_multi_thrd.i4_num_enc_proc_thrds;
+ }
+ }
+
+ {
+ /*Last slave thread coming out of enc loop will execute next critical section*/
+ if(ps_enc_ctxt->s_multi_thrd.num_thrds_exited[i4_enc_frm_id] ==
+ ps_enc_ctxt->s_multi_thrd.i4_num_enc_proc_thrds)
+ {
+ iv_enc_recon_data_buffs_t *ps_recon_out_temp = NULL;
+ recon_pic_buf_t *ps_frm_recon_temp = NULL;
+ ihevce_lap_enc_buf_t *ps_curr_inp;
+ rc_lap_out_params_t *ps_rc_lap_out_next_encode;
+
+ WORD32 ai4_act_qp[IHEVCE_MAX_NUM_BITRATES];
+ ps_enc_ctxt->s_multi_thrd.num_thrds_exited[i4_enc_frm_id] = 0;
+
+ ps_curr_inp = ps_enc_ctxt->s_multi_thrd.aps_cur_inp_enc_prms[i4_enc_frm_id]
+ ->ps_curr_inp;
+
+ for(i = 0; i < i4_num_bitrates; i++)
+ {
+ {
+ WORD32 j, i4_avg_QP;
+ ihevce_enc_loop_master_ctxt_t *ps_master_ctxt =
+ (ihevce_enc_loop_master_ctxt_t *)
+ ps_enc_ctxt->s_module_ctxt.pv_enc_loop_ctxt;
+ ihevce_enc_loop_ctxt_t *ps_ctxt, *ps_ctxt_temp;
+ ihevce_enc_loop_ctxt_t *ps_ctxt_last_thrd;
+ LWORD64 i8_total_cu_bits_into_qscale = 0, i8_total_cu_bits = 0;
+ UWORD32 total_frame_intra_sad = 0;
+ UWORD32 total_frame_inter_sad = 0;
+ UWORD32 total_frame_sad = 0;
+
+ LWORD64 total_frame_intra_cost = 0;
+ LWORD64 total_frame_inter_cost = 0;
+ LWORD64 total_frame_cost = 0;
+
+ ps_ctxt_last_thrd =
+ ps_master_ctxt->aps_enc_loop_thrd_ctxt[i4_thrd_id];
+ if(ps_enc_ctxt->s_multi_thrd.i4_in_frame_rc_enabled)
+ {
+ WORD32 i4_total_ctb =
+ ps_enc_ctxt->s_frm_ctb_prms.i4_num_ctbs_horz *
+ ps_enc_ctxt->s_frm_ctb_prms.i4_num_ctbs_vert;
+
+ ai4_act_qp[i] =
+ ps_enc_ctxt->s_multi_thrd
+ .ai4_curr_qp_acc[ps_ctxt_last_thrd->i4_enc_frm_id][i] /
+ i4_total_ctb;
+ }
+ else
+ {
+ ai4_act_qp[i] =
+ ps_enc_ctxt->s_multi_thrd.cur_qp[i4_enc_frm_id][i];
+ }
+
+ ps_enc_ctxt->s_multi_thrd
+ .ai4_curr_qp_acc[ps_ctxt_last_thrd->i4_enc_frm_id][i] = 0;
+
+ /*Reset all the values of sub pic rc to default after the frame is completed */
+ {
+ ps_enc_ctxt->s_multi_thrd
+ .ai4_acc_ctb_ctr[ps_ctxt_last_thrd->i4_enc_frm_id][i] = 0;
+ ps_enc_ctxt->s_multi_thrd
+ .ai4_ctb_ctr[ps_ctxt_last_thrd->i4_enc_frm_id][i] = 0;
+
+ ps_enc_ctxt->s_multi_thrd
+ .ai4_threshold_reached[ps_ctxt_last_thrd->i4_enc_frm_id][i] =
+ 0;
+
+ ps_enc_ctxt->s_multi_thrd
+ .ai4_curr_qp_estimated[ps_ctxt_last_thrd->i4_enc_frm_id][i] =
+ (1 << QP_LEVEL_MOD_ACT_FACTOR);
+
+ ps_enc_ctxt->s_multi_thrd
+ .af_acc_hdr_bits_scale_err[ps_ctxt_last_thrd->i4_enc_frm_id]
+ [i] = 0;
+ }
+ for(j = 0; j < ps_master_ctxt->i4_num_proc_thrds; j++)
+ {
+ /* ENC_LOOP state structure */
+ ps_ctxt = ps_master_ctxt->aps_enc_loop_thrd_ctxt[j];
+
+ total_frame_intra_sad +=
+ ps_ctxt
+ ->aaps_enc_loop_rc_params[ps_ctxt_last_thrd
+ ->i4_enc_frm_id][i]
+ ->u4_frame_intra_sad_acc;
+ total_frame_inter_sad +=
+ ps_ctxt
+ ->aaps_enc_loop_rc_params[ps_ctxt_last_thrd
+ ->i4_enc_frm_id][i]
+ ->u4_frame_inter_sad_acc;
+ total_frame_sad +=
+ ps_ctxt
+ ->aaps_enc_loop_rc_params[ps_ctxt_last_thrd
+ ->i4_enc_frm_id][i]
+ ->u4_frame_sad_acc;
+
+ total_frame_intra_cost +=
+ ps_ctxt
+ ->aaps_enc_loop_rc_params[ps_ctxt_last_thrd
+ ->i4_enc_frm_id][i]
+ ->i8_frame_intra_cost_acc;
+ total_frame_inter_cost +=
+ ps_ctxt
+ ->aaps_enc_loop_rc_params[ps_ctxt_last_thrd
+ ->i4_enc_frm_id][i]
+ ->i8_frame_inter_cost_acc;
+ total_frame_cost +=
+ ps_ctxt
+ ->aaps_enc_loop_rc_params[ps_ctxt_last_thrd
+ ->i4_enc_frm_id][i]
+ ->i8_frame_cost_acc;
+ /*Reset thrd id flag once the frame is completed */
+ ps_enc_ctxt->s_multi_thrd
+ .ai4_thrd_id_valid_flag[ps_ctxt_last_thrd->i4_enc_frm_id][i]
+ [j] = -1;
+ }
+ ps_enc_ctxt->s_multi_thrd.ps_curr_out_enc_grp[i4_enc_frm_id][i]
+ ->s_pic_level_info.u4_frame_sad = total_frame_sad;
+ ps_enc_ctxt->s_multi_thrd.ps_curr_out_enc_grp[i4_enc_frm_id][i]
+ ->s_pic_level_info.u4_frame_intra_sad = total_frame_intra_sad;
+ ps_enc_ctxt->s_multi_thrd.ps_curr_out_enc_grp[i4_enc_frm_id][i]
+ ->s_pic_level_info.u4_frame_inter_sad = total_frame_inter_sad;
+
+ ps_enc_ctxt->s_multi_thrd.ps_curr_out_enc_grp[i4_enc_frm_id][i]
+ ->s_pic_level_info.i8_frame_cost = total_frame_cost;
+ ps_enc_ctxt->s_multi_thrd.ps_curr_out_enc_grp[i4_enc_frm_id][i]
+ ->s_pic_level_info.i8_frame_intra_cost = total_frame_intra_cost;
+ ps_enc_ctxt->s_multi_thrd.ps_curr_out_enc_grp[i4_enc_frm_id][i]
+ ->s_pic_level_info.i8_frame_inter_cost = total_frame_inter_cost;
+ }
+ ps_enc_ctxt->s_multi_thrd.ai4_produce_outbuf[i4_enc_frm_id][i] = 1;
+ ps_recon_out_temp =
+ ps_enc_ctxt->s_multi_thrd.ps_recon_out[i4_enc_frm_id][i];
+ ps_frm_recon_temp =
+ ps_enc_ctxt->s_multi_thrd.ps_frm_recon[i4_enc_frm_id][i];
+
+ /* end of frame processing only if current input is valid */
+ if(1 == ps_enc_ctxt->s_multi_thrd.aps_cur_inp_enc_prms[i4_enc_frm_id]
+ ->i4_frm_proc_valid_flag)
+ {
+ /* Calculate the SEI Hash if enabled */
+ if(0 !=
+ ps_enc_ctxt->s_multi_thrd.ps_curr_out_enc_grp[i4_enc_frm_id][i]
+ ->s_sei.i1_decoded_pic_hash_sei_flag)
+ {
+ void *pv_y_buf;
+ void *pv_u_buf;
+
+ {
+ pv_y_buf = ps_frm_recon_temp->s_yuv_buf_desc.pv_y_buf;
+ pv_u_buf = ps_frm_recon_temp->s_yuv_buf_desc.pv_u_buf;
+ }
+
+ ihevce_populate_hash_sei(
+ &ps_enc_ctxt->s_multi_thrd
+ .ps_curr_out_enc_grp[i4_enc_frm_id][i]
+ ->s_sei,
+ ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms
+ .i4_internal_bit_depth,
+ pv_y_buf,
+ ps_frm_recon_temp->s_yuv_buf_desc.i4_y_wd,
+ ps_frm_recon_temp->s_yuv_buf_desc.i4_y_ht,
+ ps_frm_recon_temp->s_yuv_buf_desc.i4_y_strd,
+ pv_u_buf,
+ ps_frm_recon_temp->s_yuv_buf_desc.i4_uv_wd,
+ ps_frm_recon_temp->s_yuv_buf_desc.i4_uv_ht,
+ ps_frm_recon_temp->s_yuv_buf_desc.i4_uv_strd,
+ 0,
+ 0);
+ }
+ /* Sending qp, poc and pic-type to entropy thread for printing on console */
+ if(ps_enc_ctxt->ps_stat_prms->i4_log_dump_level != 0)
+ {
+ ps_enc_ctxt->s_multi_thrd.ps_curr_out_enc_grp[i4_enc_frm_id][i]
+ ->i4_qp =
+ ps_enc_ctxt->s_multi_thrd.cur_qp[i4_enc_frm_id][i];
+ ps_enc_ctxt->s_multi_thrd.ps_curr_out_enc_grp[i4_enc_frm_id][i]
+ ->i4_poc = ps_curr_inp->s_lap_out.i4_poc;
+ ps_enc_ctxt->s_multi_thrd.ps_curr_out_enc_grp[i4_enc_frm_id][i]
+ ->i4_pic_type = ps_curr_inp->s_lap_out.i4_pic_type;
+ }
+
+ ps_enc_ctxt->s_multi_thrd.ps_curr_out_enc_grp[i4_enc_frm_id][i]
+ ->i4_is_I_scenecut =
+ ((ps_curr_inp->s_lap_out.i4_scene_type == 1) &&
+ (ps_curr_inp->s_lap_out.i4_pic_type == IV_IDR_FRAME ||
+ ps_curr_inp->s_lap_out.i4_pic_type == IV_I_FRAME));
+
+ ps_enc_ctxt->s_multi_thrd.ps_curr_out_enc_grp[i4_enc_frm_id][i]
+ ->i4_is_non_I_scenecut =
+ ((ps_curr_inp->s_lap_out.i4_scene_type ==
+ SCENE_TYPE_SCENE_CUT) &&
+ (ps_enc_ctxt->s_multi_thrd
+ .ps_curr_out_enc_grp[i4_enc_frm_id][i]
+ ->i4_is_I_scenecut == 0));
+
+ /*ps_enc_ctxt->s_multi_thrd.ps_curr_out_enc_grp[i4_enc_frm_id][i]->i4_is_I_only_scd = ps_curr_inp->s_lap_out.i4_is_I_only_scd;
+ ps_enc_ctxt->s_multi_thrd.ps_curr_out_enc_grp[i4_enc_frm_id][i]->i4_is_non_I_scd = ps_curr_inp->s_lap_out.i4_is_non_I_scd;
+
+ ps_enc_ctxt->s_multi_thrd.ps_curr_out_enc_grp[i4_enc_frm_id][i]->i4_is_model_valid = ps_curr_inp->s_lap_out.i4_is_model_valid;*/
+
+ /* -------------------------------------------- */
+ /* MSE Computation for PSNR */
+ /* -------------------------------------------- */
+ if(ps_enc_ctxt->ps_stat_prms->i4_log_dump_level != 0)
+ {
+ ps_enc_ctxt->s_multi_thrd.ps_curr_out_enc_grp[i4_enc_frm_id][i]
+ ->i4_qp =
+ ps_enc_ctxt->s_multi_thrd.cur_qp[i4_enc_frm_id][i];
+ ps_enc_ctxt->s_multi_thrd.ps_curr_out_enc_grp[i4_enc_frm_id][i]
+ ->i4_poc = ps_curr_inp->s_lap_out.i4_poc;
+ ps_enc_ctxt->s_multi_thrd.ps_curr_out_enc_grp[i4_enc_frm_id][i]
+ ->i4_pic_type = ps_curr_inp->s_lap_out.i4_pic_type;
+ }
+
+ /* if non reference B picture */
+ if(0 == ps_frm_recon_temp->i4_is_reference)
+ {
+ ps_enc_ctxt->s_multi_thrd.ps_curr_out_enc_grp[i4_enc_frm_id][i]
+ ->i4_pic_type += 2;
+ }
+
+#define FORCE_EXT_REF_PIC 0
+
+ /* -------------------------------------------- */
+ /* Dumping of recon to App Queue */
+ /* -------------------------------------------- */
+ if(1 == ps_enc_ctxt->ps_stat_prms->i4_save_recon)
+ {
+ {
+ WORD32 i, j;
+ UWORD8 *pu1_recon;
+ UWORD8 *pu1_chrm_buf_u;
+ UWORD8 *pu1_chrm_buf_v;
+ UWORD8 *pu1_curr_recon;
+
+ pu1_recon =
+ (UWORD8 *)ps_frm_recon_temp->s_yuv_buf_desc.pv_y_buf;
+
+ /** Copying Luma into recon buffer **/
+ pu1_curr_recon = (UWORD8 *)ps_recon_out_temp->pv_y_buf;
+
+ for(j = 0; j < ps_curr_inp->s_lap_out.s_input_buf.i4_y_ht;
+ j++)
+ {
+ memcpy(
+ pu1_curr_recon,
+ pu1_recon,
+ ps_curr_inp->s_lap_out.s_input_buf.i4_y_wd);
+
+ pu1_recon +=
+ ps_frm_recon_temp->s_yuv_buf_desc.i4_y_strd;
+ pu1_curr_recon +=
+ ps_curr_inp->s_lap_out.s_input_buf.i4_y_wd;
+ }
+
+ /* recon chroma is converted from Semiplanar to Planar for dumping */
+ pu1_recon =
+ (UWORD8 *)ps_frm_recon_temp->s_yuv_buf_desc.pv_u_buf;
+ pu1_chrm_buf_u = (UWORD8 *)ps_recon_out_temp->pv_cb_buf;
+ pu1_chrm_buf_v =
+ pu1_chrm_buf_u +
+ ((ps_curr_inp->s_lap_out.s_input_buf.i4_uv_wd >> 1) *
+ ps_curr_inp->s_lap_out.s_input_buf.i4_uv_ht);
+
+ for(j = 0; j < ps_curr_inp->s_lap_out.s_input_buf.i4_uv_ht;
+ j++)
+ {
+ for(i = 0;
+ i<ps_curr_inp->s_lap_out.s_input_buf.i4_uv_wd>> 1;
+ i++)
+ {
+ *pu1_chrm_buf_u++ = *pu1_recon++;
+ *pu1_chrm_buf_v++ = *pu1_recon++;
+ }
+
+ pu1_recon -=
+ ps_curr_inp->s_lap_out.s_input_buf.i4_uv_wd;
+ pu1_recon +=
+ ps_frm_recon_temp->s_yuv_buf_desc.i4_uv_strd;
+ }
+
+ /* set the POC and number of bytes in Y & UV buf */
+ ps_recon_out_temp->i4_poc = ps_frm_recon_temp->i4_poc;
+ ps_recon_out_temp->i4_y_pixels =
+ ps_curr_inp->s_lap_out.s_input_buf.i4_y_ht *
+ ps_curr_inp->s_lap_out.s_input_buf.i4_y_wd;
+ ps_recon_out_temp->i4_uv_pixels =
+ ps_curr_inp->s_lap_out.s_input_buf.i4_uv_wd *
+ ps_curr_inp->s_lap_out.s_input_buf.i4_uv_ht;
+ }
+ }
+ ps_frm_recon_temp->i4_non_ref_free_flag = 1;
+ /* -------------------------------------------- */
+ /* End of picture updates */
+ /* -------------------------------------------- */
+ }
+
+ /* After the MSE (or PSNR) computation is done we will update
+ these data in output buffer structure and then signal entropy
+ thread that the buffer is produced. */
+ if(ps_enc_ctxt->s_multi_thrd.ai4_produce_outbuf[i4_enc_frm_id][i] == 1)
+ {
+ /* set the output buffer as produced */
+ ihevce_q_set_buff_prod(
+ (void *)ps_enc_ctxt,
+ IHEVCE_FRM_PRS_ENT_COD_Q + i,
+ ps_enc_ctxt->s_multi_thrd.out_buf_id[i4_enc_frm_id][i]);
+
+ ps_enc_ctxt->s_multi_thrd.is_out_buf_freed[i4_enc_frm_id][i] = 1;
+ ps_enc_ctxt->s_multi_thrd.ai4_produce_outbuf[i4_enc_frm_id][i] = 0;
+ }
+
+ } //bit-rate counter ends
+ /* -------------------------------------------- */
+ /* Frame level RC update */
+ /* -------------------------------------------- */
+ /* Query enc_loop to get the Parameters for Rate control */
+ if(1 == ps_curr_inp->s_input_buf.i4_inp_frm_data_valid_flag)
+ {
+ frm_proc_ent_cod_ctxt_t *ps_curr_out = NULL;
+ /*HEVC_RC*/
+ rc_bits_sad_t as_rc_frame_stat[IHEVCE_MAX_NUM_BITRATES];
+ osal_mutex_lock(ps_enc_ctxt->pv_rc_mutex_lock_hdl);
+
+ for(i = 0; i < i4_num_bitrates; i++)
+ {
+ /*each bit-rate RC params are collated by master thread */
+ ihevce_enc_loop_get_frame_rc_prms(
+ ps_enc_ctxt->s_module_ctxt.pv_enc_loop_ctxt,
+ &as_rc_frame_stat[i],
+ i,
+ i4_enc_frm_id);
+
+ /*update bits estimate on rd opt thread so that mismatch between rdopt and entropy can be taken care of*/
+ ps_curr_out =
+ ps_enc_ctxt->s_multi_thrd.ps_curr_out_enc_grp[i4_enc_frm_id][i];
+
+ ps_rc_lap_out_next_encode =
+ (rc_lap_out_params_t *)
+ ps_curr_inp->s_rc_lap_out.ps_rc_lap_out_next_encode;
+
+ ps_curr_out->i4_is_end_of_idr_gop = 0;
+
+ if(NULL != ps_rc_lap_out_next_encode)
+ {
+ if(ps_rc_lap_out_next_encode->i4_rc_pic_type == IV_IDR_FRAME)
+ {
+ /*If the next pic is IDR, then signal end of gopf for current frame*/
+ ps_curr_out->i4_is_end_of_idr_gop = 1;
+ }
+ }
+ else if(NULL == ps_rc_lap_out_next_encode)
+ {
+ /*If the lap out next is NULL, then end of sequence reached*/
+ ps_curr_out->i4_is_end_of_idr_gop = 1;
+ }
+
+ if(NULL == ps_curr_out)
+ {
+ DBG_PRINTF("error in getting curr out in encode loop\n");
+ }
+
+ //DBG_PRINTF("\nRDOPT head = %d RDOPT text = %d\n",s_rc_frame_stat.u4_total_header_bits,s_rc_frame_stat.u4_total_texture_bits);
+ /* acquire mutex lock for rate control calls */
+
+ /* Note : u4_total_intra_sad coming out of enc_loop */
+ /* will not be accurate becos of intra gating */
+ /* need to access the importance of this sad in RC */
+
+ //Store the rc update parameters for deterministic Enc loop parallelism
+
+ {
+ ihevce_rc_store_retrive_update_info(
+ (void *)ps_enc_ctxt->s_module_ctxt.apv_rc_ctxt[i],
+ &as_rc_frame_stat[i],
+ i4_enc_frm_id_rc,
+ i,
+ 1,
+ &ps_enc_ctxt->s_multi_thrd.out_buf_id[i4_enc_frm_id][i],
+ &ps_curr_inp->s_lap_out.i4_pic_type,
+ &ai4_act_qp[i],
+ (void *)&ps_curr_inp->s_lap_out,
+ (void *)&ps_curr_inp->s_rc_lap_out); // STORE
+ }
+ }
+
+ /* release mutex lock after rate control calls */
+ osal_mutex_unlock(ps_enc_ctxt->pv_rc_mutex_lock_hdl);
+ }
+ if((ps_enc_ctxt->ps_stat_prms->i4_save_recon != 0) /*&&
+ (1 == ps_curr_inp->s_input_buf.s_input_buf.i4_inp_frm_data_valid_flag)*/)
+ {
+ WORD32 i4_bitrate_ctr;
+ for(i4_bitrate_ctr = 0; i4_bitrate_ctr < i4_num_bitrates;
+ i4_bitrate_ctr++)
+ {
+ /*swaping of buf_id for 0th and reference bitrate location, as encoder
+ assumes always 0th loc for reference bitrate and app must receive in
+ the configured order*/
+ WORD32 i4_recon_buf_id = i4_bitrate_ctr;
+ if(i4_bitrate_ctr == 0)
+ {
+ i4_recon_buf_id = ps_enc_ctxt->i4_ref_mbr_id;
+ }
+ else if(i4_bitrate_ctr == ps_enc_ctxt->i4_ref_mbr_id)
+ {
+ i4_recon_buf_id = 0;
+ }
+
+ /* Call back to Apln. saying recon buffer is produced */
+ ps_hle_ctxt->ihevce_output_recon_fill_done(
+ ps_hle_ctxt->pv_recon_cb_handle,
+ ps_enc_ctxt->s_multi_thrd
+ .ps_recon_out[i4_enc_frm_id][i4_bitrate_ctr],
+ i4_recon_buf_id, /* br instance */
+ i4_resolution_id /* res_intance */);
+
+ /* --- release the current recon buffer ---- */
+ ihevce_q_rel_buf(
+ (void *)ps_enc_ctxt,
+ (IHEVCE_RECON_DATA_Q + i4_recon_buf_id),
+ ps_enc_ctxt->s_multi_thrd
+ .recon_buf_id[i4_enc_frm_id][i4_bitrate_ctr]);
+
+ ps_enc_ctxt->s_multi_thrd
+ .is_recon_dumped[i4_enc_frm_id][i4_bitrate_ctr] = 1;
+ }
+ }
+
+ if(i4_enc_end_flag == 1)
+ {
+ if(ps_enc_ctxt->s_multi_thrd.is_in_buf_freed[i4_enc_frm_id] == 0)
+ {
+ /* release the pre_enc/enc queue buffer */
+ ihevce_q_rel_buf(
+ (void *)ps_enc_ctxt,
+ IHEVCE_PRE_ENC_ME_Q,
+ ps_curr_inp_enc->curr_inp_from_me_buf_id);
+
+ ps_enc_ctxt->s_multi_thrd.is_in_buf_freed[i4_enc_frm_id] = 1;
+ }
+ }
+ /* release encoder owned input buffer*/
+ ihevce_q_rel_buf(
+ (void *)ps_enc_ctxt,
+ IHEVCE_INPUT_DATA_CTRL_Q,
+ ps_curr_inp_enc->curr_inp_buf_id);
+ /* release the pre_enc/enc queue buffer */
+ ihevce_q_rel_buf(
+ ps_enc_ctxt,
+ IHEVCE_PRE_ENC_ME_Q,
+ ps_curr_inp_enc->curr_inp_from_me_buf_id);
+
+ ps_enc_ctxt->s_multi_thrd.is_in_buf_freed[i4_enc_frm_id] = 1;
+
+ /* release the pre_enc/enc queue buffer */
+ ihevce_q_rel_buf(
+ ps_enc_ctxt,
+ IHEVCE_L0_IPE_ENC_Q,
+ ps_curr_inp_enc->curr_inp_from_l0_ipe_buf_id);
+
+ ps_enc_ctxt->s_multi_thrd.is_L0_ipe_in_buf_freed[i4_enc_frm_id] = 1;
+ /* release the me/enc queue buffer */
+ ihevce_q_rel_buf(
+ ps_enc_ctxt,
+ IHEVCE_ME_ENC_RDOPT_Q,
+ ps_enc_ctxt->s_multi_thrd.i4_enc_in_buf_id[i4_enc_frm_id]);
+
+ /* reset the pointers to NULL */
+ ps_enc_ctxt->s_multi_thrd.aps_cur_inp_enc_prms[i4_enc_frm_id] = NULL;
+ ps_enc_ctxt->s_multi_thrd.enc_master_done_frame_init[i4_enc_frm_id] = 0;
+ for(i = 0; i < i4_num_bitrates; i++)
+ ps_enc_ctxt->s_multi_thrd.ps_curr_out_enc_grp[i4_enc_frm_id][i] = NULL;
+
+ /* Set the prev_frame_done variable to 1 to indicate that
+ *prev frame is done */
+ ihevce_dmgr_update_frm_frm_sync(pv_dep_mngr_prev_frame_done);
+ }
+ }
+ }
+ else
+ {
+ /* Increment the counter to keep track of no of threads exiting the current mutex*/
+ ps_enc_ctxt->s_multi_thrd.num_thrds_exited[i4_enc_frm_id]++;
+ /*Last slave thread comming out of enc loop will execute next critical section*/
+ if(ps_enc_ctxt->s_multi_thrd.num_thrds_exited[i4_enc_frm_id] ==
+ ps_enc_ctxt->s_multi_thrd.i4_num_enc_proc_thrds)
+ {
+ ps_enc_ctxt->s_multi_thrd.num_thrds_exited[i4_enc_frm_id] = 0;
+
+ /* reset the pointers to NULL */
+ ps_enc_ctxt->s_multi_thrd.aps_cur_inp_enc_prms[i4_enc_frm_id] = NULL;
+
+ ps_enc_ctxt->s_multi_thrd.enc_master_done_frame_init[i4_enc_frm_id] = 0;
+
+ for(i = 0; i < i4_num_bitrates; i++)
+ ps_enc_ctxt->s_multi_thrd.ps_curr_out_enc_grp[i4_enc_frm_id][i] = NULL;
+
+ /* Set the prev_frame_done variable to 1 to indicate that
+ *prev frame is done
+ */
+ ihevce_dmgr_update_frm_frm_sync(pv_dep_mngr_prev_frame_done);
+ }
+ }
+
+ /* Toggle the ping pong flag of the thread exiting curr frame*/
+ /*ps_enc_ctxt->s_multi_thrd.ping_pong[ps_thrd_ctxt->i4_thrd_id] =
+ !ps_enc_ctxt->s_multi_thrd.ping_pong[ps_thrd_ctxt->i4_thrd_id];*/
+ }
+
+ /************************************/
+ /****** EXIT CRITICAL SECTION ******/
+ /************************************/
+ /****** Unlock the critical section ******/
+ if(NULL != ps_enc_ctxt->s_multi_thrd.apv_post_enc_mutex_handle[i4_enc_frm_id])
+ {
+ result = osal_mutex_unlock(
+ ps_enc_ctxt->s_multi_thrd.apv_post_enc_mutex_handle[i4_enc_frm_id]);
+ if(OSAL_SUCCESS != result)
+ return 0;
+ }
+
+ if((0 == i4_me_end_flag) && (0 == i4_enc_end_flag))
+ {
+ i4_enc_frm_id++;
+ i4_enc_frm_id_rc++;
+
+ if(i4_enc_frm_id == NUM_ME_ENC_BUFS)
+ {
+ i4_enc_frm_id = 0;
+ }
+
+ if(i4_enc_frm_id_rc == ps_enc_ctxt->i4_max_fr_enc_loop_parallel_rc)
+ {
+ i4_enc_frm_id_rc = 0;
+ }
+ i4_me_frm_id++;
+
+ if(i4_me_frm_id == NUM_ME_ENC_BUFS)
+ i4_me_frm_id = 0;
+ }
+ if(1 == ps_enc_ctxt->s_multi_thrd.i4_force_end_flag)
+ {
+ i4_me_end_flag = 1;
+ i4_enc_end_flag = 1;
+ }
+ }
+
+ /****** Lock the critical section ******/
+
+ if(NULL != ps_enc_ctxt->s_multi_thrd.apv_post_enc_mutex_handle[i4_enc_frm_id])
+ {
+ WORD32 result;
+
+ result =
+ osal_mutex_lock(ps_enc_ctxt->s_multi_thrd.apv_post_enc_mutex_handle[i4_enc_frm_id]);
+
+ if(OSAL_SUCCESS != result)
+ return 0;
+ }
+
+ if(ps_enc_ctxt->s_multi_thrd.num_thrds_done ==
+ (ps_enc_ctxt->s_multi_thrd.i4_num_enc_proc_thrds - 1))
+ {
+ if(1 != ps_enc_ctxt->s_multi_thrd.i4_force_end_flag)
+ {
+ osal_mutex_lock(ps_enc_ctxt->pv_rc_mutex_lock_hdl);
+ for(i = 0; i < ps_enc_ctxt->i4_num_bitrates; i++)
+ {
+ ihevce_rc_close(
+ ps_enc_ctxt,
+ ps_enc_ctxt->i4_active_enc_frame_id,
+ 2,
+ MIN(ps_enc_ctxt->ai4_rc_query[i], ps_enc_ctxt->i4_max_fr_enc_loop_parallel_rc),
+ i);
+ }
+ osal_mutex_unlock(ps_enc_ctxt->pv_rc_mutex_lock_hdl);
+ }
+ }
+
+ ps_enc_ctxt->s_multi_thrd.num_thrds_done++;
+
+ /****** UnLock the critical section ******/
+ if(NULL != ps_enc_ctxt->s_multi_thrd.apv_post_enc_mutex_handle[i4_enc_frm_id])
+ {
+ WORD32 result;
+
+ result =
+ osal_mutex_unlock(ps_enc_ctxt->s_multi_thrd.apv_post_enc_mutex_handle[i4_enc_frm_id]);
+
+ if(OSAL_SUCCESS != result)
+ return 0;
+ }
+
+ /****** Lock the critical section ******/
+ if(NULL != ps_enc_ctxt->s_multi_thrd.apv_post_enc_mutex_handle[i4_enc_frm_id])
+ {
+ WORD32 result;
+ result =
+ osal_mutex_lock(ps_enc_ctxt->s_multi_thrd.apv_post_enc_mutex_handle[i4_enc_frm_id]);
+
+ if(OSAL_SUCCESS != result)
+ return 0;
+ }
+ if((ps_enc_ctxt->s_multi_thrd.num_thrds_done ==
+ ps_enc_ctxt->s_multi_thrd.i4_num_enc_proc_thrds) &&
+ (ps_enc_ctxt->s_multi_thrd.i4_force_end_flag))
+ {
+ WORD32 num_bufs_preenc_me_que, num_bufs_L0_ipe_enc;
+ WORD32 buf_id_ctr, frm_id_ctr;
+ frm_proc_ent_cod_ctxt_t *ps_curr_out_enc_ent[IHEVCE_MAX_NUM_BITRATES];
+ WORD32 out_buf_id_enc_ent[IHEVCE_MAX_NUM_BITRATES];
+
+ if(ps_enc_ctxt->s_multi_thrd.i4_num_enc_loop_frm_pllel > 1)
+ {
+ num_bufs_preenc_me_que = (MAX_L0_IPE_ENC_STAGGER - 1) + MIN_L1_L0_STAGGER_NON_SEQ +
+ NUM_BUFS_DECOMP_HME +
+ ps_enc_ctxt->ps_stat_prms->s_lap_prms.i4_rc_look_ahead_pics;
+
+ num_bufs_L0_ipe_enc = MAX_L0_IPE_ENC_STAGGER;
+ }
+ else
+ {
+ num_bufs_preenc_me_que = (MIN_L0_IPE_ENC_STAGGER - 1) + MIN_L1_L0_STAGGER_NON_SEQ +
+ NUM_BUFS_DECOMP_HME +
+ ps_enc_ctxt->ps_stat_prms->s_lap_prms.i4_rc_look_ahead_pics;
+
+ num_bufs_L0_ipe_enc = MIN_L0_IPE_ENC_STAGGER;
+ }
+ for(buf_id_ctr = 0; buf_id_ctr < num_bufs_preenc_me_que; buf_id_ctr++)
+ {
+ /* release encoder owned input buffer*/
+ ihevce_q_rel_buf((void *)ps_enc_ctxt, IHEVCE_PRE_ENC_ME_Q, buf_id_ctr);
+ }
+ for(buf_id_ctr = 0; buf_id_ctr < num_bufs_L0_ipe_enc; buf_id_ctr++)
+ {
+ /* release encoder owned input buffer*/
+ ihevce_q_rel_buf((void *)ps_enc_ctxt, IHEVCE_L0_IPE_ENC_Q, buf_id_ctr);
+ }
+ for(frm_id_ctr = 0; frm_id_ctr < NUM_ME_ENC_BUFS; frm_id_ctr++)
+ {
+ for(i = 0; i < ps_enc_ctxt->i4_num_bitrates; i++)
+ {
+ if(NULL != ps_enc_ctxt->s_multi_thrd.ps_curr_out_enc_grp[frm_id_ctr][i])
+ {
+ ps_enc_ctxt->s_multi_thrd.ps_curr_out_enc_grp[frm_id_ctr][i]
+ ->i4_frm_proc_valid_flag = 0;
+ ps_enc_ctxt->s_multi_thrd.ps_curr_out_enc_grp[frm_id_ctr][i]->i4_end_flag = 1;
+ /* set the output buffer as produced */
+ ihevce_q_set_buff_prod(
+ (void *)ps_enc_ctxt,
+ IHEVCE_FRM_PRS_ENT_COD_Q + i,
+ ps_enc_ctxt->s_multi_thrd.out_buf_id[frm_id_ctr][i]);
+ }
+ }
+ }
+ for(buf_id_ctr = 0; buf_id_ctr < NUM_FRMPROC_ENTCOD_BUFS;
+ buf_id_ctr++) /*** Set buffer produced for NUM_FRMPROC_ENTCOD_BUFS buffers for entropy to exit ***/
+ {
+ for(i = 0; i < ps_enc_ctxt->i4_num_bitrates; i++)
+ {
+ ps_curr_out_enc_ent[i] = (frm_proc_ent_cod_ctxt_t *)ihevce_q_get_free_buff(
+ (void *)ps_enc_ctxt,
+ IHEVCE_FRM_PRS_ENT_COD_Q + i, /*decides the buffer queue */
+ &out_buf_id_enc_ent[i],
+ BUFF_QUE_NON_BLOCKING_MODE);
+ if(NULL != ps_curr_out_enc_ent[i])
+ {
+ ps_curr_out_enc_ent[i]->i4_frm_proc_valid_flag = 0;
+ ps_curr_out_enc_ent[i]->i4_end_flag = 1;
+ /* set the output buffer as produced */
+ ihevce_q_set_buff_prod(
+ (void *)ps_enc_ctxt, IHEVCE_FRM_PRS_ENT_COD_Q + i, out_buf_id_enc_ent[i]);
+ }
+ }
+ }
+ }
+
+ /* The last thread coming out of Enc. Proc. */
+ /* Release all the Recon buffers the application might have queued in */
+ if((ps_enc_ctxt->s_multi_thrd.num_thrds_done ==
+ ps_enc_ctxt->s_multi_thrd.i4_num_enc_proc_thrds) &&
+ (ps_enc_ctxt->ps_stat_prms->i4_save_recon != 0) &&
+ (ps_enc_ctxt->s_multi_thrd.i4_is_recon_free_done == 0))
+ {
+ WORD32 i4_bitrate_ctr;
+
+ for(i4_bitrate_ctr = 0; i4_bitrate_ctr < i4_num_bitrates; i4_bitrate_ctr++)
+ {
+ WORD32 end_flag = 0;
+ while(0 == end_flag)
+ {
+ /*swaping of buf_id for 0th and reference bitrate location, as encoder
+ assumes always 0th loc for reference bitrate and app must receive in
+ the configured order*/
+ WORD32 i4_recon_buf_id = i4_bitrate_ctr;
+ if(i4_bitrate_ctr == 0)
+ {
+ i4_recon_buf_id = ps_enc_ctxt->i4_ref_mbr_id;
+ }
+ else if(i4_bitrate_ctr == ps_enc_ctxt->i4_ref_mbr_id)
+ {
+ i4_recon_buf_id = 0;
+ }
+
+ /* ------- get free Recon buffer from Frame buffer que ---------- */
+ /* There is a separate queue for each bit-rate instnace. The recon
+ buffer is acquired from the corresponding queue based on the
+ bitrate instnace */
+ ps_enc_ctxt->s_multi_thrd.ps_recon_out[i4_enc_frm_id][i4_bitrate_ctr] =
+ (iv_enc_recon_data_buffs_t *)ihevce_q_get_filled_buff(
+ (void *)ps_enc_ctxt,
+ IHEVCE_RECON_DATA_Q + i4_recon_buf_id, /*decides the buffer queue */
+ &ps_enc_ctxt->s_multi_thrd.recon_buf_id[i4_enc_frm_id][i4_bitrate_ctr],
+ BUFF_QUE_BLOCKING_MODE);
+
+ /* Update the end_flag from application */
+ end_flag = ps_enc_ctxt->s_multi_thrd.ps_recon_out[i4_enc_frm_id][i4_bitrate_ctr]
+ ->i4_is_last_buf;
+
+ ps_enc_ctxt->s_multi_thrd.ps_recon_out[i4_enc_frm_id][i4_bitrate_ctr]->i4_end_flag =
+ 1;
+ ps_enc_ctxt->s_multi_thrd.ps_recon_out[i4_enc_frm_id][i4_bitrate_ctr]->i4_y_pixels =
+ 0;
+ ps_enc_ctxt->s_multi_thrd.ps_recon_out[i4_enc_frm_id][i4_bitrate_ctr]->i4_uv_pixels =
+ 0;
+
+ /* Call back to Apln. saying recon buffer is produced */
+ ps_hle_ctxt->ihevce_output_recon_fill_done(
+ ps_hle_ctxt->pv_recon_cb_handle,
+ ps_enc_ctxt->s_multi_thrd.ps_recon_out[i4_enc_frm_id][i4_bitrate_ctr],
+ i4_recon_buf_id, /* br instance */
+ i4_resolution_id /* res_intance */);
+
+ /* --- release the current recon buffer ---- */
+ ihevce_q_rel_buf(
+ (void *)ps_enc_ctxt,
+ (IHEVCE_RECON_DATA_Q + i4_recon_buf_id),
+ ps_enc_ctxt->s_multi_thrd.recon_buf_id[i4_enc_frm_id][i4_bitrate_ctr]);
+ }
+ }
+ /* Set the recon free done flag */
+ ps_enc_ctxt->s_multi_thrd.i4_is_recon_free_done = 1;
+ }
+
+ /****** UnLock the critical section ******/
+ if(NULL != ps_enc_ctxt->s_multi_thrd.apv_post_enc_mutex_handle[i4_enc_frm_id])
+ {
+ WORD32 result;
+ result =
+ osal_mutex_unlock(ps_enc_ctxt->s_multi_thrd.apv_post_enc_mutex_handle[i4_enc_frm_id]);
+
+ if(OSAL_SUCCESS != result)
+ return 0;
+ }
+
+ return (0);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_set_pre_enc_prms \endif
+*
+* \brief
+* Derives the aligned picture size and the CTB/CU/PU/TU counts per row,
+* passes the aligned picture size to coarse ME and ME,
+* populates vps, sps, pps and vui params for every bit-rate instance,
+* and does the one time RC init (under the RC mutex lock)
+*
+* \param[in,out] ps_enc_ctxt : Encoder context pointer
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_set_pre_enc_prms(enc_ctxt_t *ps_enc_ctxt)
+{
+ WORD32 i;
+ WORD32 i4_num_instance,
+ i4_resolution_id = ps_enc_ctxt->i4_resolution_id; // i4_num_instance: number of bit-rate instances
+
+ i4_num_instance = ps_enc_ctxt->i4_num_bitrates; // NOTE(review): reassigned below before its first use
+
+#if PIC_ALIGN_CTB_SIZE
+
+ ps_enc_ctxt->s_frm_ctb_prms.i4_cu_aligned_pic_wd =
+ ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width +
+ SET_CTB_ALIGN(
+ ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width,
+ ps_enc_ctxt->s_frm_ctb_prms.i4_ctb_size);
+
+ ps_enc_ctxt->s_frm_ctb_prms.i4_num_ctbs_horz =
+ ps_enc_ctxt->s_frm_ctb_prms.i4_cu_aligned_pic_wd / ps_enc_ctxt->s_frm_ctb_prms.i4_ctb_size;
+
+ ps_enc_ctxt->s_frm_ctb_prms.i4_cu_aligned_pic_ht =
+ ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height +
+ SET_CTB_ALIGN(
+ ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height,
+ ps_enc_ctxt->s_frm_ctb_prms.i4_ctb_size);
+
+ ps_enc_ctxt->s_frm_ctb_prms.i4_num_ctbs_vert =
+ ps_enc_ctxt->s_frm_ctb_prms.i4_cu_aligned_pic_ht / ps_enc_ctxt->s_frm_ctb_prms.i4_ctb_size;
+#else // PIC_ALIGN_CTB_SIZE
+ /* Align the frame width to min CU size */
+ ps_enc_ctxt->s_frm_ctb_prms.i4_cu_aligned_pic_wd =
+ ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width +
+ SET_CTB_ALIGN(
+ ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width,
+ ps_enc_ctxt->s_frm_ctb_prms.i4_min_cu_size);
+
+ ps_enc_ctxt->s_frm_ctb_prms.i4_num_ctbs_horz =
+ ps_enc_ctxt->s_frm_ctb_prms.i4_cu_aligned_pic_wd / ps_enc_ctxt->s_frm_ctb_prms.i4_ctb_size;
+
+ if((ps_enc_ctxt->s_frm_ctb_prms.i4_cu_aligned_pic_wd %
+ ps_enc_ctxt->s_frm_ctb_prms.i4_ctb_size) != 0)
+ ps_enc_ctxt->s_frm_ctb_prms.i4_num_ctbs_horz =
+ ps_enc_ctxt->s_frm_ctb_prms.i4_num_ctbs_horz + 1; /* round the CTB count up */
+
+ /* Align the frame height to min CU size */
+ ps_enc_ctxt->s_frm_ctb_prms.i4_cu_aligned_pic_ht =
+ ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height +
+ SET_CTB_ALIGN(
+ ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height,
+ ps_enc_ctxt->s_frm_ctb_prms.i4_min_cu_size);
+
+ ps_enc_ctxt->s_frm_ctb_prms.i4_num_ctbs_vert =
+ ps_enc_ctxt->s_frm_ctb_prms.i4_cu_aligned_pic_ht / ps_enc_ctxt->s_frm_ctb_prms.i4_ctb_size;
+
+ if((ps_enc_ctxt->s_frm_ctb_prms.i4_cu_aligned_pic_ht %
+ ps_enc_ctxt->s_frm_ctb_prms.i4_ctb_size) != 0)
+ ps_enc_ctxt->s_frm_ctb_prms.i4_num_ctbs_vert =
+ ps_enc_ctxt->s_frm_ctb_prms.i4_num_ctbs_vert + 1; /* round the CTB count up */
+
+#endif // PIC_ALIGN_CTB_SIZE
+
+ ps_enc_ctxt->s_frm_ctb_prms.i4_max_cus_in_row = ps_enc_ctxt->s_frm_ctb_prms.i4_num_ctbs_horz *
+ ps_enc_ctxt->s_frm_ctb_prms.i4_num_cus_in_ctb;
+
+ ps_enc_ctxt->s_frm_ctb_prms.i4_max_pus_in_row = ps_enc_ctxt->s_frm_ctb_prms.i4_num_ctbs_horz *
+ ps_enc_ctxt->s_frm_ctb_prms.i4_num_pus_in_ctb;
+
+ ps_enc_ctxt->s_frm_ctb_prms.i4_max_tus_in_row = ps_enc_ctxt->s_frm_ctb_prms.i4_num_ctbs_horz *
+ ps_enc_ctxt->s_frm_ctb_prms.i4_num_tus_in_ctb;
+ ihevce_coarse_me_set_resolution(
+ ps_enc_ctxt->s_module_ctxt.pv_coarse_me_ctxt,
+ 1,
+ &ps_enc_ctxt->s_frm_ctb_prms.i4_cu_aligned_pic_wd,
+ &ps_enc_ctxt->s_frm_ctb_prms.i4_cu_aligned_pic_ht);
+
+ /* If the resolution needs to be changed dynamically then this needs to go to encode group */
+ ihevce_me_set_resolution(
+ ps_enc_ctxt->s_module_ctxt.pv_me_ctxt,
+ 1,
+ &ps_enc_ctxt->s_frm_ctb_prms.i4_cu_aligned_pic_wd,
+ &ps_enc_ctxt->s_frm_ctb_prms.i4_cu_aligned_pic_ht);
+ i4_num_instance = ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id]
+ .i4_num_bitrate_instances;
+ for(i = 0; i < i4_num_instance; i++)
+ {
+ WORD32 i4_id;
+ /* swapping of buf_id for 0th and reference bitrate location, as encoder
+ always assumes 0th loc for reference bitrate and app must receive in
+ the configured order */
+ if(i == 0)
+ {
+ i4_id = ps_enc_ctxt->i4_ref_mbr_id;
+ }
+ else if(i == ps_enc_ctxt->i4_ref_mbr_id)
+ {
+ i4_id = 0;
+ }
+ else
+ {
+ i4_id = i;
+ }
+ /* populate vps based on encoder configuration and tools */
+ ihevce_populate_vps(
+ ps_enc_ctxt,
+ &ps_enc_ctxt->as_vps[i],
+ &ps_enc_ctxt->s_runtime_src_prms,
+ &ps_enc_ctxt->ps_stat_prms->s_out_strm_prms,
+ &ps_enc_ctxt->s_runtime_coding_prms,
+ &ps_enc_ctxt->ps_stat_prms->s_config_prms,
+ ps_enc_ctxt->ps_stat_prms,
+ i4_resolution_id);
+
+ /* populate sps based on encoder configuration and tools */
+ ihevce_populate_sps(
+ ps_enc_ctxt,
+ &ps_enc_ctxt->as_sps[i],
+ &ps_enc_ctxt->as_vps[i],
+ &ps_enc_ctxt->s_runtime_src_prms,
+ &ps_enc_ctxt->ps_stat_prms->s_out_strm_prms,
+ &ps_enc_ctxt->s_runtime_coding_prms,
+ &ps_enc_ctxt->ps_stat_prms->s_config_prms,
+ &ps_enc_ctxt->s_frm_ctb_prms,
+ ps_enc_ctxt->ps_stat_prms,
+ i4_resolution_id);
+
+ /* populate pps based on encoder configuration and tools (stored at index i, passed the swapped i4_id) */
+ ihevce_populate_pps(
+ &ps_enc_ctxt->as_pps[i],
+ &ps_enc_ctxt->as_sps[i],
+ &ps_enc_ctxt->s_runtime_src_prms,
+ &ps_enc_ctxt->ps_stat_prms->s_out_strm_prms,
+ &ps_enc_ctxt->s_runtime_coding_prms,
+ &ps_enc_ctxt->ps_stat_prms->s_config_prms,
+ ps_enc_ctxt->ps_stat_prms,
+ i4_id,
+ i4_resolution_id,
+ ps_enc_ctxt->ps_tile_params_base,
+ &ps_enc_ctxt->ai4_column_width_array[0],
+ &ps_enc_ctxt->ai4_row_height_array[0]);
+
+ // vui is populated unconditionally; the i1_vui_parameters_present_flag check is disabled
+ {
+ ihevce_populate_vui(
+ &ps_enc_ctxt->as_sps[i].s_vui_parameters,
+ &ps_enc_ctxt->as_sps[i],
+ &ps_enc_ctxt->s_runtime_src_prms,
+ &ps_enc_ctxt->ps_stat_prms->s_vui_sei_prms,
+ i4_resolution_id,
+ &ps_enc_ctxt->s_runtime_tgt_params,
+ ps_enc_ctxt->ps_stat_prms,
+ i4_id);
+ }
+ }
+
+ osal_mutex_lock(ps_enc_ctxt->pv_rc_mutex_lock_hdl); /* acquire mutex lock for rate control calls */
+ /* run the loop over all bit-rate instances */
+ for(i = 0; i < i4_num_instance; i++)
+ {
+ /* HEVC_RC Do one time initialization of rate control */
+ ihevce_rc_init(
+ (void *)ps_enc_ctxt->s_module_ctxt.apv_rc_ctxt[i],
+ &ps_enc_ctxt->s_runtime_src_prms,
+ &ps_enc_ctxt->s_runtime_tgt_params,
+ &ps_enc_ctxt->s_rc_quant,
+ &ps_enc_ctxt->ps_stat_prms->s_sys_api,
+ &ps_enc_ctxt->ps_stat_prms->s_lap_prms,
+ ps_enc_ctxt->i4_max_fr_enc_loop_parallel_rc);
+
+ ihevce_vbv_complaince_init_level(
+ (void *)ps_enc_ctxt->s_module_ctxt.apv_rc_ctxt[i],
+ &ps_enc_ctxt->as_sps[i].s_vui_parameters);
+ }
+ osal_mutex_unlock(ps_enc_ctxt->pv_rc_mutex_lock_hdl); /* release mutex lock after rate control calls */
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_pre_enc_init \endif
+*
+* \brief
+* set out_buf params
+* Calculate end_flag if flushmode on
+* Slice initialization
+* Populate SEI params
+* reference list creation
+*
+* \param[in] Encoder context pointer
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_pre_enc_init(
+ enc_ctxt_t *ps_enc_ctxt,
+ ihevce_lap_enc_buf_t *ps_curr_inp,
+ pre_enc_me_ctxt_t *ps_curr_out,
+ WORD32 *pi4_end_flag_ret,
+ WORD32 *pi4_cur_qp_ret,
+ WORD32 *pi4_decomp_lyr_idx,
+ WORD32 i4_ping_pong)
+{
+ WORD32 end_flag = 0;
+ WORD32 cur_qp;
+ //recon_pic_buf_t *ps_frm_recon;
+ WORD32 first_field = 1;
+ WORD32 i4_field_pic = ps_enc_ctxt->s_runtime_src_prms.i4_field_pic;
+ WORD32 i4_decomp_lyrs_idx = 0;
+ WORD32 i4_resolution_id = ps_enc_ctxt->i4_resolution_id;
+ WORD32 slice_type = ISLICE;
+ WORD32 nal_type;
+ WORD32 min_cu_size;
+
+ WORD32 stasino_enabled;
+
+ /* copy the time stamps from inp to entropy inp */
+ ps_curr_out->i4_inp_timestamp_low = ps_curr_inp->s_input_buf.i4_inp_timestamp_low;
+ ps_curr_out->i4_inp_timestamp_high = ps_curr_inp->s_input_buf.i4_inp_timestamp_high;
+ ps_curr_out->pv_app_frm_ctxt = ps_curr_inp->s_input_buf.pv_app_frm_ctxt;
+
+ /* get the min cu size from config params */
+ min_cu_size = ps_enc_ctxt->ps_stat_prms->s_config_prms.i4_min_log2_cu_size;
+
+ min_cu_size = 1 << min_cu_size;
+
+ ps_curr_inp->s_lap_out.s_input_buf.i4_y_wd =
+ ps_curr_inp->s_lap_out.s_input_buf.i4_y_wd +
+ SET_CTB_ALIGN(ps_curr_inp->s_lap_out.s_input_buf.i4_y_wd, min_cu_size);
+
+ ps_curr_inp->s_lap_out.s_input_buf.i4_y_ht =
+ ps_curr_inp->s_lap_out.s_input_buf.i4_y_ht +
+ SET_CTB_ALIGN(ps_curr_inp->s_lap_out.s_input_buf.i4_y_ht, min_cu_size);
+
+ ps_curr_inp->s_lap_out.s_input_buf.i4_uv_wd =
+ ps_curr_inp->s_lap_out.s_input_buf.i4_uv_wd +
+ SET_CTB_ALIGN(ps_curr_inp->s_lap_out.s_input_buf.i4_uv_wd, min_cu_size);
+
+ if(IV_YUV_420SP_UV == ps_enc_ctxt->ps_stat_prms->s_src_prms.i4_chr_format)
+ {
+ ps_curr_inp->s_lap_out.s_input_buf.i4_uv_ht =
+ ps_curr_inp->s_lap_out.s_input_buf.i4_uv_ht +
+ SET_CTB_ALIGN(ps_curr_inp->s_lap_out.s_input_buf.i4_uv_ht, (min_cu_size >> 1));
+ }
+ else if(IV_YUV_422SP_UV == ps_enc_ctxt->ps_stat_prms->s_src_prms.i4_chr_format)
+ {
+ ps_curr_inp->s_lap_out.s_input_buf.i4_uv_ht =
+ ps_curr_inp->s_lap_out.s_input_buf.i4_uv_ht +
+ SET_CTB_ALIGN(ps_curr_inp->s_lap_out.s_input_buf.i4_uv_ht, min_cu_size);
+ }
+
+ /* update the END flag from LAP out */
+ end_flag = ps_curr_inp->s_lap_out.i4_end_flag;
+ ps_curr_out->i4_end_flag = end_flag;
+ ps_enc_ctxt->s_multi_thrd.i4_last_pic_flag = end_flag;
+
+ /* ----------------------------------------------------------------------*/
+ /* Slice initialization for current frame; Required for entropy context */
+ /* ----------------------------------------------------------------------*/
+ {
+ WORD32 cur_poc = ps_curr_inp->s_lap_out.i4_poc;
+
+ /* max merge candidates derived based on quality preset for now */
+ WORD32 max_merge_candidates = 2;
+
+ /* pocs less than random acess poc tagged for discard as they */
+ /* could be refering to pics before the cra. */
+
+ /* CRA case: as the leading pictures can refer the picture precedes the associated
+ IRAP(CRA) in decoding order, hence make it Random access skipped leading pictures (RASL)*/
+
+ if((1 == ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms.i4_enable_temporal_scalability) &&
+ (ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_max_temporal_layers ==
+ ps_curr_inp->s_lap_out.i4_temporal_lyr_id)) //TEMPORALA_SCALABILITY CHANGES
+ {
+ if(ps_curr_inp->s_lap_out.i4_assoc_IRAP_poc)
+ {
+ nal_type = (cur_poc < ps_curr_inp->s_lap_out.i4_assoc_IRAP_poc)
+ ? (ps_curr_inp->s_lap_out.i4_is_ref_pic ? NAL_RASL_R : NAL_RASL_N)
+ : (ps_curr_inp->s_lap_out.i4_is_ref_pic ? NAL_TSA_R : NAL_TSA_N);
+ }
+ /* IDR case: as the leading pictures can't refer the picture precedes the associated
+ IRAP(IDR) in decoding order, hence make it Random access decodable leading pictures (RADL)*/
+ else
+ {
+ nal_type = (cur_poc < ps_curr_inp->s_lap_out.i4_assoc_IRAP_poc)
+ ? (ps_curr_inp->s_lap_out.i4_is_ref_pic ? NAL_RADL_R : NAL_RADL_N)
+ : (ps_curr_inp->s_lap_out.i4_is_ref_pic ? NAL_TSA_R : NAL_TSA_N);
+ }
+ }
+ else
+ {
+ if(ps_curr_inp->s_lap_out.i4_assoc_IRAP_poc)
+ {
+ nal_type = (cur_poc < ps_curr_inp->s_lap_out.i4_assoc_IRAP_poc)
+ ? (ps_curr_inp->s_lap_out.i4_is_ref_pic ? NAL_RASL_R : NAL_RASL_N)
+ : (ps_curr_inp->s_lap_out.i4_is_ref_pic ? NAL_TRAIL_R : NAL_TRAIL_N);
+ }
+ /* IDR case: as the leading pictures can't refer the picture precedes the associated
+ IRAP(IDR) in decoding order, hence make it Random access decodable leading pictures (RADL)*/
+ else
+ {
+ nal_type = (cur_poc < ps_curr_inp->s_lap_out.i4_assoc_IRAP_poc)
+ ? (ps_curr_inp->s_lap_out.i4_is_ref_pic ? NAL_RADL_R : NAL_RADL_N)
+ : (ps_curr_inp->s_lap_out.i4_is_ref_pic ? NAL_TRAIL_R : NAL_TRAIL_N);
+ }
+ }
+
+ switch(ps_curr_inp->s_lap_out.i4_pic_type)
+ {
+ case IV_IDR_FRAME:
+ /* IDR pic */
+ slice_type = ISLICE;
+ nal_type = NAL_IDR_W_LP;
+ cur_poc = 0;
+ ps_enc_ctxt->i4_cra_poc = cur_poc;
+ break;
+
+ case IV_I_FRAME:
+ slice_type = ISLICE;
+
+ if(ps_curr_inp->s_lap_out.i4_is_cra_pic)
+ {
+ nal_type = NAL_CRA;
+ }
+
+ ps_enc_ctxt->i4_cra_poc = cur_poc;
+ break;
+
+ case IV_P_FRAME:
+ slice_type = PSLICE;
+ break;
+
+ case IV_B_FRAME:
+ /* TODO : Mark the nal type as NAL_TRAIL_N for non ref pics */
+ slice_type = BSLICE;
+ break;
+
+ default:
+ /* This should never occur */
+ ASSERT(0);
+ }
+
+ /* number of merge candidates and error metric chosen based on quality preset */
+ switch(ps_curr_inp->s_lap_out.i4_quality_preset)
+ {
+ case IHEVCE_QUALITY_P0:
+ max_merge_candidates = 5;
+ break;
+
+ case IHEVCE_QUALITY_P2:
+ max_merge_candidates = 5;
+ break;
+
+ case IHEVCE_QUALITY_P3:
+ max_merge_candidates = 3;
+ break;
+
+ case IHEVCE_QUALITY_P4:
+ case IHEVCE_QUALITY_P5:
+ case IHEVCE_QUALITY_P6:
+ max_merge_candidates = 2;
+ break;
+
+ default:
+ ASSERT(0);
+ }
+
+ /* acquire mutex lock for rate control calls */
+ osal_mutex_lock(ps_enc_ctxt->pv_rc_mutex_lock_hdl);
+ {
+ ps_curr_inp->s_rc_lap_out.i4_num_pels_in_frame_considered =
+ ps_curr_inp->s_lap_out.s_input_buf.i4_y_ht *
+ ps_curr_inp->s_lap_out.s_input_buf.i4_y_wd;
+
+ /*initialize the frame info stat inside LAP out, Data inside this will be populated in ihevce_rc_get_bpp_based_frame_qp call*/
+ ps_curr_inp->s_rc_lap_out.ps_frame_info = &ps_curr_inp->s_frame_info;
+
+ ps_curr_inp->s_rc_lap_out.i4_is_bottom_field = ps_curr_inp->s_input_buf.i4_bottom_field;
+ if(ps_enc_ctxt->ps_stat_prms->s_config_prms.i4_rate_control_mode == 3)
+ {
+ /*for constant qp use same qp*/
+ /*HEVC_RC query rate control for qp*/
+ cur_qp = ihevce_rc_pre_enc_qp_query(
+ (void *)ps_enc_ctxt->s_module_ctxt.apv_rc_ctxt[0],
+ &ps_curr_inp->s_rc_lap_out,
+ 0);
+ }
+ else
+ {
+ cur_qp = ihevce_rc_get_bpp_based_frame_qp(
+ (void *)ps_enc_ctxt->s_module_ctxt.apv_rc_ctxt[0], &ps_curr_inp->s_rc_lap_out);
+ }
+ }
+ /* release mutex lock after rate control calls */
+ osal_mutex_unlock(ps_enc_ctxt->pv_rc_mutex_lock_hdl);
+
+ /* store the QP in output prms */
+ /* The same qp is also used in enc thread only for ME*/
+ ps_curr_out->i4_curr_frm_qp = cur_qp;
+
+ /* slice header entropy syn memory is not valid in pre encode stage */
+ ps_curr_out->s_slice_hdr.pu4_entry_point_offset = NULL;
+
+ /* derive the flag which indicates if stasino is enabled */
+ stasino_enabled = (ps_enc_ctxt->s_runtime_coding_prms.i4_vqet &
+ (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_NOISE_PRESERVATION)) &&
+ (ps_enc_ctxt->s_runtime_coding_prms.i4_vqet &
+ (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER));
+
+ /* initialize the slice header */
+ ihevce_populate_slice_header(
+ &ps_curr_out->s_slice_hdr,
+ &ps_enc_ctxt->as_pps[0],
+ &ps_enc_ctxt->as_sps[0],
+ nal_type,
+ slice_type,
+ 0,
+ 0,
+ ps_curr_inp->s_lap_out.i4_poc,
+ cur_qp,
+ max_merge_candidates,
+ ps_enc_ctxt->ps_stat_prms->s_pass_prms.i4_pass,
+ ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id]
+ .i4_quality_preset,
+ stasino_enabled);
+
+ ps_curr_out->i4_slice_nal_type = nal_type;
+
+ ps_curr_out->s_slice_hdr.u4_nuh_temporal_id = 0;
+
+ if(1 == ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms.i4_enable_temporal_scalability)
+ {
+ ps_curr_out->s_slice_hdr.u4_nuh_temporal_id =
+ (ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_max_temporal_layers ==
+ ps_curr_inp->s_lap_out.i4_temporal_lyr_id); //TEMPORALA_SCALABILITY CHANGES
+ }
+
+ /* populate sps, vps and pps pointers for the entropy input params */
+ ps_curr_out->ps_pps = &ps_enc_ctxt->as_pps[0];
+ ps_curr_out->ps_sps = &ps_enc_ctxt->as_sps[0];
+ ps_curr_out->ps_vps = &ps_enc_ctxt->as_vps[0];
+ }
+
+ /* By default, Sei messages are set to 0, to avoid unintialised memory access */
+ memset(&ps_curr_out->s_sei, 0, sizeof(sei_params_t));
+
+ /* VUI, SEI flags reset */
+ ps_curr_out->s_sei.i1_sei_parameters_present_flag = 0;
+ ps_curr_out->s_sei.i1_buf_period_params_present_flag = 0;
+ ps_curr_out->s_sei.i1_pic_timing_params_present_flag = 0;
+ ps_curr_out->s_sei.i1_recovery_point_params_present_flag = 0;
+ ps_curr_out->s_sei.i1_decoded_pic_hash_sei_flag = 0;
+ ps_curr_out->s_sei.i4_sei_mastering_disp_colour_vol_params_present_flags = 0;
+
+ if(ps_enc_ctxt->ps_stat_prms->s_out_strm_prms.i4_sei_enable_flag == 1)
+ {
+ /* insert buffering period, display volume, recovery point only at irap points */
+ WORD32 insert_per_irap =
+ ((slice_type == ISLICE) &&
+ (((NAL_IDR_N_LP == nal_type) || (NAL_CRA == nal_type)) || (NAL_IDR_W_LP == nal_type)));
+
+ ps_curr_out->s_sei.i1_sei_parameters_present_flag = 1;
+
+ /* populate Sei buffering period based on encoder configuration and tools */
+ if(ps_enc_ctxt->ps_stat_prms->s_out_strm_prms.i4_sei_buffer_period_flags == 1)
+ {
+ ihevce_populate_buffering_period_sei(
+ &ps_curr_out->s_sei,
+ &ps_enc_ctxt->as_sps[0].s_vui_parameters,
+ &ps_enc_ctxt->as_sps[0],
+ &ps_enc_ctxt->ps_stat_prms->s_vui_sei_prms);
+
+ ps_curr_out->s_sei.i1_buf_period_params_present_flag = insert_per_irap;
+
+ ihevce_populate_active_parameter_set_sei(
+ &ps_curr_out->s_sei, &ps_enc_ctxt->as_vps[0], &ps_enc_ctxt->as_sps[0]);
+ }
+
+ /* populate Sei picture timing based on encoder configuration and tools */
+ if(ps_enc_ctxt->ps_stat_prms->s_out_strm_prms.i4_sei_pic_timing_flags == 1)
+ {
+ ihevce_populate_picture_timing_sei(
+ &ps_curr_out->s_sei,
+ &ps_enc_ctxt->as_sps[0].s_vui_parameters,
+ &ps_enc_ctxt->s_runtime_src_prms,
+ ps_curr_inp->s_input_buf.i4_bottom_field);
+ ps_curr_out->s_sei.i1_pic_timing_params_present_flag = 1;
+ }
+
+ /* populate Sei recovery point based on encoder configuration and tools */
+ if(ps_enc_ctxt->ps_stat_prms->s_out_strm_prms.i4_sei_recovery_point_flags == 1)
+ {
+ ihevce_populate_recovery_point_sei(
+ &ps_curr_out->s_sei, &ps_enc_ctxt->ps_stat_prms->s_vui_sei_prms);
+ ps_curr_out->s_sei.i1_recovery_point_params_present_flag = insert_per_irap;
+ }
+
+ /* populate mastering_display_colour_volume parameters */
+ if(ps_enc_ctxt->ps_stat_prms->s_out_strm_prms.i4_sei_mastering_disp_colour_vol_flags == 1)
+ {
+ ihevce_populate_mastering_disp_col_vol_sei(
+ &ps_curr_out->s_sei, &ps_enc_ctxt->ps_stat_prms->s_out_strm_prms);
+
+ ps_curr_out->s_sei.i4_sei_mastering_disp_colour_vol_params_present_flags =
+ insert_per_irap;
+ }
+
+ /* populate SEI Hash Flag based on encoder configuration */
+ if(0 != ps_enc_ctxt->ps_stat_prms->s_out_strm_prms.i4_decoded_pic_hash_sei_flag)
+ {
+ /* Sanity checks */
+ ASSERT(0 != ps_enc_ctxt->as_sps[0].i1_chroma_format_idc);
+
+ ASSERT(
+ (0 < ps_enc_ctxt->ps_stat_prms->s_out_strm_prms.i4_decoded_pic_hash_sei_flag) &&
+ (4 > ps_enc_ctxt->ps_stat_prms->s_out_strm_prms.i4_decoded_pic_hash_sei_flag));
+
+ /* MD5 is not supported now! picture_md5[cIdx][i] pblm */
+ ASSERT(1 != ps_enc_ctxt->ps_stat_prms->s_out_strm_prms.i4_decoded_pic_hash_sei_flag);
+
+ ps_curr_out->s_sei.i1_decoded_pic_hash_sei_flag =
+ ps_enc_ctxt->ps_stat_prms->s_out_strm_prms.i4_decoded_pic_hash_sei_flag;
+ }
+ }
+
+ /* For interlace pictures, first_field depends on topfield_first and bottom field */
+ if(i4_field_pic)
+ {
+ first_field =
+ (ps_curr_inp->s_input_buf.i4_topfield_first ^ ps_curr_inp->s_input_buf.i4_bottom_field);
+ }
+
+ /* get frame level lambda params */
+ ihevce_get_frame_lambda_prms(
+ ps_enc_ctxt,
+ ps_curr_out,
+ cur_qp,
+ first_field,
+ ps_curr_inp->s_lap_out.i4_is_ref_pic,
+ ps_curr_inp->s_lap_out.i4_temporal_lyr_id,
+ lamda_modifier_for_I_pic[4] /*mean TRF*/,
+ 0,
+ PRE_ENC_LAMBDA_TYPE);
+ /* Coarse ME and Decomp buffers sharing */
+ {
+ UWORD8 *apu1_lyr_bufs[MAX_NUM_HME_LAYERS];
+ WORD32 ai4_lyr_buf_strd[MAX_NUM_HME_LAYERS];
+
+ /* get the Decomposition frame buffer from ME */
+ i4_decomp_lyrs_idx = ihevce_coarse_me_get_lyr_buf_desc(
+ ps_enc_ctxt->s_module_ctxt.pv_coarse_me_ctxt, &apu1_lyr_bufs[0], &ai4_lyr_buf_strd[0]);
+ /* register the buffers with decomp module along with frame init */
+ ihevce_decomp_pre_intra_frame_init(
+ ps_enc_ctxt->s_module_ctxt.pv_decomp_pre_intra_ctxt,
+ &apu1_lyr_bufs[0],
+ &ai4_lyr_buf_strd[0],
+ ps_curr_out->ps_layer1_buf,
+ ps_curr_out->ps_layer2_buf,
+ ps_curr_out->ps_ed_ctb_l1,
+ ps_curr_out->as_lambda_prms[0].i4_ol_sad_lambda_qf,
+ ps_curr_out->s_slice_hdr.i1_slice_type,
+ ps_curr_out->ps_ctb_analyse);
+ }
+
+ /* -------------------------------------------------------- */
+ /* Preparing Pre encode Passes Job Queue */
+ /* -------------------------------------------------------- */
+ ihevce_prepare_pre_enc_job_queue(ps_enc_ctxt, ps_curr_inp, i4_ping_pong);
+
+ /*assign return variables */
+ *pi4_end_flag_ret = end_flag;
+ *pi4_cur_qp_ret = cur_qp;
+ *pi4_decomp_lyr_idx = i4_decomp_lyrs_idx;
+ //*pps_frm_recon_ret = ps_frm_recon;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_pre_enc_process_frame \endif
+*
+* \brief
+*    Runs the per-frame pre-encode stages on the calling thread: layer
+*    decomposition with pre-intra analysis, followed by coarse motion
+*    estimation. Nothing is done unless the output buffer flags the frame
+*    as valid for processing.
+*
+* \param[in] ps_enc_ctxt : Encoder context pointer
+* \param[in] ps_curr_inp : Current input buffer params pointer
+* \param[in,out] ps_curr_out : Current output buffer params pointer; its
+*                frame-valid flag is read here and its layer0 satd/mean
+*                pointers are handed to the decomp stage
+* \param[in] i4_cur_frame_qp : Current frame QP (not referenced in this function)
+* \param[in] i4_thrd_id : forwarded unchanged to both stages
+* \param[in] i4_ping_pong : forwarded unchanged to both stages
+*
+* \return
+*    None
+*
+* \author
+*  Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_pre_enc_process_frame(
+    enc_ctxt_t *ps_enc_ctxt,
+    ihevce_lap_enc_buf_t *ps_curr_inp,
+    pre_enc_me_ctxt_t *ps_curr_out,
+    WORD32 i4_cur_frame_qp,
+    WORD32 i4_thrd_id,
+    WORD32 i4_ping_pong)
+{
+    /* Frames not marked valid are passed through untouched */
+    if(1 != ps_curr_out->i4_frm_proc_valid_flag)
+    {
+        return;
+    }
+
+    /* Stage 1: layer decomposition and intra 4x4 analysis */
+    ihevce_decomp_pre_intra_process(
+        ps_enc_ctxt->s_module_ctxt.pv_decomp_pre_intra_ctxt,
+        &ps_curr_inp->s_lap_out,
+        &ps_enc_ctxt->s_frm_ctb_prms,
+        &ps_enc_ctxt->s_multi_thrd,
+        i4_thrd_id,
+        i4_ping_pong,
+        ps_curr_out->ps_layer0_cur_satd,
+        ps_curr_out->ps_layer0_cur_mean);
+
+    /* Stage 2: coarse motion estimation and early intra-inter decision */
+    ihevce_coarse_me_process(
+        ps_enc_ctxt->s_module_ctxt.pv_coarse_me_ctxt,
+        ps_curr_inp,
+        &ps_enc_ctxt->s_multi_thrd,
+        i4_thrd_id,
+        i4_ping_pong);
+
+    /* NOTE: two further steps are currently disabled in this function:    */
+    /*  - L1 satd/act based qp update on scene cut                          */
+    /*    (ihevce_update_qp_L1_sad_based)                                   */
+    /*  - previous-frame average activity computation                       */
+    /*    (ihevce_decomp_pre_intra_curr_frame_pre_intra_deinit)             */
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_pre_enc_coarse_me_init \endif
+*
+* \brief
+*    Frame level init of coarse ME for the pre-encode group
+*      - reference buffer management and reference list creation
+*      - picks a free recon buffer from the pre-enc recon queue and fills
+*        its picture properties from the current input / LAP output
+*      - registers the Layer1 MV bank with the ME module
+*      - picture level init of coarse ME
+*
+* \param[in] ps_enc_ctxt : Encoder context pointer
+* \param[in] ps_curr_inp : Current input buffer params pointer
+* \param[in] ps_curr_out : Current pre-enc output buffer params pointer
+* \param[out] pps_frm_recon_ret : receives the recon buffer picked for the
+*                                 current picture
+* \param[in] i4_decomp_lyrs_idx : passed to ihevce_coarse_me_set_lyr1_mv_bank()
+* \param[in] i4_cur_qp : passed to ihevce_coarse_me_frame_init()
+* \param[in] i4_ping_pong : selects which pre-enc reference list set is used
+*
+* \return
+*    None
+*
+* \author
+*  Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_pre_enc_coarse_me_init(
+    enc_ctxt_t *ps_enc_ctxt,
+    ihevce_lap_enc_buf_t *ps_curr_inp,
+    pre_enc_me_ctxt_t *ps_curr_out,
+    recon_pic_buf_t **pps_frm_recon_ret,
+    WORD32 i4_decomp_lyrs_idx,
+    WORD32 i4_cur_qp,
+    WORD32 i4_ping_pong)
+{
+    /* local variables */
+    recon_pic_buf_t *ps_frm_recon;
+
+    /* Reference buffer management and reference list creation for pre enc group */
+    ihevce_pre_enc_manage_ref_pics(ps_enc_ctxt, ps_curr_inp, ps_curr_out, i4_ping_pong);
+
+    /* get a free recon buffer for current picture */
+    {
+        WORD32 ctr;
+
+        ps_frm_recon = NULL;
+        for(ctr = 0; ctr < ps_enc_ctxt->i4_pre_enc_num_buf_recon_q; ctr++)
+        {
+            if(1 == ps_enc_ctxt->pps_pre_enc_recon_buf_q[ctr]->i4_is_free)
+            {
+                ps_frm_recon = ps_enc_ctxt->pps_pre_enc_recon_buf_q[ctr];
+                break;
+            }
+        }
+    }
+    /* should not be NULL: the buffer is dereferenced right below and this */
+    /* ASSERT is the only guard against an exhausted queue                 */
+    ASSERT(ps_frm_recon != NULL);
+
+    /* populate reference /recon params based on LAP output */
+    ps_frm_recon->i4_is_free = 0;
+    /* top first field is set to 1 by application */
+    ps_frm_recon->i4_topfield_first = ps_curr_inp->s_input_buf.i4_topfield_first;
+    ps_frm_recon->i4_poc = ps_curr_inp->s_lap_out.i4_poc;
+    ps_frm_recon->i4_pic_type = ps_curr_inp->s_lap_out.i4_pic_type;
+    ps_frm_recon->i4_display_num = ps_curr_inp->s_lap_out.i4_display_num;
+    /* bottom field is toggled for every field by application */
+    ps_frm_recon->i4_bottom_field = ps_curr_inp->s_input_buf.i4_bottom_field;
+
+    /* Reference picture property is given by LAP */
+    ps_frm_recon->i4_is_reference = ps_curr_inp->s_lap_out.i4_is_ref_pic;
+
+    /* Deblock a picture for all reference frames unconditionally. */
+    /* Deblock non ref if save recon is enabled */
+    ps_frm_recon->i4_deblk_pad_hpel_cur_pic = ps_frm_recon->i4_is_reference ||
+                                              (ps_enc_ctxt->ps_stat_prms->i4_save_recon);
+
+    /* set the width, height and stride to default values */
+    ps_frm_recon->s_yuv_buf_desc.i4_y_ht = 0;
+    ps_frm_recon->s_yuv_buf_desc.i4_uv_ht = 0;
+    ps_frm_recon->s_yuv_buf_desc.i4_y_wd = 0;
+    ps_frm_recon->s_yuv_buf_desc.i4_uv_wd = 0;
+    ps_frm_recon->s_yuv_buf_desc.i4_y_strd = 0;
+    ps_frm_recon->s_yuv_buf_desc.i4_uv_strd = 0;
+
+    /* register the Layer1 MV bank pointer with ME module */
+    ihevce_coarse_me_set_lyr1_mv_bank(
+        ps_enc_ctxt->s_module_ctxt.pv_coarse_me_ctxt,
+        ps_curr_inp,
+        ps_curr_out->pv_me_mv_bank,
+        ps_curr_out->pv_me_ref_idx,
+        i4_decomp_lyrs_idx);
+
+    /* Coarse picture level init of ME */
+    ihevce_coarse_me_frame_init(
+        ps_enc_ctxt->s_module_ctxt.pv_coarse_me_ctxt,
+        ps_enc_ctxt->ps_stat_prms,
+        &ps_enc_ctxt->s_frm_ctb_prms,
+        &ps_curr_out->as_lambda_prms[0],
+        ps_enc_ctxt->i4_pre_enc_num_ref_l0,
+        ps_enc_ctxt->i4_pre_enc_num_ref_l1,
+        ps_enc_ctxt->i4_pre_enc_num_ref_l0_active,
+        ps_enc_ctxt->i4_pre_enc_num_ref_l1_active,
+        &ps_enc_ctxt->aps_pre_enc_ref_lists[i4_ping_pong][LIST_0][0],
+        &ps_enc_ctxt->aps_pre_enc_ref_lists[i4_ping_pong][LIST_1][0],
+        ps_curr_inp,
+        i4_cur_qp,
+        ps_curr_out->ps_layer1_buf,
+        ps_curr_out->ps_ed_ctb_l1,
+        ps_curr_out->pu1_me_reverse_map_info,
+        ps_curr_inp->s_lap_out.i4_temporal_lyr_id);
+
+    /* assign return variables */
+    *pps_frm_recon_ret = ps_frm_recon;
+}
+
+#define MAX_64BIT_VAL 0x7fffffffffffffff /* 2^63 - 1: largest value of a signed 64-bit integer */
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_variance_calc_acc_activity \endif
+*
+* \brief
+*    Function to calculate modulation based on spatial variance across lap period
+*
+*    For I and P pictures (when no scene change is seen in the look-ahead
+*    window, or when the picture starts a new active scene) the 8x8, 16x16
+*    and 32x32 activity sums of the frames in the window are accumulated,
+*    averaged into the current output buffer, cached in the encoder context,
+*    and a modulation factor and strength are derived from the deviation of
+*    the 8x8 activity. For every other picture the cached values are copied
+*    from the encoder context into the current output buffer.
+*
+* \param[in] ps_enc_ctxt : Encoder context pointer
+* \param[in] i4_cur_ipe_idx : index of the current picture in the pre-enc
+*                             input/output buffer arrays
+*
+* \return
+*    None
+*
+* \author
+*  Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_variance_calc_acc_activity(enc_ctxt_t *ps_enc_ctxt, WORD32 i4_cur_ipe_idx)
+{
+    WORD32 j, i4_k;
+    WORD32 i = 0;
+    WORD32 is_i_frame =
+        ((ps_enc_ctxt->s_multi_thrd.aps_curr_inp_pre_enc[i4_cur_ipe_idx]->s_lap_out.i4_pic_type ==
+          IV_I_FRAME) ||
+         (ps_enc_ctxt->s_multi_thrd.aps_curr_inp_pre_enc[i4_cur_ipe_idx]->s_lap_out.i4_pic_type ==
+          IV_IDR_FRAME));
+
+    WORD32 is_p_frame =
+        (ps_enc_ctxt->s_multi_thrd.aps_curr_inp_pre_enc[i4_cur_ipe_idx]->s_lap_out.i4_pic_type ==
+         IV_P_FRAME);
+
+    WORD32 i4_delay_loop = (ps_enc_ctxt->s_multi_thrd.i4_max_delay_pre_me_btw_l0_ipe + 1);
+    pre_enc_me_ctxt_t *ps_pre_enc_me_ctxt_t;
+    pre_enc_me_ctxt_t *ps_curr_out = ps_enc_ctxt->s_multi_thrd.aps_curr_out_pre_enc[i4_cur_ipe_idx];
+    rc_lap_out_params_t *ps_temp_ipe_rc_lap_out;
+    UWORD8 is_no_scene_change = 1;
+    WORD32 loop_lap2;
+    UWORD32 u4_scene_num;
+
+    ps_temp_ipe_rc_lap_out =
+        &ps_enc_ctxt->s_multi_thrd.aps_curr_inp_pre_enc[i4_cur_ipe_idx]->s_rc_lap_out;
+
+    /* reset all accumulators of the current output buffer */
+    ps_curr_out->i8_acc_frame_8x8_sum_act_sqr = 0;
+    ps_curr_out->i8_acc_frame_8x8_sum_act_for_strength = 0;
+    for(i4_k = 0; i4_k < 2; i4_k++)
+    {
+        ps_curr_out->i8_acc_frame_8x8_sum_act[i4_k] = 0;
+        ps_curr_out->i4_acc_frame_8x8_num_blks[i4_k] = 0;
+
+        ps_curr_out->i8_acc_frame_16x16_sum_act[i4_k] = 0;
+        ps_curr_out->i4_acc_frame_16x16_num_blks[i4_k] = 0;
+
+        ps_curr_out->i8_acc_frame_32x32_sum_act[i4_k] = 0;
+        ps_curr_out->i4_acc_frame_32x32_num_blks[i4_k] = 0;
+    }
+    /* i4_k is 2 here: the 16x16 and 32x32 accumulators are indexed up to 2 */
+    /* in this function, the 8x8 ones only up to 1                          */
+    ps_curr_out->i8_acc_frame_16x16_sum_act[i4_k] = 0;
+    ps_curr_out->i4_acc_frame_16x16_num_blks[i4_k] = 0;
+
+    ps_curr_out->i8_acc_frame_32x32_sum_act[i4_k] = 0;
+    ps_curr_out->i4_acc_frame_32x32_num_blks[i4_k] = 0;
+
+    u4_scene_num =
+        ps_enc_ctxt->s_multi_thrd.aps_curr_inp_pre_enc[i4_cur_ipe_idx]->s_lap_out.u4_scene_num;
+
+    //ps_curr_out->i4_acc_frame_median_sum_act = 0;
+
+    /* number of frames of the look-ahead window to accumulate over */
+#if MODULATION_OVER_LAP
+    if(ps_enc_ctxt->s_multi_thrd.i4_delay_pre_me_btw_l0_ipe - 1 <= 0)
+        loop_lap2 = 1;
+    else
+        loop_lap2 = ps_enc_ctxt->s_multi_thrd.i4_delay_pre_me_btw_l0_ipe - 1;
+#else
+    loop_lap2 = 1;
+#endif
+
+    if(ps_temp_ipe_rc_lap_out->ps_rc_lap_out_next_encode == NULL ||
+       ps_temp_ipe_rc_lap_out->i4_is_non_I_scd)
+    {
+        is_no_scene_change = 0;
+    }
+
+    /* Loop over the complete lap2 struct and make sure no scene change occurs */
+    /* in the lap2 frames; the window is cut short at the first frame that     */
+    /* breaks the scene                                                        */
+    for(i = 1; i < loop_lap2; i++)
+    {
+        WORD32 i4_temp_ipe_idx = (i4_cur_ipe_idx + i) % i4_delay_loop;
+        if(0 == is_no_scene_change)
+        {
+            loop_lap2 = i;
+            break;
+        }
+        ps_temp_ipe_rc_lap_out =
+            &ps_enc_ctxt->s_multi_thrd.aps_curr_inp_pre_enc[i4_temp_ipe_idx]->s_rc_lap_out;
+
+        /*check if the current frame scene num is same as previous frame scene num */
+        is_no_scene_change = (u4_scene_num == ps_temp_ipe_rc_lap_out->u4_rc_scene_num);
+
+        if(ps_temp_ipe_rc_lap_out->ps_rc_lap_out_next_encode == NULL ||
+           ps_temp_ipe_rc_lap_out->i4_is_non_I_scd)
+        {
+            is_no_scene_change = 0;
+            loop_lap2 = i;
+            break;
+        }
+    }
+
+    /*Only if there is no scene change then process the lap2 for modulation index calculation */
+    if(((1 == is_i_frame) || (1 == is_p_frame)) &&
+       (1 == is_no_scene_change || (ps_enc_ctxt->i4_active_scene_num != (WORD32)u4_scene_num)))
+    {
+        //do
+        ps_enc_ctxt->i4_active_scene_num = u4_scene_num;
+        for(i = 0; i < loop_lap2; i++)
+        {
+            WORD32 i4_temp_ipe_idx = (i4_cur_ipe_idx + i) % i4_delay_loop;
+            UWORD8 i_frame =
+                ((ps_enc_ctxt->s_multi_thrd.aps_curr_inp_pre_enc[i4_temp_ipe_idx]
+                      ->s_lap_out.i4_pic_type == IV_I_FRAME) ||
+                 (ps_enc_ctxt->s_multi_thrd.aps_curr_inp_pre_enc[i4_temp_ipe_idx]
+                      ->s_lap_out.i4_pic_type == IV_IDR_FRAME));
+            UWORD8 p_frame =
+                (ps_enc_ctxt->s_multi_thrd.aps_curr_inp_pre_enc[i4_temp_ipe_idx]
+                     ->s_lap_out.i4_pic_type == IV_P_FRAME);
+
+            ps_pre_enc_me_ctxt_t = ps_enc_ctxt->s_multi_thrd.aps_curr_out_pre_enc[i4_temp_ipe_idx];
+
+            /* only I and P pictures of the window contribute to the sums */
+            if(1 == (p_frame || i_frame))
+            {
+                ps_curr_out->i8_acc_frame_8x8_sum_act_sqr +=
+                    ps_pre_enc_me_ctxt_t->u8_curr_frame_8x8_sum_act_sqr;
+                ps_curr_out->i8_acc_frame_8x8_sum_act_for_strength +=
+                    ps_pre_enc_me_ctxt_t->i4_curr_frame_8x8_sum_act_for_strength[0];
+                for(i4_k = 0; i4_k < 2; i4_k++)
+                {
+                    ps_curr_out->i8_acc_frame_8x8_sum_act[i4_k] +=
+                        ps_pre_enc_me_ctxt_t->i8_curr_frame_8x8_sum_act[i4_k];
+                    ps_curr_out->i4_acc_frame_8x8_num_blks[i4_k] +=
+                        ps_pre_enc_me_ctxt_t->i4_curr_frame_8x8_num_blks[i4_k];
+
+                    ps_curr_out->i8_acc_frame_16x16_sum_act[i4_k] +=
+                        ps_pre_enc_me_ctxt_t->i8_curr_frame_16x16_sum_act[i4_k];
+                    ps_curr_out->i4_acc_frame_16x16_num_blks[i4_k] +=
+                        ps_pre_enc_me_ctxt_t->i4_curr_frame_16x16_num_blks[i4_k];
+
+                    ps_curr_out->i8_acc_frame_32x32_sum_act[i4_k] +=
+                        ps_pre_enc_me_ctxt_t->i8_curr_frame_32x32_sum_act[i4_k];
+                    ps_curr_out->i4_acc_frame_32x32_num_blks[i4_k] +=
+                        ps_pre_enc_me_ctxt_t->i4_curr_frame_32x32_num_blks[i4_k];
+                }
+
+                /* i4_k is 2 here: third entry of the 16x16 / 32x32 accumulators */
+                ps_curr_out->i8_acc_frame_16x16_sum_act[i4_k] +=
+                    ps_pre_enc_me_ctxt_t->i8_curr_frame_16x16_sum_act[i4_k];
+                ps_curr_out->i4_acc_frame_16x16_num_blks[i4_k] +=
+                    ps_pre_enc_me_ctxt_t->i4_curr_frame_16x16_num_blks[i4_k];
+
+                ps_curr_out->i8_acc_frame_32x32_sum_act[i4_k] +=
+                    ps_pre_enc_me_ctxt_t->i8_curr_frame_32x32_sum_act[i4_k];
+                ps_curr_out->i4_acc_frame_32x32_num_blks[i4_k] +=
+                    ps_pre_enc_me_ctxt_t->i4_curr_frame_32x32_num_blks[i4_k];
+
+                //ps_curr_out->i4_acc_frame_median_sum_act += ps_lap_out->i4_curr_frame_median_sum_act;
+                //ps_curr_out->i4_acc_frame_median_num_blks += ps_lap_out->i4_curr_frame_median_num_blks;
+            }
+
+            //ps_is_next_frame_available = (ihevce_lap_output_params_t *)ps_is_next_frame_available->ps_lap_out_next_encode;
+            /* stop when there is no next frame queued for encode */
+            if(NULL == ps_enc_ctxt->s_multi_thrd.aps_curr_inp_pre_enc[i4_temp_ipe_idx]
+                           ->s_rc_lap_out.ps_rc_lap_out_next_encode)
+                break;
+        } //while(NULL != ps_is_next_frame_available);
+
+        /*calculate corr. average, overwrite frame avg by acc. average*/
+        {
+            for(i4_k = 0; i4_k < 2; i4_k++)
+            {
+                if(1 == is_i_frame)
+                {
+                    ASSERT(0 != ps_curr_out->i4_acc_frame_8x8_num_blks[i4_k]);
+                    ASSERT(0 != ps_curr_out->i4_acc_frame_16x16_num_blks[i4_k]);
+                    ASSERT(0 != ps_curr_out->i4_acc_frame_32x32_num_blks[i4_k]);
+                }
+
+                /*In P frame, if no occlusion is present, then accumulated avg can be 0*/
+                /*In that case, modulation index is made 1*/
+                if(0 == ps_curr_out->i4_acc_frame_8x8_num_blks[i4_k])
+                {
+                    ps_curr_out->i8_curr_frame_8x8_avg_act[i4_k] = 0;
+                }
+                else
+                {
+                    /* rounded averages: (sum + n/2) / n */
+                    /* note: this scalar is rewritten on every iteration, so it */
+                    /* ends up divided by the last non-zero block count         */
+                    ps_curr_out->i8_curr_frame_8x8_sum_act_for_strength =
+                        (ps_curr_out->i8_acc_frame_8x8_sum_act_for_strength +
+                         (ps_curr_out->i4_acc_frame_8x8_num_blks[i4_k] >> 1)) /
+                        ps_curr_out->i4_acc_frame_8x8_num_blks[i4_k];
+                    ps_curr_out->i8_curr_frame_8x8_avg_act[i4_k] =
+                        (ps_curr_out->i8_acc_frame_8x8_sum_act[i4_k] +
+                         (ps_curr_out->i4_acc_frame_8x8_num_blks[i4_k] >> 1)) /
+                        ps_curr_out->i4_acc_frame_8x8_num_blks[i4_k];
+                    /* log2(1 + avg) */
+                    ps_curr_out->ld_curr_frame_8x8_log_avg[i4_k] =
+                        (log(1 + (long double)ps_curr_out->i8_curr_frame_8x8_avg_act[i4_k]) /
+                         log(2.0));
+                }
+
+                if(0 == ps_curr_out->i4_acc_frame_16x16_num_blks[i4_k])
+                {
+                    ps_curr_out->i8_curr_frame_16x16_avg_act[i4_k] = 0;
+                }
+                else
+                {
+                    ps_curr_out->i8_curr_frame_16x16_avg_act[i4_k] =
+                        (ps_curr_out->i8_acc_frame_16x16_sum_act[i4_k] +
+                         (ps_curr_out->i4_acc_frame_16x16_num_blks[i4_k] >> 1)) /
+                        ps_curr_out->i4_acc_frame_16x16_num_blks[i4_k];
+                    ps_curr_out->ld_curr_frame_16x16_log_avg[i4_k] =
+                        (log(1 + (long double)ps_curr_out->i8_curr_frame_16x16_avg_act[i4_k]) /
+                         log(2.0));
+                }
+
+                if(0 == ps_curr_out->i4_acc_frame_32x32_num_blks[i4_k])
+                {
+                    ps_curr_out->i8_curr_frame_32x32_avg_act[i4_k] = 0;
+                }
+                else
+                {
+                    ps_curr_out->i8_curr_frame_32x32_avg_act[i4_k] =
+                        (ps_curr_out->i8_acc_frame_32x32_sum_act[i4_k] +
+                         (ps_curr_out->i4_acc_frame_32x32_num_blks[i4_k] >> 1)) /
+                        ps_curr_out->i4_acc_frame_32x32_num_blks[i4_k];
+                    ps_curr_out->ld_curr_frame_32x32_log_avg[i4_k] =
+                        (log(1 + (long double)ps_curr_out->i8_curr_frame_32x32_avg_act[i4_k]) /
+                         log(2.0));
+                }
+            }
+
+            /* i4_k is 2 here: third entry of the 16x16 / 32x32 averages */
+            if(1 == is_i_frame)
+            {
+                ASSERT(0 != ps_curr_out->i4_acc_frame_16x16_num_blks[i4_k]);
+                ASSERT(0 != ps_curr_out->i4_acc_frame_32x32_num_blks[i4_k]);
+                //ASSERT(0 != ps_curr_out->i4_acc_frame_median_num_blks);
+            }
+            if(0 == ps_curr_out->i4_acc_frame_16x16_num_blks[i4_k])
+            {
+                ps_curr_out->i8_curr_frame_16x16_avg_act[i4_k] = 0;
+            }
+            else
+            {
+                ps_curr_out->i8_curr_frame_16x16_avg_act[i4_k] =
+                    (ps_curr_out->i8_acc_frame_16x16_sum_act[i4_k] +
+                     (ps_curr_out->i4_acc_frame_16x16_num_blks[i4_k] >> 1)) /
+                    ps_curr_out->i4_acc_frame_16x16_num_blks[i4_k];
+                ps_curr_out->ld_curr_frame_16x16_log_avg[i4_k] =
+                    (log(1 + (long double)ps_curr_out->i8_curr_frame_16x16_avg_act[i4_k]) /
+                     log(2.0));
+            }
+
+            if(0 == ps_curr_out->i4_acc_frame_32x32_num_blks[i4_k])
+            {
+                ps_curr_out->i8_curr_frame_32x32_avg_act[i4_k] = 0;
+            }
+            else
+            {
+                ps_curr_out->i8_curr_frame_32x32_avg_act[i4_k] =
+                    (ps_curr_out->i8_acc_frame_32x32_sum_act[i4_k] +
+                     (ps_curr_out->i4_acc_frame_32x32_num_blks[i4_k] >> 1)) /
+                    ps_curr_out->i4_acc_frame_32x32_num_blks[i4_k];
+                ps_curr_out->ld_curr_frame_32x32_log_avg[i4_k] =
+                    (log(1 + (long double)ps_curr_out->i8_curr_frame_32x32_avg_act[i4_k]) /
+                     log(2.0));
+            }
+        }
+        /*store the avg activity for B pictures*/
+        {
+#if POW_OPT
+            ps_enc_ctxt->ald_lap2_8x8_log_avg_act_from_T0[0] =
+                ps_curr_out->ld_curr_frame_8x8_log_avg[0];
+            ps_enc_ctxt->ald_lap2_8x8_log_avg_act_from_T0[1] =
+                ps_curr_out->ld_curr_frame_8x8_log_avg[1];
+
+            ps_enc_ctxt->ald_lap2_16x16_log_avg_act_from_T0[0] =
+                ps_curr_out->ld_curr_frame_16x16_log_avg[0];
+            ps_enc_ctxt->ald_lap2_16x16_log_avg_act_from_T0[1] =
+                ps_curr_out->ld_curr_frame_16x16_log_avg[1];
+            ps_enc_ctxt->ald_lap2_16x16_log_avg_act_from_T0[2] =
+                ps_curr_out->ld_curr_frame_16x16_log_avg[2];
+
+            ps_enc_ctxt->ald_lap2_32x32_log_avg_act_from_T0[0] =
+                ps_curr_out->ld_curr_frame_32x32_log_avg[0];
+            ps_enc_ctxt->ald_lap2_32x32_log_avg_act_from_T0[1] =
+                ps_curr_out->ld_curr_frame_32x32_log_avg[1];
+            ps_enc_ctxt->ald_lap2_32x32_log_avg_act_from_T0[2] =
+                ps_curr_out->ld_curr_frame_32x32_log_avg[2];
+#else
+            ps_enc_ctxt->ai8_lap2_8x8_avg_act_from_T0[0] =
+                ps_curr_out->i8_curr_frame_8x8_avg_act[0];
+            ps_enc_ctxt->ai8_lap2_8x8_avg_act_from_T0[1] =
+                ps_curr_out->i8_curr_frame_8x8_avg_act[1];
+
+            ps_enc_ctxt->ai8_lap2_16x16_avg_act_from_T0[0] =
+                ps_curr_out->i8_curr_frame_16x16_avg_act[0];
+            ps_enc_ctxt->ai8_lap2_16x16_avg_act_from_T0[1] =
+                ps_curr_out->i8_curr_frame_16x16_avg_act[1];
+            ps_enc_ctxt->ai8_lap2_16x16_avg_act_from_T0[2] =
+                ps_curr_out->i8_curr_frame_16x16_avg_act[2];
+
+            ps_enc_ctxt->ai8_lap2_32x32_avg_act_from_T0[0] =
+                ps_curr_out->i8_curr_frame_32x32_avg_act[0];
+            ps_enc_ctxt->ai8_lap2_32x32_avg_act_from_T0[1] =
+                ps_curr_out->i8_curr_frame_32x32_avg_act[1];
+            ps_enc_ctxt->ai8_lap2_32x32_avg_act_from_T0[2] =
+                ps_curr_out->i8_curr_frame_32x32_avg_act[2];
+#endif
+        }
+        /*Calculate modulation index */
+        {
+            LWORD64 i8_mean, i8_mean_sqr, i8_variance;
+            LWORD64 i8_deviation;
+            WORD32 i4_mod_factor;
+            float f_strength;
+
+            if(ps_curr_out->i4_acc_frame_8x8_num_blks[0] > 0)
+            {
+#if STRENGTH_BASED_ON_CURR_FRM
+                i8_mean_sqr =
+                    ((ps_curr_out->i8_curr_frame_8x8_sum_act_sqr +
+                      (ps_curr_out->i4_curr_frame_8x8_num_blks[0] >> 1)) /
+                     ps_curr_out->i4_curr_frame_8x8_num_blks[0]);
+#else
+                i8_mean_sqr =
+                    ((ps_curr_out->i8_acc_frame_8x8_sum_act_sqr +
+                      (ps_curr_out->i4_acc_frame_8x8_num_blks[0] >> 1)) /
+                     ps_curr_out->i4_acc_frame_8x8_num_blks[0]);
+#endif
+                i8_mean = (ps_curr_out->i8_curr_frame_8x8_sum_act_for_strength);
+
+                i8_variance = i8_mean_sqr - (i8_mean * i8_mean);
+                /* The mean and the mean of squares are rounded independently */
+                /* and can be divided by different block counts, so the       */
+                /* difference may come out negative. sqrt() of a negative     */
+                /* value is NaN and converting NaN to an integer is undefined */
+                /* behaviour, hence clamp to zero first.                      */
+                if(i8_variance < 0)
+                {
+                    i8_variance = 0;
+                }
+                i8_deviation = (LWORD64)sqrt((long double)i8_variance);
+#if STRENGTH_BASED_ON_DEVIATION
+
+                /* piece-wise linear mapping of deviation to strength around */
+                /* REF_MOD_DEVIATION                                          */
+                if(((float)i8_deviation) <= (REF_MOD_DEVIATION))
+                {
+                    f_strength =
+                        (float)((((float)i8_deviation - (BELOW_REF_DEVIATION)) * REF_MOD_STRENGTH) / ((REF_MOD_DEVIATION) - (BELOW_REF_DEVIATION)));
+                }
+                else
+                {
+                    f_strength =
+                        (float)((((float)i8_deviation - (ABOVE_REF_DEVIATION)) * REF_MOD_STRENGTH) / ((REF_MOD_DEVIATION) - (ABOVE_REF_DEVIATION)));
+                }
+
+#else
+                f_strength = (((float)((float)i8_mean_sqr / (float)(i8_mean * i8_mean)) - 1.0)) *
+                             REF_MOD_STRENGTH / REF_MOD_VARIANCE;
+#endif
+                i4_mod_factor = (WORD32)(i8_deviation / 60);
+
+                f_strength = (float)CLIP3(f_strength, 0.0, REF_MAX_STRENGTH);
+            }
+            else
+            {
+                /*If not sufficient blocks are present, turn modulation index to 1 */
+                i4_mod_factor = 1;
+                f_strength = 0;
+            }
+            ps_curr_out->ai4_mod_factor_derived_by_variance[0] = i4_mod_factor;
+            ps_curr_out->ai4_mod_factor_derived_by_variance[1] = i4_mod_factor;
+            ps_curr_out->f_strength = f_strength;
+
+            if(1 == ps_enc_ctxt->s_runtime_src_prms.i4_field_pic)
+            {
+                /*For Interlace period, store mod factor and strength if only first field*/
+                if(1 == ps_enc_ctxt->s_multi_thrd.aps_curr_inp_pre_enc[i4_cur_ipe_idx]
+                            ->s_lap_out.i4_first_field)
+                {
+                    ps_enc_ctxt->ai4_mod_factor_derived_by_variance[0] = i4_mod_factor;
+                    ps_enc_ctxt->ai4_mod_factor_derived_by_variance[1] = i4_mod_factor;
+                    ps_enc_ctxt->f_strength = f_strength;
+                }
+            }
+            else
+            {
+                ps_enc_ctxt->ai4_mod_factor_derived_by_variance[0] = i4_mod_factor;
+                ps_enc_ctxt->ai4_mod_factor_derived_by_variance[1] = i4_mod_factor;
+                ps_enc_ctxt->f_strength = f_strength;
+            }
+        }
+    }
+    else
+    {
+        /* reuse the values cached in the encoder context */
+        ps_curr_out->ai4_mod_factor_derived_by_variance[0] =
+            ps_enc_ctxt->ai4_mod_factor_derived_by_variance[0];
+        ps_curr_out->ai4_mod_factor_derived_by_variance[1] =
+            ps_enc_ctxt->ai4_mod_factor_derived_by_variance[1];
+        ps_curr_out->f_strength = ps_enc_ctxt->f_strength;
+        /*copy the prev avg activity from Tid 0 for B pictures*/
+        {
+#if POW_OPT
+            ps_curr_out->ld_curr_frame_8x8_log_avg[0] =
+                ps_enc_ctxt->ald_lap2_8x8_log_avg_act_from_T0[0];
+            ps_curr_out->ld_curr_frame_8x8_log_avg[1] =
+                ps_enc_ctxt->ald_lap2_8x8_log_avg_act_from_T0[1];
+
+            ps_curr_out->ld_curr_frame_16x16_log_avg[0] =
+                ps_enc_ctxt->ald_lap2_16x16_log_avg_act_from_T0[0];
+            ps_curr_out->ld_curr_frame_16x16_log_avg[1] =
+                ps_enc_ctxt->ald_lap2_16x16_log_avg_act_from_T0[1];
+            ps_curr_out->ld_curr_frame_16x16_log_avg[2] =
+                ps_enc_ctxt->ald_lap2_16x16_log_avg_act_from_T0[2];
+
+            ps_curr_out->ld_curr_frame_32x32_log_avg[0] =
+                ps_enc_ctxt->ald_lap2_32x32_log_avg_act_from_T0[0];
+            ps_curr_out->ld_curr_frame_32x32_log_avg[1] =
+                ps_enc_ctxt->ald_lap2_32x32_log_avg_act_from_T0[1];
+            ps_curr_out->ld_curr_frame_32x32_log_avg[2] =
+                ps_enc_ctxt->ald_lap2_32x32_log_avg_act_from_T0[2];
+#else
+            ps_curr_out->i8_curr_frame_8x8_avg_act[0] =
+                ps_enc_ctxt->ai8_lap2_8x8_avg_act_from_T0[0];
+            ps_curr_out->i8_curr_frame_8x8_avg_act[1] =
+                ps_enc_ctxt->ai8_lap2_8x8_avg_act_from_T0[1];
+
+            ps_curr_out->i8_curr_frame_16x16_avg_act[0] =
+                ps_enc_ctxt->ai8_lap2_16x16_avg_act_from_T0[0];
+            ps_curr_out->i8_curr_frame_16x16_avg_act[1] =
+                ps_enc_ctxt->ai8_lap2_16x16_avg_act_from_T0[1];
+            ps_curr_out->i8_curr_frame_16x16_avg_act[2] =
+                ps_enc_ctxt->ai8_lap2_16x16_avg_act_from_T0[2];
+
+            ps_curr_out->i8_curr_frame_32x32_avg_act[0] =
+                ps_enc_ctxt->ai8_lap2_32x32_avg_act_from_T0[0];
+            ps_curr_out->i8_curr_frame_32x32_avg_act[1] =
+                ps_enc_ctxt->ai8_lap2_32x32_avg_act_from_T0[1];
+            ps_curr_out->i8_curr_frame_32x32_avg_act[2] =
+                ps_enc_ctxt->ai8_lap2_32x32_avg_act_from_T0[2];
+#endif
+        }
+    }
+
+    /*If Compensated block, then CLIP qp to max of frame qp and modulated qp*/
+    /* NOTE(review): is_skipped below is hard-coded to 0, so with the code as */
+    /* written none of the counters is ever incremented and no satd entry is  */
+    /* modified by this loop - confirm whether skip detection is meant to be  */
+    /* wired in here.                                                          */
+    {
+        WORD32 ctb_ctr, vert_ctr;
+        WORD32 ctb_ctr_blks = ps_enc_ctxt->s_frm_ctb_prms.i4_num_ctbs_horz;
+        WORD32 vert_ctr_blks = ps_enc_ctxt->s_frm_ctb_prms.i4_num_ctbs_vert;
+        ihevce_ed_ctb_l1_t *ps_ed_ctb_pic_l1 =
+            ps_enc_ctxt->s_multi_thrd.aps_curr_out_pre_enc[i4_cur_ipe_idx]->ps_ed_ctb_l1;
+        WORD32 i4_pic_type =
+            ps_enc_ctxt->s_multi_thrd.aps_curr_inp_pre_enc[i4_cur_ipe_idx]->s_lap_out.i4_pic_type;
+        for(vert_ctr = 0; vert_ctr < vert_ctr_blks; vert_ctr++)
+        {
+            ihevce_ed_ctb_l1_t *ps_ed_ctb_row_l1 = ps_ed_ctb_pic_l1 + vert_ctr * ctb_ctr_blks;
+
+            for(ctb_ctr = 0; ctb_ctr < ctb_ctr_blks; ctb_ctr++)
+            {
+                ihevce_ed_ctb_l1_t *ps_ed_ctb_curr_l1 = ps_ed_ctb_row_l1 + ctb_ctr;
+
+                WORD32 is_min_block_comensated_in_l32x32 = 0;
+
+                /*Populate avg satd to calculate MI and activity factors*/
+                for(i = 0; i < 4; i++)
+                {
+                    WORD32 is_min_block_comensated_in_l116x16 = 0;
+
+                    for(j = 0; j < 4; j++)
+                    {
+                        /*Accumulate the sum of 8*8 activities in the current layer (16*16 CU in L0)*/
+                        if(ps_ed_ctb_curr_l1->i4_sum_4x4_satd[i * 4 + j] != -1)
+                        {
+                            WORD32 is_skipped = 0;
+                            if((i4_pic_type != IV_I_FRAME) && (i4_pic_type != IV_IDR_FRAME) &&
+                               (1 == is_skipped))
+                            {
+                                is_min_block_comensated_in_l116x16 += 1;
+                                is_min_block_comensated_in_l32x32 += 1;
+
+                                if(ps_ed_ctb_curr_l1->i4_8x8_satd[i * 4 + j][0] <
+                                   ps_curr_out->i8_curr_frame_8x8_avg_act[0])
+                                    ps_ed_ctb_curr_l1->i4_8x8_satd[i * 4 + j][0] = -1;
+
+                                if(ps_ed_ctb_curr_l1->i4_8x8_satd[i * 4 + j][1] <
+                                   ps_curr_out->i8_curr_frame_8x8_avg_act[1])
+                                    ps_ed_ctb_curr_l1->i4_8x8_satd[i * 4 + j][1] = -1;
+                            }
+                        }
+                    }
+
+                    /* all four 8x8 blocks of this 16x16 were compensated */
+                    if(4 == is_min_block_comensated_in_l116x16)
+                    {
+                        if(ps_ed_ctb_curr_l1->i4_16x16_satd[i][0] <
+                           ps_curr_out->i8_curr_frame_16x16_avg_act[0])
+                            ps_ed_ctb_curr_l1->i4_16x16_satd[i][0] = -1;
+
+                        if(ps_ed_ctb_curr_l1->i4_16x16_satd[i][1] <
+                           ps_curr_out->i8_curr_frame_16x16_avg_act[1])
+                            ps_ed_ctb_curr_l1->i4_16x16_satd[i][1] = -1;
+
+                        if(ps_ed_ctb_curr_l1->i4_16x16_satd[i][2] <
+                           ps_curr_out->i8_curr_frame_16x16_avg_act[2])
+                            ps_ed_ctb_curr_l1->i4_16x16_satd[i][2] = -1;
+                    }
+                }
+
+                /* all sixteen 8x8 blocks of this 32x32 were compensated */
+                if((16 == is_min_block_comensated_in_l32x32))
+                {
+                    if(ps_ed_ctb_curr_l1->i4_32x32_satd[0][0] <
+                       ps_curr_out->i8_curr_frame_32x32_avg_act[0])
+                        ps_ed_ctb_curr_l1->i4_32x32_satd[0][0] = -1;
+
+                    if(ps_ed_ctb_curr_l1->i4_32x32_satd[0][1] <
+                       ps_curr_out->i8_curr_frame_32x32_avg_act[1])
+                        ps_ed_ctb_curr_l1->i4_32x32_satd[0][1] = -1;
+
+                    if(ps_ed_ctb_curr_l1->i4_32x32_satd[0][2] <
+                       ps_curr_out->i8_curr_frame_32x32_avg_act[2])
+                        ps_ed_ctb_curr_l1->i4_32x32_satd[0][2] = -1;
+                }
+            }
+        }
+    }
+
+    /**/
+    return;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_pre_enc_process_frame_thrd \endif
+*
+* \brief
+* Pre-Encode Frame processing thread interface function
+*
+* \param[in] High level encoder context pointer
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+WORD32 ihevce_pre_enc_process_frame_thrd(void *pv_frm_proc_thrd_ctxt)
+{
+ frm_proc_thrd_ctxt_t *ps_thrd_ctxt = (frm_proc_thrd_ctxt_t *)pv_frm_proc_thrd_ctxt;
+ ihevce_hle_ctxt_t *ps_hle_ctxt = ps_thrd_ctxt->ps_hle_ctxt;
+ enc_ctxt_t *ps_enc_ctxt = (enc_ctxt_t *)ps_thrd_ctxt->pv_enc_ctxt;
+ multi_thrd_ctxt_t *ps_multi_thrd = &ps_enc_ctxt->s_multi_thrd;
+ WORD32 i4_thrd_id = ps_thrd_ctxt->i4_thrd_id;
+ WORD32 i4_resolution_id = ps_enc_ctxt->i4_resolution_id;
+ WORD32 i4_end_flag = 0;
+ WORD32 i4_out_flush_flag = 0;
+ WORD32 i4_cur_decomp_idx = 0;
+ WORD32 i4_cur_coarse_me_idx = 0;
+ WORD32 i4_cur_ipe_idx = 0;
+ ihevce_lap_enc_buf_t *ps_lap_inp_buf = NULL;
+ void *pv_dep_mngr_prev_frame_pre_enc_l1 = ps_multi_thrd->pv_dep_mngr_prev_frame_pre_enc_l1;
+ void *pv_dep_mngr_prev_frame_pre_enc_l0 = ps_multi_thrd->pv_dep_mngr_prev_frame_pre_enc_l0;
+ void *pv_dep_mngr_prev_frame_pre_enc_coarse_me =
+ ps_multi_thrd->pv_dep_mngr_prev_frame_pre_enc_coarse_me;
+ WORD32 i4_num_buf_prod_for_l0_ipe = 0;
+ WORD32 i4_decomp_end_flag = 0;
+
+ (void)ps_hle_ctxt;
+ (void)i4_resolution_id;
+ if(i4_thrd_id == 0)
+ {
+ PROFILE_START(&ps_hle_ctxt->profile_pre_enc[i4_resolution_id]);
+ }
+
+ /* ---------- Processing Loop until Flush command is received --------- */
+ while(0 == i4_end_flag)
+ {
+ /* Wait till previous frame(instance)'s decomp_intra is processed */
+ {
+ ihevce_dmgr_chk_frm_frm_sync(pv_dep_mngr_prev_frame_pre_enc_l1, i4_thrd_id);
+ }
+
+ /* ----------------------------------------------------------- */
+ /* decomp pre_intra init */
+ /* ----------------------------------------------------------- */
+
+ /****** Lock the critical section for decomp pre_intra init ******/
+ {
+ WORD32 i4_status;
+
+ i4_status = osal_mutex_lock(ps_multi_thrd->pv_mutex_hdl_pre_enc_init);
+ if(OSAL_SUCCESS != i4_status)
+ return 0;
+ }
+
+ ps_multi_thrd->ai4_decomp_coarse_me_complete_flag[i4_cur_decomp_idx] = 0;
+
+ /* init */
+ if((ps_multi_thrd->ai4_pre_enc_init_done[i4_cur_decomp_idx] == 0) &&
+ (0 == i4_decomp_end_flag))
+ {
+ ihevce_lap_enc_buf_t *ps_curr_inp = NULL;
+ pre_enc_me_ctxt_t *ps_curr_out = NULL;
+ WORD32 in_buf_id;
+ WORD32 out_buf_id;
+
+ do
+ {
+ ps_lap_inp_buf = NULL;
+ if(0 == ps_multi_thrd->i4_last_inp_buf)
+ {
+ /* ------- get input buffer input data que ---------- */
+ ps_lap_inp_buf = (ihevce_lap_enc_buf_t *)ihevce_q_get_filled_buff(
+ (void *)ps_enc_ctxt,
+ IHEVCE_INPUT_DATA_CTRL_Q,
+ &in_buf_id,
+ BUFF_QUE_BLOCKING_MODE);
+ ps_multi_thrd->i4_last_inp_buf = ihevce_check_last_inp_buf(
+ (WORD32 *)ps_lap_inp_buf->s_input_buf.pv_synch_ctrl_bufs);
+ }
+
+ ps_curr_inp =
+ ihevce_lap_process(ps_enc_ctxt->pv_lap_interface_ctxt, ps_lap_inp_buf);
+
+ } while(NULL == ps_curr_inp);
+
+ /* set the flag saying init is done so that other cores dont do it */
+ ps_multi_thrd->ai4_pre_enc_init_done[i4_cur_decomp_idx] = 1;
+
+ ps_multi_thrd->aps_curr_inp_pre_enc[i4_cur_decomp_idx] = ps_curr_inp;
+ ps_multi_thrd->ai4_in_buf_id_pre_enc[i4_cur_decomp_idx] =
+ ps_curr_inp->s_input_buf.i4_buf_id;
+
+ /* ------- get free output buffer from pre-enc/enc buffer que ---------- */
+ ps_curr_out = (pre_enc_me_ctxt_t *)ihevce_q_get_free_buff(
+ (void *)ps_enc_ctxt, IHEVCE_PRE_ENC_ME_Q, &out_buf_id, BUFF_QUE_BLOCKING_MODE);
+ ps_multi_thrd->aps_curr_out_pre_enc[i4_cur_decomp_idx] = ps_curr_out;
+ ps_multi_thrd->ai4_out_buf_id_pre_enc[i4_cur_decomp_idx] = out_buf_id;
+
+ if((NULL != ps_curr_inp) && (NULL != ps_curr_out))
+ {
+ /* by default last picture to be encoded flag is set to 0 */
+ /* this flag will be used by slave threads to exit at the end */
+ ps_multi_thrd->i4_last_pic_flag = 0;
+
+ /* store the buffer id */
+ ps_curr_out->i4_buf_id = out_buf_id;
+
+ ps_curr_out->i8_acc_num_blks_high_sad = 0;
+ ps_curr_out->i8_total_blks = 0;
+ ps_curr_out->i4_is_high_complex_region = -1;
+
+ /* set the parameters for sync b/w pre-encode and encode threads */
+ ps_curr_out->i4_end_flag = ps_curr_inp->s_lap_out.i4_end_flag;
+ ps_curr_out->i4_frm_proc_valid_flag = 1;
+ if(ps_curr_out->i4_end_flag)
+ {
+ ps_curr_out->i4_frm_proc_valid_flag =
+ ps_curr_inp->s_input_buf.i4_inp_frm_data_valid_flag;
+ ps_multi_thrd->i4_last_pic_flag = 1;
+ ps_multi_thrd->ai4_end_flag_pre_enc[i4_cur_decomp_idx] = 1;
+ }
+ if(ps_curr_inp->s_lap_out.i4_out_flush_flag)
+ {
+ ps_curr_out->i4_frm_proc_valid_flag =
+ ps_curr_inp->s_input_buf.i4_inp_frm_data_valid_flag;
+ }
+
+ /* do the init processing if input frm data is valid */
+ if(1 == ps_curr_inp->s_input_buf.i4_inp_frm_data_valid_flag)
+ {
+ WORD32 end_flag = ps_multi_thrd->ai4_end_flag_pre_enc[i4_cur_decomp_idx];
+ WORD32 cur_qp = 0, count;
+
+ ihevce_pre_enc_init(
+ ps_enc_ctxt,
+ ps_curr_inp,
+ ps_curr_out,
+ &end_flag,
+ &cur_qp,
+ &ps_multi_thrd->ai4_decomp_lyr_buf_idx[i4_cur_decomp_idx],
+ i4_cur_decomp_idx);
+
+ ps_multi_thrd->ai4_end_flag_pre_enc[i4_cur_decomp_idx] = end_flag;
+ ps_multi_thrd->ai4_cur_frame_qp_pre_enc[i4_cur_decomp_idx] = cur_qp;
+
+ for(count = 0; count < ((HEVCE_MAX_HEIGHT >> 1) / 8); count++)
+ {
+ ps_multi_thrd->aai4_l1_pre_intra_done[i4_cur_decomp_idx][count] = 0;
+ }
+ }
+ }
+ }
+ else if(1 == i4_decomp_end_flag)
+ {
+ /* Once end is reached all subsequent flags are set to 1 to indicate end */
+ ps_multi_thrd->ai4_end_flag_pre_enc[i4_cur_decomp_idx] = 1;
+ }
+
+ /****** UnLock the critical section after decomp pre_intra init ******/
+ {
+ WORD32 i4_status;
+ i4_status = osal_mutex_unlock(ps_multi_thrd->pv_mutex_hdl_pre_enc_init);
+
+ if(OSAL_SUCCESS != i4_status)
+ return 0;
+ }
+
+ /* ------------------------------------------------------------ */
+ /* Layer Decomp and Pre Intra Analysis */
+ /* ------------------------------------------------------------ */
+ if(0 == i4_decomp_end_flag)
+ {
+ pre_enc_me_ctxt_t *ps_curr_out = ps_multi_thrd->aps_curr_out_pre_enc[i4_cur_decomp_idx];
+
+ if(1 == ps_curr_out->i4_frm_proc_valid_flag)
+ {
+ ihevce_decomp_pre_intra_process(
+ ps_enc_ctxt->s_module_ctxt.pv_decomp_pre_intra_ctxt,
+ &ps_multi_thrd->aps_curr_inp_pre_enc[i4_cur_decomp_idx]->s_lap_out,
+ &ps_enc_ctxt->s_frm_ctb_prms,
+ ps_multi_thrd,
+ i4_thrd_id,
+ i4_cur_decomp_idx,
+ ps_curr_out->ps_layer0_cur_satd,
+ ps_curr_out->ps_layer0_cur_mean);
+ }
+ }
+
+ /* ------------------------------------------------------------ */
+ /* Layer Decomp and Pre Intra Deinit */
+ /* ------------------------------------------------------------ */
+
+ /****** Lock the critical section for decomp deinit ******/
+ {
+ WORD32 i4_status;
+ i4_status = osal_mutex_lock(ps_multi_thrd->pv_mutex_hdl_pre_enc_decomp_deinit);
+
+ if(OSAL_SUCCESS != i4_status)
+ return 0;
+ }
+
+ ps_multi_thrd->ai4_num_thrds_processed_decomp[i4_cur_decomp_idx]++;
+ i4_decomp_end_flag = ps_multi_thrd->ai4_end_flag_pre_enc[i4_cur_decomp_idx];
+
+ /* check for last thread condition */
+ if(ps_multi_thrd->ai4_num_thrds_processed_decomp[i4_cur_decomp_idx] ==
+ ps_multi_thrd->i4_num_pre_enc_proc_thrds)
+ {
+ ps_multi_thrd->ai4_num_thrds_processed_decomp[i4_cur_decomp_idx] = 0;
+
+ /* reset the init flag so that init happens by the first thread for the next frame
+ of same ping_pong instance */
+ ps_multi_thrd->ai4_pre_enc_init_done[i4_cur_decomp_idx] = 0;
+
+ /* update the pre enc l1 done in dep manager */
+ ihevce_dmgr_update_frm_frm_sync(pv_dep_mngr_prev_frame_pre_enc_l1);
+ }
+
+ /* index increment */
+ i4_cur_decomp_idx = i4_cur_decomp_idx + 1;
+
+ /* wrap around case */
+ if(i4_cur_decomp_idx == (MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME))
+ {
+ i4_cur_decomp_idx = 0;
+ }
+
+ /****** UnLock the critical section after decomp pre_intra deinit ******/
+ {
+ WORD32 i4_status;
+ i4_status = osal_mutex_unlock(ps_multi_thrd->pv_mutex_hdl_pre_enc_decomp_deinit);
+
+ if(OSAL_SUCCESS != i4_status)
+ return 0;
+ }
+
+ /* ------------------------------------------------------------ */
+ /* HME Init */
+ /* ------------------------------------------------------------ */
+
+ /* Wait till previous frame(instance)'s coarse_me is processed */
+ {
+ ihevce_dmgr_chk_frm_frm_sync(pv_dep_mngr_prev_frame_pre_enc_coarse_me, i4_thrd_id);
+ }
+
+ /****** Lock the critical section for hme init ******/
+ {
+ WORD32 i4_status;
+
+ i4_status = osal_mutex_lock(ps_multi_thrd->pv_mutex_hdl_pre_enc_hme_init);
+ if(OSAL_SUCCESS != i4_status)
+ return 0;
+ }
+
+ if(0 == ps_multi_thrd->ai4_pre_enc_hme_init_done[i4_cur_coarse_me_idx])
+ {
+ /* do the init processing if input frm data is valid */
+ if(1 ==
+ ps_multi_thrd->aps_curr_out_pre_enc[i4_cur_coarse_me_idx]->i4_frm_proc_valid_flag)
+ {
+ recon_pic_buf_t *ps_frm_recon = NULL;
+
+ /* DPB management for coarse me + HME init */
+ ihevce_pre_enc_coarse_me_init(
+ ps_enc_ctxt,
+ ps_multi_thrd->aps_curr_inp_pre_enc[i4_cur_coarse_me_idx],
+ ps_multi_thrd->aps_curr_out_pre_enc[i4_cur_coarse_me_idx],
+ &ps_frm_recon,
+ ps_multi_thrd->ai4_decomp_lyr_buf_idx[i4_cur_coarse_me_idx],
+ ps_multi_thrd->ai4_cur_frame_qp_pre_enc[i4_cur_coarse_me_idx],
+ i4_cur_coarse_me_idx);
+
+ /* store the recon buffer pointer */
+ ps_multi_thrd->aps_frm_recon_pre_enc[i4_cur_coarse_me_idx] = ps_frm_recon;
+ }
+
+ ps_multi_thrd->ai4_pre_enc_hme_init_done[i4_cur_coarse_me_idx] = 1;
+ }
+
+ /****** Unlock the critical section for hme init ******/
+ {
+ WORD32 i4_status;
+
+ i4_status = osal_mutex_unlock(ps_multi_thrd->pv_mutex_hdl_pre_enc_hme_init);
+ if(OSAL_SUCCESS != i4_status)
+ return 0;
+ }
+
+ /* ------------------------------------------------------------ */
+ /* Coarse Motion estimation and early intra-inter decision */
+ /* ------------------------------------------------------------ */
+ if(1 == ps_multi_thrd->aps_curr_out_pre_enc[i4_cur_coarse_me_idx]->i4_frm_proc_valid_flag)
+ {
+ ihevce_coarse_me_process(
+ ps_enc_ctxt->s_module_ctxt.pv_coarse_me_ctxt,
+ ps_multi_thrd->aps_curr_inp_pre_enc[i4_cur_coarse_me_idx],
+ &ps_enc_ctxt->s_multi_thrd,
+ i4_thrd_id,
+ i4_cur_coarse_me_idx);
+ }
+
+ /* update the end flag */
+ i4_end_flag = ps_multi_thrd->ai4_end_flag_pre_enc[i4_cur_coarse_me_idx];
+ i4_out_flush_flag =
+ ps_multi_thrd->aps_curr_inp_pre_enc[i4_cur_coarse_me_idx]->s_lap_out.i4_out_flush_flag;
+
+ /****** Lock the critical section for hme deinit ******/
+ {
+ WORD32 i4_status;
+ i4_status = osal_mutex_lock(ps_multi_thrd->pv_mutex_hdl_pre_enc_hme_deinit);
+
+ if(OSAL_SUCCESS != i4_status)
+ return 0;
+ }
+
+ /* last thread finishing pre_enc_process will update the flag indicating
+ decomp and coarse ME is done. So that the next frame (next ping_pong instance)
+ can start immediately after finishing current frame's IPE */
+ if(1 == ps_multi_thrd->aps_curr_out_pre_enc[i4_cur_coarse_me_idx]->i4_frm_proc_valid_flag)
+ {
+ ps_multi_thrd->ai4_num_thrds_processed_coarse_me[i4_cur_coarse_me_idx]++;
+
+ /* ------------------------------------------------------------ */
+ /* Update qp used in based in L1 satd/act in case of scene cut */
+ /* ------------------------------------------------------------ */
+ {
+ ihevce_lap_enc_buf_t *ps_curr_inp =
+ ps_multi_thrd->aps_curr_inp_pre_enc[i4_cur_coarse_me_idx];
+
+ if(1 == ps_curr_inp->s_input_buf.i4_inp_frm_data_valid_flag)
+ {
+ WORD32 i4_prev_coarse_me_idx;
+
+ /* wrap around case */
+ if(i4_cur_coarse_me_idx == 0)
+ {
+ i4_prev_coarse_me_idx = MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME - 1;
+ }
+ else
+ {
+ i4_prev_coarse_me_idx = i4_cur_coarse_me_idx - 1;
+ }
+
+ ihevce_update_qp_L1_sad_based(
+ ps_enc_ctxt,
+ ps_multi_thrd->aps_curr_inp_pre_enc[i4_cur_coarse_me_idx],
+ ps_multi_thrd->aps_curr_inp_pre_enc[i4_prev_coarse_me_idx],
+ ps_multi_thrd->aps_curr_out_pre_enc[i4_cur_coarse_me_idx],
+ ((ps_multi_thrd->ai4_num_thrds_processed_coarse_me[i4_cur_coarse_me_idx] ==
+ ps_multi_thrd->i4_num_pre_enc_proc_thrds)));
+ }
+ }
+ /* check for last thread condition */
+ if(ps_multi_thrd->ai4_num_thrds_processed_coarse_me[i4_cur_coarse_me_idx] ==
+ ps_multi_thrd->i4_num_pre_enc_proc_thrds)
+ {
+ ihevce_lap_enc_buf_t *ps_curr_inp =
+ ps_multi_thrd->aps_curr_inp_pre_enc[i4_cur_coarse_me_idx];
+
+ /* Frame END processing */
+ ihevce_coarse_me_frame_end(ps_enc_ctxt->s_module_ctxt.pv_coarse_me_ctxt);
+
+ if(1 == ps_curr_inp->s_input_buf.i4_inp_frm_data_valid_flag)
+ {
+ WORD32 i4_enable_noise_detection = 0;
+ WORD32 i4_vqet = ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_vqet;
+
+ if(i4_vqet & (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER))
+ {
+ if(i4_vqet & (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_NOISE_PRESERVATION))
+ {
+ i4_enable_noise_detection = 1;
+ }
+ }
+
+ if(1 != ((ps_curr_inp->s_lap_out.i4_pic_type == IV_B_FRAME) &&
+ (ps_enc_ctxt->s_lap_stat_prms.ai4_quality_preset[i4_resolution_id] ==
+ IHEVCE_QUALITY_P6)))
+ {
+ ihevce_decomp_pre_intra_curr_frame_pre_intra_deinit(
+ ps_enc_ctxt->s_module_ctxt.pv_decomp_pre_intra_ctxt,
+ ps_multi_thrd->aps_curr_out_pre_enc[i4_cur_coarse_me_idx],
+ 1,
+ &ps_enc_ctxt->s_frm_ctb_prms,
+ ps_curr_inp->s_lap_out.i4_temporal_lyr_id,
+ i4_enable_noise_detection);
+ }
+ }
+
+ ps_multi_thrd->ai4_decomp_coarse_me_complete_flag[i4_cur_coarse_me_idx] = 1;
+
+ ps_multi_thrd->ai4_num_thrds_processed_coarse_me[i4_cur_coarse_me_idx] = 0;
+
+ /* get the layer 1 ctxt to be passed on to encode group */
+ ihevce_coarse_me_get_lyr1_ctxt(
+ ps_enc_ctxt->s_module_ctxt.pv_coarse_me_ctxt,
+ ps_multi_thrd->aps_curr_out_pre_enc[i4_cur_coarse_me_idx]->pv_me_lyr_ctxt,
+ ps_multi_thrd->aps_curr_out_pre_enc[i4_cur_coarse_me_idx]->pv_me_lyr_bnk_ctxt);
+
+ /* reset the init flag so that init happens by the first thread for the next frame
+ of same ping_pong instance */
+ ps_multi_thrd->ai4_pre_enc_hme_init_done[i4_cur_coarse_me_idx] = 0;
+
+ /* update the pre enc l1 done in dep manager */
+ ihevce_dmgr_update_frm_frm_sync(pv_dep_mngr_prev_frame_pre_enc_coarse_me);
+ }
+
+ i4_num_buf_prod_for_l0_ipe++;
+
+ /* index increment */
+ i4_cur_coarse_me_idx = i4_cur_coarse_me_idx + 1;
+
+ /* wrap around case */
+ if(i4_cur_coarse_me_idx == (MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME))
+ {
+ i4_cur_coarse_me_idx = 0;
+ }
+ }
+ else
+ {
+ /* for invalid frame set the processed flag to 1 for L0 IPE */
+ ps_multi_thrd->ai4_decomp_coarse_me_complete_flag[i4_cur_coarse_me_idx] = 1;
+
+ if(1 == i4_out_flush_flag)
+ {
+ /* update the num thrds who have finished pre-enc processing */
+ ps_multi_thrd->ai4_num_thrds_processed_coarse_me[i4_cur_coarse_me_idx]++;
+
+ if(ps_multi_thrd->ai4_num_thrds_processed_coarse_me[i4_cur_coarse_me_idx] ==
+ ps_multi_thrd->i4_num_pre_enc_proc_thrds)
+ {
+ ps_multi_thrd->ai4_decomp_coarse_me_complete_flag[i4_cur_coarse_me_idx] = 1;
+
+ /* reset num thread finished counter */
+ ps_multi_thrd->ai4_num_thrds_processed_coarse_me[i4_cur_coarse_me_idx] = 0;
+
+ ps_multi_thrd->ai4_pre_enc_hme_init_done[i4_cur_coarse_me_idx] = 0;
+
+ /* update flag indicating coarse_me and decomp is done */
+ ihevce_dmgr_update_frm_frm_sync(pv_dep_mngr_prev_frame_pre_enc_coarse_me);
+ }
+ }
+
+ i4_num_buf_prod_for_l0_ipe++;
+
+ /* index increment */
+ i4_cur_coarse_me_idx = i4_cur_coarse_me_idx + 1;
+
+ /* wrap around case */
+ if(i4_cur_coarse_me_idx == (MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME))
+ {
+ i4_cur_coarse_me_idx = 0;
+ }
+ }
+
+ /****** UnLock the critical section after hme deinit ******/
+ {
+ WORD32 i4_status;
+ i4_status =
+ osal_mutex_unlock(ps_enc_ctxt->s_multi_thrd.pv_mutex_hdl_pre_enc_hme_deinit);
+
+ if(OSAL_SUCCESS != i4_status)
+ return 0;
+ }
+
+ /* ----------------------------------------------------------- */
+ /* IPE init and process */
+ /* ----------------------------------------------------------- */
+
+ if(i4_num_buf_prod_for_l0_ipe >= ps_multi_thrd->i4_delay_pre_me_btw_l0_ipe || i4_end_flag ||
+ i4_out_flush_flag)
+ {
+ do
+ {
+ /* Wait till previous frame(instance)'s IPE is processed */
+ {
+ ihevce_dmgr_chk_frm_frm_sync(pv_dep_mngr_prev_frame_pre_enc_l0, i4_thrd_id);
+ }
+
+ /* Wait till current frame(instance)'s L1 and below layers are processed */
+ {
+ volatile WORD32 *pi4_cur_l1_complete =
+ &ps_multi_thrd->ai4_decomp_coarse_me_complete_flag[i4_cur_ipe_idx];
+
+ while(1)
+ {
+ if(*pi4_cur_l1_complete)
+ break;
+ }
+ }
+
+ /* ----------------------------------------------------------- */
+ /* L0 IPE qp init */
+ /* ----------------------------------------------------------- */
+
+ /****** Lock the critical section for init ******/
+ {
+ WORD32 i4_status;
+ i4_status = osal_mutex_lock(ps_multi_thrd->pv_mutex_hdl_l0_ipe_init);
+
+ if(OSAL_SUCCESS != i4_status)
+ return 0;
+ }
+
+ /* first thread that enters will calculate qp and write that to shared variable
+ that will be accessed by other threads */
+ if(ps_multi_thrd->ai4_num_thrds_processed_L0_ipe_qp_init[i4_cur_ipe_idx] == 0)
+ {
+ volatile WORD32 i4_is_qp_valid = -1;
+ WORD32 i4_update_qp;
+ WORD32 i4_cur_q_scale;
+
+ i4_cur_q_scale =
+ ps_multi_thrd->aps_curr_out_pre_enc[i4_cur_ipe_idx]->i4_curr_frm_qp;
+ i4_cur_q_scale = ps_enc_ctxt->s_rc_quant.pi4_qp_to_qscale[i4_cur_q_scale];
+ i4_cur_q_scale = (i4_cur_q_scale + (1 << (QSCALE_Q_FAC_3 - 1))) >>
+ QSCALE_Q_FAC_3;
+ /* Get free buffer to store L0 IPE output to enc loop */
+ ps_multi_thrd->ps_L0_IPE_curr_out_pre_enc =
+ (pre_enc_L0_ipe_encloop_ctxt_t *)ihevce_q_get_free_buff(
+ (void *)ps_enc_ctxt,
+ IHEVCE_L0_IPE_ENC_Q,
+ &ps_multi_thrd->i4_L0_IPE_out_buf_id,
+ BUFF_QUE_BLOCKING_MODE);
+ if(ps_enc_ctxt->ps_stat_prms->s_pass_prms.i4_pass != 2 &&
+ ps_enc_ctxt->ps_stat_prms->s_config_prms.i4_rate_control_mode != 3)
+ {
+ complexity_RC_reset_marking(
+ ps_enc_ctxt, i4_cur_ipe_idx, (i4_end_flag || i4_out_flush_flag));
+ }
+ if(1 == ps_multi_thrd->aps_curr_inp_pre_enc[i4_cur_ipe_idx]
+ ->s_input_buf.i4_inp_frm_data_valid_flag)
+ {
+ while(i4_is_qp_valid == -1)
+ {
+ /*this rate control call is outside mutex lock to avoid deadlock. If this acquires mutex lock enc will not be able to
+ populate qp*/
+ i4_is_qp_valid = ihevce_rc_check_is_pre_enc_qp_valid(
+ (void *)ps_enc_ctxt->s_module_ctxt.apv_rc_ctxt[0],
+ (volatile WORD32 *)&ps_enc_ctxt->s_multi_thrd.i4_force_end_flag);
+ if(1 == ps_enc_ctxt->s_multi_thrd.i4_force_end_flag)
+ {
+ /*** For force end condition break from this loop ***/
+ i4_is_qp_valid = 1;
+ break;
+ }
+ }
+
+ /*lock rate control context*/
+ osal_mutex_lock(ps_enc_ctxt->pv_rc_mutex_lock_hdl);
+
+ /* Qp query has to happen irrespective of using it or not since producer consumer logic will be disturbed */
+ i4_update_qp = ihevce_rc_pre_enc_qp_query(
+ (void *)ps_enc_ctxt->s_module_ctxt.apv_rc_ctxt[0],
+ &ps_multi_thrd->aps_curr_inp_pre_enc[i4_cur_ipe_idx]->s_rc_lap_out,
+ 0);
+
+ if(ps_enc_ctxt->ps_stat_prms->s_config_prms.i4_rate_control_mode != 3)
+ {
+ ps_multi_thrd->aps_curr_inp_pre_enc[i4_cur_ipe_idx]
+ ->s_rc_lap_out.i8_frm_satd_act_accum_L0_frm_L1 =
+ ihevce_get_L0_satd_based_on_L1(
+ ps_multi_thrd->aps_curr_inp_pre_enc[i4_cur_ipe_idx]
+ ->s_rc_lap_out.i8_frame_satd_by_act_L1_accum,
+ ps_multi_thrd->aps_curr_inp_pre_enc[i4_cur_ipe_idx]
+ ->s_rc_lap_out.i4_num_pels_in_frame_considered,
+ i4_cur_q_scale);
+
+ if(ps_enc_ctxt->ps_stat_prms->s_pass_prms.i4_pass != 2)
+ {
+ if(ps_multi_thrd->aps_curr_inp_pre_enc[i4_cur_ipe_idx]
+ ->s_rc_lap_out.i4_rc_scene_type ==
+ SCENE_TYPE_SCENE_CUT ||
+ ps_multi_thrd->aps_curr_inp_pre_enc[i4_cur_ipe_idx]
+ ->s_rc_lap_out.i4_is_I_only_scd ||
+ ps_multi_thrd->aps_curr_inp_pre_enc[i4_cur_ipe_idx]
+ ->s_rc_lap_out.i4_is_non_I_scd == 1)
+ {
+ float i_to_avg_rest_ratio;
+ WORD32 i4_count = 0;
+
+ while(1)
+ {
+ i_to_avg_rest_ratio = ihevce_get_i_to_avg_ratio(
+ ps_enc_ctxt->s_module_ctxt.apv_rc_ctxt[0],
+ &ps_multi_thrd->aps_curr_inp_pre_enc[i4_cur_ipe_idx]
+ ->s_rc_lap_out,
+ 1,
+ 0,
+ 0,
+ ps_multi_thrd->aps_curr_inp_pre_enc[i4_cur_ipe_idx]
+ ->s_rc_lap_out.ai4_offsets,
+ 0);
+ /* HEVC_RC query rate control for qp */
+ i4_update_qp = ihevce_get_L0_est_satd_based_scd_qp(
+ ps_enc_ctxt->s_module_ctxt.apv_rc_ctxt[0],
+ &ps_multi_thrd->aps_curr_inp_pre_enc[i4_cur_ipe_idx]
+ ->s_rc_lap_out,
+ ps_multi_thrd->aps_curr_inp_pre_enc[i4_cur_ipe_idx]
+ ->s_rc_lap_out.i8_frm_satd_act_accum_L0_frm_L1,
+ i_to_avg_rest_ratio);
+
+ ihevce_set_L0_scd_qp(
+ ps_enc_ctxt->s_module_ctxt.apv_rc_ctxt[0],
+ i4_update_qp);
+
+ if(ps_multi_thrd->aps_curr_inp_pre_enc[i4_cur_ipe_idx]
+ ->s_lap_out.i4_pic_type != IV_IDR_FRAME &&
+ ps_multi_thrd->aps_curr_inp_pre_enc[i4_cur_ipe_idx]
+ ->s_lap_out.i4_pic_type != IV_I_FRAME)
+ {
+ i4_update_qp +=
+ ps_multi_thrd->aps_curr_inp_pre_enc[i4_cur_ipe_idx]
+ ->s_lap_out.i4_temporal_lyr_id +
+ 1;
+
+ i4_update_qp =
+ CLIP3(i4_update_qp, MIN_HEVC_QP, MAX_HEVC_QP);
+ }
+
+ i4_count++;
+ if((i4_update_qp ==
+ ps_multi_thrd->aps_curr_inp_pre_enc[i4_cur_ipe_idx]
+ ->s_rc_lap_out.i4_L0_qp) ||
+ i4_count > 4)
+ break;
+
+ ps_multi_thrd->aps_curr_inp_pre_enc[i4_cur_ipe_idx]
+ ->s_rc_lap_out.i4_L0_qp = i4_update_qp;
+ }
+ }
+ }
+ else
+ {
+ //i4_update_qp = ihevce_get_first_pass_qp(ps_enc_ctxt->s_multi_thrd.aps_curr_inp_pre_enc[i4_cur_ipe_idx]->s_lap_out.pv_frame_info);
+ i4_update_qp = ps_multi_thrd->aps_curr_inp_pre_enc[i4_cur_ipe_idx]
+ ->s_rc_lap_out.ps_frame_info->i4_rc_hevc_qp;
+ }
+ }
+
+ {
+ WORD32 i4_index = 0;
+ rc_lap_out_params_t *ps_rc_lap_temp =
+ &ps_multi_thrd->aps_curr_inp_pre_enc[i4_cur_ipe_idx]->s_rc_lap_out;
+ WORD32 i4_offset;
+
+ if(ps_rc_lap_temp->i4_rc_pic_type != IV_IDR_FRAME &&
+ ps_rc_lap_temp->i4_rc_pic_type != IV_I_FRAME)
+ {
+ i4_index = ps_rc_lap_temp->i4_rc_temporal_lyr_id + 1;
+ }
+ i4_offset = ps_rc_lap_temp->ai4_offsets[i4_index];
+ ASSERT(i4_offset >= 0);
+ /* Map the current frame Qp to L0 Qp */
+ ps_rc_lap_temp->i4_L0_qp = i4_update_qp - i4_offset;
+ }
+ osal_mutex_unlock(ps_enc_ctxt->pv_rc_mutex_lock_hdl);
+ ASSERT(ps_multi_thrd->i4_qp_update_l0_ipe == -1);
+ ps_multi_thrd->i4_qp_update_l0_ipe = i4_update_qp;
+ ps_multi_thrd->i4_rc_l0_qp = i4_update_qp;
+ }
+ ps_multi_thrd->aps_curr_inp_pre_enc[i4_cur_ipe_idx]
+ ->s_lap_out.f_i_pic_lamda_modifier = CONST_LAMDA_MOD_VAL;
+ }
+ /* update qp only if it is not scene cut since it has already been
+ populated in L1 for scene cut frames */
+ if(1 == ps_multi_thrd->aps_curr_inp_pre_enc[i4_cur_ipe_idx]
+ ->s_input_buf.i4_inp_frm_data_valid_flag &&
+ ps_enc_ctxt->ps_stat_prms->s_config_prms.i4_rate_control_mode != 3)
+ {
+ /*get relevant lambda params*/
+ ihevce_get_frame_lambda_prms(
+ ps_enc_ctxt,
+ ps_multi_thrd->aps_curr_out_pre_enc[i4_cur_ipe_idx],
+ ps_multi_thrd->i4_qp_update_l0_ipe,
+ ps_enc_ctxt->s_runtime_src_prms.i4_field_pic,
+ ps_multi_thrd->aps_curr_inp_pre_enc[i4_cur_ipe_idx]->s_lap_out.i4_is_ref_pic,
+ ps_multi_thrd->aps_curr_inp_pre_enc[i4_cur_ipe_idx]
+ ->s_lap_out.i4_temporal_lyr_id,
+ ps_multi_thrd->aps_curr_inp_pre_enc[i4_cur_ipe_idx]
+ ->s_lap_out.f_i_pic_lamda_modifier,
+ 0,
+ PRE_ENC_LAMBDA_TYPE);
+
+ ps_multi_thrd->aps_curr_out_pre_enc[i4_cur_ipe_idx]->i4_curr_frm_qp =
+ ps_multi_thrd->i4_qp_update_l0_ipe;
+ }
+ /* Compute accumulated activity and strength */
+ if(1 == ps_multi_thrd->aps_curr_inp_pre_enc[i4_cur_ipe_idx]
+ ->s_input_buf.i4_inp_frm_data_valid_flag &&
+ ps_multi_thrd->ai4_num_thrds_processed_L0_ipe_qp_init[i4_cur_ipe_idx] == 0)
+ {
+ ihevce_variance_calc_acc_activity(ps_enc_ctxt, i4_cur_ipe_idx);
+ }
+
+ /* Mark qp as read by last thread */
+ ps_multi_thrd->ai4_num_thrds_processed_L0_ipe_qp_init[i4_cur_ipe_idx]++;
+ if(ps_multi_thrd->ai4_num_thrds_processed_L0_ipe_qp_init[i4_cur_ipe_idx] ==
+ ps_multi_thrd->i4_num_pre_enc_proc_thrds)
+ {
+ ps_multi_thrd->ai4_num_thrds_processed_L0_ipe_qp_init[i4_cur_ipe_idx] = 0;
+ ps_multi_thrd->i4_qp_update_l0_ipe = -1;
+ }
+
+ /****** UnLock the critical section after deinit ******/
+ {
+ WORD32 i4_status;
+ i4_status = osal_mutex_unlock(ps_multi_thrd->pv_mutex_hdl_l0_ipe_init);
+
+ if(OSAL_SUCCESS != i4_status)
+ return 0;
+ }
+
+ if(1 == ps_multi_thrd->aps_curr_inp_pre_enc[i4_cur_ipe_idx]
+ ->s_input_buf.i4_inp_frm_data_valid_flag)
+ {
+ WORD32 i4_slice_type =
+ (WORD32)ps_multi_thrd->aps_curr_out_pre_enc[i4_cur_ipe_idx]
+ ->s_slice_hdr.i1_slice_type;
+ WORD32 i4_quality_preset =
+ (WORD32)ps_multi_thrd->aps_curr_inp_pre_enc[i4_cur_ipe_idx]
+ ->s_lap_out.i4_quality_preset;
+ WORD32 i4_temporal_layer_id =
+ (WORD32)ps_multi_thrd->aps_curr_inp_pre_enc[i4_cur_ipe_idx]
+ ->s_lap_out.i4_temporal_lyr_id;
+#if DISABLE_L0_IPE_INTRA_IN_BPICS
+ if(1 != ((i4_quality_preset == IHEVCE_QUALITY_P6) &&
+ (i4_temporal_layer_id > TEMPORAL_LAYER_DISABLE)))
+#endif
+ {
+ UWORD8 i1_cu_qp_delta_enabled_flag =
+ ps_enc_ctxt->ps_stat_prms->s_config_prms.i4_cu_level_rc;
+
+ ihevce_populate_ipe_frame_init(
+ ps_enc_ctxt->s_module_ctxt.pv_ipe_ctxt,
+ ps_enc_ctxt->ps_stat_prms,
+ ps_multi_thrd->aps_curr_out_pre_enc[i4_cur_ipe_idx]->i4_curr_frm_qp,
+ i4_slice_type,
+ i4_thrd_id,
+ ps_multi_thrd->aps_curr_out_pre_enc[i4_cur_ipe_idx],
+ i1_cu_qp_delta_enabled_flag,
+ &ps_enc_ctxt->s_rc_quant,
+ i4_quality_preset,
+ i4_temporal_layer_id,
+ &ps_multi_thrd->aps_curr_inp_pre_enc[i4_cur_ipe_idx]->s_lap_out);
+
+ ihevce_ipe_process(
+ ps_enc_ctxt->s_module_ctxt.pv_ipe_ctxt,
+ &ps_enc_ctxt->s_frm_ctb_prms,
+ &ps_multi_thrd->aps_curr_out_pre_enc[i4_cur_ipe_idx]->as_lambda_prms[0],
+ ps_multi_thrd->aps_curr_inp_pre_enc[i4_cur_ipe_idx],
+ ps_multi_thrd->ps_L0_IPE_curr_out_pre_enc,
+ ps_multi_thrd->aps_curr_out_pre_enc[i4_cur_ipe_idx]->ps_ctb_analyse,
+ ps_multi_thrd->ps_L0_IPE_curr_out_pre_enc->ps_ipe_analyse_ctb,
+ &ps_enc_ctxt->s_multi_thrd,
+ i4_slice_type,
+ ps_multi_thrd->aps_curr_out_pre_enc[i4_cur_ipe_idx]->ps_layer1_buf,
+ ps_multi_thrd->aps_curr_out_pre_enc[i4_cur_ipe_idx]->ps_layer2_buf,
+ ps_multi_thrd->aps_curr_out_pre_enc[i4_cur_ipe_idx]->ps_ed_ctb_l1,
+ i4_thrd_id,
+ i4_cur_ipe_idx);
+ }
+ }
+
+ /* ----------------------------------------------------------- */
+ /* pre-enc de-init */
+ /* ----------------------------------------------------------- */
+
+ /****** Lock the critical section for deinit ******/
+ {
+ WORD32 i4_status;
+ i4_status = osal_mutex_lock(ps_multi_thrd->pv_mutex_hdl_pre_enc_deinit);
+
+ if(OSAL_SUCCESS != i4_status)
+ return 0;
+ }
+
+ ps_multi_thrd->ai4_num_thrds_processed_pre_enc[i4_cur_ipe_idx]++;
+ if(ps_multi_thrd->ai4_num_thrds_processed_pre_enc[i4_cur_ipe_idx] ==
+ ps_multi_thrd->i4_num_pre_enc_proc_thrds)
+ {
+ ps_multi_thrd->ai4_pre_enc_deinit_done[i4_cur_ipe_idx] = 0;
+ ps_multi_thrd->ai4_num_thrds_processed_pre_enc[i4_cur_ipe_idx] = 0;
+
+ /* reset the init flag so that init happens by the first thread for the
+ next frame of same ping_pong instance */
+ ps_multi_thrd->ai4_pre_enc_init_done[i4_cur_ipe_idx] = 0;
+ }
+
+ /* de-init */
+ if(0 == ps_multi_thrd->ai4_pre_enc_deinit_done[i4_cur_ipe_idx])
+ {
+ ihevce_lap_enc_buf_t *ps_curr_inp =
+ ps_multi_thrd->aps_curr_inp_pre_enc[i4_cur_ipe_idx];
+ pre_enc_me_ctxt_t *ps_curr_out =
+ ps_multi_thrd->aps_curr_out_pre_enc[i4_cur_ipe_idx];
+
+ /* set the flag saying de init is done so that other cores dont do it */
+ ps_multi_thrd->ai4_pre_enc_deinit_done[i4_cur_ipe_idx] = 1;
+
+ if(1 == ps_curr_out->i4_frm_proc_valid_flag)
+ {
+ LWORD64 frame_acc_satd_by_modqp;
+ float L1_full_processed_ratio;
+
+ if(ps_curr_inp->s_rc_lap_out.i8_satd_by_act_L1_accum_evaluated)
+ {
+ L1_full_processed_ratio =
+ ((float)ps_curr_inp->s_rc_lap_out.i8_frame_satd_by_act_L1_accum /
+ ps_curr_inp->s_rc_lap_out.i8_satd_by_act_L1_accum_evaluated);
+ }
+ else
+ {
+ L1_full_processed_ratio = 1.0;
+ }
+ /* Get frame-level satd cost and mode bit cost from IPE */
+ ps_curr_out->i8_frame_acc_satd_cost = ihevce_ipe_get_frame_intra_satd_cost(
+ ps_enc_ctxt->s_module_ctxt.pv_ipe_ctxt,
+ &frame_acc_satd_by_modqp,
+ &ps_curr_inp->s_rc_lap_out.i8_est_I_pic_header_bits,
+ &ps_curr_inp->s_lap_out.i8_frame_level_activity_fact,
+ &ps_curr_inp->s_lap_out.i8_frame_l0_acc_satd);
+
+ if((ps_curr_inp->s_lap_out.i4_quality_preset == IHEVCE_QUALITY_P6) &&
+ (ps_curr_inp->s_lap_out.i4_temporal_lyr_id > TEMPORAL_LAYER_DISABLE))
+ {
+ ps_curr_inp->s_rc_lap_out.i8_est_I_pic_header_bits = -1;
+ }
+
+ {
+ WORD32 i4_cur_q_scale = (ps_enc_ctxt->s_rc_quant.pi4_qp_to_qscale
+ [ps_enc_ctxt->s_multi_thrd.i4_rc_l0_qp +
+ ps_enc_ctxt->s_rc_quant.i1_qp_offset] +
+ (1 << (QSCALE_Q_FAC_3 - 1))) >>
+ QSCALE_Q_FAC_3;
+
+ /* calculate satd/act_fac = satd/qm * (qp_used_at_L0_analysis) */
+ ps_curr_inp->s_rc_lap_out.i8_frame_satd_act_accum =
+ frame_acc_satd_by_modqp * i4_cur_q_scale;
+ }
+
+ /* Because of early intra inter decision, L0 intra analysis might not happen for entire frame, correct the error
+ based on L1 data */
+ ps_curr_inp->s_rc_lap_out.i8_est_I_pic_header_bits = (LWORD64)(
+ ps_curr_inp->s_rc_lap_out.i8_est_I_pic_header_bits *
+ L1_full_processed_ratio);
+
+ if(L1_full_processed_ratio < 1.5)
+ {
+ ps_curr_inp->s_rc_lap_out.i8_frame_satd_act_accum = (LWORD64)(
+ ps_curr_inp->s_rc_lap_out.i8_frame_satd_act_accum *
+ L1_full_processed_ratio);
+ }
+ else
+ {
+ /* This is the case when too many candidates would not have gone through intra analysis, scaling based on L1 is found to be inappropriate,
+ Hence directly estimating L0 satd from L1 satd */
+ ps_curr_inp->s_rc_lap_out.i8_frame_satd_act_accum =
+ ps_curr_inp->s_rc_lap_out.i8_frm_satd_act_accum_L0_frm_L1;
+ }
+ }
+
+ /* register the current input buffer to be consumed by encode group threads */
+ ps_curr_out->curr_inp_buf_id =
+ ps_multi_thrd->ai4_in_buf_id_pre_enc[i4_cur_ipe_idx];
+ ps_curr_out->ps_curr_inp = ps_curr_inp;
+
+ /* set the output buffer as produced */
+ ihevce_q_set_buff_prod(
+ (void *)ps_enc_ctxt,
+ IHEVCE_PRE_ENC_ME_Q,
+ ps_multi_thrd->ai4_out_buf_id_pre_enc[i4_cur_ipe_idx]);
+
+ /* set the output buffer of L0 IPE as produced */
+ ihevce_q_set_buff_prod(
+ (void *)ps_enc_ctxt,
+ IHEVCE_L0_IPE_ENC_Q,
+ ps_multi_thrd->i4_L0_IPE_out_buf_id);
+
+ /* update flag indicating ipe is done */
+ ihevce_dmgr_update_frm_frm_sync(pv_dep_mngr_prev_frame_pre_enc_l0);
+ }
+
+ {
+ /* index increment */
+ i4_cur_ipe_idx = i4_cur_ipe_idx + 1;
+
+ /* wrap around case */
+ if(i4_cur_ipe_idx == (MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME))
+ {
+ i4_cur_ipe_idx = 0;
+ }
+ i4_num_buf_prod_for_l0_ipe--;
+ }
+ /* NOTE: update of above indices should mark end of ipe. Do not access below this */
+
+ /****** UnLock the critical section after deinit ******/
+ {
+ WORD32 i4_status;
+ i4_status = osal_mutex_unlock(ps_multi_thrd->pv_mutex_hdl_pre_enc_deinit);
+
+ if(OSAL_SUCCESS != i4_status)
+ return 0;
+ }
+
+ if(1 == ps_multi_thrd->i4_force_end_flag)
+ {
+ i4_end_flag = 1;
+ break;
+ }
+ } while((i4_end_flag || i4_out_flush_flag) && i4_num_buf_prod_for_l0_ipe);
+ }
+ }
+ if(i4_thrd_id == 0)
+ {
+ PROFILE_STOP(&ps_hle_ctxt->profile_pre_enc[i4_resolution_id], NULL);
+ }
+
+ return 0;
+}
+/* For frame QPs 1..51 in steps of 3, accumulate the L1 8x8 intra and coarse-ME SADs left after subtracting (QP2QUANT_MD[qp] << 3); each result is stored at qp, qp+1 and qp+2 in ps_curr_inp->s_rc_lap_out. */
+void calc_l1_level_hme_intra_sad_different_qp(
+ enc_ctxt_t *ps_enc_ctxt, /* not referenced in this function */
+ pre_enc_me_ctxt_t *ps_curr_out,
+ ihevce_lap_enc_buf_t *ps_curr_inp,
+ WORD32 i4_tot_ctb_l1_x,
+ WORD32 i4_tot_ctb_l1_y)
+{
+ ihevce_ed_ctb_l1_t *ps_ed_ctb_l1;
+ WORD32 i4_qp_counter, i4_qp_start = 0, i4_qp_end = 0, i, i4_j, i4_new_frame_qp;
+ LWORD64 i8_l1_intra_sad_nc_accounted = 0, cur_intra_sad, raw_hme_sad = 0;
+ LWORD64 cur_hme_sad = 0, cur_hme_sad_for_offset = 0, acc_hme_l1_sad = 0,
+ acc_hme_l1_sad_for_offset = 0;
+ i4_qp_start = 1;
+ i4_qp_end = 51;
+
+ for(i4_qp_counter = i4_qp_start; i4_qp_counter <= i4_qp_end; i4_qp_counter = i4_qp_counter + 3)
+ {
+ i8_l1_intra_sad_nc_accounted = 0;
+ cur_intra_sad = 0;
+ raw_hme_sad = 0;
+ cur_hme_sad = 0;
+ cur_hme_sad_for_offset = 0;
+ acc_hme_l1_sad = 0;
+ ps_ed_ctb_l1 = ps_curr_out->ps_ed_ctb_l1; /* restart from the first L1 CTB entry for every QP */
+ i4_new_frame_qp = i4_qp_counter;
+ acc_hme_l1_sad = 0; /* already cleared a few lines above */
+
+ for(i = 0; i < (i4_tot_ctb_l1_x * i4_tot_ctb_l1_y); i += 1)
+ {
+ for(i4_j = 0; i4_j < 16; i4_j++)
+ {
+ if(ps_ed_ctb_l1->i4_best_sad_8x8_l1_ipe[i4_j] != -1) /* entries holding -1 are skipped */
+ {
+ ASSERT(ps_ed_ctb_l1->i4_best_sad_8x8_l1_ipe[i4_j] >= 0);
+ if(ps_curr_inp->s_rc_lap_out.i4_rc_pic_type != IV_I_FRAME &&
+ ps_curr_inp->s_rc_lap_out.i4_rc_pic_type != IV_IDR_FRAME)
+ {
+ /* When L1 is disabled for B pics i4_best_sad_8x8_l1_ipe is always set to max value,
+ so this path is entered even for an incomplete ctb; hence the assert holds good only for P pic */
+ if(ps_curr_inp->s_rc_lap_out.i4_rc_quality_preset == IHEVCE_QUALITY_P6)
+ {
+ if(ps_curr_inp->s_rc_lap_out.i4_rc_pic_type == IV_P_FRAME)
+ {
+ ASSERT(ps_ed_ctb_l1->i4_best_sad_8x8_l1_me[i4_j] >= 0);
+ ASSERT(ps_ed_ctb_l1->i4_best_sad_8x8_l1_me_for_decide[i4_j] >= 0);
+ }
+ }
+ else
+ {
+ ASSERT(ps_ed_ctb_l1->i4_best_sad_8x8_l1_me[i4_j] >= 0);
+ ASSERT(ps_ed_ctb_l1->i4_best_sad_8x8_l1_me_for_decide[i4_j] >= 0);
+ }
+
+#if 1 //DISABLE_L1_L2_IPE_INTRA_IN_BPICS && RC_DEPENDENCY_FOR_BPIC
+ if((ps_ed_ctb_l1->i4_best_sad_8x8_l1_me[i4_j] != -1))
+#endif
+ {
+ cur_hme_sad = ps_ed_ctb_l1->i4_best_sad_8x8_l1_me[i4_j] -
+ (QP2QUANT_MD[i4_new_frame_qp] << 3);
+ }
+ raw_hme_sad += ps_ed_ctb_l1->i4_best_sad_8x8_l1_me[i4_j]; /* NOTE(review): added even when the ME SAD is -1 */
+
+ if(cur_hme_sad > 0) /* NOTE(review): cur_hme_sad keeps the previous block's value when the ME SAD is -1 */
+ acc_hme_l1_sad += cur_hme_sad;
+ }
+ if(cur_hme_sad_for_offset > 0) /* never true here: the variable is only ever assigned 0 in this function */
+ {
+ acc_hme_l1_sad_for_offset += cur_hme_sad_for_offset;
+ }
+ ASSERT(ps_ed_ctb_l1->i4_best_sad_8x8_l1_ipe[i4_j] >= 0);
+ /* intra sad is scaled by 1.17 to account for 1/3 vs 1/6th rounding */
+ cur_intra_sad = (LWORD64)(
+ (ps_ed_ctb_l1->i4_best_sad_8x8_l1_ipe[i4_j] * 1.17) -
+ (QP2QUANT_MD[i4_new_frame_qp] << 3));
+
+ if(cur_intra_sad > 0) /* only positive residues are accumulated */
+ i8_l1_intra_sad_nc_accounted += cur_intra_sad;
+ }
+ }
+ ps_ed_ctb_l1 += 1;
+ }
+ if((ps_curr_inp->s_rc_lap_out.i4_rc_quality_preset == IHEVCE_QUALITY_P6) &&
+ (ps_curr_inp->s_rc_lap_out.i4_rc_pic_type == IV_B_FRAME))
+ {
+ ps_curr_inp->s_rc_lap_out.ai8_pre_intra_sad[i4_qp_counter] = -1;
+ ps_curr_inp->s_rc_lap_out.ai8_pre_intra_sad[i4_qp_counter + 1] = -1;
+ ps_curr_inp->s_rc_lap_out.ai8_pre_intra_sad[i4_qp_counter + 2] = -1;
+ }
+ else
+ {
+ ps_curr_inp->s_rc_lap_out.ai8_pre_intra_sad[i4_qp_counter] =
+ i8_l1_intra_sad_nc_accounted;
+ ps_curr_inp->s_rc_lap_out.ai8_pre_intra_sad[i4_qp_counter + 1] =
+ i8_l1_intra_sad_nc_accounted;
+ ps_curr_inp->s_rc_lap_out.ai8_pre_intra_sad[i4_qp_counter + 2] =
+ i8_l1_intra_sad_nc_accounted;
+ }
+ ps_curr_inp->s_rc_lap_out.ai8_frame_acc_coarse_me_sad[i4_qp_counter] = acc_hme_l1_sad;
+ ps_curr_inp->s_rc_lap_out.ai8_frame_acc_coarse_me_sad[i4_qp_counter + 1] = acc_hme_l1_sad;
+ ps_curr_inp->s_rc_lap_out.ai8_frame_acc_coarse_me_sad[i4_qp_counter + 2] = acc_hme_l1_sad;
+ ps_curr_inp->s_rc_lap_out.i8_raw_l1_coarse_me_sad = raw_hme_sad; /* rewritten on every QP iteration */
+ }
+}
diff --git a/encoder/ihevce_frame_process.h b/encoder/ihevce_frame_process.h
new file mode 100644
index 0000000..cc023a2
--- /dev/null
+++ b/encoder/ihevce_frame_process.h
@@ -0,0 +1,120 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_frame_process.h
+*
+* \brief
+* This file contains interface definition of frame processing pass
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_FRAME_PROCESS_H_
+#define _IHEVCE_FRAME_PROCESS_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Variable Declarations */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+DOUBLE ihevce_compute_psnr(
+ UWORD8 *pu1_ip_buf,
+ UWORD8 *pu1_ref_buf,
+ WORD32 width,
+ WORD32 height,
+ WORD32 horz_jmp,
+ WORD32 ip_stride,
+ WORD32 ref_stride,
+ double *acc_mse,
+ ihevce_logo_attrs_t *ps_logo_ctxt,
+ WORD32 i4_is_chroma);
+
+void ihevce_pre_enc_manage_ref_pics(
+ enc_ctxt_t *ps_enc_ctxt,
+ ihevce_lap_enc_buf_t *ps_curr_inp,
+ pre_enc_me_ctxt_t *ps_curr_out,
+ WORD32 i4_ping_pong);
+
+void ihevce_manage_ref_pics(
+ enc_ctxt_t *ps_enc_ctxt,
+ ihevce_lap_enc_buf_t *ps_curr_inp,
+ slice_header_t *ps_slice_header,
+ WORD32 i4_me_frm_id,
+ WORD32 i4_thrd_id,
+ WORD32 i4_bitrate_instance_id);
+
+void ihevce_get_frame_lambda_prms(
+ enc_ctxt_t *ps_enc_ctxt,
+ pre_enc_me_ctxt_t *ps_cur_pic_ctxt,
+ WORD32 i4_cur_frame_qp,
+ WORD32 first_field,
+ WORD32 i4_is_ref_pic,
+ WORD32 i4_temporal_lyr_id,
+ double f_i_pic_lamda_modifier,
+ WORD32 i4_inst_id,
+ WORD32 i4_lambda_type);
+
+void calc_l1_level_hme_intra_sad_different_qp(
+ enc_ctxt_t *ps_enc_ctxt,
+ pre_enc_me_ctxt_t *ps_curr_out,
+ ihevce_lap_enc_buf_t *ps_curr_inp,
+ WORD32 i4_tot_ctb_l1_x,
+ WORD32 i4_tot_ctb_l1_y);
+
+WORD32 ihevce_pre_enc_process_frame_thrd(void *pv_hle_ctxt);
+
+WORD32 ihevce_enc_process_frame_thrd(void *pv_hle_ctxt);
+
+WORD32 ihevce_enc_frm_proc_slave_thrd(void *pv_frm_proc_thrd_ctxt);
+
+WORD32 ihevce_pre_enc_frm_proc_slave_thrd(void *pv_frm_proc_thrd_ctxt);
+
+void ihevce_set_pre_enc_prms(enc_ctxt_t *ps_enc_ctxt);
+
+#endif /* _IHEVCE_FRAME_PROCESS_H_ */
diff --git a/encoder/ihevce_frame_process_utils.c b/encoder/ihevce_frame_process_utils.c
new file mode 100644
index 0000000..a9c618b
--- /dev/null
+++ b/encoder/ihevce_frame_process_utils.c
@@ -0,0 +1,484 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*!
+******************************************************************************
+* \file ihevce_frame_process_utils.c
+*
+* \brief
+* This file contains definitions of top level functions related to frame
+* processing
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+* List of Functions
+*
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_debug.h"
+#include "ihevc_macros.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+#include "ihevc_common_tables.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_hle_interface.h"
+#include "ihevce_hle_q_func.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_multi_thrd_funcs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_checks.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_global_tables.h"
+#include "ihevce_rc_enc_structs.h"
+#include "ihevce_rc_interface.h"
+#include "ihevce_frame_process_utils.h"
+
+#include "cast_types.h"
+#include "osal.h"
+#include "osal_defaults.h"
+
+/*****************************************************************************/
+/* Globals */
+/*****************************************************************************/
+
+/************** Version Number string *******************/
+UWORD8 gau1_version_string[] = "i265-v4.13-218 Build ";
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/*!
+******************************************************************************
+*
+* @brief
+* API to return frame qp in constant qp mode based on init I frame qp,
+* slice type and current temporal layer.
+*
+* I picture is given the same qp as the init qp configured in static params
+* P picture is set equal to I frame qp + 1
+* B picture is set equal to P frame qp + temporal layer
+*
+* @param[in] static_params_frame_qp
+* frame level qp set for I frames in create time params
+*
+* @param[in] slice_type
+* slice type for current frame (I/P/B)
+*
+* @param[in] temporal_id
+* temporal layer ID of the current frame; it is added to the qp of B frames
+* only. I and P frames have temporal_id set to 0.
+*
+* @param[in] min_qp,max_qp
+* lower and upper bound to which the returned frame qp is clipped
+*
+* @param[in] ps_rc_quant_ctxt
+* rc quant context; only i2_min_qp / i2_max_qp are read, by the sanity checks
+*
+* @return
+* current frame qp, clipped to [min_qp, max_qp]
+*
+* @author
+* Ittiam
+*
+* @remarks
+* This is right place to plug in frame level RC call for current frame qp
+* allocation later when RC support is added
+*
+*****************************************************************************
+*/
+WORD32 ihevce_get_cur_frame_qp(
+ WORD32 static_params_frame_qp,
+ WORD32 slice_type,
+ WORD32 temporal_id,
+ WORD32 min_qp,
+ WORD32 max_qp,
+ rc_quant_t *ps_rc_quant_ctxt)
+{
+ WORD32 i4_curr_qp = static_params_frame_qp;
+
+ /* sanity checks; note max_qp itself is not checked against the rc quant range */
+ ASSERT(max_qp >= min_qp);
+ ASSERT((min_qp >= ps_rc_quant_ctxt->i2_min_qp) && (min_qp <= ps_rc_quant_ctxt->i2_max_qp));
+ ASSERT(
+ (static_params_frame_qp >= ps_rc_quant_ctxt->i2_min_qp) &&
+ (static_params_frame_qp <= ps_rc_quant_ctxt->i2_max_qp));
+ if(ISLICE == slice_type)
+ {
+ /* I frame qp is same as init qp in static params */
+ i4_curr_qp = static_params_frame_qp;
+ }
+ else if(PSLICE == slice_type)
+ {
+ /* P frame qp is I frame qp + 1 */
+ i4_curr_qp = static_params_frame_qp + 1;
+ }
+ else if(BSLICE == slice_type)
+ {
+ /* B frame qp is I frame qp + 1 + temporal layer id */
+ i4_curr_qp = static_params_frame_qp + temporal_id + 1;
+ }
+ else
+ {
+ /* illegal slice type; if ASSERT is a no-op, i4_curr_qp keeps the init qp */
+ ASSERT(0);
+ }
+
+ i4_curr_qp = CLIP3(i4_curr_qp, min_qp, max_qp);
+
+ return (i4_curr_qp);
+}
+
+/*!
+******************************************************************************
+* \if Function name : calc_block_ssim \endif
+*
+* \brief
+* Computes the window-weighted SSIM between a reference and a test block
+* (u1_win_size x u1_win_size samples, horizontally i4_horz_jump apart)
+* \return
+* SSIM of the block as an unsigned fixed point value in 1Q12
+*
+* \author
+* Ittiam
+*****************************************************************************
+*/
+unsigned int calc_block_ssim(
+ unsigned char *pu1_ref,
+ unsigned char *pu1_tst,
+ unsigned char *pu1_win,
+ WORD32 i4_horz_jump,
+ unsigned short u2_ref_stride,
+ unsigned short u2_tst_stride,
+ unsigned char u1_win_size,
+ unsigned char u1_win_q_shift)
+{
+ unsigned int u4_wtd_ref_mean, u4_wtd_tst_mean, u4_wtd_ref_sq, u4_wtd_tst_sq, u4_wtd_ref_tst;
+ unsigned int u4_wtd_ref_mean_sq, u4_wtd_tst_mean_sq, u4_wtd_ref_tst_mean_prod;
+ unsigned char u1_wt, u1_ref_smpl, u1_tst_smpl;
+ unsigned short u2_wtd_ref_smpl, u2_wtd_tst_smpl, u2_win_q_rounding;
+ int i4_row, i4_col;
+
+ u4_wtd_ref_mean = 0;
+ u4_wtd_tst_mean = 0;
+ u4_wtd_ref_sq = 0;
+ u4_wtd_tst_sq = 0;
+ u4_wtd_ref_tst = 0;
+
+ for(i4_row = 0; i4_row < u1_win_size; i4_row++)
+ {
+ for(i4_col = 0; i4_col < u1_win_size; i4_col++)
+ {
+ u1_wt = *pu1_win++;
+ u1_ref_smpl = pu1_ref[i4_col * i4_horz_jump];
+ u1_tst_smpl = pu1_tst[i4_col * i4_horz_jump];
+
+ u2_wtd_ref_smpl = u1_wt * u1_ref_smpl;
+ u2_wtd_tst_smpl = u1_wt * u1_tst_smpl;
+
+ u4_wtd_ref_mean += u2_wtd_ref_smpl;
+ u4_wtd_tst_mean += u2_wtd_tst_smpl;
+
+ u4_wtd_ref_sq += u2_wtd_ref_smpl * u1_ref_smpl;
+ u4_wtd_tst_sq += u2_wtd_tst_smpl * u1_tst_smpl;
+ u4_wtd_ref_tst += u2_wtd_ref_smpl * u1_tst_smpl;
+ }
+ pu1_ref += u2_ref_stride;
+ pu1_tst += u2_tst_stride;
+ }
+
+ {
+ unsigned int u4_num, u4_den, u4_term1;
+
+ u2_win_q_rounding = (1 << u1_win_q_shift) >> 1;
+ u4_wtd_ref_mean += (u2_win_q_rounding >> 8);
+ u4_wtd_tst_mean += (u2_win_q_rounding >> 8);
+
+ /* Keep the mean terms within 16-bits before squaring (shift needs u1_win_q_shift >= 8) */
+ u4_wtd_ref_mean >>= (u1_win_q_shift - 8);
+ u4_wtd_tst_mean >>= (u1_win_q_shift - 8);
+
+ /* Bring down the square of sum terms to same Q format as the sum of square terms */
+ u4_wtd_ref_mean_sq = (u4_wtd_ref_mean * u4_wtd_ref_mean + 16) >> (16 - u1_win_q_shift);
+ u4_wtd_tst_mean_sq = (u4_wtd_tst_mean * u4_wtd_tst_mean + 16) >> (16 - u1_win_q_shift);
+ u4_wtd_ref_tst_mean_prod = (u4_wtd_ref_mean * u4_wtd_tst_mean + 16) >>
+ (16 - u1_win_q_shift);
+
+ /* Compute self and cross variances (clamped at 0 since the terms are unsigned) */
+ if(u4_wtd_ref_sq > u4_wtd_ref_mean_sq)
+ u4_wtd_ref_sq -= u4_wtd_ref_mean_sq;
+ else
+ u4_wtd_ref_sq = 0;
+
+ if(u4_wtd_tst_sq > u4_wtd_tst_mean_sq)
+ u4_wtd_tst_sq -= u4_wtd_tst_mean_sq;
+ else
+ u4_wtd_tst_sq = 0;
+
+ if(u4_wtd_ref_tst > u4_wtd_ref_tst_mean_prod)
+ u4_wtd_ref_tst -= u4_wtd_ref_tst_mean_prod;
+ else
+ u4_wtd_ref_tst = 0;
+
+ /* Keep the numerator in Q12 format before division (shift needs u1_win_q_shift <= 12) */
+ u4_num = ((u4_wtd_ref_tst_mean_prod << 1) + C1) << (12 - u1_win_q_shift);
+ u4_den = ((u4_wtd_ref_mean_sq + u4_wtd_tst_mean_sq) + C1 + u2_win_q_rounding) >>
+ u1_win_q_shift;
+ u4_term1 = (u4_num) / u4_den;
+
+ u4_num = (u4_wtd_ref_tst << 1) + C2;
+ u4_den = (u4_wtd_ref_sq + u4_wtd_tst_sq) + C2;
+ /* If numerator takes less than 20-bits, product would not overflow; so no need to normalize */
+ if(u4_num < 1048576)
+ {
+ return ((u4_num * u4_term1) / u4_den);
+ }
+
+ /* While the above should be done really with getRange calculation, for simplicity,
+ the other cases go through a less accurate calculation */
+ u4_num = (u4_num + u2_win_q_rounding) >> u1_win_q_shift;
+ u4_den = (u4_den + u2_win_q_rounding) >> u1_win_q_shift;
+
+ /* What is returned is SSIM in 1Q12 */
+ return ((u4_term1 * u4_num) / u4_den);
+ }
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_fill_sei_payload \endif
+*
+* \brief
+* Walks the synch control buffer up to the END tag and copies registered SEI
+* payloads to the output: key-frame SEI only for IDR pictures, all-frame SEI always
+* \param[in] ps_enc_ctxt
+* Encoder Context (unused)
+*
+* \param[in] ps_curr_inp
+* Current Input pointer; pv_synch_ctrl_bufs is read as {tag, length, data} records
+*
+* \param[out] ps_curr_out
+* Current Output pointer; as_sei_payload[] and u4_num_sei_payload are written
+*
+* \return
+* None
+* \remarks NOTE(review): payload count and i4_cmd_len are not bounds-checked here - confirm callers
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_fill_sei_payload(
+ enc_ctxt_t *ps_enc_ctxt,
+ ihevce_lap_enc_buf_t *ps_curr_inp,
+ frm_proc_ent_cod_ctxt_t *ps_curr_out)
+{
+ UWORD32 *pu4_length, i4_cmd_len;
+ UWORD32 *pu4_tag, i4_pic_type;
+ UWORD8 *pu1_user_data;
+
+ pu4_tag = ((UWORD32 *)(ps_curr_inp->s_input_buf.pv_synch_ctrl_bufs));
+ ps_curr_out->u4_num_sei_payload = 0;
+ i4_pic_type = ps_curr_inp->s_lap_out.i4_pic_type;
+ (void)ps_enc_ctxt;
+ while(1)
+ {
+ if(((*pu4_tag) & IHEVCE_COMMANDS_TAG_MASK) == IHEVCE_SYNCH_API_END_TAG)
+ break;
+
+ pu4_length = pu4_tag + 1;
+ pu1_user_data = (UWORD8 *)(pu4_length + 1);
+ i4_cmd_len = *pu4_length;
+
+ if((*pu4_tag & IHEVCE_COMMANDS_TAG_MASK) == IHEVCE_SYNCH_API_REG_KEYFRAME_SEI_TAG)
+ {
+ if(i4_pic_type == IV_IDR_FRAME)
+ {
+ memcpy(
+ (void *)((ps_curr_out->as_sei_payload[ps_curr_out->u4_num_sei_payload]
+ .pu1_sei_payload)),
+ (void *)pu1_user_data,
+ i4_cmd_len);
+ ps_curr_out->as_sei_payload[ps_curr_out->u4_num_sei_payload].u4_payload_length =
+ (i4_cmd_len);
+ ps_curr_out->as_sei_payload[ps_curr_out->u4_num_sei_payload].u4_payload_type =
+ ((*pu4_tag & IHEVCE_PAYLOAD_TYPE_MASK) >> IHEVCE_PAYLOAD_TYPE_SHIFT);
+ ps_curr_out->u4_num_sei_payload++;
+ }
+ }
+ else if((*pu4_tag & IHEVCE_COMMANDS_TAG_MASK) == IHEVCE_SYNCH_API_REG_ALLFRAME_SEI_TAG)
+ {
+ memcpy(
+ (void *)((
+ ps_curr_out->as_sei_payload[ps_curr_out->u4_num_sei_payload].pu1_sei_payload)),
+ (void *)pu1_user_data,
+ i4_cmd_len);
+ ps_curr_out->as_sei_payload[ps_curr_out->u4_num_sei_payload].u4_payload_length =
+ (i4_cmd_len);
+ ps_curr_out->as_sei_payload[ps_curr_out->u4_num_sei_payload].u4_payload_type =
+ ((*pu4_tag & IHEVCE_PAYLOAD_TYPE_MASK) >> IHEVCE_PAYLOAD_TYPE_SHIFT);
+ ps_curr_out->u4_num_sei_payload++;
+ }
+
+ //The formula (((x-1)>>2)+1) is ceil(x / 4): the payload length in 32-bit words, i.e. rounded up to the next 4-byte boundary.
+ //The extra 2 words (not bytes) skip the tag and the payload length; x == 0 is special-cased because (x-1) wraps for unsigned x.
+ if(i4_cmd_len)
+ pu4_tag += (((i4_cmd_len - 1) >> 2) + 1 + 2);
+ else
+ pu4_tag += 2;
+ }
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_dyn_bitrate \endif
+*
+* \brief
+* Call back function to be called for changing the bitrate
+* \param[in] pv_hle_ctxt : HLE context, cast to ihevce_hle_ctxt_t *
+* \param[in] pv_dyn_bitrate_prms : new bitrates and target ids, cast to ihevce_dyn_config_prms_t *
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_dyn_bitrate(void *pv_hle_ctxt, void *pv_dyn_bitrate_prms)
+{
+ ihevce_hle_ctxt_t *ps_hle_ctxt = (ihevce_hle_ctxt_t *)pv_hle_ctxt;
+ ihevce_dyn_config_prms_t *ps_dyn_bitrate_prms = (ihevce_dyn_config_prms_t *)pv_dyn_bitrate_prms;
+ enc_ctxt_t *ps_enc_ctxt =
+ (enc_ctxt_t *)ps_hle_ctxt->apv_enc_hdl[ps_dyn_bitrate_prms->i4_tgt_res_id];
+ ihevce_static_cfg_params_t *ps_static_cfg_params = ps_hle_ctxt->ps_static_cfg_prms;
+
+ if(ps_enc_ctxt->ps_stat_prms->i4_log_dump_level > 0)
+ {
+ ps_static_cfg_params->s_sys_api.ihevce_printf(
+ ps_static_cfg_params->s_sys_api.pv_cb_handle,
+ "\n Average Bitrate changed to %d",
+ ps_dyn_bitrate_prms->i4_new_tgt_bitrate);
+ ps_static_cfg_params->s_sys_api.ihevce_printf(
+ ps_static_cfg_params->s_sys_api.pv_cb_handle,
+ "\n Peak Bitrate changed to %d",
+ ps_dyn_bitrate_prms->i4_new_peak_bitrate);
+ }
+
+
+ /* acquire mutex lock for rate control calls */
+ osal_mutex_lock(ps_enc_ctxt->pv_rc_mutex_lock_hdl);
+
+ ihevce_rc_register_dyn_change_bitrate(
+ ps_enc_ctxt->s_module_ctxt.apv_rc_ctxt[ps_dyn_bitrate_prms->i4_tgt_br_id],
+ (LWORD64)ps_dyn_bitrate_prms->i4_new_tgt_bitrate,
+ (LWORD64)ps_dyn_bitrate_prms->i4_new_peak_bitrate,
+ ps_dyn_bitrate_prms->i4_new_rate_factor,
+ ps_enc_ctxt->ps_stat_prms->s_config_prms.i4_rate_control_mode);
+
+ /* unlock rate control context */
+ osal_mutex_unlock(ps_enc_ctxt->pv_rc_mutex_lock_hdl);
+ return;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_validate_encoder_parameters \endif
+*
+* \brief
+* Validates the static config params via ihevce_hle_validate_static_params()
+*
+* \return
+* Value returned by ihevce_hle_validate_static_params(), passed through as is
+*
+* \author
+* Ittiam
+*****************************************************************************
+*/
+WORD32 ihevce_validate_encoder_parameters(ihevce_static_cfg_params_t *ps_static_cfg_prms)
+{
+ return (ihevce_hle_validate_static_params(ps_static_cfg_prms));
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_get_encoder_version \endif
+*
+* \brief
+* Returns the encoder version string held in the global gau1_version_string
+*
+* \return
+* Pointer to the version string; it points into a global array, do not free it
+*
+* \author
+* Ittiam
+*****************************************************************************
+*/
+const char *ihevce_get_encoder_version(void)
+{
+ return ((const char *)gau1_version_string);
+}
diff --git a/encoder/ihevce_frame_process_utils.h b/encoder/ihevce_frame_process_utils.h
new file mode 100644
index 0000000..fef5b44
--- /dev/null
+++ b/encoder/ihevce_frame_process_utils.h
@@ -0,0 +1,86 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_frame_process_utils.h
+*
+* \brief
+* This file contains declarations of top level functions related to frame
+* processing
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_FRAME_PROCESS_UTILS_H_
+#define _IHEVCE_FRAME_PROCESS_UTILS_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+#define C1 13317 /* (0.01*255)^2 in Q11, i.e. twice (0.01*255)^2 in Q10 */
+#define C2 119854 /* (0.03*255)^2 in Q11, i.e. twice (0.03*255)^2 in Q10 */
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Variable Declarations */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+WORD32 ihevce_get_cur_frame_qp(
+ WORD32 static_params_frame_qp,
+ WORD32 slice_type,
+ WORD32 temporal_id,
+ WORD32 min_qp,
+ WORD32 max_qp,
+ rc_quant_t *ps_rc_quant_ctxt);
+
+void ihevce_fill_sei_payload(
+ enc_ctxt_t *ps_enc_ctxt,
+ ihevce_lap_enc_buf_t *ps_curr_inp,
+ frm_proc_ent_cod_ctxt_t *ps_curr_out);
+
+void ihevce_dyn_bitrate(void *pv_hle_ctxt, void *pv_dyn_bitrate_prms);
+
+#endif /* _IHEVCE_FRAME_PROCESS_UTILS_H_ */
diff --git a/encoder/ihevce_function_selector.c b/encoder/ihevce_function_selector.c
new file mode 100644
index 0000000..4e26bdd
--- /dev/null
+++ b/encoder/ihevce_function_selector.c
@@ -0,0 +1,464 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ihevce_function_selector.c
+*
+* @brief
+* Contains functions to initialize the function pointers used by the HEVC encoder
+*
+* @author
+* ittiam
+*
+* @par List of Functions:
+* ihevce_default_arch()
+* ihevce_init_function_ptr_generic()
+* ihevce_init_function_ptr_av8()
+* ihevce_init_function_ptr_a9q()
+* ihevce_init_function_ptr()
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <assert.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_multi_thrd_funcs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_default_arch \endif
+*
+* \brief
+*    Returns the architecture chosen at compile time from the ENABLE_NEON,
+*    ARMV8 and ARM macros; ARCH_ARM_NONEON when no NEON combination matches
+*****************************************************************************
+*/
+IV_ARCH_T ihevce_default_arch(void)
+{
+#if !defined(ENABLE_NEON) || !(defined(ARMV8) || defined(ARM))
+    return ARCH_ARM_NONEON;
+#elif defined(ARMV8)
+    return ARCH_ARM_V8_NEON;
+#else
+    return ARCH_ARM_A9Q;
+#endif
+}
+
+// clang-format off
+/*!
+******************************************************************************
+* \if Function name : ihevce_init_function_ptr_generic \endif
+*
+* \brief
+*    Binds each s_func_selector entry set here to its un-suffixed ihevc_*
+*    routine, i.e. the variant carrying no _av8/_a9q/_neon suffix
+*****************************************************************************
+*/
+static void ihevce_init_function_ptr_generic(enc_ctxt_t *ps_enc_ctxt)
+{
+    ps_enc_ctxt->s_func_selector.ihevc_deblk_chroma_horz_fptr = ihevc_deblk_chroma_horz;
+    ps_enc_ctxt->s_func_selector.ihevc_deblk_chroma_vert_fptr = ihevc_deblk_chroma_vert;
+    ps_enc_ctxt->s_func_selector.ihevc_deblk_422chroma_horz_fptr = ihevc_deblk_422chroma_horz;
+    ps_enc_ctxt->s_func_selector.ihevc_deblk_422chroma_vert_fptr = ihevc_deblk_422chroma_vert;
+    ps_enc_ctxt->s_func_selector.ihevc_deblk_luma_vert_fptr = ihevc_deblk_luma_vert;
+    ps_enc_ctxt->s_func_selector.ihevc_deblk_luma_horz_fptr = ihevc_deblk_luma_horz;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_chroma_copy_fptr = ihevc_inter_pred_chroma_copy;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_chroma_copy_w16out_fptr = ihevc_inter_pred_chroma_copy_w16out;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_chroma_horz_fptr = ihevc_inter_pred_chroma_horz;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_chroma_horz_w16out_fptr = ihevc_inter_pred_chroma_horz_w16out;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_chroma_vert_fptr = ihevc_inter_pred_chroma_vert;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_chroma_vert_w16inp_fptr = ihevc_inter_pred_chroma_vert_w16inp;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_chroma_vert_w16inp_w16out_fptr = ihevc_inter_pred_chroma_vert_w16inp_w16out;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_chroma_vert_w16out_fptr = ihevc_inter_pred_chroma_vert_w16out;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_luma_horz_fptr = ihevc_inter_pred_luma_horz;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_luma_vert_fptr = ihevc_inter_pred_luma_vert;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_luma_vert_w16out_fptr = ihevc_inter_pred_luma_vert_w16out;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_luma_vert_w16inp_fptr = ihevc_inter_pred_luma_vert_w16inp;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_luma_copy_fptr = ihevc_inter_pred_luma_copy;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_luma_copy_w16out_fptr = ihevc_inter_pred_luma_copy_w16out;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_luma_horz_w16out_fptr = ihevc_inter_pred_luma_horz_w16out;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_luma_vert_w16inp_w16out_fptr = ihevc_inter_pred_luma_vert_w16inp_w16out;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_chroma_ref_substitution_fptr = ihevc_intra_pred_chroma_ref_substitution;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_luma_ref_substitution_fptr = ihevc_intra_pred_luma_ref_substitution;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_ref_filtering_fptr = ihevc_intra_pred_ref_filtering;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_chroma_dc_fptr = ihevc_intra_pred_chroma_dc;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_chroma_horz_fptr = ihevc_intra_pred_chroma_horz;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_chroma_mode2_fptr = ihevc_intra_pred_chroma_mode2;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_chroma_mode_18_34_fptr = ihevc_intra_pred_chroma_mode_18_34;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_chroma_mode_27_to_33_fptr = ihevc_intra_pred_chroma_mode_27_to_33;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_chroma_mode_3_to_9_fptr = ihevc_intra_pred_chroma_mode_3_to_9;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_chroma_planar_fptr = ihevc_intra_pred_chroma_planar;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_chroma_ver_fptr = ihevc_intra_pred_chroma_ver;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_chroma_mode_11_to_17_fptr = ihevc_intra_pred_chroma_mode_11_to_17;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_chroma_mode_19_to_25_fptr = ihevc_intra_pred_chroma_mode_19_to_25;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_luma_mode_11_to_17_fptr = ihevc_intra_pred_luma_mode_11_to_17;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_luma_mode_19_to_25_fptr = ihevc_intra_pred_luma_mode_19_to_25;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_luma_dc_fptr = ihevc_intra_pred_luma_dc;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_luma_horz_fptr = ihevc_intra_pred_luma_horz;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_luma_mode2_fptr = ihevc_intra_pred_luma_mode2;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_luma_mode_18_34_fptr = ihevc_intra_pred_luma_mode_18_34;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_luma_mode_27_to_33_fptr = ihevc_intra_pred_luma_mode_27_to_33;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_luma_mode_3_to_9_fptr = ihevc_intra_pred_luma_mode_3_to_9;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_luma_planar_fptr = ihevc_intra_pred_luma_planar;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_luma_ver_fptr = ihevc_intra_pred_luma_ver;
+
+    ps_enc_ctxt->s_func_selector.ihevc_itrans_recon_4x4_ttype1_fptr = ihevc_itrans_recon_4x4_ttype1;
+    ps_enc_ctxt->s_func_selector.ihevc_itrans_recon_4x4_fptr = ihevc_itrans_recon_4x4;
+    ps_enc_ctxt->s_func_selector.ihevc_itrans_recon_8x8_fptr = ihevc_itrans_recon_8x8;
+    ps_enc_ctxt->s_func_selector.ihevc_itrans_recon_16x16_fptr = ihevc_itrans_recon_16x16;
+    ps_enc_ctxt->s_func_selector.ihevc_itrans_recon_32x32_fptr = ihevc_itrans_recon_32x32;
+    ps_enc_ctxt->s_func_selector.ihevc_chroma_itrans_recon_4x4_fptr = ihevc_chroma_itrans_recon_4x4;
+    ps_enc_ctxt->s_func_selector.ihevc_chroma_itrans_recon_8x8_fptr = ihevc_chroma_itrans_recon_8x8;
+    ps_enc_ctxt->s_func_selector.ihevc_chroma_itrans_recon_16x16_fptr = ihevc_chroma_itrans_recon_16x16;
+
+    ps_enc_ctxt->s_func_selector.ihevc_memcpy_mul_8_fptr = ihevc_memcpy_mul_8;
+    ps_enc_ctxt->s_func_selector.ihevc_memcpy_fptr = ihevc_memcpy;
+    ps_enc_ctxt->s_func_selector.ihevc_memset_mul_8_fptr = ihevc_memset_mul_8;
+    ps_enc_ctxt->s_func_selector.ihevc_memset_fptr = ihevc_memset;
+    ps_enc_ctxt->s_func_selector.ihevc_memset_16bit_mul_8_fptr = ihevc_memset_16bit_mul_8;
+    ps_enc_ctxt->s_func_selector.ihevc_memset_16bit_fptr = ihevc_memset_16bit;
+
+    ps_enc_ctxt->s_func_selector.ihevc_weighted_pred_bi_fptr = ihevc_weighted_pred_bi;
+    ps_enc_ctxt->s_func_selector.ihevc_weighted_pred_bi_default_fptr = ihevc_weighted_pred_bi_default;
+    ps_enc_ctxt->s_func_selector.ihevc_weighted_pred_uni_fptr = ihevc_weighted_pred_uni;
+    ps_enc_ctxt->s_func_selector.ihevc_weighted_pred_chroma_bi_fptr = ihevc_weighted_pred_chroma_bi;
+    ps_enc_ctxt->s_func_selector.ihevc_weighted_pred_chroma_bi_default_fptr = ihevc_weighted_pred_chroma_bi_default;
+    ps_enc_ctxt->s_func_selector.ihevc_weighted_pred_chroma_uni_fptr = ihevc_weighted_pred_chroma_uni;
+
+    ps_enc_ctxt->s_func_selector.ihevc_resi_trans_4x4_ttype1_fptr = ihevc_resi_trans_4x4_ttype1;
+    ps_enc_ctxt->s_func_selector.ihevc_resi_trans_4x4_fptr = ihevc_resi_trans_4x4;
+    ps_enc_ctxt->s_func_selector.ihevc_resi_trans_8x8_fptr = ihevc_resi_trans_8x8;
+    ps_enc_ctxt->s_func_selector.ihevc_resi_trans_16x16_fptr = ihevc_resi_trans_16x16;
+    ps_enc_ctxt->s_func_selector.ihevc_resi_trans_32x32_fptr = ihevc_resi_trans_32x32;
+
+    ps_enc_ctxt->s_func_selector.ihevc_quant_iquant_ssd_fptr = ihevc_quant_iquant_ssd;
+    ps_enc_ctxt->s_func_selector.ihevc_quant_iquant_ssd_rdoq_fptr = ihevc_quant_iquant_ssd_rdoq;
+    ps_enc_ctxt->s_func_selector.ihevc_quant_iquant_ssd_flat_scale_mat_fptr = ihevc_quant_iquant_ssd_flat_scale_mat;
+    ps_enc_ctxt->s_func_selector.ihevc_quant_iquant_ssd_flat_scale_mat_rdoq_fptr = ihevc_quant_iquant_ssd_flat_scale_mat_rdoq;
+    ps_enc_ctxt->s_func_selector.ihevc_q_iq_ssd_var_rnd_fact_fptr = ihevc_q_iq_ssd_var_rnd_fact;
+    ps_enc_ctxt->s_func_selector.ihevc_q_iq_ssd_flat_scale_mat_var_rnd_fact_fptr = ihevc_q_iq_ssd_flat_scale_mat_var_rnd_fact;
+
+    ps_enc_ctxt->s_func_selector.ihevc_quant_iquant_fptr = ihevc_quant_iquant;
+    ps_enc_ctxt->s_func_selector.ihevc_quant_iquant_rdoq_fptr = ihevc_quant_iquant_rdoq;
+    ps_enc_ctxt->s_func_selector.ihevc_quant_iquant_flat_scale_mat_fptr = ihevc_quant_iquant_flat_scale_mat;
+    ps_enc_ctxt->s_func_selector.ihevc_quant_iquant_flat_scale_mat_rdoq_fptr = ihevc_quant_iquant_flat_scale_mat_rdoq;
+    ps_enc_ctxt->s_func_selector.ihevc_q_iq_var_rnd_fact_fptr = ihevc_q_iq_var_rnd_fact;
+    ps_enc_ctxt->s_func_selector.ihevc_q_iq_flat_scale_mat_var_rnd_fact_fptr = ihevc_q_iq_flat_scale_mat_var_rnd_fact;
+    ps_enc_ctxt->s_func_selector.ihevc_pad_bottom_fptr = ihevc_pad_bottom;
+    ps_enc_ctxt->s_func_selector.ihevc_pad_horz_chroma_fptr = ihevc_pad_horz_chroma;
+    ps_enc_ctxt->s_func_selector.ihevc_pad_horz_luma_fptr = ihevc_pad_horz_luma;
+    ps_enc_ctxt->s_func_selector.ihevc_pad_left_chroma_fptr = ihevc_pad_left_chroma;
+    ps_enc_ctxt->s_func_selector.ihevc_pad_left_luma_fptr = ihevc_pad_left_luma;
+    ps_enc_ctxt->s_func_selector.ihevc_pad_right_chroma_fptr = ihevc_pad_right_chroma;
+    ps_enc_ctxt->s_func_selector.ihevc_pad_right_luma_fptr = ihevc_pad_right_luma;
+    ps_enc_ctxt->s_func_selector.ihevc_pad_top_fptr = ihevc_pad_top;
+    ps_enc_ctxt->s_func_selector.ihevc_pad_vert_fptr = ihevc_pad_vert;
+    ps_enc_ctxt->s_func_selector.ihevc_sao_edge_offset_class0_fptr = ihevc_sao_edge_offset_class0;
+    ps_enc_ctxt->s_func_selector.ihevc_sao_edge_offset_class1_fptr = ihevc_sao_edge_offset_class1;
+    ps_enc_ctxt->s_func_selector.ihevc_sao_edge_offset_class2_fptr = ihevc_sao_edge_offset_class2;
+    ps_enc_ctxt->s_func_selector.ihevc_sao_edge_offset_class3_fptr = ihevc_sao_edge_offset_class3;
+
+    ps_enc_ctxt->s_func_selector.ihevc_sao_edge_offset_class0_chroma_fptr = ihevc_sao_edge_offset_class0_chroma;
+    ps_enc_ctxt->s_func_selector.ihevc_sao_edge_offset_class1_chroma_fptr = ihevc_sao_edge_offset_class1_chroma;
+    ps_enc_ctxt->s_func_selector.ihevc_sao_edge_offset_class2_chroma_fptr = ihevc_sao_edge_offset_class2_chroma;
+    ps_enc_ctxt->s_func_selector.ihevc_sao_edge_offset_class3_chroma_fptr = ihevc_sao_edge_offset_class3_chroma;
+}
+
+#ifdef ENABLE_NEON
+#ifdef ARMV8
+/*!
+******************************************************************************
+* \if Function name : ihevce_init_function_ptr_av8 \endif
+*
+* \brief
+*    Sets a subset of s_func_selector entries, mostly to _av8 symbols plus some
+*    _neonintr/_neon ones; a few entries are set to un-suffixed routines
+*****************************************************************************
+*/
+static void ihevce_init_function_ptr_av8(enc_ctxt_t *ps_enc_ctxt)
+{
+    ps_enc_ctxt->s_func_selector.ihevc_deblk_chroma_horz_fptr = ihevc_deblk_chroma_horz_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_deblk_chroma_vert_fptr = ihevc_deblk_chroma_vert_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_deblk_luma_vert_fptr = ihevc_deblk_luma_vert_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_deblk_luma_horz_fptr = ihevc_deblk_luma_horz_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_chroma_copy_fptr = ihevc_inter_pred_chroma_copy_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_chroma_copy_w16out_fptr = ihevc_inter_pred_chroma_copy_w16out_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_chroma_horz_fptr = ihevc_inter_pred_chroma_horz;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_chroma_horz_w16out_fptr = ihevc_inter_pred_chroma_horz_w16out_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_chroma_vert_fptr = ihevc_inter_pred_chroma_vert;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_chroma_vert_w16inp_fptr = ihevc_inter_pred_chroma_vert_w16inp_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_chroma_vert_w16inp_w16out_fptr = ihevc_inter_pred_chroma_vert_w16inp_w16out_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_chroma_vert_w16out_fptr = ihevc_inter_pred_chroma_vert_w16out_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_luma_horz_fptr = ihevc_inter_pred_luma_horz_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_luma_vert_fptr = ihevc_inter_pred_luma_vert_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_luma_vert_w16out_fptr = ihevc_inter_pred_luma_vert_w16out_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_luma_vert_w16inp_fptr = ihevc_inter_pred_luma_vert_w16inp_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_luma_copy_fptr = ihevc_inter_pred_luma_copy_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_luma_copy_w16out_fptr = ihevc_inter_pred_luma_copy_w16out_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_luma_horz_w16out_fptr = ihevc_inter_pred_luma_horz_w16out_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_luma_vert_w16inp_w16out_fptr = ihevc_inter_pred_luma_vert_w16inp_w16out_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_chroma_ref_substitution_fptr = ihevc_intra_pred_chroma_ref_substitution;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_luma_ref_substitution_fptr = ihevc_intra_pred_luma_ref_substitution;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_ref_filtering_fptr = ihevc_intra_pred_ref_filtering_neonintr;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_chroma_dc_fptr = ihevc_intra_pred_chroma_dc_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_chroma_horz_fptr = ihevc_intra_pred_chroma_horz_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_chroma_mode2_fptr = ihevc_intra_pred_chroma_mode2_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_chroma_mode_18_34_fptr = ihevc_intra_pred_chroma_mode_18_34_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_chroma_mode_27_to_33_fptr = ihevc_intra_pred_chroma_mode_27_to_33_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_chroma_mode_3_to_9_fptr = ihevc_intra_pred_chroma_mode_3_to_9_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_chroma_planar_fptr = ihevc_intra_pred_chroma_planar_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_chroma_ver_fptr = ihevc_intra_pred_chroma_ver_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_chroma_mode_11_to_17_fptr = ihevc_intra_pred_chroma_mode_11_to_17_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_chroma_mode_19_to_25_fptr = ihevc_intra_pred_chroma_mode_19_to_25_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_luma_mode_11_to_17_fptr = ihevc_intra_pred_luma_mode_11_to_17_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_luma_mode_19_to_25_fptr = ihevc_intra_pred_luma_mode_19_to_25_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_luma_dc_fptr = ihevc_intra_pred_luma_dc_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_luma_horz_fptr = ihevc_intra_pred_luma_horz_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_luma_mode2_fptr = ihevc_intra_pred_luma_mode2_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_luma_mode_18_34_fptr = ihevc_intra_pred_luma_mode_18_34_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_luma_mode_27_to_33_fptr = ihevc_intra_pred_luma_mode_27_to_33_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_luma_mode_3_to_9_fptr = ihevc_intra_pred_luma_mode_3_to_9_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_luma_planar_fptr = ihevc_intra_pred_luma_planar_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_luma_ver_fptr = ihevc_intra_pred_luma_ver_av8;
+
+    ps_enc_ctxt->s_func_selector.ihevc_itrans_recon_4x4_ttype1_fptr = ihevc_itrans_recon_4x4_ttype1_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_itrans_recon_4x4_fptr = ihevc_itrans_recon_4x4_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_itrans_recon_8x8_fptr = ihevc_itrans_recon_8x8_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_itrans_recon_16x16_fptr = ihevc_itrans_recon_16x16_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_itrans_recon_32x32_fptr = ihevc_itrans_recon_32x32;
+
+    ps_enc_ctxt->s_func_selector.ihevc_memcpy_mul_8_fptr = ihevc_memcpy_mul_8_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_memcpy_fptr = ihevc_memcpy_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_memset_mul_8_fptr = ihevc_memset_mul_8_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_memset_fptr = ihevc_memset_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_memset_16bit_mul_8_fptr = ihevc_memset_16bit_mul_8_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_memset_16bit_fptr = ihevc_memset_16bit_av8;
+
+    ps_enc_ctxt->s_func_selector.ihevc_weighted_pred_bi_fptr = ihevc_weighted_pred_bi_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_weighted_pred_bi_default_fptr = ihevc_weighted_pred_bi_default_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_weighted_pred_uni_fptr = ihevc_weighted_pred_uni_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_weighted_pred_chroma_bi_fptr = ihevc_weighted_pred_chroma_bi_neonintr;
+    ps_enc_ctxt->s_func_selector.ihevc_weighted_pred_chroma_bi_default_fptr = ihevc_weighted_pred_chroma_bi_default_neonintr;
+    ps_enc_ctxt->s_func_selector.ihevc_weighted_pred_chroma_uni_fptr = ihevc_weighted_pred_chroma_uni_neonintr;
+
+    ps_enc_ctxt->s_func_selector.ihevc_resi_trans_4x4_ttype1_fptr = ihevc_resi_trans_4x4_ttype1_neon;
+    ps_enc_ctxt->s_func_selector.ihevc_resi_trans_4x4_fptr = ihevc_resi_trans_4x4_neon;
+    ps_enc_ctxt->s_func_selector.ihevc_resi_trans_8x8_fptr = ihevc_resi_trans_8x8_neon;
+    ps_enc_ctxt->s_func_selector.ihevc_resi_trans_16x16_fptr = ihevc_resi_trans_16x16_neon;
+    ps_enc_ctxt->s_func_selector.ihevc_resi_trans_32x32_fptr = ihevc_resi_trans_32x32_neon;
+
+    ps_enc_ctxt->s_func_selector.ihevc_quant_iquant_ssd_flat_scale_mat_fptr = ihevc_quant_iquant_ssd_flat_scale_mat_neon;
+    ps_enc_ctxt->s_func_selector.ihevc_q_iq_ssd_flat_scale_mat_var_rnd_fact_fptr = ihevc_q_iq_ssd_flat_scale_mat_var_rnd_fact_neon;
+
+    ps_enc_ctxt->s_func_selector.ihevc_sao_edge_offset_class0_fptr = ihevc_sao_edge_offset_class0_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_sao_edge_offset_class1_fptr = ihevc_sao_edge_offset_class1_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_sao_edge_offset_class2_fptr = ihevc_sao_edge_offset_class2_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_sao_edge_offset_class3_fptr = ihevc_sao_edge_offset_class3_av8;
+
+    ps_enc_ctxt->s_func_selector.ihevc_sao_edge_offset_class0_chroma_fptr = ihevc_sao_edge_offset_class0_chroma_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_sao_edge_offset_class1_chroma_fptr = ihevc_sao_edge_offset_class1_chroma_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_sao_edge_offset_class2_chroma_fptr = ihevc_sao_edge_offset_class2_chroma_av8;
+    ps_enc_ctxt->s_func_selector.ihevc_sao_edge_offset_class3_chroma_fptr = ihevc_sao_edge_offset_class3_chroma_av8;
+}
+
+#else
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_init_function_ptr_a9q \endif
+*
+* \brief
+*    Sets a subset of s_func_selector entries, mostly to _a9q symbols plus two
+*    _neon ones; several entries are set to un-suffixed routines
+*****************************************************************************
+*/
+static void ihevce_init_function_ptr_a9q(enc_ctxt_t *ps_enc_ctxt)
+{
+    ps_enc_ctxt->s_func_selector.ihevc_deblk_chroma_horz_fptr = ihevc_deblk_chroma_horz_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_deblk_chroma_vert_fptr = ihevc_deblk_chroma_vert_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_deblk_luma_vert_fptr = ihevc_deblk_luma_vert_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_deblk_luma_horz_fptr = ihevc_deblk_luma_horz_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_chroma_copy_fptr = ihevc_inter_pred_chroma_copy_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_chroma_copy_w16out_fptr = ihevc_inter_pred_chroma_copy_w16out_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_chroma_horz_fptr = ihevc_inter_pred_chroma_horz;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_chroma_horz_w16out_fptr = ihevc_inter_pred_chroma_horz_w16out_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_chroma_vert_fptr = ihevc_inter_pred_chroma_vert_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_chroma_vert_w16inp_fptr = ihevc_inter_pred_chroma_vert_w16inp_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_chroma_vert_w16inp_w16out_fptr = ihevc_inter_pred_chroma_vert_w16inp_w16out_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_chroma_vert_w16out_fptr = ihevc_inter_pred_chroma_vert_w16out_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_luma_horz_fptr = ihevc_inter_pred_luma_horz_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_luma_vert_fptr = ihevc_inter_pred_luma_vert_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_luma_vert_w16out_fptr = ihevc_inter_pred_luma_vert_w16out_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_luma_vert_w16inp_fptr = ihevc_inter_pred_luma_vert_w16inp_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_luma_copy_fptr = ihevc_inter_pred_luma_copy_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_luma_copy_w16out_fptr = ihevc_inter_pred_luma_copy_w16out_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_luma_horz_w16out_fptr = ihevc_inter_pred_luma_horz_w16out_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_inter_pred_luma_vert_w16inp_w16out_fptr = ihevc_inter_pred_luma_vert_w16inp_w16out_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_chroma_ref_substitution_fptr = ihevc_intra_pred_chroma_ref_substitution;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_luma_ref_substitution_fptr = ihevc_intra_pred_luma_ref_substitution_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_ref_filtering_fptr = ihevc_intra_pred_ref_filtering;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_chroma_dc_fptr = ihevc_intra_pred_chroma_dc_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_chroma_horz_fptr = ihevc_intra_pred_chroma_horz_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_chroma_mode2_fptr = ihevc_intra_pred_chroma_mode2_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_chroma_mode_18_34_fptr = ihevc_intra_pred_chroma_mode_18_34_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_chroma_mode_27_to_33_fptr = ihevc_intra_pred_chroma_mode_27_to_33_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_chroma_mode_3_to_9_fptr = ihevc_intra_pred_chroma_mode_3_to_9_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_chroma_planar_fptr = ihevc_intra_pred_chroma_planar_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_chroma_ver_fptr = ihevc_intra_pred_chroma_ver_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_chroma_mode_11_to_17_fptr = ihevc_intra_pred_chroma_mode_11_to_17_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_chroma_mode_19_to_25_fptr = ihevc_intra_pred_chroma_mode_19_to_25_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_luma_mode_11_to_17_fptr = ihevc_intra_pred_luma_mode_11_to_17_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_luma_mode_19_to_25_fptr = ihevc_intra_pred_luma_mode_19_to_25_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_luma_dc_fptr = ihevc_intra_pred_luma_dc_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_luma_horz_fptr = ihevc_intra_pred_luma_horz_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_luma_mode2_fptr = ihevc_intra_pred_luma_mode2_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_luma_mode_18_34_fptr = ihevc_intra_pred_luma_mode_18_34_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_luma_mode_27_to_33_fptr = ihevc_intra_pred_luma_mode_27_to_33_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_luma_mode_3_to_9_fptr = ihevc_intra_pred_luma_mode_3_to_9_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_luma_planar_fptr = ihevc_intra_pred_luma_planar_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_intra_pred_luma_ver_fptr = ihevc_intra_pred_luma_ver_a9q;
+
+    ps_enc_ctxt->s_func_selector.ihevc_itrans_recon_4x4_ttype1_fptr = ihevc_itrans_recon_4x4_ttype1_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_itrans_recon_4x4_fptr = ihevc_itrans_recon_4x4_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_itrans_recon_8x8_fptr = ihevc_itrans_recon_8x8_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_itrans_recon_16x16_fptr = ihevc_itrans_recon_16x16_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_itrans_recon_32x32_fptr = ihevc_itrans_recon_32x32;
+
+    ps_enc_ctxt->s_func_selector.ihevc_memcpy_mul_8_fptr = ihevc_memcpy_mul_8_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_memcpy_fptr = ihevc_memcpy_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_memset_mul_8_fptr = ihevc_memset_mul_8_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_memset_fptr = ihevc_memset_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_memset_16bit_mul_8_fptr = ihevc_memset_16bit_mul_8_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_memset_16bit_fptr = ihevc_memset_16bit_a9q;
+
+    ps_enc_ctxt->s_func_selector.ihevc_weighted_pred_bi_fptr = ihevc_weighted_pred_bi_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_weighted_pred_bi_default_fptr = ihevc_weighted_pred_bi_default_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_weighted_pred_uni_fptr = ihevc_weighted_pred_uni_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_weighted_pred_chroma_bi_fptr = ihevc_weighted_pred_chroma_bi;
+    ps_enc_ctxt->s_func_selector.ihevc_weighted_pred_chroma_bi_default_fptr = ihevc_weighted_pred_chroma_bi_default;
+    ps_enc_ctxt->s_func_selector.ihevc_weighted_pred_chroma_uni_fptr = ihevc_weighted_pred_chroma_uni;
+
+    ps_enc_ctxt->s_func_selector.ihevc_resi_trans_4x4_ttype1_fptr = ihevc_resi_trans_4x4_ttype1_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_resi_trans_4x4_fptr = ihevc_resi_trans_4x4_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_resi_trans_8x8_fptr = ihevc_resi_trans_8x8_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_resi_trans_16x16_fptr = ihevc_resi_trans_16x16_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_resi_trans_32x32_fptr = ihevc_resi_trans_32x32_a9q;
+
+    ps_enc_ctxt->s_func_selector.ihevc_quant_iquant_ssd_flat_scale_mat_fptr = ihevc_quant_iquant_ssd_flat_scale_mat_neon;
+    ps_enc_ctxt->s_func_selector.ihevc_q_iq_ssd_flat_scale_mat_var_rnd_fact_fptr = ihevc_q_iq_ssd_flat_scale_mat_var_rnd_fact_neon;
+
+    ps_enc_ctxt->s_func_selector.ihevc_sao_edge_offset_class0_fptr = ihevc_sao_edge_offset_class0_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_sao_edge_offset_class1_fptr = ihevc_sao_edge_offset_class1_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_sao_edge_offset_class2_fptr = ihevc_sao_edge_offset_class2_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_sao_edge_offset_class3_fptr = ihevc_sao_edge_offset_class3_a9q;
+
+    ps_enc_ctxt->s_func_selector.ihevc_sao_edge_offset_class0_chroma_fptr = ihevc_sao_edge_offset_class0_chroma_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_sao_edge_offset_class1_chroma_fptr = ihevc_sao_edge_offset_class1_chroma_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_sao_edge_offset_class2_chroma_fptr = ihevc_sao_edge_offset_class2_chroma_a9q;
+    ps_enc_ctxt->s_func_selector.ihevc_sao_edge_offset_class3_chroma_fptr = ihevc_sao_edge_offset_class3_chroma_a9q;
+}
+#endif
+#endif
+// clang-format on
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_init_function_ptr \endif
+*
+* \brief
+*    Fills the function selector with the generic routines, then applies the
+*    NEON overrides compiled into this build when e_processor_arch asks for them
+*****************************************************************************
+*/
+void ihevce_init_function_ptr(void *pv_enc_ctxt, IV_ARCH_T e_processor_arch)
+{
+    enc_ctxt_t *ps_enc_ctxt = (enc_ctxt_t *)pv_enc_ctxt;
+
+    /* generic bindings always go in first; an arch-specific pass may replace some */
+    ihevce_init_function_ptr_generic(ps_enc_ctxt);
+#if defined(ENABLE_NEON) && defined(ARMV8)
+    if(ARCH_ARM_V8_NEON == e_processor_arch)
+    {
+        ihevce_init_function_ptr_av8(ps_enc_ctxt);
+    }
+#elif defined(ENABLE_NEON)
+    if(ARCH_ARM_A9Q == e_processor_arch)
+    {
+        ihevce_init_function_ptr_a9q(ps_enc_ctxt);
+    }
+#else
+    /* build without ENABLE_NEON: the requested architecture is not consulted */
+    (void)e_processor_arch;
+#endif
+}
diff --git a/encoder/ihevce_function_selector.h b/encoder/ihevce_function_selector.h
new file mode 100644
index 0000000..a7ea807
--- /dev/null
+++ b/encoder/ihevce_function_selector.h
@@ -0,0 +1,158 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ihevce_function_selector.h
+*
+* @brief
+* Function selector structure definition used in the encoder
+*
+* @author
+* ittiam
+*
+* @par List of Functions:
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef _IHEVCE_FUNCTION_SELECTOR_H_
+#define _IHEVCE_FUNCTION_SELECTOR_H_
+
+/*****************************************************************************/
+/* Structure: table of kernel function pointers (see ihevce_init_function_ptr) */
+/*****************************************************************************/
+typedef struct
+{
+ ihevc_deblk_chroma_horz_ft *ihevc_deblk_chroma_horz_fptr;
+ ihevc_deblk_chroma_vert_ft *ihevc_deblk_chroma_vert_fptr;
+ ihevc_deblk_chroma_horz_ft *ihevc_deblk_422chroma_horz_fptr; /* same function type as the non-422 pointer */
+ ihevc_deblk_chroma_vert_ft *ihevc_deblk_422chroma_vert_fptr; /* same function type as the non-422 pointer */
+ ihevc_deblk_luma_vert_ft *ihevc_deblk_luma_vert_fptr;
+ ihevc_deblk_luma_horz_ft *ihevc_deblk_luma_horz_fptr;
+ ihevc_inter_pred_ft *ihevc_inter_pred_chroma_copy_fptr;
+ ihevc_inter_pred_w16out_ft *ihevc_inter_pred_chroma_copy_w16out_fptr;
+ ihevc_inter_pred_ft *ihevc_inter_pred_chroma_horz_fptr;
+ ihevc_inter_pred_w16out_ft *ihevc_inter_pred_chroma_horz_w16out_fptr;
+ ihevc_inter_pred_ft *ihevc_inter_pred_chroma_vert_fptr;
+ ihevc_inter_pred_w16inp_ft *ihevc_inter_pred_chroma_vert_w16inp_fptr;
+ ihevc_inter_pred_w16inp_w16out_ft *ihevc_inter_pred_chroma_vert_w16inp_w16out_fptr;
+ ihevc_inter_pred_w16out_ft *ihevc_inter_pred_chroma_vert_w16out_fptr;
+ ihevc_inter_pred_ft *ihevc_inter_pred_luma_horz_fptr;
+ ihevc_inter_pred_ft *ihevc_inter_pred_luma_vert_fptr;
+ ihevc_inter_pred_w16out_ft *ihevc_inter_pred_luma_vert_w16out_fptr;
+ ihevc_inter_pred_w16inp_ft *ihevc_inter_pred_luma_vert_w16inp_fptr;
+ ihevc_inter_pred_ft *ihevc_inter_pred_luma_copy_fptr;
+ ihevc_inter_pred_w16out_ft *ihevc_inter_pred_luma_copy_w16out_fptr;
+ ihevc_inter_pred_w16out_ft *ihevc_inter_pred_luma_horz_w16out_fptr;
+ ihevc_inter_pred_w16inp_w16out_ft *ihevc_inter_pred_luma_vert_w16inp_w16out_fptr;
+ ihevc_intra_pred_chroma_ref_substitution_ft *ihevc_intra_pred_chroma_ref_substitution_fptr;
+ ihevc_intra_pred_luma_ref_substitution_ft *ihevc_intra_pred_luma_ref_substitution_fptr;
+ ihevc_intra_pred_ref_filtering_ft *ihevc_intra_pred_ref_filtering_fptr;
+ ihevc_intra_pred_chroma_dc_ft *ihevc_intra_pred_chroma_dc_fptr;
+ ihevc_intra_pred_chroma_horz_ft *ihevc_intra_pred_chroma_horz_fptr;
+ ihevc_intra_pred_chroma_mode2_ft *ihevc_intra_pred_chroma_mode2_fptr;
+ ihevc_intra_pred_chroma_mode_18_34_ft *ihevc_intra_pred_chroma_mode_18_34_fptr;
+ ihevc_intra_pred_chroma_mode_27_to_33_ft *ihevc_intra_pred_chroma_mode_27_to_33_fptr;
+ ihevc_intra_pred_chroma_mode_3_to_9_ft *ihevc_intra_pred_chroma_mode_3_to_9_fptr;
+ ihevc_intra_pred_chroma_planar_ft *ihevc_intra_pred_chroma_planar_fptr;
+ ihevc_intra_pred_chroma_ver_ft *ihevc_intra_pred_chroma_ver_fptr;
+ ihevc_intra_pred_chroma_mode_11_to_17_ft *ihevc_intra_pred_chroma_mode_11_to_17_fptr;
+ ihevc_intra_pred_chroma_mode_19_to_25_ft *ihevc_intra_pred_chroma_mode_19_to_25_fptr;
+ ihevc_intra_pred_luma_mode_11_to_17_ft *ihevc_intra_pred_luma_mode_11_to_17_fptr;
+ ihevc_intra_pred_luma_mode_19_to_25_ft *ihevc_intra_pred_luma_mode_19_to_25_fptr;
+ ihevc_intra_pred_luma_dc_ft *ihevc_intra_pred_luma_dc_fptr;
+ ihevc_intra_pred_luma_horz_ft *ihevc_intra_pred_luma_horz_fptr;
+ ihevc_intra_pred_luma_mode2_ft *ihevc_intra_pred_luma_mode2_fptr;
+ ihevc_intra_pred_luma_mode_18_34_ft *ihevc_intra_pred_luma_mode_18_34_fptr;
+ ihevc_intra_pred_luma_mode_27_to_33_ft *ihevc_intra_pred_luma_mode_27_to_33_fptr;
+ ihevc_intra_pred_luma_mode_3_to_9_ft *ihevc_intra_pred_luma_mode_3_to_9_fptr;
+ ihevc_intra_pred_luma_planar_ft *ihevc_intra_pred_luma_planar_fptr;
+ ihevc_intra_pred_luma_ver_ft *ihevc_intra_pred_luma_ver_fptr;
+ ihevc_itrans_recon_4x4_ttype1_ft *ihevc_itrans_recon_4x4_ttype1_fptr;
+ ihevc_itrans_recon_4x4_ft *ihevc_itrans_recon_4x4_fptr;
+ ihevc_itrans_recon_8x8_ft *ihevc_itrans_recon_8x8_fptr;
+ ihevc_itrans_recon_16x16_ft *ihevc_itrans_recon_16x16_fptr;
+ ihevc_itrans_recon_32x32_ft *ihevc_itrans_recon_32x32_fptr;
+ ihevc_chroma_itrans_recon_4x4_ft *ihevc_chroma_itrans_recon_4x4_fptr;
+ ihevc_chroma_itrans_recon_8x8_ft *ihevc_chroma_itrans_recon_8x8_fptr;
+ ihevc_chroma_itrans_recon_16x16_ft *ihevc_chroma_itrans_recon_16x16_fptr;
+ ihevc_memcpy_mul_8_ft *ihevc_memcpy_mul_8_fptr;
+ ihevc_memcpy_ft *ihevc_memcpy_fptr;
+ ihevc_memset_mul_8_ft *ihevc_memset_mul_8_fptr;
+ ihevc_memset_ft *ihevc_memset_fptr;
+ ihevc_memset_16bit_mul_8_ft *ihevc_memset_16bit_mul_8_fptr;
+ ihevc_memset_16bit_ft *ihevc_memset_16bit_fptr;
+
+ ihevc_weighted_pred_bi_ft *ihevc_weighted_pred_bi_fptr;
+ ihevc_weighted_pred_bi_default_ft *ihevc_weighted_pred_bi_default_fptr;
+ ihevc_weighted_pred_uni_ft *ihevc_weighted_pred_uni_fptr;
+ ihevc_weighted_pred_chroma_bi_ft *ihevc_weighted_pred_chroma_bi_fptr;
+ ihevc_weighted_pred_chroma_bi_default_ft *ihevc_weighted_pred_chroma_bi_default_fptr;
+ ihevc_weighted_pred_chroma_uni_ft *ihevc_weighted_pred_chroma_uni_fptr;
+ ihevc_resi_trans_4x4_ttype1_ft *ihevc_resi_trans_4x4_ttype1_fptr;
+ ihevc_resi_trans_4x4_ft *ihevc_resi_trans_4x4_fptr;
+ ihevc_resi_trans_8x8_ft *ihevc_resi_trans_8x8_fptr;
+ ihevc_resi_trans_16x16_ft *ihevc_resi_trans_16x16_fptr;
+ ihevc_resi_trans_32x32_ft *ihevc_resi_trans_32x32_fptr;
+ ihevc_quant_iquant_ssd_ft *ihevc_quant_iquant_ssd_fptr;
+ ihevc_quant_iquant_ssd_rdoq_ft *ihevc_quant_iquant_ssd_rdoq_fptr;
+ ihevc_quant_iquant_ssd_flat_scale_mat_ft *ihevc_quant_iquant_ssd_flat_scale_mat_fptr;
+ ihevc_quant_iquant_ssd_flat_scale_mat_rdoq_ft *ihevc_quant_iquant_ssd_flat_scale_mat_rdoq_fptr;
+ ihevc_q_iq_ssd_var_rnd_fact_ft *ihevc_q_iq_ssd_var_rnd_fact_fptr;
+ ihevc_q_iq_ssd_flat_scale_mat_var_rnd_fact_ft *ihevc_q_iq_ssd_flat_scale_mat_var_rnd_fact_fptr;
+ ihevc_quant_iquant_ssd_ft *ihevc_quant_iquant_fptr; /* this and the next five pointers reuse the *_ssd_* function types */
+ ihevc_quant_iquant_ssd_rdoq_ft *ihevc_quant_iquant_rdoq_fptr;
+ ihevc_quant_iquant_ssd_flat_scale_mat_ft *ihevc_quant_iquant_flat_scale_mat_fptr;
+ ihevc_quant_iquant_ssd_flat_scale_mat_rdoq_ft *ihevc_quant_iquant_flat_scale_mat_rdoq_fptr;
+ ihevc_q_iq_ssd_var_rnd_fact_ft *ihevc_q_iq_var_rnd_fact_fptr;
+ ihevc_q_iq_ssd_flat_scale_mat_var_rnd_fact_ft *ihevc_q_iq_flat_scale_mat_var_rnd_fact_fptr;
+ ihevc_pad_horz_luma_ft *ihevc_pad_horz_luma_fptr;
+ ihevc_pad_horz_chroma_ft *ihevc_pad_horz_chroma_fptr;
+ ihevc_pad_vert_ft *ihevc_pad_vert_fptr;
+ ihevc_pad_top_ft *ihevc_pad_top_fptr;
+ ihevc_pad_bottom_ft *ihevc_pad_bottom_fptr;
+ ihevc_pad_left_luma_ft *ihevc_pad_left_luma_fptr;
+ ihevc_pad_left_chroma_ft *ihevc_pad_left_chroma_fptr;
+ ihevc_pad_right_luma_ft *ihevc_pad_right_luma_fptr;
+ ihevc_pad_right_chroma_ft *ihevc_pad_right_chroma_fptr;
+ ihevc_sao_edge_offset_class0_ft *ihevc_sao_edge_offset_class0_fptr;
+ ihevc_sao_edge_offset_class1_ft *ihevc_sao_edge_offset_class1_fptr;
+ ihevc_sao_edge_offset_class2_ft *ihevc_sao_edge_offset_class2_fptr;
+ ihevc_sao_edge_offset_class3_ft *ihevc_sao_edge_offset_class3_fptr;
+
+ ihevc_sao_edge_offset_class0_chroma_ft *ihevc_sao_edge_offset_class0_chroma_fptr;
+ ihevc_sao_edge_offset_class1_chroma_ft *ihevc_sao_edge_offset_class1_chroma_fptr;
+ ihevc_sao_edge_offset_class2_chroma_ft *ihevc_sao_edge_offset_class2_chroma_fptr;
+ ihevc_sao_edge_offset_class3_chroma_ft *ihevc_sao_edge_offset_class3_chroma_fptr;
+} func_selector_t;
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+IV_ARCH_T ihevce_default_arch(void);
+
+void ihevce_init_function_ptr(void *pv_enc_ctxt, IV_ARCH_T e_processor_arch);
+
+#endif /* _IHEVCE_FUNCTION_SELECTOR_H_ */
diff --git a/encoder/ihevce_global_tables.c b/encoder/ihevce_global_tables.c
new file mode 100644
index 0000000..6b08e83
--- /dev/null
+++ b/encoder/ihevce_global_tables.c
@@ -0,0 +1,689 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+*
+* @file ihevce_global_tables.c
+*
+* @brief
+* This file contains definitions of global tables used by the encoder
+*
+* @author
+* Ittiam
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_multi_thrd_funcs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+#include "hme_datatype.h"
+#include "hme_common_defs.h"
+#include "hme_common_utils.h"
+#include "hme_interface.h"
+#include "hme_defs.h"
+#include "ihevce_me_instr_set_router.h"
+#include "hme_err_compute.h"
+#include "hme_globals.h"
+#include "ihevce_entropy_structs.h"
+#include "ihevce_enc_loop_structs.h"
+#include "ihevce_enc_loop_utils.h"
+#include "ihevce_enc_loop_pass.h"
+#include "ihevce_global_tables.h"
+
+/*****************************************************************************/
+/* Globals */
+/*****************************************************************************/
+const level_data_t g_as_level_data[TOTAL_NUM_LEVELS] = { /* one row per level, LEVEL1 .. LEVEL6_2 */
+ /* LEVEL1 */
+ { LEVEL1, 552960, 36864, { 128, 0 }, { 350, 0 }, 2, 16, 1, 1 },
+
+ /* LEVEL2 */
+ { LEVEL2, 3686400, 122880, { 1500, 0 }, { 1500, 0 }, 2, 16, 1, 1 },
+
+ /* LEVEL2_1 */
+ { LEVEL2_1, 7372800, 245760, { 3000, 0 }, { 3000, 0 }, 2, 20, 1, 1 },
+
+ /* LEVEL3 */
+ { LEVEL3, 16588800, 552960, { 6000, 0 }, { 6000, 0 }, 2, 30, 2, 2 },
+
+ /* LEVEL3_1 */
+ { LEVEL3_1, 33177600, 983040, { 10000, 0 }, { 10000, 0 }, 2, 40, 3, 3 },
+
+ /* LEVEL4 */
+ { LEVEL4, 66846720, 2228224, { 12000, 30000 }, { 12000, 30000 }, 4, 75, 5, 5 },
+
+ /* LEVEL4_1 */
+ { LEVEL4_1, 133693440, 2228224, { 20000, 50000 }, { 20000, 50000 }, 4, 75, 5, 5 },
+
+ /* LEVEL5 */
+ { LEVEL5, 267386880, 8912896, { 25000, 100000 }, { 25000, 100000 }, 6, 200, 11, 10 },
+
+ /* LEVEL5_1 */
+ { LEVEL5_1, 534773760, 8912896, { 40000, 160000 }, { 40000, 160000 }, 8, 200, 11, 10 },
+
+ /* LEVEL5_2 */
+ { LEVEL5_2, 1069547520, 8912896, { 60000, 240000 }, { 60000, 240000 }, 8, 200, 11, 10 },
+
+ /* LEVEL6 */
+ { LEVEL6, 1069547520, 35651584, { 60000, 240000 }, { 60000, 240000 }, 8, 600, 22, 20 },
+
+ /* LEVEL6_1 */
+ { LEVEL6_1, 2139095040, 35651584, { 120000, 480000 }, { 120000, 480000 }, 8, 600, 22, 20 },
+
+ /* LEVEL6_2 -- NOTE(review): 4278190080 exceeds INT32_MAX; confirm the receiving field type is wide enough */
+ { LEVEL6_2, 4278190080, 35651584, { 240000, 800000 }, { 240000, 800000 }, 6, 600, 22, 20 },
+
+};
+
+/** \brief Default flat scaling matrix for a 4x4 transform: all 16 entries are 16 */
+const WORD16 gi2_flat_scale_mat_4x4[] = { 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16 };
+
+/** \brief Default flat scaling matrix for an 8x8 transform: all 64 entries are 16 */
+const WORD16 gi2_flat_scale_mat_8x8[] = { 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16 };
+
+/** \brief Default flat scaling matrix for a 16x16 transform: all 256 entries are 16 */
+const WORD16 gi2_flat_scale_mat_16x16[] = {
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+ 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
+};
+
+/** \brief Default flat rescaling matrix for a 4x4 transform: all 16 entries are 2048 */
+const WORD16 gi2_flat_rescale_mat_4x4[] = { 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
+ 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048 };
+
+/** \brief Default flat rescaling matrix for an 8x8 transform: all 64 entries are 2048 */
+const WORD16 gi2_flat_rescale_mat_8x8[] = {
+ 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
+ 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
+ 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
+ 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048
+};
+
+/** \brief Default flat rescaling matrix for a 16x16 transform: all 256 entries are 2048 */
+const WORD16 gi2_flat_rescale_mat_16x16[] = {
+ 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
+ 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
+ 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
+ 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
+ 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
+ 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
+ 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
+ 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
+ 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
+ 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
+ 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
+ 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
+ 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
+ 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
+ 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048,
+ 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048, 2048
+};
+
+/**
+* @brief Gives the scanning order of csb in a 32x32 TU as raster positions in an 8x8 grid (64 per scan),
+* selected by the first idx: 0 - upright_diagonal, 1 - horizontal, 2 - vertical scan
+*/
+const UWORD8 g_u1_scan_table_8x8[3][64] = {
+ /* diag up right scan */
+ { 0, 8, 1, 16, 9, 2, 24, 17, 10, 3, 32, 25, 18, 11, 4, 40, 33, 26, 19, 12, 5, 48,
+ 41, 34, 27, 20, 13, 6, 56, 49, 42, 35, 28, 21, 14, 7, 57, 50, 43, 36, 29, 22, 15, 58,
+ 51, 44, 37, 30, 23, 59, 52, 45, 38, 31, 60, 53, 46, 39, 61, 54, 47, 62, 55, 63 },
+
+ /* horizontal scan */
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21,
+ 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43,
+ 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63 },
+
+ /* vertical scan */
+ { 0, 8, 16, 24, 32, 40, 48, 56, 1, 9, 17, 25, 33, 41, 49, 57, 2, 10, 18, 26, 34, 42,
+ 50, 58, 3, 11, 19, 27, 35, 43, 51, 59, 4, 12, 20, 28, 36, 44, 52, 60, 5, 13, 21, 29,
+ 37, 45, 53, 61, 6, 14, 22, 30, 38, 46, 54, 62, 7, 15, 23, 31, 39, 47, 55, 63 }
+};
+
+/**
+* @brief Gives the scanning order of csb in a 16x16 TU, or within a 4x4 csb, as raster positions in a 4x4 grid,
+* selected by the first idx: 0 - upright_diagonal, 1 - horizontal, 2 - vertical scan
+*/
+const UWORD8 g_u1_scan_table_4x4[3][16] = {
+ /* diag up right scan */
+ { 0, 4, 1, 8, 5, 2, 12, 9, 6, 3, 13, 10, 7, 14, 11, 15 },
+
+ /* horizontal scan */
+ { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
+
+ /* vertical scan */
+ { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 }
+};
+
+/**
+* @brief Gives the scanning order of csb in an 8x8 TU as raster positions in a 2x2 grid,
+* selected by the first idx: 0 - upright_diagonal, 1 - horizontal, 2 - vertical scan
+*/
+const UWORD8 g_u1_scan_table_2x2[3][4] = {
+ /* diag up right scan */
+ { 0, 2, 1, 3 },
+
+ /* horizontal scan */
+ { 0, 1, 2, 3 },
+
+ /* vertical scan (same order as the diagonal scan for a 2x2 grid) */
+ { 0, 2, 1, 3 }
+};
+
+/**
+* @brief Gives the scanning order of the csb in a 4x4 TU;
+* the scan idx doesn't matter as the single entry is 0 for all cases
+*/
+const UWORD8 g_u1_scan_table_1x1[1] = { 0 };
+
+/**
+******************************************************************************
+* @brief For a given frac pt, fracx, fracy, this table gives the
+* corresponding fpel/hpel buffers along with x and y offsets if any. The
+* grid used is shown as follows:
+* A j E k B
+* l m n o p
+* F q G r H
+* s t u v w
+* C x I y D
+* Entries are laid out as [fracy][fracx], going by the per-entry "x, y pt" notes.
+* In this grid capital letters are fpel/hpel bufs.
+******************************************************************************
+*/
+qpel_input_buf_cfg_t gas_qpel_inp_buf_cfg[4][4] = {
+ {
+ /* 0, 0 pt: both buf id would be fxfy = 0 */
+ { 0, 0, 0, 0, 0, 0 },
+ /* 1, 0 pt: pt j; avg of A and E */
+ { 0, 0, 0, 1, 0, 0 },
+ /* 2, 0 pt: pt E, buf id 0 and 1 would be hxfy = 1 */
+ { 1, 0, 0, 1, 0, 0 },
+ /* 3, 0 pt: pt k, avg of E and B */
+ { 1, 0, 0, 0, 1, 0 },
+ },
+ {
+ /* 0, 1 pt: pt l: avg of A and F */
+ { 0, 0, 0, 2, 0, 0 },
+ /* 1, 1 pt: pt m : avg of E and F */
+ { 1, 0, 0, 2, 0, 0 },
+ /* 2, 1 pt: pt n: avg of E and G */
+ { 1, 0, 0, 3, 0, 0 },
+ /* 3, 1 pt : pt o: avg of E and H */
+ { 1, 0, 0, 2, 1, 0 },
+ },
+ {
+ /* 0, 2 pt: pt F; both buf id would be fxhy = 2 */
+ { 2, 0, 0, 2, 0, 0 },
+ /* 1, 2 pt: pt q; avg of F and G */
+ { 2, 0, 0, 3, 0, 0 },
+ /* 2, 2 pt: pt G: both buf id would be hxhy = 3 */
+ { 3, 0, 0, 3, 0, 0 },
+ /* 3, 2 pt: pt r: avg of G and H */
+ { 3, 0, 0, 2, 1, 0 },
+ },
+ {
+ /* 0, 3 pt: pt s; avg of F and C */
+ { 2, 0, 0, 0, 0, 1 },
+ /* 1, 3 pt: pt t; avg of F and I */
+ { 2, 0, 0, 1, 0, 1 },
+ /* 2, 3 pt: pt u, avg of G and I */
+ { 3, 0, 0, 1, 0, 1 },
+ /* 3, 3 pt; pt v, avg of H and I */
+ { 2, 1, 0, 1, 0, 1 },
+ }
+};
+
+/**
+* @brief Per-partition flag (TOT_NUM_PARTS entries): 1 for partitions marked vertical, 0 otherwise
+*/
+const WORD8 gai1_is_part_vertical[TOT_NUM_PARTS] = { 0, 1, 1, 0, 0, 0, 0, 0, 0,
+ 1, 1, 1, 1, 0, 0, 0, 0 };
+
+/**
+* @brief Partition dimensions: one pair per partition (width then height, going by the table name)
+*/
+const WORD8 gai1_part_wd_and_ht[TOT_NUM_PARTS][2] = { { 16, 16 }, { 16, 8 }, { 16, 8 }, { 8, 16 },
+ { 8, 16 }, { 8, 8 }, { 8, 8 }, { 8, 8 },
+ { 8, 8 }, { 16, 4 }, { 16, 12 }, { 16, 12 },
+ { 16, 4 }, { 4, 16 }, { 12, 16 }, { 12, 16 },
+ { 4, 16 } };
+
+/**
+******************************************************************************
+* @brief Bits to code a given ref id (index 0..15), assuming more than 2 ref ids active
+******************************************************************************
+*/
+UWORD8 gau1_ref_bits[16] = { 1, 3, 3, 5, 5, 5, 5, 7, 7, 7, 7, 7, 7, 7, 7, 9 };
+
+/**
+* @brief Raster index to z-scan index lookup table for a 16x16 grid (256 entries, 16 per raster row)
+*/
+const UWORD8 gau1_ctb_raster_to_zscan[256] = {
+ 0, 1, 4, 5, 16, 17, 20, 21, 64, 65, 68, 69, 80, 81, 84, 85, 2, 3, 6,
+ 7, 18, 19, 22, 23, 66, 67, 70, 71, 82, 83, 86, 87, 8, 9, 12, 13, 24, 25,
+ 28, 29, 72, 73, 76, 77, 88, 89, 92, 93, 10, 11, 14, 15, 26, 27, 30, 31, 74,
+ 75, 78, 79, 90, 91, 94, 95, 32, 33, 36, 37, 48, 49, 52, 53, 96, 97, 100, 101,
+ 112, 113, 116, 117, 34, 35, 38, 39, 50, 51, 54, 55, 98, 99, 102, 103, 114, 115, 118,
+ 119, 40, 41, 44, 45, 56, 57, 60, 61, 104, 105, 108, 109, 120, 121, 124, 125, 42, 43,
+ 46, 47, 58, 59, 62, 63, 106, 107, 110, 111, 122, 123, 126, 127, 128, 129, 132, 133, 144,
+ 145, 148, 149, 192, 193, 196, 197, 208, 209, 212, 213, 130, 131, 134, 135, 146, 147, 150, 151,
+ 194, 195, 198, 199, 210, 211, 214, 215, 136, 137, 140, 141, 152, 153, 156, 157, 200, 201, 204,
+ 205, 216, 217, 220, 221, 138, 139, 142, 143, 154, 155, 158, 159, 202, 203, 206, 207, 218, 219,
+ 222, 223, 160, 161, 164, 165, 176, 177, 180, 181, 224, 225, 228, 229, 240, 241, 244, 245, 162,
+ 163, 166, 167, 178, 179, 182, 183, 226, 227, 230, 231, 242, 243, 246, 247, 168, 169, 172, 173,
+ 184, 185, 188, 189, 232, 233, 236, 237, 248, 249, 252, 253, 170, 171, 174, 175, 186, 187, 190,
+ 191, 234, 235, 238, 239, 250, 251, 254, 255
+};
+
+/**
+* @brief Frame qstep multiplier LUT: 54 entries, non-increasing from 16 down to 0. NOTE(review): index meaning is not stated here - confirm with callers.
+*/
+UWORD32 gau4_frame_qstep_multiplier[54] = { 16, 16, 16, 15, 15, 15, 15, 15, 15, 13, 13, 13, 13, 12,
+ 12, 11, 11, 10, 10, 9, 9, 8, 8, 8, 7, 7, 7, 6,
+ 6, 5, 5, 5, 4, 4, 3, 3, 3, 2, 2, 2, 1, 1,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+
+/**
+******************************************************************************
+* @brief Look up table for choosing the appropriate function for
+* Intra prediction, indexed by intra prediction mode (0 to 34)
+*
+* @remarks Same look up table enums are used for luma & chroma but each
+* has separate functions implemented
+******************************************************************************
+*/
+WORD32 g_i4_ip_funcs[MAX_NUM_IP_MODES] = {
+ IP_FUNC_MODE_0, /* Mode 0 */
+ IP_FUNC_MODE_1, /* Mode 1 */
+ IP_FUNC_MODE_2, /* Mode 2 */
+ IP_FUNC_MODE_3TO9, /* Mode 3 */
+ IP_FUNC_MODE_3TO9, /* Mode 4 */
+ IP_FUNC_MODE_3TO9, /* Mode 5 */
+ IP_FUNC_MODE_3TO9, /* Mode 6 */
+ IP_FUNC_MODE_3TO9, /* Mode 7 */
+ IP_FUNC_MODE_3TO9, /* Mode 8 */
+ IP_FUNC_MODE_3TO9, /* Mode 9 */
+ IP_FUNC_MODE_10, /* Mode 10 */
+ IP_FUNC_MODE_11TO17, /* Mode 11 */
+ IP_FUNC_MODE_11TO17, /* Mode 12 */
+ IP_FUNC_MODE_11TO17, /* Mode 13 */
+ IP_FUNC_MODE_11TO17, /* Mode 14 */
+ IP_FUNC_MODE_11TO17, /* Mode 15 */
+ IP_FUNC_MODE_11TO17, /* Mode 16 */
+ IP_FUNC_MODE_11TO17, /* Mode 17 */
+ IP_FUNC_MODE_18_34, /* Mode 18 */
+ IP_FUNC_MODE_19TO25, /* Mode 19 */
+ IP_FUNC_MODE_19TO25, /* Mode 20 */
+ IP_FUNC_MODE_19TO25, /* Mode 21 */
+ IP_FUNC_MODE_19TO25, /* Mode 22 */
+ IP_FUNC_MODE_19TO25, /* Mode 23 */
+ IP_FUNC_MODE_19TO25, /* Mode 24 */
+ IP_FUNC_MODE_19TO25, /* Mode 25 */
+ IP_FUNC_MODE_26, /* Mode 26 */
+ IP_FUNC_MODE_27TO33, /* Mode 27 */
+ IP_FUNC_MODE_27TO33, /* Mode 28 */
+ IP_FUNC_MODE_27TO33, /* Mode 29 */
+ IP_FUNC_MODE_27TO33, /* Mode 30 */
+ IP_FUNC_MODE_27TO33, /* Mode 31 */
+ IP_FUNC_MODE_27TO33, /* Mode 32 */
+ IP_FUNC_MODE_27TO33, /* Mode 33 */
+ IP_FUNC_MODE_18_34, /* Mode 34 */
+};
+
+/**
+******************************************************************************
+* @brief Look up table for calculating the TU size for all the TUs in a CU
+* if CU part mode is one of SIZE_2Nx2N, SIZE_2NxN, SIZE_Nx2N
+*
+* i ranging (0 to 3)
+* tu_size[i] = cu_size >> gau1_inter_tu_shft_amt[i];
+*
+* @remarks For non AMP cases only TU size = CU/2 is used
+* and number of TU partitions in these CU will be 4
+******************************************************************************
+*/
+UWORD8 gau1_inter_tu_shft_amt[4] = {
+ /* SIZE_2Nx2N, SIZE_2NxN, SIZE_Nx2N cases: every TU is cu_size >> 1 */
+ 1,
+ 1,
+ 1,
+ 1
+};
+
+/**
+******************************************************************************
+* @brief Look up table for calculating the TU size for all the TUs in a CU
+* if CU part mode is one of SIZE_2NxnU, SIZE_2NxnD, SIZE_nLx2N
+* SIZE_nRx2N (AMP motion partition cases)
+*
+* part_mode = {SIZE_2NxnU,SIZE_2NxnD,SIZE_nLx2N,SIZE_nRx2N}
+* i ranging (0 to 9)
+* tu_size[i] = cu_size >> gau1_inter_tu_shft_amt_amp[part_mode-4][i];
+*
+* @remarks For AMP cases a mixture of TU size = CU/2 & CU/4 is used
+* based on motion partition orientation, number of TU partitions
+* in these CU will be 10
+******************************************************************************
+*/
+UWORD8 gau1_inter_tu_shft_amt_amp[4][10] = {
+ /* SIZE_2NxnU case: eight CU/4 TUs followed by two CU/2 TUs */
+ { 2, 2, 2, 2, 2, 2, 2, 2, 1, 1 },
+
+ /* SIZE_2NxnD case: two CU/2 TUs followed by eight CU/4 TUs */
+ { 1, 1, 2, 2, 2, 2, 2, 2, 2, 2 },
+
+ /* SIZE_nLx2N case: four CU/4 TUs then one CU/2 TU, twice */
+ { 2, 2, 2, 2, 1, 2, 2, 2, 2, 1 },
+
+ /* SIZE_nRx2N case: one CU/2 TU then four CU/4 TUs, twice */
+ { 1, 2, 2, 2, 2, 1, 2, 2, 2, 2 }
+};
+
+/**
+******************************************************************************
+* @brief Look up table for calculating the horizontal TU position
+* for all the TUs in a CU, if CU part mode is one of
+* SIZE_2Nx2N, SIZE_2NxN, SIZE_Nx2N
+*
+* i ranging (0 to 3)
+* tu_posx[i](in pixels in cu) =
+* ((cusize >> 2) * gau1_inter_tu_posx_scl_amt[i]);
+******************************************************************************
+*/
+UWORD8 gau1_inter_tu_posx_scl_amt[4] = {
+ /* SIZE_2Nx2N, SIZE_2NxN, SIZE_Nx2N cases: x = 0, cusize/2, 0, cusize/2 */
+ 0,
+ 2,
+ 0,
+ 2
+};
+
+/**
+******************************************************************************
+* @brief Look up table for calculating the horizontal TU position
+* for all the TUs in a CU, if CU part mode is one of
+* SIZE_2NxnU, SIZE_2NxnD, SIZE_nLx2N,SIZE_nRx2N (AMP motion partition cases)
+*
+* part_mode = {SIZE_2NxnU,SIZE_2NxnD,SIZE_nLx2N,SIZE_nRx2N}
+* i ranging (0 to 9); entries are in units of cusize / 4
+* tu_posx[i](in pixels in cu) =
+* ((cusize >> 2) * gau1_inter_tu_posx_scl_amt_amp[part_mode-4][i]);
+******************************************************************************
+*/
+UWORD8 gau1_inter_tu_posx_scl_amt_amp[4][10] = {
+ /* SIZE_2NxnU case */
+ { 0, 1, 0, 1, 2, 3, 2, 3, 0, 2 },
+
+ /* SIZE_2NxnD case */
+ { 0, 2, 0, 1, 0, 1, 2, 3, 2, 3 },
+
+ /* SIZE_nLx2N case */
+ { 0, 1, 0, 1, 2, 0, 1, 0, 1, 2 },
+
+ /* SIZE_nRx2N case */
+ { 0, 2, 3, 2, 3, 0, 2, 3, 2, 3 }
+};
+
+/**
+******************************************************************************
+* @brief Look up table for calculating the vertical TU position
+* for all the TUs in a CU, if CU part mode is one of
+* SIZE_2Nx2N, SIZE_2NxN, SIZE_Nx2N
+*
+* i ranging (0 to 3)
+* tu_posy[i](in pixels in cu) =
+* ((cusize >> 2) * gau1_inter_tu_posy_scl_amt[i]);
+******************************************************************************
+*/
+UWORD8 gau1_inter_tu_posy_scl_amt[4] = {
+ /* SIZE_2Nx2N, SIZE_2NxN, SIZE_Nx2N cases: y = 0, 0, cusize/2, cusize/2 */
+ 0,
+ 0,
+ 2,
+ 2
+};
+
+/**
+******************************************************************************
+* @brief Look up table for calculating the vertical TU position
+* for all the TUs in a CU, if CU part mode is one of
+* SIZE_2NxnU, SIZE_2NxnD, SIZE_nLx2N,SIZE_nRx2N (AMP motion partition cases)
+*
+* part_mode = {SIZE_2NxnU,SIZE_2NxnD,SIZE_nLx2N,SIZE_nRx2N}
+* i ranging (0 to 9); entries are in units of cusize / 4
+* tu_posy[i](in pixels in cu) =
+* ((cusize >> 2) * gau1_inter_tu_posy_scl_amt_amp[part_mode-4][i]);
+******************************************************************************
+*/
+UWORD8 gau1_inter_tu_posy_scl_amt_amp[4][10] = {
+ /* SIZE_2NxnU case */
+ { 0, 0, 1, 1, 0, 0, 1, 1, 2, 2 },
+
+ /* SIZE_2NxnD case */
+ { 0, 0, 2, 2, 3, 3, 2, 2, 3, 3 },
+
+ /* SIZE_nLx2N case */
+ { 0, 0, 1, 1, 0, 2, 2, 3, 3, 2 },
+
+ /* SIZE_nRx2N case */
+ { 0, 0, 0, 1, 1, 2, 2, 2, 3, 3 }
+};
+
+/**
+* @brief Transform shift values (5 entries, no initializer here). Initialized in ihevce_enc_loop_init()
+*/
+WORD32 ga_trans_shift[5];
+
+/**
+* @brief chroma 422 intra angle mapping (36 entries; the last one is DM_CHROMA_IDX).
+* NOTE(review): entries [11]=12 and [14]=17 appear to differ from the H.265 4:2:2 mode mapping table (11 and 16) - confirm. */
+const UWORD8 gau1_chroma422_intra_angle_mapping[36] = {
+ 0, 1, 2, 2, 2, 2, 3, 5, 7, 8, 10, 12, 13, 15, 17, 18, 19, 20,
+ 21, 22, 23, 23, 24, 24, 25, 25, 26, 27, 27, 28, 28, 29, 29, 30, 31, DM_CHROMA_IDX
+};
+
+// clang-format off
+/**
+******************************************************************************
+* @brief LUT for returning the fractional bits(Q12) to encode a bin based on
+* probability state and the encoded bin (MPS / LPS). The fractional
+* bits are computed as -log2(probability of symbol)
+*
+* Probabilities of the cabac states (0-63) are explained in section C
+* of ieee paper by Detlev Marpe et al (VOL. 13, NO. 7, JULY 2003)
+* alpha = (0.01875/0.5) ^ (1/63), p0 = 0.5 and p63 = 0.01875
+*
+* Note that HEVC and AVC use the same cabac tables
+*
+* input : curpState[bits7-1] | (curMPS ^ encoded bin)[bit0]
+*
+* output : fractional bits to encode the bin
+*
+******************************************************************************
+*/
+UWORD16 gau2_ihevce_cabac_bin_to_bits[64 * 2] =
+{
+ /* bits for mps */ /* bits for lps */
+ ROUND_Q12(1.000000000), ROUND_Q12(1.000000000),
+ ROUND_Q12(0.928535439), ROUND_Q12(1.075189930),
+ ROUND_Q12(0.863825936), ROUND_Q12(1.150379860),
+ ROUND_Q12(0.804976479), ROUND_Q12(1.225569790),
+ ROUND_Q12(0.751252392), ROUND_Q12(1.300759720),
+ ROUND_Q12(0.702043265), ROUND_Q12(1.375949650),
+ ROUND_Q12(0.656836490), ROUND_Q12(1.451139580),
+ ROUND_Q12(0.615197499), ROUND_Q12(1.526329510),
+ ROUND_Q12(0.576754745), ROUND_Q12(1.601519441),
+ ROUND_Q12(0.541188141), ROUND_Q12(1.676709371),
+ ROUND_Q12(0.508220033), ROUND_Q12(1.751899301),
+ ROUND_Q12(0.477608072), ROUND_Q12(1.827089231),
+ ROUND_Q12(0.449139524), ROUND_Q12(1.902279161),
+ ROUND_Q12(0.422626680), ROUND_Q12(1.977469091),
+ ROUND_Q12(0.397903130), ROUND_Q12(2.052659021),
+ ROUND_Q12(0.374820697), ROUND_Q12(2.127848951),
+ ROUND_Q12(0.353246914), ROUND_Q12(2.203038881),
+ ROUND_Q12(0.333062915), ROUND_Q12(2.278228811),
+ ROUND_Q12(0.314161674), ROUND_Q12(2.353418741),
+ ROUND_Q12(0.296446520), ROUND_Q12(2.428608671),
+ ROUND_Q12(0.279829872), ROUND_Q12(2.503798601),
+ ROUND_Q12(0.264232174), ROUND_Q12(2.578988531),
+ ROUND_Q12(0.249580966), ROUND_Q12(2.654178461),
+ ROUND_Q12(0.235810099), ROUND_Q12(2.729368392),
+ ROUND_Q12(0.222859049), ROUND_Q12(2.804558322),
+ ROUND_Q12(0.210672321), ROUND_Q12(2.879748252),
+ ROUND_Q12(0.199198934), ROUND_Q12(2.954938182),
+ ROUND_Q12(0.188391967), ROUND_Q12(3.030128112),
+ ROUND_Q12(0.178208162), ROUND_Q12(3.105318042),
+ ROUND_Q12(0.168607572), ROUND_Q12(3.180507972),
+ ROUND_Q12(0.159553254), ROUND_Q12(3.255697902),
+ ROUND_Q12(0.151010993), ROUND_Q12(3.330887832),
+ ROUND_Q12(0.142949058), ROUND_Q12(3.406077762),
+ ROUND_Q12(0.135337985), ROUND_Q12(3.481267692),
+ ROUND_Q12(0.128150381), ROUND_Q12(3.556457622),
+ ROUND_Q12(0.121360753), ROUND_Q12(3.631647552),
+ ROUND_Q12(0.114945349), ROUND_Q12(3.706837482),
+ ROUND_Q12(0.108882016), ROUND_Q12(3.782027412),
+ ROUND_Q12(0.103150076), ROUND_Q12(3.857217343),
+ ROUND_Q12(0.097730208), ROUND_Q12(3.932407273),
+ ROUND_Q12(0.092604344), ROUND_Q12(4.007597203),
+ ROUND_Q12(0.087755577), ROUND_Q12(4.082787133),
+ ROUND_Q12(0.083168071), ROUND_Q12(4.157977063),
+ ROUND_Q12(0.078826986), ROUND_Q12(4.233166993),
+ ROUND_Q12(0.074718402), ROUND_Q12(4.308356923),
+ ROUND_Q12(0.070829259), ROUND_Q12(4.383546853),
+ ROUND_Q12(0.067147292), ROUND_Q12(4.458736783),
+ ROUND_Q12(0.063660977), ROUND_Q12(4.533926713),
+ ROUND_Q12(0.060359483), ROUND_Q12(4.609116643),
+ ROUND_Q12(0.057232622), ROUND_Q12(4.684306573),
+ ROUND_Q12(0.054270808), ROUND_Q12(4.759496503),
+ ROUND_Q12(0.051465018), ROUND_Q12(4.834686433),
+ ROUND_Q12(0.048806753), ROUND_Q12(4.909876363),
+ ROUND_Q12(0.046288005), ROUND_Q12(4.985066294),
+ ROUND_Q12(0.043901228), ROUND_Q12(5.060256224),
+ ROUND_Q12(0.041639305), ROUND_Q12(5.135446154),
+ ROUND_Q12(0.039495525), ROUND_Q12(5.210636084),
+ ROUND_Q12(0.037463555), ROUND_Q12(5.285826014),
+ ROUND_Q12(0.035537418), ROUND_Q12(5.361015944),
+ ROUND_Q12(0.033711472), ROUND_Q12(5.436205874),
+ ROUND_Q12(0.031980387), ROUND_Q12(5.511395804),
+ ROUND_Q12(0.030339132), ROUND_Q12(5.586585734),
+ ROUND_Q12(0.028782950), ROUND_Q12(5.661775664),
+ ROUND_Q12(0.027307346), ROUND_Q12(5.736965594)
+};
+// clang-format on
+
+/**
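+* @brief Sub-block to csbf-id map arrays for 4x4/8x8/16x16/32x32 TUs, one entry per 4x4 sub-block (no initialisers here; NOTE(review): confirm where they are filled)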
+*/
+WORD32 gai4_subBlock2csbfId_map4x4TU[1];
+WORD32 gai4_subBlock2csbfId_map8x8TU[4];
+WORD32 gai4_subBlock2csbfId_map16x16TU[16];
+WORD32 gai4_subBlock2csbfId_map32x32TU[64];
+
+/**
+* @brief the neighbor flags for a general ctb (ctb inside the frame; not any corners).
+* The table gau4_nbr_flags_8x8_4x4blks generated for 16x16 4x4 blocks(ctb_size = 64).
+* But the same table holds good for other 4x4 blocks 2d arrays(eg 8x8 4x4 blks,4x4 4x4blks).
+* But the flags must be accessed with stride of 16 since the table has been generated for
+* ctb_size = 64. For odd 4x4 2d arrays(eg 3x3 4x4 blks) the flags needs modification.
+* The flags also need modification for corner ctbs.
+*/
+const UWORD32 gau4_nbr_flags_8x8_4x4blks[64] = {
+ 0x11188, 0x11180, 0x11188, 0x11180, 0x11188, 0x11180, 0x11188, 0x11180, 0x11188, 0x10180,
+ 0x11180, 0x10180, 0x11188, 0x10180, 0x11180, 0x10180, 0x11188, 0x11180, 0x11188, 0x10180,
+ 0x11188, 0x11180, 0x11188, 0x10180, 0x11188, 0x10180, 0x11180, 0x10180, 0x11180, 0x10180,
+ 0x11180, 0x10180, 0x11188, 0x11180, 0x11188, 0x11180, 0x11188, 0x11180, 0x11188, 0x10180,
+ 0x11188, 0x10180, 0x11180, 0x10180, 0x11188, 0x10180, 0x11180, 0x10180, 0x11188, 0x11180,
+ 0x11188, 0x10180, 0x11188, 0x11180, 0x11188, 0x10180, 0x11180, 0x10180, 0x11180, 0x10180,
+ 0x11180, 0x10180, 0x11180, 0x10180
+};
+
+/**
+* @brief subset of intra modes to be evaluated during pre enc intra process
+*/
+const UWORD8 gau1_modes_to_eval[11] = { 0, 1, 26, 2, 6, 10, 14, 18, 22, 30, 34 };
+
+const float gad_look_up_activity[TOT_QP_MOD_OFFSET] = { 0.314980262f, 0.353553391f, 0.396850263f,
+ 0.445449359f, 0.5f, 0.561231024f,
+ 0.629960525f, 0.707106781f, 0.793700526f,
+ 0.890898718f, 1.0f, 1.122462048f,
+ 1.25992105f, 1.414213562f };
diff --git a/encoder/ihevce_global_tables.h b/encoder/ihevce_global_tables.h
new file mode 100644
index 0000000..9866404
--- /dev/null
+++ b/encoder/ihevce_global_tables.h
@@ -0,0 +1,133 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+*
+* @file ihevce_global_tables.h
+*
+* @brief
+* This file contains declarations of global tables used by the encoder
+*
+* @author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_GLOBAL_TABLES_H_
+#define _IHEVCE_GLOBAL_TABLES_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Structures */
+/*****************************************************************************/
+typedef struct
+{
+ LEVEL_T e_level;
+
+ UWORD32 u4_max_luma_sample_rate;
+
+ WORD32 i4_max_luma_picture_size;
+
+ WORD32 i4_max_bit_rate[TOTAL_NUM_TIERS];
+
+ WORD32 i4_max_cpb[TOTAL_NUM_TIERS];
+
+ WORD32 i4_min_compression_ratio;
+
+ WORD32 i4_max_slices_per_picture;
+
+ WORD32 i4_max_num_tile_rows;
+
+ WORD32 i4_max_num_tile_columns;
+} level_data_t;
+
+/*****************************************************************************/
+/* Extern Variable Declarations */
+/*****************************************************************************/
+extern const level_data_t g_as_level_data[TOTAL_NUM_LEVELS];
+
+extern const WORD16 gi2_flat_scale_mat_4x4[];
+extern const WORD16 gi2_flat_scale_mat_8x8[];
+
+extern const WORD16 gi2_flat_scale_mat_16x16[];
+
+extern const WORD16 gi2_flat_rescale_mat_4x4[];
+extern const WORD16 gi2_flat_rescale_mat_8x8[];
+extern const WORD16 gi2_flat_rescale_mat_16x16[];
+
+extern const UWORD8 g_u1_scan_table_8x8[3][64];
+extern const UWORD8 g_u1_scan_table_4x4[3][16];
+extern const UWORD8 g_u1_scan_table_2x2[3][4];
+extern const UWORD8 g_u1_scan_table_1x1[1];
+
+extern qpel_input_buf_cfg_t gas_qpel_inp_buf_cfg[4][4];
+
+extern const WORD8 gai1_is_part_vertical[TOT_NUM_PARTS];
+extern const WORD8 gai1_part_wd_and_ht[TOT_NUM_PARTS][2];
+
+extern UWORD8 gau1_ref_bits[16];
+
+extern const UWORD8 gau1_ctb_raster_to_zscan[256];
+extern WORD32 ga_trans_shift[5];
+
+extern UWORD32 gau4_frame_qstep_multiplier[54];
+
+extern UWORD8 gau1_inter_tu_posy_scl_amt_amp[4][10];
+
+extern UWORD8 gau1_inter_tu_posy_scl_amt[4];
+
+extern UWORD8 gau1_inter_tu_posx_scl_amt_amp[4][10];
+
+extern UWORD8 gau1_inter_tu_posx_scl_amt[4];
+
+extern UWORD8 gau1_inter_tu_shft_amt_amp[4][10];
+
+extern UWORD8 gau1_inter_tu_shft_amt[4];
+
+extern WORD32 g_i4_ip_funcs[MAX_NUM_IP_MODES];
+extern const UWORD8 gau1_chroma422_intra_angle_mapping[36];
+extern const UWORD32 gau4_nbr_flags_8x8_4x4blks[64];
+extern const UWORD8 gau1_modes_to_eval[11];
+
+extern WORD32 gai4_subBlock2csbfId_map4x4TU[1];
+extern WORD32 gai4_subBlock2csbfId_map8x8TU[4];
+extern WORD32 gai4_subBlock2csbfId_map16x16TU[16];
+extern WORD32 gai4_subBlock2csbfId_map32x32TU[64];
+
+extern const float gad_look_up_activity[TOT_QP_MOD_OFFSET];
+
+#endif
diff --git a/encoder/ihevce_had_satd.c b/encoder/ihevce_had_satd.c
new file mode 100644
index 0000000..5646450
--- /dev/null
+++ b/encoder/ihevce_had_satd.c
@@ -0,0 +1,1769 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file ihevce_had_satd.c
+*
+* @brief
+* This file contains functions of Hadamard SAD and SATD
+*
+* @author
+* Ittiam
+*
+* List of Functions
+* <TODO: TO BE ADDED>
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_multi_thrd_funcs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+#include "hme_datatype.h"
+#include "hme_interface.h"
+#include "hme_common_defs.h"
+#include "hme_defs.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+static void ihevce_hadamard_4x4_8bit(
+ UWORD8 *pu1_src,
+ WORD32 src_strd,
+ UWORD8 *pu1_pred,
+ WORD32 pred_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd)
+{
+ WORD32 k;
+ WORD16 m[16];
+
+ /*===== 4x4 Hadamard of the residue (src - pred); horizontal pass: butterfly each row into m =====*/
+ for(k = 0; k < 4; k++)
+ {
+ WORD32 r0, r1, r2, r3;
+ WORD32 h0, h1, h2, h3;
+
+ /* Residue of the current row: source sample minus prediction sample */
+ r0 = pu1_src[0] - pu1_pred[0];
+ r1 = pu1_src[1] - pu1_pred[1];
+ r2 = pu1_src[2] - pu1_pred[2];
+ r3 = pu1_src[3] - pu1_pred[3];
+
+ h0 = r0 + r1;
+ h1 = r0 - r1;
+ h2 = r2 + r3;
+ h3 = r2 - r3;
+
+ m[k * 4 + 0] = h0 + h2;
+ m[k * 4 + 1] = h1 + h3;
+ m[k * 4 + 2] = h0 - h2;
+ m[k * 4 + 3] = h1 - h3;
+
+ pu1_pred += pred_strd;
+ pu1_src += src_strd;
+ }
+
+ /*===== vertical pass: butterfly down each column k of m; results go to rows 0..3 of pi2_dst (stride dst_strd) =====*/
+ for(k = 0; k < 4; k++)
+ {
+ WORD32 v0, v1, v2, v3;
+
+ v0 = m[0 + k] + m[4 + k];
+ v1 = m[0 + k] - m[4 + k];
+ v2 = m[8 + k] + m[12 + k];
+ v3 = m[8 + k] - m[12 + k];
+
+ pi2_dst[0 * dst_strd + k] = v0 + v2;
+ pi2_dst[1 * dst_strd + k] = v1 + v3;
+ pi2_dst[2 * dst_strd + k] = v0 - v2;
+ pi2_dst[3 * dst_strd + k] = v1 - v3;
+ }
+}
+
+static void ihevce_hadamard_8x8_8bit(
+ UWORD8 *pu1_src,
+ WORD32 src_strd,
+ UWORD8 *pu1_pred,
+ WORD32 pred_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd)
+{
+ WORD32 i;
+
+ // y0: 4x4 Hadamard of the top-left quadrant, written straight into pi2_dst
+ ihevce_hadamard_4x4_8bit(pu1_src, src_strd, pu1_pred, pred_strd, pi2_dst, dst_strd);
+ // y1: top-right quadrant (4 columns to the right)
+ ihevce_hadamard_4x4_8bit(pu1_src + 4, src_strd, pu1_pred + 4, pred_strd, pi2_dst + 4, dst_strd);
+ // y2: bottom-left quadrant (4 rows down)
+ ihevce_hadamard_4x4_8bit(
+ pu1_src + 4 * src_strd,
+ src_strd,
+ pu1_pred + 4 * pred_strd,
+ pred_strd,
+ pi2_dst + (4 * dst_strd),
+ dst_strd);
+ // y3: bottom-right quadrant (4 rows down, 4 columns right)
+ ihevce_hadamard_4x4_8bit(
+ pu1_src + 4 + 4 * src_strd,
+ src_strd,
+ pu1_pred + 4 + 4 * pred_strd,
+ pred_strd,
+ pi2_dst + (4 * dst_strd) + 4,
+ dst_strd);
+
+ /* Co-located child coefficients are combined in place in pi2_dst to get the parent 8x8 result */
+ /* _ _ */
+ /* | (y0 + y1) + (y2 + y3) (y0 - y1) + (y2 - y3) | */
+ /* | (y0 + y1) - (y2 + y3) (y0 - y1) - (y2 - y3) | */
+ /* \- -/ */
+ for(i = 0; i < 16; i++)
+ {
+ WORD32 idx = (i >> 2) * dst_strd + (i % 4);
+ WORD16 a0 = pi2_dst[idx];
+ WORD16 a1 = pi2_dst[4 + idx];
+ WORD16 a2 = pi2_dst[(4 * dst_strd) + idx];
+ WORD16 a3 = pi2_dst[(4 * dst_strd) + 4 + idx];
+
+ WORD16 b0 = (a0 + a1);
+ WORD16 b1 = (a0 - a1);
+ WORD16 b2 = (a2 + a3);
+ WORD16 b3 = (a2 - a3);
+
+ pi2_dst[idx] = b0 + b2;
+ pi2_dst[4 + idx] = b1 + b3;
+ pi2_dst[(4 * dst_strd) + idx] = b0 - b2;
+ pi2_dst[(4 * dst_strd) + 4 + idx] = b1 - b3;
+ }
+}
+
+static void ihevce_hadamard_16x16_8bit(
+ UWORD8 *pu1_src,
+ WORD32 src_strd,
+ UWORD8 *pu1_pred,
+ WORD32 pred_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd)
+{
+ WORD32 i;
+
+ // y0: 8x8 Hadamard of the top-left quadrant, written straight into pi2_dst
+ ihevce_hadamard_8x8_8bit(pu1_src, src_strd, pu1_pred, pred_strd, pi2_dst, dst_strd);
+ // y1: top-right quadrant (8 columns to the right)
+ ihevce_hadamard_8x8_8bit(pu1_src + 8, src_strd, pu1_pred + 8, pred_strd, pi2_dst + 8, dst_strd);
+ // y2: bottom-left quadrant (8 rows down)
+ ihevce_hadamard_8x8_8bit(
+ pu1_src + 8 * src_strd,
+ src_strd,
+ pu1_pred + 8 * pred_strd,
+ pred_strd,
+ pi2_dst + (8 * dst_strd),
+ dst_strd);
+ // y3: bottom-right quadrant (8 rows down, 8 columns right)
+ ihevce_hadamard_8x8_8bit(
+ pu1_src + 8 + 8 * src_strd,
+ src_strd,
+ pu1_pred + 8 + 8 * pred_strd,
+ pred_strd,
+ pi2_dst + (8 * dst_strd) + 8,
+ dst_strd);
+
+ /* Children combined in place as below; each first-stage sum/difference is halved (>> 1) before the second stage */
+ /* _ _ */
+ /* | (y0 + y1) + (y2 + y3) (y0 - y1) + (y2 - y3) | */
+ /* | (y0 + y1) - (y2 + y3) (y0 - y1) - (y2 - y3) | */
+ /* \- -/ */
+ for(i = 0; i < 64; i++)
+ {
+ WORD32 idx = (i >> 3) * dst_strd + (i % 8);
+ WORD16 a0 = pi2_dst[idx];
+ WORD16 a1 = pi2_dst[8 + idx];
+ WORD16 a2 = pi2_dst[(8 * dst_strd) + idx];
+ WORD16 a3 = pi2_dst[(8 * dst_strd) + 8 + idx];
+
+ WORD16 b0 = (a0 + a1) >> 1;
+ WORD16 b1 = (a0 - a1) >> 1;
+ WORD16 b2 = (a2 + a3) >> 1;
+ WORD16 b3 = (a2 - a3) >> 1;
+
+ pi2_dst[idx] = b0 + b2;
+ pi2_dst[8 + idx] = b1 + b3;
+ pi2_dst[(8 * dst_strd) + idx] = b0 - b2;
+ pi2_dst[(8 * dst_strd) + 8 + idx] = b1 - b3;
+ }
+}
+
+static void ihevce_hadamard_32x32_8bit(
+ UWORD8 *pu1_src,
+ WORD32 src_strd,
+ UWORD8 *pu1_pred,
+ WORD32 pred_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd)
+{
+ WORD32 i;
+
+ // y0: 16x16 Hadamard of the top-left quadrant, written straight into pi2_dst
+ ihevce_hadamard_16x16_8bit(pu1_src, src_strd, pu1_pred, pred_strd, pi2_dst, dst_strd);
+ // y1: top-right quadrant (16 columns to the right)
+ ihevce_hadamard_16x16_8bit(
+ pu1_src + 16, src_strd, pu1_pred + 16, pred_strd, pi2_dst + 16, dst_strd);
+ // y2: bottom-left quadrant (16 rows down)
+ ihevce_hadamard_16x16_8bit(
+ pu1_src + 16 * src_strd,
+ src_strd,
+ pu1_pred + 16 * pred_strd,
+ pred_strd,
+ pi2_dst + (16 * dst_strd),
+ dst_strd);
+ // y3: bottom-right quadrant (16 rows down, 16 columns right)
+ ihevce_hadamard_16x16_8bit(
+ pu1_src + 16 + 16 * src_strd,
+ src_strd,
+ pu1_pred + 16 + 16 * pred_strd,
+ pred_strd,
+ pi2_dst + (16 * dst_strd) + 16,
+ dst_strd);
+
+ /* Children combined in place as below; every child coefficient is scaled down (>> 2) when it is loaded */
+ /* _ _ */
+ /* | (y0 + y1) + (y2 + y3) (y0 - y1) + (y2 - y3) | */
+ /* | (y0 + y1) - (y2 + y3) (y0 - y1) - (y2 - y3) | */
+ /* \- -/ */
+ for(i = 0; i < 256; i++)
+ {
+ WORD32 idx = (i >> 4) * dst_strd + (i % 16);
+ WORD16 a0 = pi2_dst[idx] >> 2;
+ WORD16 a1 = pi2_dst[16 + idx] >> 2;
+ WORD16 a2 = pi2_dst[(16 * dst_strd) + idx] >> 2;
+ WORD16 a3 = pi2_dst[(16 * dst_strd) + 16 + idx] >> 2;
+
+ WORD16 b0 = (a0 + a1);
+ WORD16 b1 = (a0 - a1);
+ WORD16 b2 = (a2 + a3);
+ WORD16 b3 = (a2 - a3);
+
+ pi2_dst[idx] = b0 + b2;
+ pi2_dst[16 + idx] = b1 + b3;
+ pi2_dst[(16 * dst_strd) + idx] = b0 - b2;
+ pi2_dst[(16 * dst_strd) + 16 + idx] = b1 - b3;
+ }
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Computes the Hadamard SAD (SATD) of a 4x4 block with 8-bit input
+*
+* @par Description:
+* The residue (source - prediction) is 4x4 Hadamard transformed into a local buffer and its absolute values are summed
+* @param[in] pu1_origin
+* UWORD8 pointer to the current (source) block
+*
+* @param[in] src_strd
+* WORD32 Source stride
+*
+* @param[in] pu1_pred_buf
+* UWORD8 pointer to the prediction block
+*
+* @param[in] pred_strd
+* WORD32 Pred stride
+*
+* @param[in] pi2_dst
+* WORD16 pointer to the transform block (unused)
+*
+* @param[in] dst_strd
+* WORD32 Destination stride (unused)
+*
+* @note
+* No size argument is taken; the block size is fixed at 4x4
+*
+* @returns Hadamard SAD: (sum of absolute coefficients + 2) >> 2
+*
+* @remarks
+* The transform destination is not written; coefficients stay in a local array and only the SATD is returned
+*
+*******************************************************************************
+*/
+UWORD32 ihevce_HAD_4x4_8bit(
+ UWORD8 *pu1_origin,
+ WORD32 src_strd,
+ UWORD8 *pu1_pred_buf,
+ WORD32 pred_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd)
+{
+ WORD32 k;
+ WORD16 v[16];
+ UWORD32 u4_sad = 0;
+
+ (void)pi2_dst;
+ (void)dst_strd;
+ ihevce_hadamard_4x4_8bit(pu1_origin, src_strd, pu1_pred_buf, pred_strd, v, 4);
+
+ for(k = 0; k < 16; ++k)
+ u4_sad += abs(v[k]);
+ u4_sad = ((u4_sad + 2) >> 2);
+
+ return u4_sad;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Computes the Hadamard SAD (SATD) of an 8x8 block with 8-bit input
+*
+* @par Description:
+* The residue (source - prediction) is 8x8 Hadamard transformed into a local buffer and its absolute values are summed
+* @param[in] pu1_origin
+* UWORD8 pointer to the current (source) block
+*
+* @param[in] src_strd
+* WORD32 Source stride
+*
+* @param[in] pu1_pred_buf
+* UWORD8 pointer to the prediction block
+*
+* @param[in] pred_strd
+* WORD32 Pred stride
+*
+* @param[in] pi2_dst
+* WORD16 pointer to the transform block (unused)
+*
+* @param[in] dst_strd
+* WORD32 Destination stride (unused)
+*
+* @note
+* No size argument is taken; the block size is fixed at 8x8
+*
+* @returns Hadamard SAD: (sum of absolute coefficients + 4) >> 3
+*
+* @remarks
+* The transform destination is not written; coefficients stay in a local array and only the SATD is returned
+*
+*******************************************************************************
+*/
+UWORD32 ihevce_HAD_8x8_8bit(
+ UWORD8 *pu1_origin,
+ WORD32 src_strd,
+ UWORD8 *pu1_pred_buf,
+ WORD32 pred_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd)
+{
+ WORD32 k;
+ UWORD32 u4_sad = 0;
+ WORD16 v[64];
+
+ (void)pi2_dst;
+ (void)dst_strd;
+ ihevce_hadamard_8x8_8bit(pu1_origin, src_strd, pu1_pred_buf, pred_strd, v, 8);
+
+ for(k = 0; k < 64; ++k)
+ u4_sad += abs(v[k]);
+ u4_sad = ((u4_sad + 4) >> 3);
+
+ return u4_sad;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Computes the DC suppressed Hadamard SAD of an 8x8 block with 8-bit input
+*
+* @par Description:
+* Same as the plain 8x8 Hadamard SAD, except that coefficient 0 (DC) is zeroed before the absolute values are summed
+* @param[in] pu1_origin
+* UWORD8 pointer to the current (source) block
+*
+* @param[in] src_strd
+* WORD32 Source stride
+*
+* @param[in] pu1_pred_buf
+* UWORD8 pointer to the prediction block
+*
+* @param[in] pred_strd
+* WORD32 Pred stride
+*
+* @param[in] pi2_dst
+* WORD16 pointer to the transform block (unused)
+*
+* @param[in] dst_strd
+* WORD32 Destination stride (unused)
+*
+* @note
+* No size argument is taken; the block size is fixed at 8x8
+*
+* @returns Hadamard SAD with DC suppressed: (sum of absolute AC coefficients + 4) >> 3
+*
+* @remarks
+* The transform destination is not written; coefficients stay in a local array and only the SATD is returned
+*
+*******************************************************************************
+*/
+UWORD32 ihevce_compute_ac_had_8x8_8bit(
+ UWORD8 *pu1_origin,
+ WORD32 src_strd,
+ UWORD8 *pu1_pred_buf,
+ WORD32 pred_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd)
+{
+ WORD32 k;
+ UWORD32 u4_sad = 0;
+ WORD16 v[64];
+
+ (void)pi2_dst;
+ (void)dst_strd;
+ ihevce_hadamard_8x8_8bit(pu1_origin, src_strd, pu1_pred_buf, pred_strd, v, 8);
+
+ v[0] = 0;
+ for(k = 0; k < 64; ++k)
+ u4_sad += abs(v[k]);
+ u4_sad = ((u4_sad + 4) >> 3);
+
+ return u4_sad;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Computes the Hadamard SAD (SATD) of a 16x16 block with 8-bit input
+*
+* @par Description:
+* The residue (source - prediction) is 16x16 Hadamard transformed into a local buffer and its absolute values are summed
+* @param[in] pu1_origin
+* UWORD8 pointer to the current (source) block
+*
+* @param[in] src_strd
+* WORD32 Source stride
+*
+* @param[in] pu1_pred_buf
+* UWORD8 pointer to the prediction block
+*
+* @param[in] pred_strd
+* WORD32 Pred stride
+*
+* @param[in] pi2_dst
+* WORD16 pointer to the transform block (unused)
+*
+* @param[in] dst_strd
+* WORD32 Destination stride (unused)
+*
+* @note
+* No size argument is taken; the block size is fixed at 16x16
+*
+* @returns Hadamard SAD: (sum of absolute coefficients + 4) >> 3
+*
+* @remarks
+* The transform destination is not written; coefficients stay in a local array and only the SATD is returned
+*
+*******************************************************************************
+*/
+UWORD32 ihevce_HAD_16x16_8bit(
+ UWORD8 *pu1_origin,
+ WORD32 src_strd,
+ UWORD8 *pu1_pred_buf,
+ WORD32 pred_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd)
+{
+ WORD32 k;
+ UWORD32 u4_sad = 0;
+ WORD16 v[256];
+
+ (void)pi2_dst;
+ (void)dst_strd;
+ ihevce_hadamard_16x16_8bit(pu1_origin, src_strd, pu1_pred_buf, pred_strd, v, 16);
+
+ for(k = 0; k < 256; ++k)
+ u4_sad += abs(v[k]);
+ u4_sad = ((u4_sad + 4) >> 3);
+
+ return u4_sad;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Computes the Hadamard SAD (SATD) of a 32x32 block with 8-bit input
+*
+* @par Description:
+* The residue (source - prediction) is 32x32 Hadamard transformed into a local buffer and its absolute values are summed
+* @param[in] pu1_origin
+* UWORD8 pointer to the current (source) block
+*
+* @param[in] src_strd
+* WORD32 Source stride
+*
+* @param[in] pu1_pred_buf
+* UWORD8 pointer to the prediction block
+*
+* @param[in] pred_strd
+* WORD32 Pred stride
+*
+* @param[in] pi2_dst
+* WORD16 pointer to the transform block (unused)
+*
+* @param[in] dst_strd
+* WORD32 Destination stride (unused)
+*
+* @note
+* No size argument is taken; the block size is fixed at 32x32
+*
+* @returns Hadamard SAD: (sum of absolute coefficients + 2) >> 2
+*
+* @remarks
+* The transform destination is not written; coefficients stay in a local array and only the SATD is returned
+*
+*******************************************************************************
+*/
+UWORD32 ihevce_HAD_32x32_8bit(
+ UWORD8 *pu1_origin,
+ WORD32 src_strd,
+ UWORD8 *pu1_pred_buf,
+ WORD32 pred_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd)
+{
+ WORD32 k;
+ UWORD32 u4_sad = 0;
+ WORD16 v[32 * 32];
+
+ (void)pi2_dst;
+ (void)dst_strd;
+ ihevce_hadamard_32x32_8bit(pu1_origin, src_strd, pu1_pred_buf, pred_strd, v, 32);
+
+ for(k = 0; k < 32 * 32; ++k)
+ u4_sad += abs(v[k]);
+ u4_sad = ((u4_sad + 2) >> 2);
+
+ return u4_sad;
+}
+
+//#if COMPUTE_16x16_R == C
+/**
+*******************************************************************************
+*
+* @brief
+* Computes the 8x8 Hadamard transform from its four child 4x4 Hadamard results
+*
+* @par Description:
+* Co-located coefficients of y0..y3 go through one more butterfly stage; the absolute outputs are accumulated as SATD
+* @param[in] pi2_4x4_had
+* WORD16 pointer to 4x4 hadamard buffer (y0, y1, y2, y3 = top-left, top-right, bottom-left, bottom-right 4x4)
+*
+* @param[in] had4_strd
+* stride of the 4x4 hadamard buffer holding y0, y1, y2, y3
+*
+* @param[out] pi2_dst
+* destination buffer where 8x8 hadamard result is stored
+*
+* @param[in] dst_strd
+* stride of destination block
+*
+* @param[in] i4_frm_qstep
+* frm_qstep value; the cbf threshold is computed as i4_frm_qstep >> 8
+* @param[out] pi4_cbf : set to 1 when any output coefficient magnitude exceeds the threshold (never cleared here)
+* @returns
+* 8x8 Hadamard SATD (plain sum of absolute output coefficients, no normalization applied here)
+* @remarks
+* The loop runs over the 16 positions of a 4x4 grid and produces four outputs per position (64 in total)
+*******************************************************************************
+*/
+static UWORD32 ihevce_compute_8x8HAD_using_4x4(
+ WORD16 *pi2_4x4_had,
+ WORD32 had4_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd,
+ WORD32 i4_frm_qstep,
+ WORD32 *pi4_cbf)
+{
+ /* CBF threshold: frame qstep value right shifted by 8 */
+ WORD32 threshold = (i4_frm_qstep >> 8);
+
+ /* Pointers to the 4 child 4x4 sub-blocks inside the 4x4 HAD buffer */
+ WORD16 *pi2_y0 = pi2_4x4_had;
+ WORD16 *pi2_y1 = pi2_4x4_had + 4;
+ WORD16 *pi2_y2 = pi2_4x4_had + had4_strd * 4;
+ WORD16 *pi2_y3 = pi2_4x4_had + had4_strd * 4 + 4;
+
+ /* Pointers to the 4 quadrants of the 8x8 HAD output */
+ WORD16 *pi2_dst0 = pi2_dst;
+ WORD16 *pi2_dst1 = pi2_dst + 4;
+ WORD16 *pi2_dst2 = pi2_dst + dst_strd * 4;
+ WORD16 *pi2_dst3 = pi2_dst + dst_strd * 4 + 4;
+
+ UWORD32 u4_satd = 0;
+ WORD32 i;
+
+ /* Child HAD results combined as follows to get Parent result */
+ /* _ _ */
+ /* | (y0 + y1) + (y2 + y3) (y0 - y1) + (y2 - y3) | */
+ /* | (y0 + y1) - (y2 + y3) (y0 - y1) - (y2 - y3) | */
+ /* \- -/ */
+ for(i = 0; i < 16; i++)
+ {
+ WORD32 src_idx = (i >> 2) * had4_strd + (i % 4);
+ WORD32 dst_idx = (i >> 2) * dst_strd + (i % 4);
+
+ WORD16 a0 = pi2_y0[src_idx];
+ WORD16 a1 = pi2_y1[src_idx];
+ WORD16 a2 = pi2_y2[src_idx];
+ WORD16 a3 = pi2_y3[src_idx];
+
+ WORD16 b0 = (a0 + a1);
+ WORD16 b1 = (a0 - a1);
+ WORD16 b2 = (a2 + a3);
+ WORD16 b3 = (a2 - a3);
+
+ pi2_dst0[dst_idx] = b0 + b2;
+ pi2_dst1[dst_idx] = b1 + b3;
+ pi2_dst2[dst_idx] = b0 - b2;
+ pi2_dst3[dst_idx] = b1 - b3;
+
+ if(ABS(pi2_dst0[dst_idx]) > threshold)
+ *pi4_cbf = 1;
+ if(ABS(pi2_dst1[dst_idx]) > threshold)
+ *pi4_cbf = 1;
+ if(ABS(pi2_dst2[dst_idx]) > threshold)
+ *pi4_cbf = 1;
+ if(ABS(pi2_dst3[dst_idx]) > threshold)
+ *pi4_cbf = 1;
+
+ u4_satd += ABS(pi2_dst0[dst_idx]);
+ u4_satd += ABS(pi2_dst1[dst_idx]);
+ u4_satd += ABS(pi2_dst2[dst_idx]);
+ u4_satd += ABS(pi2_dst3[dst_idx]);
+ }
+
+ /* return the (unnormalized) 8x8 satd */
+ return (u4_satd);
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Computes Residue and Hadamard Transform for four 4x4 blocks (Z scan) of
+* an 8x8 block (Residue is computed for 8-bit src and prediction buffers)
+* Modified to incorporate the dead-zone implementation - Lokesh
+*
+* @par Description:
+* Each 4x4 transform is written to pi2_dst4x4 and its normalized HSAD, (sum + 2) >> 2, is stored in pi4_hsad
+* @param[in] pu1_src
+* UWORD8 pointer to the current block
+*
+* @param[in] src_strd
+* WORD32 Source stride
+*
+* @param[in] pu1_pred
+* UWORD8 pointer to the prediction block
+*
+* @param[in] pred_strd
+* WORD32 Pred stride
+*
+* @param[out] pi2_dst4x4
+* WORD16 pointer to the transform block (receives the four 4x4 transforms)
+*
+* @param[in] dst_strd
+* WORD32 Destination stride
+*
+* @param[out] pi4_hsad
+* array for storing hadamard sad of each 4x4 block
+*
+* @param[in] hsad_stride
+* stride of hadamard sad destination buffer (for Zscan order of storing sads)
+*
+* @param[in] i4_frm_qstep
+* frm_qstep value intended for the dead-zone threshold (currently unused)
+*
+* @returns
+* Sum of the four normalized 4x4 hadamard sads
+* @remarks
+* The dead-zone threshold is fixed at 0, so the zeroing checks are not expected to change any coefficient
+*******************************************************************************
+*/
+static WORD32 ihevce_had4_4x4(
+ UWORD8 *pu1_src,
+ WORD32 src_strd,
+ UWORD8 *pu1_pred,
+ WORD32 pred_strd,
+ WORD16 *pi2_dst4x4,
+ WORD32 dst_strd,
+ WORD32 *pi4_hsad,
+ WORD32 hsad_stride,
+ WORD32 i4_frm_qstep)
+{
+ WORD32 i, k;
+ WORD32 i4_child_total_sad = 0;
+
+ (void)i4_frm_qstep;
+ /* -------- Compute four 4x4 HAD Transforms, one per loop iteration ---------*/
+ for(i = 0; i < 4; i++)
+ {
+ UWORD8 *pu1_pi0, *pu1_pi1;
+ WORD16 *pi2_dst;
+ WORD32 blkx, blky;
+ UWORD32 u4_hsad = 0;
+ // TODO: choose deadzone as f(qstep); with 0, "ABS(x) < threshold" below cannot hold if ABS() is non-negative
+ WORD32 threshold = 0;
+
+ /*****************************************************/
+ /* Assuming the looping structure of the four */
+ /* blocks is in Z scan order of 4x4s in a 8x8 */
+ /* block instead of raster scan */
+ /*****************************************************/
+ blkx = (i & 0x1);
+ blky = (i >> 1);
+
+ pu1_pi0 = pu1_src + (blkx * 4) + (blky * 4 * src_strd);
+ pu1_pi1 = pu1_pred + (blkx * 4) + (blky * 4 * pred_strd);
+ pi2_dst = pi2_dst4x4 + (blkx * 4) + (blky * 4 * dst_strd);
+
+ ihevce_hadamard_4x4_8bit(pu1_pi0, src_strd, pu1_pi1, pred_strd, pi2_dst, dst_strd);
+
+ for(k = 0; k < 4; k++)
+ {
+ if(ABS(pi2_dst[0 * dst_strd + k]) < threshold)
+ pi2_dst[0 * dst_strd + k] = 0;
+
+ if(ABS(pi2_dst[1 * dst_strd + k]) < threshold)
+ pi2_dst[1 * dst_strd + k] = 0;
+
+ if(ABS(pi2_dst[2 * dst_strd + k]) < threshold)
+ pi2_dst[2 * dst_strd + k] = 0;
+
+ if(ABS(pi2_dst[3 * dst_strd + k]) < threshold)
+ pi2_dst[3 * dst_strd + k] = 0;
+
+ /* Accumulate the SATD over column k of this 4x4 */
+ u4_hsad += ABS(pi2_dst[0 * dst_strd + k]);
+ u4_hsad += ABS(pi2_dst[1 * dst_strd + k]);
+ u4_hsad += ABS(pi2_dst[2 * dst_strd + k]);
+ u4_hsad += ABS(pi2_dst[3 * dst_strd + k]);
+ }
+
+ /*===== Normalize the HSAD, store it per block and add it to the running total =====*/
+ pi4_hsad[blkx + (blky * hsad_stride)] = ((u4_hsad + 2) >> 2);
+ i4_child_total_sad += ((u4_hsad + 2) >> 2);
+ }
+ return i4_child_total_sad;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Computes the HSADs of the four 4x4 blocks in an 8x8 block and stores them
+*
+* @par Description:
+* The four 4x4 HSADs go to the HAD_4x4 array; their sum is stored in the HAD_8x8 array entry of this block
+* @param[in] pu1_src
+* UWORD8 pointer to the current block
+*
+* @param[in] src_strd
+* WORD32 Source stride
+*
+* @param[in] pu1_pred
+* UWORD8 pointer to the prediction block
+*
+* @param[in] pred_strd
+* WORD32 Pred stride
+*
+* @param[out] pi2_dst
+* WORD16 pointer to the transform output block (unused; a local buffer is used)
+*
+* @param[in] dst_strd
+* WORD32 Destination stride (unused)
+*
+* @param[out] ppi4_hsad
+* pointer to base pointers for storing hadamard sads of various
+* block sizes (4x4 to 32x32)
+*
+* @param[in] pos_x_y_4x4
+* Denotes packed x,y position of current 4x4 block w.r.t. the start of ctb/CU/MB
+* Lower 16 bits denote xpos and upper 16 bits denote ypos of the 4x4 block
+*
+* @param[in] num_4x4_in_row
+* Number of 4x4 blocks per row; used as row stride of the 4x4 hsad array (halved for the 8x8 array)
+*
+* @returns
+* Nothing; results are written through ppi4_hsad
+* @remarks
+* The value stored for the 8x8 block is the sum of the child 4x4 HSADs, not an 8x8 transform SATD
+*******************************************************************************
+*/
+void ihevce_had_8x8_using_4_4x4(
+ UWORD8 *pu1_src,
+ WORD32 src_strd,
+ UWORD8 *pu1_pred,
+ WORD32 pred_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd,
+ WORD32 **ppi4_hsad,
+ WORD32 pos_x_y_4x4,
+ WORD32 num_4x4_in_row)
+{
+ WORD16 ai2_4x4_had[64];
+ WORD32 pos_x = pos_x_y_4x4 & 0xFFFF;
+ WORD32 pos_y = (pos_x_y_4x4 >> 16) & 0xFFFF;
+ WORD32 *pi4_4x4_hsad;
+ WORD32 *pi4_8x8_hsad;
+
+ (void)pi2_dst;
+ (void)dst_strd;
+ ASSERT(pos_x >= 0);
+ ASSERT(pos_y >= 0);
+
+ /* Locate this block's entries in the 4x4 and 8x8 HAD SATD arrays (8x8 grid is half the size in x and y) */
+ pi4_4x4_hsad = ppi4_hsad[HAD_4x4] + pos_x + pos_y * num_4x4_in_row;
+ pi4_8x8_hsad = ppi4_hsad[HAD_8x8] + (pos_x >> 1) + (pos_y >> 1) * (num_4x4_in_row >> 1);
+
+ /* -------- Four 4x4 HAD Transforms of the 8x8 in one call; the returned child total fills the 8x8 entry --------- */
+ pi4_8x8_hsad[0] = ihevce_had4_4x4(
+ pu1_src, src_strd, pu1_pred, pred_strd, ai2_4x4_had, 8, pi4_4x4_hsad, num_4x4_in_row, 0);
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Recursive Hadamard Transform for 8x8 block. HSAD is returned for the 8x8
+* block and its four subblocks(4x4).
+*
+* @par Description:
+*
+* @param[in] pu1_origin
+* UWORD8 pointer to the current block
+*
+* @param[in] src_strd
+* WORD32 Source stride
+*
+* @param[in] pu1_pred
+* UWORD8 pointer to the prediction block
+*
+* @param[in] pred_strd
+* WORD32 Pred stride
+*
+* @param[out] pi2_dst
+* WORD16 pointer to the transform output block
+*
+* @param[out] dst_strd
+* WORD32 Destination stride
+*
+* @param[out] ppi4_hsad
+* pointer to base pointers for storing hadamard sads of various
+* block sizes (4x4 to 32x32)
+*
+* @param[in] pos_x_y_4x4
+* Denotes packed x,y position of current 4x4 block w.r.t. the start of ctb/CU/MB
+* Lower 16bits denote xpos and upper 16ypos of the 4x4block
+*
+* @param[in] num_4x4_in_row
+* Denotes the number of current 4x4 blocks in a ctb/CU/MB
+*
+* @param[in] i4_frm_qstep
+* frm_qstep value based on which the threshold value is calculated
+*
+* @returns
+*
+* @remarks
+*
+*******************************************************************************
+*/
+WORD32 ihevce_had_8x8_using_4_4x4_r(
+ UWORD8 *pu1_src,
+ WORD32 src_strd,
+ UWORD8 *pu1_pred,
+ WORD32 pred_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd,
+ WORD32 **ppi4_hsad,
+ WORD32 **ppi4_tu_split,
+ WORD32 **ppi4_tu_early_cbf,
+ WORD32 pos_x_y_4x4,
+ WORD32 num_4x4_in_row,
+ WORD32 lambda,
+ WORD32 lambda_q_shift,
+ WORD32 i4_frm_qstep,
+ WORD32 i4_cur_depth,
+ WORD32 i4_max_depth,
+ WORD32 i4_max_tr_size,
+ WORD32 *pi4_tu_split_cost,
+ void *pv_func_sel)
+{
+ WORD16 ai2_4x4_had[64];
+ WORD32 pos_x = pos_x_y_4x4 & 0xFFFF;
+ WORD32 pos_y = (pos_x_y_4x4 >> 16) & 0xFFFF;
+ WORD32 *pi4_4x4_hsad;
+ WORD32 *pi4_8x8_hsad;
+ WORD32 *pi4_8x8_tu_split;
+
+ WORD32 *pi4_8x8_tu_early_cbf;
+
+ UWORD32 u4_satd;
+ WORD32 cost_child = 0, cost_parent = 0;
+ WORD32 early_cbf = 0;
+
+ const UWORD8 u1_cur_tr_size = 8;
+ /* Stores the best cost for the Current 8x8: Lokesh */
+ WORD32 best_cost = 0;
+
+ (void)pv_func_sel;
+ ASSERT(pos_x >= 0);
+ ASSERT(pos_y >= 0);
+
+ /* Initialize pointers to store 4x4 and 8x8 HAD SATDs */
+ pi4_4x4_hsad = ppi4_hsad[HAD_4x4] + pos_x + pos_y * num_4x4_in_row;
+ pi4_8x8_hsad = ppi4_hsad[HAD_8x8] + (pos_x >> 1) + (pos_y >> 1) * (num_4x4_in_row >> 1);
+ pi4_8x8_tu_split = ppi4_tu_split[HAD_8x8] + (pos_x >> 1) + (pos_y >> 1) * (num_4x4_in_row >> 1);
+ pi4_8x8_tu_early_cbf =
+ ppi4_tu_early_cbf[HAD_8x8] + (pos_x >> 1) + (pos_y >> 1) * (num_4x4_in_row >> 1);
+
+ /* -------- Compute four 4x4 HAD Transforms of 8x8 in one call--------- */
+ cost_child = ihevce_had4_4x4(
+ pu1_src, src_strd, pu1_pred, pred_strd, ai2_4x4_had, 8, pi4_4x4_hsad, num_4x4_in_row, 0);
+
+ /* -------- Compute 8x8 HAD Transform using 4x4 results ------------- */
+ u4_satd = ihevce_compute_8x8HAD_using_4x4(
+ ai2_4x4_had, 8, pi2_dst, dst_strd, i4_frm_qstep, &early_cbf);
+
+ /* store the normalized 8x8 satd */
+ cost_parent = ((u4_satd + 4) >> 3);
+
+ /* 4 CBF Flags, extra 1 becoz of the 0.5 bits per bin is assumed */
+ cost_child += ((4) * lambda) >> (lambda_q_shift + 1);
+
+ if(i4_cur_depth < i4_max_depth)
+ {
+ if((cost_child < cost_parent) || (i4_max_tr_size < u1_cur_tr_size))
+ {
+ //cost_child -= ((4) * lambda) >> (lambda_q_shift + 1);
+ *pi4_tu_split_cost += (4 * lambda) >> (lambda_q_shift + 1);
+ best_cost = cost_child;
+ best_cost <<= 1;
+ best_cost++;
+ pi4_8x8_tu_split[0] = 1;
+ pi4_8x8_hsad[0] = cost_child;
+ }
+ else
+ {
+ //cost_parent -= ((1) * lambda) >> (lambda_q_shift + 1);
+ best_cost = cost_parent;
+ best_cost <<= 1;
+ pi4_8x8_tu_split[0] = 0;
+ pi4_8x8_hsad[0] = cost_parent;
+ }
+ }
+ else
+ {
+ //cost_parent -= ((1) * lambda) >> (lambda_q_shift + 1);
+ best_cost = cost_parent;
+ best_cost <<= 1;
+ pi4_8x8_tu_split[0] = 0;
+ pi4_8x8_hsad[0] = cost_parent;
+ }
+
+ pi4_8x8_tu_early_cbf[0] = early_cbf;
+
+ /* best cost has tu_split_flag at LSB(Least significant bit) */
+ return ((best_cost << 1) + early_cbf);
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Computes 16x16 transform using children 8x8 hadamard results
+* and flags outputs that exceed the dead-zone threshold
+*
+* @par Description:
+* Each output combines the co-located values of the four 8x8 input blocks
+* @param[in] pi2_8x8_had
+* WORD16 pointer to buffer holding four 8x8 Hadamard blocks (y0 y1 / y2 y3)
+*
+* @param[in] had8_strd
+* stride of the 8x8 Hadamard buffer shared by y0, y1, y2, y3
+*
+* @param[out] pi2_dst
+* destination buffer where the 16x16 Hadamard result is stored
+*
+* @param[in] dst_strd
+* stride of destination block
+*
+* @param[in] i4_frm_qstep
+* the dead-zone threshold is i4_frm_qstep >> 8
+*
+* @param[out] pi4_cbf
+* set to 1 if any output magnitude exceeds the threshold; else untouched
+* @returns
+* sum of absolute values of all 16x16 output coefficients
+*******************************************************************************
+*/
+static UWORD32 ihevce_compute_16x16HAD_using_8x8(
+ WORD16 *pi2_8x8_had,
+ WORD32 had8_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd,
+ WORD32 i4_frm_qstep,
+ WORD32 *pi4_cbf)
+{
+ /* Dead-zone threshold: Qstep value right shifted by 8 */
+ WORD32 threshold = (i4_frm_qstep >> 8);
+
+ /* Pointers to the four 8x8 input blocks inside the HAD buffer */
+ WORD16 *pi2_y0 = pi2_8x8_had;
+ WORD16 *pi2_y1 = pi2_8x8_had + 8;
+ WORD16 *pi2_y2 = pi2_8x8_had + had8_strd * 8;
+ WORD16 *pi2_y3 = pi2_8x8_had + had8_strd * 8 + 8;
+
+ /* Pointers to the four 8x8 quadrants of the 16x16 output */
+ WORD16 *pi2_dst0 = pi2_dst;
+ WORD16 *pi2_dst1 = pi2_dst + 8;
+ WORD16 *pi2_dst2 = pi2_dst + dst_strd * 8;
+ WORD16 *pi2_dst3 = pi2_dst + dst_strd * 8 + 8;
+
+ UWORD32 u4_satd = 0;
+ WORD32 i;
+
+ /* Child HAD results combined as follows to get Parent result */
+ /* _ _ */
+ /* | (y0 + y1) + (y2 + y3) (y0 - y1) + (y2 - y3) | */
+ /* | (y0 + y1) - (y2 + y3) (y0 - y1) - (y2 - y3) | */
+ /* \- -/ */
+ for(i = 0; i < 64; i++)
+ {
+ WORD32 src_idx = (i >> 3) * had8_strd + (i % 8);
+ WORD32 dst_idx = (i >> 3) * dst_strd + (i % 8);
+
+ WORD16 a0 = pi2_y0[src_idx];
+ WORD16 a1 = pi2_y1[src_idx];
+ WORD16 a2 = pi2_y2[src_idx];
+ WORD16 a3 = pi2_y3[src_idx];
+
+ WORD16 b0 = (a0 + a1) >> 1;
+ WORD16 b1 = (a0 - a1) >> 1;
+ WORD16 b2 = (a2 + a3) >> 1;
+ WORD16 b3 = (a2 - a3) >> 1;
+
+ pi2_dst0[dst_idx] = b0 + b2;
+ pi2_dst1[dst_idx] = b1 + b3;
+ pi2_dst2[dst_idx] = b0 - b2;
+ pi2_dst3[dst_idx] = b1 - b3;
+
+ /* Set the cbf flag if any output magnitude exceeds the dead-zone; dst is not modified */
+ if(ABS(pi2_dst0[dst_idx]) > threshold)
+ *pi4_cbf = 1;
+ if(ABS(pi2_dst1[dst_idx]) > threshold)
+ *pi4_cbf = 1;
+ if(ABS(pi2_dst2[dst_idx]) > threshold)
+ *pi4_cbf = 1;
+ if(ABS(pi2_dst3[dst_idx]) > threshold)
+ *pi4_cbf = 1;
+
+ u4_satd += ABS(pi2_dst0[dst_idx]);
+ u4_satd += ABS(pi2_dst1[dst_idx]);
+ u4_satd += ABS(pi2_dst2[dst_idx]);
+ u4_satd += ABS(pi2_dst3[dst_idx]);
+ }
+
+ /* return 16x16 satd */
+ return (u4_satd);
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Hadamard Transform for 16x16 block with 8x8 and 4x4 SATD updates.
+* Uses recursive 8x8 had output to compute satd for 16x16 and its children
+*
+* @par Description:
+* Runs ihevce_had_8x8_using_4_4x4_r (given pv_func_sel) on each 8x8 quadrant first.
+* @param[in] pu1_src
+* UWORD8 pointer to the current block
+*
+* @param[in] src_strd
+* WORD32 Source stride
+*
+* @param[in] pu1_pred
+* UWORD8 pointer to the prediction block
+*
+* @param[in] pred_strd
+* WORD32 Pred stride
+*
+* @param[out] pi2_dst
+* WORD16 pointer to the transform output block
+*
+* @param[in] dst_strd
+* WORD32 Destination stride
+*
+* @param[out] ppi4_hsad
+* pointer to base pointers for storing Hadamard SADs of various
+* block sizes (4x4 to 32x32)
+* @param[out] ppi4_tu_split, ppi4_tu_early_cbf
+* indexed like ppi4_hsad; receive the packed split flags and early-cbf flags
+* @param[in] pos_x_y_4x4
+* Denotes packed x,y position of current 4x4 block w.r.t. start of ctb/CU/MB
+* Lower 16 bits denote xpos and upper 16 bits ypos of the 4x4 block
+* @param[in] num_4x4_in_row
+* Number of 4x4 blocks per row; scaled down to index the 16x16 tables
+*
+* @param[in] lambda
+* lambda value is the cost factor calculated based on QP
+*
+* @param[in] lambda_q_shift
+* lambda_q_shift used to reverse the lambda value back from q8 format
+*
+* @param[in] i4_frm_qstep
+* frm_qstep value based on which the dead-zone threshold is calculated
+* @param[in] i4_cur_depth, i4_max_depth
+* the split is considered only while i4_cur_depth < i4_max_depth
+* @param[in] i4_max_tr_size
+* the split is forced (depth permitting) when this is smaller than 16
+* @param[in,out] pi4_tu_split_cost
+* incremented by the flag cost whenever a split is chosen
+* @returns
+* (best_cost << 10) + (tu_split_flag << 5) + i4_early_cbf_flag
+*******************************************************************************
+*/
+
+WORD32 ihevce_had_16x16_r(
+ UWORD8 *pu1_src,
+ WORD32 src_strd,
+ UWORD8 *pu1_pred,
+ WORD32 pred_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd,
+ WORD32 **ppi4_hsad,
+ WORD32 **ppi4_tu_split,
+ WORD32 **ppi4_tu_early_cbf,
+ WORD32 pos_x_y_4x4,
+ WORD32 num_4x4_in_row,
+ WORD32 lambda,
+ WORD32 lambda_q_shift,
+ WORD32 i4_frm_qstep,
+ WORD32 i4_cur_depth,
+ WORD32 i4_max_depth,
+ WORD32 i4_max_tr_size,
+ WORD32 *pi4_tu_split_cost,
+ void *pv_func_sel)
+{
+ WORD16 ai2_8x8_had[256];
+ WORD32 *pi4_16x16_hsad;
+ WORD32 *pi4_16x16_tu_split;
+
+ WORD32 *pi4_16x16_tu_early_cbf;
+
+ UWORD32 u4_satd = 0;
+ WORD32 tu_split_flag = 0;
+ WORD32 i4_early_cbf_flag = 0, early_cbf = 0;
+ const UWORD8 u1_cur_tr_size = 16;
+
+ /* cost_parent : Stores the cost of the parent HAD transform (16x16) */
+ /* cost_child : Stores the summed cost of the four child 8x8 results */
+ WORD32 cost_parent = 0, cost_child = 0;
+
+ /* best_cost holds the chosen cost that is stored and returned at the end */
+ /* best_cost_tu_split receives the packed return value of each 8x8 call */
+ WORD32 best_cost = 0, best_cost_tu_split;
+ WORD32 i;
+
+ WORD16 *pi2_y0;
+ UWORD8 *pu1_src0;
+ UWORD8 *pu1_pred0;
+ WORD32 pos_x_y_4x4_0;
+
+ WORD32 pos_x = pos_x_y_4x4 & 0xFFFF;
+ WORD32 pos_y = (pos_x_y_4x4 >> 16) & 0xFFFF;
+
+ ASSERT(pos_x >= 0);
+ ASSERT(pos_y >= 0);
+
+ /* Initialize pointers to store 16x16 SATDs */
+ pi4_16x16_hsad = ppi4_hsad[HAD_16x16] + (pos_x >> 2) + (pos_y >> 2) * (num_4x4_in_row >> 2);
+
+ pi4_16x16_tu_split =
+ ppi4_tu_split[HAD_16x16] + (pos_x >> 2) + (pos_y >> 2) * (num_4x4_in_row >> 2);
+
+ pi4_16x16_tu_early_cbf =
+ ppi4_tu_early_cbf[HAD_16x16] + (pos_x >> 2) + (pos_y >> 2) * (num_4x4_in_row >> 2);
+
+ /* -------- Compute the four 8x8 HAD Transforms of the 16x16 --------- */
+ for(i = 0; i < 4; i++)
+ {
+ pu1_src0 = pu1_src + (i & 0x01) * 8 + (i >> 1) * src_strd * 8;
+ pu1_pred0 = pu1_pred + (i & 0x01) * 8 + (i >> 1) * pred_strd * 8;
+ pi2_y0 = ai2_8x8_had + (i & 0x01) * 8 + (i >> 1) * 16 * 8;
+ pos_x_y_4x4_0 = pos_x_y_4x4 + (i & 0x01) * 2 + (i >> 1) * (2 << 16);
+
+ best_cost_tu_split = ihevce_had_8x8_using_4_4x4_r(
+ pu1_src0,
+ src_strd,
+ pu1_pred0,
+ pred_strd,
+ pi2_y0,
+ 16,
+ ppi4_hsad,
+ ppi4_tu_split,
+ ppi4_tu_early_cbf,
+ pos_x_y_4x4_0,
+ num_4x4_in_row,
+ lambda,
+ lambda_q_shift,
+ i4_frm_qstep,
+ i4_cur_depth + 1,
+ i4_max_depth,
+ i4_max_tr_size,
+ pi4_tu_split_cost,
+ pv_func_sel);
+
+ /* Cost sits above the two low bits (tu_split flag and early cbf flag) */
+ best_cost = (best_cost_tu_split >> 2);
+
+ /* Bit 1 of the child's return value carries its TU split flag */
+ tu_split_flag += (best_cost_tu_split & 0x3) >> 1;
+
+ /* Bit 0 of the child's return value carries its early_cbf */
+ i4_early_cbf_flag += (best_cost_tu_split & 0x1);
+
+ cost_child += best_cost;
+
+ tu_split_flag <<= 1;
+ i4_early_cbf_flag <<= 1;
+ }
+
+ /* -------- Compute 16x16 HAD Transform using 8x8 results ------------- */
+ pi2_y0 = ai2_8x8_had;
+
+ /* The helper derives its dead-zone threshold from i4_frm_qstep */
+ u4_satd =
+ ihevce_compute_16x16HAD_using_8x8(pi2_y0, 16, pi2_dst, dst_strd, i4_frm_qstep, &early_cbf);
+
+ /* normalized satd: rounded divide by 8 */
+ cost_parent = ((u4_satd + 4) >> 3);
+
+ /* 4 TU_Split flags, 4 CBF flags; the extra 1 in the shift is because 0.5 bits per bin is assumed */
+ cost_child += ((4 + 4) * lambda) >> (lambda_q_shift + 1);
+
+ i4_early_cbf_flag += early_cbf;
+
+ /* A split is only considered while the current depth is below i4_max_depth;
+ otherwise the parent (16x16) cost is taken unconditionally */
+ if(i4_cur_depth < i4_max_depth)
+ {
+ if((cost_child < cost_parent) || (i4_max_tr_size < u1_cur_tr_size))
+ {
+ /* Split chosen: charge the flag cost and set bit 0 of tu_split_flag */
+ *pi4_tu_split_cost += ((4 + 4) * lambda) >> (lambda_q_shift + 1);
+ tu_split_flag += 1;
+ best_cost = cost_child;
+ }
+ else
+ {
+ /* Parent kept: bit 0 stays 0, the children's flag bits are left as they are */
+ tu_split_flag += 0;
+ best_cost = cost_parent;
+ }
+ }
+ else
+ {
+ /* Depth limit reached: parent cost is used, bit 0 stays 0 */
+ tu_split_flag += 0;
+ best_cost = cost_parent;
+ }
+
+ pi4_16x16_hsad[0] = best_cost;
+ pi4_16x16_tu_split[0] = tu_split_flag;
+ pi4_16x16_tu_early_cbf[0] = i4_early_cbf_flag;
+
+ /* Pack three values: cost above bit 10, tu_split_flag from bit 5, early cbf flags in the low bits */
+ return ((best_cost << 10) + (tu_split_flag << 5) + i4_early_cbf_flag);
+}
+
+//#endif
+/**
+*******************************************************************************
+*
+* @brief
+* Computes 32x32 transform using children 16x16 hadamard results
+*
+* @par Description:
+* Inputs are scaled by >> 2 before being combined; outputs above the dead-zone set the cbf
+* @param[in] pi2_16x16_had
+* WORD16 pointer to buffer holding four 16x16 Hadamard blocks (y0 y1 / y2 y3)
+*
+* @param[in] had16_strd
+* stride of the 16x16 Hadamard buffer shared by y0, y1, y2, y3
+*
+* @param[out] pi2_dst
+* destination buffer where the 32x32 Hadamard result is stored
+*
+* @param[in] dst_strd
+* stride of destination block
+*
+* @param[in] i4_frm_qstep
+* the dead-zone threshold is i4_frm_qstep >> 8
+* @param[out] pi4_cbf
+* set to 1 if any output magnitude exceeds the threshold; else untouched
+* @returns
+* sum of absolute values of all 32x32 output coefficients
+*
+*******************************************************************************
+*/
+//#if COMPUTE_32x32_USING_16X16 == C
+UWORD32 ihevce_compute_32x32HAD_using_16x16(
+ WORD16 *pi2_16x16_had,
+ WORD32 had16_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd,
+ WORD32 i4_frm_qstep,
+ WORD32 *pi4_cbf)
+{
+ /* Dead-zone threshold: Qstep value right shifted by 8 */
+ WORD32 threshold = (i4_frm_qstep >> 8);
+
+ /* Pointers to the four 16x16 input blocks inside the HAD buffer */
+ WORD16 *pi2_y0 = pi2_16x16_had;
+ WORD16 *pi2_y1 = pi2_16x16_had + 16;
+ WORD16 *pi2_y2 = pi2_16x16_had + had16_strd * 16;
+ WORD16 *pi2_y3 = pi2_16x16_had + had16_strd * 16 + 16;
+
+ /* Pointers to the four 16x16 quadrants of the 32x32 output */
+ WORD16 *pi2_dst0 = pi2_dst;
+ WORD16 *pi2_dst1 = pi2_dst + 16;
+ WORD16 *pi2_dst2 = pi2_dst + dst_strd * 16;
+ WORD16 *pi2_dst3 = pi2_dst + dst_strd * 16 + 16;
+
+ UWORD32 u4_satd = 0;
+ WORD32 i;
+
+ /* Child HAD results combined as follows to get Parent result */
+ /* _ _ */
+ /* | (y0 + y1) + (y2 + y3) (y0 - y1) + (y2 - y3) | */
+ /* | (y0 + y1) - (y2 + y3) (y0 - y1) - (y2 - y3) | */
+ /* \- -/ */
+ for(i = 0; i < 256; i++)
+ {
+ WORD32 src_idx = (i >> 4) * had16_strd + (i % 16);
+ WORD32 dst_idx = (i >> 4) * dst_strd + (i % 16);
+
+ WORD16 a0 = pi2_y0[src_idx] >> 2;
+ WORD16 a1 = pi2_y1[src_idx] >> 2;
+ WORD16 a2 = pi2_y2[src_idx] >> 2;
+ WORD16 a3 = pi2_y3[src_idx] >> 2;
+
+ WORD16 b0 = (a0 + a1);
+ WORD16 b1 = (a0 - a1);
+ WORD16 b2 = (a2 + a3);
+ WORD16 b3 = (a2 - a3);
+
+ pi2_dst0[dst_idx] = b0 + b2;
+ pi2_dst1[dst_idx] = b1 + b3;
+ pi2_dst2[dst_idx] = b0 - b2;
+ pi2_dst3[dst_idx] = b1 - b3;
+
+ /* Set the cbf flag if any output magnitude exceeds the dead-zone; dst is not modified */
+ if(ABS(pi2_dst0[dst_idx]) > threshold)
+ *pi4_cbf = 1;
+ if(ABS(pi2_dst1[dst_idx]) > threshold)
+ *pi4_cbf = 1;
+ if(ABS(pi2_dst2[dst_idx]) > threshold)
+ *pi4_cbf = 1;
+ if(ABS(pi2_dst3[dst_idx]) > threshold)
+ *pi4_cbf = 1;
+
+ u4_satd += ABS(pi2_dst0[dst_idx]);
+ u4_satd += ABS(pi2_dst1[dst_idx]);
+ u4_satd += ABS(pi2_dst2[dst_idx]);
+ u4_satd += ABS(pi2_dst3[dst_idx]);
+ }
+
+ /* return 32x32 satd */
+ return (u4_satd);
+}
+//#endif
+
+/**
+*******************************************************************************
+*
+* @brief
+* Hadamard Transform for 32x32 block with 16x16, 8x8 and 4x4 SATD updates.
+* Uses recursive 16x16 had output to compute satd for 32x32 and its children
+*
+* @par Description:
+* Runs the 16x16 recursion on the four quadrants, then the 32x32 parent transform.
+* @param[in] pu1_src
+* UWORD8 pointer to the current block
+*
+* @param[in] src_strd
+* WORD32 Source stride
+*
+* @param[in] pu1_pred
+* UWORD8 pointer to the prediction block
+*
+* @param[in] pred_strd
+* WORD32 Pred stride
+*
+* @param[out] pi2_dst
+* WORD16 pointer to the transform output block
+*
+* @param[in] dst_strd
+* WORD32 Destination stride
+*
+* @param[out] ppi4_hsad
+* pointer to base pointers for storing Hadamard SADs of various
+* block sizes (4x4 to 32x32)
+* @param[out] ppi4_tu_split, ppi4_tu_early_cbf
+* indexed like ppi4_hsad; receive the packed split flags and early-cbf flags
+* @param[in] pos_x_y_4x4
+* Denotes packed x,y position of current 4x4 block w.r.t. start of ctb/CU/MB
+* Lower 16 bits denote xpos and upper 16 bits ypos of the 4x4 block
+* @param[in] num_4x4_in_row
+* Number of 4x4 blocks per row; scaled down to index the 32x32 tables
+*
+* @param[in] lambda
+* lambda value is the cost factor calculated based on QP
+*
+* @param[in] lambda_q_shift
+* lambda_q_shift used to reverse the lambda value back from q8 format
+*
+* @param[in] i4_frm_qstep
+* frm_qstep value forwarded to the 16x16 and 32x32 transform helpers
+* @param[in] i4_cur_depth, i4_max_depth
+* the split is considered only while i4_cur_depth < i4_max_depth
+* @param[in] i4_max_tr_size
+* the split is forced (depth permitting) when this is smaller than 32
+* @param[in,out] pi4_tu_split_cost
+* incremented by the flag cost whenever a split is chosen
+* @param[in] ps_func_selector
+* supplies pf_had_16x16_r and pf_compute_32x32HAD_using_16x16
+* @returns none; results are written through the ppi4_* tables
+*******************************************************************************
+*/
+void ihevce_had_32x32_r(
+ UWORD8 *pu1_src,
+ WORD32 src_strd,
+ UWORD8 *pu1_pred,
+ WORD32 pred_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd,
+ WORD32 **ppi4_hsad,
+ WORD32 **ppi4_tu_split,
+ WORD32 **ppi4_tu_early_cbf,
+ WORD32 pos_x_y_4x4,
+ WORD32 num_4x4_in_row,
+ WORD32 lambda,
+ WORD32 lambda_q_shift,
+ WORD32 i4_frm_qstep,
+ WORD32 i4_cur_depth,
+ WORD32 i4_max_depth,
+ WORD32 i4_max_tr_size,
+ WORD32 *pi4_tu_split_cost,
+ me_func_selector_t *ps_func_selector)
+
+{
+ WORD16 ai2_16x16_had[1024];
+ WORD32 *pi4_32x32_hsad;
+ WORD32 *pi4_32x32_tu_split;
+ WORD32 *pi4_32x32_tu_early_cbf;
+
+ WORD32 pos_x = pos_x_y_4x4 & 0xFFFF;
+ WORD32 pos_y = (pos_x_y_4x4 >> 16) & 0xFFFF;
+ WORD32 tu_split_flag = 0;
+ const UWORD8 u1_cur_tr_size = 32;
+ WORD32 i4_early_cbf_flag = 0, early_cbf = 0;
+
+ /* cost_parent : Stores the cost of the parent HAD transform (32x32) */
+ /* cost_child : Stores the summed cost of the four child 16x16 results */
+ WORD32 cost_child = 0, cost_parent = 0;
+
+ /* best cost for the entire TU (32x32); stored in the HAD_32x32 table */
+ WORD32 best_cost = 0;
+ /* receives the packed return value of each 16x16 call */
+ WORD32 best_cost_tu_split;
+
+ /* Pointers to the four 16x16 quadrants of the local HAD buffer (stride 32) */
+ WORD16 *pi2_y0 = ai2_16x16_had;
+ WORD16 *pi2_y1 = ai2_16x16_had + 16;
+ WORD16 *pi2_y2 = ai2_16x16_had + 32 * 16;
+ WORD16 *pi2_y3 = ai2_16x16_had + 32 * 16 + 16;
+
+ UWORD8 *pu1_src0 = pu1_src;
+ UWORD8 *pu1_src1 = pu1_src + 16;
+ UWORD8 *pu1_src2 = pu1_src + src_strd * 16;
+ UWORD8 *pu1_src3 = pu1_src + src_strd * 16 + 16;
+
+ UWORD8 *pu1_pred0 = pu1_pred;
+ UWORD8 *pu1_pred1 = pu1_pred + 16;
+ UWORD8 *pu1_pred2 = pu1_pred + pred_strd * 16;
+ UWORD8 *pu1_pred3 = pu1_pred + pred_strd * 16 + 16;
+
+ ASSERT(pos_x >= 0);
+ ASSERT(pos_y >= 0);
+
+ /* Initialize pointers to store 32x32 SATDs */
+ pi4_32x32_hsad = ppi4_hsad[HAD_32x32] + (pos_x >> 3) + (pos_y >> 3) * (num_4x4_in_row >> 3);
+
+ pi4_32x32_tu_split =
+ ppi4_tu_split[HAD_32x32] + (pos_x >> 3) + (pos_y >> 3) * (num_4x4_in_row >> 3);
+
+ pi4_32x32_tu_early_cbf =
+ ppi4_tu_early_cbf[HAD_32x32] + (pos_x >> 3) + (pos_y >> 3) * (num_4x4_in_row >> 3);
+
+ /* -------- Run the 16x16 recursion on each of the four quadrants --------- */
+ best_cost_tu_split = ps_func_selector->pf_had_16x16_r(
+ pu1_src0,
+ src_strd,
+ pu1_pred0,
+ pred_strd,
+ pi2_y0,
+ 32,
+ ppi4_hsad,
+ ppi4_tu_split,
+ ppi4_tu_early_cbf,
+ pos_x_y_4x4,
+ num_4x4_in_row,
+ lambda,
+ lambda_q_shift,
+ i4_frm_qstep,
+ i4_cur_depth + 1,
+ i4_max_depth,
+ i4_max_tr_size,
+ pi4_tu_split_cost,
+ NULL);
+
+ /* cost sits above the low 10 bits */
+ best_cost = best_cost_tu_split >> 10;
+
+ /* TU split flags are extracted with mask 0x3E0 (bits 5-9) */
+ tu_split_flag += (best_cost_tu_split & 0x3E0) >> 5;
+
+ /* Early CBF info is extracted from the low 5 bits */
+ i4_early_cbf_flag += best_cost_tu_split & 0x1F;
+
+ tu_split_flag <<= 5;
+ i4_early_cbf_flag <<= 5;
+
+ cost_child += best_cost;
+
+ best_cost_tu_split = ps_func_selector->pf_had_16x16_r(
+ pu1_src1,
+ src_strd,
+ pu1_pred1,
+ pred_strd,
+ pi2_y1,
+ 32,
+ ppi4_hsad,
+ ppi4_tu_split,
+ ppi4_tu_early_cbf,
+ pos_x_y_4x4 + 4,
+ num_4x4_in_row,
+ lambda,
+ lambda_q_shift,
+ i4_frm_qstep,
+ i4_cur_depth + 1,
+ i4_max_depth,
+ i4_max_tr_size,
+ pi4_tu_split_cost,
+ NULL);
+
+ /* cost sits above the low 10 bits */
+ best_cost = best_cost_tu_split >> 10;
+
+ /* TU split flags are extracted with mask 0x3E0 (bits 5-9) */
+ tu_split_flag += (best_cost_tu_split & 0x3E0) >> 5;
+
+ /* Early CBF info is extracted from the low 5 bits */
+ i4_early_cbf_flag += best_cost_tu_split & 0x1F;
+
+ tu_split_flag <<= 5;
+ i4_early_cbf_flag <<= 5;
+
+ cost_child += best_cost;
+
+ best_cost_tu_split = ps_func_selector->pf_had_16x16_r(
+ pu1_src2,
+ src_strd,
+ pu1_pred2,
+ pred_strd,
+ pi2_y2,
+ 32,
+ ppi4_hsad,
+ ppi4_tu_split,
+ ppi4_tu_early_cbf,
+ pos_x_y_4x4 + (4 << 16),
+ num_4x4_in_row,
+ lambda,
+ lambda_q_shift,
+ i4_frm_qstep,
+ i4_cur_depth + 1,
+ i4_max_depth,
+ i4_max_tr_size,
+ pi4_tu_split_cost,
+ NULL);
+
+ /* cost sits above the low 10 bits */
+ best_cost = best_cost_tu_split >> 10;
+
+ /* TU split flags are extracted with mask 0x3E0 (bits 5-9) */
+ tu_split_flag += (best_cost_tu_split & 0x3E0) >> 5;
+
+ /* Early CBF info is extracted from the low 5 bits */
+ i4_early_cbf_flag += best_cost_tu_split & 0x1F;
+
+ tu_split_flag <<= 5;
+ i4_early_cbf_flag <<= 5;
+
+ cost_child += best_cost;
+
+ best_cost_tu_split = ps_func_selector->pf_had_16x16_r(
+ pu1_src3,
+ src_strd,
+ pu1_pred3,
+ pred_strd,
+ pi2_y3,
+ 32,
+ ppi4_hsad,
+ ppi4_tu_split,
+ ppi4_tu_early_cbf,
+ pos_x_y_4x4 + (4 << 16) + 4,
+ num_4x4_in_row,
+ lambda,
+ lambda_q_shift,
+ i4_frm_qstep,
+ i4_cur_depth + 1,
+ i4_max_depth,
+ i4_max_tr_size,
+ pi4_tu_split_cost,
+ NULL);
+
+ /* cost sits above the low 10 bits */
+ best_cost = best_cost_tu_split >> 10;
+
+ /* TU split flags are extracted with mask 0x3E0 (bits 5-9) */
+ tu_split_flag += (best_cost_tu_split & 0x3E0) >> 5;
+
+ /* Early CBF info is extracted from the low 5 bits */
+ i4_early_cbf_flag += best_cost_tu_split & 0x1F;
+
+ tu_split_flag <<= 1;
+ i4_early_cbf_flag <<= 1;
+
+ cost_child += best_cost;
+
+ {
+ UWORD32 u4_satd = 0;
+
+ u4_satd = ps_func_selector->pf_compute_32x32HAD_using_16x16(
+ pi2_y0, 32, pi2_dst, dst_strd, i4_frm_qstep, &early_cbf);
+
+ cost_parent = ((u4_satd + 2) >> 2);
+ }
+
+ /* 4 TU_Split flags , 4 CBF Flags*/
+ cost_child += ((4 + 4) * lambda) >> (lambda_q_shift + 1);
+
+ i4_early_cbf_flag += early_cbf;
+
+ /* No flag cost is added to cost_parent; only the child cost above */
+ /* is charged for its split and CBF flags. */
+
+ if(i4_cur_depth < i4_max_depth)
+ {
+ if((cost_child < cost_parent) || (u1_cur_tr_size > i4_max_tr_size))
+ {
+ *pi4_tu_split_cost += ((4 + 4) * lambda) >> (lambda_q_shift + 1);
+ best_cost = cost_child;
+ tu_split_flag++;
+ }
+ else
+ {
+ tu_split_flag = 0;
+ best_cost = cost_parent;
+ }
+ }
+ else
+ {
+ tu_split_flag = 0;
+ best_cost = cost_parent;
+ }
+
+ pi4_32x32_tu_split[0] = tu_split_flag;
+
+ pi4_32x32_hsad[0] = best_cost;
+
+ pi4_32x32_tu_early_cbf[0] = i4_early_cbf_flag;
+}
diff --git a/encoder/ihevce_had_satd.h b/encoder/ihevce_had_satd.h
new file mode 100644
index 0000000..88034ae
--- /dev/null
+++ b/encoder/ihevce_had_satd.h
@@ -0,0 +1,304 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_had_satd.h
+*
+* \brief
+* This file contains function prototypes of HAD and SATD functions
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_HAD_SATD_H_
+#define _IHEVCE_HAD_SATD_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+
+/* @brief enum for hadamard transform block sizes supported : 4x4 to 32x32 */
+typedef enum
+{
+ HAD_4x4 = 0,
+ HAD_8x8 = 1,
+ HAD_16x16 = 2,
+ HAD_32x32 = 3,
+ HAD_INVALID = 4
+} HAD_SIZE_T;
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+UWORD32 ihevce_HAD_4x4_8bit(
+ UWORD8 *pu1_origin,
+ WORD32 src_strd,
+ UWORD8 *pu1_pred_buf,
+ WORD32 pred_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd);
+
+UWORD32 ihevce_HAD_8x8_8bit(
+ UWORD8 *pu1_origin,
+ WORD32 src_strd,
+ UWORD8 *pu1_pred_buf,
+ WORD32 pred_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd);
+
+UWORD32 ihevce_compute_ac_had_8x8_8bit(
+ UWORD8 *pu1_origin,
+ WORD32 src_strd,
+ UWORD8 *pu1_pred_buf,
+ WORD32 pred_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd);
+
+UWORD32 ihevce_HAD_16x16_8bit(
+ UWORD8 *pu1_origin,
+ WORD32 src_strd,
+ UWORD8 *pu1_pred_buf,
+ WORD32 pred_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd);
+
+UWORD32 ihevce_HAD_32x32_8bit(
+ UWORD8 *pu1_origin,
+ WORD32 src_strd,
+ UWORD8 *pu1_pred_buf,
+ WORD32 pred_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd);
+/* Function type used below to declare ihevce_had_16x16_r and ihevce_had_16x16_r_neon */
+typedef WORD32 FT_HAD_16X16_R(
+ UWORD8 *pu1_src,
+ WORD32 src_strd,
+ UWORD8 *pu1_pred,
+ WORD32 pred_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd,
+ WORD32 **ppi4_hsad,
+ WORD32 **ppi4_tu_split,
+ WORD32 **ppi4_tu_early_cbf,
+ WORD32 pos_x_y_4x4,
+ WORD32 num_4x4_in_row,
+ WORD32 lambda,
+ WORD32 lambda_q_shift,
+ WORD32 i4_frm_qstep,
+ WORD32 i4_cur_depth,
+ WORD32 i4_max_depth,
+ WORD32 i4_max_tr_size,
+ WORD32 *pi4_tu_split_cost,
+ void *pv_func_sel);
+/* Function type used below to declare ihevce_compute_32x32HAD_using_16x16 and its _neon variant */
+typedef UWORD32 FT_HAD_32X32_USING_16X16(
+ WORD16 *pi2_16x16_had,
+ WORD32 had16_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd,
+ WORD32 i4_frm_qstep,
+ WORD32 *pi4_cbf);
+/* Function type for a 16x16-from-8x8 HAD helper; same parameter list as ihevce_compute_16x16HAD_using_8x8_noise_detect */
+typedef UWORD32 ihevce_compute_16x16HAD_using_8x8_ft(
+ WORD16 *pi2_8x8_had,
+ WORD32 had8_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd,
+ WORD32 i4_frm_qstep,
+ WORD32 *pi4_cbf);
+/* Function type used below to declare ihevce_had_8x8_using_4_4x4_r and its _neon variant */
+typedef WORD32 FT_HAD_8X8_USING_4_4X4_R(
+ UWORD8 *pu1_src,
+ WORD32 src_strd,
+ UWORD8 *pu1_pred,
+ WORD32 pred_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd,
+ WORD32 **ppi4_hsad,
+ WORD32 **ppi4_tu_split,
+ WORD32 **ppi4_tu_early_cbf,
+ WORD32 pos_x_y_4x4,
+ WORD32 num_4x4_in_row,
+ WORD32 lambda,
+ WORD32 lambda_q_shift,
+ WORD32 i4_frm_qstep,
+ WORD32 i4_cur_depth,
+ WORD32 i4_max_depth,
+ WORD32 i4_max_tr_size,
+ WORD32 *pi4_tu_split_cost,
+ void *pv_func_sel);
+
+WORD32 ihevce_had_16x16_r_noise_detect(
+ UWORD8 *pu1_src,
+ WORD32 src_strd,
+ UWORD8 *pu1_pred,
+ WORD32 pred_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd,
+ WORD32 pos_x_y_4x4,
+ WORD32 num_4x4_in_row,
+ WORD32 scaling_for_pred);
+
+UWORD32 ihevce_compute_8x8HAD_using_4x4_noise_detect(
+ WORD16 *pi2_4x4_had,
+ WORD32 had4_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd,
+ WORD32 i4_frm_qstep,
+ WORD32 *pi4_cbf);
+void ihevce_had4_4x4_noise_detect(
+ UWORD8 *pu1_src,
+ WORD32 src_strd,
+ UWORD8 *pu1_pred,
+ WORD32 pred_strd,
+ WORD16 *pi2_dst4x4,
+ WORD16 *pi2_residue,
+ WORD32 dst_strd,
+ WORD32 scaling_for_pred);
+
+void ihevce_had_8x8_using_4_4x4_noise_detect(
+ UWORD8 *pu1_src,
+ WORD32 src_strd,
+ UWORD8 *pu1_pred,
+ WORD32 pred_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd,
+ WORD32 pos_x_y_4x4,
+ WORD32 num_4x4_in_row,
+ WORD32 scaling_for_pred);
+
+void ihevce_had_8x8_using_4_4x4(
+ UWORD8 *pu1_src,
+ WORD32 src_strd,
+ UWORD8 *pu1_pred,
+ WORD32 pred_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd,
+ WORD32 **ppi4_hsad,
+ WORD32 pos_x_y_4x4,
+ WORD32 num_4x4_in_row);
+/* Void-returning function type with the same parameter list as FT_HAD_16X16_R */
+typedef void ihevce_had_nxn_r_ft(
+ UWORD8 *pu1_src,
+ WORD32 src_strd,
+ UWORD8 *pu1_pred,
+ WORD32 pred_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd,
+ WORD32 **ppi4_hsad,
+ WORD32 **ppi4_tu_split,
+ WORD32 **ppi4_tu_early_cbf,
+ WORD32 pos_x_y_4x4,
+ WORD32 num_4x4_in_row,
+ WORD32 lambda,
+ WORD32 lambda_q_shift,
+ WORD32 i4_frm_qstep,
+ WORD32 i4_cur_depth,
+ WORD32 i4_max_depth,
+ WORD32 i4_max_tr_size,
+ WORD32 *pi4_tu_split_cost,
+ void *pv_func_sel);
+
+UWORD32 ihevce_mat_add_shift_satd_16bit(
+ WORD16 *pi2_buf1,
+ WORD32 buf1_strd,
+ WORD16 *pi2_buf2,
+ WORD32 buf2_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd,
+ WORD32 size,
+ WORD32 shift,
+ WORD32 threshold,
+ WORD32 *pi4_cbf);
+
+UWORD32 ihevce_mat_sub_shift_satd_16bit(
+ WORD16 *pi2_buf1,
+ WORD32 buf1_strd,
+ WORD16 *pi2_buf2,
+ WORD32 buf2_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd,
+ WORD32 size,
+ WORD32 shift,
+ WORD32 threshold,
+ WORD32 *pi4_cbf);
+
+void ihevce_mat_add_16bit(
+ WORD16 *pi2_buf1,
+ WORD32 buf1_strd,
+ WORD16 *pi2_buf2,
+ WORD32 buf2_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd,
+ WORD32 size,
+ WORD32 threshold);
+
+void ihevce_mat_sub_16bit(
+ WORD16 *pi2_buf1,
+ WORD32 buf1_strd,
+ WORD16 *pi2_buf2,
+ WORD32 buf2_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd,
+ WORD32 size,
+ WORD32 threshold);
+
+UWORD32 ihevce_compute_16x16HAD_using_8x8_noise_detect(
+ WORD16 *pi2_8x8_had,
+ WORD32 had8_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd,
+ WORD32 i4_frm_qstep,
+ WORD32 *pi4_cbf);
+
+/******* C declarations ****************/
+FT_HAD_8X8_USING_4_4X4_R ihevce_had_8x8_using_4_4x4_r;
+FT_HAD_16X16_R ihevce_had_16x16_r;
+FT_HAD_32X32_USING_16X16 ihevce_compute_32x32HAD_using_16x16;
+
+/******** A9Q declarations **********/
+FT_HAD_8X8_USING_4_4X4_R ihevce_had_8x8_using_4_4x4_r_neon;
+FT_HAD_16X16_R ihevce_had_16x16_r_neon;
+FT_HAD_32X32_USING_16X16 ihevce_compute_32x32HAD_using_16x16_neon;
+
+#endif /* _IHEVCE_HAD_SATD_H_ */
diff --git a/encoder/ihevce_hle_interface.c b/encoder/ihevce_hle_interface.c
new file mode 100644
index 0000000..3d9926e
--- /dev/null
+++ b/encoder/ihevce_hle_interface.c
@@ -0,0 +1,2266 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*!
+******************************************************************************
+* \file ihevce_hle_interface.c
+*
+* \brief
+* This file contains all the functions related to the high level encoder
+* interface layer
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+* List of Functions
+* <TODO: TO BE ADDED>
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+#include <time.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_macros.h"
+#include "ihevc_debug.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+#include "ihevc_trans_tables.h"
+#include "ihevc_trans_macros.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_hle_interface.h"
+#include "ihevce_hle_q_func.h"
+#include "ihevce_buffer_que_interface.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_multi_thrd_funcs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_error_checks.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+#include "ihevce_memory_init.h"
+#include "ihevce_lap_interface.h"
+#include "ihevce_entropy_cod.h"
+#include "ihevce_entropy_structs.h"
+#include "ihevce_frame_process_utils.h"
+#include "ihevce_frame_process.h"
+#include "ihevce_profile.h"
+#include "ihevce_global_tables.h"
+#include "ihevce_dep_mngr_interface.h"
+#include "ihevce_common_utils.h"
+#include "hme_datatype.h"
+#include "hme_interface.h"
+#include "hme_common_defs.h"
+#include "hme_defs.h"
+#include "ihevce_coarse_me_pass.h"
+#include "ihevce_me_pass.h"
+#include "ihevce_enc_loop_structs.h"
+#include "ihevce_enc_loop_pass.h"
+
+#include "cast_types.h"
+#include "osal.h"
+#include "osal_defaults.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+/*!
+******************************************************************************
+* \if Function name : ihevce_context_reset \endif
+*
+* \brief
+* Puts the status flags and the queue mutex handle of an encoder context
+* back to their initial values
+* \param[in] ps_enc_ctxt : encoder context pointer
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_context_reset(enc_ctxt_t *ps_enc_ctxt)
+{
+    /* no frame limit has been reached yet */
+    ps_enc_ctxt->i4_frame_limit_reached = 0;
+
+    /* the i/o queues are marked as not created */
+    ps_enc_ctxt->i4_io_queues_created = 0;
+
+    /* the queue mutex handle starts out unset; */
+    /* ihevce_create_ports assigns it later     */
+    ps_enc_ctxt->s_enc_ques.pv_q_mutex_hdl = NULL;
+
+    /* clear the end flag */
+    ps_enc_ctxt->i4_end_flag = 0;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_hle_interface_create \endif
+*
+* \brief
+* Creates one encoder context per resolution layer and registers each in the HLE context
+*
+* \param[in] ps_hle_ctxt : high level encoder interface context pointer
+*
+* \return
+* IV_SUCCESS, or IV_FAIL on a validation, allocation or mutex creation failure
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+IV_API_CALL_STATUS_T ihevce_hle_interface_create(ihevce_hle_ctxt_t *ps_hle_ctxt)
+{
+    /* local variables */
+    enc_ctxt_t *ps_enc_ctxt;
+    iv_mem_rec_t s_memtab;
+    ihevce_static_cfg_params_t *ps_enc_static_cfg_params;
+    WORD32 i4_num_resolutions = ps_hle_ctxt->ps_static_cfg_prms->s_tgt_lyr_prms.i4_num_res_layers;
+    WORD32 i4_look_ahead_frames_in_first_pass = -1;
+    WORD32 i4_total_cores = 0, ctr, i4_mres_flag = 0;
+    ihevce_sys_api_t *ps_sys_api = &ps_hle_ctxt->ps_static_cfg_prms->s_sys_api;
+
+    WORD32 status = 0;
+    WORD32 i;
+    WORD32 *pi4_active_res_id = NULL;
+
+    /* OSAL Init */
+    status = ihevce_osal_init((void *)ps_hle_ctxt);
+
+    if(status != 0)
+        return (IV_FAIL);
+
+    /* --------------------------------------------------------------------- */
+    /*                        High Level Encoder Init                        */
+    /* --------------------------------------------------------------------- */
+
+    if(i4_num_resolutions > 1)
+        i4_mres_flag = 1;
+    /* set no error in the output */
+    ps_hle_ctxt->i4_error_code = 0;
+
+    /* Error checks on the static parameters passed */
+    ps_hle_ctxt->i4_error_code = ihevce_hle_validate_static_params(ps_hle_ctxt->ps_static_cfg_prms);
+
+    /* memory for the encoder's own copy of the static cfg params; the encoder may overwrite
+       this copy and should use it for all its usage */
+    s_memtab.i4_size = sizeof(iv_mem_rec_t);
+    s_memtab.i4_mem_alignment = 4;
+    s_memtab.i4_mem_size = sizeof(ihevce_static_cfg_params_t);
+    s_memtab.e_mem_type = IV_EXT_CACHEABLE_NORMAL_MEM;
+
+    ps_hle_ctxt->ihevce_mem_alloc(
+        ps_hle_ctxt->pv_mem_mgr_hdl, &ps_hle_ctxt->ps_static_cfg_prms->s_sys_api, &s_memtab);
+    if(s_memtab.pv_base == NULL)
+    {
+        return (IV_FAIL);
+    }
+    ps_enc_static_cfg_params = (ihevce_static_cfg_params_t *)s_memtab.pv_base;
+    memcpy(
+        ps_enc_static_cfg_params,
+        ps_hle_ctxt->ps_static_cfg_prms,
+        (sizeof(ihevce_static_cfg_params_t)));
+
+    i4_total_cores = ps_enc_static_cfg_params->s_multi_thrd_prms.i4_max_num_cores;
+
+    /* check for validity of memory control flag (only 0,1,2 modes are allowed) */
+    if((ps_enc_static_cfg_params->s_multi_thrd_prms.i4_memory_alloc_ctrl_flag > 2) ||
+       (ps_enc_static_cfg_params->s_multi_thrd_prms.i4_memory_alloc_ctrl_flag < 0))
+    {
+        ps_hle_ctxt->i4_error_code = IHEVCE_INVALID_MEM_CTRL_FLAG;
+    }
+
+    if((i4_mres_flag == 1) &&
+       (ps_enc_static_cfg_params->s_multi_thrd_prms.i4_use_thrd_affinity == 1))
+    {
+        ps_sys_api->ihevce_printf(
+            ps_sys_api->pv_cb_handle,
+            "\nIHEVCE WARNING: Enabling thread affinity in multiresolution encoding will affect "
+            "performance\n");
+    }
+    if((ps_enc_static_cfg_params->s_tgt_lyr_prms.as_tgt_params[0].i4_quality_preset ==
+        IHEVCE_QUALITY_P6) &&
+       (ps_enc_static_cfg_params->s_config_prms.i4_cu_level_rc))
+    {
+        ps_sys_api->ihevce_printf(
+            ps_sys_api->pv_cb_handle,
+            "\nIHEVCE WARNING: Disabling CU level QP modulation for P6 preset\n");
+        ps_enc_static_cfg_params->s_config_prms.i4_cu_level_rc = 0;
+    }
+    if((ps_enc_static_cfg_params->s_tgt_lyr_prms.as_tgt_params[0].i4_quality_preset ==
+        IHEVCE_QUALITY_P7) &&
+       (ps_enc_static_cfg_params->s_config_prms.i4_cu_level_rc))
+    {
+        ps_sys_api->ihevce_printf(
+            ps_sys_api->pv_cb_handle,
+            "\nIHEVCE WARNING: Disabling CU level QP modulation for P7 preset\n");
+        ps_enc_static_cfg_params->s_config_prms.i4_cu_level_rc = 0;
+    }
+
+    if(0 != ps_hle_ctxt->i4_error_code)
+    {
+        ps_hle_ctxt->ihevce_mem_free(ps_hle_ctxt->pv_mem_mgr_hdl, &s_memtab);
+        return (IV_FAIL);
+    }
+    ps_hle_ctxt->ai4_num_core_per_res[0] = i4_total_cores;
+
+    if(1 == ps_enc_static_cfg_params->s_tgt_lyr_prms.i4_mres_single_out)
+    {
+        /* Memory Allocation of pi4_active_res_id */
+        s_memtab.i4_size = sizeof(iv_mem_rec_t);
+        s_memtab.i4_mem_alignment = 4;
+        s_memtab.i4_mem_size = sizeof(WORD32) * (IHEVCE_MAX_NUM_RESOLUTIONS + 1);
+        s_memtab.e_mem_type = IV_EXT_CACHEABLE_NORMAL_MEM;
+
+        ps_hle_ctxt->ihevce_mem_alloc(
+            ps_hle_ctxt->pv_mem_mgr_hdl, &ps_enc_static_cfg_params->s_sys_api, &s_memtab);
+        if(s_memtab.pv_base == NULL)
+        {
+            return (IV_FAIL); /* NOTE(review): the static cfg copy allocated above is not freed here */
+        }
+
+        pi4_active_res_id = (WORD32 *)s_memtab.pv_base;
+    }
+    /* --------------------------------------------------------------------- */
+    /*            Context and Memory Initialization of Encoder ctxt          */
+    /* --------------------------------------------------------------------- */
+    for(ctr = 0; ctr < i4_num_resolutions; ctr++)
+    {
+        WORD32 i4_br_id;
+        s_memtab.i4_size = sizeof(iv_mem_rec_t);
+        s_memtab.i4_mem_alignment = 4;
+        s_memtab.i4_mem_size = sizeof(enc_ctxt_t);
+        s_memtab.e_mem_type = IV_EXT_CACHEABLE_NORMAL_MEM;
+
+        ps_hle_ctxt->ihevce_mem_alloc(
+            ps_hle_ctxt->pv_mem_mgr_hdl, &ps_enc_static_cfg_params->s_sys_api, &s_memtab);
+        if(s_memtab.pv_base == NULL)
+        {
+            return (IV_FAIL);
+        }
+
+        ps_enc_ctxt = (enc_ctxt_t *)s_memtab.pv_base;
+
+        ps_enc_ctxt->ps_stat_prms = ps_enc_static_cfg_params;
+
+        /* all threads are treated as active; no core-count check is done here */
+        ps_enc_ctxt->s_multi_thrd.i4_all_thrds_active_flag = 1;
+
+        if(1 == ps_enc_static_cfg_params->s_tgt_lyr_prms.i4_mres_single_out)
+        {
+            pi4_active_res_id[ctr] = 0;
+            ps_enc_ctxt->s_multi_thrd.pi4_active_res_id = pi4_active_res_id;
+        }
+
+        /* store num bit-rate instances in the encoder context */
+        ps_enc_ctxt->i4_num_bitrates =
+            ps_enc_static_cfg_params->s_tgt_lyr_prms.as_tgt_params[ctr].i4_num_bitrate_instances;
+
+        if(1 == ps_enc_static_cfg_params->s_config_prms.i4_rate_control_mode)
+        {
+            LWORD64 i8_peak_bitrate;
+            for(i4_br_id = 0; i4_br_id < ps_enc_ctxt->i4_num_bitrates; i4_br_id++)
+            {
+                i8_peak_bitrate =
+                    (ULWORD64)(ps_enc_static_cfg_params->s_tgt_lyr_prms.as_tgt_params[ctr]
+                                   .ai4_peak_bitrate[i4_br_id]);
+
+                ps_enc_static_cfg_params->s_tgt_lyr_prms.as_tgt_params[ctr]
+                    .ai4_tgt_bitrate[i4_br_id] = (WORD32)(
+                    (i8_peak_bitrate * ps_enc_static_cfg_params->s_config_prms.i4_rate_factor) /
+                    1000);
+            }
+        }
+
+        if(BLU_RAY_SUPPORT == ps_enc_static_cfg_params->s_out_strm_prms.i4_interop_flags)
+        {
+            ps_enc_ctxt->i4_blu_ray_spec = 1;
+        }
+        else
+        {
+            ps_enc_ctxt->i4_blu_ray_spec = 0;
+        }
+
+        /* if all threads are required to be active */
+        if(1 == ps_enc_ctxt->s_multi_thrd.i4_all_thrds_active_flag)
+        {
+            /* store the number of threads to be created as passed by the app */
+            ps_enc_ctxt->s_multi_thrd.i4_num_enc_proc_thrds =
+                ps_hle_ctxt->ai4_num_core_per_res[ctr];
+
+            /* pre-enc stage gets the same thread count (no HT doubling is applied here) */
+            ps_enc_ctxt->s_multi_thrd.i4_num_pre_enc_proc_thrds =
+                ps_hle_ctxt->ai4_num_core_per_res[ctr];
+        }
+        else
+        {
+            // TODO: distribute threads across stages
+        }
+
+        /* Keep track of resolution id; it tells this encoder instance apart from the others */
+        ps_enc_ctxt->i4_resolution_id = ctr;
+        /* store hle ctxt in enc ctxt */
+        ps_enc_ctxt->pv_hle_ctxt = (void *)ps_hle_ctxt;
+        ps_enc_ctxt->pv_rc_mutex_lock_hdl = NULL;
+        ps_enc_ctxt->s_multi_thrd.pv_sub_pic_rc_mutex_lock_hdl = NULL;
+        ps_enc_ctxt->s_multi_thrd.pv_sub_pic_rc_for_qp_update_mutex_lock_hdl = NULL;
+        ps_enc_ctxt->i4_look_ahead_frames_in_first_pass = i4_look_ahead_frames_in_first_pass;
+
+        ps_enc_ctxt->ai4_is_past_pic_complex[0] = 0;
+        ps_enc_ctxt->ai4_is_past_pic_complex[1] = 0;
+        ps_enc_ctxt->i4_is_I_reset_done = 1;
+        ps_enc_ctxt->i4_past_RC_reset_count = 0;
+        ps_enc_ctxt->i4_future_RC_reset = 0;
+        ps_enc_ctxt->i4_past_RC_scd_reset_count = 0;
+        ps_enc_ctxt->i4_future_RC_scd_reset = 0;
+        ps_enc_ctxt->i4_active_scene_num = -1;
+        for(i = 0; i < IHEVCE_MAX_NUM_BITRATES; i++)
+        {
+            ps_enc_ctxt->ai4_rc_query[i] = 0;
+        }
+        ps_enc_ctxt->i4_active_enc_frame_id = 0;
+        ps_enc_ctxt->u1_is_popcnt_available = 1; /* overwritten with 0 in both arch branches below */
+
+#ifndef ARM
+        ps_enc_ctxt->e_arch_type = ARCH_X86_GENERIC;
+        ps_enc_ctxt->u1_is_popcnt_available = 0;
+#else
+        if(ps_enc_static_cfg_params->e_arch_type == ARCH_NA)
+            ps_enc_ctxt->e_arch_type = ihevce_default_arch();
+        else
+            ps_enc_ctxt->e_arch_type = ps_enc_static_cfg_params->e_arch_type;
+        ps_enc_ctxt->u1_is_popcnt_available = 0;
+#endif
+
+        {
+            ps_enc_static_cfg_params->e_arch_type = ps_enc_ctxt->e_arch_type;
+
+            ihevce_init_function_ptr(ps_enc_ctxt, ps_enc_ctxt->e_arch_type);
+        }
+
+        ihevce_mem_manager_init(ps_enc_ctxt, ps_hle_ctxt);
+
+        if(0 != ps_hle_ctxt->i4_error_code)
+        {
+            return (IV_FAIL);
+        }
+
+        /* mutex lock for RC calls */
+        ps_enc_ctxt->pv_rc_mutex_lock_hdl = osal_mutex_create(ps_hle_ctxt->pv_osal_handle);
+        if(NULL == ps_enc_ctxt->pv_rc_mutex_lock_hdl)
+        {
+            return IV_FAIL;
+        }
+
+        /* mutex lock for Sub pic RC calls */
+        ps_enc_ctxt->s_multi_thrd.pv_sub_pic_rc_mutex_lock_hdl =
+            osal_mutex_create(ps_hle_ctxt->pv_osal_handle);
+        if(NULL == ps_enc_ctxt->s_multi_thrd.pv_sub_pic_rc_mutex_lock_hdl)
+        {
+            return IV_FAIL;
+        }
+
+        ps_enc_ctxt->s_multi_thrd.pv_sub_pic_rc_for_qp_update_mutex_lock_hdl =
+            osal_mutex_create(ps_hle_ctxt->pv_osal_handle);
+        if(NULL == ps_enc_ctxt->s_multi_thrd.pv_sub_pic_rc_for_qp_update_mutex_lock_hdl)
+        {
+            return IV_FAIL;
+        }
+
+        /* reset the encoder context */
+        ihevce_context_reset(ps_enc_ctxt);
+
+        /* register the Encoder context in HLE interface ctxt */
+        ps_hle_ctxt->apv_enc_hdl[ctr] = ps_enc_ctxt;
+    }
+    /* init profile */
+    PROFILE_INIT(&ps_hle_ctxt->profile_hle);
+    for(ctr = 0; ctr < i4_num_resolutions; ctr++)
+    {
+        WORD32 i4_br_id;
+        ps_enc_ctxt = (enc_ctxt_t *)ps_hle_ctxt->apv_enc_hdl[ctr]; /* this layer's own bitrate count */
+        PROFILE_INIT(&ps_hle_ctxt->profile_pre_enc[ctr]);
+        for(i4_br_id = 0; i4_br_id < ps_enc_ctxt->i4_num_bitrates; i4_br_id++)
+        {
+            PROFILE_INIT(&ps_hle_ctxt->profile_enc[ctr][i4_br_id]);
+            PROFILE_INIT(&ps_hle_ctxt->profile_entropy[ctr][i4_br_id]);
+        }
+    }
+    if(1 == ps_enc_static_cfg_params->s_tgt_lyr_prms.i4_mres_single_out)
+        pi4_active_res_id[i4_num_resolutions] = 0;
+
+    return (IV_SUCCESS);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_query_io_buf_req \endif
+*
+* \brief
+* Reports the minimum number and size of the input, output and recon
+* buffers, derived from the create time parameters
+*
+* \param[in] ps_hle_ctxt : high level encoder interface context pointer
+* \param[out] ps_input_bufs_req : input buffer requirement structure pointer
+* \param[out] ps_res_layer_output_bufs_req : output buffer requirements,
+*             filled per resolution and bitrate instance
+* \param[out] ps_res_layer_recon_bufs_req : recon buffer requirements,
+*             filled per resolution and bitrate instance
+*
+* \return
+* IV_SUCCESS (this function has no failure path)
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+IV_API_CALL_STATUS_T ihevce_query_io_buf_req(
+    ihevce_hle_ctxt_t *ps_hle_ctxt,
+    iv_input_bufs_req_t *ps_input_bufs_req,
+    iv_res_layer_output_bufs_req_t *ps_res_layer_output_bufs_req,
+    iv_res_layer_recon_bufs_req_t *ps_res_layer_recon_bufs_req)
+{
+    /* instance 0 provides the LAP parameters and CTB size used for the input side */
+    enc_ctxt_t *ps_cur_ctxt = (enc_ctxt_t *)ps_hle_ctxt->apv_enc_hdl[0];
+    ihevce_src_params_t *ps_src = &ps_hle_ctxt->ps_static_cfg_prms->s_src_prms;
+    WORD32 i4_res_count = ps_hle_ctxt->ps_static_cfg_prms->s_tgt_lyr_prms.i4_num_res_layers;
+    WORD32 i4_aligned_wd, i4_aligned_ht;
+    WORD32 i4_res, i4_br, i4_br_count;
+
+    /* start with no error reported */
+    ps_hle_ctxt->i4_error_code = 0;
+
+    /* ------------------- Input buffer requirements ------------------- */
+    /* the yuv buffer count comes from LAP; synch control buffers match it */
+    ps_input_bufs_req->i4_min_num_yuv_bufs =
+        ihevce_lap_get_num_ip_bufs(&ps_cur_ctxt->s_lap_stat_prms);
+
+    ps_input_bufs_req->i4_min_num_synch_ctrl_bufs = ps_input_bufs_req->i4_min_num_yuv_bufs;
+
+    ps_input_bufs_req->i4_min_num_asynch_ctrl_bufs = NUM_AYSNC_CMD_BUFS;
+
+    /* source dimensions plus the SET_CTB_ALIGN term for the CTB size of instance 0 */
+    i4_aligned_wd =
+        ps_src->i4_width +
+        SET_CTB_ALIGN(ps_src->i4_width, ps_cur_ctxt->s_frm_ctb_prms.i4_ctb_size);
+    i4_aligned_ht =
+        ps_src->i4_height +
+        SET_CTB_ALIGN(ps_src->i4_height, ps_cur_ctxt->s_frm_ctb_prms.i4_ctb_size);
+
+    /* luma size: aligned width x height, doubled when the input bit depth exceeds 8 */
+    ps_input_bufs_req->i4_min_size_y_buf = i4_aligned_wd * i4_aligned_ht;
+    if(ps_src->i4_input_bit_depth > 8)
+    {
+        ps_input_bufs_req->i4_min_size_y_buf *= 2;
+    }
+
+    /* chroma size: half of the luma size, doubled again for IV_YUV_422SP_UV */
+    ps_input_bufs_req->i4_min_size_uv_buf = ps_input_bufs_req->i4_min_size_y_buf >> 1;
+    if(ps_src->i4_chr_format == IV_YUV_422SP_UV)
+    {
+        ps_input_bufs_req->i4_min_size_uv_buf <<= 1;
+    }
+
+    ps_input_bufs_req->i4_yuv_format = ps_src->i4_chr_format;
+
+    /* control buffer sizes follow from the SEI payload limits */
+    ps_input_bufs_req->i4_min_size_synch_ctrl_bufs =
+        ((MAX_SEI_PAYLOAD_PER_TLV + 16) * MAX_NUMBER_OF_SEI_PAYLOAD) + 16;
+
+    ps_input_bufs_req->i4_min_size_asynch_ctrl_bufs =
+        ((MAX_SEI_PAYLOAD_PER_TLV + 16) * (MAX_NUMBER_OF_SEI_PAYLOAD - 6)) + 16;
+
+    /* ------- Output and recon requirements, per resolution and bitrate ------- */
+    for(i4_res = 0; i4_res < i4_res_count; i4_res++)
+    {
+        ps_cur_ctxt = (enc_ctxt_t *)ps_hle_ctxt->apv_enc_hdl[i4_res];
+        i4_br_count = ps_cur_ctxt->s_runtime_tgt_params.i4_num_bitrate_instances;
+
+        /* target dimensions of this resolution plus the SET_CTB_ALIGN term */
+        i4_aligned_wd = ps_cur_ctxt->s_runtime_tgt_params.i4_width +
+                        SET_CTB_ALIGN(
+                            ps_cur_ctxt->s_runtime_tgt_params.i4_width,
+                            ps_cur_ctxt->s_frm_ctb_prms.i4_ctb_size);
+        i4_aligned_ht = ps_cur_ctxt->s_runtime_tgt_params.i4_height +
+                        SET_CTB_ALIGN(
+                            ps_cur_ctxt->s_runtime_tgt_params.i4_height,
+                            ps_cur_ctxt->s_frm_ctb_prms.i4_ctb_size);
+
+        for(i4_br = 0; i4_br < i4_br_count; i4_br++)
+        {
+            WORD32 i4_strm_size = i4_aligned_wd * i4_aligned_ht;
+            WORD32 i4_bit_depth =
+                ps_hle_ctxt->ps_static_cfg_prms->s_tgt_lyr_prms.i4_internal_bit_depth;
+
+            /* ---- output (bitstream) buffers ---- */
+            /* doubled for 12 bit, or for more than 8 bit with IV_YUV_422SP_UV */
+            if((12 == i4_bit_depth) ||
+               ((i4_bit_depth > 8) && (ps_src->i4_chr_format == IV_YUV_422SP_UV)))
+            {
+                i4_strm_size *= 2;
+            }
+
+            /* scaled by 3/2 for 10 bit with IV_YUV_420SP_UV */
+            if((10 == i4_bit_depth) && (ps_src->i4_chr_format == IV_YUV_420SP_UV))
+            {
+                i4_strm_size *= 3;
+                i4_strm_size >>= 1;
+            }
+
+            ps_res_layer_output_bufs_req->s_output_buf_req[i4_res][i4_br]
+                .i4_min_num_out_bufs = NUM_OUTPUT_BUFS;
+
+            ps_res_layer_output_bufs_req->s_output_buf_req[i4_res][i4_br]
+                .i4_min_size_bitstream_buf = i4_strm_size;
+
+            /* ---- recon buffers: requested only when recon saving is enabled ---- */
+            if(ps_cur_ctxt->ps_stat_prms->i4_save_recon != 0)
+            {
+                WORD32 i4_recon_y_size = i4_aligned_wd * i4_aligned_ht;
+                WORD32 i4_recon_uv_size;
+
+                /* luma doubles when the internal bit depth exceeds 8 */
+                if(i4_bit_depth > 8)
+                {
+                    i4_recon_y_size *= 2;
+                }
+
+                /* chroma is half of luma, doubled again for IV_YUV_422SP_UV */
+                i4_recon_uv_size = i4_recon_y_size >> 1;
+                if(ps_src->i4_chr_format == IV_YUV_422SP_UV)
+                {
+                    i4_recon_uv_size <<= 1;
+                }
+
+                ps_res_layer_recon_bufs_req->s_recon_buf_req[i4_res][i4_br]
+                    .i4_min_num_recon_bufs = 2 * HEVCE_MAX_REF_PICS + 1;
+                ps_res_layer_recon_bufs_req->s_recon_buf_req[i4_res][i4_br]
+                    .i4_min_size_y_buf = i4_recon_y_size;
+                ps_res_layer_recon_bufs_req->s_recon_buf_req[i4_res][i4_br]
+                    .i4_min_size_uv_buf = i4_recon_uv_size;
+            }
+            else
+            {
+                ps_res_layer_recon_bufs_req->s_recon_buf_req[i4_res][i4_br]
+                    .i4_min_num_recon_bufs = 0;
+                ps_res_layer_recon_bufs_req->s_recon_buf_req[i4_res][i4_br]
+                    .i4_min_size_y_buf = 0;
+                ps_res_layer_recon_bufs_req->s_recon_buf_req[i4_res][i4_br]
+                    .i4_min_size_uv_buf = 0;
+            }
+        }
+    }
+
+    return (IV_SUCCESS);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_create_ports \endif
+*
+* \brief
+* Initialises the i/o queues of every resolution instance and gives all of
+* them the same queue mutex
+*
+* \param[in] ps_hle_ctxt : high level encoder interface context pointer
+* \param[in] ps_input_data_ctrl_buffs_desc : input data/control buffer descriptor
+* \param[in] ps_input_asynch_ctrl_buffs_desc : input asynchronous control
+*            buffer descriptor
+* \param[in] ps_mres_output_data_buffs_desc : per-resolution output data
+*            buffer descriptors
+* \param[in] ps_mres_recon_data_buffs_desc : per-resolution recon data
+*            buffer descriptors
+*
+* \return
+* IV_SUCCESS, or IV_FAIL when the queue mutex cannot be created
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+IV_API_CALL_STATUS_T ihevce_create_ports(
+    ihevce_hle_ctxt_t *ps_hle_ctxt,
+    iv_input_data_ctrl_buffs_desc_t *ps_input_data_ctrl_buffs_desc,
+    iv_input_asynch_ctrl_buffs_desc_t *ps_input_asynch_ctrl_buffs_desc,
+    iv_res_layer_output_data_buffs_desc_t *ps_mres_output_data_buffs_desc,
+    iv_res_layer_recon_data_buffs_desc_t *ps_mres_recon_data_buffs_desc)
+{
+    /* queue mutex created for resolution 0 and reused by the other instances */
+    void *pv_shared_q_mutex = NULL;
+    WORD32 i4_res_count =
+        ps_hle_ctxt->ps_static_cfg_prms->s_tgt_lyr_prms.i4_num_res_layers;
+    WORD32 i4_res;
+
+    /* start with no error reported */
+    ps_hle_ctxt->i4_error_code = 0;
+
+    for(i4_res = 0; i4_res < i4_res_count; i4_res++)
+    {
+        enc_ctxt_t *ps_cur_ctxt = (enc_ctxt_t *)ps_hle_ctxt->apv_enc_hdl[i4_res];
+
+        /* NOTE: the buffer sizes provided by the application are not */
+        /* validated here                                             */
+
+        /* Queue init is given the first bitrate's output/recon descriptors of */
+        /* this resolution; the callee is expected to step to the others       */
+        ihevce_mem_manager_que_init(
+            ps_cur_ctxt,
+            ps_hle_ctxt,
+            ps_input_data_ctrl_buffs_desc,
+            ps_input_asynch_ctrl_buffs_desc,
+            &ps_mres_output_data_buffs_desc->s_output_data_buffs[i4_res][0],
+            &ps_mres_recon_data_buffs_desc->s_recon_data_buffs[i4_res][0]);
+
+        /* number of queues owned by this instance */
+        ps_cur_ctxt->s_enc_ques.i4_num_queues = IHEVCE_MAX_NUM_QUEUES;
+
+        /* One mutex guards queue access for all encoder instances: it is */
+        /* created on the first iteration and handed to every later one   */
+        if(0 == i4_res)
+        {
+            pv_shared_q_mutex = osal_mutex_create(ps_hle_ctxt->pv_osal_handle);
+        }
+        ps_cur_ctxt->s_enc_ques.pv_q_mutex_hdl = pv_shared_q_mutex;
+
+        /* can only trip on the first iteration, right after the creation */
+        if(NULL == pv_shared_q_mutex)
+        {
+            return IV_FAIL;
+        }
+
+        /* flag the i/o queues of this instance as created */
+        ps_cur_ctxt->i4_io_queues_created = 1;
+    }
+
+    return (IV_SUCCESS);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_hle_interface_thrd \endif
+*
+* \brief
+* High level encoder thread interface function
+*
+* \param[in] High level interface context pointer
+*
+* \return
+* WORD32 status; IV_FAIL when a semaphore or mutex cannot be created
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+WORD32 ihevce_hle_interface_thrd(void *pv_proc_intf_ctxt)
+{
+ /* local variables */
+ WORD32 ctr, res_ctr;
+ ihevce_hle_ctxt_t *ps_hle_ctxt;
+ enc_ctxt_t *ps_enc_ctxt;
+ /* enc ctxt to store 0th instance's params which are required by all instances */
+ enc_ctxt_t *ps_enc_ctxt_base;
+ void *pv_lap_sem_hdl;
+ void *pv_enc_frame_process_sem_hdl;
+ void *pv_pre_enc_frame_process_sem_hdl;
+ void *apv_ent_coding_sem_hdl[IHEVCE_MAX_NUM_BITRATES];
+ void *pv_ent_common_mres_sem_hdl = NULL;
+ void *pv_out_common_mres_sem_hdl = NULL;
+
+ void *pv_inp_data_sem_hdl;
+ void *pv_lap_inp_data_sem_hdl;
+ void *pv_preenc_inp_data_sem_hdl;
+ void *pv_inp_ctrl_sem_hdl;
+ void *apv_out_stream_sem_hdl[IHEVCE_MAX_NUM_BITRATES];
+ void *apv_out_recon_sem_hdl[IHEVCE_MAX_NUM_BITRATES];
+ void *pv_out_ctrl_sts_sem_hdl;
+
+ lap_intface_t *ps_lap_interface_ctxt;
+ iv_mem_rec_t s_memtab;
+ WORD32 i4_num_bit_rate_instances[IHEVCE_MAX_NUM_RESOLUTIONS], i4_num_resolutions;
+ WORD32 i; //loop variable
+ WORD32 ai4_proc_count[MAX_NUMBER_PROC_GRPS] = { 0 }, i4_proc_grp_count;
+ WORD32 i4_acc_proc_num = 0;
+
+ /* Frame Encode processing threads & semaphores */
+ void *apv_enc_frm_proc_hdls[IHEVCE_MAX_NUM_RESOLUTIONS][MAX_NUM_FRM_PROC_THRDS_ENC];
+ frm_proc_thrd_ctxt_t
+ *aps_enc_frm_proc_thrd_ctxt[IHEVCE_MAX_NUM_RESOLUTIONS][MAX_NUM_FRM_PROC_THRDS_ENC];
+
+ /* Pre Frame Encode processing threads & semaphores */
+ void *apv_pre_enc_frm_proc_hdls[IHEVCE_MAX_NUM_RESOLUTIONS][MAX_NUM_FRM_PROC_THRDS_PRE_ENC];
+ frm_proc_thrd_ctxt_t
+ *aps_pre_enc_frm_proc_thrd_ctxt[IHEVCE_MAX_NUM_RESOLUTIONS][MAX_NUM_FRM_PROC_THRDS_PRE_ENC];
+
+ void *apv_entropy_thrd_hdls[IHEVCE_MAX_NUM_RESOLUTIONS][NUM_ENTROPY_THREADS];
+ frm_proc_thrd_ctxt_t *aps_entropy_thrd_ctxt[IHEVCE_MAX_NUM_RESOLUTIONS][NUM_ENTROPY_THREADS];
+
+ ps_hle_ctxt = (ihevce_hle_ctxt_t *)pv_proc_intf_ctxt;
+ ps_enc_ctxt_base = (enc_ctxt_t *)ps_hle_ctxt->apv_enc_hdl[0];
+ /* profile start */
+ PROFILE_START(&ps_hle_ctxt->profile_hle);
+ /* store default values of mem tab */
+ s_memtab.i4_size = sizeof(iv_mem_rec_t);
+ s_memtab.i4_mem_alignment = 4;
+
+ i4_num_resolutions = ps_enc_ctxt_base->ps_stat_prms->s_tgt_lyr_prms.i4_num_res_layers;
+ memset(
+ apv_entropy_thrd_hdls,
+ 0,
+ IHEVCE_MAX_NUM_RESOLUTIONS * NUM_ENTROPY_THREADS * sizeof(void *));
+ memset(
+ apv_entropy_thrd_hdls,
+ 0,
+ IHEVCE_MAX_NUM_RESOLUTIONS * NUM_ENTROPY_THREADS * sizeof(void *));
+ for(res_ctr = 0; res_ctr < i4_num_resolutions; res_ctr++)
+ {
+ i4_num_bit_rate_instances[res_ctr] =
+ ps_enc_ctxt_base->ps_stat_prms->s_tgt_lyr_prms.as_tgt_params[res_ctr]
+ .i4_num_bitrate_instances;
+ }
+ /* --------------------------------------------------------------------- */
+ /* Init number of threads for each stage */
+ /* --------------------------------------------------------------------- */
+
+ {
+ for(res_ctr = 0; res_ctr < i4_num_resolutions; res_ctr++)
+ {
+ ps_enc_ctxt = (enc_ctxt_t *)ps_hle_ctxt->apv_enc_hdl[res_ctr];
+ /* all the threads created will be made active */
+ ps_enc_ctxt->s_multi_thrd.i4_num_active_enc_thrds =
+ ps_enc_ctxt->s_multi_thrd.i4_num_enc_proc_thrds;
+
+ ps_enc_ctxt->s_multi_thrd.i4_num_active_pre_enc_thrds =
+ ps_enc_ctxt->s_multi_thrd.i4_num_pre_enc_proc_thrds;
+ }
+ }
+
+ /* --------------------------------------------------------------------- */
+ /* Multiple processing Threads Semaphores init */
+ /* --------------------------------------------------------------------- */
+ for(res_ctr = 0; res_ctr < i4_num_resolutions; res_ctr++)
+ {
+ osal_sem_attr_t attr = OSAL_DEFAULT_SEM_ATTR;
+
+ ps_enc_ctxt = (enc_ctxt_t *)ps_hle_ctxt->apv_enc_hdl[res_ctr];
+
+ attr.value = SEM_START_VALUE;
+
+ /* Create Semaphore handle for LAP thread */
+ if(0 == ps_enc_ctxt->i4_resolution_id)
+ {
+ pv_lap_sem_hdl = osal_sem_create(ps_hle_ctxt->pv_osal_handle, &attr);
+ if(NULL == pv_lap_sem_hdl)
+ {
+ return IV_FAIL;
+ }
+ }
+ else
+ {
+ /*NOTE: Tile workspace assigned this to null. Confirm this*/
+ pv_lap_sem_hdl = ps_enc_ctxt_base->s_thrd_sem_ctxt.pv_lap_sem_handle;
+ }
+ /* Create Semaphore for encode frame process thread */
+ pv_enc_frame_process_sem_hdl = osal_sem_create(ps_hle_ctxt->pv_osal_handle, &attr);
+ if(NULL == pv_enc_frame_process_sem_hdl)
+ {
+ return IV_FAIL;
+ }
+
+ /* Create Semaphore for pre_encode frame process thread */
+ pv_pre_enc_frame_process_sem_hdl = osal_sem_create(ps_hle_ctxt->pv_osal_handle, &attr);
+ if(NULL == pv_pre_enc_frame_process_sem_hdl)
+ {
+ return IV_FAIL;
+ }
+
+ /* Create Semaphore for input frame data q function */
+ if(0 == ps_enc_ctxt->i4_resolution_id)
+ {
+ pv_inp_data_sem_hdl = osal_sem_create(ps_hle_ctxt->pv_osal_handle, &attr);
+ if(NULL == pv_inp_data_sem_hdl)
+ {
+ return IV_FAIL;
+ }
+ }
+ else
+ {
+ pv_inp_data_sem_hdl = ps_enc_ctxt_base->s_thrd_sem_ctxt.pv_inp_data_sem_handle;
+ }
+
+ /*creating new input queue owned by encoder*/
+ /* Create Semaphore for input frame data q function */
+ pv_lap_inp_data_sem_hdl = osal_sem_create(ps_hle_ctxt->pv_osal_handle, &attr);
+ if(NULL == pv_lap_inp_data_sem_hdl)
+ {
+ return IV_FAIL;
+ }
+
+ /* Create Semaphore for input frame data q function */
+ pv_preenc_inp_data_sem_hdl = osal_sem_create(ps_hle_ctxt->pv_osal_handle, &attr);
+ if(NULL == pv_preenc_inp_data_sem_hdl)
+ {
+ return IV_FAIL;
+ }
+
+ /* Create Semaphore for input conrol data q function */
+ if(0 == ps_enc_ctxt->i4_resolution_id)
+ {
+ pv_inp_ctrl_sem_hdl = osal_sem_create(ps_hle_ctxt->pv_osal_handle, &attr);
+ if(NULL == pv_inp_ctrl_sem_hdl)
+ {
+ return IV_FAIL;
+ }
+ }
+ else
+ { /*Inp ctrl queue is same for all resolutions between app and lap*/
+ pv_inp_ctrl_sem_hdl = ps_enc_ctxt_base->s_thrd_sem_ctxt.pv_inp_ctrl_sem_handle;
+ }
+
+ /* Create Semaphore for output control status data q function */
+ pv_out_ctrl_sts_sem_hdl = osal_sem_create(ps_hle_ctxt->pv_osal_handle, &attr);
+ if(NULL == pv_out_ctrl_sts_sem_hdl)
+ {
+ return IV_FAIL;
+ }
+
+ /* Multi res single output case singel output queue is used for all output resolutions */
+ if(1 == ps_enc_ctxt_base->ps_stat_prms->s_tgt_lyr_prms.i4_mres_single_out)
+ {
+ ps_enc_ctxt->s_enc_ques.apv_q_hdl[IHEVCE_OUTPUT_DATA_Q] =
+ ps_enc_ctxt_base->s_enc_ques.apv_q_hdl[IHEVCE_OUTPUT_DATA_Q];
+ if(0 == ps_enc_ctxt->i4_resolution_id)
+ {
+ /* Create Semaphore for enropy coding thread */
+ pv_ent_common_mres_sem_hdl = osal_sem_create(ps_hle_ctxt->pv_osal_handle, &attr);
+ if(NULL == pv_ent_common_mres_sem_hdl)
+ {
+ return IV_FAIL;
+ }
+
+ /* Create Semaphore for output stream data q function */
+ pv_out_common_mres_sem_hdl = osal_sem_create(ps_hle_ctxt->pv_osal_handle, &attr);
+ if(NULL == pv_out_common_mres_sem_hdl)
+ {
+ return IV_FAIL;
+ }
+ }
+ ps_enc_ctxt->s_thrd_sem_ctxt.pv_ent_common_mres_sem_hdl = pv_ent_common_mres_sem_hdl;
+ ps_enc_ctxt->s_thrd_sem_ctxt.pv_out_common_mres_sem_hdl = pv_out_common_mres_sem_hdl;
+ }
+
+ /*create entropy and output semaphores for each thread.
+ Each thread will correspond to each bit-rate instance running */
+ for(i = 0; i < i4_num_bit_rate_instances[res_ctr]; i++)
+ {
+ /* Create Semaphore for enropy coding thread */
+ apv_ent_coding_sem_hdl[i] = osal_sem_create(ps_hle_ctxt->pv_osal_handle, &attr);
+ if(NULL == apv_ent_coding_sem_hdl[i])
+ {
+ return IV_FAIL;
+ }
+
+ /* Create Semaphore for output stream data q function */
+ apv_out_stream_sem_hdl[i] = osal_sem_create(ps_hle_ctxt->pv_osal_handle, &attr);
+ if(NULL == apv_out_stream_sem_hdl[i])
+ {
+ return IV_FAIL;
+ }
+
+ /* Create Semaphore for output recon data q function */
+ apv_out_recon_sem_hdl[i] = osal_sem_create(ps_hle_ctxt->pv_osal_handle, &attr);
+ if(NULL == apv_out_recon_sem_hdl[i])
+ {
+ return IV_FAIL;
+ }
+ }
+
+ /* update the semaphore handles and the thread creates status */
+
+ ps_enc_ctxt->s_thrd_sem_ctxt.pv_enc_frm_proc_sem_handle = pv_enc_frame_process_sem_hdl;
+ ps_enc_ctxt->s_thrd_sem_ctxt.pv_pre_enc_frm_proc_sem_handle =
+ pv_pre_enc_frame_process_sem_hdl;
+ ps_enc_ctxt->s_thrd_sem_ctxt.pv_lap_sem_handle = pv_lap_sem_hdl;
+ ps_enc_ctxt->s_thrd_sem_ctxt.pv_inp_data_sem_handle = pv_inp_data_sem_hdl;
+ ps_enc_ctxt->s_thrd_sem_ctxt.pv_lap_inp_data_sem_hdl = pv_lap_inp_data_sem_hdl;
+ ps_enc_ctxt->s_thrd_sem_ctxt.pv_preenc_inp_data_sem_hdl = pv_preenc_inp_data_sem_hdl;
+ ps_enc_ctxt->s_thrd_sem_ctxt.pv_inp_ctrl_sem_handle = pv_inp_ctrl_sem_hdl;
+ ps_enc_ctxt->s_thrd_sem_ctxt.pv_out_ctrl_sem_handle = pv_out_ctrl_sts_sem_hdl;
+ for(i = 0; i < i4_num_bit_rate_instances[res_ctr]; i++)
+ {
+ ps_enc_ctxt->s_thrd_sem_ctxt.apv_ent_cod_sem_handle[i] = apv_ent_coding_sem_hdl[i];
+ ps_enc_ctxt->s_thrd_sem_ctxt.apv_out_strm_sem_handle[i] = apv_out_stream_sem_hdl[i];
+ ps_enc_ctxt->s_thrd_sem_ctxt.apv_out_recon_sem_handle[i] = apv_out_recon_sem_hdl[i];
+ }
+ }
+
+ /* --------------------------------------------------------------------- */
+ /* Multiple processing Threads Mutex init */
+ /* --------------------------------------------------------------------- */
+ for(res_ctr = 0; res_ctr < i4_num_resolutions; res_ctr++)
+ {
+ ps_enc_ctxt = (enc_ctxt_t *)ps_hle_ctxt->apv_enc_hdl[res_ctr];
+
+ /* create a mutex lock for Job Queue access across slave threads of encode frame processing */
+ ps_enc_ctxt->s_multi_thrd.pv_job_q_mutex_hdl_enc_grp_me =
+ osal_mutex_create(ps_hle_ctxt->pv_osal_handle);
+ if(NULL == ps_enc_ctxt->s_multi_thrd.pv_job_q_mutex_hdl_enc_grp_me)
+ {
+ return IV_FAIL;
+ }
+
+ /* create a mutex lock for Job Queue access across slave threads of encode frame processing */
+ ps_enc_ctxt->s_multi_thrd.pv_job_q_mutex_hdl_enc_grp_enc_loop =
+ osal_mutex_create(ps_hle_ctxt->pv_osal_handle);
+ if(NULL == ps_enc_ctxt->s_multi_thrd.pv_job_q_mutex_hdl_enc_grp_enc_loop)
+ {
+ return IV_FAIL;
+ }
+
+ /* create mutex for enc thread group */
+ for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
+ {
+ ps_enc_ctxt->s_multi_thrd.apv_mutex_handle[i] =
+ osal_mutex_create(ps_hle_ctxt->pv_osal_handle);
+ if(NULL == ps_enc_ctxt->s_multi_thrd.apv_mutex_handle[i])
+ {
+ return IV_FAIL;
+ }
+
+ ps_enc_ctxt->s_multi_thrd.apv_mutex_handle_me_end[i] =
+ osal_mutex_create(ps_hle_ctxt->pv_osal_handle);
+ if(NULL == ps_enc_ctxt->s_multi_thrd.apv_mutex_handle_me_end[i])
+ {
+ return IV_FAIL;
+ }
+ }
+
+ for(i = 0; i < MAX_NUM_ENC_LOOP_PARALLEL; i++)
+ {
+ ps_enc_ctxt->s_multi_thrd.apv_post_enc_mutex_handle[i] =
+ osal_mutex_create(ps_hle_ctxt->pv_osal_handle);
+ if(NULL == ps_enc_ctxt->s_multi_thrd.apv_post_enc_mutex_handle[i])
+ {
+ return IV_FAIL;
+ }
+
+ ps_enc_ctxt->s_multi_thrd.apv_mutex_handle_frame_init[i] =
+ osal_mutex_create(ps_hle_ctxt->pv_osal_handle);
+ if(NULL == ps_enc_ctxt->s_multi_thrd.apv_mutex_handle_frame_init[i])
+ {
+ return IV_FAIL;
+ }
+ }
+
+ /*initialize mutex for pre-enc group */
+ ps_enc_ctxt->s_multi_thrd.pv_mutex_hdl_pre_enc_init =
+ osal_mutex_create(ps_hle_ctxt->pv_osal_handle);
+
+ ps_enc_ctxt->s_multi_thrd.pv_mutex_hdl_pre_enc_decomp_deinit =
+ osal_mutex_create(ps_hle_ctxt->pv_osal_handle);
+
+ ps_enc_ctxt->s_multi_thrd.pv_mutex_hdl_pre_enc_hme_init =
+ osal_mutex_create(ps_hle_ctxt->pv_osal_handle);
+
+ ps_enc_ctxt->s_multi_thrd.pv_mutex_hdl_pre_enc_hme_deinit =
+ osal_mutex_create(ps_hle_ctxt->pv_osal_handle);
+
+ ps_enc_ctxt->s_multi_thrd.pv_mutex_hdl_pre_enc_deinit =
+ osal_mutex_create(ps_hle_ctxt->pv_osal_handle);
+
+ ps_enc_ctxt->s_multi_thrd.pv_mutex_hdl_l0_ipe_init =
+ osal_mutex_create(ps_hle_ctxt->pv_osal_handle);
+
+ ps_enc_ctxt->s_multi_thrd.pv_job_q_mutex_hdl_pre_enc_decomp =
+ osal_mutex_create(ps_hle_ctxt->pv_osal_handle);
+
+ ps_enc_ctxt->s_multi_thrd.pv_job_q_mutex_hdl_pre_enc_hme =
+ osal_mutex_create(ps_hle_ctxt->pv_osal_handle);
+
+ ps_enc_ctxt->s_multi_thrd.pv_job_q_mutex_hdl_pre_enc_l0ipe =
+ osal_mutex_create(ps_hle_ctxt->pv_osal_handle);
+
+ if(NULL == ps_enc_ctxt->s_multi_thrd.pv_mutex_hdl_pre_enc_init ||
+ NULL == ps_enc_ctxt->s_multi_thrd.pv_mutex_hdl_pre_enc_decomp_deinit ||
+ NULL == ps_enc_ctxt->s_multi_thrd.pv_mutex_hdl_pre_enc_hme_init ||
+ NULL == ps_enc_ctxt->s_multi_thrd.pv_mutex_hdl_pre_enc_hme_deinit ||
+ NULL == ps_enc_ctxt->s_multi_thrd.pv_mutex_hdl_pre_enc_deinit ||
+ NULL == ps_enc_ctxt->s_multi_thrd.pv_mutex_hdl_l0_ipe_init ||
+ NULL == ps_enc_ctxt->s_multi_thrd.pv_job_q_mutex_hdl_pre_enc_decomp ||
+ NULL == ps_enc_ctxt->s_multi_thrd.pv_job_q_mutex_hdl_pre_enc_hme ||
+ NULL == ps_enc_ctxt->s_multi_thrd.pv_job_q_mutex_hdl_pre_enc_l0ipe)
+ {
+ return IV_FAIL;
+ }
+ }
+
+ /* --------------------------------------------------------------------- */
+ /* Multiple processing Threads Context init */
+ /* --------------------------------------------------------------------- */
+
+ for(res_ctr = 0; res_ctr < i4_num_resolutions; res_ctr++)
+ {
+ ps_enc_ctxt = (enc_ctxt_t *)ps_hle_ctxt->apv_enc_hdl[res_ctr];
+ ps_enc_ctxt_base = (enc_ctxt_t *)ps_hle_ctxt->apv_enc_hdl[0];
+
+ /*initialize multi-thread context for enc group*/
+ ps_enc_ctxt->s_multi_thrd.i4_is_recon_free_done = 0;
+ ps_enc_ctxt->s_multi_thrd.me_end_flag = 0;
+ ps_enc_ctxt->s_multi_thrd.enc_end_flag = 0;
+ ps_enc_ctxt->s_multi_thrd.i4_idx_dvsr_p = 0;
+ ps_enc_ctxt->s_multi_thrd.i4_last_inp_buf = 0;
+
+ {
+ /* For all the ME frames in Parallel */
+ WORD32 i4_frm_idx;
+
+ for(i4_frm_idx = 0; i4_frm_idx < MAX_NUM_ME_PARALLEL; i4_frm_idx++)
+ {
+ ps_enc_ctxt->s_multi_thrd.me_num_thrds_exited[i4_frm_idx] = 0;
+ ps_enc_ctxt->s_multi_thrd.ai4_me_master_done_flag[i4_frm_idx] = 0;
+ ps_enc_ctxt->s_multi_thrd.ai4_me_enc_buff_prod_flag[i4_frm_idx] = 0;
+ }
+ }
+
+ {
+ WORD32 i4_frm_idx;
+ ps_enc_ctxt->s_multi_thrd.num_thrds_done = 0;
+ ps_enc_ctxt->s_multi_thrd.num_thrds_exited_for_reenc = 0;
+ for(i4_frm_idx = 0; i4_frm_idx < MAX_NUM_ENC_LOOP_PARALLEL; i4_frm_idx++)
+ {
+ ps_enc_ctxt->s_multi_thrd.num_thrds_exited[i4_frm_idx] = 0;
+
+ ps_enc_ctxt->s_multi_thrd.enc_master_done_frame_init[i4_frm_idx] = 0;
+
+ for(i = 0; i < i4_num_bit_rate_instances[res_ctr]; i++)
+ {
+ /*reset the entropy buffer produced status */
+ ps_enc_ctxt->s_multi_thrd.ai4_produce_outbuf[i4_frm_idx][i] = 1;
+ ps_enc_ctxt->s_multi_thrd.ps_frm_recon[i4_frm_idx][i] = NULL;
+
+ ps_enc_ctxt->s_multi_thrd.ps_curr_out_enc_grp[i4_frm_idx][i] = NULL;
+ }
+ }
+ }
+ ps_enc_ctxt->s_multi_thrd.i4_seq_mode_enabled_flag = 0;
+
+ /* Set prev_frame_done = 1 to indicate that all the threads are in same frame*/
+ for(i = 0; i < ps_enc_ctxt->s_multi_thrd.i4_num_enc_loop_frm_pllel; i++)
+ {
+ ihevce_dmgr_set_done_frm_frm_sync(
+ ps_enc_ctxt->s_multi_thrd.apv_dep_mngr_prev_frame_done[i]);
+ }
+ /* Set prev_frame_done = 1 to indicate that all the threads are in same frame*/
+ ihevce_dmgr_set_done_frm_frm_sync(
+ ps_enc_ctxt->s_multi_thrd.pv_dep_mngr_prev_frame_enc_done_for_reenc);
+ /*to enable the dependency manager to wait when first reached*/
+ ihevce_dmgr_set_prev_done_frm_frm_sync(
+ ps_enc_ctxt->s_multi_thrd.pv_dep_mngr_prev_frame_enc_done_for_reenc);
+ for(i = 0; i < ps_enc_ctxt->s_multi_thrd.i4_num_me_frm_pllel; i++)
+ {
+ ihevce_dmgr_set_done_frm_frm_sync(
+ ps_enc_ctxt->s_multi_thrd.apv_dep_mngr_prev_frame_me_done[i]);
+ }
+
+ /* reset the completed status & start proc flags of slave encode frame processing threads */
+ for(ctr = 0; ctr < ps_enc_ctxt->s_multi_thrd.i4_num_enc_proc_thrds; ctr++)
+ {
+ ps_enc_ctxt->s_multi_thrd.ai4_enc_frm_proc_start[ctr] = 0;
+ }
+
+ /* initialize multi-thread context for pre enc group */
+
+ ps_enc_ctxt->s_multi_thrd.i4_ctrl_blocking_mode = BUFF_QUE_BLOCKING_MODE;
+
+ //for (ctr=0; ctr< PING_PONG_BUF; ctr++)
+ for(ctr = 0; ctr < MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME; ctr++)
+ {
+ ps_enc_ctxt->s_multi_thrd.ai4_pre_enc_init_done[ctr] = 0;
+ ps_enc_ctxt->s_multi_thrd.ai4_pre_enc_hme_init_done[ctr] = 0;
+ ps_enc_ctxt->s_multi_thrd.ai4_pre_enc_deinit_done[ctr] = 1;
+ ps_enc_ctxt->s_multi_thrd.ai4_num_thrds_processed_decomp[ctr] = 0;
+ ps_enc_ctxt->s_multi_thrd.ai4_num_thrds_processed_coarse_me[ctr] = 0;
+ ps_enc_ctxt->s_multi_thrd.ai4_num_thrds_processed_pre_enc[ctr] = 0;
+
+ ps_enc_ctxt->s_multi_thrd.ai4_num_thrds_processed_L0_ipe_qp_init[ctr] = 0;
+ ps_enc_ctxt->s_multi_thrd.ai4_decomp_coarse_me_complete_flag[ctr] = 1;
+ ps_enc_ctxt->s_multi_thrd.ai4_end_flag_pre_enc[ctr] = 0;
+ }
+
+ /* Set prev_frame_done = 1 to indicate that all the threads are in same frame*/
+ ihevce_dmgr_set_done_frm_frm_sync(
+ ps_enc_ctxt->s_multi_thrd.pv_dep_mngr_prev_frame_pre_enc_l1);
+
+ ihevce_dmgr_set_done_frm_frm_sync(
+ ps_enc_ctxt->s_multi_thrd.pv_dep_mngr_prev_frame_pre_enc_coarse_me);
+
+ ihevce_dmgr_set_done_frm_frm_sync(
+ ps_enc_ctxt->s_multi_thrd.pv_dep_mngr_prev_frame_pre_enc_l0);
+
+ {
+ /**init idx for handling delay between pre-me and l0-ipe*/
+ ps_enc_ctxt->s_multi_thrd.i4_delay_pre_me_btw_l0_ipe = 0;
+ ps_enc_ctxt->s_multi_thrd.i4_max_delay_pre_me_btw_l0_ipe =
+ MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME - 1;
+ if(ps_enc_ctxt->s_lap_stat_prms.s_lap_params.i4_rc_look_ahead_pics)
+ {
+ ps_enc_ctxt->s_multi_thrd.i4_delay_pre_me_btw_l0_ipe +=
+ MIN_L1_L0_STAGGER_NON_SEQ +
+ ps_enc_ctxt->s_lap_stat_prms.s_lap_params.i4_rc_look_ahead_pics;
+ }
+ ps_enc_ctxt->s_multi_thrd.i4_qp_update_l0_ipe = -1;
+ }
+ }
+
+ /** Get Number of Processor Groups **/
+ i4_proc_grp_count = ps_enc_ctxt_base->ps_stat_prms->s_multi_thrd_prms.i4_num_proc_groups;
+ /*** Enc threads are allocated based on the assumption that there can be only 2 processor groups **/
+ ASSERT(i4_proc_grp_count <= MAX_NUMBER_PROC_GRPS);
+ /** Get Number of logical processors in Each Group **/
+ for(ctr = 0; ctr < i4_proc_grp_count; ctr++)
+ {
+ ai4_proc_count[ctr] =
+ ps_enc_ctxt_base->ps_stat_prms->s_multi_thrd_prms.ai4_num_cores_per_grp[ctr];
+ }
+
+ /* --------------------------------------------------------------------- */
+ /* Create a LAP thread */
+ /* --------------------------------------------------------------------- */
+ /* LAP thread will run on 0th resolution instance context */
+ {
+ s_memtab.e_mem_type = IV_EXT_CACHEABLE_NORMAL_MEM;
+ s_memtab.i4_mem_size = sizeof(lap_intface_t);
+
+ /* initialise the interface structure parameters */
+ ps_hle_ctxt->ihevce_mem_alloc(
+ ps_hle_ctxt->pv_mem_mgr_hdl, &ps_enc_ctxt_base->ps_stat_prms->s_sys_api, &s_memtab);
+ if(s_memtab.pv_base == NULL)
+ {
+ return (IV_FAIL);
+ }
+
+ ps_lap_interface_ctxt = (lap_intface_t *)s_memtab.pv_base;
+
+ /* populate the params */
+ ps_lap_interface_ctxt->pv_hle_ctxt = ps_hle_ctxt;
+ ps_enc_ctxt = (enc_ctxt_t *)ps_hle_ctxt->apv_enc_hdl[0];
+ ps_lap_interface_ctxt->pv_lap_module_ctxt = ps_enc_ctxt->s_module_ctxt.pv_lap_ctxt;
+ ps_lap_interface_ctxt->i4_ctrl_in_que_id = IHEVCE_INPUT_ASYNCH_CTRL_Q;
+ ps_lap_interface_ctxt->i4_ctrl_out_que_id = IHEVCE_OUTPUT_STATUS_Q;
+ ps_lap_interface_ctxt->i4_ctrl_cmd_buf_size = ENC_COMMAND_BUFF_SIZE;
+ ps_lap_interface_ctxt->i4_ctrl_in_que_blocking_mode = BUFF_QUE_BLOCKING_MODE;
+ ps_lap_interface_ctxt->ps_sys_api = &ps_enc_ctxt_base->ps_stat_prms->s_sys_api;
+ ps_enc_ctxt_base->pv_lap_interface_ctxt = (void *)ps_lap_interface_ctxt;
+ ps_lap_interface_ctxt->ihevce_dyn_bitrate_cb = ihevce_dyn_bitrate;
+ }
+
+ /* --------------------------------------------------------------------- */
+ /* Create Entropy Coding threads */
+ /* --------------------------------------------------------------------- */
+ /*Create entropy thread for each encoder instance*/
+ for(res_ctr = 0; res_ctr < i4_num_resolutions; res_ctr++)
+ {
+ osal_thread_attr_t s_thread_attr = OSAL_DEFAULT_THREAD_ATTR;
+ WORD32 i4_num_entropy_threads;
+
+ /* derive encoder ctxt from hle handle */
+ ps_enc_ctxt = (enc_ctxt_t *)ps_hle_ctxt->apv_enc_hdl[res_ctr];
+
+ i4_num_entropy_threads =
+ ps_enc_ctxt_base->ps_stat_prms->s_tgt_lyr_prms.as_tgt_params[res_ctr]
+ .i4_num_bitrate_instances;
+
+ /* initialise the interface structure parameters */
+ for(ctr = 0; ctr < i4_num_entropy_threads; ctr++)
+ {
+ s_memtab.e_mem_type = IV_EXT_CACHEABLE_NORMAL_MEM;
+ s_memtab.i4_mem_size = sizeof(frm_proc_thrd_ctxt_t);
+
+ ps_hle_ctxt->ihevce_mem_alloc(
+ ps_hle_ctxt->pv_mem_mgr_hdl, &ps_enc_ctxt_base->ps_stat_prms->s_sys_api, &s_memtab);
+ if(s_memtab.pv_base == NULL)
+ {
+ return (IV_FAIL);
+ }
+
+ aps_entropy_thrd_ctxt[res_ctr][ctr] = (frm_proc_thrd_ctxt_t *)s_memtab.pv_base;
+
+ /* initialise the interface structure parameters */
+ aps_entropy_thrd_ctxt[res_ctr][ctr]->i4_thrd_id = ctr;
+ aps_entropy_thrd_ctxt[res_ctr][ctr]->ps_hle_ctxt = ps_hle_ctxt;
+ aps_entropy_thrd_ctxt[res_ctr][ctr]->pv_enc_ctxt = (void *)ps_enc_ctxt;
+
+ /* Initialize application thread attributes */
+ s_thread_attr.exit_code = 0;
+ s_thread_attr.name = 0;
+ s_thread_attr.priority_map_flag = 1;
+ s_thread_attr.priority = OSAL_PRIORITY_DEFAULT;
+ s_thread_attr.stack_addr = 0;
+ s_thread_attr.stack_size = THREAD_STACK_SIZE;
+ s_thread_attr.thread_func = ihevce_ent_coding_thrd;
+ s_thread_attr.thread_param =
+ (void *)(aps_entropy_thrd_ctxt[res_ctr]
+ [ctr]); //encioder and hle context are derived from this
+ s_thread_attr.core_affinity_mask = 0;
+ if(ps_enc_ctxt_base->ps_stat_prms->s_multi_thrd_prms.i4_num_proc_groups > 1)
+ {
+ /* Run ENTROPY thread on last group if there are more than one processor group */
+ s_thread_attr.group_num =
+ ps_hle_ctxt->ps_static_cfg_prms->s_multi_thrd_prms.i4_num_proc_groups - 1;
+ }
+ else
+ {
+ s_thread_attr.group_num = 0;
+ }
+
+ /* Create entropy coding thread */
+ apv_entropy_thrd_hdls[res_ctr][ctr] =
+ osal_thread_create(ps_hle_ctxt->pv_osal_handle, &s_thread_attr);
+ if(NULL == apv_entropy_thrd_hdls[res_ctr][ctr])
+ {
+ return IV_FAIL;
+ }
+ }
+ }
+
+ /* --------------------------------------------------------------------- */
+ /* Create all Slave Encode Frame processing threads */
+ /* - -------------------------------------------------------------------- */
+ for(res_ctr = 0; res_ctr < i4_num_resolutions; res_ctr++)
+ {
+ WORD32 enc_ctr = 0;
+ WORD32 i4_loop_count;
+ WORD32 i4_curr_grp_num = 0;
+ ps_enc_ctxt = (enc_ctxt_t *)ps_hle_ctxt->apv_enc_hdl[res_ctr];
+
+ i4_acc_proc_num = 0;
+ /* Calculate the start core number of enc threads for current resolution */
+ for(i4_loop_count = 0; i4_loop_count < res_ctr; i4_loop_count++)
+ {
+ /* Add number of cores taken by each resolution till the curr resolution */
+ enc_ctr += ps_hle_ctxt->ai4_num_core_per_res[i4_loop_count];
+ }
+ if(ps_enc_ctxt_base->ps_stat_prms->s_multi_thrd_prms.i4_num_proc_groups > 1)
+ {
+ /* Select the group number for each res based on processors present in each group */
+ for(i4_loop_count = 0;
+ i4_loop_count <
+ ps_enc_ctxt_base->ps_stat_prms->s_multi_thrd_prms.i4_num_proc_groups;
+ i4_loop_count++)
+ {
+ i4_acc_proc_num += ai4_proc_count[i4_loop_count];
+ if(enc_ctr >= i4_acc_proc_num)
+ {
+ /* if enc_ctr is greater than proc count for first group,
+ then increment group count.This group number will be starting grp num for
+ that resolution */
+ i4_curr_grp_num++;
+ }
+ else
+ break;
+ }
+ }
+
+ for(ctr = 0; ctr < ps_enc_ctxt->s_multi_thrd.i4_num_enc_proc_thrds; ctr++)
+ {
+ osal_thread_attr_t s_thread_attr = OSAL_DEFAULT_THREAD_ATTR;
+
+ s_memtab.e_mem_type = IV_EXT_CACHEABLE_NORMAL_MEM;
+ s_memtab.i4_mem_size = sizeof(frm_proc_thrd_ctxt_t);
+
+ ps_hle_ctxt->ihevce_mem_alloc(
+ ps_hle_ctxt->pv_mem_mgr_hdl, &ps_enc_ctxt_base->ps_stat_prms->s_sys_api, &s_memtab);
+ if(s_memtab.pv_base == NULL)
+ {
+ return (IV_FAIL);
+ }
+
+ aps_enc_frm_proc_thrd_ctxt[res_ctr][ctr] = (frm_proc_thrd_ctxt_t *)s_memtab.pv_base;
+
+ /* initialise the interface structure parameters */
+ aps_enc_frm_proc_thrd_ctxt[res_ctr][ctr]->i4_thrd_id = ctr;
+
+ aps_enc_frm_proc_thrd_ctxt[res_ctr][ctr]->ps_hle_ctxt = ps_hle_ctxt;
+
+ ps_enc_ctxt = (enc_ctxt_t *)ps_hle_ctxt->apv_enc_hdl[res_ctr];
+
+ aps_enc_frm_proc_thrd_ctxt[res_ctr][ctr]->pv_enc_ctxt = (void *)ps_enc_ctxt;
+
+ /* Initialize application thread attributes */
+ s_thread_attr.exit_code = 0;
+ s_thread_attr.name = 0;
+ s_thread_attr.priority_map_flag = 1;
+ s_thread_attr.priority = OSAL_PRIORITY_DEFAULT;
+ s_thread_attr.stack_addr = 0;
+ s_thread_attr.stack_size = THREAD_STACK_SIZE;
+ s_thread_attr.thread_func = ihevce_enc_frm_proc_slave_thrd;
+ s_thread_attr.thread_param = (void *)(aps_enc_frm_proc_thrd_ctxt[res_ctr][ctr]);
+ s_thread_attr.group_num = i4_curr_grp_num;
+ if(1 == ps_enc_ctxt_base->ps_stat_prms->s_multi_thrd_prms.i4_use_thrd_affinity)
+ {
+ ihevce_static_multi_thread_params_t *ps_multi_thrd_prms =
+ &ps_enc_ctxt_base->ps_stat_prms->s_multi_thrd_prms;
+
+ s_thread_attr.core_affinity_mask = ps_multi_thrd_prms->au8_core_aff_mask[enc_ctr];
+ if((enc_ctr >= i4_acc_proc_num) &&
+ (ps_enc_ctxt_base->ps_stat_prms->s_multi_thrd_prms.i4_num_proc_groups > 1))
+ {
+ /*** When the cores in the Group0 is exhausted start enc threads in the next Processor Group ***/
+ s_thread_attr.group_num++;
+ i4_curr_grp_num++;
+ /* This takes care of the condition that different proc groups can have a different number of cores */
+ i4_acc_proc_num += ai4_proc_count[i4_curr_grp_num];
+ }
+ }
+ else
+ {
+ s_thread_attr.core_affinity_mask = 0;
+ if((enc_ctr >= i4_acc_proc_num) &&
+ (ps_enc_ctxt_base->ps_stat_prms->s_multi_thrd_prms.i4_num_proc_groups > 1))
+ {
+ /*** When the cores in the Group0 is exhausted start enc threads in the next Processor Group ***/
+ s_thread_attr.group_num++;
+ i4_curr_grp_num++;
+ /* This takes care of the condition that different proc groups can have a different number of cores */
+ i4_acc_proc_num += ai4_proc_count[i4_curr_grp_num];
+ }
+ }
+
+ /* Create frame processing thread */
+ apv_enc_frm_proc_hdls[res_ctr][ctr] =
+ osal_thread_create(ps_hle_ctxt->pv_osal_handle, &s_thread_attr);
+ if(NULL == apv_enc_frm_proc_hdls[res_ctr][ctr])
+ {
+ return IV_FAIL;
+ }
+ enc_ctr++;
+ }
+ }
+
+ /* --------------------------------------------------------------------- */
+ /* Create all Pre - Encode Frame processing threads */
+ /* --------------------------------------------------------------------- */
+ for(res_ctr = 0; res_ctr < i4_num_resolutions; res_ctr++)
+ {
+ WORD32 pre_enc_ctr = 0;
+ WORD32 i4_loop_count;
+ WORD32 i4_curr_grp_num = 0;
+ ps_enc_ctxt = (enc_ctxt_t *)ps_hle_ctxt->apv_enc_hdl[res_ctr];
+
+ i4_acc_proc_num = 0;
+
+ for(i4_loop_count = 0; i4_loop_count < res_ctr; i4_loop_count++)
+ pre_enc_ctr += ps_hle_ctxt->ai4_num_core_per_res[i4_loop_count];
+ if(ps_enc_ctxt->s_multi_thrd.i4_all_thrds_active_flag)
+ {
+ /* If its sequential mode of operation enc and pre-enc threads to be given same core affinity mask */
+ pre_enc_ctr -= ps_enc_ctxt->s_multi_thrd.i4_num_enc_proc_thrds;
+ }
+
+ if(ps_enc_ctxt_base->ps_stat_prms->s_multi_thrd_prms.i4_num_proc_groups > 1)
+ {
+ /* Select the group number for each res based on processors present in each group */
+ for(i4_loop_count = 0;
+ i4_loop_count <
+ ps_enc_ctxt_base->ps_stat_prms->s_multi_thrd_prms.i4_num_proc_groups;
+ i4_loop_count++)
+ {
+ i4_acc_proc_num += ai4_proc_count[i4_loop_count];
+ if((pre_enc_ctr + ps_enc_ctxt->s_multi_thrd.i4_num_enc_proc_thrds) >=
+ i4_acc_proc_num)
+ {
+ /* if pre_enc_ctr is greater than proc count for first group,
+ then increment group count.This group number will be starting grp num for
+ that resolution */
+ i4_curr_grp_num++;
+ }
+ else
+ break;
+ }
+ }
+
+ for(ctr = 0; ctr < ps_enc_ctxt->s_multi_thrd.i4_num_pre_enc_proc_thrds; ctr++)
+ {
+ osal_thread_attr_t s_thread_attr = OSAL_DEFAULT_THREAD_ATTR;
+
+ s_memtab.e_mem_type = IV_EXT_CACHEABLE_NORMAL_MEM;
+ s_memtab.i4_mem_size = sizeof(frm_proc_thrd_ctxt_t);
+
+ ps_hle_ctxt->ihevce_mem_alloc(
+ ps_hle_ctxt->pv_mem_mgr_hdl, &ps_enc_ctxt_base->ps_stat_prms->s_sys_api, &s_memtab);
+ if(s_memtab.pv_base == NULL)
+ {
+ return (IV_FAIL);
+ }
+
+ aps_pre_enc_frm_proc_thrd_ctxt[res_ctr][ctr] = (frm_proc_thrd_ctxt_t *)s_memtab.pv_base;
+
+ /* initialise the interface structure parameters */
+ aps_pre_enc_frm_proc_thrd_ctxt[res_ctr][ctr]->i4_thrd_id = ctr;
+
+ aps_pre_enc_frm_proc_thrd_ctxt[res_ctr][ctr]->ps_hle_ctxt = ps_hle_ctxt;
+ ps_enc_ctxt = (enc_ctxt_t *)ps_hle_ctxt->apv_enc_hdl[res_ctr];
+ aps_pre_enc_frm_proc_thrd_ctxt[res_ctr][ctr]->pv_enc_ctxt = (void *)ps_enc_ctxt;
+
+ /* Initialize application thread attributes */
+ s_thread_attr.exit_code = 0;
+ s_thread_attr.name = 0;
+ s_thread_attr.priority_map_flag = 1;
+ s_thread_attr.priority = OSAL_PRIORITY_DEFAULT;
+ s_thread_attr.stack_addr = 0;
+ s_thread_attr.stack_size = THREAD_STACK_SIZE;
+ s_thread_attr.thread_func = ihevce_pre_enc_process_frame_thrd;
+ s_thread_attr.thread_param = (void *)(aps_pre_enc_frm_proc_thrd_ctxt[res_ctr][ctr]);
+ s_thread_attr.group_num = i4_curr_grp_num;
+
+ if(1 == ps_enc_ctxt_base->ps_stat_prms->s_multi_thrd_prms.i4_use_thrd_affinity)
+ {
+ ihevce_static_multi_thread_params_t *ps_multi_thrd_prms =
+ &ps_enc_ctxt_base->ps_stat_prms->s_multi_thrd_prms;
+
+ s_thread_attr.core_affinity_mask =
+ ps_multi_thrd_prms->au8_core_aff_mask
+ [pre_enc_ctr + ps_enc_ctxt->s_multi_thrd.i4_num_enc_proc_thrds];
+ if(((pre_enc_ctr + ps_enc_ctxt->s_multi_thrd.i4_num_enc_proc_thrds) >=
+ i4_acc_proc_num) &&
+ (ps_enc_ctxt_base->ps_stat_prms->s_multi_thrd_prms.i4_num_proc_groups > 1))
+ {
+ /*** When the cores in the Group0 is exhausted start enc threads in the next Processor Group ***/
+ s_thread_attr.group_num++;
+ i4_curr_grp_num++;
+ /* This takes care of the condition that different proc groups can have a different number of cores */
+ i4_acc_proc_num += ai4_proc_count[i4_curr_grp_num];
+ }
+ }
+ else
+ {
+ s_thread_attr.core_affinity_mask = 0;
+
+ if(((pre_enc_ctr + ps_enc_ctxt->s_multi_thrd.i4_num_enc_proc_thrds) >=
+ i4_acc_proc_num) &&
+ (ps_enc_ctxt_base->ps_stat_prms->s_multi_thrd_prms.i4_num_proc_groups > 1))
+ {
+ /*** When the cores in the Group0 is exhausted start enc threads in the next Processor Group ***/
+ s_thread_attr.group_num++;
+ i4_curr_grp_num++;
+ /* This takes care of the condition that different proc groups can have a different number of cores */
+ i4_acc_proc_num += ai4_proc_count[i4_curr_grp_num];
+ }
+ }
+
+ /* Create frame processing thread */
+ apv_pre_enc_frm_proc_hdls[res_ctr][ctr] =
+ osal_thread_create(ps_hle_ctxt->pv_osal_handle, &s_thread_attr);
+ if(NULL == apv_pre_enc_frm_proc_hdls[res_ctr][ctr])
+ {
+ return IV_FAIL;
+ }
+ pre_enc_ctr++;
+ }
+ }
+
+ /* Set the threads init done Flag */
+ ps_hle_ctxt->i4_hle_init_done = 1;
+
+ /* --------------------------------------------------------------------- */
+ /* Wait and destroy Processing threads */
+ /* --------------------------------------------------------------------- */
+
+ /* --------------------------------------------------------------------- */
+ /* Frame process Pre - Encode threads destroy */
+ /* --------------------------------------------------------------------- */
+ for(res_ctr = 0; res_ctr < i4_num_resolutions; res_ctr++)
+ {
+ ps_enc_ctxt = (enc_ctxt_t *)ps_hle_ctxt->apv_enc_hdl[res_ctr];
+
+ for(ctr = 0; ctr < ps_enc_ctxt->s_multi_thrd.i4_num_pre_enc_proc_thrds; ctr++)
+ {
+ /* Wait for thread to complete */
+ osal_thread_wait(apv_pre_enc_frm_proc_hdls[res_ctr][ctr]);
+
+ /* Destroy thread */
+ osal_thread_destroy(apv_pre_enc_frm_proc_hdls[res_ctr][ctr]);
+
+ s_memtab.i4_mem_size = sizeof(frm_proc_thrd_ctxt_t);
+ s_memtab.e_mem_type = IV_EXT_CACHEABLE_NORMAL_MEM;
+ s_memtab.pv_base = (void *)aps_pre_enc_frm_proc_thrd_ctxt[res_ctr][ctr];
+
+ /* free the ctxt memory */
+ ps_hle_ctxt->ihevce_mem_free(ps_hle_ctxt->pv_mem_mgr_hdl, &s_memtab);
+ }
+ }
+
+ /* --------------------------------------------------------------------- */
+ /* Frame process Encode slave threads destroy */
+ /* --------------------------------------------------------------------- */
+ for(res_ctr = 0; res_ctr < i4_num_resolutions; res_ctr++)
+ {
+ ps_enc_ctxt = (enc_ctxt_t *)ps_hle_ctxt->apv_enc_hdl[res_ctr];
+
+ for(ctr = 0; ctr < ps_enc_ctxt->s_multi_thrd.i4_num_enc_proc_thrds; ctr++)
+ {
+ /* Wait for thread to complete */
+ osal_thread_wait(apv_enc_frm_proc_hdls[res_ctr][ctr]);
+
+ /* Destroy thread */
+ osal_thread_destroy(apv_enc_frm_proc_hdls[res_ctr][ctr]);
+
+ s_memtab.i4_mem_size = sizeof(frm_proc_thrd_ctxt_t);
+ s_memtab.e_mem_type = IV_EXT_CACHEABLE_NORMAL_MEM;
+ s_memtab.pv_base = (void *)aps_enc_frm_proc_thrd_ctxt[res_ctr][ctr];
+
+ /* free the ctxt memory */
+ ps_hle_ctxt->ihevce_mem_free(ps_hle_ctxt->pv_mem_mgr_hdl, &s_memtab);
+ }
+ }
+
+ /* --------------------------------------------------------------------- */
+ /* Entropy threads destroy */
+ /* --------------------------------------------------------------------- */
+ for(res_ctr = 0; res_ctr < i4_num_resolutions; res_ctr++)
+ {
+ WORD32 i4_num_bitrates =
+ ps_enc_ctxt_base->ps_stat_prms->s_tgt_lyr_prms.as_tgt_params[res_ctr]
+ .i4_num_bitrate_instances;
+
+ for(ctr = 0; ctr < i4_num_bitrates; ctr++)
+ {
+ /* Wait for Entropy Coding thread to complete */
+ osal_thread_wait(apv_entropy_thrd_hdls[res_ctr][ctr]);
+
+ /* Destroy Entropy Coding thread */
+ osal_thread_destroy(apv_entropy_thrd_hdls[res_ctr][ctr]);
+
+ //semaphore will come here
+
+ s_memtab.i4_mem_size = sizeof(frm_proc_thrd_ctxt_t);
+ s_memtab.e_mem_type = IV_EXT_CACHEABLE_NORMAL_MEM;
+ s_memtab.pv_base = (void *)aps_entropy_thrd_ctxt[res_ctr][ctr];
+
+ /* free the ctxt memory */
+ ps_hle_ctxt->ihevce_mem_free(ps_hle_ctxt->pv_mem_mgr_hdl, &s_memtab);
+ }
+ }
+
+ s_memtab.i4_mem_size = sizeof(lap_intface_t);
+ s_memtab.e_mem_type = IV_EXT_CACHEABLE_NORMAL_MEM;
+ s_memtab.pv_base = (void *)ps_lap_interface_ctxt;
+ ps_hle_ctxt->ihevce_mem_free(ps_hle_ctxt->pv_mem_mgr_hdl, &s_memtab);
+ /* profile stop */
+ PROFILE_STOP(&ps_hle_ctxt->profile_hle, NULL);
+ return (0);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_q_get_free_inp_data_buff \endif
+*
+* \brief
+* Requests a free buffer from the input data queue
+* (IHEVCE_INPUT_DATA_CTRL_Q) of encoder handle 0. No request is made once
+* that encoder context has i4_frame_limit_reached set to 1
+*
+* \param[in] ps_hle_ctxt : high level encoder context pointer
+* \param[in] pi4_buff_id : pointer to return the buffer id
+* (handed on to ihevce_q_get_free_buff as is)
+* \param[in] i4_blocking_mode : blocking mode / non blocking mode
+*
+* \return
+* Pointer handed back by ihevce_q_get_free_buff, or NULL when the frame
+* limit has been reached
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void *ihevce_q_get_free_inp_data_buff(
+    ihevce_hle_ctxt_t *ps_hle_ctxt, WORD32 *pi4_buff_id, WORD32 i4_blocking_mode)
+{
+    enc_ctxt_t *ps_base_enc_ctxt = (enc_ctxt_t *)ps_hle_ctxt->apv_enc_hdl[0];
+
+    if(1 == ps_base_enc_ctxt->i4_frame_limit_reached)
+    {
+        return NULL;
+    }
+
+    /* Input buffer is same for all enc handles, so handle 0 is queried */
+    return ihevce_q_get_free_buff(
+        ps_hle_ctxt->apv_enc_hdl[0], IHEVCE_INPUT_DATA_CTRL_Q, pi4_buff_id, i4_blocking_mode);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_q_get_free_inp_ctrl_buff \endif
+*
+* \brief
+* Requests a free buffer from the asynchronous input control queue
+* (IHEVCE_INPUT_ASYNCH_CTRL_Q). The request always goes to encoder handle 0,
+* as the input buffer is the same for all enc handles
+*
+* \param[in] ps_hle_ctxt : high level encoder context pointer
+* \param[in] pi4_buff_id : pointer to return the buffer id
+* (handed on to ihevce_q_get_free_buff as is)
+* \param[in] i4_blocking_mode : blocking mode / non blocking mode
+*
+* \return
+* Whatever ihevce_q_get_free_buff hands back for that queue; the value is
+* returned to the caller unmodified
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void *ihevce_q_get_free_inp_ctrl_buff(
+    ihevce_hle_ctxt_t *ps_hle_ctxt, WORD32 *pi4_buff_id, WORD32 i4_blocking_mode)
+{
+    /* Input buffer is same for all enc handles, so handle 0 is queried */
+    return ihevce_q_get_free_buff(
+        ps_hle_ctxt->apv_enc_hdl[0], IHEVCE_INPUT_ASYNCH_CTRL_Q, pi4_buff_id, i4_blocking_mode);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_q_get_free_out_strm_buff \endif
+*
+* \brief
+* Requests a free buffer from the output stream queue selected by
+* IHEVCE_OUTPUT_DATA_Q + i4_bitrate_instance on the chosen encoder handle
+*
+* \param[in] ps_hle_ctxt : high level encoder context pointer
+* \param[in] pi4_buff_id : pointer to return the buffer id
+* \param[in] i4_blocking_mode : blocking mode / non blocking mode
+* \param[in] i4_bitrate_instance : offset added to IHEVCE_OUTPUT_DATA_Q
+* \param[in] i4_res_instance : index of the encoder handle in apv_enc_hdl
+*
+* \return
+* Pointer handed back by ihevce_q_get_free_buff for that queue
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void *ihevce_q_get_free_out_strm_buff(
+    ihevce_hle_ctxt_t *ps_hle_ctxt,
+    WORD32 *pi4_buff_id,
+    WORD32 i4_blocking_mode,
+    WORD32 i4_bitrate_instance,
+    WORD32 i4_res_instance)
+{
+    /* Queue id is the output data queue base offset by the bitrate instance */
+    const WORD32 i4_que_id = IHEVCE_OUTPUT_DATA_Q + i4_bitrate_instance;
+
+    return ihevce_q_get_free_buff(
+        ps_hle_ctxt->apv_enc_hdl[i4_res_instance], i4_que_id, pi4_buff_id, i4_blocking_mode);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_q_get_free_out_recon_buff \endif
+*
+* \brief
+* Requests a free buffer from the recon queue selected by
+* IHEVCE_RECON_DATA_Q + i4_bitrate_instance on the chosen encoder handle
+*
+* \param[in] ps_hle_ctxt : high level encoder context pointer
+* \param[in] pi4_buff_id : pointer to return the buffer id
+* \param[in] i4_blocking_mode : blocking mode / non blocking mode
+* \param[in] i4_bitrate_instance : offset added to IHEVCE_RECON_DATA_Q
+* \param[in] i4_res_instance : index of the encoder handle in apv_enc_hdl
+*
+* \return
+* Pointer handed back by ihevce_q_get_free_buff for that queue
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void *ihevce_q_get_free_out_recon_buff(
+    ihevce_hle_ctxt_t *ps_hle_ctxt,
+    WORD32 *pi4_buff_id,
+    WORD32 i4_blocking_mode,
+    WORD32 i4_bitrate_instance,
+    WORD32 i4_res_instance)
+{
+    /* Queue id is the recon data queue base offset by the bitrate instance */
+    const WORD32 i4_que_id = IHEVCE_RECON_DATA_Q + i4_bitrate_instance;
+
+    return ihevce_q_get_free_buff(
+        ps_hle_ctxt->apv_enc_hdl[i4_res_instance], i4_que_id, pi4_buff_id, i4_blocking_mode);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_q_set_inp_data_buff_prod \endif
+*
+* \brief
+* Sets buffer i4_buff_id of the input data queue
+* (IHEVCE_INPUT_DATA_CTRL_Q) as produced. The call is always issued on
+* encoder handle 0
+*
+* \param[in] ps_hle_ctxt : high level encoder context pointer
+* \param[in] i4_buff_id : buffer id which needs to be set as produced
+*
+* \return
+* Status reported by ihevce_q_set_buff_prod, passed back to the caller
+* unchanged
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+IV_API_CALL_STATUS_T
+    ihevce_q_set_inp_data_buff_prod(ihevce_hle_ctxt_t *ps_hle_ctxt, WORD32 i4_buff_id)
+{
+    /* The input data queue is addressed through encoder handle 0 */
+    return ihevce_q_set_buff_prod(
+        ps_hle_ctxt->apv_enc_hdl[0], IHEVCE_INPUT_DATA_CTRL_Q, i4_buff_id);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_q_set_inp_ctrl_buff_prod \endif
+*
+* \brief
+* Sets buffer i4_buff_id of the asynchronous input control queue
+* (IHEVCE_INPUT_ASYNCH_CTRL_Q) as produced. The call is always issued on
+* encoder handle 0
+*
+* \param[in] ps_hle_ctxt : high level encoder context pointer
+* \param[in] i4_buff_id : buffer id which needs to be set as produced
+* (handed on to ihevce_q_set_buff_prod as is)
+*
+* \return
+* Status reported by ihevce_q_set_buff_prod, passed back to the caller
+* unchanged
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+IV_API_CALL_STATUS_T
+    ihevce_q_set_inp_ctrl_buff_prod(ihevce_hle_ctxt_t *ps_hle_ctxt, WORD32 i4_buff_id)
+{
+    /* The input control queue is addressed through encoder handle 0 */
+    return ihevce_q_set_buff_prod(
+        ps_hle_ctxt->apv_enc_hdl[0], IHEVCE_INPUT_ASYNCH_CTRL_Q, i4_buff_id);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_q_set_out_strm_buff_prod \endif
+*
+* \brief
+* Sets buffer i4_buff_id as produced in the output stream queue selected
+* by IHEVCE_OUTPUT_DATA_Q + i4_bitrate_instance_id on the chosen handle
+*
+* \param[in] ps_hle_ctxt : high level encoder context pointer
+* \param[in] i4_buff_id : buffer id which needs to be set as produced
+* \param[in] i4_bitrate_instance_id : offset added to IHEVCE_OUTPUT_DATA_Q
+* \param[in] i4_resolution_id : index of the encoder handle in apv_enc_hdl
+*
+* \return
+* Status reported by ihevce_q_set_buff_prod
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+IV_API_CALL_STATUS_T ihevce_q_set_out_strm_buff_prod(
+    ihevce_hle_ctxt_t *ps_hle_ctxt,
+    WORD32 i4_buff_id,
+    WORD32 i4_bitrate_instance_id,
+    WORD32 i4_resolution_id)
+{
+    /* Queue id is the output data queue base offset by the bitrate instance */
+    const WORD32 i4_que_id = IHEVCE_OUTPUT_DATA_Q + i4_bitrate_instance_id;
+
+    return ihevce_q_set_buff_prod(
+        ps_hle_ctxt->apv_enc_hdl[i4_resolution_id], i4_que_id, i4_buff_id);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_q_set_out_recon_buff_prod \endif
+*
+* \brief
+* Sets buffer i4_buff_id as produced in the recon queue selected by
+* IHEVCE_RECON_DATA_Q + i4_bitrate_instance_id on the chosen handle
+*
+* \param[in] ps_hle_ctxt : high level encoder context pointer
+* \param[in] i4_buff_id : buffer id which needs to be set as produced
+* \param[in] i4_bitrate_instance_id : offset added to IHEVCE_RECON_DATA_Q
+* \param[in] i4_resolution_id : index of the encoder handle in apv_enc_hdl
+*
+* \return
+* Status reported by ihevce_q_set_buff_prod
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+IV_API_CALL_STATUS_T ihevce_q_set_out_recon_buff_prod(
+    ihevce_hle_ctxt_t *ps_hle_ctxt,
+    WORD32 i4_buff_id,
+    WORD32 i4_bitrate_instance_id,
+    WORD32 i4_resolution_id)
+{
+    /* Queue id is the recon data queue base offset by the bitrate instance */
+    const WORD32 i4_que_id = IHEVCE_RECON_DATA_Q + i4_bitrate_instance_id;
+
+    return ihevce_q_set_buff_prod(
+        ps_hle_ctxt->apv_enc_hdl[i4_resolution_id], i4_que_id, i4_buff_id);
+}
+
+//recon_dump
+/*!
+******************************************************************************
+* \if Function name : ihevce_q_get_filled_recon_buff \endif
+*
+* \brief
+* Requests the next filled buffer from the recon queue selected by
+* IHEVCE_RECON_DATA_Q + i4_bitrate_instance_id on the chosen encoder handle
+*
+* \param[in] ps_hle_ctxt : high level encoder context pointer
+* \param[in] pi4_buff_id : pointer to return the buffer id
+* \param[in] i4_blocking_mode : blocking mode / non blocking mode
+* \param[in] i4_bitrate_instance_id : offset added to IHEVCE_RECON_DATA_Q
+* \param[in] i4_resolution_id : index of the encoder handle in apv_enc_hdl
+*
+* \return
+* Pointer handed back by ihevce_q_get_filled_buff for that queue; the
+* value is returned to the caller unmodified
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void *ihevce_q_get_filled_recon_buff(
+    ihevce_hle_ctxt_t *ps_hle_ctxt,
+    WORD32 *pi4_buff_id,
+    WORD32 i4_blocking_mode,
+    WORD32 i4_bitrate_instance_id,
+    WORD32 i4_resolution_id)
+{
+    /* Queue id is the recon data queue base offset by the bitrate instance */
+    const WORD32 i4_que_id = IHEVCE_RECON_DATA_Q + i4_bitrate_instance_id;
+
+    return ihevce_q_get_filled_buff(
+        ps_hle_ctxt->apv_enc_hdl[i4_resolution_id], i4_que_id, pi4_buff_id, i4_blocking_mode);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_q_get_filled_ctrl_sts_buff \endif
+*
+* \brief
+*    Gets the next filled control status buffer from IHEVCE_OUTPUT_STATUS_Q
+*
+* \param[in] ps_hle_ctxt : high level encoder context pointer
+* \param[out] pi4_buff_id : pointer to return the buffer id
+* \param[in] i4_blocking_mode : blocking mode / non blocking mode
+*
+* \return
+*    Pointer returned by ihevce_q_get_filled_buff
+*
+* \author
+*  Ittiam
+*
+*****************************************************************************
+*/
+void *ihevce_q_get_filled_ctrl_sts_buff(
+    ihevce_hle_ctxt_t *ps_hle_ctxt, WORD32 *pi4_buff_id, WORD32 i4_blocking_mode)
+{
+    /* the status queue is always looked up on the first encoder handle */
+    void *pv_enc_hdl = ps_hle_ctxt->apv_enc_hdl[0];
+
+    return (ihevce_q_get_filled_buff(
+        pv_enc_hdl, IHEVCE_OUTPUT_STATUS_Q, pi4_buff_id, i4_blocking_mode));
+}
+
+//recon_dump
+/*!
+******************************************************************************
+* \if Function name : ihevce_q_rel_recon_buf \endif
+*
+* \brief
+*    Releases a recon buffer back to the recon data queue
+*    (IHEVCE_RECON_DATA_Q + bitrate instance id) of one encoder handle
+*
+* \param[in] ps_hle_ctxt : high level encoder context pointer
+* \param[in] i4_buff_id : buffer id which needs to be freed
+* \param[in] i4_bitrate_instance_id : offset added to IHEVCE_RECON_DATA_Q
+* \param[in] i4_resolution_id : index of the encoder handle in apv_enc_hdl
+*
+* \return
+*    Status reported by ihevce_q_rel_buf
+*
+* \author
+*  Ittiam
+*
+*****************************************************************************
+*/
+IV_API_CALL_STATUS_T ihevce_q_rel_recon_buf(
+    ihevce_hle_ctxt_t *ps_hle_ctxt,
+    WORD32 i4_buff_id,
+    WORD32 i4_bitrate_instance_id,
+    WORD32 i4_resolution_id)
+{
+    /* encoder handle and recon queue id addressed by the caller */
+    void *pv_enc_hdl = ps_hle_ctxt->apv_enc_hdl[i4_resolution_id];
+    WORD32 i4_recon_q_id = IHEVCE_RECON_DATA_Q + i4_bitrate_instance_id;
+
+    return (ihevce_q_rel_buf(pv_enc_hdl, i4_recon_q_id, i4_buff_id));
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_q_rel_ctrl_sts_buf \endif
+*
+* \brief
+*    Releases an output control status buffer back to IHEVCE_OUTPUT_STATUS_Q
+*
+* \param[in] ps_hle_ctxt : high level encoder context pointer
+* \param[in] i4_buff_id : buffer id which needs to be freed
+*
+* \return
+*    Status reported by ihevce_q_rel_buf
+*
+* \author
+*  Ittiam
+*
+*****************************************************************************
+*/
+IV_API_CALL_STATUS_T ihevce_q_rel_ctrl_sts_buf(ihevce_hle_ctxt_t *ps_hle_ctxt, WORD32 i4_buff_id)
+{
+    /* the status queue is always looked up on the first encoder handle */
+    void *pv_enc_hdl = ps_hle_ctxt->apv_enc_hdl[0];
+    const WORD32 i4_q_id = IHEVCE_OUTPUT_STATUS_Q;
+
+    return (ihevce_q_rel_buf(pv_enc_hdl, i4_q_id, i4_buff_id));
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_hle_interface_delete \endif
+*
+* \brief
+* High level encoder delete interface. For each encoder handle up to the first
+* NULL one: destroys semaphores, mutexes, dependency managers and frees memory
+* \param[in] ps_hle_ctxt : high level encoder interface context pointer
+*
+* \return
+* IV_SUCCESS
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+IV_API_CALL_STATUS_T ihevce_hle_interface_delete(ihevce_hle_ctxt_t *ps_hle_ctxt)
+{
+ /* local variables */
+ enc_ctxt_t *ps_enc_ctxt;
+ iv_mem_rec_t s_memtab;
+ WORD32 ctr = 0, i, res_ctr, i4_num_resolutions;
+ WORD32 ai4_num_bitrate_instances[IHEVCE_MAX_NUM_RESOLUTIONS] = { 1 }; /* element 0 = 1, others 0 */
+
+ i4_num_resolutions = ps_hle_ctxt->ps_static_cfg_prms->s_tgt_lyr_prms.i4_num_res_layers;
+ for(ctr = 0; ctr < i4_num_resolutions; ctr++)
+ { /* cache the per-resolution bitrate instance counts for the profile loop at the end */
+ ai4_num_bitrate_instances[ctr] =
+ ps_hle_ctxt->ps_static_cfg_prms->s_tgt_lyr_prms.as_tgt_params[ctr]
+ .i4_num_bitrate_instances;
+ }
+
+ for(res_ctr = 0; res_ctr < i4_num_resolutions && ps_hle_ctxt->apv_enc_hdl[res_ctr]; res_ctr++)
+ { /* the loop ends at the first NULL encoder handle */
+ ps_enc_ctxt = (enc_ctxt_t *)ps_hle_ctxt->apv_enc_hdl[res_ctr];
+
+ if(res_ctr == 0) /* these semaphores are destroyed for the first handle only */
+ {
+ osal_sem_destroy(ps_enc_ctxt->s_thrd_sem_ctxt.pv_lap_sem_handle);
+ osal_sem_destroy(ps_enc_ctxt->s_thrd_sem_ctxt.pv_inp_data_sem_handle);
+ osal_sem_destroy(ps_enc_ctxt->s_thrd_sem_ctxt.pv_inp_ctrl_sem_handle);
+ if(1 == ps_hle_ctxt->ps_static_cfg_prms->s_tgt_lyr_prms.i4_mres_single_out)
+ {
+ osal_sem_destroy(ps_enc_ctxt->s_thrd_sem_ctxt.pv_ent_common_mres_sem_hdl);
+ osal_sem_destroy(ps_enc_ctxt->s_thrd_sem_ctxt.pv_out_common_mres_sem_hdl);
+ }
+ }
+
+ osal_sem_destroy(ps_enc_ctxt->s_thrd_sem_ctxt.pv_lap_inp_data_sem_hdl);
+ osal_sem_destroy(ps_enc_ctxt->s_thrd_sem_ctxt.pv_preenc_inp_data_sem_hdl);
+
+ osal_sem_destroy(ps_enc_ctxt->s_thrd_sem_ctxt.pv_enc_frm_proc_sem_handle);
+ osal_sem_destroy(ps_enc_ctxt->s_thrd_sem_ctxt.pv_pre_enc_frm_proc_sem_handle);
+
+ osal_sem_destroy(ps_enc_ctxt->s_thrd_sem_ctxt.pv_out_ctrl_sem_handle);
+
+ for(i = 0; i < ps_hle_ctxt->ps_static_cfg_prms->s_tgt_lyr_prms.as_tgt_params[res_ctr]
+ .i4_num_bitrate_instances;
+ i++)
+ {
+ osal_sem_destroy(ps_enc_ctxt->s_thrd_sem_ctxt.apv_ent_cod_sem_handle[i]);
+ osal_sem_destroy(ps_enc_ctxt->s_thrd_sem_ctxt.apv_out_strm_sem_handle[i]);
+ osal_sem_destroy(ps_enc_ctxt->s_thrd_sem_ctxt.apv_out_recon_sem_handle[i]);
+ }
+
+ /* destroy the mutex allocated for job queue usage encode group (ME) */
+ osal_mutex_destroy(ps_enc_ctxt->s_multi_thrd.pv_job_q_mutex_hdl_enc_grp_me);
+
+ /* destroy the mutex allocated for job queue usage encode group (enc loop) */
+ osal_mutex_destroy(ps_enc_ctxt->s_multi_thrd.pv_job_q_mutex_hdl_enc_grp_enc_loop);
+
+ /* destroy the mutexes allocated for enc thread group */
+ for(i = 0; i < MAX_NUM_ME_PARALLEL; i++)
+ {
+ osal_mutex_destroy(ps_enc_ctxt->s_multi_thrd.apv_mutex_handle[i]);
+
+ osal_mutex_destroy(ps_enc_ctxt->s_multi_thrd.apv_mutex_handle_me_end[i]);
+ }
+
+ for(i = 0; i < MAX_NUM_ENC_LOOP_PARALLEL; i++)
+ {
+ osal_mutex_destroy(ps_enc_ctxt->s_multi_thrd.apv_mutex_handle_frame_init[i]);
+
+ osal_mutex_destroy(ps_enc_ctxt->s_multi_thrd.apv_post_enc_mutex_handle[i]);
+ }
+
+ /* destroy the mutex allocated for job queue, init and de-init
+ usage pre encode group */
+ osal_mutex_destroy(ps_enc_ctxt->s_multi_thrd.pv_job_q_mutex_hdl_pre_enc_decomp);
+ osal_mutex_destroy(ps_enc_ctxt->s_multi_thrd.pv_job_q_mutex_hdl_pre_enc_hme);
+ osal_mutex_destroy(ps_enc_ctxt->s_multi_thrd.pv_job_q_mutex_hdl_pre_enc_l0ipe);
+ osal_mutex_destroy(ps_enc_ctxt->s_multi_thrd.pv_mutex_hdl_pre_enc_init);
+ osal_mutex_destroy(ps_enc_ctxt->s_multi_thrd.pv_mutex_hdl_pre_enc_decomp_deinit);
+ osal_mutex_destroy(ps_enc_ctxt->s_multi_thrd.pv_mutex_hdl_pre_enc_hme_init);
+ osal_mutex_destroy(ps_enc_ctxt->s_multi_thrd.pv_mutex_hdl_pre_enc_hme_deinit);
+ osal_mutex_destroy(ps_enc_ctxt->s_multi_thrd.pv_mutex_hdl_l0_ipe_init);
+ osal_mutex_destroy(ps_enc_ctxt->s_multi_thrd.pv_mutex_hdl_pre_enc_deinit);
+
+ /* destroy the EncLoop Module */
+ /* Note : Only Destroys the resources allocated in the module like */
+ /* semaphore,etc. Memory free is done separately using memtabs */
+ ihevce_enc_loop_delete(ps_enc_ctxt->s_module_ctxt.pv_enc_loop_ctxt);
+
+ /* destroy the Coarse ME Module */
+ /* Note : Only Destroys the resources allocated in the module like */
+ /* semaphore,etc. Memory free is done separately using memtabs */
+ ihevce_coarse_me_delete(
+ ps_enc_ctxt->s_module_ctxt.pv_coarse_me_ctxt,
+ ps_hle_ctxt->ps_static_cfg_prms,
+ ps_enc_ctxt->i4_resolution_id);
+ /* destroy semaphores for all the threads in pre-enc and enc */
+ for(ctr = 0; ctr < ps_enc_ctxt->s_multi_thrd.i4_num_enc_proc_thrds; ctr++)
+ {
+ osal_sem_destroy(ps_enc_ctxt->s_multi_thrd.apv_enc_thrd_sem_handle[ctr]);
+ }
+
+ for(ctr = 0; ctr < ps_enc_ctxt->s_multi_thrd.i4_num_pre_enc_proc_thrds; ctr++)
+ {
+ osal_sem_destroy(ps_enc_ctxt->s_multi_thrd.apv_pre_enc_thrd_sem_handle[ctr]);
+ }
+
+ /* destroy the ME-EncLoop Dep Mngr */
+ /* Note : Only Destroys the resources allocated in the module like */
+ /* semaphore,etc. Memory free is done separately using memtabs */
+ for(ctr = 0; ctr < NUM_ME_ENC_BUFS; ctr++)
+ {
+ ihevce_dmgr_del(ps_enc_ctxt->s_multi_thrd.apv_dep_mngr_encloop_dep_me[ctr]);
+ }
+ /* destroy the Prev. frame EncLoop Done Dep Mngr */
+ /* Note : Only Destroys the resources allocated in the module like */
+ /* semaphore,etc. Memory free is done separately using memtabs */
+ for(i = 0; i < ps_enc_ctxt->s_multi_thrd.i4_num_enc_loop_frm_pllel; i++)
+ {
+ ihevce_dmgr_del(ps_enc_ctxt->s_multi_thrd.apv_dep_mngr_prev_frame_done[i]);
+ }
+ /* destroy the Prev. frame EncLoop Done for re-encode Dep Mngr */
+ ihevce_dmgr_del(ps_enc_ctxt->s_multi_thrd.pv_dep_mngr_prev_frame_enc_done_for_reenc);
+
+ /* destroy the Prev. frame ME Done Dep Mngr */
+ /* Note : Only Destroys the resources allocated in the module like */
+ /* semaphore,etc. Memory free is done separately using memtabs */
+ for(i = 0; i < ps_enc_ctxt->s_multi_thrd.i4_num_me_frm_pllel; i++)
+ {
+ ihevce_dmgr_del(ps_enc_ctxt->s_multi_thrd.apv_dep_mngr_prev_frame_me_done[i]);
+ }
+
+ /* destroy the Prev. frame PreEnc L1 Done Dep Mngr */
+ /* Note : Only Destroys the resources allocated in the module like */
+ /* semaphore,etc. Memory free is done separately using memtabs */
+ ihevce_dmgr_del(ps_enc_ctxt->s_multi_thrd.pv_dep_mngr_prev_frame_pre_enc_l1);
+
+ /* destroy the Prev. frame PreEnc HME Done Dep Mngr */
+ /* Note : Only Destroys the resources allocated in the module like */
+ /* semaphore,etc. Memory free is done separately using memtabs */
+ ihevce_dmgr_del(ps_enc_ctxt->s_multi_thrd.pv_dep_mngr_prev_frame_pre_enc_coarse_me);
+
+ /* destroy the Prev. frame PreEnc L0 Done Dep Mngr */
+ /* Note : Only Destroys the resources allocated in the module like */
+ /* semaphore,etc. Memory free is done separately using memtabs */
+ ihevce_dmgr_del(ps_enc_ctxt->s_multi_thrd.pv_dep_mngr_prev_frame_pre_enc_l0);
+
+ /* destroy the ME-Prev Recon Dep Mngr (only recon queue 0 is walked here) */
+ /* Note : Only Destroys the resources allocated in the module like */
+ /* semaphore,etc. Memory free is done separately using memtabs */
+ for(ctr = 0; ctr < ps_enc_ctxt->ai4_num_buf_recon_q[0]; ctr++)
+ {
+ ihevce_dmgr_del(ps_enc_ctxt->pps_recon_buf_q[0][ctr]->pv_dep_mngr_recon);
+ }
+
+ /* destroy all the mutex created */
+ if(res_ctr == 0)
+ {
+ if(NULL != ps_enc_ctxt->s_enc_ques.pv_q_mutex_hdl)
+ {
+ osal_mutex_destroy(ps_enc_ctxt->s_enc_ques.pv_q_mutex_hdl);
+ }
+ }
+
+ if(NULL != ps_enc_ctxt->pv_rc_mutex_lock_hdl)
+ {
+ osal_mutex_destroy(ps_enc_ctxt->pv_rc_mutex_lock_hdl);
+ }
+
+ if(NULL != ps_enc_ctxt->s_multi_thrd.pv_sub_pic_rc_mutex_lock_hdl)
+ {
+ osal_mutex_destroy(ps_enc_ctxt->s_multi_thrd.pv_sub_pic_rc_mutex_lock_hdl);
+ }
+
+ if(NULL != ps_enc_ctxt->s_multi_thrd.pv_sub_pic_rc_for_qp_update_mutex_lock_hdl)
+ {
+ osal_mutex_destroy(
+ ps_enc_ctxt->s_multi_thrd.pv_sub_pic_rc_for_qp_update_mutex_lock_hdl);
+ }
+
+ /* call the memory free function */
+ ihevce_mem_manager_free(ps_enc_ctxt, ps_hle_ctxt);
+ if((1 == ps_hle_ctxt->ps_static_cfg_prms->s_tgt_lyr_prms.i4_mres_single_out) &&
+ (res_ctr == 0))
+ {
+ s_memtab.i4_mem_size = sizeof(WORD32) * IHEVCE_MAX_NUM_RESOLUTIONS;
+ s_memtab.i4_mem_alignment = 4;
+ s_memtab.i4_size = sizeof(iv_mem_rec_t);
+ s_memtab.e_mem_type = IV_EXT_CACHEABLE_NORMAL_MEM;
+ s_memtab.pv_base = ps_enc_ctxt->s_multi_thrd.pi4_active_res_id;
+ /* free active_res_id memory */
+ ps_hle_ctxt->ihevce_mem_free(ps_hle_ctxt->pv_mem_mgr_hdl, &s_memtab);
+ }
+ if(res_ctr == (i4_num_resolutions - 1))
+ {
+ s_memtab.i4_mem_size = sizeof(ihevce_static_cfg_params_t);
+ s_memtab.i4_mem_alignment = 4;
+ s_memtab.i4_size = sizeof(iv_mem_rec_t);
+ s_memtab.e_mem_type = IV_EXT_CACHEABLE_NORMAL_MEM;
+ s_memtab.pv_base = ps_enc_ctxt->ps_stat_prms;
+
+ /* free the static params memory (ps_stat_prms); done for the last resolution only */
+ ps_hle_ctxt->ihevce_mem_free(ps_hle_ctxt->pv_mem_mgr_hdl, &s_memtab);
+ }
+ s_memtab.i4_mem_size = sizeof(enc_ctxt_t);
+ s_memtab.i4_mem_alignment = 4;
+ s_memtab.i4_size = sizeof(iv_mem_rec_t);
+ s_memtab.e_mem_type = IV_EXT_CACHEABLE_NORMAL_MEM;
+ s_memtab.pv_base = ps_enc_ctxt;
+
+ /* free the encoder context pointer */
+ ps_hle_ctxt->ihevce_mem_free(ps_hle_ctxt->pv_mem_mgr_hdl, &s_memtab);
+
+ /* reset the encoder handle to NULL */
+ ps_hle_ctxt->apv_enc_hdl[res_ctr] = NULL;
+ }
+ /* profile end */
+ PROFILE_END(&ps_hle_ctxt->profile_hle, "hle interface thread active time");
+ for(res_ctr = 0; res_ctr < i4_num_resolutions; res_ctr++)
+ {
+ WORD32 i4_br_id;
+
+ PROFILE_END(&ps_hle_ctxt->profile_pre_enc[res_ctr], "pre enc process");
+ for(i4_br_id = 0; i4_br_id < ai4_num_bitrate_instances[res_ctr]; i4_br_id++)
+ {
+ PROFILE_END(&ps_hle_ctxt->profile_enc[res_ctr][i4_br_id], "enc process");
+ PROFILE_END(&ps_hle_ctxt->profile_entropy[res_ctr][i4_br_id], "entropy process");
+ }
+ }
+
+ /* OSAL Delete */
+ ihevce_osal_delete((void *)ps_hle_ctxt);
+
+ return (IV_SUCCESS);
+}
diff --git a/encoder/ihevce_hle_interface.h b/encoder/ihevce_hle_interface.h
new file mode 100644
index 0000000..5aadef4
--- /dev/null
+++ b/encoder/ihevce_hle_interface.h
@@ -0,0 +1,435 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_hle_interface.h
+*
+* \brief
+* This file contains interface prototypes of the High level encoder interface
+* structure and interface functions.
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_HLE_INTERFACE_H_
+#define _IHEVCE_HLE_INTERFACE_H_
+
+#include "ihevce_profile.h"
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+#define IHEVCE_DLL
+
+#define DUMP_MBR_MULTI_RES_INFO 0
+
+#define DUMP_RC_2_PASS_DATA_BINARY_APP 1
+/* Print helpers: both expect a variable named ps_sys_api in the caller's scope */
+
+/* Print everything on console; the x and y arguments are not used */
+#define PRINTF(v, x, y, ...) ps_sys_api->ihevce_printf(v, __VA_ARGS__)
+/* Print to fp when it is non-NULL. Expands to a bare if-block: brace any outer if/else */
+#define FPRINTF(v, fp, x, y, ...) \
+    if(NULL != (fp)) \
+    { \
+        ps_sys_api->s_file_io_api.ihevce_fprintf(v, (fp), __VA_ARGS__); \
+    }
+
+/* Semaphore attribute */
+#define SEM_START_VALUE 1
+#define THREAD_STACK_SIZE 0x80000
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+typedef enum
+{
+    /* do not wait when no buffer is available */
+    BUFF_QUE_NON_BLOCKING_MODE = 0,
+    /* wait until a buffer becomes available */
+    BUFF_QUE_BLOCKING_MODE = 1
+} BUFF_QUE_MODES_T;
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+/**
+ * @brief Structure to describe Process interface parameters of Encoder
+ */
+typedef struct
+{
+ /**
+ * Size for version tracking purpose
+ */
+ WORD32 i4_size;
+
+ /**
+ * Flag to communicate that HLE thread init is done
+ */
+ WORD32 i4_hle_init_done;
+
+ /**
+ * Error code to communicate any error during create stage
+ */
+ WORD32 i4_error_code;
+
+ /**
+ * GPU memory accumulator
+ */
+ WORD32 i4_gpu_mem_size;
+
+ /**
+ * OSAL handle
+ */
+ void *pv_osal_handle;
+
+ /**
+ * Encoder handles, indexed by resolution id
+ */
+ void *apv_enc_hdl[IHEVCE_MAX_NUM_RESOLUTIONS];
+
+ /**
+ * Static parameters structure
+ */
+ ihevce_static_cfg_params_t *ps_static_cfg_prms;
+
+ /**
+ * Memory Manager handle
+ */
+ void *pv_mem_mgr_hdl;
+
+ /**
+ * Input Buffer callback handle
+ */
+ void *pv_inp_cb_handle;
+
+ /**
+ * Output Buffer callback handle
+ */
+ void *pv_out_cb_handle;
+
+ /**
+ * Output Recon Buffer callback handle
+ */
+ void *pv_recon_cb_handle;
+
+ /**
+ * Call back API to be called when the buffer for bitstream filling is done
+ */
+ IV_API_CALL_STATUS_T (*ihevce_output_strm_fill_done)
+ (void *pv_out_cb_handle, void *pv_curr_out, WORD32 i4_bitrate_instance, WORD32 i4_res_instance);
+
+ /**
+ * Call back API to be called when the buffer for recon filling is done
+ */
+ IV_API_CALL_STATUS_T (*ihevce_output_recon_fill_done)
+ (void *pv_recon_cb_handle,
+ void *pv_curr_out,
+ WORD32 i4_bitrate_instance,
+ WORD32 i4_res_instance);
+
+ /**
+ * Call back API to be called while freeing the input buffer
+ */
+ IV_API_CALL_STATUS_T (*ihevce_set_free_input_buff)
+ (void *pv_inp_cb_handle, iv_input_data_ctrl_buffs_t *ps_input_buf);
+
+ /**
+ * Call back API to be called during allocation using memory manager
+ */
+ void (*ihevce_mem_alloc)(
+ void *pv_mem_mgr_hdl, ihevce_sys_api_t *ps_sys_api, iv_mem_rec_t *ps_memtab);
+
+ /**
+ * Call back API for freeing using memory manager
+ */
+ void (*ihevce_mem_free)(void *pv_mem_mgr_hdl, iv_mem_rec_t *ps_memtab);
+
+ /* create or run time input buffer allocation, 1: create time 0: run time */
+ WORD32 i4_create_time_input_allocation;
+
+ /* create or run time output buffer allocation, 1: create time 0: run time */
+ WORD32 i4_create_time_output_allocation;
+
+ /* Cores per resolution */
+ WORD32 ai4_num_core_per_res[IHEVCE_MAX_NUM_RESOLUTIONS];
+
+ /**
+ * Error Handling callback handle
+ */
+ void *pv_cmd_err_cb_handle;
+
+ /**
+ * Call back API to be called when errors need to be reported
+ */
+ IV_API_CALL_STATUS_T (*ihevce_cmds_error_report)
+ (void *pv_cmd_err_cb_handle, WORD32 i4_error_code, WORD32 i4_cmd_type, WORD32 i4_buf_id);
+
+ /**
+ * Flag to indicate if ECU is enabled/disabled
+ */
+ WORD32 i4_p6_opt_enabled;
+
+ /**
+ * profile stats: HLE thread, per resolution pre-enc, per resolution and bitrate enc/entropy
+ */
+ profile_database_t profile_hle;
+ profile_database_t profile_pre_enc[IHEVCE_MAX_NUM_RESOLUTIONS];
+ profile_database_t profile_enc[IHEVCE_MAX_NUM_RESOLUTIONS][IHEVCE_MAX_NUM_BITRATES];
+ profile_database_t profile_entropy[IHEVCE_MAX_NUM_RESOLUTIONS][IHEVCE_MAX_NUM_BITRATES];
+
+} ihevce_hle_ctxt_t;
+
+/**
+******************************************************************************
+ * @brief Individual Thread context structure
+******************************************************************************
+ */
+typedef struct
+{
+ /** Unique Id associated with every frame processing thread */
+ WORD32 i4_thrd_id;
+
+ /** pointer to encoder context structure (stored untyped as void *) */
+ void *pv_enc_ctxt;
+
+ /** pointer to the hle context structure */
+ ihevce_hle_ctxt_t *ps_hle_ctxt;
+
+} frm_proc_thrd_ctxt_t;
+
+/*****************************************************************************/
+/* Extern Variable Declarations */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+/** Create API
+ *
+ * ps_hle_ctxt : Pointer to high level encoder context.
+ * It is assumed that application before calling this API
+ * has initialized with correct pointers for following
+ * - pv_osal_handle
+ * - pv_app_sem_handle (NOTE(review): not a member of ihevce_hle_ctxt_t - confirm)
+ * - ps_static_cfg_prms
+ * - ihevce_mem_alloc
+ * - ihevce_mem_free
+ *
+ * Encoder after initialization would store the encoder handle(s) in
+ * - apv_enc_hdl
+ *
+ * Create Return status (success or fail) is returned
+ */
+IHEVCE_DLL IV_API_CALL_STATUS_T ihevce_hle_interface_create(ihevce_hle_ctxt_t *ps_hle_ctxt);
+
+/** Query IO buffers requirements API
+ *
+ * ps_hle_ctxt : Pointer to high level encoder context.
+ * ps_input_bufs_req : memory to store input buffer requirements
+ * ps_res_layer_output_bufs_req : memory to store output buffer requirements
+ * ps_res_layer_recon_bufs_req : presumably memory to store recon buffer requirements
+ *
+ * Should be called only after a successful create of codec instance.
+ * Return status (success or fail) is returned
+ */
+IHEVCE_DLL IV_API_CALL_STATUS_T ihevce_query_io_buf_req(
+ ihevce_hle_ctxt_t *ps_hle_ctxt,
+ iv_input_bufs_req_t *ps_input_bufs_req,
+ iv_res_layer_output_bufs_req_t *ps_res_layer_output_bufs_req,
+ iv_res_layer_recon_bufs_req_t *ps_res_layer_recon_bufs_req);
+
+/** Create buffer ports for processing API
+ *
+ * ps_hle_ctxt : Pointer to high level encoder context.
+ * ps_input_data_ctrl_buffs_desc :
+ * Pointer to Input (data/control) buffers details memory
+ * ps_input_asynch_ctrl_buffs_desc :
+ * Pointer to Input async control buffers details memory
+ * ps_mres_output_data_buffs_desc :
+ * Pointer to output data buffers details memory
+ * ps_mres_recon_data_buffs_desc :
+ * Presumably pointer to recon data buffers details memory - confirm
+ *
+ * Return status (success or fail) is returned
+ */
+IHEVCE_DLL IV_API_CALL_STATUS_T ihevce_create_ports(
+ ihevce_hle_ctxt_t *ps_hle_ctxt,
+ iv_input_data_ctrl_buffs_desc_t *ps_input_data_ctrl_buffs_desc,
+ iv_input_asynch_ctrl_buffs_desc_t *ps_input_asynch_ctrl_buffs_desc,
+ iv_res_layer_output_data_buffs_desc_t *ps_mres_output_data_buffs_desc,
+ iv_res_layer_recon_data_buffs_desc_t *ps_mres_recon_data_buffs_desc);
+
+/** Processing interface layer thread API
+ *
+ * This is the entry point for this thread.
+ * A pointer to ihevce_hle_ctxt_t has to be passed
+ * to this function as the argument (pv_proc_intf_ctxt)
+ *
+ * return should be an exit code (0)
+ */
+IHEVCE_DLL WORD32 ihevce_hle_interface_thrd(void *pv_proc_intf_ctxt);
+
+/** Get version API
+ *
+ * This is the API to return the version number of the encoder
+ *
+ * returns the version number string
+ */
+IHEVCE_DLL const char *ihevce_get_encoder_version(void);
+
+/** Validate Encoder parameters
+ *
+ * This is the API to validate the static configuration parameters of the encoder
+ *
+ * NOTE(review): the meaning of the WORD32 return value is not visible here - confirm
+ */
+IHEVCE_DLL WORD32 ihevce_validate_encoder_parameters(ihevce_static_cfg_params_t *ps_static_cfg_prms);
+
+/** Get free input frame data buffer API
+ *
+ * ps_hle_ctxt : Pointer to high level encoder context.
+ * pi4_buff_id : pointer to store the buffer id of the buffer returned.
+ * i4_blocking_mode : Blocking mode to control if the API should wait
+ * for a free buffer to be available and then
+ * return with a valid buffer @sa BUFF_QUE_MODES_T
+ * returns NULL if no free buffer is present in queue (if non blocking mode)
+ */
+IHEVCE_DLL void *ihevce_q_get_free_inp_data_buff(
+ ihevce_hle_ctxt_t *ps_hle_ctxt, WORD32 *pi4_buff_id, WORD32 i4_blocking_mode);
+
+/** Get free input control data buffer API
+ *
+ * ps_hle_ctxt : Pointer to high level encoder context.
+ * pi4_buff_id : pointer to store the buffer id of the buffer returned.
+ * i4_blocking_mode : Blocking mode to control if the API should wait
+ * for a free buffer to be available and then
+ * return with a valid buffer @sa BUFF_QUE_MODES_T
+ * returns NULL if no free buffer is present in queue (if non blocking mode)
+ */
+IHEVCE_DLL void *ihevce_q_get_free_inp_ctrl_buff(
+ ihevce_hle_ctxt_t *ps_hle_ctxt, WORD32 *pi4_buff_id, WORD32 i4_blocking_mode);
+
+IHEVCE_DLL void *ihevce_q_get_free_out_strm_buff(
+ ihevce_hle_ctxt_t *ps_hle_ctxt, /* high level encoder context */
+ WORD32 *pi4_buff_id, /* presumably receives the id of the returned buffer - confirm */
+ WORD32 i4_blocking_mode, /* presumably a BUFF_QUE_MODES_T value - confirm */
+ WORD32 i4_bitrate_instance, /* presumably selects the per-bitrate output queue - confirm */
+ WORD32 i4_res_instance); /* presumably indexes apv_enc_hdl - confirm */
+
+IHEVCE_DLL void *ihevce_q_get_free_out_recon_buff(
+ ihevce_hle_ctxt_t *ps_hle_ctxt, /* high level encoder context */
+ WORD32 *pi4_buff_id, /* presumably receives the id of the returned buffer - confirm */
+ WORD32 i4_blocking_mode, /* presumably a BUFF_QUE_MODES_T value - confirm */
+ WORD32 i4_bitrate_instance, /* presumably selects the per-bitrate recon queue - confirm */
+ WORD32 i4_res_instance); /* presumably indexes apv_enc_hdl - confirm */
+
+/** Set Input frame data buffer as produced API
+ *
+ * ps_hle_ctxt : Pointer to high level encoder context.
+ * i4_buff_id : buffer id of the buffer returned during get free buf.
+ */
+IHEVCE_DLL IV_API_CALL_STATUS_T
+ ihevce_q_set_inp_data_buff_prod(ihevce_hle_ctxt_t *ps_hle_ctxt, WORD32 i4_buff_id); /* returns a status */
+
+/** Set Input control data buffer as produced API
+ *
+ * ps_hle_ctxt : Pointer to high level encoder context.
+ * i4_buff_id : buffer id of the buffer returned during get free buf.
+ */
+IHEVCE_DLL IV_API_CALL_STATUS_T
+ ihevce_q_set_inp_ctrl_buff_prod(ihevce_hle_ctxt_t *ps_hle_ctxt, WORD32 i4_buff_id); /* returns a status */
+
+IHEVCE_DLL IV_API_CALL_STATUS_T ihevce_q_set_out_strm_buff_prod(
+ ihevce_hle_ctxt_t *ps_hle_ctxt, /* high level encoder context */
+ WORD32 i4_buff_id, /* id of the buffer to mark as produced */
+ WORD32 i4_bitrate_instance_id, /* added to IHEVCE_OUTPUT_DATA_Q to select the queue */
+ WORD32 i4_resolution_id); /* index into ps_hle_ctxt->apv_enc_hdl */
+
+IHEVCE_DLL IV_API_CALL_STATUS_T ihevce_q_set_out_recon_buff_prod(
+ ihevce_hle_ctxt_t *ps_hle_ctxt, /* high level encoder context */
+ WORD32 i4_buff_id, /* id of the buffer to mark as produced */
+ WORD32 i4_bitrate_instance_id, /* added to IHEVCE_RECON_DATA_Q to select the queue */
+ WORD32 i4_resolution_id); /* index into ps_hle_ctxt->apv_enc_hdl */
+
+/** Get next filled recon data buffer API
+ *
+ * ps_hle_ctxt : Pointer to high level encoder context.
+ * pi4_buff_id : pointer to store the buffer id of the buffer returned.
+ * i4_blocking_mode : Blocking mode to control if the API should wait
+ * for a produced buffer to be available and then
+ * return with a valid buffer @sa BUFF_QUE_MODES_T
+ * returns NULL if no produced buffer is present in queue (if non blocking mode)
+ */
+IHEVCE_DLL void *ihevce_q_get_filled_recon_buff(
+ ihevce_hle_ctxt_t *ps_hle_ctxt,
+ WORD32 *pi4_buff_id,
+ WORD32 i4_blocking_mode,
+ WORD32 i4_bitrate_instance_id, /* added to IHEVCE_RECON_DATA_Q to select the queue */
+ WORD32 i4_resolution_id); /* index into ps_hle_ctxt->apv_enc_hdl */
+
+/** Release / Free recon buffer API
+ *
+ * ps_hle_ctxt : Pointer to high level encoder context.
+ * i4_buff_id : buffer id of the buffer returned during get next buf.
+ */
+IHEVCE_DLL IV_API_CALL_STATUS_T ihevce_q_rel_recon_buf(
+ ihevce_hle_ctxt_t *ps_hle_ctxt,
+ WORD32 i4_buff_id,
+ WORD32 i4_bitrate_instance_id, /* added to IHEVCE_RECON_DATA_Q to select the queue */
+ WORD32 i4_resolution_id); /* index into ps_hle_ctxt->apv_enc_hdl */
+
+/** Delete API: tears down the encoder handles in apv_enc_hdl; returns IV_SUCCESS
+ *
+ * Should be called only after the high level encoder thread exits or returns
+ */
+IHEVCE_DLL IV_API_CALL_STATUS_T ihevce_hle_interface_delete(ihevce_hle_ctxt_t *ps_hle_ctxt);
+
+/** Trace API
+ *
+ * Open and Close trace file pointer.
+ */
+IHEVCE_DLL WORD32 ihevce_trace_init(UWORD8 *pu1_file_name); /* presumably opens the named trace file - confirm */
+
+IHEVCE_DLL WORD32 ihevce_trace_deinit(void); /* presumably closes the trace file - confirm */
+
+/** Header API
+ * Get sequence headers asynchronously
+ * NOTE(review): unlike the other declarations here, this one is not marked IHEVCE_DLL - confirm
+ */
+WORD32 ihevce_entropy_encode_header(
+ ihevce_hle_ctxt_t *ps_hle_ctxt, WORD32 i4_bitrate_instance_id, WORD32 i4_resolution_id);
+
+#endif /* _IHEVCE_HLE_INTERFACE_H_ */
diff --git a/encoder/ihevce_hle_q_func.c b/encoder/ihevce_hle_q_func.c
new file mode 100644
index 0000000..eab3350
--- /dev/null
+++ b/encoder/ihevce_hle_q_func.c
@@ -0,0 +1,659 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*!
+******************************************************************************
+* \file ihevce_hle_q_func.c
+*
+* \brief
+* This file contains the Que functions of the high level encoder
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+* List of Functions
+* <TODO: TO BE ADDED>
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_macros.h"
+#include "ihevc_debug.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+#include "ihevc_trans_tables.h"
+#include "ihevc_trans_macros.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_hle_interface.h"
+#include "ihevce_hle_q_func.h"
+#include "ihevce_buffer_que_interface.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_multi_thrd_funcs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_error_checks.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+
+#include "cast_types.h"
+#include "osal.h"
+#include "osal_defaults.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_q_get_free_buff \endif
+*
+* \brief
+* Gets a free buffer from the que requested (called by producer threads)
+*
+* \param[in] pv_enc_ctxt : encoder context pointer (used as enc_ctxt_t *)
+* \param[in] i4_q_id : Que id of the buffer
+* \param[out] pi4_buff_id : pointer to return the buffer id
+* \param[in] i4_blocking_mode : BUFF_QUE_BLOCKING_MODE pends and retries until a buffer is free
+* \return
+* Pointer to the free buffer, or NULL if none is free in non blocking mode
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void *ihevce_q_get_free_buff(
+ void *pv_enc_ctxt, WORD32 i4_q_id, WORD32 *pi4_buff_id, WORD32 i4_blocking_mode)
+{
+ /* local variables */
+ WORD32 end_flag = 0;
+ void *pv_buff = NULL;
+ WORD32 i4_mres_single_out;
+ enc_ctxt_t *ps_enc_ctxt = (enc_ctxt_t *)pv_enc_ctxt;
+ i4_mres_single_out = ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms.i4_mres_single_out;
+
+ while(1 != end_flag)
+ {
+ /* acquire mutex lock */
+ osal_mutex_lock(ps_enc_ctxt->s_enc_ques.pv_q_mutex_hdl);
+
+ /* call the buffer api function */
+ pv_buff =
+ ihevce_buff_que_get_free_buf(ps_enc_ctxt->s_enc_ques.apv_q_hdl[i4_q_id], pi4_buff_id);
+
+ /* release mutex lock */
+ osal_mutex_unlock(ps_enc_ctxt->s_enc_ques.pv_q_mutex_hdl);
+
+ /* if no free buffer is available */
+ if(NULL == pv_buff)
+ {
+ /* check if the mode is blocking */
+ if(BUFF_QUE_BLOCKING_MODE == i4_blocking_mode)
+ {
+ /* ------------------------------------------------- */
+ /* Get free buffers are called by producers */
+ /* these producer threads will be put in pend state */
+ /* ------------------------------------------------- */
+
+ /* choose the semaphore based on Que Id */
+ void *pv_sem_handle = NULL;
+
+ /* input data Que : application's input data processing */
+ /* thread is put to pend state */
+ if(IHEVCE_INPUT_DATA_CTRL_Q == i4_q_id)
+ {
+ pv_sem_handle = ps_enc_ctxt->s_thrd_sem_ctxt.pv_inp_data_sem_handle;
+ }
+
+ /* input ctrl Que : application's input ctrl processing */
+ /* thread is put to pend state */
+ if(IHEVCE_INPUT_ASYNCH_CTRL_Q == i4_q_id)
+ {
+ pv_sem_handle = ps_enc_ctxt->s_thrd_sem_ctxt.pv_inp_ctrl_sem_handle;
+ }
+
+ if(IHEVCE_ENC_INPUT_Q == i4_q_id)
+ {
+ pv_sem_handle = ps_enc_ctxt->s_thrd_sem_ctxt.pv_lap_inp_data_sem_hdl;
+ }
+
+ /* Output data Que : Output thread is put to pend state */
+ if(IHEVCE_OUTPUT_DATA_Q == i4_q_id)
+ {
+ if(1 == i4_mres_single_out)
+ {
+ pv_sem_handle = ps_enc_ctxt->s_thrd_sem_ctxt.pv_out_common_mres_sem_hdl;
+ }
+ else
+ {
+ pv_sem_handle = ps_enc_ctxt->s_thrd_sem_ctxt.apv_out_strm_sem_handle[0];
+ }
+ }
+ /* Recon data Que : Recon thread is put to pend state */
+ if(IHEVCE_RECON_DATA_Q == i4_q_id)
+ {
+ pv_sem_handle = ps_enc_ctxt->s_thrd_sem_ctxt.apv_out_recon_sem_handle[0];
+ }
+ /* frm prs ent cod data Que : frame process is put to pend state */
+ if(IHEVCE_FRM_PRS_ENT_COD_Q == i4_q_id)
+ {
+ pv_sem_handle = ps_enc_ctxt->s_thrd_sem_ctxt.pv_enc_frm_proc_sem_handle;
+ }
+ /* Pre encode/ encode data Que : pre encode is put to pend state */
+ if(IHEVCE_PRE_ENC_ME_Q == i4_q_id)
+ {
+ pv_sem_handle = ps_enc_ctxt->s_thrd_sem_ctxt.pv_pre_enc_frm_proc_sem_handle;
+ }
+ /* ME/ENC Que : enc frame proc is put to pend state */
+ if(IHEVCE_ME_ENC_RDOPT_Q == i4_q_id)
+ {
+ pv_sem_handle = ps_enc_ctxt->s_thrd_sem_ctxt.pv_enc_frm_proc_sem_handle;
+ }
+ if(IHEVCE_L0_IPE_ENC_Q == i4_q_id)
+ {
+ pv_sem_handle = ps_enc_ctxt->s_thrd_sem_ctxt.pv_pre_enc_frm_proc_sem_handle;
+ }
+ /* output status queue should be used by both LAP and Frame
+ process in non blocking mode, so a blocking request is flagged */
+ if(IHEVCE_OUTPUT_STATUS_Q == i4_q_id)
+ {
+ ASSERT(0);
+ }
+
+ /* go to the pend state; NOTE(review): pv_sem_handle is still NULL here for a que id not matched above */
+ osal_sem_wait(pv_sem_handle);
+ }
+ /* if non blocking then return NULL and break from loop */
+ else
+ {
+ end_flag = 1;
+ }
+ }
+ /* if valid free buffer is available then break from loop */
+ else
+ {
+ end_flag = 1;
+ }
+ }
+
+ return (pv_buff);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_q_set_buff_prod \endif
+*
+* \brief
+* Sets the buffer as produced in the que requested and posts the consumer semaphore
+*
+* \param[in] pv_enc_ctxt : encoder context pointer (used as enc_ctxt_t *)
+* \param[in] i4_q_id : Que id of the buffer
+* \param[in] i4_buff_id : buffer id which needs to be set as produced
+*
+* \return
+* IV_SUCCESS (the only value returned by this function)
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+IV_API_CALL_STATUS_T ihevce_q_set_buff_prod(void *pv_enc_ctxt, WORD32 i4_q_id, WORD32 i4_buff_id)
+{
+ /* local variables */
+
+ WORD32 i4_num_users = 0;
+ WORD32 i4_mres_single_out;
+ enc_ctxt_t *ps_enc_ctxt = (enc_ctxt_t *)pv_enc_ctxt;
+ i4_mres_single_out = ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms.i4_mres_single_out;
+
+ /* acquire mutex lock */
+ osal_mutex_lock(ps_enc_ctxt->s_enc_ques.pv_q_mutex_hdl);
+
+ /* call the buffer api function (i4_num_users is always passed as 0 from here) */
+ ihevce_buff_que_set_buf_prod(
+ ps_enc_ctxt->s_enc_ques.apv_q_hdl[i4_q_id], i4_buff_id, i4_num_users);
+
+ /* release mutex lock */
+ osal_mutex_unlock(ps_enc_ctxt->s_enc_ques.pv_q_mutex_hdl);
+
+ /* ------------------------------------------------------------- */
+ /* after setting the buffer the consumer thread needs to be */
+ /* posted in case that thread is in wait state */
+ /* currently this post is done unconditionally */
+ /* ------------------------------------------------------------- */
+
+ /* input command que : only the LAP semaphore is posted here */
+ if(IHEVCE_INPUT_ASYNCH_CTRL_Q == i4_q_id)
+ {
+ osal_sem_post(ps_enc_ctxt->s_thrd_sem_ctxt.pv_lap_sem_handle);
+ }
+
+ /* input data que : LAP thread needs to be posted */
+ if(IHEVCE_INPUT_DATA_CTRL_Q == i4_q_id)
+ {
+ osal_sem_post(ps_enc_ctxt->s_thrd_sem_ctxt.pv_lap_sem_handle);
+ }
+
+ /* output stream data que : Entropy processing thread needs to be posted */
+ if(IHEVCE_OUTPUT_DATA_Q == i4_q_id)
+ {
+ WORD32 i4_entropy_thrd_id;
+ WORD32 i4_bufque_id;
+
+ i4_bufque_id = (i4_q_id - IHEVCE_OUTPUT_DATA_Q); /* always 0 inside this branch */
+ i4_entropy_thrd_id = i4_bufque_id;
+
+ if(i4_bufque_id == 0)
+ {
+ i4_entropy_thrd_id = ps_enc_ctxt->i4_ref_mbr_id;
+ }
+ else if(i4_bufque_id == ps_enc_ctxt->i4_ref_mbr_id)
+ {
+ i4_entropy_thrd_id = 0;
+ }
+
+ if(IHEVCE_OUTPUT_DATA_Q == i4_q_id)
+ {
+ if(1 == i4_mres_single_out)
+ {
+ osal_sem_post(ps_enc_ctxt->s_thrd_sem_ctxt.pv_ent_common_mres_sem_hdl);
+ }
+ else
+ {
+ osal_sem_post(
+ ps_enc_ctxt->s_thrd_sem_ctxt.apv_ent_cod_sem_handle[i4_entropy_thrd_id]);
+ }
+ }
+ }
+
+ /* output recon data que : the enc frame proc semaphore is posted */
+ if(IHEVCE_RECON_DATA_Q == i4_q_id)
+ {
+ osal_sem_post(ps_enc_ctxt->s_thrd_sem_ctxt.pv_enc_frm_proc_sem_handle);
+ }
+ /* output control que : app's output processing thread needs to be posted */
+ if(IHEVCE_OUTPUT_STATUS_Q == i4_q_id)
+ {
+ osal_sem_post(ps_enc_ctxt->s_thrd_sem_ctxt.pv_out_ctrl_sem_handle);
+ }
+
+ /* frm process entropy que : entropy thread needs to be posted */
+ if(IHEVCE_FRM_PRS_ENT_COD_Q == i4_q_id)
+ {
+ osal_sem_post(ps_enc_ctxt->s_thrd_sem_ctxt.apv_ent_cod_sem_handle[0]);
+ }
+ /* pre-encode/encode que : encode frame proc thread needs to be posted */
+ if(IHEVCE_PRE_ENC_ME_Q == i4_q_id)
+ {
+ osal_sem_post(ps_enc_ctxt->s_thrd_sem_ctxt.pv_enc_frm_proc_sem_handle);
+ }
+ /* ME/ENC Que : enc frame proc needs to be posted */
+ if(IHEVCE_ME_ENC_RDOPT_Q == i4_q_id)
+ {
+ osal_sem_post(ps_enc_ctxt->s_thrd_sem_ctxt.pv_enc_frm_proc_sem_handle);
+ }
+ if(IHEVCE_L0_IPE_ENC_Q == i4_q_id)
+ {
+ osal_sem_post(ps_enc_ctxt->s_thrd_sem_ctxt.pv_enc_frm_proc_sem_handle);
+ }
+
+ if(IHEVCE_ENC_INPUT_Q == i4_q_id)
+ {
+ osal_sem_post(ps_enc_ctxt->s_thrd_sem_ctxt.pv_preenc_inp_data_sem_hdl);
+ }
+
+ return (IV_SUCCESS);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_q_get_filled_buff \endif
+*
+* \brief
+* Gets the next filled buffer from the que requested (called by consumer threads)
+*
+* \param[in] pv_enc_ctxt : encoder context pointer (used as enc_ctxt_t *)
+* \param[in] i4_q_id : Que id of the buffer
+* \param[out] pi4_buff_id : pointer to return the buffer id
+* \param[in] i4_blocking_mode : BUFF_QUE_BLOCKING_MODE pends and retries until a buffer is filled
+* \return
+* Pointer to the filled buffer, or NULL if none is filled in non blocking mode
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void *ihevce_q_get_filled_buff(
+ void *pv_enc_ctxt, WORD32 i4_q_id, WORD32 *pi4_buff_id, WORD32 i4_blocking_mode)
+{
+ /* local variables */
+ WORD32 end_flag = 0;
+ void *pv_buff = NULL;
+ WORD32 i4_mres_single_out;
+ enc_ctxt_t *ps_enc_ctxt = (enc_ctxt_t *)pv_enc_ctxt;
+ i4_mres_single_out = ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms.i4_mres_single_out;
+
+ while(1 != end_flag)
+ {
+ /* acquire mutex lock */
+ osal_mutex_lock(ps_enc_ctxt->s_enc_ques.pv_q_mutex_hdl);
+
+ /* call the buffer api function */
+ pv_buff =
+ ihevce_buff_que_get_next_buf(ps_enc_ctxt->s_enc_ques.apv_q_hdl[i4_q_id], pi4_buff_id);
+
+ /* release mutex lock */
+ osal_mutex_unlock(ps_enc_ctxt->s_enc_ques.pv_q_mutex_hdl);
+
+ /* if no filled buffer is available */
+ if(NULL == pv_buff)
+ {
+ /* check if the mode is blocking */
+ if(BUFF_QUE_BLOCKING_MODE == i4_blocking_mode)
+ {
+ /* ------------------------------------------------- */
+ /* Get filled buffers are called by consumers */
+ /* these consumer threads will be put in pend state */
+ /* ------------------------------------------------- */
+
+ /* choose the semaphore based on Que Id */
+ void *pv_sem_handle = NULL;
+
+ /* input data Que : LAP thread is put to pend state */
+ if(IHEVCE_INPUT_DATA_CTRL_Q == i4_q_id)
+ {
+ pv_sem_handle = ps_enc_ctxt->s_thrd_sem_ctxt.pv_lap_sem_handle;
+ }
+
+ /* input ctrl Que : LAP thread is put to pend state */
+ if(IHEVCE_INPUT_ASYNCH_CTRL_Q == i4_q_id)
+ {
+ pv_sem_handle = ps_enc_ctxt->s_thrd_sem_ctxt.pv_lap_sem_handle;
+ }
+
+ /* Output Stream data Que : Entropy processing */
+ /* thread is put to pend state */
+ if(IHEVCE_OUTPUT_DATA_Q == i4_q_id)
+ {
+ WORD32 i4_entropy_thrd_id;
+ WORD32 i4_bufque_id;
+
+ i4_bufque_id = (i4_q_id - IHEVCE_OUTPUT_DATA_Q); /* always 0 inside this branch */
+ i4_entropy_thrd_id = i4_bufque_id;
+
+ if(i4_bufque_id == 0)
+ {
+ i4_entropy_thrd_id = ps_enc_ctxt->i4_ref_mbr_id;
+ }
+ else if(i4_bufque_id == ps_enc_ctxt->i4_ref_mbr_id)
+ {
+ i4_entropy_thrd_id = 0;
+ }
+
+ if(IHEVCE_OUTPUT_DATA_Q == i4_q_id)
+ {
+ if(1 == i4_mres_single_out)
+ {
+ pv_sem_handle = ps_enc_ctxt->s_thrd_sem_ctxt.pv_ent_common_mres_sem_hdl;
+ }
+ else
+ {
+ pv_sem_handle = ps_enc_ctxt->s_thrd_sem_ctxt
+ .apv_ent_cod_sem_handle[i4_entropy_thrd_id];
+ }
+ }
+ }
+
+ /* Output Recon data Que : Frame processing */
+ /* thread is put to pend state */
+ if(IHEVCE_RECON_DATA_Q == i4_q_id)
+ {
+ pv_sem_handle = ps_enc_ctxt->s_thrd_sem_ctxt.pv_enc_frm_proc_sem_handle;
+ }
+ /* frm prs ent cod data Que : entropy thread is put to pend state */
+ if(IHEVCE_FRM_PRS_ENT_COD_Q == i4_q_id)
+ {
+ pv_sem_handle = ps_enc_ctxt->s_thrd_sem_ctxt.apv_ent_cod_sem_handle[0];
+ }
+ /* Output status Que : application's output processing */
+ /* thread is put to pend state */
+ if(IHEVCE_OUTPUT_STATUS_Q == i4_q_id)
+ {
+ pv_sem_handle = ps_enc_ctxt->s_thrd_sem_ctxt.pv_out_ctrl_sem_handle;
+ }
+
+ /* pre-encode/encode Que : encode frame proc thread */
+ if(IHEVCE_PRE_ENC_ME_Q == i4_q_id)
+ {
+ pv_sem_handle = ps_enc_ctxt->s_thrd_sem_ctxt.pv_enc_frm_proc_sem_handle;
+ }
+ /* ME/ENC Que : enc frame proc is put to pend state */
+ if(IHEVCE_ME_ENC_RDOPT_Q == i4_q_id)
+ {
+ pv_sem_handle = ps_enc_ctxt->s_thrd_sem_ctxt.pv_enc_frm_proc_sem_handle;
+ }
+ if(IHEVCE_L0_IPE_ENC_Q == i4_q_id)
+ {
+ pv_sem_handle = ps_enc_ctxt->s_thrd_sem_ctxt.pv_enc_frm_proc_sem_handle;
+ }
+ /* This call will be made from pre-enc enc thread, hence when input is not available the caller thread should go to pend */
+ if(IHEVCE_ENC_INPUT_Q == i4_q_id)
+ {
+ pv_sem_handle = ps_enc_ctxt->s_thrd_sem_ctxt.pv_preenc_inp_data_sem_hdl;
+ }
+
+ /* go to the pend state; NOTE(review): pv_sem_handle is still NULL here for a que id not matched above */
+ osal_sem_wait(pv_sem_handle);
+ }
+ /* if non blocking then return NULL and break from loop */
+ else
+ {
+ end_flag = 1;
+ }
+ }
+ /* if valid filled buffer is available then break from loop */
+ else
+ {
+ end_flag = 1;
+ }
+ }
+
+ return (pv_buff);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_q_rel_buf \endif
+*
+* \brief
+* Releases the buffer in the que requested and posts the producer semaphore
+*
+* \param[in] pv_enc_ctxt : encoder context pointer (used as enc_ctxt_t *)
+* \param[in] i4_q_id : Que id of the buffer
+* \param[in] i4_buff_id : buffer id which needs to be freed
+*
+* \return
+* IV_SUCCESS (the only value returned by this function)
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+IV_API_CALL_STATUS_T ihevce_q_rel_buf(void *pv_enc_ctxt, WORD32 i4_q_id, WORD32 i4_buff_id)
+{
+ /* local variables */
+ WORD32 i4_mres_single_out;
+ enc_ctxt_t *ps_enc_ctxt = (enc_ctxt_t *)pv_enc_ctxt;
+ i4_mres_single_out = ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms.i4_mres_single_out;
+ /* acquire mutex lock */
+ osal_mutex_lock(ps_enc_ctxt->s_enc_ques.pv_q_mutex_hdl);
+
+ /* call the buffer api function */
+ ihevce_buff_que_rel_buf(ps_enc_ctxt->s_enc_ques.apv_q_hdl[i4_q_id], i4_buff_id);
+
+ /* release mutex lock */
+ osal_mutex_unlock(ps_enc_ctxt->s_enc_ques.pv_q_mutex_hdl);
+
+ /* ------------------------------------------------------------- */
+ /* after releasing the buffer the producer thread needs to be */
+ /* posted in case that thread is in wait state */
+ /* currently this post is done unconditionally */
+ /* ------------------------------------------------------------- */
+
+ /* input data que : app's input data producing thread needs to be posted */
+ if(IHEVCE_INPUT_DATA_CTRL_Q == i4_q_id)
+ {
+ osal_sem_post(ps_enc_ctxt->s_thrd_sem_ctxt.pv_inp_data_sem_handle);
+ }
+
+ /* input data control que : app's command que producing thread needs to be posted */
+ if(IHEVCE_INPUT_ASYNCH_CTRL_Q == i4_q_id)
+ {
+ osal_sem_post(ps_enc_ctxt->s_thrd_sem_ctxt.pv_inp_ctrl_sem_handle);
+ }
+ /* multiple input queue */
+ if(IHEVCE_ENC_INPUT_Q == i4_q_id)
+ {
+ osal_sem_post(ps_enc_ctxt->s_thrd_sem_ctxt.pv_lap_inp_data_sem_hdl);
+ }
+
+ /* output data que: Output thread needs to be posted */
+ if(IHEVCE_OUTPUT_DATA_Q == i4_q_id)
+ {
+ if(1 == i4_mres_single_out)
+ {
+ osal_sem_post(ps_enc_ctxt->s_thrd_sem_ctxt.pv_out_common_mres_sem_hdl);
+ }
+ else
+ {
+ osal_sem_post(ps_enc_ctxt->s_thrd_sem_ctxt.apv_out_strm_sem_handle[0]);
+ }
+ }
+ /* Recon data que: Recon thread needs to be posted */
+ if(IHEVCE_RECON_DATA_Q == i4_q_id)
+ {
+ osal_sem_post(ps_enc_ctxt->s_thrd_sem_ctxt.apv_out_recon_sem_handle[0]);
+ }
+ /* output status que: LAP and pre-encode frame process semaphores are both posted */
+ if(IHEVCE_OUTPUT_STATUS_Q == i4_q_id)
+ {
+ osal_sem_post(ps_enc_ctxt->s_thrd_sem_ctxt.pv_lap_sem_handle);
+ osal_sem_post(ps_enc_ctxt->s_thrd_sem_ctxt.pv_pre_enc_frm_proc_sem_handle);
+ }
+
+ /* frm process entropy que : Frame process needs to be posted */
+ if(IHEVCE_FRM_PRS_ENT_COD_Q == i4_q_id)
+ {
+ osal_sem_post(ps_enc_ctxt->s_thrd_sem_ctxt.pv_enc_frm_proc_sem_handle);
+ }
+ /* pre-encode/encode Que : pre-encode frame proc needs to be posted */
+ if(IHEVCE_PRE_ENC_ME_Q == i4_q_id)
+ {
+ osal_sem_post(ps_enc_ctxt->s_thrd_sem_ctxt.pv_pre_enc_frm_proc_sem_handle);
+ }
+ /* ME/ENC Que : enc frame proc needs to be posted */
+ if(IHEVCE_ME_ENC_RDOPT_Q == i4_q_id)
+ {
+ osal_sem_post(ps_enc_ctxt->s_thrd_sem_ctxt.pv_enc_frm_proc_sem_handle);
+ }
+ if(IHEVCE_L0_IPE_ENC_Q == i4_q_id)
+ {
+ osal_sem_post(ps_enc_ctxt->s_thrd_sem_ctxt.pv_pre_enc_frm_proc_sem_handle);
+ }
+ return (IV_SUCCESS);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_force_end \endif
+*
+* \brief
+* Sets force end flag in enc_ctxt for all resolutions
+*
+* \param[in] ps_hle_ctxt : high level encoder context pointer
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_force_end(ihevce_hle_ctxt_t *ps_hle_ctxt)
+{
+ enc_ctxt_t *ps_enc_ctxt;
+ WORD32 i4_resolution_id = 0;
+ WORD32 i4_num_res_layers = 0;
+ ps_enc_ctxt = (enc_ctxt_t *)ps_hle_ctxt->apv_enc_hdl[0];
+ /* the number of resolution layers is read from the static params of encoder handle 0 */
+ i4_num_res_layers = ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms.i4_num_res_layers;
+ for(i4_resolution_id = 0; i4_resolution_id < i4_num_res_layers; i4_resolution_id++)
+ {
+ ps_enc_ctxt = (enc_ctxt_t *)ps_hle_ctxt->apv_enc_hdl[i4_resolution_id];
+ ps_enc_ctxt->s_multi_thrd.i4_force_end_flag = 1;
+ }
+}
diff --git a/encoder/ihevce_hle_q_func.h b/encoder/ihevce_hle_q_func.h
new file mode 100644
index 0000000..540de6a
--- /dev/null
+++ b/encoder/ihevce_hle_q_func.h
@@ -0,0 +1,79 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_hle_q_func.h
+*
+* \brief
+* This file contains interface definition of Que related functions
+* of high level encoder
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_HEL_Q_FUNC_H_
+#define _IHEVCE_HEL_Q_FUNC_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Variable Declarations */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+void *ihevce_q_get_free_buff(
+ void *pv_enc_ctxt, WORD32 i4_q_id, WORD32 *pi4_buff_id, WORD32 i4_blocking_mode);
+
+IV_API_CALL_STATUS_T ihevce_q_set_buff_prod(void *pv_enc_ctxt, WORD32 i4_q_id, WORD32 i4_buff_id);
+
+void *ihevce_q_get_filled_buff(
+ void *pv_enc_ctxt, WORD32 i4_q_id, WORD32 *pi4_buff_id, WORD32 i4_blocking_mode);
+
+IV_API_CALL_STATUS_T ihevce_q_rel_buf(void *pv_enc_ctxt, WORD32 i4_q_id, WORD32 i4_buff_id);
+
+void ihevce_force_end(ihevce_hle_ctxt_t *ps_hle_ctxt);
+
+#endif /* _IHEVCE_HEL_Q_FUNC_H_ */
diff --git a/encoder/ihevce_inter_pred.c b/encoder/ihevce_inter_pred.c
new file mode 100644
index 0000000..190d803
--- /dev/null
+++ b/encoder/ihevce_inter_pred.c
@@ -0,0 +1,1330 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ihevce_inter_pred.c
+*
+* @brief
+* Contains functions for giving out prediction samples for a given pu
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+* - ihevc_inter_pred()
+*
+*
+*******************************************************************************
+*/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_debug.h"
+#include "ihevc_defs.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_entropy_structs.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+#include "ihevce_enc_loop_structs.h"
+#include "ihevce_inter_pred.h"
+#include "ihevc_weighted_pred.h"
+
+/*****************************************************************************/
+/* Global tables */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+* @brief Table of filter tap coefficients for HEVC luma inter prediction
+* input : sub pel mv position (dx/dy = 0:3)
+* output : filter coeffs to be used for that position
+* The taps listed in each row sum to 64; row 0 is the full pel (pass through) case
+* @remarks See section 8.5.2.2.2.1 Luma sample interpolation process of HEVC
+******************************************************************************
+*/
+WORD8 gai1_hevc_luma_filter_taps[4][NTAPS_LUMA] = { { 0, 0, 0, 64, 0, 0, 0, 0 },
+ { -1, 4, -10, 58, 17, -5, 1, 0 },
+ { -1, 4, -11, 40, 40, -11, 4, -1 },
+ { 0, 1, -5, 17, 58, -10, 4, -1 } };
+
+/**
+******************************************************************************
+* @brief Table of filter tap coefficients for HEVC chroma inter prediction
+* input : chroma sub pel mv position (dx/dy = 0:7)
+* output : filter coeffs to be used for that position
+* The taps listed in each row sum to 64; row 0 is the full pel (pass through) case
+* @remarks See section 8.5.2.2.2.2 Chroma sample interpolation process of HEVC
+* The filter uses only the first four elements in each array
+******************************************************************************
+*/
+WORD8 gai1_hevc_chroma_filter_taps[8][NTAPS_CHROMA] = { { 0, 64, 0, 0 }, { -2, 58, 10, -2 },
+ { -4, 54, 16, -2 }, { -6, 46, 28, -4 },
+ { -4, 36, 36, -4 }, { -4, 28, 46, -6 },
+ { -2, 16, 54, -4 }, { -2, 10, 58, -2 } };
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+* Performs Luma inter pred based on sub pel position dxdy and stores the result
+* in a 16 bit destination buffer
+*
+* @param[in] pu1_src
+* pointer to the source corresponding to integer pel position of a mv (left and
+* top justified integer position)
+*
+* @param[out] pi2_dst
+* WORD16 pointer to the destination
+*
+* @param[in] src_strd
+* source buffer stride
+*
+* @param[in] dst_strd
+* destination buffer stride
+*
+* @param[in] pi2_hdst_scratch
+* scratch buffer for intermediate storage of horizontal filter output; used as
+* input for vertical filtering when sub pel components (dx != 0) && (dy != 0)
+*
+* Max scratch buffer required is w * (h + 7) * sizeof(WORD16)
+*
+* @param[in] ht
+* height of the prediction unit
+*
+* @param[in] wd
+* width of the prediction unit
+*
+* @param[in] dy
+* qpel position[0:3] of mv in y direction
+*
+* @param[in] dx
+* qpel position[0:3] of mv in x direction
+*
+* @param[in] ps_func_selector
+* function pointer table providing the copy / horz / vert filter routines called
+*
+* @returns none
+*
+*******************************************************************************
+*/
+void ihevce_luma_interpolate_16bit_dxdy(
+ UWORD8 *pu1_src,
+ WORD16 *pi2_dst,
+ WORD32 src_strd,
+ WORD32 dst_strd,
+ WORD16 *pi2_hdst_scratch,
+ WORD32 ht,
+ WORD32 wd,
+ WORD32 dy,
+ WORD32 dx,
+ func_selector_t *ps_func_selector)
+{
+ if((0 == dx) && (0 == dy))
+ {
+ /*--------- full pel position : copy input by upscaling-------*/
+
+ ps_func_selector->ihevc_inter_pred_luma_copy_w16out_fptr(
+ pu1_src, pi2_dst, src_strd, dst_strd, &gai1_hevc_luma_filter_taps[0][0], ht, wd);
+ }
+ else if((0 != dx) && (0 != dy))
+ {
+ /*----------sub pel in both x and y direction---------*/
+
+ UWORD8 *pu1_horz_src = pu1_src - (3 * src_strd);
+ WORD32 hdst_buf_stride = wd;
+ WORD16 *pi2_vert_src = pi2_hdst_scratch + (3 * hdst_buf_stride);
+
+ /* horizontal filtering into the scratch buffer first: starts 3 rows above the block, (ht + NTAPS_LUMA - 1) rows */
+ ps_func_selector->ihevc_inter_pred_luma_horz_w16out_fptr(
+ pu1_horz_src,
+ pi2_hdst_scratch,
+ src_strd,
+ hdst_buf_stride,
+ &gai1_hevc_luma_filter_taps[dx][0],
+ (ht + NTAPS_LUMA - 1),
+ wd);
+
+ /* vertical filtering on scratch buffer (input pointer is 3 rows into it) and stored in destination */
+ ps_func_selector->ihevc_inter_pred_luma_vert_w16inp_w16out_fptr(
+ pi2_vert_src,
+ pi2_dst,
+ hdst_buf_stride,
+ dst_strd,
+ &gai1_hevc_luma_filter_taps[dy][0],
+ ht,
+ wd);
+ }
+ else if(0 == dy)
+ {
+ /*----------sub pel in x direction only ---------*/
+
+ ps_func_selector->ihevc_inter_pred_luma_horz_w16out_fptr(
+ pu1_src, pi2_dst, src_strd, dst_strd, &gai1_hevc_luma_filter_taps[dx][0], ht, wd);
+ }
+ else /* if (0 == dx) */
+ {
+ /*----------sub pel in y direction only ---------*/
+
+ ps_func_selector->ihevc_inter_pred_luma_vert_w16out_fptr(
+ pu1_src, pi2_dst, src_strd, dst_strd, &gai1_hevc_luma_filter_taps[dy][0], ht, wd);
+ }
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Performs Luma inter pred based on sub pel position dxdy and stores the result
+* in a 8 bit destination buffer
+*
+* @param[in] pu1_src
+* pointer to the source corresponding to integer pel position of a mv (left and
+* top justified integer position)
+*
+* @param[out] pu1_dst
+* UWORD8 pointer to the destination
+*
+* @param[in] src_strd
+* source buffer stride
+*
+* @param[in] dst_strd
+* destination buffer stride
+*
+* @param[in] pi2_hdst_scratch
+* scratch buffer for intermediate storage of horizontal filter output; used as
+* input for vertical filtering when sub pel components (dx != 0) && (dy != 0)
+*
+* Max scratch buffer required is w * (h + 7) * sizeof(WORD16)
+*
+* @param[in] ht
+* height of the prediction unit
+*
+* @param[in] wd
+* width of the prediction unit
+*
+* @param[in] dy
+* qpel position[0:3] of mv in y direction
+*
+* @param[in] dx
+* qpel position[0:3] of mv in x direction
+*
+* @param[in] ps_func_selector
+* function pointer table providing the copy / horz / vert filter routines called
+*
+* @returns none
+*
+*******************************************************************************
+*/
+void ihevce_luma_interpolate_8bit_dxdy(
+ UWORD8 *pu1_src,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd,
+ WORD32 dst_strd,
+ WORD16 *pi2_hdst_scratch,
+ WORD32 ht,
+ WORD32 wd,
+ WORD32 dy,
+ WORD32 dx,
+ func_selector_t *ps_func_selector)
+{
+ if((0 == dx) && (0 == dy))
+ {
+ /*--------- full pel position : copy input as is -------*/
+
+ ps_func_selector->ihevc_inter_pred_luma_copy_fptr(
+ pu1_src, pu1_dst, src_strd, dst_strd, &gai1_hevc_luma_filter_taps[0][0], ht, wd);
+ }
+ else if((0 != dx) && (0 != dy))
+ {
+ /*----------sub pel in both x and y direction---------*/
+
+ UWORD8 *pu1_horz_src = pu1_src - (3 * src_strd);
+ WORD32 hdst_buf_stride = wd;
+ WORD16 *pi2_vert_src = pi2_hdst_scratch + (3 * hdst_buf_stride);
+
+ /* horizontal filtering into the scratch buffer first: starts 3 rows above the block, (ht + NTAPS_LUMA - 1) rows */
+ ps_func_selector->ihevc_inter_pred_luma_horz_w16out_fptr(
+ pu1_horz_src,
+ pi2_hdst_scratch,
+ src_strd,
+ hdst_buf_stride,
+ &gai1_hevc_luma_filter_taps[dx][0],
+ (ht + NTAPS_LUMA - 1),
+ wd);
+
+ /* vertical filtering on scratch buffer (input pointer is 3 rows into it) and stored in destination */
+ ps_func_selector->ihevc_inter_pred_luma_vert_w16inp_fptr(
+ pi2_vert_src,
+ pu1_dst,
+ hdst_buf_stride,
+ dst_strd,
+ &gai1_hevc_luma_filter_taps[dy][0],
+ ht,
+ wd);
+ }
+ else if(0 == dy)
+ {
+ /*----------sub pel in x direction only ---------*/
+
+ ps_func_selector->ihevc_inter_pred_luma_horz_fptr(
+ pu1_src, pu1_dst, src_strd, dst_strd, &gai1_hevc_luma_filter_taps[dx][0], ht, wd);
+ }
+ else /* if (0 == dx) */
+ {
+ /*----------sub pel in y direction only ---------*/
+
+ ps_func_selector->ihevc_inter_pred_luma_vert_fptr(
+ pu1_src, pu1_dst, src_strd, dst_strd, &gai1_hevc_luma_filter_taps[dy][0], ht, wd);
+ }
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Performs Luma prediction for an inter prediction unit(PU)
+*
+* @par Description:
+* For a given PU, Inter prediction followed by weighted prediction (if
+* required)
+*
+* @param[in] pv_inter_pred_ctxt
+* context for inter prediction; contains ref list, weight offsets, ctb offsets
+*
+* @param[in] ps_pu
+* pointer to PU structure whose inter prediction needs to be done
+*
+* @param[out] pv_dst_buf
+* pointer to destination buffer where the inter prediction is done
+*
+* @param[in] dst_stride
+* pitch of the destination buffer
+*
+* @returns
+* IV_FAIL for mvs going outside ref frame padded limits
+* IV_SUCCESS after completing mc for given inter pu
+*
+* @remarks
+* i4_flag_inter_pred_source == 1 reads ref luma from s_yuv_buf_desc_src, else s_yuv_buf_desc
+*******************************************************************************
+*/
+IV_API_CALL_STATUS_T ihevce_luma_inter_pred_pu(
+ void *pv_inter_pred_ctxt,
+ pu_t *ps_pu,
+ void *pv_dst_buf,
+ WORD32 dst_stride,
+ WORD32 i4_flag_inter_pred_source)
+{
+ inter_pred_ctxt_t *ps_inter_pred_ctxt = (inter_pred_ctxt_t *)pv_inter_pred_ctxt;
+ func_selector_t *ps_func_selector = ps_inter_pred_ctxt->ps_func_selector;
+
+ WORD32 inter_pred_idc = ps_pu->b2_pred_mode;
+ UWORD8 *pu1_dst_buf = (UWORD8 *)pv_dst_buf;
+ WORD32 pu_wd = (ps_pu->b4_wd + 1) << 2;
+ WORD32 pu_ht = (ps_pu->b4_ht + 1) << 2;
+
+ WORD32 wp_flag = ps_inter_pred_ctxt->i1_weighted_pred_flag ||
+ ps_inter_pred_ctxt->i1_weighted_bipred_flag;
+
+ /* 16bit dest required for interpolate if weighted pred is on or bipred */
+ WORD32 store_16bit_output;
+
+ recon_pic_buf_t *ps_ref_pic_l0, *ps_ref_pic_l1;
+ UWORD8 *pu1_ref_pic, *pu1_ref_int_pel;
+ WORD32 ref_pic_stride;
+
+ /* offset of reference block in integer pel units */
+ WORD32 frm_x_ofst, frm_y_ofst;
+ WORD32 frm_x_pu, frm_y_pu;
+
+ /* scratch 16 bit buffers for interpolation in l0 and l1 direction */
+ WORD16 *pi2_scr_buf_l0 = &ps_inter_pred_ctxt->ai2_scratch_buf_l0[0];
+ WORD16 *pi2_scr_buf_l1 = &ps_inter_pred_ctxt->ai2_scratch_buf_l1[0];
+
+ /* scratch buffer for horizontal interpolation destination */
+ WORD16 *pi2_horz_scratch = &ps_inter_pred_ctxt->ai2_horz_scratch[0];
+
+ WORD32 wgt0, wgt1, off0, off1, shift, lvl_shift0, lvl_shift1;
+
+ /* get PU's frm x and frm y offset */
+ frm_x_pu = ps_inter_pred_ctxt->i4_ctb_frm_pos_x + (ps_pu->b4_pos_x << 2);
+ frm_y_pu = ps_inter_pred_ctxt->i4_ctb_frm_pos_y + (ps_pu->b4_pos_y << 2);
+
+ /* sanity checks */
+ ASSERT((wp_flag == 0) || (wp_flag == 1));
+ ASSERT(dst_stride >= pu_wd);
+ ASSERT(ps_pu->b1_intra_flag == 0);
+
+ lvl_shift0 = 0;
+ lvl_shift1 = 0;
+
+ if(wp_flag)
+ {
+ UWORD8 u1_is_wgt_pred_L0, u1_is_wgt_pred_L1;
+
+ if(inter_pred_idc != PRED_L1)
+ {
+ ps_ref_pic_l0 = ps_inter_pred_ctxt->ps_ref_list[0][ps_pu->mv.i1_l0_ref_idx];
+ u1_is_wgt_pred_L0 = ps_ref_pic_l0->s_weight_offset.u1_luma_weight_enable_flag;
+ }
+ if(inter_pred_idc != PRED_L0)
+ {
+ ps_ref_pic_l1 = ps_inter_pred_ctxt->ps_ref_list[1][ps_pu->mv.i1_l1_ref_idx];
+ u1_is_wgt_pred_L1 = ps_ref_pic_l1->s_weight_offset.u1_luma_weight_enable_flag;
+ }
+ if(inter_pred_idc == PRED_BI)
+ {
+ wp_flag = (u1_is_wgt_pred_L0 || u1_is_wgt_pred_L1);
+ }
+ else if(inter_pred_idc == PRED_L0)
+ {
+ wp_flag = u1_is_wgt_pred_L0;
+ }
+ else if(inter_pred_idc == PRED_L1)
+ {
+ wp_flag = u1_is_wgt_pred_L1;
+ }
+ else
+ {
+ /* other values are not allowed */
+ assert(0);
+ }
+ }
+ store_16bit_output = (inter_pred_idc == PRED_BI) || (wp_flag);
+
+ if(inter_pred_idc != PRED_L1)
+ {
+ /*****************************************************/
+ /* L0 inter prediction */
+ /*****************************************************/
+
+ /* motion vecs in qpel precision */
+ WORD32 mv_x = ps_pu->mv.s_l0_mv.i2_mvx;
+ WORD32 mv_y = ps_pu->mv.s_l0_mv.i2_mvy;
+
+ /* sub pel offsets in x and y direction w.r.t integer pel */
+ WORD32 dx = mv_x & 0x3;
+ WORD32 dy = mv_y & 0x3;
+
+ /* ref idx is currently stored in the lower 4bits; it is used here without masking */
+ WORD32 ref_idx = (ps_pu->mv.i1_l0_ref_idx);
+
+ /* x and y integer offsets w.r.t frame start */
+ frm_x_ofst = (frm_x_pu + (mv_x >> 2));
+ frm_y_ofst = (frm_y_pu + (mv_y >> 2));
+
+ ps_ref_pic_l0 = ps_inter_pred_ctxt->ps_ref_list[0][ref_idx];
+
+ /* picture buffer start and stride (stride is always taken from s_yuv_buf_desc) */
+ if(i4_flag_inter_pred_source == 1)
+ {
+ pu1_ref_pic = (UWORD8 *)ps_ref_pic_l0->s_yuv_buf_desc_src.pv_y_buf;
+ }
+ else
+ {
+ pu1_ref_pic = (UWORD8 *)ps_ref_pic_l0->s_yuv_buf_desc.pv_y_buf;
+ }
+ ref_pic_stride = ps_ref_pic_l0->s_yuv_buf_desc.i4_y_strd;
+
+ /* Error check for mvs going out of ref frame padded limits (4 pel margin kept) */
+ {
+ WORD32 min_x, max_x = ps_ref_pic_l0->s_yuv_buf_desc.i4_y_wd;
+ WORD32 min_y, max_y = ps_ref_pic_l0->s_yuv_buf_desc.i4_y_ht;
+
+ min_x =
+ -(ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_LEFT]
+ ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_LEFT] - 4)
+ : (PAD_HORZ - 4));
+
+ max_x += ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_RIGHT]
+ ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_RIGHT] - 4)
+ : (PAD_HORZ - 4);
+
+ min_y =
+ -(ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_TOP]
+ ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_TOP] - 4)
+ : (PAD_VERT - 4));
+
+ max_y += ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_BOT]
+ ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_BOT] - 4)
+ : (PAD_VERT - 4);
+
+ if((frm_x_ofst < min_x) || (frm_x_ofst + pu_wd) > max_x)
+ //ASSERT(0);
+ return (IV_FAIL);
+
+ if((frm_y_ofst < min_y) || (frm_y_ofst + pu_ht) > max_y)
+ //ASSERT(0);
+ return (IV_FAIL);
+ }
+
+ /* point to reference start location in ref frame */
+ /* Assuming clipping of mv is not required here as ME would */
+ /* take care of mv access not going beyond padded data */
+ pu1_ref_int_pel = pu1_ref_pic + frm_x_ofst + (ref_pic_stride * frm_y_ofst);
+
+ /* level shifted for subpel with both x and y component being non 0 */
+ /* this is because the interpolate function subtract this to contain */
+ /* the resulting data in 16 bits */
+ lvl_shift0 = (dx != 0) && (dy != 0) ? OFFSET14 : 0;
+
+ if(store_16bit_output)
+ {
+ /* do interpolation in 16bit L0 scratch buffer */
+ ihevce_luma_interpolate_16bit_dxdy(
+ pu1_ref_int_pel,
+ pi2_scr_buf_l0,
+ ref_pic_stride,
+ pu_wd,
+ pi2_horz_scratch,
+ pu_ht,
+ pu_wd,
+ dy,
+ dx,
+ ps_func_selector);
+ }
+ else
+ {
+ /* do interpolation in 8bit destination buffer and return */
+ ihevce_luma_interpolate_8bit_dxdy(
+ pu1_ref_int_pel,
+ pu1_dst_buf,
+ ref_pic_stride,
+ dst_stride,
+ pi2_horz_scratch,
+ pu_ht,
+ pu_wd,
+ dy,
+ dx,
+ ps_func_selector);
+
+ return (IV_SUCCESS);
+ }
+ }
+
+ if(inter_pred_idc != PRED_L0)
+ {
+ /*****************************************************/
+ /* L1 inter prediction */
+ /*****************************************************/
+
+ /* motion vecs in qpel precision */
+ WORD32 mv_x = ps_pu->mv.s_l1_mv.i2_mvx;
+ WORD32 mv_y = ps_pu->mv.s_l1_mv.i2_mvy;
+
+ /* sub pel offsets in x and y direction w.r.t integer pel */
+ WORD32 dx = mv_x & 0x3;
+ WORD32 dy = mv_y & 0x3;
+
+ /* ref idx is currently stored in the lower 4bits; it is used here without masking */
+ WORD32 ref_idx = (ps_pu->mv.i1_l1_ref_idx);
+
+ /* x and y integer offsets w.r.t frame start */
+ frm_x_ofst = (frm_x_pu + (mv_x >> 2));
+ frm_y_ofst = (frm_y_pu + (mv_y >> 2));
+
+ ps_ref_pic_l1 = ps_inter_pred_ctxt->ps_ref_list[1][ref_idx];
+
+ /* picture buffer start and stride (stride is always taken from s_yuv_buf_desc) */
+
+ if(i4_flag_inter_pred_source == 1)
+ {
+ pu1_ref_pic = (UWORD8 *)ps_ref_pic_l1->s_yuv_buf_desc_src.pv_y_buf;
+ }
+ else
+ {
+ pu1_ref_pic = (UWORD8 *)ps_ref_pic_l1->s_yuv_buf_desc.pv_y_buf;
+ }
+ ref_pic_stride = ps_ref_pic_l1->s_yuv_buf_desc.i4_y_strd;
+
+ /* Error check for mvs going out of ref frame padded limits (4 pel margin kept) */
+ {
+ WORD32 min_x, max_x = ps_ref_pic_l1->s_yuv_buf_desc.i4_y_wd;
+ WORD32 min_y, max_y = ps_ref_pic_l1->s_yuv_buf_desc.i4_y_ht;
+
+ min_x =
+ -(ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_LEFT]
+ ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_LEFT] - 4)
+ : (PAD_HORZ - 4));
+
+ max_x += ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_RIGHT]
+ ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_RIGHT] - 4)
+ : (PAD_HORZ - 4);
+
+ min_y =
+ -(ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_TOP]
+ ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_TOP] - 4)
+ : (PAD_VERT - 4));
+
+ max_y += ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_BOT]
+ ? (ps_inter_pred_ctxt->ai4_tile_xtra_pel[E_BOT] - 4)
+ : (PAD_VERT - 4);
+
+ if((frm_x_ofst < min_x) || (frm_x_ofst + pu_wd) > max_x)
+ //ASSERT(0);
+ return (IV_FAIL);
+
+ if((frm_y_ofst < min_y) || (frm_y_ofst + pu_ht) > max_y)
+ //ASSERT(0);
+ return (IV_FAIL);
+ }
+
+ /* point to reference start location in ref frame */
+ /* Assuming clipping of mv is not required here as ME would */
+ /* take care of mv access not going beyond padded data */
+ pu1_ref_int_pel = pu1_ref_pic + frm_x_ofst + (ref_pic_stride * frm_y_ofst);
+
+ /* level shifted for subpel with both x and y component being non 0 */
+ /* this is because the interpolate function subtract this to contain */
+ /* the resulting data in 16 bits */
+ lvl_shift1 = (dx != 0) && (dy != 0) ? OFFSET14 : 0;
+
+ if(store_16bit_output)
+ {
+ /* do interpolation in 16bit L1 scratch buffer */
+ ihevce_luma_interpolate_16bit_dxdy(
+ pu1_ref_int_pel,
+ pi2_scr_buf_l1,
+ ref_pic_stride,
+ pu_wd,
+ pi2_horz_scratch,
+ pu_ht,
+ pu_wd,
+ dy,
+ dx,
+ ps_func_selector);
+ }
+ else
+ {
+ /* do interpolation in 8bit destination buffer and return */
+ ihevce_luma_interpolate_8bit_dxdy(
+ pu1_ref_int_pel,
+ pu1_dst_buf,
+ ref_pic_stride,
+ dst_stride,
+ pi2_horz_scratch,
+ pu_ht,
+ pu_wd,
+ dy,
+ dx,
+ ps_func_selector);
+
+ return (IV_SUCCESS);
+ }
+ }
+
+ if((inter_pred_idc != PRED_BI) && wp_flag)
+ {
+ /*****************************************************/
+ /* unidirection weighted prediction */
+ /*****************************************************/
+ ihevce_wght_offst_t *ps_weight_offset;
+ WORD16 *pi2_src;
+ WORD32 lvl_shift;
+
+ /* initialize the weight, offsets and ref based on l0/l1 mode */
+ if(inter_pred_idc == PRED_L0)
+ {
+ pi2_src = pi2_scr_buf_l0;
+ ps_weight_offset = &ps_ref_pic_l0->s_weight_offset;
+ lvl_shift = lvl_shift0;
+ }
+ else
+ {
+ pi2_src = pi2_scr_buf_l1;
+ ps_weight_offset = &ps_ref_pic_l1->s_weight_offset;
+ lvl_shift = lvl_shift1;
+ }
+
+ wgt0 = ps_weight_offset->i2_luma_weight;
+ off0 = ps_weight_offset->i2_luma_offset;
+ shift = ps_inter_pred_ctxt->i4_log2_luma_wght_denom + SHIFT_14_MINUS_BIT_DEPTH;
+
+ /* do the uni directional weighted prediction */
+ ps_func_selector->ihevc_weighted_pred_uni_fptr(
+ pi2_src, pu1_dst_buf, pu_wd, dst_stride, wgt0, off0, shift, lvl_shift, pu_ht, pu_wd);
+ }
+ else
+ {
+ /*****************************************************/
+ /* Bipred prediction */
+ /*****************************************************/
+
+ if(wp_flag)
+ {
+ /*****************************************************/
+ /* Bi pred weighted prediction */
+ /*****************************************************/
+ wgt0 = ps_ref_pic_l0->s_weight_offset.i2_luma_weight;
+ off0 = ps_ref_pic_l0->s_weight_offset.i2_luma_offset;
+
+ wgt1 = ps_ref_pic_l1->s_weight_offset.i2_luma_weight;
+ off1 = ps_ref_pic_l1->s_weight_offset.i2_luma_offset;
+
+ shift = ps_inter_pred_ctxt->i4_log2_luma_wght_denom + SHIFT_14_MINUS_BIT_DEPTH + 1;
+
+ ps_func_selector->ihevc_weighted_pred_bi_fptr(
+ pi2_scr_buf_l0,
+ pi2_scr_buf_l1,
+ pu1_dst_buf,
+ pu_wd,
+ pu_wd,
+ dst_stride,
+ wgt0,
+ off0,
+ wgt1,
+ off1,
+ shift,
+ lvl_shift0,
+ lvl_shift1,
+ pu_ht,
+ pu_wd);
+ }
+ else
+ {
+ /*****************************************************/
+ /* Default Bi pred prediction */
+ /*****************************************************/
+ ps_func_selector->ihevc_weighted_pred_bi_default_fptr(
+ pi2_scr_buf_l0,
+ pi2_scr_buf_l1,
+ pu1_dst_buf,
+ pu_wd,
+ pu_wd,
+ dst_stride,
+ lvl_shift0,
+ lvl_shift1,
+ pu_ht,
+ pu_wd);
+ }
+ }
+
+ return (IV_SUCCESS);
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Performs Chroma inter pred based on sub pel position dxdy and stores the
+* result in a 16 bit destination buffer
+*
+* @param[in] pu1_src
+* pointer to the source corresponding to integer pel position of a mv (left and
+* top justified integer position)
+*
+* @param[out] pi2_dst
+* WORD16 pointer to the destination
+*
+* @param[in] src_strd
+* source buffer stride
+*
+* @param[in] dst_strd
+* destination buffer stride
+*
+* @param[in] pi2_hdst_scratch
+* scratch buffer for intermediate storage of horizontal filter output; used as
+* input for vertical filtering when sub pel components (dx != 0) && (dy != 0)
+*
+* Scratch holds (ht + NTAPS_CHROMA - 1) rows at a stride of (wd << 1) WORD16
+*
+* @param[in] ht
+* height of the prediction unit
+*
+* @param[in] wd
+* width of the prediction unit
+*
+* @param[in] dx
+* 1/8th pel position[0:7] of mv in x direction
+*
+* @param[in] dy
+* 1/8th pel position[0:7] of mv in y direction
+*
+* @returns
+* none
+*
+* @remarks
+* dy is passed before dx; ps_func_selector provides the copy/filter function pointers
+*******************************************************************************
+*/
+void ihevce_chroma_interpolate_16bit_dxdy(
+ UWORD8 *pu1_src,
+ WORD16 *pi2_dst,
+ WORD32 src_strd,
+ WORD32 dst_strd,
+ WORD16 *pi2_hdst_scratch,
+ WORD32 ht,
+ WORD32 wd,
+ WORD32 dy,
+ WORD32 dx,
+ func_selector_t *ps_func_selector)
+{
+ if((0 == dx) && (0 == dy))
+ {
+ /*--------- full pel position : copy input by upscaling-------*/
+
+ ps_func_selector->ihevc_inter_pred_chroma_copy_w16out_fptr(
+ pu1_src, pi2_dst, src_strd, dst_strd, &gai1_hevc_chroma_filter_taps[0][0], ht, wd);
+ }
+ else if((0 != dx) && (0 != dy))
+ {
+ /*----------sub pel in both x and y direction---------*/
+
+ UWORD8 *pu1_horz_src = pu1_src - src_strd;
+ WORD32 hdst_buf_stride = (wd << 1); /* uv interleave */
+ WORD16 *pi2_vert_src = pi2_hdst_scratch + hdst_buf_stride;
+
+ /* horizontal filtering of (ht + NTAPS_CHROMA - 1) rows, starting 1 row above, into scratch */
+ ps_func_selector->ihevc_inter_pred_chroma_horz_w16out_fptr(
+ pu1_horz_src,
+ pi2_hdst_scratch,
+ src_strd,
+ hdst_buf_stride,
+ &gai1_hevc_chroma_filter_taps[dx][0],
+ (ht + NTAPS_CHROMA - 1),
+ wd);
+
+ /* vertical filtering on scratch buffer (from its 2nd row) and stored in destination */
+ ps_func_selector->ihevc_inter_pred_chroma_vert_w16inp_w16out_fptr(
+ pi2_vert_src,
+ pi2_dst,
+ hdst_buf_stride,
+ dst_strd,
+ &gai1_hevc_chroma_filter_taps[dy][0],
+ ht,
+ wd);
+ }
+ else if(0 == dy)
+ {
+ /*----------sub pel in x direction only ---------*/
+
+ ps_func_selector->ihevc_inter_pred_chroma_horz_w16out_fptr(
+ pu1_src, pi2_dst, src_strd, dst_strd, &gai1_hevc_chroma_filter_taps[dx][0], ht, wd);
+ }
+ else /* if (0 == dx) */
+ {
+ /*----------sub pel in y direction only ---------*/
+
+ ps_func_selector->ihevc_inter_pred_chroma_vert_w16out_fptr(
+ pu1_src, pi2_dst, src_strd, dst_strd, &gai1_hevc_chroma_filter_taps[dy][0], ht, wd);
+ }
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Performs Chroma inter pred based on sub pel position dxdy and stores the
+* result in an 8 bit destination buffer
+*
+* @param[in] pu1_src
+* pointer to the source corresponding to integer pel position of a mv (left and
+* top justified integer position)
+*
+* @param[out] pu1_dst
+* UWORD8 pointer to the destination
+*
+* @param[in] src_strd
+* source buffer stride
+*
+* @param[in] dst_strd
+* destination buffer stride
+*
+* @param[in] pi2_hdst_scratch
+* scratch buffer for intermediate storage of horizontal filter output; used as
+* input for vertical filtering when sub pel components (dx != 0) && (dy != 0)
+*
+* Scratch holds (ht + NTAPS_CHROMA - 1) rows at a stride of (wd << 1) WORD16
+*
+* @param[in] ht
+* height of the prediction unit
+*
+* @param[in] wd
+* width of the prediction unit
+*
+* @param[in] dx
+* 1/8th pel position[0:7] of mv in x direction
+*
+* @param[in] dy
+* 1/8th pel position[0:7] of mv in y direction
+*
+* @returns
+* none
+*
+* @remarks
+* dy is passed before dx; ps_func_selector provides the copy/filter function pointers
+*******************************************************************************
+*/
+void ihevce_chroma_interpolate_8bit_dxdy(
+ UWORD8 *pu1_src,
+ UWORD8 *pu1_dst,
+ WORD32 src_strd,
+ WORD32 dst_strd,
+ WORD16 *pi2_hdst_scratch,
+ WORD32 ht,
+ WORD32 wd,
+ WORD32 dy,
+ WORD32 dx,
+ func_selector_t *ps_func_selector)
+{
+ if((0 == dx) && (0 == dy))
+ {
+ /*--------- full pel position : copy input as is -------*/
+ ps_func_selector->ihevc_inter_pred_chroma_copy_fptr(
+ pu1_src, pu1_dst, src_strd, dst_strd, &gai1_hevc_chroma_filter_taps[0][0], ht, wd);
+ }
+ else if((0 != dx) && (0 != dy))
+ {
+ /*----------sub pel in both x and y direction---------*/
+ UWORD8 *pu1_horz_src = pu1_src - src_strd;
+ WORD32 hdst_buf_stride = (wd << 1); /* uv interleave */
+ WORD16 *pi2_vert_src = pi2_hdst_scratch + hdst_buf_stride;
+
+ /* horizontal filtering of (ht + NTAPS_CHROMA - 1) rows, starting 1 row above, into scratch */
+ ps_func_selector->ihevc_inter_pred_chroma_horz_w16out_fptr(
+ pu1_horz_src,
+ pi2_hdst_scratch,
+ src_strd,
+ hdst_buf_stride,
+ &gai1_hevc_chroma_filter_taps[dx][0],
+ (ht + NTAPS_CHROMA - 1),
+ wd);
+
+ /* vertical filtering on scratch buffer (from its 2nd row) and stored in destination */
+ ps_func_selector->ihevc_inter_pred_chroma_vert_w16inp_fptr(
+ pi2_vert_src,
+ pu1_dst,
+ hdst_buf_stride,
+ dst_strd,
+ &gai1_hevc_chroma_filter_taps[dy][0],
+ ht,
+ wd);
+ }
+ else if(0 == dy)
+ {
+ /*----------sub pel in x direction only ---------*/
+ ps_func_selector->ihevc_inter_pred_chroma_horz_fptr(
+ pu1_src, pu1_dst, src_strd, dst_strd, &gai1_hevc_chroma_filter_taps[dx][0], ht, wd);
+ }
+ else /* if (0 == dx) */
+ {
+ /*----------sub pel in y direction only ---------*/
+ ps_func_selector->ihevc_inter_pred_chroma_vert_fptr(
+ pu1_src, pu1_dst, src_strd, dst_strd, &gai1_hevc_chroma_filter_taps[dy][0], ht, wd);
+ }
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Performs Chroma prediction for an inter prediction unit(PU)
+*
+* @par Description:
+* For a given PU, Inter prediction followed by weighted prediction (if
+* required). The reference and destination buffers are uv interleaved
+*
+* @param[in] pv_inter_pred_ctxt
+* context for inter prediction; contains ref list, weight offsets, ctb offsets
+*
+* @param[in] ps_pu
+* pointer to PU structure whose inter prediction needs to be done
+*
+* @param[out] pu1_dst_buf
+* pointer to destination buffer where the inter prediction is done
+*
+* @param[in] dst_stride
+* pitch of the destination buffer
+*
+* @returns
+* none
+*
+* @remarks
+* Unlike the luma path, no check is made here for mvs going outside the padded ref limits
+*******************************************************************************
+*/
+void ihevce_chroma_inter_pred_pu(
+ void *pv_inter_pred_ctxt, pu_t *ps_pu, UWORD8 *pu1_dst_buf, WORD32 dst_stride)
+{
+ inter_pred_ctxt_t *ps_inter_pred_ctxt = (inter_pred_ctxt_t *)pv_inter_pred_ctxt;
+ func_selector_t *ps_func_selector = ps_inter_pred_ctxt->ps_func_selector;
+
+ WORD32 inter_pred_idc = ps_pu->b2_pred_mode;
+ UWORD8 u1_is_422 = (ps_inter_pred_ctxt->u1_chroma_array_type == 2);
+ /* chroma width is half of luma width; height is half of luma height unless u1_is_422 */
+ WORD32 pu_wd_chroma = (ps_pu->b4_wd + 1) << 1;
+ WORD32 pu_ht_chroma = (ps_pu->b4_ht + 1) << (u1_is_422 + 1);
+
+ WORD32 wp_flag = ps_inter_pred_ctxt->i1_weighted_pred_flag ||
+ ps_inter_pred_ctxt->i1_weighted_bipred_flag;
+
+ /* 16bit dest required for interpolate if weighted pred is on or bipred */
+ WORD32 store_16bit_output;
+
+ recon_pic_buf_t *ps_ref_pic_l0, *ps_ref_pic_l1;
+ UWORD8 *pu1_ref_pic, *pu1_ref_int_pel;
+ WORD32 ref_pic_stride;
+
+ /* offset of reference block in integer pel units */
+ WORD32 frm_x_ofst, frm_y_ofst;
+ WORD32 frm_x_pu, frm_y_pu;
+
+ /* scratch 16 bit buffers for interpolation in l0 and l1 direction */
+ WORD16 *pi2_scr_buf_l0 = &ps_inter_pred_ctxt->ai2_scratch_buf_l0[0];
+ WORD16 *pi2_scr_buf_l1 = &ps_inter_pred_ctxt->ai2_scratch_buf_l1[0];
+
+ /* scratch buffer for horizontal interpolation destination */
+ WORD16 *pi2_horz_scratch = &ps_inter_pred_ctxt->ai2_horz_scratch[0];
+
+ /* get PU's frm x and frm y offset : Note uv is interleaved */
+ frm_x_pu = ps_inter_pred_ctxt->i4_ctb_frm_pos_x + (ps_pu->b4_pos_x << 2);
+ frm_y_pu = (ps_inter_pred_ctxt->i4_ctb_frm_pos_y >> (u1_is_422 == 0)) +
+ (ps_pu->b4_pos_y << (u1_is_422 + 1));
+
+ /* sanity checks */
+ ASSERT((wp_flag == 0) || (wp_flag == 1));
+ ASSERT(dst_stride >= (pu_wd_chroma << 1)); /* uv interleaved */
+ ASSERT(ps_pu->b1_intra_flag == 0);
+
+ if(wp_flag)
+ {
+ UWORD8 u1_is_wgt_pred_L0, u1_is_wgt_pred_L1;
+
+ if(inter_pred_idc != PRED_L1)
+ {
+ ps_ref_pic_l0 = ps_inter_pred_ctxt->ps_ref_list[0][ps_pu->mv.i1_l0_ref_idx];
+ u1_is_wgt_pred_L0 = ps_ref_pic_l0->s_weight_offset.u1_chroma_weight_enable_flag;
+ }
+ if(inter_pred_idc != PRED_L0)
+ {
+ ps_ref_pic_l1 = ps_inter_pred_ctxt->ps_ref_list[1][ps_pu->mv.i1_l1_ref_idx];
+ u1_is_wgt_pred_L1 = ps_ref_pic_l1->s_weight_offset.u1_chroma_weight_enable_flag;
+ }
+ if(inter_pred_idc == PRED_BI)
+ {
+ wp_flag = (u1_is_wgt_pred_L0 || u1_is_wgt_pred_L1);
+ }
+ else if(inter_pred_idc == PRED_L0)
+ {
+ wp_flag = u1_is_wgt_pred_L0;
+ }
+ else if(inter_pred_idc == PRED_L1)
+ {
+ wp_flag = u1_is_wgt_pred_L1;
+ }
+ else
+ {
+ /* other values are not allowed */
+ assert(0);
+ }
+ }
+ store_16bit_output = (inter_pred_idc == PRED_BI) || (wp_flag);
+
+ if(inter_pred_idc != PRED_L1)
+ {
+ /*****************************************************/
+ /* L0 inter prediction(Chroma ) */
+ /*****************************************************/
+
+ /* motion vecs in qpel precision */
+ WORD32 mv_x = ps_pu->mv.s_l0_mv.i2_mvx;
+ WORD32 mv_y = ps_pu->mv.s_l0_mv.i2_mvy;
+
+ /* sub pel offsets in x and y direction w.r.t integer pel */
+ WORD32 dx = mv_x & 0x7;
+ WORD32 dy = (mv_y & ((1 << (!u1_is_422 + 2)) - 1)) << u1_is_422;
+
+ /* ref idx is currently stored in the lower 4bits; it is used here without masking */
+ WORD32 ref_idx = (ps_pu->mv.i1_l0_ref_idx);
+
+ /* x and y integer offsets w.r.t frame start */
+
+ frm_x_ofst = (frm_x_pu + ((mv_x >> 3) << 1)); /* uv interleaved */
+ frm_y_ofst = (frm_y_pu + ((mv_y >> (3 - u1_is_422))));
+
+ ps_ref_pic_l0 = ps_inter_pred_ctxt->ps_ref_list[0][ref_idx];
+
+ /* picture buffer start and stride */
+ pu1_ref_pic = (UWORD8 *)ps_ref_pic_l0->s_yuv_buf_desc.pv_u_buf;
+ ref_pic_stride = ps_ref_pic_l0->s_yuv_buf_desc.i4_uv_strd;
+
+ /* point to reference start location in ref frame */
+ /* Assuming clipping of mv is not required here as ME would */
+ /* take care of mv access not going beyond padded data */
+ pu1_ref_int_pel = pu1_ref_pic + frm_x_ofst + (ref_pic_stride * frm_y_ofst);
+
+ if(store_16bit_output)
+ {
+ /* do interpolation in 16bit L0 scratch buffer */
+ ihevce_chroma_interpolate_16bit_dxdy(
+ pu1_ref_int_pel,
+ pi2_scr_buf_l0,
+ ref_pic_stride,
+ (pu_wd_chroma << 1),
+ pi2_horz_scratch,
+ pu_ht_chroma,
+ pu_wd_chroma,
+ dy,
+ dx,
+ ps_func_selector);
+ }
+ else
+ {
+ /* do interpolation in 8bit destination buffer and return */
+ ihevce_chroma_interpolate_8bit_dxdy(
+ pu1_ref_int_pel,
+ pu1_dst_buf,
+ ref_pic_stride,
+ dst_stride,
+ pi2_horz_scratch,
+ pu_ht_chroma,
+ pu_wd_chroma,
+ dy,
+ dx,
+ ps_func_selector);
+
+ return;
+ }
+ }
+
+ if(inter_pred_idc != PRED_L0)
+ {
+ /*****************************************************/
+ /* L1 inter prediction(Chroma) */
+ /*****************************************************/
+
+ /* motion vecs in qpel precision */
+ WORD32 mv_x = ps_pu->mv.s_l1_mv.i2_mvx;
+ WORD32 mv_y = ps_pu->mv.s_l1_mv.i2_mvy;
+
+ /* sub pel offsets in x and y direction w.r.t integer pel */
+ WORD32 dx = mv_x & 0x7;
+ WORD32 dy = (mv_y & ((1 << (!u1_is_422 + 2)) - 1)) << u1_is_422;
+
+ /* ref idx is currently stored in the lower 4bits; it is used here without masking */
+ WORD32 ref_idx = (ps_pu->mv.i1_l1_ref_idx);
+
+ /* x and y integer offsets w.r.t frame start */
+ frm_x_ofst = (frm_x_pu + ((mv_x >> 3) << 1)); /* uv interleaved */
+ frm_y_ofst = (frm_y_pu + ((mv_y >> (3 - u1_is_422))));
+
+ ps_ref_pic_l1 = ps_inter_pred_ctxt->ps_ref_list[1][ref_idx];
+
+ /* picture buffer start and stride */
+ pu1_ref_pic = (UWORD8 *)ps_ref_pic_l1->s_yuv_buf_desc.pv_u_buf;
+ ref_pic_stride = ps_ref_pic_l1->s_yuv_buf_desc.i4_uv_strd;
+
+ /* point to reference start location in ref frame */
+ /* Assuming clipping of mv is not required here as ME would */
+ /* take care of mv access not going beyond padded data */
+ pu1_ref_int_pel = pu1_ref_pic + frm_x_ofst + (ref_pic_stride * frm_y_ofst);
+
+ if(store_16bit_output)
+ {
+ /* do interpolation in 16bit L1 scratch buffer */
+ ihevce_chroma_interpolate_16bit_dxdy(
+ pu1_ref_int_pel,
+ pi2_scr_buf_l1,
+ ref_pic_stride,
+ (pu_wd_chroma << 1),
+ pi2_horz_scratch,
+ pu_ht_chroma,
+ pu_wd_chroma,
+ dy,
+ dx,
+ ps_func_selector);
+ }
+ else
+ {
+ /* do interpolation in 8bit destination buffer and return */
+ ihevce_chroma_interpolate_8bit_dxdy(
+ pu1_ref_int_pel,
+ pu1_dst_buf,
+ ref_pic_stride,
+ dst_stride,
+ pi2_horz_scratch,
+ pu_ht_chroma,
+ pu_wd_chroma,
+ dy,
+ dx,
+ ps_func_selector);
+
+ return;
+ }
+ }
+
+ if((inter_pred_idc != PRED_BI) && wp_flag)
+ {
+ /*****************************************************/
+ /* unidirection weighted prediction(Chroma) */
+ /*****************************************************/
+ ihevce_wght_offst_t *ps_weight_offset;
+ WORD16 *pi2_src;
+ WORD32 lvl_shift = 0;
+ WORD32 wgt_cb, wgt_cr, off_cb, off_cr;
+ WORD32 shift;
+
+ /* initialize the weight, offsets and ref based on l0/l1 mode */
+ if(inter_pred_idc == PRED_L0)
+ {
+ pi2_src = pi2_scr_buf_l0;
+ ps_weight_offset = &ps_ref_pic_l0->s_weight_offset;
+ }
+ else
+ {
+ pi2_src = pi2_scr_buf_l1;
+ ps_weight_offset = &ps_ref_pic_l1->s_weight_offset;
+ }
+
+ wgt_cb = ps_weight_offset->i2_cb_weight;
+ off_cb = ps_weight_offset->i2_cb_offset;
+ wgt_cr = ps_weight_offset->i2_cr_weight;
+ off_cr = ps_weight_offset->i2_cr_offset;
+
+ shift = ps_inter_pred_ctxt->i4_log2_chroma_wght_denom + SHIFT_14_MINUS_BIT_DEPTH;
+
+ /* do the uni directional weighted prediction */
+ ps_func_selector->ihevc_weighted_pred_chroma_uni_fptr(
+ pi2_src,
+ pu1_dst_buf,
+ (pu_wd_chroma << 1),
+ dst_stride,
+ wgt_cb,
+ wgt_cr,
+ off_cb,
+ off_cr,
+ shift,
+ lvl_shift,
+ pu_ht_chroma,
+ pu_wd_chroma);
+ }
+ else
+ {
+ /*****************************************************/
+ /* Bipred prediction(Chroma) */
+ /*****************************************************/
+ if(wp_flag)
+ {
+ WORD32 wgt0_cb, wgt1_cb, wgt0_cr, wgt1_cr;
+ WORD32 off0_cb, off1_cb, off0_cr, off1_cr;
+ WORD32 shift;
+
+ /*****************************************************/
+ /* Bi pred weighted prediction (Chroma) */
+ /*****************************************************/
+ wgt0_cb = ps_ref_pic_l0->s_weight_offset.i2_cb_weight;
+ off0_cb = ps_ref_pic_l0->s_weight_offset.i2_cb_offset;
+
+ wgt0_cr = ps_ref_pic_l0->s_weight_offset.i2_cr_weight;
+ off0_cr = ps_ref_pic_l0->s_weight_offset.i2_cr_offset;
+
+ wgt1_cb = ps_ref_pic_l1->s_weight_offset.i2_cb_weight;
+ off1_cb = ps_ref_pic_l1->s_weight_offset.i2_cb_offset;
+
+ wgt1_cr = ps_ref_pic_l1->s_weight_offset.i2_cr_weight;
+ off1_cr = ps_ref_pic_l1->s_weight_offset.i2_cr_offset;
+
+ shift = ps_inter_pred_ctxt->i4_log2_chroma_wght_denom + SHIFT_14_MINUS_BIT_DEPTH + 1;
+
+ ps_func_selector->ihevc_weighted_pred_chroma_bi_fptr(
+ pi2_scr_buf_l0,
+ pi2_scr_buf_l1,
+ pu1_dst_buf,
+ (pu_wd_chroma << 1),
+ (pu_wd_chroma << 1),
+ dst_stride,
+ wgt0_cb,
+ wgt0_cr,
+ off0_cb,
+ off0_cr,
+ wgt1_cb,
+ wgt1_cr,
+ off1_cb,
+ off1_cr,
+ shift,
+ 0,
+ 0,
+ pu_ht_chroma,
+ pu_wd_chroma);
+ }
+ else
+ {
+ /*****************************************************/
+ /* Default Bi pred prediction (Chroma) */
+ /*****************************************************/
+ ps_func_selector->ihevc_weighted_pred_chroma_bi_default_fptr(
+ pi2_scr_buf_l0,
+ pi2_scr_buf_l1,
+ pu1_dst_buf,
+ (pu_wd_chroma << 1),
+ (pu_wd_chroma << 1),
+ dst_stride,
+ 0,
+ 0,
+ pu_ht_chroma,
+ pu_wd_chroma);
+ }
+ }
+}
diff --git a/encoder/ihevce_inter_pred.h b/encoder/ihevce_inter_pred.h
new file mode 100644
index 0000000..2137e8c
--- /dev/null
+++ b/encoder/ihevce_inter_pred.h
@@ -0,0 +1,111 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_inter_pred.h
+*
+* \brief
+* This file contains function prototypes of luma and chroma MC function
+* interfaces for an inter PU
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_INTER_PRED_H_
+#define _IHEVCE_INTER_PRED_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+/* one flag per hex nibble; AND a flag with nbr_mask to test availability of that nbr */
+typedef enum TILE_NBR_MASK_E
+{
+ TILE_TL_NBR = 0x10000000,
+ TILE_TOP_NBR = 0x01000000,
+ TILE_TR_NBR = 0x00100000,
+ TILE_LT_NBR = 0x00010000,
+ TILE_RT_NBR = 0x00001000,
+ TILE_BL_NBR = 0x00000100,
+ TILE_BOT_NBR = 0x00000010,
+ TILE_BR_NBR = 0x00000001
+} TILE_NBR_MASK_E;
+
+/* indices into a per-direction array (e.g. ai4_tile_xtra_pel[] of the inter pred context) */
+typedef enum
+{
+ E_TOP = 0,
+ E_LEFT = 1,
+ E_RIGHT = 2,
+ E_BOT = 3,
+
+ E_FOUR_DIRECTIONS = 4 /* count of the direction entries above */
+} IHEVCE_FOUR_DIRECTIONS_T;
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Variable Declarations */
+/*****************************************************************************/
+
+extern WORD8 gai1_hevc_luma_filter_taps[4][NTAPS_LUMA];
+extern WORD8 gai1_hevc_chroma_filter_taps[8][NTAPS_CHROMA];
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+IV_API_CALL_STATUS_T ihevce_luma_inter_pred_pu(
+ void *pv_inter_pred_ctxt,
+ pu_t *ps_pu,
+ void *pv_dst_buf,
+ WORD32 dst_stride,
+ WORD32 i4_flag_inter_pred_source);
+
+IV_API_CALL_STATUS_T ihevce_luma_inter_pred_pu_high_speed(
+ void *pv_inter_pred_ctxt,
+ pu_t *ps_pu,
+ UWORD8 **ppu1_dst_buf,
+ WORD32 *pi4_dst_stride,
+ func_selector_t *ps_func_selector);
+
+void ihevce_chroma_inter_pred_pu(
+ void *pv_inter_pred_ctxt, pu_t *ps_pu, UWORD8 *pu1_dst_buf, WORD32 dst_stride);
+
+#endif /* _IHEVCE_INTER_PRED_H_ */
diff --git a/encoder/ihevce_ipe_instr_set_router.c b/encoder/ihevce_ipe_instr_set_router.c
new file mode 100644
index 0000000..c13faff
--- /dev/null
+++ b/encoder/ihevce_ipe_instr_set_router.c
@@ -0,0 +1,94 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_ipe_instr_set_router.c
+*
+* \brief
+* This file contains function pointer initialization of functions used during
+* pre-enc intra pred estimation
+*
+* \date
+* 15/07/2013
+*
+* \author
+* Ittiam
+*
+* List of Functions
+* ihevce_ipe_instr_set_router()
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevc_debug.h"
+#include "ihevce_ipe_instr_set_router.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_ipe_instr_set_router \endif
+*
+* \brief
+* Function pointer initialization of pre enc ipe struct
+*
+*****************************************************************************
+*/
+void ihevce_ipe_instr_set_router(
+    ihevce_ipe_optimised_function_list_t *ps_func_list, IV_ARCH_T e_arch)
+{
+    /* Start with the portable C kernels for every entry. They remain the   */
+    /* final selection for any architecture that is not handled specially  */
+    /* further down.                                                        */
+    ps_func_list->pf_4x4_sad_computer = ihevce_4x4_sad_computer;
+    ps_func_list->pf_8x8_sad_computer = ihevce_8x8_sad_computer;
+    ps_func_list->pf_ed_4x4_find_best_modes = ihevce_ed_4x4_find_best_modes;
+    ps_func_list->pf_nxn_sad_computer = ihevce_nxn_sad_computer;
+    ps_func_list->pf_scaling_filter_mxn = ihevce_scaling_filter_mxn;
+
+#ifdef ENABLE_NEON
+    /* ARM NEON architectures get the NEON kernels instead. There is no NEON */
+    /* counterpart assigned for pf_ed_4x4_find_best_modes, so that entry    */
+    /* keeps the C routine.                                                  */
+    if((ARCH_ARM_A9Q == e_arch) || (ARCH_ARM_V8_NEON == e_arch))
+    {
+        ps_func_list->pf_4x4_sad_computer = ihevce_4x4_sad_computer_neon;
+        ps_func_list->pf_8x8_sad_computer = ihevce_8x8_sad_computer_neon;
+        ps_func_list->pf_nxn_sad_computer = ihevce_nxn_sad_computer_neon;
+        ps_func_list->pf_scaling_filter_mxn = ihevce_scaling_filter_mxn_neon;
+    }
+#else
+    /* only the C kernels are built here, so e_arch has no influence */
+    (void)e_arch;
+#endif
+}
diff --git a/encoder/ihevce_ipe_instr_set_router.h b/encoder/ihevce_ipe_instr_set_router.h
new file mode 100644
index 0000000..85a7788
--- /dev/null
+++ b/encoder/ihevce_ipe_instr_set_router.h
@@ -0,0 +1,112 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_ipe_instr_set_router.h
+*
+* \brief
+* This file contains declarations related to pre enc intra pred estimation
+* functions used in encoder
+*
+* \date
+* 15/07/2013
+*
+* \author
+* Ittiam
+*
+* List of Functions
+*
+*
+******************************************************************************
+*/
+
+#ifndef __IHEVCE_IPE_INSTR_SET_ROUTER_H_
+#define __IHEVCE_IPE_INSTR_SET_ROUTER_H_
+
+#include "ihevc_typedefs.h"
+#include "ihevce_defs.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+typedef UWORD16 FT_SAD_COMPUTER(UWORD8 *, UWORD8 *, WORD32, WORD32); /* signature shared by the 4x4 and 8x8 SAD kernels */
+/* signature of ihevce_4mx4n_sad_computer_neon */
+typedef UWORD32 FT_BLK_SAD_COMPUTER(UWORD8 *, UWORD8 *, WORD32, WORD32, WORD32, WORD32);
+/* signature shared by the nxn SAD kernels */
+typedef WORD32 FT_SAD_COMPUTER_GENERIC(UWORD8 *, WORD32, UWORD8 *, WORD32, WORD32);
+/* signature shared by the mxn scaling filters; also the type of the last FT_SCALE_BY_2 argument */
+typedef void
+ FT_SCALING_FILTER_BY_2(UWORD8 *, WORD32, UWORD8 *, WORD32, UWORD8 *, WORD32, WORD32, WORD32);
+/* signature of ihevce_scale_by_2; its two trailing arguments are function pointers */
+typedef void FT_SCALE_BY_2(
+ UWORD8 *,
+ WORD32,
+ UWORD8 *,
+ WORD32,
+ WORD32,
+ WORD32,
+ UWORD8 *,
+ WORD32,
+ WORD32,
+ WORD32,
+ WORD32,
+ FT_COPY_2D *,
+ FT_SCALING_FILTER_BY_2 *);
+/* signature of ihevce_ed_4x4_find_best_modes; the last argument is a SAD kernel pointer */
+typedef void FT_ED_4X4_FIND_BEST_MODES(
+ UWORD8 *, WORD32, UWORD8 *, UWORD16 *, UWORD8 *, WORD32 *, WORD32, FT_SAD_COMPUTER *);
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+typedef struct
+{
+ FT_SAD_COMPUTER *pf_4x4_sad_computer; /* ihevce_4x4_sad_computer or its _neon variant */
+ FT_SAD_COMPUTER *pf_8x8_sad_computer; /* ihevce_8x8_sad_computer or its _neon variant */
+ FT_SAD_COMPUTER_GENERIC *pf_nxn_sad_computer; /* ihevce_nxn_sad_computer or its _neon variant */
+ FT_SCALING_FILTER_BY_2 *pf_scaling_filter_mxn; /* ihevce_scaling_filter_mxn or its _neon variant */
+ FT_ED_4X4_FIND_BEST_MODES *pf_ed_4x4_find_best_modes; /* ihevce_ed_4x4_find_best_modes (C routine only) */
+} ihevce_ipe_optimised_function_list_t; /* filled in by ihevce_ipe_instr_set_router() */
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+void ihevce_ipe_instr_set_router(
+ ihevce_ipe_optimised_function_list_t *ps_func_list, IV_ARCH_T e_arch);
+
+/* Function List - C */
+FT_SAD_COMPUTER ihevce_4x4_sad_computer;
+FT_SAD_COMPUTER ihevce_8x8_sad_computer;
+FT_SAD_COMPUTER_GENERIC ihevce_nxn_sad_computer;
+FT_SCALE_BY_2 ihevce_scale_by_2;
+FT_SCALING_FILTER_BY_2 ihevce_scaling_filter_mxn;
+FT_ED_4X4_FIND_BEST_MODES ihevce_ed_4x4_find_best_modes;
+
+#ifdef ENABLE_NEON
+/* Function List - ARM Neon */
+FT_SAD_COMPUTER ihevce_4x4_sad_computer_neon;
+FT_SAD_COMPUTER ihevce_8x8_sad_computer_neon;
+FT_SAD_COMPUTER_GENERIC ihevce_nxn_sad_computer_neon;
+FT_BLK_SAD_COMPUTER ihevce_4mx4n_sad_computer_neon;
+FT_SCALING_FILTER_BY_2 ihevce_scaling_filter_mxn_neon;
+#endif
+
+#endif
diff --git a/encoder/ihevce_ipe_pass.c b/encoder/ihevce_ipe_pass.c
new file mode 100644
index 0000000..fc21d97
--- /dev/null
+++ b/encoder/ihevce_ipe_pass.c
@@ -0,0 +1,1604 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*!
+******************************************************************************
+* \file ihevce_ipe_pass.c
+*
+* \brief
+* This file contains interface functions of Intra Prediction Estimation
+* module
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+*
+* List of Functions
+*
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_debug.h"
+#include "ihevc_defs.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+#include "ihevc_quant_tables.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_hle_interface.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_multi_thrd_funcs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_entropy_structs.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+#include "ihevce_enc_loop_structs.h"
+#include "ihevce_inter_pred.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevce_ipe_instr_set_router.h"
+#include "ihevce_ipe_structs.h"
+#include "ihevce_ipe_pass.h"
+#include "ihevce_decomp_pre_intra_structs.h"
+#include "ihevce_decomp_pre_intra_pass.h"
+#include "ihevce_recur_bracketing.h"
+#include "ihevce_nbr_avail.h"
+#include "ihevce_global_tables.h"
+#include "ihevc_resi_trans.h"
+
+#include "cast_types.h"
+#include "osal.h"
+#include "osal_defaults.h"
+
+/*****************************************************************************/
+/* Global Tables */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+* @brief Look up table for choosing the appropriate function for
+* Intra prediction
+*
+* @remarks Same look up table enums are used for luma & chroma but each
+* have separate functions implemented
+******************************************************************************
+*/
+WORD32 g_i4_ipe_funcs[MAX_NUM_IP_MODES] = {
+ IPE_FUNC_MODE_0, /* Mode 0 */
+ IPE_FUNC_MODE_1, /* Mode 1 */
+ IPE_FUNC_MODE_2, /* Mode 2 */
+ IPE_FUNC_MODE_3TO9, /* Mode 3 */
+ IPE_FUNC_MODE_3TO9, /* Mode 4 */
+ IPE_FUNC_MODE_3TO9, /* Mode 5 */
+ IPE_FUNC_MODE_3TO9, /* Mode 6 */
+ IPE_FUNC_MODE_3TO9, /* Mode 7 */
+ IPE_FUNC_MODE_3TO9, /* Mode 8 */
+ IPE_FUNC_MODE_3TO9, /* Mode 9 */
+ IPE_FUNC_MODE_10, /* Mode 10 */
+ IPE_FUNC_MODE_11TO17, /* Mode 11 */
+ IPE_FUNC_MODE_11TO17, /* Mode 12 */
+ IPE_FUNC_MODE_11TO17, /* Mode 13 */
+ IPE_FUNC_MODE_11TO17, /* Mode 14 */
+ IPE_FUNC_MODE_11TO17, /* Mode 15 */
+ IPE_FUNC_MODE_11TO17, /* Mode 16 */
+ IPE_FUNC_MODE_11TO17, /* Mode 17 */
+ IPE_FUNC_MODE_18_34, /* Mode 18 */
+ IPE_FUNC_MODE_19TO25, /* Mode 19 */
+ IPE_FUNC_MODE_19TO25, /* Mode 20 */
+ IPE_FUNC_MODE_19TO25, /* Mode 21 */
+ IPE_FUNC_MODE_19TO25, /* Mode 22 */
+ IPE_FUNC_MODE_19TO25, /* Mode 23 */
+ IPE_FUNC_MODE_19TO25, /* Mode 24 */
+ IPE_FUNC_MODE_19TO25, /* Mode 25 */
+ IPE_FUNC_MODE_26, /* Mode 26 */
+ IPE_FUNC_MODE_27TO33, /* Mode 27 */
+ IPE_FUNC_MODE_27TO33, /* Mode 28 */
+ IPE_FUNC_MODE_27TO33, /* Mode 29 */
+ IPE_FUNC_MODE_27TO33, /* Mode 30 */
+ IPE_FUNC_MODE_27TO33, /* Mode 31 */
+ IPE_FUNC_MODE_27TO33, /* Mode 32 */
+ IPE_FUNC_MODE_27TO33, /* Mode 33 */
+ IPE_FUNC_MODE_18_34, /* Mode 34 */
+};
+
+/**
+******************************************************************************
+* @brief Look up table for deciding whether to use original samples or
+* filtered reference samples for Intra prediction
+*
+* @remarks This table has the flags for transform size of 8, 16 and 32
+* Input is log2nT - 3 and intra prediction mode
+******************************************************************************
+*/
+UWORD8 gau1_ipe_filter_flag[3][MAX_NUM_IP_MODES] = {
+ { 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* row 0: log2nT - 3 == 0, transform size 8 */
+ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 },
+ { 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, /* row 1: log2nT - 3 == 1, transform size 16 */
+ 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1 },
+ { 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, /* row 2: log2nT - 3 == 2, transform size 32 */
+ 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1 }
+};
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_ipe_recompute_lambda_from_min_8x8_act_in_ctb \endif
+*
+* \brief
+* This function recomputes lambda using min 8x8 act in CTB
+*
+* \author
+* Ittiam
+*
+* \return
+* Nothing
+*
+******************************************************************************
+*/
+void ihevce_ipe_recompute_lambda_from_min_8x8_act_in_ctb(
+    ihevce_ipe_ctxt_t *ps_ctxt, ihevce_ed_ctb_l1_t *ps_ed_ctb_l1)
+{
+    /* QP passed on to the lambda computation. It stays 0 unless CU level  */
+    /* QP modulation is enabled at run time (i4_l0ipe_qp_mod) and compiled */
+    /* in (MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON).                            */
+    WORD32 i4_cu_qp = 0;
+#if MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON
+    /* outputs of ihevce_cu_level_qp_mod() that are not used further here */
+    WORD32 i4_activity;
+    WORD32 i4_qscale;
+#endif
+    WORD32 i4_curr_satd;
+    long double ld_avg_satd;
+
+    /* Read the SATD entry of this CTB and the frame level average that is */
+    /* passed along with it; the source depends on LAMDA_BASED_ON_QUANT.    */
+#if LAMDA_BASED_ON_QUANT
+    i4_curr_satd = ps_ed_ctb_l1->i4_32x32_satd[0][2];
+    /* store into ld_avg_satd: no i8_avg_satd is declared in this function, */
+    /* and ld_avg_satd is the value handed to ihevce_cu_level_qp_mod()      */
+    ld_avg_satd = (long double)ps_ctxt->i8_curr_frame_32x32_avg_act[2];
+#else
+    i4_curr_satd = ps_ed_ctb_l1->i4_32x32_satd[0][3];
+    ld_avg_satd = 2.0 + ps_ctxt->ld_curr_frame_16x16_log_avg[0];
+#endif
+
+#if MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON
+    if(ps_ctxt->i4_l0ipe_qp_mod)
+    {
+        /* ihevce_cu_level_qp_mod() returns the CU QP used for the lambda */
+        i4_cu_qp = ihevce_cu_level_qp_mod(
+            ps_ctxt->i4_qscale,
+            i4_curr_satd,
+            ld_avg_satd,
+            ps_ctxt->f_strength,
+            &i4_activity,
+            &i4_qscale,
+            ps_ctxt->ps_rc_quant_ctxt);
+    }
+#else
+    /* modulation is compiled out: the statistics above are not consumed */
+    (void)i4_curr_satd;
+    (void)ld_avg_satd;
+#endif
+
+    /* refresh the lambda parameters of this context for i4_cu_qp */
+    ihevce_get_ipe_ol_cu_lambda_prms(ps_ctxt, i4_cu_qp);
+}
+/*!
+******************************************************************************
+* \if Function name : ihevce_ipe_pass_satd \endif
+*
+* \brief
+* This function calculates the SATD for a given size and returns the value
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+* \return
+*
+* List of Functions
+*
+******************************************************************************
+*/
+UWORD32 ihevce_ipe_pass_satd(WORD16 *pi2_coeff, WORD32 coeff_stride, WORD32 trans_size)
+{
+    WORD32 i4_row, i4_col;
+    WORD32 i4_abs_sum = 0;
+    WORD32 i4_log2_size;
+    WORD32 i4_shift;
+
+    /* sum of absolute values over the trans_size x trans_size block; */
+    /* consecutive rows are coeff_stride elements apart               */
+    for(i4_row = 0; i4_row < trans_size; i4_row++)
+    {
+        WORD16 *pi2_row = pi2_coeff + (i4_row * coeff_stride);
+
+        for(i4_col = 0; i4_col < trans_size; i4_col++)
+        {
+            i4_abs_sum += abs(pi2_row[i4_col]);
+        }
+    }
+
+    /* scale the sum down: the GETRANGE() result minus 1 is used as the */
+    /* log2 of the transform size when the shift amount is derived      */
+    GETRANGE(i4_log2_size, trans_size);
+    i4_log2_size -= 1;
+
+    i4_shift = MAX_TR_DYNAMIC_RANGE - BIT_DEPTH - i4_log2_size;
+    i4_abs_sum >>= i4_shift;
+
+    return (i4_abs_sum);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_ipe_get_num_mem_recs \endif
+*
+* \brief
+* Number of memory records are returned for IPE module
+*
+*
+* \return
+* Number of memory records needed by IPE (NUM_IPE_MEM_RECS)
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+WORD32 ihevce_ipe_get_num_mem_recs(void)
+{
+ return (NUM_IPE_MEM_RECS); /* fixed count; the same value ihevce_ipe_get_mem_recs() returns */
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_ipe_get_mem_recs \endif
+*
+* \brief
+* Memory requirements are returned for IPE.
+*
+* \param[in,out] ps_mem_tab : pointer to memory descriptors table
+* \param[in] i4_num_proc_thrds : Number of processing threads for this module
+* \param[in] i4_mem_space : memspace in which memory request should be done
+*
+* \return
+* Number of memory records filled in ps_mem_tab
+* (NUM_IPE_MEM_RECS)
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+WORD32
+    ihevce_ipe_get_mem_recs(iv_mem_rec_t *ps_mem_tab, WORD32 i4_num_proc_thrds, WORD32 i4_mem_space)
+{
+    /* Memory is requested assuming worst case requirements. Both records */
+    /* are placed in the memory space selected by the caller.             */
+    const IV_MEM_TYPE_T e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
+    iv_mem_rec_t *ps_master_rec = &ps_mem_tab[IPE_CTXT];
+    iv_mem_rec_t *ps_thrds_rec = &ps_mem_tab[IPE_THRDS_CTXT];
+
+    /* Record for the module (master) context structure */
+    ps_master_rec->i4_mem_size = sizeof(ihevce_ipe_master_ctxt_t);
+    ps_master_rec->e_mem_type = e_mem_type;
+    ps_master_rec->i4_mem_alignment = 8;
+
+    /* Record for the thread context structures, one per processing thread */
+    ps_thrds_rec->i4_mem_size = i4_num_proc_thrds * sizeof(ihevce_ipe_ctxt_t);
+    ps_thrds_rec->e_mem_type = e_mem_type;
+    ps_thrds_rec->i4_mem_alignment = 32;
+
+    return (NUM_IPE_MEM_RECS);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_ipe_init \endif
+*
+* \brief
+* Initialization of the IPE master and per-thread context structures.
+*
+* \param[in] ps_mem_tab : pointer to memory descriptors table
+* \param[in] ps_init_prms : Create time static parameters
+*
+* \return
+* Handle to the IPE master context (as void *)
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void *ihevce_ipe_init(
+    iv_mem_rec_t *ps_mem_tab,
+    ihevce_static_cfg_params_t *ps_init_prms,
+    WORD32 i4_num_proc_thrds,
+    WORD32 i4_ref_id,
+    func_selector_t *ps_func_selector,
+    rc_quant_t *ps_rc_quant_ctxt,
+    WORD32 i4_resolution_id,
+    UWORD8 u1_is_popcnt_available)
+{
+    WORD32 i4_thrds;
+    ihevce_ipe_master_ctxt_t *ps_master_ctxt;
+    ihevce_ipe_ctxt_t *ps_ctxt;
+
+    /* IPE master state structure */
+    ps_master_ctxt = (ihevce_ipe_master_ctxt_t *)ps_mem_tab[IPE_CTXT].pv_base;
+
+    ps_master_ctxt->i4_num_proc_thrds = i4_num_proc_thrds;
+
+    /* start of the per-thread contexts; advanced once per loop iteration */
+    ps_ctxt = (ihevce_ipe_ctxt_t *)ps_mem_tab[IPE_THRDS_CTXT].pv_base;
+
+    /* one-time initialisation of each thread context */
+    for(i4_thrds = 0; i4_thrds < ps_master_ctxt->i4_num_proc_thrds; i4_thrds++)
+    {
+        ps_master_ctxt->aps_ipe_thrd_ctxt[i4_thrds] = ps_ctxt;
+
+        /* the RC quant context is read through the thread context, so it */
+        /* is registered in every context and not only in the first one   */
+        ps_ctxt->ps_rc_quant_ctxt = ps_rc_quant_ctxt;
+
+        /* initialise the CU and TU sizes */
+        ps_ctxt->u1_ctb_size = (1 << ps_init_prms->s_config_prms.i4_max_log2_cu_size);
+        ps_ctxt->u1_min_cu_size = (1 << ps_init_prms->s_config_prms.i4_min_log2_cu_size);
+        ps_ctxt->u1_min_tu_size = (1 << ps_init_prms->s_config_prms.i4_min_log2_tu_size);
+
+        /** Register the function selector pointer*/
+        ps_ctxt->ps_func_selector = ps_func_selector;
+
+        /* Initialize the encoder quality preset */
+        /* IPE algorithm is controlled based on this preset */
+        ps_ctxt->i4_quality_preset =
+            ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;
+
+        /* IPE handles the P7 preset exactly like P6 */
+        if(ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P7)
+        {
+            ps_ctxt->i4_quality_preset = IHEVCE_QUALITY_P6;
+        }
+
+        /* initialise all the pointer to start of arrays */
+        ps_ctxt->ps_ipe_cu_tree = &ps_ctxt->as_ipe_cu_tree[0];
+
+        /* initialize QP */
+        ps_ctxt->i1_QP =
+            ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].ai4_frame_qp[i4_ref_id];
+        /* (2 ^ i4_max_temporal_layers) - 1 */
+        ps_ctxt->u1_num_b_frames =
+            (1 << ps_init_prms->s_coding_tools_prms.i4_max_temporal_layers) - 1;
+
+        ps_ctxt->b_sad_type = IPE_SAD_TYPE;
+        ps_ctxt->u1_ipe_step_size = IPE_STEP_SIZE;
+
+        ps_ctxt->apf_ipe_lum_ip[IPE_FUNC_MODE_0] =
+            ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_planar_fptr;
+        ps_ctxt->apf_ipe_lum_ip[IPE_FUNC_MODE_1] =
+            ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_dc_fptr;
+        ps_ctxt->apf_ipe_lum_ip[IPE_FUNC_MODE_2] =
+            ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_mode2_fptr;
+        ps_ctxt->apf_ipe_lum_ip[IPE_FUNC_MODE_3TO9] =
+            ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_mode_3_to_9_fptr;
+        ps_ctxt->apf_ipe_lum_ip[IPE_FUNC_MODE_10] =
+            ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_horz_fptr;
+        ps_ctxt->apf_ipe_lum_ip[IPE_FUNC_MODE_11TO17] =
+            ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_mode_11_to_17_fptr;
+        ps_ctxt->apf_ipe_lum_ip[IPE_FUNC_MODE_18_34] =
+            ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_mode_18_34_fptr;
+        ps_ctxt->apf_ipe_lum_ip[IPE_FUNC_MODE_19TO25] =
+            ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_mode_19_to_25_fptr;
+        ps_ctxt->apf_ipe_lum_ip[IPE_FUNC_MODE_26] =
+            ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_ver_fptr;
+        ps_ctxt->apf_ipe_lum_ip[IPE_FUNC_MODE_27TO33] =
+            ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_mode_27_to_33_fptr;
+
+        /* nbr parameters initialization */
+        /* one-time setup of the neighbour map stride and base pointer */
+
+        ps_ctxt->i4_nbr_map_strd = MAX_PU_IN_CTB_ROW + 1 + 8;
+
+        ps_ctxt->pu1_ctb_nbr_map = ps_ctxt->au1_nbr_ctb_map[0];
+
+        /* move the pointer past the first row and the first column of the map */
+        ps_ctxt->pu1_ctb_nbr_map += ps_ctxt->i4_nbr_map_strd;
+        ps_ctxt->pu1_ctb_nbr_map++;
+        /* bit 0 of i4_cu_level_rc switches the L0 IPE QP modulation on */
+        ps_ctxt->i4_l0ipe_qp_mod = ps_init_prms->s_config_prms.i4_cu_level_rc & 1;
+        ps_ctxt->i4_pass = ps_init_prms->s_pass_prms.i4_pass;
+        if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 0)
+        {
+            /* initialise the scale & rescale matrices */
+            ps_ctxt->api2_scal_mat[0] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
+            ps_ctxt->api2_scal_mat[1] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
+            ps_ctxt->api2_scal_mat[2] = (WORD16 *)&gi2_flat_scale_mat_8x8[0];
+            ps_ctxt->api2_scal_mat[3] = (WORD16 *)&gi2_flat_scale_mat_16x16[0];
+            ps_ctxt->api2_scal_mat[4] = (WORD16 *)&gi2_flat_scale_mat_32x32[0];
+            /*init for inter matrix*/
+            ps_ctxt->api2_scal_mat[5] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
+            ps_ctxt->api2_scal_mat[6] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
+            ps_ctxt->api2_scal_mat[7] = (WORD16 *)&gi2_flat_scale_mat_8x8[0];
+            ps_ctxt->api2_scal_mat[8] = (WORD16 *)&gi2_flat_scale_mat_16x16[0];
+            ps_ctxt->api2_scal_mat[9] = (WORD16 *)&gi2_flat_scale_mat_32x32[0];
+
+            /*init for rescale matrix*/
+            ps_ctxt->api2_rescal_mat[0] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
+            ps_ctxt->api2_rescal_mat[1] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
+            ps_ctxt->api2_rescal_mat[2] = (WORD16 *)&gi2_flat_rescale_mat_8x8[0];
+            ps_ctxt->api2_rescal_mat[3] = (WORD16 *)&gi2_flat_rescale_mat_16x16[0];
+            ps_ctxt->api2_rescal_mat[4] = (WORD16 *)&gi2_flat_rescale_mat_32x32[0];
+            /*init for rescale inter matrix*/
+            ps_ctxt->api2_rescal_mat[5] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
+            ps_ctxt->api2_rescal_mat[6] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
+            ps_ctxt->api2_rescal_mat[7] = (WORD16 *)&gi2_flat_rescale_mat_8x8[0];
+            ps_ctxt->api2_rescal_mat[8] = (WORD16 *)&gi2_flat_rescale_mat_16x16[0];
+            ps_ctxt->api2_rescal_mat[9] = (WORD16 *)&gi2_flat_rescale_mat_32x32[0];
+        }
+        else if(ps_init_prms->s_coding_tools_prms.i4_use_default_sc_mtx == 1)
+        {
+            /* initialise the scale & rescale matrices */
+            ps_ctxt->api2_scal_mat[0] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
+            ps_ctxt->api2_scal_mat[1] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
+            ps_ctxt->api2_scal_mat[2] = (WORD16 *)&gi2_intra_default_scale_mat_8x8[0];
+            ps_ctxt->api2_scal_mat[3] = (WORD16 *)&gi2_intra_default_scale_mat_16x16[0];
+            ps_ctxt->api2_scal_mat[4] = (WORD16 *)&gi2_intra_default_scale_mat_32x32[0];
+            /*init for inter matrix*/
+            ps_ctxt->api2_scal_mat[5] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
+            ps_ctxt->api2_scal_mat[6] = (WORD16 *)&gi2_flat_scale_mat_4x4[0];
+            ps_ctxt->api2_scal_mat[7] = (WORD16 *)&gi2_inter_default_scale_mat_8x8[0];
+            ps_ctxt->api2_scal_mat[8] = (WORD16 *)&gi2_inter_default_scale_mat_16x16[0];
+            ps_ctxt->api2_scal_mat[9] = (WORD16 *)&gi2_inter_default_scale_mat_32x32[0];
+
+            /*init for rescale matrix*/
+            ps_ctxt->api2_rescal_mat[0] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
+            ps_ctxt->api2_rescal_mat[1] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
+            ps_ctxt->api2_rescal_mat[2] = (WORD16 *)&gi2_intra_default_rescale_mat_8x8[0];
+            ps_ctxt->api2_rescal_mat[3] = (WORD16 *)&gi2_intra_default_rescale_mat_16x16[0];
+            ps_ctxt->api2_rescal_mat[4] = (WORD16 *)&gi2_intra_default_rescale_mat_32x32[0];
+            /*init for rescale inter matrix*/
+            ps_ctxt->api2_rescal_mat[5] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
+            ps_ctxt->api2_rescal_mat[6] = (WORD16 *)&gi2_flat_rescale_mat_4x4[0];
+            ps_ctxt->api2_rescal_mat[7] = (WORD16 *)&gi2_inter_default_rescale_mat_8x8[0];
+            ps_ctxt->api2_rescal_mat[8] = (WORD16 *)&gi2_inter_default_rescale_mat_16x16[0];
+            ps_ctxt->api2_rescal_mat[9] = (WORD16 *)&gi2_inter_default_rescale_mat_32x32[0];
+        }
+        else
+        {
+            ASSERT(0);
+        }
+
+        ps_ctxt->u1_bit_depth = ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth;
+
+        /**
+        * Initialize the intra prediction modes map for the CTB to INTRA_DC
+        **/
+        {
+            WORD32 row, col;
+            for(row = 0; row < (MAX_TU_ROW_IN_CTB + 1); row++)
+            {
+                for(col = 0; col < (MAX_TU_COL_IN_CTB + 1); col++)
+                {
+                    ps_ctxt->au1_ctb_mode_map[row][col] = INTRA_DC;
+                }
+            }
+        }
+
+        ihevce_cmn_utils_instr_set_router(
+            &ps_ctxt->s_cmn_opt_func, u1_is_popcnt_available, ps_init_prms->e_arch_type);
+
+        ihevce_ipe_instr_set_router(
+            &ps_ctxt->s_ipe_optimised_function_list, ps_init_prms->e_arch_type);
+
+        /* increment the thread ctxt pointer */
+        ps_ctxt++;
+    }
+
+    /* return the handle to caller */
+    return ((void *)ps_master_ctxt);
+}
+/*!
+******************************************************************************
+* \if Function name : ihevce_ipe_get_frame_intra_satd_cost \endif
+*
+* \brief
+* Function to export the frame-level accumulated SATD statistics.
+*
+* \param[in] pv_ctxt : pointer to IPE module
+*
+* \return
+* Frame-level accumulated SATD cost, summed over all thread contexts
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+LWORD64 ihevce_ipe_get_frame_intra_satd_cost(
+    void *pv_ctxt,
+    LWORD64 *pi8_frame_satd_by_qpmod,
+    LWORD64 *pi8_frame_acc_mode_bits_cost,
+    LWORD64 *pi8_frame_acc_activity_factor,
+    LWORD64 *pi8_frame_l0_acc_satd)
+{
+    /* pv_ctxt is the IPE master context handle */
+    ihevce_ipe_master_ctxt_t *ps_master = (ihevce_ipe_master_ctxt_t *)pv_ctxt;
+    WORD32 i4_idx;
+
+    /* frame level totals, summed over all the thread contexts */
+    LWORD64 i8_sum_satd_cost = 0;
+    LWORD64 i8_sum_satd = 0;
+    LWORD64 i8_sum_satd_by_qpmod = 0;
+    LWORD64 i8_sum_mode_bits_cost = 0;
+    LWORD64 i8_sum_act_factor = 0;
+
+    for(i4_idx = 0; i4_idx < ps_master->i4_num_proc_thrds; i4_idx++)
+    {
+        const ihevce_ipe_ctxt_t *ps_thrd = ps_master->aps_ipe_thrd_ctxt[i4_idx];
+
+        i8_sum_satd_cost += ps_thrd->i8_frame_acc_satd_cost;
+        i8_sum_satd += ps_thrd->i8_frame_acc_satd;
+        i8_sum_mode_bits_cost += ps_thrd->i8_frame_acc_mode_bits_cost;
+        i8_sum_act_factor += ps_thrd->i8_frame_acc_act_factor;
+
+        /* each thread's _q10 accumulator is right-shifted by */
+        /* SATD_BY_ACT_Q_FAC before it is added to the total  */
+        i8_sum_satd_by_qpmod +=
+            (ps_thrd->i8_frame_acc_satd_by_modqp_q10 >> SATD_BY_ACT_Q_FAC);
+    }
+
+    /* hand the totals back through the output pointers */
+    *pi8_frame_satd_by_qpmod = i8_sum_satd_by_qpmod;
+    *pi8_frame_acc_mode_bits_cost = i8_sum_mode_bits_cost;
+    *pi8_frame_acc_activity_factor = i8_sum_act_factor;
+    *pi8_frame_l0_acc_satd = i8_sum_satd;
+
+    /* the accumulated SATD cost itself is the return value */
+    return (i8_sum_satd_cost);
+}
+
+/**
+*******************************************************************************
+* \if Function name : ihevce_intra_pred_ref_filtering \endif
+*
+* \brief
+* Intra prediction interpolation filter for ref_filtering for Encoder
+*
+* \par Description:
+* Reference DC filtering for neighboring samples dependent on TU size and
+* mode Refer to section 8.4.4.2.3 in the standard
+*
+* \param[in] pu1_src pointer to the source
+* \param[out] pu1_dst pointer to the destination
+* \param[in] nt integer Transform Block size
+*
+* \returns
+* none
+*
+* \author
+* Ittiam
+*
+*******************************************************************************
+*/
+
+#if IHEVCE_INTRA_REF_FILTERING == C
+void ihevce_intra_pred_ref_filtering(UWORD8 *pu1_src, WORD32 nt, UWORD8 *pu1_dst)
+{
+    const WORD32 last = 4 * nt; /* highest sample index touched */
+    WORD32 k;
+
+    /* the two end samples are copied through unfiltered */
+    pu1_dst[0] = pu1_src[0];
+    pu1_dst[last] = pu1_src[last];
+    /* every sample in between is smoothed with (1, 2, 1) weights, rounded, / 4 */
+    for(k = 1; k < last; k++)
+    {
+        pu1_dst[k] = (pu1_src[k - 1] + 2 * pu1_src[k] + pu1_src[k + 1] + 2) >> 2;
+    }
+}
+#endif
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_ipe_process_ctb \endif
+*
+* \brief
+* CTB level IPE function
+*
+* \param[in] pv_ctxt : pointer to IPE module
+* \param[in] ps_frm_ctb_prms : CTB characteristics parameters
+* \param[in] ps_curr_src : pointer to input yuv buffer (row buffer)
+* \param[out] ps_ctb_out : pointer to CTB analyse output structure (row buffer)
+* \param[out] ps_row_cu : pointer to CU analyse output structure (row buffer)
+*
+* \return
+* None
+*
+* Note : This function will receive CTB pointers which may point to
+* blocks of CTB size or smaller (at the right and bottom edges of the picture)
+* This function recursively creates smaller square partitions and passes them
+* on for intra processing estimation
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_ipe_process_ctb(
+ ihevce_ipe_ctxt_t *ps_ctxt,
+ frm_ctb_ctxt_t *ps_frm_ctb_prms,
+ iv_enc_yuv_buf_t *ps_curr_src,
+ ihevce_ipe_cu_tree_t *ps_curr_ctb_node,
+ ipe_l0_ctb_analyse_for_me_t *ps_l0_ipe_out_ctb,
+ ctb_analyse_t *ps_ctb_out,
+ //cu_analyse_t *ps_row_cu,
+ ihevce_ed_blk_t *ps_ed_l1_ctb,
+ ihevce_ed_blk_t *ps_ed_l2_ctb,
+ ihevce_ed_ctb_l1_t *ps_ed_ctb_l1)
+{
+ /* zero the whole neighbour map; each side is (MAX_PU_IN_CTB_ROW + 1 + 8), the value ihevce_ipe_init() uses as the map stride */
+ memset(
+ &ps_ctxt->au1_nbr_ctb_map[0][0],
+ 0,
+ (MAX_PU_IN_CTB_ROW + 1 + 8) * (MAX_PU_IN_CTB_ROW + 1 + 8));
+
+ /* set the CTB neighbour availability flags (CTB position is passed as u2_ctb_num_in_row / u2_ctb_row_num) */
+ ihevce_set_ctb_nbr(
+ &ps_ctxt->s_ctb_nbr_avail_flags,
+ ps_ctxt->pu1_ctb_nbr_map,
+ ps_ctxt->i4_nbr_map_strd,
+ ps_ctxt->u2_ctb_num_in_row,
+ ps_ctxt->u2_ctb_row_num,
+ ps_frm_ctb_prms);
+
+ /* IPE cu and mode decision; the CTB level inputs and outputs are forwarded unchanged */
+ ihevce_bracketing_analysis(
+ ps_ctxt,
+ ps_curr_ctb_node,
+ ps_curr_src,
+ ps_ctb_out,
+ //ps_row_cu,
+ ps_ed_l1_ctb,
+ ps_ed_l2_ctb,
+ ps_ed_ctb_l1,
+ ps_l0_ipe_out_ctb);
+
+ return;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_ipe_process_row \endif
+*
+* \brief
+* Row level IPE function: runs the CTB level IPE on every CTB of one CTB row
+*
+* \param[in] ps_ctxt : pointer to IPE thread context (per CTB fields are updated here)
+* \param[in] ps_frm_ctb_prms : CTB characteristics parameters
+* \param[in] ps_curr_src : pointer to input yuv buffer (row buffer)
+* \param[out] ps_ipe_ctb_out_row : per CTB IPE output for ME (row buffer)
+* \param[out] ps_ctb_out : pointer to CTB analyse output structure (row buffer)
+*\param[in] ps_ed_l1_row, ps_ed_l2_row, ps_ed_ctb_l1_row : layer 1 / layer 2 pre intra analysis data of the row
+*\param[in] blk_inc_ctb_l1, blk_inc_ctb_l2 : per CTB element increment into ps_ed_l1_row / ps_ed_l2_row
+*
+* \return
+* None
+*
+* Note : Currently the frame level calculations done assumes that
+* framewidth of the input are exact multiple of ctbsize
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_ipe_process_row(
+ ihevce_ipe_ctxt_t *ps_ctxt,
+ frm_ctb_ctxt_t *ps_frm_ctb_prms,
+ iv_enc_yuv_buf_t *ps_curr_src,
+ ipe_l0_ctb_analyse_for_me_t *ps_ipe_ctb_out_row,
+ ctb_analyse_t *ps_ctb_out,
+ //cu_analyse_t *ps_row_cu,
+ ihevce_ed_blk_t *ps_ed_l1_row,
+ ihevce_ed_blk_t *ps_ed_l2_row,
+ ihevce_ed_ctb_l1_t *ps_ed_ctb_l1_row,
+ WORD32 blk_inc_ctb_l1,
+ WORD32 blk_inc_ctb_l2)
+{
+ /* local variables */
+ UWORD16 ctb_ctr;
+ iv_enc_yuv_buf_t s_curr_src_bufs;
+ ipe_l0_ctb_analyse_for_me_t *ps_l0_ipe_out_ctb;
+ UWORD16 u2_pic_wdt;
+ UWORD16 u2_pic_hgt;
+ ihevce_ed_blk_t *ps_ed_l1_ctb;
+ ihevce_ed_blk_t *ps_ed_l2_ctb;
+ ihevce_ed_ctb_l1_t *ps_ed_ctb_l1;
+
+ UWORD8 u1_ctb_size;
+
+ u2_pic_wdt = ps_frm_ctb_prms->i4_cu_aligned_pic_wd;
+ u2_pic_hgt = ps_frm_ctb_prms->i4_cu_aligned_pic_ht;
+
+ u1_ctb_size = ps_ctxt->u1_ctb_size;
+
+ /* ----------------------------------------------------- */
+ /* store the stride and dimensions of source */
+ /* buffer pointers will be over written at every CTB */
+ /* ----------------------------------------------------- */
+ memcpy(&s_curr_src_bufs, ps_curr_src, sizeof(iv_enc_yuv_buf_t));
+ ps_l0_ipe_out_ctb = ps_ipe_ctb_out_row;
+
+ /* --------- Loop over all the CTBs in a row --------------- */
+ for(ctb_ctr = 0; ctb_ctr < ps_frm_ctb_prms->i4_num_ctbs_horz; ctb_ctr++)
+ {
+ //UWORD8 num_cus_in_ctb;
+
+ UWORD8 *pu1_tmp;
+
+ /* Create pointer to ctb node */
+ ihevce_ipe_cu_tree_t *ps_ctb_node;
+
+ WORD32 nbr_flags;
+
+ WORD32 row;
+ /* luma src : advance by one CTB width per CTB */
+ pu1_tmp = (UWORD8 *)ps_curr_src->pv_y_buf;
+ pu1_tmp += (ctb_ctr * ps_frm_ctb_prms->i4_ctb_size);
+
+ s_curr_src_bufs.pv_y_buf = pu1_tmp;
+
+ /* Cb & Cr pixel interleaved src : advance by half a CTB width per CTB */
+ pu1_tmp = (UWORD8 *)ps_curr_src->pv_u_buf;
+ pu1_tmp += (ctb_ctr * (ps_frm_ctb_prms->i4_ctb_size >> 1));
+
+ s_curr_src_bufs.pv_u_buf = pu1_tmp;
+
+ /* Store the number of current ctb within row in the context */
+ ps_ctxt->u2_ctb_num_in_row = ctb_ctr;
+
+ /* Initialize number of coding units in ctb to 0 */
+ ps_ctb_out->u1_num_cus_in_ctb = 0;
+ /* Initialize split flag to 0 - No partition */
+ ps_ctb_out->u4_cu_split_flags = 0;
+ /* store the cu pointer for current ctb out */
+ //ps_ctb_out->ps_coding_units_in_ctb = ps_row_cu;
+
+ /* Initialize the CTB parameters at the root node level */
+ ps_ctb_node = ps_ctxt->ps_ipe_cu_tree;
+ ps_ctb_node->ps_parent = NULL;
+ ps_ctb_node->u1_depth = 0;
+ ps_ctb_node->u1_cu_size = u1_ctb_size;
+ ps_ctb_node->u2_x0 = 0;
+ ps_ctb_node->u2_y0 = 0;
+
+ ps_ctb_node->u2_orig_x = ctb_ctr * ps_ctb_node->u1_cu_size;
+ ps_ctb_node->u2_orig_y = ps_ctxt->u2_ctb_row_num * ps_ctb_node->u1_cu_size;
+
+ ps_ctb_node->u1_width = u1_ctb_size;
+ ps_ctb_node->u1_height = u1_ctb_size;
+#if !(PIC_ALIGN_CTB_SIZE)
+ if(ps_ctxt->u2_ctb_num_in_row == (ps_frm_ctb_prms->i4_num_ctbs_horz - 1))
+ {
+ ps_ctb_node->u1_width = u2_pic_wdt - (ps_ctxt->u2_ctb_num_in_row) * (u1_ctb_size);
+ }
+ if(ps_ctxt->u2_ctb_row_num == (ps_frm_ctb_prms->i4_num_ctbs_vert - 1))
+ {
+ ps_ctb_node->u1_height = u2_pic_hgt - (ps_ctxt->u2_ctb_row_num) * (u1_ctb_size);
+ }
+#endif
+
+ switch(ps_ctb_node->u1_cu_size)
+ {
+ case 64:
+ ps_ctb_node->u1_log2_nt = 6;
+ ps_ctb_node->u1_part_flag_pos = 0;
+ break;
+ case 32:
+ ps_ctb_node->u1_log2_nt = 5;
+ ps_ctb_node->u1_part_flag_pos = 4;
+ break;
+ case 16:
+ ps_ctb_node->u1_log2_nt = 4;
+ ps_ctb_node->u1_part_flag_pos = 8;
+ break;
+ } /* NOTE(review): no default case - any other CTB size leaves u1_log2_nt / u1_part_flag_pos unchanged */
+
+ /* Set neighbor flags for the CTB */
+ nbr_flags = 0;
+
+ if(ps_ctxt->u2_ctb_num_in_row != 0)
+ {
+ nbr_flags |= LEFT_FLAG; /* Set Left Flag if not in first column */
+ ps_ctb_node->u1_num_left_avail = ((u2_pic_hgt - ps_ctb_node->u2_orig_y) >= u1_ctb_size)
+ ? u1_ctb_size
+ : u2_pic_hgt - ps_ctb_node->u2_orig_y;
+ }
+ else
+ {
+ ps_ctb_node->u1_num_left_avail = 0;
+ }
+
+ if((ps_ctxt->u2_ctb_num_in_row != 0) && (ps_ctxt->u2_ctb_row_num != 0))
+ nbr_flags |= TOP_LEFT_FLAG; /* Set Top-Left Flag if in neither the first row nor the first column */
+
+ if(ps_ctxt->u2_ctb_row_num != 0)
+ {
+ nbr_flags |= TOP_FLAG; /* Set Top Flag if not in first row */
+ ps_ctb_node->u1_num_top_avail = ((u2_pic_wdt - ps_ctb_node->u2_orig_x) >= u1_ctb_size)
+ ? u1_ctb_size
+ : u2_pic_wdt - ps_ctb_node->u2_orig_x;
+ }
+ else
+ {
+ ps_ctb_node->u1_num_top_avail = 0;
+ }
+
+ if(ps_ctxt->u2_ctb_row_num != 0)
+ {
+ if(ps_ctxt->u2_ctb_num_in_row == (ps_frm_ctb_prms->i4_num_ctbs_horz - 1))
+ ps_ctb_node->u1_num_top_right_avail = 0;
+ else
+ {
+ ps_ctb_node->u1_num_top_right_avail =
+ ((u2_pic_wdt - ps_ctb_node->u2_orig_x - u1_ctb_size) >= u1_ctb_size)
+ ? u1_ctb_size
+ : u2_pic_wdt - ps_ctb_node->u2_orig_x - u1_ctb_size;
+ nbr_flags |=
+ TOP_RIGHT_FLAG; /* Set Top-Right Flag if in neither the first row nor the last column */
+ }
+ }
+ else
+ {
+ ps_ctb_node->u1_num_top_right_avail = 0;
+ }
+
+ ps_ctb_node->u1_num_bottom_left_avail = 0; /* set to 0 for every CTB, whatever its position */
+
+ ps_ctb_node->i4_nbr_flag = nbr_flags;
+
+ /**
+ * Update CTB Mode Map
+ * In case this is first CTB in a row, set left most column to INTRA_DC (NA)
+ * else copy last column to first column
+ **/
+ if(ctb_ctr == 0)
+ {
+ for(row = 0; row < (MAX_TU_ROW_IN_CTB + 1); row++)
+ {
+ ps_ctxt->au1_ctb_mode_map[row][0] = INTRA_DC;
+ }
+ }
+ else
+ {
+ for(row = 0; row < (MAX_TU_ROW_IN_CTB + 1); row++)
+ {
+ ps_ctxt->au1_ctb_mode_map[row][0] =
+ ps_ctxt->au1_ctb_mode_map[row][MAX_TU_COL_IN_CTB];
+ }
+ }
+
+ /* --------- IPE call at CTB level ------------------ */
+
+ /* IPE CTB function is expected to decide on the CU sizes */
+ /* and populate the best intra prediction modes and TX flags*/
+ /* Interface of this CTB level function is kept open */
+
+ ps_ed_l1_ctb = ps_ed_l1_row + ctb_ctr * blk_inc_ctb_l1;
+ ps_ed_l2_ctb = ps_ed_l2_row + ctb_ctr * blk_inc_ctb_l2;
+ ps_ed_ctb_l1 = ps_ed_ctb_l1_row + ctb_ctr;
+
+ if(ps_ctxt->u1_use_lambda_derived_from_min_8x8_act_in_ctb)
+ {
+ /*HACK : MAMATHA, This function assumes that data is accumulated
+ for all probable CU-TU combinations for incomplete CTB, which is currently not the case,
+ hence not recomputing lambda for the incomplete CTB */
+ if((ps_ctb_node->u1_width == u1_ctb_size) && (ps_ctb_node->u1_height == u1_ctb_size))
+ {
+ ihevce_ipe_recompute_lambda_from_min_8x8_act_in_ctb(ps_ctxt, ps_ed_ctb_l1);
+ }
+ }
+
+ ihevce_ipe_process_ctb(
+ ps_ctxt,
+ ps_frm_ctb_prms,
+ &s_curr_src_bufs,
+ ps_ctb_node,
+ ps_l0_ipe_out_ctb,
+ ps_ctb_out,
+ //ps_row_cu,
+ ps_ed_l1_ctb,
+ ps_ed_l2_ctb,
+ ps_ed_ctb_l1);
+
+ /* -------------- ctb level updates ----------------- */
+
+ ps_l0_ipe_out_ctb++;
+ //num_cus_in_ctb = ps_ctb_out->u1_num_cus_in_ctb;
+
+ //ps_row_cu += num_cus_in_ctb;
+
+ ps_ctb_out++;
+ }
+ return;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_ipe_process \endif
+*
+* \brief
+* Frame level IPE function. Row jobs are fetched from the pre-enc job queue
+* and the row level IPE is run on each of them until no job is returned
+*
+* \param[in] pv_ctxt : pointer to IPE master context
+* \param[in] ps_frm_ctb_prms : CTB characteristics parameters
+* \param[in] ps_frm_lamda : frame level lambda parameters
+* \param[in] ps_curr_inp : current input buffer (source yuv and LAP output)
+* \param[in] ps_L0_IPE_curr_out_pre_enc : not referenced by this function
+* \param[out] ps_ctb_out : pointer to CTB analyse output structure (frame buffer)
+* \param[out] ps_ipe_ctb_out : per CTB IPE output for ME (frame buffer)
+* \param[in] pv_multi_thrd_ctxt : multi thread context handed to the job queue
+* \param[in] slice_type : slice type of the current picture
+* \param[in,out] ps_ed_pic_l1 : layer 1 pre intra analysis blocks (frame buffer);
+* the blocks of a row are set to 0 once the row is processed
+* \param[in,out] ps_ed_pic_l2 : layer 2 pre intra analysis blocks (frame buffer);
+* the blocks of a row are set to 0 once the row is processed
+* \param[in] ps_ed_ctb_l1_pic : layer 1 CTB level pre intra analysis data
+* \param[in] thrd_id : index of the IPE thread context to use
+* \param[in] i4_ping_pong : ping pong index handed to the job queue
+*
+* \return
+* None
+*
+* Note : Currently the frame level calculations done assumes that
+* framewidth of the input are exact multiple of ctbsize
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_ipe_process(
+    void *pv_ctxt,
+    frm_ctb_ctxt_t *ps_frm_ctb_prms,
+    frm_lambda_ctxt_t *ps_frm_lamda,
+    ihevce_lap_enc_buf_t *ps_curr_inp,
+    pre_enc_L0_ipe_encloop_ctxt_t *ps_L0_IPE_curr_out_pre_enc,
+    ctb_analyse_t *ps_ctb_out,
+    ipe_l0_ctb_analyse_for_me_t *ps_ipe_ctb_out,
+    void *pv_multi_thrd_ctxt,
+    WORD32 slice_type,
+    ihevce_ed_blk_t *ps_ed_pic_l1,
+    ihevce_ed_blk_t *ps_ed_pic_l2,
+    ihevce_ed_ctb_l1_t *ps_ed_ctb_l1_pic,
+    WORD32 thrd_id,
+    WORD32 i4_ping_pong)
+{
+    /* local variables */
+    ihevce_ipe_master_ctxt_t *ps_master_ctxt;
+    iv_enc_yuv_buf_t *ps_inp = &ps_curr_inp->s_lap_out.s_input_buf;
+    ihevce_ipe_ctxt_t *ps_ctxt;
+    iv_enc_yuv_buf_t s_curr_src_bufs;
+    WORD32 end_of_frame;
+
+    ihevce_ed_blk_t *ps_ed_l1_row;
+    ihevce_ed_blk_t *ps_ed_l2_row;
+    ihevce_ed_ctb_l1_t *ps_ed_ctb_l1_row;
+    WORD32 blk_inc_ctb_l1 = 0;
+    WORD32 blk_inc_ctb_l2 = 0;
+
+    /* Layer 1 pre intra analysis related initialization.
+     * Compute no of 8x8 blks in the ctb which is
+     * same as no of 4x4 blks in the ctb in layer 1 */
+    blk_inc_ctb_l1 = ps_frm_ctb_prms->i4_ctb_size >> 3;
+    blk_inc_ctb_l1 = blk_inc_ctb_l1 * blk_inc_ctb_l1;
+
+    /* Layer 2 pre intra analysis related initialization.
+     * Compute no of 16x16 blks in the ctb which is
+     * same as no of 8x8 blks in the ctb in layer 2 */
+    blk_inc_ctb_l2 = ps_frm_ctb_prms->i4_ctb_size >> 4;
+    blk_inc_ctb_l2 = blk_inc_ctb_l2 * blk_inc_ctb_l2;
+
+    /* ----------------------------------------------------- */
+    /* store the stride and dimensions of source             */
+    /* buffer pointers will be over written at every CTB row */
+    /* ----------------------------------------------------- */
+    memcpy(&s_curr_src_bufs, ps_inp, sizeof(iv_enc_yuv_buf_t));
+
+    ps_master_ctxt = (ihevce_ipe_master_ctxt_t *)pv_ctxt;
+    ps_ctxt = ps_master_ctxt->aps_ipe_thrd_ctxt[thrd_id];
+    end_of_frame = 0;
+
+    /* The SAD type and the frame level open loop lambdas are the same for  */
+    /* every slice type, so they are set without any slice type check.      */
+    /* slice_type is still needed below, where the per QP lambda arrays are */
+    /* populated and the slice type is registered in the context            */
+    ps_ctxt->b_sad_type = IPE_SAD_TYPE;
+    ps_ctxt->i4_ol_satd_lambda = ps_frm_lamda->i4_ol_satd_lambda_qf;
+    ps_ctxt->i4_ol_sad_lambda = ps_frm_lamda->i4_ol_sad_lambda_qf;
+
+    ihevce_populate_ipe_ol_cu_lambda_prms(
+        (void *)ps_ctxt,
+        ps_frm_lamda,
+        slice_type,
+        ps_curr_inp->s_lap_out.i4_temporal_lyr_id,
+        IPE_LAMBDA_TYPE);
+
+    /* register the slice type in the ctxt */
+    ps_ctxt->i4_slice_type = slice_type;
+
+    /** Frame-level SATD cost accumulator init to 0 */
+    ps_ctxt->i8_frame_acc_satd_cost = 0;
+
+    /** Frame-level SATD accumulator init to 0 */
+    ps_ctxt->i8_frame_acc_satd = 0;
+
+    /** Frame-level Activity factor accumulator init to 1 */
+    ps_ctxt->i8_frame_acc_act_factor = 1;
+
+    /** Frame-level Mode Bits cost accumulator init to 0 */
+    ps_ctxt->i8_frame_acc_mode_bits_cost = 0;
+
+    /** Frame-level SATD/qp accumulator init to 0 */
+    ps_ctxt->i8_frame_acc_satd_by_modqp_q10 = 0;
+
+    /* ------------ Loop over all the CTB rows --------------- */
+    while(0 == end_of_frame)
+    {
+        UWORD8 *pu1_tmp;
+        WORD32 vert_ctr;
+        ctb_analyse_t *ps_ctb_out_row;
+        job_queue_t *ps_job;
+        ipe_l0_ctb_analyse_for_me_t *ps_ipe_ctb_out_row;
+
+        /* Get the current row from the job queue */
+        ps_job = (job_queue_t *)ihevce_pre_enc_grp_get_next_job(
+            pv_multi_thrd_ctxt, IPE_JOB_LYR0, 1, i4_ping_pong);
+
+        /* If all rows are done, set the end of process flag to 1, */
+        /* and the current row to -1                                */
+        if(NULL == ps_job)
+        {
+            vert_ctr = -1;
+            end_of_frame = 1;
+        }
+        else
+        {
+            ASSERT(IPE_JOB_LYR0 == ps_job->i4_pre_enc_task_type);
+
+            /* Obtain the current row's details from the job */
+            vert_ctr = ps_job->s_job_info.s_ipe_job_info.i4_ctb_row_no;
+
+            /* Update the ipe context with current row number */
+            ps_ctxt->u2_ctb_row_num = vert_ctr;
+
+            /* derive the current ctb row pointers */
+
+            /* luma src */
+            pu1_tmp = (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_y_buf;
+            pu1_tmp += (vert_ctr * ps_frm_ctb_prms->i4_ctb_size * ps_inp->i4_y_strd);
+
+            s_curr_src_bufs.pv_y_buf = pu1_tmp;
+
+            /* Cb & Cr pixel interleaved src */
+            pu1_tmp = (UWORD8 *)ps_curr_inp->s_lap_out.s_input_buf.pv_u_buf;
+            pu1_tmp += (vert_ctr * (ps_frm_ctb_prms->i4_ctb_size >> 1) * ps_inp->i4_uv_strd);
+
+            s_curr_src_bufs.pv_u_buf = pu1_tmp;
+
+            /* row intra analyse cost buffer */
+            ps_ipe_ctb_out_row = ps_ipe_ctb_out + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
+
+            /* row ctb out structure */
+            ps_ctb_out_row = ps_ctb_out + vert_ctr * ps_frm_ctb_prms->i4_num_ctbs_horz;
+
+            /* derive the layer 1 / layer 2 row pointers and call the row level function */
+            ps_ed_l1_row =
+                ps_ed_pic_l1 + ps_frm_ctb_prms->i4_num_ctbs_horz * blk_inc_ctb_l1 * vert_ctr;
+            ps_ed_l2_row =
+                ps_ed_pic_l2 + ps_frm_ctb_prms->i4_num_ctbs_horz * blk_inc_ctb_l2 * vert_ctr;
+            ps_ed_ctb_l1_row = ps_ed_ctb_l1_pic + ps_frm_ctb_prms->i4_num_ctbs_horz * vert_ctr;
+            ihevce_ipe_process_row(
+                ps_ctxt,
+                ps_frm_ctb_prms,
+                &s_curr_src_bufs,
+                ps_ipe_ctb_out_row,
+                ps_ctb_out_row,
+                ps_ed_l1_row,
+                ps_ed_l2_row,
+                ps_ed_ctb_l1_row,
+                blk_inc_ctb_l1,
+                blk_inc_ctb_l2);
+
+            memset(
+                ps_ed_l1_row,
+                0,
+                ps_frm_ctb_prms->i4_num_ctbs_horz * blk_inc_ctb_l1 * sizeof(ihevce_ed_blk_t));
+            memset(
+                ps_ed_l2_row,
+                0,
+                ps_frm_ctb_prms->i4_num_ctbs_horz * blk_inc_ctb_l2 * sizeof(ihevce_ed_blk_t));
+
+            /* set the output dependency */
+            ihevce_pre_enc_grp_job_set_out_dep(pv_multi_thrd_ctxt, ps_job, i4_ping_pong);
+        }
+    }
+
+    return;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_get_ipe_ol_cu_lambda_prms \endif
+*
+* \brief
+* Function which loads the open loop SAD / SATD lambdas for a given CU QP
+*
+* \param[in,out] pv_ctxt : IPE thread context (ihevce_ipe_ctxt_t); its
+* i4_ol_satd_lambda and i4_ol_sad_lambda fields are overwritten
+* \param[in] i4_cur_cu_qp : CU QP, used directly as index into the per QP
+* lambda arrays of the context (no range check is done here)
+* The arrays are written by ihevce_populate_ipe_ol_cu_lambda_prms()
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_get_ipe_ol_cu_lambda_prms(void *pv_ctxt, WORD32 i4_cur_cu_qp)
+{
+    ihevce_ipe_ctxt_t *ps_ipe_ctxt = (ihevce_ipe_ctxt_t *)pv_ctxt;
+    WORD32 i4_qp_idx = i4_cur_cu_qp;
+
+    /* Make the lambdas of this QP the active ones for the IPE pass */
+    ps_ipe_ctxt->i4_ol_sad_lambda = ps_ipe_ctxt->i4_ol_sad_lambda_qf_array[i4_qp_idx];
+    ps_ipe_ctxt->i4_ol_satd_lambda = ps_ipe_ctxt->i4_ol_satd_lambda_qf_array[i4_qp_idx];
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_populate_ipe_ol_cu_lambda_prms \endif
+*
+* \brief
+* Function which fills the per QP open loop SAD / SATD lambda arrays of IPE
+*
+* \param[in,out] pv_ctxt : IPE thread context (ihevce_ipe_ctxt_t) holding the arrays
+* \param[in] ps_frm_lamda : frame level lambda modifiers
+* \param[in] i4_slice_type : slice type (selects the lambda modifier)
+* \param[in] i4_temporal_lyr_id : Current picture layer id
+* \param[in] i4_lambda_type : 0 for IPE; types 1 and 2 assert (not meant for IPE)
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_populate_ipe_ol_cu_lambda_prms(
+    void *pv_ctxt,
+    frm_lambda_ctxt_t *ps_frm_lamda,
+    WORD32 i4_slice_type,
+    WORD32 i4_temporal_lyr_id,
+    WORD32 i4_lambda_type)
+{
+    WORD32 i4_curr_cu_qp;
+    double lambda_modifier;
+    double lambda_uv_modifier;
+    double lambda;
+    double lambda_uv; /* computed alongside lambda but not stored by this function */
+
+    ihevce_ipe_ctxt_t *ps_ctxt = (ihevce_ipe_ctxt_t *)pv_ctxt;
+
+    WORD32 i4_qp_bd_offset = 6 * (ps_ctxt->u1_bit_depth - 8);
+
+    for(i4_curr_cu_qp =
+            ps_ctxt->ps_rc_quant_ctxt->i2_min_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset;
+        i4_curr_cu_qp <= ps_ctxt->ps_rc_quant_ctxt->i2_max_qp;
+        i4_curr_cu_qp++)
+    {
+        WORD32 chroma_qp = i4_curr_cu_qp;
+
+        if((BSLICE == i4_slice_type) && (i4_temporal_lyr_id))
+        {
+            lambda_modifier = ps_frm_lamda->lambda_modifier *
+                              CLIP3((((double)(i4_curr_cu_qp - 12)) / 6.0), 2.00, 4.00);
+            lambda_uv_modifier = ps_frm_lamda->lambda_uv_modifier *
+                                 CLIP3((((double)(chroma_qp - 12)) / 6.0), 2.00, 4.00);
+        }
+        else
+        {
+            lambda_modifier = ps_frm_lamda->lambda_modifier;
+            lambda_uv_modifier = ps_frm_lamda->lambda_uv_modifier;
+        }
+        if(ps_ctxt->i4_use_const_lamda_modifier)
+        {
+            if(ISLICE == i4_slice_type)
+            {
+                lambda_modifier = ps_ctxt->f_i_pic_lamda_modifier;
+                lambda_uv_modifier = ps_ctxt->f_i_pic_lamda_modifier;
+            }
+            else
+            {
+                lambda_modifier = CONST_LAMDA_MOD_VAL;
+                lambda_uv_modifier = CONST_LAMDA_MOD_VAL;
+            }
+        }
+
+        switch(i4_lambda_type)
+        {
+        case 0:
+        {
+            i4_qp_bd_offset = 0; /* type 0 : no bit depth offset for any array */
+
+            lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bd_offset - 12)) / 3.0));
+
+            lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bd_offset - 12)) / 3.0));
+
+            lambda *= lambda_modifier;
+            lambda_uv *= lambda_uv_modifier;
+            if(ps_ctxt->i4_use_const_lamda_modifier)
+            {
+                ps_ctxt->i4_ol_sad_lambda_qf_array[i4_curr_cu_qp] =
+                    (WORD32)((sqrt(lambda)) * (1 << LAMBDA_Q_SHIFT));
+
+                ps_ctxt->i4_ol_satd_lambda_qf_array[i4_curr_cu_qp] =
+                    (WORD32)((sqrt(lambda)) * (1 << LAMBDA_Q_SHIFT));
+            }
+            else
+            {
+                ps_ctxt->i4_ol_sad_lambda_qf_array[i4_curr_cu_qp] =
+                    (WORD32)((sqrt(lambda) / 2) * (1 << LAMBDA_Q_SHIFT));
+
+                ps_ctxt->i4_ol_satd_lambda_qf_array[i4_curr_cu_qp] =
+                    (WORD32)((sqrt(lambda * 1.9) / 2) * (1 << LAMBDA_Q_SHIFT));
+            }
+
+            ps_ctxt->i4_ol_sad_type2_lambda_qf_array[i4_curr_cu_qp] =
+                ps_ctxt->i4_ol_sad_lambda_qf_array[i4_curr_cu_qp];
+
+            ps_ctxt->i4_ol_satd_type2_lambda_qf_array[i4_curr_cu_qp] =
+                ps_ctxt->i4_ol_satd_lambda_qf_array[i4_curr_cu_qp];
+
+            break;
+        }
+        case 1:
+        {
+            ASSERT(0); /* should not enter the path for IPE*/
+            lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bd_offset - 12)) / 3.0));
+
+            lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bd_offset - 12)) / 3.0));
+
+            lambda *= lambda_modifier;
+            lambda_uv *= lambda_uv_modifier;
+            if(ps_ctxt->i4_use_const_lamda_modifier)
+            {
+                ps_ctxt->i4_ol_sad_lambda_qf_array[i4_curr_cu_qp] =
+                    (WORD32)((sqrt(lambda)) * (1 << LAMBDA_Q_SHIFT));
+
+                ps_ctxt->i4_ol_satd_lambda_qf_array[i4_curr_cu_qp] =
+                    (WORD32)((sqrt(lambda)) * (1 << LAMBDA_Q_SHIFT));
+            }
+            else
+            {
+                ps_ctxt->i4_ol_sad_lambda_qf_array[i4_curr_cu_qp] =
+                    (WORD32)((sqrt(lambda) / 2) * (1 << LAMBDA_Q_SHIFT));
+
+                ps_ctxt->i4_ol_satd_lambda_qf_array[i4_curr_cu_qp] =
+                    (WORD32)((sqrt(lambda * 1.9) / 2) * (1 << LAMBDA_Q_SHIFT));
+            }
+
+            ps_ctxt->i4_ol_sad_type2_lambda_qf_array[i4_curr_cu_qp] =
+                ps_ctxt->i4_ol_sad_lambda_qf_array[i4_curr_cu_qp];
+
+            ps_ctxt->i4_ol_satd_type2_lambda_qf_array[i4_curr_cu_qp] =
+                ps_ctxt->i4_ol_satd_lambda_qf_array[i4_curr_cu_qp];
+
+            break;
+        }
+        case 2:
+        {
+            ASSERT(0); /* should not enter the path for IPE*/
+            lambda = pow(2.0, (((double)(i4_curr_cu_qp + i4_qp_bd_offset - 12)) / 3.0));
+
+            lambda_uv = pow(2.0, (((double)(chroma_qp + i4_qp_bd_offset - 12)) / 3.0));
+
+            lambda *= lambda_modifier;
+            lambda_uv *= lambda_uv_modifier;
+            if(ps_ctxt->i4_use_const_lamda_modifier)
+            {
+                ps_ctxt->i4_ol_sad_lambda_qf_array[i4_curr_cu_qp] =
+                    (WORD32)((sqrt(lambda)) * (1 << LAMBDA_Q_SHIFT));
+
+                ps_ctxt->i4_ol_satd_lambda_qf_array[i4_curr_cu_qp] =
+                    (WORD32)((sqrt(lambda)) * (1 << LAMBDA_Q_SHIFT));
+            }
+            else
+            {
+                ps_ctxt->i4_ol_sad_lambda_qf_array[i4_curr_cu_qp] =
+                    (WORD32)((sqrt(lambda) / 2) * (1 << LAMBDA_Q_SHIFT));
+
+                ps_ctxt->i4_ol_satd_lambda_qf_array[i4_curr_cu_qp] =
+                    (WORD32)((sqrt(lambda * 1.9) / 2) * (1 << LAMBDA_Q_SHIFT));
+            }
+            /* type2 arrays : no bit depth offset; i4_qp_bd_offset is left intact for the next QP */
+
+            lambda = pow(2.0, (((double)(i4_curr_cu_qp - 12)) / 3.0));
+
+            lambda_uv = pow(2.0, (((double)(chroma_qp - 12)) / 3.0));
+
+            lambda *= lambda_modifier;
+            lambda_uv *= lambda_uv_modifier;
+            if(ps_ctxt->i4_use_const_lamda_modifier)
+            {
+                ps_ctxt->i4_ol_sad_type2_lambda_qf_array[i4_curr_cu_qp] =
+                    (WORD32)((sqrt(lambda)) * (1 << LAMBDA_Q_SHIFT));
+
+                ps_ctxt->i4_ol_satd_type2_lambda_qf_array[i4_curr_cu_qp] =
+                    (WORD32)((sqrt(lambda)) * (1 << LAMBDA_Q_SHIFT));
+            }
+            else
+            {
+                ps_ctxt->i4_ol_sad_type2_lambda_qf_array[i4_curr_cu_qp] =
+                    (WORD32)((sqrt(lambda) / 2) * (1 << LAMBDA_Q_SHIFT));
+
+                ps_ctxt->i4_ol_satd_type2_lambda_qf_array[i4_curr_cu_qp] =
+                    (WORD32)((sqrt(lambda * 1.9) / 2) * (1 << LAMBDA_Q_SHIFT));
+            }
+            break;
+        }
+        default:
+        {
+            ASSERT(0); /* unsupported lambda type : nothing is populated */
+            break;
+        }
+        }
+    }
+}
+
+#define ME_COST_THRSHOLD 7
+/*!
+******************************************************************************
+* \if Function name : ihevce_populate_ipe_frame_init \endif
+*
+* \brief
+* Function which initialises one IPE thread context for the current picture
+*
+* \param[in] pv_ctxt, i4_thrd_id : IPE master ctxt and index of the thread ctxt that is set up
+* \param[in] ps_stat_prms, ps_curr_out, ps_lap_out : static cfg, pre-enc ME and LAP inputs
+* \param[in] i4_curr_frm_qp, ps_rc_quant_ctxt : current pic QP and RC quant context
+* \param[in] i4_slice_type, i4_quality_preset, i4_temporal_lyr_id : picture level settings
+* \param[in] i1_cu_qp_delta_enabled_flag : needed for u1_use_lambda_derived_from_min_8x8_act_in_ctb to be set
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+#define MAX_64BIT_VAL 0x7fffffffffffffff
+void ihevce_populate_ipe_frame_init(
+ void *pv_ctxt,
+ ihevce_static_cfg_params_t *ps_stat_prms,
+ WORD32 i4_curr_frm_qp,
+ WORD32 i4_slice_type,
+ WORD32 i4_thrd_id,
+ pre_enc_me_ctxt_t *ps_curr_out,
+ WORD8 i1_cu_qp_delta_enabled_flag,
+ rc_quant_t *ps_rc_quant_ctxt,
+ WORD32 i4_quality_preset,
+ WORD32 i4_temporal_lyr_id,
+ ihevce_lap_output_params_t *ps_lap_out)
+{
+ ihevce_ipe_master_ctxt_t *ps_master_ctxt = (ihevce_ipe_master_ctxt_t *)pv_ctxt;
+ WORD32 i4_i;
+ WORD32 ai4_mod_factor_num[2];
+
+ ihevce_ipe_ctxt_t *ps_ctxt = ps_master_ctxt->aps_ipe_thrd_ctxt[i4_thrd_id];
+ ps_ctxt->i4_hevc_qp = i4_curr_frm_qp;
+ ps_ctxt->i4_quality_preset = i4_quality_preset;
+ ps_ctxt->i4_temporal_lyr_id = i4_temporal_lyr_id;
+ ps_ctxt->ps_rc_quant_ctxt = ps_rc_quant_ctxt;
+ ps_ctxt->i4_qscale =
+ ps_ctxt->ps_rc_quant_ctxt
+ ->pi4_qp_to_qscale[i4_curr_frm_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset];
+
+ ps_ctxt->i4_frm_qp = i4_curr_frm_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset;
+ ps_ctxt->i4_slice_type = i4_slice_type; //EIID
+ ps_ctxt->i4_temporal_layer = ps_lap_out->i4_temporal_lyr_id;
+ ps_ctxt->i4_is_ref_pic = ps_lap_out->i4_is_ref_pic;
+ ps_ctxt->u4_num_16x16_skips_at_L0_IPE = 0;
+ ps_ctxt->i4_use_const_lamda_modifier = USE_CONSTANT_LAMBDA_MODIFIER; /* default; may be forced on below via i4_vqet */
+ ps_ctxt->i4_use_const_lamda_modifier =
+ ps_ctxt->i4_use_const_lamda_modifier ||
+ ((ps_stat_prms->s_coding_tools_prms.i4_vqet &
+ (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER)) &&
+ ((ps_stat_prms->s_coding_tools_prms.i4_vqet &
+ (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_NOISE_PRESERVATION)) ||
+ (ps_stat_prms->s_coding_tools_prms.i4_vqet &
+ (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_1)) ||
+ (ps_stat_prms->s_coding_tools_prms.i4_vqet &
+ (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_2)) ||
+ (ps_stat_prms->s_coding_tools_prms.i4_vqet &
+ (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_3))));
+ { /* NOTE(review): plain block, executed unconditionally - no guarding condition is visible */
+ ps_ctxt->f_i_pic_lamda_modifier = ps_lap_out->f_i_pic_lamda_modifier;
+ }
+#if POW_OPT
+ for(i4_i = 0; i4_i < 2; i4_i++)
+ {
+ ps_ctxt->ld_curr_frame_8x8_log_avg[i4_i] = ps_curr_out->ld_curr_frame_8x8_log_avg[i4_i];
+ ps_ctxt->ld_curr_frame_16x16_log_avg[i4_i] = ps_curr_out->ld_curr_frame_16x16_log_avg[i4_i];
+ ps_ctxt->ld_curr_frame_32x32_log_avg[i4_i] = ps_curr_out->ld_curr_frame_32x32_log_avg[i4_i];
+ }
+
+ ps_ctxt->ld_curr_frame_16x16_log_avg[2] = ps_curr_out->ld_curr_frame_16x16_log_avg[2];
+ ps_ctxt->ld_curr_frame_32x32_log_avg[2] = ps_curr_out->ld_curr_frame_32x32_log_avg[2];
+ ps_ctxt->i8_curr_frame_avg_mean_act = ps_curr_out->i8_curr_frame_avg_mean_act;
+#else
+ for(i4_i = 0; i4_i < 2; i4_i++)
+ {
+ ps_ctxt->i8_curr_frame_8x8_avg_act[i4_i] = ps_curr_out->i8_curr_frame_8x8_avg_act[i4_i];
+ ps_ctxt->i8_curr_frame_16x16_avg_act[i4_i] = ps_curr_out->i8_curr_frame_16x16_avg_act[i4_i];
+ ps_ctxt->i8_curr_frame_32x32_avg_act[i4_i] = ps_curr_out->i8_curr_frame_32x32_avg_act[i4_i];
+ }
+
+ ps_ctxt->i8_curr_frame_16x16_avg_act[2] = ps_curr_out->i8_curr_frame_16x16_avg_act[2];
+ ps_ctxt->i8_curr_frame_32x32_avg_act[2] = ps_curr_out->i8_curr_frame_32x32_avg_act[2];
+#endif
+
+ ps_ctxt->pi2_trans_out =
+ (WORD16 *)&ps_ctxt->au1_pred_samples[0]; //overlaying trans coeff memory with pred_samples
+ ps_ctxt->pi2_trans_tmp = (WORD16 *)&ps_ctxt->au1_pred_samples[2048];
+
+ /* Mod factor NUM : start from the values derived by variance in the pre-enc ME output */
+ ps_ctxt->ai4_mod_factor_derived_by_variance[0] =
+ ps_curr_out->ai4_mod_factor_derived_by_variance[0];
+ ps_ctxt->ai4_mod_factor_derived_by_variance[1] =
+ ps_curr_out->ai4_mod_factor_derived_by_variance[1];
+
+ ps_ctxt->f_strength = ps_curr_out->f_strength;
+
+ if(ps_stat_prms->s_coding_tools_prms.i4_vqet & (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER))
+ {
+ if(ps_stat_prms->s_coding_tools_prms.i4_vqet &
+ (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_NOISE_PRESERVATION))
+ {
+ ps_ctxt->i4_enable_noise_detection = 1;
+ }
+ else
+ {
+ ps_ctxt->i4_enable_noise_detection = 0;
+ }
+ }
+ else
+ {
+ ps_ctxt->i4_enable_noise_detection = 0;
+ }
+
+ {
+ if(ISLICE == ps_ctxt->i4_slice_type)
+ {
+ ai4_mod_factor_num[0] = INTRA_QP_MOD_FACTOR_NUM; //16;
+ ai4_mod_factor_num[1] = INTRA_QP_MOD_FACTOR_NUM; //16;
+ }
+ else
+ {
+ ai4_mod_factor_num[0] = INTER_QP_MOD_FACTOR_NUM; //4;
+ ai4_mod_factor_num[1] = INTER_QP_MOD_FACTOR_NUM; //4;
+ }
+
+#if ENABLE_QP_MOD_BASED_ON_SPATIAL_VARIANCE
+ for(i4_i = 0; i4_i < 2; i4_i++)
+ {
+ WORD32 mod_factor_num_val =
+ ps_ctxt->ai4_mod_factor_derived_by_variance[i4_i] * QP_MOD_FACTOR_DEN;
+
+ ai4_mod_factor_num[i4_i] = CLIP3(mod_factor_num_val, 1, ai4_mod_factor_num[i4_i]);
+ ps_ctxt->ai4_mod_factor_derived_by_variance[i4_i] = ai4_mod_factor_num[i4_i];
+ }
+#else
+ for(i4_i = 0; i4_i < 2; i4_i++)
+ {
+ ps_ctxt->ai4_mod_factor_derived_by_variance[i4_i] = ai4_mod_factor_num[i4_i];
+ }
+#endif
+ }
+
+ ps_ctxt->u1_use_lambda_derived_from_min_8x8_act_in_ctb = MODULATE_LAMDA_WHEN_SPATIAL_MOD_ON &&
+ i1_cu_qp_delta_enabled_flag;
+
+ ps_ctxt->u1_use_satd = 1;
+ ps_ctxt->u1_level_1_refine_on = 1;
+ ps_ctxt->u1_disable_child_cu_decide = 0;
+
+#if !OLD_XTREME_SPEED
+ if(((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P5) ||
+ (ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6)) &&
+ (ps_ctxt->i4_slice_type != ISLICE))
+ {
+ ps_ctxt->u1_use_satd = 0;
+ ps_ctxt->u1_level_1_refine_on = 1;
+ ps_ctxt->u1_disable_child_cu_decide = 0;
+ }
+
+#endif
+
+ if((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P4) && (ps_ctxt->i4_slice_type != ISLICE))
+ ps_ctxt->u1_use_satd = 0;
+ if(ps_ctxt->i4_quality_preset > IHEVCE_QUALITY_P3) /* applies to every slice type, unlike the checks above */
+ ps_ctxt->u1_use_satd = 0;
+}
diff --git a/encoder/ihevce_ipe_pass.h b/encoder/ihevce_ipe_pass.h
new file mode 100644
index 0000000..c9624e5
--- /dev/null
+++ b/encoder/ihevce_ipe_pass.h
@@ -0,0 +1,171 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_ipe_pass.h
+*
+* \brief
+* This file contains interface definition of IPE pass functions
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_IPE_PASS_H_
+#define _IHEVCE_IPE_PASS_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+#define IPE_TEST_DBG_L0 0
+#define IPE_TEST_DBG_L1 0
+#define IPE_TEST_DBG_L2 0
+#define IPE_TEST_DBG_L3 0
+#define IPE_TEST_DBG_L4 0
+
+#define IPE_ME_DBG_L0 0
+#define IPE_ME_DBG_L1 0
+
+#define INTRA_PART_DBG 0 // Dump Debug Information related to intra partitioning
+
+#define INTRA_NON_CTB_PIC_DBG 0
+
+#define IPE_MODE_MAP_DBG 0
+
+#define FAST_INTRA_8421_MODES_ENABLE 1
+
+#define FAST_PART_WITH_OPTION_4 1
+
+#define IPE_SAD_TYPE 0 /* 0 => Hadamard SAD, 1 => full SAD */
+#define IPE_STEP_SIZE 1 /* Intra Prediction Mode Step Size During Analysis */
+#define LAMBDA_DIV_FACTOR 1
+
+/* satd/q_scale is accumulated at cu level */
+#define SATD_BY_ACT_Q_FAC 10
+
+/** defines the ratio of bits generated per cabac bin in Q8 format */
+#define CABAC_BITS_PER_BIN 192
+
+/** define modulation factor for qp modulation */
+#define INTRA_QP_MOD_FACTOR_NUM 16
+#define INTER_QP_MOD_FACTOR_NUM 4
+#define QP_MOD_FACTOR_DEN 2
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+typedef enum
+{
+ CU_1TU = 0, /* first enumerator is pinned to 0; the following ones take 1, 2 and 3 */
+ CU_4TU,
+ SUB_CU_1TU,
+ SUB_CU_4TU
+} IPE_CU_TU_SPLIT_PATTERN; /* NOTE(review): names suggest CU / sub-CU coded with 1 or 4 TUs - confirm against users */
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Variable Declarations */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+WORD32 ihevce_ipe_get_num_mem_recs(void);
+
+WORD32
+ ihevce_ipe_get_mem_recs(iv_mem_rec_t *ps_mem_tab, WORD32 i4_num_proc_thrds, WORD32 i4_mem_space);
+
+void *ihevce_ipe_init(
+ iv_mem_rec_t *ps_mem_tab,
+ ihevce_static_cfg_params_t *ps_init_prms,
+ WORD32 i4_num_proc_thrds,
+ WORD32 i4_ref_id,
+ func_selector_t *ps_func_selector,
+ rc_quant_t *ps_rc_quant_ctxt,
+ WORD32 i4_resolution_id,
+ UWORD8 u1_is_popcnt_available);
+
+/* Intra pred reference filtering; presumably pu1_src -> pu1_dst for size nt - confirm at the definition */
+void ihevce_intra_pred_ref_filtering(UWORD8 *pu1_src, WORD32 nt, UWORD8 *pu1_dst);
+
+UWORD32 ihevce_ipe_pass_satd(WORD16 *pi2_coeff, WORD32 coeff_stride, WORD32 trans_size);
+
+void ihevce_ipe_process(
+ void *pv_ctxt,
+ frm_ctb_ctxt_t *ps_frm_ctb_prms,
+ frm_lambda_ctxt_t *ps_frm_lamda,
+ ihevce_lap_enc_buf_t *ps_curr_inp,
+ pre_enc_L0_ipe_encloop_ctxt_t *ps_L0_IPE_curr_out_pre_enc,
+ ctb_analyse_t *ps_ctb_out,
+ //cu_analyse_t *ps_cu_out,
+ ipe_l0_ctb_analyse_for_me_t *ps_ipe_ctb_out,
+ void *pv_multi_thrd_ctxt,
+ WORD32 slice_type,
+ ihevce_ed_blk_t *ps_ed_pic_l1,
+ ihevce_ed_blk_t *ps_ed_pic_l2,
+ ihevce_ed_ctb_l1_t *ps_ed_ctb_l1_pic,
+ WORD32 thrd_id,
+ WORD32 i4_ping_pong);
+
+void ihevce_populate_ipe_ol_cu_lambda_prms(
+ void *pv_ctxt,
+ frm_lambda_ctxt_t *ps_frm_lamda,
+ WORD32 i4_slice_type,
+ WORD32 i4_temporal_lyr_id,
+ WORD32 i4_lambda_type);
+
+void ihevce_get_ipe_ol_cu_lambda_prms(void *pv_ctxt, WORD32 i4_cur_cu_qp);
+
+void ihevce_populate_ipe_frame_init(
+ void *pv_ctxt,
+ ihevce_static_cfg_params_t *ps_stat_prms,
+ WORD32 i4_curr_frm_qp,
+ WORD32 i4_slice_type,
+ WORD32 i4_thrd_id,
+ pre_enc_me_ctxt_t *ps_curr_out,
+ WORD8 i1_cu_qp_delta_enabled_flag,
+ rc_quant_t *ps_rc_quant_ctxt,
+ WORD32 i4_quality_preset,
+ WORD32 i4_temporal_lyr_id,
+ ihevce_lap_output_params_t *ps_lap_out);
+
+LWORD64 ihevce_ipe_get_frame_intra_satd_cost(
+ void *pv_ctxt,
+ LWORD64 *pi8_frame_satd_by_qpmod,
+ LWORD64 *pi8_frame_acc_mode_bits_cost,
+ LWORD64 *pi8_frame_acc_activity_factor,
+ LWORD64 *pi8_frame_l0_acc_satd);
+#endif /* _IHEVCE_IPE_PASS_H_ */
diff --git a/encoder/ihevce_ipe_structs.h b/encoder/ihevce_ipe_structs.h
new file mode 100644
index 0000000..0b7acd9
--- /dev/null
+++ b/encoder/ihevce_ipe_structs.h
@@ -0,0 +1,494 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_ipe_structs.h
+*
+* \brief
+* This file contains structures of ipe pass
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_IPE_STRUCTS_H_
+#define _IHEVCE_IPE_STRUCTS_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+#define MAX_FAST_IP_MODES 23
+#define NUM_INTRA_RDOPT_MODES 1
+#if 1 // FAST_PART_WITH_OPTION_4
+#define MAX_TREE_NODES \
+ ((MAX_CTB_SIZE == MIN_TU_SIZE) \
+ ? 1 \
+ : (MAX_CTB_SIZE == (MIN_TU_SIZE << 1) \
+ ? 5 \
+ : (MAX_CTB_SIZE == (MIN_TU_SIZE << 2) \
+ ? 21 \
+ : (MAX_CTB_SIZE == (MIN_TU_SIZE << 3) ? 37 : 53))))
+#else // FAST_PART_WITH_OPTION_4
+#define MAX_TREE_NODES \
+ ((MAX_CTB_SIZE == MIN_TU_SIZE) \
+ ? 1 \
+ : (MAX_CTB_SIZE == (MIN_TU_SIZE << 1) \
+ ? 5 \
+ : (MAX_CTB_SIZE == (MIN_TU_SIZE << 2) \
+ ? 9 \
+ : (MAX_CTB_SIZE == (MIN_TU_SIZE << 3) ? 13 : 17))))
+#endif // FAST_PART_WITH_OPTION_4
+#define BOTTOM_LEFT_FLAG 0x0000000F
+#define LEFT_FLAG 0x000000F0
+#define TOP_LEFT_FLAG 0x00010000
+#define TOP_FLAG 0x00000F00
+#define TOP_RIGHT_FLAG 0x0000F000
+#define MAX_UWORD8 0xFF
+#define MAX_DOUBLE 1.7e+308 ///< max. value of double-type value
+#define MAX_INTRA_COST_IPE 0x0F7F7F7F
+
+#define MAX_TU_ROW_IN_CTB (MAX_CTB_SIZE >> 2)
+#define MAX_TU_COL_IN_CTB (MAX_CTB_SIZE >> 2)
+
+#define BIT_DEPTH 8
+
+#define FAST_PARTITION_WITH_TRANSFORM 1
+
+#define IHEVCE_INTRA_REF_FILTERING C
+#define IHEVCE_INTRA_LUMA_REF_SUBSTITUTION C
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+/** @brief 4x4 DST, 4x4, 8x8, 16x16, 32x32 */
+#define NUM_TRANS_TYPES 5
+#define INTRA_PLANAR 0
+#define INTRA_DC 1
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+#define INTRA_ANGULAR(x) (x)
+
+/** @brief max 30bit value */
+#define MAX30 ((1 << 30) - 1)
+
+/* @brief macro to clip a data to max of 30bits (assuming unsigned); note that x is evaluated twice */
+#define CLIP30(x) ((x) > MAX30 ? MAX30 : (x))
+
+/* @brief compute (rate * lambda) >> qshift, clipped to 30bits; operands are widened to 64 bits before the multiply */
+#define COMPUTE_RATE_COST_CLIP30(r, l, qshift) ((WORD32)CLIP30(((ULWORD64)(r) * (ULWORD64)(l)) >> (qshift)))
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+typedef UWORD32 (*pf_res_trans_luma_had)(
+ UWORD8 *pu1_origin,
+ WORD32 src_strd,
+ UWORD8 *pu1_pred_buf,
+ WORD32 pred_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd,
+ WORD32 size);
+
+typedef void (*pf_ipe_intra_pred)(
+ UWORD8 *pu1_ref, WORD32 src_strd, UWORD8 *pu1_dst, WORD32 dst_strd, WORD32 nt, WORD32 mode);
+
+typedef UWORD32 (*pf_ipe_res_trans)(
+ UWORD8 *pu1_src,
+ UWORD8 *pu1_pred,
+ WORD16 *pi2_tmp,
+ WORD16 *pi2_dst,
+ WORD32 src_strd,
+ WORD32 pred_strd,
+ WORD32 dst_strd,
+ WORD32 chroma_flag);
+
+typedef FT_CALC_HAD_SATD_8BIT *pf_ipe_res_trans_had;
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+
+typedef enum
+{
+
+ IPE_CTXT = 0,
+ IPE_THRDS_CTXT,
+
+ /* should be last entry */
+ NUM_IPE_MEM_RECS
+
+} IPE_MEM_TABS_T;
+
+typedef enum
+{
+ IPE_FUNC_MODE_0 = 0,
+ IPE_FUNC_MODE_1,
+ IPE_FUNC_MODE_2,
+ IPE_FUNC_MODE_3TO9,
+ IPE_FUNC_MODE_10,
+ IPE_FUNC_MODE_11TO17,
+ IPE_FUNC_MODE_18_34,
+ IPE_FUNC_MODE_19TO25,
+ IPE_FUNC_MODE_26,
+ IPE_FUNC_MODE_27TO33,
+
+ NUM_IPE_FUNCS
+
+} IPE_FUNCS_T;
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+/**
+******************************************************************************
+ * @brief IPE CTB to CU and TU Quadtree Recursive Structure
+******************************************************************************
+ */
+
+typedef struct ihevce_ipe_cu_tree_t ihevce_ipe_cu_tree_t;
+
+typedef struct ihevce_ipe_cu_tree_t
+{
+ /**
+ * Origin of current coding unit relative to top-left of CTB
+ */
+ UWORD16 u2_x0;
+
+ UWORD16 u2_y0;
+
+ /**
+ * Origin of current coding unit relative to top-left of Picture
+ */
+ UWORD16 u2_orig_x;
+
+ UWORD16 u2_orig_y;
+
+ /**
+ * Size of current coding unit in luma pixels
+ */
+ UWORD8 u1_cu_size;
+
+ UWORD8 u1_width;
+
+ UWORD8 u1_height;
+
+ UWORD8 u1_depth;
+
+ UWORD8 u1_part_flag_pos;
+
+ UWORD8 u1_log2_nt;
+
+ WORD32 i4_nbr_flag;
+
+ /**
+ * Recursive Bracketing Parameters
+ */
+ UWORD8 best_mode;
+
+ WORD32 best_satd;
+
+ WORD32 best_cost;
+
+ /**
+ * Number of pixels available in these neighbors
+ */
+ UWORD8 u1_num_left_avail;
+
+ UWORD8 u1_num_top_avail;
+
+ UWORD8 u1_num_top_right_avail;
+
+ UWORD8 u1_num_bottom_left_avail;
+
+ UWORD8 au1_best_mode_1tu[NUM_BEST_MODES];
+
+ WORD32 au4_best_cost_1tu[NUM_BEST_MODES];
+
+ UWORD8 au1_best_mode_4tu[NUM_BEST_MODES];
+
+ WORD32 au4_best_cost_4tu[NUM_BEST_MODES];
+
+ ihevce_ipe_cu_tree_t *ps_parent;
+
+ ihevce_ipe_cu_tree_t *ps_sub_cu[4];
+
+ /* best mode bits cost */
+ UWORD16 u2_mode_bits_cost;
+
+} ihevce_ipe_cu_tree_node_t;
+
+/**
+******************************************************************************
+ * @brief IPE module context memory
+******************************************************************************
+ */
+typedef struct
+{
+ ihevce_ipe_cu_tree_t *ps_ipe_cu_tree;
+
+ /* one parent and four children */
+ ihevce_ipe_cu_tree_t as_ipe_cu_tree[5];
+
+ UWORD8 au1_ctb_mode_map[MAX_TU_ROW_IN_CTB + 1][MAX_TU_COL_IN_CTB + 1];
+
+ UWORD8 au1_cand_mode_list[3];
+
+ /** Pointer to structure containing function pointers of common*/
+ func_selector_t *ps_func_selector;
+
+ /**
+ * CU level Qp / 6
+ */
+ WORD32 i4_cu_qp_div6;
+
+ /**
+ * CU level Qp % 6
+ */
+ WORD32 i4_cu_qp_mod6;
+
+ /** array of luma intra prediction function pointers */
+ pf_ipe_intra_pred apf_ipe_lum_ip[NUM_IPE_FUNCS];
+
+ /** array of function pointers for residual and
+ * forward transform for all transform sizes
+ */
+ pf_res_trans_luma apf_resd_trns[NUM_TRANS_TYPES];
+
+ /** array of function pointers for residual and
+ * forward transform for all transform sizes
+ */
+ pf_res_trans_luma_had apf_resd_trns_had[NUM_TRANS_TYPES];
+
+ /** array of pointer to store the scaling matrices for
+ * all transform sizes and qp % 6 (pre computed)
+ */
+ WORD16 *api2_scal_mat[NUM_TRANS_TYPES * 2];
+
+ /** array of pointer to store the re-scaling matrices for
+ * all transform sizes and qp % 6 (pre computed)
+ */
+ WORD16 *api2_rescal_mat[NUM_TRANS_TYPES * 2];
+
+ /** Quantization rounding factor for inter and intra CUs */
+ WORD32 i4_quant_rnd_factor[2];
+
+ UWORD8 u1_ctb_size;
+
+ UWORD8 u1_min_cu_size;
+
+ UWORD8 u1_min_tu_size;
+
+ UWORD16 u2_ctb_row_num;
+
+ UWORD16 u2_ctb_num_in_row;
+
+ WORD8 i1_QP;
+
+ UWORD8 u1_num_b_frames;
+
+ UWORD8 b_sad_type;
+
+ UWORD8 u1_ipe_step_size;
+
+ WORD32 i4_ol_satd_lambda;
+
+ WORD32 i4_ol_sad_lambda;
+
+ UWORD8 au1_nbr_ctb_map[MAX_PU_IN_CTB_ROW + 1 + 8][MAX_PU_IN_CTB_ROW + 1 + 8];
+
+ /**
+ * Pointer to (1,1) location in au1_nbr_ctb_map
+ */
+ UWORD8 *pu1_ctb_nbr_map;
+
+ /**
+ * neighbour map buffer stride
+ */
+ WORD32 i4_nbr_map_strd;
+
+ /** CTB neighbour availability flags */
+ nbr_avail_flags_t s_ctb_nbr_avail_flags;
+
+ /** Slice Type of the current picture being processed */
+ WORD32 i4_slice_type;
+
+ /** Temporal ID of the current picture being processed */
+ WORD32 i4_temporal_lyr_id;
+
+ WORD32 i4_ol_sad_lambda_qf_array[MAX_HEVC_QP_10bit + 1];
+ WORD32 i4_ol_satd_lambda_qf_array[MAX_HEVC_QP_10bit + 1];
+
+ /************************************************************************/
+ /* The fields with the string 'type2' in their names are required */
+ /* when both 8bit and hbd lambdas are needed. The lambdas corresponding */
+ /* to the bit_depth != internal_bit_depth are stored in these fields */
+ /************************************************************************/
+ WORD32 i4_ol_sad_type2_lambda_qf_array[MAX_HEVC_QP_10bit + 1];
+ WORD32 i4_ol_satd_type2_lambda_qf_array[MAX_HEVC_QP_10bit + 1];
+
+ /*Store the HEVC frame level qp for level modulation*/
+ WORD32 i4_hevc_qp;
+ /*Store the frame level qscale for level modulation*/
+ WORD32 i4_qscale;
+#if POW_OPT
+ /* Average activity of 8x8 blocks from previous frame
+ * If L1, maps to 16*16 in L0
+ */
+ long double ld_curr_frame_8x8_log_avg[2];
+
+ /* Average activity of 16x16 blocks from previous frame
+ * If L1, maps to 32*32 in L0
+ */
+ long double ld_curr_frame_16x16_log_avg[3];
+
+ /* Average activity of 32x32 blocks from previous frame
+ * If L1, maps to 64*64 in L0
+ */
+ long double ld_curr_frame_32x32_log_avg[3];
+#else
+ /* Average activity of 8x8 blocks from previous frame
+ * If L1, maps to 16*16 in L0
+ */
+ LWORD64 i8_curr_frame_8x8_avg_act[2];
+
+ /* Average activity of 16x16 blocks from previous frame
+ * If L1, maps to 32*32 in L0
+ */
+ LWORD64 i8_curr_frame_16x16_avg_act[3];
+
+ /* Average activity of 32x32 blocks from previous frame
+ * If L1, maps to 64*64 in L0
+ */
+ LWORD64 i8_curr_frame_32x32_avg_act[3];
+#endif
+ /** Frame-level SATD cost accumulator */
+ LWORD64 i8_frame_acc_satd_cost;
+
+ /** Frame-level SATD accumulator */
+ LWORD64 i8_frame_acc_satd;
+
+ /** Frame-level activity factor for CU 8x8 accumulator */
+ LWORD64 i8_frame_acc_act_factor;
+
+ /** Frame-level Mode Bits cost accumulator */
+ LWORD64 i8_frame_acc_mode_bits_cost;
+
+ /** Encoder quality preset : See IHEVCE_QUALITY_CONFIG_T for presets */
+ WORD32 i4_quality_preset;
+
+ /** Frame-level SATD/qp accumulator in q10 format*/
+ LWORD64 i8_frame_acc_satd_by_modqp_q10;
+
+ /** For testing EIID only. */
+ UWORD32 u4_num_16x16_skips_at_L0_IPE;
+
+ /** Reference sample array. Used as local variable in mode_eval_filtering */
+ UWORD8 au1_ref_samples[1028];
+ /** filtered reference sample array. Used as local variable in mode_eval_filtering */
+ UWORD8 au1_filt_ref_samples[1028];
+ /** array for the modes to be evaluated. Used as local variable in mode_eval_filtering */
+ UWORD8 au1_modes_to_eval[MAX_NUM_IP_MODES];
+ /** temp array for the modes to be evaluated. Used as local variable in mode_eval_filtering */
+ UWORD8 au1_modes_to_eval_temp[MAX_NUM_IP_MODES];
+ /** pred samples array. Used as local variable in mode_eval_filtering */
+ MEM_ALIGN32 UWORD8 au1_pred_samples[4096];
+ /** array for storing satd cost. Used as local variable in mode_eval_filtering*/
+ UWORD16 au2_mode_bits_satd_cost[MAX_NUM_IP_MODES];
+ /** array for storing satd values. used as local variable in mode_eval_filtering */
+ UWORD16 au2_mode_bits_satd[MAX_NUM_IP_MODES];
+
+ /** reference data, local for pu_calc_8x8 */
+ UWORD8 au1_ref_8x8pu[4][18];
+ /** mode_bits_cost, local for pu_calc_8x8 */
+ UWORD16 au2_mode_bits_cost_8x8pu[4][MAX_NUM_IP_MODES];
+ /** mode_bits, local for pu_calc_8x8 */
+ UWORD16 au2_mode_bits_8x8_pu[MAX_NUM_IP_MODES];
+
+ /** transform coeff temp, local to ihevce_pu_calc_4x4_blk */
+ WORD16 *pi2_trans_tmp; //this memory is overlaid with au1_pred_samples[4096]. First half.
+
+ /** transform coeff out, local to ihevce_pu_calc_4x4_blk */
+ WORD16 *pi2_trans_out; //this memory is overlaid with au1_pred_samples[4096]. Second half.
+
+ UWORD8 u1_use_lambda_derived_from_min_8x8_act_in_ctb;
+
+ UWORD8 u1_bit_depth;
+
+ rc_quant_t *ps_rc_quant_ctxt;
+ /** Flag that specifies whether to use SATD or SAD in L0 IPE */
+ UWORD8 u1_use_satd;
+
+ /** Flag that specifies level of refinement */
+ UWORD8 u1_level_1_refine_on;
+
+ /** Flag indicates that child mode decision is disabled in L0 IPE recur bracketing */
+ UWORD8 u1_disable_child_cu_decide;
+
+ /*Modulation factor*/
+ WORD32 ai4_mod_factor_derived_by_variance[2];
+ float f_strength;
+ WORD32 i4_l0ipe_qp_mod;
+
+ WORD32 i4_frm_qp;
+ WORD32 i4_temporal_layer;
+ WORD32 i4_pass;
+
+ double f_i_pic_lamda_modifier;
+ WORD32 i4_use_const_lamda_modifier;
+ WORD32 i4_is_ref_pic;
+ LWORD64 i8_curr_frame_avg_mean_act;
+ WORD32 i4_enable_noise_detection;
+
+ ihevce_ipe_optimised_function_list_t s_ipe_optimised_function_list;
+
+ ihevce_cmn_opt_func_t s_cmn_opt_func;
+
+} ihevce_ipe_ctxt_t;
+
+/**
+******************************************************************************
+ * @brief IPE module overall context
+******************************************************************************
+ */
+typedef struct
+{
+ /*array of ipe ctxt */
+ ihevce_ipe_ctxt_t *aps_ipe_thrd_ctxt[MAX_NUM_FRM_PROC_THRDS_PRE_ENC];
+
+ /** Number of processing threads created run time */
+ WORD32 i4_num_proc_thrds;
+
+} ihevce_ipe_master_ctxt_t;
+
+/*****************************************************************************/
+/* Extern Variable Declarations */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+void ihevce_ipe_analyse_update_cost(
+ ihevce_ipe_cu_tree_t *ps_cu_node, UWORD8 u1_mode, DOUBLE lf_cost);
+#endif /* _IHEVCE_IPE_STRUCTS_H_ */
diff --git a/encoder/ihevce_lap_enc_structs.h b/encoder/ihevce_lap_enc_structs.h
new file mode 100644
index 0000000..054f369
--- /dev/null
+++ b/encoder/ihevce_lap_enc_structs.h
@@ -0,0 +1,497 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_lap_enc_structs.h
+*
+* \brief
+* This file contains structure definitions shared between Encoder and LAP
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_LAP_ENC_STRUCTS_H_
+#define _IHEVCE_LAP_ENC_STRUCTS_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+#define MAX_NUM_BUFS_LAP_ENC 15
+#define MAX_REF_PICS 16
+#define MAX_PICS_FOR_SGI 16 /* max pics to be held for Sub-Gop Interleave */
+#define MAX_DUPLICATE_ENTRIES_IN_REF_LIST 2
+#define MAX_LAP_WINDOW_SIZE 60
+#define MAX_SUB_GOP_SIZE 16
+#define MAX_SCENE_NUM 30
+#define INIT_HEVCE_QP_RC (-300)
+#define MAX_TEMPORAL_LAYERS 3
+#define NUM_LAP2_LOOK_AHEAD 120
+
+#define INFINITE_GOP_CDR_TIME_S 3
+#define FRAME_PARALLEL_LVL 0
+#define NUM_SG_INTERLEAVED (1 + FRAME_PARALLEL_LVL)
+
+//#define MAX_NUM_ENC_LOOP_PARALLEL ((1 << FRAME_PARALLEL_LVL) + 2)
+//#define MAX_NUM_ME_PARALLEL ((1 << FRAME_PARALLEL_LVL) + 2)
+#define MAX_NUM_ENC_LOOP_PARALLEL 1
+#define MAX_NUM_ME_PARALLEL 1
+#define DIST_MODE_3_NON_REF_B 0 // disabled for normal cases
+
+#define DENOM_DEFAULT 7
+#define WGHT_DEFAULT (1 << DENOM_DEFAULT)
+
+#define MAX_NON_REF_B_PICS_IN_QUEUE_SGI MAX_PICS_FOR_SGI //ELP_RC
+
+/*minimum stagger in non sequential operation*/
+#define MIN_L1_L0_STAGGER_NON_SEQ 1
+
+/* Enable or disable Pseudo presets */
+#undef PSEUDO_PRESETS
+
+/**
+*******************************************************************************
+@brief Invalid POC value since negative POCs are also valid as per syntax
+*******************************************************************************
+ */
+#define INVALID_POC (-16384)
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+/* Scenetype enums */
+typedef enum SCENE_TYPE_E
+{
+ SCENE_TYPE_NORMAL = 0,
+ SCENE_TYPE_SCENE_CUT,
+ SCENE_TYPE_FLASH,
+ SCENE_TYPE_FADE_IN,
+ SCENE_TYPE_FADE_OUT,
+ SCENE_TYPE_DISSOLVE,
+ SCENE_TYPE_PAUSE_TO_RESUME,
+ MAX_NUM_SCENE_TYPES
+} SCENE_TYPE_E;
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ * @brief Logo structure
+******************************************************************************
+ */
+
+typedef struct
+{
+ /** i4_is_logo_on : Specifies if logo is on or off */
+ WORD32 i4_is_logo_on;
+
+ /** logo_width : Width of the logo in pixels */
+ WORD32 logo_width;
+
+ /** logo_height : Height of the logo in pixels */
+ WORD32 logo_height;
+
+ /** logo_x_offset : horizontal offset for logo from the right end of pic */
+ WORD32 logo_x_offset;
+
+ /** logo_y_offset : vertical offset for logo from the bottom end of pic */
+ WORD32 logo_y_offset;
+
+} ihevce_logo_attrs_t;
+
+typedef struct
+{
+ /**
+ * Input YUV buffers pointers and related parameters
+ */
+ ihevce_lap_params_t s_lap_params;
+
+ /** Width of input luma */
+ WORD32 i4_width;
+
+ /** Height of input luma */
+ WORD32 i4_height;
+
+ /** Max closed gop period : Max spacing between IDR frames */
+ WORD32 i4_max_closed_gop_period;
+
+ /** Min closed gop period : Min spacing between IDR frames */
+ WORD32 i4_min_closed_gop_period;
+
+ /** Max CRA open gop period: Max spacing between CRA frames */
+ WORD32 i4_max_cra_open_gop_period;
+
+ /** Max i open gop period: Max spacing between I frames */
+ WORD32 i4_max_i_open_gop_period;
+
+ /** limits Max gopsize = 2 ^ i4_max_temporal_layers - 1 */
+ WORD32 i4_max_temporal_layers;
+
+ /** Minimum temporal ID from which B-pictures are coded; Tid=1 (default) 0 (no B) */
+ WORD32 i4_min_temporal_id_for_b;
+
+ /** Maximum number of reference frames */
+ WORD32 i4_max_reference_frames;
+
+ /** Interlace field */
+ WORD32 i4_src_interlace_field;
+
+ /* Frame rate*/
+ WORD32 i4_frame_rate;
+
+ /** Enable Logo flag */
+ WORD32 i4_enable_logo;
+
+ /** Bit Depth */
+ WORD32 i4_internal_bit_depth;
+
+ WORD32 i4_input_bit_depth;
+
+ /* 0 - 400; 1 - 420; 2 - 422; 3 - 444 */
+ UWORD8 u1_chroma_array_type;
+
+ WORD32 ai4_quality_preset[IHEVCE_MAX_NUM_RESOLUTIONS];
+
+ WORD32 i4_rc_pass_num;
+
+ /* If enable, enables blu ray compatibility of op*/
+ WORD32 i4_blu_ray_spec;
+
+ IV_ARCH_T e_arch_type;
+
+ UWORD8 u1_is_popcnt_available;
+
+ WORD32 i4_mres_single_out;
+
+ WORD32 i4_luma_size_copy_src_logo;
+
+} ihevce_lap_static_params_t;
+
+/**
+ * @brief luma and chroma weight and offset container structure
+ */
+typedef struct
+{
+ /**
+ * flag to control the weighted pred for luma component of
+ * this reference frame
+ * Range [0 : 1]
+ */
+ UWORD8 u1_luma_weight_enable_flag;
+
+ /**
+ * flag to control the weighted pred for chroma component of
+ * this reference frame
+ * Range [0 : 1]
+ */
+ UWORD8 u1_chroma_weight_enable_flag;
+
+ /**
+ * luma weight factor for a reference frame,
+ * Range [0 : 128]
+ * Default = 1 << as_wght_offst
+ */
+ WORD16 i2_luma_weight;
+
+ /**
+ * luma offset to be added after weighing for reference frame
+ * Range [-128 : 127]
+ * Default = 0
+ */
+ WORD16 i2_luma_offset;
+
+ /**
+ * chroma weight factor for a reference frame, Default = 1
+ */
+ WORD16 i2_cb_weight;
+
+ /**
+ * chroma offset to be added after weighing for reference frame, Default = 0
+ */
+ WORD16 i2_cb_offset;
+
+ /**
+ * chroma weight factor for a reference frame, Default = 1
+ */
+ WORD16 i2_cr_weight;
+
+ /**
+ * chroma offset to be added after weighing for reference frame, Default = 0
+ */
+ WORD16 i2_cr_offset;
+
+} ihevce_wght_offst_t;
+
+/**
+ * @brief defines the attributes of a reference picture
+ */
+typedef struct
+{
+ /**
+ * weighted prediction attribute for each duplicate entry of a ref pic
+ * Note : Duplicate entries help in using same reference with different
+ * weights and offsets. Example being partial flashes in scene
+ */
+ ihevce_wght_offst_t as_wght_off[MAX_DUPLICATE_ENTRIES_IN_REF_LIST];
+
+ /**
+ * delta POC of reference frame w.r.t current Picture POC,
+ */
+ WORD32 i4_ref_pic_delta_poc;
+
+ /**
+ * flag indicating if this reference frame is to be used as
+ * reference by current picture
+ * shall be 0 or 1
+ */
+ WORD32 i4_used_by_cur_pic_flag;
+
+ /**
+ * Indicates the number of duplicate entries of a reference picture
+ * in the reference picture list. A reference picture may see multiple
+ * entries in the reference picture list, since that allows the LAP to
+ * assign multiple weighting related parameters to a single reference picture.
+ * Range [1, MAX_DUPLICATE_ENTRIES_IN_REF_LIST]
+ *
+ * Used only when weighted prediction is enabled
+ *
+ */
+ WORD32 i4_num_duplicate_entries_in_ref_list;
+
+} ihevce_ref_pic_attrs_t;
+
+/* @brief IV_YUV_BUF_T: This structure defines attributes
+ * for the input yuv used in enc and lap buffer
+ */
+typedef struct
+{
+ /** i4_size of the structure */
+ WORD32 i4_size;
+
+ /** Pointer to Luma (Y) Buffer */
+ void *pv_y_buf;
+
+ /** Pointer to Chroma (Cb) Buffer */
+ void *pv_u_buf;
+
+ /** Pointer to Chroma (Cr) Buffer */
+ void *pv_v_buf;
+
+ /** Width of the Luma (Y) Buffer in pixels */
+ WORD32 i4_y_wd;
+
+ /** Height of the Luma (Y) Buffer in pixels */
+ WORD32 i4_y_ht;
+
+ /** Stride/Pitch of the Luma (Y) Buffer */
+ WORD32 i4_y_strd;
+
+ /** Luma Process start offset : x dir. */
+ WORD32 i4_start_offset_x;
+
+ /** Luma Process start offset : y dir. */
+ WORD32 i4_start_offset_y;
+
+ /** Width of the Chroma (Cb / Cr) Buffer in pixels */
+ WORD32 i4_uv_wd;
+
+ /** Height of the Chroma (Cb / Cr) Buffer in pixels */
+ WORD32 i4_uv_ht;
+
+ /** Stride/Pitch of the Chroma (Cb / Cr) Buffer */
+ WORD32 i4_uv_strd;
+
+} iv_enc_yuv_buf_t;
+
+typedef struct
+{
+ /** i4_size of the structure */
+ WORD32 i4_size;
+
+ /** Pointer to Luma (Y) Buffer */
+ void *pv_y_buf;
+
+ /** Pointer to Chroma (Cb) Buffer */
+ void *pv_u_buf;
+
+ /** Pointer to Chroma (Cr) Buffer */
+ void *pv_v_buf;
+
+} iv_enc_yuv_buf_src_t;
+
+typedef struct
+{
+ /*********** common params for both lap_out and rc_lap_out ****************/
+
+ /* hevc pic types : IDR/CDR/I/P/B etc */
+ WORD32 i4_pic_type;
+ /* picture order count */
+ WORD32 i4_poc;
+ /* temporal layer of the current picture */
+ WORD32 i4_temporal_lyr_id;
+ /**
+ * indicates if the current frame is reference pic
+ * 0 : not ref pic
+ * 1 : ref pic at lower layers (w.r.t to highest layer id)
+ * 2 : ref pic at highest temporal layer id layer
+ */
+ WORD32 i4_is_ref_pic;
+ /**
+ * Scene type such as Scene Cut, fade in/ out, dissolve, flash etc
+ * enum used is IHEVCE_SCENE_TYPE
+ */
+ WORD32 i4_scene_type;
+ /**
+ * Scene number helps to identify the reference frames
+ * for the current frame of same scene and
+ * also it can be used to reset the RC model
+ * for each layer whenever scene cut happens
+ */
+ UWORD32 u4_scene_num;
+ /*display order num*/
+ WORD32 i4_display_num;
+
+ WORD32 i4_quality_preset;
+
+ /*********** parameters specific to lap_out structure **************/
+ /* cra pic type flag */
+ WORD32 i4_is_cra_pic;
+ /** IDR GOP number */
+ WORD32 i4_idr_gop_num;
+ /** weighted prediction enable flag */
+ WORD8 i1_weighted_pred_flag;
+ /** weighted bipred enable flag */
+ WORD8 i1_weighted_bipred_flag;
+ /* number of references for current pic */
+ WORD32 i4_num_ref_pics;
+ /**
+ * common denominator used for luma weights across all ref pics
+ * Default = 0, Shall be in the range [0:7]
+ */
+ WORD32 i4_log2_luma_wght_denom;
+ /**
+ * common denominator used for chroma weights across all ref pics
+ * Default = 0, Shall be in the range [0:7]
+ */
+ WORD32 i4_log2_chroma_wght_denom;
+ /* ref pics to str current Picture POC */
+ ihevce_ref_pic_attrs_t as_ref_pics[MAX_REF_PICS];
+ /* Structure for the ITTIAM logo */
+ ihevce_logo_attrs_t s_logo_ctxt;
+ /* first field flag */
+ WORD32 i4_first_field;
+ /* associated IRAP poc */
+ WORD32 i4_assoc_IRAP_poc;
+ WORD32 i4_is_prev_pic_in_Tid0_same_scene;
+
+ WORD32 i4_is_I_in_any_field;
+ WORD32 i4_used;
+
+ WORD32 i4_end_flag;
+ WORD32 i4_force_idr_flag;
+ WORD32 i4_out_flush_flag;
+ WORD32 i4_first_frm_new_res;
+
+ /***** Spatial QP offset related *****/
+ float f_strength;
+
+ long double ld_curr_frame_8x8_log_avg[2];
+ long double ld_curr_frame_16x16_log_avg[3];
+ long double ld_curr_frame_32x32_log_avg[3];
+
+ LWORD64 i8_curr_frame_8x8_avg_act[2];
+ LWORD64 i8_curr_frame_16x16_avg_act[3];
+ LWORD64 i8_curr_frame_32x32_avg_act[3];
+
+ WORD32 i4_i_pic_lamda_offset;
+
+ double f_i_pic_lamda_modifier;
+
+ WORD32 i4_curr_frm_qp;
+
+ iv_enc_yuv_buf_t s_input_buf;
+
+ /** Frame - level L0 satd accum*/
+ LWORD64 i8_frame_l0_acc_satd;
+
+ /* Frame - level L1 Activity factor */
+ LWORD64 i8_frame_level_activity_fact;
+ /* bits estimated for frame, calculated for sub pic rc bit control */
+ WORD32 ai4_frame_bits_estimated[IHEVCE_MAX_NUM_BITRATES];
+ float f_pred_factor;
+
+} ihevce_lap_output_params_t;
+
+/**
+******************************************************************************
+ * @brief Encoder and LAP I/O structure
+ * s_input_buf : input buffer will be populated by application
+ * when LAP gets this buffer only input will be populated
+ * During the time of setting the encode order for current buffer
+ * LAP should populate the s_lap_out structure.
+******************************************************************************
+ */
+typedef struct
+{
+ /**
+ * Input YUV buffers pointers and related parameters
+ */
+ iv_input_data_ctrl_buffs_t s_input_buf;
+
+ /**
+ * Following parameters are output of LAP
+ * for the current buffer to be encoded
+ */
+ ihevce_lap_output_params_t s_lap_out;
+ /**
+ * Following parameters are output of LAP
+ * for the current buffer to be encoded,
+ * which are RC specific parameters
+ */
+ rc_lap_out_params_t s_rc_lap_out;
+
+ /**
+ * Following parameters are context of LAP QUEUE
+ */
+ frame_info_t s_frame_info;
+} ihevce_lap_enc_buf_t;
+
+/*****************************************************************************/
+/* Extern Variable Declarations */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+#endif /* _IHEVCE_LAP_ENC_STRUCTS_H_ */
diff --git a/encoder/ihevce_lap_interface.c b/encoder/ihevce_lap_interface.c
new file mode 100644
index 0000000..5125e03
--- /dev/null
+++ b/encoder/ihevce_lap_interface.c
@@ -0,0 +1,2398 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file
+* ihevce_lap_interface.c
+*
+* @brief
+* This file contains function definitions related to look-ahead processing
+*
+* @author
+* ittiam
+*
+* @par List of Functions:
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System Include Files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+
+/* User Include Files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_macros.h"
+#include "ihevc_debug.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_api.h"
+#include "ihevce_hle_interface.h"
+#include "ihevce_hle_q_func.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_lap_interface.h"
+#include "ihevce_lap_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_rc_enc_structs.h"
+#include "ihevce_rc_interface.h"
+#include "ihevce_buffer_que_interface.h"
+
+/*****************************************************************************/
+/* Globals */
+/*****************************************************************************/
+WORD32 gau1_order_insert_pic_type[MAX_TEMPORAL_LAYERS][8] = { /* 8 picture types per row */
+ { P_PIC, B_PIC, P_PIC, B_PIC, P_PIC, B_PIC, P_PIC, B_PIC },
+ { P_PIC, B_PIC, B1_PIC, B1_PIC, P_PIC, B_PIC, B1_PIC, B1_PIC },
+ { P_PIC, B_PIC, B1_PIC, B2_PIC, B2_PIC, B1_PIC, B2_PIC, B2_PIC },
+}; /* NOTE(review): elements are WORD32 although the name carries the gau1_ prefix */
+/* Per reference index, copied into i4_used_by_cur_pic_flag: first 8 set, rest clear */
+UWORD8 gau1_use_by_cur_pic_flag[MAX_REF_PICS] = { 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 };
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/*!
+************************************************************************
+* \brief
+*    Number of memory records the LAP module asks for
+* \return NUM_LAP_MEM_RECS
+************************************************************************
+*/
+WORD32 ihevce_lap_get_num_mem_recs(void)
+{
+    return NUM_LAP_MEM_RECS;
+}
+
+/*!
+************************************************************************
+* @brief Fill in size, memory type and alignment of each LAP memory record
+* @return NUM_LAP_MEM_RECS
+************************************************************************
+*/
+WORD32 ihevce_lap_get_mem_recs(iv_mem_rec_t *ps_mem_tab, WORD32 i4_mem_space)
+{
+    const IV_MEM_TYPE_T e_space = (IV_MEM_TYPE_T)i4_mem_space;
+    const WORD32 i4_num_nodes = MAX_SUB_GOP_SIZE - 1;
+    iv_mem_rec_t *ps_ctxt_rec = &ps_mem_tab[LAP_CTXT];
+    iv_mem_rec_t *ps_node_rec = &ps_mem_tab[LAP_NODE_MEM];
+
+    /* record for the LAP context structure */
+    ps_ctxt_rec->i4_mem_size = sizeof(lap_struct_t);
+    ps_ctxt_rec->e_mem_type = e_space;
+    ps_ctxt_rec->i4_mem_alignment = 8;
+
+    /* record for the pool of encode nodes */
+    ps_node_rec->i4_mem_size = (i4_num_nodes * sizeof(ihevce_encode_node_t));
+    ps_node_rec->e_mem_type = e_space;
+    ps_node_rec->i4_mem_alignment = 8;
+    return NUM_LAP_MEM_RECS;
+}
+
+/*!
+* @brief Initialise the LAP context stored in ps_mem_tab[LAP_CTXT]: copy both
+*        parameter sets, reset counters and queues, derive the sub-GOP sizes and
+*        the GOP period, and build the encode-node tree in ps_mem_tab[LAP_NODE_MEM].
+* @return the initialised context (a lap_struct_t *) as void *
+*/
+void *ihevce_lap_init(
+    iv_mem_rec_t *ps_mem_tab,
+    ihevce_lap_static_params_t *ps_lap_params,
+    ihevce_static_cfg_params_t *ps_static_cfg_prms)
+{
+    WORD32 i4_src_interlace_field;
+    WORD32 i4_max_temporal_layers;
+    ihevce_encode_node_t *ps_encode_node_struct;
+    lap_struct_t *ps_lap_struct = (lap_struct_t *)ps_mem_tab[LAP_CTXT].pv_base;
+    ihevce_lap_static_params_t *ps_lap_static_params = &ps_lap_struct->s_lap_static_params;
+    ps_lap_struct->aps_encode_node[0] = (ihevce_encode_node_t *)ps_mem_tab[LAP_NODE_MEM].pv_base;
+
+    memcpy(
+        &ps_lap_struct->s_static_cfg_params,
+        ps_static_cfg_prms,
+        sizeof(ihevce_static_cfg_params_t));
+    memmove(ps_lap_static_params, ps_lap_params, sizeof(ihevce_lap_static_params_t));
+    ps_lap_static_params->e_arch_type = ps_static_cfg_prms->e_arch_type;
+
+    /* POC order arrays start at 0; reference POCs start with all bytes set to 0xFF */
+    memset(&ps_lap_struct->ai4_capture_order_poc[0], 0, MAX_NUM_ENC_NODES * sizeof(WORD32));
+    memset(&ps_lap_struct->ai4_encode_order_poc[0], 0, MAX_NUM_ENC_NODES * sizeof(WORD32));
+    memset(&ps_lap_struct->ref_poc_array[0], 0xFF, sizeof(ps_lap_struct->ref_poc_array));
+    memset(&ps_lap_struct->ai4_pic_type_to_be_removed, 0, NUM_LAP2_LOOK_AHEAD * sizeof(WORD32));
+
+    ps_lap_struct->i4_curr_poc = 0;
+    ps_lap_struct->i4_cra_poc = 0;
+
+    i4_max_temporal_layers = ps_lap_static_params->i4_max_temporal_layers;
+    i4_src_interlace_field = ps_lap_static_params->i4_src_interlace_field;
+    ps_lap_struct->i4_max_idr_period =
+        ps_static_cfg_prms->s_coding_tools_prms.i4_max_closed_gop_period;
+    ps_lap_struct->i4_min_idr_period =
+        ps_static_cfg_prms->s_coding_tools_prms.i4_min_closed_gop_period;
+    ps_lap_struct->i4_max_cra_period =
+        ps_static_cfg_prms->s_coding_tools_prms.i4_max_cra_open_gop_period;
+    ps_lap_struct->i4_max_i_period =
+        ps_static_cfg_prms->s_coding_tools_prms.i4_max_i_open_gop_period;
+    ps_lap_struct->i4_idr_counter = 0;
+    ps_lap_struct->i4_cra_counter = 0;
+    ps_lap_struct->i4_i_counter = 0;
+    ps_lap_struct->i4_idr_gop_num = -1;
+    ps_lap_struct->i4_curr_ref_pics = 0;
+    ps_lap_struct->i4_display_num = 0;
+    ps_lap_struct->i4_num_frames_after_force_idr = 0;
+    ps_lap_struct->i4_num_frm_type_decided = 0;
+    ps_lap_struct->i4_next_start_ctr = 0;
+    ps_lap_struct->ai1_pic_type[0] = PIC_TYPE_IDR;
+
+    ps_lap_struct->i4_enable_logo = ps_lap_static_params->i4_enable_logo;
+    ps_lap_struct->i4_cra_i_pic_flag = 0;
+    ps_lap_struct->i4_force_end_flag = 0;
+    ps_lap_struct->i4_sub_gop_size = (1 << i4_max_temporal_layers);
+    ps_lap_struct->i4_sub_gop_size_idr =
+        ps_lap_struct->i4_sub_gop_size + (i4_max_temporal_layers > 0);
+
+    ps_lap_struct->i4_is_all_i_pic_in_seq = 0;
+
+    if(ps_lap_struct->i4_max_idr_period == 1 || ps_lap_struct->i4_max_cra_period == 1 ||
+       ps_lap_struct->i4_max_i_period == 1)
+    {
+        ps_lap_struct->i4_is_all_i_pic_in_seq = 1;
+    }
+
+    if(1 == i4_src_interlace_field && (!ps_lap_struct->i4_is_all_i_pic_in_seq))
+    {
+        ps_lap_struct->i4_sub_gop_size <<= 1;
+        ps_lap_struct->i4_sub_gop_size_idr <<= 1;
+    }
+
+    ps_lap_struct->i4_fixed_open_gop_period = 1;
+    ps_lap_struct->i4_fixed_i_period = 1;
+
+    if(ps_static_cfg_prms->s_coding_tools_prms.i4_max_closed_gop_period <=
+       ps_lap_struct->i4_sub_gop_size)
+    {
+        ps_lap_struct->i4_min_idr_period =
+            ps_static_cfg_prms->s_coding_tools_prms.i4_max_closed_gop_period;
+    }
+    /* none of the branches below assigns the GOP period when all periods are 0 */
+    ps_lap_struct->i4_gop_period = 0;
+    if(ps_lap_struct->i4_max_idr_period)
+    {
+        if(ps_lap_struct->i4_max_cra_period)
+        {
+            ps_lap_struct->i4_gop_period = ps_lap_struct->i4_max_cra_period;
+        }
+        else if(ps_lap_struct->i4_max_i_period)
+        {
+            ps_lap_struct->i4_gop_period = ps_lap_struct->i4_max_i_period;
+        }
+        else
+        {
+            ps_lap_struct->i4_gop_period = ps_lap_struct->i4_max_idr_period;
+        }
+    }
+    else
+    {
+        if(ps_lap_struct->i4_max_i_period)
+        {
+            ps_lap_struct->i4_gop_period = ps_lap_struct->i4_max_i_period;
+        }
+        else if(ps_lap_struct->i4_max_cra_period)
+        {
+            ps_lap_struct->i4_gop_period = ps_lap_struct->i4_max_cra_period;
+        }
+    }
+
+    if(!ps_lap_struct->i4_max_i_period)
+    {
+        ps_lap_struct->i4_max_i_period =
+            2 * MAX(ps_lap_struct->i4_max_idr_period, ps_lap_struct->i4_max_cra_period);
+    }
+
+    ps_lap_struct->i4_no_back_to_back_i_avoidance = 0;
+
+    /* Infinite GOP case: no IDR, CRA or I period was configured */
+    if(!ps_lap_struct->i4_gop_period)
+    {
+        /*max signed 32 bit value which will be ~ 414 days considering 60frames/fields per second*/
+        ps_lap_struct->i4_max_i_period = 0x7fffffff;
+        ps_lap_struct->i4_gop_period =
+            (INFINITE_GOP_CDR_TIME_S * (ps_static_cfg_prms->s_src_prms.i4_frm_rate_num /
+                                        ps_static_cfg_prms->s_src_prms.i4_frm_rate_denom));
+    }
+
+    if(ps_lap_struct->i4_gop_period < (2 * ps_lap_struct->i4_sub_gop_size))
+    {
+        ps_lap_struct->i4_no_back_to_back_i_avoidance = 1;
+    }
+
+    ps_lap_struct->i4_rc_lap_period =
+        ps_static_cfg_prms->s_lap_prms.i4_rc_look_ahead_pics + MIN_L1_L0_STAGGER_NON_SEQ;
+    ps_lap_struct->pv_prev_inp_buf = NULL;
+    ps_lap_struct->i4_buf_deq_idx = 0;
+    ps_lap_struct->i4_deq_idx = 0;
+    ps_lap_struct->i4_enq_idx = 0;
+    ps_lap_struct->i4_lap2_counter = 0;
+    ps_lap_struct->i4_dyn_sub_gop_size = ps_lap_struct->i4_sub_gop_size;
+    ps_lap_struct->i4_buf_enq_idx = 0;
+    ps_lap_struct->i4_lap_out_idx = 0;
+    ps_lap_struct->i4_capture_idx = 0;
+    ps_lap_struct->i4_idr_flag = 1;
+    ps_lap_struct->i4_num_bufs_encode_order = 0;
+    ps_lap_struct->end_flag = 0;
+    ps_lap_struct->i4_immediate_idr_case = 0;
+    ps_lap_struct->i4_max_buf_in_enc_order = 0;
+    ps_lap_struct->i4_end_flag_pic_idx = 0;
+    memset(
+        &ps_lap_struct->api4_encode_order_array[0],
+        0,
+        sizeof(ihevce_lap_enc_buf_t *) * MAX_NUM_ENC_NODES);
+
+    {
+        WORD32 node_offset, curr_layer;
+        WORD32 i;
+        /* initialization of aps_lap_inp_buf */
+        for(i = 0; i < MAX_QUEUE_LENGTH; i++)
+        {
+            ps_lap_struct->aps_lap_inp_buf[i] = NULL;
+        }
+
+        /* init capture order and encode order pointer */
+        ps_lap_struct->pi4_capture_poc_ptr = &ps_lap_struct->ai4_capture_order_poc[0];
+        ps_lap_struct->pi4_encode_poc_ptr = &ps_lap_struct->ai4_encode_order_poc[0];
+
+        /* the first node of the node memory is the root of the tree */
+        ps_encode_node_struct = ps_lap_struct->aps_encode_node[0];
+
+        ps_encode_node_struct->pv_left_node = NULL;
+        ps_encode_node_struct->pv_right_node = NULL;
+
+        /* Initialise the tree: node 0 is the root, so the next free node is 1 */
+        node_offset = 1;
+        curr_layer = 0;
+        ihevce_populate_tree_nodes(
+            ps_encode_node_struct,
+            ps_encode_node_struct,
+            &node_offset,
+            curr_layer,
+            ps_lap_static_params->i4_max_temporal_layers);
+    }
+
+    return ((void *)ps_lap_struct);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_populate_tree_nodes \endif
+*
+* \brief
+*    Recursively link the encode nodes into a binary tree. Children are taken
+*    pairwise from the node array that starts at the base node; nodes on the
+*    last layer get NULL children.
+*
+* \param[in] encode_parent_node_t  first element of the node array (the root)
+* \param[in,out] encode_node_t     node whose child links are filled in
+* \param[in,out] loop_count        index of the next unused node in the array
+* \param[in] layer                 layer of encode_node_t (the root is layer 0)
+* \param[in] hier_layer            number of node layers (0: root only)
+*
+* \return
+*    None
+*
+*****************************************************************************
+*/
+void ihevce_populate_tree_nodes(
+    ihevce_encode_node_t *encode_parent_node_t,
+    ihevce_encode_node_t *encode_node_t,
+    WORD32 *loop_count,
+    WORD32 layer,
+    WORD32 hier_layer)
+{
+    ihevce_encode_node_t *ps_left_child = NULL;
+    ihevce_encode_node_t *ps_right_child = NULL;
+    WORD32 child_layer;
+
+    /* only I/P pictures: the node keeps no children */
+    if(0 == hier_layer)
+    {
+        encode_node_t->pv_left_node = NULL;
+        encode_node_t->pv_right_node = NULL;
+        return;
+    }
+
+    /* reached below the last layer (the node may be NULL here): nothing to link */
+    if(hier_layer == layer)
+    {
+        return;
+    }
+
+    child_layer = layer + 1;
+
+    /* hand out the next two free nodes unless the children would be past the last layer */
+    if(child_layer < hier_layer)
+    {
+        WORD32 next_free = *loop_count;
+
+        ps_left_child = encode_parent_node_t + next_free;
+        ps_right_child = encode_parent_node_t + (next_free + 1);
+        *loop_count = next_free + 2;
+    }
+
+    encode_node_t->pv_left_node = ps_left_child;
+    encode_node_t->pv_right_node = ps_right_child;
+
+    /* left subtree first, so that it receives the lower node indices */
+    ihevce_populate_tree_nodes(
+        encode_parent_node_t, ps_left_child, loop_count, child_layer, hier_layer);
+
+    ihevce_populate_tree_nodes(
+        encode_parent_node_t, ps_right_child, loop_count, child_layer, hier_layer);
+}
+
+/*!
+************************************************************************
+* \brief
+*    Extend the input picture to align_pic_wd x align_pic_ht by repeating
+*    its last column and last row (luma plane and interleaved chroma plane).
+*    Each direction is skipped when the luma size already matches.
+*
+* \param[in,out] ps_curr_inp  buffer whose s_lap_out.s_input_buf is padded
+* \param[in] align_pic_wd     width to pad up to
+* \param[in] align_pic_ht     luma height to pad up to (chroma uses half)
+*
+* \return None
+************************************************************************
+*/
+void ihevce_lap_pad_input_bufs(
+    ihevce_lap_enc_buf_t *ps_curr_inp, WORD32 align_pic_wd, WORD32 align_pic_ht)
+{
+    /* plane pointers and dimensions are taken from the LAP output parameters;
+     * samples are accessed as 8-bit values throughout */
+    iv_enc_yuv_buf_t *ps_yuv = &ps_curr_inp->s_lap_out.s_input_buf;
+    WORD32 row, col;
+
+    /* ------- Horizontal Right Padding ------ */
+    if(ps_yuv->i4_y_wd != align_pic_wd)
+    {
+        UWORD8 *pu1_row;
+        UWORD16 *pu2_row;
+        WORD32 num_cols;
+        WORD32 num_rows;
+
+        /* ------------- LUMA ----------------------------- */
+        /* start right behind the last valid pixel of the first row */
+        num_rows = ps_yuv->i4_y_ht;
+        num_cols = align_pic_wd - ps_yuv->i4_y_wd;
+        pu1_row = (UWORD8 *)ps_yuv->pv_y_buf;
+        pu1_row += ps_yuv->i4_y_wd;
+
+        for(row = 0; row < num_rows; row++)
+        {
+            /* repeat the last valid pixel of this row */
+            for(col = 0; col < num_cols; col++)
+            {
+                pu1_row[col] = pu1_row[-1];
+            }
+
+            /* next row */
+            pu1_row += ps_yuv->i4_y_strd;
+        }
+
+        /* ------------- CHROMA ---------------------------- */
+        /* cb and cr are interleaved, so padding works on 16-bit cb/cr pairs */
+        num_rows = ps_yuv->i4_uv_ht;
+        num_cols = (align_pic_wd - ps_yuv->i4_uv_wd) >> 1;
+        pu1_row = (UWORD8 *)ps_yuv->pv_u_buf;
+        pu2_row = (UWORD16 *)(pu1_row + ps_yuv->i4_uv_wd);
+
+        for(row = 0; row < num_rows; row++)
+        {
+            /* repeat the last valid cb/cr pair of this row */
+            for(col = 0; col < num_cols; col++)
+            {
+                pu2_row[col] = pu2_row[-1];
+            }
+
+            /* next row: half the stride, since the pointer steps in pairs */
+            pu2_row += (ps_yuv->i4_uv_strd >> 1);
+        }
+    }
+
+    /* ------- Vertical Bottom Padding ------ */
+    if(ps_yuv->i4_y_ht != align_pic_ht)
+    {
+        UWORD8 *pu1_dst, *pu1_last_row;
+        WORD32 num_rows;
+
+        /* ------------- LUMA ----------------------------- */
+        /* first row below the picture, and the last picture row above it */
+        num_rows = align_pic_ht - ps_yuv->i4_y_ht;
+        pu1_dst = (UWORD8 *)ps_yuv->pv_y_buf;
+        pu1_dst += ps_yuv->i4_y_ht * ps_yuv->i4_y_strd;
+        pu1_last_row = pu1_dst - ps_yuv->i4_y_strd;
+
+        for(row = 0; row < num_rows; row++)
+        {
+            /* copy the entire row including the horizontally padded region */
+            memcpy(pu1_dst, pu1_last_row, align_pic_wd);
+
+            /* next row */
+            pu1_dst += ps_yuv->i4_y_strd;
+        }
+
+        /* ------------- CHROMA ----------------------------- */
+        /* same scheme on the chroma plane, padded up to half the luma height */
+        num_rows = (align_pic_ht >> 1) - ps_yuv->i4_uv_ht;
+        pu1_dst = (UWORD8 *)ps_yuv->pv_u_buf;
+        pu1_dst += ps_yuv->i4_uv_ht * ps_yuv->i4_uv_strd;
+        pu1_last_row = pu1_dst - ps_yuv->i4_uv_strd;
+
+        for(row = 0; row < num_rows; row++)
+        {
+            /* copy the entire row including the horizontally padded region */
+            memcpy(pu1_dst, pu1_last_row, align_pic_wd);
+
+            /* next row */
+            pu1_dst += ps_yuv->i4_uv_strd;
+        }
+    }
+
+    return;
+}
+
+/*!
+************************************************************************
+* \brief
+*    Tell whether the first command word of the buffer carries the FLUSH tag
+* \return 1 if it does (last input buffer), 0 otherwise
+************************************************************************
+*/
+WORD32 ihevce_check_last_inp_buf(WORD32 *pi4_cmd_buf)
+{
+    /* only the tag bits of the word take part in the comparison */
+    const WORD32 i4_first_tag = pi4_cmd_buf[0] & (IHEVCE_COMMANDS_TAG_MASK);
+
+    return (IHEVCE_SYNCH_API_FLUSH_TAG == i4_first_tag) ? 1 : 0;
+}
+
+/*!
+************************************************************************
+* \brief
+*    Parse the synchronous command list attached to one input buffer: a
+*    sequence of [tag, length, payload] 32-bit words closed by an END tag.
+*    Malformed input is reported through ps_hle_ctxt->ihevce_cmds_error_report.
+*    Parsing ends at the END tag, at the end of the buffer, or right after
+*    reporting a command that cannot be stepped over (SET_RES, an unknown tag,
+*    a missing length word, an SEI payload that does not fit in the buffer).
+*
+* \param[in] pi4_cmd_buf     first word of the command list
+* \param[in] ps_lap_inp_buf  supplies command size, buffer id and field flags
+* \param[out] pi4_flush_check, pi4_force_idr_check  set to 1 on FLUSH / FORCE_IDR
+* \param[out] pi4_set_res_check  set to 0 when a SET_RES tag is found
+* \param[in,out] pi4_num_frames_after_force_idr  reset to 0 by FORCE_IDR
+************************************************************************
+*/
+void ihevce_lap_parse_sync_cmd(
+    ihevce_hle_ctxt_t *ps_hle_ctxt,
+    ihevce_static_cfg_params_t *ps_static_cfg_prms,
+    WORD32 *pi4_cmd_buf,
+    ihevce_lap_enc_buf_t *ps_lap_inp_buf,
+    WORD32 *pi4_flush_check,
+    WORD32 *pi4_force_idr_check,
+    WORD32 *pi4_set_res_check,
+    WORD32 *pi4_num_frames_after_force_idr)
+{
+    WORD32 *pi4_limit, *pi4_tag_parse, *pi4_next_tag;
+    WORD32 i4_sub_gop_size_mul_2, i4_field_pic, i4_is_first_field;
+    WORD32 i4_length, i4_buf_id, i4_next_tag;
+    WORD32 i4_end_flag = 0;
+    WORD32 i4_stop = 0; /* set when an error makes further parsing impossible */
+    UWORD32 u4_num_sei = 0;
+
+    i4_length = ps_lap_inp_buf->s_input_buf.i4_cmd_buf_size;
+    i4_buf_id = ps_lap_inp_buf->s_input_buf.i4_buf_id;
+    /* one past the last whole 32-bit word; empty range for a non-positive size */
+    pi4_limit = pi4_cmd_buf + ((i4_length > 0) ? (i4_length >> 2) : 0);
+    i4_sub_gop_size_mul_2 = (1 << ps_static_cfg_prms->s_coding_tools_prms.i4_max_temporal_layers)
+                            << 1;
+    i4_field_pic = ps_static_cfg_prms->s_src_prms.i4_field_pic;
+    pi4_tag_parse = pi4_cmd_buf;
+    i4_is_first_field = 1;
+    if(i4_field_pic)
+    {
+        i4_is_first_field =
+            (ps_lap_inp_buf->s_input_buf.i4_topfield_first ^
+             ps_lap_inp_buf->s_input_buf.i4_bottom_field);
+    }
+
+    while(!i4_stop && !i4_end_flag && (pi4_tag_parse < pi4_limit))
+    {
+        WORD32 i4_tag = (*pi4_tag_parse) & (IHEVCE_COMMANDS_TAG_MASK);
+        WORD32 i4_words_left = (WORD32)(pi4_limit - pi4_tag_parse);
+        WORD32 i4_cmd_len, i4_payload_words, i4_min_idr_gap;
+
+        if(IHEVCE_SYNCH_API_END_TAG == i4_tag)
+        {
+            i4_end_flag = 1;
+            break;
+        }
+
+        /* every other command needs a length word right after its tag */
+        if(i4_words_left < 2)
+        {
+            ps_hle_ctxt->ihevce_cmds_error_report(
+                ps_hle_ctxt->pv_cmd_err_cb_handle, IHEVCE_SYNCH_ERR_TLV_ERROR, 1, i4_buf_id);
+            i4_stop = 1;
+            break;
+        }
+        i4_cmd_len = pi4_tag_parse[1];
+
+        switch(i4_tag)
+        {
+        case IHEVCE_SYNCH_API_FLUSH_TAG:
+            (*pi4_flush_check) = 1;
+            if(i4_cmd_len)
+                ps_hle_ctxt->ihevce_cmds_error_report(
+                    ps_hle_ctxt->pv_cmd_err_cb_handle,
+                    IHEVCE_SYNCH_ERR_LENGTH_NOT_ZERO,
+                    1,
+                    i4_buf_id);
+            pi4_tag_parse += 2;
+            u4_num_sei++; /* counted against the SEI limit, as before */
+            break;
+        case IHEVCE_SYNCH_API_FORCE_IDR_TAG:
+            /* field mode: only a first field raises the request, and the minimum
+             * distance between two requests is doubled */
+            i4_min_idr_gap = i4_sub_gop_size_mul_2;
+            if(i4_field_pic)
+                i4_min_idr_gap <<= 1;
+            if(i4_is_first_field)
+                (*pi4_force_idr_check) = 1;
+            if(i4_cmd_len)
+                ps_hle_ctxt->ihevce_cmds_error_report(
+                    ps_hle_ctxt->pv_cmd_err_cb_handle,
+                    IHEVCE_SYNCH_ERR_LENGTH_NOT_ZERO,
+                    1,
+                    i4_buf_id);
+            if(*pi4_num_frames_after_force_idr < i4_min_idr_gap)
+            {
+                ps_hle_ctxt->ihevce_cmds_error_report(
+                    ps_hle_ctxt->pv_cmd_err_cb_handle,
+                    IHEVCE_SYNCH_ERR_FREQ_FORCE_IDR_RECEIVED,
+                    1,
+                    i4_buf_id);
+            }
+            *pi4_num_frames_after_force_idr = 0;
+            pi4_tag_parse += 2;
+            u4_num_sei++;
+            break;
+        case IHEVCE_SYNCH_API_SET_RES_TAG:
+            (*pi4_set_res_check) = 0;
+            ps_hle_ctxt->ihevce_cmds_error_report(
+                ps_hle_ctxt->pv_cmd_err_cb_handle,
+                IHEVCE_SYNCH_ERR_SET_RES_NOT_SUPPORTED,
+                1,
+                i4_buf_id);
+            /* unsupported and never stepped over: stop instead of spinning on this tag */
+            i4_stop = 1;
+            break;
+        case IHEVCE_SYNCH_API_REG_ALLFRAME_SEI_TAG:
+        case IHEVCE_SYNCH_API_REG_KEYFRAME_SEI_TAG:
+            /* the length is in bytes; the payload is padded to the next multiple of 4 */
+            i4_payload_words =
+                (i4_cmd_len < 0) ? -1 : ((i4_cmd_len >> 2) + ((i4_cmd_len & 3) ? 1 : 0));
+            if((i4_payload_words < 0) || (i4_payload_words > (i4_words_left - 2)))
+            {
+                ps_hle_ctxt->ihevce_cmds_error_report(
+                    ps_hle_ctxt->pv_cmd_err_cb_handle,
+                    IHEVCE_SYNCH_ERR_WRONG_LENGTH,
+                    1,
+                    i4_buf_id);
+                i4_stop = 1;
+                break;
+            }
+            pi4_next_tag = pi4_tag_parse + 2 + i4_payload_words;
+            /* when a word follows, it must be a tag that may come after an SEI */
+            if(pi4_next_tag < pi4_limit)
+            {
+                i4_next_tag = (*pi4_next_tag & IHEVCE_COMMANDS_TAG_MASK);
+                if((i4_next_tag != IHEVCE_SYNCH_API_END_TAG) &&
+                   (i4_next_tag != IHEVCE_SYNCH_API_FLUSH_TAG) &&
+                   (i4_next_tag != IHEVCE_SYNCH_API_FORCE_IDR_TAG) &&
+                   (i4_next_tag != IHEVCE_SYNCH_API_REG_KEYFRAME_SEI_TAG) &&
+                   (i4_next_tag != IHEVCE_SYNCH_API_REG_ALLFRAME_SEI_TAG))
+                {
+                    ps_hle_ctxt->ihevce_cmds_error_report(
+                        ps_hle_ctxt->pv_cmd_err_cb_handle,
+                        (i4_cmd_len % 4) ? IHEVCE_SYNCH_ERR_NO_PADDING
+                                         : IHEVCE_SYNCH_ERR_WRONG_LENGTH,
+                        1,
+                        i4_buf_id);
+                }
+            }
+            pi4_tag_parse = pi4_next_tag;
+            u4_num_sei++;
+            break;
+        default:
+            ps_hle_ctxt->ihevce_cmds_error_report(
+                ps_hle_ctxt->pv_cmd_err_cb_handle, IHEVCE_SYNCH_ERR_TLV_ERROR, 1, i4_buf_id);
+            /* the length of an unknown command cannot be trusted: stop parsing */
+            i4_stop = 1;
+            break;
+        }
+    }
+
+    if(u4_num_sei > MAX_NUMBER_OF_SEI_PAYLOAD) //Checking for max number of SEI messages.
+        ps_hle_ctxt->ihevce_cmds_error_report(
+            ps_hle_ctxt->pv_cmd_err_cb_handle, IHEVCE_SYNCH_ERR_TOO_MANY_SEI_MSG, 1, i4_buf_id);
+
+    /* a parse stopped by an error has already reported that error */
+    if(!i4_end_flag && !i4_stop)
+        ps_hle_ctxt->ihevce_cmds_error_report(
+            ps_hle_ctxt->pv_cmd_err_cb_handle, IHEVCE_SYNCH_ERR_NO_END_TAG, 1, i4_buf_id);
+}
+
+/*!
+* \brief Parse the asynchronous command list: each SETBITRATE payload is copied to
+*        the next ps_dyn_br entry and counted; an END tag closes the list. A TLV
+*        problem (unknown tag, payload beyond the buffer) is reported after the loop.
+*/
+void ihevce_lap_parse_async_cmd(
+    ihevce_hle_ctxt_t *ps_hle_ctxt,
+    WORD32 *pi4_cmd_buf,
+    WORD32 i4_length,
+    WORD32 i4_buf_id,
+    WORD32 *pi4_num_set_bitrate_cmds,
+    ihevce_dyn_config_prms_t *ps_dyn_br)
+{
+    const WORD32 i4_br_cmd_words = 2 + (WORD32)(sizeof(ihevce_dyn_config_prms_t) >> 2);
+    WORD32 *pi4_limit = pi4_cmd_buf + ((i4_length > 0) ? (i4_length >> 2) : 0);
+    WORD32 *pi4_tag_parse = pi4_cmd_buf;
+    WORD32 i4_end_flag = 0, i4_tlv_error = 0;
+    /* as before, a tag is only examined while at least one more word follows it */
+    while(!i4_tlv_error && !i4_end_flag && ((pi4_limit - pi4_tag_parse) >= 2))
+    {
+        switch(*pi4_tag_parse)
+        {
+        case IHEVCE_ASYNCH_API_SETBITRATE_TAG:
+            i4_tlv_error = ((pi4_limit - pi4_tag_parse) < i4_br_cmd_words);
+            if(i4_tlv_error)
+                break; /* the payload would be read from beyond the buffer */
+            if((*(pi4_tag_parse + 1)) != sizeof(ihevce_dyn_config_prms_t))
+                ps_hle_ctxt->ihevce_cmds_error_report(
+                    ps_hle_ctxt->pv_cmd_err_cb_handle, IHEVCE_ASYNCH_ERR_BR_NOT_BYTE, 1, i4_buf_id);
+            memcpy(ps_dyn_br, pi4_tag_parse + 2, sizeof(ihevce_dyn_config_prms_t));
+            pi4_tag_parse += i4_br_cmd_words; /* tag, length and payload words */
+            *pi4_num_set_bitrate_cmds = *pi4_num_set_bitrate_cmds + 1;
+            ps_dyn_br++;
+            break;
+        case IHEVCE_ASYNCH_API_END_TAG:
+            i4_end_flag = 1;
+            break;
+        default:
+            i4_tlv_error = 1; /* unknown tag: stop instead of re-reading it forever */
+            break;
+        }
+    }
+    if(i4_tlv_error)
+        ps_hle_ctxt->ihevce_cmds_error_report(
+            ps_hle_ctxt->pv_cmd_err_cb_handle, IHEVCE_ASYNCH_ERR_TLV_ERROR, 1, i4_buf_id);
+    else if(!i4_end_flag)
+        ps_hle_ctxt->ihevce_cmds_error_report(
+            ps_hle_ctxt->pv_cmd_err_cb_handle, IHEVCE_ASYNCH_ERR_NO_END_TAG, 1, i4_buf_id);
+}
+
+/*!
+************************************************************************
+* \brief
+*    Fill ps_lap_out's reference list: default weighted prediction denominators
+*    and one as_ref_pics[] entry per current reference, nearest POC first
+************************************************************************
+*/
+void ref_pics_weight_offset_calc(ihevce_lap_output_params_t *ps_lap_out, lap_struct_t *ps_lap_struct)
+{
+    const WORD32 i4_num_refs = ps_lap_struct->i4_curr_ref_pics;
+    const WORD32 i4_cur_poc = ps_lap_out->i4_poc;
+    WORD32 ai4_dist[MAX_REF_PICS];
+    WORD32 ai4_sorted_poc[MAX_REF_PICS];
+    WORD32 idx;
+
+    /* no explicit weights are produced: only the default denominators are set */
+    ps_lap_out->i4_log2_luma_wght_denom = DENOM_DEFAULT;
+    ps_lap_out->i4_log2_chroma_wght_denom = DENOM_DEFAULT;
+
+    /* signed POC distance of every reference from the current picture */
+    for(idx = 0; idx < i4_num_refs; idx++)
+    {
+        ai4_dist[idx] = ps_lap_struct->ref_poc_array[idx] - i4_cur_poc;
+    }
+
+    /* selection sort on the absolute distance, so that neighbours come first */
+    for(idx = 0; idx < i4_num_refs; idx++)
+    {
+        WORD32 nearest = idx;
+        WORD32 probe = idx;
+        WORD32 swap;
+        while(probe < i4_num_refs)
+        {
+            /* "<=": among equally distant references the last one is picked */
+            if(abs(ai4_dist[probe]) <= abs(ai4_dist[nearest]))
+                nearest = probe;
+            probe++;
+        }
+        swap = ai4_dist[nearest];
+        ai4_dist[nearest] = ai4_dist[idx];
+        ai4_dist[idx] = swap;
+        ai4_sorted_poc[idx] = ai4_dist[idx] + i4_cur_poc;
+    }
+
+    for(idx = 0; idx < i4_num_refs; idx++)
+    {
+        ps_lap_out->as_ref_pics[idx].i4_ref_pic_delta_poc = ai4_sorted_poc[idx] - i4_cur_poc;
+        ASSERT(ps_lap_out->as_ref_pics[idx].i4_ref_pic_delta_poc);
+        /* whether the current picture uses this entry comes from the global table */
+        ps_lap_out->as_ref_pics[idx].i4_used_by_cur_pic_flag = gau1_use_by_cur_pic_flag[idx];
+        /* a single list entry per reference (no duplicates are added) */
+        ps_lap_out->as_ref_pics[idx].i4_num_duplicate_entries_in_ref_list = 1;
+    }
+    return;
+}
+
+/*!
+************************************************************************
+* \brief
+*    Fill the LAP output of a B picture (POC, temporal layer, reference list)
+*    and update the reference POC bookkeeping kept in ps_lap_struct.
+*    A reference picture's POC replaces ref_poc_array[0] when it is larger.
+*
+* \param[in] curr_layer         temporal layer id requested for the picture
+* \param[in,out] ps_lap_inp     buffer whose s_lap_out is filled
+* \param[in,out] ps_lap_struct  LAP context (reference POC array and count,
+*                               CRA state)
+*
+* \return None
+************************************************************************
+*/
+void ref_b_pic_population(
+    WORD32 curr_layer, ihevce_lap_enc_buf_t *ps_lap_inp, lap_struct_t *ps_lap_struct)
+{
+    ihevce_lap_output_params_t *ps_out = &ps_lap_inp->s_lap_out;
+    WORD32 *pi4_ref_poc = ps_lap_struct->ref_poc_array;
+    const WORD32 i4_is_field_src = ps_lap_struct->s_lap_static_params.i4_src_interlace_field;
+    const WORD32 i4_ref_limit = ps_lap_struct->s_lap_static_params.i4_max_reference_frames;
+    const WORD32 i4_top_layer = ps_lap_struct->s_lap_static_params.i4_max_temporal_layers;
+    WORD32 i4_is_ref = 1; /* reference picture unless decided otherwise below */
+    WORD32 i4_num_refs, pos;
+
+    /* identification of the picture */
+    ps_out->i4_poc = ps_lap_struct->pi4_encode_poc_ptr[0];
+    ps_out->i4_idr_gop_num = ps_lap_struct->i4_idr_gop_num;
+    ps_out->i4_assoc_IRAP_poc = ps_lap_struct->i4_assoc_IRAP_poc;
+    ps_out->i4_temporal_lyr_id = curr_layer;
+    ps_out->i4_pic_type = IV_B_FRAME;
+
+    /* picture after the CRA while ref_poc_array[0] is older than the CRA:
+     * restart the reference list from the CRA picture alone */
+    if(ps_lap_struct->i4_cra_i_pic_flag && (ps_out->i4_poc > ps_lap_struct->i4_cra_poc) &&
+       (pi4_ref_poc[0] < ps_lap_struct->i4_cra_poc))
+    {
+        pi4_ref_poc[0] = ps_lap_struct->i4_cra_poc;
+        ps_lap_struct->i4_curr_ref_pics = 1;
+    }
+
+    ps_out->i4_num_ref_pics = ps_lap_struct->i4_curr_ref_pics;
+
+    if(1 == i4_is_field_src)
+    {
+        /* field source, top layer: a second field is not a reference and a
+         * first field is marked with the value 2 */
+        if(ps_out->i4_temporal_lyr_id == i4_top_layer)
+        {
+            WORD32 i4_first_field =
+                (ps_lap_inp->s_input_buf.i4_topfield_first ^
+                 ps_lap_inp->s_input_buf.i4_bottom_field);
+
+            i4_is_ref = (0 == i4_first_field) ? 0 : 2;
+        }
+    }
+    else if(ps_out->i4_temporal_lyr_id >= i4_top_layer)
+    {
+        /* progressive source: the layer id is clamped and the picture is
+         * not a reference */
+        ps_out->i4_temporal_lyr_id = i4_top_layer;
+        i4_is_ref = 0;
+    }
+
+    ps_out->i4_is_ref_pic = i4_is_ref;
+
+    /* the reference list is derived before this picture is added as a reference */
+    ref_pics_weight_offset_calc(ps_out, ps_lap_struct);
+
+    /* a reference picture takes one more slot, up to the configured maximum */
+    if(ps_out->i4_is_ref_pic && (ps_lap_struct->i4_curr_ref_pics < i4_ref_limit))
+    {
+        ps_lap_struct->i4_curr_ref_pics++;
+    }
+
+    /* ascending POC order over the slots in use (insertion sort) */
+    i4_num_refs = ps_lap_struct->i4_curr_ref_pics;
+    for(pos = 1; pos < i4_num_refs; pos++)
+    {
+        WORD32 i4_key = pi4_ref_poc[pos];
+        WORD32 slot = pos;
+
+        while((slot > 0) && (pi4_ref_poc[slot - 1] > i4_key))
+        {
+            pi4_ref_poc[slot] = pi4_ref_poc[slot - 1];
+            slot--;
+        }
+        pi4_ref_poc[slot] = i4_key;
+    }
+
+    /* a reference picture's POC replaces the first (smallest) entry when larger */
+    if(ps_out->i4_is_ref_pic && (ps_out->i4_poc > pi4_ref_poc[0]))
+    {
+        pi4_ref_poc[0] = ps_out->i4_poc;
+    }
+
+    return;
+}
+
+/*!
+************************************************************************
+* \brief
+*    Fill the LAP output of an IDR, I or P picture and update the reference
+*    POC bookkeeping kept in ps_lap_struct (IDR/CRA tracking, reference count,
+*    reference POC array). i4_pic_type and i4_is_cra_pic of the output are
+*    read here, not set.
+*
+* \param[in,out] ps_lap_inp     buffer whose s_lap_out is filled
+* \param[in,out] ps_lap_struct  LAP context
+*
+* \return None
+************************************************************************
+*/
+void ref_pic_population(ihevce_lap_enc_buf_t *ps_lap_inp, lap_struct_t *ps_lap_struct)
+{
+    ihevce_lap_output_params_t *ps_out = &ps_lap_inp->s_lap_out;
+    WORD32 *pi4_ref_poc = ps_lap_struct->ref_poc_array;
+    const WORD32 i4_ref_limit = ps_lap_struct->s_lap_static_params.i4_max_reference_frames;
+    WORD32 i4_num_refs, pos;
+
+    /* the POC comes from the current encode-order POC pointer */
+    ps_out->i4_poc = ps_lap_struct->pi4_encode_poc_ptr[0];
+
+    /* a picture after the CRA can't refer to pictures before the CRA */
+    if(ps_lap_struct->i4_cra_i_pic_flag && (ps_out->i4_poc > ps_lap_struct->i4_cra_poc) &&
+       (pi4_ref_poc[0] <= ps_lap_struct->i4_cra_poc))
+    {
+        pi4_ref_poc[0] = ps_lap_struct->i4_cra_poc;
+        ps_lap_struct->i4_curr_ref_pics = 1;
+    }
+
+    switch(ps_out->i4_pic_type)
+    {
+    case IV_IDR_FRAME:
+        /* a new IDR period starts: no references are left and every
+         * reference POC is reset to all-ones bytes */
+        ps_lap_struct->i4_idr_gop_num++;
+        ps_lap_struct->i4_curr_ref_pics = 0;
+        ps_out->i4_num_ref_pics = 0;
+        ps_lap_struct->i4_cra_i_pic_flag = 1;
+        ps_lap_struct->i4_cra_poc = ps_out->i4_poc;
+
+        memset(ps_lap_struct->ref_poc_array, 0xFF, sizeof(WORD32) * MAX_REF_PICS);
+        break;
+
+    case IV_I_FRAME:
+        /* for a CRA I-frame while the CRA flag is set, no pictures are referenced */
+        if((1 == ps_lap_struct->i4_cra_i_pic_flag) && ps_out->i4_is_cra_pic)
+        {
+            ps_lap_struct->i4_curr_ref_pics = 0;
+            ps_out->i4_num_ref_pics = 0;
+        }
+        ps_lap_struct->i4_cra_poc = ps_out->i4_poc;
+        ps_lap_struct->i4_cra_i_pic_flag = ps_out->i4_is_cra_pic;
+        break;
+
+    case IV_P_FRAME:
+        /* P picture while the CRA flag is set: one reference, flag cleared */
+        if(1 == ps_lap_struct->i4_cra_i_pic_flag)
+        {
+            ps_lap_struct->i4_curr_ref_pics = 1;
+            ps_lap_struct->i4_cra_i_pic_flag = 0;
+        }
+        break;
+
+    default:
+        break;
+    }
+
+    /* IDR and CRA pictures become the associated IRAP POC from here on */
+    if((IV_IDR_FRAME == ps_out->i4_pic_type) ||
+       ((IV_I_FRAME == ps_out->i4_pic_type) && ps_out->i4_is_cra_pic))
+    {
+        ps_lap_struct->i4_assoc_IRAP_poc = ps_out->i4_poc;
+    }
+
+    /* I and P pictures are always references and always sit in temporal layer 0 */
+    ps_out->i4_temporal_lyr_id = 0;
+    ps_out->i4_is_ref_pic = 1;
+    ps_out->i4_idr_gop_num = ps_lap_struct->i4_idr_gop_num;
+    ps_out->i4_assoc_IRAP_poc = ps_lap_struct->i4_assoc_IRAP_poc;
+    ps_out->i4_num_ref_pics = ps_lap_struct->i4_curr_ref_pics;
+
+    /* the reference list is derived before this picture is added as a reference */
+    ref_pics_weight_offset_calc(ps_out, ps_lap_struct);
+
+    /* the picture takes one more reference slot, up to the configured maximum */
+    if(ps_out->i4_is_ref_pic && (ps_lap_struct->i4_curr_ref_pics < i4_ref_limit))
+    {
+        ps_lap_struct->i4_curr_ref_pics++;
+    }
+
+    /* ascending POC order over the slots in use (insertion sort) */
+    i4_num_refs = ps_lap_struct->i4_curr_ref_pics;
+    for(pos = 1; pos < i4_num_refs; pos++)
+    {
+        WORD32 i4_key = pi4_ref_poc[pos];
+        WORD32 slot = pos;
+
+        while((slot > 0) && (pi4_ref_poc[slot - 1] > i4_key))
+        {
+            pi4_ref_poc[slot] = pi4_ref_poc[slot - 1];
+            slot--;
+        }
+        pi4_ref_poc[slot] = i4_key;
+    }
+
+    /* the picture's POC replaces the first (smallest) entry when it is larger */
+    if(ps_out->i4_is_ref_pic && (ps_out->i4_poc > pi4_ref_poc[0]))
+    {
+        pi4_ref_poc[0] = ps_out->i4_poc;
+    }
+
+    return;
+}
+
+/*!
+************************************************************************
+* \brief
+* determine next sub-gop state
+*
+* Decides the picture types of the upcoming sub-gop from the IDR / CRA / I
+* period settings and the running distance counters held in the LAP
+* context. The decision is written to ps_lap_struct->ai1_pic_type[]
+* (entry 1 onwards; entry 0 is additionally rewritten in the two CRA / I
+* branches where the period does not land on the sub-gop end), the number
+* of decided frames is stored in i4_num_frm_type_decided, and
+* i4_idr_counter, i4_cra_counter and i4_i_counter are reset or advanced.
+*
+* \param[in,out] ps_lap_struct : LAP context; periods and counters are read,
+* ai1_pic_type[], i4_num_frm_type_decided and the three counters are updated
+*
+* \return
+* None
+************************************************************************
+*/
+void ihevce_determine_next_sub_gop_state(lap_struct_t *ps_lap_struct)
+{
+ WORD32 i4_num_b_frames = -1; /* -1 means "not decided yet"; checked by the ASSERT at the end */
+ WORD32 i4_sd = ps_lap_struct->i4_sub_gop_size;
+ WORD32 i4_sd_idr = ps_lap_struct->i4_sub_gop_size_idr;
+ WORD32 i4_Midr = ps_lap_struct->i4_max_idr_period;
+ WORD32 i4_midr = ps_lap_struct->i4_min_idr_period; /* read but not used further in this function */
+ WORD32 i4_Mcra = ps_lap_struct->i4_max_cra_period;
+ WORD32 i4_Mi = ps_lap_struct->i4_max_i_period;
+ WORD32 i4_Cd = ps_lap_struct->i4_idr_counter;
+ WORD32 i4_Cc = ps_lap_struct->i4_cra_counter;
+ WORD32 i4_Ci = ps_lap_struct->i4_i_counter;
+
+ if(i4_Midr) /* a period of 0 skips the corresponding counter check */
+ ASSERT(i4_Cd < i4_Midr);
+
+ if(i4_Mcra)
+ ASSERT(i4_Cc < i4_Mcra);
+
+ if(i4_Mi)
+ ASSERT(i4_Ci < i4_Mi);
+
+ /* All-intra case (one of the periods equals 1): exactly one frame type is
+ * decided (IDR, else CRA, else I) and the function returns early, so the
+ * final ASSERT on i4_num_b_frames is not reached on this path */
+ if((i4_Midr == 1) || (i4_Mcra == 1) || (i4_Mi == 1))
+ {
+ ps_lap_struct->i4_num_frm_type_decided = 1;
+ if((i4_Midr == 1) || ((i4_Cd + i4_sd) == i4_Midr))
+ {
+ ps_lap_struct->ai1_pic_type[1] = PIC_TYPE_IDR;
+ ps_lap_struct->i4_idr_counter = 0;
+ ps_lap_struct->i4_cra_counter = 0;
+ ps_lap_struct->i4_i_counter = 0;
+ }
+ else if((i4_Mcra == 1) || ((i4_Cc + i4_sd) == i4_Mcra))
+ {
+ ps_lap_struct->ai1_pic_type[1] = PIC_TYPE_CRA;
+ ps_lap_struct->i4_idr_counter += 1;
+ ps_lap_struct->i4_cra_counter = 0;
+ ps_lap_struct->i4_i_counter = 0;
+ }
+ else
+ {
+ ps_lap_struct->ai1_pic_type[1] = PIC_TYPE_I;
+ ps_lap_struct->i4_idr_counter += 1;
+ ps_lap_struct->i4_cra_counter += 1;
+ ps_lap_struct->i4_i_counter = 0;
+ }
+ return;
+ }
+
+ if((i4_Cd + i4_sd_idr >= i4_Midr) && i4_Midr) /* IDR period is reached within the next sub-gop */
+ {
+ /* IDR sub-gop size differs from the regular sub-gop size: decide B
+ * frames, then a P, then the IDR. Otherwise (else branch) the IDR is the
+ * only frame decided. All three counters restart in both cases. */
+ if(i4_sd_idr != i4_sd)
+ {
+ i4_num_b_frames = i4_Midr - i4_Cd - 2; /* NOTE(review): evaluates to -1 when i4_Cd == i4_Midr - 1
+ * and is then passed as the memset length below; confirm callers exclude that state */
+ memset(&ps_lap_struct->ai1_pic_type[1], PIC_TYPE_B, i4_num_b_frames);
+ ps_lap_struct->ai1_pic_type[i4_num_b_frames + 1] = PIC_TYPE_P;
+ ps_lap_struct->ai1_pic_type[i4_num_b_frames + 2] = PIC_TYPE_IDR;
+ ps_lap_struct->i4_num_frm_type_decided = i4_num_b_frames + 2;
+ ps_lap_struct->i4_idr_counter = 0;
+ ps_lap_struct->i4_cra_counter = 0;
+ ps_lap_struct->i4_i_counter = 0;
+ }
+ else
+ {
+ i4_num_b_frames = 0;
+ ps_lap_struct->ai1_pic_type[1] = PIC_TYPE_IDR;
+ ps_lap_struct->i4_num_frm_type_decided = 1;
+ ps_lap_struct->i4_idr_counter = 0;
+ ps_lap_struct->i4_cra_counter = 0;
+ ps_lap_struct->i4_i_counter = 0;
+ }
+ }
+ /*if next sub gop is going to have CRA as Cc reaches Mcra*/
+ else if(((i4_Cc + i4_sd) >= i4_Mcra) && i4_Mcra)
+ {
+ if(((i4_Cc + i4_sd) == i4_Mcra) || (1 == ps_lap_struct->i4_fixed_open_gop_period)) /* end the sub-gop on the CRA */
+ {
+ i4_num_b_frames = i4_Mcra - i4_Cc - 1;
+ memset(&ps_lap_struct->ai1_pic_type[1], PIC_TYPE_B, i4_num_b_frames);
+ ps_lap_struct->ai1_pic_type[i4_num_b_frames + 1] = PIC_TYPE_CRA;
+ ps_lap_struct->i4_num_frm_type_decided = i4_num_b_frames + 1;
+ ps_lap_struct->i4_idr_counter += ps_lap_struct->i4_num_frm_type_decided;
+ ps_lap_struct->i4_cra_counter = 0;
+ ps_lap_struct->i4_i_counter = 0;
+ }
+ else /* entry 0 is re-marked as CRA and a full B...P sub-gop is decided; CRA and I counters restart from its length */
+ {
+ ps_lap_struct->ai1_pic_type[0] = PIC_TYPE_CRA;
+ i4_num_b_frames = i4_sd - 1;
+ memset(&ps_lap_struct->ai1_pic_type[1], PIC_TYPE_B, i4_num_b_frames);
+ ps_lap_struct->ai1_pic_type[i4_num_b_frames + 1] = PIC_TYPE_P;
+ ps_lap_struct->i4_num_frm_type_decided = i4_num_b_frames + 1;
+ ps_lap_struct->i4_idr_counter += ps_lap_struct->i4_num_frm_type_decided;
+ ps_lap_struct->i4_cra_counter = ps_lap_struct->i4_num_frm_type_decided;
+ ps_lap_struct->i4_i_counter = ps_lap_struct->i4_num_frm_type_decided;
+ }
+ }
+ /*if next sub gop is going to have I_slice as Ci reaches Mi*/
+ else if((i4_Ci + i4_sd >= i4_Mi) && i4_Mi)
+ {
+ if(((i4_Ci + i4_sd) == i4_Mi) || (1 == ps_lap_struct->i4_fixed_i_period)) /* end the sub-gop on the I picture */
+ {
+ i4_num_b_frames = i4_Mi - i4_Ci - 1;
+ memset(&ps_lap_struct->ai1_pic_type[1], PIC_TYPE_B, i4_num_b_frames);
+ ps_lap_struct->ai1_pic_type[i4_num_b_frames + 1] = PIC_TYPE_I;
+ ps_lap_struct->i4_num_frm_type_decided = i4_num_b_frames + 1;
+ ps_lap_struct->i4_idr_counter += ps_lap_struct->i4_num_frm_type_decided;
+ ps_lap_struct->i4_cra_counter += ps_lap_struct->i4_num_frm_type_decided;
+ ps_lap_struct->i4_i_counter = 0;
+ }
+ else /* entry 0 is re-marked as I and a full B...P sub-gop is decided; only the I counter restarts from its length */
+ {
+ ps_lap_struct->ai1_pic_type[0] = PIC_TYPE_I;
+ i4_num_b_frames = i4_sd - 1;
+ memset(&ps_lap_struct->ai1_pic_type[1], PIC_TYPE_B, i4_num_b_frames);
+ ps_lap_struct->ai1_pic_type[i4_num_b_frames + 1] = PIC_TYPE_P;
+ ps_lap_struct->i4_num_frm_type_decided = i4_num_b_frames + 1;
+ ps_lap_struct->i4_idr_counter += ps_lap_struct->i4_num_frm_type_decided;
+ ps_lap_struct->i4_cra_counter += ps_lap_struct->i4_num_frm_type_decided;
+ ps_lap_struct->i4_i_counter = ps_lap_struct->i4_num_frm_type_decided;
+ }
+ }
+ /* Next sub-gop contains no IDR, CRA or I: (i4_sd - 1) B frames followed by
+ * a P, and all three counters advance by the sub-gop length */
+ else
+ {
+ i4_num_b_frames = i4_sd - 1;
+ memset(&ps_lap_struct->ai1_pic_type[1], PIC_TYPE_B, i4_num_b_frames);
+ ps_lap_struct->ai1_pic_type[i4_num_b_frames + 1] = PIC_TYPE_P;
+ ps_lap_struct->i4_num_frm_type_decided = i4_num_b_frames + 1;
+ ps_lap_struct->i4_idr_counter += ps_lap_struct->i4_num_frm_type_decided;
+ ps_lap_struct->i4_cra_counter += ps_lap_struct->i4_num_frm_type_decided;
+ ps_lap_struct->i4_i_counter += ps_lap_struct->i4_num_frm_type_decided;
+ }
+ ASSERT(i4_num_b_frames != -1);
+
+ return;
+}
+
+/*!
+************************************************************************
+* \brief
+* assign pic type to input buf
+*
+* Looks up the decided picture type at
+* ps_lap_struct->ai1_pic_type[ps_lap_struct->i4_next_start_ctr] and
+* translates it into the i4_pic_type and i4_is_cra_pic fields of the
+* buffer's s_lap_out (plus i4_is_I_in_any_field for I and CRA pictures).
+* An IDR additionally resets ps_lap_struct->i4_curr_poc to 0. Any other
+* value triggers ASSERT(0) and leaves the buffer untouched.
+*
+* \param[in,out] ps_lap_struct : LAP context holding the decided types
+* \param[out] ps_lap_inp_buf : input buffer whose s_lap_out is filled
+*
+* \return
+* None
+************************************************************************
+*/
+void ihevce_assign_pic_type(lap_struct_t *ps_lap_struct, ihevce_lap_enc_buf_t *ps_lap_inp_buf)
+{
+    ihevce_lap_output_params_t *ps_out = &ps_lap_inp_buf->s_lap_out;
+    const WORD32 i4_decided_type = ps_lap_struct->ai1_pic_type[ps_lap_struct->i4_next_start_ctr];
+
+    if((PIC_TYPE_I == i4_decided_type) || (PIC_TYPE_CRA == i4_decided_type))
+    {
+        /* Plain I and CRA are both signalled as I frames; only the CRA flag differs */
+        ps_out->i4_pic_type = IV_I_FRAME;
+        ps_out->i4_is_cra_pic = (PIC_TYPE_CRA == i4_decided_type) ? 1 : 0;
+        ps_out->i4_is_I_in_any_field = 1;
+    }
+    else if(PIC_TYPE_P == i4_decided_type)
+    {
+        ps_out->i4_pic_type = IV_P_FRAME;
+        ps_out->i4_is_cra_pic = 0;
+    }
+    else if(PIC_TYPE_B == i4_decided_type)
+    {
+        ps_out->i4_pic_type = IV_B_FRAME;
+        ps_out->i4_is_cra_pic = 0;
+    }
+    else if(PIC_TYPE_IDR == i4_decided_type)
+    {
+        /* POC numbering restarts at every IDR */
+        ps_lap_struct->i4_curr_poc = 0;
+        ps_out->i4_pic_type = IV_IDR_FRAME;
+        ps_out->i4_is_cra_pic = 0;
+    }
+    else
+    {
+        ASSERT(0);
+    }
+}
+
+/*!
+************************************************************************
+* \brief
+* encode order traversal nodes
+*
+* Pre-order walk of the encode tree (node, then left subtree, then right
+* subtree). Each visited node stores its top buffer into
+* encode_order[*loop_count] and advances *loop_count by one. When that
+* buffer is non-NULL the node's data value is written through
+* ps_lap_struct->pi4_encode_poc_ptr, ref_b_pic_population() and
+* ihevce_rc_populate_common_params() are called for the buffer, and the
+* POC pointer moves to the next entry.
+*
+* \param[in] encode_node_t : current tree node (NULL ends the recursion)
+* \param[out] encode_order : encode-order buffer array being filled
+* \param[in,out] loop_count : next free index in encode_order
+* \param[in] curr_layer : layer passed to ref_b_pic_population();
+* children are visited with curr_layer + 1
+* \param[in,out] ps_lap_struct : LAP context
+*
+* \return
+* None
+************************************************************************
+*/
+void ihevce_encode_order_traversal_nodes(
+    ihevce_encode_node_t *encode_node_t,
+    ihevce_lap_enc_buf_t **encode_order,
+    WORD32 *loop_count,
+    WORD32 curr_layer,
+    lap_struct_t *ps_lap_struct)
+{
+    ihevce_lap_enc_buf_t **pps_slot;
+
+    if(NULL != encode_node_t)
+    {
+        /* Visit the node itself first */
+        pps_slot = &encode_order[*loop_count];
+        *pps_slot = (ihevce_lap_enc_buf_t *)encode_node_t->ps_lap_top_buff;
+
+        if(NULL != *pps_slot)
+        {
+            *ps_lap_struct->pi4_encode_poc_ptr = encode_node_t->data;
+            ref_b_pic_population(curr_layer, *pps_slot, ps_lap_struct);
+
+            ihevce_rc_populate_common_params(&(*pps_slot)->s_lap_out, &(*pps_slot)->s_rc_lap_out);
+
+            ps_lap_struct->pi4_encode_poc_ptr += 1;
+        }
+
+        *loop_count += 1;
+
+        /* Then the left subtree, one layer deeper */
+        ihevce_encode_order_traversal_nodes(
+            (ihevce_encode_node_t *)encode_node_t->pv_left_node,
+            encode_order,
+            loop_count,
+            curr_layer + 1,
+            ps_lap_struct);
+
+        /* And finally the right subtree, one layer deeper */
+        ihevce_encode_order_traversal_nodes(
+            (ihevce_encode_node_t *)encode_node_t->pv_right_node,
+            encode_order,
+            loop_count,
+            curr_layer + 1,
+            ps_lap_struct);
+    }
+}
+
+/*!
+************************************************************************
+* \brief
+* capture order traversal nodes
+*
+* In-order walk of the encode tree (left subtree, node, right subtree).
+* Each visited node receives the capture-order buffer and POC found at
+* index *loop_count. With i4_interlace_field set, the entry at
+* *loop_count + 1 becomes the node's bottom buffer and *loop_count
+* advances by two; otherwise it advances by one.
+*
+* \param[in,out] encode_node_t : current tree node (NULL ends the recursion)
+* \param[in] api4_capture_order_array : buffers in capture order
+* \param[in] capture_order_poc_array : POCs in capture order
+* \param[in,out] loop_count : next capture-order index to consume
+* \param[in] i4_interlace_field : non-zero when two entries are consumed per node
+*
+* \return
+* None
+************************************************************************
+*/
+void ihevce_capture_order_traversal_nodes(
+    ihevce_encode_node_t *encode_node_t,
+    ihevce_lap_enc_buf_t **api4_capture_order_array,
+    WORD32 *capture_order_poc_array,
+    WORD32 *loop_count,
+    WORD32 i4_interlace_field)
+{
+    WORD32 i4_slot;
+
+    if(NULL != encode_node_t)
+    {
+        /* Left subtree consumes the earlier capture-order entries */
+        ihevce_capture_order_traversal_nodes(
+            (ihevce_encode_node_t *)encode_node_t->pv_left_node,
+            api4_capture_order_array,
+            capture_order_poc_array,
+            loop_count,
+            i4_interlace_field);
+
+        /* Top buffer and POC are taken from the current index in both modes */
+        i4_slot = *loop_count;
+        encode_node_t->ps_lap_top_buff = (ihevce_lap_enc_buf_t *)api4_capture_order_array[i4_slot];
+        encode_node_t->data = capture_order_poc_array[i4_slot];
+
+        if(i4_interlace_field)
+        {
+            encode_node_t->ps_lap_bottom_buff =
+                (ihevce_lap_enc_buf_t *)api4_capture_order_array[i4_slot + 1];
+            *loop_count = i4_slot + 2;
+        }
+        else
+        {
+            *loop_count = i4_slot + 1;
+        }
+
+        /* Right subtree consumes the later capture-order entries */
+        ihevce_capture_order_traversal_nodes(
+            (ihevce_encode_node_t *)encode_node_t->pv_right_node,
+            api4_capture_order_array,
+            capture_order_poc_array,
+            loop_count,
+            i4_interlace_field);
+    }
+}
+
+/*!
+************************************************************************
+* \brief
+* Copies one capture-order buffer into an encode-order slot and, when the
+* buffer is non-NULL, records its POC, populates its reference pictures
+* and RC common params, and advances the encode-order POC pointer.
+************************************************************************
+*/
+static void ihevce_ip_pic_place(
+    lap_struct_t *ps_lap_struct,
+    ihevce_lap_enc_buf_t **pps_encode_order,
+    WORD32 i4_enc_idx,
+    ihevce_lap_enc_buf_t **pps_capture_order,
+    WORD32 *pi4_capture_poc,
+    WORD32 i4_cap_idx)
+{
+    ihevce_lap_enc_buf_t *ps_buf;
+
+    pps_encode_order[i4_enc_idx] = pps_capture_order[i4_cap_idx];
+    if(NULL == pps_encode_order[i4_enc_idx])
+    {
+        return;
+    }
+
+    ps_lap_struct->pi4_encode_poc_ptr[0] = pi4_capture_poc[i4_cap_idx];
+    ref_pic_population(pps_encode_order[i4_enc_idx], ps_lap_struct);
+
+    ps_buf = pps_encode_order[i4_enc_idx];
+    ihevce_rc_populate_common_params(&ps_buf->s_lap_out, &ps_buf->s_rc_lap_out);
+
+    ps_lap_struct->pi4_encode_poc_ptr++;
+}
+
+/*!
+************************************************************************
+* \brief
+* I/P pic population
+*
+* Places the I/P picture(s) of the current sub-gop at the front of the
+* encode-order array. When i4_idr_flag is 1 and i4_first_gop is non-zero,
+* capture entry 0 goes to encode slot 0 and, unless i4_immediate_idr_case
+* is 1, capture entry [i4_dyn_sub_gop_size] goes to encode slot 1. In every
+* other case capture entry [i4_dyn_sub_gop_size - 1] goes to encode slot 0.
+*
+* \param[in] ps_encode_node : not used by this function
+* \param[in,out] ps_lap_struct : LAP context
+* \param[in] i4_first_gop : non-zero for the first gop after an IDR
+*
+* \return
+* None
+************************************************************************
+*/
+void ihevce_ip_pic_population(
+    ihevce_encode_node_t *ps_encode_node, lap_struct_t *ps_lap_struct, WORD32 i4_first_gop)
+{
+    /* Snapshot the values the original code latched on entry */
+    const WORD32 i4_gop_len = ps_lap_struct->i4_dyn_sub_gop_size;
+    ihevce_lap_enc_buf_t **pps_capture = ps_lap_struct->api4_capture_order_array;
+    ihevce_lap_enc_buf_t **pps_encode = ps_lap_struct->api4_encode_order_array;
+    WORD32 *pi4_capture_poc = ps_lap_struct->pi4_capture_poc_ptr;
+
+    if((1 == ps_lap_struct->i4_idr_flag) && i4_first_gop)
+    {
+        /* The IDR itself is encoded first */
+        ihevce_ip_pic_place(ps_lap_struct, pps_encode, 0, pps_capture, pi4_capture_poc, 0);
+
+        /* Followed by the picture at the sub-gop boundary, unless this is the immediate-IDR case */
+        if(1 != ps_lap_struct->i4_immediate_idr_case)
+        {
+            ihevce_ip_pic_place(
+                ps_lap_struct, pps_encode, 1, pps_capture, pi4_capture_poc, i4_gop_len);
+        }
+    }
+    else
+    {
+        /* Regular sub-gop: its last captured picture is encoded first */
+        ihevce_ip_pic_place(
+            ps_lap_struct, pps_encode, 0, pps_capture, pi4_capture_poc, i4_gop_len - 1);
+    }
+}
+
+/*!
+************************************************************************
+* \brief
+* B pic population
+*
+* First walks the encode tree in-order to attach capture-order buffers and
+* POCs to its nodes, then walks it pre-order (starting at layer 1) to emit
+* the buffers into the encode-order array. Both walks start at an index
+* that depends on i4_idr_flag and on the interlace-field setting.
+*
+* \param[in,out] ps_encode_node : root of the encode tree
+* \param[in,out] ps_lap_struct : LAP context
+*
+* \return
+* None
+************************************************************************
+*/
+void ihevce_b_pic_population(ihevce_encode_node_t *ps_encode_node, lap_struct_t *ps_lap_struct)
+{
+    const WORD32 i4_field = ps_lap_struct->s_lap_static_params.i4_src_interlace_field;
+    ihevce_lap_enc_buf_t **pps_encode_order = ps_lap_struct->api4_encode_order_array;
+    WORD32 *pi4_capture_poc = ps_lap_struct->pi4_capture_poc_ptr;
+    WORD32 i4_index;
+
+    /* Capture-order start: 1 + field after an IDR, otherwise 0 */
+    i4_index = ps_lap_struct->i4_idr_flag ? (1 + i4_field) : 0;
+
+    ihevce_capture_order_traversal_nodes(
+        ps_encode_node,
+        &ps_lap_struct->api4_capture_order_array[0],
+        pi4_capture_poc,
+        &i4_index,
+        i4_field);
+
+    /* Encode-order start: 2 + 2 * field after an IDR, otherwise 1 + field */
+    i4_index = ps_lap_struct->i4_idr_flag ? (2 + (i4_field * 2)) : (1 + i4_field);
+
+    ihevce_encode_order_traversal_nodes(
+        ps_encode_node, pps_encode_order, &i4_index, 1, ps_lap_struct);
+}
+
+/*!
+************************************************************************
+* \brief
+* rc_update_model_control_by_lap_for_modified_sub_gop
+*
+* For P and B pictures, scans the picture's reference list for the
+* smallest absolute delta POC. For B pictures it also derives
+* (1 << max temporal layers) / (temporal layer id + 1). None of these
+* values is stored or returned, so the function currently has no effect
+* on its arguments.
+*
+* \param[in] ps_lap_struct : LAP context (static params are read)
+* \param[in] ps_lap_out_buf : buffer whose s_lap_out is inspected
+*
+* \return
+* None
+************************************************************************
+*/
+void rc_update_model_control_by_lap_for_modified_sub_gop(
+    lap_struct_t *ps_lap_struct, ihevce_lap_enc_buf_t *ps_lap_out_buf)
+{
+    ihevce_lap_output_params_t *ps_out = &ps_lap_out_buf->s_lap_out;
+    WORD32 i4_ref;
+    WORD32 i4_abs_delta;
+
+    if(IV_P_FRAME == ps_out->i4_pic_type)
+    {
+        WORD32 i4_nearest = 0x7FFFFFFF;
+
+        for(i4_ref = 0; i4_ref < ps_out->i4_num_ref_pics; i4_ref++)
+        {
+            i4_abs_delta = ABS(ps_out->as_ref_pics[i4_ref].i4_ref_pic_delta_poc);
+            if(i4_abs_delta < i4_nearest)
+            {
+                i4_nearest = i4_abs_delta;
+            }
+        }
+    }
+
+    if(IV_B_FRAME == ps_out->i4_pic_type)
+    {
+        WORD32 i4_nearest = 0x7FFFFFFF;
+        WORD32 i4_expected_b_delta =
+            (1 << ps_lap_struct->s_lap_static_params.i4_max_temporal_layers) /
+            (ps_out->i4_temporal_lyr_id + 1);
+
+        for(i4_ref = 0; i4_ref < ps_out->i4_num_ref_pics; i4_ref++)
+        {
+            i4_abs_delta = ABS(ps_out->as_ref_pics[i4_ref].i4_ref_pic_delta_poc);
+            if(i4_abs_delta < i4_nearest)
+            {
+                i4_nearest = i4_abs_delta;
+            }
+        }
+    }
+}
+
+/*!
+************************************************************************
+* \brief
+* Update num of pic type for rc
+*
+* Records the RC picture-type code of the current buffer in the
+* ai4_pic_type_to_be_removed[] queue of the LAP context, then fills the
+* buffer's s_rc_lap_out.ai4_num_pic_type[] histogram and
+* i4_next_sc_i_in_rc_look_ahead. Both outputs are only incremented here
+* (or, for the fallback path, assigned); this function does not clear them
+* first. Finally guarantees that the histogram holds at least one I_PIC by
+* converting one P_PIC count.
+*
+* \param[in,out] ps_lap_struct : LAP context (queue, indices, lap2 counter)
+* \param[in,out] ps_lap_out_buf : buffer whose s_lap_out is read and whose
+* s_rc_lap_out is updated
+*
+* \return
+* None
+************************************************************************
+*/
+void update_rc_num_pic_type(lap_struct_t *ps_lap_struct, ihevce_lap_enc_buf_t *ps_lap_out_buf)
+{
+ WORD32 i4_field_flag = ps_lap_struct->s_lap_static_params.i4_src_interlace_field;
+ rc_lap_out_params_t *ps_rc_lap_out = &ps_lap_out_buf->s_rc_lap_out;
+
+ ps_lap_struct->i4_lap2_counter++;
+
+ /* Step 1: store the picture-type code of this buffer at the enqueue index.
+ * I and IDR map to I_PIC; P and B map to first-field / other-field codes,
+ * with B further split by temporal layer id 1, 2 or 3. The index is then
+ * updated through GET_IDX_CIRCULAR_BUF (macro defined elsewhere;
+ * presumably a wrap-around increment - confirm). */
+ if(ps_lap_out_buf->s_lap_out.i4_pic_type == IV_I_FRAME ||
+ ps_lap_out_buf->s_lap_out.i4_pic_type == IV_IDR_FRAME)
+ {
+ ps_lap_struct->ai4_pic_type_to_be_removed[ps_lap_struct->i4_enq_idx] = I_PIC;
+ GET_IDX_CIRCULAR_BUF(ps_lap_struct->i4_enq_idx, 1, NUM_LAP2_LOOK_AHEAD);
+ }
+ else if(ps_lap_out_buf->s_lap_out.i4_pic_type == IV_P_FRAME)
+ {
+ if(ps_lap_out_buf->s_lap_out.i4_first_field)
+ {
+ ps_lap_struct->ai4_pic_type_to_be_removed[ps_lap_struct->i4_enq_idx] = P_PIC;
+ }
+ else
+ {
+ ps_lap_struct->ai4_pic_type_to_be_removed[ps_lap_struct->i4_enq_idx] = P1_PIC;
+ }
+ GET_IDX_CIRCULAR_BUF(ps_lap_struct->i4_enq_idx, 1, NUM_LAP2_LOOK_AHEAD);
+ }
+ else if(ps_lap_out_buf->s_lap_out.i4_pic_type == IV_B_FRAME)
+ {
+ if(ps_lap_out_buf->s_lap_out.i4_temporal_lyr_id == 1)
+ {
+ if(ps_lap_out_buf->s_lap_out.i4_first_field)
+ {
+ ps_lap_struct->ai4_pic_type_to_be_removed[ps_lap_struct->i4_enq_idx] = B_PIC;
+ }
+ else
+ {
+ ps_lap_struct->ai4_pic_type_to_be_removed[ps_lap_struct->i4_enq_idx] = BB_PIC;
+ }
+ GET_IDX_CIRCULAR_BUF(ps_lap_struct->i4_enq_idx, 1, NUM_LAP2_LOOK_AHEAD);
+ }
+ else if(ps_lap_out_buf->s_lap_out.i4_temporal_lyr_id == 2)
+ {
+ if(ps_lap_out_buf->s_lap_out.i4_first_field)
+ {
+ ps_lap_struct->ai4_pic_type_to_be_removed[ps_lap_struct->i4_enq_idx] = B1_PIC;
+ }
+ else
+ {
+ ps_lap_struct->ai4_pic_type_to_be_removed[ps_lap_struct->i4_enq_idx] = B11_PIC;
+ }
+ GET_IDX_CIRCULAR_BUF(ps_lap_struct->i4_enq_idx, 1, NUM_LAP2_LOOK_AHEAD);
+ }
+ else if(ps_lap_out_buf->s_lap_out.i4_temporal_lyr_id == 3)
+ {
+ if(ps_lap_out_buf->s_lap_out.i4_first_field)
+ {
+ ps_lap_struct->ai4_pic_type_to_be_removed[ps_lap_struct->i4_enq_idx] = B2_PIC;
+ }
+ else
+ {
+ ps_lap_struct->ai4_pic_type_to_be_removed[ps_lap_struct->i4_enq_idx] = B22_PIC;
+ }
+ GET_IDX_CIRCULAR_BUF(ps_lap_struct->i4_enq_idx, 1, NUM_LAP2_LOOK_AHEAD);
+ }
+ else
+ {
+ ASSERT(0);
+ }
+ }
+ else
+ {
+ ASSERT(0);
+ }
+
+ if(!ps_lap_struct->i4_rc_lap_period) /* Step 2a: RC look-ahead period is 0 - synthesize a whole gop period */
+ {
+ if(ps_lap_struct->i4_rc_lap_period < ps_lap_struct->i4_gop_period)
+ {
+ WORD32 i4_loop;
+ WORD32 idx = 0;
+ WORD32 i4_max_temporal_layer =
+ ps_lap_struct->s_lap_static_params.i4_max_temporal_layers;
+
+ for(i4_loop = 0;
+ i4_loop < (ps_lap_struct->i4_gop_period - ps_lap_struct->i4_rc_lap_period);
+ i4_loop++)
+ {
+ ps_rc_lap_out->i4_next_sc_i_in_rc_look_ahead++;
+
+ if(i4_max_temporal_layer == 0)
+ {
+ if(ps_lap_struct->i4_is_all_i_pic_in_seq)
+ {
+ ps_rc_lap_out->ai4_num_pic_type[I_PIC]++;
+ }
+ else
+ {
+ /*second field*/
+ if((i4_loop & 1) && i4_field_flag)
+ {
+ ps_rc_lap_out->ai4_num_pic_type[P1_PIC]++;
+ }
+ else
+ {
+ ps_rc_lap_out->ai4_num_pic_type[P_PIC]++;
+ }
+ }
+ }
+ else /* with temporal layers the type comes from the gau1_order_insert_pic_type table */
+ {
+ ps_rc_lap_out->ai4_num_pic_type
+ [gau1_order_insert_pic_type[i4_max_temporal_layer - 1][idx]]++;
+
+ GET_IDX_CIRCULAR_BUF(idx, 1, (8 << i4_field_flag));
+ }
+ }
+ }
+ }
+ else /* Step 2b: non-zero RC look-ahead period - count only once the queue holds a full period */
+ {
+ ASSERT(ps_lap_struct->i4_lap2_counter <= ps_lap_struct->i4_rc_lap_period);
+
+ if(ps_lap_struct->i4_lap2_counter == ps_lap_struct->i4_rc_lap_period)
+ {
+ WORD32 i4_loop, i4_period, i4_next_i_pic = 0; /* i4_next_i_pic is never modified below, so it stays 0 */
+ WORD32 i4_stop_count = 0;
+ WORD32 i4_temp_deq = ps_lap_struct->i4_deq_idx;
+ WORD32 i4_first_pic_type = ps_lap_struct->ai4_pic_type_to_be_removed[i4_temp_deq];
+
+ if(ps_lap_struct->i4_rc_lap_period >= ps_lap_struct->i4_gop_period) /* i4_period = min(lap period, gop period) */
+ {
+ i4_period = ps_lap_struct->i4_gop_period;
+ }
+ else
+ {
+ i4_period = ps_lap_struct->i4_rc_lap_period;
+ }
+
+ for(i4_loop = 0; i4_loop < i4_period; i4_loop++) /* tally queued types starting at the dequeue index */
+ {
+ if(ps_lap_struct->ai4_pic_type_to_be_removed[i4_temp_deq] == I_PIC && i4_loop &&
+ i4_first_pic_type == I_PIC) /* a later I_PIC after a leading I_PIC stops the distance count */
+ {
+ i4_stop_count = 1;
+ }
+
+ if(!i4_stop_count)
+ {
+ ps_rc_lap_out->i4_next_sc_i_in_rc_look_ahead++;
+ }
+
+ ps_rc_lap_out
+ ->ai4_num_pic_type[ps_lap_struct->ai4_pic_type_to_be_removed[i4_temp_deq]]++;
+
+ GET_IDX_CIRCULAR_BUF(i4_temp_deq, 1, NUM_LAP2_LOOK_AHEAD);
+ }
+ if(ps_lap_struct->i4_rc_lap_period < ps_lap_struct->i4_gop_period) /* pad the rest of the gop period with synthesized types */
+ {
+ WORD32 i4_loop;
+ WORD32 idx = 0;
+ WORD32 i4_max_temporal_layer =
+ ps_lap_struct->s_lap_static_params.i4_max_temporal_layers;
+
+ for(i4_loop = 0;
+ i4_loop < (ps_lap_struct->i4_gop_period - ps_lap_struct->i4_rc_lap_period) &&
+ (!i4_next_i_pic);
+ i4_loop++)
+ {
+ if(!i4_stop_count)
+ {
+ ps_rc_lap_out->i4_next_sc_i_in_rc_look_ahead++;
+ }
+
+ if(i4_max_temporal_layer == 0)
+ {
+ if(ps_lap_struct->i4_is_all_i_pic_in_seq)
+ {
+ ps_rc_lap_out->ai4_num_pic_type[I_PIC]++;
+ }
+ else
+ {
+ /*second field*/
+ if((i4_loop & 1) && i4_field_flag)
+ {
+ ps_rc_lap_out->ai4_num_pic_type[P1_PIC]++;
+ }
+ else
+ {
+ ps_rc_lap_out->ai4_num_pic_type[P_PIC]++;
+ }
+ }
+ }
+ else
+ {
+ ps_rc_lap_out->ai4_num_pic_type
+ [gau1_order_insert_pic_type[i4_max_temporal_layer - 1][idx]]++;
+ GET_IDX_CIRCULAR_BUF(idx, 1, (8 << i4_field_flag));
+ }
+ }
+ }
+ /* Remove one pic type: move the dequeue index on and drop the
+ * counter by one */
+ GET_IDX_CIRCULAR_BUF(ps_lap_struct->i4_deq_idx, 1, NUM_LAP2_LOOK_AHEAD);
+ ps_lap_struct->i4_lap2_counter--;
+ }
+ }
+
+ { /* Step 3: fallback - if nothing was counted above, synthesize a full gop period */
+ WORD32 i4_loop;
+ WORD32 idx = 0;
+ WORD32 i4_max_temporal_layer = ps_lap_struct->s_lap_static_params.i4_max_temporal_layers;
+ WORD32 i4_num_pictype = 0;
+
+ for(i4_loop = 0; i4_loop < MAX_PIC_TYPE; i4_loop++)
+ {
+ i4_num_pictype += ps_rc_lap_out->ai4_num_pic_type[i4_loop];
+ }
+
+ if(!i4_num_pictype)
+ {
+ ps_rc_lap_out->i4_next_sc_i_in_rc_look_ahead = ps_lap_struct->i4_gop_period;
+
+ for(i4_loop = 0; i4_loop < (ps_lap_struct->i4_gop_period); i4_loop++)
+ {
+ if(i4_max_temporal_layer == 0)
+ {
+ if(ps_lap_struct->i4_is_all_i_pic_in_seq)
+ {
+ ps_rc_lap_out->ai4_num_pic_type[I_PIC]++;
+ }
+ else
+ {
+ /*second field*/
+ if((i4_loop & 1) && i4_field_flag)
+ {
+ ps_rc_lap_out->ai4_num_pic_type[P1_PIC]++;
+ }
+ else
+ {
+ ps_rc_lap_out->ai4_num_pic_type[P_PIC]++;
+ }
+ }
+ }
+ else
+ {
+ ps_rc_lap_out->ai4_num_pic_type
+ [gau1_order_insert_pic_type[i4_max_temporal_layer - 1][idx]]++;
+
+ GET_IDX_CIRCULAR_BUF(idx, 1, (8 << i4_field_flag));
+ }
+ }
+ }
+ }
+ /* FOR RC: ensure at least 1 I pic in the gop period in any case, by
+ * converting one P_PIC count into an I_PIC count (asserts a P_PIC exists) */
+ if(!ps_rc_lap_out->ai4_num_pic_type[I_PIC])
+ {
+ ASSERT(ps_rc_lap_out->ai4_num_pic_type[P_PIC]);
+ ps_lap_out_buf->s_rc_lap_out.ai4_num_pic_type[P_PIC]--;
+ ps_lap_out_buf->s_rc_lap_out.ai4_num_pic_type[I_PIC]++;
+ }
+ return;
+}
+
+/*!
+************************************************************************
+* \brief
+* pre rel lap output update
+*
+* Final per-buffer bookkeeping performed on a LAP output buffer: clears
+* i4_used, runs the two RC update helpers, links the previous buffer's
+* s_rc_lap_out to this one (only when RC look-ahead pics are configured),
+* remembers this buffer as the previous one, and then runs a series of
+* ASSERT consistency checks on POC, picture type and reference count.
+* i4_curr_frm_qp is set to -1 at the end.
+*
+* \param[in,out] ps_lap_struct : LAP context
+* \param[in,out] ps_lap_out_buf : buffer being prepared
+*
+* \return
+* None
+************************************************************************
+*/
+void ihevce_pre_rel_lapout_update(lap_struct_t *ps_lap_struct, ihevce_lap_enc_buf_t *ps_lap_out_buf)
+{
+ WORD32 i4_first_field = 1; /* assigned below but not read again in this function */
+ WORD32 i4_field = ps_lap_struct->s_lap_static_params.i4_src_interlace_field;
+
+ if(i4_field)
+ {
+ i4_first_field = ps_lap_out_buf->s_lap_out.i4_first_field;
+ }
+
+ ps_lap_out_buf->s_lap_out.i4_used = 0;
+
+ rc_update_model_control_by_lap_for_modified_sub_gop(ps_lap_struct, ps_lap_out_buf);
+ update_rc_num_pic_type(ps_lap_struct, ps_lap_out_buf);
+
+ /* Encode-order chaining: the current buffer's "next" is NULL, the previous
+ * buffer's "next" is pointed at the current buffer (only if RC look-ahead
+ * pics are enabled), and the current buffer becomes the previous one */
+
+ ps_lap_out_buf->s_rc_lap_out.ps_rc_lap_out_next_encode = NULL;
+ if(ps_lap_struct->pv_prev_inp_buf != NULL &&
+ ps_lap_struct->s_lap_static_params.s_lap_params.i4_rc_look_ahead_pics)
+ {
+ ((ihevce_lap_enc_buf_t *)ps_lap_struct->pv_prev_inp_buf)
+ ->s_rc_lap_out.ps_rc_lap_out_next_encode = (void *)&ps_lap_out_buf->s_rc_lap_out;
+ }
+
+ ps_lap_struct->pv_prev_inp_buf = (void *)ps_lap_out_buf;
+ ps_lap_out_buf->s_lap_out.i4_is_prev_pic_in_Tid0_same_scene = 0;
+
+ /* With force IDR the check below is not valid. It applies when max and
+ * min IDR periods are equal: at POC 0 the previous maximum POC must be
+ * (max IDR period - 1), and it is then reset to 0 */
+#if(!FORCE_IDR_TEST)
+ if(ps_lap_struct->i4_max_idr_period == ps_lap_struct->i4_min_idr_period)
+ {
+ if(!ps_lap_out_buf->s_lap_out.i4_poc)
+ {
+ ASSERT(ps_lap_struct->i4_max_prev_poc == (ps_lap_struct->i4_max_idr_period - 1));
+ ps_lap_struct->i4_max_prev_poc = 0;
+ }
+ }
+#endif
+
+ /*assert if num of reference frame is zero in case of P or B frame*/
+ if(ps_lap_out_buf->s_lap_out.i4_pic_type == IV_P_FRAME ||
+ ps_lap_out_buf->s_lap_out.i4_pic_type == IV_B_FRAME)
+ {
+ ASSERT(ps_lap_out_buf->s_lap_out.i4_num_ref_pics != 0);
+ }
+
+ /* Assert if poc = 0 and pictype is not an IDR, if an IDR has a non-zero
+ * poc, or if the poc is negative */
+ if(ps_lap_out_buf->s_lap_out.i4_pic_type != IV_IDR_FRAME &&
+ ps_lap_out_buf->s_lap_out.i4_poc == 0)
+ {
+ ASSERT(0);
+ }
+ if(ps_lap_out_buf->s_lap_out.i4_pic_type == IV_IDR_FRAME &&
+ ps_lap_out_buf->s_lap_out.i4_poc != 0)
+ {
+ ASSERT(0);
+ }
+ if(ps_lap_out_buf->s_lap_out.i4_poc < 0)
+ {
+ ASSERT(0);
+ }
+
+#if(!FORCE_IDR_TEST)
+ if((!ps_lap_struct->i4_max_idr_period) && ps_lap_out_buf->s_lap_out.i4_display_num != 0) /* no IDR period: only display number 0 may be an IDR */
+ {
+ ASSERT(ps_lap_out_buf->s_lap_out.i4_pic_type != IV_IDR_FRAME);
+ }
+#endif
+ if(!ps_lap_struct->i4_max_cra_period) /* no CRA period: no picture may be flagged CRA */
+ {
+ ASSERT(ps_lap_out_buf->s_lap_out.i4_is_cra_pic != 1);
+ }
+
+ if(ps_lap_out_buf->s_lap_out.i4_force_idr_flag) /* a forced IDR must have been typed as IDR */
+ {
+ ASSERT(ps_lap_out_buf->s_lap_out.i4_pic_type == IV_IDR_FRAME);
+ }
+ ps_lap_out_buf->s_lap_out.i4_curr_frm_qp = -1;
+}
+
+/*!
+************************************************************************
+* \brief
+* lap queue input
+*
+* Appends one input buffer (may be NULL) to the capture-order array and
+* stamps it with the next POC. Once the number of queued entries equals
+* the sub-gop size plus first_gop_flag, the sub-gop is converted to encode
+* order: I/P pictures are placed first, B pictures are added for
+* hierarchical configurations, the encode order is corrected when the
+* dynamic sub-gop size exceeds the regular one, and the capture index,
+* IDR flag and dynamic sub-gop size are reset.
+*
+* \param[in,out] ps_lap_struct : LAP context
+* \param[in] ps_input_lap_enc_buf : buffer to queue, or NULL
+* \param[in] pi4_tree_num : index into aps_encode_node[] selecting the
+* encode tree to use
+*
+* \return
+* None
+************************************************************************
+*/
+void ihevce_lap_queue_input(
+ lap_struct_t *ps_lap_struct, ihevce_lap_enc_buf_t *ps_input_lap_enc_buf, WORD32 *pi4_tree_num)
+{
+ ihevce_encode_node_t *ps_encode_node =
+ (ihevce_encode_node_t *)ps_lap_struct->aps_encode_node[*pi4_tree_num];
+
+ WORD32 i4_capture_idx = ps_lap_struct->i4_capture_idx;
+
+ /* Static Lap parameters */
+ ihevce_lap_static_params_t *ps_lap_static_params =
+ (ihevce_lap_static_params_t *)&ps_lap_struct->s_lap_static_params;
+
+ WORD32 hier_layer = ps_lap_static_params->i4_max_temporal_layers;
+ WORD32 sub_gop_size = ps_lap_struct->i4_dyn_sub_gop_size;
+
+ /* queue the current input in capture array */
+ {
+ WORD32 first_gop_flag;
+
+ if(!i4_capture_idx) /* first entry of a sub-gop: clear the whole capture array */
+ {
+ memset(
+ &ps_lap_struct->api4_capture_order_array[0],
+ 0,
+ sizeof(ihevce_lap_enc_buf_t *) * MAX_NUM_ENC_NODES);
+ }
+ ps_lap_struct->api4_capture_order_array[i4_capture_idx] = ps_input_lap_enc_buf;
+
+ if(ps_input_lap_enc_buf != NULL)
+ {
+ if(ps_input_lap_enc_buf->s_lap_out.i4_end_flag == 1) /* remember where the end-flagged picture sits */
+ ps_lap_struct->i4_end_flag_pic_idx = i4_capture_idx;
+ ps_lap_struct->ai4_capture_order_poc[i4_capture_idx] = ps_lap_struct->i4_curr_poc++;
+ }
+ i4_capture_idx++;
+
+ /* To take care of buffering 1 extra picture at start or at IDR interval:
+ * first_gop_flag is the IDR flag, shifted left by the interlace-field
+ * setting for non-all-intra content, and forced to 0 for interlaced
+ * content with sub_gop_size <= 2 */
+ if(!ps_lap_struct->i4_is_all_i_pic_in_seq)
+ {
+ if(ps_lap_static_params->i4_src_interlace_field && sub_gop_size <= 2)
+ {
+ first_gop_flag = 0;
+ }
+ else
+ {
+ first_gop_flag = ps_lap_struct->i4_idr_flag
+ << ps_lap_static_params->i4_src_interlace_field;
+ }
+ }
+ else
+ {
+ first_gop_flag = ps_lap_struct->i4_idr_flag;
+ }
+
+ /* On an IDR picture arriving while first_gop_flag is 0: rewind the
+ * encode-order POC pointer, set idr_flag, restart POC numbering at 0
+ * and re-stamp this picture's capture-order POC */
+ if(ps_input_lap_enc_buf != NULL)
+ {
+ if((!first_gop_flag) && (ps_input_lap_enc_buf->s_lap_out.i4_pic_type == IV_IDR_FRAME))
+ {
+ ps_lap_struct->pi4_encode_poc_ptr = &ps_lap_struct->ai4_encode_order_poc[0];
+ ps_lap_struct->i4_idr_flag = 1;
+ ps_lap_struct->i4_curr_poc = 0;
+ ps_lap_struct->ai4_capture_order_poc[i4_capture_idx - 1] =
+ ps_lap_struct->i4_curr_poc++;
+ }
+ }
+
+ if(first_gop_flag &&
+ (ps_lap_struct->i4_is_all_i_pic_in_seq || ps_lap_struct->i4_immediate_idr_case)) /* only the extra first picture is queued */
+ {
+ sub_gop_size = 0;
+ }
+
+ if(!first_gop_flag && ps_lap_struct->i4_immediate_idr_case &&
+ (i4_capture_idx != (sub_gop_size + first_gop_flag))) /* immediate IDR: shrink the sub-gop to one frame (or field pair) */
+ {
+ sub_gop_size = 1 << ps_lap_static_params->i4_src_interlace_field;
+ ps_lap_struct->i4_dyn_sub_gop_size = 1 << ps_lap_static_params->i4_src_interlace_field;
+ }
+
+ /* reset the queue idx end of every gop */
+ if(i4_capture_idx == (sub_gop_size + first_gop_flag))
+ {
+ if(ps_lap_struct->i4_end_flag_pic_idx)
+ {
+ WORD32 i4_temp_poc = 0;
+ ihevce_lap_enc_buf_t *ps_temp_lap_enc_buf = NULL;
+
+ /* Swap the lap enc buf and poc: the picture just before the
+ * end-flagged one is moved to the last slot and retyped as P, and the
+ * end-flagged picture is moved to the slot before it.
+ * NOTE(review): ps_temp_lap_enc_buf is dereferenced without a NULL
+ * check and index (i4_capture_idx - 2) assumes at least two queued
+ * entries - confirm both hold for every caller */
+ ps_temp_lap_enc_buf =
+ ps_lap_struct->api4_capture_order_array[ps_lap_struct->i4_end_flag_pic_idx - 1];
+ ps_lap_struct->api4_capture_order_array[ps_lap_struct->i4_end_flag_pic_idx - 1] =
+ NULL;
+ ps_lap_struct->api4_capture_order_array[i4_capture_idx - 2] =
+ ps_lap_struct->api4_capture_order_array[ps_lap_struct->i4_end_flag_pic_idx];
+
+ if((i4_capture_idx - 2) != ps_lap_struct->i4_end_flag_pic_idx)
+ ps_lap_struct->api4_capture_order_array[ps_lap_struct->i4_end_flag_pic_idx] =
+ NULL;
+
+ ps_temp_lap_enc_buf->s_lap_out.i4_pic_type = IV_P_FRAME;
+ ps_lap_struct->api4_capture_order_array[i4_capture_idx - 1] = ps_temp_lap_enc_buf;
+
+ i4_temp_poc =
+ ps_lap_struct->ai4_capture_order_poc[ps_lap_struct->i4_end_flag_pic_idx - 1];
+ ps_lap_struct->ai4_capture_order_poc[i4_capture_idx - 2] =
+ ps_lap_struct->ai4_capture_order_poc[ps_lap_struct->i4_end_flag_pic_idx];
+
+ ps_lap_struct->ai4_capture_order_poc[i4_capture_idx - 1] = i4_temp_poc;
+ }
+ i4_capture_idx = 0;
+
+ /* add the number of pics in sub gop to the gop counter */
+ /* Get reordered Buffer for encoder, wait till all sub-gop buffers are output */
+
+ /* Populate I/P pictures */
+ ihevce_ip_pic_population(ps_encode_node, ps_lap_struct, first_gop_flag);
+
+ /* For hierarchical layers, Populate B picture */
+ if((hier_layer > 0) &&
+ sub_gop_size > (1 << ps_lap_static_params->i4_src_interlace_field))
+ {
+ ihevce_b_pic_population(ps_encode_node, ps_lap_struct);
+ }
+
+ ps_lap_struct->i4_num_bufs_encode_order = sub_gop_size + first_gop_flag;
+
+ /* Correction of encode order in case of multiple non reference B:
+ * rebuilds the encode order in a temporary array, interleaving
+ * capture-order entries around each non-reference picture of the
+ * highest temporal layer, then copies the result back */
+ if(ps_lap_struct->i4_dyn_sub_gop_size > ps_lap_struct->i4_sub_gop_size)
+ {
+ WORD32 i4_loop;
+ ihevce_lap_enc_buf_t *ps_lap_enc_buf, *ps_lap_enc_buf_tmp[MAX_NUM_ENC_NODES];
+ WORD32 i4_enc_cnt, i4_cap_cnt;
+
+ i4_cap_cnt = first_gop_flag;
+ i4_enc_cnt = 0;
+
+ for(i4_loop = 0; i4_loop < ps_lap_struct->i4_num_bufs_encode_order; i4_loop++)
+ {
+ ps_lap_enc_buf = ps_lap_struct->api4_encode_order_array[i4_loop];
+
+ if(ps_lap_enc_buf != NULL && !ps_lap_enc_buf->s_lap_out.i4_is_ref_pic &&
+ (ps_lap_enc_buf->s_lap_out.i4_temporal_lyr_id ==
+ ps_lap_struct->s_lap_static_params.i4_max_temporal_layers))
+ {
+ if(ps_lap_enc_buf != ps_lap_struct->api4_capture_order_array[i4_cap_cnt])
+ {
+ ps_lap_enc_buf_tmp[i4_enc_cnt] =
+ ps_lap_struct->api4_capture_order_array[i4_cap_cnt];
+ i4_enc_cnt++;
+ i4_loop++;
+ }
+ i4_cap_cnt += 2;
+ ps_lap_enc_buf_tmp[i4_enc_cnt] = ps_lap_enc_buf;
+ i4_enc_cnt++;
+ ps_lap_enc_buf_tmp[i4_enc_cnt] =
+ ps_lap_struct->api4_capture_order_array[i4_cap_cnt];
+ i4_enc_cnt++;
+ i4_cap_cnt += 2;
+ i4_loop++;
+ }
+ else
+ {
+ ps_lap_enc_buf_tmp[i4_enc_cnt] = ps_lap_enc_buf;
+ i4_enc_cnt++;
+ }
+ }
+ for(i4_loop = 0; i4_loop < ps_lap_struct->i4_num_bufs_encode_order; i4_loop++)
+ {
+ ps_lap_struct->api4_encode_order_array[i4_loop] = ps_lap_enc_buf_tmp[i4_loop];
+ }
+ }
+
+ /* reset the IDR flag */
+ ps_lap_struct->i4_idr_flag = 0;
+ ps_lap_struct->i4_dyn_sub_gop_size = ps_lap_struct->i4_sub_gop_size;
+ }
+
+ if(0 == ps_lap_struct->i4_lap_out_idx)
+ ps_lap_struct->i4_max_buf_in_enc_order = ps_lap_struct->i4_num_bufs_encode_order;
+
+ /* store the capture index */
+ ps_lap_struct->i4_capture_idx = i4_capture_idx;
+ ps_lap_struct->i4_immediate_idr_case = 0;
+ }
+ return;
+}
+
+/*!
+************************************************************************
+* \brief lap process: parses pending control commands, enqueues the current
+* input, assigns a picture type to the buffer at the dequeue index and returns
+* the next buffer in encode order (NULL when that encode-order slot is empty)
+*/
+ihevce_lap_enc_buf_t *ihevce_lap_process(void *pv_interface_ctxt, ihevce_lap_enc_buf_t *ps_curr_inp)
+{
+ lap_intface_t *ps_lap_interface = (lap_intface_t *)pv_interface_ctxt;
+ lap_struct_t *ps_lap_struct = (lap_struct_t *)ps_lap_interface->pv_lap_module_ctxt;
+ ihevce_hle_ctxt_t *ps_hle_ctxt = (ihevce_hle_ctxt_t *)ps_lap_interface->pv_hle_ctxt;
+ ihevce_lap_enc_buf_t *ps_lap_inp_buf = ps_curr_inp;
+ ihevce_tgt_params_t *ps_tgt_params =
+ &ps_lap_struct->s_static_cfg_params.s_tgt_lyr_prms.as_tgt_params[0];
+ WORD32 i4_field_flag = ps_lap_struct->s_lap_static_params.i4_src_interlace_field;
+ WORD32 i4_num_frames_after_force_idr = ps_lap_struct->i4_num_frames_after_force_idr;
+ WORD32 i4_flush_check = 0;
+ WORD32 i4_force_idr_check = 0;
+ WORD32 i4_set_res_check = 0;
+ WORD32 i4_tree_num = 0;
+ iv_input_ctrl_buffs_t *ps_ctrl_buf = NULL;
+ WORD32 buf_id = 0;
+
+ ps_lap_interface->i4_ctrl_in_que_blocking_mode = BUFF_QUE_NON_BLOCKING_MODE;
+
+ /* ----------- LAP processing (skipped once end_flag has been set by a flush) ----------- */
+ if(ps_lap_struct->end_flag != 1)
+ {
+ ASSERT(NULL != ps_curr_inp);
+
+ /* ---------- get the filled control command buffer ------------ */
+ ps_ctrl_buf = (iv_input_ctrl_buffs_t *)ihevce_q_get_filled_buff(
+ ps_hle_ctxt->apv_enc_hdl[0],
+ ps_lap_interface->i4_ctrl_in_que_id,
+ &buf_id,
+ ps_lap_interface->i4_ctrl_in_que_blocking_mode);
+
+ /* ----------- check the command ---------------------- */
+ if(NULL != ps_ctrl_buf)
+ {
+ /* parse the async command buffer; collects any dynamic bitrate change requests */
+ ihevce_dyn_config_prms_t as_dyn_br[MAX_NUM_DYN_BITRATE_CMDS];
+ WORD32 i4_num_set_bitrate_cmds = 0;
+ WORD32 bitrt_ctr = 0;
+
+ ihevce_lap_parse_async_cmd(
+ ps_hle_ctxt,
+ (WORD32 *)ps_ctrl_buf->pv_asynch_ctrl_bufs,
+ ps_ctrl_buf->i4_cmd_buf_size,
+ ps_ctrl_buf->i4_buf_id,
+ &i4_num_set_bitrate_cmds,
+ &as_dyn_br[0]);
+
+ /* Call the call back function to register the new bitrate */
+ for(bitrt_ctr = 0; bitrt_ctr < i4_num_set_bitrate_cmds; bitrt_ctr++)
+ {
+ ps_lap_interface->ihevce_dyn_bitrate_cb(
+ (void *)ps_hle_ctxt, (void *)&as_dyn_br[bitrt_ctr]);
+ }
+ }
+
+ {
+ WORD32 *pi4_cmd_buf = (WORD32 *)ps_lap_inp_buf->s_input_buf.pv_synch_ctrl_bufs;
+
+ /* check for sync cmd buffer error */
+ /* check FLUSH command and Force IDR in the complete buffer */
+ i4_flush_check = 0;
+ i4_force_idr_check = 0;
+ i4_set_res_check = 0;
+ ihevce_lap_parse_sync_cmd(
+ ps_hle_ctxt,
+ &ps_lap_struct->s_static_cfg_params,
+ pi4_cmd_buf,
+ ps_lap_inp_buf,
+ &i4_flush_check,
+ &i4_force_idr_check,
+ &i4_set_res_check,
+ &i4_num_frames_after_force_idr);
+
+ if(i4_flush_check)
+ ps_lap_struct->end_flag = 1;
+
+ ps_lap_inp_buf->s_lap_out.i4_out_flush_flag = 0;
+ ps_lap_inp_buf->s_lap_out.i4_end_flag = ps_lap_struct->end_flag;
+
+ /* check if input buffer is a valid buffer */
+ if(1 == ps_lap_inp_buf->s_input_buf.i4_inp_frm_data_valid_flag)
+ {
+ /* Initialise LAP output descriptors; NOTE(review): also zeroes the flags set just above - confirm */
+ memset(&ps_lap_inp_buf->s_lap_out, 0, sizeof(ihevce_lap_output_params_t));
+ memset(&ps_lap_inp_buf->s_rc_lap_out, 0, sizeof(rc_lap_out_params_t));
+ /* Default initialization of lapout parameters */
+ ps_lap_inp_buf->s_lap_out.i4_scene_type = SCENE_TYPE_NORMAL;
+ ps_lap_inp_buf->s_lap_out.u4_scene_num = 0;
+ ps_lap_inp_buf->s_lap_out.i4_display_num = ps_lap_struct->i4_display_num;
+ ps_lap_inp_buf->s_lap_out.i4_quality_preset = ps_tgt_params->i4_quality_preset;
+ ps_lap_inp_buf->s_lap_out.i1_weighted_pred_flag = 0;
+ ps_lap_inp_buf->s_lap_out.i1_weighted_bipred_flag = 0;
+ ps_lap_inp_buf->s_lap_out.i4_log2_luma_wght_denom = DENOM_DEFAULT;
+ ps_lap_inp_buf->s_lap_out.i4_log2_chroma_wght_denom = DENOM_DEFAULT;
+ ps_lap_inp_buf->s_lap_out.as_ref_pics[0].i4_num_duplicate_entries_in_ref_list = 1;
+ ps_lap_inp_buf->s_lap_out.as_ref_pics[0].i4_used_by_cur_pic_flag = 1;
+ ps_lap_inp_buf->s_lap_out.as_ref_pics[0].as_wght_off[0].u1_luma_weight_enable_flag =
+ 0;
+ ps_lap_inp_buf->s_lap_out.as_ref_pics[0]
+ .as_wght_off[0]
+ .u1_chroma_weight_enable_flag = 0;
+ ps_lap_inp_buf->s_lap_out.i4_first_field = 1;
+ ps_lap_inp_buf->s_lap_out.i4_force_idr_flag = 0;
+ ps_lap_inp_buf->s_lap_out.i4_curr_frm_qp = ps_tgt_params->ai4_frame_qp[0];
+ ps_lap_inp_buf->s_lap_out.i4_used = 1;
+ if(i4_force_idr_check)
+ {
+ ps_lap_inp_buf->s_lap_out.i4_force_idr_flag = 1;
+ }
+ ASSERT(i4_set_res_check == 0);
+
+ /* Populate input params in lap out struct */
+ ps_lap_inp_buf->s_lap_out.s_input_buf.pv_y_buf =
+ ps_lap_inp_buf->s_input_buf.s_input_buf.pv_y_buf;
+ ps_lap_inp_buf->s_lap_out.s_input_buf.pv_u_buf =
+ ps_lap_inp_buf->s_input_buf.s_input_buf.pv_u_buf;
+ ps_lap_inp_buf->s_lap_out.s_input_buf.pv_v_buf =
+ ps_lap_inp_buf->s_input_buf.s_input_buf.pv_v_buf;
+ ps_lap_inp_buf->s_lap_out.s_input_buf.i4_y_wd =
+ ps_lap_inp_buf->s_input_buf.s_input_buf.i4_y_wd;
+ ps_lap_inp_buf->s_lap_out.s_input_buf.i4_y_ht =
+ ps_lap_inp_buf->s_input_buf.s_input_buf.i4_y_ht;
+ ps_lap_inp_buf->s_lap_out.s_input_buf.i4_y_strd =
+ ps_lap_inp_buf->s_input_buf.s_input_buf.i4_y_strd;
+ ps_lap_inp_buf->s_lap_out.s_input_buf.i4_uv_wd =
+ ps_lap_inp_buf->s_input_buf.s_input_buf.i4_uv_wd;
+ ps_lap_inp_buf->s_lap_out.s_input_buf.i4_uv_ht =
+ ps_lap_inp_buf->s_input_buf.s_input_buf.i4_uv_ht;
+ ps_lap_inp_buf->s_lap_out.s_input_buf.i4_uv_strd =
+ ps_lap_inp_buf->s_input_buf.s_input_buf.i4_uv_strd;
+
+ ps_lap_struct->i4_display_num++;
+ i4_num_frames_after_force_idr++;
+
+ ps_lap_struct->aps_lap_inp_buf[ps_lap_struct->i4_buf_enq_idx] = ps_lap_inp_buf;
+ /* update first field flag (for interlaced source: top-field-first XOR bottom-field) */
+ ps_lap_inp_buf->s_lap_out.i4_first_field = 1;
+ if(i4_field_flag)
+ {
+ ps_lap_inp_buf->s_lap_out.i4_first_field =
+ (ps_lap_inp_buf->s_input_buf.i4_topfield_first ^
+ ps_lap_inp_buf->s_input_buf.i4_bottom_field);
+ }
+
+ /* force idr in case interlace input can be taken only for first field */
+ if(!ps_lap_inp_buf->s_lap_out.i4_first_field)
+ {
+ ps_lap_inp_buf->s_lap_out.i4_force_idr_flag = 0;
+ }
+
+ /* no decided frame types pending: determine the next sub-gop state */
+ if(0 ==
+ ps_lap_struct->i4_num_frm_type_decided /*&& ps_lap_struct->i4_init_delay_over*/)
+ {
+ ps_lap_struct->ai1_pic_type[0] =
+ ps_lap_struct->ai1_pic_type[ps_lap_struct->i4_next_start_ctr];
+
+ ihevce_determine_next_sub_gop_state(ps_lap_struct);
+
+ ps_lap_struct->i4_next_start_ctr = 0;
+ }
+
+ if(/*ps_lap_struct->i4_init_delay_over &&*/ 0 !=
+ ps_lap_struct->i4_num_frm_type_decided)
+ {
+ ihevce_assign_pic_type(
+ ps_lap_struct,
+ ps_lap_struct->aps_lap_inp_buf[ps_lap_struct->i4_buf_deq_idx]);
+
+ ps_lap_struct->i4_num_frm_type_decided--;
+
+ if(NULL != ps_lap_struct->aps_lap_inp_buf[ps_lap_struct->i4_buf_deq_idx])
+ {
+ /* special case of two consecutive idr at the start of encode or due to force idr */
+ ps_lap_struct->i4_immediate_idr_case =
+ ps_lap_struct->i4_is_all_i_pic_in_seq;
+ if(ps_lap_struct->aps_lap_inp_buf[ps_lap_struct->i4_buf_deq_idx]
+ ->s_lap_out.i4_pic_type == IV_IDR_FRAME)
+ {
+ ps_lap_struct->i4_immediate_idr_case = 1;
+ }
+ else
+ {
+ WORD32 i4_prev_idx = ps_lap_struct->i4_buf_deq_idx > 0
+ ? ps_lap_struct->i4_buf_deq_idx - 1
+ : ps_lap_struct->i4_buf_deq_idx;
+ /* field case of single IDR field followed by P; NOTE(review): at deq idx 0 the "previous" slot is the slot itself, not the queue tail - confirm */
+ if(NULL != ps_lap_struct->aps_lap_inp_buf[i4_prev_idx] &&
+ i4_field_flag &&
+ ps_lap_struct->aps_lap_inp_buf[i4_prev_idx]->s_lap_out.i4_pic_type ==
+ IV_IDR_FRAME &&
+ !ps_lap_struct->i4_num_frm_type_decided)
+ {
+ ps_lap_struct->i4_immediate_idr_case = 1;
+ }
+ }
+ }
+
+ /* Queue the buffer at the dequeue index into the LAP queue */
+ ihevce_lap_queue_input(
+ ps_lap_struct,
+ ps_lap_struct->aps_lap_inp_buf[ps_lap_struct->i4_buf_deq_idx],
+ &i4_tree_num);
+
+ ps_lap_struct->i4_next_start_ctr++;
+ ps_lap_struct->i4_buf_deq_idx++;
+ if(ps_lap_struct->i4_buf_deq_idx >= MAX_QUEUE_LENGTH)
+ ps_lap_struct->i4_buf_deq_idx = 0;
+ }
+
+ ps_lap_struct->i4_buf_enq_idx++;
+ if(ps_lap_struct->i4_buf_enq_idx >= MAX_QUEUE_LENGTH)
+ ps_lap_struct->i4_buf_enq_idx = 0;
+ } /* end if for valid input buffer check*/
+ }
+
+ /* source pixel padding: width/height are extended using SET_CTB_ALIGN with the min CU size */
+ if(ps_lap_inp_buf->s_input_buf.i4_inp_frm_data_valid_flag)
+ {
+ ihevce_src_params_t *ps_src_prms = &ps_lap_struct->s_static_cfg_params.s_src_prms;
+ WORD32 i4_align_wd = ps_src_prms->i4_width;
+ WORD32 i4_align_ht = ps_src_prms->i4_height;
+ WORD32 min_cu_size =
+ (1 << ps_lap_struct->s_static_cfg_params.s_config_prms.i4_min_log2_cu_size);
+
+ i4_align_wd += SET_CTB_ALIGN(ps_src_prms->i4_width, min_cu_size);
+ i4_align_ht += SET_CTB_ALIGN(ps_src_prms->i4_height, min_cu_size);
+
+ ihevce_lap_pad_input_bufs(ps_lap_inp_buf, i4_align_wd, i4_align_ht);
+ }
+ {
+ ps_lap_inp_buf->s_lap_out.s_logo_ctxt.logo_width = 0;
+ ps_lap_inp_buf->s_lap_out.s_logo_ctxt.logo_height = 0;
+ ps_lap_inp_buf->s_lap_out.s_logo_ctxt.logo_x_offset = 0;
+ ps_lap_inp_buf->s_lap_out.s_logo_ctxt.logo_y_offset = 0;
+ }
+ }
+
+ if(ps_lap_struct->end_flag == 1)
+ {
+ ps_lap_struct->aps_lap_inp_buf[ps_lap_struct->i4_buf_enq_idx] = ps_lap_inp_buf;
+
+ /* no decided frame types pending: determine the next sub-gop state */
+ if(0 == ps_lap_struct->i4_num_frm_type_decided)
+ {
+ ps_lap_struct->ai1_pic_type[0] =
+ ps_lap_struct->ai1_pic_type[ps_lap_struct->i4_next_start_ctr];
+
+ ihevce_determine_next_sub_gop_state(ps_lap_struct);
+
+ ps_lap_struct->i4_next_start_ctr = 0;
+ }
+
+ if(NULL != ps_lap_struct->aps_lap_inp_buf[ps_lap_struct->i4_buf_deq_idx])
+ {
+ ihevce_assign_pic_type(
+ ps_lap_struct, ps_lap_struct->aps_lap_inp_buf[ps_lap_struct->i4_buf_deq_idx]);
+ }
+
+ ps_lap_struct->i4_num_frm_type_decided--;
+
+ if(NULL != ps_lap_struct->aps_lap_inp_buf[ps_lap_struct->i4_buf_deq_idx])
+ {
+ /* special case of two consecutive idr at the start of encode or due to force idr */
+ ps_lap_struct->i4_immediate_idr_case = ps_lap_struct->i4_is_all_i_pic_in_seq;
+
+ if(ps_lap_struct->aps_lap_inp_buf[ps_lap_struct->i4_buf_deq_idx]
+ ->s_lap_out.i4_pic_type == IV_IDR_FRAME)
+ {
+ ps_lap_struct->i4_immediate_idr_case = 1;
+ }
+ else
+ {
+ WORD32 i4_prev_idx = ps_lap_struct->i4_buf_deq_idx > 0
+ ? ps_lap_struct->i4_buf_deq_idx - 1
+ : ps_lap_struct->i4_buf_deq_idx;
+ /* field case of single IDR field followed by P; NOTE(review): at deq idx 0 the "previous" slot is the slot itself, not the queue tail - confirm */
+ if(NULL != ps_lap_struct->aps_lap_inp_buf[i4_prev_idx] && i4_field_flag &&
+ ps_lap_struct->aps_lap_inp_buf[i4_prev_idx]->s_lap_out.i4_pic_type ==
+ IV_IDR_FRAME &&
+ !ps_lap_struct->i4_num_frm_type_decided)
+ {
+ ps_lap_struct->i4_immediate_idr_case = 1;
+ }
+ }
+ }
+ /* Queue the buffer at the dequeue index into the LAP queue */
+ ihevce_lap_queue_input(
+ ps_lap_struct,
+ ps_lap_struct->aps_lap_inp_buf[ps_lap_struct->i4_buf_deq_idx],
+ &i4_tree_num);
+
+ ps_lap_struct->i4_next_start_ctr++;
+ ps_lap_struct->i4_buf_deq_idx++;
+
+ if(ps_lap_struct->i4_buf_deq_idx >= MAX_QUEUE_LENGTH)
+ ps_lap_struct->i4_buf_deq_idx = 0;
+
+ ps_lap_struct->i4_buf_enq_idx++;
+ if(ps_lap_struct->i4_buf_enq_idx >= MAX_QUEUE_LENGTH)
+ ps_lap_struct->i4_buf_enq_idx = 0;
+ }
+ ps_lap_struct->i4_num_frames_after_force_idr = i4_num_frames_after_force_idr;
+
+ if(1 == ps_lap_struct->i4_force_end_flag)
+ {
+ ihevce_force_end(ps_hle_ctxt);
+ }
+
+ /* return the encode order pic to pre enc; stays NULL when the current slot is empty */
+ ps_lap_inp_buf = NULL;
+
+ if(NULL != ps_lap_struct->api4_encode_order_array[ps_lap_struct->i4_lap_out_idx])
+ {
+ ps_lap_inp_buf = ps_lap_struct->api4_encode_order_array[ps_lap_struct->i4_lap_out_idx];
+ ps_lap_struct->api4_encode_order_array[ps_lap_struct->i4_lap_out_idx] = NULL;
+ if(!ps_lap_inp_buf->s_lap_out.i4_end_flag)
+ ihevce_pre_rel_lapout_update(ps_lap_struct, ps_lap_inp_buf);
+ }
+
+ ps_lap_struct->i4_lap_out_idx++;
+ if(ps_lap_struct->i4_lap_out_idx == ps_lap_struct->i4_max_buf_in_enc_order)
+ {
+ ps_lap_struct->i4_lap_out_idx = 0;
+ ps_lap_struct->pi4_encode_poc_ptr = &ps_lap_struct->ai4_encode_order_poc[0];
+ }
+
+ return (ps_lap_inp_buf);
+}
+
+/*!
+************************************************************************
+* \brief
+* Number of input buffers LAP requires: (1 << i4_max_temporal_layers)
+* plus a window of one picture and any RC look-ahead pictures
+************************************************************************
+*/
+WORD32 ihevce_lap_get_num_ip_bufs(ihevce_lap_static_params_t *ps_lap_stat_prms)
+{
+    const WORD32 i4_look_ahead = ps_lap_stat_prms->s_lap_params.i4_rc_look_ahead_pics;
+    const WORD32 i4_layer_delay = 1 << ps_lap_stat_prms->i4_max_temporal_layers;
+    WORD32 i4_window = 1;
+
+    /* the window always covers the current picture; look-ahead pictures extend it */
+    if(0 != i4_look_ahead)
+        i4_window += i4_look_ahead;
+
+    return i4_layer_delay + i4_window;
+}
diff --git a/encoder/ihevce_lap_interface.h b/encoder/ihevce_lap_interface.h
new file mode 100644
index 0000000..748e1bf
--- /dev/null
+++ b/encoder/ihevce_lap_interface.h
@@ -0,0 +1,101 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file
+* ihevce_lap_interface.h
+*
+* @brief
+* This file contains structure definitions related to look-ahead processing
+*
+* @author
+* ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_LAP_INTERFACE_H_
+#define _IHEVCE_LAP_INTERFACE_H_
+
+/*****************************************************************************/
+/* Structures */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ * @brief lap interface ctxt: handles and control-queue settings that ihevce_lap_process reads through its pv_interface_ctxt argument
+******************************************************************************
+ */
+typedef struct
+{
+ ihevce_sys_api_t *ps_sys_api;
+ void *pv_hle_ctxt; /* cast to ihevce_hle_ctxt_t * by ihevce_lap_process */
+ void *pv_lap_module_ctxt; /* cast to lap_struct_t * by ihevce_lap_process */
+
+ /**
+ * Control Input buffer Queue id (queue polled for filled control command buffers)
+ */
+
+ WORD32 i4_ctrl_in_que_id;
+ /**
+ * Encoder and application owned command buffer size
+ * NOTE(review): units are not visible here; presumably bytes - confirm
+ */
+ WORD32 i4_ctrl_cmd_buf_size;
+
+ /**
+ * Control Input buffer blocking mode (ihevce_lap_process sets BUFF_QUE_NON_BLOCKING_MODE on every call)
+ */
+ WORD32 i4_ctrl_in_que_blocking_mode;
+
+ /**
+ * Control output buffer Queue id
+ */
+ WORD32 i4_ctrl_out_que_id;
+
+ /**
+ * Dynamic bitrate change Callback function (invoked once per parsed set-bitrate command)
+ */
+ void (*ihevce_dyn_bitrate_cb)(void *pv_hle_ctxt, void *pv_dyn_bitrate_prms);
+
+} lap_intface_t;
+
+/*****************************************************************************/
+/* Function Declarations */
+/*****************************************************************************/
+
+WORD32 ihevce_lap_get_num_mem_recs(void);
+
+WORD32 ihevce_lap_get_mem_recs(iv_mem_rec_t *ps_mem_tab, WORD32 i4_mem_space);
+
+WORD32 ihevce_lap_get_num_ip_bufs(ihevce_lap_static_params_t *ps_lap_stat_prms);
+
+void *ihevce_lap_init(
+ iv_mem_rec_t *ps_mem_tab,
+ ihevce_lap_static_params_t *ps_lap_params,
+ ihevce_static_cfg_params_t *ps_static_cfg_prms);
+
+ihevce_lap_enc_buf_t *
+ ihevce_lap_process(void *pv_interface_ctxt, ihevce_lap_enc_buf_t *ps_curr_inp);
+
+WORD32 ihevce_check_last_inp_buf(WORD32 *pi4_cmd_buf);
+
+#endif /* _IHEVCE_LAP_INTERFACE_H_ */
diff --git a/encoder/ihevce_lap_structs.h b/encoder/ihevce_lap_structs.h
new file mode 100644
index 0000000..d231b37
--- /dev/null
+++ b/encoder/ihevce_lap_structs.h
@@ -0,0 +1,181 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file
+* ihevce_lap_structs.h
+*
+* @brief
+* This file contains structure definitions related to look-ahead processing
+*
+* @author
+* ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_LAP_STRUCTS_H_
+#define _IHEVCE_LAP_STRUCTS_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+#define EVAL_VERSION 0
+#define EVAL_MODE_FORCE_LOGO 0
+#define MAX_FRAMES_EVAL_VERSION 50000
+#define LAP_DEBUG_PRINT 0
+#define FORCE_IDR_TEST 1
+#define MAX_NUM_ENC_NODES 8 /* size of the encode/capture order arrays in lap_struct_t */
+#define MAX_QUEUE_LENGTH (MAX_LAP_WINDOW_SIZE + MAX_SUB_GOP_SIZE + 2) /* slots in lap_struct_t aps_lap_inp_buf */
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+typedef enum
+{
+    LAP_CTXT = 0, /* first LAP memory record */
+    LAP_NODE_MEM = 1, /* second LAP memory record */
+    NUM_LAP_MEM_RECS = 2, /* count of the records above */
+} LAP_MEM_T;
+
+/*****************************************************************************/
+/* Structures */
+/*****************************************************************************/
+
+/* Picture types; valid types count up from 0, PIC_TYPE_NA is the -1 sentinel */
+typedef enum PIC_TYPE_E
+{
+    PIC_TYPE_NA = -1, /* Invalid frame type */
+    PIC_TYPE_I = 0, /* I frame */
+    PIC_TYPE_P = 1, /* P frame */
+    PIC_TYPE_B = 2, /* B frame */
+    PIC_TYPE_IDR = 3, /* IDR frame */
+    PIC_TYPE_CRA = 4, /* CRA frame */
+    MAX_NUM_PIC_TYPES = 5 /* number of valid picture types */
+} PIC_TYPE_E;
+
+typedef struct ihevce_encode_node_t
+{
+ WORD32 data;
+ void *pv_left_node;
+ void *pv_right_node;
+ WORD32 i4_hierachical_layer;
+ WORD32 i4_interlace_field;
+ ihevce_lap_enc_buf_t *ps_lap_top_buff;
+ ihevce_lap_enc_buf_t *ps_lap_bottom_buff;
+
+} ihevce_encode_node_t;
+
+/**
+******************************************************************************
+ * @brief lap context: configuration, input-buffer queue and picture-order state used by ihevce_lap_process
+******************************************************************************
+ */
+typedef struct
+{
+ // cfg params
+ ihevce_static_cfg_params_t s_static_cfg_params;
+ ihevce_lap_static_params_t s_lap_static_params;
+
+ // pic reorder info: input buffers, written at i4_buf_enq_idx and read at i4_buf_deq_idx
+ ihevce_lap_enc_buf_t *aps_lap_inp_buf[MAX_QUEUE_LENGTH];
+
+ ihevce_encode_node_t *aps_encode_node[1];
+
+ /** Array of nodes in encode order; the slot at i4_lap_out_idx is returned by ihevce_lap_process and then cleared */
+ ihevce_lap_enc_buf_t *api4_encode_order_array[MAX_NUM_ENC_NODES];
+
+ /** Array of nodes in capture order*/
+ ihevce_lap_enc_buf_t *api4_capture_order_array[MAX_NUM_ENC_NODES];
+
+ /**Array of POCS in encode order*/
+ WORD32 ai4_encode_order_poc[MAX_NUM_ENC_NODES];
+
+ /**Array of POCS in capture order*/
+ WORD32 ai4_capture_order_poc[MAX_NUM_ENC_NODES];
+
+ /** Pointer to POC in encode order; reset to &ai4_encode_order_poc[0] when i4_lap_out_idx wraps */
+ WORD32 *pi4_encode_poc_ptr;
+
+ /** Pointer to POC in capture order*/
+ WORD32 *pi4_capture_poc_ptr;
+
+ WORD32 ai4_pic_type_to_be_removed[NUM_LAP2_LOOK_AHEAD];
+
+ void *pv_prev_inp_buf;
+
+ WORD32 i4_buf_enq_idx; /* write index into aps_lap_inp_buf; wraps to 0 at MAX_QUEUE_LENGTH */
+ WORD32 i4_buf_deq_idx; /* read index into aps_lap_inp_buf; wraps to 0 at MAX_QUEUE_LENGTH */
+ WORD32 i4_lap_out_idx; /* index into api4_encode_order_array; wraps to 0 at i4_max_buf_in_enc_order */
+ WORD32 i4_capture_idx;
+ WORD32 i4_idr_flag;
+ WORD32 i4_num_bufs_encode_order;
+ WORD32 i4_deq_idx;
+ WORD32 i4_enq_idx;
+ // poc info
+ WORD32 ref_poc_array[MAX_REF_PICS];
+ WORD8 ai1_pic_type[10];
+ WORD32 i4_curr_poc;
+ WORD32 i4_cra_poc;
+ WORD32 i4_assoc_IRAP_poc;
+ // counters
+ WORD32 i4_max_idr_period;
+ WORD32 i4_min_idr_period;
+ WORD32 i4_max_cra_period;
+ WORD32 i4_max_i_period;
+ WORD32 i4_idr_counter;
+ WORD32 i4_cra_counter;
+ WORD32 i4_i_counter;
+ WORD32 i4_idr_gop_num;
+ WORD32 i4_curr_ref_pics;
+ WORD32 i4_display_num; /* incremented by ihevce_lap_process for every valid input frame */
+ WORD32 i4_num_frames_after_force_idr;
+ WORD32 i4_num_frm_type_decided; /* decremented per assigned pic type; 0 triggers ihevce_determine_next_sub_gop_state */
+ WORD32 i4_frm_gop_idx;
+ WORD32 i4_is_all_i_pic_in_seq;
+ WORD32 i4_next_start_ctr;
+ WORD32 i4_fixed_open_gop_period;
+ WORD32 i4_fixed_i_period;
+ // misc
+ WORD32 i4_enable_logo;
+ WORD32 i4_cra_i_pic_flag;
+ WORD32 i4_force_end_flag; /* when 1, ihevce_lap_process calls ihevce_force_end */
+ WORD32 i4_sub_gop_size;
+ WORD32 i4_sub_gop_size_idr;
+ WORD32 i4_dyn_sub_gop_size;
+ WORD32 end_flag; /* set to 1 once a flush command has been parsed from the sync command buffer */
+ WORD32 i4_immediate_idr_case;
+ WORD32 i4_max_buf_in_enc_order;
+ WORD32 i4_end_flag_pic_idx;
+ WORD32 i4_lap2_counter;
+ WORD32 i4_rc_lap_period;
+ WORD32 i4_gop_period;
+ WORD32 i4_no_back_to_back_i_avoidance;
+} lap_struct_t;
+
+void ihevce_populate_tree_nodes(
+ ihevce_encode_node_t *encode_parent_node_t,
+ ihevce_encode_node_t *encode_node_t,
+ WORD32 *loop_count,
+ WORD32 layer,
+ WORD32 hier_layer);
+
+#endif /* _IHEVCE_LAP_STRUCTS_H_ */
diff --git a/encoder/ihevce_me_common_defs.h b/encoder/ihevce_me_common_defs.h
new file mode 100644
index 0000000..e8c4f88
--- /dev/null
+++ b/encoder/ihevce_me_common_defs.h
@@ -0,0 +1,775 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file ihevce_me_common_defs.h
+*
+* @brief
+* This file contains common definitions (constant macros, helper macros, enumerations and related types) for motion estimation (ME)
+*
+* @author
+* Ittiam
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_ME_COMMON_DEFS_H_
+#define _IHEVCE_ME_COMMON_DEFS_H_
+
+/****************************************************************************/
+/* Constant Macros */
+/****************************************************************************/
+/**
+*******************************************************************************
+@brief We basically store an impossible and unique MV to identify intra blks
+or CUs
+*******************************************************************************
+ */
+#define INTRA_MV 0x4000
+/**
+*******************************************************************************
+@brief MAX INT VAL is defined as follows so that adding the four candidates,
+ still will be a positive value
+*******************************************************************************
+ */
+#define MAX_INT_VAL (0x7FFFFFF)
+
+/**
+*******************************************************************************
+@brief Max number of results stored in search result str (per partition) during
+refinement search. Needed for memory allocation purposes
+*******************************************************************************
+ */
+#define MAX_REFINE_RESULTS 4
+
+/**
+*******************************************************************************
+@brief Maximum number of partitions in a CU (NxN case)
+*******************************************************************************
+ */
+#define MAX_NUM_PARTS 4
+
+/** As min CU size is 8, there can only be two partitions in a CU */
+#define MAX_NUM_INTER_PARTS 2
+
+/* 4 for the num of REF and 2 for num_results_per_part */
+#define MAX_NUM_RESULTS_PER_PART_LIST 8
+
+#define MAX_NUM_RESULTS_PER_PART 2
+
+#define MAX_NUM_REF 12
+
+#define NUM_BEST_ME_OUTPUTS 4
+
+#define MAX_NUM_CLUSTERS_IN_ONE_REF_IDX 5
+
+/* Assumption is (MAX_NUM_CANDS_BESTUNI >= MAX_NUM_CANDS_BESTALT) */
+#define MAX_NUM_CANDS_BESTUNI 10
+
+#define MAX_NUM_CANDS_BESTALT 10
+/* Whole expansion is parenthesized so the value is not re-associated inside larger expressions */
+#define MAX_NUM_MERGE_CANDTS (4 * (3 * MAX_NUM_CLUSTERS_IN_ONE_REF_IDX + 2 * MAX_NUM_CANDS_BESTUNI))
+
+#define MAX_NUM_CLUSTERS_16x16 8
+
+#define MAX_NUM_CLUSTERS_32x32 10
+
+#define MAX_NUM_CLUSTERS_64x64 10
+
+#define MAX_DISTANCE_FROM_CENTROID_16x16 4
+
+#define MAX_DISTANCE_FROM_CENTROID_32x32 8
+
+#define MAX_DISTANCE_FROM_CENTROID_64x64 16
+
+#define MAX_DISTANCE_FROM_CENTROID_16x16_B 4
+
+#define MAX_DISTANCE_FROM_CENTROID_32x32_B 8
+
+#define MAX_DISTANCE_FROM_CENTROID_64x64_B 16
+
+#define MAX_NUM_CLUSTERS_IN_VALID_16x16_BLK 3
+
+#define MAX_NUM_CLUSTERS_IN_VALID_32x32_BLK 5
+
+#define MAX_NUM_CLUSTERS_IN_VALID_64x64_BLK 5
+
+#define ALL_INTER_COST_DIFF_THR 10
+
+#define MAX_INTRA_PERCENTAGE 25
+
+#define CLUSTER_DATA_DUMP 0
+
+#define DISABLE_INTER_CANDIDATES 0
+
+#define ENABLE_4CTB_EVALUATION 1
+
+#define USE_2N_NBR 1
+
+#define USE_CLUSTER_DATA_AS_BLK_MERGE_CANDTS 0
+
+#define MAX_REFS_SEARCHABLE MAX_NUM_REF
+
+#define DEBUG_TRACE_ENABLE 0
+
+#define DISABLE_INTRA_IN_BPICS 1
+
+#define DISABLE_L0_IPE_INTRA_IN_BPICS 1
+
+#define DISABLE_L2_IPE_INTRA_IN_BPICS 0
+
+#define DISABLE_L2_IPE_INTRA_IN_IPBPICS 0
+
+#define DISABLE_L1_L2_IPE_INTRA_IN_BPICS 1
+
+#define RC_DEPENDENCY_FOR_BPIC 1
+
+#define DISABLE_L1_L2_IPE_INTRA_IN_IPBPICS 0
+
+#define DISABLE_L2_IPE_IN_IPB_L1_IN_B 0
+
+#define DISABLE_L2_IPE_IN_PB_L1_IN_B 1
+
+#define DISBLE_CHILD_CU_EVAL_L0_IPE 0
+
+#define FORCE_NXN_MODE_BASED_ON_OL_IPE 0
+
+#define TEMPORAL_LAYER_DISABLE 0
+
+#define COARSE_ME_OPT 1
+
+#define NUM_RESULTS_TO_EXPORT_MS 3
+
+#define NUM_RESULTS_TO_EXPORT_HS NUM_BEST_ME_OUTPUTS
+
+#define NUM_RESULTS_TO_EXPORT_XS 2
+
+#define DISABLE_MERGE 0
+
+#define INTERP_OUT_BUF_SIZE (64 * 64)
+
+/* NUM_BEST_ME_OUTPUTS - Maximum possible TU Recursion candidates */
+/* 2 - Required for Hadamard Transform coefficients */
+/* 2 - Required in 'hme_compute_pred_and_evaluate_bi' */
+/* 5 of these are also used in 'hme_subpel_refine_cu_hs' */
+#define MAX_NUM_PRED_BUFS_USED_FOR_PARTTYPE_DECISIONS ((NUM_BEST_ME_OUTPUTS) + 2 + 2)
+/* Both expansions are fully parenthesized so callers can multiply or divide by them safely */
+#define MAX_WKG_MEM_SIZE_PER_THREAD \
+    ((MAX_NUM_PRED_BUFS_USED_FOR_PARTTYPE_DECISIONS) * (INTERP_OUT_BUF_SIZE))
+
+/**
+******************************************************************************
+ * @macro OLD_XTREME_SPEED
+ * @brief Reverts the changes back to older Xtreme speed model
+******************************************************************************
+*/
+#define OLD_XTREME_SPEED 0
+#define OLD_HIGH_SPEED 0
+
+/**
+******************************************************************************
+ * @macro BIT_EN
+ * @brief Enables the bit at a given bit position
+******************************************************************************
+*/
+#define BIT_EN(x) (1 << (x))
+
+/**
+******************************************************************************
+ * @macros ENABLE_mxn
+ * @brief Enables a type or a group of partitions. ENABLE_ALL_PARTS, enables all
+ * partitions, while others enable selected partitions. These can be used
+ * to set the mask of active partitions
+******************************************************************************
+*/
+#define ENABLE_2Nx2N (BIT_EN(PART_ID_2Nx2N))
+#define ENABLE_2NxN (BIT_EN(PART_ID_2NxN_T) | BIT_EN(PART_ID_2NxN_B))
+#define ENABLE_Nx2N (BIT_EN(PART_ID_Nx2N_L) | BIT_EN(PART_ID_Nx2N_R))
+#define ENABLE_NxN \
+ (BIT_EN(PART_ID_NxN_TL) | BIT_EN(PART_ID_NxN_TR) | BIT_EN(PART_ID_NxN_BL) | \
+ BIT_EN(PART_ID_NxN_BR))
+#define ENABLE_2NxnU (BIT_EN(PART_ID_2NxnU_T) | BIT_EN(PART_ID_2NxnU_B))
+#define ENABLE_2NxnD (BIT_EN(PART_ID_2NxnD_T) | BIT_EN(PART_ID_2NxnD_B))
+#define ENABLE_nLx2N (BIT_EN(PART_ID_nLx2N_L) | BIT_EN(PART_ID_nLx2N_R))
+#define ENABLE_nRx2N (BIT_EN(PART_ID_nRx2N_L) | BIT_EN(PART_ID_nRx2N_R))
+#define ENABLE_AMP ((ENABLE_2NxnU) | (ENABLE_2NxnD) | (ENABLE_nLx2N) | (ENABLE_nRx2N))
+#define ENABLE_ALL_PARTS \
+ ((ENABLE_2Nx2N) | (ENABLE_NxN) | (ENABLE_2NxN) | (ENABLE_Nx2N) | (ENABLE_AMP))
+
+#define DISABLE_THE_CHILDREN_NODES(ps_parent_node) \
+ { \
+ (ps_parent_node)->ps_child_node_tl->is_node_valid = 0; \
+ (ps_parent_node)->ps_child_node_tr->is_node_valid = 0; \
+ (ps_parent_node)->ps_child_node_bl->is_node_valid = 0; \
+ (ps_parent_node)->ps_child_node_br->is_node_valid = 0; \
+ }
+
+#define NULLIFY_THE_CHILDREN_NODES(ps_parent_node) \
+ { \
+ (ps_parent_node)->ps_child_node_tl = NULL; \
+ (ps_parent_node)->ps_child_node_tr = NULL; \
+ (ps_parent_node)->ps_child_node_bl = NULL; \
+ (ps_parent_node)->ps_child_node_br = NULL; \
+ }
+
+#define DISABLE_ALL_KIN_OF_64x64_NODE(ps_tree_root) \
+ { \
+ DISABLE_THE_CHILDREN_NODES((ps_tree_root)); \
+ DISABLE_THE_CHILDREN_NODES((ps_tree_root)->ps_child_node_tl); \
+ DISABLE_THE_CHILDREN_NODES((ps_tree_root)->ps_child_node_tr); \
+ DISABLE_THE_CHILDREN_NODES((ps_tree_root)->ps_child_node_bl); \
+ DISABLE_THE_CHILDREN_NODES((ps_tree_root)->ps_child_node_br); \
+ DISABLE_THE_CHILDREN_NODES((ps_tree_root)->ps_child_node_tl->ps_child_node_tl); \
+ DISABLE_THE_CHILDREN_NODES((ps_tree_root)->ps_child_node_tl->ps_child_node_tr); \
+ DISABLE_THE_CHILDREN_NODES((ps_tree_root)->ps_child_node_tl->ps_child_node_bl); \
+ DISABLE_THE_CHILDREN_NODES((ps_tree_root)->ps_child_node_tl->ps_child_node_br); \
+ DISABLE_THE_CHILDREN_NODES((ps_tree_root)->ps_child_node_tr->ps_child_node_tl); \
+ DISABLE_THE_CHILDREN_NODES((ps_tree_root)->ps_child_node_tr->ps_child_node_tr); \
+ DISABLE_THE_CHILDREN_NODES((ps_tree_root)->ps_child_node_tr->ps_child_node_bl); \
+ DISABLE_THE_CHILDREN_NODES((ps_tree_root)->ps_child_node_tr->ps_child_node_br); \
+ DISABLE_THE_CHILDREN_NODES((ps_tree_root)->ps_child_node_bl->ps_child_node_tl); \
+ DISABLE_THE_CHILDREN_NODES((ps_tree_root)->ps_child_node_bl->ps_child_node_tr); \
+ DISABLE_THE_CHILDREN_NODES((ps_tree_root)->ps_child_node_bl->ps_child_node_bl); \
+ DISABLE_THE_CHILDREN_NODES((ps_tree_root)->ps_child_node_bl->ps_child_node_br); \
+ DISABLE_THE_CHILDREN_NODES((ps_tree_root)->ps_child_node_br->ps_child_node_tl); \
+ DISABLE_THE_CHILDREN_NODES((ps_tree_root)->ps_child_node_br->ps_child_node_tr); \
+ DISABLE_THE_CHILDREN_NODES((ps_tree_root)->ps_child_node_br->ps_child_node_bl); \
+ DISABLE_THE_CHILDREN_NODES((ps_tree_root)->ps_child_node_br->ps_child_node_br); \
+ }
+
+#define DISABLE_ALL_KIN_OF_32x32_NODE(ps_tree_root) \
+ { \
+ DISABLE_THE_CHILDREN_NODES((ps_tree_root)); \
+ DISABLE_THE_CHILDREN_NODES((ps_tree_root)->ps_child_node_tl); \
+ DISABLE_THE_CHILDREN_NODES((ps_tree_root)->ps_child_node_tr); \
+ DISABLE_THE_CHILDREN_NODES((ps_tree_root)->ps_child_node_bl); \
+ DISABLE_THE_CHILDREN_NODES((ps_tree_root)->ps_child_node_br); \
+ }
+
+#define ENABLE_THE_CHILDREN_NODES(ps_parent_node) \
+ { \
+ (ps_parent_node)->ps_child_node_tl->is_node_valid = 1; \
+ (ps_parent_node)->ps_child_node_tr->is_node_valid = 1; \
+ (ps_parent_node)->ps_child_node_bl->is_node_valid = 1; \
+ (ps_parent_node)->ps_child_node_br->is_node_valid = 1; \
+ }
+
+#define CLIP_MV_WITHIN_RANGE( \
+ x, y, range, fpel_refine_extent, hpel_refine_extent, qpel_refine_extent) \
+ { \
+ WORD16 i4_range_erosion_metric; \
+ \
+ i4_range_erosion_metric = \
+ ((fpel_refine_extent) << 2) + ((hpel_refine_extent) << 1) + (qpel_refine_extent); \
+ i4_range_erosion_metric += 2; \
+ i4_range_erosion_metric >>= 2; \
+ \
+ if((x) > ((range)->i2_max_x - i4_range_erosion_metric)) \
+ (x) = ((range)->i2_max_x - i4_range_erosion_metric); \
+ if((x) < ((range)->i2_min_x + i4_range_erosion_metric)) \
+ (x) = ((range)->i2_min_x + i4_range_erosion_metric); \
+ if((y) > ((range)->i2_max_y - i4_range_erosion_metric)) \
+ (y) = ((range)->i2_max_y - i4_range_erosion_metric); \
+ if((y) < ((range)->i2_min_y + i4_range_erosion_metric)) \
+ (y) = ((range)->i2_min_y + i4_range_erosion_metric); \
+ }
+
+/****************************************************************************/
+/* Enumerations */
+/****************************************************************************/
+/**
+******************************************************************************
+ * @enum CU_SIZE_T
+ * @brief Square CU sizes from 8x8 up to 64x64, numbered upwards from 0;
+ *        CU_INVALID is the -1 sentinel and NUM_CU_SIZES counts the valid
+ *        entries
+******************************************************************************
+*/
+typedef enum
+{
+    CU_INVALID = -1,
+    CU_8x8 = 0,
+    CU_16x16 = 1,
+    CU_32x32 = 2,
+    CU_64x64 = 3,
+    NUM_CU_SIZES = 4
+} CU_SIZE_T;
+
+/**
+******************************************************************************
+ * @enum PART_TYPE_T
+ * @brief Partition split types of an inter CU, numbered upwards from 0
+******************************************************************************
+*/
+typedef enum
+{
+    PRT_INVALID = -1,
+    PRT_2Nx2N = 0,
+    PRT_2NxN = 1,
+    PRT_Nx2N = 2,
+    PRT_NxN = 3,
+    PRT_2NxnU = 4,
+    PRT_2NxnD = 5,
+    PRT_nLx2N = 6,
+    PRT_nRx2N = 7,
+    MAX_PART_TYPES = 8
+} PART_TYPE_T;
+
+/**
+******************************************************************************
+ * @enum PART_ID_T
+ * @brief Defines all possible partition ids within an inter CU
+******************************************************************************
+*/
+typedef enum
+{
+ PART_ID_INVALID = -1,
+ PART_ID_2Nx2N = 0,
+ /* These 2 belong to the 2NxN part type (top, bottom) */
+ PART_ID_2NxN_T = 1,
+ PART_ID_2NxN_B = 2,
+ /* These 2 belong to the Nx2N part type (left, right) */
+ PART_ID_Nx2N_L = 3,
+ PART_ID_Nx2N_R = 4,
+
+ /* 4 partitions of NxN */
+ PART_ID_NxN_TL = 5,
+ PART_ID_NxN_TR = 6,
+ PART_ID_NxN_BL = 7,
+ PART_ID_NxN_BR = 8,
+
+ /*************************************************************************/
+ /* ________ */
+ /* |________|-->2NxnU_T */
+ /* | | */
+ /* | |-->2NxnU_B */
+ /* |________| */
+ /*************************************************************************/
+ PART_ID_2NxnU_T = 9,
+ PART_ID_2NxnU_B = 10,
+
+ /*************************************************************************/
+ /* ________ */
+ /* | | */
+ /* | |-->2NxnD_T */
+ /* |________| */
+ /* |________|-->2NxnD_B */
+ /*************************************************************************/
+ PART_ID_2NxnD_T = 11,
+ PART_ID_2NxnD_B = 12,
+
+ /*************************************************************************/
+ /* ________ */
+ /* | | | */
+ /* | | |-->nLx2N_R */
+ /* | | | */
+ /* |_|______| */
+ /* | */
+ /* v */
+ /* nLx2N_L */
+ /*************************************************************************/
+ PART_ID_nLx2N_L = 13,
+ PART_ID_nLx2N_R = 14,
+
+ /*************************************************************************/
+ /* ________ */
+ /* | | | */
+ /* | | |-->nRx2N_R */
+ /* | | | */
+ /* |______|_| */
+ /* | */
+ /* v */
+ /* nRx2N_L */
+ /*************************************************************************/
+ /* AMP 12x16 and 4x16 split */
+ PART_ID_nRx2N_L = 15,
+ PART_ID_nRx2N_R = 16,
+ TOT_NUM_PARTS = 17
+} PART_ID_T;
+
+/**
+******************************************************************************
+* @enum CU_POS_T
+* @brief Position (quadrant) of a block w.r.t. its parent in the CU tree
+******************************************************************************
+*/
+typedef enum
+{
+ POS_NA = -1,
+ POS_TL = 0,
+ POS_TR = 1,
+ POS_BL = 2,
+ POS_BR = 3
+} CU_POS_T;
+/* TU_POS_T is an alias: TU positions reuse the CU position enumerators */
+typedef CU_POS_T TU_POS_T;
+
+/****************************************************************************/
+/* Structures */
+/****************************************************************************/
+
+/**
+******************************************************************************
+ * @struct range_prms_t
+ * @brief Valid MV range (16-bit min/max bounds in x and y) for a blk/cu/ctb
+******************************************************************************
+ */
+typedef struct
+{
+ /** Min x value possible, precision inferred from context */
+ WORD16 i2_min_x;
+ /** Max x value possible, precision inferred from context */
+ WORD16 i2_max_x;
+ /** Min y value possible, precision inferred from context */
+ WORD16 i2_min_y;
+ /** Max y value possible, precision inferred from context */
+ WORD16 i2_max_y;
+} range_prms_t;
+
+/**
+******************************************************************************
+ * MACRO for enabling Dynamic Vertical Search Range Support
+ * Note : Must always be 1; the disabled (0) path is not supported
+******************************************************************************
+ */
+#define DVSR_CHANGES 1
+
+/**
+******************************************************************************
+ * @struct dyn_range_prms_t
+ * @brief Dynamic search range (only the y bounds are stored) and its POC
+******************************************************************************
+ */
+typedef struct
+{
+ /** Min x value possible (field disabled: no x range is stored) */
+ //WORD16 i2_dyn_min_x;
+ /** Max x value possible (field disabled: no x range is stored) */
+ //WORD16 i2_dyn_max_x;
+ /** Min y value possible */
+ WORD16 i2_dyn_min_y;
+ /** Max y value possible */
+ WORD16 i2_dyn_max_y;
+
+ /** Pic order count; INIT_DYN_SEARCH_PRMS stores its ref_poc argument here */
+ WORD32 i4_poc;
+
+} dyn_range_prms_t;
+
+/**
+******************************************************************************
+ * @macro INIT_DYN_SEARCH_PRMS
+ * @brief Zeroes the y range of the dyn_range_prms_t pointed to by x and sets
+ * its POC to ref_poc. x is expanded three times: pass a side-effect-free ptr
+******************************************************************************
+ */
+#define INIT_DYN_SEARCH_PRMS(x, ref_poc) \
+ { \
+ (x)->i2_dyn_min_y = 0; \
+ (x)->i2_dyn_max_y = 0; \
+ (x)->i4_poc = (ref_poc); \
+ }
+
+typedef struct
+{
+ WORD16 mvx;
+ /* mvx above, mvy below: presumably the x and y motion vector components */
+ WORD16 mvy;
+
+ /* 0=>mv is not a part of bi-pred mv */
+ /* 1=>inverse of case 0, i.e. mv is a part of bi-pred mv */
+ UWORD8 is_uni;
+ /* NOTE(review): the 0/1 meaning above reads opposite to the name is_uni - confirm */
+ WORD16 pixel_count;
+
+ WORD32 sdi;
+
+} mv_data_t;
+
+/**
+******************************************************************************
+* @brief This struct stores the search result for a prediction unit (PU)
+******************************************************************************
+*/
+
+typedef struct
+{
+ /**
+ * PU attributes like mvs, ref ids, pred mode, width, height, ctb x/y offsets etc
+ */
+ pu_t pu;
+
+ /* mv cost for this pu */
+ WORD32 i4_mv_cost;
+
+ /* total cost for this pu */
+ WORD32 i4_tot_cost;
+ /* NOTE(review): the meaning of i4_sdi is not documented in this header */
+ WORD32 i4_sdi;
+} pu_result_t;
+
+/**
+******************************************************************************
+* @brief This struct stores the search result for one partition type of a CU
+******************************************************************************
+*/
+typedef struct
+{
+ /** part results for a part type */
+ pu_result_t as_pu_results[MAX_NUM_INTER_PARTS];
+ /* presumably the prediction buffer for this part type, with its stride below */
+ UWORD8 *pu1_pred;
+
+ WORD32 i4_pred_stride;
+
+ /* total cost for part type */
+ WORD32 i4_tot_cost;
+
+ /* TU split flag : ai4_tu_split_flag[0] holds the transform splits
+ * for CU size <= 32; for 64x64 each ai4_tu_split_flag entry corresponds
+ * to the respective 32x32 */
+ /* For a 8x8 TU - 1 bit used to indicate split */
+ /* For a 16x16 TU - LSB used to indicate winner between 16 and 8 TU's. 4 other bits used to indicate split in each 8x8 quadrant */
+ /* For a 32x32 TU - See above */
+ WORD32 ai4_tu_split_flag[4];
+
+ /* TU early cbf : ai4_tu_early_cbf[0] is used
+ * for CU size <= 32; for 64x64 each ai4_tu_early_cbf entry corresponds
+ * to the respective 32x32 */
+ WORD32 ai4_tu_early_cbf[4];
+
+ /* Populate the tu_split flag cost for the candidates */
+ WORD32 i4_tu_split_cost;
+
+ /** partition type : shall be one of PART_TYPE_T */
+ UWORD8 u1_part_type;
+} part_type_results_t;
+
+/**
+******************************************************************************
+ * @struct part_results_t
+ * @brief Basic structure used for storage of search results, specification
+ * of init candidates for search etc. This structure is complete for
+ * specification of mv and cost for a given direction of search (L0/L1) but
+ * does not carry information about the type of partition it represents.
+******************************************************************************
+ */
+typedef struct
+{
+ /** Motion vector X component */
+ WORD16 i2_mv_x;
+
+ /** Motion vector Y component */
+ WORD16 i2_mv_y;
+
+ /** Ref id, specified in terms of Lc, the unified list */
+ WORD8 i1_ref_idx;
+
+ /** SAD / SATD stored here */
+ WORD32 i4_sad;
+} part_results_t;
+
+/**
+******************************************************************************
+* @brief This struct is used for storing output of me search or block merge
+ * and also all of the intermediate results required
+******************************************************************************
+*/
+typedef struct
+{
+ /**
+ * X and y offsets w.r.t. CTB start in encode layers. For non encode
+ * layers, these may typically be 0
+ */
+ UWORD8 u1_x_off;
+
+ UWORD8 u1_y_off;
+
+ /** cu size as per the CU_SIZE_T enumeration */
+ UWORD8 u1_cu_size;
+ /** NOTE(review): undocumented; presumably an offset into the input buffer */
+ WORD32 i4_inp_offset;
+
+ /** best results of a CU sorted in increasing cost */
+ part_type_results_t *ps_best_results;
+
+ /** active partition mask for this CU */
+ WORD32 i4_part_mask;
+
+ /** number of best results maintained for every PU */
+ UWORD8 u1_num_best_results;
+
+ /** Split flag to indicate whether current CU is split or not */
+ UWORD8 u1_split_flag;
+
+} inter_cu_results_t;
+
+/**
+******************************************************************************
+* @brief This struct is used for storing input of me search in the form of
+ * pu_result_t structures, which is given to hme_decide_part_types as i/p
+******************************************************************************
+*/
+typedef struct
+{
+ /** ptrs to multiple pu results of a CU. Can be separated out as a separate structure */
+ pu_result_t *aps_pu_results[2][TOT_NUM_PARTS];
+
+ /** max number of best results maintained for a partition in L0 */
+ UWORD8 u1_num_results_per_part_l0[TOT_NUM_PARTS];
+
+ /** max number of best results maintained for a partition in L1 */
+ UWORD8 u1_num_results_per_part_l1[TOT_NUM_PARTS];
+} inter_pu_results_t;
+
+/**
+******************************************************************************
+ * @struct me_results_16x16_t
+ * @brief Contains complete search result for a CU for a given type of
+ * partition split. Holds the results for each partition, with
+ * information of partition type.
+******************************************************************************
+ */
+typedef struct
+{
+ /**
+ * X and y offsets w.r.t. CTB start in encode layers. For non encode
+ * layers, these may typically be 0
+ */
+ UWORD8 u1_x_off;
+
+ UWORD8 u1_y_off;
+
+ /**
+ * Type of partition that the CU is split into, for which this
+ * result is relevant
+ */
+ PART_TYPE_T e_part_type;
+
+ /**
+ * Results of each individual partition (stored inline). Note that max
+ * number of partitions a CU can be split into is MAX_NUM_PARTS
+ * 3 => L0 best, L1 best and best across L0 and L1
+ */
+ part_results_t as_part_result[MAX_NUM_PARTS][3];
+
+ /* Contains the best uni dir for each partition type */
+ /* enabled for this 16x16 block */
+ WORD32 ai4_best_uni_dir[MAX_NUM_PARTS];
+
+ /* Contains the best pred dir for each partition type */
+ /* enabled for this 16x16 block */
+ WORD32 ai4_best_pred_dir[MAX_NUM_PARTS];
+} me_results_16x16_t;
+
+/**
+******************************************************************************
+ * @struct me_results_8x8_t
+ * @brief Contains complete search result for an 8x8 CU. Unlike
+ * me_results_16x16_t there is a single result set, because only one
+ * partition type is stored.
+ * @assumptions e_part_type is always PRT_2Nx2N
+******************************************************************************
+ */
+typedef struct
+{
+ /**
+ * X and y offsets w.r.t. CTB start in encode layers. For non encode
+ * layers, these may typically be 0
+ */
+ UWORD8 u1_x_off;
+
+ UWORD8 u1_y_off;
+
+ /**
+ * Type of partition that the CU is split into, for which this
+ * result is relevant
+ */
+ PART_TYPE_T e_part_type;
+
+ /**
+ * Results for the single 2Nx2N partition of this 8x8 block.
+ * NOTE(review): only 2 entries here (presumably L0 best and L1 best),
+ * unlike the 3 per partition kept in me_results_16x16_t - confirm
+ */
+ part_results_t as_part_result[2];
+
+ /* Contains the best uni dir for the 2Nx2N partition */
+ /* of this 8x8 block */
+ WORD32 i4_best_uni_dir;
+
+ /* Contains the best pred dir for the 2Nx2N partition */
+ /* of this 8x8 block */
+ WORD32 i4_best_pred_dir;
+} me_results_8x8_t;
+
+/**
+******************************************************************************
+ * @struct cluster_mv_list_t
+ * @brief Contains data computed by the clustering algorithm
+******************************************************************************
+ */
+typedef struct
+{
+ mv_t as_mv[MAX_NUM_MERGE_CANDTS];
+ /* presumably the number of valid entries in as_mv - TODO confirm */
+ WORD32 num_mvs;
+
+} cluster_mv_list_t;
+
+/**
+******************************************************************************
+ * @struct qpel_input_buf_cfg_t
+ * @brief For QPEL averaging, this descriptor (typically outcome of lookup)
+ * contains info related to the 2 fpel/hpel planes that are to be
+ * averaged along with the exact offsets
+******************************************************************************
+ */
+typedef struct
+{
+ /** id of buf1 for input of averaging: 0-3 */
+ WORD8 i1_buf_id1;
+
+ /**
+ * x and y offset in buf 1 w.r.t. colocated input point after correcting
+ * for fpel mvx and mvy
+ */
+ WORD8 i1_buf_xoff1;
+ WORD8 i1_buf_yoff1;
+
+ /** id of buf2 for input of averaging: 0-3 */
+ WORD8 i1_buf_id2;
+
+ /**
+ * x and y offset in buf 2 w.r.t. colocated input point after correcting
+ * for fpel mvx and mvy
+ */
+ WORD8 i1_buf_xoff2;
+ WORD8 i1_buf_yoff2;
+} qpel_input_buf_cfg_t;
+
+typedef struct
+{
+ UWORD8 *apu1_pred_bufs[MAX_NUM_PRED_BUFS_USED_FOR_PARTTYPE_DECISIONS];
+ /* presumably a bitmask marking which of apu1_pred_bufs are in use - TODO confirm */
+ UWORD32 u4_pred_buf_usage_indicator;
+} hme_pred_buf_mngr_t;
+
+#endif
diff --git a/encoder/ihevce_me_instr_set_router.c b/encoder/ihevce_me_instr_set_router.c
new file mode 100644
index 0000000..2a3a4df
--- /dev/null
+++ b/encoder/ihevce_me_instr_set_router.c
@@ -0,0 +1,437 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_me_instr_set_router.c
+*
+* \brief
+* This file contains function pointer initialization of me utility
+* functions
+*
+* \date
+* 15/07/2013
+*
+* \author
+* Ittiam
+*
+* List of Functions
+* ihevce_me_instr_set_router()
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_debug.h"
+#include "ihevc_deblk.h"
+#include "ihevc_defs.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_macros.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_sao.h"
+#include "ihevc_structs.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_platform_macros.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevce_api.h"
+#include "ihevce_defs.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+
+#include "hme_datatype.h"
+#include "hme_common_defs.h"
+#include "hme_common_utils.h"
+#include "hme_interface.h"
+#include "hme_defs.h"
+#include "hme_err_compute.h"
+#include "hme_globals.h"
+
+#include "ihevce_me_instr_set_router.h"
+
+/*****************************************************************************/
+/* Globals */
+/*****************************************************************************/
+static FT_SAD_EVALUATOR *gapf_sad_pt_npu[NUM_BLK_SIZES];
+static FT_PART_SADS_EVALUATOR_16X16CU *gpf_part_sads_evaluator_16x16CU;
+static FT_PART_SADS_EVALUATOR *gpf_part_sads_evaluator_MxM;
+static FT_SAD_EVALUATOR *gpf_sad_grid_mxn;
+/* 9 => function variants, selected by the value read from gau1_calc_sad_and_result */
+/* 2 => [0] holds the 1 best result variant, [1] the 2 best results variant */
+static FT_CALC_SAD_AND_RESULT *gapf_calc_sad_and_result_fxn[9][2];
+/* Row of gapf_calc_sad_and_result_fxn for [grid flag][noisy CU][part class][num_parts - 1] */
+static const U08 gau1_calc_sad_and_result[2][2][4][TOT_NUM_PARTS] = {
+ //grid flag = 0
+ { //noise = 0
+ { //NxN or NxN & SMP
+ { 1, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4 },
+ //SMP only
+ { 1, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4 },
+ //AMP
+ { 1, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4 },
+ //2Nx2N only, i.e. num_parts = 1
+ { 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 } },
+ //noise = 1
+ { { 5, 7, 7, 7, 6, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 5, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 5, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 } } },
+
+ //grid flag = 1 (same as above except that column 0 always selects row 0)
+ { //noise = 0
+ { { 0, 2, 2, 2, 2, 2, 2, 2, 2, 4, 4, 4, 4, 4, 4, 4, 4 },
+ { 0, 2, 2, 2, 2, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4 },
+ { 0, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4 },
+ { 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 } },
+ //noise = 1
+ { { 0, 7, 7, 7, 6, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 0, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 0, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8 },
+ { 0, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 } } }
+};
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+/*!
+******************************************************************************
+* \if Function name : ihevce_me_instr_set_router \endif
+*
+* \brief
+* Fills ps_func_list with the ME kernels for e_arch (NEON variants when built
+* with ENABLE_NEON, C otherwise) and caches them in this file's lookup tables
+*****************************************************************************
+*/
+void ihevce_me_instr_set_router(ihevce_me_optimised_function_list_t *ps_func_list, IV_ARCH_T e_arch)
+{
+ // clang-format off
+#ifdef DISABLE_AVX2_INTR
+ e_arch = (e_arch == ARCH_X86_AVX2) ? ARCH_X86_AVX : e_arch;
+#endif
+
+ switch(e_arch)
+ {
+#ifdef ENABLE_NEON
+ case ARCH_ARM_A9Q:
+ case ARCH_ARM_V8_NEON:
+ ps_func_list->pf_calc_pt_sad_and_1_best_result_explicit_8x8 = hme_calc_pt_sad_and_result_explicit;
+ ps_func_list->pf_calc_pt_sad_and_1_best_result_explicit_8x8_4x4 = hme_calc_pt_sad_and_result_explicit;
+ ps_func_list->pf_calc_pt_sad_and_1_best_result_explicit_8x8_for_grid = hme_calc_pt_sad_and_result_explicit;
+ ps_func_list->pf_calc_pt_sad_and_1_best_result_explicit_generic = hme_calc_pt_sad_and_result_explicit;
+ ps_func_list->pf_calc_pt_sad_and_2_best_results_explicit_8x8 = hme_calc_pt_sad_and_result_explicit;
+ ps_func_list->pf_calc_pt_sad_and_2_best_results_explicit_8x8_4x4 = hme_calc_pt_sad_and_result_explicit;
+ ps_func_list->pf_calc_pt_sad_and_2_best_results_explicit_8x8_for_grid = hme_calc_pt_sad_and_result_explicit;
+ ps_func_list->pf_calc_pt_sad_and_2_best_results_explicit_generic = hme_calc_pt_sad_and_result_explicit;
+ ps_func_list->pf_calc_sad_and_1_best_result_generic = hme_calc_sad_and_1_best_result;
+ ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_generic = hme_calc_stim_injected_sad_and_1_best_result;
+ ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_part_eq_1 = hme_calc_stim_injected_sad_and_1_best_result;
+ ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_square_parts = hme_calc_stim_injected_sad_and_1_best_result;
+ ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_part_lt_9 = hme_calc_stim_injected_sad_and_1_best_result;
+ ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_part_lt_17 = hme_calc_stim_injected_sad_and_1_best_result;
+ ps_func_list->pf_compute_variance_for_all_parts = hme_compute_variance_for_all_parts;
+ ps_func_list->pf_compute_stim_injected_distortion_for_all_parts = hme_compute_stim_injected_distortion_for_all_parts;
+ ps_func_list->pf_calc_sad_and_1_best_result_num_part_1_for_grid = hme_calc_sad_and_1_best_result_neon;
+ ps_func_list->pf_calc_sad_and_1_best_result_num_part_eq_1 = hme_calc_sad_and_1_best_result_neon;
+ ps_func_list->pf_calc_sad_and_1_best_result_num_part_lt_17 = hme_calc_sad_and_1_best_result_neon;
+ ps_func_list->pf_calc_sad_and_1_best_result_num_part_lt_9 = hme_calc_sad_and_1_best_result_neon;
+ ps_func_list->pf_calc_sad_and_1_best_result_num_square_parts = hme_calc_sad_and_1_best_result_neon;
+ ps_func_list->pf_calc_sad_and_1_best_result_subpel_generic = hme_calc_sad_and_1_best_result_subpel;
+ ps_func_list->pf_calc_sad_and_1_best_result_subpel_num_part_eq_1 = hme_calc_sad_and_1_best_result_subpel_neon;
+ ps_func_list->pf_calc_sad_and_1_best_result_subpel_num_part_lt_17 = hme_calc_sad_and_1_best_result_subpel_neon;
+ ps_func_list->pf_calc_sad_and_1_best_result_subpel_num_part_lt_9 = hme_calc_sad_and_1_best_result_subpel_neon;
+ ps_func_list->pf_calc_sad_and_1_best_result_subpel_square_parts = hme_calc_sad_and_1_best_result_subpel_neon;
+ ps_func_list->pf_combine_4x4_sads_and_compute_cost_high_quality = hme_combine_4x4_sads_and_compute_cost_high_quality_neon;
+ ps_func_list->pf_combine_4x4_sads_and_compute_cost_high_speed = hme_combine_4x4_sads_and_compute_cost_high_speed_neon;
+ ps_func_list->pf_compute_4x4_sads_for_16x16_blk = compute_4x4_sads_for_16x16_blk_neon;
+ ps_func_list->pf_evalsad_grid_npu_MxN = hme_evalsad_grid_npu_MxN_neon;
+ ps_func_list->pf_evalsad_grid_pu_MxM = compute_part_sads_for_MxM_blk_neon;
+ ps_func_list->pf_evalsad_pt_npu_12x16_8bit = hme_evalsad_pt_npu_MxN_8bit_neon;
+ ps_func_list->pf_evalsad_pt_npu_16x12_8bit = hme_evalsad_pt_npu_MxN_8bit_neon;
+ ps_func_list->pf_evalsad_pt_npu_16x4_8bit = hme_evalsad_pt_npu_MxN_8bit_neon;
+ ps_func_list->pf_evalsad_pt_npu_24x32_8bit = hme_evalsad_pt_npu_MxN_8bit_neon;
+ ps_func_list->pf_evalsad_pt_npu_8x4_8bit = hme_evalsad_pt_npu_MxN_8bit_neon;
+ ps_func_list->pf_evalsad_pt_npu_mxn_8bit = hme_evalsad_pt_npu_MxN_8bit_neon;
+ ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit = hme_evalsad_pt_npu_MxN_8bit_neon;
+ ps_func_list->pf_evalsad_pt_npu_width_multiple_4_8bit = hme_evalsad_pt_npu_MxN_8bit_neon;
+ ps_func_list->pf_evalsad_pt_npu_width_multiple_8_8bit = hme_evalsad_pt_npu_MxN_8bit_neon;
+ ps_func_list->pf_get_wt_inp_8x8 = hme_get_wt_inp_8x8_neon;
+ ps_func_list->pf_get_wt_inp_ctb = hme_get_wt_inp_ctb_neon;
+ ps_func_list->pf_get_wt_inp_generic = hme_get_wt_inp;
+ ps_func_list->pf_mv_clipper = hme_mv_clipper;
+ ps_func_list->pf_qpel_interp_avg_1pt = hme_qpel_interp_avg_1pt_neon;
+ ps_func_list->pf_qpel_interp_avg_2pt_horz_with_reuse = hme_qpel_interp_avg_2pt_horz_with_reuse_neon;
+ ps_func_list->pf_qpel_interp_avg_2pt_vert_with_reuse = hme_qpel_interp_avg_2pt_vert_with_reuse_neon;
+ ps_func_list->pf_qpel_interp_avg_generic = hme_qpel_interp_avg_neon;
+ ps_func_list->pf_store_4x4_sads_high_quality = hme_store_4x4_sads_high_quality_neon;
+ ps_func_list->pf_store_4x4_sads_high_speed = hme_store_4x4_sads_high_speed_neon;
+ break;
+#endif
+ default:
+ ps_func_list->pf_calc_pt_sad_and_1_best_result_explicit_8x8 = hme_calc_pt_sad_and_result_explicit;
+ ps_func_list->pf_calc_pt_sad_and_1_best_result_explicit_8x8_4x4 = hme_calc_pt_sad_and_result_explicit;
+ ps_func_list->pf_calc_pt_sad_and_1_best_result_explicit_8x8_for_grid = hme_calc_pt_sad_and_result_explicit;
+ ps_func_list->pf_calc_pt_sad_and_1_best_result_explicit_generic = hme_calc_pt_sad_and_result_explicit;
+ ps_func_list->pf_calc_pt_sad_and_2_best_results_explicit_8x8 = hme_calc_pt_sad_and_result_explicit;
+ ps_func_list->pf_calc_pt_sad_and_2_best_results_explicit_8x8_4x4 = hme_calc_pt_sad_and_result_explicit;
+ ps_func_list->pf_calc_pt_sad_and_2_best_results_explicit_8x8_for_grid = hme_calc_pt_sad_and_result_explicit;
+ ps_func_list->pf_calc_pt_sad_and_2_best_results_explicit_generic = hme_calc_pt_sad_and_result_explicit;
+ ps_func_list->pf_calc_sad_and_1_best_result_generic = hme_calc_sad_and_1_best_result;
+ ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_generic = hme_calc_stim_injected_sad_and_1_best_result;
+ ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_part_eq_1 = hme_calc_stim_injected_sad_and_1_best_result;
+ ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_square_parts = hme_calc_stim_injected_sad_and_1_best_result;
+ ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_part_lt_9 = hme_calc_stim_injected_sad_and_1_best_result;
+ ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_part_lt_17 = hme_calc_stim_injected_sad_and_1_best_result;
+ ps_func_list->pf_compute_variance_for_all_parts = hme_compute_variance_for_all_parts;
+ ps_func_list->pf_compute_stim_injected_distortion_for_all_parts = hme_compute_stim_injected_distortion_for_all_parts;
+ ps_func_list->pf_calc_sad_and_1_best_result_num_part_1_for_grid = hme_calc_sad_and_1_best_result;
+ ps_func_list->pf_calc_sad_and_1_best_result_num_part_eq_1 = hme_calc_sad_and_1_best_result;
+ ps_func_list->pf_calc_sad_and_1_best_result_num_part_lt_17 = hme_calc_sad_and_1_best_result;
+ ps_func_list->pf_calc_sad_and_1_best_result_num_part_lt_9 = hme_calc_sad_and_1_best_result;
+ ps_func_list->pf_calc_sad_and_1_best_result_num_square_parts = hme_calc_sad_and_1_best_result;
+ ps_func_list->pf_calc_sad_and_1_best_result_subpel_generic = hme_calc_sad_and_1_best_result_subpel;
+ ps_func_list->pf_calc_sad_and_1_best_result_subpel_num_part_eq_1 = hme_calc_sad_and_1_best_result_subpel;
+ ps_func_list->pf_calc_sad_and_1_best_result_subpel_num_part_lt_17 = hme_calc_sad_and_1_best_result_subpel;
+ ps_func_list->pf_calc_sad_and_1_best_result_subpel_num_part_lt_9 = hme_calc_sad_and_1_best_result_subpel;
+ ps_func_list->pf_calc_sad_and_1_best_result_subpel_square_parts = hme_calc_sad_and_1_best_result_subpel;
+ ps_func_list->pf_combine_4x4_sads_and_compute_cost_high_quality = hme_combine_4x4_sads_and_compute_cost_high_quality;
+ ps_func_list->pf_combine_4x4_sads_and_compute_cost_high_speed = hme_combine_4x4_sads_and_compute_cost_high_speed;
+ ps_func_list->pf_compute_4x4_sads_for_16x16_blk = compute_4x4_sads_for_16x16_blk;
+ ps_func_list->pf_evalsad_grid_npu_MxN = hme_evalsad_grid_npu_MxN;
+ ps_func_list->pf_evalsad_grid_pu_MxM = compute_part_sads_for_MxM_blk;
+ ps_func_list->pf_evalsad_pt_npu_12x16_8bit = hme_evalsad_pt_npu_MxN_8bit;
+ ps_func_list->pf_evalsad_pt_npu_16x12_8bit = hme_evalsad_pt_npu_MxN_8bit;
+ ps_func_list->pf_evalsad_pt_npu_16x4_8bit = hme_evalsad_pt_npu_MxN_8bit;
+ ps_func_list->pf_evalsad_pt_npu_24x32_8bit = hme_evalsad_pt_npu_MxN_8bit;
+ ps_func_list->pf_evalsad_pt_npu_8x4_8bit = hme_evalsad_pt_npu_MxN_8bit;
+ ps_func_list->pf_evalsad_pt_npu_mxn_8bit = hme_evalsad_pt_npu_MxN_8bit;
+ ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit = hme_evalsad_pt_npu_MxN_8bit;
+ ps_func_list->pf_evalsad_pt_npu_width_multiple_4_8bit = hme_evalsad_pt_npu_MxN_8bit;
+ ps_func_list->pf_evalsad_pt_npu_width_multiple_8_8bit = hme_evalsad_pt_npu_MxN_8bit;
+ ps_func_list->pf_get_wt_inp_8x8 = hme_get_wt_inp;
+ ps_func_list->pf_get_wt_inp_ctb = hme_get_wt_inp;
+ ps_func_list->pf_get_wt_inp_generic = hme_get_wt_inp;
+ ps_func_list->pf_mv_clipper = hme_mv_clipper;
+ ps_func_list->pf_qpel_interp_avg_1pt = hme_qpel_interp_avg_1pt;
+ ps_func_list->pf_qpel_interp_avg_2pt_horz_with_reuse = hme_qpel_interp_avg_2pt_horz_with_reuse;
+ ps_func_list->pf_qpel_interp_avg_2pt_vert_with_reuse = hme_qpel_interp_avg_2pt_vert_with_reuse;
+ ps_func_list->pf_qpel_interp_avg_generic = hme_qpel_interp_avg;
+ ps_func_list->pf_store_4x4_sads_high_quality = hme_store_4x4_sads_high_quality;
+ ps_func_list->pf_store_4x4_sads_high_speed = hme_store_4x4_sads_high_speed;
+ break;
+ }
+
+ gapf_sad_pt_npu[BLK_4x4] = ps_func_list->pf_evalsad_pt_npu_width_multiple_4_8bit;
+ gapf_sad_pt_npu[BLK_4x8] = ps_func_list->pf_evalsad_pt_npu_width_multiple_4_8bit;
+ gapf_sad_pt_npu[BLK_8x4] = ps_func_list->pf_evalsad_pt_npu_8x4_8bit;
+ gapf_sad_pt_npu[BLK_8x8] = ps_func_list->pf_evalsad_pt_npu_width_multiple_8_8bit;
+ gapf_sad_pt_npu[BLK_4x16] = ps_func_list->pf_evalsad_pt_npu_width_multiple_4_8bit;
+ gapf_sad_pt_npu[BLK_8x16] = ps_func_list->pf_evalsad_pt_npu_width_multiple_8_8bit;
+ gapf_sad_pt_npu[BLK_12x16] = ps_func_list->pf_evalsad_pt_npu_12x16_8bit;
+ gapf_sad_pt_npu[BLK_16x4] = ps_func_list->pf_evalsad_pt_npu_16x4_8bit;
+ gapf_sad_pt_npu[BLK_16x8] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit;
+ gapf_sad_pt_npu[BLK_16x12] = ps_func_list->pf_evalsad_pt_npu_16x12_8bit;
+ gapf_sad_pt_npu[BLK_16x16] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit;
+ gapf_sad_pt_npu[BLK_8x32] = ps_func_list->pf_evalsad_pt_npu_width_multiple_8_8bit;
+ gapf_sad_pt_npu[BLK_16x32] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit;
+ gapf_sad_pt_npu[BLK_24x32] = ps_func_list->pf_evalsad_pt_npu_24x32_8bit;
+ gapf_sad_pt_npu[BLK_32x8] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit;
+ gapf_sad_pt_npu[BLK_32x16] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit;
+ gapf_sad_pt_npu[BLK_32x24] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit;
+ gapf_sad_pt_npu[BLK_32x32] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit;
+ gapf_sad_pt_npu[BLK_16x64] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit;
+ gapf_sad_pt_npu[BLK_32x64] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit;
+ gapf_sad_pt_npu[BLK_48x64] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit;
+ gapf_sad_pt_npu[BLK_64x16] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit;
+ gapf_sad_pt_npu[BLK_64x32] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit;
+ gapf_sad_pt_npu[BLK_64x48] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit;
+ gapf_sad_pt_npu[BLK_64x64] = ps_func_list->pf_evalsad_pt_npu_width_multiple_16_8bit;
+
+ gpf_part_sads_evaluator_16x16CU = ps_func_list->pf_compute_4x4_sads_for_16x16_blk;
+ gpf_part_sads_evaluator_MxM = ps_func_list->pf_evalsad_grid_pu_MxM;
+
+ gpf_sad_grid_mxn = ps_func_list->pf_evalsad_grid_npu_MxN;
+
+ gapf_calc_sad_and_result_fxn[0][0] = ps_func_list->pf_calc_sad_and_1_best_result_num_part_1_for_grid;
+ gapf_calc_sad_and_result_fxn[1][0] = ps_func_list->pf_calc_sad_and_1_best_result_num_part_eq_1;
+ gapf_calc_sad_and_result_fxn[2][0] = ps_func_list->pf_calc_sad_and_1_best_result_num_square_parts;
+ gapf_calc_sad_and_result_fxn[3][0] = ps_func_list->pf_calc_sad_and_1_best_result_num_part_lt_9;
+ gapf_calc_sad_and_result_fxn[4][0] = ps_func_list->pf_calc_sad_and_1_best_result_num_part_lt_17;
+ gapf_calc_sad_and_result_fxn[5][0] = ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_part_eq_1;
+ gapf_calc_sad_and_result_fxn[6][0] = ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_square_parts;
+ gapf_calc_sad_and_result_fxn[7][0] = ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_part_lt_9;
+ gapf_calc_sad_and_result_fxn[8][0] = ps_func_list->pf_calc_stim_injected_sad_and_1_best_result_num_part_lt_17;
+ /* NOTE(review): the pf_*_2_best_results_* members read below are not assigned anywhere in */
+ /* this function; unless the caller pre-fills them, slot [..][1] holds stale/unset pointers */
+ gapf_calc_sad_and_result_fxn[0][1] = ps_func_list->pf_calc_sad_and_2_best_results_num_part_1_for_grid;
+ gapf_calc_sad_and_result_fxn[1][1] = ps_func_list->pf_calc_sad_and_2_best_results_num_part_eq_1;
+ gapf_calc_sad_and_result_fxn[2][1] = ps_func_list->pf_calc_sad_and_2_best_results_num_square_parts;
+ gapf_calc_sad_and_result_fxn[3][1] = ps_func_list->pf_calc_sad_and_2_best_results_num_part_lt_9;
+ gapf_calc_sad_and_result_fxn[4][1] = ps_func_list->pf_calc_sad_and_2_best_results_num_part_lt_17;
+ gapf_calc_sad_and_result_fxn[5][1] = ps_func_list->pf_calc_stim_injected_sad_and_2_best_results_num_part_eq_1;
+ gapf_calc_sad_and_result_fxn[6][1] = ps_func_list->pf_calc_stim_injected_sad_and_2_best_results_num_square_parts;
+ gapf_calc_sad_and_result_fxn[7][1] = ps_func_list->pf_calc_stim_injected_sad_and_2_best_results_num_part_lt_9;
+ gapf_calc_sad_and_result_fxn[8][1] = ps_func_list->pf_calc_stim_injected_sad_and_2_best_results_num_part_lt_17;
+}
+// clang-format on
+
+FT_CALC_SAD_AND_RESULT *hme_get_calc_sad_and_result_fxn(
+    S08 i1_grid_flag, U08 u1_is_cu_noisy, S32 i4_part_mask, S32 num_parts, S32 num_results)
+{
+    /* Table row: 0 = NxN (with or without SMP), 1 = SMP only, 2 = AMP, 3 = none of them */
+    S32 i4_row = (i4_part_mask & ENABLE_AMP) ? 2 : 3;
+
+    ASSERT((1 == num_results) || (2 == num_results));
+    if((3 == i4_row) && (i4_part_mask & (ENABLE_SMP | ENABLE_NxN)))
+    {
+        i4_row = (i4_part_mask & ENABLE_NxN) ? 0 : 1;
+    }
+
+    /* Column is num_parts - 1; the last index selects the 1 or 2 best results variant */
+    return gapf_calc_sad_and_result_fxn[gau1_calc_sad_and_result[i1_grid_flag][u1_is_cu_noisy]
+                                        [i4_row][num_parts - 1]][2 == num_results];
+}
+
+/* Wraps ps_prms in a one-grid context and runs the routed MxM partition SAD evaluator */
+/* for the grid points enabled in ps_prms->i4_grid_mask. The evaluator is handed one */
+/* pointer per partition into ps_prms->pi4_sad_grid (presumably where it writes SADs). */
+void hme_evalsad_grid_pu_MxM(err_prms_t *ps_prms)
+{
+    /* Single-grid context handed to the partition SAD evaluator */
+    grid_ctxt_t s_grid_ctxt;
+    /* Scratch candidate list, one slot per bit examined in the grid mask */
+    cand_t as_cands[9];
+    /* Per-partition start pointers into the caller's SAD buffer */
+    S32 *api4_part_sads[TOT_NUM_PARTS];
+    /* Zero MV and ref idx 0 are passed to the evaluator through the grid context */
+    hme_mv_t s_zero_mv = { 0, 0 };
+    S32 i4_zero_ref_idx = 0;
+    /* Block width is converted to a CU_SIZE_T via hme_get_range() - 4 */
+    CU_SIZE_T e_cu_size = (CU_SIZE_T)(hme_get_range(ps_prms->i4_blk_wd) - 4);
+    S32 i4_num_cands = 0;
+    S32 i4_pt, i4_part;
+
+    s_grid_ctxt.num_grids = 1;
+    s_grid_ctxt.p_mv = &s_zero_mv;
+    s_grid_ctxt.p_ref_idx = &i4_zero_ref_idx;
+    s_grid_ctxt.ppu1_ref_ptr = &ps_prms->pu1_ref;
+    s_grid_ctxt.ref_buf_stride = ps_prms->i4_ref_stride;
+    s_grid_ctxt.pi4_grd_mask = &ps_prms->i4_grid_mask;
+    /* Same step in both halves: upper 16 bits and lower 16 bits */
+    s_grid_ctxt.grd_sz_y_x = ((ps_prms->i4_step << 16) | ps_prms->i4_step);
+
+    /* Each set bit among the low 9 bits of the grid mask is one candidate */
+    for(i4_pt = 0; i4_pt < 9; i4_pt++)
+    {
+        i4_num_cands += (ps_prms->i4_grid_mask & (1 << i4_pt)) ? 1 : 0;
+    }
+
+    /* Partition k owns i4_num_cands consecutive entries of the SAD buffer */
+    for(i4_part = 0; i4_part < TOT_NUM_PARTS; i4_part++)
+    {
+        api4_part_sads[i4_part] = ps_prms->pi4_sad_grid + (i4_part * i4_num_cands);
+    }
+
+    gpf_part_sads_evaluator_MxM(
+        &s_grid_ctxt, ps_prms->pu1_inp, ps_prms->i4_inp_stride, (WORD32 **)api4_part_sads,
+        as_cands, &i4_num_cands, e_cu_size);
+}
+
+/**
+******************************************************************************
+ * Picks the SAD evaluator for a block.
+ *
+ * - More than one bit set in i4_part_mask (several partitions enabled): the
+ *   partition evaluator is used, hme_evalsad_grid_pu_16x16 for BLK_16x16 and
+ *   hme_evalsad_grid_pu_MxM for every other block size. The grid mask plays no
+ *   part in this case.
+ * - Otherwise: the routed grid evaluator when any of bits 1..8 of
+ *   i4_grid_mask is set (points other than bit 0), else the routed point
+ *   evaluator registered for e_blk_size.
+ *
+ * @param e_blk_size   block size, used to choose between the 16x16 and MxM
+ *                     partition evaluators and to index the point evaluators
+ * @param i4_grid_mask bit mask of grid points to evaluate
+ * @param i4_part_mask bit mask of enabled partitions
+ * @return evaluator function matching the masks and block size
+******************************************************************************
+ */
+PF_SAD_FXN_T hme_get_sad_fxn(BLK_SIZE_T e_blk_size, S32 i4_grid_mask, S32 i4_part_mask)
+{
+    /* x & (x - 1) clears the lowest set bit, so it is non-zero only when */
+    /* two or more partition bits are enabled */
+    if(i4_part_mask & (i4_part_mask - 1))
+    {
+        if(BLK_16x16 == e_blk_size)
+        {
+            return hme_evalsad_grid_pu_16x16;
+        }
+
+        return hme_evalsad_grid_pu_MxM;
+    }
+
+    /* 0x1fe selects grid bits 1..8, i.e. every grid bit except bit 0 */
+    if((i4_grid_mask & 0x1fe) != 0)
+    {
+        return gpf_sad_grid_mxn;
+    }
+
+    return gapf_sad_pt_npu[e_blk_size];
+}
+
+void ihevce_sifter_sad_fxn_assigner(FT_SAD_EVALUATOR **ppf_evalsad_pt_npu_mxn, IV_ARCH_T e_arch)
+{
+    /* Start from the C evaluator; switch to NEON only when built in and the arch matches */
+    FT_SAD_EVALUATOR *pf_evalsad = hme_evalsad_pt_npu_MxN_8bit;
+
+#ifdef ENABLE_NEON
+    if((ARCH_ARM_A9Q == e_arch) || (ARCH_ARM_V8_NEON == e_arch))
+    {
+        pf_evalsad = hme_evalsad_pt_npu_MxN_8bit_neon;
+    }
+#else
+    (void)e_arch;
+#endif
+
+    ppf_evalsad_pt_npu_mxn[0] = pf_evalsad;
+}
diff --git a/encoder/ihevce_me_instr_set_router.h b/encoder/ihevce_me_instr_set_router.h
new file mode 100644
index 0000000..3bd7745
--- /dev/null
+++ b/encoder/ihevce_me_instr_set_router.h
@@ -0,0 +1,229 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_me_instr_set_router.h
+*
+* \brief
+* This file contains declarations of the ME function types and kernels used in the encoder
+*
+* \date
+* 15/07/2013
+*
+* \author
+* Ittiam
+*
+* List of Functions
+*
+*
+******************************************************************************
+*/
+
+#ifndef __IHEVCE_ME_INSTR_SET_ROUTER_H_
+#define __IHEVCE_ME_INSTR_SET_ROUTER_H_
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+typedef void FT_SAD_EVALUATOR(err_prms_t *); /* type of hme_evalsad_pt_npu_MxN_8bit and hme_evalsad_grid_npu_MxN */
+/* Type of compute_part_sads_for_MxM_blk; the last argument is a CU size. */
+typedef void FT_PART_SADS_EVALUATOR(
+ grid_ctxt_t *, UWORD8 *, WORD32, WORD32 **, cand_t *, WORD32 *, CU_SIZE_T);
+/* Type of compute_4x4_sads_for_16x16_blk: UWORD16 ** instead of WORD32 **, and no CU size argument. */
+typedef void
+ FT_PART_SADS_EVALUATOR_16X16CU(grid_ctxt_t *, UWORD8 *, WORD32, UWORD16 **, cand_t *, WORD32 *);
+/* Type of the hme_calc_*sad_and_*_best_result(s) kernels; also what hme_get_calc_sad_and_result_fxn() returns. */
+typedef void FT_CALC_SAD_AND_RESULT(
+ hme_search_prms_t *, wgt_pred_ctxt_t *, err_prms_t *, result_upd_prms_t *, U08 **, S32);
+/* Type of hme_calc_sad_and_1_best_result_subpel and hme_calc_sad_and_2_best_results_subpel. */
+typedef void FT_CALC_SAD_AND_RESULT_SUBPEL(err_prms_t *, result_upd_prms_t *);
+/* Type of hme_qpel_interp_avg. */
+typedef void FT_QPEL_INTERP_AVG(interp_prms_t *, S32, S32, S32);
+/* Type of hme_qpel_interp_avg_1pt: the previous signature plus a U08 ** and an S32 * argument. */
+typedef void FT_QPEL_INTERP_AVG_1PT(interp_prms_t *, S32, S32, S32, U08 **, S32 *);
+/* Type of hme_qpel_interp_avg_2pt_vert_with_reuse and hme_qpel_interp_avg_2pt_horz_with_reuse. */
+typedef void FT_QPEL_INTERP_AVG_2PT(interp_prms_t *, S32, S32, U08 **, S32 *);
+/* Type of hme_get_wt_inp (and of the _8x8 / _ctb NEON variants declared under ENABLE_NEON). */
+typedef void FT_GET_WT_INP(layer_ctxt_t *, wgt_pred_ctxt_t *, S32, S32, S32, S32, S32, U08);
+/* Type of hme_store_4x4_sads_high_speed and hme_store_4x4_sads_high_quality. */
+typedef void
+ FT_STORE_4X4_SADS(hme_search_prms_t *, layer_ctxt_t *, range_prms_t *, wgt_pred_ctxt_t *, S16 *);
+/* Type of hme_combine_4x4_sads_and_compute_cost_high_speed and ..._high_quality. */
+typedef void FT_COMBINE_4X4_SADS_AND_COMPUTE_COST(
+ S08,
+ range_prms_t *,
+ range_prms_t *,
+ hme_mv_t *,
+ hme_mv_t *,
+ pred_ctxt_t *,
+ PF_MV_COST_FXN,
+ S16 *,
+ S16 *,
+ S16 *);
+/* Type of hme_mv_clipper. */
+typedef void FT_MV_CLIPPER(hme_search_prms_t *, S32, S08, U08, U08, U08);
+/* Type of hme_compute_variance_for_all_parts. */
+typedef void FT_COMPUTE_VARIANCE(U08 *, S32, S32 *, U32 *, S32, U08);
+/* Type of hme_compute_stim_injected_distortion_for_all_parts. */
+typedef void FT_COMPUTE_DISTORTION(
+ U08 *, S32, S32 *, ULWORD64 *, ULWORD64 *, S32 *, S32, S32, S32, S32, S32, U08);
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+
+// clang-format off
+typedef struct /* table of ME kernel function pointers, one member per kernel variant */
+{ /* NOTE(review): presumably filled by ihevce_me_instr_set_router() according to e_arch - confirm */
+ FT_SAD_EVALUATOR *pf_evalsad_pt_npu_mxn_8bit; /* SAD evaluators taking an err_prms_t * */
+ FT_SAD_EVALUATOR *pf_evalsad_grid_npu_MxN;
+ FT_SAD_EVALUATOR *pf_evalsad_pt_npu_8x4_8bit;
+ FT_SAD_EVALUATOR *pf_evalsad_pt_npu_16x4_8bit;
+ FT_SAD_EVALUATOR *pf_evalsad_pt_npu_16x12_8bit;
+ FT_SAD_EVALUATOR *pf_evalsad_pt_npu_24x32_8bit;
+ FT_SAD_EVALUATOR *pf_evalsad_pt_npu_12x16_8bit;
+ FT_SAD_EVALUATOR *pf_evalsad_pt_npu_width_multiple_4_8bit;
+ FT_SAD_EVALUATOR *pf_evalsad_pt_npu_width_multiple_8_8bit;
+ FT_SAD_EVALUATOR *pf_evalsad_pt_npu_width_multiple_16_8bit;
+ FT_PART_SADS_EVALUATOR_16X16CU *pf_compute_4x4_sads_for_16x16_blk; /* partition SAD evaluators */
+ FT_PART_SADS_EVALUATOR *pf_evalsad_grid_pu_MxM;
+ FT_CALC_SAD_AND_RESULT *pf_calc_sad_and_1_best_result_generic; /* "1 best result" variants start here */
+ FT_CALC_SAD_AND_RESULT *pf_calc_stim_injected_sad_and_1_best_result_generic;
+ FT_CALC_SAD_AND_RESULT *pf_calc_stim_injected_sad_and_1_best_result_num_part_eq_1;
+ FT_CALC_SAD_AND_RESULT *pf_calc_stim_injected_sad_and_1_best_result_num_square_parts;
+ FT_CALC_SAD_AND_RESULT *pf_calc_stim_injected_sad_and_1_best_result_num_part_lt_9;
+ FT_CALC_SAD_AND_RESULT *pf_calc_stim_injected_sad_and_1_best_result_num_part_lt_17;
+ FT_CALC_SAD_AND_RESULT *pf_calc_sad_and_1_best_result_num_part_eq_1;
+ FT_CALC_SAD_AND_RESULT *pf_calc_sad_and_1_best_result_num_part_1_for_grid;
+ FT_CALC_SAD_AND_RESULT *pf_calc_sad_and_1_best_result_num_square_parts;
+ FT_CALC_SAD_AND_RESULT *pf_calc_sad_and_1_best_result_num_part_lt_9;
+ FT_CALC_SAD_AND_RESULT *pf_calc_sad_and_1_best_result_num_part_lt_17;
+ FT_CALC_SAD_AND_RESULT *pf_calc_pt_sad_and_1_best_result_explicit_generic;
+ FT_CALC_SAD_AND_RESULT *pf_calc_pt_sad_and_1_best_result_explicit_8x8;
+ FT_CALC_SAD_AND_RESULT *pf_calc_pt_sad_and_1_best_result_explicit_8x8_for_grid;
+ FT_CALC_SAD_AND_RESULT *pf_calc_pt_sad_and_1_best_result_explicit_8x8_4x4;
+ FT_CALC_SAD_AND_RESULT *pf_calc_pt_sad_and_2_best_results_explicit_generic;
+ FT_CALC_SAD_AND_RESULT *pf_calc_pt_sad_and_2_best_results_explicit_8x8;
+ FT_CALC_SAD_AND_RESULT *pf_calc_pt_sad_and_2_best_results_explicit_8x8_for_grid;
+ FT_CALC_SAD_AND_RESULT *pf_calc_pt_sad_and_2_best_results_explicit_8x8_4x4;
+ FT_CALC_SAD_AND_RESULT_SUBPEL *pf_calc_sad_and_1_best_result_subpel_generic;
+ FT_CALC_SAD_AND_RESULT_SUBPEL *pf_calc_sad_and_1_best_result_subpel_num_part_eq_1;
+ FT_CALC_SAD_AND_RESULT_SUBPEL *pf_calc_sad_and_1_best_result_subpel_square_parts;
+ FT_CALC_SAD_AND_RESULT_SUBPEL *pf_calc_sad_and_1_best_result_subpel_num_part_lt_9;
+ FT_CALC_SAD_AND_RESULT_SUBPEL *pf_calc_sad_and_1_best_result_subpel_num_part_lt_17;
+ FT_CALC_SAD_AND_RESULT *pf_calc_sad_and_2_best_results_generic; /* "2 best results" variants start here */
+ FT_CALC_SAD_AND_RESULT *pf_calc_stim_injected_sad_and_2_best_results_generic;
+ FT_CALC_SAD_AND_RESULT *pf_calc_stim_injected_sad_and_2_best_results_num_part_eq_1;
+ FT_CALC_SAD_AND_RESULT *pf_calc_stim_injected_sad_and_2_best_results_num_square_parts;
+ FT_CALC_SAD_AND_RESULT *pf_calc_stim_injected_sad_and_2_best_results_num_part_lt_9;
+ FT_CALC_SAD_AND_RESULT *pf_calc_stim_injected_sad_and_2_best_results_num_part_lt_17;
+ FT_CALC_SAD_AND_RESULT *pf_calc_sad_and_2_best_results_num_part_eq_1;
+ FT_CALC_SAD_AND_RESULT *pf_calc_sad_and_2_best_results_num_part_1_for_grid;
+ FT_CALC_SAD_AND_RESULT *pf_calc_sad_and_2_best_results_num_square_parts;
+ FT_CALC_SAD_AND_RESULT *pf_calc_sad_and_2_best_results_num_part_lt_9;
+ FT_CALC_SAD_AND_RESULT *pf_calc_sad_and_2_best_results_num_part_lt_17;
+ FT_CALC_SAD_AND_RESULT_SUBPEL *pf_calc_sad_and_2_best_results_subpel_generic;
+ FT_CALC_SAD_AND_RESULT_SUBPEL *pf_calc_sad_and_2_best_results_subpel_num_part_eq_1;
+ FT_CALC_SAD_AND_RESULT_SUBPEL *pf_calc_sad_and_2_best_results_subpel_square_parts;
+ FT_CALC_SAD_AND_RESULT_SUBPEL *pf_calc_sad_and_2_best_results_subpel_num_part_lt_9;
+ FT_CALC_SAD_AND_RESULT_SUBPEL *pf_calc_sad_and_2_best_results_subpel_num_part_lt_17;
+ FT_QPEL_INTERP_AVG *pf_qpel_interp_avg_generic; /* qpel interpolation/averaging kernels */
+ FT_QPEL_INTERP_AVG_1PT *pf_qpel_interp_avg_1pt;
+ FT_QPEL_INTERP_AVG_2PT *pf_qpel_interp_avg_2pt_vert_with_reuse;
+ FT_QPEL_INTERP_AVG_2PT *pf_qpel_interp_avg_2pt_horz_with_reuse;
+ FT_GET_WT_INP *pf_get_wt_inp_generic; /* weighted-input kernels: generic, 8x8 and ctb variants */
+ FT_GET_WT_INP *pf_get_wt_inp_8x8;
+ FT_GET_WT_INP *pf_get_wt_inp_ctb;
+ FT_STORE_4X4_SADS *pf_store_4x4_sads_high_speed; /* 4x4 SAD store/combine, high_speed and high_quality */
+ FT_STORE_4X4_SADS *pf_store_4x4_sads_high_quality;
+ FT_COMBINE_4X4_SADS_AND_COMPUTE_COST *pf_combine_4x4_sads_and_compute_cost_high_speed;
+ FT_COMBINE_4X4_SADS_AND_COMPUTE_COST *pf_combine_4x4_sads_and_compute_cost_high_quality;
+ FT_MV_CLIPPER *pf_mv_clipper;
+ FT_COMPUTE_VARIANCE *pf_compute_variance_for_all_parts;
+ FT_COMPUTE_DISTORTION *pf_compute_stim_injected_distortion_for_all_parts;
+} ihevce_me_optimised_function_list_t;
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+void ihevce_me_instr_set_router(
+ ihevce_me_optimised_function_list_t *ps_func_list, IV_ARCH_T e_arch); /* NOTE(review): presumably fills ps_func_list for e_arch - defined elsewhere, confirm */
+/* Returns the SAD evaluator selected by block size, grid mask and partition mask. */
+PF_SAD_FXN_T hme_get_sad_fxn(
+ BLK_SIZE_T e_blk_size, S32 i4_grid_mask, S32 i4_part_mask);
+/* Writes the MxN point SAD kernel for e_arch into ppf_evalsad_pt_npu_mxn[0] (NEON kernel only when built with ENABLE_NEON). */
+void ihevce_sifter_sad_fxn_assigner(
+ FT_SAD_EVALUATOR **ppf_evalsad_pt_npu_mxn, IV_ARCH_T e_arch);
+/* Same signature as FT_SAD_EVALUATOR; hme_get_sad_fxn() returns it for multi-partition blocks other than BLK_16x16. */
+void hme_evalsad_grid_pu_MxM(err_prms_t *ps_prms);
+/* Returns an FT_CALC_SAD_AND_RESULT kernel for the given flags, partition mask/count and result count (defined elsewhere). */
+FT_CALC_SAD_AND_RESULT *hme_get_calc_sad_and_result_fxn(S08 i1_grid_flag,
+ U08 u1_is_cu_noisy, S32 i4_part_mask, S32 num_parts, S32 num_results);
+
+/* Function List - C: each kernel is declared through its FT_* function type, so the typedef is its prototype */
+FT_SAD_EVALUATOR hme_evalsad_pt_npu_MxN_8bit;
+FT_SAD_EVALUATOR hme_evalsad_grid_npu_MxN;
+FT_PART_SADS_EVALUATOR compute_part_sads_for_MxM_blk;
+FT_PART_SADS_EVALUATOR_16X16CU compute_4x4_sads_for_16x16_blk;
+FT_CALC_SAD_AND_RESULT hme_calc_sad_and_1_best_result;
+FT_CALC_SAD_AND_RESULT hme_calc_stim_injected_sad_and_1_best_result;
+FT_CALC_SAD_AND_RESULT hme_calc_pt_sad_and_result_explicit;
+FT_CALC_SAD_AND_RESULT_SUBPEL hme_calc_sad_and_1_best_result_subpel;
+FT_CALC_SAD_AND_RESULT hme_calc_sad_and_2_best_results;
+FT_CALC_SAD_AND_RESULT hme_calc_stim_injected_sad_and_2_best_results;
+FT_CALC_SAD_AND_RESULT_SUBPEL hme_calc_sad_and_2_best_results_subpel;
+FT_QPEL_INTERP_AVG hme_qpel_interp_avg;
+FT_QPEL_INTERP_AVG_1PT hme_qpel_interp_avg_1pt;
+FT_QPEL_INTERP_AVG_2PT hme_qpel_interp_avg_2pt_vert_with_reuse;
+FT_QPEL_INTERP_AVG_2PT hme_qpel_interp_avg_2pt_horz_with_reuse;
+FT_GET_WT_INP hme_get_wt_inp;
+FT_STORE_4X4_SADS hme_store_4x4_sads_high_speed;
+FT_STORE_4X4_SADS hme_store_4x4_sads_high_quality;
+FT_COMBINE_4X4_SADS_AND_COMPUTE_COST hme_combine_4x4_sads_and_compute_cost_high_speed;
+FT_COMBINE_4X4_SADS_AND_COMPUTE_COST hme_combine_4x4_sads_and_compute_cost_high_quality;
+FT_MV_CLIPPER hme_mv_clipper;
+FT_COMPUTE_VARIANCE hme_compute_variance_for_all_parts;
+FT_COMPUTE_DISTORTION hme_compute_stim_injected_distortion_for_all_parts;
+
+
+/* Function List - Neon: declared only under ENABLE_NEON; not every C kernel has a _neon twin here (e.g. hme_mv_clipper) */
+#ifdef ENABLE_NEON
+FT_SAD_EVALUATOR hme_evalsad_pt_npu_MxN_8bit_neon;
+FT_SAD_EVALUATOR hme_evalsad_grid_npu_MxN_neon;
+FT_PART_SADS_EVALUATOR compute_part_sads_for_MxM_blk_neon;
+FT_PART_SADS_EVALUATOR_16X16CU compute_4x4_sads_for_16x16_blk_neon;
+FT_CALC_SAD_AND_RESULT hme_calc_sad_and_1_best_result_neon;
+FT_CALC_SAD_AND_RESULT_SUBPEL hme_calc_sad_and_1_best_result_subpel_neon;
+FT_QPEL_INTERP_AVG hme_qpel_interp_avg_neon;
+FT_QPEL_INTERP_AVG_1PT hme_qpel_interp_avg_1pt_neon;
+FT_QPEL_INTERP_AVG_2PT hme_qpel_interp_avg_2pt_vert_with_reuse_neon;
+FT_QPEL_INTERP_AVG_2PT hme_qpel_interp_avg_2pt_horz_with_reuse_neon;
+FT_GET_WT_INP hme_get_wt_inp_8x8_neon;
+FT_GET_WT_INP hme_get_wt_inp_ctb_neon;
+FT_STORE_4X4_SADS hme_store_4x4_sads_high_speed_neon;
+FT_STORE_4X4_SADS hme_store_4x4_sads_high_quality_neon;
+FT_COMBINE_4X4_SADS_AND_COMPUTE_COST hme_combine_4x4_sads_and_compute_cost_high_speed_neon;
+FT_COMBINE_4X4_SADS_AND_COMPUTE_COST hme_combine_4x4_sads_and_compute_cost_high_quality_neon;
+#endif
+
+// clang-format on
+
+#endif
diff --git a/encoder/ihevce_me_pass.c b/encoder/ihevce_me_pass.c
new file mode 100644
index 0000000..e782952
--- /dev/null
+++ b/encoder/ihevce_me_pass.c
@@ -0,0 +1,1601 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*!
+******************************************************************************
+* \file ihevce_me_pass.c
+*
+* \brief
+* Converts the language of the encoder to language of me. This is an i/f
+* between the encoder style APIs and ME style APIs. This is basically
+* a memoryless glue layer.
+*
+* \date
+* 22/10/2012
+*
+* \author
+* Ittiam
+*
+*
+* List of Functions
+*
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_debug.h"
+#include "ihevc_defs.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_entropy_structs.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+#include "ihevce_enc_loop_structs.h"
+#include "ihevce_inter_pred.h"
+
+#include "hme_datatype.h"
+#include "hme_interface.h"
+#include "hme_common_defs.h"
+#include "hme_defs.h"
+#include "ihevce_me_instr_set_router.h"
+#include "hme_utils.h"
+#include "hme_coarse.h"
+#include "hme_refine.h"
+#include "hme_function_selector.h"
+#include "ihevce_me_pass.h"
+
+#include "cast_types.h"
+#include "osal.h"
+#include "osal_defaults.h"
+
+/*****************************************************************************/
+/* Macros */
+/*****************************************************************************/
+
+/** scaler selection flag: original simple five tap scaler (0 = not selected) */
+#define FIVE_TAP_ORIG_SCALER 0
+
+/** scaler selection flag: simple gaussian filter, blurs the image a bit (0 = not selected) */
+#define SIMPLE_GAUSSIAN_SCALER 0
+
+/** scaler selection flag: lanczos scaler, gives sharper images (1 = selected) */
+#define LANCZOS_SCALER 1
+
+// Saturated addition: z = x + y, replaced by MAX_INTRA_COST_IPE when the sum comes out below either operand.
+// NOTE(review): bare { } block with multiply-evaluated arguments; for signed operands the wrap it tests for is undefined behaviour in C - confirm operand types.
+#define SATURATED_ADD(z, x, y) \
+ { \
+ (z) = (x) + (y); \
+ if(((z) < (x)) || ((z) < (y))) \
+ (z) = MAX_INTRA_COST_IPE; \
+ }
+// Saturated subtraction: z = x - y, replaced by 0 when the difference is negative. NOTE(review): the test can never fire if z is unsigned - confirm operand types.
+#define SATURATED_SUB(z, x, y) \
+ { \
+ (z) = (x) - (y); \
+ if((z) < 0) /*if (((z) > (x)) || ((z) > (y))) */ \
+ (z) = 0; \
+ }
+/* Compile-time guard: rejects a build that selects more than one scaler (selecting none is not rejected). */
+#if(FIVE_TAP_ORIG_SCALER + SIMPLE_GAUSSIAN_SCALER + LANCZOS_SCALER) > 1
+#error "HME ERROR: Only one scaler can be enabled at a time"
+#endif
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_me_get_num_mem_recs \endif
+*
+* \brief
+* Returns the number of memory records required by the ME module
+* Note : Include TOT MEM. req. for ME + TOT MEM. req. for Dep Mngr for L0 ME
+*
+* \return
+* Number of memory records
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+WORD32 ihevce_me_get_num_mem_recs(WORD32 i4_num_me_frm_pllel)
+{
+    /* The record count is exactly what the HME allocator reports */
+
+    return hme_enc_num_alloc(i4_num_me_frm_pllel);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_derive_me_init_prms \endif
+*
+* \brief
+*    Fills the HME init parameters from the create time static parameters
+*    of the encoder, for one target resolution.
+*
+* \param[in] ps_init_prms : Create time static parameters
+* \param[out] ps_hme_init_prms : HME init parameters to be populated
+* \param[in] i4_num_proc_thrds : Number of processing threads for this module
+* \param[in] i4_resolution_id : Index of the target resolution to read
+*
+* \return
+*    None
+*
+*****************************************************************************
+*/
+void ihevce_derive_me_init_prms(
+    ihevce_static_cfg_params_t *ps_init_prms,
+    hme_init_prms_t *ps_hme_init_prms,
+    S32 i4_num_proc_thrds,
+    S32 i4_resolution_id)
+{
+    const WORD32 i4_field_pic = ps_init_prms->s_src_prms.i4_field_pic;
+    const WORD32 i4_preset =
+        ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;
+    const WORD32 i4_tgt_wd = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
+    const WORD32 i4_tgt_ht = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
+    const WORD32 i4_vqet = ps_init_prms->s_coding_tools_prms.i4_vqet;
+
+    /* min CU size, derived from its log2 in the config params */
+    const WORD32 min_cu_size = 1 << ps_init_prms->s_config_prms.i4_min_log2_cu_size;
+
+    /* Upper bound on reference frames; the default is shifted by the field flag */
+    ps_hme_init_prms->max_num_ref = ((DEFAULT_MAX_REFERENCE_PICS) << i4_field_pic);
+
+    /* Width and height of the encoded layer, plus the SET_CTB_ALIGN amount */
+    ps_hme_init_prms->a_wd[0] = i4_tgt_wd + SET_CTB_ALIGN(i4_tgt_wd, min_cu_size);
+    ps_hme_init_prms->a_ht[0] = i4_tgt_ht + SET_CTB_ALIGN(i4_tgt_ht, min_cu_size);
+
+    /* Coarsest layer keeps 4 results per blk: 8x4L, 8x4R, 4x8T, 4x8B */
+    ps_hme_init_prms->max_num_results_coarse = 4;
+
+    /* Refinement layers keep at most 2 results per partition */
+    ps_hme_init_prms->max_num_results = 2;
+
+    /* With about 4 layers for 1080p, explicit search over all ref frames  */
+    /* is done in all but the final layer                                  */
+    ps_hme_init_prms->num_layers_explicit_search = 3;
+
+    /* max_tr_depth for Inter comes from the nI tree depth setting */
+    ps_hme_init_prms->u1_max_tr_depth = ps_init_prms->s_config_prms.i4_max_tr_tree_depth_nI;
+
+    ps_hme_init_prms->log_ctb_size = ps_init_prms->s_config_prms.i4_max_log2_cu_size;
+    ASSERT(ps_hme_init_prms->log_ctb_size == 6);
+
+    /* only 1 layer is encoded at present */
+    ps_hme_init_prms->num_simulcast_layers = 1;
+
+    /* feature not supported yet */
+    ps_hme_init_prms->segment_higher_layers = 0;
+
+    /* 4x4 in the intermediate refinement layers: off for presets P4 to P7 */
+    /* when OLD_XTREME_SPEED is 0, otherwise always on.                    */
+#if !OLD_XTREME_SPEED
+    switch(i4_preset)
+    {
+    case IHEVCE_QUALITY_P4:
+    case IHEVCE_QUALITY_P5:
+    case IHEVCE_QUALITY_P6:
+    case IHEVCE_QUALITY_P7:
+        ps_hme_init_prms->use_4x4 = 0;
+        break;
+    default:
+        ps_hme_init_prms->use_4x4 = 1;
+        break;
+    }
+#else
+    ps_hme_init_prms->use_4x4 = 1;
+#endif
+
+    /* num_b_frms = 2^(max temporal layers) - 1 */
+    ps_hme_init_prms->num_b_frms =
+        (1 << ps_init_prms->s_coding_tools_prms.i4_max_temporal_layers) - 1;
+
+    ps_hme_init_prms->i4_num_proc_thrds = i4_num_proc_thrds;
+
+    /* ME quality preset and hpel/qpel refinement step counts per encoder  */
+    /* preset. Any other preset value leaves all three fields untouched.   */
+    switch(i4_preset)
+    {
+    case IHEVCE_QUALITY_P0:
+        ps_hme_init_prms->s_me_coding_tools.e_me_quality_presets = ME_PRISTINE_QUALITY;
+        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_hpel_refine = 3;
+        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_qpel_refine = 3;
+        break;
+    case IHEVCE_QUALITY_P2:
+        ps_hme_init_prms->s_me_coding_tools.e_me_quality_presets = ME_HIGH_QUALITY;
+        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_hpel_refine = 3;
+        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_qpel_refine = 3;
+        break;
+    case IHEVCE_QUALITY_P3:
+        ps_hme_init_prms->s_me_coding_tools.e_me_quality_presets = ME_MEDIUM_SPEED;
+        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_hpel_refine = 2;
+        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_qpel_refine = 2;
+        break;
+    case IHEVCE_QUALITY_P4:
+        ps_hme_init_prms->s_me_coding_tools.e_me_quality_presets = ME_HIGH_SPEED;
+        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_hpel_refine = 1;
+        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_qpel_refine = 1;
+        break;
+    case IHEVCE_QUALITY_P5:
+        ps_hme_init_prms->s_me_coding_tools.e_me_quality_presets = ME_XTREME_SPEED;
+        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_hpel_refine = 1;
+        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_qpel_refine = 1;
+        break;
+    case IHEVCE_QUALITY_P6:
+        ps_hme_init_prms->s_me_coding_tools.e_me_quality_presets = ME_XTREME_SPEED_25;
+        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_hpel_refine = 1;
+        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_qpel_refine = 1;
+        break;
+    case IHEVCE_QUALITY_P7:
+        ps_hme_init_prms->s_me_coding_tools.e_me_quality_presets = ME_XTREME_SPEED_25;
+        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_hpel_refine = 1;
+        ps_hme_init_prms->s_me_coding_tools.i4_num_steps_qpel_refine = 0;
+        break;
+    default:
+        break;
+    }
+
+    /* this flag is always cleared here */
+    ps_hme_init_prms->s_me_coding_tools.u1_l0_me_controlled_via_cmd_line = 0;
+
+    /* Search ranges, architecture and interlace flag come from static params */
+    ps_hme_init_prms->max_horz_search_range = ps_init_prms->s_config_prms.i4_max_search_range_horz;
+    ps_hme_init_prms->max_vert_search_range = ps_init_prms->s_config_prms.i4_max_search_range_vert;
+    ps_hme_init_prms->e_arch_type = ps_init_prms->e_arch_type;
+    ps_hme_init_prms->is_interlaced = (i4_field_pic == IV_INTERLACED);
+
+    /* Set only when both the control-toggler and noise-preservation bits are on */
+    ps_hme_init_prms->u1_is_stasino_enabled =
+        ((i4_vqet & (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER)) &&
+         (i4_vqet & (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_NOISE_PRESERVATION)));
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_me_get_mem_recs \endif
+*
+* \brief
+* Memory requirements are returned for ME.
+*
+* \param[in,out] ps_mem_tab : pointer to memory descriptors table
+* \param[in] ps_init_prms : Create time static parameters
+* \param[in] i4_num_proc_thrds : Number of processing threads for this module
+* \param[in] i4_mem_space : memory space in which the memory request should be made
+*
+* \return
+* Number of records
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+WORD32 ihevce_me_get_mem_recs(
+    iv_mem_rec_t *ps_mem_tab,
+    ihevce_static_cfg_params_t *ps_init_prms,
+    WORD32 i4_num_proc_thrds,
+    WORD32 i4_mem_space,
+    WORD32 i4_resolution_id,
+    WORD32 i4_num_me_frm_pllel)
+{
+    /* HME reports its requirements in its own memtab format; they are    */
+    /* staged here before being copied into the encoder's records.        */
+    hme_memtab_t as_memtabs[MAX_HME_ENC_TOT_MEMTABS];
+    WORD32 n_tabs, i;
+
+    /* Init prms structure specific to HME */
+    hme_init_prms_t s_hme_init_prms;
+
+    /*************************************************************************/
+    /* The record count HME reports is checked against a bound that depends */
+    /* on whether more than one ME frame runs in parallel.                  */
+    /*************************************************************************/
+    if(i4_num_me_frm_pllel > 1)
+    {
+        ASSERT(MAX_HME_ENC_TOT_MEMTABS >= hme_enc_num_alloc(i4_num_me_frm_pllel));
+    }
+    else
+    {
+        ASSERT(MIN_HME_ENC_TOT_MEMTABS >= hme_enc_num_alloc(i4_num_me_frm_pllel));
+    }
+
+    /*************************************************************************/
+    /* POPULATE THE HME INIT PRMS                                            */
+    /*************************************************************************/
+    ihevce_derive_me_init_prms(ps_init_prms, &s_hme_init_prms, i4_num_proc_thrds, i4_resolution_id);
+
+    /*************************************************************************/
+    /* CALL THE ME FUNCTION TO GET MEMTABS                                   */
+    /*************************************************************************/
+    n_tabs = hme_enc_alloc(&as_memtabs[0], &s_hme_init_prms, i4_num_me_frm_pllel);
+    ASSERT(n_tabs == hme_enc_num_alloc(i4_num_me_frm_pllel));
+
+    /*************************************************************************/
+    /* REMAP RESULTS TO ENCODER MEMTAB STRUCTURE                             */
+    /* Only size, alignment, memory type and record size are written here.  */
+    /*************************************************************************/
+    for(i = 0; i < n_tabs; i++)
+    {
+        ps_mem_tab[i].i4_mem_size = as_memtabs[i].size;
+        ps_mem_tab[i].i4_mem_alignment = as_memtabs[i].align;
+        ps_mem_tab[i].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
+        ps_mem_tab[i].i4_size = sizeof(iv_mem_rec_t);
+    }
+
+    /* The number of records filled is reported through the return value;  */
+    /* ps_mem_tab is a by-value parameter and is deliberately not advanced. */
+    return (n_tabs);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_me_init \endif
+*
+* \brief
+* Initialization of the ME context state structure.
+*
+* \param[in] ps_mem_tab : pointer to memory descriptors table
+* \param[in] ps_init_prms : Create time static parameters
+* \param[in] pv_osal_handle : Osal handle
+*
+* \return
+* Handle to the ME context
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void *ihevce_me_init(
+    iv_mem_rec_t *ps_mem_tab,
+    ihevce_static_cfg_params_t *ps_init_prms,
+    WORD32 i4_num_proc_thrds,
+    void *pv_osal_handle,
+    rc_quant_t *ps_rc_quant_ctxt,
+    void *pv_tile_params_base,
+    WORD32 i4_resolution_id,
+    WORD32 i4_num_me_frm_pllel,
+    UWORD8 u1_is_popcnt_available)
+{
+    /* ME handle to be returned; NULL is returned when init fails */
+    void *pv_me_ctxt;
+    WORD32 status;
+    me_master_ctxt_t *ps_me_ctxt;
+    IV_ARCH_T e_arch_type;
+
+    /* Init prms structure specific to HME */
+    hme_init_prms_t s_hme_init_prms;
+
+    /* memtabs to be passed to hme */
+    hme_memtab_t as_memtabs[MAX_HME_ENC_TOT_MEMTABS];
+    WORD32 n_tabs, i;
+
+    /*************************************************************************/
+    /* POPULATE THE HME INIT PRMS                                            */
+    /*************************************************************************/
+    ihevce_derive_me_init_prms(ps_init_prms, &s_hme_init_prms, i4_num_proc_thrds, i4_resolution_id);
+
+    /*************************************************************************/
+    /* Ensure local declaration is sufficient                                */
+    /*************************************************************************/
+    n_tabs = hme_enc_num_alloc(i4_num_me_frm_pllel);
+
+    if(i4_num_me_frm_pllel > 1)
+    {
+        ASSERT(MAX_HME_ENC_TOT_MEMTABS >= n_tabs);
+    }
+    else
+    {
+        ASSERT(MIN_HME_ENC_TOT_MEMTABS >= n_tabs);
+    }
+
+    /*************************************************************************/
+    /* MAP RESULTS TO HME MEMTAB STRUCTURE                                   */
+    /*************************************************************************/
+    for(i = 0; i < n_tabs; i++)
+    {
+        as_memtabs[i].size = ps_mem_tab[i].i4_mem_size;
+        as_memtabs[i].align = ps_mem_tab[i].i4_mem_alignment;
+        as_memtabs[i].pu1_mem = (U08 *)ps_mem_tab[i].pv_base;
+    }
+
+    /*************************************************************************/
+    /* The first record is used as the master context. It is written to     */
+    /* below, so a missing base address is reported as a failed init.       */
+    /*************************************************************************/
+    pv_me_ctxt = (void *)as_memtabs[0].pu1_mem;
+    if(NULL == pv_me_ctxt)
+        return NULL;
+
+    ps_me_ctxt = (me_master_ctxt_t *)pv_me_ctxt;
+    /* Store Tile params base into ME context */
+    ps_me_ctxt->pv_tile_params_base = pv_tile_params_base;
+
+    status = hme_enc_init(
+        pv_me_ctxt, &as_memtabs[0], &s_hme_init_prms, ps_rc_quant_ctxt, i4_num_me_frm_pllel);
+
+    if(status == -1)
+        return NULL;
+
+    /* Update number of ME frames running in parallel in me master context */
+    ps_me_ctxt->i4_num_me_frm_pllel = i4_num_me_frm_pllel;
+
+    /* Function pointer setup and kernel routing for the configured arch */
+    e_arch_type = ps_init_prms->e_arch_type;
+    hme_init_function_ptr(ps_me_ctxt, e_arch_type);
+
+    ihevce_me_instr_set_router(
+        (ihevce_me_optimised_function_list_t *)ps_me_ctxt->pv_me_optimised_function_list,
+        e_arch_type);
+
+    ihevce_cmn_utils_instr_set_router(
+        &ps_me_ctxt->s_cmn_opt_func, u1_is_popcnt_available, e_arch_type);
+
+    return (pv_me_ctxt);
+}
+
+/**
+*******************************************************************************
+* \if Function name : ihevce_me_set_resolution \endif
+*
+* \brief
+* Sets the resolution for ME state
+*
+* \par Description:
+* ME requires information of resolution to prime up its layer descriptors
+* and contexts. This API is called whenever a control call from application
+* causes a change of resolution. Has to be called once initially before
+* processing any frame. Again this is just a glue function and calls the
+* actual ME API for the same.
+*
+* \param[in,out] pv_me_ctxt: Handle to the ME context
+* \param[in] n_enc_layers: Number of layers getting encoded
+* \param[in] p_wd : Pointer containing widths of each layer getting encoded.
+* \param[in] p_ht : Pointer containing heights of each layer getting encoded.
+*
+* \returns
+* none
+*
+* \author
+* Ittiam
+*
+*******************************************************************************
+*/
+void ihevce_me_set_resolution(void *pv_me_ctxt, WORD32 n_enc_layers, WORD32 *p_wd, WORD32 *p_ht)
+{
+    me_master_ctxt_t *ps_master = (me_master_ctxt_t *)pv_me_ctxt;
+    WORD32 i4_thrd_id = 0;
+
+    /* Each per-thread ME context has the resolution applied once for every */
+    /* index below MAX_NUM_ME_PARALLEL.                                     */
+    while(i4_thrd_id < ps_master->i4_num_proc_thrds)
+    {
+        void *pv_thrd_ctxt = (void *)ps_master->aps_me_ctxt[i4_thrd_id];
+        WORD32 i4_me_frm_id;
+
+        for(i4_me_frm_id = 0; i4_me_frm_id < MAX_NUM_ME_PARALLEL; i4_me_frm_id++)
+        {
+            hme_set_resolution(pv_thrd_ctxt, n_enc_layers, p_wd, p_ht, i4_me_frm_id);
+        }
+        i4_thrd_id++;
+    }
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_populate_me_ctb_data \endif
+*
+* \brief
+*    Records, for one CU-tree node and the nodes below it, how many best
+*    results ME holds for the matching block of the CTB.
+*
+* \param[in] ps_ctxt : ME context; only the ME quality preset is read
+* \param[in] ps_frm_ctxt : frame context holding the per-size CU results
+* \param[in] ps_cu_tree : node to process (size, validity, children)
+* \param[out] ps_me_ctb_data : receives one result count per block
+* \param[in] e_grandparent_blk_pos : position of the node two levels up
+* \param[in] e_parent_blk_pos : position of the node one level up
+* \param[in] e_cur_blk_pos : position of this node within its parent
+*
+* \par Description:
+*    The count written for a node is the u1_num_best_results of its CU
+*    results when the node is valid and 0 otherwise. Node sizes other than
+*    64, 32, 16 and 8 write nothing.
+*
+*    Index of the entry that is written:
+*      64x64 : the single 64x64 entry
+*      32x32 : cur
+*      16x16 : cur + (parent << 2)
+*      8x8   : cur + (parent << 2) + (grandparent << 4)
+*
+*    Children are visited when the node is not 8x8 and either it is invalid
+*    or the preset is ME_PRISTINE_QUALITY. A child is given this node's
+*    parent position as its grandparent and this node's position as its
+*    parent.
+*
+* \return
+*    None
+*
+*****************************************************************************
+*/
+void ihevce_populate_me_ctb_data(
+    me_ctxt_t *ps_ctxt,
+    me_frm_ctxt_t *ps_frm_ctxt,
+    cur_ctb_cu_tree_t *ps_cu_tree,
+    me_ctb_data_t *ps_me_ctb_data,
+    CU_POS_T e_grandparent_blk_pos,
+    CU_POS_T e_parent_blk_pos,
+    CU_POS_T e_cur_blk_pos)
+{
+    inter_cu_results_t *ps_results;
+    const WORD32 i4_size = ps_cu_tree->u1_cu_size;
+    const WORD32 i4_valid = (0 != ps_cu_tree->is_node_valid);
+
+    if(64 == i4_size)
+    {
+        /* one 64x64 entry, so no index is involved */
+        ps_results = &ps_frm_ctxt->s_cu64x64_results;
+
+        ps_me_ctb_data->s_64x64_block_data.num_best_results =
+            i4_valid ? ps_results->u1_num_best_results : 0;
+    }
+    else if(32 == i4_size)
+    {
+        /* indexed by this node's own position */
+        ps_results = &ps_frm_ctxt->as_cu32x32_results[e_cur_blk_pos];
+
+        ps_me_ctb_data->as_32x32_block_data[e_cur_blk_pos].num_best_results =
+            i4_valid ? ps_results->u1_num_best_results : 0;
+    }
+    else if(16 == i4_size)
+    {
+        /* four 16x16 entries per parent position */
+        WORD32 i4_idx_16x16 = e_cur_blk_pos + (e_parent_blk_pos << 2);
+
+        ps_results = &ps_frm_ctxt->as_cu16x16_results[i4_idx_16x16];
+
+        ps_me_ctb_data->as_block_data[i4_idx_16x16].num_best_results =
+            i4_valid ? ps_results->u1_num_best_results : 0;
+    }
+    else if(8 == i4_size)
+    {
+        /* four 8x8 entries per parent, sixteen per grandparent position */
+        WORD32 i4_idx_8x8 = e_cur_blk_pos + (e_parent_blk_pos << 2) + (e_grandparent_blk_pos << 4);
+
+        ps_results = &ps_frm_ctxt->as_cu8x8_results[i4_idx_8x8];
+
+        ps_me_ctb_data->as_8x8_block_data[i4_idx_8x8].num_best_results =
+            i4_valid ? ps_results->u1_num_best_results : 0;
+    }
+
+    /* An 8x8 node is never descended from, whatever its validity or the   */
+    /* preset.                                                             */
+    if(8 == i4_size)
+    {
+        return;
+    }
+
+    /* A valid node hands over to its children only for the                */
+    /* ME_PRISTINE_QUALITY preset; an invalid node always does.            */
+    if(i4_valid &&
+       (ME_PRISTINE_QUALITY != ps_ctxt->s_init_prms.s_me_coding_tools.e_me_quality_presets))
+    {
+        return;
+    }
+
+    /* Children: this node's parent position becomes their grandparent     */
+    /* position and this node's position becomes their parent position.    */
+    ihevce_populate_me_ctb_data(
+        ps_ctxt,
+        ps_frm_ctxt,
+        ps_cu_tree->ps_child_node_tl,
+        ps_me_ctb_data,
+        e_parent_blk_pos,
+        e_cur_blk_pos,
+        POS_TL);
+
+    ihevce_populate_me_ctb_data(
+        ps_ctxt,
+        ps_frm_ctxt,
+        ps_cu_tree->ps_child_node_tr,
+        ps_me_ctb_data,
+        e_parent_blk_pos,
+        e_cur_blk_pos,
+        POS_TR);
+
+    ihevce_populate_me_ctb_data(
+        ps_ctxt,
+        ps_frm_ctxt,
+        ps_cu_tree->ps_child_node_bl,
+        ps_me_ctb_data,
+        e_parent_blk_pos,
+        e_cur_blk_pos,
+        POS_BL);
+
+    ihevce_populate_me_ctb_data(
+        ps_ctxt,
+        ps_frm_ctxt,
+        ps_cu_tree->ps_child_node_br,
+        ps_me_ctb_data,
+        e_parent_blk_pos,
+        e_cur_blk_pos,
+        POS_BR);
+}
+
+void ihevce_me_update_ctb_results(
+ void *pv_me_ctxt, void *pv_me_frm_ctxt, WORD32 i4_ctb_x, WORD32 i4_ctb_y)
+{ /* i4_ctb_y is not referenced in this body; only the column i4_ctb_x is used */
+ ctb_analyse_t *ps_ctb_out;
+ cur_ctb_cu_tree_t *ps_cu_tree;
+ me_ctb_data_t *ps_me_ctb_data;
+ /* Publish this CTB's CU tree and ME data pointers in its ctb_analyse_t, then fill the ME data. */
+ me_ctxt_t *ps_ctxt = (me_ctxt_t *)pv_me_ctxt;
+ me_frm_ctxt_t *ps_frm_ctxt = (me_frm_ctxt_t *)pv_me_frm_ctxt;
+
+ ps_ctb_out = ps_frm_ctxt->ps_ctb_analyse_curr_row + i4_ctb_x;
+ /* Current-row pointers are offset by CTB column; the CU tree row is stepped MAX_NUM_NODES_CU_TREE nodes per CTB. */
+ ps_me_ctb_data = ps_frm_ctxt->ps_me_ctb_data_curr_row + i4_ctb_x;
+ ps_cu_tree = ps_frm_ctxt->ps_cu_tree_curr_row + (i4_ctb_x * MAX_NUM_NODES_CU_TREE);
+
+ ps_ctb_out->ps_cu_tree = ps_cu_tree;
+ ps_ctb_out->ps_me_ctb_data = ps_me_ctb_data;
+
+ ihevce_populate_me_ctb_data( /* start at the tree's first node with all three positions POS_NA */
+ ps_ctxt, ps_frm_ctxt, ps_cu_tree, ps_me_ctb_data, POS_NA, POS_NA, POS_NA);
+}
+
+WORD32 ihevce_me_find_poc_in_list(
+ recon_pic_buf_t **pps_rec_list, WORD32 poc, WORD32 i4_idr_gop_num, WORD32 num_ref)
+{
+ WORD32 i;
+ /* Return the index of the first of num_ref entries matching both poc and i4_idr_gop_num. */
+ for(i = 0; i < num_ref; i++)
+ {
+ if(pps_rec_list[i]->i4_poc == poc && pps_rec_list[i]->i4_idr_gop_num == i4_idr_gop_num)
+ return (i);
+ }
+
+ /* should never come here: every picture looked up is expected to be in the list */
+ ASSERT(0);
+ return (-1); /* fallback value when no entry matched */
+}
+void ihevc_me_update_ref_desc(
+ hme_ref_desc_t *ps_ref_desc,
+ recon_pic_buf_t *ps_recon_pic,
+ WORD32 ref_id_l0,
+ WORD32 ref_id_l1,
+ WORD32 ref_id_lc,
+ WORD32 is_fwd)
+{
+ hme_ref_buf_info_t *ps_ref_info = &ps_ref_desc->as_ref_info[0]; /* only entry 0 is filled here */
+ iv_enc_yuv_buf_t *ps_yuv_desc = (iv_enc_yuv_buf_t *)&ps_recon_pic->s_yuv_buf_desc;
+ iv_enc_yuv_buf_t *ps_src_yuv_desc = (iv_enc_yuv_buf_t *)&ps_recon_pic->s_yuv_buf_desc_src;
+ S32 offset;
+ /* Fill one ME reference descriptor (buffers, strides, ids, POC, weights) from a recon picture. */
+ /* Padding beyond 64 is not of use to ME */
+ ps_ref_info->u1_pad_x = MIN(64, PAD_HORZ);
+ ps_ref_info->u1_pad_y = MIN(64, PAD_VERT);
+
+ /* Luma stride and offset. Assuming here that supplied ptr is */
+ /* 0, 0 position and hence setting offset to 0. In fact, it is */
+ /* not used inside ME as of now. */
+ ps_ref_info->luma_stride = ps_yuv_desc->i4_y_strd;
+ ps_ref_info->luma_offset = 0;
+
+ /* 4 planes, fxfy is the direct recon buf, others are from subpel planes */
+ //offset = ps_ref_info->luma_stride * PAD_VERT + PAD_HORZ;
+ offset = 0; /* all plane pointers below are therefore taken as supplied */
+ ps_ref_info->pu1_rec_fxfy = (UWORD8 *)ps_yuv_desc->pv_y_buf + offset;
+ ps_ref_info->pu1_rec_hxfy = ps_recon_pic->apu1_y_sub_pel_planes[0] + offset;
+ ps_ref_info->pu1_rec_fxhy = ps_recon_pic->apu1_y_sub_pel_planes[1] + offset;
+ ps_ref_info->pu1_rec_hxhy = ps_recon_pic->apu1_y_sub_pel_planes[2] + offset;
+ ps_ref_info->pu1_ref_src = (UWORD8 *)ps_src_yuv_desc->pv_y_buf + offset; /* luma of the source descriptor */
+
+ /* U V ptrs though they are not used */
+ ps_ref_info->pu1_rec_u = (U08 *)ps_yuv_desc->pv_u_buf;
+ ps_ref_info->pu1_rec_v = (U08 *)ps_yuv_desc->pv_v_buf;
+
+ /* uv offsets and strides, same treatment as luma */
+ ps_ref_info->chroma_offset = 0;
+ ps_ref_info->chroma_stride = ps_yuv_desc->i4_uv_strd;
+
+ ps_ref_info->pv_dep_mngr = ps_recon_pic->pv_dep_mngr_recon;
+
+ /* L0, L1 and LC id. */
+ ps_ref_desc->i1_ref_id_l0 = ref_id_l0;
+ ps_ref_desc->i1_ref_id_l1 = ref_id_l1;
+ ps_ref_desc->i1_ref_id_lc = ref_id_lc;
+
+ /* POC of the ref pic */
+ ps_ref_desc->i4_poc = ps_recon_pic->i4_poc;
+
+ /* Display num of the ref pic */
+ ps_ref_desc->i4_display_num = ps_recon_pic->i4_display_num;
+
+ /* GOP number of the reference pic */
+ ps_ref_desc->i4_GOP_num = ps_recon_pic->i4_idr_gop_num;
+
+ /* Whether this picture is in past (fwd) or future (bck) */
+ ps_ref_desc->u1_is_fwd = is_fwd;
+
+ /* store the luma weight and offset of the reference picture */
+ ps_ref_desc->i2_weight = ps_recon_pic->s_weight_offset.i2_luma_weight;
+ ps_ref_desc->i2_offset = ps_recon_pic->s_weight_offset.i2_luma_offset;
+}
+
+/* Create the reference map for ME: interleaved L0/L1 entries first, then the rest of the longer list */
+void ihevce_me_create_ref_map(
+ recon_pic_buf_t **pps_rec_list_l0,
+ recon_pic_buf_t **pps_rec_list_l1,
+ WORD32 num_ref_l0_active,
+ WORD32 num_ref_l1_active,
+ WORD32 num_ref, /* number of entries searched when looking a picture up in the other list */
+ hme_ref_map_t *ps_ref_map)
+{
+ WORD32 min_ref, i, poc, ref_id_l0, ref_id_l1;
+
+ /* tracks running count of ref pics */
+ WORD32 ref_count = 0, i4_idr_gop_num;
+
+ /* points to one instance of a ref pic structure */
+ recon_pic_buf_t *ps_recon_pic;
+
+ /* points to one instance of ref desc str used by ME */
+ hme_ref_desc_t *ps_ref_desc;
+
+ min_ref = MIN(num_ref_l0_active, num_ref_l1_active);
+
+ for(i = 0; i < min_ref; i++)
+ {
+ /* Create interleaved L0 and L1 entries */
+ ps_ref_desc = &ps_ref_map->as_ref_desc[ref_count];
+ ps_recon_pic = pps_rec_list_l0[i];
+ poc = ps_recon_pic->i4_poc;
+ i4_idr_gop_num = ps_recon_pic->i4_idr_gop_num;
+ ref_id_l0 = i;
+ ref_id_l1 = ihevce_me_find_poc_in_list(pps_rec_list_l1, poc, i4_idr_gop_num, num_ref);
+ ihevc_me_update_ref_desc(ps_ref_desc, ps_recon_pic, ref_id_l0, ref_id_l1, 2 * i, 1); /* L0 entry: LC id 2i, is_fwd = 1 */
+
+ ref_count++;
+
+ ps_ref_desc = &ps_ref_map->as_ref_desc[ref_count];
+ ps_recon_pic = pps_rec_list_l1[i];
+ poc = ps_recon_pic->i4_poc;
+ i4_idr_gop_num = ps_recon_pic->i4_idr_gop_num;
+ ref_id_l1 = i;
+ ref_id_l0 = ihevce_me_find_poc_in_list(pps_rec_list_l0, poc, i4_idr_gop_num, num_ref);
+ ihevc_me_update_ref_desc(ps_ref_desc, ps_recon_pic, ref_id_l0, ref_id_l1, 2 * i + 1, 0); /* L1 entry: LC id 2i+1, is_fwd = 0 */
+
+ ref_count++;
+ }
+
+ if(num_ref_l0_active > min_ref) /* L0 has more active refs: append its remaining entries */
+ {
+ for(i = 0; i < (num_ref_l0_active - min_ref); i++)
+ {
+ ps_ref_desc = &ps_ref_map->as_ref_desc[ref_count];
+ ref_id_l0 = i + min_ref;
+ ps_recon_pic = pps_rec_list_l0[ref_id_l0];
+ poc = ps_recon_pic->i4_poc;
+ i4_idr_gop_num = ps_recon_pic->i4_idr_gop_num;
+ ref_id_l1 = ihevce_me_find_poc_in_list(pps_rec_list_l1, poc, i4_idr_gop_num, num_ref);
+ ihevc_me_update_ref_desc(
+ ps_ref_desc, ps_recon_pic, ref_id_l0, ref_id_l1, 2 * min_ref + i, 1);
+ ref_count++;
+ }
+ }
+ else /* otherwise append the remaining L1 entries (zero iterations when the counts are equal) */
+ {
+ for(i = 0; i < (num_ref_l1_active - min_ref); i++)
+ {
+ ps_ref_desc = &ps_ref_map->as_ref_desc[ref_count];
+ ref_id_l1 = i + min_ref;
+ ps_recon_pic = pps_rec_list_l1[ref_id_l1];
+ poc = ps_recon_pic->i4_poc;
+ i4_idr_gop_num = ps_recon_pic->i4_idr_gop_num;
+ ref_id_l0 = ihevce_me_find_poc_in_list(pps_rec_list_l0, poc, i4_idr_gop_num, num_ref);
+ ihevc_me_update_ref_desc(
+ ps_ref_desc, ps_recon_pic, ref_id_l0, ref_id_l1, 2 * min_ref + i, 0);
+ ref_count++;
+ }
+ }
+
+ ps_ref_map->i4_num_ref = ref_count;
+ ASSERT(ref_count == (num_ref_l0_active + num_ref_l1_active));
+
+ /* TODO : Fill better values in lambda depending on ref dist */
+ for(i = 0; i < ps_ref_map->i4_num_ref; i++)
+ ps_ref_map->as_ref_desc[i].lambda = 20; /* same constant for every entry for now */
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_me_process \endif
+*
+* \brief
+* Frame level ME function
+*
+* \par Description:
+* Processing of all layers starting from coarse and going to the refinement
+* layers; all layers that are encoded go CTB by CTB. Outputs of this function
+* are populated ctb_analyse_t structures, one per CTB.
+*
+* \param[in] pv_me_ctxt : pointer to ME module (cast to me_master_ctxt_t)
+* \param[in] ps_enc_lap_inp : pointer to input yuv buffer (not referenced in the function body)
+* \param[in,out] ps_ctb_out : pointer to CTB analyse output structure (frame buffer)
+* \param[out] ps_cur_out_me_prms : supplies the CU tree and ME CTB data output base pointers
+* \param[in] pd_intra_costs : pointer to intra cost buffer
+* \param[in] ps_ipe_analyse_ctb : stored as the IPE L0 CTB frame base pointer
+* \param[in] ps_l0_ipe_input, pv_coarse_layer, i4_frame_parallelism_level : passed to hme_process_frm()
+* \param[in] ps_multi_thrd_ctxt : pointer to multi thread ctxt
+* \param[in] thrd_id : Thread id of the current thrd in which function is executed
+* \param[in] i4_me_frm_id : selects the frame context, ref map and frame params used
+* \return None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_me_process(
+ void *pv_me_ctxt,
+ ihevce_lap_enc_buf_t *ps_enc_lap_inp,
+ ctb_analyse_t *ps_ctb_out,
+ me_enc_rdopt_ctxt_t *ps_cur_out_me_prms,
+ double *pd_intra_costs,
+ ipe_l0_ctb_analyse_for_me_t *ps_ipe_analyse_ctb,
+ pre_enc_L0_ipe_encloop_ctxt_t *ps_l0_ipe_input,
+ void *pv_coarse_layer,
+ multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
+ WORD32 i4_frame_parallelism_level,
+ WORD32 thrd_id,
+ WORD32 i4_me_frm_id)
+{
+ me_ctxt_t *ps_thrd_ctxt;
+ me_frm_ctxt_t *ps_ctxt;
+
+ PF_EXT_UPDATE_FXN_T pf_ext_update_fxn;
+
+ me_master_ctxt_t *ps_master_ctxt = (me_master_ctxt_t *)pv_me_ctxt;
+ cur_ctb_cu_tree_t *ps_cu_tree_out = ps_cur_out_me_prms->ps_cur_ctb_cu_tree;
+ me_ctb_data_t *ps_me_ctb_data_out = ps_cur_out_me_prms->ps_cur_ctb_me_data;
+ layer_ctxt_t *ps_coarse_layer = (layer_ctxt_t *)pv_coarse_layer;
+ /* ihevce_me_update_ctb_results is handed to HME as the external update callback */
+ pf_ext_update_fxn = (PF_EXT_UPDATE_FXN_T)ihevce_me_update_ctb_results;
+
+ /* get the current thread ctxt pointer */
+ ps_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[thrd_id];
+ ps_ctxt = ps_thrd_ctxt->aps_me_frm_prms[i4_me_frm_id];
+ ps_ctxt->thrd_id = thrd_id;
+
+ /* store the ctb out and cu out base pointers */
+ ps_ctxt->ps_ctb_analyse_base = ps_ctb_out;
+
+ ps_ctxt->ps_cu_tree_base = ps_cu_tree_out;
+ ps_ctxt->ps_ipe_l0_ctb_frm_base = ps_ipe_analyse_ctb;
+ ps_ctxt->ps_me_ctb_data_base = ps_me_ctb_data_out;
+ ps_ctxt->ps_func_selector = &ps_master_ctxt->s_func_selector;
+
+ /** currently in master context. Copying that to me context **/
+ /* frame level processing function */
+ hme_process_frm(
+ (void *)ps_thrd_ctxt,
+ ps_l0_ipe_input,
+ &ps_master_ctxt->as_ref_map[i4_me_frm_id],
+ &pd_intra_costs, /* address of the parameter itself, i.e. a double ** */
+ &ps_master_ctxt->as_frm_prms[i4_me_frm_id],
+ pf_ext_update_fxn,
+ ps_coarse_layer,
+ ps_multi_thrd_ctxt,
+ i4_frame_parallelism_level,
+ thrd_id,
+ i4_me_frm_id);
+}
+/*!
+******************************************************************************
+* \if Function name : ihevce_me_frame_dpb_update \endif
+*
+* \brief
+* Frame level ME DPB update function
+*
+* \par Description:
+* Update of ME's internal DPB: when HME reports its buffer as full, one
+* buffered picture is selected and hme_discard_frm() is asked to drop it
+*
+* \param[in] pv_me_ctxt : pointer to ME module (cast to me_master_ctxt_t)
+* \param[in] num_ref_l0 : Number of reference pics in L0 list (not referenced in the body)
+* \param[in] num_ref_l1 : Number of reference pics in L1 list (not referenced in the body)
+* \param[in] pps_rec_list_l0 : List of recon pics in L0 list (not referenced in the body)
+* \param[in] pps_rec_list_l1 : List of recon pics in L1 list (not referenced in the body)
+* \param[in] i4_thrd_id : index of the thread context in aps_me_ctxt[] used for the update
+*
+* \return None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_me_frame_dpb_update(
+ void *pv_me_ctxt,
+ WORD32 num_ref_l0,
+ WORD32 num_ref_l1,
+ recon_pic_buf_t **pps_rec_list_l0,
+ recon_pic_buf_t **pps_rec_list_l1,
+ WORD32 i4_thrd_id)
+{
+ me_master_ctxt_t *ps_master_ctxt = (me_master_ctxt_t *)pv_me_ctxt;
+ me_ctxt_t *ps_thrd0_ctxt;
+ WORD32 a_pocs_to_remove[MAX_NUM_REF + 2];
+ WORD32 i, i4_is_buffer_full;
+ WORD32 i4_least_POC = 0x7FFFFFFF; /* sentinel: nothing selected yet */
+ WORD32 i4_least_GOP_num = 0x7FFFFFFF; /* sentinel: nothing selected yet */
+ me_ctxt_t *ps_ctxt;
+
+ /* All processing done using shared / common memory across */
+ /* threads is done using thrd ctxt */
+ ps_thrd0_ctxt = ps_master_ctxt->aps_me_ctxt[i4_thrd_id];
+
+ ps_ctxt = (me_ctxt_t *)ps_thrd0_ctxt; /* same object as ps_thrd0_ctxt */
+ a_pocs_to_remove[0] = INVALID_POC; /* empty removal list unless the buffer is full */
+ /*************************************************************************/
+ /* Update of ME's DPB list. This involves the following steps: */
+ /* 1. hme_get_active_pocs_list() reports whether ME's buffer is full. */
+ /* 2. If it is full, one picture is chosen for removal: a non-reference */
+ /* picture flagged i4_non_ref_free if there is one, else the least POC */
+ /* within the least IDR GOP number. a_pocs_to_remove holds the */
+ /* list of POCs to be removed, terminated by INVALID_POC. */
+ /*************************************************************************/
+ i4_is_buffer_full =
+ hme_get_active_pocs_list((void *)ps_thrd0_ctxt, ps_master_ctxt->i4_num_me_frm_pllel);
+
+ if(i4_is_buffer_full)
+ {
+ /* remove if any non-reference pictures are present */
+ for(i = 0;
+ i <
+ (ps_ctxt->aps_me_frm_prms[0]->max_num_ref * ps_master_ctxt->i4_num_me_frm_pllel) + 1;
+ i++)
+ {
+ if(ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_is_reference == 0 &&
+ ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_non_ref_free == 1)
+ { /* no break: the last matching descriptor wins */
+ i4_least_POC = ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_poc;
+ i4_least_GOP_num = ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_idr_gop_num;
+ }
+ }
+ /* if all non reference pictures are removed, then find the least poc
+ in the least gop number*/
+ if(i4_least_POC == 0x7FFFFFFF)
+ {
+ ASSERT(i4_least_GOP_num == 0x7FFFFFFF);
+ for(i = 0; i < (ps_ctxt->aps_me_frm_prms[0]->max_num_ref *
+ ps_master_ctxt->i4_num_me_frm_pllel) +
+ 1;
+ i++)
+ { /* pass 1: smallest IDR GOP number over all descriptors */
+ if(i4_least_GOP_num > ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_idr_gop_num)
+ {
+ i4_least_GOP_num = ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_idr_gop_num;
+ }
+ }
+ for(i = 0; i < (ps_ctxt->aps_me_frm_prms[0]->max_num_ref *
+ ps_master_ctxt->i4_num_me_frm_pllel) +
+ 1;
+ i++)
+ { /* pass 2: smallest POC among descriptors of that GOP */
+ if(i4_least_POC > ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_poc &&
+ ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_idr_gop_num == i4_least_GOP_num)
+ {
+ i4_least_POC = ps_ctxt->as_ref_descr[i].aps_layers[0]->i4_poc;
+ }
+ }
+ }
+ ASSERT(i4_least_POC != 0x7FFFFFFF);
+ a_pocs_to_remove[0] = i4_least_POC;
+ a_pocs_to_remove[1] = INVALID_POC;
+ }
+
+ /* Call the ME API to remove "outdated" POCs (called even when the list is empty) */
+ hme_discard_frm(
+ ps_thrd0_ctxt, a_pocs_to_remove, i4_least_GOP_num, ps_master_ctxt->i4_num_me_frm_pllel);
+}
+/*!
+******************************************************************************
+* \if Function name : ihevce_me_frame_init \endif
+*
+* \brief
+* Frame level ME initialisation function
+*
+* \par Description:
+* Pre-conditions: a. the input pic is ready for encode, b. the reference lists (POC, L0/L1
+* IDs, ref ptrs) are ready for this picture, c. ihevce_me_set_resolution has been called at
+* least once. Done here: a. input added to ME (pyramid creation), b. ref map and frame params set up.
+*
+* \param[in] pv_me_ctxt : pointer to ME module (cast to me_master_ctxt_t)
+* \param[in] ps_cur_out_me_prms : not referenced in the function body
+* \param[in] ps_stat_prms : static config (cu level rc, pass and vqet fields are read)
+* \param[in] ps_frm_ctb_prms : CTB characteristics parameters
+* \param[in] ps_frm_lamda : Frame level Lambda params
+* \param[in] num_ref_l0, num_ref_l1 : Number of reference pics in L0 / L1 list
+* \param[in] num_ref_l0_active, num_ref_l1_active : Active reference pics per direction (shall be <= list size)
+* \param[in] pps_rec_list_l0, pps_rec_list_l1 : List of recon pics in L0 / L1 list
+* \param[in] aps_ref_list, ps_func_selector : stored in every thread's MC context
+* \param[in] ps_enc_lap_inp : pointer to input yuv buffer (frame buffer)
+* \param[in] pv_coarse_layer : coarse layer context passed to hme_refine_frm_init()
+* \param[in] i4_me_frm_id : index of the ME frame context / ref map / frame params
+* \param[in] i4_thrd_id : index of the thread context used as the reference context
+* \param[in] i4_frm_qp : current picture QP
+* \param[in] i4_temporal_layer_id, i1_cu_qp_delta_enabled_flag : stored in the frame params
+* \param[in] pv_dep_mngr_encloop_dep_me : stored in every thread's frame context
+* \return None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_me_frame_init(
+ void *pv_me_ctxt,
+ me_enc_rdopt_ctxt_t *ps_cur_out_me_prms,
+ ihevce_static_cfg_params_t *ps_stat_prms,
+ frm_ctb_ctxt_t *ps_frm_ctb_prms,
+ frm_lambda_ctxt_t *ps_frm_lamda,
+ WORD32 num_ref_l0,
+ WORD32 num_ref_l1,
+ WORD32 num_ref_l0_active,
+ WORD32 num_ref_l1_active,
+ recon_pic_buf_t **pps_rec_list_l0,
+ recon_pic_buf_t **pps_rec_list_l1,
+ recon_pic_buf_t *(*aps_ref_list)[HEVCE_MAX_REF_PICS * 2],
+ func_selector_t *ps_func_selector,
+ ihevce_lap_enc_buf_t *ps_enc_lap_inp,
+ void *pv_coarse_layer,
+ WORD32 i4_me_frm_id,
+ WORD32 i4_thrd_id,
+ WORD32 i4_frm_qp,
+ WORD32 i4_temporal_layer_id,
+ WORD8 i1_cu_qp_delta_enabled_flag,
+ void *pv_dep_mngr_encloop_dep_me)
+{
+ me_ctxt_t *ps_thrd_ctxt;
+ me_ctxt_t *ps_thrd0_ctxt;
+ me_frm_ctxt_t *ps_ctxt;
+ hme_inp_desc_t s_inp_desc;
+
+ WORD32 inp_poc, num_ref;
+ WORD32 i;
+
+ me_master_ctxt_t *ps_master_ctxt = (me_master_ctxt_t *)pv_me_ctxt;
+ layer_ctxt_t *ps_coarse_layer = (layer_ctxt_t *)pv_coarse_layer;
+
+ /* Input POC is derived from input buffer */
+ inp_poc = ps_enc_lap_inp->s_lap_out.i4_poc;
+ num_ref = num_ref_l0 + num_ref_l1;
+
+ /* All processing done using shared / common memory across */
+ /* threads is done using thrd ctxt */
+ ps_thrd0_ctxt = ps_master_ctxt->aps_me_ctxt[i4_thrd_id];
+
+ ps_ctxt = ps_thrd0_ctxt->aps_me_frm_prms[i4_me_frm_id];
+
+ /* Update the parameters "num_ref_l0_active" and "num_ref_l1_active" in hme_frm_prms */
+ ps_master_ctxt->as_frm_prms[i4_me_frm_id].u1_num_active_ref_l0 = num_ref_l0_active;
+ ps_master_ctxt->as_frm_prms[i4_me_frm_id].u1_num_active_ref_l1 = num_ref_l1_active;
+
+ /*************************************************************************/
+ /* Add the current input to ME's DPB. This will also create the pyramids */
+ /* for the HME layers that are not "encoded". */
+ /*************************************************************************/
+ s_inp_desc.i4_poc = inp_poc;
+ s_inp_desc.i4_idr_gop_num = ps_enc_lap_inp->s_lap_out.i4_idr_gop_num;
+ s_inp_desc.i4_is_reference = ps_enc_lap_inp->s_lap_out.i4_is_ref_pic;
+ s_inp_desc.s_layer_desc[0].pu1_y = (UWORD8 *)ps_enc_lap_inp->s_lap_out.s_input_buf.pv_y_buf;
+ s_inp_desc.s_layer_desc[0].pu1_u = (UWORD8 *)ps_enc_lap_inp->s_lap_out.s_input_buf.pv_u_buf;
+ s_inp_desc.s_layer_desc[0].pu1_v = (UWORD8 *)ps_enc_lap_inp->s_lap_out.s_input_buf.pv_v_buf;
+
+ s_inp_desc.s_layer_desc[0].luma_stride = ps_enc_lap_inp->s_lap_out.s_input_buf.i4_y_strd;
+ s_inp_desc.s_layer_desc[0].chroma_stride = ps_enc_lap_inp->s_lap_out.s_input_buf.i4_uv_strd;
+
+ hme_add_inp(pv_me_ctxt, &s_inp_desc, i4_me_frm_id, i4_thrd_id);
+
+ /* store the frm ctb ctxt to all the thrd ctxt */
+ {
+ WORD32 num_thrds;
+
+ /* initialise the parameters for all the threads */
+ for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
+ {
+ me_frm_ctxt_t *ps_me_tmp_frm_ctxt;
+
+ ps_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
+
+ ps_me_tmp_frm_ctxt = ps_thrd_ctxt->aps_me_frm_prms[i4_me_frm_id];
+
+ ps_thrd_ctxt->pv_ext_frm_prms = (void *)ps_frm_ctb_prms;
+ ps_me_tmp_frm_ctxt->i4_l0me_qp_mod = ps_stat_prms->s_config_prms.i4_cu_level_rc & 1;
+
+ /* initialize the inter pred (MC) context at frame level */
+ ps_me_tmp_frm_ctxt->s_mc_ctxt.ps_ref_list = aps_ref_list;
+ ps_me_tmp_frm_ctxt->s_mc_ctxt.i1_weighted_pred_flag =
+ ps_enc_lap_inp->s_lap_out.i1_weighted_pred_flag;
+ ps_me_tmp_frm_ctxt->s_mc_ctxt.i1_weighted_bipred_flag =
+ ps_enc_lap_inp->s_lap_out.i1_weighted_bipred_flag;
+ ps_me_tmp_frm_ctxt->s_mc_ctxt.i4_log2_luma_wght_denom =
+ ps_enc_lap_inp->s_lap_out.i4_log2_luma_wght_denom;
+ ps_me_tmp_frm_ctxt->s_mc_ctxt.i4_log2_chroma_wght_denom =
+ ps_enc_lap_inp->s_lap_out.i4_log2_chroma_wght_denom;
+ ps_me_tmp_frm_ctxt->s_mc_ctxt.i4_bit_depth = 8; /* hard-coded here */
+ ps_me_tmp_frm_ctxt->s_mc_ctxt.u1_chroma_array_type = 1; /* hard-coded here */
+ ps_me_tmp_frm_ctxt->s_mc_ctxt.ps_func_selector = ps_func_selector;
+ /* Initialization for non-distributed mode */
+ memset(
+ ps_me_tmp_frm_ctxt->s_mc_ctxt.ai4_tile_xtra_pel,
+ 0,
+ sizeof(ps_me_tmp_frm_ctxt->s_mc_ctxt.ai4_tile_xtra_pel));
+
+ ps_me_tmp_frm_ctxt->i4_pic_type = ps_enc_lap_inp->s_lap_out.i4_pic_type;
+
+ ps_me_tmp_frm_ctxt->i4_rc_pass = ps_stat_prms->s_pass_prms.i4_pass;
+ ps_me_tmp_frm_ctxt->i4_temporal_layer = ps_enc_lap_inp->s_lap_out.i4_temporal_lyr_id;
+ ps_me_tmp_frm_ctxt->i4_use_const_lamda_modifier = USE_CONSTANT_LAMBDA_MODIFIER;
+ ps_me_tmp_frm_ctxt->i4_use_const_lamda_modifier = /* overwrites the assignment just above */
+ ps_ctxt->i4_use_const_lamda_modifier || /* NOTE(review): reads ps_ctxt (thread i4_thrd_id), not ps_me_tmp_frm_ctxt - confirm */
+ ((ps_stat_prms->s_coding_tools_prms.i4_vqet &
+ (1 << BITPOS_IN_VQ_TOGGLE_FOR_CONTROL_TOGGLER)) &&
+ ((ps_stat_prms->s_coding_tools_prms.i4_vqet &
+ (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_NOISE_PRESERVATION)) ||
+ (ps_stat_prms->s_coding_tools_prms.i4_vqet &
+ (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_1)) ||
+ (ps_stat_prms->s_coding_tools_prms.i4_vqet &
+ (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_2)) ||
+ (ps_stat_prms->s_coding_tools_prms.i4_vqet &
+ (1 << BITPOS_IN_VQ_TOGGLE_FOR_ENABLING_PSYRDOPT_3))));
+ { /* plain scope block: executed unconditionally */
+ ps_me_tmp_frm_ctxt->f_i_pic_lamda_modifier =
+ ps_enc_lap_inp->s_lap_out.f_i_pic_lamda_modifier;
+ }
+ /* weighted pred enable flag */
+ ps_me_tmp_frm_ctxt->i4_wt_pred_enable_flag =
+ ps_enc_lap_inp->s_lap_out.i1_weighted_pred_flag |
+ ps_enc_lap_inp->s_lap_out.i1_weighted_bipred_flag;
+
+ if(1 == ps_me_tmp_frm_ctxt->i4_wt_pred_enable_flag)
+ {
+ /* log2 weight denom */
+ ps_me_tmp_frm_ctxt->s_wt_pred.wpred_log_wdc =
+ ps_enc_lap_inp->s_lap_out.i4_log2_luma_wght_denom;
+ }
+ else
+ {
+ /* default value */
+ ps_me_tmp_frm_ctxt->s_wt_pred.wpred_log_wdc = DENOM_DEFAULT;
+ }
+
+ ps_me_tmp_frm_ctxt->u1_is_curFrame_a_refFrame = ps_enc_lap_inp->s_lap_out.i4_is_ref_pic;
+
+ ps_thrd_ctxt->pv_me_optimised_function_list =
+ ps_master_ctxt->pv_me_optimised_function_list;
+ ps_thrd_ctxt->ps_cmn_utils_optimised_function_list = &ps_master_ctxt->s_cmn_opt_func;
+ }
+ }
+
+ /* Create the reference map for ME */
+ ihevce_me_create_ref_map(
+ pps_rec_list_l0,
+ pps_rec_list_l1,
+ num_ref_l0_active,
+ num_ref_l1_active,
+ num_ref,
+ &ps_master_ctxt->as_ref_map[i4_me_frm_id]);
+
+ /** Remember the pointers to recon list params for L0 and L1 lists in the context */
+ ps_ctxt->ps_hme_ref_map->pps_rec_list_l0 = pps_rec_list_l0;
+ ps_ctxt->ps_hme_ref_map->pps_rec_list_l1 = pps_rec_list_l1;
+
+ /*************************************************************************/
+ /* Call the ME frame level processing for further action. */
+ /* ToDo: Support Row Level API. */
+ /*************************************************************************/
+ ps_master_ctxt->as_frm_prms[i4_me_frm_id].i2_mv_range_x =
+ ps_thrd0_ctxt->s_init_prms.max_horz_search_range;
+ ps_master_ctxt->as_frm_prms[i4_me_frm_id].i2_mv_range_y =
+ ps_thrd0_ctxt->s_init_prms.max_vert_search_range;
+ ps_master_ctxt->as_frm_prms[i4_me_frm_id].is_i_pic = 0; /* default; set to 1 below for I/II/IDR */
+ ps_master_ctxt->as_frm_prms[i4_me_frm_id].is_pic_second_field =
+ (!(ps_enc_lap_inp->s_input_buf.i4_bottom_field ^
+ ps_enc_lap_inp->s_input_buf.i4_topfield_first)); /* 1 when the two field flags are equal */
+ ps_master_ctxt->as_frm_prms[i4_me_frm_id].i4_temporal_layer_id = i4_temporal_layer_id;
+ {
+ S32 pic_type = ps_enc_lap_inp->s_lap_out.i4_pic_type;
+
+ /*********************************************************************/
+ /* For I Pic, we do not call update fn at ctb level, instead we do */
+ /* one shot update for entire picture. */
+ /*********************************************************************/
+ if((pic_type == IV_I_FRAME) || (pic_type == IV_II_FRAME) || (pic_type == IV_IDR_FRAME))
+ {
+ ps_master_ctxt->as_frm_prms[i4_me_frm_id].is_i_pic = 1;
+ ps_master_ctxt->as_frm_prms[i4_me_frm_id].bidir_enabled = 0;
+ }
+
+ else if((pic_type == IV_P_FRAME) || (pic_type == IV_PP_FRAME))
+ {
+ ps_master_ctxt->as_frm_prms[i4_me_frm_id].bidir_enabled = 0;
+ }
+ else if((pic_type == IV_B_FRAME) || (pic_type == IV_BB_FRAME))
+ {
+ ps_master_ctxt->as_frm_prms[i4_me_frm_id].bidir_enabled = 1;
+ }
+ else
+ {
+ /* not sure whether we need to handle mixed frames like IP, */
+ /* they should ideally come as single field. */
+ /* TODO : resolve this ambiguity (bidir_enabled is left unwritten on this path) */
+ ASSERT(0);
+ }
+ }
+ /************************************************************************/
+ /* Lambda calculations moved outside ME and to one place, so as to have */
+ /* consistent lambda across ME, IPE, CL RDOPT etc */
+ /************************************************************************/
+
+ {
+ double d_q_factor;
+
+ d_q_factor = pow(2.0, (i4_frm_qp / 6.)) * 5.0 / 8.0;
+ ps_master_ctxt->as_frm_prms[i4_me_frm_id].qstep = (WORD32)(d_q_factor + .5); /* rounded to nearest */
+ ps_master_ctxt->as_frm_prms[i4_me_frm_id].i4_frame_qp = i4_frm_qp;
+
+ /* Qstep multiplied by 256, to work at higher precision:
+ 5/6 is the rounding factor. Multiplied by 2 for the Had vs DCT
+ cost variation (5/6 * 2 gives the 5/3 used below) */
+ ps_master_ctxt->as_frm_prms[i4_me_frm_id].qstep_ls8 =
+ (WORD32)((((d_q_factor * 256) * 5) / 3) + .5);
+ }
+
+ /* Frame level init of all threads of ME */
+ {
+ WORD32 num_thrds;
+
+ /* initialise the parameters for all the threads */
+ for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
+ {
+ me_frm_ctxt_t *ps_tmp_frm_ctxt;
+
+ ps_thrd_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds];
+
+ ps_tmp_frm_ctxt = ps_thrd_ctxt->aps_me_frm_prms[i4_me_frm_id];
+
+ hme_process_frm_init(
+ (void *)ps_thrd_ctxt,
+ ps_tmp_frm_ctxt->ps_hme_ref_map,
+ ps_tmp_frm_ctxt->ps_hme_frm_prms,
+ i4_me_frm_id,
+ ps_master_ctxt->i4_num_me_frm_pllel);
+
+ ps_tmp_frm_ctxt->s_frm_lambda_ctxt = *ps_frm_lamda; /* struct copy, one per thread */
+ ps_tmp_frm_ctxt->pv_dep_mngr_encloop_dep_me = pv_dep_mngr_encloop_dep_me;
+ }
+ }
+
+ ps_master_ctxt->as_frm_prms[i4_me_frm_id].i4_cl_sad_lambda_qf =
+ ps_frm_lamda->i4_cl_sad_lambda_qf;
+ ps_master_ctxt->as_frm_prms[i4_me_frm_id].i4_cl_satd_lambda_qf =
+ ps_frm_lamda->i4_cl_satd_lambda_qf;
+ ps_master_ctxt->as_frm_prms[i4_me_frm_id].i4_ol_sad_lambda_qf =
+ ps_frm_lamda->i4_ol_sad_lambda_qf;
+ ps_master_ctxt->as_frm_prms[i4_me_frm_id].i4_ol_satd_lambda_qf =
+ ps_frm_lamda->i4_ol_satd_lambda_qf;
+ ps_master_ctxt->as_frm_prms[i4_me_frm_id].lambda_q_shift = LAMBDA_Q_SHIFT;
+
+ ps_master_ctxt->as_frm_prms[i4_me_frm_id].u1_is_cu_qp_delta_enabled =
+ i1_cu_qp_delta_enabled_flag;
+
+ /*************************************************************************/
+ /* If num ref is 0, that means that it has to be coded as I. Do nothing */
+ /* However mv bank update needs to happen with "intra" mv. */
+ /*************************************************************************/
+ if(ps_master_ctxt->as_ref_map[i4_me_frm_id].i4_num_ref == 0 ||
+ ps_master_ctxt->as_frm_prms[i4_me_frm_id].is_i_pic)
+ {
+ for(i = 0; i < 1; i++) /* single iteration: only layer index 0 is handled */
+ {
+ layer_ctxt_t *ps_layer_ctxt = ps_ctxt->ps_curr_descr->aps_layers[i];
+ BLK_SIZE_T e_blk_size;
+ S32 use_4x4;
+
+ /* The mv bank is filled with "intra" mv */
+ use_4x4 = hme_get_mv_blk_size(
+ ps_thrd0_ctxt->s_init_prms.use_4x4, i, ps_ctxt->num_layers, ps_ctxt->u1_encode[i]);
+ e_blk_size = use_4x4 ? BLK_4x4 : BLK_8x8;
+ hme_init_mv_bank(ps_layer_ctxt, e_blk_size, 2, 1, ps_ctxt->u1_encode[i]);
+ hme_fill_mvbank_intra(ps_layer_ctxt);
+
+ /* Clear out the global mvs */
+ memset(
+ ps_layer_ctxt->s_global_mv,
+ 0,
+ sizeof(hme_mv_t) * ps_ctxt->max_num_ref * NUM_GMV_LOBES);
+ }
+
+ return; /* intra / no-reference pictures skip the encode layer init below */
+ }
+
+ /*************************************************************************/
+ /* Encode layer frame init */
+ /*************************************************************************/
+ {
+ refine_prms_t s_refine_prms;
+ layer_ctxt_t *ps_curr_layer;
+ S16 i2_max;
+ S32 layer_id;
+
+ layer_id = 0;
+ i2_max = ps_ctxt->ps_curr_descr->aps_layers[layer_id]->i2_max_mv_x;
+ i2_max = MAX(i2_max, ps_ctxt->ps_curr_descr->aps_layers[layer_id]->i2_max_mv_y); /* i2_max is not read afterwards */
+
+ ps_curr_layer = ps_ctxt->ps_curr_descr->aps_layers[layer_id];
+
+ {
+ hme_set_refine_prms(
+ &s_refine_prms,
+ ps_ctxt->u1_encode[layer_id],
+ ps_master_ctxt->as_ref_map[i4_me_frm_id].i4_num_ref,
+ layer_id,
+ ps_ctxt->num_layers,
+ ps_ctxt->num_layers_explicit_search,
+ ps_thrd0_ctxt->s_init_prms.use_4x4,
+ &ps_master_ctxt->as_frm_prms[i4_me_frm_id],
+ NULL,
+ &ps_thrd0_ctxt->s_init_prms
+ .s_me_coding_tools); /* during frm init Intra cost Pointer is not required */
+
+ hme_refine_frm_init(ps_curr_layer, &s_refine_prms, ps_coarse_layer);
+ }
+ }
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_l0_me_frame_end \endif
+*
+* \brief
+* End of frame update function: performs Dynamic Search Range collation (P pictures only)
+*
+* \param[in] pv_me_ctxt : pointer to ME module (cast to me_master_ctxt_t)
+* \param[in] i4_idx_dvsr_p : index into as_l0_dyn_range_prms[] of every frame context
+* \param[in] i4_display_num : display number stored with the collated range params
+* \param[in] me_frm_id : index of the ME frame context that was just processed
+* \return None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+
+void ihevce_l0_me_frame_end(
+ void *pv_me_ctxt, WORD32 i4_idx_dvsr_p, WORD32 i4_display_num, WORD32 me_frm_id)
+{
+ WORD32 i4_num_ref = 0, num_ref, num_thrds, cur_poc, frm_num;
+
+ me_master_ctxt_t *ps_master_ctxt = (me_master_ctxt_t *)pv_me_ctxt;
+ me_ctxt_t *ps_thrd0_ctxt;
+ me_frm_ctxt_t *ps_frm_ctxt;
+ WORD32 prev_me_frm_id;
+
+ ps_thrd0_ctxt = ps_master_ctxt->aps_me_ctxt[0];
+ ps_frm_ctxt = ps_thrd0_ctxt->aps_me_frm_prms[me_frm_id];
+
+ /* Derive the previous ME frame id (wraps around); NOTE(review): prev_me_frm_id is never read below */
+ if(me_frm_id == 0)
+ prev_me_frm_id = (MAX_NUM_ME_PARALLEL - 1);
+ else
+ prev_me_frm_id = me_frm_id - 1;
+
+ /* Getting the max num references value */
+ for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
+ {
+ i4_num_ref =
+ MAX(i4_num_ref,
+ ps_master_ctxt->aps_me_ctxt[num_thrds]
+ ->aps_me_frm_prms[me_frm_id]
+ ->as_l0_dyn_range_prms[i4_idx_dvsr_p]
+ .i4_num_act_ref_in_l0);
+ }
+
+ /* No processing is required if current pic is I pic */
+ if(1 == ps_master_ctxt->as_frm_prms[me_frm_id].is_i_pic)
+ {
+ return;
+ }
+
+ /* If a B/b pic, then the previous frame ctxts dyn search prms should be copied into the latest ctxt */
+ if(1 == ps_frm_ctxt->s_frm_prms.bidir_enabled)
+ {
+ return; /* no copy is performed here; the function simply returns */
+ }
+
+ /* Only for P pic. For P, both are 0, I&B has them mut. exclusive */
+ ASSERT(ps_frm_ctxt->s_frm_prms.is_i_pic == ps_frm_ctxt->s_frm_prms.bidir_enabled);
+
+ /* use thrd 0 ctxt to collate the Dynamic Search Range across all threads */
+ for(num_ref = 0; num_ref < i4_num_ref; num_ref++)
+ {
+ dyn_range_prms_t *ps_dyn_range_prms_thrd0;
+
+ ps_dyn_range_prms_thrd0 =
+ &ps_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].as_dyn_range_prms[num_ref];
+
+ /* run a loop over all the other threads to update the dynamical search range */
+ for(num_thrds = 1; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
+ {
+ me_frm_ctxt_t *ps_me_tmp_frm_ctxt;
+
+ dyn_range_prms_t *ps_dyn_range_prms;
+
+ ps_me_tmp_frm_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]->aps_me_frm_prms[me_frm_id];
+
+ /* get current thrd dynamical search range param. pointer */
+ ps_dyn_range_prms =
+ &ps_me_tmp_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].as_dyn_range_prms[num_ref];
+
+ /* TODO : These calls can be optimized further. No need for min in 1st call and max in 2nd call */
+ hme_update_dynamic_search_params(
+ ps_dyn_range_prms_thrd0, ps_dyn_range_prms->i2_dyn_max_y);
+
+ hme_update_dynamic_search_params(
+ ps_dyn_range_prms_thrd0, ps_dyn_range_prms->i2_dyn_min_y);
+ }
+ }
+
+ /*************************************************************************/
+ /* Get the MAX/MIN per POC distance based on the all the ref. pics */
+ /*************************************************************************/
+ cur_poc = ps_frm_ctxt->i4_curr_poc;
+ ps_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i2_dyn_max_y_per_poc = 0;
+ ps_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i2_dyn_min_y_per_poc = 0;
+ /* populate display num */
+ ps_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i4_display_num = i4_display_num;
+
+ for(num_ref = 0; num_ref < i4_num_ref; num_ref++)
+ {
+ WORD16 i2_mv_per_poc;
+ WORD32 ref_poc, poc_diff;
+ dyn_range_prms_t *ps_dyn_range_prms_thrd0;
+ ps_dyn_range_prms_thrd0 =
+ &ps_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].as_dyn_range_prms[num_ref];
+
+ ref_poc = ps_dyn_range_prms_thrd0->i4_poc;
+ /* Should be cleaned up for ME llsm */
+ poc_diff = (cur_poc - ref_poc);
+ poc_diff = MAX(1, poc_diff); /* clamp so the divisions below use a divisor >= 1 */
+
+ /* cur. ref. pic. max y per POC */
+ i2_mv_per_poc = (ps_dyn_range_prms_thrd0->i2_dyn_max_y + (poc_diff - 1)) / poc_diff;
+ /* update the max y per POC */
+ ps_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i2_dyn_max_y_per_poc = MAX(
+ ps_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i2_dyn_max_y_per_poc, i2_mv_per_poc);
+
+ /* cur. ref. pic. min y per POC */
+ i2_mv_per_poc = (ps_dyn_range_prms_thrd0->i2_dyn_min_y - (poc_diff - 1)) / poc_diff;
+ /* update the min y per POC */
+ ps_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i2_dyn_min_y_per_poc = MIN(
+ ps_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i2_dyn_min_y_per_poc, i2_mv_per_poc);
+ }
+
+ /*************************************************************************/
+ /* Populate the results to all thread ctxt */
+ /*************************************************************************/
+ for(num_thrds = 1; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
+ {
+ me_frm_ctxt_t *ps_me_tmp_frm_ctxt;
+
+ ps_me_tmp_frm_ctxt = ps_master_ctxt->aps_me_ctxt[num_thrds]->aps_me_frm_prms[me_frm_id];
+
+ ps_me_tmp_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i2_dyn_max_y_per_poc =
+ ps_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i2_dyn_max_y_per_poc;
+
+ ps_me_tmp_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i2_dyn_min_y_per_poc =
+ ps_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i2_dyn_min_y_per_poc;
+
+ ps_me_tmp_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i4_display_num =
+ ps_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i4_display_num;
+ }
+
+ /* Copy the dynamic search parameters into the other frame contexts of every thread */
+ for(num_thrds = 0; num_thrds < ps_master_ctxt->i4_num_proc_thrds; num_thrds++)
+ {
+ l0_dyn_range_prms_t *ps_dyn_range_prms_thrd0;
+
+ ps_frm_ctxt = ps_thrd0_ctxt->aps_me_frm_prms[me_frm_id]; /* same value on every iteration */
+
+ i4_num_ref = ps_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p].i4_num_act_ref_in_l0; /* not read afterwards */
+
+ ps_dyn_range_prms_thrd0 = &ps_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p]; /* copy source: thread 0 */
+
+ for(frm_num = 0; frm_num < MAX_NUM_ME_PARALLEL; frm_num++)
+ {
+ if(me_frm_id != frm_num)
+ {
+ me_frm_ctxt_t *ps_me_tmp_frm_ctxt;
+
+ l0_dyn_range_prms_t *ps_dyn_range_prms;
+
+ ps_me_tmp_frm_ctxt =
+ ps_master_ctxt->aps_me_ctxt[num_thrds]->aps_me_frm_prms[frm_num];
+
+ /* get current thrd dynamical search range param. pointer */
+ ps_dyn_range_prms = &ps_me_tmp_frm_ctxt->as_l0_dyn_range_prms[i4_idx_dvsr_p];
+
+ memcpy(ps_dyn_range_prms, ps_dyn_range_prms_thrd0, sizeof(l0_dyn_range_prms_t));
+ }
+ }
+ }
+}
diff --git a/encoder/ihevce_me_pass.h b/encoder/ihevce_me_pass.h
new file mode 100644
index 0000000..9247043
--- /dev/null
+++ b/encoder/ihevce_me_pass.h
@@ -0,0 +1,349 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_me_pass.h
+*
+* \brief
+* Interfaces to create, control and run the ME module
+*
+* \date
+* 22/10/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_ME_PASS_H_
+#define _IHEVCE_ME_PASS_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Variable Declarations */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+/*!
+******************************************************************************
+* \if Function name : ihevce_me_get_num_mem_recs \endif
+*
+* \brief
+* Returns the number of memory records required by the ME module
+*
+*
+* \return
+* Number of memory records
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+WORD32 ihevce_me_get_num_mem_recs(WORD32 i4_num_me_frm_pllel);
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_me_get_mem_recs \endif
+*
+* \brief
+* Memory requirements are returned for ME.
+*
+* \param[in,out] ps_mem_tab : pointer to memory descriptors table
+* \param[in] ps_init_prms : Create time static parameters
+*
+* \return
+* Number of records
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+WORD32 ihevce_me_get_mem_recs(
+ iv_mem_rec_t *ps_mem_tab,
+ ihevce_static_cfg_params_t *ps_init_prms,
+ WORD32 i4_num_proc_thrds,
+ WORD32 i4_mem_space,
+ WORD32 i4_resolution_id,
+ WORD32 i4_num_me_frm_pllel);
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_me_init \endif
+*
+* \brief
+* Initialization of the ME context state structure.
+*
+* \param[in] ps_mem_tab : pointer to memory descriptors table
+* \param[in] ps_init_prms : Create time static parameters
+* \param[in] pv_osal_handle : Osal handle
+*
+* \return
+* Handle to the ME context
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void *ihevce_me_init(
+ iv_mem_rec_t *ps_mem_tab,
+ ihevce_static_cfg_params_t *ps_init_prms,
+ WORD32 i4_num_proc_thrds,
+ void *pv_osal_handle,
+ rc_quant_t *ps_rc_quant_ctxt,
+ void *pv_tile_params_base,
+ WORD32 i4_resolution_id,
+ WORD32 i4_num_me_frm_pllel,
+ UWORD8 u1_is_popcnt_available);
+
+/**
+*******************************************************************************
+* \if Function name : ihevce_me_set_resolution \endif
+*
+* \brief
+* Sets the resolution for ME state
+*
+* \par Description:
+* ME requires information of resolution to prime up its layer descriptors
+* and contexts. This API is called whenever a control call from application
+* causes a change of resolution. Has to be called once initially before
+* processing any frame. Again this is just a glue function and calls the
+* actual ME API for the same.
+*
+* \param[in,out] pv_me_ctxt: Handle to the ME context
+* \param[in] n_enc_layers: Number of layers getting encoded
+* \param[in] p_wd : Pointer containing widths of each layer getting encoded.
+* \param[in] p_ht : Pointer containing heights of each layer getting encoded.
+*
+* \returns
+* none
+*
+* \author
+* Ittiam
+*
+*******************************************************************************
+*/
+void ihevce_me_set_resolution(void *pv_me_ctxt, WORD32 n_enc_layers, WORD32 *p_wd, WORD32 *p_ht);
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_me_frame_init \endif
+*
+* \brief
+* Frame level ME initialisation function
+*
+* \par Description:
+* The following pre-conditions exist for this function: a. We have the input
+* pic ready for encode, b. We have the reference list with POC, L0/L1 IDs
+* and ref ptrs ready for this picture and c. ihevce_me_set_resolution has
+* been called at least once. Once these are supplied, the following are
+* done here: a. Input pyramid creation, b. Update of ME's internal DPB
+* based on available ref list information
+*
+* \param[in] pv_me_ctxt : pointer to ME module
+* \param[in] ps_frm_ctb_prms : CTB characteristics parameters
+* \param[in] ps_frm_lamda : Frame level Lambda params
+* \param[in] num_ref_l0 : Number of reference pics in L0 list
+* \param[in] num_ref_l1 : Number of reference pics in L1 list
+* \param[in] num_ref_l0_active : Active reference pics in L0 dir for current frame (shall be <= num_ref_l0)
+* \param[in] num_ref_l1_active : Active reference pics in L1 dir for current frame (shall be <= num_ref_l1)
+* \param[in] pps_rec_list_l0 : List of recon pics in L0 list
+* \param[in] pps_rec_list_l1 : List of recon pics in L1 list
+* \param[in] ps_enc_lap_inp : pointer to input yuv buffer (frame buffer)
+* \param[in] i4_frm_qp : current picture QP
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_me_frame_init(
+ void *pv_me_ctxt,
+ me_enc_rdopt_ctxt_t *ps_cur_out_me_prms,
+ ihevce_static_cfg_params_t *ps_stat_prms,
+ frm_ctb_ctxt_t *ps_frm_ctb_prms,
+ frm_lambda_ctxt_t *ps_frm_lamda,
+ WORD32 num_ref_l0,
+ WORD32 num_ref_l1,
+ WORD32 num_ref_l0_active,
+ WORD32 num_ref_l1_active,
+ recon_pic_buf_t **pps_rec_list_l0,
+ recon_pic_buf_t **pps_rec_list_l1,
+ recon_pic_buf_t *(*aps_ref_list)[HEVCE_MAX_REF_PICS * 2],
+ func_selector_t *ps_func_selector,
+ ihevce_lap_enc_buf_t *ps_enc_lap_inp,
+ void *pv_coarse_layer,
+ WORD32 i4_me_frm_id,
+ WORD32 i4_thrd_id,
+ WORD32 i4_frm_qp,
+ WORD32 i4_temporal_layer_id,
+ WORD8 i1_cu_qp_delta_enabled_flag,
+ void *pv_dep_mngr_encloop_dep_me);
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_me_process \endif
+*
+* \brief
+* Frame level ME function
+*
+* \par Description:
+* Processing of all layers starting from coarse and going
+* to the refinement layers, all layers
+* that are encoded go CTB by CTB. Outputs of this function are populated
+* ctb_analyse_t structures, one per CTB.
+*
+* \param[in] pv_me_ctxt : pointer to ME module
+* \param[in] ps_enc_lap_inp : pointer to input yuv buffer (frame buffer)
+* \param[in,out] ps_ctb_out : pointer to CTB analyse output structure (frame buffer)
+* \param[out] ps_cu_out : pointer to CU analyse output structure (frame buffer)
+* \param[in] pd_intra_costs : pointer to intra cost buffer
+* \param[in] ps_multi_thrd_ctxt : pointer to multi thread ctxt
+* \param[in] thrd_id : Thread id of the current thrd in which function is executed
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_me_process(
+ void *pv_me_ctxt,
+ ihevce_lap_enc_buf_t *ps_enc_lap_inp,
+ ctb_analyse_t *ps_ctb_out,
+ me_enc_rdopt_ctxt_t *ps_cur_out_me_prms,
+ double *pd_intra_costs,
+ ipe_l0_ctb_analyse_for_me_t *ps_ipe_analyse_ctb,
+ pre_enc_L0_ipe_encloop_ctxt_t *ps_l0_ipe_input,
+ void *pv_coarse_layer,
+ multi_thrd_ctxt_t *ps_multi_thrd_ctxt,
+ WORD32 i4_frame_parallelism_level,
+ WORD32 thrd_id,
+ WORD32 i4_me_frm_id);
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_me_frame_dpb_update \endif
+*
+* \brief
+* Frame level ME DPB update function
+*
+* \par Description:
+* Updates ME's internal DPB
+* based on available ref list information
+*
+* \param[in] pv_me_ctxt : pointer to ME module
+* \param[in] num_ref_l0 : Number of reference pics in L0 list
+* \param[in] num_ref_l1 : Number of reference pics in L1 list
+* \param[in] pps_rec_list_l0 : List of recon pics in L0 list
+* \param[in] pps_rec_list_l1 : List of recon pics in L1 list
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_me_frame_dpb_update(
+ void *pv_me_ctxt,
+ WORD32 num_ref_l0,
+ WORD32 num_ref_l1,
+ recon_pic_buf_t **pps_rec_list_l0,
+ recon_pic_buf_t **pps_rec_list_l1,
+ WORD32 i4_thrd_id);
+
+void ihevce_derive_me_init_prms(
+ ihevce_static_cfg_params_t *ps_init_prms,
+ hme_init_prms_t *ps_hme_init_prms,
+ S32 i4_num_proc_thrds,
+ WORD32 i4_resolution_id);
+
+void ihevc_me_update_ref_desc(
+ hme_ref_desc_t *ps_ref_desc,
+ recon_pic_buf_t *ps_recon_pic,
+ WORD32 ref_id_l0,
+ WORD32 ref_id_l1,
+ WORD32 ref_id_lc,
+ WORD32 is_fwd);
+
+WORD32 ihevce_me_find_poc_in_list(
+ recon_pic_buf_t **pps_rec_list, WORD32 poc, WORD32 i4_idr_gop_num, WORD32 num_ref);
+
+void ihevce_me_create_ref_map(
+ recon_pic_buf_t **pps_rec_list_l0,
+ recon_pic_buf_t **pps_rec_list_l1,
+ WORD32 num_ref_l0_active,
+ WORD32 num_ref_l1_active,
+ WORD32 num_ref,
+ hme_ref_map_t *ps_ref_map);
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_l0_me_frame_end \endif
+*
+* \brief
+* End of frame update function performs
+* - Dynamic Search Range collation
+*
+* \param[in] pv_me_ctxt : pointer to ME module
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_l0_me_frame_end(
+ void *pv_me_ctxt, WORD32 i4_idx_dvsr_p, WORD32 i4_display_num, WORD32 i4_me_frm_id);
+
+#endif /* _IHEVCE_ME_PASS_H_ */
diff --git a/encoder/ihevce_memory_init.c b/encoder/ihevce_memory_init.c
new file mode 100644
index 0000000..3402003
--- /dev/null
+++ b/encoder/ihevce_memory_init.c
@@ -0,0 +1,3079 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*!
+******************************************************************************
+* \file ihevce_memory_init.c
+*
+* \brief
+* This file contains functions which perform memory requirement gathering
+* and freeing of memories of encoder at the end
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+* List of Functions
+* <TODO: TO BE ADDED>
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_macros.h"
+#include "ihevc_debug.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+#include "ihevc_common_tables.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_hle_interface.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_lap_interface.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_multi_thrd_funcs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_entropy_structs.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+#include "ihevce_ipe_instr_set_router.h"
+#include "ihevce_decomp_pre_intra_structs.h"
+#include "ihevce_decomp_pre_intra_pass.h"
+#include "ihevce_enc_loop_structs.h"
+#include "ihevce_nbr_avail.h"
+#include "ihevce_enc_loop_utils.h"
+#include "ihevce_sub_pic_rc.h"
+#include "ihevce_global_tables.h"
+#include "ihevce_bs_compute_ctb.h"
+#include "ihevce_cabac_rdo.h"
+#include "ihevce_deblk.h"
+#include "ihevce_entropy_interface.h"
+#include "ihevce_frame_process.h"
+#include "ihevce_ipe_pass.h"
+#include "ihevce_rc_enc_structs.h"
+#include "ihevce_rc_interface.h"
+#include "hme_datatype.h"
+#include "hme_interface.h"
+#include "hme_common_defs.h"
+#include "hme_defs.h"
+#include "ihevce_me_instr_set_router.h"
+#include "ihevce_enc_subpel_gen.h"
+#include "ihevce_inter_pred.h"
+#include "ihevce_mv_pred.h"
+#include "ihevce_mv_pred_merge.h"
+#include "ihevce_enc_loop_inter_mode_sifter.h"
+#include "ihevce_me_pass.h"
+#include "ihevce_coarse_me_pass.h"
+#include "ihevce_enc_cu_recursion.h"
+#include "ihevce_enc_loop_pass.h"
+#include "ihevce_common_utils.h"
+#include "ihevce_buffer_que_interface.h"
+#include "ihevce_dep_mngr_interface.h"
+#include "ihevce_sao.h"
+#include "ihevce_tile_interface.h"
+
+#include "cast_types.h"
+#include "osal.h"
+#include "osal_defaults.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_mem_manager_init \endif
+*
+* \brief
+* Encoder Memory init function
+*
+* \param[in] Processing interface context pointer
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+#define MAX_QUEUE 40
+void ihevce_mem_manager_init(enc_ctxt_t *ps_enc_ctxt, ihevce_hle_ctxt_t *ps_intrf_ctxt)
+{
+ /* local variables */
+ WORD32 total_memtabs_req = 0;
+ WORD32 total_memtabs_used = 0;
+ WORD32 total_system_memtabs = 0;
+ WORD32 ctr;
+ WORD32 buf_size;
+ WORD32 num_ctb_horz;
+ WORD32 num_ctb_vert;
+ WORD32 num_cu_in_ctb;
+ WORD32 num_pu_in_ctb;
+ WORD32 num_tu_in_ctb;
+ WORD32 ctb_size;
+ WORD32 min_cu_size;
+ WORD32 max_num_ref_pics;
+ WORD32 mem_alloc_ctrl_flag;
+ WORD32 space_for_mem_in_enc_grp = 0;
+ WORD32 space_for_mem_in_pre_enc_grp = 0;
+ WORD32 mv_bank_size;
+ WORD32 ref_idx_bank_size;
+ WORD32 a_wd[MAX_NUM_HME_LAYERS], a_ht[MAX_NUM_HME_LAYERS];
+ WORD32 a_disp_wd[MAX_NUM_HME_LAYERS], a_disp_ht[MAX_NUM_HME_LAYERS];
+ WORD32 a_ctb_align_wd[MAX_NUM_HME_LAYERS], a_ctb_align_ht[MAX_NUM_HME_LAYERS];
+ WORD32 n_enc_layers = 1, n_tot_layers;
+ WORD32 num_bufs_preenc_me_que, num_bufs_L0_ipe_enc;
+ WORD32 i, i4_resolution_id = ps_enc_ctxt->i4_resolution_id; //counter
+ WORD32 i4_num_bitrate_inst;
+ iv_mem_rec_t *ps_memtab;
+ WORD32 i4_field_pic, i4_total_queues = 0;
+
+ recon_pic_buf_t **pps_pre_enc_pic_bufs;
+ frm_proc_ent_cod_ctxt_t **pps_frm_proc_ent_cod_bufs[IHEVCE_MAX_NUM_BITRATES];
+ pre_enc_me_ctxt_t **pps_pre_enc_bufs;
+ me_enc_rdopt_ctxt_t **pps_me_enc_bufs;
+ pre_enc_L0_ipe_encloop_ctxt_t **pps_L0_ipe_enc_bufs;
+ /*get number of input buffer required based on requirement from each stage*/
+ ihevce_lap_enc_buf_t **pps_lap_enc_input_bufs;
+ WORD32 i4_num_enc_loop_frm_pllel;
+ WORD32 i4_num_me_frm_pllel;
+ /*msr: These are parameters required to allocate input buffer,
+ encoder needs to be initialized before getting requirements hence filled once static params are initialized*/
+ WORD32 num_input_buf_per_queue, i4_yuv_min_size, i4_luma_min_size;
+
+ i4_num_bitrate_inst = ps_enc_ctxt->i4_num_bitrates;
+ i4_field_pic = ps_enc_ctxt->ps_stat_prms->s_src_prms.i4_field_pic;
+ ps_intrf_ctxt->i4_gpu_mem_size = 0;
+
+ /*Initialize the thrd id flag and all default values for sub pic rc */
+ {
+ WORD32 i, j, k;
+
+ for(i = 0; i < MAX_NUM_ENC_LOOP_PARALLEL; i++)
+ {
+ for(j = 0; j < IHEVCE_MAX_NUM_BITRATES; j++)
+ {
+ ps_enc_ctxt->s_multi_thrd.ai4_acc_ctb_ctr[i][j] = 0;
+ ps_enc_ctxt->s_multi_thrd.ai4_ctb_ctr[i][j] = 0;
+
+ ps_enc_ctxt->s_multi_thrd.ai4_threshold_reached[i][j] = 0;
+
+ ps_enc_ctxt->s_multi_thrd.ai4_curr_qp_acc[i][j] = 0;
+
+ ps_enc_ctxt->s_multi_thrd.af_acc_hdr_bits_scale_err[i][j] = 0;
+
+ for(k = 0; k < MAX_NUM_FRM_PROC_THRDS_ENC; k++)
+ {
+ ps_enc_ctxt->s_multi_thrd.ai4_thrd_id_valid_flag[i][j][k] = -1;
+ }
+ }
+ }
+ }
+
+#define ENABLE_FRM_PARALLEL
+#ifdef ENABLE_FRM_PARALLEL
+ i4_num_enc_loop_frm_pllel = MAX_NUM_ENC_LOOP_PARALLEL;
+ i4_num_me_frm_pllel = MAX_NUM_ME_PARALLEL;
+#else
+ i4_num_enc_loop_frm_pllel = 1;
+ i4_num_me_frm_pllel = 1;
+#endif
+
+ ps_enc_ctxt->s_multi_thrd.i4_num_enc_loop_frm_pllel = i4_num_enc_loop_frm_pllel;
+ ps_enc_ctxt->i4_max_fr_enc_loop_parallel_rc = i4_num_enc_loop_frm_pllel;
+ ps_enc_ctxt->s_multi_thrd.i4_num_me_frm_pllel = i4_num_me_frm_pllel;
+ ps_enc_ctxt->s_multi_thrd.i4_force_end_flag = 0;
+
+ ps_enc_ctxt->i4_ref_mbr_id = 0;
+ /* get the ctb size from max cu size */
+ ctb_size = ps_enc_ctxt->ps_stat_prms->s_config_prms.i4_max_log2_cu_size;
+
+ /* get the min cu size from config params */
+ min_cu_size = ps_enc_ctxt->ps_stat_prms->s_config_prms.i4_min_log2_cu_size;
+
+ /* convert to actual width */
+ ctb_size = 1 << ctb_size;
+ min_cu_size = 1 << min_cu_size;
+
+ /* Get the width and heights of different decomp layers */
+ *a_wd =
+ ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms.as_tgt_params[ps_enc_ctxt->i4_resolution_id]
+ .i4_width +
+ SET_CTB_ALIGN(
+ ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms.as_tgt_params[ps_enc_ctxt->i4_resolution_id]
+ .i4_width,
+ min_cu_size);
+ *a_ht =
+ ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms.as_tgt_params[ps_enc_ctxt->i4_resolution_id]
+ .i4_height +
+ SET_CTB_ALIGN(
+ ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms.as_tgt_params[ps_enc_ctxt->i4_resolution_id]
+ .i4_height,
+ min_cu_size);
+
+ n_tot_layers = hme_derive_num_layers(n_enc_layers, a_wd, a_ht, a_disp_wd, a_disp_ht);
+ hme_coarse_get_layer1_mv_bank_ref_idx_size(
+ n_tot_layers,
+ a_wd,
+ a_ht,
+ ((ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_max_reference_frames == -1)
+ ? ((DEFAULT_MAX_REFERENCE_PICS) << i4_field_pic)
+ : ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_max_reference_frames),
+ (S32 *)(&mv_bank_size),
+ (S32 *)(&ref_idx_bank_size));
+ if(n_tot_layers < 3)
+ {
+ WORD32 error_code;
+ error_code = IHEVCE_NUM_DECOMP_LYRS_NOT_SUPPORTED;
+ ps_intrf_ctxt->i4_error_code = IHEVCE_SETUNSUPPORTEDINPUT(error_code);
+ return;
+ }
+
+ /* calculate num cu,pu,tu in ctb */
+ num_cu_in_ctb = ctb_size / MIN_CU_SIZE;
+ num_cu_in_ctb *= num_cu_in_ctb;
+
+ num_pu_in_ctb = ctb_size / MIN_PU_SIZE;
+ num_pu_in_ctb *= num_pu_in_ctb;
+
+ num_tu_in_ctb = ctb_size / MIN_PU_SIZE;
+ num_tu_in_ctb *= num_tu_in_ctb;
+
+ /* calculate the number of ctb horizontally*/
+ num_ctb_horz =
+ ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms.as_tgt_params[ps_enc_ctxt->i4_resolution_id]
+ .i4_width +
+ SET_CTB_ALIGN(
+ ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms.as_tgt_params[ps_enc_ctxt->i4_resolution_id]
+ .i4_width,
+ ctb_size);
+ num_ctb_horz = num_ctb_horz / ctb_size;
+
+ /* calculate the number of ctb vertically*/
+ num_ctb_vert =
+ ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms.as_tgt_params[ps_enc_ctxt->i4_resolution_id]
+ .i4_height +
+ SET_CTB_ALIGN(
+ ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms.as_tgt_params[ps_enc_ctxt->i4_resolution_id]
+ .i4_height,
+ ctb_size);
+ num_ctb_vert = num_ctb_vert / ctb_size;
+
+ /* align all the decomp layer dimensions to CTB alignment */
+ for(ctr = 0; ctr < n_tot_layers; ctr++)
+ {
+ a_ctb_align_wd[ctr] = a_wd[ctr] + SET_CTB_ALIGN(a_wd[ctr], ctb_size);
+
+ a_ctb_align_ht[ctr] = a_ht[ctr] + SET_CTB_ALIGN(a_ht[ctr], ctb_size);
+ }
+
+ /* SEI related parameter initialization */
+
+ ps_enc_ctxt->u4_cur_pic_encode_cnt = 0;
+
+ /* store the frame level ctb parameters which will be constant for the session */
+ ps_enc_ctxt->s_frm_ctb_prms.i4_ctb_size = ctb_size;
+ ps_enc_ctxt->s_frm_ctb_prms.i4_min_cu_size = min_cu_size;
+ ps_enc_ctxt->s_frm_ctb_prms.i4_num_cus_in_ctb = num_cu_in_ctb;
+ ps_enc_ctxt->s_frm_ctb_prms.i4_num_pus_in_ctb = num_pu_in_ctb;
+ ps_enc_ctxt->s_frm_ctb_prms.i4_num_tus_in_ctb = num_tu_in_ctb;
+
+ /* initialize cra poc to default value */
+ ps_enc_ctxt->i4_cra_poc = 0;
+
+ /* initialise the memory alloc control flag */
+ mem_alloc_ctrl_flag = ps_enc_ctxt->ps_stat_prms->s_multi_thrd_prms.i4_memory_alloc_ctrl_flag;
+
+ /* decide the memory space for enc_grp and pre_enc_grp based on control flag */
+ if(0 == mem_alloc_ctrl_flag)
+ {
+ /* normal memory */
+ space_for_mem_in_enc_grp = IV_EXT_CACHEABLE_NORMAL_MEM;
+ space_for_mem_in_pre_enc_grp = IV_EXT_CACHEABLE_NORMAL_MEM;
+ }
+ else if(1 == mem_alloc_ctrl_flag)
+ {
+ /* only NUMA Node 0 memory allocation */
+ space_for_mem_in_enc_grp = IV_EXT_CACHEABLE_NUMA_NODE0_MEM;
+ space_for_mem_in_pre_enc_grp = IV_EXT_CACHEABLE_NUMA_NODE0_MEM;
+ }
+ else if(2 == mem_alloc_ctrl_flag)
+ {
+ /* Both NUMA Node 0 & Node 1 memory allocation */
+ space_for_mem_in_enc_grp = IV_EXT_CACHEABLE_NUMA_NODE0_MEM;
+ space_for_mem_in_pre_enc_grp = IV_EXT_CACHEABLE_NUMA_NODE1_MEM;
+ }
+ else
+ {
+ /* should not enter here */
+ ASSERT(0);
+ }
+
+ {
+ if(ps_enc_ctxt->s_multi_thrd.i4_num_enc_loop_frm_pllel > 1)
+ {
+ num_bufs_preenc_me_que = MIN_L1_L0_STAGGER_NON_SEQ +
+ ps_enc_ctxt->ps_stat_prms->s_lap_prms.i4_rc_look_ahead_pics +
+ (MAX_L0_IPE_ENC_STAGGER - 1) + NUM_BUFS_DECOMP_HME;
+ }
+ else
+ {
+ num_bufs_preenc_me_que = MIN_L1_L0_STAGGER_NON_SEQ +
+ ps_enc_ctxt->ps_stat_prms->s_lap_prms.i4_rc_look_ahead_pics +
+ (MIN_L0_IPE_ENC_STAGGER - 1) + NUM_BUFS_DECOMP_HME;
+ }
+
+ /*The number of buffers to support stagger between L0 IPE, ME and enc loop. This is a separate queue to store L0 IPE
+ output to save memory since this is not used in L1 stage*/
+ if(ps_enc_ctxt->s_multi_thrd.i4_num_enc_loop_frm_pllel > 1)
+ {
+ num_bufs_L0_ipe_enc = MAX_L0_IPE_ENC_STAGGER;
+ }
+ else
+ {
+ num_bufs_L0_ipe_enc = MIN_L0_IPE_ENC_STAGGER;
+ }
+ }
+
+ /* ------------ populate the lap static parameters ------------- */
+ ps_enc_ctxt->s_lap_stat_prms.i4_max_closed_gop_period =
+ ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_max_closed_gop_period;
+
+ ps_enc_ctxt->s_lap_stat_prms.i4_min_closed_gop_period =
+ ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_min_closed_gop_period;
+
+ ps_enc_ctxt->s_lap_stat_prms.i4_max_cra_open_gop_period =
+ ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_max_cra_open_gop_period;
+
+ ps_enc_ctxt->s_lap_stat_prms.i4_max_i_open_gop_period =
+ ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_max_i_open_gop_period;
+
+ ps_enc_ctxt->s_lap_stat_prms.i4_max_reference_frames =
+ ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_max_reference_frames;
+
+ ps_enc_ctxt->s_lap_stat_prms.i4_max_temporal_layers =
+ ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_max_temporal_layers;
+
+ ps_enc_ctxt->s_lap_stat_prms.i4_width = ps_enc_ctxt->ps_stat_prms->s_src_prms.i4_width;
+
+ ps_enc_ctxt->s_lap_stat_prms.i4_height = ps_enc_ctxt->ps_stat_prms->s_src_prms.i4_height;
+
+ ps_enc_ctxt->s_lap_stat_prms.i4_enable_logo = ps_enc_ctxt->ps_stat_prms->i4_enable_logo;
+
+ ps_enc_ctxt->s_lap_stat_prms.i4_src_interlace_field =
+ ps_enc_ctxt->ps_stat_prms->s_src_prms.i4_field_pic;
+ ps_enc_ctxt->s_lap_stat_prms.i4_frame_rate =
+ ps_enc_ctxt->ps_stat_prms->s_src_prms.i4_frm_rate_num /
+ ps_enc_ctxt->ps_stat_prms->s_src_prms.i4_frm_rate_denom;
+
+ ps_enc_ctxt->s_lap_stat_prms.i4_blu_ray_spec = ps_enc_ctxt->i4_blu_ray_spec;
+
+ ps_enc_ctxt->s_lap_stat_prms.i4_internal_bit_depth =
+ ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms.i4_internal_bit_depth;
+
+ ps_enc_ctxt->s_lap_stat_prms.i4_input_bit_depth =
+ ps_enc_ctxt->ps_stat_prms->s_src_prms.i4_input_bit_depth;
+
+ ps_enc_ctxt->s_lap_stat_prms.u1_chroma_array_type =
+ (ps_enc_ctxt->ps_stat_prms->s_src_prms.i4_chr_format == IV_YUV_422SP_UV) ? 2 : 1;
+
+ ps_enc_ctxt->s_lap_stat_prms.i4_rc_pass_num = ps_enc_ctxt->ps_stat_prms->s_pass_prms.i4_pass;
+
+ if(0 == i4_resolution_id)
+ {
+ for(ctr = 0; ctr < ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms.i4_num_res_layers; ctr++)
+ {
+ ps_enc_ctxt->s_lap_stat_prms.ai4_quality_preset[ctr] =
+ ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms.as_tgt_params[ctr].i4_quality_preset;
+
+ if(ps_enc_ctxt->s_lap_stat_prms.ai4_quality_preset[ctr] == IHEVCE_QUALITY_P7)
+ {
+ ps_enc_ctxt->s_lap_stat_prms.ai4_quality_preset[ctr] = IHEVCE_QUALITY_P6;
+ }
+ }
+ }
+ memcpy(
+ &ps_enc_ctxt->s_lap_stat_prms.s_lap_params,
+ &ps_enc_ctxt->ps_stat_prms->s_lap_prms,
+ sizeof(ihevce_lap_params_t));
+
+ /* copy the create prms as runtime prms */
+ memcpy(
+ &ps_enc_ctxt->s_runtime_src_prms,
+ &ps_enc_ctxt->ps_stat_prms->s_src_prms,
+ sizeof(ihevce_src_params_t));
+ /*Copy the target params*/
+ memcpy(
+ &ps_enc_ctxt->s_runtime_tgt_params,
+ &ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id],
+ sizeof(ihevce_tgt_params_t));
+ ps_enc_ctxt->s_lap_stat_prms.e_arch_type = ps_enc_ctxt->ps_stat_prms->e_arch_type;
+ ps_enc_ctxt->s_lap_stat_prms.u1_is_popcnt_available = ps_enc_ctxt->u1_is_popcnt_available;
+
+ /* copy the create prms as runtime prms */
+ memcpy(
+ &ps_enc_ctxt->s_runtime_src_prms,
+ &ps_enc_ctxt->ps_stat_prms->s_src_prms,
+ sizeof(ihevce_src_params_t));
+ /*Copy the target params*/
+ memcpy(
+ &ps_enc_ctxt->s_runtime_tgt_params,
+ &ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id],
+ sizeof(ihevce_tgt_params_t));
+
+ /* copy the run time coding parameters */
+ memcpy(
+ &ps_enc_ctxt->s_runtime_coding_prms,
+ &ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms,
+ sizeof(ihevce_coding_params_t));
+ /*change in run time parameter*/
+ if(ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_max_reference_frames == -1)
+ {
+ ps_enc_ctxt->s_runtime_coding_prms.i4_max_reference_frames = (DEFAULT_MAX_REFERENCE_PICS)
+ << i4_field_pic;
+ ps_enc_ctxt->s_lap_stat_prms.i4_max_reference_frames =
+ ps_enc_ctxt->s_runtime_coding_prms.i4_max_reference_frames;
+ }
+ ASSERT(i4_num_enc_loop_frm_pllel == i4_num_me_frm_pllel);
+
+ if((1 == i4_num_enc_loop_frm_pllel) && (1 == i4_num_me_frm_pllel))
+ {
+ max_num_ref_pics = ps_enc_ctxt->s_runtime_coding_prms.i4_max_reference_frames;
+ }
+ else
+ {
+ max_num_ref_pics =
+ ps_enc_ctxt->s_runtime_coding_prms.i4_max_reference_frames * i4_num_enc_loop_frm_pllel;
+ }
+ /* --------------------------------------------------------------------- */
+ /* -------------- Collating the number of memtabs required ------------ */
+ /* --------------------------------------------------------------------- */
+
+ /* Memtabs for syntactical tiles */
+ total_memtabs_req += ihevce_tiles_get_num_mem_recs();
+
+ /* ---------- Enc loop Memtabs --------- */
+ total_memtabs_req +=
+ ihevce_enc_loop_get_num_mem_recs(i4_num_bitrate_inst, i4_num_enc_loop_frm_pllel);
+ /* ---------- ME Memtabs --------------- */
+ total_memtabs_req += ihevce_me_get_num_mem_recs(i4_num_me_frm_pllel);
+
+ /* ---------- Coarse ME Memtabs --------------- */
+ total_memtabs_req += ihevce_coarse_me_get_num_mem_recs();
+ /* ---------- IPE Memtabs -------------- */
+ total_memtabs_req += ihevce_ipe_get_num_mem_recs();
+
+ /* ---------- ECD Memtabs -------------- */
+ for(i = 0; i < i4_num_bitrate_inst; i++)
+ {
+ total_memtabs_req += ihevce_entropy_get_num_mem_recs();
+ }
+ if(0 == ps_enc_ctxt->i4_resolution_id)
+ {
+ /* ---------- LAP Memtabs--------------- */
+ total_memtabs_req += ihevce_lap_get_num_mem_recs();
+ }
+ /* ---------- Decomp Pre Intra Memtabs--------------- */
+ total_memtabs_req += ihevce_decomp_pre_intra_get_num_mem_recs();
+
+ /* ---------- RC memtabs --------------- */
+ for(i = 0; i < i4_num_bitrate_inst; i++)
+ {
+ total_memtabs_req += ihevce_rc_get_num_mem_recs(); /*HEVC_RC*/
+ }
+
+ /* ---------- System Memtabs ----------- */
+ total_memtabs_req += TOTAL_SYSTEM_MEM_RECS; //increment this based on final requirement
+
+ /* -----Frameproc Entcod Que Memtabs --- */
+ /* one queue for each bit-rate is used */
+ for(i = 0; i < i4_num_bitrate_inst; i++)
+ {
+ total_memtabs_req += ihevce_buff_que_get_num_mem_recs();
+ }
+ /* mrs:memtab for one queue for encoder owned input queue, This is only request for memtab, currently more than
+ required memtabs are allocated. Hence my change of using memtab for yuv buffers is surviving. Only memtab
+ usage and initialization needs to be exact sync*/
+ total_memtabs_req += ihevce_buff_que_get_num_mem_recs();
+
+ /* ---Pre-encode Encode Que Mem requests -- */
+ total_memtabs_req += ihevce_buff_que_get_num_mem_recs();
+
+ /* -----ME / Enc-RD opt Que Mem requests --- */
+ total_memtabs_req += ihevce_buff_que_get_num_mem_recs();
+
+ /* ----Pre-encode L0 IPE to enc Que Mem requests -- */
+ total_memtabs_req += ihevce_buff_que_get_num_mem_recs();
+
+ /* --- ME-EncLoop Dep Mngr Row-Row Mem requests -- */
+ total_memtabs_req += NUM_ME_ENC_BUFS * ihevce_dmgr_get_num_mem_recs();
+
+ /* --- Prev. frame EncLoop Done Dep Mngr Frm-Frm Mem requests -- */
+ total_memtabs_req += i4_num_enc_loop_frm_pllel * ihevce_dmgr_get_num_mem_recs();
+
+ /* --- Prev. frame EncLoop Done for re-encode Dep Mngr Frm-Frm Mem requests -- */
+ total_memtabs_req += ihevce_dmgr_get_num_mem_recs();
+
+ /* --- Prev. frame ME Done Dep Mngr Frm-Frm Mem requests -- */
+ total_memtabs_req += i4_num_me_frm_pllel * ihevce_dmgr_get_num_mem_recs();
+
+ /* --- Prev. frame PreEnc L1 Done Dep Mngr Frm-Frm Mem requests -- */
+ total_memtabs_req += ihevce_dmgr_get_num_mem_recs();
+
+ /* --- Prev. frame PreEnc HME Done Dep Mngr Frm-Frm Mem requests -- */
+ total_memtabs_req += ihevce_dmgr_get_num_mem_recs();
+
+ /* --- Prev. frame PreEnc L0 Done Dep Mngr Frm-Frm Mem requests -- */
+ total_memtabs_req += ihevce_dmgr_get_num_mem_recs();
+
+ /* --- ME-Prev Recon Dep Mngr Row-Frm Mem requests -- */
+ total_memtabs_req +=
+ (max_num_ref_pics + 1 + NUM_EXTRA_RECON_BUFS) * ihevce_dmgr_get_num_mem_recs();
+
+ /* ----- allocate memory for memtabs --- */
+ {
+ iv_mem_rec_t s_memtab;
+
+ s_memtab.i4_size = sizeof(iv_mem_rec_t);
+ s_memtab.i4_mem_size = total_memtabs_req * sizeof(iv_mem_rec_t);
+ s_memtab.e_mem_type = IV_EXT_CACHEABLE_NORMAL_MEM;
+ s_memtab.i4_mem_alignment = 4;
+
+ ps_intrf_ctxt->ihevce_mem_alloc(
+ ps_intrf_ctxt->pv_mem_mgr_hdl, &ps_enc_ctxt->ps_stat_prms->s_sys_api, &s_memtab);
+ if(s_memtab.pv_base == NULL)
+ {
+ ps_intrf_ctxt->i4_error_code = IHEVCE_CANNOT_ALLOCATE_MEMORY;
+ return;
+ }
+
+ ps_memtab = (iv_mem_rec_t *)s_memtab.pv_base;
+ }
+
+ /* --------------------------------------------------------------------- */
+ /* ------------------ Collating memory requirements ------------------- */
+ /* --------------------------------------------------------------------- */
+
+ /* ----------- Tiles mem requests -------------*/
+ total_memtabs_used += ihevce_tiles_get_mem_recs(
+ &ps_memtab[total_memtabs_used],
+ ps_enc_ctxt->ps_stat_prms,
+ &ps_enc_ctxt->s_frm_ctb_prms,
+ i4_resolution_id,
+ space_for_mem_in_enc_grp);
+
+ /* ---------- Enc loop Mem requests --------- */
+ total_memtabs_used += ihevce_enc_loop_get_mem_recs(
+ &ps_memtab[total_memtabs_used],
+ ps_enc_ctxt->ps_stat_prms,
+ ps_enc_ctxt->s_multi_thrd.i4_num_enc_proc_thrds,
+ i4_num_bitrate_inst,
+ i4_num_enc_loop_frm_pllel,
+ space_for_mem_in_enc_grp,
+ i4_resolution_id);
+ /* ---------- ME Mem requests --------------- */
+ total_memtabs_used += ihevce_me_get_mem_recs(
+ &ps_memtab[total_memtabs_used],
+ ps_enc_ctxt->ps_stat_prms,
+ ps_enc_ctxt->s_multi_thrd.i4_num_enc_proc_thrds,
+ space_for_mem_in_enc_grp,
+ i4_resolution_id,
+ i4_num_me_frm_pllel);
+
+ /* ---------- Coarse ME Mem requests --------------- */
+ total_memtabs_used += ihevce_coarse_me_get_mem_recs(
+ &ps_memtab[total_memtabs_used],
+ ps_enc_ctxt->ps_stat_prms,
+ ps_enc_ctxt->s_multi_thrd.i4_num_pre_enc_proc_thrds,
+ space_for_mem_in_pre_enc_grp,
+ i4_resolution_id);
+ /* ---------- IPE Mem requests -------------- */
+ total_memtabs_used += ihevce_ipe_get_mem_recs(
+ &ps_memtab[total_memtabs_used],
+ ps_enc_ctxt->s_multi_thrd.i4_num_pre_enc_proc_thrds,
+ space_for_mem_in_pre_enc_grp);
+ /* ---------- ECD Mem requests -------------- */
+ i4_num_bitrate_inst = ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id]
+ .i4_num_bitrate_instances;
+ for(i = 0; i < i4_num_bitrate_inst; i++)
+ {
+ total_memtabs_used += ihevce_entropy_get_mem_recs(
+ &ps_memtab[total_memtabs_used],
+ ps_enc_ctxt->ps_stat_prms,
+ space_for_mem_in_pre_enc_grp,
+ i4_resolution_id);
+ }
+
+ if(0 == i4_resolution_id)
+ {
+ /* ---------- LAP Mem requests--------------- */
+ total_memtabs_used +=
+ ihevce_lap_get_mem_recs(&ps_memtab[total_memtabs_used], space_for_mem_in_pre_enc_grp);
+ }
+
+ /* -------- DECOMPOSITION PRE INTRA Mem requests-------- */
+ total_memtabs_used += ihevce_decomp_pre_intra_get_mem_recs(
+ &ps_memtab[total_memtabs_used],
+ ps_enc_ctxt->s_multi_thrd.i4_num_pre_enc_proc_thrds,
+ space_for_mem_in_pre_enc_grp);
+
+ /* ---------- RC Mem requests --------------- */
+ for(i = 0; i < i4_num_bitrate_inst; i++)
+ {
+ total_memtabs_used += ihevce_rc_get_mem_recs(
+ &ps_memtab[total_memtabs_used],
+ ps_enc_ctxt->ps_stat_prms,
+ space_for_mem_in_pre_enc_grp,
+ &ps_enc_ctxt->ps_stat_prms->s_sys_api);
+ }
+
+ /* ---------- System Mem requests ----------- */
+
+ /* allocate memory for pps tile */
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = (IV_MEM_TYPE_T)space_for_mem_in_enc_grp;
+
+ if(1 == ps_enc_ctxt->ps_stat_prms->s_app_tile_params.i4_tiles_enabled_flag)
+ {
+ ps_memtab[total_memtabs_used].i4_mem_size =
+ (ps_enc_ctxt->ps_stat_prms->s_app_tile_params.i4_num_tile_cols *
+ ps_enc_ctxt->ps_stat_prms->s_app_tile_params.i4_num_tile_rows) *
+ (sizeof(tile_t));
+ }
+ else
+ {
+ ps_memtab[total_memtabs_used].i4_mem_size = sizeof(tile_t);
+ }
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+ total_system_memtabs++;
+
+ /* recon picture buffer pointer array */
+ for(i = 0; i < i4_num_bitrate_inst; i++)
+ {
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = (IV_MEM_TYPE_T)space_for_mem_in_enc_grp;
+
+ ps_memtab[total_memtabs_used].i4_mem_size =
+ (max_num_ref_pics + 1 + NUM_EXTRA_RECON_BUFS) * (sizeof(recon_pic_buf_t *));
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+ total_system_memtabs++;
+ }
+
+ /* recon picture buffers structures */
+ for(i = 0; i < i4_num_bitrate_inst; i++)
+ {
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = (IV_MEM_TYPE_T)space_for_mem_in_enc_grp;
+
+ ps_memtab[total_memtabs_used].i4_mem_size =
+ (max_num_ref_pics + 1 + NUM_EXTRA_RECON_BUFS) * (sizeof(recon_pic_buf_t));
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+ total_system_memtabs++;
+ }
+
+ /* reference/recon picture buffers */
+ for(i = 0; i < i4_num_bitrate_inst; i++)
+ {
+ WORD32 i4_chroma_buf_size_shift =
+ -(ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms.i4_internal_bit_depth <= 8) +
+ (ps_enc_ctxt->ps_stat_prms->s_src_prms.i4_chr_format == IV_YUV_422SP_UV);
+
+ buf_size = ((num_ctb_horz * ctb_size) + (PAD_HORZ << 1));
+ buf_size = buf_size * ((num_ctb_vert * ctb_size) + (PAD_VERT << 1));
+ buf_size = buf_size * (max_num_ref_pics + 1 + NUM_EXTRA_RECON_BUFS);
+
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = (IV_MEM_TYPE_T)space_for_mem_in_enc_grp;
+
+ /* If HBD, both 8bit and 16 bit luma buffers are required, whereas only 16bit chroma buffers are required */
+ ps_memtab[total_memtabs_used].i4_mem_size =
+ /* Luma */
+ (buf_size * ((ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms.i4_internal_bit_depth > 8)
+ ? BUFFER_SIZE_MULTIPLIER_IF_HBD
+ : 1)) +
+ /* Chroma */
+ (SHL_NEG(buf_size, i4_chroma_buf_size_shift));
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+ total_system_memtabs++;
+ }
+ /* reference/recon picture subpel planes */
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = (IV_MEM_TYPE_T)space_for_mem_in_enc_grp;
+
+ ps_memtab[total_memtabs_used].i4_mem_size =
+ buf_size * (3 + L0ME_IN_OPENLOOP_MODE); /* 3 planes */
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+ total_system_memtabs++;
+ /* reference colocated MV bank */
+ /* Keep memory for an extra CTB at the right and bottom of frame.
+ This extra space is needed by dist-encoding and unused in non-dist-encoding */
+ buf_size = (num_ctb_horz + 1) * (num_ctb_vert + 1) * num_pu_in_ctb;
+ buf_size = buf_size * sizeof(pu_col_mv_t) * (max_num_ref_pics + 1 + NUM_EXTRA_RECON_BUFS) *
+ i4_num_bitrate_inst;
+
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = (IV_MEM_TYPE_T)space_for_mem_in_enc_grp;
+
+ ps_memtab[total_memtabs_used].i4_mem_size = buf_size;
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+ total_system_memtabs++;
+
+ /* reference colocated MV bank map */
+ /* Keep memory for an extra CTB at the right and bottom of frame.
+ This extra space is needed by dist-encoding and unused in non-dist-encoding */
+ buf_size = (num_ctb_horz + 1) * (num_ctb_vert + 1) * num_pu_in_ctb;
+ buf_size = buf_size * sizeof(UWORD8) * (max_num_ref_pics + 1 + NUM_EXTRA_RECON_BUFS) *
+ i4_num_bitrate_inst;
+
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = (IV_MEM_TYPE_T)space_for_mem_in_enc_grp;
+
+ ps_memtab[total_memtabs_used].i4_mem_size = buf_size;
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+ total_system_memtabs++;
+
+ /* reference collocated MV bank map offsets map */
+ buf_size = num_ctb_horz * num_ctb_vert;
+ buf_size = buf_size * sizeof(UWORD16) * (max_num_ref_pics + 1 + NUM_EXTRA_RECON_BUFS) *
+ i4_num_bitrate_inst;
+
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = (IV_MEM_TYPE_T)space_for_mem_in_enc_grp;
+
+ ps_memtab[total_memtabs_used].i4_mem_size = buf_size;
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+ total_system_memtabs++;
+
+ /* reference colocated MV bank ctb offset */
+ buf_size = num_ctb_horz;
+ buf_size = buf_size * num_ctb_vert;
+ buf_size = buf_size * sizeof(UWORD32) * (max_num_ref_pics + 1 + NUM_EXTRA_RECON_BUFS) *
+ i4_num_bitrate_inst;
+
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = (IV_MEM_TYPE_T)space_for_mem_in_enc_grp;
+
+ ps_memtab[total_memtabs_used].i4_mem_size = buf_size;
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+ total_system_memtabs++;
+
+ /* recon picture buffer pointer array for pre enc group */
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = (IV_MEM_TYPE_T)space_for_mem_in_enc_grp;
+
+ ps_memtab[total_memtabs_used].i4_mem_size =
+ (max_num_ref_pics + 1) * (sizeof(recon_pic_buf_t *));
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+ total_system_memtabs++;
+
+ /* recon picture buffers structures for pre enc group */
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = (IV_MEM_TYPE_T)space_for_mem_in_enc_grp;
+
+ ps_memtab[total_memtabs_used].i4_mem_size = (max_num_ref_pics + 1) * (sizeof(recon_pic_buf_t));
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+ total_system_memtabs++;
+ {
+ num_input_buf_per_queue = ihevce_lap_get_num_ip_bufs(&ps_enc_ctxt->s_lap_stat_prms);
+ {
+ WORD32 i4_count_temp = 0, i4_last_queue_length;
+
+ /*First allocate the memory for the buffer based on resolution*/
+ WORD32 ctb_align_pic_wd = ps_enc_ctxt->s_runtime_tgt_params.i4_width +
+ SET_CTB_ALIGN(
+ ps_enc_ctxt->s_runtime_tgt_params.i4_width,
+ ps_enc_ctxt->s_frm_ctb_prms.i4_ctb_size);
+
+ WORD32 ctb_align_pic_ht = ps_enc_ctxt->s_runtime_tgt_params.i4_height +
+ SET_CTB_ALIGN(
+ ps_enc_ctxt->s_runtime_tgt_params.i4_height,
+ ps_enc_ctxt->s_frm_ctb_prms.i4_ctb_size);
+
+ i4_last_queue_length = (num_input_buf_per_queue % MAX_QUEUE);
+
+ if((num_input_buf_per_queue % MAX_QUEUE) == 0)
+ i4_last_queue_length = MAX_QUEUE;
+
+ ps_enc_ctxt->i4_num_input_buf_per_queue = num_input_buf_per_queue;
+ i4_yuv_min_size =
+ (ctb_align_pic_wd * ctb_align_pic_ht) +
+ ((ps_enc_ctxt->ps_stat_prms->s_src_prms.i4_chr_format == IV_YUV_422SP_UV)
+ ? (ctb_align_pic_wd * ctb_align_pic_ht)
+ : ((ctb_align_pic_wd * ctb_align_pic_ht) >> 1));
+ i4_luma_min_size = (ctb_align_pic_wd * ctb_align_pic_ht);
+
+ /* In order to allocate memory for large buffer sizes that would overflow WORD32, the memtabs are split using i4_total_queues and MAX_QUEUE */
+ i4_total_queues = num_input_buf_per_queue / MAX_QUEUE;
+
+ if((num_input_buf_per_queue % MAX_QUEUE) != 0)
+ {
+ i4_total_queues++;
+ }
+
+ ASSERT(i4_total_queues < 5);
+
+ for(i4_count_temp = 0; i4_count_temp < i4_total_queues; i4_count_temp++)
+ {
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 32;
+
+ ps_memtab[total_memtabs_used].e_mem_type = (IV_MEM_TYPE_T)space_for_mem_in_enc_grp;
+ /* Memory size for the yuv buffer of one frame * number of inputs required to be stored in the queue */
+ if((i4_count_temp < (i4_total_queues - 1)))
+ ps_memtab[total_memtabs_used].i4_mem_size = i4_yuv_min_size * MAX_QUEUE;
+ else
+ ps_memtab[total_memtabs_used].i4_mem_size =
+ (i4_yuv_min_size)*i4_last_queue_length;
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+ total_system_memtabs++;
+ }
+ }
+ /* memory for input buffer (ihevce_lap_enc_buf_t) pointer array */
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = (IV_MEM_TYPE_T)space_for_mem_in_enc_grp;
+
+ ps_memtab[total_memtabs_used].i4_mem_size =
+ (num_input_buf_per_queue) * (sizeof(ihevce_lap_enc_buf_t *));
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+ total_system_memtabs++;
+
+ /* input buffer (ihevce_lap_enc_buf_t) structures */
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = (IV_MEM_TYPE_T)space_for_mem_in_enc_grp;
+
+ ps_memtab[total_memtabs_used].i4_mem_size =
+ (num_input_buf_per_queue) * (sizeof(ihevce_lap_enc_buf_t));
+ /* increment the memtab counter */
+ total_memtabs_used++;
+ total_system_memtabs++;
+
+ /*input synch ctrl command*/
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = (IV_MEM_TYPE_T)space_for_mem_in_enc_grp;
+
+ ps_memtab[total_memtabs_used].i4_mem_size =
+ (num_input_buf_per_queue) * (ENC_COMMAND_BUFF_SIZE);
+
+ total_memtabs_used++;
+ total_system_memtabs++;
+ }
+
+ /* Pre-encode/encode coding buffer pointer array */
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = (IV_MEM_TYPE_T)space_for_mem_in_enc_grp;
+
+ ps_memtab[total_memtabs_used].i4_mem_size =
+ (num_bufs_preenc_me_que) * (sizeof(pre_enc_me_ctxt_t *));
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+ total_system_memtabs++;
+
+ /* Pre-encode/encode coding buffer (pre_enc_me_ctxt_t) structures */
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = (IV_MEM_TYPE_T)space_for_mem_in_enc_grp;
+
+ ps_memtab[total_memtabs_used].i4_mem_size =
+ (num_bufs_preenc_me_que) * (sizeof(pre_enc_me_ctxt_t));
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+ total_system_memtabs++;
+
+ /* Pre-encode L0 IPE output to ME buffer pointer*/
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = (IV_MEM_TYPE_T)space_for_mem_in_enc_grp;
+
+ ps_memtab[total_memtabs_used].i4_mem_size =
+ (num_bufs_L0_ipe_enc) * (sizeof(pre_enc_L0_ipe_encloop_ctxt_t *));
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+ total_system_memtabs++;
+
+ /* Pre-encode L0 IPE output to ME buffer */
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = (IV_MEM_TYPE_T)space_for_mem_in_enc_grp;
+
+ ps_memtab[total_memtabs_used].i4_mem_size =
+ (num_bufs_L0_ipe_enc) * (sizeof(pre_enc_L0_ipe_encloop_ctxt_t));
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+ total_system_memtabs++;
+
+ /* CTB analyse Frame level */
+ buf_size = num_ctb_horz;
+ buf_size = buf_size * num_ctb_vert;
+ buf_size = buf_size * sizeof(ctb_analyse_t) * num_bufs_preenc_me_que;
+
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = (IV_MEM_TYPE_T)space_for_mem_in_enc_grp;
+
+ ps_memtab[total_memtabs_used].i4_mem_size = buf_size;
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+ total_system_memtabs++;
+
+ /* ME layer ctxt pointer */
+ buf_size = sizeof(layer_ctxt_t) * num_bufs_preenc_me_que;
+
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = (IV_MEM_TYPE_T)space_for_mem_in_enc_grp;
+
+ ps_memtab[total_memtabs_used].i4_mem_size = buf_size;
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+ total_system_memtabs++;
+
+ /* ME layer MV bank ctxt pointer */
+ buf_size = sizeof(layer_mv_t) * num_bufs_preenc_me_que;
+
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = (IV_MEM_TYPE_T)space_for_mem_in_enc_grp;
+
+ ps_memtab[total_memtabs_used].i4_mem_size = buf_size;
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+ total_system_memtabs++;
+
+ /* ME layer MV bank pointer */
+ buf_size = mv_bank_size * num_bufs_preenc_me_que;
+
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = (IV_MEM_TYPE_T)space_for_mem_in_enc_grp;
+
+ ps_memtab[total_memtabs_used].i4_mem_size = buf_size;
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+ total_system_memtabs++;
+
+ /* ME layer ref idx bank pointer */
+ buf_size = ref_idx_bank_size * num_bufs_preenc_me_que;
+
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = (IV_MEM_TYPE_T)space_for_mem_in_enc_grp;
+
+ ps_memtab[total_memtabs_used].i4_mem_size = buf_size;
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+ total_system_memtabs++;
+ /* Frame level array to store 8x8 intra cost */
+ buf_size = (num_ctb_horz * ctb_size) >> 3;
+ buf_size *= ((num_ctb_vert * ctb_size) >> 3);
+ buf_size *= sizeof(double) * num_bufs_preenc_me_que;
+
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = (IV_MEM_TYPE_T)space_for_mem_in_enc_grp;
+
+ ps_memtab[total_memtabs_used].i4_mem_size = buf_size;
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+ total_system_memtabs++;
+
+ /* Frame level array to store ctb intra cost and modes */
+ buf_size = (num_ctb_horz * num_ctb_vert);
+ buf_size *= sizeof(ipe_l0_ctb_analyse_for_me_t) * num_bufs_L0_ipe_enc;
+
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = (IV_MEM_TYPE_T)space_for_mem_in_enc_grp;
+
+ ps_memtab[total_memtabs_used].i4_mem_size = buf_size;
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+ total_system_memtabs++;
+ /*
+ * L0 8x8 cur satd for qp mod
+ */
+ buf_size = (a_ctb_align_wd[0] >> 3) * (a_ctb_align_ht[0] >> 3) * sizeof(ihevce_8x8_L0_satd_t) *
+ num_bufs_preenc_me_que;
+
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = (IV_MEM_TYPE_T)space_for_mem_in_pre_enc_grp;
+
+ ps_memtab[total_memtabs_used].i4_mem_size = buf_size;
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+ total_system_memtabs++;
+
+ /*
+ * L0 8x8 cur mean for qp mod
+ */
+ buf_size = (a_ctb_align_wd[0] >> 3) * (a_ctb_align_ht[0] >> 3) * sizeof(ihevce_8x8_L0_mean_t) *
+ num_bufs_preenc_me_que;
+
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = (IV_MEM_TYPE_T)space_for_mem_in_pre_enc_grp;
+
+ ps_memtab[total_memtabs_used].i4_mem_size = buf_size;
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+ total_system_memtabs++;
+
+ /*
+ * Layer early decision buffer L1 buf. Since the pre intra analysis always
+ * expects memory for ihevce_ed_blk_t for complete ctbs, align the width and
+ * height in layer to a multiple of 32.
+ */
+ buf_size = (a_ctb_align_wd[1] >> 5) * (a_ctb_align_ht[1] >> 5) * sizeof(ihevce_ed_ctb_l1_t) *
+ num_bufs_preenc_me_que;
+
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = (IV_MEM_TYPE_T)space_for_mem_in_pre_enc_grp;
+
+ ps_memtab[total_memtabs_used].i4_mem_size = buf_size;
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+ total_system_memtabs++;
+
+ /*
+ * Layer early decision buffer L1 buf. Since the pre intra analysis always
+ * expects memory for ihevce_ed_blk_t for complete ctbs, align the width and
+ * height in layer to a multiple of 32.
+ */
+ buf_size = (a_ctb_align_wd[1] >> 2) * (a_ctb_align_ht[1] >> 2) * sizeof(ihevce_ed_blk_t) *
+ num_bufs_preenc_me_que;
+
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = (IV_MEM_TYPE_T)space_for_mem_in_pre_enc_grp;
+
+ ps_memtab[total_memtabs_used].i4_mem_size = buf_size;
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+ total_system_memtabs++;
+
+ /*
+ * Layer early decision buffer L2 buf. Since the pre intra analysis always
+ * expects memory for ihevce_ed_blk_t for complete ctbs, align the width and
+ * height in layer to a multiple of 16.
+ */
+ buf_size = (a_ctb_align_wd[2] >> 2) * (a_ctb_align_ht[2] >> 2) * sizeof(ihevce_ed_blk_t) *
+ num_bufs_preenc_me_que;
+
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = (IV_MEM_TYPE_T)space_for_mem_in_pre_enc_grp;
+
+ ps_memtab[total_memtabs_used].i4_mem_size = buf_size;
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+ total_system_memtabs++;
+
+ /* following is the buffer requirement of
+ que between me and enc*/
+
+ /* me/enc que buffer pointer array */
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = (IV_MEM_TYPE_T)space_for_mem_in_enc_grp;
+
+ ps_memtab[total_memtabs_used].i4_mem_size = (NUM_ME_ENC_BUFS) * (sizeof(me_enc_rdopt_ctxt_t *));
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+ total_system_memtabs++;
+
+ /* fme/enc que buffer structures */
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = (IV_MEM_TYPE_T)space_for_mem_in_enc_grp;
+
+ ps_memtab[total_memtabs_used].i4_mem_size = (NUM_ME_ENC_BUFS) * (sizeof(me_enc_rdopt_ctxt_t));
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+ total_system_memtabs++;
+
+ /* Job Queue related memory */
+ /* max num ctb rows is doubled to take care worst case */
+ /* requirements because of HME layers */
+ buf_size = (MAX_NUM_VERT_UNITS_FRM) * (NUM_ENC_JOBS_QUES)*NUM_ME_ENC_BUFS; //PING_PONG_BUF;
+ /* In tile case, based on the number of column tiles,
+ we will have separate jobQ per column tile */
+ if(1 == ps_enc_ctxt->ps_stat_prms->s_app_tile_params.i4_tiles_enabled_flag)
+ {
+ buf_size *= ps_enc_ctxt->ps_stat_prms->s_app_tile_params.i4_num_tile_cols;
+ }
+ buf_size *= sizeof(job_queue_t);
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = (IV_MEM_TYPE_T)space_for_mem_in_enc_grp;
+
+ ps_memtab[total_memtabs_used].i4_mem_size = buf_size;
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+ total_system_memtabs++;
+
+ /* cur_ctb_cu_tree_t Frame level */
+ buf_size = num_ctb_horz * MAX_NUM_NODES_CU_TREE;
+ buf_size = buf_size * num_ctb_vert;
+
+ /* ps_cu_analyse_inter buffer is used to populate outputs from ME after using cu analyse from IPE */
+ buf_size = buf_size * sizeof(cur_ctb_cu_tree_t) * NUM_ME_ENC_BUFS;
+
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = (IV_MEM_TYPE_T)space_for_mem_in_enc_grp;
+
+ ps_memtab[total_memtabs_used].i4_mem_size = buf_size;
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+ total_system_memtabs++;
+
+ /* me_ctb_data_t Frame level */
+ buf_size = num_ctb_horz * num_ctb_vert;
+
+ /* This buffer is used to */
+ buf_size = buf_size * sizeof(me_ctb_data_t) * NUM_ME_ENC_BUFS;
+
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = (IV_MEM_TYPE_T)space_for_mem_in_enc_grp;
+
+ ps_memtab[total_memtabs_used].i4_mem_size = buf_size;
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+ total_system_memtabs++;
+
+ /* following is for each bit-rate */
+ for(i = 0; i < i4_num_bitrate_inst; i++)
+ {
+ /* frame process/entropy coding buffer pointer array */
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = (IV_MEM_TYPE_T)space_for_mem_in_enc_grp;
+
+ ps_memtab[total_memtabs_used].i4_mem_size =
+ (NUM_FRMPROC_ENTCOD_BUFS) * (sizeof(frm_proc_ent_cod_ctxt_t *));
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+ total_system_memtabs++;
+
+ /* frame process/entropy coding buffer structures */
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = (IV_MEM_TYPE_T)space_for_mem_in_enc_grp;
+
+ ps_memtab[total_memtabs_used].i4_mem_size =
+ (NUM_FRMPROC_ENTCOD_BUFS) * (sizeof(frm_proc_ent_cod_ctxt_t));
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+ total_system_memtabs++;
+
+ /* CTB enc loop Frame level */
+ buf_size = num_ctb_horz;
+ buf_size = buf_size * num_ctb_vert;
+ buf_size = buf_size * sizeof(ctb_enc_loop_out_t) * NUM_FRMPROC_ENTCOD_BUFS;
+
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = (IV_MEM_TYPE_T)space_for_mem_in_enc_grp;
+
+ ps_memtab[total_memtabs_used].i4_mem_size = buf_size;
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+ total_system_memtabs++;
+
+ /* CU enc loop Frame level */
+ buf_size = num_ctb_horz * num_cu_in_ctb;
+ buf_size = buf_size * num_ctb_vert;
+ buf_size = buf_size * sizeof(cu_enc_loop_out_t) * NUM_FRMPROC_ENTCOD_BUFS;
+
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = (IV_MEM_TYPE_T)space_for_mem_in_enc_grp;
+
+ ps_memtab[total_memtabs_used].i4_mem_size = buf_size;
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+ total_system_memtabs++;
+
+ /* TU enc loop Frame level */
+ buf_size = num_ctb_horz * num_tu_in_ctb;
+ buf_size = buf_size * num_ctb_vert;
+ buf_size = buf_size * sizeof(tu_enc_loop_out_t) * NUM_FRMPROC_ENTCOD_BUFS;
+
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = (IV_MEM_TYPE_T)space_for_mem_in_enc_grp;
+
+ ps_memtab[total_memtabs_used].i4_mem_size = buf_size;
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+ total_system_memtabs++;
+
+ /* PU enc loop Frame level */
+ buf_size = num_ctb_horz * num_pu_in_ctb;
+ buf_size = buf_size * num_ctb_vert;
+ buf_size = buf_size * sizeof(pu_t) * NUM_FRMPROC_ENTCOD_BUFS;
+
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = (IV_MEM_TYPE_T)space_for_mem_in_enc_grp;
+
+ ps_memtab[total_memtabs_used].i4_mem_size = buf_size;
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+ total_system_memtabs++;
+
+ /* Coeffs Frame level */
+ buf_size =
+ num_ctb_horz * ((ps_enc_ctxt->ps_stat_prms->s_src_prms.i4_chr_format == IV_YUV_422SP_UV)
+ ? (num_tu_in_ctb << 1)
+ : ((num_tu_in_ctb * 3) >> 1));
+ buf_size = buf_size * num_ctb_vert;
+ buf_size = buf_size * sizeof(UWORD8) * MAX_SCAN_COEFFS_BYTES_4x4 * NUM_FRMPROC_ENTCOD_BUFS;
+
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = (IV_MEM_TYPE_T)space_for_mem_in_enc_grp;
+
+ ps_memtab[total_memtabs_used].i4_mem_size = buf_size;
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+ total_system_memtabs++;
+
+ /* SEI Payload Data */
+ buf_size = sizeof(UWORD8) * MAX_NUMBER_OF_SEI_PAYLOAD * MAX_SEI_PAYLOAD_PER_TLV *
+ NUM_FRMPROC_ENTCOD_BUFS;
+
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+ ps_memtab[total_memtabs_used].e_mem_type = (IV_MEM_TYPE_T)space_for_mem_in_enc_grp;
+
+ ps_memtab[total_memtabs_used].i4_mem_size = buf_size;
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+ total_system_memtabs++;
+ }
+
+ /* ------ Working mem frame level -------*/
+ buf_size = ((num_ctb_horz * ctb_size) + 16);
+ buf_size *= ((num_ctb_vert * ctb_size) + 23);
+ buf_size *= sizeof(WORD16);
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = (IV_MEM_TYPE_T)space_for_mem_in_enc_grp;
+
+ ps_memtab[total_memtabs_used].i4_mem_size = buf_size;
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+ total_system_memtabs++;
+ /* Job Queue related memory */
+ /* max num ctb rows is doubled to take care worst case */
+ /* requirements because of HME layers */
+ buf_size = (MAX_NUM_VERT_UNITS_FRM) * (NUM_PRE_ENC_JOBS_QUES) *
+ (MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME); //PING_PONG_BUF;
+
+ buf_size *= sizeof(job_queue_t);
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = (IV_MEM_TYPE_T)space_for_mem_in_enc_grp;
+
+ ps_memtab[total_memtabs_used].i4_mem_size = buf_size;
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+ total_system_memtabs++;
+
+ /* check on the system memtabs */
+ ASSERT(total_system_memtabs <= TOTAL_SYSTEM_MEM_RECS);
+
+ /* -----Frameproc Entcod Que Mem requests --- */
+ /* derive for each bit-rate */
+ for(i = 0; i < i4_num_bitrate_inst; i++)
+ {
+ total_memtabs_used += ihevce_buff_que_get_mem_recs(
+ &ps_memtab[total_memtabs_used], NUM_FRMPROC_ENTCOD_BUFS, space_for_mem_in_enc_grp);
+ }
+ /*mrs: Request memory for the input yuv queue*/
+ total_memtabs_used += ihevce_buff_que_get_mem_recs(
+ &ps_memtab[total_memtabs_used], num_input_buf_per_queue, space_for_mem_in_enc_grp);
+ /*------ The encoder owned input buffer queue*/
+ /* -----Pre-encode Encode Que Mem requests --- */
+ total_memtabs_used += ihevce_buff_que_get_mem_recs(
+ &ps_memtab[total_memtabs_used], num_bufs_preenc_me_que, space_for_mem_in_enc_grp);
+
+ /* -----ME / Enc-RD opt Que Mem requests --- */
+ total_memtabs_used += ihevce_buff_que_get_mem_recs(
+ &ps_memtab[total_memtabs_used], NUM_ME_ENC_BUFS, space_for_mem_in_enc_grp);
+
+ /* -----Pre-encode L0 IPE to enc Que Mem requests --- */
+ total_memtabs_used += ihevce_buff_que_get_mem_recs(
+ &ps_memtab[total_memtabs_used], num_bufs_L0_ipe_enc, space_for_mem_in_enc_grp);
+
+ /* ---------- Dependency Manager allocations -------- */
+ {
+ /* --- ME-EncLoop Dep Mngr Row-Row Mem requests -- */
+ for(ctr = 0; ctr < NUM_ME_ENC_BUFS; ctr++)
+ {
+ total_memtabs_used += ihevce_dmgr_get_mem_recs(
+ &ps_memtab[total_memtabs_used],
+ DEP_MNGR_ROW_ROW_SYNC,
+ (a_ctb_align_ht[0] / ctb_size),
+ ps_enc_ctxt->ps_stat_prms->s_app_tile_params
+ .i4_num_tile_cols, /* Number of Col Tiles */
+ ps_enc_ctxt->s_multi_thrd.i4_num_enc_proc_thrds,
+ space_for_mem_in_enc_grp);
+ }
+
+ for(ctr = 0; ctr < i4_num_enc_loop_frm_pllel; ctr++)
+ {
+ /* --- Prev. frame EncLoop Done Dep Mngr Frm-Frm Mem requests -- */
+ total_memtabs_used += ihevce_dmgr_get_mem_recs(
+ &ps_memtab[total_memtabs_used],
+ DEP_MNGR_FRM_FRM_SYNC,
+ (a_ctb_align_ht[0] / ctb_size),
+ 1, /* Number of Col Tiles : Don't care for FRM_FRM */
+ ps_enc_ctxt->s_multi_thrd.i4_num_enc_proc_thrds,
+ space_for_mem_in_enc_grp);
+ }
+ /* --- Prev. frame EncLoop Done for re-encode Dep Mngr Frm-Frm Mem requests -- */
+ total_memtabs_used += ihevce_dmgr_get_mem_recs(
+ &ps_memtab[total_memtabs_used],
+ DEP_MNGR_FRM_FRM_SYNC,
+ (a_ctb_align_ht[0] / ctb_size),
+ 1, /* Number of Col Tiles : Don't care for FRM_FRM */
+ ps_enc_ctxt->s_multi_thrd.i4_num_enc_proc_thrds,
+ space_for_mem_in_enc_grp);
+ for(ctr = 0; ctr < i4_num_me_frm_pllel; ctr++)
+ {
+ /* --- Prev. frame ME Done Dep Mngr Frm-Frm Mem requests -- */
+ total_memtabs_used += ihevce_dmgr_get_mem_recs(
+ &ps_memtab[total_memtabs_used],
+ DEP_MNGR_FRM_FRM_SYNC,
+ (a_ctb_align_ht[0] / ctb_size),
+ 1, /* Number of Col Tiles : Don't care for FRM_FRM */
+ ps_enc_ctxt->s_multi_thrd.i4_num_enc_proc_thrds,
+ space_for_mem_in_enc_grp);
+ }
+
+ /* --- Prev. frame PreEnc L1 Done Dep Mngr Frm-Frm Mem requests -- */
+ total_memtabs_used += ihevce_dmgr_get_mem_recs(
+ &ps_memtab[total_memtabs_used],
+ DEP_MNGR_FRM_FRM_SYNC,
+ (a_ctb_align_ht[0] / ctb_size),
+ 1, /* Number of Col Tiles : Don't care for FRM_FRM */
+ ps_enc_ctxt->s_multi_thrd.i4_num_pre_enc_proc_thrds,
+ space_for_mem_in_enc_grp);
+
+ /* --- Prev. frame PreEnc HME Done Dep Mngr Frm-Frm Mem requests -- */
+ total_memtabs_used += ihevce_dmgr_get_mem_recs(
+ &ps_memtab[total_memtabs_used],
+ DEP_MNGR_FRM_FRM_SYNC,
+ (a_ctb_align_ht[0] / ctb_size),
+ 1, /* Number of Col Tiles : Don't care for FRM_FRM */
+ ps_enc_ctxt->s_multi_thrd.i4_num_pre_enc_proc_thrds,
+ space_for_mem_in_enc_grp);
+
+ /* --- Prev. frame PreEnc L0 Done Dep Mngr Frm-Frm Mem requests -- */
+ total_memtabs_used += ihevce_dmgr_get_mem_recs(
+ &ps_memtab[total_memtabs_used],
+ DEP_MNGR_FRM_FRM_SYNC,
+ (a_ctb_align_ht[0] / ctb_size),
+ 1, /* Number of Col Tiles : Don't care for FRM_FRM */
+ ps_enc_ctxt->s_multi_thrd.i4_num_pre_enc_proc_thrds,
+ space_for_mem_in_enc_grp);
+
+ /* --- ME-Prev Recon Dep Mngr Row-Frm Mem requests -- */
+ for(ctr = 0; ctr < (max_num_ref_pics + 1 + NUM_EXTRA_RECON_BUFS); ctr++)
+ {
+ WORD32 i4_num_units = num_ctb_horz * num_ctb_vert;
+
+ total_memtabs_used += ihevce_dmgr_map_get_mem_recs(
+ &ps_memtab[total_memtabs_used],
+ i4_num_units,
+ ps_enc_ctxt->s_multi_thrd.i4_num_enc_proc_thrds,
+ space_for_mem_in_enc_grp);
+ }
+ }
+
+ /* ----- allocate memory as per requests ---- */
+
+ /* check on memtabs requested v/s memtabs used */
+ //ittiam : should put an assert
+
+ //ASSERT(total_memtabs_used == total_memtabs_req);
+
+ for(ctr = 0; ctr < total_memtabs_used; ctr++)
+ {
+ UWORD8 *pu1_mem = NULL;
+ ps_intrf_ctxt->ihevce_mem_alloc(
+ ps_intrf_ctxt->pv_mem_mgr_hdl, &ps_enc_ctxt->ps_stat_prms->s_sys_api, &ps_memtab[ctr]);
+
+ pu1_mem = (UWORD8 *)ps_memtab[ctr].pv_base;
+
+ if(NULL == pu1_mem)
+ {
+ ps_intrf_ctxt->i4_error_code = IHEVCE_CANNOT_ALLOCATE_MEMORY;
+ return;
+ }
+
+ memset(pu1_mem, 0, ps_memtab[ctr].i4_mem_size);
+ }
+
+ /* --------------------------------------------------------------------- */
+ /* --------- Initialisation of Modules & System memory ----------------- */
+ /* --------------------------------------------------------------------- */
+
+ /* store the final allocated memtabs */
+ ps_enc_ctxt->s_mem_mngr.i4_num_create_memtabs = total_memtabs_used;
+ ps_enc_ctxt->s_mem_mngr.ps_create_memtab = ps_memtab;
+
+ /* ---------- Tiles Mem init --------- */
+ ps_enc_ctxt->ps_tile_params_base = (ihevce_tile_params_t *)ihevce_tiles_mem_init(
+ ps_memtab, ps_enc_ctxt->ps_stat_prms, ps_enc_ctxt, i4_resolution_id);
+
+ ps_memtab += ihevce_tiles_get_num_mem_recs();
+
+ /* ---------- Enc loop Mem init --------- */
+ ps_enc_ctxt->s_module_ctxt.pv_enc_loop_ctxt = ihevce_enc_loop_init(
+ ps_memtab,
+ ps_enc_ctxt->ps_stat_prms,
+ ps_enc_ctxt->s_multi_thrd.i4_num_enc_proc_thrds,
+ ps_intrf_ctxt->pv_osal_handle,
+ &ps_enc_ctxt->s_func_selector,
+ &ps_enc_ctxt->s_rc_quant,
+ ps_enc_ctxt->ps_tile_params_base,
+ i4_resolution_id,
+ i4_num_enc_loop_frm_pllel,
+ ps_enc_ctxt->u1_is_popcnt_available);
+
+ ps_memtab += ihevce_enc_loop_get_num_mem_recs(i4_num_bitrate_inst, i4_num_enc_loop_frm_pllel);
+ /* ---------- ME Mem init --------------- */
+ ps_enc_ctxt->s_module_ctxt.pv_me_ctxt = ihevce_me_init(
+ ps_memtab,
+ ps_enc_ctxt->ps_stat_prms,
+ ps_enc_ctxt->s_multi_thrd.i4_num_enc_proc_thrds,
+ ps_intrf_ctxt->pv_osal_handle,
+ &ps_enc_ctxt->s_rc_quant,
+ (void *)ps_enc_ctxt->ps_tile_params_base,
+ i4_resolution_id,
+ i4_num_me_frm_pllel,
+ ps_enc_ctxt->u1_is_popcnt_available);
+
+ ps_memtab += ihevce_me_get_num_mem_recs(i4_num_me_frm_pllel);
+
+ /* ---------- Coarse ME Mem init --------------- */
+ ps_enc_ctxt->s_module_ctxt.pv_coarse_me_ctxt = ihevce_coarse_me_init(
+ ps_memtab,
+ ps_enc_ctxt->ps_stat_prms,
+ ps_enc_ctxt->s_multi_thrd.i4_num_pre_enc_proc_thrds,
+ ps_intrf_ctxt->pv_osal_handle,
+ i4_resolution_id,
+ ps_enc_ctxt->u1_is_popcnt_available);
+
+ ps_memtab += ihevce_coarse_me_get_num_mem_recs();
+ /* ---------- IPE Mem init -------------- */
+ ps_enc_ctxt->s_module_ctxt.pv_ipe_ctxt = ihevce_ipe_init(
+ ps_memtab,
+ ps_enc_ctxt->ps_stat_prms,
+ ps_enc_ctxt->s_multi_thrd.i4_num_pre_enc_proc_thrds,
+ ps_enc_ctxt->i4_ref_mbr_id,
+ &ps_enc_ctxt->s_func_selector,
+ &ps_enc_ctxt->s_rc_quant,
+ i4_resolution_id,
+ ps_enc_ctxt->u1_is_popcnt_available);
+
+ ps_memtab += ihevce_ipe_get_num_mem_recs();
+
+ ps_enc_ctxt->s_rc_quant.i2_max_qp = 51;
+ ps_enc_ctxt->s_rc_quant.i2_min_qp = 0;
+ ps_enc_ctxt->s_rc_quant.i1_qp_offset = 0;
+ ps_enc_ctxt->s_rc_quant.i2_max_qscale =
+ 228 << 3; // Q3 format is maintained for accurate calc at lower qp
+ ps_enc_ctxt->s_rc_quant.i2_min_qscale = 1;
+
+ /* ---------- ECD Mem init -------------- */
+ for(i = 0; i < i4_num_bitrate_inst; i++)
+ {
+ ps_enc_ctxt->s_module_ctxt.apv_ent_cod_ctxt[i] = ihevce_entropy_init(
+ ps_memtab,
+ ps_enc_ctxt->ps_stat_prms,
+ (void *)ps_enc_ctxt->ps_tile_params_base,
+ i4_resolution_id);
+
+ ps_memtab += ihevce_entropy_get_num_mem_recs();
+ }
+
+ /* ---------- LAP Mem init--------------- */
+ if(i4_resolution_id == 0)
+ {
+ ps_enc_ctxt->s_module_ctxt.pv_lap_ctxt =
+ ihevce_lap_init(ps_memtab, &ps_enc_ctxt->s_lap_stat_prms, ps_enc_ctxt->ps_stat_prms);
+
+ ps_memtab += ihevce_lap_get_num_mem_recs();
+ }
+ /*-----------DECOMPOSITION PRE INTRA init----*/
+ ps_enc_ctxt->s_module_ctxt.pv_decomp_pre_intra_ctxt = ihevce_decomp_pre_intra_init(
+ ps_memtab,
+ ps_enc_ctxt->ps_stat_prms,
+ ps_enc_ctxt->s_multi_thrd.i4_num_pre_enc_proc_thrds,
+ &ps_enc_ctxt->s_func_selector,
+ i4_resolution_id,
+ ps_enc_ctxt->u1_is_popcnt_available);
+
+ ps_memtab += ihevce_decomp_pre_intra_get_num_mem_recs();
+
+ /* ---------- RC Mem init --------------- */
+ for(i = 0; i < i4_num_bitrate_inst; i++)
+ {
+ /* swapping of buf_id for 0th and reference bitrate location, as the encoder
+ always assumes the 0th loc for the reference bitrate and the app must receive
+ output in the configured order */
+ if(i == 0)
+ {
+ ps_enc_ctxt->s_module_ctxt.apv_rc_ctxt[i] = ihevce_rc_mem_init(
+ ps_memtab,
+ ps_enc_ctxt->ps_stat_prms,
+ ps_enc_ctxt->i4_ref_mbr_id,
+ &ps_enc_ctxt->s_rc_quant,
+ ps_enc_ctxt->i4_resolution_id,
+ ps_enc_ctxt->i4_look_ahead_frames_in_first_pass);
+ }
+ else if(i == ps_enc_ctxt->i4_ref_mbr_id)
+ {
+ ps_enc_ctxt->s_module_ctxt.apv_rc_ctxt[i] = ihevce_rc_mem_init(
+ ps_memtab,
+ ps_enc_ctxt->ps_stat_prms,
+ 0,
+ &ps_enc_ctxt->s_rc_quant,
+ ps_enc_ctxt->i4_resolution_id,
+ ps_enc_ctxt->i4_look_ahead_frames_in_first_pass);
+ }
+ else
+ {
+ ps_enc_ctxt->s_module_ctxt.apv_rc_ctxt[i] = ihevce_rc_mem_init(
+ ps_memtab,
+ ps_enc_ctxt->ps_stat_prms,
+ i,
+ &ps_enc_ctxt->s_rc_quant,
+ ps_enc_ctxt->i4_resolution_id,
+ ps_enc_ctxt->i4_look_ahead_frames_in_first_pass);
+ }
+ ps_memtab += ihevce_rc_get_num_mem_recs();
+ }
+
+ /* ---------- System Mem init ----------- */
+ {
+ recon_pic_buf_t **pps_pic_bufs[IHEVCE_MAX_NUM_BITRATES];
+ recon_pic_buf_t *ps_pic_bufs[IHEVCE_MAX_NUM_BITRATES];
+ void *pv_recon_buf[IHEVCE_MAX_NUM_BITRATES];
+#if(SRC_PADDING_FOR_TRAQO || ENABLE_SSD_CALC_RC)
+ void *pv_recon_buf_source[IHEVCE_MAX_NUM_BITRATES] = { NULL };
+#endif
+ void *pv_uv_recon_buf[IHEVCE_MAX_NUM_BITRATES];
+ UWORD8 *pu1_subpel_buf;
+ pu_col_mv_t *ps_col_mv;
+ UWORD8 *pu1_col_mv_map;
+ UWORD16 *pu2_col_num_pu_map;
+ UWORD32 *pu4_col_mv_off;
+ WORD32 luma_frm_size;
+ WORD32 recon_stride; /* stride for Y and UV(interleave) */
+ WORD32 luma_frm_height; /* including padding */
+ WORD32 num_pu_in_frm;
+
+ /* pps tile memory */
+ for(i = 0; i < i4_num_bitrate_inst; i++)
+ {
+ ps_enc_ctxt->as_pps[i].ps_tile = (tile_t *)ps_memtab->pv_base;
+ }
+
+ ps_memtab++; /* increment the memtabs */
+
+ /* recon picture buffer pointer array */
+ for(i = 0; i < i4_num_bitrate_inst; i++)
+ {
+ pps_pic_bufs[i] = (recon_pic_buf_t **)ps_memtab->pv_base;
+ ps_memtab++; /* increment the memtabs */
+ }
+
+ /* recon picture buffers structures */
+ for(i = 0; i < i4_num_bitrate_inst; i++)
+ {
+ ps_pic_bufs[i] = (recon_pic_buf_t *)ps_memtab->pv_base;
+ ps_memtab++; /* increment the memtabs */
+ }
+
+ /* reference/recon picture buffers */
+ for(i = 0; i < i4_num_bitrate_inst; i++)
+ {
+ pv_recon_buf[i] = ps_memtab->pv_base;
+ ps_memtab++; /* increment the memtabs */
+ }
+ /* reference/recon picture subpel planes */
+ pu1_subpel_buf = (UWORD8 *)ps_memtab->pv_base;
+ /* increment the memtabs */
+ ps_memtab++;
+ /* reference colocated MV bank */
+ ps_col_mv = (pu_col_mv_t *)ps_memtab->pv_base;
+ /* increment the memtabs */
+ ps_memtab++;
+
+ /* reference colocated MV bank map */
+ pu1_col_mv_map = (UWORD8 *)ps_memtab->pv_base;
+ /* increment the memtabs */
+ ps_memtab++;
+
+ /* reference collocated MV bank map offsets map */
+ pu2_col_num_pu_map = (UWORD16 *)ps_memtab->pv_base;
+ /* increment the memtabs */
+ ps_memtab++;
+
+ /* reference colocated MV bank ctb offset */
+ pu4_col_mv_off = (UWORD32 *)ps_memtab->pv_base;
+ /* increment the memtabs */
+ ps_memtab++;
+
+ /* compute the stride and frame height after accounting for padding */
+ recon_stride = ((num_ctb_horz * ctb_size) + (PAD_HORZ << 1));
+ luma_frm_height = ((num_ctb_vert * ctb_size) + (PAD_VERT << 1));
+ luma_frm_size = recon_stride * luma_frm_height;
+ /* The subpel buffer is also incremented to take care of padding */
+ /* Both luma and subpel buffer use same stride */
+ pu1_subpel_buf += (recon_stride * PAD_VERT);
+ pu1_subpel_buf += PAD_HORZ;
+
+ /* Keep memory for an extra CTB at the right and bottom of frame.
+ This extra space is needed by dist-encoding and unused in non-dist-encoding */
+ num_pu_in_frm = (num_ctb_horz + 1) * num_pu_in_ctb * (num_ctb_vert + 1);
+
+ for(i = 0; i < i4_num_bitrate_inst; i++)
+ {
+ pv_uv_recon_buf[i] = pv_recon_buf[i];
+
+ /* increment the recon buffer to take care of padding */
+ pv_recon_buf[i] = (UWORD8 *)pv_recon_buf[i] + (recon_stride * PAD_VERT) + PAD_HORZ;
+
+ /* chroma buffer starts at the end of luma buffer */
+ pv_uv_recon_buf[i] = (UWORD8 *)pv_uv_recon_buf[i] + luma_frm_size;
+ if(ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms.i4_internal_bit_depth == 8)
+ {
+ /* increment the chroma recon buffer to take care of padding */
+ /* vert padding halved but horiz is same due to uv interleave */
+ pv_uv_recon_buf[i] =
+ (UWORD8 *)pv_uv_recon_buf[i] + (recon_stride * (PAD_VERT >> 1)) +
+ ((ps_enc_ctxt->ps_stat_prms->s_src_prms.i4_chr_format == IV_YUV_422SP_UV)
+ ? (recon_stride * (PAD_VERT >> 1))
+ : 0);
+ pv_uv_recon_buf[i] = (UWORD8 *)pv_uv_recon_buf[i] + PAD_HORZ;
+ }
+
+ /* loop to initialise all the memories */
+ /* initialize recon buffers */
+ /* only YUV buffers are allocated for each bit-rate instance.
+ Subpel buffers and col buffers are made NULL for auxiliary bit-rate instances,
+ since ME and IPE happen only for the reference bit-rate instance */
+ for(ctr = 0; ctr < (max_num_ref_pics + 1 + NUM_EXTRA_RECON_BUFS); ctr++)
+ {
+ pps_pic_bufs[i][ctr] =
+ ps_pic_bufs[i]; //check the index of pps [i] should be first or last index?!!
+
+ ps_pic_bufs[i]->s_yuv_buf_desc.i4_size = sizeof(iv_enc_yuv_buf_t);
+ ps_pic_bufs[i]->s_yuv_buf_desc.pv_y_buf = pv_recon_buf[i];
+ ps_pic_bufs[i]->s_yuv_buf_desc.pv_v_buf = NULL;
+ {
+ ps_pic_bufs[i]->s_yuv_buf_desc.pv_u_buf = pv_uv_recon_buf[i];
+ }
+ ps_pic_bufs[i]->apu1_y_sub_pel_planes[0] = ((i == 0) ? pu1_subpel_buf : NULL);
+ ps_pic_bufs[i]->apu1_y_sub_pel_planes[1] =
+ ((i == 0) ? (pu1_subpel_buf + luma_frm_size) : NULL);
+ ps_pic_bufs[i]->apu1_y_sub_pel_planes[2] =
+ ((i == 0) ? (pu1_subpel_buf + (luma_frm_size * 2)) : NULL);
+ ps_pic_bufs[i]->ps_frm_col_mv = ps_col_mv;
+ ps_pic_bufs[i]->pu1_frm_pu_map = pu1_col_mv_map;
+ ps_pic_bufs[i]->pu2_num_pu_map = pu2_col_num_pu_map;
+ ps_pic_bufs[i]->pu4_pu_off = pu4_col_mv_off;
+ ps_pic_bufs[i]->i4_is_free = 1;
+ ps_pic_bufs[i]->i4_poc = -1;
+ ps_pic_bufs[i]->i4_display_num = -1;
+ ps_pic_bufs[i]->i4_buf_id = ctr;
+
+ /* frame level buff increments */
+ ps_col_mv += num_pu_in_frm;
+ pu1_col_mv_map += num_pu_in_frm;
+ pu2_col_num_pu_map += (num_ctb_horz * num_ctb_vert);
+ pu4_col_mv_off += (num_ctb_horz * num_ctb_vert);
+
+ if(ps_enc_ctxt->ps_stat_prms->s_src_prms.i4_chr_format == IV_YUV_422SP_UV)
+ {
+ pv_recon_buf[i] = (UWORD8 *)pv_recon_buf[i] + (luma_frm_size << 1);
+ pv_uv_recon_buf[i] = (UWORD8 *)pv_uv_recon_buf[i] + (luma_frm_size << 1);
+ }
+ else
+ {
+ pv_recon_buf[i] = (UWORD8 *)pv_recon_buf[i] + ((3 * luma_frm_size) >> 1);
+ pv_uv_recon_buf[i] = (UWORD8 *)pv_uv_recon_buf[i] + ((3 * luma_frm_size) >> 1);
+ }
+ pu1_subpel_buf += ((3 + L0ME_IN_OPENLOOP_MODE) * luma_frm_size); /* 3 planes */
+ ps_pic_bufs[i]++;
+ } //ctr ends
+
+ /* store the queue pointer and num buffs to context */
+ ps_enc_ctxt->pps_recon_buf_q[i] = pps_pic_bufs[i];
+ ps_enc_ctxt->ai4_num_buf_recon_q[i] = (max_num_ref_pics + 1 + NUM_EXTRA_RECON_BUFS);
+
+ } //bitrate ctr ends
+
+ } //end of system memory init
+
+ /* Pre-encode group recon buffer container: NO buffers will be allocated / used */
+ {
+ recon_pic_buf_t *ps_pic_bufs;
+
+ /* recon picture buffer pointer array */
+ pps_pre_enc_pic_bufs = (recon_pic_buf_t **)ps_memtab->pv_base;
+ /* increment the memtabs */
+ ps_memtab++;
+
+ /* recon picture buffers structures */
+ ps_pic_bufs = (recon_pic_buf_t *)ps_memtab->pv_base;
+ /* increment the memtabs */
+ ps_memtab++;
+
+ /* loop to initialise all the memories */
+ for(ctr = 0; ctr < (max_num_ref_pics + 1); ctr++)
+ {
+ pps_pre_enc_pic_bufs[ctr] = ps_pic_bufs;
+
+ ps_pic_bufs->s_yuv_buf_desc.i4_size = sizeof(iv_enc_yuv_buf_t);
+ ps_pic_bufs->s_yuv_buf_desc.pv_y_buf = NULL;
+ ps_pic_bufs->s_yuv_buf_desc.pv_u_buf = NULL;
+ ps_pic_bufs->s_yuv_buf_desc.pv_v_buf = NULL;
+ ps_pic_bufs->apu1_y_sub_pel_planes[0] = NULL;
+ ps_pic_bufs->apu1_y_sub_pel_planes[1] = NULL;
+ ps_pic_bufs->apu1_y_sub_pel_planes[2] = NULL;
+ ps_pic_bufs->ps_frm_col_mv = NULL;
+ ps_pic_bufs->pu1_frm_pu_map = NULL;
+ ps_pic_bufs->pu2_num_pu_map = NULL;
+ ps_pic_bufs->pu4_pu_off = NULL;
+ ps_pic_bufs->i4_is_free = 1;
+ ps_pic_bufs->i4_poc = -1;
+ ps_pic_bufs->i4_buf_id = ctr;
+
+ /* frame level buff increments */
+ ps_pic_bufs++;
+ }
+
+ /* store the queue pointer and num buffs to context */
+ ps_enc_ctxt->pps_pre_enc_recon_buf_q = pps_pre_enc_pic_bufs;
+ ps_enc_ctxt->i4_pre_enc_num_buf_recon_q = (max_num_ref_pics + 1);
+ }
+
+ /* Frame level buffers and Que between pre-encode & encode */
+ {
+ pre_enc_me_ctxt_t *ps_pre_enc_bufs;
+ pre_enc_L0_ipe_encloop_ctxt_t *ps_L0_ipe_enc_bufs;
+ ihevce_lap_enc_buf_t *ps_lap_enc_input_buf;
+ ctb_analyse_t *ps_ctb_analyse;
+ UWORD8 *pu1_me_lyr_ctxt;
+ UWORD8 *pu1_me_lyr_bank_ctxt;
+ UWORD8 *pu1_mv_bank;
+ UWORD8 *pu1_ref_idx_bank;
+ double *plf_intra_8x8_cost;
+ ipe_l0_ctb_analyse_for_me_t *ps_ipe_analyse_ctb;
+ ihevce_ed_ctb_l1_t *ps_ed_ctb_l1;
+ ihevce_ed_blk_t *ps_layer1_buf;
+ ihevce_ed_blk_t *ps_layer2_buf;
+ ihevce_8x8_L0_satd_t *ps_layer0_cur_satd;
+ ihevce_8x8_L0_mean_t *ps_layer0_cur_mean;
+ UWORD8 *pu1_lap_input_yuv_buf[4];
+ UWORD8 *pu1_input_synch_ctrl_cmd;
+ WORD32 i4_count = 0;
+ /*initialize the memory for input buffer*/
+ {
+ for(i4_count = 0; i4_count < i4_total_queues; i4_count++)
+ {
+ pu1_lap_input_yuv_buf[i4_count] = (UWORD8 *)ps_memtab->pv_base;
+ /* increment the memtabs */
+ ps_memtab++;
+ }
+ pps_lap_enc_input_bufs = (ihevce_lap_enc_buf_t **)ps_memtab->pv_base;
+ /* increment the memtabs */
+ ps_memtab++;
+
+ /*memory for the input buffer structure*/
+ ps_lap_enc_input_buf = (ihevce_lap_enc_buf_t *)ps_memtab->pv_base;
+ ps_memtab++;
+
+ pu1_input_synch_ctrl_cmd = (UWORD8 *)ps_memtab->pv_base;
+ ps_memtab++;
+ }
+ /* pre encode /encode coding buffer pointer array */
+ pps_pre_enc_bufs = (pre_enc_me_ctxt_t **)ps_memtab->pv_base;
+ /* increment the memtabs */
+ ps_memtab++;
+
+ /* pre encode /encode buffer structure */
+ ps_pre_enc_bufs = (pre_enc_me_ctxt_t *)ps_memtab->pv_base;
+ /* increment the memtabs */
+ ps_memtab++;
+
+ /* Pre-encode L0 IPE output to ME buffer pointer */
+ pps_L0_ipe_enc_bufs = (pre_enc_L0_ipe_encloop_ctxt_t **)ps_memtab->pv_base;
+ /* increment the memtabs */
+ ps_memtab++;
+
+ /* Pre-encode L0 IPE output to ME buffer */
+ ps_L0_ipe_enc_bufs = (pre_enc_L0_ipe_encloop_ctxt_t *)ps_memtab->pv_base;
+ /* increment the memtabs */
+ ps_memtab++;
+
+ /* CTB analyse Frame level */
+ ps_ctb_analyse = (ctb_analyse_t *)ps_memtab->pv_base;
+ /* increment the memtabs */
+ ps_memtab++;
+
+ /* ME layer ctxt Frame level */
+ pu1_me_lyr_ctxt = (UWORD8 *)ps_memtab->pv_base;
+ /* increment the memtabs */
+ ps_memtab++;
+
+ /* ME layer bank ctxt Frame level */
+ pu1_me_lyr_bank_ctxt = (UWORD8 *)ps_memtab->pv_base;
+ /* increment the memtabs */
+ ps_memtab++;
+
+ /* ME layer MV bank Frame level */
+ pu1_mv_bank = (UWORD8 *)ps_memtab->pv_base;
+ /* increment the memtabs */
+ ps_memtab++;
+
+ /* ME layer ref idx bank Frame level */
+ pu1_ref_idx_bank = (UWORD8 *)ps_memtab->pv_base;
+ /* increment the memtabs */
+ ps_memtab++;
+ /* 8x8 intra costs for entire frame */
+ plf_intra_8x8_cost = (double *)ps_memtab->pv_base;
+ ps_memtab++;
+
+ /* ctb intra costs and modes for entire frame */
+ ps_ipe_analyse_ctb = (ipe_l0_ctb_analyse_for_me_t *)ps_memtab->pv_base;
+ ps_memtab++;
+
+ /*L0 8x8 cur satd for qp mod*/
+ ps_layer0_cur_satd = (ihevce_8x8_L0_satd_t *)ps_memtab->pv_base;
+ ps_memtab++;
+
+ /*L0 8x8 cur mean for qp mod*/
+ ps_layer0_cur_mean = (ihevce_8x8_L0_mean_t *)ps_memtab->pv_base;
+ ps_memtab++;
+
+ /*Contains ctb level information at pre-intra stage */
+ ps_ed_ctb_l1 = (ihevce_ed_ctb_l1_t *)ps_memtab->pv_base;
+ ps_memtab++;
+
+ /* Layer L1 buf */
+ ps_layer1_buf = (ihevce_ed_blk_t *)ps_memtab->pv_base;
+ /* increment the memtabs */
+ ps_memtab++;
+
+ /* Layer2 buf */
+ ps_layer2_buf = (ihevce_ed_blk_t *)ps_memtab->pv_base;
+ /* increment the memtabs */
+ ps_memtab++;
+
+ /* loop to initialise all the memories*/
+ /*mrs: assign individual input yuv frame pointers here*/
+
+ i4_count = 0;
+ /* loop to initialise the buffer pointer */
+ for(ctr = 0; ctr < num_input_buf_per_queue; ctr++)
+ {
+ pps_lap_enc_input_bufs[ctr] = &ps_lap_enc_input_buf[ctr];
+
+ pps_lap_enc_input_bufs[ctr]->s_input_buf.i4_size = sizeof(iv_input_data_ctrl_buffs_t);
+
+ pps_lap_enc_input_bufs[ctr]->s_input_buf.pv_synch_ctrl_bufs = pu1_input_synch_ctrl_cmd;
+
+ pps_lap_enc_input_bufs[ctr]->s_input_buf.s_input_buf.i4_size = sizeof(iv_yuv_buf_t);
+
+ pu1_input_synch_ctrl_cmd += ENC_COMMAND_BUFF_SIZE;
+ /*pointer to i/p buf initialised to null in case of run time allocation*/
+
+ {
+ pps_lap_enc_input_bufs[ctr]->s_lap_out.s_input_buf.pv_y_buf =
+ pu1_lap_input_yuv_buf[i4_count];
+
+ pps_lap_enc_input_bufs[ctr]->s_lap_out.s_input_buf.pv_u_buf =
+ pu1_lap_input_yuv_buf[i4_count] + i4_luma_min_size;
+
+ pps_lap_enc_input_bufs[ctr]->s_lap_out.s_input_buf.pv_v_buf =
+ NULL; /*since yuv 420 format*/
+
+ pu1_lap_input_yuv_buf[i4_count] += i4_yuv_min_size;
+
+ if(((ctr + 1) % MAX_QUEUE) == 0)
+ i4_count++;
+ }
+ }
+ for(ctr = 0; ctr < num_bufs_preenc_me_que; ctr++)
+ {
+ pps_pre_enc_bufs[ctr] = ps_pre_enc_bufs;
+
+ ps_pre_enc_bufs->ps_ctb_analyse = ps_ctb_analyse;
+ ps_pre_enc_bufs->pv_me_lyr_ctxt = (void *)pu1_me_lyr_ctxt;
+ ps_pre_enc_bufs->pv_me_lyr_bnk_ctxt = (void *)pu1_me_lyr_bank_ctxt;
+ ps_pre_enc_bufs->pv_me_mv_bank = (void *)pu1_mv_bank;
+ ps_pre_enc_bufs->pv_me_ref_idx = (void *)pu1_ref_idx_bank;
+ ps_pre_enc_bufs->ps_layer1_buf = ps_layer1_buf;
+ ps_pre_enc_bufs->ps_layer2_buf = ps_layer2_buf;
+ ps_pre_enc_bufs->ps_layer0_cur_satd = ps_layer0_cur_satd;
+ ps_pre_enc_bufs->ps_layer0_cur_mean = ps_layer0_cur_mean;
+ ps_pre_enc_bufs->ps_ed_ctb_l1 = ps_ed_ctb_l1;
+ ps_pre_enc_bufs->plf_intra_8x8_cost = plf_intra_8x8_cost;
+
+ ps_ctb_analyse += num_ctb_horz * num_ctb_vert;
+ pu1_me_lyr_ctxt += sizeof(layer_ctxt_t);
+ pu1_me_lyr_bank_ctxt += sizeof(layer_mv_t);
+ pu1_mv_bank += mv_bank_size;
+ pu1_ref_idx_bank += ref_idx_bank_size;
+ plf_intra_8x8_cost +=
+ (((num_ctb_horz * ctb_size) >> 3) * ((num_ctb_vert * ctb_size) >> 3));
+ ps_ed_ctb_l1 += (a_ctb_align_wd[1] >> 5) * (a_ctb_align_ht[1] >> 5);
+ ps_layer1_buf += (a_ctb_align_wd[1] >> 2) * (a_ctb_align_ht[1] >> 2);
+ ps_layer2_buf += (a_ctb_align_wd[2] >> 2) * (a_ctb_align_ht[2] >> 2);
+ ps_layer0_cur_satd += (a_ctb_align_wd[0] >> 3) * (a_ctb_align_ht[0] >> 3);
+ ps_pre_enc_bufs++;
+ }
+
+ for(ctr = 0; ctr < num_bufs_L0_ipe_enc; ctr++)
+ {
+ pps_L0_ipe_enc_bufs[ctr] = ps_L0_ipe_enc_bufs;
+ ps_L0_ipe_enc_bufs->ps_ipe_analyse_ctb = ps_ipe_analyse_ctb;
+ ps_ipe_analyse_ctb += num_ctb_horz * num_ctb_vert;
+ ps_L0_ipe_enc_bufs++;
+ }
+ }
+
+ /* Frame level que between ME and Enc rd-opt */
+ {
+ me_enc_rdopt_ctxt_t *ps_me_enc_bufs;
+ job_queue_t *ps_job_q_enc;
+ me_ctb_data_t *ps_cur_ctb_me_data;
+ cur_ctb_cu_tree_t *ps_cur_ctb_cu_tree;
+
+ /* pre encode /encode coding buffer pointer array */
+ pps_me_enc_bufs = (me_enc_rdopt_ctxt_t **)ps_memtab->pv_base;
+ /* increment the memtabs */
+ ps_memtab++;
+
+ /* pre encode /encode buffer structure */
+ ps_me_enc_bufs = (me_enc_rdopt_ctxt_t *)ps_memtab->pv_base;
+ /* increment the memtabs */
+ ps_memtab++;
+
+ /*me and enc job queue memory */
+ ps_job_q_enc = (job_queue_t *)ps_memtab->pv_base;
+ /* increment the memtabs */
+ ps_memtab++;
+
+ /*ctb cu tree memory*/
+ ps_cur_ctb_cu_tree = (cur_ctb_cu_tree_t *)ps_memtab->pv_base;
+ /* increment the memtabs */
+ ps_memtab++;
+
+ /*ctb me data memory*/
+ ps_cur_ctb_me_data = (me_ctb_data_t *)ps_memtab->pv_base;
+ /* increment the memtabs */
+ ps_memtab++;
+
+ /* loop to initialise all the memories */
+ for(ctr = 0; ctr < NUM_ME_ENC_BUFS; ctr++)
+ {
+ pps_me_enc_bufs[ctr] = ps_me_enc_bufs;
+
+ ps_me_enc_bufs->ps_job_q_enc = ps_job_q_enc;
+ ps_me_enc_bufs->ps_cur_ctb_cu_tree = ps_cur_ctb_cu_tree;
+ ps_me_enc_bufs->ps_cur_ctb_me_data = ps_cur_ctb_me_data;
+
+ ps_job_q_enc += (MAX_NUM_VERT_UNITS_FRM * NUM_ENC_JOBS_QUES);
+ /* In tile case, based on the number of column tiles,
+ increment jobQ per column tile */
+ if(1 == ps_enc_ctxt->ps_stat_prms->s_app_tile_params.i4_tiles_enabled_flag)
+ {
+ WORD32 col_tile_ctr;
+ for(col_tile_ctr = 1;
+ col_tile_ctr < ps_enc_ctxt->ps_stat_prms->s_app_tile_params.i4_num_tile_cols;
+ col_tile_ctr++)
+ {
+ ps_job_q_enc += (MAX_NUM_VERT_UNITS_FRM * NUM_ENC_JOBS_QUES);
+ }
+ }
+
+ ps_cur_ctb_cu_tree += (num_ctb_horz * MAX_NUM_NODES_CU_TREE * num_ctb_vert);
+ ps_cur_ctb_me_data += (num_ctb_horz * num_ctb_vert);
+
+ ps_me_enc_bufs++;
+ }
+ }
+ /* Frame level Que between frame process & entropy */
+ for(i = 0; i < i4_num_bitrate_inst; i++)
+ {
+ frm_proc_ent_cod_ctxt_t *ps_frmp_ent_bufs;
+ ctb_enc_loop_out_t *ps_ctb;
+ cu_enc_loop_out_t *ps_cu;
+ tu_enc_loop_out_t *ps_tu;
+ pu_t *ps_pu;
+ UWORD8 *pu1_coeffs;
+ WORD32 num_ctb_in_frm;
+ WORD32 coeff_size;
+ UWORD8 *pu1_sei_payload;
+
+ /* frame process/entropy coding buffer pointer array */
+ pps_frm_proc_ent_cod_bufs[i] = (frm_proc_ent_cod_ctxt_t **)ps_memtab->pv_base;
+ /* increment the memtabs */
+ ps_memtab++;
+
+ /* frame process/entropy coding buffer structure */
+ ps_frmp_ent_bufs = (frm_proc_ent_cod_ctxt_t *)ps_memtab->pv_base;
+ /* increment the memtabs */
+ ps_memtab++;
+
+ /* CTB enc loop Frame level */
+ ps_ctb = (ctb_enc_loop_out_t *)ps_memtab->pv_base;
+ /* increment the memtabs */
+ ps_memtab++;
+
+ /* CU enc loop Frame level */
+ ps_cu = (cu_enc_loop_out_t *)ps_memtab->pv_base;
+ /* increment the memtabs */
+ ps_memtab++;
+
+ /* TU enc loop Frame level */
+ ps_tu = (tu_enc_loop_out_t *)ps_memtab->pv_base;
+ /* increment the memtabs */
+ ps_memtab++;
+
+ /* PU enc loop Frame level */
+ ps_pu = (pu_t *)ps_memtab->pv_base;
+ /* increment the memtabs */
+ ps_memtab++;
+
+ /* Coeffs Frame level */
+ pu1_coeffs = (UWORD8 *)ps_memtab->pv_base;
+ /* increment the memtabs */
+ ps_memtab++;
+
+ /* CC User Data */
+ pu1_sei_payload = (UWORD8 *)ps_memtab->pv_base;
+ ps_memtab++;
+
+ num_ctb_in_frm = num_ctb_horz * num_ctb_vert;
+
+ /* calculate the coeff size */
+ coeff_size =
+ num_ctb_horz * ((ps_enc_ctxt->ps_stat_prms->s_src_prms.i4_chr_format == IV_YUV_422SP_UV)
+ ? (num_tu_in_ctb << 1)
+ : ((num_tu_in_ctb * 3) >> 1));
+ coeff_size = coeff_size * num_ctb_vert * MAX_SCAN_COEFFS_BYTES_4x4;
+ /* loop to initialise all the memories */
+ for(ctr = 0; ctr < NUM_FRMPROC_ENTCOD_BUFS; ctr++)
+ {
+ WORD32 num_sei;
+ pps_frm_proc_ent_cod_bufs[i][ctr] = ps_frmp_ent_bufs;
+
+ ps_frmp_ent_bufs->ps_frm_ctb_data = ps_ctb;
+ ps_frmp_ent_bufs->ps_frm_cu_data = ps_cu;
+ ps_frmp_ent_bufs->ps_frm_pu_data = ps_pu;
+ ps_frmp_ent_bufs->ps_frm_tu_data = ps_tu;
+ ps_frmp_ent_bufs->pv_coeff_data = pu1_coeffs;
+
+ /* memset the slice headers and buffer to keep track */
+ memset(&ps_frmp_ent_bufs->s_slice_hdr, 0, sizeof(slice_header_t));
+
+ /*PIC_INFO*/
+ memset(&ps_frmp_ent_bufs->s_pic_level_info, 0, sizeof(s_pic_level_acc_info_t));
+
+ ps_ctb += num_ctb_in_frm;
+ ps_cu += num_ctb_in_frm * num_cu_in_ctb;
+ ps_pu += num_ctb_in_frm * num_pu_in_ctb;
+ ps_tu += num_ctb_in_frm * num_tu_in_ctb;
+
+ pu1_coeffs += coeff_size;
+
+ for(num_sei = 0; num_sei < MAX_NUMBER_OF_SEI_PAYLOAD; num_sei++)
+ {
+ ps_frmp_ent_bufs->as_sei_payload[num_sei].pu1_sei_payload = pu1_sei_payload;
+ ps_frmp_ent_bufs->as_sei_payload[num_sei].u4_payload_type = 0;
+ ps_frmp_ent_bufs->as_sei_payload[num_sei].u4_payload_length = 0;
+ pu1_sei_payload += MAX_SEI_PAYLOAD_PER_TLV;
+ }
+
+ ps_frmp_ent_bufs++;
+ }
+ }
+
+ /* Working memory for encoder */
+ ps_enc_ctxt->pu1_frm_lvl_wkg_mem = (UWORD8 *)ps_memtab->pv_base;
+ ps_memtab++;
+
+ /* Job Que memory */
+ /* Job que memory distribution is as follows _______
+ enc_group_ping -> MAX_NUM_VERT_UNITS_FRM for all the passes (NUM_ENC_JOBS_QUES)------------>|_______|
+ enc_group_pong -> MAX_NUM_VERT_UNITS_FRM for all the passes (NUM_ENC_JOBS_QUES)------------>|_______|
+ pre_enc_group_ping -> MAX_NUM_VERT_UNITS_FRM for all the passes (NUM_PRE_ENC_JOBS_QUES)---->|_______|
+ pre_enc_group_ping -> MAX_NUM_VERT_UNITS_FRM for all the passes (NUM_PRE_ENC_JOBS_QUES)---->|_______|
+ */
+
+ ps_enc_ctxt->s_multi_thrd.aps_job_q_pre_enc[0] = (job_queue_t *)ps_memtab->pv_base;
+ for(ctr = 1; ctr < MAX_PRE_ENC_STAGGER + NUM_BUFS_DECOMP_HME; ctr++)
+ {
+ ps_enc_ctxt->s_multi_thrd.aps_job_q_pre_enc[ctr] =
+ ps_enc_ctxt->s_multi_thrd.aps_job_q_pre_enc[0] +
+ (MAX_NUM_VERT_UNITS_FRM * NUM_PRE_ENC_JOBS_QUES * ctr);
+ }
+ ps_memtab++;
+
+ /* -----Frameproc Entcod Que mem_init --- */
+ /* init ptrs for each bit-rate */
+ for(i = 0; i < i4_num_bitrate_inst; i++)
+ {
+ ps_enc_ctxt->s_enc_ques.apv_q_hdl[IHEVCE_FRM_PRS_ENT_COD_Q + i] = ihevce_buff_que_init(
+ ps_memtab, NUM_FRMPROC_ENTCOD_BUFS, (void **)pps_frm_proc_ent_cod_bufs[i]);
+ ps_memtab += ihevce_buff_que_get_num_mem_recs();
+ }
+ /*mrs*/
+ /* ----Encoder owned input buffer queue init----*/
+ ps_enc_ctxt->s_enc_ques.apv_q_hdl[IHEVCE_ENC_INPUT_Q] =
+ ihevce_buff_que_init(ps_memtab, num_input_buf_per_queue, (void **)pps_lap_enc_input_bufs);
+ ps_memtab += ihevce_buff_que_get_num_mem_recs();
+
+ /* -----Pre-Encode / Encode Que mem_init --- */
+ ps_enc_ctxt->s_enc_ques.apv_q_hdl[IHEVCE_PRE_ENC_ME_Q] =
+ ihevce_buff_que_init(ps_memtab, num_bufs_preenc_me_que, (void **)pps_pre_enc_bufs);
+
+ ps_memtab += ihevce_buff_que_get_num_mem_recs();
+
+ /* -----ME / Enc-RD opt Que mem_init --- */
+ ps_enc_ctxt->s_enc_ques.apv_q_hdl[IHEVCE_ME_ENC_RDOPT_Q] =
+ ihevce_buff_que_init(ps_memtab, NUM_ME_ENC_BUFS, (void **)pps_me_enc_bufs);
+
+ ps_memtab += ihevce_buff_que_get_num_mem_recs();
+
+ /* -----Pre-Encode L0 IPE to enc queue --- */
+ ps_enc_ctxt->s_enc_ques.apv_q_hdl[IHEVCE_L0_IPE_ENC_Q] =
+ ihevce_buff_que_init(ps_memtab, num_bufs_L0_ipe_enc, (void **)pps_L0_ipe_enc_bufs);
+
+ ps_memtab += ihevce_buff_que_get_num_mem_recs();
+
+ /* ---------- Dependency Manager allocations -------- */
+ {
+ osal_sem_attr_t attr = OSAL_DEFAULT_SEM_ATTR;
+ WORD32 i1_is_sem_enabled;
+
+ if(ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id]
+ .i4_quality_preset >= IHEVCE_QUALITY_P4)
+ {
+ i1_is_sem_enabled = 0;
+ }
+ else
+ {
+ i1_is_sem_enabled = 1;
+ }
+
+ /* allocate semaphores for all the threads in pre-enc and enc */
+ for(ctr = 0; ctr < ps_enc_ctxt->s_multi_thrd.i4_num_enc_proc_thrds; ctr++)
+ {
+ ps_enc_ctxt->s_multi_thrd.apv_enc_thrd_sem_handle[ctr] =
+ osal_sem_create(ps_intrf_ctxt->pv_osal_handle, &attr);
+ if(NULL == ps_enc_ctxt->s_multi_thrd.apv_enc_thrd_sem_handle[ctr])
+ {
+ ps_intrf_ctxt->i4_error_code = IHEVCE_CANNOT_ALLOCATE_MEMORY;
+ return;
+ }
+ }
+
+ for(ctr = 0; ctr < ps_enc_ctxt->s_multi_thrd.i4_num_pre_enc_proc_thrds; ctr++)
+ {
+ ps_enc_ctxt->s_multi_thrd.apv_pre_enc_thrd_sem_handle[ctr] =
+ osal_sem_create(ps_intrf_ctxt->pv_osal_handle, &attr);
+ if(NULL == ps_enc_ctxt->s_multi_thrd.apv_pre_enc_thrd_sem_handle[ctr])
+ {
+ ps_intrf_ctxt->i4_error_code = IHEVCE_CANNOT_ALLOCATE_MEMORY;
+ return;
+ }
+ }
+
+ /* --- ME-EncLoop Dep Mngr Row-Row Init -- */
+ for(ctr = 0; ctr < NUM_ME_ENC_BUFS; ctr++)
+ {
+ me_enc_rdopt_ctxt_t *ps_me_enc_bufs = pps_me_enc_bufs[ctr];
+
+ ps_me_enc_bufs->pv_dep_mngr_encloop_dep_me = ihevce_dmgr_init(
+ ps_memtab,
+ ps_intrf_ctxt->pv_osal_handle,
+ DEP_MNGR_ROW_ROW_SYNC,
+ (a_ctb_align_ht[0] / ctb_size),
+ (a_ctb_align_wd[0] / ctb_size),
+ ps_enc_ctxt->ps_tile_params_base->i4_num_tile_cols, /* Number of Col Tiles */
+ ps_enc_ctxt->s_multi_thrd.i4_num_enc_proc_thrds,
+ i1_is_sem_enabled /*Sem Disabled/Enabled*/
+ );
+ ps_memtab += ihevce_dmgr_get_num_mem_recs();
+
+ /* Register Enc group semaphore handles */
+ ihevce_dmgr_reg_sem_hdls(
+ ps_me_enc_bufs->pv_dep_mngr_encloop_dep_me,
+ ps_enc_ctxt->s_multi_thrd.apv_enc_thrd_sem_handle,
+ ps_enc_ctxt->s_multi_thrd.i4_num_enc_proc_thrds);
+
+ /* Register the handle in multithread ctxt also for free purpose */
+ ps_enc_ctxt->s_multi_thrd.apv_dep_mngr_encloop_dep_me[ctr] =
+ ps_me_enc_bufs->pv_dep_mngr_encloop_dep_me;
+ }
+
+ for(ctr = 0; ctr < i4_num_enc_loop_frm_pllel; ctr++)
+ {
+ /* --- Prev. frame EncLoop Done Dep Mngr Frm-Frm Mem Init -- */
+ ps_enc_ctxt->s_multi_thrd.apv_dep_mngr_prev_frame_done[ctr] = ihevce_dmgr_init(
+ ps_memtab,
+ ps_intrf_ctxt->pv_osal_handle,
+ DEP_MNGR_FRM_FRM_SYNC,
+ (a_ctb_align_ht[0] / ctb_size),
+ (a_ctb_align_wd[0] / ctb_size),
+ 1, /* Number of Col Tiles : Don't care for FRM_FRM */
+ ps_enc_ctxt->s_multi_thrd.i4_num_enc_proc_thrds,
+ 1 /*Sem Enabled*/
+ );
+ ps_memtab += ihevce_dmgr_get_num_mem_recs();
+
+ /* Register Enc group semaphore handles */
+ ihevce_dmgr_reg_sem_hdls(
+ ps_enc_ctxt->s_multi_thrd.apv_dep_mngr_prev_frame_done[ctr],
+ ps_enc_ctxt->s_multi_thrd.apv_enc_thrd_sem_handle,
+ ps_enc_ctxt->s_multi_thrd.i4_num_enc_proc_thrds);
+ }
+ /* --- Prev. frame EncLoop Done Dep Mngr for re-encode Frm-Frm Mem Init -- */
+ ps_enc_ctxt->s_multi_thrd.pv_dep_mngr_prev_frame_enc_done_for_reenc = ihevce_dmgr_init(
+ ps_memtab,
+ ps_intrf_ctxt->pv_osal_handle,
+ DEP_MNGR_FRM_FRM_SYNC,
+ (a_ctb_align_ht[0] / ctb_size),
+ (a_ctb_align_wd[0] / ctb_size),
+ 1, /* Number of Col Tiles : Don't care for FRM_FRM */
+ ps_enc_ctxt->s_multi_thrd.i4_num_enc_proc_thrds,
+ 1 /*Sem Enabled*/
+ );
+ ps_memtab += ihevce_dmgr_get_num_mem_recs();
+
+ /* Register Enc group semaphore handles */
+ ihevce_dmgr_reg_sem_hdls(
+ ps_enc_ctxt->s_multi_thrd.pv_dep_mngr_prev_frame_enc_done_for_reenc,
+ ps_enc_ctxt->s_multi_thrd.apv_enc_thrd_sem_handle,
+ ps_enc_ctxt->s_multi_thrd.i4_num_enc_proc_thrds);
+ for(ctr = 0; ctr < i4_num_me_frm_pllel; ctr++)
+ {
+ /* --- Prev. frame ME Done Dep Mngr Frm-Frm Mem Init -- */
+ ps_enc_ctxt->s_multi_thrd.apv_dep_mngr_prev_frame_me_done[ctr] = ihevce_dmgr_init(
+ ps_memtab,
+ ps_intrf_ctxt->pv_osal_handle,
+ DEP_MNGR_FRM_FRM_SYNC,
+ (a_ctb_align_ht[0] / ctb_size),
+ (a_ctb_align_wd[0] / ctb_size),
+ 1, /* Number of Col Tiles : Don't care for FRM_FRM */
+ ps_enc_ctxt->s_multi_thrd.i4_num_enc_proc_thrds,
+ 1 /*Sem Enabled*/
+ );
+ ps_memtab += ihevce_dmgr_get_num_mem_recs();
+
+ /* Register Enc group semaphore handles */
+ ihevce_dmgr_reg_sem_hdls(
+ ps_enc_ctxt->s_multi_thrd.apv_dep_mngr_prev_frame_me_done[ctr],
+ ps_enc_ctxt->s_multi_thrd.apv_enc_thrd_sem_handle,
+ ps_enc_ctxt->s_multi_thrd.i4_num_enc_proc_thrds);
+ }
+ /* --- Prev. frame PreEnc L1 Done Dep Mngr Frm-Frm Mem Init -- */
+ ps_enc_ctxt->s_multi_thrd.pv_dep_mngr_prev_frame_pre_enc_l1 = ihevce_dmgr_init(
+ ps_memtab,
+ ps_intrf_ctxt->pv_osal_handle,
+ DEP_MNGR_FRM_FRM_SYNC,
+ (a_ctb_align_ht[0] / ctb_size),
+ (a_ctb_align_wd[0] / ctb_size),
+ 1, /* Number of Col Tiles : Don't care for FRM_FRM */
+ ps_enc_ctxt->s_multi_thrd.i4_num_pre_enc_proc_thrds,
+ 1 /*Sem Enabled*/
+ );
+ ps_memtab += ihevce_dmgr_get_num_mem_recs();
+
+ /* Register Pre-Enc group semaphore handles */
+ ihevce_dmgr_reg_sem_hdls(
+ ps_enc_ctxt->s_multi_thrd.pv_dep_mngr_prev_frame_pre_enc_l1,
+ ps_enc_ctxt->s_multi_thrd.apv_pre_enc_thrd_sem_handle,
+ ps_enc_ctxt->s_multi_thrd.i4_num_pre_enc_proc_thrds);
+
+ /* --- Prev. frame PreEnc HME Done Dep Mngr Frm-Frm Mem Init -- */
+ ps_enc_ctxt->s_multi_thrd.pv_dep_mngr_prev_frame_pre_enc_coarse_me = ihevce_dmgr_init(
+ ps_memtab,
+ ps_intrf_ctxt->pv_osal_handle,
+ DEP_MNGR_FRM_FRM_SYNC,
+ (a_ctb_align_ht[0] / ctb_size),
+ (a_ctb_align_wd[0] / ctb_size),
+ 1, /* Number of Col Tiles : Don't care for FRM_FRM */
+ ps_enc_ctxt->s_multi_thrd.i4_num_pre_enc_proc_thrds,
+ 1 /*Sem Enabled*/
+ );
+ ps_memtab += ihevce_dmgr_get_num_mem_recs();
+
+ /* Register Pre-Enc group semaphore handles */
+ ihevce_dmgr_reg_sem_hdls(
+ ps_enc_ctxt->s_multi_thrd.pv_dep_mngr_prev_frame_pre_enc_coarse_me,
+ ps_enc_ctxt->s_multi_thrd.apv_pre_enc_thrd_sem_handle,
+ ps_enc_ctxt->s_multi_thrd.i4_num_pre_enc_proc_thrds);
+
+ /* --- Prev. frame PreEnc L0 Done Dep Mngr Frm-Frm Mem Init -- */
+ ps_enc_ctxt->s_multi_thrd.pv_dep_mngr_prev_frame_pre_enc_l0 = ihevce_dmgr_init(
+ ps_memtab,
+ ps_intrf_ctxt->pv_osal_handle,
+ DEP_MNGR_FRM_FRM_SYNC,
+ (a_ctb_align_ht[0] / ctb_size),
+ (a_ctb_align_wd[0] / ctb_size),
+ 1, /* Number of Col Tiles : Don't care for FRM_FRM */
+ ps_enc_ctxt->s_multi_thrd.i4_num_pre_enc_proc_thrds,
+ 1 /*Sem Enabled*/
+ );
+ ps_memtab += ihevce_dmgr_get_num_mem_recs();
+
+ /* Register Pre-Enc group semaphore handles */
+ ihevce_dmgr_reg_sem_hdls(
+ ps_enc_ctxt->s_multi_thrd.pv_dep_mngr_prev_frame_pre_enc_l0,
+ ps_enc_ctxt->s_multi_thrd.apv_pre_enc_thrd_sem_handle,
+ ps_enc_ctxt->s_multi_thrd.i4_num_pre_enc_proc_thrds);
+
+ /* --- ME-Prev Recon Dep Mngr Row-Frm Mem init -- */
+ for(ctr = 0; ctr < (max_num_ref_pics + 1 + NUM_EXTRA_RECON_BUFS); ctr++)
+ {
+ WORD32 ai4_tile_xtra_ctb[4] = { 0 };
+
+ ps_enc_ctxt->pps_recon_buf_q[0][ctr]->pv_dep_mngr_recon = ihevce_dmgr_map_init(
+ ps_memtab,
+ num_ctb_vert,
+ num_ctb_horz,
+ i1_is_sem_enabled, /*Sem Disabled/Enabled*/
+ ps_enc_ctxt->s_multi_thrd.i4_num_enc_proc_thrds,
+ ai4_tile_xtra_ctb);
+
+ ps_memtab += ihevce_dmgr_get_num_mem_recs();
+
+ /* Register Enc group semaphore handles */
+ ihevce_dmgr_reg_sem_hdls(
+ ps_enc_ctxt->pps_recon_buf_q[0][ctr]->pv_dep_mngr_recon,
+ ps_enc_ctxt->s_multi_thrd.apv_enc_thrd_sem_handle,
+ ps_enc_ctxt->s_multi_thrd.i4_num_enc_proc_thrds);
+ }
+
+ /* ------ Module level register semaphores -------- */
+ ihevce_coarse_me_reg_thrds_sem(
+ ps_enc_ctxt->s_module_ctxt.pv_coarse_me_ctxt,
+ ps_enc_ctxt->s_multi_thrd.apv_pre_enc_thrd_sem_handle,
+ ps_enc_ctxt->s_multi_thrd.i4_num_pre_enc_proc_thrds);
+
+ ihevce_enc_loop_reg_sem_hdls(
+ ps_enc_ctxt->s_module_ctxt.pv_enc_loop_ctxt,
+ ps_enc_ctxt->s_multi_thrd.apv_enc_thrd_sem_handle,
+ ps_enc_ctxt->s_multi_thrd.i4_num_enc_proc_thrds);
+ }
+
+ /* copy the run time source parameters from create time prms */
+ memcpy(
+ &ps_enc_ctxt->s_runtime_src_prms,
+ &ps_enc_ctxt->ps_stat_prms->s_src_prms,
+ sizeof(ihevce_src_params_t));
+
+ memcpy(
+ &ps_enc_ctxt->s_runtime_tgt_params,
+ &ps_enc_ctxt->ps_stat_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id],
+ sizeof(ihevce_tgt_params_t));
+
+ /* copy the run time coding parameters from create time prms */
+ memcpy(
+ &ps_enc_ctxt->s_runtime_coding_prms,
+ &ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms,
+ sizeof(ihevce_coding_params_t));
+
+ /*change in run time parameter*/
+ if(ps_enc_ctxt->ps_stat_prms->s_coding_tools_prms.i4_max_reference_frames == -1)
+ {
+ ps_enc_ctxt->s_runtime_coding_prms.i4_max_reference_frames = (DEFAULT_MAX_REFERENCE_PICS)
+ << i4_field_pic;
+
+ ps_enc_ctxt->s_lap_stat_prms.i4_max_reference_frames =
+ ps_enc_ctxt->s_runtime_coding_prms.i4_max_reference_frames;
+ }
+
+ /* populate the frame level ctb parameters based on run time params */
+ ihevce_set_pre_enc_prms(ps_enc_ctxt);
+
+ return;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_mem_manager_que_init \endif
+*
+* \brief
+* Counts, allocates and initialises the input/ctrl/output/recon buffer queues
+*
+* \param[in] Encoder context pointer (receives queue handles and queue memtabs)
+* \param[in] High level Encoder context pointer (allocator, static cfg, error code)
+* \param[in] Buffer descriptors (input data, asynch ctrl, output[], recon[])
+*
+* \return
+* None; on allocation failure sets i4_error_code in the HLE context and returns
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_mem_manager_que_init(
+ enc_ctxt_t *ps_enc_ctxt,
+ ihevce_hle_ctxt_t *ps_hle_ctxt,
+ iv_input_data_ctrl_buffs_desc_t *ps_input_data_ctrl_buffs_desc,
+ iv_input_asynch_ctrl_buffs_desc_t *ps_input_asynch_ctrl_buffs_desc,
+ iv_output_data_buffs_desc_t *ps_output_data_buffs_desc,
+ iv_recon_data_buffs_desc_t *ps_recon_data_buffs_desc)
+{
+ /* local variables */
+ WORD32 total_memtabs_req = 0;
+ WORD32 total_memtabs_used = 0;
+ WORD32 ctr;
+ iv_mem_rec_t *ps_memtab;
+ WORD32 i; //bit-rate instance loop counter
+ iv_output_data_buffs_desc_t *ps_out_desc;
+ iv_recon_data_buffs_desc_t *ps_rec_desc;
+ WORD32 i4_num_bitrate_inst; //number of bit-rate instances
+ /* 0th instance's context; its input data/control queue handles are reused when i4_resolution_id is non-zero */
+ enc_ctxt_t *ps_enc_ctxt_base = (enc_ctxt_t *)ps_hle_ctxt->apv_enc_hdl[0];
+
+ i4_num_bitrate_inst = ps_enc_ctxt->i4_num_bitrates;
+ //ps_hle_ctxt->ps_static_cfg_prms->s_tgt_lyr_prms.as_tgt_params[0].i4_num_bitrate_instances;
+
+ /* --------------------------------------------------------------------- */
+ /* -------------- Collating the number of memtabs required ------------ */
+ /* --------------------------------------------------------------------- */
+
+ /* ------ Input Data Que Memtab (only for resolution id 0) -------- */
+ if(0 == ps_enc_ctxt->i4_resolution_id)
+ {
+ /* array of pointers for input */
+ total_memtabs_req++;
+
+ /* pointers for input desc */
+ total_memtabs_req++;
+
+ /* que manager buffer requirements */
+ total_memtabs_req += ihevce_buff_que_get_num_mem_recs();
+
+ /* ------ Input Control Que memtab ----- */
+ /* array of pointers for input control */
+ total_memtabs_req++;
+
+ /* pointers for input control desc */
+ total_memtabs_req++;
+
+ /* que manager buffer requirements */
+ total_memtabs_req += ihevce_buff_que_get_num_mem_recs();
+ }
+
+ /* ------ Output Data Que Memtab (one set per bit-rate instance) -------- */
+ for(i = 0; i < i4_num_bitrate_inst; i++)
+ {
+ /* array of pointers for output */
+ total_memtabs_req++;
+
+ /* pointers for output desc */
+ total_memtabs_req++;
+
+ /* que manager buffer requirements */
+ total_memtabs_req += ihevce_buff_que_get_num_mem_recs();
+ }
+
+ /* ------ Recon Data Que Memtab (only when i4_save_recon is set) -------- */
+ for(i = 0; i < i4_num_bitrate_inst; i++)
+ {
+ if(ps_hle_ctxt->ps_static_cfg_prms->i4_save_recon)
+ {
+ /* array of pointers for recon */
+ total_memtabs_req++;
+
+ /* pointers for recon desc */
+ total_memtabs_req++;
+
+ /* que manager buffer requirements */
+ total_memtabs_req += ihevce_buff_que_get_num_mem_recs();
+ }
+ }
+
+ /* ----- allocate memory for the memtab record array itself --- */
+ {
+ iv_mem_rec_t s_memtab;
+
+ s_memtab.i4_size = sizeof(iv_mem_rec_t);
+ s_memtab.i4_mem_size = total_memtabs_req * sizeof(iv_mem_rec_t);
+ s_memtab.e_mem_type = IV_EXT_CACHEABLE_NORMAL_MEM;
+ s_memtab.i4_mem_alignment = 4;
+
+ ps_hle_ctxt->ihevce_mem_alloc(
+ ps_hle_ctxt->pv_mem_mgr_hdl, &ps_hle_ctxt->ps_static_cfg_prms->s_sys_api, &s_memtab);
+ if(s_memtab.pv_base == NULL)
+ {
+ ps_hle_ctxt->i4_error_code = IHEVCE_CANNOT_ALLOCATE_MEMORY;
+ return;
+ }
+ ps_memtab = (iv_mem_rec_t *)s_memtab.pv_base;
+ }
+
+ /* --------------------------------------------------------------------- */
+ /* ------------------ Collating memory requirements ------------------- */
+ /* --------------------------------------------------------------------- */
+ if(0 == ps_enc_ctxt->i4_resolution_id)
+ {
+ /* ------ Input Data Que memory requests: pointer array first -------- */
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = IV_EXT_CACHEABLE_NORMAL_MEM;
+
+ ps_memtab[total_memtabs_used].i4_mem_size =
+ ((ps_input_data_ctrl_buffs_desc->i4_num_yuv_bufs) * (sizeof(ihevce_lap_enc_buf_t *)));
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = IV_EXT_CACHEABLE_NORMAL_MEM;
+
+ ps_memtab[total_memtabs_used].i4_mem_size =
+ ((ps_input_data_ctrl_buffs_desc->i4_num_yuv_bufs) * (sizeof(ihevce_lap_enc_buf_t)));
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+
+ /* call the Que manager get mem recs */
+ total_memtabs_used += ihevce_buff_que_get_mem_recs(
+ &ps_memtab[total_memtabs_used],
+ ps_input_data_ctrl_buffs_desc->i4_num_yuv_bufs,
+ IV_EXT_CACHEABLE_NORMAL_MEM);
+
+ /* ------ Input Control Que memory requests -------- */
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = IV_EXT_CACHEABLE_NORMAL_MEM;
+
+ ps_memtab[total_memtabs_used].i4_mem_size =
+ ((ps_input_asynch_ctrl_buffs_desc->i4_num_asynch_ctrl_bufs) *
+ (sizeof(iv_input_ctrl_buffs_t *)));
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = IV_EXT_CACHEABLE_NORMAL_MEM;
+
+ ps_memtab[total_memtabs_used].i4_mem_size =
+ ((ps_input_asynch_ctrl_buffs_desc->i4_num_asynch_ctrl_bufs) *
+ (sizeof(iv_input_ctrl_buffs_t)));
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+
+ /* call the Que manager get mem recs */
+ total_memtabs_used += ihevce_buff_que_get_mem_recs(
+ &ps_memtab[total_memtabs_used],
+ ps_input_asynch_ctrl_buffs_desc->i4_num_asynch_ctrl_bufs,
+ IV_EXT_CACHEABLE_NORMAL_MEM);
+ }
+
+ /* ------ Output data Que memory requests (descriptor advances per instance) -------- */
+ ps_out_desc = ps_output_data_buffs_desc;
+ for(i = 0; i < i4_num_bitrate_inst; i++)
+ {
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = IV_EXT_CACHEABLE_NORMAL_MEM;
+
+ ps_memtab[total_memtabs_used].i4_mem_size =
+ ((ps_out_desc->i4_num_bitstream_bufs) * (sizeof(iv_output_data_buffs_t *)));
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = IV_EXT_CACHEABLE_NORMAL_MEM;
+
+ ps_memtab[total_memtabs_used].i4_mem_size =
+ ((ps_out_desc->i4_num_bitstream_bufs) * (sizeof(iv_output_data_buffs_t)));
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+
+ /* call the Que manager get mem recs */
+ total_memtabs_used += ihevce_buff_que_get_mem_recs(
+ &ps_memtab[total_memtabs_used],
+ ps_out_desc->i4_num_bitstream_bufs,
+ IV_EXT_CACHEABLE_NORMAL_MEM);
+ ps_out_desc++;
+ }
+
+ //recon_dump
+ /* ------ Recon Data Que memory requests -------- */
+ ps_rec_desc = ps_recon_data_buffs_desc;
+ if(ps_hle_ctxt->ps_static_cfg_prms->i4_save_recon)
+ {
+ for(i = 0; i < i4_num_bitrate_inst; i++)
+ {
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = IV_EXT_CACHEABLE_NORMAL_MEM;
+
+ ps_memtab[total_memtabs_used].i4_mem_size =
+ ((ps_rec_desc->i4_num_recon_bufs) * (sizeof(iv_enc_recon_data_buffs_t *)));
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+
+ ps_memtab[total_memtabs_used].i4_mem_alignment = 8;
+
+ ps_memtab[total_memtabs_used].e_mem_type = IV_EXT_CACHEABLE_NORMAL_MEM;
+
+ ps_memtab[total_memtabs_used].i4_mem_size =
+ ((ps_rec_desc->i4_num_recon_bufs) * (sizeof(iv_enc_recon_data_buffs_t)));
+
+ /* increment the memtab counter */
+ total_memtabs_used++;
+
+ /* call the Que manager get mem recs */
+ total_memtabs_used += ihevce_buff_que_get_mem_recs(
+ &ps_memtab[total_memtabs_used],
+ ps_rec_desc->i4_num_recon_bufs,
+ IV_EXT_CACHEABLE_NORMAL_MEM);
+
+ ps_rec_desc++;
+ }
+ }
+
+ /* ----- allocate memory as per requests ---- */
+
+ /* check on memtabs requested v/s memtabs used */
+ //the count collated above must equal the number of requests filled in
+ ASSERT(total_memtabs_req == total_memtabs_used);
+ for(ctr = 0; ctr < total_memtabs_used; ctr++)
+ {
+ UWORD8 *pu1_mem = NULL;
+ ps_hle_ctxt->ihevce_mem_alloc(
+ ps_hle_ctxt->pv_mem_mgr_hdl,
+ &ps_hle_ctxt->ps_static_cfg_prms->s_sys_api,
+ &ps_memtab[ctr]);
+
+ pu1_mem = (UWORD8 *)ps_memtab[ctr].pv_base;
+
+ if(NULL == pu1_mem)
+ {
+ ps_hle_ctxt->i4_error_code = IHEVCE_CANNOT_ALLOCATE_MEMORY;
+ return; /* NOTE(review): earlier memtabs and ps_memtab are neither freed nor stored on this path */
+ }
+ }
+
+ /* store base and count now; ps_memtab is advanced below while the queues are wired */
+ ps_enc_ctxt->s_mem_mngr.i4_num_q_memtabs = total_memtabs_used;
+ ps_enc_ctxt->s_mem_mngr.ps_q_memtab = ps_memtab;
+
+ /* --------------------------------------------------------------------- */
+ /* -------------- Initialisation of Queues memory ---------------------- */
+ /* --------------------------------------------------------------------- */
+
+ /* ---------- Input Data Que Mem init (memtabs consumed in request order) ---- */
+ if(0 == ps_enc_ctxt->i4_resolution_id)
+ {
+ ihevce_lap_enc_buf_t **pps_inp_bufs;
+ ihevce_lap_enc_buf_t *ps_inp_bufs;
+
+ pps_inp_bufs = (ihevce_lap_enc_buf_t **)ps_memtab->pv_base;
+ ps_memtab++;
+
+ ps_inp_bufs = (ihevce_lap_enc_buf_t *)ps_memtab->pv_base;
+ ps_memtab++;
+
+ /* loop to initialise the buffer pointer */
+ for(ctr = 0; ctr < ps_input_data_ctrl_buffs_desc->i4_num_yuv_bufs; ctr++)
+ {
+ pps_inp_bufs[ctr] = &ps_inp_bufs[ctr];
+
+ pps_inp_bufs[ctr]->s_input_buf.i4_size = sizeof(iv_input_data_ctrl_buffs_t);
+
+ pps_inp_bufs[ctr]->s_input_buf.s_input_buf.i4_size = sizeof(iv_yuv_buf_t);
+
+ /*pointer to i/p buf initialised to null in case of run time allocation*/
+ if(ps_hle_ctxt->i4_create_time_input_allocation == 1)
+ {
+ pps_inp_bufs[ctr]->s_input_buf.pv_synch_ctrl_bufs =
+ ps_input_data_ctrl_buffs_desc->ppv_synch_ctrl_bufs[ctr];
+
+ pps_inp_bufs[ctr]->s_input_buf.s_input_buf.pv_y_buf =
+ ps_input_data_ctrl_buffs_desc->ppv_y_buf[ctr];
+
+ pps_inp_bufs[ctr]->s_input_buf.s_input_buf.pv_u_buf =
+ ps_input_data_ctrl_buffs_desc->ppv_u_buf[ctr];
+
+ pps_inp_bufs[ctr]->s_input_buf.s_input_buf.pv_v_buf =
+ ps_input_data_ctrl_buffs_desc->ppv_v_buf[ctr];
+ }
+ else
+ {
+ pps_inp_bufs[ctr]->s_input_buf.pv_synch_ctrl_bufs = NULL;
+
+ pps_inp_bufs[ctr]->s_input_buf.s_input_buf.pv_y_buf = NULL;
+
+ pps_inp_bufs[ctr]->s_input_buf.s_input_buf.pv_u_buf = NULL;
+
+ pps_inp_bufs[ctr]->s_input_buf.s_input_buf.pv_v_buf = NULL;
+ }
+ }
+
+ /* Get the input data buffer Q handle */
+ ps_enc_ctxt->s_enc_ques.apv_q_hdl[IHEVCE_INPUT_DATA_CTRL_Q] = ihevce_buff_que_init(
+ ps_memtab, ps_input_data_ctrl_buffs_desc->i4_num_yuv_bufs, (void **)pps_inp_bufs);
+
+ /* increment the memtab pointer */
+ ps_memtab += ihevce_buff_que_get_num_mem_recs();
+ }
+ else
+ {
+ /* Get the input data buffer Q handle from 0th instance */
+ ps_enc_ctxt->s_enc_ques.apv_q_hdl[IHEVCE_INPUT_DATA_CTRL_Q] =
+ ps_enc_ctxt_base->s_enc_ques.apv_q_hdl[IHEVCE_INPUT_DATA_CTRL_Q];
+ }
+
+ /* ---------- Input control Que Mem init --------------- */
+ if(0 == ps_enc_ctxt->i4_resolution_id)
+ {
+ iv_input_ctrl_buffs_t **pps_inp_bufs;
+ iv_input_ctrl_buffs_t *ps_inp_bufs;
+
+ pps_inp_bufs = (iv_input_ctrl_buffs_t **)ps_memtab->pv_base;
+ ps_memtab++;
+
+ ps_inp_bufs = (iv_input_ctrl_buffs_t *)ps_memtab->pv_base;
+ ps_memtab++;
+
+ /* loop to initialise the buffer pointer */
+ for(ctr = 0; ctr < ps_input_asynch_ctrl_buffs_desc->i4_num_asynch_ctrl_bufs; ctr++)
+ {
+ pps_inp_bufs[ctr] = &ps_inp_bufs[ctr];
+
+ pps_inp_bufs[ctr]->i4_size = sizeof(iv_input_ctrl_buffs_t);
+
+ pps_inp_bufs[ctr]->pv_asynch_ctrl_bufs =
+ ps_input_asynch_ctrl_buffs_desc->ppv_asynch_ctrl_bufs[ctr];
+ }
+
+ /* Get the input control buffer Q handle */
+ ps_enc_ctxt->s_enc_ques.apv_q_hdl[IHEVCE_INPUT_ASYNCH_CTRL_Q] = ihevce_buff_que_init(
+ ps_memtab,
+ ps_input_asynch_ctrl_buffs_desc->i4_num_asynch_ctrl_bufs,
+ (void **)pps_inp_bufs);
+
+ /* increment the memtab pointer */
+ ps_memtab += ihevce_buff_que_get_num_mem_recs();
+ }
+ else
+ {
+ /* Get the input control buffer Q handle from 0th instance */
+ ps_enc_ctxt->s_enc_ques.apv_q_hdl[IHEVCE_INPUT_ASYNCH_CTRL_Q] =
+ ps_enc_ctxt_base->s_enc_ques.apv_q_hdl[IHEVCE_INPUT_ASYNCH_CTRL_Q];
+ }
+
+ /* ---------- Output data Que Mem init (one queue per bit-rate instance) ---- */
+ ps_out_desc = ps_output_data_buffs_desc;
+ for(i = 0; i < i4_num_bitrate_inst; i++)
+ {
+ iv_output_data_buffs_t **pps_out_bufs;
+ iv_output_data_buffs_t *ps_out_bufs;
+
+ pps_out_bufs = (iv_output_data_buffs_t **)ps_memtab->pv_base;
+ ps_memtab++;
+
+ ps_out_bufs = (iv_output_data_buffs_t *)ps_memtab->pv_base;
+ ps_memtab++;
+
+ /* loop to initialise the buffer pointer */
+ for(ctr = 0; ctr < ps_out_desc->i4_num_bitstream_bufs; ctr++)
+ {
+ pps_out_bufs[ctr] = &ps_out_bufs[ctr];
+
+ pps_out_bufs[ctr]->i4_size = sizeof(iv_output_data_buffs_t);
+
+ pps_out_bufs[ctr]->i4_bitstream_buf_size = ps_out_desc->i4_size_bitstream_buf;
+
+ /*pointer to o/p buf initialised to null in case of run time allocation*/
+ if(ps_hle_ctxt->i4_create_time_output_allocation == 1)
+ {
+ pps_out_bufs[ctr]->pv_bitstream_bufs = ps_out_desc->ppv_bitstream_bufs[ctr];
+ }
+ else
+ {
+ pps_out_bufs[ctr]->pv_bitstream_bufs = NULL;
+ }
+ }
+
+ /* Get the output data buffer Q handle */
+ ps_enc_ctxt->s_enc_ques.apv_q_hdl[IHEVCE_OUTPUT_DATA_Q + i] = ihevce_buff_que_init(
+ ps_memtab, ps_out_desc->i4_num_bitstream_bufs, (void **)pps_out_bufs);
+
+ /* increment the memtab pointer */
+ ps_memtab += ihevce_buff_que_get_num_mem_recs();
+
+ ps_out_desc++;
+ }
+
+ /* ----------Recon data Que Mem init (handle is NULL when recon is not saved) ---- */
+ ps_rec_desc = ps_recon_data_buffs_desc;
+ for(i = 0; i < i4_num_bitrate_inst; i++)
+ {
+ if(ps_hle_ctxt->ps_static_cfg_prms->i4_save_recon)
+ {
+ iv_enc_recon_data_buffs_t **pps_recon_bufs;
+ iv_enc_recon_data_buffs_t *ps_recon_bufs;
+
+ pps_recon_bufs = (iv_enc_recon_data_buffs_t **)ps_memtab->pv_base;
+ ps_memtab++;
+
+ ps_recon_bufs = (iv_enc_recon_data_buffs_t *)ps_memtab->pv_base;
+ ps_memtab++;
+
+ /* loop to initialise the buffer pointer */
+ for(ctr = 0; ctr < ps_rec_desc->i4_num_recon_bufs; ctr++)
+ {
+ pps_recon_bufs[ctr] = &ps_recon_bufs[ctr];
+
+ pps_recon_bufs[ctr]->i4_size = sizeof(iv_enc_recon_data_buffs_t);
+
+ pps_recon_bufs[ctr]->pv_y_buf = ps_rec_desc->ppv_y_buf[ctr];
+
+ pps_recon_bufs[ctr]->pv_cb_buf = ps_rec_desc->ppv_u_buf[ctr];
+
+ pps_recon_bufs[ctr]->pv_cr_buf = ps_rec_desc->ppv_v_buf[ctr];
+ }
+
+ /* Get the recon data buffer Q handle */
+ ps_enc_ctxt->s_enc_ques.apv_q_hdl[IHEVCE_RECON_DATA_Q + i] = ihevce_buff_que_init(
+ ps_memtab, ps_rec_desc->i4_num_recon_bufs, (void **)pps_recon_bufs);
+
+ /* increment the memtab pointer */
+ ps_memtab += ihevce_buff_que_get_num_mem_recs();
+
+ ps_rec_desc++;
+ }
+ else
+ {
+ ps_enc_ctxt->s_enc_ques.apv_q_hdl[IHEVCE_RECON_DATA_Q + i] = NULL;
+ }
+ }
+
+ return;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_mem_manager_free \endif
+*
+* \brief
+* Frees the create-time memtabs and, if I/O queues were created, the queue memtabs
+*
+* \param[in] ps_enc_ctxt : Encoder context whose s_mem_mngr lists the memtabs
+* \param[in] ps_intrf_ctxt : Interface context providing ihevce_mem_free and its handle
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_mem_manager_free(enc_ctxt_t *ps_enc_ctxt, ihevce_hle_ctxt_t *ps_intrf_ctxt)
+{
+ WORD32 ctr;
+
+ /* run a loop to free every buffer described by the create-time memtabs */
+ for(ctr = 0; ctr < ps_enc_ctxt->s_mem_mngr.i4_num_create_memtabs; ctr++)
+ {
+ ps_intrf_ctxt->ihevce_mem_free(
+ ps_intrf_ctxt->pv_mem_mgr_hdl, &ps_enc_ctxt->s_mem_mngr.ps_create_memtab[ctr]);
+ }
+
+ /* free the array that held the create-time memtab records themselves */
+ {
+ iv_mem_rec_t s_memtab;
+
+ s_memtab.i4_size = sizeof(iv_mem_rec_t);
+ s_memtab.i4_mem_size = ps_enc_ctxt->s_mem_mngr.i4_num_create_memtabs * sizeof(iv_mem_rec_t);
+ s_memtab.e_mem_type = IV_EXT_CACHEABLE_NORMAL_MEM;
+ s_memtab.i4_mem_alignment = 4;
+ s_memtab.pv_base = (void *)ps_enc_ctxt->s_mem_mngr.ps_create_memtab;
+
+ ps_intrf_ctxt->ihevce_mem_free(ps_intrf_ctxt->pv_mem_mgr_hdl, &s_memtab);
+ }
+
+ if(1 == ps_enc_ctxt->i4_io_queues_created)
+ {
+ /* run a loop to free all the memory allocated during que creation */
+ for(ctr = 0; ctr < ps_enc_ctxt->s_mem_mngr.i4_num_q_memtabs; ctr++)
+ {
+ ps_intrf_ctxt->ihevce_mem_free(
+ ps_intrf_ctxt->pv_mem_mgr_hdl, &ps_enc_ctxt->s_mem_mngr.ps_q_memtab[ctr]);
+ }
+
+ /* free the array that held the queue memtab records themselves */
+ {
+ iv_mem_rec_t s_memtab;
+
+ s_memtab.i4_size = sizeof(iv_mem_rec_t);
+ s_memtab.i4_mem_size = ps_enc_ctxt->s_mem_mngr.i4_num_q_memtabs * sizeof(iv_mem_rec_t);
+ s_memtab.e_mem_type = IV_EXT_CACHEABLE_NORMAL_MEM;
+ s_memtab.i4_mem_alignment = 4;
+ s_memtab.pv_base = (void *)ps_enc_ctxt->s_mem_mngr.ps_q_memtab;
+
+ ps_intrf_ctxt->ihevce_mem_free(ps_intrf_ctxt->pv_mem_mgr_hdl, &s_memtab);
+ }
+ }
+ return;
+}
diff --git a/encoder/ihevce_memory_init.h b/encoder/ihevce_memory_init.h
new file mode 100644
index 0000000..6e95675
--- /dev/null
+++ b/encoder/ihevce_memory_init.h
@@ -0,0 +1,78 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_memory_init.h
+*
+* \brief
+* This file contains interface definition of encoder memory manager
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_MEMORY_INIT_H_
+#define _IHEVCE_MEMORY_INIT_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Variable Declarations */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+void ihevce_mem_manager_init(enc_ctxt_t *ps_enc_ctxt, ihevce_hle_ctxt_t *ps_intrf_ctxt);
+
+void ihevce_mem_manager_que_init(
+ enc_ctxt_t *ps_enc_ctxt,
+ ihevce_hle_ctxt_t *ps_hle_ctxt,
+ iv_input_data_ctrl_buffs_desc_t *ps_input_data_ctrl_buffs_desc,
+ iv_input_asynch_ctrl_buffs_desc_t *ps_input_asynch_ctrl_buffs_desc,
+ iv_output_data_buffs_desc_t *ps_output_data_buffs_desc,
+ iv_recon_data_buffs_desc_t *ps_recon_data_buffs_desc);
+
+void ihevce_mem_manager_free(enc_ctxt_t *ps_enc_ctxt, ihevce_hle_ctxt_t *ps_intrf_ctxt);
+
+#endif /* _IHEVCE_MEMORY_INIT_H_ */
diff --git a/encoder/ihevce_multi_thrd_funcs.c b/encoder/ihevce_multi_thrd_funcs.c
new file mode 100644
index 0000000..e50ad2b
--- /dev/null
+++ b/encoder/ihevce_multi_thrd_funcs.c
@@ -0,0 +1,1148 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ihevce_multi_thrd_funcs.c
+*
+* @brief
+* Contains functions related to Job Ques and others, required for multi threading
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+* <TODO: TO BE ADDED>
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_multi_thrd_funcs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_entropy_structs.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+#include "ihevce_enc_loop_structs.h"
+#include "ihevce_bs_compute_ctb.h"
+#include "ihevce_global_tables.h"
+#include "ihevce_dep_mngr_interface.h"
+#include "hme_datatype.h"
+#include "hme_interface.h"
+#include "hme_common_defs.h"
+#include "hme_defs.h"
+#include "ihevce_me_instr_set_router.h"
+#include "ihevce_ipe_instr_set_router.h"
+#include "ihevce_ipe_structs.h"
+#include "ihevce_coarse_me_pass.h"
+
+#include "cast_types.h"
+#include "osal.h"
+#include "osal_defaults.h"
+
+/********************************************************************/
+/*Macros */
+/********************************************************************/
+#define MULT_FACT 100
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief Pops the next job from the pre-enc job queue of the given job type
+*
+* @par Description: Queue head is read and advanced under a per-stage mutex
+*
+* @param[inout] pv_multi_thrd_ctxt
+* Pointer to Multi thread context (cast to multi_thrd_ctxt_t *)
+*
+* @param[in] i4_job_type
+* Job type index; selects the queue handle and which stage mutex is taken
+*
+* @param[in] i4_blocking_mode
+* 1 : spin until all input dependency bytes of the job are zero; else no wait
+*
+* @param[in] i4_ping_pong
+* First index into as_job_que_preenc_hdls (selects the set of queue handles)
+*
+* @returns Next job pointer; NULL if queue is empty or (non-blocking) not ready
+*
+*******************************************************************************
+*/
+void *ihevce_pre_enc_grp_get_next_job(
+ void *pv_multi_thrd_ctxt, WORD32 i4_job_type, WORD32 i4_blocking_mode, WORD32 i4_ping_pong)
+{
+ /* Local variables */
+ multi_thrd_ctxt_t *ps_multi_thrd;
+ job_queue_handle_t *ps_job_queue_hdl;
+ void *pv_next = NULL;
+ UWORD8 au1_in_dep_cmp[MAX_IN_DEP] = { 0 }; /* all-zero reference for the dependency compare */
+ void *pv_job_q_mutex_hdl_pre_enc = NULL;
+
+ /* Derive local variables */
+ ps_multi_thrd = (multi_thrd_ctxt_t *)pv_multi_thrd_ctxt;
+ ps_job_queue_hdl =
+ (job_queue_handle_t *)&ps_multi_thrd->as_job_que_preenc_hdls[i4_ping_pong][i4_job_type];
+
+ /* pick the mutex for Q access based on the job type */
+ /* To allow parallelism between stages a separate mutex is used per stage:
+ decomp (< ME_JOB_LYR4), HME (< IPE_JOB_LYR0), otherwise L0 IPE */
+ if(i4_job_type < ME_JOB_LYR4)
+ {
+ pv_job_q_mutex_hdl_pre_enc = ps_multi_thrd->pv_job_q_mutex_hdl_pre_enc_decomp;
+ }
+ else if(i4_job_type < IPE_JOB_LYR0)
+ {
+ pv_job_q_mutex_hdl_pre_enc = ps_multi_thrd->pv_job_q_mutex_hdl_pre_enc_hme;
+ }
+ else
+ {
+ pv_job_q_mutex_hdl_pre_enc = ps_multi_thrd->pv_job_q_mutex_hdl_pre_enc_l0ipe;
+ }
+
+ osal_mutex_lock(pv_job_q_mutex_hdl_pre_enc);
+ /* Get the next */
+ pv_next = ps_job_queue_hdl->pv_next;
+
+ /* Update the next by checking input dependency */
+ if(NULL != pv_next)
+ {
+ job_queue_t *ps_job_queue = (job_queue_t *)pv_next;
+
+ /* a job is ready when all MAX_IN_DEP bytes of au1_in_dep are zero */
+ /* blocking mode (1 == i4_blocking_mode) spins until that holds */
+ /* non-blocking mode returns NULL instead when the job is not ready */
+
+ if(1 == i4_blocking_mode)
+ {
+ volatile WORD32 mem_diff;
+ volatile UWORD8 *pu1_ref_buf = &au1_in_dep_cmp[0];
+ volatile UWORD8 *pu1_curr_buf = &ps_job_queue->au1_in_dep[0];
+
+ mem_diff = memcmp((void *)pu1_ref_buf, (void *)pu1_curr_buf, MAX_IN_DEP);
+
+ /* NOTE(review): spins with the mutex held; the casts drop volatile for memcmp */
+ while(0 != mem_diff)
+ {
+ mem_diff = memcmp((void *)pu1_ref_buf, (void *)pu1_curr_buf, MAX_IN_DEP);
+ }
+
+ /* update the next job in the queue */
+ ps_job_queue_hdl->pv_next = ps_job_queue->pv_next;
+ }
+ else
+ {
+ /* check for input dependency resolved */
+ if((0 != memcmp(&au1_in_dep_cmp[0], &ps_job_queue->au1_in_dep[0], MAX_IN_DEP)))
+ {
+ /* not ready: return null and leave the queue head unchanged */
+ pv_next = NULL;
+ }
+ else
+ {
+ /* update the next job in the queue */
+ ps_job_queue_hdl->pv_next = ps_job_queue->pv_next;
+ }
+ }
+ }
+
+ /* unlock the mutex */
+ osal_mutex_unlock(pv_job_q_mutex_hdl_pre_enc);
+
+ /* Return */
+ return (pv_next);
+
+} /* End of get_next_job */
+
+/**
+*******************************************************************************
+*
+* @brief Pops the next job from the enc-group job queue of the given job type
+*
+* @par Description: Queue head is read and advanced under the ME or enc-loop mutex
+*
+* @param[inout] pv_multi_thrd_ctxt
+* Pointer to Multi thread context (cast to multi_thrd_ctxt_t *)
+*
+* @param[in] i4_job_type
+* Job type index; ME_JOB_ENC_LYR uses the ME mutex and ME output params
+*
+* @param[in] i4_blocking_mode
+* 1 : spin until all input dependency bytes of the job are zero; else no wait
+*
+* @param[in] i4_curr_frm_id
+* Index into aps_cur_out_me_prms / aps_cur_inp_enc_prms holding the queue handles
+*
+* @returns Next job pointer; NULL if queue is empty or (non-blocking) not ready
+*
+*******************************************************************************
+*/
+void *ihevce_enc_grp_get_next_job(
+ void *pv_multi_thrd_ctxt, WORD32 i4_job_type, WORD32 i4_blocking_mode, WORD32 i4_curr_frm_id)
+{
+ /* Local variables */
+ multi_thrd_ctxt_t *ps_multi_thrd;
+ job_queue_handle_t *ps_job_queue_hdl;
+ void *pv_next = NULL;
+ void *pv_job_q_mutex_hdl_enc_grp;
+ UWORD8 au1_in_dep_cmp[MAX_IN_DEP] = { 0 }; /* all-zero reference for the dependency compare */
+
+ /* Derive local variables */
+ ps_multi_thrd = (multi_thrd_ctxt_t *)pv_multi_thrd_ctxt;
+
+ if(ME_JOB_ENC_LYR == i4_job_type) /* ME jobs: ME mutex, handles from aps_cur_out_me_prms */
+ {
+ pv_job_q_mutex_hdl_enc_grp = ps_multi_thrd->pv_job_q_mutex_hdl_enc_grp_me;
+
+ ps_job_queue_hdl = (job_queue_handle_t *)&ps_multi_thrd->aps_cur_out_me_prms[i4_curr_frm_id]
+ ->as_job_que_enc_hdls[i4_job_type];
+ }
+ else /* all other job types: enc-loop mutex, handles from aps_cur_inp_enc_prms */
+ {
+ pv_job_q_mutex_hdl_enc_grp = ps_multi_thrd->pv_job_q_mutex_hdl_enc_grp_enc_loop;
+ ps_job_queue_hdl =
+ (job_queue_handle_t *)&ps_multi_thrd->aps_cur_inp_enc_prms[i4_curr_frm_id]
+ ->as_job_que_enc_hdls[i4_job_type];
+ }
+
+ /* lock the mutex for Q access */
+ osal_mutex_lock(pv_job_q_mutex_hdl_enc_grp);
+
+ /* Get the next */
+ pv_next = ps_job_queue_hdl->pv_next;
+
+ /* Update the next by checking input dependency */
+ if(NULL != pv_next)
+ {
+ job_queue_t *ps_job_queue = (job_queue_t *)pv_next;
+
+ /* a job is ready when all MAX_IN_DEP bytes of au1_in_dep are zero */
+ /* blocking mode (1 == i4_blocking_mode) spins until that holds */
+ /* non-blocking mode returns NULL instead when the job is not ready */
+
+ if(1 == i4_blocking_mode)
+ {
+ volatile WORD32 mem_diff;
+ volatile UWORD8 *pu1_ref_buf = &au1_in_dep_cmp[0];
+ volatile UWORD8 *pu1_curr_buf = &ps_job_queue->au1_in_dep[0];
+
+ mem_diff = memcmp((void *)pu1_ref_buf, (void *)pu1_curr_buf, MAX_IN_DEP);
+
+ /* NOTE(review): spins with the mutex held; the casts drop volatile for memcmp */
+ while(0 != mem_diff)
+ {
+ mem_diff = memcmp((void *)pu1_ref_buf, (void *)pu1_curr_buf, MAX_IN_DEP);
+ }
+
+ /* update the next job in the queue */
+ ps_job_queue_hdl->pv_next = ps_job_queue->pv_next;
+ }
+ else
+ {
+ /* check for input dependency resolved */
+ if((0 != memcmp(&au1_in_dep_cmp[0], &ps_job_queue->au1_in_dep[0], MAX_IN_DEP)))
+ {
+ /* not ready: return null and leave the queue head unchanged */
+ pv_next = NULL;
+ }
+ else
+ {
+ /* update the next job in the queue */
+ ps_job_queue_hdl->pv_next = ps_job_queue->pv_next;
+ }
+ }
+ }
+
+ /* unlock the mutex */
+ osal_mutex_unlock(pv_job_q_mutex_hdl_enc_grp);
+
+ /* Return */
+ return (pv_next);
+
+} /* End of get_next_job */
+
+/**
+*******************************************************************************
+*
+* @brief Marks the output dependencies of a finished pre-enc job as done
+*
+* @par Description: Writes 0 to every byte addressed by the job's au4_out_ofsts
+*
+* @param[inout] pv_multi_thrd_ctxt
+* Pointer to Multi thread context
+*
+* @param[in] ps_curr_job
+* Current finished Job pointer
+*
+* @param[in] i4_ping_pong
+* Index into aps_job_q_pre_enc selecting the queue buffer the offsets refer to
+*
+* @returns
+* None
+*******************************************************************************
+*/
+void ihevce_pre_enc_grp_job_set_out_dep(
+ void *pv_multi_thrd_ctxt, job_queue_t *ps_curr_job, WORD32 i4_ping_pong)
+{
+ /* local variables */
+ WORD32 ctr;
+ multi_thrd_ctxt_t *ps_multi_thrd;
+
+ ps_multi_thrd = (multi_thrd_ctxt_t *)pv_multi_thrd_ctxt;
+
+ /* loop over the job's output dependencies; offsets are in bytes from the queue base */
+ for(ctr = 0; ctr < ps_curr_job->i4_num_output_dep; ctr++)
+ {
+ UWORD8 *pu1_ptr;
+
+ pu1_ptr = (UWORD8 *)ps_multi_thrd->aps_job_q_pre_enc[i4_ping_pong];
+ pu1_ptr += ps_curr_job->au4_out_ofsts[ctr];
+ *pu1_ptr = 0;
+ }
+
+ return;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Marks the output dependencies of a finished enc-group job as done
+*
+* @par Description: Writes 0 to every byte addressed by the job's au4_out_ofsts
+*
+* @param[inout] pv_multi_thrd_ctxt
+* Pointer to Multi thread context
+*
+* @param[in] ps_curr_job
+* Current finished Job pointer
+*
+* @param[in] i4_curr_frm_id
+* Index into aps_cur_out_me_prms / aps_cur_inp_enc_prms for the queue base
+*
+* @returns
+* None
+*******************************************************************************
+*/
+void ihevce_enc_grp_job_set_out_dep(
+ void *pv_multi_thrd_ctxt, job_queue_t *ps_curr_job, WORD32 i4_curr_frm_id)
+{
+ /* local variables */
+ WORD32 ctr;
+ UWORD8 *pu1_ptr;
+ multi_thrd_ctxt_t *ps_multi_thrd;
+
+ ps_multi_thrd = (multi_thrd_ctxt_t *)pv_multi_thrd_ctxt;
+
+ if(ME_JOB_ENC_LYR == ps_curr_job->i4_task_type)
+ {
+ pu1_ptr = (UWORD8 *)ps_multi_thrd->aps_cur_out_me_prms[i4_curr_frm_id]->ps_job_q_enc;
+ }
+ else
+ {
+ pu1_ptr = (UWORD8 *)ps_multi_thrd->aps_cur_inp_enc_prms[i4_curr_frm_id]->ps_job_q_enc;
+ }
+
+ /* loop over the job's output dependencies; offsets are in bytes from the queue base */
+ for(ctr = 0; ctr < ps_curr_job->i4_num_output_dep; ctr++)
+ {
+ WORD32 i4_off;
+ i4_off = ps_curr_job->au4_out_ofsts[ctr];
+ pu1_ptr[i4_off] = 0;
+ }
+
+ return;
+}
+
+/**
+*******************************************************************************
+*
+* @brief Function prepares the Job Queues for all the passes of encoder
+*
+* @par Description: Based on picture type sets the input and output dependency
+*
+* @param[inout] pv_enc_ctxt
+* Pointer to encoder context
+*
+* @param[in] ps_curr_inp
+* Current Input buffer pointer
+*
+* @param[in] i4_curr_frm_id
+* Index into aps_cur_out_me_prms selecting the job queue buffer to prepare
+*
+* @returns
+* None
+*******************************************************************************
+*/
+void ihevce_prepare_job_queue(
+ void *pv_enc_ctxt, ihevce_lap_enc_buf_t *ps_curr_inp, WORD32 i4_curr_frm_id)
+{
+ /* local variables */
+ enc_ctxt_t *ps_ctxt;
+ job_queue_t *ps_me_job_queue_lyr0;
+ job_queue_t *ps_enc_loop_job_queue;
+ WORD32 pass;
+ WORD32 num_jobs, col_tile_ctr;
+ WORD32 num_ctb_vert_rows;
+ WORD32 i4_pic_type;
+ WORD32 i; //counter for bitrate
+ WORD32 i4_num_bitrate_instances;
+ WORD32 i4_num_tile_col;
+
+ /* derive local variables */
+ ps_ctxt = (enc_ctxt_t *)pv_enc_ctxt;
+ num_ctb_vert_rows = ps_ctxt->s_frm_ctb_prms.i4_num_ctbs_vert;
+ i4_num_bitrate_instances = ps_ctxt->i4_num_bitrates;
+
+ i4_num_tile_col = 1;
+ if(1 == ps_ctxt->ps_tile_params_base->i4_tiles_enabled_flag)
+ {
+ i4_num_tile_col = ps_ctxt->ps_tile_params_base->i4_num_tile_cols;
+ }
+ /* memset the entire job que buffer to zero */
+ memset(
+ ps_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_curr_frm_id]->ps_job_q_enc,
+ 0,
+ MAX_NUM_VERT_UNITS_FRM * NUM_ENC_JOBS_QUES * i4_num_tile_col * sizeof(job_queue_t));
+
+ /* get the start address of Job queues */
+ ps_me_job_queue_lyr0 = ps_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_curr_frm_id]->ps_job_q_enc;
+ ps_enc_loop_job_queue = ps_me_job_queue_lyr0 + (i4_num_tile_col * MAX_NUM_VERT_UNITS_FRM);
+
+ /* store the JOB queue in the Job handle */
+ ps_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_curr_frm_id]
+ ->as_job_que_enc_hdls[ME_JOB_ENC_LYR]
+ .pv_next = (void *)ps_me_job_queue_lyr0;
+ /* store the JOB queue in the Job handle for reenc */
+ ps_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_curr_frm_id]
+ ->as_job_que_enc_hdls_reenc[ME_JOB_ENC_LYR]
+ .pv_next = (void *)ps_me_job_queue_lyr0;
+
+ for(i = 0; i < i4_num_bitrate_instances; i++)
+ {
+ ps_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_curr_frm_id]
+ ->as_job_que_enc_hdls[ENC_LOOP_JOB + i]
+ .pv_next = (void *)ps_enc_loop_job_queue;
+ ps_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_curr_frm_id]
+ ->as_job_que_enc_hdls_reenc[ENC_LOOP_JOB + i]
+ .pv_next = (void *)ps_enc_loop_job_queue;
+ ps_enc_loop_job_queue += (i4_num_tile_col * MAX_NUM_VERT_UNITS_FRM);
+ }
+
+ i4_pic_type = ps_curr_inp->s_lap_out.i4_pic_type;
+
+ //prepare ME JOB queue first
+ //for(pass = 0; pass < NUM_ENC_JOBS_QUES; pass++)
+ {
+ job_queue_t *ps_job_queue_curr;
+ job_queue_t *ps_job_queue_next;
+ WORD32 ctr;
+ WORD32 inp_dep;
+ WORD32 out_dep;
+ WORD32 num_vert_units;
+ HEVCE_ENC_JOB_TYPES_T task_type;
+
+ pass = 0; //= ENC_LOOP_JOB
+
+ {
+ /* the ME job queue gets one vertical unit per CTB row of the frame */
+ num_vert_units = num_ctb_vert_rows;
+ task_type = ME_JOB_ENC_LYR;
+ ps_job_queue_curr = ps_me_job_queue_lyr0;
+ ps_job_queue_next =
+ (job_queue_t *)ps_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_curr_frm_id]
+ ->as_job_que_enc_hdls[ENC_LOOP_JOB]
+ .pv_next;
+ inp_dep = 0;
+ out_dep = 1; //set reference bit-rate's input dependency
+ }
+
+ if((ME_JOB_ENC_LYR == pass) &&
+ ((IV_I_FRAME == i4_pic_type) || (IV_IDR_FRAME == i4_pic_type)) && !L0ME_IN_OPENLOOP_MODE)
+ {
+ //continue;
+ }
+ else
+ {
+ /* loop over all the vertical rows */
+ for(num_jobs = 0; num_jobs < num_vert_units; num_jobs++)
+ {
+ /* loop over all the column tiles */
+ for(col_tile_ctr = 0; col_tile_ctr < i4_num_tile_col; col_tile_ctr++)
+ {
+ ULWORD64 u8_temp;
+
+ {
+ ps_job_queue_curr->s_job_info.s_me_job_info.i4_vert_unit_row_no = num_jobs;
+ ps_job_queue_curr->s_job_info.s_me_job_info.i4_tile_col_idx = col_tile_ctr;
+ }
+
+ ps_job_queue_curr->pv_next = (void *)(ps_job_queue_curr + 1);
+
+ ps_job_queue_curr->i4_task_type = task_type;
+
+ ps_job_queue_curr->i4_num_input_dep = inp_dep;
+
+ /* set the entire input dep buffer to default value 0 */
+ memset(&ps_job_queue_curr->au1_in_dep[0], 0, sizeof(UWORD8) * MAX_IN_DEP);
+
+ /* set the input dep buffer to 1 for num inp dep */
+ if(0 != inp_dep)
+ {
+ memset(&ps_job_queue_curr->au1_in_dep[0], 1, sizeof(UWORD8) * inp_dep);
+ }
+
+ ps_job_queue_curr->i4_num_output_dep = out_dep;
+
+ /* set the entire offset buffer to default value */
+ memset(
+ &ps_job_queue_curr->au4_out_ofsts[0], 0xFF, sizeof(UWORD32) * MAX_OUT_DEP);
+
+ for(ctr = 0; ctr < out_dep; ctr++)
+ {
+ /* col tile level dependency b/w ME & EncLoop */
+ u8_temp = (ULWORD64)(
+ &ps_job_queue_next[num_jobs * i4_num_tile_col + col_tile_ctr] -
+ ps_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_curr_frm_id]->ps_job_q_enc);
+
+ u8_temp *= sizeof(job_queue_t);
+
+ /* store the byte offset (from the queue base) to the array */
+ ps_job_queue_curr->au4_out_ofsts[ctr] = (UWORD32)u8_temp;
+ }
+
+ ps_job_queue_curr++;
+ }
+ } //for ends
+
+ /* set the last pointer to NULL */
+ ps_job_queue_curr--;
+ ps_job_queue_curr->pv_next = (void *)NULL;
+ } //else ends
+ }
+
+ //prepare Enc_loop JOB queue for all bitrate instances
+ //for(pass = 0; pass < NUM_ENC_JOBS_QUES; pass++)
+ for(i = 0; i < i4_num_bitrate_instances; i++)
+ {
+ job_queue_t *ps_job_queue_curr;
+ job_queue_t *ps_job_queue_next;
+ WORD32 ctr;
+ WORD32 inp_dep;
+ WORD32 out_dep;
+ WORD32 num_vert_units;
+ HEVCE_ENC_JOB_TYPES_T task_type;
+
+ /* In case of I or IDR pictures ME will not perform any processing */
+ //if(ENC_LOOP_JOB == pass)
+ {
+ if(((IV_I_FRAME == i4_pic_type) || (IV_IDR_FRAME == i4_pic_type)) &&
+ !L0ME_IN_OPENLOOP_MODE)
+ {
+ inp_dep = 0;
+ }
+ else
+ {
+ inp_dep = 1;
+ }
+
+ task_type = (HEVCE_ENC_JOB_TYPES_T)(ENC_LOOP_JOB + i);
+ ps_job_queue_curr =
+ (job_queue_t *)ps_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_curr_frm_id]
+ ->as_job_que_enc_hdls[ENC_LOOP_JOB + i]
+ .pv_next;
+ ps_job_queue_next =
+ (job_queue_t *)ps_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_curr_frm_id]
+ ->as_job_que_enc_hdls[ENC_LOOP_JOB + i + 1]
+ .pv_next;
+ out_dep = 1; //output dependency is the next bit-rate instance's input dependency
+ num_vert_units = num_ctb_vert_rows;
+
+ if(i == i4_num_bitrate_instances - 1) //for last bit-rate instance
+ {
+ //clear output dependency
+ ps_job_queue_next = NULL;
+ out_dep = 0;
+ }
+ }
+
+ /* loop over all the vertical rows */
+ for(num_jobs = 0; num_jobs < num_vert_units; num_jobs++)
+ {
+ /* loop over all the column tiles */
+ for(col_tile_ctr = 0; col_tile_ctr < i4_num_tile_col; col_tile_ctr++)
+ {
+ ULWORD64 u8_temp;
+
+ {
+ ps_job_queue_curr->s_job_info.s_enc_loop_job_info.i4_ctb_row_no = num_jobs;
+ ps_job_queue_curr->s_job_info.s_enc_loop_job_info.i4_tile_col_idx =
+ col_tile_ctr;
+ ps_job_queue_curr->s_job_info.s_enc_loop_job_info.i4_bitrate_instance_no = i;
+ }
+
+ ps_job_queue_curr->pv_next = (void *)(ps_job_queue_curr + 1);
+
+ ps_job_queue_curr->i4_task_type = task_type;
+
+ ps_job_queue_curr->i4_num_input_dep = inp_dep;
+
+ /* set the entire input dep buffer to default value 0 */
+ memset(&ps_job_queue_curr->au1_in_dep[0], 0, sizeof(UWORD8) * MAX_IN_DEP);
+
+ /* set the input dep buffer to 1 for num inp dep */
+ if(0 != inp_dep)
+ {
+ memset(&ps_job_queue_curr->au1_in_dep[0], 1, sizeof(UWORD8) * inp_dep);
+ }
+
+ ps_job_queue_curr->i4_num_output_dep = out_dep;
+
+ /* set the entire offset buffer to default value */
+ memset(&ps_job_queue_curr->au4_out_ofsts[0], 0xFF, sizeof(UWORD32) * MAX_OUT_DEP);
+
+ for(ctr = 0; ctr < out_dep; ctr++)
+ {
+ /* col tile level dependency b/w EncLoops of MBR */
+ u8_temp = (ULWORD64)(
+ &ps_job_queue_next[num_jobs * i4_num_tile_col + col_tile_ctr] -
+ ps_ctxt->s_multi_thrd.aps_cur_out_me_prms[i4_curr_frm_id]->ps_job_q_enc);
+
+ u8_temp *= sizeof(job_queue_t);
+
+ /* store the byte offset (from the queue base) to the array */
+ ps_job_queue_curr->au4_out_ofsts[ctr] = (UWORD32)u8_temp;
+ }
+
+ ps_job_queue_curr++;
+ }
+ }
+
+ /* set the last pointer to NULL */
+ ps_job_queue_curr--;
+ ps_job_queue_curr->pv_next = (void *)NULL;
+ }
+
+ return;
+
+} /* End of ihevce_prepare_job_queue */
+
+/**
+*******************************************************************************
+*
+* @brief Function prepares the Job Queues for all the passes of pre enc
+*
+* @par Description: Sets the input and output dependency of every pre enc job
+*
+* @param[inout] pv_enc_ctxt
+* Pointer to encoder context
+*
+* @param[in] ps_curr_inp
+* Current Input buffer pointer
+*
+* @param[in] i4_ping_pong
+* Index into aps_job_q_pre_enc / as_job_que_preenc_hdls for the set to prepare
+*
+* @returns
+* None
+*******************************************************************************
+*/
+void ihevce_prepare_pre_enc_job_queue(
+ void *pv_enc_ctxt, ihevce_lap_enc_buf_t *ps_curr_inp, WORD32 i4_ping_pong)
+{
+ /* local variables */
+ enc_ctxt_t *ps_ctxt;
+ job_queue_t *ps_decomp_job_queue_lyr0;
+ job_queue_t *ps_decomp_job_queue_lyr1;
+ job_queue_t *ps_decomp_job_queue_lyr2;
+ job_queue_t *ps_decomp_job_queue_lyr3;
+ job_queue_t *ps_me_job_queue_lyr1;
+ job_queue_t *ps_me_job_queue_lyr2;
+ job_queue_t *ps_me_job_queue_lyr3;
+ job_queue_t *ps_me_job_queue_lyr4;
+ job_queue_t *ps_ipe_job_queue;
+ job_queue_t *aps_me_job_queues[MAX_NUM_HME_LAYERS];
+ multi_thrd_me_job_q_prms_t *ps_me_job_q_prms;
+ WORD32 ai4_decomp_num_vert_units_lyr[MAX_NUM_HME_LAYERS];
+ WORD32 a14_decomp_lyr_unit_size[MAX_NUM_HME_LAYERS];
+ WORD32 layer_no;
+ WORD32 decomp_lyr_cnt;
+ WORD32 num_jobs;
+ WORD32 n_tot_layers;
+ WORD32 a_wd[MAX_NUM_HME_LAYERS];
+ WORD32 a_ht[MAX_NUM_HME_LAYERS];
+ WORD32 a_disp_wd[MAX_NUM_HME_LAYERS];
+ WORD32 a_disp_ht[MAX_NUM_HME_LAYERS];
+ WORD32 u4_log_ctb_size;
+ WORD32 num_ctb_vert_rows;
+ WORD32 pass;
+ WORD32 me_lyr_cnt;
+ WORD32 num_hme_lyrs;
+ WORD32 ai4_me_num_vert_units_lyr[MAX_NUM_HME_LAYERS];
+ WORD32 me_start_lyr_pass;
+ WORD32 ctb_size;
+ WORD32 me_coarsest_lyr_inp_dep = -1;
+
+ (void)ps_curr_inp;
+ /* derive local variables */
+ ps_ctxt = (enc_ctxt_t *)pv_enc_ctxt;
+ num_ctb_vert_rows = ps_ctxt->s_frm_ctb_prms.i4_num_ctbs_vert;
+
+ /* CHANGE REQUIRED: change the pointer to the job queue buffer */
+ /* memset the entire job que buffer to zero */
+ memset(
+ ps_ctxt->s_multi_thrd.aps_job_q_pre_enc[i4_ping_pong],
+ 0,
+ MAX_NUM_VERT_UNITS_FRM * NUM_PRE_ENC_JOBS_QUES * sizeof(job_queue_t));
+
+ /* Get the number of vertical units in a layer from the resolution of the layer */
+ a_wd[0] = ps_ctxt->s_frm_ctb_prms.i4_cu_aligned_pic_wd;
+ a_ht[0] = ps_ctxt->s_frm_ctb_prms.i4_cu_aligned_pic_ht;
+ n_tot_layers = hme_derive_num_layers(1, a_wd, a_ht, a_disp_wd, a_disp_ht);
+ GETRANGE(u4_log_ctb_size, ps_ctxt->s_frm_ctb_prms.i4_ctb_size);
+
+ ASSERT(n_tot_layers >= 3);
+
+ /*
+ * Always force minimum layers as 4 so that we would have both l1 and l2
+ * pre intra analysis
+ */
+ if(n_tot_layers == 3)
+ {
+ n_tot_layers = 4;
+ a_wd[3] = CEIL16(a_wd[2] >> 1);
+ a_ht[3] = CEIL16(a_ht[2] >> 1);
+ }
+
+ for(layer_no = 0; layer_no < n_tot_layers; layer_no++)
+ {
+ ctb_size = 1 << (u4_log_ctb_size - 1 - layer_no);
+ ai4_decomp_num_vert_units_lyr[layer_no] = ((a_ht[layer_no] + ctb_size) & ~(ctb_size - 1)) >>
+ (u4_log_ctb_size - 1 - layer_no);
+ a14_decomp_lyr_unit_size[layer_no] = 1 << (u4_log_ctb_size - 1 - layer_no);
+ }
+
+ /* get the start address of Job queues */
+ ps_decomp_job_queue_lyr0 = ps_ctxt->s_multi_thrd.aps_job_q_pre_enc[i4_ping_pong];
+ ps_decomp_job_queue_lyr1 = ps_decomp_job_queue_lyr0 + MAX_NUM_VERT_UNITS_FRM;
+ ps_decomp_job_queue_lyr2 = ps_decomp_job_queue_lyr1 + MAX_NUM_VERT_UNITS_FRM;
+ ps_decomp_job_queue_lyr3 = ps_decomp_job_queue_lyr2 + MAX_NUM_VERT_UNITS_FRM;
+ ps_me_job_queue_lyr4 = ps_decomp_job_queue_lyr3 + MAX_NUM_VERT_UNITS_FRM;
+ ps_me_job_queue_lyr3 = ps_me_job_queue_lyr4 + MAX_NUM_VERT_UNITS_FRM;
+ ps_me_job_queue_lyr2 = ps_me_job_queue_lyr3 + MAX_NUM_VERT_UNITS_FRM;
+ ps_me_job_queue_lyr1 = ps_me_job_queue_lyr2 + MAX_NUM_VERT_UNITS_FRM;
+
+ ps_ipe_job_queue = ps_me_job_queue_lyr1 + MAX_NUM_VERT_UNITS_FRM;
+
+ /* store the JOB queue in the Job handle */
+ ps_ctxt->s_multi_thrd.as_job_que_preenc_hdls[i4_ping_pong][DECOMP_JOB_LYR0].pv_next =
+ (void *)ps_decomp_job_queue_lyr0;
+ ps_ctxt->s_multi_thrd.as_job_que_preenc_hdls[i4_ping_pong][DECOMP_JOB_LYR1].pv_next =
+ (void *)ps_decomp_job_queue_lyr1;
+ ps_ctxt->s_multi_thrd.as_job_que_preenc_hdls[i4_ping_pong][DECOMP_JOB_LYR2].pv_next =
+ (void *)ps_decomp_job_queue_lyr2;
+ ps_ctxt->s_multi_thrd.as_job_que_preenc_hdls[i4_ping_pong][DECOMP_JOB_LYR3].pv_next =
+ (void *)ps_decomp_job_queue_lyr3;
+ ps_ctxt->s_multi_thrd.as_job_que_preenc_hdls[i4_ping_pong][ME_JOB_LYR4].pv_next =
+ (void *)ps_me_job_queue_lyr4;
+ ps_ctxt->s_multi_thrd.as_job_que_preenc_hdls[i4_ping_pong][ME_JOB_LYR3].pv_next =
+ (void *)ps_me_job_queue_lyr3;
+ ps_ctxt->s_multi_thrd.as_job_que_preenc_hdls[i4_ping_pong][ME_JOB_LYR2].pv_next =
+ (void *)ps_me_job_queue_lyr2;
+ ps_ctxt->s_multi_thrd.as_job_que_preenc_hdls[i4_ping_pong][ME_JOB_LYR1].pv_next =
+ (void *)ps_me_job_queue_lyr1;
+ ps_ctxt->s_multi_thrd.as_job_que_preenc_hdls[i4_ping_pong][IPE_JOB_LYR0].pv_next =
+ (void *)ps_ipe_job_queue;
+
+ /* store the ME job queues into an array indexed by layer number */
+ aps_me_job_queues[0] = NULL;
+ aps_me_job_queues[1] = ps_me_job_queue_lyr1;
+ aps_me_job_queues[2] = ps_me_job_queue_lyr2;
+ aps_me_job_queues[3] = ps_me_job_queue_lyr3;
+ aps_me_job_queues[4] = ps_me_job_queue_lyr4;
+ decomp_lyr_cnt = 0;
+ /* Set the me_lyr_cnt to 0 */
+ me_lyr_cnt = 0;
+
+ /* call the ME function which returns the layer properties */
+ ihevce_coarse_me_get_lyr_prms_job_que(
+ ps_ctxt->s_module_ctxt.pv_coarse_me_ctxt,
+ ps_curr_inp,
+ &num_hme_lyrs,
+ &ai4_me_num_vert_units_lyr[0],
+ &ps_ctxt->s_multi_thrd.as_me_job_q_prms[0][0]);
+
+ ps_me_job_q_prms = &ps_ctxt->s_multi_thrd.as_me_job_q_prms[0][0];
+
+ /* derive ME coarsest layer task type */
+ me_start_lyr_pass = ME_JOB_LYR4 + (MAX_NUM_HME_LAYERS - num_hme_lyrs);
+
+ ps_ctxt->s_multi_thrd.i4_me_coarsest_lyr_type = me_start_lyr_pass;
+
+ /* coarsest HME layer number of units should be less than or equal to max in dep in Job queue */
+ /* this constraint is to take care of Coarsest layer requiring entire layer to do FULL search */
+ ASSERT(ai4_me_num_vert_units_lyr[0] <= MAX_IN_DEP);
+ /* loop over all the passes in the encoder */
+ for(pass = 0; pass < NUM_PRE_ENC_JOBS_QUES; pass++)
+ {
+ job_queue_t *ps_pre_enc_job_queue_curr;
+ job_queue_t *ps_pre_enc_job_queue_next;
+ WORD32 inp_dep_pass;
+ WORD32 out_dep_pass;
+ WORD32 num_vert_units;
+ HEVCE_PRE_ENC_JOB_TYPES_T pre_enc_task_type;
+ HEVCE_ENC_JOB_TYPES_T enc_task_type;
+ WORD32 proc_valid_flag = 0;
+
+ // num_vert_units = ai4_decomp_num_vert_units_lyr[decomp_lyr_cnt];
+ /* Initializing the job queues for max no of rows among all the layers. And max would be for last layer*/
+ num_vert_units = ai4_decomp_num_vert_units_lyr[n_tot_layers - 1];
+
+ if(DECOMP_JOB_LYR0 == pass)
+ {
+ proc_valid_flag = 1;
+ pre_enc_task_type = DECOMP_JOB_LYR0;
+ enc_task_type = (HEVCE_ENC_JOB_TYPES_T)-1;
+ ps_pre_enc_job_queue_curr = ps_decomp_job_queue_lyr0;
+
+ inp_dep_pass = 0;
+ decomp_lyr_cnt++;
+
+ /* If all the decomp layers are done next job queue will be ME job queue */
+ if(decomp_lyr_cnt == (n_tot_layers - 1))
+ {
+ /* Assumption : num_hme_lyrs > 1*/
+ ps_pre_enc_job_queue_next = aps_me_job_queues[num_hme_lyrs - 1];
+
+ /* ME coarsest layer is currently made dependent on entire decomp layer */
+ out_dep_pass = ai4_me_num_vert_units_lyr[0];
+ me_coarsest_lyr_inp_dep = num_vert_units;
+ }
+ else
+ {
+ ps_pre_enc_job_queue_next = ps_decomp_job_queue_lyr1;
+ out_dep_pass = 3;
+ }
+ }
+ else if((DECOMP_JOB_LYR1 == pass) && (decomp_lyr_cnt != (n_tot_layers - 1)))
+ {
+ proc_valid_flag = 1;
+ pre_enc_task_type = DECOMP_JOB_LYR1;
+ enc_task_type = (HEVCE_ENC_JOB_TYPES_T)-1;
+ ps_pre_enc_job_queue_curr = ps_decomp_job_queue_lyr1;
+
+ inp_dep_pass = 3;
+ decomp_lyr_cnt++;
+
+ /* If all the decomp layers are done next job queue will be ME job queue */
+ if(decomp_lyr_cnt == (n_tot_layers - 1))
+ {
+ /* Assumption : num_hme_lyrs > 1*/
+ ps_pre_enc_job_queue_next = aps_me_job_queues[num_hme_lyrs - 1];
+
+ /* ME coarsest layer is currently made dependent on entire decomp layer */
+ out_dep_pass = ai4_me_num_vert_units_lyr[0];
+ me_coarsest_lyr_inp_dep = num_vert_units;
+ }
+ else
+ {
+ ps_pre_enc_job_queue_next = ps_decomp_job_queue_lyr2;
+ out_dep_pass = 3;
+ }
+ }
+ else if((DECOMP_JOB_LYR2 == pass) && (decomp_lyr_cnt != (n_tot_layers - 1)))
+ {
+ proc_valid_flag = 1;
+ pre_enc_task_type = DECOMP_JOB_LYR2;
+ enc_task_type = (HEVCE_ENC_JOB_TYPES_T)-1;
+ ps_pre_enc_job_queue_curr = ps_decomp_job_queue_lyr2;
+
+ inp_dep_pass = 3;
+ decomp_lyr_cnt++;
+
+ /* If all the decomp layers are done next job queue will be ME job queue */
+ if(decomp_lyr_cnt == (n_tot_layers - 1))
+ {
+ /* Assumption : num_hme_lyrs > 1*/
+ ps_pre_enc_job_queue_next = aps_me_job_queues[num_hme_lyrs - 1];
+
+ /* ME coarsest layer is currently made dependent on entire decomp layer */
+ out_dep_pass = ai4_me_num_vert_units_lyr[0];
+ me_coarsest_lyr_inp_dep = num_vert_units;
+ }
+ else
+ {
+ /* right now MAX 4 layers worth of JOB queues are prepared */
+ ASSERT(0);
+ }
+ }
+
+ else if(IPE_JOB_LYR0 == pass)
+ {
+ proc_valid_flag = 1;
+ pre_enc_task_type = IPE_JOB_LYR0;
+ enc_task_type = (HEVCE_ENC_JOB_TYPES_T)-1;
+ ps_pre_enc_job_queue_curr = ps_ipe_job_queue;
+ ps_pre_enc_job_queue_next = NULL;
+ num_vert_units = num_ctb_vert_rows;
+ }
+ else if(((pass >= ME_JOB_LYR4) && (pass <= ME_JOB_LYR1)) && (pass >= me_start_lyr_pass))
+ {
+ /* num_ver_units of coarsest layer is stored at 0th index */
+ num_vert_units = ai4_me_num_vert_units_lyr[me_lyr_cnt];
+ proc_valid_flag = 1;
+
+ pre_enc_task_type =
+ (HEVCE_PRE_ENC_JOB_TYPES_T)((WORD32)ME_JOB_LYR1 - (num_hme_lyrs - me_lyr_cnt - 2));
+
+ enc_task_type = (HEVCE_ENC_JOB_TYPES_T)-1;
+
+ /* Assumption : num_hme_lyrs > 1*/
+ ps_pre_enc_job_queue_curr = aps_me_job_queues[num_hme_lyrs - me_lyr_cnt - 1];
+
+ if(me_lyr_cnt == (num_hme_lyrs - 2))
+ {
+ ps_pre_enc_job_queue_next = ps_ipe_job_queue;
+ }
+ else
+ {
+ ps_pre_enc_job_queue_next = aps_me_job_queues[num_hme_lyrs - me_lyr_cnt - 2];
+ }
+ me_lyr_cnt++;
+ }
+
+ /* skip passes for which no queue was selected above */
+ if(0 == proc_valid_flag)
+ {
+ continue;
+ }
+
+ /* in the loop ps_me_job_q_prms get incremented for every row */
+ /* so at the end of one layer the pointer will be correctly */
+ /* pointing to the start of next layer */
+
+ /* loop over all the vertical rows */
+ for(num_jobs = 0; num_jobs < num_vert_units; num_jobs++)
+ {
+ ULWORD64 u8_temp;
+ WORD32 inp_dep = 0;
+ WORD32 out_dep = 0;
+ WORD32 ctr;
+ WORD32 job_off_ipe;
+
+ if(IPE_JOB_LYR0 == pass)
+ {
+ ps_pre_enc_job_queue_curr->s_job_info.s_ipe_job_info.i4_ctb_row_no = num_jobs;
+ inp_dep = ps_me_job_q_prms->i4_num_inp_dep;
+ out_dep = 0;
+ }
+ else if((pass >= DECOMP_JOB_LYR0) && (pass <= DECOMP_JOB_LYR3))
+ {
+ ps_pre_enc_job_queue_curr->s_job_info.s_decomp_job_info.i4_vert_unit_row_no =
+ num_jobs;
+
+ /* Input and output dependencies of 1st row and last row are 1 less than other rows */
+ inp_dep = inp_dep_pass;
+ out_dep = out_dep_pass;
+
+ if(pass != DECOMP_JOB_LYR0)
+ {
+ if(((num_jobs == 0) || (num_jobs == num_vert_units - 1)))
+ {
+ inp_dep = inp_dep_pass - 1;
+ }
+ }
+
+ if(pass != (DECOMP_JOB_LYR0 + n_tot_layers - 2))
+ {
+ if(((num_jobs == 0) || (num_jobs == num_vert_units - 1)))
+ {
+ out_dep = out_dep_pass - 1;
+ }
+ }
+ }
+ else /* remaining all are ME JOBS */
+ {
+ ps_pre_enc_job_queue_curr->s_job_info.s_me_job_info.i4_vert_unit_row_no = num_jobs;
+
+ if(pass == me_start_lyr_pass)
+ {
+ ASSERT(me_coarsest_lyr_inp_dep != -1);
+ inp_dep = me_coarsest_lyr_inp_dep;
+ }
+ else
+ {
+ inp_dep = ps_me_job_q_prms->i4_num_inp_dep;
+ }
+ out_dep = ps_me_job_q_prms->i4_num_output_dep;
+ }
+ ps_pre_enc_job_queue_curr->pv_next = (void *)(ps_pre_enc_job_queue_curr + 1);
+
+ ps_pre_enc_job_queue_curr->i4_pre_enc_task_type = pre_enc_task_type;
+ ps_pre_enc_job_queue_curr->i4_task_type = enc_task_type;
+
+ /* Set the input dependencies */
+ ps_pre_enc_job_queue_curr->i4_num_input_dep = inp_dep;
+
+ /* set the entire input dep buffer to default value 0 */
+ memset(&ps_pre_enc_job_queue_curr->au1_in_dep[0], 0, sizeof(UWORD8) * MAX_IN_DEP);
+
+ /* set the input dep buffer to 1 for num inp dep */
+ if(0 != inp_dep)
+ {
+ memset(&ps_pre_enc_job_queue_curr->au1_in_dep[0], 1, sizeof(UWORD8) * inp_dep);
+ }
+
+ /* If decomposition layer ends at this pass the no of out dependencies
+ * will be based on number of vertical units in the coarsest layer of HME
+ * This is because the search range in coarsest layer will be almost
+ * entire frame (search range of +-128 in vert direction is max supported)
+ */
+ if(pass == (DECOMP_JOB_LYR0 + n_tot_layers - 2))
+ {
+ job_off_ipe = 0;
+ }
+ else
+ {
+ if(num_jobs == 0)
+ job_off_ipe = num_jobs;
+
+ else
+ job_off_ipe = num_jobs - 1;
+ }
+
+ /* Set the offsets of output dependencies */
+ ps_pre_enc_job_queue_curr->i4_num_output_dep = out_dep;
+
+ /* set the entire offset buffer to default value */
+ memset(
+ &ps_pre_enc_job_queue_curr->au4_out_ofsts[0], 0xFF, sizeof(UWORD32) * MAX_OUT_DEP);
+
+ for(ctr = 0; ctr < out_dep; ctr++)
+ {
+ /* if IPE or DECOMP loop the dep is 1 to 1*/
+ if(((pass >= DECOMP_JOB_LYR0) && (pass <= DECOMP_JOB_LYR3)) ||
+ (IPE_JOB_LYR0 == pass))
+ {
+ u8_temp = (ULWORD64)(
+ &ps_pre_enc_job_queue_next[job_off_ipe] -
+ ps_ctxt->s_multi_thrd.aps_job_q_pre_enc[i4_ping_pong]);
+
+ u8_temp *= sizeof(job_queue_t);
+
+ /* add the exact inp dep byte for the next layer JOB */
+ u8_temp += ps_pre_enc_job_queue_next[job_off_ipe].i4_num_input_dep;
+
+ /* increment the inp dep number for a given job */
+ ps_pre_enc_job_queue_next[job_off_ipe].i4_num_input_dep++;
+
+ job_off_ipe++;
+ }
+ else if((pass >= ME_JOB_LYR4) && (pass <= ME_JOB_LYR1))
+ {
+ /* ME layer Jobs */
+ WORD32 job_off;
+
+ job_off = ps_me_job_q_prms->ai4_out_dep_unit_off[ctr];
+
+ u8_temp = (ULWORD64)(
+ &ps_pre_enc_job_queue_next[job_off] -
+ ps_ctxt->s_multi_thrd.aps_job_q_pre_enc[i4_ping_pong]);
+
+ u8_temp *= sizeof(job_queue_t);
+
+ /* add the exact inp dep byte for the next layer JOB */
+ u8_temp += ps_pre_enc_job_queue_next[job_off].i4_num_input_dep;
+
+ /* increment the inp dep number for a given job */
+ ps_pre_enc_job_queue_next[job_off].i4_num_input_dep++;
+ }
+ /* store the offset to the array */
+ ps_pre_enc_job_queue_curr->au4_out_ofsts[ctr] = (UWORD32)u8_temp;
+ }
+ /* ME job q params is incremented for ME jobs and for IPE jobs */
+ if(((pass >= ME_JOB_LYR4) && (pass <= ME_JOB_LYR1)) || (IPE_JOB_LYR0 == pass))
+ {
+ ps_me_job_q_prms++;
+ }
+ ps_pre_enc_job_queue_curr++;
+ }
+
+ /* set the last pointer to NULL */
+ ps_pre_enc_job_queue_curr--;
+ ps_pre_enc_job_queue_curr->pv_next = (void *)NULL;
+ }
+
+ /* reset the num ctb processed in every row for IPE sync */
+ memset(
+ &ps_ctxt->s_multi_thrd.ai4_ctbs_in_row_proc_ipe_pass[0],
+ 0,
+ (MAX_NUM_CTB_ROWS_FRM * sizeof(WORD32)));
+
+} /* End of ihevce_prepare_pre_enc_job_queue */
diff --git a/encoder/ihevce_multi_thrd_funcs.h b/encoder/ihevce_multi_thrd_funcs.h
new file mode 100644
index 0000000..fdbed48
--- /dev/null
+++ b/encoder/ihevce_multi_thrd_funcs.h
@@ -0,0 +1,85 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_multi_thrd_funcs.h
+*
+* \brief
+* This file contains interface definitions related to job queues and other
+* functions required for multi threading
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_MULTI_THRD_FUNCS_H_
+#define _IHEVCE_MULTI_THRD_FUNCS_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Variable Declarations */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+void *ihevce_enc_grp_get_next_job(
+ void *pv_multi_thrd_ctxt, WORD32 i4_job_type, WORD32 i4_blocking_mode, WORD32 i4_curr_frm_id);
+
+void *ihevce_pre_enc_grp_get_next_job(
+ void *pv_multi_thrd_ctxt, WORD32 i4_job_type, WORD32 i4_blocking_mode, WORD32 i4_ping_pong);
+
+void ihevce_pre_enc_grp_job_set_out_dep(
+ void *pv_multi_thrd_ctxt, job_queue_t *ps_curr_job, WORD32 i4_ping_pong);
+
+void ihevce_enc_grp_job_set_out_dep(
+ void *pv_multi_thrd_ctxt, job_queue_t *ps_curr_job, WORD32 i4_curr_frm_id);
+
+void ihevce_prepare_job_queue(
+ void *pv_enc_ctxt, ihevce_lap_enc_buf_t *ps_curr_inp, WORD32 i4_curr_frm_id);
+
+void ihevce_prepare_pre_enc_job_queue(
+ void *pv_enc_ctxt, ihevce_lap_enc_buf_t *ps_curr_inp, WORD32 i4_ping_pong);
+
+#endif /* _IHEVCE_MULTI_THRD_FUNCS_H_ */
diff --git a/encoder/ihevce_multi_thrd_structs.h b/encoder/ihevce_multi_thrd_structs.h
new file mode 100644
index 0000000..2396c8b
--- /dev/null
+++ b/encoder/ihevce_multi_thrd_structs.h
@@ -0,0 +1,233 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_multi_thrd_structs.h
+*
+* \brief
+* This file contains structure definitions of multi thread based processing
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_MULTI_THRD_STRUCTS_H_
+#define _IHEVCE_MULTI_THRD_STRUCTS_H_
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+/** Maximum number of modules on whose outputs any module's inputs are dependent.
+ * Sizes job_queue_t::au1_in_dep; the comment on that member requires this
+ * value to be a multiple of 4.
+ */
+#define MAX_IN_DEP 80
+
+/** Maximum number of modules whose inputs are dependent on any module's outputs.
+ * Sizes job_queue_t::au4_out_ofsts.
+ */
+#define MAX_OUT_DEP 80
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+
+/**
+ * Job types of the encode stage; this is the type of
+ * job_queue_t::i4_task_type.
+ */
+typedef enum
+{
+ ME_JOB_ENC_LYR = 0,
+ /* MBR: an enc_loop job instance is created for each bit-rate. */
+ /* Change the number of ENC_LOOP_JOBx entries based on */
+ /* IHEVCE_MAX_NUM_BITRATES. */
+ ENC_LOOP_JOB,
+ ENC_LOOP_JOB1,
+ ENC_LOOP_JOB2,
+ ENC_LOOP_JOB3,
+ ENC_LOOP_JOB4,
+
+ /* Number of job types listed above (last enumerator, not a job type) */
+ NUM_ENC_JOBS_QUES,
+
+} HEVCE_ENC_JOB_TYPES_T;
+
+/**
+ * Job types of the pre-encode stage (decomposition, ME and IPE jobs); this is
+ * the type of job_queue_t::i4_pre_enc_task_type.
+ */
+typedef enum
+{
+ DECOMP_JOB_LYR0 = 0,
+ DECOMP_JOB_LYR1,
+ DECOMP_JOB_LYR2,
+ DECOMP_JOB_LYR3,
+ ME_JOB_LYR4,
+ ME_JOB_LYR3,
+ ME_JOB_LYR2,
+ ME_JOB_LYR1,
+ IPE_JOB_LYR0,
+
+ /* Number of job types listed above (last enumerator, not a job type) */
+ NUM_PRE_ENC_JOBS_QUES,
+
+} HEVCE_PRE_ENC_JOB_TYPES_T;
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ * @brief IPE Job parameters structure
+******************************************************************************
+ */
+typedef struct
+{
+ /** Index of the CTB Row */
+ WORD32 i4_ctb_row_no;
+
+} ipe_job_node_t;
+
+/**
+******************************************************************************
+ * @brief ME Job parameters structure
+******************************************************************************
+ */
+typedef struct
+{
+ /** Index of the Vertical unit Row */
+ WORD32 i4_vert_unit_row_no;
+
+ /** Tile column index of the job (meaning inferred from the name - confirm) */
+ WORD32 i4_tile_col_idx;
+
+} me_job_node_t;
+
+/**
+******************************************************************************
+ * @brief Encode Loop Job parameters structure
+******************************************************************************
+ */
+typedef struct
+{
+ /** Index of the CTB Row */
+ WORD32 i4_ctb_row_no;
+
+ /** Tile column index of the job (meaning inferred from the name - confirm) */
+ WORD32 i4_tile_col_idx;
+
+ /** Bit-rate instance the job belongs to (meaning inferred from the name - confirm) */
+ WORD32 i4_bitrate_instance_no;
+
+} enc_loop_job_node_t;
+
+/**
+******************************************************************************
+ * @brief Decomposition Job parameters structure
+******************************************************************************
+ */
+typedef struct
+{
+ /** Index of the Vertical unit Row processed by this decomposition job */
+ WORD32 i4_vert_unit_row_no;
+
+} decomp_job_node_t;
+
+/**
+******************************************************************************
+ * @brief Union Job parameters structure
+******************************************************************************
+ */
+/* Overlay of the per-stage job parameter structures; stored in each queue */
+/* element as job_queue_t::s_job_info. Every member structure consists of */
+/* WORD32 fields only. */
+typedef union /* Make sure that the size is a multiple of 4 */
+{
+ /** Parameters of an IPE job */
+ ipe_job_node_t s_ipe_job_info;
+
+ /** Parameters of an ME job */
+ me_job_node_t s_me_job_info;
+
+ /** Parameters of an encode loop job */
+ enc_loop_job_node_t s_enc_loop_job_info;
+
+ /** Parameters of a decomposition job */
+ decomp_job_node_t s_decomp_job_info;
+
+} job_info_t;
+
+/**
+******************************************************************************
+ * @brief Job Queue Element parameters structure
+******************************************************************************
+ */
+typedef struct
+{
+ /** Array of flags indicating the input dependencies of the module.
+ * Flag set to 0 indicates that the input dependency is resolved.
+ * Processing can start only after all the flags are 0.
+ *
+ * This has to stay the first member of this structure, and MAX_IN_DEP has
+ * to be a multiple of 4.
+ * NOTE(review): the reason for both constraints is not visible in this
+ * header (presumably the flags are accessed word-wise) - confirm.
+ */
+ UWORD8 au1_in_dep[MAX_IN_DEP];
+
+ /** Pointer to the next link in the job queue */
+ void *pv_next;
+
+ /** Job information ctxt of the module */
+ job_info_t s_job_info;
+
+ /** Array of offsets for the output dependencies' pointers.
+ * Indicates the location where the dependency flag needs to
+ * be set after the processing of the current NMB/row/slice
+ */
+ UWORD32 au4_out_ofsts[MAX_OUT_DEP];
+
+ /** Number of input dependencies to be checked before starting current task */
+ WORD32 i4_num_input_dep;
+
+ /** Number of output dependencies to be updated after finishing current task */
+ WORD32 i4_num_output_dep;
+
+ /** Indicates what type of task is to be executed
+ * [ME_JOB for layer 0, ENC_LOOP_JOB] are valid
+ * -1 will be set if this job task type is irrelevant
+ */
+ HEVCE_ENC_JOB_TYPES_T i4_task_type;
+
+ /** Indicates what type of task is to be executed
+ * [ME_JOB for coarse and refine layers, DECOMP jobs and IPE job] are valid
+ * -1 will be set if this job task type is irrelevant
+ */
+ HEVCE_PRE_ENC_JOB_TYPES_T i4_pre_enc_task_type;
+
+} job_queue_t;
+
+/**
+******************************************************************************
+ * @brief Job Queue Handle structure
+******************************************************************************
+ */
+typedef struct
+{
+ /** Pointer to the next link in the job queue.
+ * NOTE(review): presumably points to the first pending job_queue_t of
+ * the queue this handle represents - confirm against the queue code.
+ */
+ void *pv_next;
+
+} job_queue_handle_t;
+
+/*****************************************************************************/
+/* Extern Variable Declarations */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+#endif /* _IHEVCE_MULTI_THRD_STRUCTS_H_ */
diff --git a/encoder/ihevce_mv_pred.c b/encoder/ihevce_mv_pred.c
new file mode 100644
index 0000000..5178ce4
--- /dev/null
+++ b/encoder/ihevce_mv_pred.c
@@ -0,0 +1,754 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ * ihevce_mv_pred.c
+ *
+ * @brief
+ * Contains functions for motion vector prediction
+ *
+ * @author
+ * Ittiam
+ *
+ * @par List of Functions:
+ * - ihevce_mv_pred()
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_macros.h"
+#include "ihevc_debug.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+#include "ihevc_common_tables.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_hle_interface.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_entropy_structs.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+#include "ihevce_enc_loop_structs.h"
+#include "hme_datatype.h"
+#include "hme_interface.h"
+#include "hme_common_defs.h"
+#include "hme_defs.h"
+#include "ihevce_mv_pred.h"
+#include "ihevce_mv_pred_merge.h"
+#include "ihevce_common_utils.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * This function performs Motion Vector prediction and returns, for each of
+ * the reference lists L0 and L1, a list of MAX_MVP_LIST_CAND predictor mvs
+ *
+ * @par Description:
+ * MV predictor list is computed using neighbor mvs and colocated mv:
+ * - Candidate A is taken from the left neighbors A0 (bottom-left) and A1
+ * (left). A neighbor mv whose reference POC equals the POC of the current
+ * reference is used as is; otherwise the first usable neighbor mv is
+ * scaled with ihevce_scale_mv().
+ * - Candidate B is taken from the top neighbors B0 (top-right), B1 (top) and
+ * B2 (top-left) using the same POC match. When neither A0 nor A1 is
+ * usable, B is copied to A and B is derived again allowing scaling.
+ * - When a list holds fewer than MAX_MVP_LIST_CAND candidates, the mv
+ * returned by ihevce_collocated_mvp() is tried, first for the position
+ * right-below the PU and then for the centre of the PU.
+ * - Remaining entries are filled with zero mvs.
+ *
+ * The neighbor entries A0, A1, B0, B1 and B2 are read even when the
+ * corresponding availability flag is 0, so the pointers passed in must
+ * address readable nbr_4x4_t entries in all cases.
+ *
+ * @param[in] ps_ctxt
+ * pointer to mv predictor context (reference lists and slice header are
+ * read from it; it is also passed to ihevce_collocated_mvp())
+ *
+ * @param[in] ps_top_nbr_4x4
+ * pointer to top 4x4 nbr structure (entry above the first column of the PU)
+ *
+ * @param[in] ps_left_nbr_4x4
+ * pointer to left 4x4 nbr structure (entry left of the first row of the PU)
+ *
+ * @param[in] ps_top_left_nbr_4x4
+ * pointer to top left 4x4 nbr structure (used for B2 only when the PU
+ * y position is non-zero)
+ *
+ * @param[in] left_nbr_4x4_strd
+ * left nbr buffer stride in terms of 4x4 units
+ *
+ * @param[in] ps_avail_flags
+ * Neighbor availability flags container
+ *
+ * @param[in] ps_col_mv
+ * Colocated MV pointer (not referenced by this function)
+ *
+ * @param[in] ps_pu
+ * Current Partition PU structure pointer
+ *
+ * @param[inout] ps_pred_mv
+ * pointer to store predicted MV list; s_l0_mv and s_l1_mv of the first
+ * MAX_MVP_LIST_CAND entries are written
+ *
+ * @param[out] pau1_is_top_used
+ * per list ([0] = L0, [1] = L1) and per candidate flag; set to 1 when the
+ * candidate was taken from the top neighbors (B0/B1/B2) and to 0 for
+ * candidates taken from the left neighbors, the colocated mv or zero mv
+ *
+ * @returns
+ * None
+ * @remarks
+ *
+ *
+ *******************************************************************************
+ */
+void ihevce_mv_pred(
+ mv_pred_ctxt_t *ps_ctxt,
+ nbr_4x4_t *ps_top_nbr_4x4,
+ nbr_4x4_t *ps_left_nbr_4x4,
+ nbr_4x4_t *ps_top_left_nbr_4x4,
+ WORD32 left_nbr_4x4_strd,
+ nbr_avail_flags_t *ps_avail_flags,
+ pu_mv_t *ps_col_mv,
+ pu_t *ps_pu,
+ pu_mv_t *ps_pred_mv,
+ UWORD8 (*pau1_is_top_used)[MAX_MVP_LIST_CAND])
+{
+ WORD32 is_scaled_flag_list[2] /* Indicates whether A0 or A1 is available */;
+ WORD32 lb_avail, l_avail, t_avail, tr_avail, tl_avail;
+ /* Per list [L0/L1]: candidate A / candidate B found, and their mvs */
+ WORD32 avail_a_flag[2];
+ WORD32 avail_b_flag[2];
+ mv_t as_mv_a[2];
+ mv_t as_mv_b[2];
+ /* Reference index of the current PU per list [L0/L1] */
+ UWORD8 i1_cur_ref_idx_list[2];
+ /* PU position and size in pixels */
+ WORD32 part_pos_x;
+ WORD32 part_pos_y;
+ WORD32 part_wd;
+ WORD32 part_ht;
+
+ /*******************************************/
+ /* Neighbor location: Graphical indication */
+ /*                                         */
+ /*  B2 _____________B1 B0                  */
+ /*    |               |                    */
+ /*    |               |                    */
+ /*    |               |                    */
+ /*    |      PU     ht|                    */
+ /*    |               |                    */
+ /*    |               |                    */
+ /*  A1|______wd_______|                    */
+ /*  A0                                     */
+ /*                                         */
+ /*******************************************/
+
+ /* Initialization */
+ avail_a_flag[0] = 0;
+ avail_a_flag[1] = 0;
+ avail_b_flag[0] = 0;
+ avail_b_flag[1] = 0;
+
+ as_mv_a[0].i2_mvx = 0;
+ as_mv_a[0].i2_mvy = 0;
+ as_mv_a[1].i2_mvx = 0;
+ as_mv_a[1].i2_mvy = 0;
+ as_mv_b[0].i2_mvx = 0;
+ as_mv_b[0].i2_mvy = 0;
+ as_mv_b[1].i2_mvx = 0;
+ as_mv_b[1].i2_mvy = 0;
+
+ lb_avail = ps_avail_flags->u1_bot_lt_avail;
+ l_avail = ps_avail_flags->u1_left_avail;
+ tr_avail = ps_avail_flags->u1_top_rt_avail;
+ t_avail = ps_avail_flags->u1_top_avail;
+ tl_avail = ps_avail_flags->u1_top_lt_avail;
+
+ is_scaled_flag_list[0] = 0;
+ is_scaled_flag_list[1] = 0;
+
+ /* b4_pos_x/y are in units of 4 pixels; b4_wd/ht hold (size in 4 pixel units) - 1 */
+ part_pos_x = ps_pu->b4_pos_x << 2;
+ part_pos_y = ps_pu->b4_pos_y << 2;
+ part_wd = (ps_pu->b4_wd + 1) << 2;
+ part_ht = (ps_pu->b4_ht + 1) << 2;
+
+ /* Initializing current PU reference index */
+ /* A stored index of -1 marks an unused direction; it is replaced by 0 here */
+ /* because the value is used directly as an index into ps_ref_list below */
+ i1_cur_ref_idx_list[0] = (-1 == ps_pu->mv.i1_l0_ref_idx) ? 0 : ps_pu->mv.i1_l0_ref_idx;
+ i1_cur_ref_idx_list[1] = (-1 == ps_pu->mv.i1_l1_ref_idx) ? 0 : ps_pu->mv.i1_l1_ref_idx;
+
+ /************************************************************/
+ /* Calculating of motion vector A from neighbors A0 and A1 */
+ /************************************************************/
+ {
+ WORD32 l_x, a;
+ WORD32 *pi4_avail_flag;
+ WORD32 nbr_avail[2]; /*[A0/A1] */
+ WORD8 i1_nbr_ref_idx_list[2][2]; /* [A0/A1][L0/L1] */
+ UWORD8 u1_nbr_intra_flag[2]; /*[A0/A1] (written below but never read) */
+ UWORD8 u1_nbr_pred_flag[2][2]; /* [A0/A1][L0/L1] */
+ mv_t *ps_mv;
+ nbr_4x4_t *ps_a0, *ps_a1;
+ mv_t *ps_nbr_mv[2][2]; /* [A0/A1][L0/L1] */
+
+ /* A0 and A1 initializations */
+ ps_mv = &as_mv_a[0];
+ pi4_avail_flag = avail_a_flag;
+
+ /* Pointers to A0 and A1 */
+ {
+ WORD32 y_a0, y_a1;
+ /* TODO: y_a0, y_a1 is coded assuming left nbr pointer starts at PU */
+ /* y_a0 is the 4x4 row just below the PU, y_a1 the last 4x4 row of the PU */
+ y_a0 = (part_ht >> 2);
+ y_a1 = ((part_ht - 1) >> 2);
+
+ ps_a0 = ps_left_nbr_4x4 + (y_a0 * left_nbr_4x4_strd);
+ ps_a1 = ps_left_nbr_4x4 + (y_a1 * left_nbr_4x4_strd);
+ }
+
+ /* A neighbor is usable only if it is available and not intra coded */
+ nbr_avail[0] = lb_avail && (!ps_a0->b1_intra_flag);
+ nbr_avail[1] = l_avail && (!ps_a1->b1_intra_flag);
+
+ /* Setting is scaled flag based on availability of A0 and A1 */
+ if((nbr_avail[0] == 1) || (nbr_avail[1]))
+ {
+ is_scaled_flag_list[0] = 1;
+ is_scaled_flag_list[1] = 1;
+ }
+
+ /* Initializing A0 variables */
+ ps_nbr_mv[0][0] = &ps_a0->mv.s_l0_mv;
+ ps_nbr_mv[0][1] = &ps_a0->mv.s_l1_mv;
+
+ i1_nbr_ref_idx_list[0][0] = ps_a0->mv.i1_l0_ref_idx;
+ i1_nbr_ref_idx_list[0][1] = ps_a0->mv.i1_l1_ref_idx;
+
+ u1_nbr_pred_flag[0][0] = (UWORD8)ps_a0->b1_pred_l0_flag;
+ u1_nbr_pred_flag[0][1] = (UWORD8)ps_a0->b1_pred_l1_flag;
+
+ u1_nbr_intra_flag[0] = (UWORD8)ps_a0->b1_intra_flag;
+
+ /* Initializing A1 variables */
+ ps_nbr_mv[1][0] = &ps_a1->mv.s_l0_mv;
+ ps_nbr_mv[1][1] = &ps_a1->mv.s_l1_mv;
+
+ i1_nbr_ref_idx_list[1][0] = ps_a1->mv.i1_l0_ref_idx;
+ i1_nbr_ref_idx_list[1][1] = ps_a1->mv.i1_l1_ref_idx;
+
+ u1_nbr_pred_flag[1][0] = (UWORD8)ps_a1->b1_pred_l0_flag;
+ u1_nbr_pred_flag[1][1] = (UWORD8)ps_a1->b1_pred_l1_flag;
+
+ u1_nbr_intra_flag[1] = (UWORD8)ps_a1->b1_intra_flag;
+
+ /* Derivation of mvL0A and mvL1A from A0 and A1 */
+ /* Pass 1 (no scaling): take the first neighbor mv, from either list, */
+ /* whose reference POC equals the POC of the current reference */
+ for(l_x = 0; l_x < 2; l_x++) /* list 0 and list 1 */
+ {
+ WORD32 l_y;
+
+ l_y = !l_x; /* if l_x = L0, l_y = L1 else l_y = L0 */
+
+ for(a = 0; a < 2; a++)
+ {
+ /* MODE_INTRA check has been taken care in availability check */
+ if((nbr_avail[a] == 1) && (pi4_avail_flag[l_x] == 0))
+ {
+ /* Same list as the one being derived */
+ if(u1_nbr_pred_flag[a][l_x] == 1)
+ {
+ WORD32 nbr_ref_poc, cur_ref_poc;
+ WORD8 i1_cur_ref_idx, i1_nbr_ref_idx;
+
+ i1_cur_ref_idx = i1_cur_ref_idx_list[l_x];
+ cur_ref_poc = ps_ctxt->ps_ref_list[l_x][i1_cur_ref_idx]->i4_poc;
+ i1_nbr_ref_idx = i1_nbr_ref_idx_list[a][l_x];
+ nbr_ref_poc = ps_ctxt->ps_ref_list[l_x][i1_nbr_ref_idx]->i4_poc;
+
+ if(nbr_ref_poc == cur_ref_poc)
+ {
+ pi4_avail_flag[l_x] = 1;
+ ps_mv[l_x] = *ps_nbr_mv[a][l_x];
+ break;
+ }
+ }
+ /* Other list of the same neighbor */
+ if(u1_nbr_pred_flag[a][l_y] == 1)
+ {
+ WORD32 nbr_ref_poc, cur_ref_poc;
+ WORD8 i1_nbr_ref_idx, i1_cur_ref_idx;
+
+ i1_cur_ref_idx = i1_cur_ref_idx_list[l_x];
+ cur_ref_poc = ps_ctxt->ps_ref_list[l_x][i1_cur_ref_idx]->i4_poc;
+
+ i1_nbr_ref_idx = i1_nbr_ref_idx_list[a][l_y];
+ nbr_ref_poc = ps_ctxt->ps_ref_list[l_y][i1_nbr_ref_idx]->i4_poc;
+ if(nbr_ref_poc == cur_ref_poc)
+ {
+ pi4_avail_flag[l_x] = 1;
+ ps_mv[l_x] = *ps_nbr_mv[a][l_y];
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ /* Pass 2 (with scaling): only for lists without a POC match in pass 1. */
+ /* Take the first usable neighbor mv regardless of its reference POC */
+ /* and scale it to the current reference. */
+ for(l_x = 0; l_x < 2; l_x++) /* list 0 and list 1 */
+ {
+ if(pi4_avail_flag[l_x] == 0)
+ {
+ /* Both are assigned in the loop below whenever the avail flag gets set */
+ WORD8 i1_nbr_ref_list_idx, i1_nbr_ref_idx;
+ WORD32 l_y;
+
+ l_y = !l_x; /* if l_x = L0, l_y = L1 else l_y = L0 */
+
+ for(a = 0; a < 2; a++)
+ {
+ /* MODE_INTRA check has been taken care in availability check */
+ if((nbr_avail[a] == 1) && (pi4_avail_flag[l_x] == 0))
+ {
+ /* Long term reference check Removed */
+ if(u1_nbr_pred_flag[a][l_x] == 1)
+ {
+ pi4_avail_flag[l_x] = 1;
+ ps_mv[l_x] = *ps_nbr_mv[a][l_x];
+ i1_nbr_ref_idx = i1_nbr_ref_idx_list[a][l_x];
+ i1_nbr_ref_list_idx = l_x;
+ break;
+ }
+ /* Long term reference check Removed */
+ else if(u1_nbr_pred_flag[a][l_y] == 1)
+ {
+ pi4_avail_flag[l_x] = 1;
+ ps_mv[l_x] = *ps_nbr_mv[a][l_y];
+ i1_nbr_ref_idx = i1_nbr_ref_idx_list[a][l_y];
+ i1_nbr_ref_list_idx = l_y;
+ break;
+ }
+ }
+ }
+
+ /* Long term reference check Removed */
+ if(pi4_avail_flag[l_x] == 1)
+ {
+ WORD8 i1_cur_ref_idx;
+ WORD32 cur_ref_poc, nbr_ref_poc;
+ WORD32 cur_poc;
+
+ i1_cur_ref_idx = i1_cur_ref_idx_list[l_x];
+ cur_ref_poc = ps_ctxt->ps_ref_list[l_x][i1_cur_ref_idx]->i4_poc;
+
+ nbr_ref_poc = ps_ctxt->ps_ref_list[i1_nbr_ref_list_idx][i1_nbr_ref_idx]->i4_poc;
+
+ cur_poc = ps_ctxt->ps_slice_hdr->i4_abs_pic_order_cnt;
+
+ ihevce_scale_mv(&ps_mv[l_x], cur_ref_poc, nbr_ref_poc, cur_poc);
+ }
+ }
+ }
+ }
+
+ /****************************************************************/
+ /* Calculating of motion vector B from neighbors B0, B1 and B2 */
+ /****************************************************************/
+ {
+ WORD32 l_x, b;
+ WORD32 *pi4_avail_flag;
+ WORD32 nbr_avail[3]; /* [B0/B1/B2] */
+ WORD8 i1_nbr_ref_idx_list[3][2]; /* [B0/B1/B2][L0/L1] */
+ UWORD8 u1_nbr_intra_flag[3]; /*[B0/B1/B2] (written below but never read) */
+ UWORD8 u1_nbr_pred_flag[3][2]; /* [B0/B1/B2][L0/L1] */
+ mv_t *ps_mv;
+ nbr_4x4_t *ps_b0, *ps_b1, *ps_b2;
+ mv_t *ps_nbr_mv[3][2]; /* [B0/B1/B2][L0/L1] */
+
+ /* B0, B1 and B2 initializations */
+ ps_mv = &as_mv_b[0];
+ pi4_avail_flag = avail_b_flag;
+
+ /* Pointers to B0, B1 and B2 */
+ {
+ WORD32 x_b0, x_b1, x_b2;
+
+ /* Relative co-ordinate of Xp,Yp w.r.t CTB start will work */
+ /* as long as minCTB = 16 */
+ x_b0 = (part_pos_x + part_wd);
+ x_b1 = (part_pos_x + part_wd - 1);
+ x_b2 = (part_pos_x - 1);
+
+ /* Getting offset back to given pointer */
+ /* (x_b0 = part_wd, x_b1 = part_wd - 1, x_b2 = -1 after this) */
+ x_b0 = x_b0 - part_pos_x;
+ x_b1 = x_b1 - part_pos_x;
+ x_b2 = x_b2 - part_pos_x;
+
+ /* Below derivation are based on top pointer */
+ /* is pointing first pixel of PU */
+ ps_b0 = ps_top_nbr_4x4 + (x_b0 >> 2);
+ ps_b1 = ps_top_nbr_4x4 + (x_b1 >> 2);
+
+ /* PU y position is non-zero: use the top-left pointer passed in. */
+ /* NOTE(review): the original comments labelled this branch "at CTB */
+ /* boundary" and the else branch "not at CTB boundary", which reads */
+ /* opposite to the condition tested - confirm the intended meaning. */
+ if(part_pos_y)
+ {
+ ps_b2 = ps_top_left_nbr_4x4;
+ }
+ else
+ {
+ /* PU y position is zero: use top and add correction to go to */
+ /* top-left. x_b2 is -1, so (x_b2 >> 2) has to evaluate to -1; */
+ /* this relies on >> of a negative value being an arithmetic */
+ /* shift, which is implementation-defined in C. */
+ ps_b2 = (ps_top_nbr_4x4) + (x_b2 >> 2);
+ }
+ }
+ /* A neighbor is usable only if it is available and not intra coded */
+ nbr_avail[0] = tr_avail && (!ps_b0->b1_intra_flag);
+ nbr_avail[1] = t_avail && (!ps_b1->b1_intra_flag);
+ nbr_avail[2] = tl_avail && (!ps_b2->b1_intra_flag);
+
+ /* Initializing B0 related variables */
+ ps_nbr_mv[0][0] = &ps_b0->mv.s_l0_mv;
+ ps_nbr_mv[0][1] = &ps_b0->mv.s_l1_mv;
+
+ i1_nbr_ref_idx_list[0][0] = ps_b0->mv.i1_l0_ref_idx;
+ i1_nbr_ref_idx_list[0][1] = ps_b0->mv.i1_l1_ref_idx;
+
+ u1_nbr_pred_flag[0][0] = (UWORD8)ps_b0->b1_pred_l0_flag;
+ u1_nbr_pred_flag[0][1] = (UWORD8)ps_b0->b1_pred_l1_flag;
+
+ u1_nbr_intra_flag[0] = (UWORD8)ps_b0->b1_intra_flag;
+
+ /* Initializing B1 related variables */
+ ps_nbr_mv[1][0] = &ps_b1->mv.s_l0_mv;
+ ps_nbr_mv[1][1] = &ps_b1->mv.s_l1_mv;
+
+ i1_nbr_ref_idx_list[1][0] = ps_b1->mv.i1_l0_ref_idx;
+ i1_nbr_ref_idx_list[1][1] = ps_b1->mv.i1_l1_ref_idx;
+
+ u1_nbr_pred_flag[1][0] = (UWORD8)ps_b1->b1_pred_l0_flag;
+ u1_nbr_pred_flag[1][1] = (UWORD8)ps_b1->b1_pred_l1_flag;
+
+ u1_nbr_intra_flag[1] = (UWORD8)ps_b1->b1_intra_flag;
+
+ /* Initializing B2 related variables */
+ ps_nbr_mv[2][0] = &ps_b2->mv.s_l0_mv;
+ ps_nbr_mv[2][1] = &ps_b2->mv.s_l1_mv;
+
+ i1_nbr_ref_idx_list[2][0] = ps_b2->mv.i1_l0_ref_idx;
+ i1_nbr_ref_idx_list[2][1] = ps_b2->mv.i1_l1_ref_idx;
+
+ u1_nbr_pred_flag[2][0] = (UWORD8)ps_b2->b1_pred_l0_flag;
+ u1_nbr_pred_flag[2][1] = (UWORD8)ps_b2->b1_pred_l1_flag;
+
+ u1_nbr_intra_flag[2] = (UWORD8)ps_b2->b1_intra_flag;
+
+ /* Derivation of mvL0B and mvL1B from B0,B1 and B2 */
+ /* Pass 1 (no scaling): same POC match as done for candidate A */
+ for(l_x = 0; l_x < 2; l_x++) /* list 0 and list 1 */
+ {
+ WORD32 l_y;
+
+ l_y = !l_x; /* if l_x = L0, l_y = L1 else l_y = L0 */
+
+ for(b = 0; b < 3; b++)
+ {
+ if((nbr_avail[b] == 1) && (pi4_avail_flag[l_x] == 0))
+ {
+ /* Same list as the one being derived */
+ if(u1_nbr_pred_flag[b][l_x] == 1)
+ {
+ WORD32 nbr_ref_poc, cur_ref_poc;
+ WORD8 i1_cur_ref_idx, i1_nbr_ref_idx;
+
+ i1_cur_ref_idx = i1_cur_ref_idx_list[l_x];
+ cur_ref_poc = ps_ctxt->ps_ref_list[l_x][i1_cur_ref_idx]->i4_poc;
+ i1_nbr_ref_idx = i1_nbr_ref_idx_list[b][l_x];
+ nbr_ref_poc = ps_ctxt->ps_ref_list[l_x][i1_nbr_ref_idx]->i4_poc;
+
+ if(nbr_ref_poc == cur_ref_poc)
+ {
+ pi4_avail_flag[l_x] = 1;
+ ps_mv[l_x] = *ps_nbr_mv[b][l_x];
+ break;
+ }
+ }
+ /* Other list of the same neighbor */
+ if(u1_nbr_pred_flag[b][l_y] == 1)
+ {
+ WORD32 nbr_ref_poc, cur_ref_poc;
+ WORD8 i1_nbr_ref_idx, i1_cur_ref_idx;
+
+ i1_cur_ref_idx = i1_cur_ref_idx_list[l_x];
+ cur_ref_poc = ps_ctxt->ps_ref_list[l_x][i1_cur_ref_idx]->i4_poc;
+
+ i1_nbr_ref_idx = i1_nbr_ref_idx_list[b][l_y];
+ nbr_ref_poc = ps_ctxt->ps_ref_list[l_y][i1_nbr_ref_idx]->i4_poc;
+
+ if(nbr_ref_poc == cur_ref_poc)
+ {
+ pi4_avail_flag[l_x] = 1;
+ ps_mv[l_x] = *ps_nbr_mv[b][l_y];
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ /* Neither A0 nor A1 was usable: the unscaled candidate B takes the */
+ /* place of candidate A */
+ if((is_scaled_flag_list[0] == 0) && (avail_b_flag[0] == 1))
+ {
+ avail_a_flag[0] = 1;
+ as_mv_a[0] = as_mv_b[0];
+ }
+ if((is_scaled_flag_list[1] == 0) && (avail_b_flag[1] == 1))
+ {
+ avail_a_flag[1] = 1;
+ as_mv_a[1] = as_mv_b[1];
+ }
+
+ /* Pass 2 (with scaling): done only when neither A0 nor A1 was usable. */
+ /* Candidate B is derived again from the first usable top neighbor and */
+ /* scaled when its reference POC differs from the current one. */
+ for(l_x = 0; l_x < 2; l_x++) /* list 0 and list 1 */
+ {
+ if(is_scaled_flag_list[l_x] == 0)
+ {
+ /* If isScaledFlagLX == 0, availFlagLXB flag is set to 0 */
+ pi4_avail_flag[l_x] = 0;
+ {
+ /* Both are assigned in the loop below whenever the avail flag gets set */
+ WORD8 i1_nbr_ref_list_idx, i1_nbr_ref_idx;
+ WORD32 l_y;
+
+ l_y = !l_x; /* if l_x = L0, l_y = L1 else l_y = L0 */
+
+ for(b = 0; b < 3; b++)
+ {
+ if((nbr_avail[b] == 1) && (pi4_avail_flag[l_x] == 0))
+ {
+ /* Long term reference check Removed */
+ if(u1_nbr_pred_flag[b][l_x] == 1)
+ {
+ pi4_avail_flag[l_x] = 1;
+ ps_mv[l_x] = *ps_nbr_mv[b][l_x];
+ i1_nbr_ref_idx = i1_nbr_ref_idx_list[b][l_x];
+ i1_nbr_ref_list_idx = l_x;
+ break;
+ }
+ /* Long term reference check Removed */
+ else if(u1_nbr_pred_flag[b][l_y] == 1)
+ {
+ pi4_avail_flag[l_x] = 1;
+ ps_mv[l_x] = *ps_nbr_mv[b][l_y];
+ i1_nbr_ref_idx = i1_nbr_ref_idx_list[b][l_y];
+ i1_nbr_ref_list_idx = l_y;
+ break;
+ }
+ }
+ }
+ /* Long term reference check Removed */
+ if(pi4_avail_flag[l_x] == 1)
+ {
+ WORD8 i1_cur_ref_idx;
+ WORD32 cur_ref_poc, nbr_ref_poc;
+ WORD32 cur_poc;
+
+ i1_cur_ref_idx = i1_cur_ref_idx_list[l_x];
+ cur_ref_poc = ps_ctxt->ps_ref_list[l_x][i1_cur_ref_idx]->i4_poc;
+
+ nbr_ref_poc =
+ ps_ctxt->ps_ref_list[i1_nbr_ref_list_idx][i1_nbr_ref_idx]->i4_poc;
+
+ cur_poc = ps_ctxt->ps_slice_hdr->i4_abs_pic_order_cnt;
+
+ if(cur_ref_poc != nbr_ref_poc)
+ ihevce_scale_mv(&ps_mv[l_x], cur_ref_poc, nbr_ref_poc, cur_poc);
+ }
+ }
+ }
+ }
+ }
+
+ /* Candidate list */
+ {
+ mv_t as_mvp_list_l0[MAX_MVP_LIST_CAND_MEM]; /*[Cand0/Cand1/Cand2] */
+ mv_t as_mvp_list_l1[MAX_MVP_LIST_CAND_MEM]; /*[Cand0/Cand1/Cand2] */
+ UWORD8 au1_is_top_used_l0[MAX_MVP_LIST_CAND_MEM];
+ UWORD8 au1_is_top_used_l1[MAX_MVP_LIST_CAND_MEM];
+ WORD32 num_mvp_cand_l0;
+ WORD32 num_mvp_cand_l1;
+
+ /* L0 candidate list*/
+ num_mvp_cand_l0 = 0;
+
+ if(avail_a_flag[0] == 1)
+ {
+ as_mvp_list_l0[num_mvp_cand_l0] = as_mv_a[0];
+ /* Candidate A came from the top neighbors when it was copied from B */
+ au1_is_top_used_l0[num_mvp_cand_l0] = (is_scaled_flag_list[0] == 0);
+ num_mvp_cand_l0++;
+ }
+ if(avail_b_flag[0] == 1)
+ {
+ /* Add B only if it differs from A or if the list is still empty */
+ if(((as_mv_a[0].i2_mvx != as_mv_b[0].i2_mvx) ||
+ (as_mv_a[0].i2_mvy != as_mv_b[0].i2_mvy)) ||
+ (0 == num_mvp_cand_l0))
+ {
+ as_mvp_list_l0[num_mvp_cand_l0] = as_mv_b[0];
+ au1_is_top_used_l0[num_mvp_cand_l0] = 1;
+ num_mvp_cand_l0++;
+ }
+ }
+
+ /* L1 candidate list*/
+ num_mvp_cand_l1 = 0;
+
+ if(avail_a_flag[1] == 1)
+ {
+ as_mvp_list_l1[num_mvp_cand_l1] = as_mv_a[1];
+ /* Candidate A came from the top neighbors when it was copied from B */
+ au1_is_top_used_l1[num_mvp_cand_l1] = (is_scaled_flag_list[1] == 0);
+ num_mvp_cand_l1++;
+ }
+ if(avail_b_flag[1] == 1)
+ {
+ /* Add B only if it differs from A or if the list is still empty */
+ if(((as_mv_a[1].i2_mvx != as_mv_b[1].i2_mvx) ||
+ (as_mv_a[1].i2_mvy != as_mv_b[1].i2_mvy)) ||
+ (0 == num_mvp_cand_l1))
+ {
+ as_mvp_list_l1[num_mvp_cand_l1] = as_mv_b[1];
+ au1_is_top_used_l1[num_mvp_cand_l1] = 1;
+ num_mvp_cand_l1++;
+ }
+ }
+
+ /***********************************************************/
+ /* Collocated MV prediction */
+ /* (needed only when one of the lists is not yet full) */
+ /***********************************************************/
+ if((MAX_MVP_LIST_CAND > num_mvp_cand_l0) || (MAX_MVP_LIST_CAND > num_mvp_cand_l1))
+ {
+ mv_t as_mv_col[2], s_mv_col_l0, s_mv_col_l1;
+ WORD32 avail_col_flag[2] = { 0 };
+ WORD32 x_col, y_col, avail_col_l0, avail_col_l1;
+
+ /* First try the position right-below the PU */
+ x_col = part_pos_x + part_wd;
+ y_col = part_pos_y + part_ht;
+ ihevce_collocated_mvp(ps_ctxt, ps_pu, as_mv_col, avail_col_flag, 1, x_col, y_col);
+
+ avail_col_l0 = avail_col_flag[0];
+ avail_col_l1 = avail_col_flag[1];
+ if(avail_col_l0 || avail_col_l1)
+ {
+ s_mv_col_l0 = as_mv_col[0];
+ s_mv_col_l1 = as_mv_col[1];
+ }
+
+ if(avail_col_l0 == 0 || avail_col_l1 == 0)
+ {
+ /* Checking Collocated MV availability at Center of PU */
+ x_col = part_pos_x + (part_wd >> 1);
+ y_col = part_pos_y + (part_ht >> 1);
+ ihevce_collocated_mvp(ps_ctxt, ps_pu, as_mv_col, avail_col_flag, 1, x_col, y_col);
+
+ /* Only a list that had no mv from the first position takes the */
+ /* result of the centre position */
+ if(avail_col_l0 == 0)
+ {
+ s_mv_col_l0 = as_mv_col[0];
+ }
+ if(avail_col_l1 == 0)
+ {
+ s_mv_col_l1 = as_mv_col[1];
+ }
+
+ avail_col_l0 |= avail_col_flag[0];
+ avail_col_l1 |= avail_col_flag[1];
+ }
+
+ /* Append the collocated mv to each list that still has room */
+ if(avail_col_l0)
+ {
+ if(MAX_MVP_LIST_CAND > num_mvp_cand_l0)
+ {
+ as_mvp_list_l0[num_mvp_cand_l0] = s_mv_col_l0;
+ au1_is_top_used_l0[num_mvp_cand_l0] = 0;
+ num_mvp_cand_l0++;
+ }
+ }
+ if(avail_col_l1)
+ {
+ if(MAX_MVP_LIST_CAND > num_mvp_cand_l1)
+ {
+ as_mvp_list_l1[num_mvp_cand_l1] = s_mv_col_l1;
+ au1_is_top_used_l1[num_mvp_cand_l1] = 0;
+ num_mvp_cand_l1++;
+ }
+ }
+ }
+
+ /* Adding zero if mv candidates are less than MAX_MVP_LIST_CAND */
+ while(num_mvp_cand_l0 < MAX_MVP_LIST_CAND)
+ {
+ as_mvp_list_l0[num_mvp_cand_l0].i2_mvx = 0;
+ as_mvp_list_l0[num_mvp_cand_l0].i2_mvy = 0;
+ au1_is_top_used_l0[num_mvp_cand_l0] = 0;
+ num_mvp_cand_l0++;
+ };
+ while(num_mvp_cand_l1 < MAX_MVP_LIST_CAND)
+ {
+ as_mvp_list_l1[num_mvp_cand_l1].i2_mvx = 0;
+ as_mvp_list_l1[num_mvp_cand_l1].i2_mvy = 0;
+ au1_is_top_used_l1[num_mvp_cand_l1] = 0;
+ num_mvp_cand_l1++;
+ };
+ /* Removing mvs if candidates are greater than MAX_MVP_LIST_CAND. */
+ /* With the guards above the counts never exceed the limit here, so */
+ /* this only acts as a safety clamp. */
+ if(num_mvp_cand_l0 > MAX_MVP_LIST_CAND)
+ {
+ num_mvp_cand_l0 = MAX_MVP_LIST_CAND;
+ };
+ if(num_mvp_cand_l1 > MAX_MVP_LIST_CAND)
+ {
+ num_mvp_cand_l1 = MAX_MVP_LIST_CAND;
+ };
+
+ /* Copying list to output */
+ {
+ WORD32 i;
+ for(i = 0; i < num_mvp_cand_l0; i++)
+ {
+ ps_pred_mv[i].s_l0_mv = as_mvp_list_l0[i];
+ pau1_is_top_used[0][i] = au1_is_top_used_l0[i];
+ }
+
+ for(i = 0; i < num_mvp_cand_l1; i++)
+ {
+ ps_pred_mv[i].s_l1_mv = as_mvp_list_l1[i];
+ pau1_is_top_used[1][i] = au1_is_top_used_l1[i];
+ }
+ }
+ }
+}
diff --git a/encoder/ihevce_mv_pred.h b/encoder/ihevce_mv_pred.h
new file mode 100644
index 0000000..896eb23
--- /dev/null
+++ b/encoder/ihevce_mv_pred.h
@@ -0,0 +1,82 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_mv_pred.h
+*
+* \brief
+* This file contains function prototypes of MV prediction function
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_MV_PRED_H_
+#define _IHEVCE_MV_PRED_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/** Number of MV predictor candidates ihevce_mv_pred() outputs per reference list */
+#define MAX_MVP_LIST_CAND 2
+/** Size of the local candidate arrays in ihevce_mv_pred() (one spare entry) */
+#define MAX_MVP_LIST_CAND_MEM (MAX_MVP_LIST_CAND + 1)
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Variable Declarations */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+/**
+ * Derives the MV predictor candidate lists (L0 and L1) of a PU from its
+ * spatial neighbors and the colocated mv.
+ *
+ * ps_pred_mv receives MAX_MVP_LIST_CAND candidates per list, and
+ * pau1_is_top_used[list][cand] is set to 1 for candidates taken from the top
+ * neighbors and to 0 otherwise. ps_col_mv is not referenced by the
+ * definition in ihevce_mv_pred.c.
+ */
+void ihevce_mv_pred(
+ mv_pred_ctxt_t *ps_ctxt,
+ nbr_4x4_t *ps_top_nbr_4x4,
+ nbr_4x4_t *ps_left_nbr_4x4,
+ nbr_4x4_t *ps_top_left_nbr_4x4,
+ WORD32 left_nbr_4x4_strd,
+ nbr_avail_flags_t *ps_avail_flags,
+ pu_mv_t *ps_col_mv,
+ pu_t *ps_pu,
+ pu_mv_t *ps_pred_mv,
+ UWORD8 (*pau1_is_top_used)[MAX_MVP_LIST_CAND]);
+
+#endif /* _IHEVCE_MV_PRED_H_ */
diff --git a/encoder/ihevce_mv_pred_merge.c b/encoder/ihevce_mv_pred_merge.c
new file mode 100644
index 0000000..b7f5a17
--- /dev/null
+++ b/encoder/ihevce_mv_pred_merge.c
@@ -0,0 +1,1023 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ *******************************************************************************
+ * @file
+ * ihevce_mv_pred_merge.c
+ *
+ * @brief
+ * Contains functions for motion vector merge candidates derivation
+ *
+ * @author
+ * Ittiam
+ *
+ * @par List of Functions:
+ * - ihevce_compare_pu_mv_t()
+ * - ihevce_mv_pred_merge()
+ *
+ * @remarks
+ * None
+ *
+ *******************************************************************************
+ */
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_macros.h"
+#include "ihevc_debug.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+#include "ihevc_common_tables.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_hle_interface.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_entropy_structs.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+#include "ihevce_enc_loop_structs.h"
+#include "hme_datatype.h"
+#include "hme_interface.h"
+#include "hme_common_defs.h"
+#include "hme_defs.h"
+#include "ihevce_mv_pred.h"
+#include "ihevce_mv_pred_merge.h"
+#include "ihevce_common_utils.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+ *******************************************************************************
+ *
+ * @brief Scales a collocated (temporal) motion vector
+ *
+ *
+ * @par Description:
+ *  Scales the mv in place by the ratio of two POC distances:
+ *  (cur_poc - cur_ref_poc) for the current PU and (col_poc - col_ref_poc)
+ *  for the collocated PU. Both distances are clipped to signed 8 bit, the
+ *  scale factor to [-4096, 4095] and the result to signed 16 bit.
+ *  When the collocated distance is zero there is nothing to scale by and the
+ *  mv is left untouched.
+ *
+ * @param[inout] ps_mv
+ *  motion vector to be scaled (updated in place)
+ *
+ * @param[in] cur_ref_poc
+ *  POC of the current PU's reference picture
+ *
+ * @param[in] col_ref_poc
+ *  POC of the collocated PU's reference picture
+ *
+ * @param[in] col_poc
+ *  POC of the collocated picture
+ *
+ * @param[in] cur_poc
+ *  POC of the current picture
+ *
+ * @returns
+ *  None
+ *
+ * @remarks
+ *
+ *******************************************************************************
+ */
+void ihevce_scale_collocated_mv(
+    mv_t *ps_mv, WORD32 cur_ref_poc, WORD32 col_ref_poc, WORD32 col_poc, WORD32 cur_poc)
+{
+    WORD32 td, tb, tx;
+    WORD32 dist_scale_factor;
+    WORD32 prod_x, prod_y;
+
+    td = CLIP_S8(col_poc - col_ref_poc);
+    tb = CLIP_S8(cur_poc - cur_ref_poc);
+
+    /* A zero collocated distance would divide by zero below; keep the mv as is */
+    if(0 == td)
+    {
+        return;
+    }
+
+    tx = (16384 + (abs(td) >> 1)) / td;
+
+    dist_scale_factor = (tb * tx + 32) >> 6;
+    dist_scale_factor = CLIP3(dist_scale_factor, -4096, 4095);
+
+    /* |factor| <= 4096 and |mv| <= 32768, so the products fit in 32 bits */
+    prod_x = dist_scale_factor * ps_mv->i2_mvx;
+    prod_y = dist_scale_factor * ps_mv->i2_mvy;
+
+    /* Round the magnitude, then restore the sign */
+    prod_x = SIGN(prod_x) * ((abs(prod_x) + 127) >> 8);
+    prod_y = SIGN(prod_y) * ((abs(prod_y) + 127) >> 8);
+
+    ps_mv->i2_mvx = CLIP_S16(prod_x);
+    ps_mv->i2_mvy = CLIP_S16(prod_y);
+
+} /* End of ihevce_scale_collocated_mv */
+
+/**
+ *******************************************************************************
+ *
+ * @brief Derives the temporal (collocated) motion vector candidates of a PU
+ *
+ * @par Description:
+ *  Locates the PU stored at (x_col, y_col) in the collocated reference picture
+ *  selected by the slice header, chooses which of its motion vectors serves
+ *  each list and scales it by the ratio of POC distances. Both candidates are
+ *  reported unavailable (zero MV, flag 0) when the position lies in a
+ *  different CTB row than the PU or outside the picture (unless the matching
+ *  ai4_tile_xtra_ctb entry is set), when the collocated PU is intra, or when
+ *  temporal MVP is disabled for the slice.
+ *
+ * @param[in] ps_mv_ctxt
+ *  MV predictor context (SPS, slice header, reference lists, CTB position)
+ *
+ * @param[in] ps_pu
+ *  Current PU; b4_pos_y is read, and the reference indices as well when
+ *  use_pu_ref_idx is non-zero
+ *
+ * @param[out] ps_mv_col
+ *  Two entries: [0] collocated MV for L0, [1] collocated MV for L1
+ *
+ * @param[out] pu4_avail_col_flag
+ *  Two entries: availability of the L0 / L1 collocated MV
+ *
+ * @param[in] use_pu_ref_idx
+ *  Non-zero: scale towards the PU's own reference indices
+ *  Zero    : scale towards reference index 0 of both lists
+ *
+ * @param[in] x_col
+ *  x position of the collocated sample, in pixels from the current CTB origin
+ *
+ * @param[in] y_col
+ *  y position of the collocated sample, in pixels from the current CTB origin
+ *
+ * @returns
+ *  None
+ *
+ *******************************************************************************
+ */
+void ihevce_collocated_mvp(
+    mv_pred_ctxt_t *ps_mv_ctxt,
+    pu_t *ps_pu,
+    mv_t *ps_mv_col,
+    WORD32 *pu4_avail_col_flag,
+    WORD32 use_pu_ref_idx,
+    WORD32 x_col,
+    WORD32 y_col)
+{
+    sps_t *ps_sps = ps_mv_ctxt->ps_sps;
+    slice_header_t *ps_slice_hdr = ps_mv_ctxt->ps_slice_hdr;
+    recon_pic_buf_t *ps_col_ref_buf;
+    pu_col_mv_t *ps_col_mv = NULL;
+    mv_t as_mv_col[2];
+    WORD32 au4_list_col[2];
+    WORD32 au4_ref_idx_col[2];
+    WORD32 log2_ctb_size = ps_sps->i1_log2_ctb_size;
+    WORD32 ctb_size = (1 << log2_ctb_size);
+    WORD32 num_minpu_in_ctb = (ctb_size / MIN_PU_SIZE) * (ctb_size / MIN_PU_SIZE);
+    WORD32 part_pos_y = ps_pu->b4_pos_y << 2;
+    WORD32 avail_col = 1;
+
+    /* Collocated picture comes from L1 only for B slices with collocated_from_l0_flag == 0 */
+    if((ps_slice_hdr->i1_slice_type == BSLICE) && (ps_slice_hdr->i1_collocated_from_l0_flag == 0))
+    {
+        ps_col_ref_buf = ps_mv_ctxt->ps_ref_list[1][ps_slice_hdr->i1_collocated_ref_idx];
+    }
+    else
+    {
+        ps_col_ref_buf = ps_mv_ctxt->ps_ref_list[0][ps_slice_hdr->i1_collocated_ref_idx];
+    }
+
+    if(((part_pos_y >> log2_ctb_size) == (y_col >> log2_ctb_size)) &&
+       (((x_col + (ps_mv_ctxt->i4_ctb_x << log2_ctb_size)) <
+         ps_sps->i2_pic_width_in_luma_samples) ||
+        ps_mv_ctxt->ai4_tile_xtra_ctb[2]) &&
+       (((y_col + (ps_mv_ctxt->i4_ctb_y << log2_ctb_size)) <
+         ps_sps->i2_pic_height_in_luma_samples) ||
+        ps_mv_ctxt->ai4_tile_xtra_ctb[3]))
+    {
+        /* Collocated motion is looked up on a 16x16 grid */
+        WORD32 xp_col = ((x_col >> 4) << 4);
+        WORD32 yp_col = ((y_col >> 4) << 4);
+        WORD32 col_ctb_x = ps_mv_ctxt->i4_ctb_x + (xp_col >> log2_ctb_size);
+        WORD32 col_ctb_y = ps_mv_ctxt->i4_ctb_y + (yp_col >> log2_ctb_size);
+        /* pu1_frm_pu_map and ps_frm_col_mv both have (i2_pic_wd_in_ctb + 1) CTBs for stride */
+        WORD32 col_ctb_idx = col_ctb_x + col_ctb_y * (ps_sps->i2_pic_wd_in_ctb + 1);
+        UWORD8 *pu1_pic_pu_map_ctb;
+        WORD32 pu_cnt;
+
+        /* Position falls in the CTB to the right: address its first column */
+        if(xp_col == ctb_size)
+        {
+            xp_col = 0;
+        }
+
+        pu1_pic_pu_map_ctb = ps_col_ref_buf->pu1_frm_pu_map + col_ctb_idx * num_minpu_in_ctb;
+
+        pu_cnt = pu1_pic_pu_map_ctb[(yp_col >> 2) * (ctb_size / MIN_PU_SIZE) + (xp_col >> 2)];
+
+        ps_col_mv = ps_col_ref_buf->ps_frm_col_mv + col_ctb_idx * num_minpu_in_ctb + pu_cnt;
+    }
+    else
+    {
+        avail_col = 0;
+    }
+
+    /* ps_col_mv is only dereferenced when avail_col is set (short-circuit) */
+    if((avail_col == 0) || (ps_col_mv->b1_intra_flag == 1) ||
+       (ps_slice_hdr->i1_slice_temporal_mvp_enable_flag == 0))
+    {
+        pu4_avail_col_flag[0] = 0;
+        pu4_avail_col_flag[1] = 0;
+        ps_mv_col[0].i2_mvx = 0;
+        ps_mv_col[0].i2_mvy = 0;
+        ps_mv_col[1].i2_mvx = 0;
+        ps_mv_col[1].i2_mvy = 0;
+    }
+    else
+    {
+        WORD32 pred_flag_l0 = (ps_col_mv->b2_pred_mode != PRED_L1);
+        WORD32 pred_flag_l1 = (ps_col_mv->b2_pred_mode != PRED_L0);
+        WORD32 cur_poc, col_poc, cur_ref_poc, col_ref_poc;
+        WORD32 ref_idx_l0 = 0;
+        WORD32 ref_idx_l1 = 0;
+
+        /* Select, per target list, the collocated mv / ref idx / source list */
+        if(pred_flag_l0 == 0)
+        {
+            /* Collocated PU uses L1 only */
+            as_mv_col[0] = ps_col_mv->s_l1_mv;
+            au4_ref_idx_col[0] = ps_col_mv->i1_l1_ref_idx;
+            au4_list_col[0] = 1; /* L1 */
+
+            as_mv_col[1] = ps_col_mv->s_l1_mv;
+            au4_ref_idx_col[1] = ps_col_mv->i1_l1_ref_idx;
+            au4_list_col[1] = 1; /* L1 */
+        }
+        else if(pred_flag_l1 == 0)
+        {
+            /* Collocated PU uses L0 only */
+            as_mv_col[0] = ps_col_mv->s_l0_mv;
+            au4_ref_idx_col[0] = ps_col_mv->i1_l0_ref_idx;
+            au4_list_col[0] = 0; /* L0 */
+
+            as_mv_col[1] = ps_col_mv->s_l0_mv;
+            au4_ref_idx_col[1] = ps_col_mv->i1_l0_ref_idx;
+            au4_list_col[1] = 0; /* L0 */
+        }
+        else if(1 == ps_slice_hdr->i1_low_delay_flag)
+        {
+            /* Bi-pred collocated PU, low delay: each list takes its own motion */
+            as_mv_col[0] = ps_col_mv->s_l0_mv;
+            au4_ref_idx_col[0] = ps_col_mv->i1_l0_ref_idx;
+            au4_list_col[0] = 0; /* L0 */
+
+            as_mv_col[1] = ps_col_mv->s_l1_mv;
+            au4_ref_idx_col[1] = ps_col_mv->i1_l1_ref_idx;
+            au4_list_col[1] = 1; /* L1 */
+        }
+        else
+        {
+            /* Bi-pred collocated PU otherwise: both lists take list  */
+            /* N = collocated_from_l0_flag of the collocated PU        */
+            if(0 == ps_slice_hdr->i1_collocated_from_l0_flag)
+            {
+                as_mv_col[0] = ps_col_mv->s_l0_mv;
+                au4_ref_idx_col[0] = ps_col_mv->i1_l0_ref_idx;
+
+                as_mv_col[1] = ps_col_mv->s_l0_mv;
+                au4_ref_idx_col[1] = ps_col_mv->i1_l0_ref_idx;
+            }
+            else
+            {
+                as_mv_col[0] = ps_col_mv->s_l1_mv;
+                au4_ref_idx_col[0] = ps_col_mv->i1_l1_ref_idx;
+
+                as_mv_col[1] = ps_col_mv->s_l1_mv;
+                au4_ref_idx_col[1] = ps_col_mv->i1_l1_ref_idx;
+            }
+
+            au4_list_col[0] = ps_slice_hdr->i1_collocated_from_l0_flag;
+            au4_list_col[1] = ps_slice_hdr->i1_collocated_from_l0_flag;
+        }
+
+        if(use_pu_ref_idx)
+        {
+            ref_idx_l0 = ps_pu->mv.i1_l0_ref_idx;
+            ref_idx_l1 = ps_pu->mv.i1_l1_ref_idx;
+        }
+
+        col_poc = ps_col_ref_buf->i4_poc;
+        cur_poc = ps_slice_hdr->i4_abs_pic_order_cnt;
+
+        /* L0 collocated mv (encoder has only short term references) */
+        if(-1 != ref_idx_l0)
+        {
+            if(au4_list_col[0] == 0)
+            {
+                col_ref_poc = ps_col_ref_buf->ai4_col_l0_poc[au4_ref_idx_col[0]];
+            }
+            else
+            {
+                col_ref_poc = ps_col_ref_buf->ai4_col_l1_poc[au4_ref_idx_col[0]];
+            }
+            cur_ref_poc = ps_mv_ctxt->ps_ref_list[0][ref_idx_l0]->i4_poc;
+
+            pu4_avail_col_flag[0] = 1;
+            ps_mv_col[0] = as_mv_col[0];
+
+            /* Scale only when the POC distances differ and the collocated one is non-zero */
+            if(((col_poc - col_ref_poc) != (cur_poc - cur_ref_poc)) && (col_ref_poc != col_poc))
+            {
+                ihevce_scale_collocated_mv(
+                    &ps_mv_col[0], cur_ref_poc, col_ref_poc, col_poc, cur_poc);
+            }
+        }
+        else
+        {
+            pu4_avail_col_flag[0] = 0;
+            ps_mv_col[0].i2_mvx = 0;
+            ps_mv_col[0].i2_mvy = 0;
+        }
+
+        /* L1 collocated mv */
+        if((BSLICE == ps_slice_hdr->i1_slice_type) && (-1 != ref_idx_l1))
+        {
+            /* The L1 candidate must be looked up with its own ref idx; it */
+            /* differs from entry [0] for low delay bi-pred collocated PUs */
+            if(au4_list_col[1] == 0)
+            {
+                col_ref_poc = ps_col_ref_buf->ai4_col_l0_poc[au4_ref_idx_col[1]];
+            }
+            else
+            {
+                col_ref_poc = ps_col_ref_buf->ai4_col_l1_poc[au4_ref_idx_col[1]];
+            }
+            cur_ref_poc = ps_mv_ctxt->ps_ref_list[1][ref_idx_l1]->i4_poc;
+
+            pu4_avail_col_flag[1] = 1;
+            ps_mv_col[1] = as_mv_col[1];
+
+            if(((col_poc - col_ref_poc) != (cur_poc - cur_ref_poc)) && (col_ref_poc != col_poc))
+            {
+                ihevce_scale_collocated_mv(
+                    &ps_mv_col[1], cur_ref_poc, col_ref_poc, col_poc, cur_poc);
+            }
+        } /* End of if BSLICE */
+        else
+        {
+            /* Zero the mv as done for L0 so callers never copy an unwritten value */
+            pu4_avail_col_flag[1] = 0;
+            ps_mv_col[1].i2_mvx = 0;
+            ps_mv_col[1].i2_mvy = 0;
+        }
+
+    } /* End of collocated MV calculation */
+
+} /* End of ihevce_collocated_mvp */
+
+/**
+ *******************************************************************************
+ *
+ * @brief Compare Motion vectors function
+ *
+ *
+ * @par Description:
+ * Checks if MVs and Reference idx are exactly matching.
+ *
+ * @param[inout] ps_1
+ * motion vector 1 to be compared
+ *
+ * @param[in] ps_2
+ * motion vector 2 to be compared
+ *
+ * @returns
+ * 0 : if not matching 1 : if matching
+ *
+ * @remarks
+ *
+ *******************************************************************************
+ */
+
+/**
+ *******************************************************************************
+ *
+ * @brief
+ * This function performs Motion Vector Merge candidates derivation
+ *
+ * @par Description:
+ * MV merge list is computed using neighbor mvs and colocated mv.
+ * Candidates are appended in this order: spatial (A1, B1, B0, A0, B2),
+ * temporal, combined bi-pred (B slices only) and zero candidates.
+ *
+ * @param[in] ps_ctxt
+ * pointer to mv predictor context
+ *
+ * @param[in] ps_top_nbr_4x4
+ * pointer to top 4x4 nbr structure
+ *
+ * @param[in] ps_left_nbr_4x4
+ * pointer to left 4x4 nbr structure
+ *
+ * @param[in] ps_top_left_nbr_4x4
+ * pointer to top left 4x4 nbr structure
+ *
+ * @param[in] left_nbr_4x4_strd
+ * left nbr buffer stride in terms of 4x4 units
+ *
+ * @param[in] ps_avail_flags
+ * Neighbor availability flags container
+ *
+ * @param[in] ps_col_mv
+ * Colocated MV pointer (not referenced inside this function)
+ *
+ * @param[in] ps_pu
+ * Current Partition PU structure pointer
+ *
+ * @param[in] part_mode
+ * Partition mode @sa PART_SIZE_E
+ *
+ * @param[in] part_idx
+ * Partition idx of current partition inside CU
+ *
+ * @param[in] single_mcl_flag
+ * Single MCL flag based on 8x8 CU and Parallel merge value
+ *
+ * @param[out] ps_merge_cand_list
+ * pointer to store MV merge candidates list
+ *
+ * @param[out] pu1_is_top_used
+ * One entry per merge candidate written: 1 for spatial candidates taken from
+ * B0/B1/B2, 0 for A0/A1, temporal and zero candidates; a combined bi-pred
+ * candidate gets 1 when either of its two source candidates has 1
+ *
+ * @returns
+ * Number of merge candidates
+ * @remarks
+ *
+ *
+ *******************************************************************************
+ */
+WORD32 ihevce_mv_pred_merge(
+ mv_pred_ctxt_t *ps_ctxt,
+ nbr_4x4_t *ps_top_nbr_4x4,
+ nbr_4x4_t *ps_left_nbr_4x4,
+ nbr_4x4_t *ps_top_left_nbr_4x4,
+ WORD32 left_nbr_4x4_strd,
+ nbr_avail_flags_t *ps_avail_flags,
+ pu_mv_t *ps_col_mv,
+ pu_t *ps_pu,
+ PART_SIZE_E part_mode,
+ WORD32 part_idx,
+ WORD32 single_mcl_flag,
+ merge_cand_list_t *ps_merge_cand_list,
+ UWORD8 *pu1_is_top_used)
+{
+ /******************************************************/
+ /* Spatial Merge Candidates */
+ /******************************************************/
+ WORD32 part_pos_x;
+ WORD32 part_pos_y;
+ WORD32 part_wd;
+ WORD32 part_ht;
+ WORD32 slice_type;
+ WORD32 num_ref_idx_l0_active;
+ WORD32 num_ref_idx_l1_active;
+ WORD32 num_merge_cand;
+ WORD32 log2_parallel_merge_level_minus2;
+ WORD32 n;
+ WORD8 i1_spatial_avail_flag_n[MAX_NUM_MV_NBR]; /*[A0/A1/B0/B1/B2]*/
+ WORD32 nbr_x[MAX_NUM_MV_NBR], nbr_y[MAX_NUM_MV_NBR];
+ UWORD8 u1_nbr_avail[MAX_NUM_MV_NBR];
+ WORD32 merge_shift;
+ nbr_4x4_t *ps_nbr_mv[MAX_NUM_MV_NBR];
+
+ /*******************************************/
+ /* Neighbor location: Graphical indication */
+ /* */
+ /* B2 _____________B1 B0 */
+ /* | | */
+ /* | | */
+ /* | | */
+ /* | PU ht| */
+ /* | | */
+ /* | | */
+ /* A1|______wd_______| */
+ /* A0 */
+ /* */
+ /*******************************************/
+
+ /* PU position and size are stored in 4 pixel units; convert to pixels */
+ part_pos_x = ps_pu->b4_pos_x << 2;
+ part_pos_y = ps_pu->b4_pos_y << 2;
+ part_ht = (ps_pu->b4_ht + 1) << 2;
+ part_wd = (ps_pu->b4_wd + 1) << 2;
+
+ slice_type = ps_ctxt->ps_slice_hdr->i1_slice_type;
+ num_ref_idx_l0_active = ps_ctxt->ps_slice_hdr->i1_num_ref_idx_l0_active;
+ num_ref_idx_l1_active = ps_ctxt->ps_slice_hdr->i1_num_ref_idx_l1_active;
+ log2_parallel_merge_level_minus2 = ps_ctxt->i4_log2_parallel_merge_level_minus2;
+
+ /* Assigning co-ordinates to neighbors */
+ nbr_x[NBR_A0] = part_pos_x - 1;
+ nbr_y[NBR_A0] = part_pos_y + part_ht; /* A0 */
+
+ nbr_x[NBR_A1] = part_pos_x - 1;
+ nbr_y[NBR_A1] = part_pos_y + part_ht - 1; /* A1 */
+
+ nbr_x[NBR_B0] = part_pos_x + part_wd;
+ nbr_y[NBR_B0] = part_pos_y - 1; /* B0 */
+
+ nbr_x[NBR_B1] = part_pos_x + part_wd - 1;
+ nbr_y[NBR_B1] = part_pos_y - 1; /* B1 */
+
+ nbr_x[NBR_B2] = part_pos_x - 1;
+ nbr_y[NBR_B2] = part_pos_y - 1; /* B2 */
+
+ /* Assigning mv's */
+ /* Left neighbours are addressed by row offset, top neighbours by column offset (4x4 units) */
+ ps_nbr_mv[NBR_A0] = ps_left_nbr_4x4 + ((nbr_y[NBR_A0] - part_pos_y) >> 2) * left_nbr_4x4_strd;
+ ps_nbr_mv[NBR_A1] = ps_left_nbr_4x4 + ((nbr_y[NBR_A1] - part_pos_y) >> 2) * left_nbr_4x4_strd;
+ ps_nbr_mv[NBR_B0] = ps_top_nbr_4x4 + ((nbr_x[NBR_B0] - part_pos_x) >> 2);
+ ps_nbr_mv[NBR_B1] = ps_top_nbr_4x4 + ((nbr_x[NBR_B1] - part_pos_x) >> 2);
+
+ if(part_pos_y == 0) /* AT vertical CTB boundary */
+ ps_nbr_mv[NBR_B2] = ps_top_nbr_4x4 + ((nbr_x[NBR_B2] - part_pos_x) >> 2);
+ else
+ ps_nbr_mv[NBR_B2] = ps_top_left_nbr_4x4;
+
+ /* Assigning nbr availability */
+ /* A neighbour counts only when it is available and not intra coded; */
+ /* the intra flag is read only when the availability flag is set */
+ u1_nbr_avail[NBR_A0] = ps_avail_flags->u1_bot_lt_avail &&
+ (!ps_nbr_mv[NBR_A0]->b1_intra_flag); /* A0 */
+ u1_nbr_avail[NBR_A1] = ps_avail_flags->u1_left_avail &&
+ (!ps_nbr_mv[NBR_A1]->b1_intra_flag); /* A1 */
+ u1_nbr_avail[NBR_B0] = ps_avail_flags->u1_top_rt_avail &&
+ (!ps_nbr_mv[NBR_B0]->b1_intra_flag); /* B0 */
+ u1_nbr_avail[NBR_B1] = ps_avail_flags->u1_top_avail &&
+ (!ps_nbr_mv[NBR_B1]->b1_intra_flag); /* B1 */
+ u1_nbr_avail[NBR_B2] = ps_avail_flags->u1_top_lt_avail &&
+ (!ps_nbr_mv[NBR_B2]->b1_intra_flag); /* B2 */
+
+ merge_shift = log2_parallel_merge_level_minus2 + 2;
+
+ /* Availability check */
+ /* A1 */
+ {
+ WORD32 avail_flag;
+ avail_flag = 1;
+ n = NBR_A1;
+
+ /* if at same merge level */
+ if((part_pos_x >> merge_shift) == (nbr_x[n] >> merge_shift) &&
+ ((part_pos_y >> merge_shift) == (nbr_y[n] >> merge_shift)))
+ {
+ u1_nbr_avail[n] = 0;
+ }
+
+ /* SPEC JCTVC-K1003_v9 version has a different way using not available */
+ /* candidates compared to software. for non square part and second part case */
+ /* ideally nothing from the 1st partition should be used as per spec but */
+ /* HM 8.2 dev version does not adhere to this. currently code follows HM */
+
+ /* if single MCL is 0 , second part of 2 part in CU */
+ if((single_mcl_flag == 0) && (part_idx == 1) &&
+ ((part_mode == PART_Nx2N) || (part_mode == PART_nLx2N) || (part_mode == PART_nRx2N)))
+ {
+ u1_nbr_avail[n] = 0;
+ }
+
+ if(u1_nbr_avail[n] == 0)
+ {
+ avail_flag = 0;
+ }
+ i1_spatial_avail_flag_n[n] = avail_flag;
+ }
+ /* B1 */
+ {
+ WORD32 avail_flag;
+ avail_flag = 1;
+ n = NBR_B1;
+
+ /* if at same merge level */
+ if((part_pos_x >> merge_shift) == (nbr_x[n] >> merge_shift) &&
+ ((part_pos_y >> merge_shift) == (nbr_y[n] >> merge_shift)))
+ {
+ u1_nbr_avail[n] = 0;
+ }
+
+ /* if single MCL is 0 , second part of 2 part in CU */
+ if((single_mcl_flag == 0) && (part_idx == 1) &&
+ ((part_mode == PART_2NxN) || (part_mode == PART_2NxnU) || (part_mode == PART_2NxnD)))
+ {
+ u1_nbr_avail[n] = 0;
+ }
+
+ if(u1_nbr_avail[n] == 0)
+ {
+ avail_flag = 0;
+ }
+
+ /* Drop B1 when it carries the same motion as A1 */
+ if((avail_flag == 1) && (u1_nbr_avail[NBR_A1] == 1))
+ {
+ /* TODO: Assumption: mvs and ref indices in both l0 and l1*/
+ /* should match for non availability */
+ WORD32 i4_pred_1, i4_pred_2;
+ i4_pred_1 =
+ (ps_nbr_mv[NBR_A1]->b1_pred_l0_flag | (ps_nbr_mv[NBR_A1]->b1_pred_l1_flag << 1)) -
+ 1;
+ i4_pred_2 = (ps_nbr_mv[n]->b1_pred_l0_flag | (ps_nbr_mv[n]->b1_pred_l1_flag << 1)) - 1;
+ if(ihevce_compare_pu_mv_t(
+ &ps_nbr_mv[NBR_A1]->mv, &ps_nbr_mv[n]->mv, i4_pred_1, i4_pred_2))
+ {
+ avail_flag = 0;
+ }
+ }
+ i1_spatial_avail_flag_n[n] = avail_flag;
+ }
+
+ /* B0 */
+ {
+ WORD32 avail_flag;
+ avail_flag = 1;
+ n = NBR_B0;
+
+ /* if at same merge level */
+ if((part_pos_x >> merge_shift) == (nbr_x[n] >> merge_shift) &&
+ ((part_pos_y >> merge_shift) == (nbr_y[n] >> merge_shift)))
+ {
+ u1_nbr_avail[n] = 0;
+ }
+
+ if(u1_nbr_avail[n] == 0)
+ {
+ avail_flag = 0;
+ }
+
+ /* Drop B0 when it carries the same motion as B1 */
+ if((avail_flag == 1) && (u1_nbr_avail[NBR_B1] == 1))
+ {
+ WORD32 i4_pred_1, i4_pred_2;
+ i4_pred_1 =
+ (ps_nbr_mv[NBR_B1]->b1_pred_l0_flag | (ps_nbr_mv[NBR_B1]->b1_pred_l1_flag << 1)) -
+ 1;
+ i4_pred_2 = (ps_nbr_mv[n]->b1_pred_l0_flag | (ps_nbr_mv[n]->b1_pred_l1_flag << 1)) - 1;
+ if(ihevce_compare_pu_mv_t(
+ &ps_nbr_mv[NBR_B1]->mv, &ps_nbr_mv[n]->mv, i4_pred_1, i4_pred_2))
+ {
+ avail_flag = 0;
+ }
+ }
+ i1_spatial_avail_flag_n[n] = avail_flag;
+ }
+
+ /* A0 */
+ {
+ WORD32 avail_flag;
+ avail_flag = 1;
+ n = NBR_A0;
+
+ /* if at same merge level */
+ if((part_pos_x >> merge_shift) == (nbr_x[n] >> merge_shift) &&
+ ((part_pos_y >> merge_shift) == (nbr_y[n] >> merge_shift)))
+ {
+ u1_nbr_avail[n] = 0;
+ }
+
+ if(u1_nbr_avail[n] == 0)
+ {
+ avail_flag = 0;
+ }
+
+ /* Drop A0 when it carries the same motion as A1 */
+ if((avail_flag == 1) && (u1_nbr_avail[NBR_A1] == 1))
+ {
+ WORD32 i4_pred_1, i4_pred_2;
+ i4_pred_1 =
+ (ps_nbr_mv[NBR_A1]->b1_pred_l0_flag | (ps_nbr_mv[NBR_A1]->b1_pred_l1_flag << 1)) -
+ 1;
+ i4_pred_2 = (ps_nbr_mv[n]->b1_pred_l0_flag | (ps_nbr_mv[n]->b1_pred_l1_flag << 1)) - 1;
+ if(ihevce_compare_pu_mv_t(
+ &ps_nbr_mv[NBR_A1]->mv, &ps_nbr_mv[n]->mv, i4_pred_1, i4_pred_2))
+ {
+ avail_flag = 0;
+ }
+ }
+ i1_spatial_avail_flag_n[n] = avail_flag;
+ }
+ /* B2 */
+ {
+ WORD32 avail_flag;
+ avail_flag = 1;
+ n = NBR_B2;
+
+ /* if at same merge level */
+ if((part_pos_x >> merge_shift) == (nbr_x[n] >> merge_shift) &&
+ ((part_pos_y >> merge_shift) == (nbr_y[n] >> merge_shift)))
+ {
+ u1_nbr_avail[n] = 0;
+ }
+
+ if(u1_nbr_avail[n] == 0)
+ {
+ avail_flag = 0;
+ }
+
+ /* B2 is not used when the other four spatial candidates are all present */
+ if((i1_spatial_avail_flag_n[NBR_A0] + i1_spatial_avail_flag_n[NBR_A1] +
+ i1_spatial_avail_flag_n[NBR_B0] + i1_spatial_avail_flag_n[NBR_B1]) == 4)
+ {
+ avail_flag = 0;
+ }
+
+ /* Drop B2 when it carries the same motion as A1 or B1 */
+ if(avail_flag == 1)
+ {
+ if(u1_nbr_avail[NBR_A1] == 1)
+ {
+ WORD32 i4_pred_1, i4_pred_2;
+ i4_pred_1 = (ps_nbr_mv[NBR_A1]->b1_pred_l0_flag |
+ (ps_nbr_mv[NBR_A1]->b1_pred_l1_flag << 1)) -
+ 1;
+ i4_pred_2 =
+ (ps_nbr_mv[n]->b1_pred_l0_flag | (ps_nbr_mv[n]->b1_pred_l1_flag << 1)) - 1;
+ if(ihevce_compare_pu_mv_t(
+ &ps_nbr_mv[NBR_A1]->mv, &ps_nbr_mv[n]->mv, i4_pred_1, i4_pred_2))
+ {
+ avail_flag = 0;
+ }
+ }
+ if(u1_nbr_avail[NBR_B1] == 1)
+ {
+ WORD32 i4_pred_1, i4_pred_2;
+ i4_pred_1 = (ps_nbr_mv[NBR_B1]->b1_pred_l0_flag |
+ (ps_nbr_mv[NBR_B1]->b1_pred_l1_flag << 1)) -
+ 1;
+ i4_pred_2 =
+ (ps_nbr_mv[n]->b1_pred_l0_flag | (ps_nbr_mv[n]->b1_pred_l1_flag << 1)) - 1;
+ if(ihevce_compare_pu_mv_t(
+ &ps_nbr_mv[NBR_B1]->mv, &ps_nbr_mv[n]->mv, i4_pred_1, i4_pred_2))
+ {
+ avail_flag = 0;
+ }
+ }
+ }
+ i1_spatial_avail_flag_n[n] = avail_flag;
+ }
+
+ /******************************************************/
+ /* Merge Candidates List */
+ /******************************************************/
+ /* Preparing MV merge candidate list */
+ {
+ /* Order in which the spatial candidates enter the list */
+ WORD32 merge_list_priority[MAX_NUM_MERGE_CAND] = { NBR_A1, NBR_B1, NBR_B0, NBR_A0, NBR_B2 };
+
+ num_merge_cand = 0;
+ for(n = 0; n < MAX_NUM_MERGE_CAND; n++)
+ {
+ WORD32 merge_idx;
+ merge_idx = merge_list_priority[n];
+ if(i1_spatial_avail_flag_n[merge_idx] == 1)
+ {
+ ps_merge_cand_list[num_merge_cand].mv = ps_nbr_mv[merge_idx]->mv;
+ ps_merge_cand_list[num_merge_cand].u1_pred_flag_l0 =
+ (UWORD8)ps_nbr_mv[merge_idx]->b1_pred_l0_flag;
+ ps_merge_cand_list[num_merge_cand].u1_pred_flag_l1 =
+ (UWORD8)ps_nbr_mv[merge_idx]->b1_pred_l1_flag;
+
+ /* Left neighbours (A0/A1) are flagged 0, top neighbours (B0/B1/B2) 1 */
+ switch(merge_list_priority[n])
+ {
+ case NBR_A1:
+ case NBR_A0:
+ {
+ pu1_is_top_used[num_merge_cand] = 0;
+
+ break;
+ }
+ default:
+ {
+ pu1_is_top_used[num_merge_cand] = 1;
+
+ break;
+ }
+ }
+
+ num_merge_cand++;
+ }
+ }
+
+ /******************************************************/
+ /* Temporal Merge Candidates */
+ /******************************************************/
+ if(num_merge_cand < MAX_NUM_MERGE_CAND)
+ {
+ mv_t as_mv_col[2];
+ WORD32 avail_col_flag[2] = { 0 }, x_col, y_col;
+ WORD32 avail_col_l0, avail_col_l1;
+
+ /* Checking Collocated MV availability at Bottom right of PU*/
+ x_col = part_pos_x + part_wd;
+ y_col = part_pos_y + part_ht;
+ ihevce_collocated_mvp(ps_ctxt, ps_pu, as_mv_col, avail_col_flag, 0, x_col, y_col);
+
+ avail_col_l0 = avail_col_flag[0];
+ avail_col_l1 = avail_col_flag[1];
+
+ if(avail_col_l0 || avail_col_l1)
+ {
+ ps_merge_cand_list[num_merge_cand].mv.s_l0_mv = as_mv_col[0];
+ ps_merge_cand_list[num_merge_cand].mv.s_l1_mv = as_mv_col[1];
+ }
+
+ if(avail_col_l0 == 0 || avail_col_l1 == 0)
+ {
+ /* Checking Collocated MV availability at Center of PU */
+ x_col = part_pos_x + (part_wd >> 1);
+ y_col = part_pos_y + (part_ht >> 1);
+ ihevce_collocated_mvp(ps_ctxt, ps_pu, as_mv_col, avail_col_flag, 0, x_col, y_col);
+
+ /* The centre result only fills the lists the bottom right position left empty */
+ if(avail_col_l0 == 0)
+ {
+ ps_merge_cand_list[num_merge_cand].mv.s_l0_mv = as_mv_col[0];
+ }
+ if(avail_col_l1 == 0)
+ {
+ ps_merge_cand_list[num_merge_cand].mv.s_l1_mv = as_mv_col[1];
+ }
+
+ avail_col_l0 |= avail_col_flag[0];
+ avail_col_l1 |= avail_col_flag[1];
+ }
+
+ ps_merge_cand_list[num_merge_cand].mv.i1_l0_ref_idx = 0;
+ ps_merge_cand_list[num_merge_cand].mv.i1_l1_ref_idx = 0;
+ ps_merge_cand_list[num_merge_cand].u1_pred_flag_l0 = avail_col_l0 ? 1 : 0;
+ ps_merge_cand_list[num_merge_cand].u1_pred_flag_l1 = avail_col_l1 ? 1 : 0;
+
+ /* The slot written above is kept only when at least one list is available */
+ if(avail_col_l0 || avail_col_l1)
+ {
+ pu1_is_top_used[num_merge_cand] = 0;
+ num_merge_cand++;
+ }
+ }
+
+ /******************************************************/
+ /* Bi pred merge candidates */
+ /******************************************************/
+ if(slice_type == BSLICE)
+ {
+ if((num_merge_cand > 1) && (num_merge_cand < MAX_NUM_MERGE_CAND))
+ {
+ /* Pairs (l0 source, l1 source) of existing candidates, in the order they are tried */
+ WORD32 priority_list0[12] = { 0, 1, 0, 2, 1, 2, 0, 3, 1, 3, 2, 3 };
+ WORD32 priority_list1[12] = { 1, 0, 2, 0, 2, 1, 3, 0, 3, 1, 3, 2 };
+ WORD32 l0_cand, l1_cand;
+ WORD32 bi_pred_idx = 0;
+ /* Number of ordered pairs of the candidates present at this point */
+ WORD32 total_bi_pred_cand = num_merge_cand * (num_merge_cand - 1);
+
+ while(bi_pred_idx < total_bi_pred_cand)
+ {
+ l0_cand = priority_list0[bi_pred_idx];
+ l1_cand = priority_list1[bi_pred_idx];
+
+ if((ps_merge_cand_list[l0_cand].u1_pred_flag_l0 == 1) &&
+ (ps_merge_cand_list[l1_cand].u1_pred_flag_l1 == 1))
+ {
+ WORD8 i1_l0_ref_idx, i1_l1_ref_idx;
+ WORD32 l0_poc, l1_poc;
+ mv_t s_l0_mv, s_l1_mv;
+
+ i1_l0_ref_idx = ps_merge_cand_list[l0_cand].mv.i1_l0_ref_idx;
+ i1_l1_ref_idx = ps_merge_cand_list[l1_cand].mv.i1_l1_ref_idx;
+ l0_poc = ps_ctxt->ps_ref_list[0][i1_l0_ref_idx]->i4_poc;
+ l1_poc = ps_ctxt->ps_ref_list[1][i1_l1_ref_idx]->i4_poc;
+ s_l0_mv = ps_merge_cand_list[l0_cand].mv.s_l0_mv;
+ s_l1_mv = ps_merge_cand_list[l1_cand].mv.s_l1_mv;
+
+ /* Combine only when the two halves differ in reference picture or in mv */
+ if((l0_poc != l1_poc) || (s_l0_mv.i2_mvx != s_l1_mv.i2_mvx) ||
+ (s_l0_mv.i2_mvy != s_l1_mv.i2_mvy))
+ {
+ ps_merge_cand_list[num_merge_cand].mv.s_l0_mv = s_l0_mv;
+ ps_merge_cand_list[num_merge_cand].mv.s_l1_mv = s_l1_mv;
+ ps_merge_cand_list[num_merge_cand].mv.i1_l0_ref_idx = i1_l0_ref_idx;
+ ps_merge_cand_list[num_merge_cand].mv.i1_l1_ref_idx = i1_l1_ref_idx;
+ ps_merge_cand_list[num_merge_cand].u1_pred_flag_l0 = 1;
+ ps_merge_cand_list[num_merge_cand].u1_pred_flag_l1 = 1;
+
+ if(pu1_is_top_used[l0_cand] || pu1_is_top_used[l1_cand])
+ {
+ pu1_is_top_used[num_merge_cand] = 1;
+ }
+ else
+ {
+ pu1_is_top_used[num_merge_cand] = 0;
+ }
+
+ num_merge_cand++;
+ }
+ }
+
+ bi_pred_idx++;
+
+ if((bi_pred_idx == total_bi_pred_cand) ||
+ (num_merge_cand == MAX_NUM_MERGE_CAND))
+ {
+ break;
+ }
+ }
+ }
+ } /* End of Bipred merge candidates */
+
+ /******************************************************/
+ /* Zero merge candidates */
+ /******************************************************/
+ if(num_merge_cand < MAX_NUM_MERGE_CAND)
+ {
+ WORD32 num_ref_idx;
+ WORD32 zero_idx;
+
+ zero_idx = 0;
+
+ if(slice_type == PSLICE)
+ num_ref_idx = num_ref_idx_l0_active;
+ else
+ /* Slice type B */
+ num_ref_idx = MIN(num_ref_idx_l0_active, num_ref_idx_l1_active);
+
+ while(num_merge_cand < MAX_NUM_MERGE_CAND)
+ {
+ if(slice_type == PSLICE)
+ {
+ ps_merge_cand_list[num_merge_cand].mv.i1_l0_ref_idx = zero_idx;
+ ps_merge_cand_list[num_merge_cand].mv.i1_l1_ref_idx = -1;
+ ps_merge_cand_list[num_merge_cand].u1_pred_flag_l0 = 1;
+ ps_merge_cand_list[num_merge_cand].u1_pred_flag_l1 = 0;
+ }
+ else /* Slice type B */
+ {
+ ps_merge_cand_list[num_merge_cand].mv.i1_l0_ref_idx = zero_idx;
+ ps_merge_cand_list[num_merge_cand].mv.i1_l1_ref_idx = zero_idx;
+ ps_merge_cand_list[num_merge_cand].u1_pred_flag_l0 = 1;
+ ps_merge_cand_list[num_merge_cand].u1_pred_flag_l1 = 1;
+ }
+
+ ps_merge_cand_list[num_merge_cand].mv.s_l0_mv.i2_mvx = 0;
+ ps_merge_cand_list[num_merge_cand].mv.s_l0_mv.i2_mvy = 0;
+ ps_merge_cand_list[num_merge_cand].mv.s_l1_mv.i2_mvx = 0;
+ ps_merge_cand_list[num_merge_cand].mv.s_l1_mv.i2_mvy = 0;
+
+ pu1_is_top_used[num_merge_cand] = 0;
+
+ num_merge_cand++;
+ zero_idx++;
+
+ /* if all the reference pics have been added as candidates */
+ /* then the loop should break since it would add same cand again; */
+ /* the returned count can therefore be below MAX_NUM_MERGE_CAND */
+ if(zero_idx == num_ref_idx)
+ {
+ break;
+ }
+ }
+ } /* End of zero merge candidates */
+
+ } /* End of merge candidate list population */
+
+ return (num_merge_cand);
+}
diff --git a/encoder/ihevce_mv_pred_merge.h b/encoder/ihevce_mv_pred_merge.h
new file mode 100644
index 0000000..66eafb6
--- /dev/null
+++ b/encoder/ihevce_mv_pred_merge.h
@@ -0,0 +1,118 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_mv_pred_merge.h
+*
+* \brief
+* This file contains function prototypes of MV Merge candidates list
+* derivation functions and corresponding structure and macro definitions
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_MV_PRED_MERGE_H_
+#define _IHEVCE_MV_PRED_MERGE_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+/* Capacity of the merge candidate list; MAX_MERGE_CANDIDATES is defined outside this header */
+#define MAX_NUM_MERGE_CAND MAX_MERGE_CANDIDATES
+/* Number of spatial neighbours (A0, A1, B0, B1, B2) examined for merge */
+#define MAX_NUM_MV_NBR 5
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+/* Indices of the spatial neighbours of a PU, used to index per-neighbour arrays */
+typedef enum
+{
+ NBR_A0 = 0, /* below-left: (x - 1, y + ht) */
+ NBR_A1 = 1, /* left, bottom-most row: (x - 1, y + ht - 1) */
+ NBR_B0 = 2, /* above-right: (x + wd, y - 1) */
+ NBR_B1 = 3, /* above, right-most column: (x + wd - 1, y - 1) */
+ NBR_B2 = 4, /* above-left: (x - 1, y - 1) */
+
+ /* should be last */
+ MAX_NUM_NBRS
+} MV_MERGE_NBRS_T;
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+
+/* One entry of the merge candidate list filled by ihevce_mv_pred_merge() */
+typedef struct
+{
+ /* Merge candidate motion vectors and reference idx */
+ pu_mv_t mv;
+
+ /* 1 when the candidate predicts from list 0, else 0 */
+ UWORD8 u1_pred_flag_l0;
+
+ /* 1 when the candidate predicts from list 1, else 0 */
+ UWORD8 u1_pred_flag_l1;
+
+} merge_cand_list_t;
+
+/*****************************************************************************/
+/* Extern Variable Declarations */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+/*
+ * Derives the temporal (collocated) MV candidates of a PU.
+ * ps_mv_col and pu4_avail_col_flag each point to two entries, [0] for L0 and
+ * [1] for L1. When use_pu_ref_idx is zero, reference index 0 is used for both
+ * lists; otherwise the reference indices of ps_pu are used. x_col and y_col
+ * give the collocated position in pixels relative to the current CTB.
+ */
+void ihevce_collocated_mvp(
+ mv_pred_ctxt_t *ps_mv_ctxt,
+ pu_t *ps_pu,
+ mv_t *ps_mv_col,
+ WORD32 *pu4_avail_col_flag,
+ WORD32 use_pu_ref_idx,
+ WORD32 x_col,
+ WORD32 y_col);
+
+/*
+ * Builds the merge candidate list of a PU (spatial, temporal, combined
+ * bi-pred and zero candidates) into ps_merge_cand_list and returns the number
+ * of candidates written. pu1_is_top_used receives one entry per candidate.
+ */
+WORD32 ihevce_mv_pred_merge(
+ mv_pred_ctxt_t *ps_ctxt,
+ nbr_4x4_t *ps_top_nbr_4x4,
+ nbr_4x4_t *ps_left_nbr_4x4,
+ nbr_4x4_t *ps_top_left_nbr_4x4,
+ WORD32 left_nbr_4x4_strd,
+ nbr_avail_flags_t *ps_avail_flags,
+ pu_mv_t *ps_col_mv,
+ pu_t *ps_pu,
+ PART_SIZE_E part_mode,
+ WORD32 part_idx,
+ WORD32 single_mcl_flag,
+ merge_cand_list_t *ps_merge_cand_list,
+ UWORD8 *pu1_is_top_used);
+
+#endif /* _IHEVCE_MV_PRED_MERGE_H_ */
diff --git a/encoder/ihevce_nbr_avail.c b/encoder/ihevce_nbr_avail.c
new file mode 100644
index 0000000..7ab3db8
--- /dev/null
+++ b/encoder/ihevce_nbr_avail.c
@@ -0,0 +1,706 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file ihevce_nbr_avail.c
+*
+* @brief
+* This file contains function definitions and look up tables for various
+* neighbour availability flags in HEVC encoder
+*
+* @author
+* Ittiam
+*
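+* List of Functions : ihevce_set_ctb_nbr, ihevce_get_nbr_intra,
+* ihevce_get_nbr_intra_mxn_tu, ihevce_get_intra_chroma_tu_nbr,
+* ihevce_get_only_nbr_flag, ihevce_set_nbr_map, ihevce_set_inter_nbr_map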
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_multi_thrd_funcs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_nbr_avail.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_set_ctb_nbr \endif
+*
+* \brief
+*    Derives the neighbour availability of a CTB from the tile-id map and
+*    stores it in ps_nbr as well as on the border of the 4x4 availability map
+*
+* \param[out] ps_nbr : availability flags of the current CTB
+* \param[in,out] pu1_nbr_map : 4x4 level availability map, treated as pointing
+*                 at the first 4x4 of the current CTB; only the row above it
+*                 and the column to its left are written
+* \param[in] nbr_map_strd : stride of pu1_nbr_map
+* \param[in] ctb_pos_x : horizontal position of the CTB in CTB units
+* \param[in] ctb_pos_y : vertical position of the CTB in CTB units
+* \param[in] ps_frm_ctb_prms : frame level CTB parameters (CTB size, number of
+*                 CTBs, CU aligned picture size and tile-id map)
+*
+* \note The tile-id entries left of, right of and above the current CTB are
+*    read unconditionally, so the map must be readable at those positions
+*
+* \date 18/09/2012
+* \author Ittiam
+* \return none
+******************************************************************************
+*/
+void ihevce_set_ctb_nbr(
+    nbr_avail_flags_t *ps_nbr,
+    UWORD8 *pu1_nbr_map,
+    WORD32 nbr_map_strd,
+    WORD32 ctb_pos_x,
+    WORD32 ctb_pos_y,
+    frm_ctb_ctxt_t *ps_frm_ctb_prms)
+{
+    /* Frame geometry, all read up front */
+    const WORD32 i4_ctb_size = ps_frm_ctb_prms->i4_ctb_size;
+    const WORD32 i4_last_ctb_col = ps_frm_ctb_prms->i4_num_ctbs_horz - 1;
+    const WORD32 i4_last_ctb_row = ps_frm_ctb_prms->i4_num_ctbs_vert - 1;
+    const WORD32 i4_pic_wd = ps_frm_ctb_prms->i4_cu_aligned_pic_wd;
+    const WORD32 i4_pic_ht = ps_frm_ctb_prms->i4_cu_aligned_pic_ht;
+    const WORD32 i4_tile_strd = ps_frm_ctb_prms->i4_tile_id_ctb_map_stride;
+    const WORD32 i4_4x4_per_ctb = i4_ctb_size >> 2;
+
+    /* Tile-id entry of the current CTB; neighbours are read relative to it */
+    WORD32 *pi4_tile_id =
+        ps_frm_ctb_prms->pi4_tile_id_map + ctb_pos_y * i4_tile_strd + ctb_pos_x;
+    const WORD32 i4_tile_cur = pi4_tile_id[0];
+    const WORD32 i4_tile_left = pi4_tile_id[-1];
+    const WORD32 i4_tile_right = pi4_tile_id[1];
+    const WORD32 i4_tile_top = pi4_tile_id[-i4_tile_strd];
+
+    /* Border of the availability map around the current CTB */
+    UWORD8 *pu1_top_row = pu1_nbr_map - nbr_map_strd;
+    UWORD8 *pu1_left_col = pu1_nbr_map - 1;
+
+    /* Number of 4x4 units to mark on each side (a full CTB by default) */
+    WORD32 i4_num_4x4_wd = i4_4x4_per_ctb;
+    WORD32 i4_num_4x4_ht = i4_4x4_per_ctb;
+    WORD32 i4_top_rt_len = MAX_TU_SIZE / 4;
+    WORD32 i4_rows_left;
+
+    /* The last CTB row / column may cover only a part of a CTB */
+    if(ctb_pos_y == i4_last_ctb_row)
+    {
+        i4_num_4x4_ht = (i4_pic_ht - (i4_last_ctb_row * i4_ctb_size)) / 4;
+    }
+    if(ctb_pos_x == i4_last_ctb_col)
+    {
+        i4_num_4x4_wd = (i4_pic_wd - (i4_last_ctb_col * i4_ctb_size)) / 4;
+    }
+
+    /* A neighbouring CTB is usable only when it carries the same tile-id */
+    ps_nbr->u1_left_avail = (i4_tile_left == i4_tile_cur);
+    ps_nbr->u1_top_avail = (i4_tile_top == i4_tile_cur);
+    ps_nbr->u1_top_lt_avail = (ps_nbr->u1_left_avail && ps_nbr->u1_top_avail);
+    ps_nbr->u1_top_rt_avail = ps_nbr->u1_top_avail && (i4_tile_right == i4_tile_cur);
+    ps_nbr->u1_bot_lt_avail = 0; /* bottom left is never available at CTB level */
+
+    /* Mirror the flags on the map border */
+    pu1_top_row[-1] = ps_nbr->u1_top_lt_avail; /* Top-Left */
+
+    memset(pu1_top_row, ps_nbr->u1_top_avail, i4_num_4x4_wd); /* Top */
+
+    for(i4_rows_left = i4_num_4x4_ht; i4_rows_left > 0; i4_rows_left--) /* Left */
+    {
+        *pu1_left_col = ps_nbr->u1_left_avail;
+        pu1_left_col += nbr_map_strd;
+    }
+
+    /* Top-Right: next to the last CTB column the span is limited by the */
+    /* width of that (possibly partial) column                           */
+    if(ctb_pos_x == (i4_last_ctb_col - 1))
+    {
+        WORD32 i4_last_col_wd = i4_pic_wd - (i4_last_ctb_col * i4_ctb_size);
+
+        i4_top_rt_len = MIN(i4_last_col_wd, MAX_TU_SIZE) / 4;
+    }
+    memset(pu1_top_row + i4_4x4_per_ctb, ps_nbr->u1_top_rt_avail, i4_top_rt_len);
+
+    return;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_get_nbr_intra \endif
+*
+* \brief
+*    Reads the neighbour availability of a square unit from the 4x4 level
+*    availability map, fills ps_cu_nbr and returns the packed flags word
+*
+* \param[out] ps_cu_nbr : availability flags of the unit; every flag is the
+*                 map entry of the first 4x4 of the respective neighbour
+* \param[in] pu1_nbr_map : availability map; positions are relative to it
+* \param[in] nbr_map_strd : stride of pu1_nbr_map
+* \param[in] unit_4x4_pos_x : horizontal position of the unit in 4x4 units
+* \param[in] unit_4x4_pos_y : vertical position of the unit in 4x4 units
+* \param[in] unit_4x4_size : width and height of the unit in 4x4 units
+*
+* \return
+*    Packed availability flags. Every edge contributes four map entries that
+*    are read two 4x4 units apart, starting at the first 4x4 of the edge:
+*
+*      bits  3..0  : bottom left (first entry in bit 3)
+*      bits  7..4  : left        (first entry in bit 7)
+*      bits 11..8  : top         (first entry in bit 8)
+*      bits 15..12 : top right   (first entry in bit 12)
+*      bit  16     : top left
+*
+* \note
+*    All four entries of every edge are read irrespective of unit_4x4_size,
+*    so the map has to be readable up to six 4x4 units past the first entry
+*    of an edge. The bit layout assumes that map entries are 0 or 1
+*    (NOTE(review): confirm against the writers of the map)
+*
+* \date
+*    18/09/2012
+*
+* \author
+*    Ittiam
+*
+******************************************************************************
+*/
+WORD32 ihevce_get_nbr_intra(
+    nbr_avail_flags_t *ps_cu_nbr,
+    UWORD8 *pu1_nbr_map,
+    WORD32 nbr_map_strd,
+    WORD32 unit_4x4_pos_x,
+    WORD32 unit_4x4_pos_y,
+    WORD32 unit_4x4_size)
+{
+    WORD32 i4_packed_flags = 0;
+    WORD32 i4_vert_step = nbr_map_strd * 2;
+    WORD32 i4_idx;
+
+    UWORD8 *pu1_cur;
+    UWORD8 *pu1_top;
+    UWORD8 *pu1_left;
+    UWORD8 *pu1_top_lt;
+    UWORD8 *pu1_top_rt;
+    UWORD8 *pu1_bot_lt;
+
+    /* the map is kept at 4x4 granularity: locate the first 4x4 of the unit */
+    pu1_cur = pu1_nbr_map + unit_4x4_pos_x;
+    pu1_cur += unit_4x4_pos_y * nbr_map_strd;
+
+    /* first map entry of every neighbouring edge */
+    pu1_top = pu1_cur - nbr_map_strd;
+    pu1_left = pu1_cur - 1;
+    pu1_top_lt = pu1_top - 1;
+    pu1_top_rt = pu1_top + unit_4x4_size;
+    pu1_bot_lt = pu1_left + unit_4x4_size * nbr_map_strd;
+
+    /* single availability flag per neighbour, taken from that first entry */
+    ps_cu_nbr->u1_top_avail = pu1_top[0];
+    ps_cu_nbr->u1_left_avail = pu1_left[0];
+    ps_cu_nbr->u1_top_lt_avail = pu1_top_lt[0];
+    ps_cu_nbr->u1_top_rt_avail = pu1_top_rt[0];
+    ps_cu_nbr->u1_bot_lt_avail = pu1_bot_lt[0];
+
+    /* four samples per edge, taken at every second 4x4 unit */
+    for(i4_idx = 0; i4_idx < 4; i4_idx++)
+    {
+        UWORD8 *pu1_bl = pu1_bot_lt + i4_idx * i4_vert_step;
+        UWORD8 *pu1_lt = pu1_left + i4_idx * i4_vert_step;
+
+        i4_packed_flags |= (pu1_bl[0] << (3 - i4_idx)); /* BL0..BL3 */
+        i4_packed_flags |= (pu1_lt[0] << (7 - i4_idx)); /* L0..L3 */
+        i4_packed_flags |= (pu1_top[2 * i4_idx] << (i4_idx + 8)); /* T0..T3 */
+        i4_packed_flags |= (pu1_top_rt[2 * i4_idx] << (i4_idx + 12)); /* TR0..TR3 */
+    }
+
+    /* top left occupies bit 16 */
+    i4_packed_flags |= (pu1_top_lt[0] << 16);
+
+    return i4_packed_flags;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_get_nbr_intra_mxn_tu \endif
+*
+* \brief
+*    Returns the packed neighbour availability flags of a rectangular unit,
+*    read from the 4x4 level availability map
+*
+* \param[in] pu1_nbr_map : availability map; positions are relative to it
+* \param[in] nbr_map_strd : stride of pu1_nbr_map
+* \param[in] unit_4x4_pos_x : horizontal position of the unit in 4x4 units
+* \param[in] unit_4x4_pos_y : vertical position of the unit in 4x4 units
+* \param[in] unit_4x4_size_horz : width of the unit in 4x4 units
+* \param[in] unit_4x4_size_vert : height of the unit in 4x4 units
+*
+* \return
+*    Packed flags; every edge contributes four map entries read two 4x4
+*    units apart, irrespective of the unit size:
+*      bits  3..0  : bottom left (first entry in bit 3)
+*      bits  7..4  : left        (first entry in bit 7)
+*      bits 11..8  : top         (first entry in bit 8)
+*      bits 15..12 : top right   (first entry in bit 12)
+*      bit  16     : top left
+*    (layout assumes 0 / 1 map entries - NOTE(review): confirm)
+*
+* \date
+*    24/06/2014
+* \author
+*    Ittiam
+*
+******************************************************************************
+*/
+WORD32 ihevce_get_nbr_intra_mxn_tu(
+    UWORD8 *pu1_nbr_map,
+    WORD32 nbr_map_strd,
+    WORD32 unit_4x4_pos_x,
+    WORD32 unit_4x4_pos_y,
+    WORD32 unit_4x4_size_horz,
+    WORD32 unit_4x4_size_vert)
+{
+    WORD32 i4_packed_flags = 0;
+    WORD32 i4_vert_step = nbr_map_strd * 2;
+    WORD32 i4_idx;
+
+    UWORD8 *pu1_cur;
+    UWORD8 *pu1_top;
+    UWORD8 *pu1_left;
+    UWORD8 *pu1_top_lt;
+    UWORD8 *pu1_top_rt;
+    UWORD8 *pu1_bot_lt;
+
+    /* the map is kept at 4x4 granularity: locate the first 4x4 of the unit */
+    pu1_cur = pu1_nbr_map + unit_4x4_pos_x;
+    pu1_cur += unit_4x4_pos_y * nbr_map_strd;
+
+    /* first map entry of every neighbouring edge */
+    pu1_top = pu1_cur - nbr_map_strd;
+    pu1_left = pu1_cur - 1;
+    pu1_top_lt = pu1_top - 1;
+    pu1_top_rt = pu1_top + unit_4x4_size_horz;
+    pu1_bot_lt = pu1_left + unit_4x4_size_vert * nbr_map_strd;
+
+    /* four samples per edge, taken at every second 4x4 unit */
+    for(i4_idx = 0; i4_idx < 4; i4_idx++)
+    {
+        UWORD8 *pu1_bl = pu1_bot_lt + i4_idx * i4_vert_step;
+        UWORD8 *pu1_lt = pu1_left + i4_idx * i4_vert_step;
+
+        i4_packed_flags |= (pu1_bl[0] << (3 - i4_idx)); /* BL0..BL3 */
+        i4_packed_flags |= (pu1_lt[0] << (7 - i4_idx)); /* L0..L3 */
+        i4_packed_flags |= (pu1_top[2 * i4_idx] << (i4_idx + 8)); /* T0..T3 */
+        i4_packed_flags |= (pu1_top_rt[2 * i4_idx] << (i4_idx + 12)); /* TR0..TR3 */
+    }
+
+    /* top left occupies bit 16 */
+    i4_packed_flags |= (pu1_top_lt[0] << 16);
+
+    return i4_packed_flags;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_get_intra_chroma_tu_nbr \endif
+*
+* \brief
+*    Derives the neighbour availability flags of a chroma subTU from the
+*    luma flags; the flags are modified only for 4:2:2 (u1_is_422 != 0)
+*
+* \param[in] i4_luma_nbr_flags : packed luma flags (layout in function body)
+* \param[in] i4_subtu_idx : 0 for the first subTU, any other value for the second
+* \param[in] i4_trans_size : transform size; only 4, 8 and 16 are handled
+* \param[in] u1_is_422 : non-zero when the chroma format is 4:2:2
+*
+* \date 04/07/2014
+* \author Ittiam
+*
+* \return packed neighbour availability flags of the chroma subTU
+******************************************************************************
+*/
+WORD32 ihevce_get_intra_chroma_tu_nbr(
+    WORD32 i4_luma_nbr_flags, WORD32 i4_subtu_idx, WORD32 i4_trans_size, UWORD8 u1_is_422)
+{
+    /* TOP LEFT | TOP-RIGHT | TOP | LEFT | BOTTOM LEFT*/
+    /* (1 bit) (4 bits) (4 bits) (4 bits) (4 bits) */
+    /* With reference to the above bit arrangement (top left is bit 16) - */
+    /* BL0 - Bit 3 */
+    /* BL1 - Bit 2 */
+    /* BL2 - Bit 1 */
+    /* BL3 - Bit 0 */
+    /* L0 - Bit 7 */
+    /* L1 - Bit 6 */
+    /* L2 - Bit 5 */
+    /* L3 - Bit 4 */
+    /* T0 - Bit 8 */
+    /* T1 - Bit 9 */
+    /* T2 - Bit 10 */
+    /* T3 - Bit 11 */
+    /* TR0 - Bit 12 */
+    /* TR1 - Bit 13 */
+    /* TR2 - Bit 14 */
+    /* TR3 - Bit 15 */
+    if(u1_is_422)
+    {
+        if(0 == i4_subtu_idx)
+        {
+            /* Only when any left flag (L0-3) is set for luma */
+            if(i4_luma_nbr_flags & 0xf0)
+            {
+                switch(i4_trans_size)
+                {
+                    case 4:
+                    {
+                        /* BL0 - 1 */
+                        /* BL1-3 - unchanged */
+                        /* (Luma_BL0-2 is not shifted into BL1-3) */
+                        i4_luma_nbr_flags |= 0x8;
+
+                        /* L0-1 - 11 */
+                        /* L2-3 - Luma_L2-3 */
+                        i4_luma_nbr_flags |= 0xc0;
+
+                        break;
+                    }
+                    case 8:
+                    {
+                        /* BL0-1 - 11 */
+                        /* BL2-3 - unchanged */
+                        /* (Luma_BL0-1 is not shifted into BL2-3) */
+                        i4_luma_nbr_flags |= 0xc;
+
+                        /* L0-3 - 1111 */
+                        i4_luma_nbr_flags |= 0xf0;
+
+                        break;
+                    }
+                    case 16:
+                    {
+                        /* BL0-3 - 1111 */
+                        i4_luma_nbr_flags |= 0xf;
+
+                        /* L0-3 - 1111 */
+                        i4_luma_nbr_flags |= 0xf0;
+
+                        break;
+                    }
+                }
+            }
+        }
+        else
+        {
+            /* Top right is always unavailable */
+            /* Top is always available */
+
+            i4_luma_nbr_flags &= (0xffff0fff);
+
+            /* Top left is marked as available if */
+            /* any luma left flag is set */
+            if(i4_luma_nbr_flags & 0xf0)
+            {
+                i4_luma_nbr_flags |= (1 << 16);
+            }
+
+            switch(i4_trans_size)
+            {
+                case 4:
+                {
+                    /* T0 - 1 */
+                    /* T1-3 - 000 */
+                    i4_luma_nbr_flags |= 0x100;
+                    i4_luma_nbr_flags &= 0xfffff1ff;
+
+                    if(i4_luma_nbr_flags & 0xf0)
+                    {
+                        i4_luma_nbr_flags |= 0x80;
+                    }
+
+                    /* BL0-3 stay as derived for luma.                       */
+                    /* NOTE(review): a statement that set BL0 only when BL0  */
+                    /* was already set used to follow; it had no effect and  */
+                    /* was dropped - confirm no other bit was intended.      */
+
+                    break;
+                }
+                case 8:
+                {
+                    /* T0-1 - 11 */
+                    /* T2-3 - 00 */
+                    i4_luma_nbr_flags |= 0x300;
+                    i4_luma_nbr_flags &= 0xfffff3ff;
+
+                    if(i4_luma_nbr_flags & 0xf0)
+                    {
+                        i4_luma_nbr_flags |= 0xc0;
+                    }
+
+                    if((i4_luma_nbr_flags & 0xc) == 0x8)
+                    {
+                        i4_luma_nbr_flags |= 0xc;
+                    }
+                    else if((i4_luma_nbr_flags & 0xc) == 0xc)
+                    {
+                        i4_luma_nbr_flags |= 0xf;
+                    }
+                    /* NOTE(review): a further branch for (flags & 0xf) ==  */
+                    /* 0xe used to follow; it could never be taken because  */
+                    /* that value already satisfies the test above, and it  */
+                    /* set the same bits, so it was dropped.                */
+
+                    break;
+                }
+                case 16:
+                {
+                    /* T0-3 - 1111 */
+                    i4_luma_nbr_flags |= 0xf00;
+
+                    if(i4_luma_nbr_flags & 0xf0)
+                    {
+                        i4_luma_nbr_flags |= 0xf0;
+                    }
+
+                    if((i4_luma_nbr_flags & 0xf) == 0x8)
+                    {
+                        i4_luma_nbr_flags |= 0xc;
+                    }
+                    else if((i4_luma_nbr_flags & 0xf) == 0xc)
+                    {
+                        i4_luma_nbr_flags |= 0xf;
+                    }
+                    else if((i4_luma_nbr_flags & 0xf) == 0xe)
+                    {
+                        i4_luma_nbr_flags |= 0xf;
+                    }
+
+                    break;
+                }
+            }
+        }
+    }
+
+    return i4_luma_nbr_flags;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_get_only_nbr_flag \endif
+*
+* \brief
+*    Fills the neighbour availability flags of a rectangular unit from the
+*    4x4 level availability map; unlike ihevce_get_nbr_intra() no packed
+*    flags word is built
+*
+* \param[out] ps_cu_nbr : availability flags of the unit
+* \param[in] pu1_nbr_map : availability map; positions are relative to it
+* \param[in] nbr_map_strd : stride of pu1_nbr_map
+* \param[in] unit_4x4_pos_x : horizontal position of the unit in 4x4 units
+* \param[in] unit_4x4_pos_y : vertical position of the unit in 4x4 units
+* \param[in] unit_4x4_size_hz : width of the unit in 4x4 units
+* \param[in] unit_4x4_size_vt : height of the unit in 4x4 units
+*
+* \note
+*    Every flag is a single map entry:
+*      top         : (pos_x, pos_y - 1)
+*      left        : (pos_x - 1, pos_y)
+*      top left    : (pos_x - 1, pos_y - 1)
+*      top right   : (pos_x + size_hz, pos_y - 1)
+*      bottom left : (pos_x - 1, pos_y + size_vt)
+*
+* \date 18/09/2012
+*
+* \author
+*    Ittiam
+*
+* \return none
+******************************************************************************
+*/
+void ihevce_get_only_nbr_flag(
+    nbr_avail_flags_t *ps_cu_nbr,
+    UWORD8 *pu1_nbr_map,
+    WORD32 nbr_map_strd,
+    WORD32 unit_4x4_pos_x,
+    WORD32 unit_4x4_pos_y,
+    WORD32 unit_4x4_size_hz,
+    WORD32 unit_4x4_size_vt)
+{
+    UWORD8 *pu1_cur;
+    UWORD8 *pu1_top_row;
+    UWORD8 *pu1_left_col;
+
+    /* the map is kept at 4x4 granularity: locate the first 4x4 of the unit */
+    pu1_cur = pu1_nbr_map + unit_4x4_pos_x;
+    pu1_cur += unit_4x4_pos_y * nbr_map_strd;
+
+    pu1_top_row = pu1_cur - nbr_map_strd; /* row above the unit */
+    pu1_left_col = pu1_cur - 1; /* column left of the unit */
+
+    /* top, left and top left: entries adjacent to the first 4x4 of the unit */
+    ps_cu_nbr->u1_top_avail = pu1_top_row[0];
+    ps_cu_nbr->u1_left_avail = pu1_left_col[0];
+    ps_cu_nbr->u1_top_lt_avail = pu1_top_row[-1];
+
+    /* top right: entry just past the width of the unit in the row above */
+    ps_cu_nbr->u1_top_rt_avail = pu1_top_row[unit_4x4_size_hz];
+
+    /* bottom left: entry just below the height of the unit in the left column */
+    ps_cu_nbr->u1_bot_lt_avail = pu1_left_col[unit_4x4_size_vt * nbr_map_strd];
+
+    return;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_set_nbr_map \endif
+*
+* \brief
+*    Writes val into the availability map entries of a square unit
+*
+* \param[in,out] pu1_nbr_map : availability map; positions are relative to it
+* \param[in] nbr_map_strd : stride of pu1_nbr_map
+* \param[in] unit_4x4_pos_x : horizontal position of the unit in 4x4 units
+* \param[in] unit_4x4_pos_y : vertical position of the unit in 4x4 units
+* \param[in] unit_4x4_size : width and height of the unit in 4x4 units
+* \param[in] val : value stored in every entry (passed to memset)
+* \date 18/09/2012
+* \author Ittiam
+* \return none
+******************************************************************************
+*/
+void ihevce_set_nbr_map(
+    UWORD8 *pu1_nbr_map,
+    WORD32 nbr_map_strd,
+    WORD32 unit_4x4_pos_x,
+    WORD32 unit_4x4_pos_y,
+    WORD32 unit_4x4_size,
+    WORD32 val)
+{
+    UWORD8 *pu1_row = pu1_nbr_map + unit_4x4_pos_x;
+    WORD32 i4_rows_left = unit_4x4_size;
+
+    /* the map is kept at 4x4 granularity: move to the first row of the unit */
+    pu1_row += unit_4x4_pos_y * nbr_map_strd;
+
+    /* fill unit_4x4_size entries in each of the unit_4x4_size rows */
+    while(i4_rows_left > 0)
+    {
+        memset(pu1_row, val, unit_4x4_size * sizeof(UWORD8));
+        pu1_row += nbr_map_strd;
+        i4_rows_left--;
+    }
+
+    return;
+}
+/*!
+******************************************************************************
+* \if Function name : ihevce_set_inter_nbr_map \endif
+*
+* \brief
+*    Writes val into the availability map entries of a rectangular unit
+*
+* \param[in,out] pu1_nbr_map : availability map; positions are relative to it
+* \param[in] nbr_map_strd : stride of pu1_nbr_map
+* \param[in] unit_4x4_pos_x : horizontal position of the unit in 4x4 units
+* \param[in] unit_4x4_pos_y : vertical position of the unit in 4x4 units
+* \param[in] unit_4x4_size_hz : width of the unit in 4x4 units
+* \param[in] unit_4x4_size_vt : height of the unit in 4x4 units
+* \param[in] val : value stored in every entry (passed to memset)
+*
+* \date 18/09/2012
+* \author Ittiam
+*
+* \return none
+******************************************************************************
+*/
+void ihevce_set_inter_nbr_map(
+    UWORD8 *pu1_nbr_map,
+    WORD32 nbr_map_strd,
+    WORD32 unit_4x4_pos_x,
+    WORD32 unit_4x4_pos_y,
+    WORD32 unit_4x4_size_hz,
+    WORD32 unit_4x4_size_vt,
+    WORD32 val)
+{
+    UWORD8 *pu1_row = pu1_nbr_map + unit_4x4_pos_x;
+    WORD32 i4_rows_left = unit_4x4_size_vt;
+
+    /* the map is kept at 4x4 granularity: move to the first row of the unit */
+    pu1_row += unit_4x4_pos_y * nbr_map_strd;
+
+    /* fill unit_4x4_size_hz entries in each of the unit_4x4_size_vt rows */
+    while(i4_rows_left > 0)
+    {
+        memset(pu1_row, val, unit_4x4_size_hz * sizeof(UWORD8));
+        pu1_row += nbr_map_strd;
+        i4_rows_left--;
+    }
+
+    return;
+}
diff --git a/encoder/ihevce_nbr_avail.h b/encoder/ihevce_nbr_avail.h
new file mode 100644
index 0000000..efe9a8b
--- /dev/null
+++ b/encoder/ihevce_nbr_avail.h
@@ -0,0 +1,115 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_nbr_avail.h
+*
+* \brief
+* This file contains function prototypes of neighbour access related functions
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_NBR_AVAIL_H_
+#define _IHEVCE_NBR_AVAIL_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+void ihevce_set_ctb_nbr(
+ nbr_avail_flags_t *ps_nbr,
+ UWORD8 *pu1_nbr_map,
+ WORD32 nbr_map_strd,
+ WORD32 ctb_pos_x,
+ WORD32 ctb_pos_y,
+ frm_ctb_ctxt_t *ps_frm_ctb_prms);
+
+WORD32 ihevce_get_nbr_intra(
+ nbr_avail_flags_t *ps_cu_nbr,
+ UWORD8 *pu1_nbr_map,
+ WORD32 nbr_map_strd,
+ WORD32 unit_4x4_pos_x,
+ WORD32 unit_4x4_pos_y,
+ WORD32 unit_4x4_size);
+
+void ihevce_get_only_nbr_flag(
+ nbr_avail_flags_t *ps_cu_nbr,
+ UWORD8 *pu1_nbr_map,
+ WORD32 nbr_map_strd,
+ WORD32 unit_4x4_pos_x,
+ WORD32 unit_4x4_pos_y,
+ WORD32 unit_4x4_size_hz,
+ WORD32 unit_4x4_size_vt);
+
+void ihevce_set_nbr_map(
+ UWORD8 *pu1_nbr_map,
+ WORD32 nbr_map_strd,
+ WORD32 unit_4x4_pos_x,
+ WORD32 unit_4x4_pos_y,
+ WORD32 unit_4x4_size,
+ WORD32 val);
+void ihevce_set_inter_nbr_map(
+ UWORD8 *pu1_nbr_map,
+ WORD32 nbr_map_strd,
+ WORD32 unit_4x4_pos_x,
+ WORD32 unit_4x4_pos_y,
+ WORD32 unit_4x4_size_hz,
+ WORD32 unit_4x4_size_vt,
+ WORD32 val);
+
+WORD32 ihevce_get_nbr_intra_mxn_tu(
+ UWORD8 *pu1_nbr_map,
+ WORD32 nbr_map_strd,
+ WORD32 unit_4x4_pos_x,
+ WORD32 unit_4x4_pos_y,
+ WORD32 unit_4x4_size_horz,
+ WORD32 unit_4x4_size_vert);
+
+WORD32 ihevce_get_intra_chroma_tu_nbr(
+ WORD32 i4_luma_nbr_flags, WORD32 i4_subtu_idx, WORD32 i4_trans_size, UWORD8 u1_is_422);
+
+#endif /* _IHEVCE_NBR_AVAIL_H_ */
diff --git a/encoder/ihevce_plugin.c b/encoder/ihevce_plugin.c
new file mode 100644
index 0000000..4f0e532
--- /dev/null
+++ b/encoder/ihevce_plugin.c
@@ -0,0 +1,2115 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_plugin.c
+*
+* \brief
+* This file contains wrapper utilities to use hevc encoder library
+*
+* \date
+* 15/04/2014
+*
+* \author
+* Ittiam
+*
+* List of Functions
+*
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_macros.h"
+#include "ihevc_debug.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_plugin.h"
+#include "ihevce_plugin_priv.h"
+#include "ihevce_hle_interface.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_error_checks.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_global_tables.h"
+
+#include "cast_types.h"
+#include "osal.h"
+#include "osal_defaults.h"
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+#define CREATE_TIME_ALLOCATION_INPUT 1
+#define CREATE_TIME_ALLOCATION_OUTPUT 0
+
+#define MAX_NUM_FRM_IN_GOP 600
+
+/*****************************************************************************/
+/* Extern variables */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/*!
+******************************************************************************
+* \if Function name : mem_mngr_alloc \endif
+*
+* \brief
+*    Memory manager specific alloc function. The aligned buffer is returned
+*    in ps_memtab->pv_base, which is NULL when the allocation failed
+*
+* \param[in] pv_handle : handle to memory manager
+*                        (currently not required can be set to null)
+* \param[in] ps_sys_api : system API callbacks, used to print error messages
+* \param[in,out] ps_memtab : memory descriptor pointer (size and alignment
+*                        are read, pv_base is written)
+*
+* \return none
+*
+* \author Ittiam
+*****************************************************************************
+*/
+void mem_mngr_alloc(void *pv_handle, ihevce_sys_api_t *ps_sys_api, iv_mem_rec_t *ps_memtab)
+{
+#ifndef X86_MINGW
+    WORD32 error, mem_alignment;
+#endif
+
+    (void)pv_handle;
+
+#ifdef X86_MINGW
+    ps_memtab->pv_base = _aligned_malloc(ps_memtab->i4_mem_size, ps_memtab->i4_mem_alignment);
+#else
+    /* alignment is rounded down to a multiple of 8; 0 selects pointer alignment */
+    mem_alignment = (ps_memtab->i4_mem_alignment >> 3) << 3;
+    if(mem_alignment == 0)
+    {
+        mem_alignment = (WORD32)sizeof(void *);
+    }
+    error = posix_memalign(&ps_memtab->pv_base, mem_alignment, ps_memtab->i4_mem_size);
+    if(error != 0)
+    {
+        /* pv_base is not guaranteed to be NULL after a failed call: clear */
+        /* it so that the check below reliably detects the failure         */
+        ps_memtab->pv_base = NULL;
+        ps_sys_api->ihevce_printf(ps_sys_api->pv_cb_handle, "posix_memalign error %d\n", error);
+    }
+#endif
+
+    if(ps_memtab->pv_base == NULL)
+    {
+        ps_sys_api->ihevce_printf(
+            ps_sys_api->pv_cb_handle, "IHEVCE ERROR: Unable to allocate memory\n");
+        ASSERT(0);
+    }
+    return;
+}
+
+/*!
+******************************************************************************
+* \if Function name : memory_alloc \endif
+*
+* \brief
+*    common memory allocate function should be used across all threads;
+*    a plain malloc() of u4_size bytes
+*
+* \param[in] pv_handle : handle to memory manager (unused)
+* \param[in] u4_size : size of memory required, in bytes
+*
+* \return
+*    Memory pointer as returned by malloc() (NULL on failure)
+*
+* \author Ittiam
+*
+*****************************************************************************
+*/
+void *memory_alloc(void *pv_handle, UWORD32 u4_size)
+{
+    void *pv_mem = malloc(u4_size);
+    (void)pv_handle;
+    return pv_mem;
+}
+
+/*!
+******************************************************************************
+* \if Function name : mem_mngr_free \endif
+*
+* \brief
+*    Memory manager specific free function; releases ps_memtab->pv_base
+*
+* \param[in] pv_handle : handle to memory manager
+*                        (currently not required can be set to null)
+* \param[in] ps_memtab : memory descriptor pointer
+*
+* \return none
+*
+* \author Ittiam
+*
+*****************************************************************************
+*/
+void mem_mngr_free(void *pv_handle, iv_mem_rec_t *ps_memtab)
+{
+    void *pv_base = ps_memtab->pv_base;
+
+    (void)pv_handle;
+#ifndef X86_MINGW
+    free(pv_base);
+#else
+    _aligned_free(pv_base);
+#endif
+    return;
+}
+
+/*!
+******************************************************************************
+* \if Function name : memory_free \endif
+*
+* \brief
+*    common memory free function should be used across all threads;
+*    a plain free() of pv_mem
+*
+* \param[in] pv_handle : handle to memory manager (unused)
+* \param[in] pv_mem : memory to be freed
+*
+* \return
+*    none
+*
+* \author Ittiam
+*
+*****************************************************************************
+*/
+void memory_free(void *pv_handle, void *pv_mem)
+{
+    /* the handle is not needed for a plain free() */
+    (void)pv_handle;
+
+    free(pv_mem);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_set_def_params \endif
+*
+* \brief
+* Set default values
+*
+* \param[in] Static params pointer
+*
+* \return
+* status
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+IHEVCE_PLUGIN_STATUS_T ihevce_set_def_params(ihevce_static_cfg_params_t *ps_params)
+{
+ WORD32 i, j;
+ /* sanity checks */
+ if(NULL == ps_params)
+ return (IHEVCE_EFAIL);
+
+ memset(ps_params, 0, sizeof(*ps_params));
+
+ /* initialise all the parameters to default values */
+ ps_params->i4_size = sizeof(ihevce_static_cfg_params_t);
+ ps_params->i4_save_recon = 0;
+ ps_params->i4_log_dump_level = 0;
+ ps_params->i4_enable_logo = 0;
+ ps_params->i4_enable_csv_dump = 0;
+
+ /* Control to free the entropy output buffers */
+ /* 1 for non_blocking mode */
+ /* and 0 for blocking mode */
+ ps_params->i4_outbuf_buf_free_control = 1;
+
+ /* coding tools parameters */
+ ps_params->s_coding_tools_prms.i4_size = sizeof(ihevce_coding_params_t);
+ ps_params->s_coding_tools_prms.i4_cropping_mode = 1;
+ ps_params->s_coding_tools_prms.i4_deblocking_type = 0;
+ ps_params->s_coding_tools_prms.i4_enable_entropy_sync = 0;
+ // New IDR/CDR Params
+ ps_params->s_coding_tools_prms.i4_max_closed_gop_period = 0;
+ ps_params->s_coding_tools_prms.i4_min_closed_gop_period = 0;
+ ps_params->s_coding_tools_prms.i4_max_cra_open_gop_period = 60;
+ ps_params->s_coding_tools_prms.i4_max_i_open_gop_period = 0;
+ ps_params->s_coding_tools_prms.i4_max_reference_frames = -1;
+ ps_params->s_coding_tools_prms.i4_max_temporal_layers = 0;
+ ps_params->s_coding_tools_prms.i4_slice_type = 0;
+ ps_params->s_coding_tools_prms.i4_use_default_sc_mtx = 0;
+ ps_params->s_coding_tools_prms.i4_weighted_pred_enable = 0;
+ ps_params->s_coding_tools_prms.i4_vqet = 0;
+
+ ps_params->e_arch_type = ARCH_NA;
+
+ /* config parameters */
+ ps_params->s_config_prms.i4_size = sizeof(ihevce_config_prms_t);
+ ps_params->s_config_prms.i4_cu_level_rc = 1;
+ ps_params->s_config_prms.i4_init_vbv_fullness = 0;
+ ps_params->s_config_prms.i4_max_frame_qp = 51;
+ ps_params->s_config_prms.i4_max_log2_cu_size = 6;
+ ps_params->s_config_prms.i4_max_log2_tu_size = 5;
+ ps_params->s_config_prms.i4_max_search_range_horz = 512;
+ ps_params->s_config_prms.i4_max_search_range_vert = 256;
+ ps_params->s_config_prms.i4_max_tr_tree_depth_I = 1;
+ ps_params->s_config_prms.i4_max_tr_tree_depth_nI = 3;
+ ps_params->s_config_prms.i4_min_frame_qp = 1;
+ ps_params->s_config_prms.i4_min_log2_cu_size = 3;
+ ps_params->s_config_prms.i4_min_log2_tu_size = 2;
+ ps_params->s_config_prms.i4_num_frms_to_encode = -1;
+ ps_params->s_config_prms.i4_rate_factor = 500;
+ ps_params->s_config_prms.i4_rate_control_mode = 2;
+ ps_params->s_config_prms.i4_stuffing_enable = 0;
+ ps_params->s_config_prms.i4_vbr_max_peak_rate_dur = 2000;
+
+ /* LAP parameters */
+ ps_params->s_lap_prms.i4_size = sizeof(ihevce_lap_params_t);
+ ps_params->s_lap_prms.i4_deinterlacer_enable = 0;
+ ps_params->s_lap_prms.i4_denoise_enable = 0;
+ ps_params->s_lap_prms.i4_enable_wts_ofsts = 1;
+ ps_params->s_lap_prms.i4_rc_look_ahead_pics = 0;
+
+ /* Multi Thread parameters */
+ ps_params->s_multi_thrd_prms.i4_size = sizeof(ihevce_static_multi_thread_params_t);
+ ps_params->s_multi_thrd_prms.i4_max_num_cores = 1;
+ ps_params->s_multi_thrd_prms.i4_memory_alloc_ctrl_flag = 0;
+ ps_params->s_multi_thrd_prms.i4_num_proc_groups = 1;
+ ps_params->s_multi_thrd_prms.ai4_num_cores_per_grp[0] = -1;
+ ps_params->s_multi_thrd_prms.i4_use_thrd_affinity = -1; //0;
+ memset(&ps_params->s_multi_thrd_prms.au8_core_aff_mask[0], 0, sizeof(ULWORD64) * MAX_NUM_CORES);
+
+ /* Output Streams parameters */
+ ps_params->s_out_strm_prms.i4_size = sizeof(ihevce_out_strm_params_t);
+ ps_params->s_out_strm_prms.i4_aud_enable_flags = 0;
+ ps_params->s_out_strm_prms.i4_eos_enable_flags = 0;
+ ps_params->s_out_strm_prms.i4_codec_profile = 1;
+ ps_params->s_out_strm_prms.i4_codec_tier = 0;
+ ps_params->s_out_strm_prms.i4_codec_type = 0;
+ ps_params->s_out_strm_prms.i4_sei_buffer_period_flags = 0;
+ ps_params->s_out_strm_prms.i4_sei_enable_flag = 0;
+ ps_params->s_out_strm_prms.i4_sei_payload_enable_flag = 0;
+ ps_params->s_out_strm_prms.i4_sei_pic_timing_flags = 0;
+ ps_params->s_out_strm_prms.i4_sei_cll_enable = 0;
+ ps_params->s_out_strm_prms.u2_sei_avg_cll = 0;
+ ps_params->s_out_strm_prms.u2_sei_max_cll = 0;
+ ps_params->s_out_strm_prms.i4_sei_recovery_point_flags = 0;
+ ps_params->s_out_strm_prms.i4_sei_mastering_disp_colour_vol_flags = 0;
+ ps_params->s_out_strm_prms.i4_decoded_pic_hash_sei_flag = 0;
+ ps_params->s_out_strm_prms.i4_sps_at_cdr_enable = 1;
+ ps_params->s_out_strm_prms.i4_vui_enable = 0;
+ /*Set the interoperability flag to 0*/
+ ps_params->s_out_strm_prms.i4_interop_flags = 0;
+
+ /* Source parameters */
+ ps_params->s_src_prms.i4_size = sizeof(ihevce_src_params_t);
+ ps_params->s_src_prms.inp_chr_format = 1;
+ ps_params->s_src_prms.i4_chr_format = 11;
+ ps_params->s_src_prms.i4_field_pic = 0;
+ ps_params->s_src_prms.i4_frm_rate_denom = 1000;
+ ps_params->s_src_prms.i4_frm_rate_num = 30000;
+ ps_params->s_src_prms.i4_height = 0; //1080;
+ ps_params->s_src_prms.i4_input_bit_depth = 8;
+ ps_params->s_src_prms.i4_topfield_first = 1;
+ ps_params->s_src_prms.i4_width = 0; //1920;
+ ps_params->s_src_prms.i4_orig_width = 0;
+ ps_params->s_src_prms.i4_orig_height = 0;
+
+ /* Target layer parameters */
+ ps_params->s_tgt_lyr_prms.i4_size = sizeof(ihevce_tgt_layer_params_t);
+ ps_params->s_tgt_lyr_prms.i4_enable_temporal_scalability = 0;
+ ps_params->s_tgt_lyr_prms.i4_internal_bit_depth = 8;
+ ps_params->s_tgt_lyr_prms.i4_mbr_quality_setting = IHEVCE_MBR_HIGH_QUALITY;
+ ps_params->s_tgt_lyr_prms.i4_multi_res_layer_reuse = 0;
+ ps_params->s_tgt_lyr_prms.i4_num_res_layers = 1;
+ ps_params->s_tgt_lyr_prms.i4_mres_single_out = 0;
+ ps_params->s_tgt_lyr_prms.i4_start_res_id = 0;
+ ps_params->s_tgt_lyr_prms.pf_scale_chroma = NULL;
+ ps_params->s_tgt_lyr_prms.pf_scale_luma = NULL;
+ ps_params->s_tgt_lyr_prms.pv_scaler_handle = NULL;
+
+ /* target parameters */
+ for(i = 0; i < IHEVCE_MAX_NUM_RESOLUTIONS; i++)
+ {
+ ps_params->s_tgt_lyr_prms.as_tgt_params[i].i4_size = sizeof(ihevce_tgt_params_t);
+ for(j = 0; j < IHEVCE_MAX_NUM_BITRATES; j++)
+ {
+ ps_params->s_tgt_lyr_prms.as_tgt_params[i].ai4_frame_qp[j] = 32;
+ ps_params->s_tgt_lyr_prms.as_tgt_params[i].ai4_tgt_bitrate[j] = 5000000;
+ ps_params->s_tgt_lyr_prms.as_tgt_params[i].ai4_peak_bitrate[j] = 10000000;
+ ps_params->s_tgt_lyr_prms.as_tgt_params[i].ai4_max_vbv_buffer_size[j] = -1;
+ }
+ ps_params->s_tgt_lyr_prms.as_tgt_params[i].i4_codec_level = 156;
+ ps_params->s_tgt_lyr_prms.as_tgt_params[i].i4_frm_rate_scale_factor = 1;
+ ps_params->s_tgt_lyr_prms.as_tgt_params[i].i4_height = 0;
+ ps_params->s_tgt_lyr_prms.as_tgt_params[i].i4_num_bitrate_instances = 1;
+ ps_params->s_tgt_lyr_prms.as_tgt_params[i].i4_quality_preset = IHEVCE_QUALITY_P5;
+ ps_params->s_tgt_lyr_prms.as_tgt_params[i].i4_width = 0;
+ }
+
+ /* SEI VUI parameters */
+ ps_params->s_vui_sei_prms.u1_aspect_ratio_info_present_flag = 0;
+ ps_params->s_vui_sei_prms.au1_aspect_ratio_idc[0] = 255;
+ ps_params->s_vui_sei_prms.au2_sar_width[0] = 4;
+ ps_params->s_vui_sei_prms.au2_sar_height[0] = 3;
+ ps_params->s_vui_sei_prms.u1_overscan_info_present_flag = 0;
+ ps_params->s_vui_sei_prms.u1_overscan_appropriate_flag = 0;
+ ps_params->s_vui_sei_prms.u1_video_signal_type_present_flag = 1;
+ ps_params->s_vui_sei_prms.u1_video_format = 5;
+ ps_params->s_vui_sei_prms.u1_video_full_range_flag = 1;
+ ps_params->s_vui_sei_prms.u1_colour_description_present_flag = 0;
+ ps_params->s_vui_sei_prms.u1_colour_primaries = 2;
+ ps_params->s_vui_sei_prms.u1_transfer_characteristics = 2;
+ ps_params->s_vui_sei_prms.u1_matrix_coefficients = 2;
+ ps_params->s_vui_sei_prms.u1_chroma_loc_info_present_flag = 0;
+ ps_params->s_vui_sei_prms.u1_chroma_sample_loc_type_top_field = 0;
+ ps_params->s_vui_sei_prms.u1_chroma_sample_loc_type_bottom_field = 0;
+ ps_params->s_vui_sei_prms.u1_vui_hrd_parameters_present_flag = 0;
+ ps_params->s_vui_sei_prms.u1_timing_info_present_flag = 0;
+ ps_params->s_vui_sei_prms.u1_nal_hrd_parameters_present_flag = 0;
+
+ /* Setting sysAPIs to NULL */
+ memset(&ps_params->s_sys_api, 0, sizeof(ihevce_sys_api_t));
+
+ /* Multi pass parameters */
+ memset(&ps_params->s_pass_prms, 0, sizeof(ihevce_pass_prms_t));
+ ps_params->s_pass_prms.i4_size = sizeof(ihevce_pass_prms_t);
+
+ /* Tile parameters */
+ ps_params->s_app_tile_params.i4_size = sizeof(ihevce_app_tile_params_t);
+ ps_params->s_app_tile_params.i4_tiles_enabled_flag = 0;
+ ps_params->s_app_tile_params.i4_uniform_spacing_flag = 1;
+ ps_params->s_app_tile_params.i4_num_tile_cols = 1;
+ ps_params->s_app_tile_params.i4_num_tile_rows = 1;
+
+ ps_params->s_slice_params.i4_slice_segment_mode = 0;
+ ps_params->s_slice_params.i4_slice_segment_argument = 1300;
+
+ return (IHEVCE_EOK);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_cmds_error_report \endif
+*
+* \brief
+*    Callback used by the encoder to report a command buffer error. The error
+*    code and the buffer id are printed through the ihevce_printf system API
+*    stored in the static configuration parameters of the HLE context.
+*
+* \param[in] pv_cb_handle  : plugin context (plugin_ctxt_t *)
+* \param[in] i4_error_code : error code that is printed
+* \param[in] i4_cmd_type   : 0 selects the "Asynchronous" message, any other
+*                            value selects the "Synchronous" message
+* \param[in] i4_buf_id     : buffer id that is printed
+*
+* \return
+*    IV_SUCCESS always
+*
+* \author
+*  Ittiam
+*
+*****************************************************************************
+*/
+IV_API_CALL_STATUS_T ihevce_cmds_error_report(
+    void *pv_cb_handle, WORD32 i4_error_code, WORD32 i4_cmd_type, WORD32 i4_buf_id)
+{
+    /* walk from the plugin context to the system API table */
+    plugin_ctxt_t *ps_plugin = (plugin_ctxt_t *)pv_cb_handle;
+    ihevce_hle_ctxt_t *ps_hle_ctxt = (ihevce_hle_ctxt_t *)ps_plugin->pv_hle_interface_ctxt;
+    ihevce_sys_api_t *ps_sys_api = &ps_hle_ctxt->ps_static_cfg_prms->s_sys_api;
+
+    if(0 != i4_cmd_type)
+    {
+        ps_sys_api->ihevce_printf(
+            ps_sys_api->pv_cb_handle,
+            "PLUGIN ERROR: Synchronous Buffer Error %d in Buffer Id %d",
+            i4_error_code,
+            i4_buf_id);
+    }
+    else
+    {
+        ps_sys_api->ihevce_printf(
+            ps_sys_api->pv_cb_handle,
+            "PLUGIN ERROR: Asynchronous Buffer Error %d in Buffer Id %d",
+            i4_error_code,
+            i4_buf_id);
+    }
+
+    return (IV_SUCCESS);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_strm_fill_done \endif
+*
+* \brief
+*    Callback from the encoder when a bitstream buffer is ready to consume.
+*    Updates the per-stream statistics, then, under the output stream mutex,
+*    records the end flag, fills in the descriptor of the produced buffer,
+*    marks it as produced and signals the output condition variable.
+*
+* \param[in] pv_ctxt     : plugin context (plugin_ctxt_t *)
+* \param[in] pv_curr_out : produced output buffer (iv_output_data_buffs_t *)
+* \param[in] i4_br_id    : bitrate instance index of the stream
+* \param[in] i4_res_id   : resolution index of the stream
+*
+* \return
+*    IV_SUCCESS, or IV_FAIL when the mutex cannot be locked or unlocked
+*
+* \author
+*  Ittiam
+*
+*****************************************************************************
+*/
+IV_API_CALL_STATUS_T
+    ihevce_strm_fill_done(void *pv_ctxt, void *pv_curr_out, WORD32 i4_br_id, WORD32 i4_res_id)
+{
+    /* local variables */
+    plugin_ctxt_t *ps_ctxt = (plugin_ctxt_t *)pv_ctxt;
+    app_ctxt_t *ps_app_ctxt = &ps_ctxt->s_app_ctxt;
+    out_strm_prms_t *ps_out_strm_prms = &ps_app_ctxt->as_out_strm_prms[i4_res_id][i4_br_id];
+    void *pv_app_out_strm_buf_mutex_hdl = ps_out_strm_prms->pv_app_out_strm_buf_mutex_hdl;
+    void *pv_app_out_strm_buf_cond_var_hdl = ps_out_strm_prms->pv_app_out_strm_buf_cond_var_hdl;
+    iv_output_data_buffs_t *ps_curr_out = (iv_output_data_buffs_t *)pv_curr_out;
+    WORD32 end_flag = ps_curr_out->i4_end_flag;
+    WORD32 osal_result;
+
+    /* ------ statistics: only for successfully processed, non-empty output -- */
+    if(((WORD32)IV_FAIL != ps_curr_out->i4_process_ret_sts) &&
+       (0 != ps_curr_out->i4_bytes_generated))
+    {
+        /* Widen before scaling to bits: the byte count multiplied by 8 in   */
+        /* 32-bit signed arithmetic overflows (undefined behaviour) for very */
+        /* large payloads. Unsigned 64-bit arithmetic gives the same result  */
+        /* for every value that did not overflow before.                     */
+        /* NOTE(review): assumes u8_total_bits is a 64-bit accumulator, as   */
+        /* its name suggests - confirm against out_strm_prms_t.              */
+        ps_out_strm_prms->u8_total_bits += (ULWORD64)ps_curr_out->i4_bytes_generated * 8;
+        (ps_out_strm_prms->u4_num_frms_enc)++;
+    }
+
+    /****** Lock the critical section ******/
+    osal_result = osal_mutex_lock(pv_app_out_strm_buf_mutex_hdl);
+    if(OSAL_SUCCESS != osal_result)
+        return (IV_FAIL);
+
+    /* Update the end flag to communicate with the o/p thread */
+    ps_app_ctxt->ai4_out_strm_end_flag[i4_res_id][i4_br_id] = end_flag;
+
+    /* set the produced status of the buffer */
+    {
+        /* index of the produced buffer, as reported by the encoder */
+        WORD32 idx = ps_curr_out->i4_cb_buf_id;
+
+        ps_ctxt->aaas_out_bufs[i4_res_id][i4_br_id][idx].i4_timestamp_low =
+            ps_curr_out->i4_out_timestamp_low;
+        ps_ctxt->aaas_out_bufs[i4_res_id][i4_br_id][idx].i4_timestamp_high =
+            ps_curr_out->i4_out_timestamp_high;
+        ps_ctxt->aaas_out_bufs[i4_res_id][i4_br_id][idx].i4_bytes_gen =
+            ps_curr_out->i4_bytes_generated;
+        ps_ctxt->aaas_out_bufs[i4_res_id][i4_br_id][idx].i4_is_key_frame = 0;
+        ps_ctxt->aaas_out_bufs[i4_res_id][i4_br_id][idx].i4_end_flag = end_flag;
+
+        /* IDR and I frames are flagged as key frames */
+        if((IV_IDR_FRAME == ps_curr_out->i4_encoded_frame_type) ||
+           (IV_I_FRAME == ps_curr_out->i4_encoded_frame_type))
+        {
+            ps_ctxt->aaas_out_bufs[i4_res_id][i4_br_id][idx].i4_is_key_frame = 1;
+        }
+
+        /* set the buffer as produced */
+        ps_ctxt->aaas_out_bufs[i4_res_id][i4_br_id][idx].i4_is_prod = 1;
+    }
+
+    /****** Wake whoever waits on the output condition variable ******/
+    osal_cond_var_signal(pv_app_out_strm_buf_cond_var_hdl);
+
+    /****** Unlock the critical section ******/
+    osal_result = osal_mutex_unlock(pv_app_out_strm_buf_mutex_hdl);
+    if(OSAL_SUCCESS != osal_result)
+        return (IV_FAIL);
+
+    return (IV_SUCCESS);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_init \endif
+*
+* \brief
+* Creates a plugin instance: allocates the plugin context, initialises OSAL,
+* backs up the static parameters, creates the high level encoder (HLE)
+* instance, allocates the input / output / control buffers, creates the I/O
+* ports, starts the HLE thread and waits until the HLE init is done.
+* Only i4_res_id == 0 and i4_br_id == 0 are accepted.
+*
+* \param[in,out] ps_params : static configuration parameters. The system API
+* table is passed to ihevce_init_sys_api(), the core affinity masks are
+* filled when thread affinity is enabled and apF_csv_file[0][0] is
+* set to NULL; a private copy is then stored in the plugin context.
+* \param[out] ppv_ihevce_hdl : receives the plugin handle on success, is set
+* to NULL otherwise
+*
+* \return
+* IHEVCE_EOK on success, IHEVCE_EFAIL on failure
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+IHEVCE_PLUGIN_STATUS_T ihevce_init(ihevce_static_cfg_params_t *ps_params, void **ppv_ihevce_hdl)
+{
+ /* local variables */
+ plugin_ctxt_t *ps_ctxt;
+ app_ctxt_t *ps_app_ctxt;
+ ihevce_hle_ctxt_t *ps_interface_ctxt;
+ ihevce_sys_api_t *ps_sys_api;
+ osal_cb_funcs_t s_cb_funcs;
+ WORD32 status = 0;
+
+ /* sanity checks */
+ if(NULL == ps_params)
+ return (IHEVCE_EFAIL);
+
+ if(NULL == ppv_ihevce_hdl)
+ return (IHEVCE_EFAIL);
+
+ /* set the handle to null by default */
+ *ppv_ihevce_hdl = NULL;
+
+ /* Initializing system apis */
+ ps_sys_api = &ps_params->s_sys_api;
+ ihevce_init_sys_api(NULL, ps_sys_api);
+
+ /* --------------------------------------------------------------------- */
+ /* Query and print Encoder version */
+ /* --------------------------------------------------------------------- */
+ ps_sys_api->ihevce_printf(
+ ps_sys_api->pv_cb_handle, "Encoder version %s\n\n", ihevce_get_encoder_version());
+
+ /* --------------------------------------------------------------------- */
+ /* Plugin Handle create */
+ /* --------------------------------------------------------------------- */
+ ps_ctxt = (plugin_ctxt_t *)memory_alloc(NULL, sizeof(plugin_ctxt_t));
+ if(NULL == ps_ctxt)
+ {
+ ps_sys_api->ihevce_printf(
+ ps_sys_api->pv_cb_handle, "IHEVCE ERROR: Error in Plugin initialization\n");
+ return (IHEVCE_EFAIL);
+ }
+ memset(ps_ctxt, 0, sizeof(plugin_ctxt_t));
+
+ /* initialise memory call backs */
+ ps_ctxt->ihevce_mem_alloc = memory_alloc;
+ ps_ctxt->ihevce_mem_free = memory_free;
+
+ ps_ctxt->u8_num_frames_encoded = 0;
+
+ if((0 == ps_params->i4_res_id) && (0 == ps_params->i4_br_id))
+ {
+ /* --------------------------------------------------------------------- */
+ /* OSAL Handle create */
+ /* --------------------------------------------------------------------- */
+ ps_ctxt->pv_osal_handle = memory_alloc(NULL, OSAL_HANDLE_SIZE);
+ /* the handle memory must exist before it is handed to osal_init() */
+ if(NULL == ps_ctxt->pv_osal_handle)
+ {
+ ps_sys_api->ihevce_printf(
+ ps_sys_api->pv_cb_handle, "IHEVCE ERROR: Error in Plugin memory initialization\n");
+ memory_free(NULL, ps_ctxt);
+ return (IHEVCE_EFAIL);
+ }
+
+ /* Initialize OSAL call back functions */
+ s_cb_funcs.mmr_handle = NULL;
+ s_cb_funcs.osal_alloc = memory_alloc;
+ s_cb_funcs.osal_free = memory_free;
+
+ status = osal_init(ps_ctxt->pv_osal_handle);
+ if(OSAL_SUCCESS != status)
+ {
+ ps_sys_api->ihevce_printf(
+ ps_sys_api->pv_cb_handle, "IHEVCE ERROR: Error in OSAL initialization\n");
+ /* release what has been allocated so far */
+ memory_free(NULL, ps_ctxt->pv_osal_handle);
+ memory_free(NULL, ps_ctxt);
+ return (IHEVCE_EFAIL);
+ }
+
+ status = osal_register_callbacks(ps_ctxt->pv_osal_handle, &s_cb_funcs);
+ if(OSAL_SUCCESS != status)
+ {
+ ps_sys_api->ihevce_printf(
+ ps_sys_api->pv_cb_handle, "IHEVCE ERROR: Error in OSAL call back registration\n");
+ /* release what has been allocated so far */
+ memory_free(NULL, ps_ctxt->pv_osal_handle);
+ memory_free(NULL, ps_ctxt);
+ return (IHEVCE_EFAIL);
+ }
+
+ /* --------------------------------------------------------------------- */
+ /* Thread affinity Initialization */
+ /* --------------------------------------------------------------------- */
+ if(ps_params->s_multi_thrd_prms.i4_use_thrd_affinity)
+ {
+ WORD32 i4_ctr;
+
+ /* loop over all the cores; never write past the MAX_NUM_CORES entries */
+ /* of au8_core_aff_mask even if i4_max_num_cores is out of range */
+ for(i4_ctr = 0; (i4_ctr < ps_params->s_multi_thrd_prms.i4_max_num_cores) &&
+ (i4_ctr < MAX_NUM_CORES);
+ i4_ctr++)
+ {
+ /* All cores are logical cores */
+ ps_params->s_multi_thrd_prms.au8_core_aff_mask[i4_ctr] = ((ULWORD64)1 << i4_ctr);
+ }
+ }
+
+ /* --------------------------------------------------------------------- */
+ /* Context Initialization */
+ /* --------------------------------------------------------------------- */
+ ps_app_ctxt = &ps_ctxt->s_app_ctxt;
+
+ ps_ctxt->ps_static_cfg_prms = (ihevce_static_cfg_params_t *)ps_ctxt->ihevce_mem_alloc(
+ NULL, sizeof(ihevce_static_cfg_params_t));
+ if(NULL == ps_ctxt->ps_static_cfg_prms)
+ {
+ ps_sys_api->ihevce_printf(
+ ps_sys_api->pv_cb_handle, "IHEVCE ERROR: Error in Plugin memory initialization\n");
+ /* release what has been allocated so far */
+ memory_free(NULL, ps_ctxt->pv_osal_handle);
+ memory_free(NULL, ps_ctxt);
+ return (IHEVCE_EFAIL);
+ }
+
+ ps_params->apF_csv_file[0][0] = NULL;
+
+ /* set the memory manager handle to NULL */
+ ps_app_ctxt->pv_mem_mngr_handle = NULL;
+
+ /* --------------------------------------------------------------------- */
+ /* Back up the static params passed by caller */
+ /* --------------------------------------------------------------------- */
+ memcpy(ps_ctxt->ps_static_cfg_prms, ps_params, sizeof(ihevce_static_cfg_params_t));
+
+ /* remember the caller's width, then raise the coded width to the minimum */
+ ps_ctxt->ps_static_cfg_prms->s_src_prms.i4_orig_width =
+ ps_ctxt->ps_static_cfg_prms->s_src_prms.i4_width;
+ if(HEVCE_MIN_WIDTH > ps_ctxt->ps_static_cfg_prms->s_src_prms.i4_width)
+ {
+ ps_ctxt->ps_static_cfg_prms->s_src_prms.i4_width = HEVCE_MIN_WIDTH;
+ }
+
+ /* remember the caller's height, then raise the coded height to the minimum */
+ ps_ctxt->ps_static_cfg_prms->s_src_prms.i4_orig_height =
+ ps_ctxt->ps_static_cfg_prms->s_src_prms.i4_height;
+ if(HEVCE_MIN_HEIGHT > ps_ctxt->ps_static_cfg_prms->s_src_prms.i4_height)
+ {
+ ps_ctxt->ps_static_cfg_prms->s_src_prms.i4_height = HEVCE_MIN_HEIGHT;
+ }
+
+ /* setting tgt width and height same as src width and height */
+ ps_ctxt->ps_static_cfg_prms->s_tgt_lyr_prms.as_tgt_params[0].i4_width =
+ ps_ctxt->ps_static_cfg_prms->s_src_prms.i4_width;
+ ps_ctxt->ps_static_cfg_prms->s_tgt_lyr_prms.as_tgt_params[0].i4_height =
+ ps_ctxt->ps_static_cfg_prms->s_src_prms.i4_height;
+
+ /* setting key frame interval: clamp every GOP period to MAX_NUM_FRM_IN_GOP */
+ ps_ctxt->ps_static_cfg_prms->s_coding_tools_prms.i4_max_closed_gop_period =
+ MIN(MAX_NUM_FRM_IN_GOP,
+ ps_ctxt->ps_static_cfg_prms->s_coding_tools_prms.i4_max_closed_gop_period);
+ ps_ctxt->ps_static_cfg_prms->s_coding_tools_prms.i4_max_cra_open_gop_period =
+ MIN(MAX_NUM_FRM_IN_GOP,
+ ps_ctxt->ps_static_cfg_prms->s_coding_tools_prms.i4_max_cra_open_gop_period);
+ ps_ctxt->ps_static_cfg_prms->s_coding_tools_prms.i4_max_i_open_gop_period =
+ MIN(MAX_NUM_FRM_IN_GOP,
+ ps_ctxt->ps_static_cfg_prms->s_coding_tools_prms.i4_max_i_open_gop_period);
+
+ /* --------------------------------------------------------------------- */
+ /* High Level Encoder context init */
+ /* --------------------------------------------------------------------- */
+ ps_interface_ctxt =
+ (ihevce_hle_ctxt_t *)ps_ctxt->ihevce_mem_alloc(NULL, sizeof(ihevce_hle_ctxt_t));
+ if(NULL == ps_interface_ctxt)
+ {
+ ps_sys_api->ihevce_printf(
+ ps_sys_api->pv_cb_handle,
+ "IHEVCE ERROR: Error in Plugin HLE memory initialization\n");
+ /* release what has been allocated so far */
+ ps_ctxt->ihevce_mem_free(NULL, ps_ctxt->ps_static_cfg_prms);
+ memory_free(NULL, ps_ctxt->pv_osal_handle);
+ memory_free(NULL, ps_ctxt);
+ return (IHEVCE_EFAIL);
+ }
+ memset(ps_interface_ctxt, 0, sizeof(ihevce_hle_ctxt_t));
+ ps_interface_ctxt->i4_size = sizeof(ihevce_hle_ctxt_t);
+
+ ps_ctxt->pv_hle_interface_ctxt = ps_interface_ctxt;
+
+ /* store the static config parameters pointer */
+ ps_interface_ctxt->ps_static_cfg_prms = ps_ctxt->ps_static_cfg_prms;
+
+ /* initialise the interface structure parameters */
+ ps_interface_ctxt->pv_inp_cb_handle = (void *)ps_ctxt;
+ ps_interface_ctxt->pv_out_cb_handle = (void *)ps_ctxt;
+ ps_interface_ctxt->pv_recon_cb_handle = (void *)ps_ctxt;
+
+ ps_interface_ctxt->pv_osal_handle = ps_ctxt->pv_osal_handle;
+ ps_interface_ctxt->ihevce_mem_alloc = mem_mngr_alloc;
+ ps_interface_ctxt->ihevce_mem_free = mem_mngr_free;
+ ps_interface_ctxt->i4_hle_init_done = 0;
+ ps_interface_ctxt->pv_mem_mgr_hdl = ps_app_ctxt->pv_mem_mngr_handle;
+
+ /* register the callbacks */
+ ps_interface_ctxt->ihevce_output_strm_fill_done = ihevce_strm_fill_done;
+ ps_interface_ctxt->ihevce_output_recon_fill_done = NULL;
+ ps_interface_ctxt->ihevce_set_free_input_buff = NULL;
+
+ /*Added for run time or create time creation*/
+ ps_interface_ctxt->i4_create_time_input_allocation = (WORD32)CREATE_TIME_ALLOCATION_INPUT;
+ ps_interface_ctxt->i4_create_time_output_allocation = (WORD32)CREATE_TIME_ALLOCATION_OUTPUT;
+
+ ps_interface_ctxt->ihevce_cmds_error_report = ihevce_cmds_error_report;
+ ps_interface_ctxt->pv_cmd_err_cb_handle = (void *)ps_ctxt;
+
+ /* --------------------------------------------------------------------- */
+ /* High Level Encoder Instance Creation */
+ /* --------------------------------------------------------------------- */
+ status = ihevce_hle_interface_create(ps_interface_ctxt);
+ if((WORD32)IV_FAIL == status)
+ {
+ ihevce_hle_interface_delete(ps_interface_ctxt);
+
+ memory_free(NULL, ps_interface_ctxt);
+
+ /* free static config memory */
+ ps_ctxt->ihevce_mem_free(NULL, ps_ctxt->ps_static_cfg_prms);
+
+ /* free osal handle */
+ memory_free(NULL, ps_ctxt->pv_osal_handle);
+
+ /* free plugin ctxt memory */
+ memory_free(NULL, ps_ctxt);
+
+ ps_sys_api->ihevce_printf(
+ ps_sys_api->pv_cb_handle, "IHEVCE ERROR: Error in Plugin HLE create failed\n");
+ return (IHEVCE_EFAIL);
+ }
+
+ /* NOTE(review): the failure returns in the buffer, port and thread setup */
+ /* below leave without releasing the HLE instance and the buffers */
+ /* allocated so far (except where stated otherwise). */
+
+ /* --------------------------------------------------------------------- */
+ /* Input Output and Command buffer allocation */
+ /* --------------------------------------------------------------------- */
+ {
+ WORD32 ctr;
+ WORD32 buf_size;
+ UWORD8 *pu1_tmp_buf;
+ WORD32 i4_res_id;
+ WORD32 i4_br_id;
+ WORD32 i4_num_resolutions;
+ WORD32 ai4_num_bitrate_instances[IHEVCE_MAX_NUM_RESOLUTIONS] = { 1 };
+ iv_input_bufs_req_t s_input_bufs_req;
+ iv_res_layer_output_bufs_req_t s_res_layer_output_bufs_req;
+ iv_res_layer_recon_bufs_req_t s_res_layer_recon_bufs_req;
+
+ /* local array of pointers */
+ void *apv_inp_luma_bufs[MAX_NUM_INP_DATA_BUFS];
+ void *apv_inp_cb_bufs[MAX_NUM_INP_DATA_BUFS];
+ void *apv_inp_cr_bufs[MAX_NUM_INP_DATA_BUFS];
+ void *apv_inp_sync_bufs[MAX_NUM_INP_CTRL_SYNC_BUFS];
+ void *apv_inp_async_bufs[MAX_NUM_INP_CTRL_ASYNC_BUFS];
+ void *apv_out_data_bufs[IHEVCE_MAX_NUM_RESOLUTIONS][IHEVCE_MAX_NUM_BITRATES]
+ [MAX_NUM_OUT_DATA_BUFS];
+
+ /* get the number of resolutions */
+ i4_num_resolutions = ps_ctxt->ps_static_cfg_prms->s_tgt_lyr_prms.i4_num_res_layers;
+
+ /* set the size of the structure */
+ s_input_bufs_req.i4_size = sizeof(iv_input_bufs_req_t);
+ s_res_layer_output_bufs_req.i4_size = sizeof(iv_res_layer_output_bufs_req_t);
+ s_res_layer_recon_bufs_req.i4_size = sizeof(iv_res_layer_recon_bufs_req_t);
+
+ /* loop over num resolutions */
+ for(i4_res_id = 0; i4_res_id < i4_num_resolutions; i4_res_id++)
+ {
+ /* store the number of bitrates */
+ ai4_num_bitrate_instances[i4_res_id] =
+ ps_ctxt->ps_static_cfg_prms->s_tgt_lyr_prms.as_tgt_params[i4_res_id]
+ .i4_num_bitrate_instances;
+
+ /* loop over num bitrates */
+ for(i4_br_id = 0; i4_br_id < ai4_num_bitrate_instances[i4_res_id]; i4_br_id++)
+ {
+ s_res_layer_output_bufs_req.s_output_buf_req[i4_res_id][i4_br_id].i4_size =
+ sizeof(iv_output_bufs_req_t);
+ }
+ }
+
+ /* call Query I/O buffer */
+ status = ihevce_query_io_buf_req(
+ ps_interface_ctxt,
+ &s_input_bufs_req,
+ &s_res_layer_output_bufs_req,
+ &s_res_layer_recon_bufs_req);
+ /* the buffer requirements are used for every allocation below, so do */
+ /* not continue with them when the query reports a failure */
+ if((WORD32)IV_FAIL == status)
+ {
+ ihevce_hle_interface_delete(ps_interface_ctxt);
+
+ memory_free(NULL, ps_interface_ctxt);
+
+ /* free static config memory */
+ ps_ctxt->ihevce_mem_free(NULL, ps_ctxt->ps_static_cfg_prms);
+
+ /* free osal handle */
+ memory_free(NULL, ps_ctxt->pv_osal_handle);
+
+ /* free plugin ctxt memory */
+ memory_free(NULL, ps_ctxt);
+
+ ps_sys_api->ihevce_printf(
+ ps_sys_api->pv_cb_handle, "IHEVCE ERROR: Error in Plugin initialization\n");
+ return (IHEVCE_EFAIL);
+ }
+
+ /* check on the requirements against the MAX of application */
+ /* should be present only for debug purpose */
+ /* NOTE(review): the buffer counts returned by the query are not checked */
+ /* against the sizes of the local pointer arrays declared above. */
+
+ /* --------------- Input data buffers init ---------------------- */
+ /* allocate memory for input buffers */
+ if(ps_interface_ctxt->i4_create_time_input_allocation == 1)
+ {
+ /* one buffer holds the luma plane followed by the chroma plane */
+ buf_size = s_input_bufs_req.i4_min_size_uv_buf + s_input_bufs_req.i4_min_size_y_buf;
+ ps_ctxt->s_memtab_inp_data_buf.i4_size = sizeof(iv_mem_rec_t);
+ ps_ctxt->s_memtab_inp_data_buf.i4_mem_alignment = 4;
+ ps_ctxt->s_memtab_inp_data_buf.i4_mem_size =
+ (s_input_bufs_req.i4_min_num_yuv_bufs + XTRA_INP_DATA_BUFS) * buf_size;
+ ps_ctxt->s_memtab_inp_data_buf.e_mem_type = IV_EXT_CACHEABLE_NUMA_NODE0_MEM;
+
+ mem_mngr_alloc(
+ ps_app_ctxt->pv_mem_mngr_handle, ps_sys_api, &ps_ctxt->s_memtab_inp_data_buf);
+
+ pu1_tmp_buf = (UWORD8 *)ps_ctxt->s_memtab_inp_data_buf.pv_base;
+
+ if(NULL == pu1_tmp_buf)
+ {
+ ps_sys_api->ihevce_printf(
+ ps_sys_api->pv_cb_handle, "IHEVCE ERROR: Error in allocate memory\n");
+ return (IHEVCE_EFAIL);
+ }
+
+ /* loop to initialise the buffer pointer */
+ for(ctr = 0; ctr < s_input_bufs_req.i4_min_num_yuv_bufs + XTRA_INP_DATA_BUFS; ctr++)
+ {
+ apv_inp_luma_bufs[ctr] = pu1_tmp_buf;
+ apv_inp_cb_bufs[ctr] = pu1_tmp_buf + s_input_bufs_req.i4_min_size_y_buf;
+ apv_inp_cr_bufs[ctr] = NULL; /* 420SP case */
+
+ /* increment the input buffer pointer to next buffer */
+ pu1_tmp_buf += buf_size;
+ }
+ }
+
+ /* --------------- Output data buffers init ---------------------- */
+
+ /* loop over num resolutions */
+ for(i4_res_id = 0; i4_res_id < i4_num_resolutions; i4_res_id++)
+ {
+ for(i4_br_id = 0; i4_br_id < ai4_num_bitrate_instances[i4_res_id]; i4_br_id++)
+ {
+ buf_size = s_res_layer_output_bufs_req.s_output_buf_req[i4_res_id][i4_br_id]
+ .i4_min_size_bitstream_buf;
+
+ ps_ctxt->as_memtab_out_data_buf[i4_res_id][i4_br_id].i4_size =
+ sizeof(iv_mem_rec_t);
+ ps_ctxt->as_memtab_out_data_buf[i4_res_id][i4_br_id].i4_mem_alignment = 4;
+
+ /* extra output buffers are added only when the output buffers are */
+ /* not allocated at create time */
+ if(!ps_interface_ctxt->i4_create_time_output_allocation)
+ {
+ ps_ctxt->as_memtab_out_data_buf[i4_res_id][i4_br_id].i4_mem_size =
+ (s_res_layer_output_bufs_req.s_output_buf_req[i4_res_id][i4_br_id]
+ .i4_min_num_out_bufs +
+ XTRA_OUT_DATA_BUFS) *
+ buf_size;
+ }
+ else
+ {
+ ps_ctxt->as_memtab_out_data_buf[i4_res_id][i4_br_id].i4_mem_size =
+ (s_res_layer_output_bufs_req.s_output_buf_req[i4_res_id][i4_br_id]
+ .i4_min_num_out_bufs) *
+ buf_size;
+ }
+ ps_ctxt->as_memtab_out_data_buf[i4_res_id][i4_br_id].e_mem_type =
+ IV_EXT_CACHEABLE_NUMA_NODE1_MEM;
+
+ mem_mngr_alloc(
+ ps_app_ctxt->pv_mem_mngr_handle,
+ ps_sys_api,
+ &ps_ctxt->as_memtab_out_data_buf[i4_res_id][i4_br_id]);
+
+ pu1_tmp_buf =
+ (UWORD8 *)ps_ctxt->as_memtab_out_data_buf[i4_res_id][i4_br_id].pv_base;
+ if(NULL == pu1_tmp_buf)
+ {
+ ps_sys_api->ihevce_printf(
+ ps_sys_api->pv_cb_handle, "IHEVCE ERROR: Error in allocate memory\n");
+ return (IHEVCE_EFAIL);
+ }
+
+ if(ps_interface_ctxt->i4_create_time_output_allocation == 1)
+ {
+ /* loop to initialise the buffer pointer */
+ for(ctr = 0;
+ ctr < s_res_layer_output_bufs_req.s_output_buf_req[i4_res_id][i4_br_id]
+ .i4_min_num_out_bufs;
+ ctr++)
+ {
+ apv_out_data_bufs[i4_res_id][i4_br_id][ctr] = pu1_tmp_buf;
+ pu1_tmp_buf += buf_size;
+ }
+ }
+ else
+ {
+ WORD32 i4_num_out_bufs =
+ s_res_layer_output_bufs_req.s_output_buf_req[i4_res_id][i4_br_id]
+ .i4_min_num_out_bufs +
+ XTRA_OUT_DATA_BUFS;
+ ps_ctxt->i4_num_out_bufs = i4_num_out_bufs;
+ ps_ctxt->ai4_free_out_buf_idx[i4_res_id][i4_br_id] = 0;
+ ps_ctxt->i4_prod_out_buf_idx = 0;
+
+ /* Assert to make sure ps_ctxt->aaas_out_bufs[i4_res_id][i4_br_id][] array
+ has more bufs than ps_ctxt->i4_num_out_bufs. Needed to identify
+ wrap-around case */
+ ASSERT(ps_ctxt->i4_num_out_bufs <= MAX_NUM_OUT_DATA_BUFS);
+
+ /* loop to initialise the buffer pointer */
+ for(ctr = 0; ctr < i4_num_out_bufs; ctr++)
+ {
+ ps_ctxt->aaas_out_bufs[i4_res_id][i4_br_id][ctr].i4_idx = ctr;
+ ps_ctxt->aaas_out_bufs[i4_res_id][i4_br_id][ctr].i4_is_free = 1;
+ ps_ctxt->aaas_out_bufs[i4_res_id][i4_br_id][ctr].i4_is_prod = 0;
+ ps_ctxt->aaas_out_bufs[i4_res_id][i4_br_id][ctr].i4_bytes_gen = 0;
+ ps_ctxt->aaas_out_bufs[i4_res_id][i4_br_id][ctr].pu1_buf = pu1_tmp_buf;
+ ps_ctxt->aaas_out_bufs[i4_res_id][i4_br_id][ctr].i4_buf_size = buf_size;
+ pu1_tmp_buf += buf_size;
+ }
+ }
+
+ /* create mutex for controlling the out strm buf b/w appln and encoder */
+ ps_app_ctxt->as_out_strm_prms[i4_res_id][i4_br_id]
+ .pv_app_out_strm_buf_mutex_hdl = osal_mutex_create(ps_ctxt->pv_osal_handle);
+ if(NULL == ps_app_ctxt->as_out_strm_prms[i4_res_id][i4_br_id]
+ .pv_app_out_strm_buf_mutex_hdl)
+ {
+ ps_sys_api->ihevce_printf(
+ ps_sys_api->pv_cb_handle,
+ "IHEVCE ERROR: Error in Plugin initialization\n");
+ return (IHEVCE_EFAIL);
+ }
+
+ /* create the condition variable signalled when an out strm buf is produced */
+ ps_app_ctxt->as_out_strm_prms[i4_res_id][i4_br_id]
+ .pv_app_out_strm_buf_cond_var_hdl =
+ osal_cond_var_create(ps_ctxt->pv_osal_handle);
+ if(NULL == ps_app_ctxt->as_out_strm_prms[i4_res_id][i4_br_id]
+ .pv_app_out_strm_buf_cond_var_hdl)
+ {
+ ps_sys_api->ihevce_printf(
+ ps_sys_api->pv_cb_handle,
+ "IHEVCE ERROR: Error in Plugin initialization\n");
+ return (IHEVCE_EFAIL);
+ }
+ }
+ }
+
+ if(ps_interface_ctxt->i4_create_time_input_allocation == 1)
+ {
+ /* ------------- Input sync command buffers init -------------------- */
+ buf_size = s_input_bufs_req.i4_min_size_synch_ctrl_bufs;
+
+ ps_ctxt->s_memtab_inp_sync_ctrl_buf.i4_size = sizeof(iv_mem_rec_t);
+ ps_ctxt->s_memtab_inp_sync_ctrl_buf.i4_mem_alignment = 4;
+ ps_ctxt->s_memtab_inp_sync_ctrl_buf.i4_mem_size =
+ (s_input_bufs_req.i4_min_num_yuv_bufs + XTRA_INP_DATA_BUFS) * buf_size;
+ ps_ctxt->s_memtab_inp_sync_ctrl_buf.e_mem_type = IV_EXT_CACHEABLE_NUMA_NODE0_MEM;
+
+ mem_mngr_alloc(
+ ps_app_ctxt->pv_mem_mngr_handle,
+ ps_sys_api,
+ &ps_ctxt->s_memtab_inp_sync_ctrl_buf);
+
+ pu1_tmp_buf = (UWORD8 *)ps_ctxt->s_memtab_inp_sync_ctrl_buf.pv_base;
+
+ if(NULL == pu1_tmp_buf)
+ {
+ ps_sys_api->ihevce_printf(
+ ps_sys_api->pv_cb_handle, "IHEVCE ERROR: Error in allocate memory\n");
+ return (IHEVCE_EFAIL);
+ }
+
+ /* loop to initialise the buffer pointer */
+ for(ctr = 0; ctr < s_input_bufs_req.i4_min_num_yuv_bufs + XTRA_INP_DATA_BUFS; ctr++)
+ {
+ apv_inp_sync_bufs[ctr] = pu1_tmp_buf;
+ pu1_tmp_buf += buf_size;
+ }
+ }
+
+ /* ------------- Input async command buffers init -------------------- */
+ buf_size = s_input_bufs_req.i4_min_size_asynch_ctrl_bufs;
+
+ /* allocate memory for the input async control buffers */
+ ps_ctxt->pu1_inp_async_ctrl_buf = (UWORD8 *)ps_ctxt->ihevce_mem_alloc(
+ NULL, s_input_bufs_req.i4_min_num_asynch_ctrl_bufs * buf_size);
+ if(ps_ctxt->pu1_inp_async_ctrl_buf == NULL)
+ {
+ ps_sys_api->ihevce_printf(
+ ps_sys_api->pv_cb_handle,
+ "IHEVCE ERROR: Error in Plugin memory initialization\n");
+ return (IHEVCE_EFAIL);
+ }
+
+ pu1_tmp_buf = ps_ctxt->pu1_inp_async_ctrl_buf;
+
+ /* loop to initialise the buffer pointer */
+ for(ctr = 0; ctr < s_input_bufs_req.i4_min_num_asynch_ctrl_bufs; ctr++)
+ {
+ apv_inp_async_bufs[ctr] = pu1_tmp_buf;
+ pu1_tmp_buf += buf_size;
+ }
+
+ /* Create IO ports for the buffer allocated */
+ {
+ iv_input_data_ctrl_buffs_desc_t s_inp_desc;
+ iv_input_asynch_ctrl_buffs_desc_t s_inp_ctrl_desc;
+ iv_res_layer_output_data_buffs_desc_t s_mres_out_desc;
+ iv_res_layer_recon_data_buffs_desc_t s_mres_recon_desc;
+
+ /* set the parameters of the input data control desc */
+ s_inp_desc.i4_size = sizeof(iv_input_data_ctrl_buffs_desc_t);
+ s_inp_desc.i4_num_synch_ctrl_bufs = s_input_bufs_req.i4_min_num_synch_ctrl_bufs;
+ s_inp_desc.i4_num_yuv_bufs =
+ s_input_bufs_req.i4_min_num_yuv_bufs + XTRA_INP_DATA_BUFS;
+ s_inp_desc.i4_size_y_buf = s_input_bufs_req.i4_min_size_y_buf;
+ s_inp_desc.i4_size_uv_buf = s_input_bufs_req.i4_min_size_uv_buf;
+ s_inp_desc.i4_size_synch_ctrl_bufs = s_input_bufs_req.i4_min_size_synch_ctrl_bufs;
+ s_inp_desc.ppv_synch_ctrl_bufs = &apv_inp_sync_bufs[0];
+ s_inp_desc.ppv_y_buf = &apv_inp_luma_bufs[0];
+ s_inp_desc.ppv_u_buf = &apv_inp_cb_bufs[0];
+ s_inp_desc.ppv_v_buf = &apv_inp_cr_bufs[0];
+
+ /* set the parameters of the input async control desc */
+ s_inp_ctrl_desc.i4_size = sizeof(iv_input_asynch_ctrl_buffs_desc_t);
+ s_inp_ctrl_desc.i4_num_asynch_ctrl_bufs =
+ s_input_bufs_req.i4_min_num_asynch_ctrl_bufs;
+ s_inp_ctrl_desc.i4_size_asynch_ctrl_bufs =
+ s_input_bufs_req.i4_min_size_asynch_ctrl_bufs;
+ s_inp_ctrl_desc.ppv_asynch_ctrl_bufs = &apv_inp_async_bufs[0];
+
+ for(i4_res_id = 0; i4_res_id < i4_num_resolutions; i4_res_id++)
+ {
+ /* set the parameters of the output data desc */
+ for(i4_br_id = 0; i4_br_id < ai4_num_bitrate_instances[i4_res_id]; i4_br_id++)
+ {
+ s_mres_out_desc.s_output_data_buffs[i4_res_id][i4_br_id].i4_size =
+ sizeof(iv_output_data_buffs_desc_t);
+
+ if(!ps_interface_ctxt->i4_create_time_output_allocation)
+ {
+ s_mres_out_desc.s_output_data_buffs[i4_res_id][i4_br_id]
+ .i4_num_bitstream_bufs =
+ s_res_layer_output_bufs_req.s_output_buf_req[i4_res_id][i4_br_id]
+ .i4_min_num_out_bufs +
+ XTRA_OUT_DATA_BUFS;
+ }
+ else
+ {
+ s_mres_out_desc.s_output_data_buffs[i4_res_id][i4_br_id]
+ .i4_num_bitstream_bufs =
+ s_res_layer_output_bufs_req.s_output_buf_req[i4_res_id][i4_br_id]
+ .i4_min_num_out_bufs;
+ }
+
+ s_mres_out_desc.s_output_data_buffs[i4_res_id][i4_br_id]
+ .i4_size_bitstream_buf =
+ s_res_layer_output_bufs_req.s_output_buf_req[i4_res_id][i4_br_id]
+ .i4_min_size_bitstream_buf;
+ s_mres_out_desc.s_output_data_buffs[i4_res_id][i4_br_id].ppv_bitstream_bufs =
+ &apv_out_data_bufs[i4_res_id][i4_br_id][0];
+ }
+ }
+
+ s_mres_recon_desc.i4_size = sizeof(iv_res_layer_recon_data_buffs_desc_t);
+ /* call create I/O ports */
+ status = ihevce_create_ports(
+ ps_interface_ctxt,
+ &s_inp_desc,
+ &s_inp_ctrl_desc,
+ &s_mres_out_desc,
+ &s_mres_recon_desc);
+ /* do not start the encoder thread on ports that failed to create */
+ if((WORD32)IV_FAIL == status)
+ {
+ ps_sys_api->ihevce_printf(
+ ps_sys_api->pv_cb_handle, "IHEVCE ERROR: Error in Plugin initialization\n");
+ return (IHEVCE_EFAIL);
+ }
+ }
+ }
+
+ /* --------------------------------------------------------------------- */
+ /* Create a High level encoder thread */
+ /* --------------------------------------------------------------------- */
+ {
+ osal_thread_attr_t s_thread_attr = OSAL_DEFAULT_THREAD_ATTR;
+
+ /* Initialize application thread attributes */
+ s_thread_attr.exit_code = 0;
+ s_thread_attr.name = 0;
+ s_thread_attr.priority_map_flag = 1;
+ s_thread_attr.priority = OSAL_PRIORITY_DEFAULT;
+ s_thread_attr.stack_addr = 0;
+ s_thread_attr.stack_size = THREAD_STACK_SIZE;
+ s_thread_attr.thread_func = ihevce_hle_interface_thrd;
+ s_thread_attr.thread_param = (void *)(ps_interface_ctxt);
+ s_thread_attr.core_affinity_mask = 0;
+ s_thread_attr.group_num = 0;
+
+ /* Create High level encoder thread */
+ ps_ctxt->pv_hle_thread_hdl =
+ osal_thread_create(ps_ctxt->pv_osal_handle, &s_thread_attr);
+ if(NULL == ps_ctxt->pv_hle_thread_hdl)
+ {
+ return IHEVCE_EFAIL;
+ }
+ }
+
+ /* --------------------------------------------------------------------- */
+ /* Wait until HLE init is done */
+ /* --------------------------------------------------------------------- */
+ {
+ volatile WORD32 hle_init_done;
+ volatile WORD32 *pi4_hle_init_done;
+
+ pi4_hle_init_done = (volatile WORD32 *)&ps_interface_ctxt->i4_hle_init_done;
+
+ /* NOTE(review): busy-wait on a flag that is not written in this */
+ /* function; it never terminates if the flag stays 0. */
+ do
+ {
+ hle_init_done = *pi4_hle_init_done;
+
+ } while(0 == hle_init_done);
+ }
+
+ /* reset flush mode */
+ ps_ctxt->i4_flush_mode_on = 0;
+
+ {
+ WORD32 i4_res_id;
+ WORD32 i4_br_id;
+ for(i4_res_id = 0; i4_res_id < IHEVCE_MAX_NUM_RESOLUTIONS; i4_res_id++)
+ {
+ for(i4_br_id = 0; i4_br_id < IHEVCE_MAX_NUM_BITRATES; i4_br_id++)
+ {
+ /* reset out end flag */
+ ps_ctxt->ai4_out_end_flag[i4_res_id][i4_br_id] = 0;
+ }
+ }
+ }
+
+ /* reset the field id */
+ ps_ctxt->i4_field_id = 0;
+
+ /* initial DTS: -1, or -(2 ^ max temporal layers) when temporal layers are used */
+ ps_ctxt->i8_dts = -1;
+
+ if(0 != ps_ctxt->ps_static_cfg_prms->s_coding_tools_prms.i4_max_temporal_layers)
+ {
+ ps_ctxt->i8_dts =
+ (-1) *
+ (1 << ps_ctxt->ps_static_cfg_prms->s_coding_tools_prms.i4_max_temporal_layers);
+ }
+
+ /* initialise the buffer stride */
+ {
+ WORD32 max_cu_size;
+
+ max_cu_size = (1 << ps_ctxt->ps_static_cfg_prms->s_config_prms.i4_max_log2_cu_size);
+ ps_ctxt->i4_frm_stride =
+ ps_ctxt->ps_static_cfg_prms->s_src_prms.i4_width +
+ SET_CTB_ALIGN(ps_ctxt->ps_static_cfg_prms->s_src_prms.i4_width, max_cu_size);
+ }
+ }
+ else
+ {
+ /* only resolution id 0 with bitrate id 0 is accepted */
+ /* free plugin ctxt memory */
+ memory_free(NULL, ps_ctxt);
+
+ return (IHEVCE_EFAIL);
+ }
+
+ /* reset the place holders of old bitrate */
+ memset(&ps_ctxt->ai4_old_bitrate[0][0], 0, sizeof(ps_ctxt->ai4_old_bitrate));
+
+ ps_ctxt->ai4_old_bitrate[0][0] = ps_params->s_tgt_lyr_prms.as_tgt_params[0].ai4_tgt_bitrate[0];
+
+ /* store the plugin handle before returning */
+ *ppv_ihevce_hdl = (void *)ps_ctxt;
+
+ return (IHEVCE_EOK);
+}
+/* Waits until every stream has produced its next output buffer; fills ps_out from stream [0][0] */
+static IHEVCE_PLUGIN_STATUS_T
+    ihevce_receive_out_buffer(plugin_ctxt_t *ps_ctxt, ihevce_out_buf_t *ps_out)
+{
+    app_ctxt_t *ps_app_ctxt = &ps_ctxt->s_app_ctxt;
+    WORD32 i4_res_id, i4_br_id;
+    WORD32 i4_num_resolutions;
+    WORD32 ai4_num_bitrate_instances[IHEVCE_MAX_NUM_RESOLUTIONS] = { 1 };
+    /* returns IHEVCE_EFAIL only when locking or unlocking the [0][0] stream mutex fails */
+    i4_num_resolutions = ps_ctxt->ps_static_cfg_prms->s_tgt_lyr_prms.i4_num_res_layers;
+    for(i4_res_id = 0; i4_res_id < i4_num_resolutions; i4_res_id++)
+    {
+        ai4_num_bitrate_instances[i4_res_id] =
+            ps_ctxt->ps_static_cfg_prms->s_tgt_lyr_prms.as_tgt_params[i4_res_id]
+                .i4_num_bitrate_instances;
+    }
+    /* default init: a NULL buffer with zero bytes is reported until an output is received */
+    ps_out->pu1_output_buf = NULL;
+    ps_out->i4_bytes_generated = 0;
+
+    /* ---------------- if any output buffer is available return the buffer back ------------- */
+    while(1)
+    {
+        WORD32 osal_result;
+        WORD32 buf_present = 0;
+        WORD32 i4_is_prod = 1;
+        WORD32 i4_atleast_one_br_prod = 0;
+        /****** Lock the critical section ******/
+        osal_result =
+            osal_mutex_lock(ps_app_ctxt->as_out_strm_prms[0][0].pv_app_out_strm_buf_mutex_hdl);
+
+        if(OSAL_SUCCESS != osal_result)
+            return IHEVCE_EFAIL;
+        /* wait until entropy sends an output for every resolution and bitrate instance */
+        /* NOTE(review): only the [0][0] mutex is locked, yet each wait passes its own mutex */
+        while(1)
+        {
+            i4_is_prod = 1;
+            for(i4_res_id = 0; i4_res_id < i4_num_resolutions; i4_res_id++)
+            {
+                for(i4_br_id = 0; i4_br_id < ai4_num_bitrate_instances[i4_res_id]; i4_br_id++)
+                {
+                    i4_is_prod &=
+                        ps_ctxt->aaas_out_bufs[i4_res_id][i4_br_id][ps_ctxt->i4_prod_out_buf_idx]
+                            .i4_is_prod;
+                    i4_atleast_one_br_prod |=
+                        ps_ctxt->aaas_out_bufs[i4_res_id][i4_br_id][ps_ctxt->i4_prod_out_buf_idx]
+                            .i4_is_prod;
+                }
+            }
+            if(!i4_is_prod)
+            {
+                for(i4_res_id = 0; i4_res_id < i4_num_resolutions; i4_res_id++)
+                {
+                    for(i4_br_id = 0; i4_br_id < ai4_num_bitrate_instances[i4_res_id]; i4_br_id++)
+                    {
+                        osal_cond_var_wait(
+                            ps_app_ctxt->as_out_strm_prms[i4_res_id][i4_br_id]
+                                .pv_app_out_strm_buf_cond_var_hdl,
+                            ps_app_ctxt->as_out_strm_prms[i4_res_id][i4_br_id]
+                                .pv_app_out_strm_buf_mutex_hdl);
+                    }
+                }
+            }
+            else
+            {
+                break;
+            }
+        }
+
+        ASSERT(i4_is_prod == 1);
+
+        /* check if the current buffer for all bitrates and resolutions have been produced */
+        if(1 == i4_is_prod)
+        {
+            buf_present = 1;
+
+            for(i4_res_id = 0; i4_res_id < i4_num_resolutions; i4_res_id++)
+            {
+                for(i4_br_id = 0; i4_br_id < ai4_num_bitrate_instances[i4_res_id]; i4_br_id++)
+                {
+                    /* set the buffer to free status */
+                    ps_ctxt->aaas_out_bufs[i4_res_id][i4_br_id][ps_ctxt->i4_prod_out_buf_idx]
+                        .i4_is_free = 1;
+                    if((0 == i4_res_id) && (0 == i4_br_id))
+                    {
+                        ps_out->i4_bytes_generated =
+                            ps_ctxt->aaas_out_bufs[0][0][ps_ctxt->i4_prod_out_buf_idx].i4_bytes_gen;
+                        ps_out->pu1_output_buf =
+                            ps_ctxt->aaas_out_bufs[0][0][ps_ctxt->i4_prod_out_buf_idx].pu1_buf;
+                    }
+                }
+            }
+
+            /* copy the contents to output buffer */
+            ps_out->i4_is_key_frame =
+                ps_ctxt->aaas_out_bufs[0][0][ps_ctxt->i4_prod_out_buf_idx].i4_is_key_frame;
+            /* low word is masked so that a set bit 31 is not sign-extended into the upper 32 bits */
+            ps_out->u8_pts =
+                ((ULWORD64)ps_ctxt->aaas_out_bufs[0][0][ps_ctxt->i4_prod_out_buf_idx].i4_timestamp_low) &
+                0xFFFFFFFF;
+            ps_out->u8_pts |=
+                ((ULWORD64)ps_ctxt->aaas_out_bufs[0][0][ps_ctxt->i4_prod_out_buf_idx].i4_timestamp_high)
+                << 32;
+            ps_out->i4_end_flag =
+                ps_ctxt->aaas_out_bufs[0][0][ps_ctxt->i4_prod_out_buf_idx].i4_end_flag;
+            ps_out->i8_dts = ps_ctxt->i8_dts;
+
+            /* increment the DTS */
+            ps_ctxt->i8_dts++;
+        }
+
+        /* buffer is always present here: the wait loop exits only once i4_is_prod is 1 */
+        if(1 == buf_present)
+        {
+            ps_ctxt->i4_prod_out_buf_idx++;
+
+            /* wrap around case */
+            if(ps_ctxt->i4_prod_out_buf_idx == ps_ctxt->i4_num_out_bufs)
+            {
+                ps_ctxt->i4_prod_out_buf_idx = 0;
+            }
+
+            /****** Unlock the critical section ******/
+            osal_result = osal_mutex_unlock(
+                ps_app_ctxt->as_out_strm_prms[0][0].pv_app_out_strm_buf_mutex_hdl);
+            if(OSAL_SUCCESS != osal_result)
+                return IHEVCE_EFAIL;
+
+            /* break while 1 loop */
+            break;
+        }
+        else
+        {
+            /* in steady state*/
+            if(0 == ps_ctxt->i4_flush_mode_on)
+            {
+                /****** Unlock the critical section ******/
+                osal_result = osal_mutex_unlock(
+                    ps_app_ctxt->as_out_strm_prms[0][0].pv_app_out_strm_buf_mutex_hdl);
+                if(OSAL_SUCCESS != osal_result)
+                    return IHEVCE_EFAIL;
+                if(!i4_atleast_one_br_prod) /*** If atleast one bitrate is produced do not break from loop **/
+                { /*** Continue in while loop and Wait for next bitrate ***/
+                    /* break while 1 loop */
+                    break;
+                }
+            }
+            else
+            {
+                /* In flush mode is ON then this function must return output
+                buffers. Otherwise assume that encoding is over and return fail */
+                /****** Unlock the critical section ******/
+                osal_result = osal_mutex_unlock(
+                    ps_app_ctxt->as_out_strm_prms[0][0].pv_app_out_strm_buf_mutex_hdl);
+                if(OSAL_SUCCESS != osal_result)
+                    return IHEVCE_EFAIL;
+            }
+        }
+    }
+
+    return IHEVCE_EOK;
+}
+
+static IHEVCE_PLUGIN_STATUS_T
+    ihevce_queue_out_buffer(plugin_ctxt_t *ps_ctxt, WORD32 i4_res_id, WORD32 i4_br_id)
+{
+    /* Queues the next free application output buffer of the given resolution / bitrate    */
+    /* instance into the encoder's output stream queue. Returns IHEVCE_EFAIL only when a   */
+    /* mutex call fails; IHEVCE_EOK otherwise, including when nothing could be queued.     */
+    app_ctxt_t *ps_app = &ps_ctxt->s_app_ctxt;
+    ihevce_hle_ctxt_t *ps_hle = (ihevce_hle_ctxt_t *)ps_ctxt->pv_hle_interface_ctxt;
+    iv_output_data_buffs_t *ps_desc;
+    WORD32 desc_id;
+    WORD32 free_idx;
+    WORD32 lock_status;
+
+    /* nothing is queued once the end flag of this instance has been set */
+    if(0 != ps_ctxt->ai4_out_end_flag[i4_res_id][i4_br_id])
+    {
+        return IHEVCE_EOK;
+    }
+    /* the buffer at the current free index must be marked free before it is queued again */
+    free_idx = ps_ctxt->ai4_free_out_buf_idx[i4_res_id][i4_br_id];
+    if(1 != ps_ctxt->aaas_out_bufs[i4_res_id][i4_br_id][free_idx].i4_is_free)
+    {
+        return IHEVCE_EOK;
+    }
+    /* ask the output queue for a free descriptor without blocking */
+    ps_desc = (iv_output_data_buffs_t *)ihevce_q_get_free_out_strm_buff(
+        ps_hle, &desc_id, BUFF_QUE_NON_BLOCKING_MODE, i4_br_id, i4_res_id);
+    if(NULL == ps_desc)
+    {
+        return IHEVCE_EOK;
+    }
+    /* ------------------------- critical section: begin ------------------------- */
+    lock_status = osal_mutex_lock(
+        ps_app->as_out_strm_prms[i4_res_id][i4_br_id].pv_app_out_strm_buf_mutex_hdl);
+    if(OSAL_SUCCESS != lock_status)
+    {
+        return IHEVCE_EFAIL;
+    }
+
+    /* mirror the app context's stream end flag into the descriptor and the plugin context */
+    if(1 != ps_app->ai4_out_strm_end_flag[i4_res_id][i4_br_id])
+    {
+        ps_desc->i4_is_last_buf = 0;
+    }
+    else
+    {
+        ps_desc->i4_is_last_buf = 1;
+        ps_ctxt->ai4_out_end_flag[i4_res_id][i4_br_id] = 1;
+    }
+    ASSERT(ps_ctxt->aaas_out_bufs[i4_res_id][i4_br_id][free_idx].i4_is_free == 1);
+    ASSERT(free_idx == ps_ctxt->aaas_out_bufs[i4_res_id][i4_br_id][free_idx].i4_idx);
+
+    /* attach the buffer to the descriptor and mark it as in use and not yet produced */
+    ps_desc->pv_bitstream_bufs =
+        (void *)ps_ctxt->aaas_out_bufs[i4_res_id][i4_br_id][free_idx].pu1_buf;
+    ps_desc->i4_cb_buf_id = ps_ctxt->aaas_out_bufs[i4_res_id][i4_br_id][free_idx].i4_idx;
+    ps_ctxt->aaas_out_bufs[i4_res_id][i4_br_id][free_idx].i4_is_free = 0;
+    ps_ctxt->aaas_out_bufs[i4_res_id][i4_br_id][free_idx].i4_is_prod = 0;
+    ps_ctxt->aaas_out_bufs[i4_res_id][i4_br_id][free_idx].i4_bytes_gen = 0;
+
+    /* advance the free index, wrapping around at the number of output buffers */
+    ps_ctxt->ai4_free_out_buf_idx[i4_res_id][i4_br_id]++;
+    if(ps_ctxt->ai4_free_out_buf_idx[i4_res_id][i4_br_id] == ps_ctxt->i4_num_out_bufs)
+    {
+        ps_ctxt->ai4_free_out_buf_idx[i4_res_id][i4_br_id] = 0;
+    }
+    /* ------------------------- critical section: end --------------------------- */
+    lock_status = osal_mutex_unlock(
+        ps_app->as_out_strm_prms[i4_res_id][i4_br_id].pv_app_out_strm_buf_mutex_hdl);
+    if(OSAL_SUCCESS != lock_status)
+    {
+        return IHEVCE_EFAIL;
+    }
+    /* mark the descriptor as produced in the output stream queue */
+    ihevce_q_set_out_strm_buff_prod(ps_hle, desc_id, i4_br_id, i4_res_id);
+    return IHEVCE_EOK;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_close \endif
+*
+* \brief
+*    De-initialises the encoder: queues a flush command, joins the HLE thread and frees resources
+*
+* \param[in] pv_ihevce_hdl : plugin handle (plugin_ctxt_t *); freed on success
+*
+* \return
+*    IHEVCE_EOK on success; IHEVCE_EFAIL for a NULL handle or a non-zero res/br id instance
+*
+* \author
+*  Ittiam
+*
+*****************************************************************************
+*/
+IHEVCE_PLUGIN_STATUS_T ihevce_close(void *pv_ihevce_hdl)
+{
+    /* local variables */
+    plugin_ctxt_t *ps_ctxt;
+    app_ctxt_t *ps_app_ctxt;
+    ihevce_hle_ctxt_t *ps_interface_ctxt;
+    WORD32 i4_num_resolutions;
+    WORD32 i4_res_id;
+    WORD32 i4_br_id;
+    WORD32 ai4_num_bitrate_instances[IHEVCE_MAX_NUM_RESOLUTIONS] = { 1 };
+    ihevce_sys_api_t *ps_sys_api;
+
+    /* sanity checks */
+    if(NULL == pv_ihevce_hdl)
+        return (IHEVCE_EFAIL);
+
+    /* derive local variables */
+    ps_ctxt = (plugin_ctxt_t *)pv_ihevce_hdl;
+
+    ps_sys_api = &ps_ctxt->ps_static_cfg_prms->s_sys_api;
+    /* teardown is performed only for the instance with res id 0 and br id 0 */
+    if((0 == ps_ctxt->ps_static_cfg_prms->i4_res_id) &&
+       (0 == ps_ctxt->ps_static_cfg_prms->i4_br_id))
+    {
+        ps_interface_ctxt = (ihevce_hle_ctxt_t *)ps_ctxt->pv_hle_interface_ctxt;
+        ps_app_ctxt = &ps_ctxt->s_app_ctxt;
+        i4_num_resolutions = ps_ctxt->ps_static_cfg_prms->s_tgt_lyr_prms.i4_num_res_layers;
+        /* if flush mode is not already on, queue output buffers and send a flush command */
+        if(1 != ps_ctxt->i4_flush_mode_on)
+        {
+            for(i4_res_id = 0; i4_res_id < i4_num_resolutions; i4_res_id++)
+            {
+                ai4_num_bitrate_instances[i4_res_id] =
+                    ps_ctxt->ps_static_cfg_prms->s_tgt_lyr_prms.as_tgt_params[i4_res_id]
+                        .i4_num_bitrate_instances;
+                for(i4_br_id = 0; i4_br_id < ai4_num_bitrate_instances[i4_res_id]; i4_br_id++)
+                {
+                    /* ------- Que in free output buffer if end flag is not set ------ */
+                    ihevce_queue_out_buffer(ps_ctxt, i4_res_id, i4_br_id);
+                }
+            }
+            /* --------------------------------------------------------------------- */
+            /*            Input Processing                                           */
+            /* --------------------------------------------------------------------- */
+            {
+                WORD32 buf_id;
+
+                iv_input_data_ctrl_buffs_t *ps_curr_inp;
+                WORD32 *pi4_ctrl_ptr;
+
+                /* ---------- get a free buffer from input Q ------ */
+                ps_curr_inp = (iv_input_data_ctrl_buffs_t *)ihevce_q_get_free_inp_data_buff(
+                    ps_interface_ctxt, &buf_id, BUFF_QUE_BLOCKING_MODE);
+
+                if(NULL != ps_curr_inp)
+                {
+                    /* flush mode command */
+
+                    ps_curr_inp->i4_buf_id = buf_id;
+
+                    /* set the input status to invalid flag */
+                    ps_curr_inp->i4_inp_frm_data_valid_flag = 0;
+
+                    pi4_ctrl_ptr = (WORD32 *)ps_curr_inp->pv_synch_ctrl_bufs;
+
+                    *pi4_ctrl_ptr = IHEVCE_SYNCH_API_FLUSH_TAG;
+                    *(pi4_ctrl_ptr + 1) = 0;
+                    *(pi4_ctrl_ptr + 2) = IHEVCE_SYNCH_API_END_TAG;
+
+                    ps_curr_inp->i4_cmd_buf_size = 4 * 3; /* 3 words of 4 bytes */
+
+                    /* ---------- set the buffer as produced ---------- */
+                    ihevce_q_set_inp_data_buff_prod(ps_interface_ctxt, buf_id);
+                }
+                else
+                {
+                    /* Enable flush-mode and internal-flush once limit according to
+                    Eval-version is reached */
+                    ps_ctxt->i4_flush_mode_on = 1;
+                }
+            }
+        }
+
+        /* --------------------------------------------------------------------- */
+        /*            Wait and destroy Processing threads                        */
+        /* --------------------------------------------------------------------- */
+
+        /* Wait for High level encoder thread to complete */
+        osal_thread_wait(ps_ctxt->pv_hle_thread_hdl);
+
+        /* Destroy Hle thread */
+        osal_thread_destroy(ps_ctxt->pv_hle_thread_hdl);
+
+        /* --------------------------------------------------------------------- */
+        /*            Input Output and Command buffers free                      */
+        /* --------------------------------------------------------------------- */
+
+        /* free output data and control buffer */
+
+        for(i4_res_id = 0; i4_res_id < i4_num_resolutions; i4_res_id++)
+        {
+            ai4_num_bitrate_instances[i4_res_id] =
+                ps_ctxt->ps_static_cfg_prms->s_tgt_lyr_prms.as_tgt_params[i4_res_id]
+                    .i4_num_bitrate_instances;
+
+            for(i4_br_id = 0; i4_br_id < ai4_num_bitrate_instances[i4_res_id]; i4_br_id++)
+            {
+                mem_mngr_free(
+                    ps_app_ctxt->pv_mem_mngr_handle,
+                    &ps_ctxt->as_memtab_out_data_buf[i4_res_id][i4_br_id]);
+
+                /* free mutex of out strm buf b/w appln and encoder */
+                osal_mutex_destroy(ps_app_ctxt->as_out_strm_prms[i4_res_id][i4_br_id]
+                                       .pv_app_out_strm_buf_mutex_hdl);
+
+                osal_cond_var_destroy(ps_app_ctxt->as_out_strm_prms[i4_res_id][i4_br_id]
+                                          .pv_app_out_strm_buf_cond_var_hdl);
+            }
+        }
+
+        ps_ctxt->ihevce_mem_free(NULL, ps_ctxt->pu1_out_ctrl_buf);
+        ps_ctxt->ihevce_mem_free(NULL, ps_ctxt->pu1_inp_async_ctrl_buf);
+
+        /* free input data and control buffer */
+        if(ps_interface_ctxt->i4_create_time_input_allocation == 1)
+        {
+            mem_mngr_free(ps_app_ctxt->pv_mem_mngr_handle, &ps_ctxt->s_memtab_inp_data_buf);
+            mem_mngr_free(ps_app_ctxt->pv_mem_mngr_handle, &ps_ctxt->s_memtab_inp_sync_ctrl_buf);
+        }
+
+        /* --------------------------------------------------------------------- */
+        /*            Encoder Instance Deletion                                  */
+        /* --------------------------------------------------------------------- */
+        ihevce_hle_interface_delete(ps_interface_ctxt);
+
+        /* free the high level encoder context memory */
+        ps_ctxt->ihevce_mem_free(NULL, ps_ctxt->pv_hle_interface_ctxt);
+        /* close the CSV dump file when CSV dumping is enabled */
+        if(ps_ctxt->ps_static_cfg_prms->i4_enable_csv_dump)
+        {
+            ps_sys_api->s_file_io_api.ihevce_fclose(
+                (void *)ps_sys_api->pv_cb_handle, ps_ctxt->ps_static_cfg_prms->apF_csv_file[0][0]);
+        }
+
+        /* free static config memory (ps_sys_api points into it and must not be used after this) */
+        ps_ctxt->ihevce_mem_free(NULL, ps_ctxt->ps_static_cfg_prms);
+
+        /* free osal handle */
+        memory_free(NULL, ps_ctxt->pv_osal_handle);
+
+        /* free plugin ctxt memory */
+        memory_free(NULL, pv_ihevce_hdl);
+    }
+    else
+    {
+        return (IHEVCE_EFAIL);
+    }
+
+    return (IHEVCE_EOK);
+}
+
+/* Copies copy_ht rows of copy_wd bytes from pu1_src to pu1_dst. Columns copy_wd..buf_wd-1 of
+ * each copied row and all rows copy_ht..buf_ht-1 of the destination are filled with pad_val. */
+static void ihevce_copy_rows_with_padding(
+    UWORD8 *pu1_dst,
+    WORD32 dst_strd,
+    UWORD8 *pu1_src,
+    WORD32 src_strd,
+    WORD32 copy_wd,
+    WORD32 copy_ht,
+    WORD32 buf_wd,
+    WORD32 buf_ht,
+    WORD32 pad_val)
+{
+    WORD32 row;
+
+    for(row = 0; row < copy_ht; row++)
+    {
+        memcpy(pu1_dst, pu1_src, copy_wd);
+        if(buf_wd > copy_wd)
+        {
+            memset(pu1_dst + copy_wd, pad_val, buf_wd - copy_wd);
+        }
+        pu1_src += src_strd;
+        pu1_dst += dst_strd;
+    }
+    /* rows below the copied area hold padding only */
+    for(; row < buf_ht; row++)
+    {
+        memset(pu1_dst, pad_val, buf_wd);
+        pu1_dst += dst_strd;
+    }
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_copy_inp_8bit \endif
+*
+* \brief
+*    Input copy function for 8 bit input
+*
+* \param[in] Source and destination buffer descriptors, chroma format, frame size
+*
+* \return
+*    IV_SUCCESS, or IV_FAIL when a required source plane is NULL or too small
+*
+* \author
+*  Ittiam
+*
+*****************************************************************************
+*/
+IV_API_CALL_STATUS_T ihevce_copy_inp_8bit(
+    iv_input_data_ctrl_buffs_t *ps_curr_inp,
+    ihevce_inp_buf_t *ps_inp,
+    WORD32 chroma_format,
+    WORD32 i4_orig_wd,
+    WORD32 i4_orig_ht)
+{
+    WORD32 row, col;
+
+    /* ------------------------------ luma plane ------------------------------ */
+    /* the source must be non-NULL and hold at least stride * height bytes */
+    if((ps_inp->ai4_inp_size[0] < (ps_inp->ai4_inp_strd[0] * i4_orig_ht)) ||
+       (ps_inp->ai4_inp_size[0] <= 0) || (ps_inp->apv_inp_planes[0] == NULL))
+    {
+        return (IV_FAIL);
+    }
+    /* copy the luma rows; the unused part of the internal buffer is zero filled */
+    ihevce_copy_rows_with_padding(
+        (UWORD8 *)ps_curr_inp->s_input_buf.pv_y_buf,
+        ps_curr_inp->s_input_buf.i4_y_strd,
+        (UWORD8 *)ps_inp->apv_inp_planes[0],
+        ps_inp->ai4_inp_strd[0],
+        i4_orig_wd,
+        i4_orig_ht,
+        ps_curr_inp->s_input_buf.i4_y_wd,
+        ps_curr_inp->s_input_buf.i4_y_ht,
+        0x0);
+
+    /* ----------------------------- chroma planes ---------------------------- */
+    if(IV_YUV_420P == chroma_format)
+    {
+        UWORD8 *pu1_u_row = (UWORD8 *)ps_inp->apv_inp_planes[1];
+        UWORD8 *pu1_v_row = (UWORD8 *)ps_inp->apv_inp_planes[2];
+        UWORD8 *pu1_uv_row = (UWORD8 *)ps_curr_inp->s_input_buf.pv_u_buf;
+        WORD32 u_strd = ps_inp->ai4_inp_strd[1];
+        WORD32 v_strd = ps_inp->ai4_inp_strd[2];
+        WORD32 uv_strd = ps_curr_inp->s_input_buf.i4_uv_strd;
+        WORD32 uv_buf_wd = ps_curr_inp->s_input_buf.i4_uv_wd;
+        WORD32 uv_buf_ht = ps_curr_inp->s_input_buf.i4_uv_ht;
+        WORD32 chroma_wd = i4_orig_wd >> 1;
+        WORD32 chroma_ht = i4_orig_ht >> 1;
+
+        /* both source chroma planes are validated before any chroma is written */
+        if((ps_inp->ai4_inp_size[1] < (u_strd * chroma_ht)) ||
+           (ps_inp->ai4_inp_size[1] <= 0) || (pu1_u_row == NULL))
+        {
+            return (IV_FAIL);
+        }
+        if((ps_inp->ai4_inp_size[2] < (v_strd * chroma_ht)) ||
+           (ps_inp->ai4_inp_size[2] <= 0) || (pu1_v_row == NULL))
+        {
+            return (IV_FAIL);
+        }
+
+        /* interleave the planar input as U V U V ...; leftover columns are set to 0x80 */
+        for(row = 0; row < chroma_ht; row++)
+        {
+            UWORD8 *pu1_uv = pu1_uv_row;
+
+            for(col = 0; col < chroma_wd; col++)
+            {
+                *pu1_uv++ = pu1_u_row[col];
+                *pu1_uv++ = pu1_v_row[col];
+            }
+            if(uv_buf_wd > (col << 1))
+            {
+                memset(pu1_uv, 0x80, uv_buf_wd - (col << 1));
+            }
+            pu1_u_row += u_strd;
+            pu1_v_row += v_strd;
+            pu1_uv_row += uv_strd;
+        }
+        /* rows below the picture are filled with 0x80 */
+        for(; row < uv_buf_ht; row++)
+        {
+            memset(pu1_uv_row, 0x80, uv_buf_wd);
+            pu1_uv_row += uv_strd;
+        }
+    }
+    else if(IV_YUV_420SP_UV == chroma_format)
+    {
+        WORD32 chroma_ht = i4_orig_ht >> 1;
+
+        if((ps_inp->ai4_inp_size[1] < (ps_inp->ai4_inp_strd[1] * chroma_ht)) ||
+           (ps_inp->ai4_inp_size[1] <= 0) || (ps_inp->apv_inp_planes[1] == NULL))
+        {
+            return (IV_FAIL);
+        }
+        /* the input is already interleaved: copy row by row, padding with 0x80 */
+        ihevce_copy_rows_with_padding(
+            (UWORD8 *)ps_curr_inp->s_input_buf.pv_u_buf,
+            ps_curr_inp->s_input_buf.i4_uv_strd,
+            (UWORD8 *)ps_inp->apv_inp_planes[1],
+            ps_inp->ai4_inp_strd[1],
+            i4_orig_wd,
+            chroma_ht,
+            ps_curr_inp->s_input_buf.i4_uv_wd,
+            ps_curr_inp->s_input_buf.i4_uv_ht,
+            0x80);
+    }
+    return (IV_SUCCESS);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_encode_header \endif
+*
+* \brief
+*    Receive sps, pps and vps of the encoded sequence
+*
+* \param[in] pv_ihevce_hdl : plugin handle
+* \param[out] ps_out : output buffer descriptor filled with the received header output
+*
+* \return
+*    IHEVCE_EOK on success; IHEVCE_EFAIL on NULL arguments, non-zero res/br id or a failed step
+*
+* \author
+*  Ittiam
+*****************************************************************************
+*/
+IHEVCE_PLUGIN_STATUS_T ihevce_encode_header(void *pv_ihevce_hdl, ihevce_out_buf_t *ps_out)
+{
+    plugin_ctxt_t *ps_ctxt = (plugin_ctxt_t *)pv_ihevce_hdl;
+    ihevce_hle_ctxt_t *ps_interface_ctxt;
+
+    /* sanity checks */
+    if(NULL == pv_ihevce_hdl)
+        return (IHEVCE_EFAIL);
+
+    if(NULL == ps_out)
+        return (IHEVCE_EFAIL);
+
+    if((0 == ps_ctxt->ps_static_cfg_prms->i4_res_id) &&
+       (0 == ps_ctxt->ps_static_cfg_prms->i4_br_id))
+    {
+        WORD32 status;
+
+        /* ------- Que in free output buffer if end flag is not set ------ */
+        if(IHEVCE_EOK != ihevce_queue_out_buffer(ps_ctxt, 0, 0))
+            return (IHEVCE_EFAIL);
+        /* ------- API call to encoder header ------- */
+        ps_interface_ctxt = (ihevce_hle_ctxt_t *)ps_ctxt->pv_hle_interface_ctxt;
+        status = ihevce_entropy_encode_header(ps_interface_ctxt, 0, 0);
+        if(status)
+            return IHEVCE_EFAIL;
+        /* ------- receive header; a failure is reported instead of being dropped ------- */
+        if(IHEVCE_EOK != ihevce_receive_out_buffer(ps_ctxt, ps_out))
+            return (IHEVCE_EFAIL);
+    }
+    else
+    {
+        return (IHEVCE_EFAIL);
+    }
+
+    return (IHEVCE_EOK);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_encode \endif
+*
+* \brief
+*    Frame level processing function
+*
+* \param[in] pv_ihevce_hdl : plugin handle; ps_inp : input frame, NULL to request a flush
+* \param[out] ps_out : filled only when an output buffer is received in this call
+*
+* \return
+*    IHEVCE_EOK on success; IHEVCE_EFAIL on invalid arguments or when a step fails
+*
+* \author
+*  Ittiam
+*****************************************************************************
+*/
+IHEVCE_PLUGIN_STATUS_T
+    ihevce_encode(void *pv_ihevce_hdl, ihevce_inp_buf_t *ps_inp, ihevce_out_buf_t *ps_out)
+{
+    /* local variables */
+    plugin_ctxt_t *ps_ctxt;
+    app_ctxt_t *ps_app_ctxt;
+    ihevce_hle_ctxt_t *ps_interface_ctxt;
+
+    WORD32 i4_res_id, i4_br_id;
+    WORD32 i4_num_resolutions;
+    WORD32 ai4_num_bitrate_instances[IHEVCE_MAX_NUM_RESOLUTIONS] = { 1 };
+    UWORD32 u4_latency = 0;
+
+    /* sanity checks */
+    if(NULL == pv_ihevce_hdl)
+        return (IHEVCE_EFAIL);
+
+    if(NULL == ps_out)
+        return (IHEVCE_EFAIL);
+
+    /* derive local variables */
+    ps_ctxt = (plugin_ctxt_t *)pv_ihevce_hdl;
+    if((0 == ps_ctxt->ps_static_cfg_prms->i4_res_id) &&
+       (0 == ps_ctxt->ps_static_cfg_prms->i4_br_id))
+    {
+        ps_interface_ctxt = (ihevce_hle_ctxt_t *)ps_ctxt->pv_hle_interface_ctxt;
+        ps_app_ctxt = &ps_ctxt->s_app_ctxt;
+        i4_num_resolutions = ps_ctxt->ps_static_cfg_prms->s_tgt_lyr_prms.i4_num_res_layers;
+
+        if(ps_ctxt->ps_static_cfg_prms->s_coding_tools_prms.i4_max_temporal_layers)
+        {
+            u4_latency +=
+                (1 << ps_ctxt->ps_static_cfg_prms->s_coding_tools_prms.i4_max_temporal_layers);
+        }
+
+        u4_latency += ps_ctxt->ps_static_cfg_prms->s_lap_prms.i4_rc_look_ahead_pics;
+
+        /* Once the internal-flush-flag has been set and codec has issued
+        end flag, exit encoding by returning IHEVCE_EFAIL */
+        if(ps_ctxt->i4_internal_flush)
+        {
+            if(1 == ps_app_ctxt->ai4_out_strm_end_flag[0][0])
+                return (IHEVCE_EFAIL);
+        }
+
+        for(i4_res_id = 0; i4_res_id < i4_num_resolutions; i4_res_id++)
+        {
+            ai4_num_bitrate_instances[i4_res_id] =
+                ps_ctxt->ps_static_cfg_prms->s_tgt_lyr_prms.as_tgt_params[i4_res_id]
+                    .i4_num_bitrate_instances;
+            for(i4_br_id = 0; i4_br_id < ai4_num_bitrate_instances[i4_res_id]; i4_br_id++)
+            {
+                /* ------- Que in free output buffer if end flag is not set ------ */
+                if(IHEVCE_EOK != ihevce_queue_out_buffer(ps_ctxt, i4_res_id, i4_br_id))
+                    return (IHEVCE_EFAIL);
+            }
+        }
+        /* --------------------------------------------------------------------- */
+        /*            Input Processing                                           */
+        /* --------------------------------------------------------------------- */
+        if(0 == ps_ctxt->i4_flush_mode_on)
+        {
+            WORD32 frm_stride;
+            WORD32 frm_width;
+            WORD32 frm_height;
+            WORD32 buf_id;
+
+            iv_input_data_ctrl_buffs_t *ps_curr_inp;
+            WORD32 *pi4_ctrl_ptr;
+
+            frm_width = ps_ctxt->ps_static_cfg_prms->s_src_prms.i4_width;
+            frm_height = ps_ctxt->ps_static_cfg_prms->s_src_prms.i4_height;
+            frm_stride = ps_ctxt->i4_frm_stride;
+
+            /* ---------- get a free buffer from input Q ------ */
+            ps_curr_inp = (iv_input_data_ctrl_buffs_t *)ihevce_q_get_free_inp_data_buff(
+                ps_interface_ctxt, &buf_id, BUFF_QUE_BLOCKING_MODE);
+
+            if(NULL != ps_curr_inp)
+            {
+                /* if input buffer is not NULL */
+                if(NULL != ps_inp)
+                {
+                    WORD32 result;
+
+                    pi4_ctrl_ptr = (WORD32 *)ps_curr_inp->pv_synch_ctrl_bufs;
+
+                    /* ---------- set ip params ---------- */
+                    ps_curr_inp->s_input_buf.i4_size = sizeof(iv_yuv_buf_t);
+                    ps_curr_inp->s_input_buf.i4_y_wd = frm_width;
+                    ps_curr_inp->s_input_buf.i4_y_ht = frm_height;
+                    ps_curr_inp->s_input_buf.i4_y_strd = frm_stride;
+                    ps_curr_inp->s_input_buf.i4_uv_wd = frm_width;
+                    ps_curr_inp->s_input_buf.i4_uv_ht =
+                        frm_height >>
+                        ((ps_ctxt->ps_static_cfg_prms->s_src_prms.inp_chr_format == 13) ? 0 : 1);
+                    ps_curr_inp->s_input_buf.i4_uv_strd = frm_stride;
+
+                    ps_curr_inp->i4_buf_id = buf_id;
+                    ps_curr_inp->i4_inp_frm_data_valid_flag = 1;
+                    ps_curr_inp->i4_topfield_first = 1; /* set to default */
+                    ps_curr_inp->i4_bottom_field = ps_ctxt->i4_field_id;
+                    ps_curr_inp->i4_inp_timestamp_low = (WORD32)(ps_inp->u8_pts & 0xFFFFFFFF);
+                    ps_curr_inp->i4_inp_timestamp_high = (WORD32)(ps_inp->u8_pts >> 32);
+
+                    /* toggle field id */
+                    ps_ctxt->i4_field_id = !ps_ctxt->i4_field_id;
+
+                    /* set the cmd to NA */
+                    *pi4_ctrl_ptr = IHEVCE_SYNCH_API_END_TAG;
+
+                    ps_curr_inp->i4_cmd_buf_size = 4; /* 4 bytes */
+
+                    /* call the input copy function */
+                    result = ihevce_copy_inp_8bit(
+                        ps_curr_inp,
+                        ps_inp,
+                        ps_ctxt->ps_static_cfg_prms->s_src_prms.inp_chr_format,
+                        ps_ctxt->ps_static_cfg_prms->s_src_prms.i4_orig_width,
+                        ps_ctxt->ps_static_cfg_prms->s_src_prms.i4_orig_height);
+                    /* NOTE(review): this early return never marks ps_curr_inp as produced */
+                    if(IV_SUCCESS != result)
+                        return (IHEVCE_EFAIL);
+
+                    if(3 != ps_ctxt->ps_static_cfg_prms->s_config_prms.i4_rate_control_mode)
+                    {
+                        /* Dynamic Change in bitrate not supported for multi res/MBR */
+                        /*** Check for change in bitrate command ***/
+                        if(ps_ctxt->ai4_old_bitrate[0][0] != ps_inp->i4_curr_bitrate)
+                        {
+                            WORD32 ctrl_buf_id;
+                            WORD32 *pi4_cmd_buf;
+                            iv_input_ctrl_buffs_t *ps_ctrl_buf;
+                            ihevce_dyn_config_prms_t *ps_dyn_br;
+                            WORD32 codec_level_index = ihevce_get_level_index(
+                                ps_ctxt->ps_static_cfg_prms->s_tgt_lyr_prms.as_tgt_params[0]
+                                    .i4_codec_level);
+                            WORD32 max_bitrate =
+                                g_as_level_data[codec_level_index].i4_max_bit_rate
+                                    [ps_ctxt->ps_static_cfg_prms->s_out_strm_prms.i4_codec_tier] *
+                                1000;
+
+                            /* ---------- get a free buffer from command Q ------ */
+                            ps_ctrl_buf = (iv_input_ctrl_buffs_t *)ihevce_q_get_free_inp_ctrl_buff(
+                                ps_interface_ctxt, &ctrl_buf_id, BUFF_QUE_BLOCKING_MODE);
+                            /* without a command buffer the request is retried with the next frame */
+                            if(NULL != ps_ctrl_buf)
+                            {
+                                /* store the buffer id */
+                                ps_ctrl_buf->i4_buf_id = ctrl_buf_id;
+                                /* get the buffer pointer */
+                                pi4_cmd_buf = (WORD32 *)ps_ctrl_buf->pv_asynch_ctrl_bufs;
+                                /* store the set bitrate command tag followed by its payload size */
+                                *pi4_cmd_buf = IHEVCE_ASYNCH_API_SETBITRATE_TAG;
+                                *(pi4_cmd_buf + 1) = sizeof(ihevce_dyn_config_prms_t);
+
+                                ps_dyn_br = (ihevce_dyn_config_prms_t *)(pi4_cmd_buf + 2);
+                                ps_dyn_br->i4_size = sizeof(ihevce_dyn_config_prms_t);
+                                ps_dyn_br->i4_tgt_br_id = 0;
+                                ps_dyn_br->i4_tgt_res_id = 0;
+                                ps_dyn_br->i4_new_tgt_bitrate =
+                                    MIN(ps_inp->i4_curr_bitrate, max_bitrate);
+                                ps_dyn_br->i4_new_peak_bitrate =
+                                    MIN((ps_dyn_br->i4_new_tgt_bitrate << 1), max_bitrate);
+
+                                pi4_cmd_buf += 2;
+                                pi4_cmd_buf += (sizeof(ihevce_dyn_config_prms_t) >> 2);
+                                *(pi4_cmd_buf) = IHEVCE_ASYNCH_API_END_TAG;
+
+                                /* ---------- set the buffer as produced ---------- */
+                                ihevce_q_set_inp_ctrl_buff_prod(ps_interface_ctxt, ctrl_buf_id);
+                                ps_ctxt->ai4_old_bitrate[0][0] = ps_inp->i4_curr_bitrate;
+                            }
+                        }
+                    }
+
+                    ps_ctxt->u8_num_frames_queued++;
+                }
+                else
+                { /* flush mode command */
+
+                    ps_curr_inp->i4_buf_id = buf_id;
+
+                    /* set the input status to invalid flag */
+                    ps_curr_inp->i4_inp_frm_data_valid_flag = 0;
+
+                    pi4_ctrl_ptr = (WORD32 *)ps_curr_inp->pv_synch_ctrl_bufs;
+
+                    *pi4_ctrl_ptr = IHEVCE_SYNCH_API_FLUSH_TAG;
+                    *(pi4_ctrl_ptr + 1) = 0;
+                    *(pi4_ctrl_ptr + 2) = IHEVCE_SYNCH_API_END_TAG;
+
+                    ps_curr_inp->i4_cmd_buf_size = 4 * 3; /* 3 words of 4 bytes */
+                }
+
+                /* ---------- set the buffer as produced ---------- */
+                ihevce_q_set_inp_data_buff_prod(ps_interface_ctxt, buf_id);
+                ps_ctxt->u8_num_frames_encoded++;
+            }
+            else
+            {
+                /* Enable flush-mode and internal-flush once limit according to
+                Eval-version is reached */
+                ps_ctxt->i4_flush_mode_on = 1;
+                ps_ctxt->i4_internal_flush = 1;
+            }
+        }
+
+        /* set encoder in flush mode if input buffer is NULL */
+        if(0 == ps_ctxt->i4_flush_mode_on)
+        {
+            if(NULL == ps_inp)
+            {
+                ps_ctxt->i4_flush_mode_on = 1;
+            }
+        }
+        if((u4_latency < ps_ctxt->u8_num_frames_queued) || (1 == ps_ctxt->i4_flush_mode_on))
+        {
+            /* --------------------------------------------------------------------- */
+            /*            Output Processing                                          */
+            /* --------------------------------------------------------------------- */
+            if(IHEVCE_EOK != ihevce_receive_out_buffer(ps_ctxt, ps_out))
+                return (IHEVCE_EFAIL);
+        }
+    }
+    else //Other bitrate and resolution instances
+    {
+        return IHEVCE_EFAIL;
+    }
+    return (IHEVCE_EOK);
+}
+
diff --git a/encoder/ihevce_plugin.h b/encoder/ihevce_plugin.h
new file mode 100644
index 0000000..ddce830
--- /dev/null
+++ b/encoder/ihevce_plugin.h
@@ -0,0 +1,144 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_plugin.h
+*
+* \brief
+*    This file contains plugin interface definitions and structures
+*
+* \date
+* 15/04/2014
+*
+* \author
+* Ittiam
+*
+*
+* List of Functions
+*
+*
+******************************************************************************
+*/
+#ifndef _IHEVCE_PLUGIN_H_
+#define _IHEVCE_PLUGIN_H_
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+#define MAX_INP_PLANES 3
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+typedef enum
+{
+    IHEVCE_EFAIL = 0xFFFFFFFF, /* failure; NOTE(review): exceeds INT_MAX for a 32-bit int */
+    IHEVCE_EOK = 0 /* success */
+} IHEVCE_PLUGIN_STATUS_T;
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+typedef struct
+{
+    /* input buffer pointers, one per plane */
+    void *apv_inp_planes[MAX_INP_PLANES];
+
+    /* input buffer strides, one per plane */
+    WORD32 ai4_inp_strd[MAX_INP_PLANES];
+
+    /* input buffer sizes, one per plane; checked against stride * height before copying */
+    WORD32 ai4_inp_size[MAX_INP_PLANES];
+
+    /* PTS of the input */
+    ULWORD64 u8_pts;
+
+    /* Current bitrate; ihevce_encode compares it with the previously applied bitrate */
+    WORD32 i4_curr_bitrate;
+
+    /* Current peak bitrate */
+    WORD32 i4_curr_peak_bitrate;
+
+    /* Current rate factor */
+    WORD32 i4_curr_rate_factor;
+
+} ihevce_inp_buf_t;
+
+typedef struct
+{
+    /* Output buffer pointer (if set to NULL then no output is sent out from encoder) */
+    UWORD8 *pu1_output_buf;
+
+    /* Number of bytes generated in the buffer (0 when no output is sent out) */
+    WORD32 i4_bytes_generated;
+
+    /* Key frame flag */
+    WORD32 i4_is_key_frame;
+
+    /* PTS of the output, assembled from the low and high 32-bit timestamp words */
+    ULWORD64 u8_pts;
+
+    /* DTS of the output; taken from a counter that is incremented per returned buffer */
+    LWORD64 i8_dts;
+
+    /* Flag to check if last output buffer sent from encoder */
+    WORD32 i4_end_flag;
+
+} ihevce_out_buf_t;
+
+/*****************************************************************************/
+/* Extern Variable Declarations */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+IHEVCE_PLUGIN_STATUS_T ihevce_set_def_params(ihevce_static_cfg_params_t *ps_params);
+/* On success stores the plugin handle in *ppv_ihevce_hdl */
+IHEVCE_PLUGIN_STATUS_T ihevce_init(ihevce_static_cfg_params_t *ps_params, void **ppv_ihevce_hdl);
+/* Encodes the stream headers and returns them through ps_out */
+IHEVCE_PLUGIN_STATUS_T ihevce_encode_header(void *pv_ihevce_hdl, ihevce_out_buf_t *ps_out);
+/* Queues one input frame (ps_inp == NULL starts flush) and may return one output in ps_out */
+IHEVCE_PLUGIN_STATUS_T
+    ihevce_encode(void *pv_ihevce_hdl, ihevce_inp_buf_t *ps_inp, ihevce_out_buf_t *ps_out);
+/* Stops the encoder thread and frees the plugin handle along with its resources */
+IHEVCE_PLUGIN_STATUS_T ihevce_close(void *pv_ihevce_hdl);
+
+void ihevce_init_sys_api(void *pv_cb_handle, ihevce_sys_api_t *ps_sys_api);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _IHEVCE_PLUGIN_H_ */
diff --git a/encoder/ihevce_plugin_priv.h b/encoder/ihevce_plugin_priv.h
new file mode 100644
index 0000000..df76df0
--- /dev/null
+++ b/encoder/ihevce_plugin_priv.h
@@ -0,0 +1,269 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_plugin_priv.h
+*
+* \brief
+* This file contains sample application definitions and structures
+*
+* \date
+* 15/04/2014
+*
+* \author
+* Ittiam
+*
+*
+* List of Functions
+*
+*
+******************************************************************************
+*/
+#ifndef _IHEVCE_PLUGIN_PRIV_H_
+#define _IHEVCE_PLUGIN_PRIV_H_
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+#define XTRA_INP_DATA_BUFS 0
+#define MAX_NUM_INP_DATA_BUFS (MAX_SUB_GOP_SIZE + NUM_LAP2_LOOK_AHEAD) /* parenthesised: sum stays intact inside larger expressions */
+#define MAX_NUM_INP_CTRL_SYNC_BUFS MAX_NUM_INP_DATA_BUFS
+#define MAX_NUM_INP_CTRL_ASYNC_BUFS 5
+
+#define XTRA_OUT_DATA_BUFS 0
+#define MAX_NUM_OUT_DATA_BUFS (16 + XTRA_OUT_DATA_BUFS)
+#define MAX_NUM_OUT_CTRL_ASYNC_BUFS 16
+
+#define MAX_NUM_RECON_DATA_BUFS 64
+
+/** Queue from Master to Slave for MBR/MRES cases **/
+#define MBR_M2S_QUEUE 200
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+#define SET_CTB_ALIGN(x, y) ((((x) & ((y)-1)) == 0) ? 0 : (y) - ((x) & ((y)-1)))
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+
+/**
+* @brief Store output buffer parameters
+*/
+typedef struct
+{
+ UWORD8 *pu1_bs_buffer;
+ WORD32 i4_bytes_generated;
+ WORD32 i4_is_key_frame;
+ ULWORD64 u8_pts;
+ LWORD64 i8_dts;
+} bitstream_buf_t;
+
+/**
+* @brief Store buffer queue parameters
+*/
+typedef struct
+{
+ /******* Buffer q indexes *******/
+ WORD32 i4_q_rd_idx;
+ WORD32 i4_q_wr_idx;
+ WORD32 i4_quit;
+ WORD32 i4_q_size;
+
+ /******* Semaphore Handles ******/
+ void *pv_q_deq_sem_handle;
+ void *pv_rel_free_sem_handle;
+} queue_t;
+
+/**
+* @brief Datatype of global pointer used for data sharing
+* across encoder nodes.
+*/
+
+typedef struct
+{
+ queue_t s_queue_params;
+ bitstream_buf_t bs_buf_nodes[MBR_M2S_QUEUE];
+ WORD32 i4_slave_inst_done;
+
+} ihevce_mbr_mres_handle_t;
+
+typedef struct
+{
+ WORD32 i4_idx;
+
+ UWORD8 *pu1_buf;
+
+ WORD32 i4_is_free;
+
+ WORD32 i4_is_prod;
+
+ WORD32 i4_timestamp_low;
+
+ WORD32 i4_timestamp_high;
+
+ WORD32 i4_bytes_gen;
+
+ WORD32 i4_is_key_frame;
+
+ WORD32 i4_buf_size;
+
+ WORD32 i4_end_flag;
+
+} out_buf_ctxt_t;
+
+typedef struct
+{
+ ULWORD64 u8_total_bits;
+
+ UWORD32 u4_num_frms_enc;
+
+ /* mutex controlling the out strm buf b/w appln and encoder */
+ void *pv_app_out_strm_buf_mutex_hdl;
+
+ void *pv_app_out_strm_buf_cond_var_hdl;
+
+} out_strm_prms_t;
+
+typedef struct
+{
+ void *pv_mem_mngr_handle; /*!< memory manager handle */
+
+ WORD32 ai4_out_strm_end_flag[IHEVCE_MAX_NUM_RESOLUTIONS]
+ [IHEVCE_MAX_NUM_BITRATES]; /*!< end of strm processing */
+
+ out_strm_prms_t as_out_strm_prms[IHEVCE_MAX_NUM_RESOLUTIONS]
+ [IHEVCE_MAX_NUM_BITRATES]; /*!< to store out strm related prms */
+
+} app_ctxt_t;
+
+typedef struct
+{
+ /*! Static parameters; the same memory pointer will be passed to the
+ processing interface layer */
+ ihevce_static_cfg_params_t *ps_static_cfg_prms;
+
+ /*! OSAL handle */
+ void *pv_osal_handle;
+
+ /*! Callback API for freeing memory */
+ void (*ihevce_mem_free)(void *pv_handle, void *pv_mem);
+
+ /*! Callback API to be called during allocation */
+ void *(*ihevce_mem_alloc)(void *pv_handle, UWORD32 u4_size);
+
+ /** App context memory */
+ app_ctxt_t s_app_ctxt;
+
+ /** semaphore handle for Input data proc thread */
+ void *pv_app_inp_ctrl_sem_hdl;
+
+ /** semaphore handle for Output data proc thread */
+ void *pv_app_out_sts_sem_hdl;
+
+ /** Pointer to HLE interface ctxt */
+ void *pv_hle_interface_ctxt;
+
+ /** Memtab of input buffers */
+ iv_mem_rec_t s_memtab_inp_data_buf;
+
+ /** Memtab of input command buffers */
+ iv_mem_rec_t s_memtab_inp_sync_ctrl_buf;
+
+ /** Array of memtabs of output buffers, indexed [resolution][bitrate] */
+ iv_mem_rec_t as_memtab_out_data_buf[IHEVCE_MAX_NUM_RESOLUTIONS][IHEVCE_MAX_NUM_BITRATES];
+
+ /* pointer to async command input buffer */
+ UWORD8 *pu1_inp_async_ctrl_buf;
+
+ /* pointer to async command output buffer*/
+ UWORD8 *pu1_out_ctrl_buf;
+
+ /* HLE thread handle */
+ void *pv_hle_thread_hdl;
+
+ /* flag to indicate that flush mode is ON */
+ WORD32 i4_flush_mode_on;
+
+ /* field id for interlaced case */
+ WORD32 i4_field_id;
+
+ /* frame stride of input buffers */
+ WORD32 i4_frm_stride;
+
+ /* flag to indicate Output end status, indexed [resolution][bitrate] */
+ WORD32 ai4_out_end_flag[IHEVCE_MAX_NUM_RESOLUTIONS][IHEVCE_MAX_NUM_BITRATES];
+
+ /* output buffer context; one slot more than MAX_NUM_OUT_DATA_BUFS per stream */
+ out_buf_ctxt_t aaas_out_bufs[IHEVCE_MAX_NUM_RESOLUTIONS][IHEVCE_MAX_NUM_BITRATES]
+ [MAX_NUM_OUT_DATA_BUFS + 1];
+
+ /* Num Output buffers */
+ WORD32 i4_num_out_bufs;
+
+ /* Free outbuf idx, indexed [resolution][bitrate] */
+ WORD32 ai4_free_out_buf_idx[IHEVCE_MAX_NUM_RESOLUTIONS][IHEVCE_MAX_NUM_BITRATES];
+
+ /* Out produced idx */
+ WORD32 i4_prod_out_buf_idx;
+
+ /* DTS for output population */
+ LWORD64 i8_dts;
+
+ /* Flag used for flushing in case of EVAL version */
+ WORD32 i4_internal_flush;
+
+ ULWORD64 u8_num_frames_encoded;
+
+ /* Count of frames queued */
+ ULWORD64 u8_num_frames_queued;
+
+ /** Structure which contains params to be shared across different FFMPEG instances **/
+ ihevce_mbr_mres_handle_t
+ *ps_mbr_mres_handle[IHEVCE_MAX_NUM_RESOLUTIONS][IHEVCE_MAX_NUM_BITRATES];
+
+ /* Variables for the mechanism that detects dynamic changes in bitrate */
+ WORD32 ai4_old_bitrate[IHEVCE_MAX_NUM_RESOLUTIONS][IHEVCE_MAX_NUM_BITRATES];
+
+} plugin_ctxt_t;
+
+/*****************************************************************************/
+/* Extern Variable Declarations */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+#endif /* _IHEVCE_PLUGIN_PRIV_H_ */
diff --git a/encoder/ihevce_profile.c b/encoder/ihevce_profile.c
new file mode 100644
index 0000000..be99871
--- /dev/null
+++ b/encoder/ihevce_profile.c
@@ -0,0 +1,297 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*!
+******************************************************************************
+* \file ihevce_profile.c
+*
+* \brief
+* This file contains Profiling related functions
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+*
+* List of Functions
+*
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "ihevce_profile.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+#include <sys/time.h>
+
+/* print attributes */
+
+/* print everything on console */
+#define PRINTF(x, y, ...) printf(__VA_ARGS__)
+
+#if PROFILE_ENABLE
+
+/*!
+******************************************************************************
+* \if Function name : init_profiler \endif
+*
+* \brief
+* Initialization of profiling context
+*
+*****************************************************************************
+*/
+void init_profiler(profile_database_t *ps_profile_data)
+{
+ const size_t u_record_bytes = sizeof(profile_database_t);
+ /* wipe the whole record in one pass so every field starts from zero */
+ memset(ps_profile_data, 0, u_record_bytes);
+}
+
+/*!
+******************************************************************************
+* \if Function name : profile_sample_time \endif
+*
+* \brief
+* This function calls the system function gettimeofday() to get the current
+* time
+*
+*****************************************************************************
+*/
+ULWORD64 profile_sample_time(void)
+{
+ /* zero-filled so the result is never computed from uninitialised memory */
+ struct timeval s_time = { 0, 0 };
+
+ gettimeofday(&s_time, NULL);
+
+ /* whole seconds scaled to microseconds, plus the sub-second remainder */
+ return (((ULWORD64)s_time.tv_sec * 1000 * 1000) + (ULWORD64)(s_time.tv_usec));
+}
+
+/*!
+******************************************************************************
+* \if Function name : profile_start \endif
+*
+* \brief
+* This function samples current time
+*
+*****************************************************************************
+*/
+void profile_start(profile_database_t *ps_profile_data)
+{
+ ps_profile_data->u8_time_start = profile_sample_time();
+ assert(0 == ps_profile_data->u1_sample_taken_flag);
+ ps_profile_data->u1_sample_taken_flag = 1;
+ /* flag stays set until profile_sample_time_end() clears it again */
+ return;
+}
+
+/*!
+******************************************************************************
+* \if Function name : profile_sample_time_end \endif
+*
+* \brief
+* This function is called for getting current time after a process call.
+* It also updates this info in profile database
+*
+*****************************************************************************
+*/
+void profile_sample_time_end(profile_database_t *ps_profile_data)
+{
+ ps_profile_data->u8_time_end = profile_sample_time();
+ assert(1 == ps_profile_data->u1_sample_taken_flag);
+ ps_profile_data->u1_sample_taken_flag = 0;
+ /* flag cleared: profile_start() asserts it is 0 before opening a new sample */
+ return;
+}
+
+/*!
+******************************************************************************
+* \if Function name : profile_get_time_taken \endif
+*
+* \brief
+* This function computes the time taken by the process call
+*
+*****************************************************************************
+*/
+void profile_get_time_taken(profile_database_t *ps_profile_data)
+{
+ if(ps_profile_data->u8_time_end < ps_profile_data->u8_time_start)
+ {
+ /* Wall clock stepped backwards between the two samples. Both stamps are */
+ /* 64-bit microsecond counts, so no 32-bit wrap applies: report zero time */
+ ps_profile_data->u8_cur_time = 0;
+ }
+ else
+ {
+ ps_profile_data->u8_cur_time =
+ ps_profile_data->u8_time_end - ps_profile_data->u8_time_start;
+ }
+}
+
+/*!
+******************************************************************************
+* \if Function name : profile_get_average \endif
+*
+* \brief
+* This function computes the average time taken by the process calls so far
+*
+*****************************************************************************
+*/
+void profile_get_average(profile_database_t *ps_profile_data)
+{
+ ULWORD64 u8_sum = ps_profile_data->u8_total_time + ps_profile_data->u8_cur_time;
+ UWORD32 u4_calls = ps_profile_data->u4_num_profile_calls + 1;
+
+ /* publish the running totals, then derive the mean from them */
+ ps_profile_data->u8_total_time = u8_sum;
+ ps_profile_data->u4_num_profile_calls = u4_calls;
+ ps_profile_data->u8_avg_time = (u8_sum / u4_calls);
+}
+
+/*!
+******************************************************************************
+* \if Function name : profile_get_avg_time \endif
+*
+* \brief
+* This function returns the average time taken by the process calls so far
+* (u8_avg_time narrowed through a UWORD32 cast and returned as int)
+*****************************************************************************
+*/
+int profile_get_avg_time(profile_database_t *ps_profile_data)
+{
+ return (UWORD32)(ps_profile_data->u8_avg_time);
+}
+
+/*!
+******************************************************************************
+* \if Function name : profile_get_peak \endif
+*
+* \brief
+* This function computes the peak time taken by the process calls so far
+*
+*****************************************************************************
+*/
+void profile_get_peak(profile_database_t *ps_profile_data)
+{
+ const ULWORD64 u8_latest = ps_profile_data->u8_cur_time; /* newest sample */
+ if(ps_profile_data->u8_peak_time < u8_latest)
+ {
+ ps_profile_data->u8_peak_time = u8_latest; /* strictly larger: new peak */
+ }
+}
+
+/*!
+******************************************************************************
+* \if Function name : profile_get_peak_time \endif
+*
+* \brief
+* This function returns the peak time taken by the process calls so far
+* (u8_peak_time narrowed through a UWORD32 cast and returned as int)
+*****************************************************************************
+*/
+int profile_get_peak_time(profile_database_t *ps_profile_data)
+{
+ return (UWORD32)(ps_profile_data->u8_peak_time);
+}
+
+/*!
+******************************************************************************
+* \if Function name : profile_end \endif
+*
+* \brief
+* This function prints the profile data - time taken by the last process
+* call, average time so far and peak time so far
+*
+*****************************************************************************
+*/
+void profile_end(profile_database_t *ps_profile_data, char *msg)
+{
+ printf("**********************************************\n");
+ if(msg)
+ {
+ printf(
+ "IHEVC : %s, Avg Process Time: %u micro-seconds\n",
+ msg,
+ (UWORD32)(ps_profile_data->u8_avg_time)); /* %u: argument is cast to unsigned */
+ printf(
+ "IHEVC : %s, Peak Process Time : %u micro-seconds\n",
+ msg,
+ (UWORD32)(ps_profile_data->u8_peak_time));
+ }
+ else
+ {
+ printf(
+ "IHEVC : %s, Avg Process Time: %u micro-seconds\n",
+ "<unknown>", /* placeholder label when the caller passes no message */
+ (UWORD32)(ps_profile_data->u8_avg_time));
+ printf(
+ "IHEVC : %s, Peak Process Time : %u micro-seconds\n",
+ "<unknown>",
+ (UWORD32)(ps_profile_data->u8_peak_time));
+ }
+ return;
+}
+
+/*!
+******************************************************************************
+* \if Function name : profile_stop \endif
+*
+* \brief
+* This function prints the profile time
+*
+*****************************************************************************
+*/
+void profile_stop(profile_database_t *ps_profile_data, char *msg)
+{
+ /* Get current time - This corresponds to time after the process call */
+ profile_sample_time_end(ps_profile_data);
+ /* Get time taken for the process call */
+ profile_get_time_taken(ps_profile_data);
+ /* Calculate average time taken so far */
+ profile_get_average(ps_profile_data);
+ /* Calculate peak time per process call taken so far */
+ profile_get_peak(ps_profile_data);
+ /* NOTE(review): fps below divides by u8_avg_time; confirm it cannot be 0 here */
+ if(msg)
+ {
+ printf("%s, fps: :%10.3f", msg, (DOUBLE)(1000000.0 / ps_profile_data->u8_avg_time));
+ }
+
+ return;
+}
+
+#endif /* #if PROFILE_ENABLE */
diff --git a/encoder/ihevce_profile.h b/encoder/ihevce_profile.h
new file mode 100644
index 0000000..b9282a9
--- /dev/null
+++ b/encoder/ihevce_profile.h
@@ -0,0 +1,119 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file ihevce_profile.h
+*
+* @brief
+* This file contains profiling related definitions
+*
+* @author
+* Ittiam
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_PROFILE_H_
+#define _IHEVCE_PROFILE_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+#define PROFILE_ENABLE 0
+
+typedef struct
+{
+ /* Note that all times below are in units of microseconds */
+ /* Time before process call */
+ ULWORD64 u8_time_start;
+
+ /* Time after process call */
+ ULWORD64 u8_time_end;
+
+ /* Time taken by the last process call */
+ ULWORD64 u8_cur_time;
+
+ /* Sum total of the time taken by process calls so far */
+ ULWORD64 u8_total_time;
+
+ /* Average time taken by a process call so far */
+ ULWORD64 u8_avg_time;
+
+ /* Peak time taken by a process call so far */
+ ULWORD64 u8_peak_time;
+
+ /* Number of process calls so far.
+ * Required for calc of avg time taken per process call */
+ UWORD32 u4_num_profile_calls;
+
+ /* This flag is present to check that every
+ * profile_start() will have a corresponding
+ * profile_sample_time_end() */
+ UWORD8 u1_sample_taken_flag;
+
+} profile_database_t;
+
+typedef struct
+{
+ WORD32 tv_sec; /* Time in seconds. */
+ WORD32 tv_usec; /* Time in micro seconds. */
+} timeval_t;
+
+/*****************************************************************************/
+/* Function Declarations */
+/*****************************************************************************/
+void profile_sample_time_start(); /* NOTE(review): not defined in ihevce_profile.c */
+void profile_sample_time_end(profile_database_t *ps_profile_data);
+void profile_print_stats(); /* NOTE(review): not defined in ihevce_profile.c */
+int profile_get_avg_time(profile_database_t *ps_profile_data);
+int profile_get_peak_time(profile_database_t *ps_profile_data);
+int profile_convert_to_milli_sec(profile_database_t *ps_profile_data);
+/* Returns the current time of day expressed in microseconds */
+ULWORD64 profile_sample_time(void);
+
+/* Should be called after each process call */
+void profile_stop(profile_database_t *ps_profile_data, char *msg);
+
+/* Should be called before every process call */
+void profile_start(profile_database_t *ps_profile_data);
+
+/* Should be called after codec instance initialization */
+void init_profiler(profile_database_t *ps_profile_data);
+
+/* Should be called at the end of processing */
+void profile_end(profile_database_t *ps_profile_data, char *msg);
+
+#if PROFILE_ENABLE
+
+#define PROFILE_INIT(x) init_profiler(x)
+#define PROFILE_START(x) profile_start(x)
+#define PROFILE_STOP(x, y) profile_stop(x, y)
+#define PROFILE_END(x, y) profile_end(x, y)
+
+#else /* #if PROFILE_ENABLE */
+
+#define PROFILE_INIT(x)
+#define PROFILE_START(x)
+#define PROFILE_STOP(x, y)
+#define PROFILE_END(x, y)
+
+#endif /* #if PROFILE_ENABLE */
+
+#endif /* _IHEVCE_PROFILE_H_ */
diff --git a/encoder/ihevce_rc_enc_structs.h b/encoder/ihevce_rc_enc_structs.h
new file mode 100644
index 0000000..e4b9e6f
--- /dev/null
+++ b/encoder/ihevce_rc_enc_structs.h
@@ -0,0 +1,89 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_rc_enc_structs.h
+*
+* \brief
+* This file contains structure definitions shared between Encoder and RC
+*
+* \date
+* 15/01/2013
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_RC_ENC_STRUCTS_H_
+#define _IHEVCE_RC_ENC_STRUCTS_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+
+/*SAD/Qscale is calculated over all CUs and summed into a 64 bit variable*/
+/*Assuming 8*8 CU, sad can be a 14 bit value */
+/*For 4k*2k, number of 8*8 CUs is 131072 which is an 18 bit value */
+/*Finally Qscale is multiplied into this variable and it is an 8 bit value*/
+/*hence qformat can max be 64 - 14 - 18 - 8 - 1(sign) - 1(safety value) = 22 */
+#define SAD_BY_QSCALE_Q 22
+typedef struct
+{
+ UWORD32 u4_total_header_bits;
+ UWORD32 u4_total_texture_bits;
+ UWORD32 u4_total_sad;
+ UWORD32 u4_total_intra_sad;
+ UWORD32 u4_open_loop_intra_sad;
+ WORD32 i4_qp_normalized_8x8_cu_sum[2];
+ WORD32 i4_8x8_cu_sum[2];
+ LWORD64 i8_sad_by_qscale[2];
+ LWORD64 i8_total_ssd_frame;
+ WORD32 i4_curr_qp_acc;
+} rc_bits_sad_t;
+
+/*****************************************************************************/
+/* Extern Variable Declarations */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+void ihevce_enc_loop_get_frame_rc_prms(
+ void *pv_enc_loop_ctxt, rc_bits_sad_t *ps_rc_prms, WORD32 i4_br_id, WORD32 i4_enc_frm_id);
+
+#endif /* _IHEVCE_RC_ENC_STRUCTS_H_ */
diff --git a/encoder/ihevce_rc_interface.c b/encoder/ihevce_rc_interface.c
new file mode 100644
index 0000000..877e7fd
--- /dev/null
+++ b/encoder/ihevce_rc_interface.c
@@ -0,0 +1,5973 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file ihevce_rc_interface.c
+*
+* @brief
+* This file contains function definitions for rc api interface
+*
+* @author
+* Ittiam
+*
+* List of Functions
+* <TODO: Update this>
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+#include "mem_req_and_acq.h"
+#include "rate_control_api.h"
+#include "var_q_operator.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_debug.h"
+#include "ihevc_macros.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_hle_interface.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_lap_interface.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+#include "hme_datatype.h"
+#include "hme_interface.h"
+#include "hme_common_defs.h"
+#include "hme_defs.h"
+#include "ihevce_rc_enc_structs.h"
+#include "ihevce_rc_structs.h"
+#include "ihevce_rc_interface.h"
+#include "ihevce_frame_process_utils.h"
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+#define USE_USER_FIRST_FRAME_QP 0
+#define DEBUG_PRINT 0
+#define DETERMINISTIC_RC 1
+#define USE_QP_OFFSET_POST_SCD 1
+#define USE_SQRT 0
+#define K_SCALING_FACTOR 8
+#define ENABLE_2_PASS_BIT_ALLOC_FRM_1ST 0
+
+#define VBV_THRSH_I_PIC_DELTA_QP_1 (0.85)
+#define VBV_THRSH_I_PIC_DELTA_QP_2 (0.75)
+#define VBV_THRSH_P_PIC_DELTA_QP_1 (0.80)
+#define VBV_THRSH_P_PIC_DELTA_QP_2 (0.70)
+#define VBV_THRSH_BR_PIC_DELTA_QP_1 (0.75)
+#define VBV_THRSH_BR_PIC_DELTA_QP_2 (0.65)
+#define VBV_THRSH_BNR_PIC_DELTA_QP_1 (0.75)
+#define VBV_THRSH_BNR_PIC_DELTA_QP_2 (0.65)
+#define VBV_THRSH_DELTA_QP (0.6)
+
+#define VBV_THRSH_FRM_PRLL_I_PIC_DELTA_QP_1 (0.70)
+#define VBV_THRSH_FRM_PRLL_I_PIC_DELTA_QP_2 (0.60)
+#define VBV_THRSH_FRM_PRLL_P_PIC_DELTA_QP_1 (0.65)
+#define VBV_THRSH_FRM_PRLL_P_PIC_DELTA_QP_2 (0.55)
+#define VBV_THRSH_FRM_PRLL_BR_PIC_DELTA_QP_1 (0.60)
+#define VBV_THRSH_FRM_PRLL_BR_PIC_DELTA_QP_2 (0.50)
+#define VBV_THRSH_FRM_PRLL_BNR_PIC_DELTA_QP_1 (0.60)
+#define VBV_THRSH_FRM_PRLL_BNR_PIC_DELTA_QP_2 (0.50)
+#define VBV_THRSH_FRM_PRLL_DELTA_QP (0.45)
+
+#define TRACE_SUPPORT 0
+
+/*****************************************************************************/
+/* Globals */
+/*****************************************************************************/
+
+/*
+Modified bpp vs nor satd/act/qp :
+=================================
+
+Pristine Quality
+-----------------
+480p y = -0.1331x3 - 0.0589x2 + 2.5091x - 0.0626
+720p y = -0.3603x3 + 0.4504x2 + 2.2056x - 0.0411
+1080p y = -0.7085x3 + 0.9743x2 + 1.939x - 0.0238
+2160p y = -1.2447x3 + 2.1218x2 + 1.4995x - 0.0108
+
+High Quality
+-------------
+480p y = -0.1348x3 - 0.0557x2 + 2.5055x - 0.0655
+720p y = -0.0811x3 + 0.1988x2 + 1.246x - 0.0385
+1080p y = -0.74x3 + 1.0552x2 + 1.8942x - 0.0251
+2160p y = -1.3851x3 + 2.3372x2 + 1.4255x - 0.0113
+
+Medium Speed
+-------------
+480p y = -0.143x3 - 0.0452x2 + 2.5581x - 0.0765
+720p y = -0.3997x3 + 0.542x2 + 2.201x - 0.0507
+1080p y = -0.816x3 + 1.2048x2 + 1.8689x - 0.0298
+2160p y = -1.5169x3 + 2.5857x2 + 1.3478x - 0.0126
+
+High Speed
+-----------
+480p y = -0.1472x3 - 0.0341x2 + 2.5605x - 0.0755
+720p y = -0.3967x3 + 0.526x2 + 2.2228x - 0.0504
+1080p y = -0.8008x3 + 1.1713x2 + 1.8897x - 0.0297
+2160p y = -1.503x3 + 2.576x2 + 1.3476x - 0.0123
+
+Extreme Speed
+--------------
+480p y = -0.1379x3 - 0.059x2 + 2.5716x - 0.0756
+720p y = -0.3938x3 + 0.521x2 + 2.2239x - 0.0505
+1080p y = -0.8041x3 + 1.1725x2 + 1.8874x - 0.0293
+2160p y = -1.4863x3 + 2.556x2 + 1.344x - 0.0122
+
+*/
+
+const double g_offline_i_model_coeff[20][4] = {
+ /* each row: { x^3, x^2, x, constant } coefficients; 4 resolutions x 5 presets */
+ /*ultra_HD*/
+ { -1.2447, 2.1218, 1.4995, -0.0108 }, /*Pristine quality*/
+ { -1.3851, 2.3372, 1.4255, -0.0113 }, /*High quality*/
+ { -1.5169, 2.5857, 1.3478, -0.0126 }, /*Medium speed*/
+ { -1.503, 2.576, 1.3476, -0.0123 }, /*high speed*/
+ { -1.4863, 2.556, 1.344, -0.0122 }, /*Extreme Speed*/
+
+ /*Full HD*/
+ { -0.7085, 0.9743, 1.939, -0.0238 }, /*Pristine quality*/
+ { -0.74, 1.0552, 1.8942, -0.0251 }, /*High quality*/
+ { -0.816, 1.2048, 1.8689, -0.0298 }, /*Medium speed*/
+ { -0.8008, 1.1713, 1.8897, -0.0297 }, /*high speed*/
+ { -0.8041, 1.1725, 1.8874, -0.0293 }, /*Extreme Speed*/
+
+ /*720p*/
+ { -0.3603, 0.4504, 2.2056, -0.0411 }, /*Pristine quality*/
+ // {-0.0811, 0.1988, 1.246, - 0.0385},/*High quality*/
+ { -0.3997, 0.542, 2.201, -0.0507 }, /*High quality: same values as the Medium speed row, used in place of the disabled row above*/
+ { -0.3997, 0.542, 2.201, -0.0507 }, /*Medium speed*/
+ { -0.3967, 0.526, 2.2228, -0.0504 }, /*high speed*/
+ { -0.3938, 0.521, 2.2239, -0.0505 }, /*Extreme Speed*/
+
+ /*SD*/
+ { -0.1331, -0.0589, 2.5091, -0.0626 }, /*Pristine quality*/
+ { -0.1348, -0.0557, 2.5055, -0.0655 }, /*High quality*/
+ { -0.143, -0.0452, 2.5581, -0.0765 }, /*Medium speed*/
+ { -0.1472, -0.0341, 2.5605, -0.0755 }, /*high speed*/
+ { -0.1379, -0.059, 2.5716, -0.0756 } /*Extreme Speed*/
+
+};
+
+/*****************************************************************************/
+/* Function Declarations */
+/*****************************************************************************/
+
+picture_type_e ihevce_rc_conv_pic_type(
+ IV_PICTURE_CODING_TYPE_T pic_type,
+ WORD32 i4_field_pic,
+ WORD32 i4_temporal_layer_id,
+ WORD32 i4_is_bottom_field,
+ WORD32 i4_top_field_first); /* not static; called by ihevce_rc_get_bpp_based_frame_qp in this file */
+
+WORD32 ihevce_rc_get_scaled_mpeg2_qp(WORD32 i4_frame_qp, rc_quant_t *ps_rc_quant_ctxt); /* not static */
+
+static WORD32 ihevce_get_offline_index(rc_context_t *ps_rc_ctxt, WORD32 i4_num_pels_in_frame); /* file-local helper */
+
+static void ihevce_rc_get_pic_param(
+ picture_type_e rc_pic_type, WORD32 *pi4_tem_lyr, WORD32 *pi4_is_bottom_field); /* file-local helper; results returned through the two pointers */
+
+static double ihevce_get_frame_lambda_modifier(
+ WORD8 slice_type,
+ WORD32 i4_rc_temporal_lyr_id,
+ WORD32 i4_first_field,
+ WORD32 i4_rc_is_ref_pic,
+ WORD32 i4_num_b_frms); /* file-local helper */
+
+static WORD32 ihevce_clip_min_max_qp(
+ rc_context_t *ps_rc_ctxt,
+ WORD32 i4_hevc_frame_qp,
+ picture_type_e rc_pic_type,
+ WORD32 i4_rc_temporal_lyr_id); /* file-local helper */
+
+WORD32 ihevce_ebf_based_rc_correction_to_avoid_overflow(
+ rc_context_t *ps_rc_ctxt, rc_lap_out_params_t *ps_rc_lap_out, WORD32 *pi4_tot_bits_estimated); /* not static */
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/*!
+************************************************************************
+* @brief
+* Reports the total number of memory records needed by rate control:
+* the NUM_RC_MEM_RECS records owned by this interface layer plus the
+* count returned by the RC library for a GET_NUM_MEMTAB query.
+*
+* @return total number of memory records
+************************************************************************
+*/
+WORD32 ihevce_rc_get_num_mem_recs(void)
+{
+ /* only the address of the handle is handed to the query; it is not read here */
+ rate_control_handle rc_api_hdl;
+ /* the count query is issued with a NULL memtab pointer */
+ itt_memtab_t *ps_no_memtab = NULL;
+ WORD32 i4_lib_recs = 0;
+
+ i4_lib_recs = rate_control_num_fill_use_free_memtab(&rc_api_hdl, ps_no_memtab, GET_NUM_MEMTAB);
+
+ return (i4_lib_recs + NUM_RC_MEM_RECS);
+}
+
+/*!
+************************************************************************
+* @brief
+* Fills size, alignment and memory type for every memory record used
+* by rate control.
+*
+* The first NUM_RC_MEM_RECS entries belong to this interface layer
+* (RC context, the three QP/qscale tables and the GOP stat record);
+* the entries after them are copied from the RC library's FILL_MEMTAB
+* answer.
+*
+* @param[out] ps_mem_tab   table of records to fill
+* @param[in]  ps_init_prms static params; only
+*                          s_tgt_lyr_prms.i4_internal_bit_depth is read
+* @param[in]  mem_space    stored in each record as IV_MEM_TYPE_T
+* @param[in]  ps_sys_api   unused
+*
+* @return number of records filled
+************************************************************************
+*/
+WORD32 ihevce_rc_get_mem_recs(
+ iv_mem_rec_t *ps_mem_tab,
+ ihevce_static_cfg_params_t *ps_init_prms,
+ WORD32 mem_space,
+ ihevce_sys_api_t *ps_sys_api)
+{
+ rate_control_handle rc_api_hdl;
+ itt_memtab_t *ps_no_memtab = NULL;
+ itt_memtab_t as_lib_memtab[30];
+ WORD32 i4_max_qp_idx;
+ WORD32 i4_qscale_entries;
+ WORD32 i4_lib_recs = 0;
+ WORD32 i4_idx;
+ float f_exp;
+
+ /* RC context */
+ ps_mem_tab[RC_CTXT].i4_mem_size = sizeof(rc_context_t);
+ ps_mem_tab[RC_CTXT].e_mem_type = (IV_MEM_TYPE_T)mem_space;
+ ps_mem_tab[RC_CTXT].i4_mem_alignment = 64;
+
+ (void)ps_sys_api;
+
+ /* largest table index: 51 plus 6 for every bit of depth above 8 */
+ i4_max_qp_idx = (51 + ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth - 8) * 6));
+
+ /* QP -> qscale table, 4 bytes per entry */
+ ps_mem_tab[RC_QP_TO_QSCALE].i4_mem_size = (i4_max_qp_idx + 1) * 4;
+ ps_mem_tab[RC_QP_TO_QSCALE].e_mem_type = (IV_MEM_TYPE_T)mem_space;
+ ps_mem_tab[RC_QP_TO_QSCALE].i4_mem_alignment = 64;
+
+ /* QP -> qscale (Q-factor) table, same dimensions */
+ ps_mem_tab[RC_QP_TO_QSCALE_Q_FACTOR].i4_mem_size = (i4_max_qp_idx + 1) * 4;
+ ps_mem_tab[RC_QP_TO_QSCALE_Q_FACTOR].e_mem_type = (IV_MEM_TYPE_T)mem_space;
+ ps_mem_tab[RC_QP_TO_QSCALE_Q_FACTOR].i4_mem_alignment = 64;
+
+ /* qscale -> QP table: 2^((max index - 4) / 6) rounded, then scaled by 8 (Q3) */
+ f_exp = (float)(51 + ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth - 8) * 6));
+ f_exp = ((float)(f_exp - 4) / 6);
+ i4_qscale_entries = (WORD32)((float)pow(2, f_exp) + 0.5);
+ i4_qscale_entries <<= 3;
+
+ ps_mem_tab[RC_QSCALE_TO_QP].i4_mem_size = (i4_qscale_entries + 1) * sizeof(UWORD32);
+ ps_mem_tab[RC_QSCALE_TO_QP].e_mem_type = (IV_MEM_TYPE_T)mem_space;
+ ps_mem_tab[RC_QSCALE_TO_QP].i4_mem_alignment = 64;
+
+ /* GOP level statistics */
+ ps_mem_tab[RC_MULTI_PASS_GOP_STAT].i4_mem_size = sizeof(gop_level_stat_t);
+ ps_mem_tab[RC_MULTI_PASS_GOP_STAT].e_mem_type = (IV_MEM_TYPE_T)mem_space;
+ ps_mem_tab[RC_MULTI_PASS_GOP_STAT].i4_mem_alignment = 64;
+
+ /* count query first (its result is not used), then the actual fill */
+ (void)rate_control_num_fill_use_free_memtab(&rc_api_hdl, ps_no_memtab, GET_NUM_MEMTAB);
+ i4_lib_recs = rate_control_num_fill_use_free_memtab(&rc_api_hdl, as_lib_memtab, FILL_MEMTAB);
+
+ /* library records are placed right after the interface-owned ones */
+ for(i4_idx = 0; i4_idx < i4_lib_recs; i4_idx++)
+ {
+ iv_mem_rec_t *ps_rec = &ps_mem_tab[i4_idx + NUM_RC_MEM_RECS];
+
+ ps_rec->i4_mem_size = as_lib_memtab[i4_idx].u4_size;
+ ps_rec->i4_mem_alignment = as_lib_memtab[i4_idx].i4_alignment;
+ ps_rec->e_mem_type = (IV_MEM_TYPE_T)mem_space;
+ }
+
+ return (i4_lib_recs + NUM_RC_MEM_RECS);
+}
+
+/**
+******************************************************************************
+*
+* @brief Initializes the rate control module
+*
+* @par Description
+* Zeroes the rc_context_t located at ps_mem_tab[RC_CTXT].pv_base, attaches the
+* QP/qscale table buffers to ps_rc_quant, hands the records that follow
+* NUM_RC_MEM_RECS to the RC library (FILL_MEMTAB followed by USE_BASE) and
+* seeds the context from the create-time parameters.
+*
+* @param[inout] ps_mem_tab
+* pointer to memory descriptors table
+*
+* @param[in] ps_init_prms
+* Create time static parameters. Note that s_config_prms.i4_rate_control_mode
+* is overwritten with CBR_NLDRC when the mode is VBR_STREAMING and the peak
+* bitrate is below 1050 * (average bitrate / 1000).
+*
+* @param[in] i4_bitrate_instance_id
+* index into the per-resolution bitrate / peak bitrate / vbv size / frame qp arrays
+*
+* @param[out] ps_rc_quant
+* receives the table pointers, i1_qp_offset, i2_max_qp and i2_min_qp
+*
+* @param[in] i4_resolution_id
+* index into s_tgt_lyr_prms.as_tgt_params
+*
+* @param[in] i4_look_ahead_frames_in_first_pass
+* not referenced in this function
+*
+* @return pointer to the initialised rc_context_t
+*
+******************************************************************************
+*/
+void *ihevce_rc_mem_init(
+ iv_mem_rec_t *ps_mem_tab,
+ ihevce_static_cfg_params_t *ps_init_prms,
+ WORD32 i4_bitrate_instance_id,
+ rc_quant_t *ps_rc_quant,
+ WORD32 i4_resolution_id,
+ WORD32 i4_look_ahead_frames_in_first_pass)
+{
+ rc_context_t *ps_rc_ctxt;
+ WORD32 i4_num_memtab, i, j, i4_avg_bitrate, u4_buf_size;
+ WORD32 i4_cdr_period = 0, i4_idr_period = 0;
+ WORD32 i4_peak_bitrate_factor;
+ rate_control_handle ps_rate_control_api;
+ itt_memtab_t as_rc_mem_tab[30];
+ itt_memtab_t *ps_itt_memtab = NULL;
+ ps_rc_ctxt = (rc_context_t *)ps_mem_tab[RC_CTXT].pv_base;
+ memset(ps_rc_ctxt, 0, sizeof(rc_context_t)); /* start from an all-zero context */
+
+ ps_rc_ctxt->i4_br_id_for_2pass = i4_bitrate_instance_id;
+ if(ps_init_prms->s_coding_tools_prms.i4_max_cra_open_gop_period)
+ {
+ i4_cdr_period = ps_init_prms->s_coding_tools_prms.i4_max_cra_open_gop_period;
+ }
+ if(ps_init_prms->s_coding_tools_prms.i4_max_i_open_gop_period) /* when non-zero, replaces the CRA period chosen above */
+ {
+ i4_cdr_period = ps_init_prms->s_coding_tools_prms.i4_max_i_open_gop_period;
+ }
+ i4_idr_period = ps_init_prms->s_coding_tools_prms.i4_max_closed_gop_period;
+
+ ps_rc_quant->pi4_qscale_to_qp = (WORD32 *)ps_mem_tab[RC_QSCALE_TO_QP].pv_base;
+
+ ps_rc_quant->pi4_qp_to_qscale_q_factor = (WORD32 *)ps_mem_tab[RC_QP_TO_QSCALE_Q_FACTOR].pv_base;
+
+ ps_rc_quant->pi4_qp_to_qscale = (WORD32 *)ps_mem_tab[RC_QP_TO_QSCALE].pv_base;
+
+ ps_rc_ctxt->pv_gop_stat = (void *)ps_mem_tab[RC_MULTI_PASS_GOP_STAT].pv_base;
+
+ /*assign memtabs to rc module*/
+ i4_num_memtab =
+ rate_control_num_fill_use_free_memtab(&ps_rate_control_api, ps_itt_memtab, GET_NUM_MEMTAB); /* result is overwritten by the FILL_MEMTAB call below */
+
+ i4_num_memtab =
+ rate_control_num_fill_use_free_memtab(&ps_rate_control_api, as_rc_mem_tab, FILL_MEMTAB);
+ for(i = 0; i < i4_num_memtab; i++)
+ {
+ as_rc_mem_tab[i].pv_base = ps_mem_tab[i + NUM_RC_MEM_RECS].pv_base; /* RC library records follow the NUM_RC_MEM_RECS interface records */
+ }
+ i4_num_memtab =
+ rate_control_num_fill_use_free_memtab(&ps_rate_control_api, as_rc_mem_tab, USE_BASE);
+
+ ps_rc_ctxt->rc_hdl =
+ ps_rate_control_api; /*handle to entire RC structure private to RC library*/
+ ps_rc_ctxt->i4_field_pic = ps_init_prms->s_src_prms.i4_field_pic;
+
+ ps_rc_ctxt->i4_is_first_frame_encoded = 0;
+ /*added for field encoding*/
+ ps_rc_ctxt->i4_max_inter_frm_int =
+ 1 << (ps_init_prms->s_coding_tools_prms.i4_max_temporal_layers + ps_rc_ctxt->i4_field_pic);
+ ps_rc_ctxt->i4_max_temporal_lyr = ps_init_prms->s_coding_tools_prms.i4_max_temporal_layers;
+ /*Number of picture types used if different models are used for hierarchical B frames*/
+
+ if(i4_idr_period == 1 || i4_cdr_period == 1)
+ ps_rc_ctxt->i4_num_active_pic_type = 1;
+ else
+ ps_rc_ctxt->i4_num_active_pic_type =
+ 2 + ps_init_prms->s_coding_tools_prms.i4_max_temporal_layers;
+
+ ps_rc_ctxt->i4_quality_preset =
+ ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_quality_preset;
+
+ if(ps_rc_ctxt->i4_quality_preset == IHEVCE_QUALITY_P7)
+ {
+ ps_rc_ctxt->i4_quality_preset = IHEVCE_QUALITY_P6; /* P7 is stored as P6 in the RC context */
+ }
+
+ ps_rc_ctxt->i4_rc_pass = ps_init_prms->s_pass_prms.i4_pass;
+ ps_rc_ctxt->i8_num_gop_mem_alloc = 0;
+
+ ps_rc_ctxt->u1_is_mb_level_rc_on = 0; /*no mb level RC*/
+
+ ps_rc_ctxt->i4_is_infinite_gop = 0;
+ ps_rc_ctxt->u1_bit_depth = (UWORD8)ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth;
+
+ //ps_rc_ctxt->ps_rc_quant_ctxt->i1_qp_offset = ((ps_init_prms->s_src_prms.i4_bit_depth-8)*6);
+ ps_rc_quant->i1_qp_offset = ((ps_init_prms->s_tgt_lyr_prms.i4_internal_bit_depth - 8) * 6);
+
+ ps_rc_quant->i2_max_qp = MIN(ps_init_prms->s_config_prms.i4_max_frame_qp,
+ 51); // FOR Encoder
+ ps_rc_quant->i2_min_qp =
+ MAX(-(ps_rc_quant->i1_qp_offset), ps_init_prms->s_config_prms.i4_min_frame_qp);
+
+ if(ps_init_prms->s_lap_prms.i4_rc_look_ahead_pics)
+ {
+ ps_rc_ctxt->i4_num_frame_in_lap_window =
+ ps_init_prms->s_lap_prms.i4_rc_look_ahead_pics + MIN_L1_L0_STAGGER_NON_SEQ;
+ }
+ else
+ ps_rc_ctxt->i4_num_frame_in_lap_window = 0;
+
+ if(i4_cdr_period > 0 && i4_idr_period > 0)
+ {
+ /*both IDR and CDR are positive*/
+ //WORD32 i4_rem;
+ ps_rc_ctxt->u4_intra_frame_interval = i4_cdr_period;
+ ps_rc_ctxt->u4_idr_period = i4_idr_period;
+
+ /*Allow configuration where IDR period is multiple of CDR period. Though any configuration is supported by LAP rate control
+ does not handle asymmetric GOPS, Bit-allocation is exposed to CDR or IDR. It treats everything as I pic*/
+ }
+ else if(!i4_idr_period && i4_cdr_period > 0)
+ {
+ ps_rc_ctxt->u4_intra_frame_interval = i4_cdr_period;
+ ps_rc_ctxt->u4_idr_period = 0;
+ }
+ else if(!i4_cdr_period && i4_idr_period > 0)
+ {
+ ps_rc_ctxt->u4_intra_frame_interval = i4_idr_period;
+ ps_rc_ctxt->u4_idr_period = i4_idr_period;
+ }
+ else
+ {
+ /*ASSERT(0);*/
+
+ ps_rc_ctxt->u4_intra_frame_interval =
+ INFINITE_GOP_CDR_TIME_S *
+ ((ps_init_prms->s_src_prms.i4_frm_rate_num /
+ (ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_frm_rate_scale_factor *
+ ps_init_prms->s_src_prms.i4_frm_rate_denom)));
+ ps_rc_ctxt->u4_idr_period = 0;
+ ps_rc_ctxt->i4_is_infinite_gop = 1;
+ }
+
+ /*If cdr period is 0 then only it is closed gop*/
+ ps_rc_ctxt->i4_is_gop_closed = 0;
+ if(i4_cdr_period == 0)
+ {
+ ps_rc_ctxt->i4_is_gop_closed = 1;
+ }
+ /*This is required because the intra sad returned by non I pic is not correct. Use only I pic sad for next I pic qp calculation*/
+ ps_rc_ctxt->i4_use_est_intra_sad = 0;
+ ps_rc_ctxt->u4_src_ticks = 1000;
+ ps_rc_ctxt->u4_tgt_ticks = 1000;
+ ps_rc_ctxt->i4_auto_generate_init_qp = 1;
+
+ ps_rc_ctxt->i8_prev_i_frm_cost = 0;
+
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ /* -1 cost indicates the picture type not been encoded*/
+ ps_rc_ctxt->ai8_prev_frm_pre_enc_cost[i] = -1;
+ ps_rc_ctxt->ai8_prev_frame_est_L0_satd[i] = -1;
+ ps_rc_ctxt->ai8_prev_frame_hme_sad[i] = -1;
+ ps_rc_ctxt->ai8_prev_frame_pre_intra_sad[i] = -1;
+ /*L1 state metrics*/
+ ps_rc_ctxt->s_l1_state_metric.ai8_L1_prev_I_intra_raw_satd[i] = -1;
+ ps_rc_ctxt->s_l1_state_metric.ai8_L1_prev_pic_coarse_me_cost[i] = -1;
+ ps_rc_ctxt->s_l1_state_metric.ai8_L1_prev_pic_coarse_me_sad[i] = -1;
+ /* SGI & Enc Loop Parallelism related changes*/
+ ps_rc_ctxt->s_l1_state_metric.au4_prev_scene_num[i] = 0;
+ ps_rc_ctxt->au4_prev_scene_num_pre_enc[i] = 0xFFFFFFFF;
+ ps_rc_ctxt->ai4_qp_for_previous_scene_pre_enc[i] = 0;
+ }
+ ps_rc_ctxt->u4_scene_num_est_L0_intra_sad_available = 0xFFFFFFFF;
+
+ for(i = 0; i < MAX_NON_REF_B_PICS_IN_QUEUE_SGI; i++)
+ {
+ ps_rc_ctxt->as_non_ref_b_qp[i].i4_enc_order_num_rc = 0x7FFFFFFF;
+ ps_rc_ctxt->as_non_ref_b_qp[i].i4_non_ref_B_pic_qp = 0x7FFFFFFF;
+ ps_rc_ctxt->as_non_ref_b_qp[i].u4_scene_num_rc = MAX_SCENE_NUM + 1;
+ }
+ ps_rc_ctxt->i4_non_ref_B_ctr = 0;
+ ps_rc_ctxt->i4_prev_qp_ctr = 0;
+ ps_rc_ctxt->i4_cur_scene_num = 0;
+
+ /*init = 0 set to 1 when atleast one frame of each picture type has completed L1 stage*/
+ ps_rc_ctxt->i4_is_est_L0_intra_sad_available = 0;
+
+ /*Min and max qp from user*/
+ ps_rc_ctxt->i4_min_frame_qp = ps_init_prms->s_config_prms.i4_min_frame_qp;
+ ps_rc_ctxt->i4_max_frame_qp = ps_init_prms->s_config_prms.i4_max_frame_qp;
+ ASSERT(ps_rc_ctxt->i4_min_frame_qp >= ps_rc_quant->i2_min_qp);
+ ASSERT(ps_rc_ctxt->i4_max_frame_qp <= ps_rc_quant->i2_max_qp);
+ /*bitrate init*/
+ /*take average bitrate from config file*/
+ i4_avg_bitrate = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id]
+ .ai4_tgt_bitrate[i4_bitrate_instance_id];
+
+ if((ps_init_prms->s_config_prms.i4_rate_control_mode == VBR_STREAMING) &&
+ (ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id]
+ .ai4_peak_bitrate[i4_bitrate_instance_id] < (1050 * (i4_avg_bitrate / 1000))))
+ {
+ ps_init_prms->s_config_prms.i4_rate_control_mode = CBR_NLDRC; /* NOTE: writes back into the caller's parameters */
+ }
+
+ ps_rc_ctxt->e_rate_control_type = (rc_type_e)ps_init_prms->s_config_prms.i4_rate_control_mode;
+ ps_rc_ctxt->i4_capped_vbr_flag = 0;
+ if(1 == ps_init_prms->s_config_prms.i4_rate_control_mode)
+ {
+ /* The path taken by capped vbr mode is same as normal VBR mode. Only a flag needs to be enabled
+ which tells the rc module that encoder is running in capped vbr mode */
+ ps_rc_ctxt->e_rate_control_type = VBR_STREAMING;
+ ps_rc_ctxt->i4_capped_vbr_flag = 1;
+ }
+ ASSERT(
+ (ps_rc_ctxt->e_rate_control_type == CBR_NLDRC) ||
+ (ps_rc_ctxt->e_rate_control_type == CONST_QP) ||
+ (ps_rc_ctxt->e_rate_control_type == VBR_STREAMING));
+
+ ps_rc_ctxt->u4_avg_bit_rate = i4_avg_bitrate;
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ if(ps_rc_ctxt->e_rate_control_type == VBR_STREAMING)
+ {
+ ps_rc_ctxt->au4_peak_bit_rate[i] =
+ ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id]
+ .ai4_peak_bitrate[i4_bitrate_instance_id];
+ }
+ else
+ {
+ /*peak bitrate parameter is ignored in CBR*/
+ ps_rc_ctxt->au4_peak_bit_rate[i] = i4_avg_bitrate;
+ }
+ }
+ ps_rc_ctxt->u4_min_bit_rate = i4_avg_bitrate;
+
+ /*buffer size init*/
+ u4_buf_size = (WORD32)(ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id]
+ .ai4_max_vbv_buffer_size[i4_bitrate_instance_id]);
+ ps_rc_ctxt->u4_max_delay = (UWORD32)(
+ (float)u4_buf_size / i4_avg_bitrate * 1000); /*delay in milli-seconds based on buffer size*/
+ ps_rc_ctxt->u4_max_vbv_buff_size = u4_buf_size; /*buffer size should be in bits*/
+ /*This dictates the max deviation allowed for file size in VBR mode. */
+ ps_rc_ctxt->f_vbr_max_peak_sustain_dur =
+ ((float)ps_init_prms->s_config_prms.i4_vbr_max_peak_rate_dur) / 1000;
+ ps_rc_ctxt->i8_num_frms_to_encode = (WORD32)ps_init_prms->s_config_prms.i4_num_frms_to_encode;
+ i4_peak_bitrate_factor = (ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id]
+ .ai4_peak_bitrate[i4_bitrate_instance_id] /
+ i4_avg_bitrate) *
+ 1000; /* NOTE(review): integer division; the result is not read again in this function */
+ {
+ //float f_delay = ((float)ps_init_prms->s_config_prms.i4_max_vbv_buffer_size*1000)/i4_peak_bitrate_factor;
+ float f_delay = ((float)ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id]
+ .ai4_max_vbv_buffer_size[i4_bitrate_instance_id] *
+ 1000) /
+ ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id]
+ .ai4_peak_bitrate[i4_bitrate_instance_id];
+ ps_rc_ctxt->i4_initial_decoder_delay_frames = (WORD32)(
+ ((f_delay) * (ps_init_prms->s_src_prms.i4_frm_rate_num /
+ (ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id]
+ .i4_frm_rate_scale_factor *
+ ps_init_prms->s_src_prms.i4_frm_rate_denom))) /
+ 1000);
+ }
+ /*Initial buffer fullness*/
+ ps_rc_ctxt->i4_init_vbv_fullness = ps_init_prms->s_config_prms.i4_init_vbv_fullness;
+
+ /*Init Qp updation. This seems to be used for pre enc stage of second frame. Needs to be looked into*/
+ ps_rc_ctxt->i4_init_frame_qp_user = ps_init_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id]
+ .ai4_frame_qp[i4_bitrate_instance_id];
+
+ for(i = 0; i < MAX_SCENE_NUM; i++)
+ {
+ for(j = 0; j < MAX_PIC_TYPE; j++)
+ ps_rc_ctxt->ai4_prev_pic_hevc_qp[i][j] = INIT_HEVCE_QP_RC;
+ }
+ memset(&ps_rc_ctxt->ai4_scene_numbers[0], 0, sizeof(ps_rc_ctxt->ai4_scene_numbers));
+ memset(&ps_rc_ctxt->ai4_scene_num_last_pic[0], 0, sizeof(ps_rc_ctxt->ai4_scene_num_last_pic));
+ ps_rc_ctxt->ai4_last_tw0_lyr0_pic_qp[0] = ps_rc_ctxt->i4_min_frame_qp - 1;
+ ps_rc_ctxt->ai4_last_tw0_lyr0_pic_qp[1] = ps_rc_ctxt->i4_min_frame_qp - 1;
+ /* SGI & Enc Loop Parallelism related changes*/
+ for(i = 0; i < MAX_NUM_ENC_LOOP_PARALLEL; i++)
+ {
+ ps_rc_ctxt->ai8_cur_frm_intra_cost[i] = 0;
+ ps_rc_ctxt->ai8_cur_frame_coarse_ME_cost[i] = 0;
+ ps_rc_ctxt->ai4_I_model_only_reset[i] = 0;
+ ps_rc_ctxt->ai4_is_non_I_scd_pic[i] = 0;
+ ps_rc_ctxt->ai4_is_pause_to_resume[i] = 0;
+ ps_rc_ctxt->ai4_is_cmplx_change_reset_model[i] = 0;
+ ps_rc_ctxt->ai4_is_cmplx_change_reset_bits[i] = 0;
+ /*initialize assuming 30 percent intra and 70 percent inter weightage*/
+ ps_rc_ctxt->ai4_lap_complexity_q7[i] = MODERATE_LAP2_COMPLEXITY_Q7;
+
+ ps_rc_ctxt->ai4_lap_f_sim[i] = MODERATE_FSIM_VALUE;
+ }
+
+ /*Init variables required to handle entropy and rdopt consumption mismatch*/
+ ps_rc_ctxt->i4_rdopt_bit_count = 0;
+ ps_rc_ctxt->i4_entropy_bit_count = 0;
+ for(i = 0; i < NUM_BUF_RDOPT_ENT_CORRECT; i++)
+ {
+ ps_rc_ctxt->ai4_rdopt_bit_consumption_estimate[i] =
+ -1; /*negative bit signifies that value is not populated*/
+ ps_rc_ctxt->ai4_rdopt_bit_consumption_buf_id[i] = -1;
+ ps_rc_ctxt->ai4_entropy_bit_consumption[i] = -1;
+ ps_rc_ctxt->ai4_entropy_bit_consumption_buf_id[i] = -1;
+ }
+
+ /** scd model reset related param init*/
+ for(i = 0; i < MAX_NUM_TEMPORAL_LAYERS; i++)
+ {
+ ps_rc_ctxt->au4_scene_num_temp_id[i] = 0;
+ }
+ /* SGI & Enc Loop Parallelism related changes*/
+ for(i = 0; i < MAX_NUM_ENC_LOOP_PARALLEL; i++)
+ {
+ ps_rc_ctxt->ai4_is_frame_scd[i] = 0;
+ }
+
+ /*Stat file pointer passed from application*/
+ ps_rc_ctxt->pf_stat_file = NULL;
+ ps_rc_ctxt->i8_num_frame_read = 0;
+
+ return ps_rc_ctxt;
+}
+
+/*###############################################*/
+/******* END OF RC MEM INIT FUNCTIONS **********/
+/*###############################################*/
+
+/*###############################################*/
+/******* START OF RC INIT FUNCTIONS **************/
+/*###############################################*/
+/**
+******************************************************************************
+*
+* @brief Initialises the rate control context
+*
+* @par Description
+* Resets the run-time fields of the RC context, builds the QP <-> qscale
+* conversion tables in ps_rc_quant, derives the per-picture-type init and
+* min/max qscale values, picks the minimum scene-cut QP from bits per pixel,
+* calls initialise_rate_control() / rc_init_set_ebf() and finally seeds the
+* pre-enc QP state.
+*
+* @param[inout] pv_ctxt
+* pointer to the RC context (rc_context_t)
+*
+* @param[in] ps_run_time_src_param
+* source parameters; i4_field_pic, i4_frm_rate_num and i4_topfield_first are read
+*
+* @param[in] ps_tgt_params
+* target parameters; i4_height, i4_width and i4_frm_rate_scale_factor are read
+*
+* @param[inout] ps_rc_quant
+* i2_max_qp, i2_min_qp and i1_qp_offset are read; i2_max_qscale, i2_min_qscale
+* and the three conversion tables are written
+*
+* @param[in] ps_sys_api
+* stored in the context and forwarded to initialise_rate_control()
+*
+* @param[in] ps_lap_prms
+* not referenced in this function
+*
+* @param[in] i4_num_frame_parallel
+* stored in the context as is; values not greater than 1 are forwarded to
+* initialise_rate_control() as 0
+*
+* @return void
+*
+******************************************************************************
+*/
+void ihevce_rc_init(
+ void *pv_ctxt,
+ ihevce_src_params_t *ps_run_time_src_param,
+ ihevce_tgt_params_t *ps_tgt_params,
+ rc_quant_t *ps_rc_quant,
+ ihevce_sys_api_t *ps_sys_api,
+ ihevce_lap_params_t *ps_lap_prms,
+ WORD32 i4_num_frame_parallel)
+{
+ rc_context_t *ps_rc_ctxt = (rc_context_t *)pv_ctxt;
+ WORD32 i, i_temp, j;
+ float f_temp;
+
+ /*run time width and height has to considered*/
+ ps_rc_ctxt->i4_frame_height = ps_tgt_params->i4_height;
+ ps_rc_ctxt->i4_frame_width = ps_tgt_params->i4_width;
+ ps_rc_ctxt->i4_field_pic = ps_run_time_src_param->i4_field_pic;
+ ps_rc_ctxt->i8_num_bit_alloc_period = 0;
+ ps_rc_ctxt->i8_new_bitrate = -1; /*-1 indicates no dynamic change in bitrate request pending*/
+ ps_rc_ctxt->i8_new_peak_bitrate = -1;
+
+ ps_rc_ctxt->i4_is_last_frame_scan = 0;
+
+ memset(ps_rc_ctxt->ai4_offsets, 0, 5 * sizeof(WORD32));
+
+ ps_rc_ctxt->i4_complexity_bin = 5;
+ ps_rc_ctxt->i4_last_p_or_i_frame_gop = 0;
+ ps_rc_ctxt->i4_qp_at_I_frame_for_skip_sad = 1;
+ ps_rc_ctxt->i4_denominator_i_to_avg = 1;
+ ps_rc_ctxt->i4_fp_bit_alloc_in_sp = 0;
+
+ ps_rc_ctxt->ai4_offsets[0] = 0;
+ ps_rc_ctxt->ai4_offsets[1] = 1;
+ ps_rc_ctxt->ai4_offsets[2] = 2;
+ ps_rc_ctxt->ai4_offsets[3] = 3;
+ ps_rc_ctxt->ai4_offsets[4] = 4;
+
+ ps_rc_ctxt->i4_num_frames_subgop = 0;
+ ps_rc_ctxt->i8_total_acc_coarse_me_sad = 0;
+
+ ps_rc_ctxt->i4_L0_frame_qp = 1;
+
+ ps_rc_ctxt->i4_est_text_bits_ctr_get_qp = 0;
+ ps_rc_ctxt->i4_est_text_bits_ctr_update_qp = 0;
+
+ /*Callback functions need to be copied for use inside RC*/
+ ps_rc_ctxt->ps_sys_rc_api = ps_sys_api;
+
+ /* qscale limits: 2^((qp + offset - 4) / 6), max kept in Q3 */
+ f_temp = ((float)(ps_rc_quant->i2_max_qp + ps_rc_quant->i1_qp_offset - 4) / 6);
+
+ ps_rc_quant->i2_max_qscale = (WORD16)((float)pow(2, f_temp) + 0.5) << 3;
+
+ f_temp = ((float)(ps_rc_quant->i2_min_qp + ps_rc_quant->i1_qp_offset - 4) / 6);
+
+ ps_rc_quant->i2_min_qscale = (WORD16)((float)pow(2, f_temp) + 0.5);
+
+ f_temp =
+ ((float)(51 + ps_rc_quant->i1_qp_offset - 4) /
+ 6); // default MPEG2 to HEVC and HEVC to MPEG2 Qp conversion tables
+ i_temp = (WORD16)((float)pow(2, f_temp) + 0.5);
+
+ i_temp = (i_temp << 3); // Q3 format is maintained for accurate calc at lower qp
+
+ /* qscale (Q3) -> HEVC QP table, indices 0..i_temp inclusive */
+ for(i = 0; i <= i_temp; i++)
+ {
+ ps_rc_quant->pi4_qscale_to_qp[i] =
+ ihevce_rc_get_scaled_hevce_qp_q3(i, ps_rc_ctxt->u1_bit_depth);
+ }
+
+ /* HEVC QP -> qscale tables; table index is the QP biased by i1_qp_offset */
+ for(i = (0 - ps_rc_quant->i1_qp_offset); i <= 51; i++)
+ {
+ ps_rc_quant->pi4_qp_to_qscale_q_factor[i + ps_rc_quant->i1_qp_offset] =
+ ihevce_rc_get_scaled_mpeg2_qp_q6(
+ i + ps_rc_quant->i1_qp_offset, ps_rc_ctxt->u1_bit_depth);
+ ps_rc_quant->pi4_qp_to_qscale[i + ps_rc_quant->i1_qp_offset] =
+ ((ps_rc_quant->pi4_qp_to_qscale_q_factor[i + ps_rc_quant->i1_qp_offset] +
+ (1 << (QSCALE_Q_FAC_3 - 1))) >>
+ QSCALE_Q_FAC_3);
+ }
+
+ if(ps_rc_quant->i2_min_qscale < 1)
+ {
+ ps_rc_quant->i2_min_qscale = 1;
+ }
+
+ ps_rc_ctxt->ps_rc_quant_ctxt = ps_rc_quant;
+
+ /*Frame rate init*/
+ ps_rc_ctxt->u4_max_frame_rate =
+ ps_run_time_src_param->i4_frm_rate_num / ps_tgt_params->i4_frm_rate_scale_factor;
+ ps_rc_ctxt->i4_top_field_first = ps_run_time_src_param->i4_topfield_first; /**/
+ /*min and max qp initialization*/
+ if(ps_rc_ctxt->i4_field_pic == 0)
+ {
+ WORD32 i4_max_qp = 0;
+
+ if(ps_rc_ctxt->u1_bit_depth == 10)
+ {
+ i4_max_qp = MAX_HEVC_QP_10bit;
+ }
+ else if(ps_rc_ctxt->u1_bit_depth == 12)
+ {
+ i4_max_qp = MAX_HEVC_QP_12bit;
+ }
+ else
+ {
+ i4_max_qp = MAX_HEVC_QP;
+ }
+
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ if((ps_rc_ctxt->i4_init_frame_qp_user + (2 * i) +
+ ps_rc_ctxt->ps_rc_quant_ctxt->i1_qp_offset) <=
+ i4_max_qp) //BUG_FIX related to init QP allocation
+ {
+ ps_rc_ctxt->ai4_init_qp[i] = (ps_rc_ctxt->ps_rc_quant_ctxt->pi4_qp_to_qscale
+ [(ps_rc_ctxt->i4_init_frame_qp_user + (2 * i)) +
+ ps_rc_ctxt->ps_rc_quant_ctxt->i1_qp_offset] +
+ (1 << (QSCALE_Q_FAC_3 - 1))) >>
+ QSCALE_Q_FAC_3;
+ }
+ else
+ {
+ ps_rc_ctxt->ai4_init_qp[i] =
+ (ps_rc_ctxt->ps_rc_quant_ctxt->pi4_qp_to_qscale[i4_max_qp] +
+ (1 << (QSCALE_Q_FAC_3 - 1))) >>
+ QSCALE_Q_FAC_3; // + ps_rc_ctxt->ps_rc_quant_ctxt->i1_qp_offset];
+ }
+ ps_rc_ctxt->ai4_min_max_qp[i * 2] =
+ ps_rc_ctxt->ps_rc_quant_ctxt->i2_min_qscale; /*min qp for each picture type*/
+ ps_rc_ctxt->ai4_min_max_qp[i * 2 + 1] = ps_rc_ctxt->ps_rc_quant_ctxt->i2_max_qscale >>
+ QSCALE_Q_FAC_3; /*max qp for each picture type*/
+ }
+ }
+ else
+ {
+ WORD32 i4_num_pic_types = MAX_PIC_TYPE;
+ WORD32 i4_max_qp = 0;
+
+ if(ps_rc_ctxt->u1_bit_depth == 10)
+ {
+ i4_max_qp = MAX_HEVC_QP_10bit;
+ }
+ else if(ps_rc_ctxt->u1_bit_depth == 12)
+ {
+ i4_max_qp = MAX_HEVC_QP_12bit;
+ }
+ else
+ {
+ i4_max_qp = MAX_HEVC_QP;
+ }
+
+ /* field mode: only the first half of the picture types is iterated; */
+ /* entries at i + FIELD_OFFSET are filled alongside for i != 0 */
+ i4_num_pic_types >>= 1;
+
+ for(i = 0; i < i4_num_pic_types; i++)
+ {
+ if((ps_rc_ctxt->i4_init_frame_qp_user + (2 * i) +
+ ps_rc_ctxt->ps_rc_quant_ctxt->i1_qp_offset) <= i4_max_qp)
+ {
+ ps_rc_ctxt->ai4_init_qp[i] = (ps_rc_ctxt->ps_rc_quant_ctxt->pi4_qp_to_qscale
+ [(ps_rc_ctxt->i4_init_frame_qp_user + (2 * i)) +
+ ps_rc_ctxt->ps_rc_quant_ctxt->i1_qp_offset] +
+ (1 << (QSCALE_Q_FAC_3 - 1))) >>
+ QSCALE_Q_FAC_3;
+
+ if(i != 0)
+ ps_rc_ctxt->ai4_init_qp[i + FIELD_OFFSET] = ps_rc_ctxt->ai4_init_qp[i];
+ }
+ else
+ {
+ ps_rc_ctxt->ai4_init_qp[i] =
+ (ps_rc_ctxt->ps_rc_quant_ctxt->pi4_qp_to_qscale[i4_max_qp] +
+ (1 << (QSCALE_Q_FAC_3 - 1))) >>
+ QSCALE_Q_FAC_3; // + ps_rc_ctxt->ps_rc_quant_ctxt->i1_qp_offset];
+
+ if(i != 0)
+ ps_rc_ctxt->ai4_init_qp[i + FIELD_OFFSET] = ps_rc_ctxt->ai4_init_qp[i];
+ }
+ ps_rc_ctxt->ai4_min_max_qp[i * 2] =
+ ps_rc_ctxt->ps_rc_quant_ctxt->i2_min_qscale; /*min qp for each picture type*/
+ ps_rc_ctxt->ai4_min_max_qp[i * 2 + 1] = ps_rc_ctxt->ps_rc_quant_ctxt->i2_max_qscale >>
+ QSCALE_Q_FAC_3; /*max qp for each picture type*/
+ if(i != 0)
+ {
+ ps_rc_ctxt->ai4_min_max_qp[(i + FIELD_OFFSET) * 2] =
+ ps_rc_ctxt->ps_rc_quant_ctxt->i2_min_qscale; /*min qp for each picture type*/
+ /* NOTE(review): unlike every other max entry, this one is stored without */
+ /* the >> QSCALE_Q_FAC_3 shift - confirm whether that is intended */
+ ps_rc_ctxt->ai4_min_max_qp[(i + FIELD_OFFSET) * 2 + 1] =
+ ps_rc_ctxt->ps_rc_quant_ctxt->i2_max_qscale; /*max qp for each picture type*/
+ }
+ }
+ }
+
+ /* The outer loop must run on j: it previously tested and incremented i, so j */
+ /* stayed at 0 and rows other than af_sum_weigh[0] were never initialised */
+ for(j = 0; j < MAX_NUM_ENC_LOOP_PARALLEL; j++)
+ {
+ /*initialise the coeffs to 1 in case lap is not used */
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ ps_rc_ctxt->af_sum_weigh[j][i][0] = 1.0;
+ ps_rc_ctxt->af_sum_weigh[j][i][1] = 0.0;
+ ps_rc_ctxt->af_sum_weigh[j][i][2] = 0.0;
+ }
+ }
+
+ ps_rc_ctxt->i4_num_frame_parallel = i4_num_frame_parallel; //ELP_RC
+ i4_num_frame_parallel = (i4_num_frame_parallel > 1) ? i4_num_frame_parallel : 0;
+
+ if(ps_rc_ctxt->i4_num_frame_parallel > 1)
+ {
+ ps_rc_ctxt->i4_pre_enc_rc_delay = MAX_PRE_ENC_RC_DELAY;
+ }
+ else
+ {
+ ps_rc_ctxt->i4_pre_enc_rc_delay = MIN_PRE_ENC_RC_DELAY;
+ }
+ /*Bitrate and resolution based scene cut min qp*/
+ {
+ /*The min qp for scene cut frame is chosen based on bitrate*/
+ float i4_bpp = ((float)ps_rc_ctxt->u4_avg_bit_rate / ps_rc_ctxt->u4_max_frame_rate) * 1000 /
+ (ps_rc_ctxt->i4_frame_height * ps_rc_ctxt->i4_frame_width);
+ if(ps_rc_ctxt->u4_intra_frame_interval == 1)
+ {
+ /*Ultra High resolution)*/
+ if((ps_rc_ctxt->i4_frame_height * ps_rc_ctxt->i4_frame_width) > 5000000)
+ {
+ if(i4_bpp > 0.24)
+ {
+ ps_rc_ctxt->i4_min_scd_hevc_qp = SCD_MIN_HEVC_QP_VHBR;
+ }
+ else if(i4_bpp > 0.16)
+ ps_rc_ctxt->i4_min_scd_hevc_qp =
+ SCD_MIN_HEVC_QP_HBR; /*corresponds to bitrate greater than 40mbps for 4k 30p*/
+ else
+ ps_rc_ctxt->i4_min_scd_hevc_qp = SCD_MIN_HEVC_QP;
+ }
+ else
+ {
+ if(i4_bpp > 0.32)
+ {
+ ps_rc_ctxt->i4_min_scd_hevc_qp = SCD_MIN_HEVC_QP_VHBR;
+ }
+ else if(i4_bpp > 0.24)
+ ps_rc_ctxt->i4_min_scd_hevc_qp =
+ SCD_MIN_HEVC_QP_HBR; /*corresponds to bitrate greater than 15mbps for 1080 30p*/
+ else
+ ps_rc_ctxt->i4_min_scd_hevc_qp = SCD_MIN_HEVC_QP;
+ }
+ }
+ else
+ {
+ /*Ultra High resolution)*/
+ if((ps_rc_ctxt->i4_frame_height * ps_rc_ctxt->i4_frame_width) > 5000000)
+ {
+ if(i4_bpp > 0.16)
+ {
+ ps_rc_ctxt->i4_min_scd_hevc_qp = SCD_MIN_HEVC_QP_VHBR;
+ }
+ else if(i4_bpp > 0.08)
+ ps_rc_ctxt->i4_min_scd_hevc_qp =
+ SCD_MIN_HEVC_QP_HBR; /*corresponds to bitrate greater than 20mbps for 4k 30p*/
+ else
+ ps_rc_ctxt->i4_min_scd_hevc_qp = SCD_MIN_HEVC_QP;
+ }
+ else
+ {
+ /*Resolution lesser than full HD (including )*/
+ if(i4_bpp > 0.24)
+ {
+ ps_rc_ctxt->i4_min_scd_hevc_qp = SCD_MIN_HEVC_QP_VHBR;
+ }
+ else if(i4_bpp > 0.16)
+ ps_rc_ctxt->i4_min_scd_hevc_qp =
+ SCD_MIN_HEVC_QP_HBR; /*corresponds to bitrate greater than 10mbps for 1080 30p*/
+ else
+ ps_rc_ctxt->i4_min_scd_hevc_qp = SCD_MIN_HEVC_QP;
+ }
+ }
+ }
+
+ initialise_rate_control(
+ ps_rc_ctxt->rc_hdl,
+ ps_rc_ctxt->e_rate_control_type,
+ ps_rc_ctxt->u1_is_mb_level_rc_on, //0,/*disabling MB level RC*/
+ ps_rc_ctxt->u4_avg_bit_rate,
+ ps_rc_ctxt->au4_peak_bit_rate,
+ ps_rc_ctxt->u4_min_bit_rate,
+ ps_rc_ctxt->u4_max_frame_rate,
+ ps_rc_ctxt->u4_max_delay, /*max delay in milli seconds based on buffer size*/
+ ps_rc_ctxt->u4_intra_frame_interval,
+ ps_rc_ctxt->u4_idr_period,
+ ps_rc_ctxt->ai4_init_qp,
+ ps_rc_ctxt->u4_max_vbv_buff_size,
+ ps_rc_ctxt->i4_max_inter_frm_int,
+ ps_rc_ctxt->i4_is_gop_closed,
+ ps_rc_ctxt->ai4_min_max_qp, /*min and max qp to be used for each of picture type*/
+ ps_rc_ctxt->i4_use_est_intra_sad,
+ ps_rc_ctxt->u4_src_ticks,
+ ps_rc_ctxt->u4_tgt_ticks,
+ ps_rc_ctxt->i4_frame_height, /*pels in frame considering 420 semi planar format*/
+ ps_rc_ctxt->i4_frame_width,
+ ps_rc_ctxt->i4_num_active_pic_type,
+ ps_rc_ctxt->i4_field_pic,
+ ps_rc_ctxt->i4_quality_preset,
+ ps_rc_ctxt->i4_num_frame_in_lap_window,
+ ps_rc_ctxt->i4_initial_decoder_delay_frames,
+ ps_rc_ctxt->f_vbr_max_peak_sustain_dur,
+ ps_rc_ctxt->i8_num_frms_to_encode,
+ ps_rc_ctxt->i4_min_scd_hevc_qp,
+ ps_rc_ctxt->u1_bit_depth,
+ ps_rc_ctxt->pf_stat_file,
+ ps_rc_ctxt->i4_rc_pass,
+ ps_rc_ctxt->pv_gop_stat,
+ ps_rc_ctxt->i8_num_gop_mem_alloc,
+ ps_rc_ctxt->i4_is_infinite_gop,
+ sizeof(ihevce_lap_output_params_t),
+ sizeof(rc_lap_out_params_t),
+ (void *)ps_sys_api,
+ ps_rc_ctxt->i4_fp_bit_alloc_in_sp,
+ i4_num_frame_parallel,
+ ps_rc_ctxt->i4_capped_vbr_flag);
+
+ //ps_rc_ctxt->i4_init_vbv_fullness = 500000;
+ rc_init_set_ebf(ps_rc_ctxt->rc_hdl, ps_rc_ctxt->i4_init_vbv_fullness);
+
+ /*get init qp based on ebf for rate control*/
+ if(ps_rc_ctxt->e_rate_control_type != CONST_QP)
+ {
+ WORD32 I_frame_qp, I_frame_mpeg2_qp;
+ /*assume moderate fsim*/
+ WORD32 i4_fsim_global = MODERATE_FSIM_VALUE;
+ I_frame_mpeg2_qp = rc_get_bpp_based_scene_cut_qp(
+ ps_rc_ctxt->rc_hdl,
+ I_PIC,
+ ((3 * ps_rc_ctxt->i4_frame_height * ps_rc_ctxt->i4_frame_width) >> 1),
+ i4_fsim_global,
+ ps_rc_ctxt->af_sum_weigh[0],
+ 1);
+
+ I_frame_qp = ihevce_rc_get_scaled_hevc_qp_from_qs_q3(
+ I_frame_mpeg2_qp << QSCALE_Q_FAC_3, ps_rc_ctxt->ps_rc_quant_ctxt);
+
+ I_frame_qp = I_frame_qp + ps_rc_ctxt->ps_rc_quant_ctxt->i1_qp_offset;
+
+ /* the initial I-frame QP is capped at 44 */
+ if(I_frame_qp > 44)
+ I_frame_qp = 44;
+
+ ps_rc_ctxt->ai4_init_pre_enc_qp[I_PIC] = I_frame_qp;
+ ps_rc_ctxt->ai4_init_pre_enc_qp[P_PIC] = I_frame_qp + 1;
+ ps_rc_ctxt->ai4_init_pre_enc_qp[B_PIC] = I_frame_qp + 2;
+ ps_rc_ctxt->ai4_init_pre_enc_qp[B1_PIC] = I_frame_qp + 3;
+ ps_rc_ctxt->ai4_init_pre_enc_qp[B2_PIC] = I_frame_qp + 4;
+ /*Bottom fields*/
+ ps_rc_ctxt->ai4_init_pre_enc_qp[P1_PIC] = I_frame_qp + 1;
+ ps_rc_ctxt->ai4_init_pre_enc_qp[BB_PIC] = I_frame_qp + 2;
+ ps_rc_ctxt->ai4_init_pre_enc_qp[B11_PIC] = I_frame_qp + 3;
+ ps_rc_ctxt->ai4_init_pre_enc_qp[B22_PIC] = I_frame_qp + 4;
+
+ ps_rc_ctxt->i4_pre_enc_qp_read_index = 0;
+ ps_rc_ctxt->i4_pre_enc_qp_write_index = ps_rc_ctxt->i4_pre_enc_rc_delay - 1;
+ for(i = 0; i < ps_rc_ctxt->i4_pre_enc_rc_delay; i++)
+ {
+ /*initialize it to -1 to indicate it as not produced*/
+ ps_rc_ctxt->as_pre_enc_qp_queue[i].i4_is_qp_valid = -1;
+ }
+ /* entries ahead of the write index are pre-filled with the init QPs */
+ for(i = 0; i < (ps_rc_ctxt->i4_pre_enc_qp_write_index); i++)
+ {
+ ps_rc_ctxt->as_pre_enc_qp_queue[i].i4_is_qp_valid = 1;
+ for(j = 0; j < MAX_PIC_TYPE; j++)
+ {
+ ps_rc_ctxt->as_pre_enc_qp_queue[i].ai4_quant[j] =
+ ps_rc_ctxt->ai4_init_pre_enc_qp[j];
+ ps_rc_ctxt->as_pre_enc_qp_queue[i].i4_scd_qp =
+ ps_rc_ctxt->ai4_init_pre_enc_qp[I_PIC];
+ }
+ }
+
+ ps_rc_ctxt->i4_use_qp_offset_pre_enc = 1;
+ ps_rc_ctxt->i4_num_frms_from_reset = 0;
+ /* SGI & Enc Loop Parallelism related changes*/
+ ps_rc_ctxt->u4_prev_scene_num = 0;
+ //ps_rc_ctxt->i4_use_init_qp_for_pre_enc = 0;
+ for(j = 0; j < MAX_NON_REF_B_PICS_IN_QUEUE_SGI; j++)
+ {
+ ps_rc_ctxt->au4_prev_scene_num_multi_scene[j] = 0x3FFFFFFF;
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ ps_rc_ctxt->ai4_qp_for_previous_scene_multi_scene[j][i] =
+ ps_rc_ctxt->ai4_init_pre_enc_qp[i];
+ }
+ }
+
+ /* SGI & Enc Loop Parallelism related changes*/
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ ps_rc_ctxt->ai4_qp_for_previous_scene[i] = ps_rc_ctxt->ai4_init_pre_enc_qp[i];
+ }
+ }
+ else
+ {
+ /* constant QP: every picture type starts from the user supplied frame QP */
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ ps_rc_ctxt->ai4_init_pre_enc_qp[i] = ps_rc_ctxt->i4_init_frame_qp_user;
+ ps_rc_ctxt->ai4_qp_for_previous_scene[i] = ps_rc_ctxt->i4_init_frame_qp_user;
+ }
+ }
+}
+
+/**
+******************************************************************************
+*
+* @brief Populate common params from lap_out structure to rc_lap_out structure
+* Also the init of some rc_lap_out params done here
+* @par Description
+* Copies picture type, POC, temporal layer id, reference flag, scene type,
+* scene number, display number, quality preset and first-field flag from
+* ps_lap_out, then resets the remaining RC-side fields to their "not yet
+* populated" values (-1) or defaults.
+*
+* @param[in] ps_lap_out
+* pointer to lap_out structure
+*
+* @param[out] ps_rc_lap_out
+* pointer to rc_lap_out structure
+*
+* @return void
+*
+******************************************************************************
+*/
+
+void ihevce_rc_populate_common_params(
+ ihevce_lap_output_params_t *ps_lap_out, rc_lap_out_params_t *ps_rc_lap_out)
+{
+ /* Update common params */
+
+ ps_rc_lap_out->i4_rc_pic_type = ps_lap_out->i4_pic_type;
+ ps_rc_lap_out->i4_rc_poc = ps_lap_out->i4_poc;
+ ps_rc_lap_out->i4_rc_temporal_lyr_id = ps_lap_out->i4_temporal_lyr_id;
+ ps_rc_lap_out->i4_rc_is_ref_pic = ps_lap_out->i4_is_ref_pic;
+ ps_rc_lap_out->i4_rc_scene_type = ps_lap_out->i4_scene_type;
+ ps_rc_lap_out->u4_rc_scene_num = ps_lap_out->u4_scene_num;
+ ps_rc_lap_out->i4_rc_display_num = ps_lap_out->i4_display_num;
+ ps_rc_lap_out->i4_rc_quality_preset = ps_lap_out->i4_quality_preset;
+ ps_rc_lap_out->i4_rc_first_field = ps_lap_out->i4_first_field;
+
+ /*params populated in LAP-2*/
+ ps_rc_lap_out->i8_frame_acc_coarse_me_cost = -1;
+ /* All bytes are set to 0xFF so that every element reads back as -1. */
+ /* The size comes from the member itself: the earlier sizeof(WORD32) * 52 */
+ /* covers only part of the array if its elements are wider than WORD32 */
+ /* (the ai8_ prefix suggests 64-bit entries). */
+ /* NOTE(review): assumes the member is an array, not a pointer - confirm */
+ /* against the rc_lap_out_params_t definition. */
+ memset(
+ ps_rc_lap_out->ai8_frame_acc_coarse_me_sad,
+ -1,
+ sizeof(ps_rc_lap_out->ai8_frame_acc_coarse_me_sad));
+
+ ps_rc_lap_out->i8_pre_intra_satd = -1;
+
+ ps_rc_lap_out->i8_raw_pre_intra_sad = -1;
+
+ ps_rc_lap_out->i8_raw_l1_coarse_me_sad = -1;
+
+ ps_rc_lap_out->i4_is_rc_model_needs_to_be_updated = 1;
+ /* SGI & Enc Loop Parallelism related changes*/
+ ps_rc_lap_out->i4_ignore_for_rc_update = 0;
+
+ /*For 1 pass HQ I frames*/
+
+ ps_rc_lap_out->i4_complexity_bin = 5;
+ {
+ /* default offsets 0..4; the flag is set to -1 alongside them */
+ WORD32 ai4_offsets[5] = { 0, 1, 2, 3, 4 };
+ memmove(ps_rc_lap_out->ai4_offsets, ai4_offsets, sizeof(WORD32) * 5);
+ ps_rc_lap_out->i4_offsets_set_flag = -1;
+ }
+
+ ps_rc_lap_out->i4_L1_qp = -1;
+ ps_rc_lap_out->i4_L0_qp = -1;
+}
+
+/*###############################################*/
+/******* END OF RC INIT FUNCTIONS **************/
+/*###############################################*/
+
+/*#########################################################*/
+/******* START OF PRE-ENC QP QUERY FUNCTIONS **************/
+/*#######################################################*/
+
+/**
+******************************************************************************
+*
+* @name ihevce_rc_get_bpp_based_frame_qp
+*
+* @par Description
+* Computes a frame qp for the pre-enc stage. When the current frame is a
+* scene cut, or the estimated L0 intra sad is not yet available, the qp comes
+* from rc_get_bpp_based_scene_cut_qp. Otherwise an I to rest bit ratio is
+* derived from the previously registered sad/satd values and the qp comes from
+* rc_get_qp_for_scd_frame. The result is converted to an hevc qp, offset,
+* capped at max qp, increased by (temporal layer id + 1) for non I pictures,
+* clipped, and stored in ai4_qp_for_previous_scene_pre_enc together with the
+* scene number in au4_prev_scene_num_pre_enc.
+*
+* @param[in] pv_rc_ctxt - pointer to rc context (cast to rc_context_t *)
+* @param[in] ps_rc_lap_out - pointer to rc_lap_out structure of current frame
+* @return frame qp
+*
+******************************************************************************
+*/
+WORD32 ihevce_rc_get_bpp_based_frame_qp(void *pv_rc_ctxt, rc_lap_out_params_t *ps_rc_lap_out)
+{
+ rc_context_t *ps_rc_ctxt = (rc_context_t *)pv_rc_ctxt;
+ WORD32 i4_frame_qs_q3, i4_hevc_frame_qp, i;
+ /* NOTE(review): ps_frame_info is declared but never used in this function */
+ frame_info_t *ps_frame_info;
+ picture_type_e rc_pic_type = ihevce_rc_conv_pic_type(
+ (IV_PICTURE_CODING_TYPE_T)ps_rc_lap_out->i4_rc_pic_type,
+ ps_rc_ctxt->i4_field_pic,
+ ps_rc_lap_out->i4_rc_temporal_lyr_id,
+ ps_rc_lap_out->i4_is_bottom_field,
+ ps_rc_ctxt->i4_top_field_first);
+ /*initialise the coeffs to 1 in case lap is not used */
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ ps_rc_ctxt->af_sum_weigh[0][i][0] = 1.0;
+ ps_rc_ctxt->af_sum_weigh[0][i][1] = 0.0;
+ ps_rc_ctxt->af_sum_weigh[0][i][2] = 0.0;
+ }
+ {
+ /*scene cut handling during pre-enc stage*/
+ /*assume lap fsim as 117. not used since ratio is directly sent*/
+ if(ps_rc_lap_out->i4_rc_scene_type == SCENE_TYPE_SCENE_CUT)
+ {
+ /* a scene cut invalidates all previously registered sad/satd data */
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ ps_rc_ctxt->ai8_prev_frame_est_L0_satd[i] = -1;
+ ps_rc_ctxt->ai8_prev_frame_hme_sad[i] = -1;
+ ps_rc_ctxt->ai8_prev_frame_pre_intra_sad[i] = -1;
+ }
+ ps_rc_ctxt->i4_is_est_L0_intra_sad_available = 0;
+ }
+
+ if(ps_rc_lap_out->i4_rc_scene_type == SCENE_TYPE_SCENE_CUT ||
+ !ps_rc_ctxt->i4_is_est_L0_intra_sad_available)
+ {
+ /*compute bpp based qp if current frame is scene cut or data is not sufficient*/
+ /* (3 * num_pels) >> 1 is 1.5 times the considered pel count */
+ i4_frame_qs_q3 = rc_get_bpp_based_scene_cut_qp(
+ ps_rc_ctxt->rc_hdl,
+ I_PIC,
+ ((3 * ps_rc_lap_out->i4_num_pels_in_frame_considered) >> 1),
+ 117,
+ ps_rc_ctxt->af_sum_weigh[0],
+ 0);
+ i4_frame_qs_q3 = i4_frame_qs_q3 << QSCALE_Q_FAC_3;
+ }
+ else
+ {
+ /*using previous one sub-gop data calculate i to rest ratio and qp assuming it is I frame*/
+ /* NOTE: the 'i' declared here shadows the function level 'i' inside this block */
+ WORD32 i4_num_b, i, ai4_pic_dist[MAX_PIC_TYPE], index, i4_total_bits;
+ LWORD64 i8_average_pre_intra_sad = 0, i8_average_est_l0_satd_by_act = 0;
+ double lambda_modifier[MAX_PIC_TYPE], complexity[MAX_PIC_TYPE], den = 0.0f,
+ i_to_rest_bit_ratio;
+ WORD32 i4_curr_bits_estimated = 0;
+
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ complexity[i] = 0;
+ lambda_modifier[i] = 0;
+ ai4_pic_dist[i] = 0;
+ }
+
+ index = ihevce_get_offline_index(
+ ps_rc_ctxt, ps_rc_lap_out->i4_num_pels_in_frame_considered);
+ /* i4_num_b = 2^max_temporal_lyr - 1, or 0 when there are no temporal layers */
+ if(ps_rc_ctxt->i4_max_temporal_lyr)
+ {
+ i4_num_b = ((WORD32)pow((float)2, ps_rc_ctxt->i4_max_temporal_lyr)) - 1;
+ }
+ else
+ {
+ i4_num_b = 0;
+ }
+
+ /* lambda modifiers per picture type; each deeper type is scaled by a
+ further factor of 1.125. They are consumed only when USE_SQRT is set */
+ lambda_modifier[I_PIC] =
+ ihevce_get_frame_lambda_modifier((WORD8)I_PIC, 0, 1, 1, i4_num_b);
+ lambda_modifier[P_PIC] =
+ ihevce_get_frame_lambda_modifier((WORD8)P_PIC, 0, 1, 1, i4_num_b) *
+ pow((float)1.125, 1);
+ lambda_modifier[B_PIC] =
+ ihevce_get_frame_lambda_modifier(
+ (WORD8)B_PIC, 1, (ps_rc_ctxt->i4_max_temporal_lyr > 1), 1, i4_num_b) *
+ pow((float)1.125, 2);
+ lambda_modifier[B1_PIC] =
+ ihevce_get_frame_lambda_modifier(
+ (WORD8)B1_PIC, 2, 1, (ps_rc_ctxt->i4_max_temporal_lyr > 2), i4_num_b) *
+ pow((float)1.125, 3);
+ lambda_modifier[B2_PIC] =
+ ihevce_get_frame_lambda_modifier((WORD8)B2_PIC, 3, 1, 0, i4_num_b) *
+ pow((float)1.125, 4);
+
+ /*consider average of one sub-gop for intra sad*/
+
+ if(ps_rc_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6)
+ {
+ /* P6 preset: only the first two picture types are accumulated */
+ for(i = 0; i < 2; i++)
+ {
+ i8_average_pre_intra_sad += ps_rc_ctxt->ai8_prev_frame_pre_intra_sad[i];
+ i8_average_est_l0_satd_by_act += ps_rc_ctxt->ai8_prev_frame_est_L0_satd[i];
+ if(ps_rc_ctxt->i4_field_pic == 1 && i != 0)
+ {
+ i8_average_pre_intra_sad +=
+ ps_rc_ctxt->ai8_prev_frame_pre_intra_sad[i + FIELD_OFFSET];
+ i8_average_est_l0_satd_by_act +=
+ ps_rc_ctxt->ai8_prev_frame_est_L0_satd[i + FIELD_OFFSET];
+ }
+ }
+ if(ps_rc_ctxt->i4_field_pic == 1)
+ {
+ /* three values were accumulated in field mode (index 0, 1 and 1 + FIELD_OFFSET) */
+ i8_average_pre_intra_sad /= 3;
+ i8_average_est_l0_satd_by_act /= 3;
+ }
+ else
+ {
+ /* NOTE(review): the sum of two values is doubled here, whereas every
+ other branch divides by the number of accumulated values - confirm
+ that this scaling is intentional */
+ i8_average_pre_intra_sad <<= 1;
+ i8_average_est_l0_satd_by_act <<= 1;
+ }
+ }
+ else
+ {
+ for(i = 0; i < ps_rc_ctxt->i4_num_active_pic_type; i++)
+ {
+ i8_average_pre_intra_sad += ps_rc_ctxt->ai8_prev_frame_pre_intra_sad[i];
+ i8_average_est_l0_satd_by_act += ps_rc_ctxt->ai8_prev_frame_est_L0_satd[i];
+ if(ps_rc_ctxt->i4_field_pic == 1 && i != 0)
+ {
+ i8_average_pre_intra_sad +=
+ ps_rc_ctxt->ai8_prev_frame_pre_intra_sad[i + FIELD_OFFSET];
+ i8_average_est_l0_satd_by_act +=
+ ps_rc_ctxt->ai8_prev_frame_est_L0_satd[i + FIELD_OFFSET];
+ }
+ }
+ /* after the loop i equals i4_num_active_pic_type; the divisors below
+ are the number of values accumulated above.
+ NOTE(review): assumes i4_num_active_pic_type >= 1, otherwise the
+ frame mode division is by zero - confirm against init code */
+ if(ps_rc_ctxt->i4_field_pic == 1)
+ {
+ i8_average_pre_intra_sad /= ((i << 1) - 1);
+ i8_average_est_l0_satd_by_act /= ((i << 1) - 1);
+ }
+ else
+ {
+ i8_average_pre_intra_sad /= i;
+ i8_average_est_l0_satd_by_act /= i;
+ }
+ }
+
+ /*no lambda modifier is considered for I pic as other lambda are scaled according to I frame lambda*/
+ complexity[I_PIC] = (double)i8_average_pre_intra_sad;
+
+ /* complexity of the inter picture types from their registered hme sad */
+ for(i = 1; i < ps_rc_ctxt->i4_num_active_pic_type; i++)
+ {
+#if !USE_SQRT
+ complexity[i] = ps_rc_ctxt->ai8_prev_frame_hme_sad[i] / pow(1.125, i);
+
+ if(ps_rc_ctxt->i4_field_pic == 1)
+ {
+ complexity[i + FIELD_OFFSET] =
+ ps_rc_ctxt->ai8_prev_frame_hme_sad[i + FIELD_OFFSET] / pow(1.125, i);
+ }
+#else
+ /* NOTE: this variant does not fill the [i + FIELD_OFFSET] entries */
+ complexity[i] = ps_rc_ctxt->ai8_prev_frame_hme_sad[i] /
+ (sqrt(lambda_modifier[i] / lambda_modifier[I_PIC]) * pow(1.125, i));
+#endif
+ }
+ /*get picture type distribution in LAP*/
+ rc_get_pic_distribution(ps_rc_ctxt->rc_hdl, &ai4_pic_dist[0]);
+
+ /* den = sum over all picture types of complexity * picture count */
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ den += complexity[i] * ai4_pic_dist[i];
+ }
+ /*subtract I frame complexity to get I to rest ratio*/
+ {
+ WORD32 num_inter_pic = 0;
+ for(i = 1; i < MAX_PIC_TYPE; i++)
+ {
+ num_inter_pic += ai4_pic_dist[i];
+ }
+ /* average complexity per inter picture; with no inter pictures the
+ I complexity itself is used, which makes the ratio below 1 */
+ if(num_inter_pic > 0)
+ den = (den - (complexity[I_PIC] * ai4_pic_dist[I_PIC])) / num_inter_pic;
+ else
+ den = complexity[I_PIC];
+ }
+
+ /* fall back to a fixed ratio of 15 when den is not positive */
+ if(den > 0)
+ i_to_rest_bit_ratio = (float)((complexity[I_PIC]) / den);
+ else
+ i_to_rest_bit_ratio = 15;
+
+ /*get qp for scene cut frame based on offline data*/
+ i4_frame_qs_q3 = rc_get_qp_for_scd_frame(
+ ps_rc_ctxt->rc_hdl,
+ I_PIC,
+ i8_average_est_l0_satd_by_act,
+ ps_rc_lap_out->i4_num_pels_in_frame_considered,
+ -1,
+ MODERATE_FSIM_VALUE,
+ (void *)&g_offline_i_model_coeff[index][0],
+ (float)i_to_rest_bit_ratio,
+ 0,
+ ps_rc_ctxt->af_sum_weigh[0],
+ ps_rc_lap_out->ps_frame_info,
+ ps_rc_ctxt->i4_rc_pass,
+ 0,
+ 0,
+ 0,
+ &i4_total_bits,
+ &i4_curr_bits_estimated,
+ ps_rc_lap_out->i4_use_offline_model_2pass,
+ 0,
+ 0,
+ -1,
+ NULL);
+ }
+
+ /* convert the q3 qscale to an hevc qp, apply the qp offset and cap at max qp */
+ i4_hevc_frame_qp =
+ ihevce_rc_get_scaled_hevc_qp_from_qs_q3(i4_frame_qs_q3, ps_rc_ctxt->ps_rc_quant_ctxt);
+
+ i4_hevc_frame_qp = i4_hevc_frame_qp + ps_rc_ctxt->ps_rc_quant_ctxt->i1_qp_offset;
+
+ if(i4_hevc_frame_qp > ps_rc_ctxt->ps_rc_quant_ctxt->i2_max_qp)
+ i4_hevc_frame_qp = ps_rc_ctxt->ps_rc_quant_ctxt->i2_max_qp;
+
+ /*offset depending on current picture type*/
+ if(rc_pic_type != I_PIC)
+ i4_hevc_frame_qp += ps_rc_lap_out->i4_rc_temporal_lyr_id + 1;
+ /*clip min and max qp to be within range*/
+ i4_hevc_frame_qp = ihevce_clip_min_max_qp(
+ ps_rc_ctxt, i4_hevc_frame_qp, rc_pic_type, ps_rc_lap_out->i4_rc_temporal_lyr_id);
+
+ /* remember the qp and the scene number for this picture type */
+ ps_rc_ctxt->ai4_qp_for_previous_scene_pre_enc[rc_pic_type] = i4_hevc_frame_qp;
+ ps_rc_ctxt->au4_prev_scene_num_pre_enc[rc_pic_type] = ps_rc_lap_out->u4_rc_scene_num;
+ }
+
+ return i4_hevc_frame_qp;
+}
+/**
+******************************************************************************
+*
+* @name ihevce_rc_get_pre_enc_pic_quant
+*
+* @par Description - Called from ihevce_rc_cal_pre_enc_qp. Queries the rc
+* model for the frame qp of the given picture type, which
+* will be used by next frame of same pic type in pre-enc
+* stage. The qp is limited relative to the previously
+* coded picture qp and clipped to the configured range.
+*
+* @param[in] pv_ctxt - pointer to rc context (cast to rc_context_t *)
+* @param[in] rc_pic_type - rc picture type for which qp is queried
+* @param[out] pi4_scd_qp - bpp based scene cut qp when rc_pic_type is I_PIC,
+* otherwise the model based hevc qp
+* @return frame qp for rc_pic_type
+*
+******************************************************************************
+*/
+WORD32
+ ihevce_rc_get_pre_enc_pic_quant(void *pv_ctxt, picture_type_e rc_pic_type, WORD32 *pi4_scd_qp)
+{
+ rc_context_t *ps_rc_ctxt = (rc_context_t *)pv_ctxt;
+ WORD32 i4_frame_qp, i4_frame_qp_q6, i4_hevc_frame_qp = -1;
+ WORD32 i4_max_frame_bits = (1 << 30);
+ WORD32 i4_temporal_layer_id, i4_is_bottom_field, i4_cur_est_texture_bits;
+
+ ihevce_rc_get_pic_param(rc_pic_type, &i4_temporal_layer_id, &i4_is_bottom_field);
+
+ {
+ WORD32 is_scd_ref_frame = 0, i4_num_scd_in_lap_window = 0, num_frames_b4_scd = 0;
+
+ /*treat even first frame as scd frame*/
+ if(!ps_rc_ctxt->i4_is_first_frame_encoded)
+ {
+ is_scd_ref_frame = 1;
+ }
+
+ {
+ /*Only I frames are considered as scd pic during pre-enc*/
+ is_scd_ref_frame &= (rc_pic_type == I_PIC);
+ }
+ /* NOTE(review): is_scd_ref_frame is not read after this point; its only
+ consumer is the commented out condition below */
+
+ /* both counts are always passed as 0 from here */
+ rc_set_num_scd_in_lap_window(
+ ps_rc_ctxt->rc_hdl, i4_num_scd_in_lap_window, num_frames_b4_scd);
+
+ /** Pre-enc thread as of now SCD handling is not present */
+ //if(!(is_scd_ref_frame || ps_rc_ctxt->i4_is_pause_to_resume) || call_type == PRE_ENC_GET_QP)
+ {
+ WORD32 i4_is_first_frame_coded;
+ /*Once first frame has been encoded use prev frame intra satd and cur frame satd to alter est intra sad for cur frame*/
+ /* NOTE(review): the value stored here is not used later in this function */
+ i4_is_first_frame_coded = is_first_frame_coded(ps_rc_ctxt->rc_hdl);
+ {
+ int i;
+ WORD32 i4_curr_bits_estimated, i4_is_model_valid;
+ /*initialise the coeffs to 1 and 0 in case lap is not used */
+ /* NOTE: only entries [0] and [1] are written here; entry [2] is left as is */
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ ps_rc_ctxt->af_sum_weigh[0][i][0] = 1.0;
+ ps_rc_ctxt->af_sum_weigh[0][i][1] = 0.0;
+ }
+
+ i4_frame_qp_q6 = get_frame_level_qp(
+ ps_rc_ctxt->rc_hdl,
+ rc_pic_type,
+ i4_max_frame_bits,
+ &i4_cur_est_texture_bits, //this value is returned by rc
+ ps_rc_ctxt->af_sum_weigh[0],
+ 0,
+ 8.0f,
+ NULL,
+ ps_rc_ctxt->i4_complexity_bin,
+ ps_rc_ctxt->i4_scene_num_latest, /*no pause resume concept*/
+ &i4_curr_bits_estimated,
+ &i4_is_model_valid,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL,
+ NULL);
+
+ /** The usage of global table will truncate the input given as qp format and hence will not return very low qp values desirable at very
+ low bitrate. Hence on the fly calculation is enabled*/
+ i4_hevc_frame_qp =
+ ihevce_rc_get_scaled_hevce_qp_q6(i4_frame_qp_q6, ps_rc_ctxt->u1_bit_depth);
+
+ if(rc_pic_type == I_PIC)
+ {
+ /*scene cut handling during pre-enc stage*/
+ /* (3 * height * width) >> 1 is 1.5 times the frame dimensions product */
+ i4_frame_qp = rc_get_bpp_based_scene_cut_qp(
+ ps_rc_ctxt->rc_hdl,
+ rc_pic_type,
+ ((3 * ps_rc_ctxt->i4_frame_height * ps_rc_ctxt->i4_frame_width) >> 1),
+ ps_rc_ctxt->ai4_lap_f_sim[0],
+ ps_rc_ctxt->af_sum_weigh[0],
+ 0);
+
+ /* convert to hevc qp, apply the qp offset and cap at max qp */
+ *pi4_scd_qp = ihevce_rc_get_scaled_hevc_qp_from_qs_q3(
+ i4_frame_qp << QSCALE_Q_FAC_3, ps_rc_ctxt->ps_rc_quant_ctxt);
+ *pi4_scd_qp = *pi4_scd_qp + ps_rc_ctxt->ps_rc_quant_ctxt->i1_qp_offset;
+ if(*pi4_scd_qp > ps_rc_ctxt->ps_rc_quant_ctxt->i2_max_qp)
+ *pi4_scd_qp = ps_rc_ctxt->ps_rc_quant_ctxt->i2_max_qp;
+ }
+ else
+ {
+ /*scene cut qp is only valid when queried for I_PIC*/
+ *pi4_scd_qp = i4_hevc_frame_qp;
+ }
+ }
+ }
+
+ ASSERT(i4_hevc_frame_qp >= (-ps_rc_ctxt->ps_rc_quant_ctxt->i1_qp_offset));
+
+ /*constraint qp swing based on neighbour frames*/
+ if(is_first_frame_coded(ps_rc_ctxt->rc_hdl))
+ {
+ if(ps_rc_ctxt->i4_field_pic == 0)
+ {
+ /* frame case: for picture types other than I and P the qp is kept
+ within [prev, prev + 3], where prev is the stored qp of picture
+ type (rc_pic_type - 1) in the latest scene */
+ if((rc_pic_type != I_PIC && rc_pic_type != P_PIC) &&
+ i4_hevc_frame_qp >
+ ps_rc_ctxt->ai4_prev_pic_hevc_qp[ps_rc_ctxt->i4_scene_num_latest]
+ [rc_pic_type - 1] +
+ 3)
+ {
+ /*allow max of +3 compared to previous frame*/
+ i4_hevc_frame_qp =
+ ps_rc_ctxt->ai4_prev_pic_hevc_qp[ps_rc_ctxt->i4_scene_num_latest]
+ [rc_pic_type - 1] +
+ 3;
+ }
+ if((rc_pic_type != I_PIC && rc_pic_type != P_PIC) &&
+ i4_hevc_frame_qp <
+ (ps_rc_ctxt->ai4_prev_pic_hevc_qp[ps_rc_ctxt->i4_scene_num_latest]
+ [rc_pic_type - 1]))
+ {
+ i4_hevc_frame_qp =
+ ps_rc_ctxt->ai4_prev_pic_hevc_qp[ps_rc_ctxt->i4_scene_num_latest]
+ [rc_pic_type - 1];
+ }
+
+ /** Force non-ref B pic qp to be ref_B_PIC_qp - 1. This is not valid for when max temporal layer is less than 2*/
+ /* NOTE(review): the comment above says "- 1" but the code adds 1 to
+ the previous picture type qp - confirm which one is intended */
+ if(i4_temporal_layer_id == ps_rc_ctxt->i4_max_temporal_lyr &&
+ ps_rc_ctxt->i4_max_temporal_lyr > 1)
+ {
+ i4_hevc_frame_qp =
+ ps_rc_ctxt->ai4_prev_pic_hevc_qp[ps_rc_ctxt->i4_scene_num_latest]
+ [rc_pic_type - 1] +
+ 1;
+ }
+ }
+ else /*for field case*/
+ {
+ /* field case: same limits as above, but the reference qp is indexed
+ by i4_temporal_layer_id instead of (rc_pic_type - 1) */
+ if(i4_temporal_layer_id >= 1)
+ {
+ /*To make the comparison of qp with the top field's of previous layer tempor layer id matches with the pic type. */
+ if(i4_hevc_frame_qp >
+ ps_rc_ctxt->ai4_prev_pic_hevc_qp[ps_rc_ctxt->i4_scene_num_latest]
+ [i4_temporal_layer_id] +
+ 3)
+ {
+ /*allow max of +3 compared to previous frame*/
+ i4_hevc_frame_qp =
+ ps_rc_ctxt->ai4_prev_pic_hevc_qp[ps_rc_ctxt->i4_scene_num_latest]
+ [i4_temporal_layer_id] +
+ 3;
+ }
+ if(i4_hevc_frame_qp <
+ ps_rc_ctxt->ai4_prev_pic_hevc_qp[ps_rc_ctxt->i4_scene_num_latest]
+ [i4_temporal_layer_id])
+ {
+ i4_hevc_frame_qp =
+ ps_rc_ctxt->ai4_prev_pic_hevc_qp[ps_rc_ctxt->i4_scene_num_latest]
+ [i4_temporal_layer_id];
+ }
+ /** Force non-ref B pic qp to be ref_B_PIC_qp - 1. This is not valid for when max temporal layer is less than 2*/
+ /* NOTE(review): as in the frame case, the code adds 1 although the
+ comment says "- 1" */
+ if(i4_temporal_layer_id == ps_rc_ctxt->i4_max_temporal_lyr &&
+ ps_rc_ctxt->i4_max_temporal_lyr > 1)
+ {
+ i4_hevc_frame_qp =
+ ps_rc_ctxt->ai4_prev_pic_hevc_qp[ps_rc_ctxt->i4_scene_num_latest]
+ [i4_temporal_layer_id] +
+ 1;
+ }
+ }
+ }
+ }
+
+#if USE_USER_FIRST_FRAME_QP
+ /*I_PIC check is necessary coz pre-enc can query for qp even before first frame update has happened*/
+ if(!ps_rc_ctxt->i4_is_first_frame_encoded && rc_pic_type == I_PIC)
+ {
+ i4_hevc_frame_qp = ps_rc_ctxt->i4_init_frame_qp_user;
+ DBG_PRINTF("FIXED START QP PATH *************************\n");
+ }
+#endif
+ /**clip to min qp which is user configurable*/
+ i4_hevc_frame_qp =
+ ihevce_clip_min_max_qp(ps_rc_ctxt, i4_hevc_frame_qp, rc_pic_type, i4_temporal_layer_id);
+
+ return i4_hevc_frame_qp;
+ }
+}
+/**
+******************************************************************************
+*
+* @name ihevce_rc_cal_pre_enc_qp
+*
+* @par Description - Called from enc_loop_init. Fills the pre-enc qp queue
+*      entry at the current write index with one qp per active picture type
+*      (plus the bottom field types in field mode) and the scene cut qp
+*      returned for I_PIC, marks the entry valid and advances the write
+*      index. Nothing is done in CONST_QP mode.
+*
+* @param[in] pv_rc_ctxt - pointer to rc context
+* @return void
+*
+******************************************************************************
+*/
+void ihevce_rc_cal_pre_enc_qp(void *pv_rc_ctxt)
+{
+    rc_context_t *ps_rc_ctxt = (rc_context_t *)pv_rc_ctxt;
+    WORD32 i4_pic_idx, i4_wr_idx, i4_qp, i4_scd_qp;
+
+    /* the queue is only maintained when rate control is active */
+    if(ps_rc_ctxt->e_rate_control_type == CONST_QP)
+    {
+        return;
+    }
+
+    /* the write index does not change while the entry is being filled */
+    i4_wr_idx = ps_rc_ctxt->i4_pre_enc_qp_write_index;
+
+    //DBG_PRINTF("\ncheck query read = %d write = %d",ps_rc_ctxt->i4_pre_enc_qp_read_index,ps_rc_ctxt->i4_pre_enc_qp_write_index);
+#if DETERMINISTIC_RC
+    ASSERT(ps_rc_ctxt->as_pre_enc_qp_queue[i4_wr_idx].i4_is_qp_valid == -1);
+#endif
+
+    for(i4_pic_idx = 0; i4_pic_idx < ps_rc_ctxt->i4_num_active_pic_type; i4_pic_idx++)
+    {
+        i4_qp = ihevce_rc_get_pre_enc_pic_quant(
+            ps_rc_ctxt, (picture_type_e)i4_pic_idx, &i4_scd_qp);
+        ps_rc_ctxt->as_pre_enc_qp_queue[i4_wr_idx].ai4_quant[i4_pic_idx] = i4_qp;
+
+        if(0 == i4_pic_idx)
+        {
+            /*returns valid scene cut qp only when queried as I_PIC*/
+            ps_rc_ctxt->as_pre_enc_qp_queue[i4_wr_idx].i4_scd_qp = i4_scd_qp;
+        }
+        else if(ps_rc_ctxt->i4_field_pic)
+        {
+            /* field mode: also query the picture type offset by FIELD_OFFSET */
+            i4_qp = ihevce_rc_get_pre_enc_pic_quant(
+                ps_rc_ctxt, (picture_type_e)(i4_pic_idx + FIELD_OFFSET), &i4_scd_qp);
+            ps_rc_ctxt->as_pre_enc_qp_queue[i4_wr_idx].ai4_quant[i4_pic_idx + FIELD_OFFSET] =
+                i4_qp;
+        }
+    }
+
+    /*mark index as populated*/
+    ps_rc_ctxt->as_pre_enc_qp_queue[i4_wr_idx].i4_is_qp_valid = 1;
+
+    /* advance the write index, wrapping at the pre-enc rc delay */
+    ps_rc_ctxt->i4_pre_enc_qp_write_index = (i4_wr_idx + 1) % ps_rc_ctxt->i4_pre_enc_rc_delay;
+}
+/**
+******************************************************************************
+*
+* @brief function to get updated qp after L1 analysis for L0.
+*        This uses estimated L0 satd based on L1 satd/act
+*
+* @par Description
+*  Estimates the header bits for a scene change frame, queries the offline
+*  model based qp through rc_get_qp_for_scd_frame (always as I_PIC), converts
+*  it to an hevc qp, applies the qp offset and clamps the result.
+*
+* @param[in] pv_rc_ctxt
+*  void pointer to rc ctxt
+* @param[in] ps_rc_lap_out
+*  pointer to lap out structure
+* @param[in] i8_est_L0_satd_act
+*  estimated L0 satd/act based on L1 satd/act
+* @param[in] i_to_avg_rest_ratio
+*  I to average rest ratio passed on to the rc library calls
+*
+* @return hevc qp after clamping
+*
+******************************************************************************
+*/
+WORD32 ihevce_get_L0_est_satd_based_scd_qp(
+    void *pv_rc_ctxt,
+    rc_lap_out_params_t *ps_rc_lap_out,
+    LWORD64 i8_est_L0_satd_act,
+    float i_to_avg_rest_ratio)
+{
+    rc_context_t *ps_ctxt = (rc_context_t *)pv_rc_ctxt;
+    WORD32 i4_qs_q3, i4_qp, i4_hdr_bits, i4_model_idx, i4_pic, i4_qp_floor;
+    WORD32 i4_total_bits, i4_curr_bits_estimated;
+    picture_type_e rc_pic_type;
+
+    /* converted picture type is computed as in the original flow; the qp
+       query below is always made for I_PIC */
+    rc_pic_type = ihevce_rc_conv_pic_type(
+        (IV_PICTURE_CODING_TYPE_T)ps_rc_lap_out->i4_rc_pic_type,
+        ps_ctxt->i4_field_pic,
+        ps_rc_lap_out->i4_rc_temporal_lyr_id,
+        ps_rc_lap_out->i4_is_bottom_field,
+        ps_ctxt->i4_top_field_first);
+
+    /*initialise the coeffs to 1 in case lap is not used */
+    for(i4_pic = 0; i4_pic < MAX_PIC_TYPE; i4_pic++)
+    {
+        ps_ctxt->af_sum_weigh[0][i4_pic][0] = 1.0;
+        ps_ctxt->af_sum_weigh[0][i4_pic][1] = 0.0;
+    }
+
+    /*get bits to find estimate of header bits*/
+    i4_hdr_bits = rc_get_scene_change_est_header_bits(
+        ps_ctxt->rc_hdl,
+        ps_rc_lap_out->i4_num_pels_in_frame_considered,
+        ps_ctxt->ai4_lap_f_sim[0],
+        ps_ctxt->af_sum_weigh[0],
+        i_to_avg_rest_ratio);
+
+    /* row of the offline I model coefficients for this frame size */
+    i4_model_idx =
+        ihevce_get_offline_index(ps_ctxt, ps_rc_lap_out->i4_num_pels_in_frame_considered);
+
+    i4_qs_q3 = rc_get_qp_for_scd_frame(
+        ps_ctxt->rc_hdl,
+        I_PIC,
+        i8_est_L0_satd_act,
+        ps_rc_lap_out->i4_num_pels_in_frame_considered,
+        i4_hdr_bits,
+        ps_ctxt->ai4_lap_f_sim[0],
+        (void *)&g_offline_i_model_coeff[i4_model_idx][0],
+        i_to_avg_rest_ratio,
+        0, /* true scd flag */
+        ps_ctxt->af_sum_weigh[0],
+        ps_rc_lap_out->ps_frame_info,
+        ps_ctxt->i4_rc_pass,
+        0,
+        0,
+        0,
+        &i4_total_bits,
+        &i4_curr_bits_estimated,
+        ps_rc_lap_out->i4_use_offline_model_2pass,
+        0,
+        0,
+        -1,
+        NULL);
+
+    /* q3 qscale -> hevc qp, then apply the qp offset */
+    i4_qp = ihevce_rc_get_scaled_hevc_qp_from_qs_q3(i4_qs_q3, ps_ctxt->ps_rc_quant_ctxt);
+    i4_qp += ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset;
+
+    if(i4_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qp)
+    {
+        i4_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qp;
+    }
+
+    /* since outside RC the QP range is -12 to 51 for 10 bit */
+    i4_qp_floor = SCD_MIN_HEVC_QP - ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset;
+
+    if(i4_qp < i4_qp_floor)
+    {
+        i4_qp = i4_qp_floor;
+    }
+    else if(i4_qp > SCD_MAX_HEVC_QP)
+    {
+        i4_qp = SCD_MAX_HEVC_QP;
+    }
+    /*this is done outside loop*/
+
+    return i4_qp;
+}
+/**
+******************************************************************************
+*
+* @name ihevce_rc_pre_enc_qp_query
+*
+* @par Description - Called from pre enc thrd for getting the qp of non scd
+ frames. updates frame qp from reverse queue from enc loop
+ when its available
+*
+* @param[in] pv_rc_ctxt - pointer to rc context
+* @param[in] ps_rc_lap_out - pointer to rc_lap_out structure of current frame
+* @param[in] i4_update_delay : The Delay in the update. This can happen for dist. case!
+* All decision should consider this delay for updation!
+*
+* @return frame qp. In CONST_QP mode this is the value returned by
+* ihevce_get_cur_frame_qp; otherwise it is clipped to
+* [i4_min_frame_qp, i4_max_frame_qp] of the rc context
+*
+******************************************************************************
+*/
+
+WORD32 ihevce_rc_pre_enc_qp_query(
+ void *pv_rc_ctxt, rc_lap_out_params_t *ps_rc_lap_out, WORD32 i4_update_delay)
+{
+ WORD32 scene_type, i4_is_scd = 0, i4_frame_qp, slice_type;
+ rc_context_t *ps_rc_ctxt = (rc_context_t *)pv_rc_ctxt;
+ rc_type_e e_rc_type = ps_rc_ctxt->e_rate_control_type;
+ IV_PICTURE_CODING_TYPE_T pic_type = (IV_PICTURE_CODING_TYPE_T)ps_rc_lap_out->i4_rc_pic_type;
+ picture_type_e rc_pic_type = ihevce_rc_conv_pic_type(
+ (IV_PICTURE_CODING_TYPE_T)ps_rc_lap_out->i4_rc_pic_type,
+ ps_rc_ctxt->i4_field_pic,
+ ps_rc_lap_out->i4_rc_temporal_lyr_id,
+ ps_rc_lap_out->i4_is_bottom_field,
+ ps_rc_ctxt->i4_top_field_first);
+ WORD32 i4_use_offset_flag = 0, k = 0;
+ WORD32 i4_inter_frame_interval = rc_get_inter_frame_interval(ps_rc_ctxt->rc_hdl);
+ /* default offsets, replaced by the lap out offsets when the window is large enough */
+ WORD32 ai4_offsets[5] = { 0, 1, 2, 3, 4 };
+ rc_lap_out_params_t *ps_rc_lap_out_temp = ps_rc_lap_out;
+
+ /* The window for which your update is guaranteed */
+ WORD32 updated_window = ps_rc_ctxt->i4_num_frame_in_lap_window - i4_update_delay;
+
+ /* NOTE(review): k is only ever assigned 0 and is not read in this function */
+ k = 0;
+ if((updated_window >= i4_inter_frame_interval) && (ps_rc_ctxt->i4_rc_pass != 2) &&
+ ((rc_pic_type == I_PIC) || (rc_pic_type == P_PIC)))
+ {
+ WORD32 i4_count = 0;
+
+ /* walk the next-encode chain for at most updated_window entries and
+ run the temporal complexity computation on every I or P picture */
+ for(i4_count = 0; i4_count < updated_window; i4_count++)
+ {
+ picture_type_e rc_pic_type_temp = ihevce_rc_conv_pic_type(
+ (IV_PICTURE_CODING_TYPE_T)ps_rc_lap_out_temp->i4_rc_pic_type,
+ ps_rc_ctxt->i4_field_pic,
+ ps_rc_lap_out_temp->i4_rc_temporal_lyr_id,
+ ps_rc_lap_out_temp->i4_is_bottom_field,
+ ps_rc_ctxt->i4_top_field_first);
+
+ if((rc_pic_type_temp == I_PIC) || (rc_pic_type_temp == P_PIC))
+ ihevce_compute_temporal_complexity_reset_Kp_Kb(ps_rc_lap_out_temp, pv_rc_ctxt, 0);
+
+ ps_rc_lap_out_temp =
+ (rc_lap_out_params_t *)ps_rc_lap_out_temp->ps_rc_lap_out_next_encode;
+
+ if(ps_rc_lap_out_temp == NULL)
+ break;
+ }
+ }
+
+ if(updated_window >= i4_inter_frame_interval)
+ {
+ i4_use_offset_flag = 1;
+ memmove(ai4_offsets, ps_rc_lap_out->ai4_offsets, sizeof(WORD32) * 5);
+ }
+
+ if(CONST_QP == e_rc_type)
+ {
+ /* NOTE(review): the switch has no default case, so slice_type stays
+ uninitialised if pic_type is not one of the four values handled below */
+ switch(pic_type)
+ {
+ case IV_I_FRAME:
+ case IV_IDR_FRAME:
+ {
+ slice_type = ISLICE;
+ break;
+ }
+ case IV_P_FRAME:
+ {
+ slice_type = PSLICE;
+ break;
+ }
+ case IV_B_FRAME:
+ {
+ slice_type = BSLICE;
+ break;
+ }
+ }
+
+ i4_frame_qp = ihevce_get_cur_frame_qp(
+ ps_rc_ctxt->i4_init_frame_qp_user,
+ slice_type,
+ ps_rc_lap_out->i4_rc_temporal_lyr_id,
+ ps_rc_ctxt->i4_min_frame_qp,
+ ps_rc_ctxt->i4_max_frame_qp,
+ ps_rc_ctxt->ps_rc_quant_ctxt);
+
+ return i4_frame_qp;
+ }
+ else
+ {
+ /*check scene type*/
+ scene_type = ihevce_rc_lap_get_scene_type(ps_rc_lap_out);
+
+ if(scene_type == SCENE_TYPE_SCENE_CUT)
+ {
+ /* a scene cut restarts the frame count since reset */
+ i4_is_scd = 1;
+ ps_rc_ctxt->i4_num_frms_from_reset = 0;
+#if USE_QP_OFFSET_POST_SCD
+ ps_rc_ctxt->i4_use_qp_offset_pre_enc = 1;
+#else
+ ps_rc_ctxt->i4_use_qp_offset_pre_enc = 0;
+#endif
+ }
+ /* the queue entry may be invalid only for pictures with poc below 20 */
+ ASSERT(
+ ps_rc_ctxt->as_pre_enc_qp_queue[ps_rc_ctxt->i4_pre_enc_qp_read_index].i4_is_qp_valid ==
+ 1 ||
+ ps_rc_lap_out->i4_rc_poc < 20);
+
+ if(ps_rc_ctxt->as_pre_enc_qp_queue[ps_rc_ctxt->i4_pre_enc_qp_read_index].i4_is_qp_valid ==
+ 1)
+ {
+ if(i4_is_scd || ps_rc_ctxt->i4_use_qp_offset_pre_enc)
+ {
+#if 1 //The qp will be populated assuming the frame is I_PIC. Adjust according to current pic type
+ /* P_PIC gets scd qp + 1, every other type gets scd qp + temporal layer id */
+ i4_frame_qp =
+ ps_rc_ctxt->as_pre_enc_qp_queue[ps_rc_ctxt->i4_pre_enc_qp_read_index].i4_scd_qp;
+ if(rc_pic_type == P_PIC)
+ i4_frame_qp++;
+ else
+ i4_frame_qp = i4_frame_qp + ps_rc_lap_out->i4_rc_temporal_lyr_id;
+#endif
+ if(i4_use_offset_flag)
+ {
+ /* overrides the qp computed above with L0 frame qp + offset.
+ NOTE(review): the "- 4" presumably maps picture types above
+ B2_PIC onto indices 0..4 of ai4_offsets - confirm against the
+ picture_type_e definition */
+ if(rc_pic_type > B2_PIC)
+ i4_frame_qp = ps_rc_ctxt->i4_L0_frame_qp + ai4_offsets[rc_pic_type - 4];
+ else
+ i4_frame_qp = ps_rc_ctxt->i4_L0_frame_qp + ai4_offsets[rc_pic_type];
+ }
+ }
+ else
+ {
+#if DETERMINISTIC_RC
+ i4_frame_qp = ps_rc_ctxt->as_pre_enc_qp_queue[ps_rc_ctxt->i4_pre_enc_qp_read_index]
+ .ai4_quant[rc_pic_type];
+#else
+ /*read the latest qp updated by enc*/
+ /* NOTE(review): this index wraps with MAX_PRE_ENC_RC_DELAY while the
+ read index below wraps with i4_pre_enc_rc_delay - confirm that the
+ two moduli are meant to differ */
+ i4_frame_qp =
+ ps_rc_ctxt
+ ->as_pre_enc_qp_queue
+ [(ps_rc_ctxt->i4_pre_enc_qp_write_index + MAX_PRE_ENC_RC_DELAY - 1) %
+ MAX_PRE_ENC_RC_DELAY]
+ .ai4_quant[rc_pic_type];
+#endif
+ }
+
+ /* mark the entry at the read index as consumed */
+ ps_rc_ctxt->as_pre_enc_qp_queue[ps_rc_ctxt->i4_pre_enc_qp_read_index].i4_is_qp_valid =
+ -1;
+ /*once encoder starts reading from qp queue it should always read from qp queue*/
+ //ps_rc_ctxt->i4_use_init_qp_for_pre_enc = 0;
+ }
+ else
+ {
+ /* queue entry not populated yet: fall back to the initial pre-enc qp */
+ i4_frame_qp = ps_rc_ctxt->ai4_init_pre_enc_qp[rc_pic_type];
+ }
+ {
+ WORD32 i4_delay_l0_enc = ps_rc_ctxt->i4_pre_enc_rc_delay;
+ /* advance the read index, wrapping at the pre-enc rc delay */
+ ps_rc_ctxt->i4_pre_enc_qp_read_index =
+ (ps_rc_ctxt->i4_pre_enc_qp_read_index + 1) % i4_delay_l0_enc;
+
+ /* stop using the scd qp offset once i4_delay_l0_enc frames have
+ been queried since the last reset */
+ if(ps_rc_ctxt->i4_num_frms_from_reset < i4_delay_l0_enc)
+ {
+ ps_rc_ctxt->i4_num_frms_from_reset++;
+ if(ps_rc_ctxt->i4_num_frms_from_reset >= i4_delay_l0_enc)
+ ps_rc_ctxt->i4_use_qp_offset_pre_enc = 0;
+ }
+ }
+
+ i4_frame_qp = CLIP3(i4_frame_qp, ps_rc_ctxt->i4_min_frame_qp, ps_rc_ctxt->i4_max_frame_qp);
+ return i4_frame_qp;
+ }
+}
+/**
+******************************************************************************
+*
+* @brief function to estimate L0 satd based on L1 satd.
+*
+* @par Description
+*  Applies a linear model (slope m, intercept c, picked from the pixel count)
+*  to the L1 satd/act after dividing it by the current q scale, multiplies
+*  the truncated result by the q scale again and floors the estimate at
+*  (i4_num_pixel >> 3).
+*
+* @param[in] i8_satd_by_act_L1
+*  L1 satd/act value
+* @param[in] i4_num_pixel
+*  pixel count used to choose the model coefficients and the lower bound
+* @param[in] i4_cur_q_scale
+*  current q scale; used as a divisor, so it must be non zero
+*
+* @return estimated L0 satd/act
+*
+******************************************************************************
+*/
+LWORD64 ihevce_get_L0_satd_based_on_L1(
+    LWORD64 i8_satd_by_act_L1, WORD32 i4_num_pixel, WORD32 i4_cur_q_scale)
+{
+    /* coefficients of the smallest resolution class are the default */
+    float m = (float)3.5311;
+    float c = (float)-2377.2;
+    LWORD64 i8_est_L0_satd;
+    LWORD64 i8_lower_bound = (i4_num_pixel >> 3);
+
+    /** choose coeff based on resolution*/
+    if(i4_num_pixel > 5184000)
+    {
+        m = (float)2.3911;
+        c = (float)86329;
+    }
+    else if(i4_num_pixel > 1497600)
+    {
+        m = (float)2.7311;
+        c = (float)-1218.9;
+    }
+    else if(i4_num_pixel > 633600)
+    {
+        m = (float)3.1454;
+        c = (float)-5836.1;
+    }
+
+    /*due to qp difference between I and P, For P pic for same */
+    /* integer division by the q scale first, then the float model, then the
+       truncated result is scaled back by the q scale */
+    i8_est_L0_satd = (LWORD64)(i8_satd_by_act_L1 / i4_cur_q_scale * m + c) * i4_cur_q_scale;
+
+    return (i8_est_L0_satd < i8_lower_bound) ? i8_lower_bound : i8_est_L0_satd;
+}
+/**
+******************************************************************************
+*
+* @name ihevce_rc_register_L1_analysis_data
+*
+* @par Description
+*  Stores the L1 analysis results of the current frame in the rc context,
+*  indexed by its rc picture type. While the "estimated L0 intra sad
+*  available" flag is still clear, it is recomputed: it becomes 1 only when
+*  a non negative estimated L0 satd is present for every active picture type
+*  (and for the FIELD_OFFSET counterparts of the non I types in field mode).
+*
+* @param[in] pv_rc_ctxt - pointer to rc context
+* @param[in] ps_rc_lap_out - pointer to rc_lap_out structure of current frame
+* @param[in] i8_est_L0_satd_by_act - estimated L0 satd/act
+* @param[in] i8_pre_intra_sad - pre intra sad
+* @param[in] i8_l1_hme_sad - L1 hme sad
+* @return void
+*
+******************************************************************************
+*/
+void ihevce_rc_register_L1_analysis_data(
+    void *pv_rc_ctxt,
+    rc_lap_out_params_t *ps_rc_lap_out,
+    LWORD64 i8_est_L0_satd_by_act,
+    LWORD64 i8_pre_intra_sad,
+    LWORD64 i8_l1_hme_sad)
+{
+    rc_context_t *ps_rc_ctxt = (rc_context_t *)pv_rc_ctxt;
+    WORD32 i4_pic_idx;
+    WORD32 i4_all_present = 1;
+    picture_type_e rc_pic_type = ihevce_rc_conv_pic_type(
+        (IV_PICTURE_CODING_TYPE_T)ps_rc_lap_out->i4_rc_pic_type,
+        ps_rc_ctxt->i4_field_pic,
+        ps_rc_lap_out->i4_rc_temporal_lyr_id,
+        ps_rc_lap_out->i4_is_bottom_field,
+        ps_rc_ctxt->i4_top_field_first);
+
+    /*update current frame's data*/
+    //if( ps_rc_ctxt->u4_rc_scene_num_est_L0_intra_sad_available == ps_rc_lap_out->u4_rc_scene_num)
+    ps_rc_ctxt->ai8_prev_frame_pre_intra_sad[rc_pic_type] = i8_pre_intra_sad;
+    ps_rc_ctxt->ai8_prev_frame_hme_sad[rc_pic_type] = i8_l1_hme_sad;
+    ps_rc_ctxt->ai8_prev_frame_est_L0_satd[rc_pic_type] = i8_est_L0_satd_by_act;
+
+    /* once the flag is set there is nothing more to evaluate */
+    if(ps_rc_ctxt->i4_is_est_L0_intra_sad_available)
+    {
+        return;
+    }
+
+    /*check if data is available for all picture type*/
+    for(i4_pic_idx = 0; i4_pic_idx < ps_rc_ctxt->i4_num_active_pic_type; i4_pic_idx++)
+    {
+        if(ps_rc_ctxt->ai8_prev_frame_est_L0_satd[i4_pic_idx] < 0)
+        {
+            i4_all_present = 0;
+        }
+        /* field mode: the FIELD_OFFSET counterpart of every non I type must be present too */
+        if((1 == ps_rc_ctxt->i4_field_pic) && (0 != i4_pic_idx) &&
+           (ps_rc_ctxt->ai8_prev_frame_est_L0_satd[i4_pic_idx + FIELD_OFFSET] < 0))
+        {
+            i4_all_present = 0;
+        }
+    }
+    ps_rc_ctxt->i4_is_est_L0_intra_sad_available = i4_all_present;
+}
+
+/*#######################################################*/
+/******* END OF PRE-ENC QP QUERY FUNCTIONS **************/
+/*#####################################################*/
+
+/*##########################################################*/
+/******* START OF ENC THRD QP QUERY FUNCTIONS **************/
+/*########################################################*/
+
+/**
+******************************************************************************
+*
+* @brief function to get ihevce_rc_get_pic_quant
+*
+* @par Description
+* @param[in] i4_update_delay : The Delay in the update. This can happen for dist. case!
+* All decision should consider this delay for updation!
+******************************************************************************
+*/
+
+WORD32 ihevce_rc_get_pic_quant(
+ void *pv_ctxt,
+ rc_lap_out_params_t *ps_rc_lap_out,
+ IHEVCE_RC_CALL_TYPE call_type,
+ WORD32 i4_enc_frm_id,
+ WORD32 i4_update_delay,
+ WORD32 *pi4_tot_bits_estimated)
+{
+ rc_context_t *ps_rc_ctxt = (rc_context_t *)pv_ctxt;
+ WORD32 i4_frame_qp, i4_frame_qp_q6, i4_hevc_frame_qp = -1, i4_deltaQP = 0;
+ WORD32 i4_max_frame_bits = (1 << 30);
+ rc_type_e e_rc_type = ps_rc_ctxt->e_rate_control_type;
+ WORD32 slice_type, index, i4_num_frames_in_cur_gop, i4_cur_est_texture_bits;
+ WORD32 temporal_layer_id = ps_rc_lap_out->i4_rc_temporal_lyr_id;
+ IV_PICTURE_CODING_TYPE_T pic_type = (IV_PICTURE_CODING_TYPE_T)ps_rc_lap_out->i4_rc_pic_type;
+ picture_type_e rc_pic_type = ihevce_rc_conv_pic_type(
+ pic_type,
+ ps_rc_ctxt->i4_field_pic,
+ ps_rc_lap_out->i4_rc_temporal_lyr_id,
+ ps_rc_lap_out->i4_is_bottom_field,
+ ps_rc_ctxt->i4_top_field_first);
+ float i_to_avg_bit_ratio;
+ frame_info_t s_frame_info_temp;
+ WORD32 i4_scene_num = ps_rc_lap_out->u4_rc_scene_num % MAX_SCENE_NUM;
+ WORD32 i4_vbv_buf_max_bits;
+ WORD32 i4_est_tex_bits;
+ WORD32 i4_cur_est_header_bits, i4_fade_scene;
+ WORD32 i4_model_available, i4_is_no_model_scd;
+ WORD32 i4_estimate_to_calc_frm_error;
+
+ /* The window for which your update is guaranteed */
+ WORD32 updated_window = ps_rc_ctxt->i4_num_frame_in_lap_window - i4_update_delay;
+
+ ps_rc_ctxt->i4_scene_num_latest = i4_scene_num;
+
+ ps_rc_ctxt->s_rc_high_lvl_stat.i4_modelQP = INVALID_QP;
+ ps_rc_ctxt->s_rc_high_lvl_stat.i4_finalQP = INVALID_QP;
+ ps_rc_ctxt->s_rc_high_lvl_stat.i4_maxEbfQP = INVALID_QP;
+
+ ps_rc_ctxt->i4_quality_preset = ps_rc_lap_out->i4_rc_quality_preset;
+ ps_rc_ctxt->s_rc_high_lvl_stat.i4_finalQP = INVALID_QP;
+
+ if(1 == ps_rc_ctxt->i4_bitrate_changed)
+ {
+ ps_rc_ctxt->i4_bitrate_changed = 0;
+ }
+ if(CONST_QP == e_rc_type)
+ {
+ switch(pic_type)
+ {
+ case IV_I_FRAME:
+ case IV_IDR_FRAME:
+ {
+ slice_type = ISLICE;
+ break;
+ }
+ case IV_P_FRAME:
+ {
+ slice_type = PSLICE;
+ break;
+ }
+ case IV_B_FRAME:
+ {
+ slice_type = BSLICE;
+ break;
+ }
+ }
+
+ i4_frame_qp = ihevce_get_cur_frame_qp(
+ ps_rc_ctxt->i4_init_frame_qp_user,
+ slice_type,
+ temporal_layer_id,
+ ps_rc_ctxt->i4_min_frame_qp,
+ ps_rc_ctxt->i4_max_frame_qp,
+ ps_rc_ctxt->ps_rc_quant_ctxt);
+ return i4_frame_qp;
+ }
+ else
+ {
+ WORD32 is_scd_ref_frame = 0, i4_num_scd_in_lap_window = 0, num_frames_b4_scd = 0,
+ scene_type = 0, i;
+ //ihevce_lap_output_params_t *ps_cur_rc_lap_out;
+
+ if(ps_rc_ctxt->ai4_scene_num_last_pic[rc_pic_type] !=
+ (WORD32)ps_rc_lap_out->u4_rc_scene_num)
+ {
+ rc_reset_pic_model(ps_rc_ctxt->rc_hdl, rc_pic_type);
+ rc_reset_first_frame_coded_flag(ps_rc_ctxt->rc_hdl, rc_pic_type);
+ }
+ ps_rc_ctxt->ai4_scene_num_last_pic[rc_pic_type] = ps_rc_lap_out->u4_rc_scene_num;
+
+ if(call_type == ENC_GET_QP)
+ {
+ i4_model_available = model_availability(ps_rc_ctxt->rc_hdl, rc_pic_type);
+
+ ps_rc_lap_out->i8_est_text_bits = -1;
+ }
+
+ if((rc_pic_type == I_PIC) || (rc_pic_type == P_PIC) || (rc_pic_type == P1_PIC))
+ {
+ ps_rc_ctxt->i4_cur_scene_num = ps_rc_lap_out->u4_rc_scene_num;
+ }
+
+ {
+ if(!(pic_type == IV_I_FRAME || pic_type == IV_IDR_FRAME))
+ {
+ ps_rc_ctxt->ai8_cur_frame_coarse_ME_cost[i4_enc_frm_id] =
+ ps_rc_lap_out->i8_frame_acc_coarse_me_cost;
+ }
+ /*check if frame is scene cut*/
+ /* If scd do not query the model. obtain qp from offline data model*/
+ scene_type = ihevce_rc_lap_get_scene_type(ps_rc_lap_out);
+
+ if(ps_rc_ctxt->ai4_scene_numbers[ps_rc_lap_out->u4_rc_scene_num] == 0 &&
+ (scene_type != SCENE_TYPE_SCENE_CUT))
+ {
+ scene_type = SCENE_TYPE_SCENE_CUT;
+ }
+
+ if(ps_rc_ctxt->ai4_scene_numbers[ps_rc_lap_out->u4_rc_scene_num] > 0 &&
+ (scene_type == SCENE_TYPE_SCENE_CUT))
+ {
+ scene_type = SCENE_TYPE_NORMAL;
+ }
+ if(scene_type == SCENE_TYPE_SCENE_CUT)
+ {
+ if((ps_rc_lap_out->i4_rc_quality_preset == IHEVCE_QUALITY_P6) &&
+ (rc_pic_type > P_PIC))
+ {
+ is_scd_ref_frame = 0;
+ }
+ else
+ {
+ is_scd_ref_frame = 1;
+ }
+ }
+ else if(scene_type == SCENE_TYPE_PAUSE_TO_RESUME)
+ {
+ /*pause to resume flag will only be set in layer 0 frames( I and P pic)*/
+ /*I PIC can handle this by detecting I_only SCD which is based on open loop SATD hence explicit handling for pause to resume is required only for P_PIC*/
+
+ if(ps_rc_lap_out->i4_rc_quality_preset == IHEVCE_QUALITY_P6)
+ {
+ if(call_type == ENC_GET_QP && rc_pic_type == P_PIC)
+ {
+ ps_rc_ctxt->ai4_is_pause_to_resume[i4_enc_frm_id] = 1;
+ }
+ }
+ else
+ {
+ if(call_type == ENC_GET_QP && rc_pic_type != I_PIC)
+ {
+ ps_rc_ctxt->ai4_is_pause_to_resume[i4_enc_frm_id] = 1;
+ }
+ }
+ }
+
+ ps_rc_ctxt->ai4_is_cmplx_change_reset_model[i4_enc_frm_id] =
+ ps_rc_lap_out->i4_is_cmplx_change_reset_model;
+ ps_rc_ctxt->ai4_is_cmplx_change_reset_bits[i4_enc_frm_id] =
+ ps_rc_lap_out->i4_is_cmplx_change_reset_bits;
+
+ /*initialise the coeffs to 1 in case lap is not used */
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ ps_rc_ctxt->af_sum_weigh[i4_enc_frm_id][i][0] = 1.0;
+ ps_rc_ctxt->af_sum_weigh[i4_enc_frm_id][i][1] = 0.0;
+ ps_rc_ctxt->af_sum_weigh[i4_enc_frm_id][i][2] = 0.0;
+ }
+
+ /*treat even first frame as scd frame*/
+ if(!ps_rc_ctxt->i4_is_first_frame_encoded)
+ {
+ is_scd_ref_frame = 1;
+ }
+
+ /*special case SCD handling for Non-I pic*/
+ if(!(pic_type == IV_I_FRAME || pic_type == IV_IDR_FRAME) && call_type == ENC_GET_QP)
+ {
+ if(is_scd_ref_frame)
+ {
+ /*A non-I pic will only be marked as scene cut only if there is another SCD follows within another subgop*/
+ ps_rc_ctxt->ai4_is_non_I_scd_pic[i4_enc_frm_id] = 1;
+ }
+ /*check if current sad is very different from previous SAD and */
+ else if(
+ !ps_rc_ctxt->ai4_is_pause_to_resume[i4_enc_frm_id] &&
+ ps_rc_lap_out->i4_is_non_I_scd)
+ {
+ ps_rc_ctxt->ai4_is_non_I_scd_pic[i4_enc_frm_id] = 1;
+ is_scd_ref_frame = 1;
+ }
+ }
+
+ if(call_type == PRE_ENC_GET_QP)
+ {
+ /*Only I frames are considered as scd pic during pre-enc*/
+ is_scd_ref_frame &= (pic_type == IV_I_FRAME || pic_type == IV_IDR_FRAME);
+ }
+
+ /*special case SCD handling for I pic*/
+ if((pic_type == IV_I_FRAME || pic_type == IV_IDR_FRAME) && !is_scd_ref_frame)
+ {
+ /*If open loop SATD's of two I picture are very different then treat the I pic as SCD and reset only model as this can
+ happen during fade-in and fade-out where other picture types would have learnt. Reset is required only for I.*/
+
+ if(ps_rc_lap_out->i4_is_I_only_scd)
+ {
+ is_scd_ref_frame = 1;
+ ps_rc_ctxt->ai4_I_model_only_reset[i4_enc_frm_id] = 1;
+ }
+ }
+ /*should be recalculated for every picture*/
+ if((updated_window) > 0 && (call_type == ENC_GET_QP) && (ps_rc_ctxt->i4_rc_pass != 2))
+ {
+ rc_lap_out_params_t *ps_cur_rc_lap_out;
+
+ UWORD32 u4_L1_based_lap_complexity_q7;
+ WORD32 i = 0, k = 0, i4_f_sim = 0, i4_h_sim = 0, i4_var_sum = 0,
+ i4_num_pic_metric_count = 0, i4_is_first_frm = 1,
+ i4_intra_frame_interval = 0;
+ LWORD64 i8_l1_analysis_lap_comp = 0;
+ LWORD64 nor_frm_hme_sad_q10;
+ picture_type_e curr_rc_pic_type;
+ WORD32 ai4_pic_dist[MAX_PIC_TYPE] = { 0 };
+ LWORD64 i8_sad_first_frame_pic_type[MAX_PIC_TYPE] = { 0 },
+ i8_total_sad_pic_type[MAX_PIC_TYPE] = { 0 };
+ LWORD64 i8_last_frame_pic_type[MAX_PIC_TYPE] = { 0 }, i8_esti_consum_bits = 0;
+ WORD32 i4_num_pic_type[MAX_PIC_TYPE] = { 0 }, i4_frames_in_lap_end = 0,
+ i4_first_frame_coded_flag, i4_gop_end_flag = 1, i4_num_frame_for_ebf = 0;
+ i4_first_frame_coded_flag = is_first_frame_coded(ps_rc_ctxt->rc_hdl);
+
+ /*Setting the next scene cut as well as pic distribution for the gop*/
+
+ ps_cur_rc_lap_out = (rc_lap_out_params_t *)ps_rc_lap_out;
+ i4_intra_frame_interval = rc_get_intra_frame_interval(ps_rc_ctxt->rc_hdl);
+
+ /*Set the rc sc i next*/
+ if(ps_cur_rc_lap_out != NULL)
+ {
+ WORD32 i4_count = 0;
+ do
+ {
+ if(((rc_lap_out_params_t *)ps_cur_rc_lap_out->ps_rc_lap_out_next_encode ==
+ NULL)) //||((( (ihevce_lap_output_params_t*)ps_cur_rc_lap_out->ps_lap_out_next)->i8_pre_intra_sad == -1) || ( ((ihevce_lap_output_params_t*)ps_cur_rc_lap_out->ps_lap_out_next)->i8_raw_pre_intra_sad == -1) ||( ((ihevce_lap_output_params_t*)ps_cur_rc_lap_out->ps_lap_out_next)->i8_raw_l1_coarse_me_sad == -1) ||(((ihevce_lap_output_params_t*)ps_cur_rc_lap_out->ps_lap_out_next)->i8_frame_acc_coarse_me_sad == -1)))
+ break;
+
+ ps_cur_rc_lap_out =
+ (rc_lap_out_params_t *)ps_cur_rc_lap_out->ps_rc_lap_out_next_encode;
+ i4_count++;
+
+ } while((i4_count + 1) < updated_window);
+
+ rc_set_next_sc_i_in_rc_look_ahead(
+ ps_rc_ctxt->rc_hdl, ps_cur_rc_lap_out->i4_next_sc_i_in_rc_look_ahead);
+ rc_update_pic_distn_lap_to_rc(
+ ps_rc_ctxt->rc_hdl, ps_cur_rc_lap_out->ai4_num_pic_type);
+
+ ps_rc_ctxt->i4_next_sc_i_in_rc_look_ahead =
+ ps_cur_rc_lap_out->i4_next_sc_i_in_rc_look_ahead;
+ }
+
+ ps_cur_rc_lap_out = (rc_lap_out_params_t *)ps_rc_lap_out;
+ if(ps_cur_rc_lap_out != NULL)
+ {
+ /*initialise the coeffs to 1 in case lap is not used */
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ ps_rc_ctxt->af_sum_weigh[i4_enc_frm_id][i][0] = 0.0;
+ ps_rc_ctxt->af_sum_weigh[i4_enc_frm_id][i][1] = 0.0;
+ ps_rc_ctxt->af_sum_weigh[i4_enc_frm_id][i][2] = 0.0;
+ }
+ i = 0;
+ k = 0;
+
+ //ASSERT(ps_cur_rc_lap_out != NULL);
+ do
+ {
+ curr_rc_pic_type = ihevce_rc_conv_pic_type(
+ (IV_PICTURE_CODING_TYPE_T)ps_cur_rc_lap_out->i4_rc_pic_type,
+ ps_rc_ctxt->i4_field_pic,
+ ps_cur_rc_lap_out->i4_rc_temporal_lyr_id,
+ ps_cur_rc_lap_out->i4_is_bottom_field,
+ ps_rc_ctxt->i4_top_field_first);
+ if(ps_rc_ctxt->i4_is_first_frame_encoded || !i4_is_first_frm)
+ {
+ /*Ignore first frame Fsim as it is not valid for first frame*/
+ i4_f_sim += ps_cur_rc_lap_out->s_pic_metrics.i4_fsim;
+ i4_h_sim += ps_cur_rc_lap_out->s_pic_metrics.ai4_hsim[0];
+ i4_var_sum += (WORD32)ps_cur_rc_lap_out->s_pic_metrics.i8_8x8_var_lum;
+ i4_num_pic_metric_count++;
+ //DBG_PRINTF("\n fsim = %d i = %d",ps_cur_rc_lap_out->s_pic_metrics.i4_fsim,i);
+ //ASSERT(ps_cur_rc_lap_out->s_pic_metrics.i4_fsim <= 128);
+ }
+
+ /*accumulate complexity from LAP2*/
+ if(curr_rc_pic_type == I_PIC)
+ {
+ i8_l1_analysis_lap_comp +=
+ (LWORD64)(1.17 * ps_cur_rc_lap_out->i8_raw_pre_intra_sad);
+ }
+ else
+ {
+ if(curr_rc_pic_type <= B2_PIC)
+ i8_l1_analysis_lap_comp += (LWORD64)(
+ (float)ps_cur_rc_lap_out->i8_raw_l1_coarse_me_sad /
+ pow(1.125f, curr_rc_pic_type));
+ else
+ i8_l1_analysis_lap_comp += (LWORD64)(
+ (float)ps_cur_rc_lap_out->i8_raw_l1_coarse_me_sad /
+ pow(1.125f, curr_rc_pic_type - B2_PIC));
+ }
+ i++;
+ i4_is_first_frm = 0;
+
+ /*CAll the function for predictting the ebf and stuffing condition check*/
+ /*rd model pass lapout l1 pass ebf return estimated ebf and signal*/
+
+ {
+ if(i4_first_frame_coded_flag && (i4_gop_end_flag != 0))
+ {
+ if(curr_rc_pic_type == 0)
+ i4_gop_end_flag = 0;
+
+ if(i4_gop_end_flag)
+ {
+ WORD32 prev_frm_cl_sad =
+ rc_get_prev_frame_sad(ps_rc_ctxt->rc_hdl, curr_rc_pic_type);
+ WORD32 cur_frm_est_cl_sad = (WORD32)(
+ (ps_cur_rc_lap_out->i8_frame_acc_coarse_me_cost *
+ prev_frm_cl_sad) /
+ ps_rc_ctxt->ai8_prev_frm_pre_enc_cost[curr_rc_pic_type]);
+ i8_esti_consum_bits += bit_alloc_get_estimated_bits_for_pic(
+ ps_rc_ctxt->rc_hdl,
+ cur_frm_est_cl_sad,
+ prev_frm_cl_sad,
+ curr_rc_pic_type);
+ i4_num_frame_for_ebf++;
+ }
+ }
+ }
+ ps_cur_rc_lap_out =
+ (rc_lap_out_params_t *)ps_cur_rc_lap_out->ps_rc_lap_out_next_encode;
+ /*The scene cut is lap window other than current frame is used to reduce bit alloc window for I pic*/
+ if(ps_cur_rc_lap_out != NULL &&
+ ps_cur_rc_lap_out->i4_rc_scene_type == SCENE_TYPE_SCENE_CUT)
+ {
+ i4_num_scd_in_lap_window++;
+ if(i4_num_scd_in_lap_window == 1)
+ {
+ /*Note how many frames are parsed before first scd is hit*/
+ num_frames_b4_scd = i + 1;
+ }
+ }
+
+ if((ps_cur_rc_lap_out == NULL ||
+ (i >=
+ (updated_window -
+ k)))) //||((( -1 == ps_cur_rc_lap_out->i8_pre_intra_sad ) || ( -1 == ps_cur_rc_lap_out->i8_raw_pre_intra_sad ) ||( -1 == ps_cur_rc_lap_out->i8_raw_l1_coarse_me_sad) ||(-1 == ps_cur_rc_lap_out->i8_frame_acc_coarse_me_sad))))
+ break;
+ if(0) //(( -1 == ps_cur_rc_lap_out->i8_pre_intra_sad ) || ( -1 == ps_cur_rc_lap_out->i8_raw_pre_intra_sad ) ||( -1 == ps_cur_rc_lap_out->i8_raw_l1_coarse_me_sad) ||(-1 == ps_cur_rc_lap_out->i8_frame_acc_coarse_me_sad)))
+ {
+ k++;
+ ps_cur_rc_lap_out =
+ (rc_lap_out_params_t *)ps_cur_rc_lap_out->ps_rc_lap_out_next_encode;
+ if(ps_cur_rc_lap_out == NULL)
+ break;
+ continue;
+ }
+
+ } while(1);
+ ;
+ }
+ /*For the first subgop we cant have underflow prevention logic
+ since once picture of each type is not encoded also happens for static contents thants high i_to avg_ratio */
+ if(i4_first_frame_coded_flag &&
+ (ps_rc_ctxt->ai_to_avg_bit_ratio[i4_enc_frm_id] > I_TO_REST_SLOW))
+ {
+ if(!(i4_num_frame_for_ebf < ps_rc_ctxt->i4_max_inter_frm_int))
+ rc_bit_alloc_detect_ebf_stuff_scenario(
+ ps_rc_ctxt->rc_hdl,
+ i4_num_frame_for_ebf,
+ i8_esti_consum_bits,
+ ps_rc_ctxt->i4_max_inter_frm_int);
+ }
+
+ k = 0;
+
+ i4_frames_in_lap_end = 0;
+ {
+ rc_lap_out_params_t *ps_cur_rc_lap_out1;
+
+ ps_cur_rc_lap_out1 = (rc_lap_out_params_t *)ps_rc_lap_out;
+ do
+ {
+ curr_rc_pic_type = ihevce_rc_conv_pic_type(
+ (IV_PICTURE_CODING_TYPE_T)ps_cur_rc_lap_out1->i4_rc_pic_type,
+ ps_rc_ctxt->i4_field_pic,
+ ps_cur_rc_lap_out1->i4_rc_temporal_lyr_id,
+ ps_cur_rc_lap_out1->i4_is_bottom_field,
+ ps_rc_ctxt->i4_top_field_first);
+ /*accumulate complexity from LAP2*/
+
+ if(curr_rc_pic_type == I_PIC)
+ {
+ i8_total_sad_pic_type[I_PIC] +=
+ ps_cur_rc_lap_out1->i8_raw_pre_intra_sad;
+ i8_last_frame_pic_type[I_PIC] =
+ ps_cur_rc_lap_out1->i8_raw_pre_intra_sad;
+ }
+ else
+ {
+ i8_total_sad_pic_type[curr_rc_pic_type] +=
+ ps_cur_rc_lap_out1->i8_raw_l1_coarse_me_sad;
+ i8_last_frame_pic_type[curr_rc_pic_type] =
+ ps_cur_rc_lap_out1->i8_raw_l1_coarse_me_sad;
+ }
+ if(i4_num_pic_type[curr_rc_pic_type] == 0)
+ {
+ if(curr_rc_pic_type == I_PIC)
+ {
+ i8_sad_first_frame_pic_type[I_PIC] =
+ ps_cur_rc_lap_out1->i8_raw_pre_intra_sad;
+ }
+ else
+ {
+ i8_sad_first_frame_pic_type[curr_rc_pic_type] =
+ ps_cur_rc_lap_out1->i8_raw_l1_coarse_me_sad;
+ }
+ }
+ i4_num_pic_type[curr_rc_pic_type]++;
+
+ i4_frames_in_lap_end++;
+
+ ps_cur_rc_lap_out1 =
+ (rc_lap_out_params_t *)ps_cur_rc_lap_out1->ps_rc_lap_out_next_encode;
+ if((ps_cur_rc_lap_out1 == NULL ||
+ (i4_frames_in_lap_end >=
+ (updated_window -
+ k)))) //||((( -1 == ps_cur_rc_lap_out1->i8_pre_intra_sad ) || ( -1 == ps_cur_rc_lap_out1->i8_raw_pre_intra_sad ) ||( -1 == ps_cur_rc_lap_out1->i8_raw_l1_coarse_me_sad) ||(-1 == ps_cur_rc_lap_out1->i8_frame_acc_coarse_me_sad))))
+ {
+ break;
+ }
+ if(0) //((( -1 == ps_cur_rc_lap_out1->i8_pre_intra_sad ) || ( -1 == ps_cur_rc_lap_out1->i8_raw_pre_intra_sad ) ||( -1 == ps_cur_rc_lap_out1->i8_raw_l1_coarse_me_sad) ||(-1 == ps_cur_rc_lap_out1->i8_frame_acc_coarse_me_sad))))
+ {
+ k++;
+ ps_cur_rc_lap_out1 = (rc_lap_out_params_t *)
+ ps_cur_rc_lap_out1->ps_rc_lap_out_next_encode;
+ if(ps_cur_rc_lap_out1 == NULL)
+ break;
+ continue;
+ }
+
+ } while(i4_frames_in_lap_end < (ps_rc_ctxt->i4_next_sc_i_in_rc_look_ahead - k));
+ }
+
+ /*get picture type distribution in LAP*/
+ rc_get_pic_distribution(ps_rc_ctxt->rc_hdl, &ai4_pic_dist[0]);
+
+ {
+ float f_prev_comp;
+ WORD32 j;
+ float af_sum_weigh[MAX_PIC_TYPE], af_nume_weight[MAX_PIC_TYPE];
+ float af_average_sad_pic_type[MAX_PIC_TYPE] = { 0 };
+ for(j = 0; j < MAX_PIC_TYPE; j++)
+ {
+ if(i4_num_pic_type[j] > 0)
+ {
+ af_average_sad_pic_type[j] =
+ (float)i8_total_sad_pic_type[j] / i4_num_pic_type[j];
+ }
+
+ f_prev_comp = 1.;
+
+ i4_num_pic_type[j] = (i4_num_pic_type[j] > ai4_pic_dist[j])
+ ? ai4_pic_dist[j]
+ : i4_num_pic_type[j];
+
+ af_sum_weigh[j] = (float)i4_num_pic_type[j];
+ af_nume_weight[j] = 1.0;
+
+ if(i4_num_pic_type[j] > 1 && (af_average_sad_pic_type[j] > 0))
+ {
+ af_nume_weight[j] =
+ (float)i8_sad_first_frame_pic_type[j] / af_average_sad_pic_type[j];
+
+ f_prev_comp =
+ (float)i8_last_frame_pic_type[j] / af_average_sad_pic_type[j];
+ }
+ //if(rc_pic_type != I_PIC)
+ {
+ af_sum_weigh[j] += f_prev_comp * (ai4_pic_dist[j] - i4_num_pic_type[j]);
+ }
+ ps_rc_ctxt->af_sum_weigh[i4_enc_frm_id][j][0] = af_nume_weight[j];
+ ps_rc_ctxt->af_sum_weigh[i4_enc_frm_id][j][1] = af_sum_weigh[j];
+ ps_rc_ctxt->af_sum_weigh[i4_enc_frm_id][j][2] = af_average_sad_pic_type[j];
+
+ /*Disabling steady state complexity based bit movement*/
+ /*Enable it in CBR and not in VBR since VBR already has complexity based bit movement*/
+
+ if(0) /*i4_frames_in_lap_end < (updated_window) || ps_rc_ctxt->e_rate_control_type == VBR_STREAMING)*/
+ {
+ ps_rc_ctxt->af_sum_weigh[i4_enc_frm_id][j][0] = 1.0;
+ ps_rc_ctxt->af_sum_weigh[i4_enc_frm_id][j][1] =
+ 0; //(float)ai4_pic_dist[j];
+ }
+ }
+ memmove(
+ ps_rc_lap_out->ps_frame_info->af_sum_weigh,
+ ps_rc_ctxt->af_sum_weigh[i4_enc_frm_id],
+ sizeof(float) * MAX_PIC_TYPE * 3);
+ }
+
+ if(i4_num_pic_metric_count > 0)
+ {
+ i4_f_sim = i4_f_sim / i4_num_pic_metric_count;
+ i4_h_sim = i4_h_sim / i4_num_pic_metric_count;
+ i4_var_sum = i4_var_sum / i4_num_pic_metric_count;
+ }
+ else
+ {
+ i4_f_sim = MODERATE_FSIM_VALUE;
+ i4_h_sim = MODERATE_FSIM_VALUE;
+ }
+
+ if(i > 0)
+ {
+ float lap_L1_comp =
+ (float)i8_l1_analysis_lap_comp /
+ (i * ps_rc_ctxt->i4_frame_height *
+ ps_rc_ctxt->i4_frame_width); //per frame per pixel complexity
+
+ lap_L1_comp = rc_get_offline_normalized_complexity(
+ ps_rc_ctxt->u4_intra_frame_interval,
+ ps_rc_ctxt->i4_frame_height * ps_rc_ctxt->i4_frame_width,
+ lap_L1_comp,
+ ps_rc_ctxt->i4_rc_pass);
+
+ u4_L1_based_lap_complexity_q7 = (WORD32)((lap_L1_comp * (1 << 7)) + .05f);
+ }
+ else
+ {
+ u4_L1_based_lap_complexity_q7 = 25;
+ }
+ ps_rc_ctxt->ai4_lap_complexity_q7[i4_enc_frm_id] =
+ (WORD32)u4_L1_based_lap_complexity_q7;
+ /*clip f_sim to 0.3 for better stability*/
+ if(i4_f_sim < 38)
+ i4_f_sim = 128 - MAX_LAP_COMPLEXITY_Q7;
+ ps_rc_ctxt->ai4_lap_f_sim[i4_enc_frm_id] = i4_f_sim;
+
+ /*calculate normalized per pixel sad*/
+ nor_frm_hme_sad_q10 = (ps_rc_lap_out->i8_frame_acc_coarse_me_cost << 10) /
+ (ps_rc_ctxt->i4_frame_height * ps_rc_ctxt->i4_frame_width);
+ /*if(rc_pic_type == P_PIC)
+ DBG_PRINTF("\n P frm hme sad = %f ",((float)nor_frm_hme_sad_q10/ (1 << 10))); */
+ rc_put_temp_comp_lap(
+ ps_rc_ctxt->rc_hdl, i4_f_sim, nor_frm_hme_sad_q10, rc_pic_type);
+
+ rc_set_num_scd_in_lap_window(
+ ps_rc_ctxt->rc_hdl, i4_num_scd_in_lap_window, num_frames_b4_scd);
+
+ if(rc_pic_type == I_PIC && updated_window > (ps_rc_ctxt->i4_max_inter_frm_int << 1))
+ {
+ float i_to_avg_bit_ratio = ihevce_get_i_to_avg_ratio(
+ (void *)ps_rc_ctxt,
+ ps_rc_lap_out,
+ 1,
+ 1,
+ 1,
+ ps_rc_lap_out->ai4_offsets,
+ i4_update_delay);
+ i_to_avg_bit_ratio = i_to_avg_bit_ratio * 1;
+ }
+
+ /* accumulation of the hme sad over next sub gop to find the temporal comlexity of the sub GOP*/
+ if((rc_pic_type == I_PIC) || (rc_pic_type == P_PIC))
+ {
+ ihevce_compute_temporal_complexity_reset_Kp_Kb(
+ ps_rc_lap_out, (void *)ps_rc_ctxt, 1);
+ }
+
+ if(i4_var_sum > MAX_LAP_VAR)
+ {
+ i4_var_sum = MAX_LAP_VAR;
+ }
+
+ {
+ /*Filling for dumping data */
+
+ ps_rc_ctxt->ai4_num_scd_in_lap_window[i4_enc_frm_id] = i4_num_scd_in_lap_window;
+ ps_rc_ctxt->ai4_num_frames_b4_scd[i4_enc_frm_id] = num_frames_b4_scd;
+ }
+ }
+ }
+
+ if((ps_rc_lap_out->i4_rc_quality_preset == IHEVCE_QUALITY_P6) && (rc_pic_type > P_PIC))
+ {
+ ps_rc_ctxt->ai4_is_pause_to_resume[i4_enc_frm_id] = 0;
+ is_scd_ref_frame = 0;
+ }
+ i4_fade_scene = 0;
+ /*Scene type fade is marked only for P pics which are in fade regions*/
+ if((ps_rc_lap_out->i4_rc_scene_type == SCENE_TYPE_FADE_IN ||
+ ps_rc_lap_out->i4_rc_scene_type == SCENE_TYPE_FADE_OUT) &&
+ (ps_rc_lap_out->i4_rc_temporal_lyr_id == 0))
+ {
+ is_scd_ref_frame = 1;
+ i4_fade_scene = 1;
+ }
+
+ if((!(is_scd_ref_frame || ps_rc_ctxt->ai4_is_pause_to_resume[i4_enc_frm_id])) &&
+ (((is_first_frame_coded(ps_rc_ctxt->rc_hdl)) && (pic_type == IV_I_FRAME)) ||
+ (pic_type != IV_I_FRAME)))
+ {
+ WORD32 i4_is_first_frame_coded = is_first_frame_coded(ps_rc_ctxt->rc_hdl);
+ i4_is_no_model_scd = 0;
+ if(call_type == ENC_GET_QP)
+ {
+ if(((0 == i4_model_available) || (!i4_is_first_frame_coded)))
+ {
+ /*No scene change but model not available*/
+ i4_is_no_model_scd = 1;
+ }
+ }
+ }
+ else
+ {
+ /*actual scene changes*/
+ i4_is_no_model_scd = 2;
+ }
+ /** Pre-enc thread as of now SCD handling is not present */
+ if(!i4_is_no_model_scd)
+ {
+ WORD32 i4_is_first_frame_coded, i4_prev_I_frm_sad, i4_cur_I_frm_sad;
+ /*Once first frame has been encoded use prev frame intra satd and cur frame satd to alter est intra sad for cur frame*/
+ i4_is_first_frame_coded = is_first_frame_coded(ps_rc_ctxt->rc_hdl);
+
+ /*prev I frame sad i changes only in enc stage. For pre enc cur and prev will be same*/
+ if(ps_rc_ctxt->i8_prev_i_frm_cost > 0)
+ {
+ if(i4_is_first_frame_coded && (pic_type == IV_I_FRAME || pic_type == IV_IDR_FRAME))
+ {
+ i4_prev_I_frm_sad = rc_get_prev_frame_intra_sad(ps_rc_ctxt->rc_hdl);
+ i4_cur_I_frm_sad = (WORD32)(
+ (ps_rc_ctxt->ai8_cur_frm_intra_cost[i4_enc_frm_id] * i4_prev_I_frm_sad) /
+ ps_rc_ctxt->i8_prev_i_frm_cost);
+ rc_update_prev_frame_intra_sad(ps_rc_ctxt->rc_hdl, i4_cur_I_frm_sad);
+ }
+ }
+ /*scale previous frame closed loop SAD with current frame HME SAD to be considered as current frame SAD*/
+ if(i4_is_first_frame_coded && !(pic_type == IV_I_FRAME || pic_type == IV_IDR_FRAME) &&
+ call_type == ENC_GET_QP)
+ {
+ if(ps_rc_ctxt->ai8_prev_frm_pre_enc_cost[rc_pic_type] > 0)
+ {
+ WORD32 prev_frm_cl_sad = rc_get_prev_frame_sad(ps_rc_ctxt->rc_hdl, rc_pic_type);
+ WORD32 cur_frm_est_cl_sad = (WORD32)(
+ (ps_rc_ctxt->ai8_cur_frame_coarse_ME_cost[i4_enc_frm_id] *
+ prev_frm_cl_sad) /
+ ps_rc_ctxt->ai8_prev_frm_pre_enc_cost[rc_pic_type]);
+ rc_update_prev_frame_sad(ps_rc_ctxt->rc_hdl, cur_frm_est_cl_sad, rc_pic_type);
+ }
+ }
+
+ if(rc_pic_type == I_PIC && updated_window > (ps_rc_ctxt->i4_max_inter_frm_int << 1))
+ {
+ ps_rc_ctxt->ai_to_avg_bit_ratio[i4_enc_frm_id] = ihevce_get_i_to_avg_ratio(
+ (void *)ps_rc_ctxt,
+ ps_rc_lap_out,
+ 1,
+ 0,
+ 1,
+ ps_rc_lap_out->ai4_offsets,
+ i4_update_delay);
+ }
+
+ ps_rc_ctxt->s_rc_high_lvl_stat.i8_bits_from_finalQP = -1;
+ i4_frame_qp_q6 = get_frame_level_qp(
+ ps_rc_ctxt->rc_hdl,
+ rc_pic_type,
+ i4_max_frame_bits,
+ &i4_cur_est_texture_bits, //this value is returned by rc
+ ps_rc_ctxt->af_sum_weigh[i4_enc_frm_id],
+ 1,
+ ps_rc_ctxt->ai_to_avg_bit_ratio[i4_enc_frm_id],
+ ps_rc_lap_out->ps_frame_info,
+ ps_rc_lap_out->i4_complexity_bin,
+ i4_scene_num, /*no pause resume concept*/
+ pi4_tot_bits_estimated,
+ &ps_rc_lap_out->i4_is_model_valid,
+ &i4_vbv_buf_max_bits,
+ &i4_est_tex_bits,
+ &i4_cur_est_header_bits,
+ &ps_rc_ctxt->s_rc_high_lvl_stat.i4_maxEbfQP,
+ &ps_rc_ctxt->s_rc_high_lvl_stat.i4_modelQP,
+ &i4_estimate_to_calc_frm_error);
+ ASSERT(*pi4_tot_bits_estimated != 0);
+ /** The usage of global table will truncate the input given as qp format and hence will not return very low qp values desirable at very
+ low bitrate. Hence on the fly calculation is enabled*/
+
+ i4_hevc_frame_qp =
+ ihevce_rc_get_scaled_hevce_qp_q6(i4_frame_qp_q6, ps_rc_ctxt->u1_bit_depth);
+
+ if(1 == ps_rc_lap_out->i4_is_model_valid)
+ ps_rc_lap_out->i4_is_steady_state = 1;
+ else
+ ps_rc_lap_out->i4_is_steady_state = 0;
+
+ ps_rc_ctxt->s_rc_high_lvl_stat.i4_is_offline_model_used = 0;
+ ps_rc_ctxt->i8_est_I_pic_header_bits = i4_cur_est_header_bits;
+ }
+ else
+ {
+ WORD32 i4_count = 0, i4_total_bits, i4_min_error_hevc_qp = 0;
+ float f_percent_error = 0.0f, f_min_error = 10000.0f;
+ WORD32 i4_current_bits_estimated = 0;
+ float i4_i_to_rest_ratio_final;
+ WORD32 i4_best_br_id = 0;
+ float af_i_qs[2];
+ LWORD64 ai8_i_tex_bits[2];
+ WORD32 i4_ref_qscale = ihevce_rc_get_scaled_mpeg2_qp(
+ ps_rc_lap_out->i4_L0_qp, ps_rc_ctxt->ps_rc_quant_ctxt);
+ WORD32 ai4_header_bits[2];
+
+ ps_rc_lap_out->i4_is_steady_state = 0;
+
+ if(ps_rc_lap_out->i4_L0_qp > 44)
+ ps_rc_lap_out->i4_L0_qp = 44;
+ if(ps_rc_lap_out->i4_L0_qp < 7 - ps_rc_ctxt->ps_rc_quant_ctxt->i1_qp_offset)
+ ps_rc_lap_out->i4_L0_qp = 7 - ps_rc_ctxt->ps_rc_quant_ctxt->i1_qp_offset;
+
+ ps_rc_lap_out->i4_L0_qp = ps_rc_lap_out->i4_L0_qp - 9;
+ ps_rc_lap_out->i4_is_model_valid = 0;
+ ps_rc_ctxt->s_rc_high_lvl_stat.i4_is_offline_model_used = 1;
+ ps_rc_ctxt->s_rc_high_lvl_stat.i8_bits_from_finalQP = -1;
+
+ ps_rc_ctxt->i4_normal_inter_pic = (i4_is_no_model_scd == 1);
+ while(1)
+ {
+ WORD32 i4_frame_qs_q3;
+ WORD32 i4_estimate_to_calc_frm_error_temp;
+
+ i_to_avg_bit_ratio = ihevce_get_i_to_avg_ratio(
+ (void *)ps_rc_ctxt,
+ ps_rc_lap_out,
+ 1,
+ 0,
+ 1,
+ ps_rc_lap_out->ai4_offsets,
+ i4_update_delay);
+
+ ps_rc_ctxt->ai_to_avg_bit_ratio[i4_enc_frm_id] = i_to_avg_bit_ratio;
+
+ /** Use estimate of header bits from pre-enc*/
+ if(1 == i4_is_no_model_scd)
+ {
+ ps_rc_ctxt->i8_est_I_pic_header_bits =
+ get_est_hdr_bits(ps_rc_ctxt->rc_hdl, rc_pic_type);
+ }
+ else
+ {
+ WORD32 i4_curr_qscale = ihevce_rc_get_scaled_mpeg2_qp(
+ ps_rc_lap_out->i4_L0_qp, ps_rc_ctxt->ps_rc_quant_ctxt);
+ /*Assume that 30% of header bits are constant and remaining are dependent on Qp
+ and map them accordingly*/
+ ps_rc_ctxt->i8_est_I_pic_header_bits = (LWORD64)(
+ (.3 * ps_rc_lap_out->i8_est_I_pic_header_bits +
+ (1. - .3) * ps_rc_lap_out->i8_est_I_pic_header_bits * i4_ref_qscale) /
+ i4_curr_qscale);
+ }
+
+ /*get qp for scene cut frame based on offline data*/
+ index = ihevce_get_offline_index(
+ ps_rc_ctxt, ps_rc_lap_out->i4_num_pels_in_frame_considered);
+
+ /*Sub pic rC bits extraction */
+ i4_frame_qs_q3 = rc_get_qp_for_scd_frame(
+ ps_rc_ctxt->rc_hdl,
+ I_PIC,
+ ps_rc_lap_out->i8_frame_satd_act_accum,
+ ps_rc_lap_out->i4_num_pels_in_frame_considered,
+ (WORD32)ps_rc_ctxt->i8_est_I_pic_header_bits,
+ ps_rc_ctxt->ai4_lap_f_sim[i4_enc_frm_id],
+ (void *)&g_offline_i_model_coeff[index][0],
+ i_to_avg_bit_ratio,
+ 1,
+ ps_rc_ctxt->af_sum_weigh[i4_enc_frm_id],
+ ps_rc_lap_out->ps_frame_info,
+ ps_rc_ctxt->i4_rc_pass,
+ (rc_pic_type != I_PIC),
+ ((ps_rc_lap_out->i4_rc_temporal_lyr_id != ps_rc_ctxt->i4_max_temporal_lyr) ||
+ (!ps_rc_ctxt->i4_max_temporal_lyr)),
+ 1,
+ &i4_total_bits,
+ &i4_current_bits_estimated,
+ ps_rc_lap_out->i4_use_offline_model_2pass,
+ ai8_i_tex_bits,
+ af_i_qs,
+ i4_best_br_id,
+ &i4_estimate_to_calc_frm_error_temp);
+
+ i4_hevc_frame_qp = ihevce_rc_get_scaled_hevc_qp_from_qs_q3(
+ i4_frame_qs_q3, ps_rc_ctxt->ps_rc_quant_ctxt);
+
+ /*Get corresponding q scale*/
+ i4_frame_qp =
+ ihevce_rc_get_scaled_mpeg2_qp(i4_hevc_frame_qp, ps_rc_ctxt->ps_rc_quant_ctxt);
+
+ if(i4_hevc_frame_qp > ps_rc_ctxt->ps_rc_quant_ctxt->i2_max_qp)
+ i4_hevc_frame_qp = ps_rc_ctxt->ps_rc_quant_ctxt->i2_max_qp;
+
+ {
+ WORD32 i4_init_qscale = ihevce_rc_get_scaled_mpeg2_qp(
+ ps_rc_lap_out->i4_L0_qp, ps_rc_ctxt->ps_rc_quant_ctxt);
+ f_percent_error = (float)(abs(i4_init_qscale - i4_frame_qp)) / i4_init_qscale;
+ if(f_percent_error < f_min_error)
+ {
+ f_min_error = f_percent_error;
+ i4_min_error_hevc_qp = i4_hevc_frame_qp;
+ i4_i_to_rest_ratio_final = i_to_avg_bit_ratio;
+ /*Get the bits estimated for least error*/
+ *pi4_tot_bits_estimated = i4_current_bits_estimated;
+ i4_estimate_to_calc_frm_error = i4_estimate_to_calc_frm_error_temp;
+ }
+ else
+ {}
+ ASSERT(*pi4_tot_bits_estimated != 0);
+ }
+ i4_count++;
+ if(/*(ps_rc_lap_out->i4_L0_qp == i4_hevc_frame_qp) ||*/ (i4_count > 17))
+ break;
+ ps_rc_lap_out->i4_L0_qp++;
+ }
+ ps_rc_lap_out->i4_L0_qp = i4_min_error_hevc_qp;
+
+ i4_hevc_frame_qp = i4_min_error_hevc_qp;
+ if(2 == i4_is_no_model_scd)
+ {
+ /* SGI & Enc Loop Parallelism related changes*/
+
+ /*model reset not required if it is first frame*/
+ if(ps_rc_ctxt->i4_is_first_frame_encoded && !i4_fade_scene &&
+ !ps_rc_ctxt->ai4_I_model_only_reset[i4_enc_frm_id] &&
+ !ps_rc_ctxt->ai4_is_non_I_scd_pic[i4_enc_frm_id] &&
+ !ps_rc_ctxt->ai4_is_pause_to_resume[i4_enc_frm_id] &&
+ !ps_rc_ctxt->ai4_is_cmplx_change_reset_model[i4_enc_frm_id])
+ {
+ ps_rc_ctxt->ai4_is_frame_scd[i4_enc_frm_id] = 1;
+ /*reset all pic type is first frame encoded flag*/
+
+ ASSERT(pic_type == IV_IDR_FRAME || pic_type == IV_I_FRAME);
+ }
+ else if(ps_rc_ctxt->ai4_I_model_only_reset[i4_enc_frm_id])
+ {
+ rc_reset_first_frame_coded_flag(ps_rc_ctxt->rc_hdl, I_PIC);
+ ASSERT(rc_pic_type == I_PIC);
+ ASSERT(ps_rc_ctxt->ai4_is_non_I_scd_pic[i4_enc_frm_id] == 0);
+ }
+ else if(
+ ps_rc_ctxt->ai4_is_non_I_scd_pic[i4_enc_frm_id] ||
+ ps_rc_ctxt->ai4_is_pause_to_resume[i4_enc_frm_id] ||
+ ps_rc_ctxt->ai4_is_cmplx_change_reset_model[i4_enc_frm_id] || i4_fade_scene)
+ {
+ /*Only when there are back to back scene cuts we need a non- Ipic will be marked as scene cut*/
+ /* Same path can also be followed during pause to resume detection to determine cur frame qp however handling during update is different*/
+ WORD32 i4_prev_qp, i, i4_new_qp_hevc_qp, I_hevc_qp, cur_hevc_qp;
+
+ /*both cannot be set at same time since lap cannot mark same frame as both scene cut and pause to resume flag*/
+ ASSERT(
+ (ps_rc_ctxt->ai4_is_non_I_scd_pic[i4_enc_frm_id] &&
+ ps_rc_ctxt->ai4_is_pause_to_resume[i4_enc_frm_id]) == 0);
+
+ I_hevc_qp = i4_hevc_frame_qp;
+
+ /*alter ai4_prev_pic_hevc_qp so that qp restriction ll not let even other pictures temporary scd are thrashed*/
+ //if(ps_rc_lap_out->i4_rc_temporal_lyr_id != ps_rc_ctxt->i4_max_temporal_lyr)
+ {
+ if(ps_rc_ctxt->i4_field_pic == 0)
+ {
+ for(i = 1; i < ps_rc_ctxt->i4_num_active_pic_type; i++)
+ {
+ i4_prev_qp = ps_rc_ctxt->ai4_prev_pic_hevc_qp[i4_scene_num][i];
+ i4_new_qp_hevc_qp = I_hevc_qp + i;
+ i4_new_qp_hevc_qp = ihevce_clip_min_max_qp(
+ ps_rc_ctxt, i4_new_qp_hevc_qp, (picture_type_e)i, i - 1);
+ if(i4_prev_qp < i4_new_qp_hevc_qp)
+ {
+ ps_rc_ctxt->ai4_prev_pic_hevc_qp[i4_scene_num][i] =
+ i4_new_qp_hevc_qp;
+ }
+ }
+ }
+ else
+ { /*field case*/
+
+ for(i = 1; i < ps_rc_ctxt->i4_num_active_pic_type; i++)
+ {
+ i4_prev_qp = ps_rc_ctxt->ai4_prev_pic_hevc_qp[i4_scene_num][i];
+ i4_new_qp_hevc_qp = I_hevc_qp + i;
+ i4_new_qp_hevc_qp = ihevce_clip_min_max_qp(
+ ps_rc_ctxt, i4_new_qp_hevc_qp, (picture_type_e)i, i - 1);
+ if(i4_prev_qp < i4_new_qp_hevc_qp)
+ {
+ ps_rc_ctxt->ai4_prev_pic_hevc_qp[i4_scene_num][i] =
+ i4_new_qp_hevc_qp;
+ }
+
+ i4_prev_qp =
+ ps_rc_ctxt->ai4_prev_pic_hevc_qp[i4_scene_num][i + FIELD_OFFSET];
+ i4_new_qp_hevc_qp = I_hevc_qp + i;
+ i4_new_qp_hevc_qp = ihevce_clip_min_max_qp(
+ ps_rc_ctxt, i4_new_qp_hevc_qp, (picture_type_e)i, i - 1);
+ if(i4_prev_qp < i4_new_qp_hevc_qp)
+ {
+ ps_rc_ctxt
+ ->ai4_prev_pic_hevc_qp[i4_scene_num][i + FIELD_OFFSET] =
+ i4_new_qp_hevc_qp;
+ }
+ }
+ }
+ }
+ {
+ WORD32 i4_updated_qp = ps_rc_ctxt->ai4_prev_pic_hevc_qp[i4_scene_num][i];
+ WORD32 i4_scale;
+
+ if(I_hevc_qp == i4_updated_qp)
+ i4_scale = 16;
+ else if(I_hevc_qp == (i4_updated_qp - 1))
+ i4_scale = 14;
+ else if(I_hevc_qp == (i4_updated_qp - 2))
+ i4_scale = 12;
+ else
+ i4_scale = 10;
+
+ *pi4_tot_bits_estimated = (i4_scale * (*pi4_tot_bits_estimated)) >> 4;
+ i4_estimate_to_calc_frm_error =
+ (i4_scale * i4_estimate_to_calc_frm_error) >> 4;
+ }
+ if(call_type == ENC_GET_QP)
+ {
+ ps_rc_lap_out->i8_est_text_bits = *pi4_tot_bits_estimated;
+ }
+ ASSERT(*pi4_tot_bits_estimated != 0);
+
+ /*use previous frame qp of same pic type or SCD i frame qp with offset whichever is maximum*/
+ /*For field case adding of grater than 4 results in the qp increasing greatly when compared to previous pics/fields*/
+ if(rc_pic_type <= FIELD_OFFSET)
+ cur_hevc_qp = I_hevc_qp + rc_pic_type;
+ else
+ cur_hevc_qp = I_hevc_qp + (rc_pic_type - FIELD_OFFSET);
+
+ i4_prev_qp = ps_rc_ctxt->ai4_prev_pic_hevc_qp[i4_scene_num][rc_pic_type];
+
+ if((cur_hevc_qp < i4_prev_qp) && (ps_rc_ctxt->i4_num_active_pic_type > 2) &&
+ (is_first_frame_coded(ps_rc_ctxt->rc_hdl)) && (!i4_fade_scene))
+ {
+ cur_hevc_qp = i4_prev_qp;
+ }
+ i4_frame_qp =
+ ihevce_rc_get_scaled_mpeg2_qp(cur_hevc_qp, ps_rc_ctxt->ps_rc_quant_ctxt);
+ i4_hevc_frame_qp = cur_hevc_qp;
+ //ps_rc_ctxt->i4_is_non_I_scd_pic = 0;
+
+ rc_reset_first_frame_coded_flag(ps_rc_ctxt->rc_hdl, rc_pic_type);
+ }
+ else
+ {}
+ }
+ if((1 == i4_is_no_model_scd) && (call_type == ENC_GET_QP))
+ {
+ WORD32 i4_clip_QP;
+ i4_frame_qp_q6 =
+ clip_qp_based_on_prev_ref(ps_rc_ctxt->rc_hdl, rc_pic_type, 1, i4_scene_num);
+ i4_clip_QP =
+ ihevce_rc_get_scaled_hevce_qp_q6(i4_frame_qp_q6, ps_rc_ctxt->u1_bit_depth);
+ if(ps_rc_ctxt->i4_rc_pass != 2)
+ {
+ i4_hevc_frame_qp = i4_clip_QP;
+ }
+ if((rc_pic_type == P_PIC) || (rc_pic_type == P1_PIC))
+ {
+ *pi4_tot_bits_estimated = (*pi4_tot_bits_estimated * 11) >> 4; /* P picture*/
+ i4_estimate_to_calc_frm_error = (i4_estimate_to_calc_frm_error * 11) >> 4;
+ }
+ else if((rc_pic_type == B_PIC) || (rc_pic_type == BB_PIC))
+ {
+ *pi4_tot_bits_estimated = (*pi4_tot_bits_estimated * 9) >> 4; /* B layer 1*/
+ i4_estimate_to_calc_frm_error = (i4_estimate_to_calc_frm_error * 9) >> 4;
+ }
+ else if((rc_pic_type == B1_PIC) || (rc_pic_type == B11_PIC))
+ {
+ *pi4_tot_bits_estimated = (*pi4_tot_bits_estimated * 7) >> 4; /* B layer 2*/
+ i4_estimate_to_calc_frm_error = (i4_estimate_to_calc_frm_error * 7) >> 4;
+ }
+ else if((rc_pic_type == B2_PIC) || (rc_pic_type == B22_PIC))
+ {
+ *pi4_tot_bits_estimated = (*pi4_tot_bits_estimated * 5) >> 4; /* B layer 3*/
+ i4_estimate_to_calc_frm_error = (i4_estimate_to_calc_frm_error * 5) >> 4;
+ }
+ }
+ rc_add_est_tot(ps_rc_ctxt->rc_hdl, *pi4_tot_bits_estimated);
+ }
+
+ ASSERT(i4_hevc_frame_qp >= -ps_rc_ctxt->ps_rc_quant_ctxt->i1_qp_offset);
+
+ /*constraint qp swing based on neighbour frames*/
+ if(is_first_frame_coded(ps_rc_ctxt->rc_hdl))
+ {
+ if(ps_rc_ctxt->i4_field_pic == 0)
+ {
+ /*In dissolve case the p frame comes before an I pic and ref b comes after then what
+ happens is b frame qp is restricted by the p frame qp so changed it to prev ref pic type*/
+ if(rc_pic_type != I_PIC && rc_pic_type != P_PIC)
+ {
+ if(ps_rc_lap_out->i4_rc_temporal_lyr_id == 1)
+ {
+ picture_type_e prev_ref_pic_type =
+ rc_getprev_ref_pic_type(ps_rc_ctxt->rc_hdl);
+
+ if(i4_hevc_frame_qp >
+ ps_rc_ctxt->ai4_prev_pic_hevc_qp[i4_scene_num][prev_ref_pic_type] + 3)
+ {
+ if(ps_rc_ctxt->ai4_prev_pic_hevc_qp[i4_scene_num][prev_ref_pic_type] >
+ 0)
+ i4_hevc_frame_qp =
+ ps_rc_ctxt
+ ->ai4_prev_pic_hevc_qp[i4_scene_num][prev_ref_pic_type] +
+ 3;
+ }
+ }
+ else if(
+ i4_hevc_frame_qp >
+ (ps_rc_ctxt->ai4_prev_pic_hevc_qp[i4_scene_num][rc_pic_type - 1] + 3))
+ {
+ /*allow max of +3 compared to previous frame*/
+ if(ps_rc_ctxt->ai4_prev_pic_hevc_qp[i4_scene_num][rc_pic_type - 1] > 0)
+ i4_hevc_frame_qp =
+ ps_rc_ctxt->ai4_prev_pic_hevc_qp[i4_scene_num][rc_pic_type - 1] + 3;
+ }
+ }
+
+ if((rc_pic_type != I_PIC && rc_pic_type != P_PIC) &&
+ (i4_hevc_frame_qp <
+ ps_rc_ctxt->ai4_prev_pic_hevc_qp[i4_scene_num][rc_pic_type - 1]))
+ {
+ i4_hevc_frame_qp =
+ ps_rc_ctxt->ai4_prev_pic_hevc_qp[i4_scene_num][rc_pic_type - 1];
+ }
+
+ /** Force non-ref B pic qp to be ref_B_PIC_qp - 1. This is not valid for when max teporla later is less than 2*/
+ if(temporal_layer_id == ps_rc_ctxt->i4_max_temporal_lyr &&
+ ps_rc_ctxt->i4_max_temporal_lyr > 1)
+ {
+ i4_hevc_frame_qp =
+ ps_rc_ctxt->ai4_prev_pic_hevc_qp[i4_scene_num][rc_pic_type - 1] + 1;
+ }
+ }
+ else /*for field case*/
+ {
+ if(ps_rc_lap_out->i4_rc_temporal_lyr_id >= 1)
+ {
+ /*To make the comparison of qp with the top field's of previous layer tempor layer id matches with the pic type. */
+ if(i4_hevc_frame_qp >
+ ps_rc_ctxt->ai4_prev_pic_hevc_qp[i4_scene_num]
+ [ps_rc_lap_out->i4_rc_temporal_lyr_id] +
+ 3)
+ {
+ /*allow max of +3 compared to previous frame*/
+ if(0 <
+ ps_rc_ctxt->ai4_prev_pic_hevc_qp[i4_scene_num]
+ [ps_rc_lap_out->i4_rc_temporal_lyr_id])
+ i4_hevc_frame_qp =
+ ps_rc_ctxt
+ ->ai4_prev_pic_hevc_qp[i4_scene_num]
+ [ps_rc_lap_out->i4_rc_temporal_lyr_id] +
+ 3;
+ }
+ if(i4_hevc_frame_qp <
+ ps_rc_ctxt->ai4_prev_pic_hevc_qp[i4_scene_num]
+ [ps_rc_lap_out->i4_rc_temporal_lyr_id])
+ {
+ i4_hevc_frame_qp =
+ ps_rc_ctxt->ai4_prev_pic_hevc_qp[i4_scene_num]
+ [ps_rc_lap_out->i4_rc_temporal_lyr_id];
+ }
+
+ /** Force non-ref B pic qp to be ref_B_PIC_qp - 1. This is not valid for when max teporla later is less than 2*/
+ if(temporal_layer_id == ps_rc_ctxt->i4_max_temporal_lyr &&
+ ps_rc_ctxt->i4_max_temporal_lyr > 1)
+ {
+ i4_hevc_frame_qp =
+ ps_rc_ctxt->ai4_prev_pic_hevc_qp[i4_scene_num]
+ [ps_rc_lap_out->i4_rc_temporal_lyr_id] +
+ 1;
+ }
+ }
+ /** At lower range qp swing for same pic type is also imposed to make sure
+ qp does not fall from 10 to 4 since they differ by only one q scale*/
+ }
+ }
+
+ /**clip to min qp which is user configurable*/
+ i4_hevc_frame_qp = ihevce_clip_min_max_qp(
+ ps_rc_ctxt, i4_hevc_frame_qp, rc_pic_type, ps_rc_lap_out->i4_rc_temporal_lyr_id);
+
+#if 1 //FRAME_PARALLEL_LVL
+ ps_rc_ctxt->i4_est_text_bits_ctr_get_qp++; //ELP_RC
+ ps_rc_ctxt->i4_est_text_bits_ctr_get_qp =
+ (ps_rc_ctxt->i4_est_text_bits_ctr_get_qp % (ps_rc_ctxt->i4_num_frame_parallel));
+#endif
+ /** the estimates are reset only duing enc call*/
+
+#if USE_USER_FIRST_FRAME_QP
+ /*I_PIC check is necessary coz pre-enc can query for qp even before first frame update has happened*/
+ if(!ps_rc_ctxt->i4_is_first_frame_encoded && rc_pic_type == I_PIC)
+ {
+ i4_hevc_frame_qp = ps_rc_ctxt->i4_init_frame_qp_user;
+ DBG_PRINTF("FIXED START QP PATH *************************\n");
+ }
+#endif
+ }
+
+ if(CONST_QP != e_rc_type)
+ {
+ ASSERT(*pi4_tot_bits_estimated != 0);
+ }
+
+ ps_rc_ctxt->s_rc_high_lvl_stat.i4_finalQP = i4_hevc_frame_qp;
+ if(ps_rc_lap_out->i4_is_model_valid)
+ {
+ get_bits_for_final_qp(
+ ps_rc_ctxt->rc_hdl,
+ &ps_rc_ctxt->s_rc_high_lvl_stat.i4_modelQP,
+ &ps_rc_ctxt->s_rc_high_lvl_stat.i4_maxEbfQP,
+ &ps_rc_ctxt->s_rc_high_lvl_stat.i8_bits_from_finalQP,
+ i4_hevc_frame_qp,
+ ihevce_rc_get_scaled_mpeg2_qp_q6(
+ i4_hevc_frame_qp + ps_rc_ctxt->ps_rc_quant_ctxt->i1_qp_offset,
+ ps_rc_ctxt->u1_bit_depth),
+ i4_cur_est_header_bits,
+ i4_est_tex_bits,
+ i4_vbv_buf_max_bits,
+ rc_pic_type,
+ ps_rc_lap_out->i4_rc_display_num);
+ }
+ i4_deltaQP = ihevce_ebf_based_rc_correction_to_avoid_overflow(
+ ps_rc_ctxt, ps_rc_lap_out, pi4_tot_bits_estimated);
+ i4_hevc_frame_qp += i4_deltaQP;
+
+ /**clip to min qp which is user configurable*/
+ i4_hevc_frame_qp = ihevce_clip_min_max_qp(
+ ps_rc_ctxt, i4_hevc_frame_qp, rc_pic_type, ps_rc_lap_out->i4_rc_temporal_lyr_id);
+
+ /*set estimate status for frame level error calculation*/
+ if(i4_estimate_to_calc_frm_error > 0)
+ {
+ rc_set_estimate_status(
+ ps_rc_ctxt->rc_hdl,
+ i4_estimate_to_calc_frm_error - ps_rc_ctxt->i8_est_I_pic_header_bits,
+ ps_rc_ctxt->i8_est_I_pic_header_bits,
+ ps_rc_ctxt->i4_est_text_bits_ctr_get_qp);
+ }
+ else
+ {
+ rc_set_estimate_status(
+ ps_rc_ctxt->rc_hdl,
+ -1,
+ ps_rc_ctxt->i8_est_I_pic_header_bits,
+ ps_rc_ctxt->i4_est_text_bits_ctr_get_qp);
+ }
+
+ ps_rc_lap_out->i8_est_text_bits = *pi4_tot_bits_estimated;
+
+ /*B pictures which are in fades will take the highest QP of either side of P pics*/
+ if(ps_rc_lap_out->i4_rc_pic_type == IV_B_FRAME &&
+ (ps_rc_lap_out->i4_rc_scene_type == SCENE_TYPE_FADE_IN ||
+ ps_rc_lap_out->i4_rc_scene_type == SCENE_TYPE_FADE_OUT))
+ {
+ i4_hevc_frame_qp =
+ MAX(ps_rc_ctxt->ai4_last_tw0_lyr0_pic_qp[0], ps_rc_ctxt->ai4_last_tw0_lyr0_pic_qp[1]);
+ }
+
+ /*saving the last two pics of layer 0*/
+ if(0 == ps_rc_lap_out->i4_rc_temporal_lyr_id)
+ {
+ ps_rc_ctxt->ai4_last_tw0_lyr0_pic_qp[1] = ps_rc_ctxt->ai4_last_tw0_lyr0_pic_qp[0];
+ ps_rc_ctxt->ai4_last_tw0_lyr0_pic_qp[0] = i4_hevc_frame_qp;
+ }
+
+ return i4_hevc_frame_qp;
+}
+
+/*##########################################################*/
+/******* END OF ENC THRD QP QUERY FUNCTIONS ****************/
+/*########################################################*/
+
+/*####################################################*/
+/******* START OF I2AVG RATIO FUNCTIONS **************/
+/*##################################################*/
+
+/**
+******************************************************************************
+*
+* @brief function to get i_to_avg_rest at scene cut frame based on data available from LAP
+*
+* @param[in] pv_rc_ctxt : void pointer to rc ctxt
+* @param[in] ps_rc_lap_out : pointer to lap out structure (first frame of the LAP list that is walked)
+* @param[in] i_to_p_qp_offset : not referenced by the function body
+* @param[in] i4_offset_flag : when 1, the walk also stops at the next I/IDR frame or scene cut I
+* @param[in] i4_call_type : when 1, results are also written to the rc handle (rc_set_* / rc_update_*)
+* @param[in] ai4_qp_offsets : not referenced by the function body
+* @param[in] i4_update_delay : The Delay in the update. This can happen for dist. case!
+* All decision should consider this delay for updation!
+* @return float i_to_rest bit ratio (default 8.0 when fewer than 4 LAP frames are usable)
+******************************************************************************
+*/
+float ihevce_get_i_to_avg_ratio(
+ void *pv_rc_ctxt,
+ rc_lap_out_params_t *ps_rc_lap_out,
+ WORD32 i_to_p_qp_offset,
+ WORD32 i4_offset_flag,
+ WORD32 i4_call_type,
+ WORD32 ai4_qp_offsets[4],
+ WORD32 i4_update_delay)
+{
+ rc_context_t *ps_rc_ctxt = (rc_context_t *)pv_rc_ctxt;
+ WORD32 i = 0, k = 0, num_frames_in_lap[MAX_PIC_TYPE] = { 0 }, ai4_pic_dist[MAX_PIC_TYPE],
+ ai4_pic_dist_in_cur_gop[MAX_PIC_TYPE] = { 0 };
+ WORD32 i4_num_b, i4_num_frms_traversed_in_lap = 0, total_frms_considered = 0,
+ i4_flag_i_frame_exit = 0, u4_rc_scene_number;
+ rc_lap_out_params_t *ps_cur_rc_lap_out = ps_rc_lap_out;
+
+ rc_lap_out_params_t *ps_cur_rc_lap_out_I = ps_rc_lap_out;
+ double complexity[MAX_PIC_TYPE] = { 0 }, d_first_i_complexity = 0, d_first_p_complexity = 0.0f,
+ cur_lambda_modifer, den = 0, average_intra_complexity = 0;
+ double i_frm_lambda_modifier;
+ float i_to_rest_bit_ratio = 8.00;
+ picture_type_e curr_rc_pic_type;
+ LWORD64 i8_l1_analysis_lap_comp = 0;
+ WORD32 i4_intra_frame_interval = rc_get_intra_frame_interval(ps_rc_ctxt->rc_hdl);
+ UWORD32 u4_L1_based_lap_complexity_q7 = 0;
+ WORD32 i4_frame_qp = 0, i4_I_frame_qp = 0;
+
+ WORD32 ai4_lambda_offsets[5] = { -3, -2, 2, 6, 7 };
+ /* The window for which your update is guaranteed */
+ WORD32 updated_window = ps_rc_ctxt->i4_num_frame_in_lap_window - i4_update_delay;
+
+ ASSERT(ps_rc_ctxt->i4_rc_pass != 2);
+ rc_get_pic_distribution(ps_rc_ctxt->rc_hdl, &ai4_pic_dist[0]);
+
+ if(ps_rc_ctxt->i4_max_temporal_lyr)
+ {
+ i4_num_b = ((WORD32)pow((float)2, ps_rc_ctxt->i4_max_temporal_lyr)) - 1;
+ }
+ else
+ {
+ i4_num_b = 0;
+ }
+ i_frm_lambda_modifier = ihevce_get_frame_lambda_modifier((WORD8)I_PIC, 0, 1, 1, i4_num_b);
+ /* check should be wrt inter frame interval*/
+ /*If lap frames are not sufficient return default ratio*/
+ u4_rc_scene_number = ps_cur_rc_lap_out_I->u4_rc_scene_num; /* NOTE(review): dereferences ps_rc_lap_out ahead of the NULL check below; the value is not read again in this function */
+
+ if(updated_window < 4)
+ {
+ return i_to_rest_bit_ratio;
+ }
+
+ k = 0;
+ if(ps_cur_rc_lap_out != NULL)
+ {
+ WORD32 i4_temp_frame_qp;
+
+ if(ps_cur_rc_lap_out->i4_L0_qp == -1)
+ {
+ i4_frame_qp = ps_cur_rc_lap_out->i4_L1_qp;
+ i4_I_frame_qp = ps_cur_rc_lap_out->i4_L1_qp - 3;
+ }
+ else
+ {
+ i4_frame_qp = ps_cur_rc_lap_out->i4_L0_qp;
+ i4_I_frame_qp = ps_cur_rc_lap_out->i4_L0_qp - 3;
+ }
+
+ do
+ {
+ curr_rc_pic_type = ihevce_rc_conv_pic_type(
+ (IV_PICTURE_CODING_TYPE_T)ps_cur_rc_lap_out->i4_rc_pic_type,
+ ps_rc_ctxt->i4_field_pic,
+ ps_cur_rc_lap_out->i4_rc_temporal_lyr_id,
+ ps_cur_rc_lap_out->i4_is_bottom_field,
+ ps_rc_ctxt->i4_top_field_first);
+ cur_lambda_modifer = ihevce_get_frame_lambda_modifier(
+ (WORD8)curr_rc_pic_type,
+ ps_cur_rc_lap_out->i4_rc_temporal_lyr_id,
+ 1,
+ ps_cur_rc_lap_out->i4_rc_is_ref_pic,
+ i4_num_b);
+ if(curr_rc_pic_type == I_PIC)
+ {
+ i4_temp_frame_qp = i4_frame_qp + ai4_lambda_offsets[curr_rc_pic_type];
+ }
+ else
+ {
+ i4_temp_frame_qp =
+ i4_frame_qp + ai4_lambda_offsets[ps_cur_rc_lap_out->i4_rc_temporal_lyr_id + 1];
+ i4_temp_frame_qp =
+ i4_temp_frame_qp +
+ ps_cur_rc_lap_out->ai4_offsets[ps_cur_rc_lap_out->i4_rc_temporal_lyr_id + 1];
+ }
+
+ i4_temp_frame_qp = CLIP3(i4_temp_frame_qp, 1, 51);
+ i4_I_frame_qp = CLIP3(i4_I_frame_qp, 1, 51);
+
+ if(curr_rc_pic_type == I_PIC)
+ {
+ complexity[I_PIC] += (double)ps_cur_rc_lap_out->ai8_pre_intra_sad[i4_I_frame_qp];
+ if(total_frms_considered == 0)
+ d_first_i_complexity =
+ (double)ps_cur_rc_lap_out->ai8_pre_intra_sad[i4_I_frame_qp];
+
+ num_frames_in_lap[I_PIC]++;
+ i8_l1_analysis_lap_comp +=
+ (LWORD64)(1.17 * ps_cur_rc_lap_out->i8_raw_pre_intra_sad);
+ }
+ else
+ {
+ if((num_frames_in_lap[P_PIC] == 0) && (curr_rc_pic_type == P_PIC))
+ d_first_p_complexity =
+ (double)ps_cur_rc_lap_out->ai8_pre_intra_sad[i4_I_frame_qp];
+
+ if(total_frms_considered == 0)
+ {
+ num_frames_in_lap[I_PIC]++; /* the first frame of the walk is counted as an I picture even when it is not one */
+ {
+ complexity[I_PIC] +=
+ (double)ps_cur_rc_lap_out->ai8_pre_intra_sad[i4_I_frame_qp];
+ d_first_i_complexity =
+ (double)ps_cur_rc_lap_out->ai8_pre_intra_sad[i4_I_frame_qp];
+ }
+ }
+ else
+ {
+ /*SAD is scaled according to the lambda parameters used, to make it proportional to bits consumed in the end*/
+#if !USE_SQRT
+ //complexity[curr_rc_pic_type] += (double)(MIN(ps_cur_rc_lap_out->ai8_frame_acc_coarse_me_sad[i4_temp_frame_qp],ps_cur_rc_lap_out->i8_pre_intra_sad)/(/*(cur_lambda_modifer/i_frm_lambda_modifier) * */pow(1.125,(ps_rc_lap_out->i4_rc_temporal_lyr_id + 1/*i_to_p_qp_offset*/))));
+ if((curr_rc_pic_type > P_PIC) &&
+ (ps_rc_lap_out->i4_rc_quality_preset == IHEVCE_QUALITY_P6))
+ complexity[curr_rc_pic_type] +=
+ (double)(ps_cur_rc_lap_out->ai8_frame_acc_coarse_me_sad
+ [i4_temp_frame_qp]); // /(/*(cur_lambda_modifer/i_frm_lambda_modifier) * */pow(1.125,(ps_rc_lap_out->i4_rc_temporal_lyr_id + 1/*i_to_p_qp_offset*/))));
+ else
+ complexity[curr_rc_pic_type] += (double)(MIN(
+ ps_cur_rc_lap_out->ai8_frame_acc_coarse_me_sad[i4_temp_frame_qp],
+ ps_cur_rc_lap_out->ai8_pre_intra_sad
+ [i4_temp_frame_qp])); ///(/*(cur_lambda_modifer/i_frm_lambda_modifier) * */pow(1.125,(ps_rc_lap_out->i4_rc_temporal_lyr_id + 1/*i_to_p_qp_offset*/))));
+
+#else
+ complexity[curr_rc_pic_type] +=
+ MIN(ps_cur_rc_lap_out->ai8_frame_acc_coarse_me_sad[i4_temp_frame_qp],
+ ps_cur_rc_lap_out->i8_pre_intra_sad) /
+ (sqrt(cur_lambda_modifer / i_frm_lambda_modifier) *
+ pow(1.125, (ps_rc_lap_out->i4_rc_temporal_lyr_id + 1)));
+#endif
+ num_frames_in_lap[curr_rc_pic_type]++;
+ }
+ i8_l1_analysis_lap_comp += (LWORD64)(
+ (float)ps_cur_rc_lap_out->i8_raw_l1_coarse_me_sad /
+ pow(1.125, curr_rc_pic_type));
+ }
+
+ if(ps_rc_lap_out->i4_rc_quality_preset == IHEVCE_QUALITY_P6)
+ {
+ if(curr_rc_pic_type < B_PIC)
+ {
+ /*accumulate average intra sad*/
+ average_intra_complexity +=
+ ps_cur_rc_lap_out
+ ->ai8_pre_intra_sad[i4_I_frame_qp] /*/i_frm_lambda_modifier*/;
+ i4_num_frms_traversed_in_lap++;
+ }
+ }
+ else
+ {
+ /*accumulate average intra sad*/
+ average_intra_complexity +=
+ ps_cur_rc_lap_out->ai8_pre_intra_sad[i4_I_frame_qp] /*/i_frm_lambda_modifier*/;
+ i4_num_frms_traversed_in_lap++;
+ }
+
+ ai4_pic_dist_in_cur_gop[curr_rc_pic_type]++;
+ i++;
+ total_frms_considered++;
+ i4_num_frms_traversed_in_lap++; /* NOTE(review): second increment for frames already counted in the branches above - confirm this is intended */
+ ps_cur_rc_lap_out = (rc_lap_out_params_t *)ps_cur_rc_lap_out->ps_rc_lap_out_next_encode;
+
+ if((ps_cur_rc_lap_out == NULL) ||
+ ((total_frms_considered + k) == i4_intra_frame_interval) || (i >= updated_window))
+ {
+ break;
+ }
+
+ if((i >= (ps_rc_ctxt->i4_next_sc_i_in_rc_look_ahead - k) ||
+ (ps_cur_rc_lap_out->i4_rc_pic_type == IV_I_FRAME) ||
+ (ps_cur_rc_lap_out->i4_rc_pic_type == IV_IDR_FRAME)) &&
+ (i4_offset_flag == 1))
+ {
+ break;
+ }
+ /*If an I frame enters the lookahead it can cause bit allocation to go bad
+ if corresponding p/b frames are absent*/
+ if(((total_frms_considered + k) > (WORD32)(0.75f * i4_intra_frame_interval)) &&
+ ((ps_cur_rc_lap_out->i4_rc_pic_type == IV_I_FRAME) ||
+ (ps_cur_rc_lap_out->i4_rc_pic_type == IV_IDR_FRAME)))
+ {
+ i4_flag_i_frame_exit = 1;
+ break;
+ }
+
+ } while(1);
+
+ if(total_frms_considered > 0)
+ {
+ float lap_L1_comp =
+ (float)i8_l1_analysis_lap_comp /
+ (total_frms_considered * ps_rc_ctxt->i4_frame_height * ps_rc_ctxt->i4_frame_width);
+
+ lap_L1_comp = rc_get_offline_normalized_complexity(
+ ps_rc_ctxt->u4_intra_frame_interval,
+ ps_rc_ctxt->i4_frame_height * ps_rc_ctxt->i4_frame_width,
+ lap_L1_comp,
+ ps_rc_ctxt->i4_rc_pass);
+
+ u4_L1_based_lap_complexity_q7 = (WORD32)((lap_L1_comp * (1 << 7)) + .05f);
+ }
+ else
+ {
+ u4_L1_based_lap_complexity_q7 = 25;
+ }
+
+ if(i4_call_type == 1)
+ {
+ if(num_frames_in_lap[0] > 0)
+ {
+ float f_curr_i_to_sum = (float)(d_first_i_complexity / complexity[0]);
+ f_curr_i_to_sum = CLIP3(f_curr_i_to_sum, 0.1f, 100.0f);
+ rc_set_i_to_sum_api_ba(ps_rc_ctxt->rc_hdl, f_curr_i_to_sum);
+ }
+ }
+
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ if(num_frames_in_lap[i] > 0)
+ {
+ complexity[i] = complexity[i] / num_frames_in_lap[i];
+ }
+ }
+ /*for non - I scd case it is possible that entire LAP window might not have intra picture. Consider average intra sad when
+ at least one I pic is not available*/
+ if(num_frames_in_lap[I_PIC] == 0)
+ {
+ ASSERT(i4_num_frms_traversed_in_lap);
+ complexity[I_PIC] = average_intra_complexity / i4_num_frms_traversed_in_lap;
+ }
+ /*get picture type distribution in LAP*/
+ if(num_frames_in_lap[I_PIC] == 0)
+ {
+ rc_get_pic_distribution(ps_rc_ctxt->rc_hdl, &ai4_pic_dist[0]);
+ }
+ else
+ {
+ memmove(ai4_pic_dist, num_frames_in_lap, sizeof(WORD32) * MAX_PIC_TYPE);
+ }
+
+ {
+ WORD32 num_inter_pic = 0;
+ for(i = 1; i < MAX_PIC_TYPE; i++)
+ {
+ den += complexity[i] * ai4_pic_dist[i];
+ }
+
+ for(i = 1; i < MAX_PIC_TYPE; i++)
+ {
+ num_inter_pic += ai4_pic_dist[i];
+ }
+ if(num_inter_pic > 0)
+ den = den / num_inter_pic;
+ else
+ den = 0.0;
+ }
+
+ if(den > 0)
+ i_to_rest_bit_ratio = (float)((complexity[I_PIC]) / den);
+ else
+ i_to_rest_bit_ratio = 15;
+
+ if((total_frms_considered < (WORD32)(0.75f * i4_intra_frame_interval)) &&
+ (total_frms_considered < (updated_window - 1)) &&
+ ((UWORD32)total_frms_considered < ((ps_rc_ctxt->u4_max_frame_rate / 1000))))
+ {
+ /*This GOP will only sustain for few frames hence have strict restriction for I to rest ratio*/
+ if(i_to_rest_bit_ratio > 12)
+ i_to_rest_bit_ratio = 12;
+
+ if(i_to_rest_bit_ratio > 8 &&
+ total_frms_considered < (ps_rc_ctxt->i4_max_inter_frm_int * 2))
+ i_to_rest_bit_ratio = 8;
+ }
+ }
+
+ if((i4_call_type == 1) && (i_to_rest_bit_ratio < I_TO_REST_VVFAST) && (i4_offset_flag == 1))
+ {
+ float f_p_to_i_ratio = (float)(d_first_p_complexity / d_first_i_complexity);
+ if(ps_rc_lap_out->i8_frame_satd_act_accum <
+ (ps_rc_ctxt->i4_frame_height * ps_rc_ctxt->i4_frame_width * 1.5f))
+ rc_set_p_to_i_complexity_ratio(ps_rc_ctxt->rc_hdl, f_p_to_i_ratio);
+ }
+
+ /*Reset the pic distribution if I frame exit was encountered*/
+
+ if(ps_rc_ctxt->e_rate_control_type != CONST_QP)
+ {
+ rc_get_pic_distribution(ps_rc_ctxt->rc_hdl, &ai4_pic_dist[0]);
+ if((ai4_pic_dist_in_cur_gop[I_PIC] > 1) && (ai4_pic_dist[0] == 1))
+ {
+ i4_flag_i_frame_exit = 1;
+ }
+ if(i4_flag_i_frame_exit && (i4_call_type == 1))
+ {
+ if(ai4_pic_dist_in_cur_gop[I_PIC] == 0)
+ memmove(ai4_pic_dist_in_cur_gop, num_frames_in_lap, sizeof(WORD32) * MAX_PIC_TYPE);
+
+ rc_update_pic_distn_lap_to_rc(ps_rc_ctxt->rc_hdl, ai4_pic_dist_in_cur_gop);
+ rc_set_bits_based_on_complexity(
+ ps_rc_ctxt->rc_hdl, u4_L1_based_lap_complexity_q7, total_frms_considered);
+ }
+ }
+
+ return i_to_rest_bit_ratio;
+}
+
+/*##################################################*/
+/******* END OF I2AVG RATIO FUNCTIONS **************/
+/*################################################*/
+
+/*#########################################################*/
+/******* START OF QSCALE CONVERSION FUNCTIONS *************/
+/*########################################################*/
+
+/**
+******************************************************************************
+*
+* @brief Look up the HEVC frame qp for a qscale value
+*
+* @par Description
+*  The qscale is limited to [i2_min_qscale, i2_max_qscale] of the quant context
+*  and then used directly as the index into pi4_qscale_to_qp.
+*
+* @param[in] i4_frame_qs_q3 : QP value in qscale
+* @param[in] ps_rc_quant_ctxt : quant context holding the limits and the table
+* @return frame qp
+******************************************************************************
+*/
+WORD32 ihevce_rc_get_scaled_hevc_qp_from_qs_q3(WORD32 i4_frame_qs_q3, rc_quant_t *ps_rc_quant_ctxt)
+{
+    /* the upper limit is tested first; the lower limit applies only otherwise */
+    const WORD32 i4_table_idx =
+        (i4_frame_qs_q3 > ps_rc_quant_ctxt->i2_max_qscale)
+            ? ps_rc_quant_ctxt->i2_max_qscale
+            : ((i4_frame_qs_q3 < ps_rc_quant_ctxt->i2_min_qscale) ? ps_rc_quant_ctxt->i2_min_qscale
+                                                                    : i4_frame_qs_q3);
+    return ps_rc_quant_ctxt->pi4_qscale_to_qp[i4_table_idx];
+}
+
+/**
+******************************************************************************
+* @brief Convert an HEVC frame qp to a rounded qscale value
+*
+* @par Description
+*  The qp is limited to [i2_min_qp, i2_max_qp], i1_qp_offset is added and the
+*  sum indexes pi4_qp_to_qscale. The table entry is rounded and shifted right
+*  by QSCALE_Q_FAC_3 bits.
+*
+* @param[in] i4_frame_qp : QP value
+* @param[in] ps_rc_quant_ctxt : quant context holding the limits and the table
+* @return value in qscale
+******************************************************************************
+*/
+WORD32 ihevce_rc_get_scaled_mpeg2_qp(WORD32 i4_frame_qp, rc_quant_t *ps_rc_quant_ctxt)
+{
+    const WORD32 i4_round = 1 << (QSCALE_Q_FAC_3 - 1);
+    WORD32 i4_table_idx;
+
+    /* the upper limit is tested first; the lower limit applies only otherwise */
+    i4_frame_qp = (i4_frame_qp > ps_rc_quant_ctxt->i2_max_qp)
+                      ? ps_rc_quant_ctxt->i2_max_qp
+                      : ((i4_frame_qp < ps_rc_quant_ctxt->i2_min_qp) ? ps_rc_quant_ctxt->i2_min_qp
+                                                                      : i4_frame_qp);
+
+    i4_table_idx = i4_frame_qp + ps_rc_quant_ctxt->i1_qp_offset;
+    return (ps_rc_quant_ctxt->pi4_qp_to_qscale[i4_table_idx] + i4_round) >> QSCALE_Q_FAC_3;
+}
+
+/**
+******************************************************************************
+* @brief function to convert from qp to qscale
+*
+* @par Description : Maps a logarithmic QP to a linear QP by evaluating
+*                    2^((qp - 4) / 6). The result is converted to fixed point
+*                    using QSCALE_Q_FAC as the Q factor and is never smaller
+*                    than (1 << QSCALE_Q_FAC).
+*
+* @param[in] i4_frame_qp : QP value (log scale)
+* @param[in] u1_bit_depth : bit depth, used only by the range assertion
+*
+* @return value in QP (linear scale)
+******************************************************************************
+*/
+WORD32 ihevce_rc_get_scaled_mpeg2_qp_q6(WORD32 i4_frame_qp, UWORD8 u1_bit_depth)
+{
+    const WORD32 i4_min_qp_q6 = 1 << QSCALE_Q_FAC;
+    WORD32 i4_linear_qp_q6;
+    number_t s_linear_qp;
+
+    /* referenced explicitly, presumably so builds where ASSERT expands to nothing stay warning free */
+    (void)u1_bit_depth;
+    ASSERT(i4_frame_qp >= 0);
+    ASSERT(i4_frame_qp <= 51 + ((u1_bit_depth - 8) * 6));
+
+    convert_float_to_fix((float)pow((float)2, ((float)(i4_frame_qp - 4) / 6)), &s_linear_qp);
+    convert_varq_to_fixq(s_linear_qp, &i4_linear_qp_q6, QSCALE_Q_FAC);
+
+    /* the result is floored at (1 << QSCALE_Q_FAC) */
+    return (i4_linear_qp_q6 < i4_min_qp_q6) ? i4_min_qp_q6 : i4_linear_qp_q6;
+}
+
+/**
+******************************************************************************
+* @brief function to convert from qscale to qp
+* @par Description
+*  Computes 6 * log2(qscale) + 4, rounds it and subtracts 6 per bit of depth
+*  above 8. A zero input yields qp 0 before that subtraction; it is handled up
+*  front (as in ihevce_rc_get_scaled_hevce_qp_q3) so log(0) is never converted.
+*
+* @param[in] i4_frame_qp_q6 : QP value in qscale. the input is assumed to be in q6 format
+* @param[in] u1_bit_depth : bit depth used for the qp offset
+* @return frame qp
+******************************************************************************
+*/
+WORD32 ihevce_rc_get_scaled_hevce_qp_q6(WORD32 i4_frame_qp_q6, UWORD8 u1_bit_depth)
+{
+    WORD32 i4_hevce_qp = 0;
+
+    if(i4_frame_qp_q6 != 0)
+    {
+        number_t s_hevce_qp, s_temp;
+        float f_mpeg2_qp = (float)i4_frame_qp_q6 / (1 << QSCALE_Q_FAC);
+        float f_hevce_qp = (6 * ((float)log(f_mpeg2_qp) / (float)log((float)2))) + 4;
+
+        convert_float_to_fix(f_hevce_qp, &s_hevce_qp);
+        /*round off to nearest integer*/
+        s_temp.sm = 1;
+        s_temp.e = 1;
+        add32_var_q(s_hevce_qp, s_temp, &s_hevce_qp);
+        number_t_to_word32(s_hevce_qp, &i4_hevce_qp);
+    }
+
+    return i4_hevce_qp - ((u1_bit_depth - 8) * 6);
+}
+
+/**
+******************************************************************************
+*
+* @brief function to convert from qp scale to qp
+*
+* @par Description : This function maps linear QP values to logarithmic QP
+*                    values. The linear values are represented in Q3 format.
+*                    For a non-zero input it evaluates
+*                    6 * (log2(qscale) - 3) + 4, rounds the result and then
+*                    subtracts 6 for every bit of depth above 8. A zero input
+*                    skips the logarithm and starts from qp 0.
+*
+* @param[in] i4_frame_qp : QP value (linear scale, Q3 mode)
+* @param[in] u1_bit_depth : bit depth used for the qp offset
+*
+* @return value in QP (log scale)
+*
+******************************************************************************
+*/
+WORD32 ihevce_rc_get_scaled_hevce_qp_q3(WORD32 i4_frame_qp, UWORD8 u1_bit_depth)
+{
+    const WORD32 i4_bit_depth_offset = (u1_bit_depth - 8) * 6;
+    WORD32 i4_log_qp = 0;
+
+    if(i4_frame_qp != 0)
+    {
+        number_t s_log_qp, s_round;
+        float f_linear_qp = (float)i4_frame_qp;
+        float f_log_qp = (6 * ((float)log(f_linear_qp) / (float)log((float)2) - 3)) + 4;
+
+        convert_float_to_fix(f_log_qp, &s_log_qp);
+
+        /* add the rounding term before the value is turned into an integer */
+        s_round.sm = 1;
+        s_round.e = 1;
+        add32_var_q(s_log_qp, s_round, &s_log_qp);
+        number_t_to_word32(s_log_qp, &i4_log_qp);
+    }
+
+    return i4_log_qp - i4_bit_depth_offset;
+}
+
+/*#######################################################*/
+/******* END OF QSCALE CONVERSION FUNCTIONS *************/
+/*######################################################*/
+
+/*###############################################*/
+/******* START OF SET,GET FUNCTIONS *************/
+/*#############################################*/
+
+/**
+******************************************************************************
+* @brief Convert pic type to rc pic type
+*
+* @par Description
+*  The rc picture type starts out as the numeric value of pic_type and is
+*  then adjusted as follows.
+*  Progressive source (i4_field_pic == 0):
+*   - IV_IDR_FRAME maps to I_PIC
+*   - any other type with temporal layer id above 1 is moved up by
+*     (i4_temporal_layer_id - 1)
+*  Field source (i4_field_pic == 1):
+*   - IV_IDR_FRAME and IV_I_FRAME map to I_PIC
+*   - other types are moved up by (i4_temporal_layer_id - 1) when the
+*     temporal layer id is above 1, plus 4 for a bottom field
+*   - the mapping is the same for i4_top_field_first 0 and 1; for any other
+*     value of i4_top_field_first the type is returned unchanged
+*  For any other value of i4_field_pic the type is returned unchanged.
+*
+*  A pic_type above 9 or a temporal layer id above 3 is reported through
+*  DBG_PRINTF and terminates the process with exit(0). The two checks are
+*  independent: the previous "&&" only fired when both were out of range at
+*  the same time, contradicting the diagnostic text.
+*
+* @param[in] pic_type
+*  Pic type
+* @param[in] i4_field_pic
+*  0 for progressive source, 1 for field source
+* @param[in] i4_temporal_layer_id
+*  temporal layer id of the picture (0 to 3)
+* @param[in] i4_is_bottom_field
+*  non-zero for a bottom field (expected to be 0 or 1)
+* @param[in] i4_top_field_first
+*  field order flag (0 or 1)
+*
+* @return rc_pic_type
+*
+******************************************************************************
+*/
+picture_type_e ihevce_rc_conv_pic_type(
+    IV_PICTURE_CODING_TYPE_T pic_type,
+    WORD32 i4_field_pic,
+    WORD32 i4_temporal_layer_id,
+    WORD32 i4_is_bottom_field,
+    WORD32 i4_top_field_first)
+{
+    picture_type_e rc_pic_type = (picture_type_e)pic_type;
+
+    /* Either condition alone makes the input unsupported, so the two tests are
+       OR-ed; with AND an out of range value slipped through whenever the other
+       argument happened to be valid. */
+    if(pic_type > 9 || i4_temporal_layer_id > 3)
+    {
+        DBG_PRINTF("unsupported picture type or temporal id\n");
+        exit(0);
+    }
+
+    if(i4_field_pic == 0) /*Progressive Source*/
+    {
+        if(pic_type == IV_IDR_FRAME)
+        {
+            rc_pic_type = I_PIC;
+        }
+        else if(i4_temporal_layer_id > 1)
+        {
+            /*return different picture type based on temporal layer*/
+            rc_pic_type = (picture_type_e)(pic_type + (i4_temporal_layer_id - 1));
+        }
+    }
+    else if(i4_field_pic == 1)
+    {
+        if(pic_type == IV_IDR_FRAME || pic_type == IV_I_FRAME)
+        {
+            rc_pic_type = I_PIC;
+        }
+        else if(i4_top_field_first == 1 || i4_top_field_first == 0)
+        {
+            /* Both field orders use the same mapping, so they share one branch */
+            WORD32 i4_type_offset = 0;
+
+            if(i4_temporal_layer_id > 1)
+            {
+                /*return different picture type based on temporal layer*/
+                i4_type_offset = i4_temporal_layer_id - 1;
+                if(i4_is_bottom_field != 0)
+                {
+                    i4_type_offset += 4; /*Offset of 4 for the bottomfield*/
+                }
+            }
+            else if(i4_is_bottom_field == 1)
+            {
+                i4_type_offset = 4; /*Offset of 4 for the bottomfield*/
+            }
+            rc_pic_type = (picture_type_e)(pic_type + i4_type_offset);
+        }
+    }
+
+    return rc_pic_type;
+}
+
+/**
+******************************************************************************
+* @brief Record the intra cost of the current frame in the rc context
+*
+* @par Description
+*  Stores the cost in ai8_cur_frm_intra_cost at the slot of the given frame id.
+* @param[inout] pv_ctxt : rc context, passed as a void pointer
+* @param[in] i8_cur_frm_intra_cost : intra cost to store
+* @param[in] i4_enc_frm_id : index of the slot that is written
+******************************************************************************
+*/
+void ihevce_rc_update_cur_frm_intra_satd(
+    void *pv_ctxt, LWORD64 i8_cur_frm_intra_cost, WORD32 i4_enc_frm_id)
+{
+    ((rc_context_t *)pv_ctxt)->ai8_cur_frm_intra_cost[i4_enc_frm_id] = i8_cur_frm_intra_cost;
+}
+/**
+******************************************************************************
+* @brief Read the scene type stored in a lap out structure
+*
+* @param[in] ps_rc_lap_out : lap out structure to read from
+* @return i4_rc_scene_type of the structure, as WORD32
+******************************************************************************
+*/
+/* Functions dependent on lap input*/
+WORD32 ihevce_rc_lap_get_scene_type(rc_lap_out_params_t *ps_rc_lap_out)
+{
+    const WORD32 i4_scene_type = (WORD32)ps_rc_lap_out->i4_rc_scene_type;
+
+    return i4_scene_type;
+}
+
+/**
+******************************************************************************
+* @name ihevce_rc_get_pic_param
+*
+* @par Description
+*  Derives the temporal layer id and the bottom field flag from an rc picture type.
+*
+* @param[in] rc_pic_type : rc picture type to decode
+* @param[out] pi4_tem_lyr : receives the temporal layer id (0 to 3)
+* @param[out] pi4_is_bottom_field : receives 1 for P1_PIC and above, else 0
+* @return void
+******************************************************************************
+*/
+static void ihevce_rc_get_pic_param(
+    picture_type_e rc_pic_type, WORD32 *pi4_tem_lyr, WORD32 *pi4_is_bottom_field)
+{
+    /*bottom field determination*/
+    *pi4_is_bottom_field = (rc_pic_type >= P1_PIC) ? 1 : 0;
+
+    /*temporal lyr id determination*/
+    if(rc_pic_type == I_PIC || rc_pic_type == P_PIC || rc_pic_type == P1_PIC)
+    {
+        *pi4_tem_lyr = 0;
+        return;
+    }
+    if(rc_pic_type == B_PIC || rc_pic_type == BB_PIC)
+    {
+        *pi4_tem_lyr = 1;
+        return;
+    }
+    if(rc_pic_type == B1_PIC || rc_pic_type == B11_PIC)
+    {
+        *pi4_tem_lyr = 2;
+        return;
+    }
+    if(rc_pic_type == B2_PIC || rc_pic_type == B22_PIC)
+    {
+        *pi4_tem_lyr = 3;
+        return;
+    }
+
+    /* no temporal layer is assigned for any other picture type */
+    ASSERT(0);
+}
+/**
+******************************************************************************
+* @name ihevce_get_offline_index
+* @param[in] ps_rc_ctxt - pointer to rc context (supplies the quality preset)
+* @param[in] i4_num_pels_in_frame - number of pixels in a frame
+* @return index: resolution base (0/5/10/15) plus preset offset (0 to 4)
+******************************************************************************
+*/
+static WORD32 ihevce_get_offline_index(rc_context_t *ps_rc_ctxt, WORD32 i4_num_pels_in_frame)
+{
+    const WORD32 i4_preset = ps_rc_ctxt->i4_quality_preset;
+    WORD32 i4_res_base;
+    WORD32 i4_preset_offset;
+
+    /* the resolution class selects the start of a group of five indices */
+    if(i4_num_pels_in_frame > 5000000)
+    {
+        i4_res_base = 0; /*ultra HD*/
+    }
+    else if(i4_num_pels_in_frame > 1500000)
+    {
+        i4_res_base = 5; /*Full HD*/
+    }
+    else if(i4_num_pels_in_frame > 600000)
+    {
+        i4_res_base = 10; /*720p*/
+    }
+    else
+    {
+        i4_res_base = 15; /*SD*/
+    }
+
+    /*based on preset choose coeff*/
+    if(i4_preset == IHEVCE_QUALITY_P0)
+    {
+        i4_preset_offset = 0; /*Pristine quality*/
+    }
+    else if(i4_preset == IHEVCE_QUALITY_P2)
+    {
+        i4_preset_offset = 1; /*High quality*/
+    }
+    else if((i4_preset == IHEVCE_QUALITY_P5) || (i4_preset == IHEVCE_QUALITY_P6))
+    {
+        i4_preset_offset = 4; /*Extreme speed */
+    }
+    else if(i4_preset == IHEVCE_QUALITY_P4)
+    {
+        i4_preset_offset = 3; /*High speed */
+    }
+    else if(i4_preset == IHEVCE_QUALITY_P3)
+    {
+        i4_preset_offset = 2; /*Medium speed*/
+    }
+    else
+    {
+        /* unknown preset: flag it, then fall back to the medium speed entry */
+        ASSERT(0);
+        i4_preset_offset = 2;
+    }
+
+    return i4_res_base + i4_preset_offset;
+}
+
+/**
+******************************************************************************
+*
+* @name ihevce_get_frame_lambda_modifier
+*
+* @par Description
+*  Returns the square root of a per picture type lambda modifier:
+*   - I_PIC : 0.57 * MAX(0.5, 1 - 0.05 * i4_num_b_frms)
+*   - P_PIC : 0.442
+*   - other : 0.3536, 0.45 or 0.68 for i4_rc_is_ref_pic equal to 1, 2 or
+*             anything else, multiplied by 3 when the temporal layer id is
+*             non-zero
+*
+* @param[in] pic_type : rc picture type
+* @param[in] i4_rc_temporal_lyr_id : temporal layer id of the picture
+* @param[in] i4_first_field : accepted but does not change the result
+* @param[in] i4_rc_is_ref_pic : reference flag (1, 2 or other)
+* @param[in] i4_num_b_frms : number of B frames, used for I pictures only
+*
+* @return lambda_modifier
+******************************************************************************
+*/
+static double ihevce_get_frame_lambda_modifier(
+    WORD8 pic_type,
+    WORD32 i4_rc_temporal_lyr_id,
+    WORD32 i4_first_field,
+    WORD32 i4_rc_is_ref_pic,
+    WORD32 i4_num_b_frms)
+{
+    double d_modifier;
+
+    /* P pictures use the same value for either field, so the flag has no effect */
+    (void)i4_first_field;
+
+    if(I_PIC == pic_type)
+    {
+        /* 0.05 is taken off per B frame; the correction is limited to 0.5 */
+        double d_islice_correction = 1.0 - 0.05 * i4_num_b_frms;
+
+        d_islice_correction = MAX(0.5, d_islice_correction);
+        d_modifier = 0.57 * d_islice_correction;
+    }
+    else if(P_PIC == pic_type)
+    {
+        d_modifier = 0.442;
+    }
+    else
+    {
+        /* BSLICE */
+        switch(i4_rc_is_ref_pic)
+        {
+        case 1:
+            d_modifier = 0.3536;
+            break;
+        case 2:
+            d_modifier = 0.45;
+            break;
+        default:
+            d_modifier = 0.68;
+            break;
+        }
+
+        /* TODO: Disable lambda modification for interlace encode to match HM runs */
+        /* modify b lambda further based on temporal id */
+        if(i4_rc_temporal_lyr_id != 0)
+        {
+            d_modifier *= 3.00;
+        }
+    }
+
+    /* modify the base lambda according to lambda modifier */
+    return sqrt(d_modifier);
+}
+
+/*!
+******************************************************************************
+* \if Function name : get_avg_bitrate_bufsize
+* \brief Fetches the bit rate and the vbv buffer size through the rc handle
+*
+* \param[in] *pv_ctxt -> rc context
+* \param[out] *pi8_bitrate -> receives the result of rc_get_bit_rate()
+* \param[out] *pi8_ebf -> receives the result of rc_get_vbv_buf_size()
+* \return void
+*
+* \author
+* Ittiam
+*****************************************************************************
+*/
+void get_avg_bitrate_bufsize(void *pv_ctxt, LWORD64 *pi8_bitrate, LWORD64 *pi8_ebf)
+{
+    rc_context_t *const ps_ctxt = (rc_context_t *)pv_ctxt;
+
+    *pi8_bitrate = rc_get_bit_rate(ps_ctxt->rc_hdl);
+    *pi8_ebf = rc_get_vbv_buf_size(ps_ctxt->rc_hdl);
+}
+
+/**
+******************************************************************************
+* @name ihevce_get_dbf_buffer_size
+*
+* @par Description
+*  Copies buffer size, current buffer level and bit rate of the vbv compliance state.
+* @param[in] pv_rc_ctxt - pointer to rc context
+* @param[out] pi4_buffer_size - receives f_buffer_size cast to WORD32
+* @param[out] pi4_dbf - receives f_curr_buffer_level cast to WORD32
+* @param[out] pi4_bit_rate - receives f_bit_rate cast to WORD32
+* @return void
+******************************************************************************
+*/
+void ihevce_get_dbf_buffer_size(
+    void *pv_rc_ctxt, UWORD32 *pi4_buffer_size, UWORD32 *pi4_dbf, UWORD32 *pi4_bit_rate)
+{
+    rc_context_t *const ps_ctxt = (rc_context_t *)pv_rc_ctxt;
+
+    *pi4_buffer_size = (WORD32)ps_ctxt->s_vbv_compliance.f_buffer_size;
+    *pi4_dbf = (WORD32)(ps_ctxt->s_vbv_compliance.f_curr_buffer_level);
+
+    /* the current level is expected not to exceed the buffer size */
+    ASSERT(ps_ctxt->s_vbv_compliance.f_buffer_size >= ps_ctxt->s_vbv_compliance.f_curr_buffer_level);
+
+    *pi4_bit_rate = (WORD32)ps_ctxt->s_vbv_compliance.f_bit_rate;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_set_L0_scd_qp
+*
+* \brief Stores the given qp in the rc context
+*        (written to the i4_L0_frame_qp member)
+*
+* \param[in] *pv_rc_ctxt -> rc context
+* \param[in] i4_scd_qp -> qp value to store
+*
+* \return void
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_set_L0_scd_qp(void *pv_rc_ctxt, WORD32 i4_scd_qp)
+{
+    ((rc_context_t *)pv_rc_ctxt)->i4_L0_frame_qp = i4_scd_qp;
+}
+
+/**
+******************************************************************************
+*
+* @name rc_get_buffer_level_unclip
+*
+* @par Description
+*  Reads f_curr_buffer_level_unclip from the vbv compliance state of the context.
+*
+* @param[in] pv_rc_ctxt : rc context, passed as a void pointer
+*
+* @return the stored f_curr_buffer_level_unclip value
+******************************************************************************
+*/
+float rc_get_buffer_level_unclip(void *pv_rc_ctxt)
+{
+    const rc_context_t *ps_ctxt = (const rc_context_t *)pv_rc_ctxt;
+
+    return ps_ctxt->s_vbv_compliance.f_curr_buffer_level_unclip;
+}
+
+/**
+******************************************************************************
+* @brief Clip QP based on min and max frame qp
+*
+* @par Description
+*  The user configured limits i4_min_frame_qp / i4_max_frame_qp are shifted by
+*  a picture type dependent offset: 0 for I_PIC, 1 for P_PIC and
+*  (i4_rc_temporal_lyr_id + 1) for every other type. After the lower and after
+*  the upper clip the qp is additionally held inside [i2_min_qp, i2_max_qp] of
+*  the quant context.
+*
+*  Each picture type is compared against its own floor only. With the earlier
+*  else-if chain an I or P qp that already satisfied its floor fell through to
+*  the B picture test and was raised to min + temporal layer id + 1 (e.g. an
+*  I_PIC qp of exactly i4_min_frame_qp became min + 1, while min - 1 gave min).
+*
+* @param[in] ps_rc_ctxt : pointer to rc context
+* @param[in] i4_hevc_frame_qp : qp to clip
+* @param[in] rc_pic_type : Pic type
+* @param[in] i4_rc_temporal_lyr_id : temporal layer id, asserted to be >= 0
+*
+* @return i4_hevc_frame_qp
+******************************************************************************
+*/
+static WORD32 ihevce_clip_min_max_qp(
+    rc_context_t *ps_rc_ctxt,
+    WORD32 i4_hevc_frame_qp,
+    picture_type_e rc_pic_type,
+    WORD32 i4_rc_temporal_lyr_id)
+{
+    WORD32 i4_pic_type_offset;
+    WORD32 i4_floor_qp;
+    WORD32 i4_ceil_qp;
+
+    ASSERT(i4_rc_temporal_lyr_id >= 0);
+
+    /* offset applied to both user limits; B pictures depend on the temporal layer */
+    i4_pic_type_offset =
+        (rc_pic_type == I_PIC) ? 0 : ((rc_pic_type == P_PIC) ? 1 : (i4_rc_temporal_lyr_id + 1));
+    i4_floor_qp = ps_rc_ctxt->i4_min_frame_qp + i4_pic_type_offset;
+    i4_ceil_qp = ps_rc_ctxt->i4_max_frame_qp + i4_pic_type_offset;
+
+    /**clip to min qp which is user configurable*/
+    if(i4_hevc_frame_qp < i4_floor_qp)
+    {
+        i4_hevc_frame_qp = i4_floor_qp;
+    }
+    /* clip the Qp to MIN QP of the quant context */
+    if(i4_hevc_frame_qp < ps_rc_ctxt->ps_rc_quant_ctxt->i2_min_qp)
+    {
+        i4_hevc_frame_qp = ps_rc_ctxt->ps_rc_quant_ctxt->i2_min_qp;
+    }
+    /**clip to max qp based on pic type*/
+    if(i4_hevc_frame_qp > i4_ceil_qp)
+    {
+        i4_hevc_frame_qp = i4_ceil_qp;
+    }
+    /* clip the Qp to MAX QP of the quant context */
+    if(i4_hevc_frame_qp > ps_rc_ctxt->ps_rc_quant_ctxt->i2_max_qp)
+    {
+        i4_hevc_frame_qp = ps_rc_ctxt->ps_rc_quant_ctxt->i2_max_qp;
+    }
+    return i4_hevc_frame_qp;
+}
+
+/*#############################################*/
+/******* END OF SET,GET FUNCTIONS *************/
+/*###########################################*/
+
+/*#################################################*/
+/******* START OF RC UPDATE FUNCTIONS **************/
+/*#################################################*/
+
+/**
+******************************************************************************
+*
+* @brief updates rate control with the picture level information (bits consumed,
+* header bits, SAD and average qp) of an encoded frame
+*
+* @par Description fills a frame_info_t from the arguments and calls update_frame_level_info()
+*
+* @param[in] pv_ctxt
+* pointer to rc context
+*
+* @param[inout] ps_rc_lap_out
+* rc lap out params of the frame; i4_use_offline_model_2pass is written here
+*
+* @return void
+******************************************************************************
+*/
+
+void ihevce_rc_update_pic_info(
+ void *pv_ctxt,
+ UWORD32 u4_total_bits_consumed,
+ UWORD32 u4_total_header_bits,
+ UWORD32 u4_frame_sad,
+ UWORD32 u4_frame_intra_sad,
+ IV_PICTURE_CODING_TYPE_T pic_type,
+ WORD32 i4_avg_frame_hevc_qp,
+ WORD32 i4_suppress_bpic_update,
+ WORD32 *pi4_qp_normalized_8x8_cu_sum,
+ WORD32 *pi4_8x8_cu_sum,
+ LWORD64 *pi8_sad_by_qscale,
+ ihevce_lap_output_params_t *ps_lap_out,
+ rc_lap_out_params_t *ps_rc_lap_out,
+ WORD32 i4_buf_id,
+ UWORD32 u4_open_loop_intra_sad,
+ LWORD64 i8_total_ssd_frame,
+ WORD32 i4_enc_frm_id)
+{
+ LWORD64 a_mb_type_sad[2];
+ WORD32 a_mb_type_tex_bits[2];
+ /*per mb-type arrays; entries 0 and 1 are filled below depending on pic type*/
+ WORD32 a_mb_in_type[2] = { 0, 0 };
+ LWORD64 a_mb_type_qp_q6[2] = { 0, 0 };
+ /*average activity passed to update_frame_level_info*/
+ WORD32 i4_avg_activity = 250; //hardcoding to usual value
+ WORD32 i4_intra_cost, i4_avg_frame_qp_q6, i;
+ rc_context_t *ps_rc_ctxt = (rc_context_t *)pv_ctxt;
+ WORD32 i4_frame_complexity, i4_bits_to_be_stuffed = 0, i4_is_last_frm_period = 0;
+ picture_type_e rc_pic_type = ihevce_rc_conv_pic_type(
+ pic_type,
+ ps_rc_ctxt->i4_field_pic,
+ ps_rc_lap_out->i4_rc_temporal_lyr_id,
+ ps_rc_lap_out->i4_is_bottom_field,
+ ps_rc_ctxt->i4_top_field_first);
+ frame_info_t s_frame_info;
+ WORD32 i4_ctr = -1, i4_i, i4_j;
+ WORD32 i4_scene_num = ps_rc_lap_out->u4_rc_scene_num % MAX_SCENE_NUM;
+
+ /*update bit consumption. used only in rdopt*/
+ //ASSERT(ps_rc_ctxt->ai4_rdopt_bit_consumption_estimate[ps_rc_ctxt->i4_rdopt_bit_count] == -1);
+ //ASSERT(i4_buf_id>=0);
+ ps_rc_ctxt->ai4_rdopt_bit_consumption_estimate[ps_rc_ctxt->i4_rdopt_bit_count] =
+ u4_total_bits_consumed;
+ ps_rc_ctxt->ai4_rdopt_bit_consumption_buf_id[ps_rc_ctxt->i4_rdopt_bit_count] = i4_buf_id;
+ ps_rc_ctxt->i4_rdopt_bit_count =
+ (ps_rc_ctxt->i4_rdopt_bit_count + 1) % NUM_BUF_RDOPT_ENT_CORRECT;
+
+ {
+ LWORD64 i8_texture_bits = u4_total_bits_consumed - u4_total_header_bits;
+ ps_rc_lap_out->i4_use_offline_model_2pass = 0;
+
+ /*flag to guide whether 2nd pass can use offline model or not: qp within 2 of the original rc qp and texture bits within 0.5x..2x of the estimate*/
+ if((abs(ps_rc_lap_out->i4_orig_rc_qp - i4_avg_frame_hevc_qp) < 2) &&
+ (i8_texture_bits <= (ps_rc_lap_out->i8_est_text_bits * 2.0f)) &&
+ (i8_texture_bits >= (ps_rc_lap_out->i8_est_text_bits * 0.5f)))
+
+ {
+ ps_rc_lap_out->i4_use_offline_model_2pass = 1;
+ }
+ }
+ /*Counter of number of bit allocation periods*/
+ if(rc_pic_type == I_PIC)
+ ps_rc_ctxt
+ ->i8_num_bit_alloc_period++; //Currently only I frame periods are considered as bit allocation periods (ignoring non-I scd and complexity reset flag)
+ /*initialize frame info; i4_num_entries is incremented once per field stored*/
+ init_frame_info(&s_frame_info);
+ s_frame_info.i4_rc_hevc_qp = i4_avg_frame_hevc_qp;
+ s_frame_info.i4_num_entries++;
+ s_frame_info.i8_L1_me_sad = ps_rc_lap_out->i8_raw_l1_coarse_me_sad;
+ s_frame_info.i8_L1_ipe_raw_sad = ps_rc_lap_out->i8_raw_pre_intra_sad;
+ s_frame_info.i4_num_entries++;
+ s_frame_info.i4_num_entries++;
+ s_frame_info.i8_L0_open_cost = (LWORD64)u4_open_loop_intra_sad;
+ s_frame_info.i4_num_entries++;
+
+ if(rc_pic_type == I_PIC)
+ s_frame_info.i8_L1_me_or_ipe_raw_sad = ps_rc_lap_out->i8_raw_pre_intra_sad;
+ else
+ s_frame_info.i8_L1_me_or_ipe_raw_sad = ps_rc_lap_out->i8_raw_l1_coarse_me_sad;
+ s_frame_info.i4_num_entries++;
+ s_frame_info.i4_poc = ps_rc_lap_out->i4_rc_poc;
+ s_frame_info.i4_num_entries++;
+ s_frame_info.i4_scene_type = ps_rc_lap_out->i4_rc_scene_type;
+ s_frame_info.i4_num_entries++;
+ s_frame_info.i4_non_i_scd = ps_rc_lap_out->i4_is_non_I_scd || ps_rc_lap_out->i4_is_I_only_scd;
+ s_frame_info.i4_num_entries++;
+ s_frame_info.i8_cl_sad = u4_frame_sad;
+ s_frame_info.i4_num_entries++;
+ s_frame_info.i8_header_bits = u4_total_header_bits;
+ s_frame_info.i4_num_entries++;
+ s_frame_info.i8_tex_bits = u4_total_bits_consumed - u4_total_header_bits;
+ s_frame_info.i4_num_entries++;
+ s_frame_info.e_pic_type = rc_pic_type;
+ s_frame_info.i4_num_entries++;
+ s_frame_info.i8_est_texture_bits = ps_rc_lap_out->i8_est_text_bits;
+ s_frame_info.i4_num_entries++;
+ s_frame_info.i4_lap_complexity_q7 = ps_rc_ctxt->ai4_lap_complexity_q7[i4_enc_frm_id];
+ s_frame_info.i4_num_entries++;
+ s_frame_info.i4_lap_f_sim = ps_rc_ctxt->ai4_lap_f_sim[i4_enc_frm_id];
+ s_frame_info.i4_num_entries++;
+ s_frame_info.i8_frame_acc_coarse_me_cost = ps_rc_lap_out->i8_frame_acc_coarse_me_cost;
+ s_frame_info.i4_num_entries++;
+ s_frame_info.i_to_avg_bit_ratio = ps_rc_ctxt->ai_to_avg_bit_ratio[i4_enc_frm_id];
+ s_frame_info.i4_num_entries++;
+ s_frame_info.i4_num_scd_in_lap_window = ps_rc_ctxt->ai4_num_scd_in_lap_window[i4_enc_frm_id];
+ s_frame_info.i4_num_entries++;
+ s_frame_info.i4_num_frames_b4_scd = ps_rc_ctxt->ai4_num_frames_b4_scd[i4_enc_frm_id];
+ s_frame_info.i4_num_entries++;
+ s_frame_info.i8_num_bit_alloc_period = ps_rc_ctxt->i8_num_bit_alloc_period;
+ s_frame_info.i4_num_entries++;
+ s_frame_info.i1_is_complexity_based_bits_reset =
+ (WORD8)ps_rc_lap_out->i4_is_cmplx_change_reset_bits;
+ s_frame_info.i4_num_entries++;
+ /*For the complexity based movement in 2nd pass*/
+ memmove(
+ (void *)s_frame_info.af_sum_weigh,
+ ps_rc_lap_out->ps_frame_info->af_sum_weigh,
+ sizeof(float) * MAX_PIC_TYPE * 3);
+ s_frame_info.i4_num_entries++;
+
+ /*store frame qp to clip qp accordingly*/
+ if(ps_rc_lap_out->i4_is_rc_model_needs_to_be_updated)
+ {
+ ps_rc_ctxt->ai4_prev_pic_hevc_qp[i4_scene_num][rc_pic_type] = i4_avg_frame_hevc_qp;
+ }
+
+ for(i4_i = 0; i4_i < MAX_NON_REF_B_PICS_IN_QUEUE_SGI; i4_i++)
+ {
+ if(ps_rc_lap_out->u4_rc_scene_num == ps_rc_ctxt->au4_prev_scene_num_multi_scene[i4_i])
+ {
+ i4_ctr = i4_i;
+ break;
+ }
+ }
+ if(-1 == i4_ctr)
+ {
+ ps_rc_ctxt->i4_prev_qp_ctr++;
+ ps_rc_ctxt->i4_prev_qp_ctr = ps_rc_ctxt->i4_prev_qp_ctr % MAX_NON_REF_B_PICS_IN_QUEUE_SGI;
+ i4_ctr = ps_rc_ctxt->i4_prev_qp_ctr;
+ ps_rc_ctxt->au4_prev_scene_num_multi_scene[i4_ctr] = ps_rc_lap_out->u4_rc_scene_num;
+ for(i4_j = 0; i4_j < MAX_PIC_TYPE; i4_j++)
+ {
+ ps_rc_ctxt->ai4_qp_for_previous_scene_multi_scene[i4_ctr][i4_j] = 0;
+ }
+ }
+
+ {
+ ps_rc_ctxt->ai4_qp_for_previous_scene_multi_scene[i4_ctr][rc_pic_type] =
+ i4_avg_frame_hevc_qp;
+ }
+ if(i4_scene_num < HALF_MAX_SCENE_ARRAY_QP)
+ {
+ WORD32 i4_i;
+ ps_rc_ctxt->ai4_scene_numbers[i4_scene_num + HALF_MAX_SCENE_ARRAY_QP] = 0;
+ for(i4_i = 0; i4_i < MAX_PIC_TYPE; i4_i++)
+ ps_rc_ctxt->ai4_prev_pic_hevc_qp[i4_scene_num + HALF_MAX_SCENE_ARRAY_QP][i4_i] =
+ INIT_HEVCE_QP_RC;
+ }
+ else
+ {
+ WORD32 i4_i;
+ ps_rc_ctxt->ai4_scene_numbers[i4_scene_num - HALF_MAX_SCENE_ARRAY_QP] = 0;
+ for(i4_i = 0; i4_i < MAX_PIC_TYPE; i4_i++)
+ ps_rc_ctxt->ai4_prev_pic_hevc_qp[i4_scene_num - HALF_MAX_SCENE_ARRAY_QP][i4_i] =
+ INIT_HEVCE_QP_RC;
+ }
+
+ /*update will have HEVC qp, convert it back to mpeg2 range qp for all internal calculations of RC*/
+
+ i4_avg_frame_qp_q6 = ps_rc_ctxt->ps_rc_quant_ctxt->pi4_qp_to_qscale_q_factor
+ [i4_avg_frame_hevc_qp + ps_rc_ctxt->ps_rc_quant_ctxt->i1_qp_offset];
+
+ if(pic_type == IV_I_FRAME || pic_type == IV_IDR_FRAME)
+ {
+ /*TODO : Take care of precision of a_mb_type_sad*/
+ a_mb_type_sad[0] =
+ (((pi8_sad_by_qscale[1] * i4_avg_frame_qp_q6) +
+ (((LWORD64)1) << (SAD_BY_QSCALE_Q + QSCALE_Q_FAC - 1))) >>
+ (SAD_BY_QSCALE_Q + QSCALE_Q_FAC)); //u4_frame_sad;
+
+ a_mb_type_sad[1] =
+ (((pi8_sad_by_qscale[0] * i4_avg_frame_qp_q6) +
+ (((LWORD64)1) << (SAD_BY_QSCALE_Q + QSCALE_Q_FAC - 1))) >>
+ (SAD_BY_QSCALE_Q + QSCALE_Q_FAC));
+ a_mb_type_tex_bits[0] =
+ u4_total_bits_consumed - u4_total_header_bits; //(u4_total_bits_consumed >> 3);
+ a_mb_type_tex_bits[1] = 0;
+ a_mb_in_type[0] = (ps_rc_ctxt->i4_frame_height * ps_rc_ctxt->i4_frame_width) >> 8;
+ a_mb_in_type[1] = 0;
+ }
+ else
+ {
+ /*TODO : Take care of precision of a_mb_type_sad*/
+ a_mb_type_sad[1] =
+ (((pi8_sad_by_qscale[0] * i4_avg_frame_qp_q6) +
+ (((LWORD64)1) << (SAD_BY_QSCALE_Q + QSCALE_Q_FAC - 1))) >>
+ (SAD_BY_QSCALE_Q + QSCALE_Q_FAC));
+
+ a_mb_type_tex_bits[0] =
+ u4_total_bits_consumed - u4_total_header_bits; // NOTE(review): this value is overwritten with 0 a few lines below
+ a_mb_type_sad[0] =
+ (((pi8_sad_by_qscale[1] * i4_avg_frame_qp_q6) +
+ (((LWORD64)1) << (SAD_BY_QSCALE_Q + QSCALE_Q_FAC - 1))) >>
+ (SAD_BY_QSCALE_Q + QSCALE_Q_FAC)); //u4_frame_sad;
+ a_mb_type_tex_bits[1] =
+ u4_total_bits_consumed - u4_total_header_bits; //(u4_total_bits_consumed >> 3);
+ a_mb_type_tex_bits[0] = 0;
+ a_mb_in_type[1] = (ps_rc_ctxt->i4_frame_height * ps_rc_ctxt->i4_frame_width) >> 8;
+ a_mb_in_type[0] = 0;
+ }
+ ASSERT(a_mb_type_sad[0] >= 0);
+ ASSERT(a_mb_type_sad[1] >= 0);
+ /*This calculates the sum of Qps of all MBs as per the corresponding mb type*/
+ /*This is different from a_mb_in_type, a_mb_type_sad and a_mb_type_tex_bits*/
+ a_mb_type_qp_q6[0] = ((LWORD64)i4_avg_frame_qp_q6) * a_mb_in_type[0];
+ a_mb_type_qp_q6[1] = ((LWORD64)i4_avg_frame_qp_q6) * a_mb_in_type[1];
+ {
+ WORD32 i4_avg_qp_q6_without_offset = 0, i4_hevc_qp_rc = i4_avg_frame_hevc_qp;
+ WORD32 i4_rc_pic_type_rc_for_offset = rc_pic_type;
+ if(i4_rc_pic_type_rc_for_offset > B2_PIC)
+ i4_rc_pic_type_rc_for_offset = i4_rc_pic_type_rc_for_offset - B2_PIC;
+ i4_hevc_qp_rc = i4_hevc_qp_rc - ps_rc_lap_out->ai4_offsets[i4_rc_pic_type_rc_for_offset] +
+ ps_rc_ctxt->ps_rc_quant_ctxt->i1_qp_offset;
+
+ i4_hevc_qp_rc =
+ CLIP3(i4_hevc_qp_rc, 1, MAX_HEVC_QP + ps_rc_ctxt->ps_rc_quant_ctxt->i1_qp_offset);
+ i4_avg_qp_q6_without_offset =
+ ps_rc_ctxt->ps_rc_quant_ctxt->pi4_qp_to_qscale_q_factor[i4_hevc_qp_rc];
+
+ /*Store the HBD qscale with and without accounting for offset*/
+ s_frame_info.f_hbd_q_scale_without_offset =
+ (float)i4_avg_qp_q6_without_offset / (1 << QSCALE_Q_FAC);
+ s_frame_info.f_hbd_q_scale = (float)i4_avg_frame_qp_q6 / (1 << QSCALE_Q_FAC);
+ s_frame_info.i4_num_entries++;
+ s_frame_info.i4_num_entries++;
+
+ /*Store the 8 bit qscale with and without accounting for offset*/
+ /*Can be useful for pre-enc stage*/
+ if(ps_rc_ctxt->ps_rc_quant_ctxt->i1_qp_offset != 0)
+ {
+ s_frame_info.f_8bit_q_scale_without_offset =
+ s_frame_info.f_hbd_q_scale_without_offset / (1 << (ps_rc_ctxt->u1_bit_depth - 8));
+ s_frame_info.f_8bit_q_scale =
+ s_frame_info.f_hbd_q_scale / (1 << (ps_rc_ctxt->u1_bit_depth - 8));
+ }
+ else
+ {
+ s_frame_info.f_8bit_q_scale_without_offset = s_frame_info.f_hbd_q_scale_without_offset;
+ s_frame_info.f_8bit_q_scale = s_frame_info.f_hbd_q_scale;
+ }
+ s_frame_info.i4_num_entries++;
+ s_frame_info.i4_num_entries++;
+ }
+
+ /*making intra cost same as frame intra sad as of now*/
+ i4_intra_cost = u4_frame_intra_sad;
+
+ /* Handling bits stuffing and skips */
+ {
+ WORD32 i4_num_bits_to_prevent_vbv_underflow;
+ vbv_buf_status_e vbv_buffer_status;
+ vbv_buffer_status = get_buffer_status(
+ ps_rc_ctxt->rc_hdl,
+ u4_total_bits_consumed,
+ rc_pic_type, //the picture type convention is different in buffer handling
+ &i4_num_bits_to_prevent_vbv_underflow);
+
+ if(vbv_buffer_status == VBV_UNDERFLOW)
+ {
+ }
+ if(vbv_buffer_status == VBV_OVERFLOW)
+ {
+ i4_bits_to_be_stuffed =
+ get_bits_to_stuff(ps_rc_ctxt->rc_hdl, u4_total_bits_consumed, rc_pic_type);
+ //i4_bits_to_be_stuffed = 0;/*STORAGE_RC*/
+ }
+ }
+ {
+ WORD32 ai4_sad[MAX_PIC_TYPE], i4_valid_sad_entry = 0;
+ UWORD32 u4_avg_sad = 0;
+
+ /*calculate frame complexity. Given same content frame complexity should not vary across I,P and Bpic. Hence frame complexity is calculated
+ based on average of all pic types SAD*/
+ if(rc_pic_type == I_PIC)
+ {
+ ai4_sad[I_PIC] = u4_frame_intra_sad;
+ }
+ else
+ {
+ /*call to get previous I-PIC sad*/
+ rc_get_sad(ps_rc_ctxt->rc_hdl, &ai4_sad[0]);
+ }
+
+ /*since intra sad is not available for every frame use previous I pic intra frame SAD*/
+ rc_put_sad(ps_rc_ctxt->rc_hdl, ai4_sad[I_PIC], u4_frame_sad, rc_pic_type);
+ rc_get_sad(ps_rc_ctxt->rc_hdl, &ai4_sad[0]);
+ /*for first few frame valid SAD is not available. This will make sure invalid data is not used*/
+ if(ps_rc_ctxt->i4_field_pic == 0)
+ {
+ for(i = 0; i < ps_rc_ctxt->i4_num_active_pic_type; i++)
+ {
+ if(ai4_sad[i] >= 0)
+ {
+ u4_avg_sad += ai4_sad[i];
+ i4_valid_sad_entry++;
+ }
+ }
+ }
+ else /*for field case*/
+ {
+ if(ai4_sad[0] >= 0)
+ {
+ u4_avg_sad += ai4_sad[0];
+ i4_valid_sad_entry++;
+ }
+
+ for(i = 1; i < ps_rc_ctxt->i4_num_active_pic_type; i++)
+ {
+ if(ai4_sad[i] >= 0)
+ {
+ u4_avg_sad += ai4_sad[i];
+ i4_valid_sad_entry++;
+ }
+
+ if(ai4_sad[i + FIELD_OFFSET] >= 0)
+ {
+ u4_avg_sad += ai4_sad[i + FIELD_OFFSET];
+ i4_valid_sad_entry++;
+ }
+ }
+ }
+
+ if(i4_valid_sad_entry > 0)
+ {
+ i4_frame_complexity =
+ (u4_avg_sad) /
+ (i4_valid_sad_entry * (ps_rc_ctxt->i4_frame_width * ps_rc_ctxt->i4_frame_height));
+ }
+ else
+ {
+ i4_frame_complexity = 1;
+ }
+ }
+ ASSERT(i4_frame_complexity >= 0);
+ /*I_model only reset In case of fade-in and fade-out*/
+ if(ps_rc_ctxt->ai4_I_model_only_reset[i4_enc_frm_id])
+ {
+ ASSERT(rc_pic_type == I_PIC);
+ rc_reset_pic_model(ps_rc_ctxt->rc_hdl, I_PIC);
+ ps_rc_ctxt->ai4_I_model_only_reset[i4_enc_frm_id] = 0;
+ }
+
+ /*check if next picture is I frame, both scene cuts and I pictures are treated as end of period*/
+ {
+ if(ps_rc_lap_out->i4_rc_pic_type != -1 && ps_rc_lap_out->i4_rc_scene_type != -1)
+ {
+ if(ps_rc_ctxt->u4_intra_frame_interval != 1)
+ {
+ /*TBD: For second pass this should be only criteria, While merging to latest version make sure non - I SCD is not considered as one of the condition*/
+ i4_is_last_frm_period = (WORD32)(
+ ps_rc_lap_out->i4_next_pic_type == IV_IDR_FRAME ||
+ ps_rc_lap_out->i4_next_pic_type == IV_I_FRAME);
+ }
+ else
+ {
+ i4_is_last_frm_period =
+ (WORD32)(ps_rc_lap_out->i4_next_scene_type == SCENE_TYPE_SCENE_CUT);
+ }
+ }
+
+ /*In two pass only I frame ending should be considered end of period, otherwise complexity changes should be allowed to reset model in CBR and VBR modes*/
+ if(ps_rc_ctxt->i4_rc_pass != 2)
+ i4_is_last_frm_period = i4_is_last_frm_period ||
+ ps_rc_ctxt->ai4_is_cmplx_change_reset_bits[i4_enc_frm_id];
+ }
+
+#if 1 //FRAME_PARALLEL_LVL //ELP_RC
+ ps_rc_ctxt->i4_est_text_bits_ctr_update_qp++;
+ ps_rc_ctxt->i4_est_text_bits_ctr_update_qp =
+ (ps_rc_ctxt->i4_est_text_bits_ctr_update_qp % (ps_rc_ctxt->i4_num_frame_parallel));
+#endif
+
+ update_frame_level_info(
+ ps_rc_ctxt->rc_hdl,
+ rc_pic_type,
+ a_mb_type_sad,
+ u4_total_bits_consumed, /*total bits consumed by frame*/
+ u4_total_header_bits,
+ a_mb_type_tex_bits,
+ a_mb_type_qp_q6, /*sum of qp of all mb in frame, since no ctb level modulation*/
+ a_mb_in_type,
+ i4_avg_activity,
+ ps_rc_ctxt->ai4_is_frame_scd[i4_enc_frm_id], /*currently SCD is not enabled*/
+ 0, /*not a pre encode skip*/
+ i4_intra_cost,
+ 0,
+ ps_rc_lap_out
+ ->i4_ignore_for_rc_update, /*HEVC_hierarchy: do not suppress update for non-ref B pic*/
+ i4_bits_to_be_stuffed,
+ (ps_rc_ctxt->ai4_is_pause_to_resume[i4_enc_frm_id] ||
+ ps_rc_ctxt->ai4_is_non_I_scd_pic[i4_enc_frm_id] ||
+ ps_rc_ctxt->ai4_is_cmplx_change_reset_model[i4_enc_frm_id]),
+ ps_rc_ctxt->ai4_lap_complexity_q7[i4_enc_frm_id],
+ i4_is_last_frm_period,
+ ps_rc_ctxt->ai4_is_cmplx_change_reset_bits[i4_enc_frm_id],
+ &s_frame_info,
+ ps_rc_lap_out->i4_is_rc_model_needs_to_be_updated,
+ ps_rc_ctxt->ps_rc_quant_ctxt->i1_qp_offset,
+ i4_scene_num,
+ ps_rc_ctxt->ai4_scene_numbers[i4_scene_num],
+ ps_rc_ctxt->i4_est_text_bits_ctr_update_qp);
+ /** reset flags valid for only one frame*/
+ ps_rc_ctxt->ai4_is_frame_scd[i4_enc_frm_id] = 0;
+ ps_rc_ctxt->ai4_is_pause_to_resume[i4_enc_frm_id] = 0;
+ ps_rc_ctxt->ai4_is_non_I_scd_pic[i4_enc_frm_id] = 0;
+ ps_rc_ctxt->ai4_is_cmplx_change_reset_model[i4_enc_frm_id] = 0;
+ ps_rc_ctxt->ai4_is_cmplx_change_reset_bits[i4_enc_frm_id] = 0;
+
+ ps_rc_ctxt->i4_is_first_frame_encoded = 1;
+
+ /** update the scene num for current frame*/
+ ps_rc_ctxt->au4_scene_num_temp_id[ps_rc_lap_out->i4_rc_temporal_lyr_id] =
+ ps_rc_lap_out->u4_rc_scene_num;
+
+ if(ps_rc_ctxt->ai4_is_frame_scd[i4_enc_frm_id])
+ {
+ /*reset pre-enc SAD whenever SCD is detected so that it does not detect scene cut for other pictures. NOTE(review): ai4_is_frame_scd[i4_enc_frm_id] was set to 0 just above, so this branch looks unreachable here - confirm intent*/
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ ps_rc_ctxt->ai8_prev_frm_pre_enc_cost[i] = -1;
+ }
+ }
+
+ /*remember i frame's cost metric to scale SAD of next of I frame*/
+ if(pic_type == IV_I_FRAME || pic_type == IV_IDR_FRAME)
+ {
+ ps_rc_ctxt->i8_prev_i_frm_cost = ps_rc_ctxt->ai8_cur_frm_intra_cost[i4_enc_frm_id];
+ ps_rc_ctxt->ai8_prev_frm_pre_enc_cost[rc_pic_type] =
+ ps_rc_ctxt->ai8_cur_frm_intra_cost[i4_enc_frm_id];
+ }
+ /*for other picture types update hme cost*/
+ else
+ {
+ ps_rc_ctxt->ai8_prev_frm_pre_enc_cost[rc_pic_type] =
+ ps_rc_ctxt->ai8_cur_frame_coarse_ME_cost[i4_enc_frm_id];
+ }
+}
+/*!
+******************************************************************************
+* \if Function name : ihevce_rc_interface_update \endif
+*
+* \brief
+* Updating rate control interface parameters after the query call.
+*
+* \param[in] Rate control interface context,
+* Picture Type
+* Lap out structure pointer
+* Average frame HEVC qp
+* Encode frame id (index into the per-frame flag and cost arrays)
+* \return
+* None
+*
+* \author Ittiam
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_rc_interface_update(
+ void *pv_ctxt,
+ IV_PICTURE_CODING_TYPE_T pic_type,
+ rc_lap_out_params_t *ps_rc_lap_out,
+ WORD32 i4_avg_frame_hevc_qp,
+ WORD32 i4_enc_frm_id)
+{
+ rc_context_t *ps_rc_ctxt = (rc_context_t *)pv_ctxt;
+ picture_type_e rc_pic_type = ihevce_rc_conv_pic_type(
+ pic_type,
+ ps_rc_ctxt->i4_field_pic,
+ ps_rc_lap_out->i4_rc_temporal_lyr_id,
+ ps_rc_lap_out->i4_is_bottom_field,
+ ps_rc_ctxt->i4_top_field_first);
+ WORD32 i;
+ WORD32 i4_avg_frame_qp_q6, i4_ctr = -1, i4_i, i4_j;
+ WORD32 i4_scene_num = ps_rc_lap_out->u4_rc_scene_num % MAX_SCENE_NUM;
+
+ /*store frame qp to clip qp accordingly; when the scene counter becomes 1, entries still at INIT_HEVCE_QP_RC are seeded from this qp*/
+ if(ps_rc_lap_out->i4_is_rc_model_needs_to_be_updated)
+ {
+ WORD32 i4_i, i4_temp_i_qp, i4_temp_qp;
+ ps_rc_ctxt->ai4_prev_pic_hevc_qp[i4_scene_num][rc_pic_type] = i4_avg_frame_hevc_qp;
+ ps_rc_ctxt->ai4_scene_numbers[i4_scene_num]++;
+
+ if(rc_pic_type < P1_PIC)
+ i4_temp_i_qp = i4_avg_frame_hevc_qp - rc_pic_type;
+ else
+ i4_temp_i_qp = i4_avg_frame_hevc_qp - rc_pic_type + 4;
+
+ i4_temp_i_qp = ihevce_clip_min_max_qp(ps_rc_ctxt, i4_temp_i_qp, I_PIC, 0);
+
+ if(ps_rc_ctxt->ai4_scene_numbers[i4_scene_num] == 1)
+ {
+ for(i4_i = 0; i4_i < 5; i4_i++)
+ {
+ if(ps_rc_ctxt->ai4_prev_pic_hevc_qp[i4_scene_num][i4_i] == INIT_HEVCE_QP_RC)
+ {
+ i4_temp_qp = i4_temp_i_qp + i4_i;
+ i4_temp_qp = ihevce_clip_min_max_qp(
+ ps_rc_ctxt, i4_temp_qp, (picture_type_e)i4_i, MAX(i4_i - 1, 0));
+ ps_rc_ctxt->ai4_prev_pic_hevc_qp[i4_scene_num][i4_i] = i4_temp_qp;
+
+ if(i4_i > 0)
+ ps_rc_ctxt->ai4_prev_pic_hevc_qp[i4_scene_num][i4_i + 4] = i4_temp_qp;
+ }
+ }
+ }
+ }
+
+ for(i4_i = 0; i4_i < MAX_NON_REF_B_PICS_IN_QUEUE_SGI; i4_i++)
+ {
+ if(ps_rc_lap_out->u4_rc_scene_num == ps_rc_ctxt->au4_prev_scene_num_multi_scene[i4_i])
+ {
+ i4_ctr = i4_i;
+ break;
+ }
+ }
+ if(-1 == i4_ctr)
+ {
+ ps_rc_ctxt->i4_prev_qp_ctr++;
+ ps_rc_ctxt->i4_prev_qp_ctr = ps_rc_ctxt->i4_prev_qp_ctr % MAX_NON_REF_B_PICS_IN_QUEUE_SGI;
+ i4_ctr = ps_rc_ctxt->i4_prev_qp_ctr;
+ ps_rc_ctxt->au4_prev_scene_num_multi_scene[i4_ctr] = ps_rc_lap_out->u4_rc_scene_num;
+ for(i4_j = 0; i4_j < MAX_PIC_TYPE; i4_j++)
+ {
+ ps_rc_ctxt->ai4_qp_for_previous_scene_multi_scene[i4_ctr][i4_j] = 0;
+ }
+ }
+
+ {
+ ps_rc_ctxt->ai4_qp_for_previous_scene_multi_scene[i4_ctr][rc_pic_type] =
+ i4_avg_frame_hevc_qp;
+ }
+
+ /*I_model only reset In case of fade-in and fade-out*/
+ if(ps_rc_ctxt->ai4_I_model_only_reset[i4_enc_frm_id])
+ {
+ ASSERT(rc_pic_type == I_PIC);
+ rc_reset_pic_model(ps_rc_ctxt->rc_hdl, I_PIC);
+ ps_rc_ctxt->ai4_I_model_only_reset[i4_enc_frm_id] = 0;
+ }
+
+ i4_avg_frame_qp_q6 = ps_rc_ctxt->ps_rc_quant_ctxt->pi4_qp_to_qscale_q_factor
+ [i4_avg_frame_hevc_qp + ps_rc_ctxt->ps_rc_quant_ctxt->i1_qp_offset];
+
+ update_frame_rc_get_frame_qp_info(
+ ps_rc_ctxt->rc_hdl,
+ rc_pic_type,
+ ps_rc_ctxt->ai4_is_frame_scd[i4_enc_frm_id],
+ (ps_rc_ctxt->ai4_is_pause_to_resume[i4_enc_frm_id] ||
+ ps_rc_ctxt->ai4_is_non_I_scd_pic[i4_enc_frm_id] ||
+ ps_rc_ctxt->ai4_is_cmplx_change_reset_model[i4_enc_frm_id]),
+ i4_avg_frame_qp_q6,
+ ps_rc_lap_out->i4_ignore_for_rc_update,
+ i4_scene_num,
+ ps_rc_ctxt->ai4_scene_numbers[i4_scene_num]);
+
+ /** update the scene num for current frame*/
+ ps_rc_ctxt->au4_scene_num_temp_id[ps_rc_lap_out->i4_rc_temporal_lyr_id] =
+ ps_rc_lap_out->u4_rc_scene_num;
+
+ if(ps_rc_ctxt->ai4_is_frame_scd[i4_enc_frm_id])
+ {
+ /*reset pre-enc SAD whenever SCD is detected so that it does not detect scene cut for other pictures (the scd flag itself is not cleared in this function)*/
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ ps_rc_ctxt->ai8_prev_frm_pre_enc_cost[i] = -1;
+ }
+ }
+
+ /*remember i frame's cost metric to scale SAD of next of I frame*/
+ if(pic_type == IV_I_FRAME || pic_type == IV_IDR_FRAME)
+ {
+ ps_rc_ctxt->i8_prev_i_frm_cost = ps_rc_ctxt->ai8_cur_frm_intra_cost[i4_enc_frm_id];
+ ps_rc_ctxt->ai8_prev_frm_pre_enc_cost[rc_pic_type] =
+ ps_rc_ctxt->ai8_cur_frm_intra_cost[i4_enc_frm_id];
+ }
+ /*for other picture types update hme cost*/
+ else
+ {
+ ps_rc_ctxt->ai8_prev_frm_pre_enc_cost[rc_pic_type] =
+ ps_rc_ctxt->ai8_cur_frame_coarse_ME_cost[i4_enc_frm_id];
+ }
+
+ ps_rc_ctxt->i4_is_first_frame_encoded = 1;
+}
+
+/****************************************************************************
+Function Name : ihevce_rc_store_retrive_update_info
+Description : for storing and retrieving the data in case of the Enc Loop Parallelism.
+Inputs : i4_store_retrive - 1 copies the arguments into the rc context, 2 copies them back out
+Globals :
+Processing : memcpy of each item to/from the slot indexed by i4_enc_frm_id_rc (and bit_rate_id where applicable)
+Outputs : on retrieve, the pointed-to arguments are overwritten with the stored copies
+Returns : void
+Issues : any other value of i4_store_retrive hits ASSERT(0)
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+
+void ihevce_rc_store_retrive_update_info(
+ void *pv_ctxt,
+ rc_bits_sad_t *ps_rc_frame_stat,
+ WORD32 i4_enc_frm_id_rc,
+ WORD32 bit_rate_id,
+ WORD32 i4_store_retrive,
+ WORD32 *pout_buf_id,
+ WORD32 *pi4_rc_pic_type,
+ WORD32 *pcur_qp,
+ void *ps_lap_out,
+ void *ps_rc_lap_out)
+{
+ rc_context_t *ps_rc_ctxt = (rc_context_t *)pv_ctxt;
+ if(1 == i4_store_retrive)
+ {
+ memcpy(
+ &ps_rc_ctxt->as_rc_frame_stat_store[i4_enc_frm_id_rc][bit_rate_id],
+ ps_rc_frame_stat,
+ sizeof(rc_bits_sad_t));
+ memcpy(&ps_rc_ctxt->out_buf_id[i4_enc_frm_id_rc][bit_rate_id], pout_buf_id, sizeof(WORD32));
+ memcpy(&ps_rc_ctxt->i4_pic_type[i4_enc_frm_id_rc], pi4_rc_pic_type, sizeof(WORD32));
+ memcpy(&ps_rc_ctxt->cur_qp[i4_enc_frm_id_rc][bit_rate_id], pcur_qp, sizeof(WORD32));
+ memcpy(
+ &ps_rc_ctxt->as_lap_out[i4_enc_frm_id_rc],
+ ps_lap_out,
+ sizeof(ihevce_lap_output_params_t));
+ memcpy(
+ &ps_rc_ctxt->as_rc_lap_out[i4_enc_frm_id_rc],
+ ps_rc_lap_out,
+ sizeof(rc_lap_out_params_t));
+ //BUG_FIX related to the releasing of the next lap out buffers and retrieving of the data for the delayed update: the next frame's pic/scene type is copied by value and the pointer to it is dropped.
+
+ {
+ rc_lap_out_params_t *ps_rc_lap_out_next_encode;
+ ps_rc_lap_out_next_encode =
+ (rc_lap_out_params_t *)((rc_lap_out_params_t *)ps_rc_lap_out)
+ ->ps_rc_lap_out_next_encode;
+
+ if(NULL != ps_rc_lap_out_next_encode)
+ {
+ ps_rc_ctxt->as_rc_lap_out[i4_enc_frm_id_rc].i4_next_pic_type =
+ ps_rc_lap_out_next_encode->i4_rc_pic_type;
+ ps_rc_ctxt->as_rc_lap_out[i4_enc_frm_id_rc].i4_next_scene_type =
+ ps_rc_lap_out_next_encode->i4_rc_scene_type;
+ }
+ else
+ {
+ ps_rc_ctxt->as_rc_lap_out[i4_enc_frm_id_rc].i4_next_pic_type = -1;
+ ps_rc_ctxt->as_rc_lap_out[i4_enc_frm_id_rc].i4_next_scene_type = -1;
+ }
+
+ ps_rc_ctxt->as_rc_lap_out[i4_enc_frm_id_rc].ps_rc_lap_out_next_encode =
+ NULL; //RC_BUG_FIX
+ }
+ }
+ else if(2 == i4_store_retrive)
+ {
+ memcpy(
+ ps_rc_frame_stat,
+ &ps_rc_ctxt->as_rc_frame_stat_store[i4_enc_frm_id_rc][bit_rate_id],
+ sizeof(rc_bits_sad_t));
+ memcpy(pout_buf_id, &ps_rc_ctxt->out_buf_id[i4_enc_frm_id_rc][bit_rate_id], sizeof(WORD32));
+ memcpy(pi4_rc_pic_type, &ps_rc_ctxt->i4_pic_type[i4_enc_frm_id_rc], sizeof(WORD32));
+ memcpy(pcur_qp, &ps_rc_ctxt->cur_qp[i4_enc_frm_id_rc][bit_rate_id], sizeof(WORD32));
+ memcpy(
+ ps_lap_out,
+ &ps_rc_ctxt->as_lap_out[i4_enc_frm_id_rc],
+ sizeof(ihevce_lap_output_params_t));
+ memcpy(
+ ps_rc_lap_out,
+ &ps_rc_ctxt->as_rc_lap_out[i4_enc_frm_id_rc],
+ sizeof(rc_lap_out_params_t));
+ }
+ else
+ {
+ ASSERT(0);
+ }
+}
+
+/*###############################################*/
+/******* END OF RC UPDATE FUNCTIONS **************/
+/*###############################################*/
+
+/*#################################################*/
+/******* START OF RC UTILS FUNCTIONS **************/
+/*#################################################*/
+
+/**
+******************************************************************************
+*
+* @brief function to account for error correction between bits rdopt estimate
+* and actual entropy bit generation
+*
+* @par Description records the entropy consumption for a buffer id; every slot whose rdopt and entropy buffer ids match is reported via rc_update_mismatch_error() and reset to -1
+*
+* @param[in] pv_rc_ctxt
+* void pointer to rc ctxt
+* @param[in] i4_cur_entropy_consumption, i4_buf_id
+* WORD32 entropy consumption of a frame and the (non-negative) id of the buffer it belongs to
+*
+* @return void
+*
+******************************************************************************
+*/
+
+void ihevce_rc_rdopt_entropy_bit_correct(
+ void *pv_rc_ctxt, WORD32 i4_cur_entropy_consumption, WORD32 i4_buf_id)
+{
+ rc_context_t *ps_ctxt = (rc_context_t *)pv_rc_ctxt;
+ WORD32 i4_error;
+ WORD32 i, count = 0;
+ ASSERT(i4_buf_id >= 0);
+ ps_ctxt->ai4_entropy_bit_consumption[ps_ctxt->i4_entropy_bit_count] =
+ i4_cur_entropy_consumption;
+ ps_ctxt->ai4_entropy_bit_consumption_buf_id[ps_ctxt->i4_entropy_bit_count] = i4_buf_id;
+ ps_ctxt->i4_entropy_bit_count = (ps_ctxt->i4_entropy_bit_count + 1) % NUM_BUF_RDOPT_ENT_CORRECT;
+
+ for(i = 0; i < NUM_BUF_RDOPT_ENT_CORRECT; i++)
+ {
+ if(ps_ctxt->ai4_rdopt_bit_consumption_buf_id[i] >= 0 &&
+ (ps_ctxt->ai4_rdopt_bit_consumption_buf_id[i] ==
+ ps_ctxt->ai4_entropy_bit_consumption_buf_id[i]))
+ {
+ i4_error = ps_ctxt->ai4_rdopt_bit_consumption_estimate[i] -
+ ps_ctxt->ai4_entropy_bit_consumption[i];
+ //DBG_PRINTF("entropy mismatch error = %d\n",i4_error/ps_ctxt->ai4_rdopt_bit_consumption_estimate[i]);
+ ps_ctxt->ai4_rdopt_bit_consumption_estimate[i] = -1;
+ ps_ctxt->ai4_rdopt_bit_consumption_buf_id[i] = -1;
+ ps_ctxt->ai4_entropy_bit_consumption[i] = -1;
+ ps_ctxt->ai4_entropy_bit_consumption_buf_id[i] = -1;
+ /*accumulate mismatch along with gop level bit error that is propagated to next frame*/
+ /*error = rdopt - entropy so it is expected to be negative; NOTE(review): count below is only incremented, never read*/
+ rc_update_mismatch_error(ps_ctxt->rc_hdl, i4_error);
+ count++;
+ }
+ }
+}
+
+/**
+******************************************************************************
+*
+* @name ihevce_rc_check_non_lap_scd
+*
+* @par Description Detects SCD frames as I_only_scds or non_I_scds based
+ on intrasatd & ME costs. Updates scd flags and remembers the current frame's stats per pic type
+*
+* @param[in] pv_rc_ctxt - pointer to rc context
+* @param[inout] ps_rc_lap_out - current frame stats; i4_is_I_only_scd and i4_is_non_I_scd are written
+* @return void
+*
+******************************************************************************
+*/
+void ihevce_rc_check_non_lap_scd(void *pv_rc_ctxt, rc_lap_out_params_t *ps_rc_lap_out)
+{
+ rc_context_t *ps_rc_ctxt = (rc_context_t *)pv_rc_ctxt;
+ picture_type_e rc_pic_type = ihevce_rc_conv_pic_type(
+ (IV_PICTURE_CODING_TYPE_T)ps_rc_lap_out->i4_rc_pic_type,
+ ps_rc_ctxt->i4_field_pic,
+ ps_rc_lap_out->i4_rc_temporal_lyr_id,
+ ps_rc_lap_out->i4_is_bottom_field,
+ ps_rc_ctxt->i4_top_field_first);
+
+ /*Init to normal frames*/
+ ps_rc_lap_out->i4_is_I_only_scd = 0;
+ ps_rc_lap_out->i4_is_non_I_scd = 0;
+
+ /*None of the checks below apply if the frame is marked as scene cut*/
+ if(ps_rc_lap_out->i4_rc_scene_type == SCENE_TYPE_SCENE_CUT)
+ {
+ WORD32 i;
+ /*reset all older data; -1 marks "no data available"*/
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ ps_rc_ctxt->s_l1_state_metric.ai8_L1_prev_I_intra_raw_satd[i] = -1;
+ ps_rc_ctxt->s_l1_state_metric.ai8_L1_prev_pic_coarse_me_cost[i] = -1;
+ ps_rc_ctxt->s_l1_state_metric.ai8_L1_prev_pic_coarse_me_sad[i] = -1;
+ }
+ }
+ else
+ {
+ /*Check if it is I only reset case, lap_out is assumed to have latest data which is used to set the corresponding flags*/
+ /*For I pic check for I only reset case and for other pictures check for non-I scd case*/
+ if(rc_pic_type == I_PIC)
+ {
+ /*Check if atleast one frame data is available first: the -1 "no data" value must not be left shifted (undefined behaviour)*/
+ if(ps_rc_ctxt->s_l1_state_metric.ai8_L1_prev_I_intra_raw_satd[rc_pic_type] >= 0)
+ {
+ if(ps_rc_lap_out->i8_pre_intra_satd <
+ (ps_rc_ctxt->s_l1_state_metric.ai8_L1_prev_I_intra_raw_satd[rc_pic_type] >> 1) ||
+ ps_rc_lap_out->i8_pre_intra_satd >
+ (ps_rc_ctxt->s_l1_state_metric.ai8_L1_prev_I_intra_raw_satd[rc_pic_type] << 1))
+ ps_rc_lap_out->i4_is_I_only_scd = 1;
+ }
+ }
+ else if(
+ ((rc_pic_type == P_PIC) &&
+ (ps_rc_lap_out->i4_rc_quality_preset == IHEVCE_QUALITY_P6)) ||
+ (ps_rc_lap_out->i4_rc_quality_preset < IHEVCE_QUALITY_P6))
+ {
+#define SAD_THREASHOLD_30FPS (2.5)
+ /*Threshold is -0.8 * (u4_max_frame_rate / 30000) + 3.05, clamped to [1.5, 3.0]: 2.25 when the ratio is 1, 1.5 (after clamping) when it is 2*/
+ WORD32 i4_non_simple_repeat_prev_frame_detect = 0;
+ float sad_change_threshold =
+ (float)(-0.8f * ((float)ps_rc_ctxt->u4_max_frame_rate / 30000) + 3.05f); /*Change of SAD threshold for 30 fps content, this should be lowered for 60 fps*/
+ if(sad_change_threshold < 1.5f)
+ sad_change_threshold = 1.5f;
+ if(sad_change_threshold > 3.0f)
+ sad_change_threshold = 3.0f;
+ ASSERT(ps_rc_lap_out->i8_raw_l1_coarse_me_sad >= 0);
+
+ /*block variance computed at 4x4 level in w/4*h/4,
+ percent dc blks is how many block's variance are less than or equal to 16*/
+ if(ps_rc_lap_out->i4_perc_dc_blks < 85)
+ {
+ /*me sad is expected to be zero for repeat frames*/
+ if((ps_rc_ctxt->s_l1_state_metric.ai8_L1_prev_pic_coarse_me_sad[rc_pic_type] ==
+ 0) &&
+ (ps_rc_lap_out->i4_rc_temporal_lyr_id == ps_rc_ctxt->i4_max_temporal_lyr))
+ {
+ i4_non_simple_repeat_prev_frame_detect = 1;
+ }
+ }
+ if(ps_rc_lap_out->i8_frame_acc_coarse_me_cost >
+ (sad_change_threshold *
+ ps_rc_ctxt->s_l1_state_metric.ai8_L1_prev_pic_coarse_me_cost[rc_pic_type]) &&
+ (ps_rc_ctxt->s_l1_state_metric.ai8_L1_prev_pic_coarse_me_cost[rc_pic_type] >= 0) &&
+ (!i4_non_simple_repeat_prev_frame_detect))
+ {
+ WORD32 one_per_pixel_sad_L1;
+ /*per pixel sad has to be greater than 1 to avoid repeat frames influence non-I scd detection*/
+ if((ps_rc_ctxt->i4_frame_height * ps_rc_ctxt->i4_frame_width) < 4000000)
+ {
+ /*1080*/
+ one_per_pixel_sad_L1 =
+ (ps_rc_ctxt->i4_frame_height * ps_rc_ctxt->i4_frame_width) >> 2;
+ }
+ else
+ {
+ /*4k*/
+ one_per_pixel_sad_L1 =
+ (ps_rc_ctxt->i4_frame_height * ps_rc_ctxt->i4_frame_width) >> 4;
+ }
+ if(ps_rc_lap_out->i8_frame_acc_coarse_me_cost > one_per_pixel_sad_L1)
+ {
+ {
+ ps_rc_lap_out->i4_is_non_I_scd = 1;
+ }
+ }
+ }
+
+ if(rc_pic_type == P_PIC)
+ {
+ if(ps_rc_ctxt->s_l1_state_metric.ai8_L1_prev_pic_coarse_me_cost[rc_pic_type] < 0)
+ {
+ if(ps_rc_ctxt->s_l1_state_metric.ai8_L1_prev_I_intra_raw_satd[I_PIC] > 0)
+ {
+ if(ps_rc_lap_out->i8_pre_intra_satd >
+ ps_rc_ctxt->s_l1_state_metric.ai8_L1_prev_I_intra_raw_satd[I_PIC] << 1)
+ {
+ ps_rc_lap_out->i4_is_non_I_scd = 1;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ /*remember the previous frame stats*/
+ ps_rc_ctxt->s_l1_state_metric.ai8_L1_prev_I_intra_raw_satd[rc_pic_type] =
+ ps_rc_lap_out->i8_pre_intra_satd; //ps_rc_lap_out->i8_pre_intra_satd;
+ ps_rc_ctxt->s_l1_state_metric.ai8_L1_prev_pic_coarse_me_cost[rc_pic_type] =
+ ps_rc_lap_out->i8_frame_acc_coarse_me_cost; //ps_rc_lap_out->i8_frame_acc_coarse_me_sad;
+ ps_rc_ctxt->s_l1_state_metric.ai8_L1_prev_pic_coarse_me_sad[rc_pic_type] =
+ ps_rc_lap_out->i8_raw_l1_coarse_me_sad;
+}
+
+/**
+******************************************************************************
+*
+* @name ihevce_rc_check_is_pre_enc_qp_valid
+*
+* @par Description checking whether enc thread has updated qp in reverse queue
+*
+* @param[in] pv_rc_ctxt - pointer to rc context
+* pi4_force_end_flag - when it reads 1 the wait is abandoned and the queue entry is marked valid
+* @return always 0 (returns only once the entry at the read index is no longer -1)
+*
+******************************************************************************
+*/
+/**only function accessed by encoder without using mutex lock. NOTE(review): the wait relies on volatile reads only; volatile gives no inter-thread ordering guarantee in C11 - confirm this is acceptable on the targets*/
+WORD32 ihevce_rc_check_is_pre_enc_qp_valid(void *pv_rc_ctxt, volatile WORD32 *pi4_force_end_flag)
+{
+ rc_context_t *ps_rc_ctxt = (rc_context_t *)pv_rc_ctxt;
+
+ volatile WORD32 i4_is_qp_valid;
+ volatile WORD32 *pi4_is_qp_valid;
+
+ pi4_is_qp_valid =
+ (volatile WORD32 *)&ps_rc_ctxt->as_pre_enc_qp_queue[ps_rc_ctxt->i4_pre_enc_qp_read_index]
+ .i4_is_qp_valid;
+ i4_is_qp_valid = *pi4_is_qp_valid;
+
+ /*Due to stagger between L1 IPE and L0 IPE, towards the end (when encoder is in flush mode) L0 IPE can race ahead of enc
+ since it will suddenly get stagger between L1 and L0 worth of free buffers. It could try to start L0 even before enc has
+ populated qp for such frames. qp = -1 is returned in such case which implies encoder should wait for qp to be populated*/
+
+ while(i4_is_qp_valid == -1)
+ {
+ /*this rate control call is outside mutex lock to avoid deadlock. If this acquires mutex lock enc will not be able to
+ populate qp. The flag is re-read on every iteration (busy wait)*/
+ i4_is_qp_valid = *pi4_is_qp_valid;
+
+ if(1 == (*pi4_force_end_flag))
+ {
+ *pi4_is_qp_valid = 1;
+ i4_is_qp_valid = 1;
+ }
+ }
+ return 0;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_compute_temporal_complexity_reset_Kp_Kb
+*
+* \brief averages coarse ME SAD over the non-I frames of the coming sub-GOP, converts it to
+* SAD per pixel (clipped to 0.01..5.0) and uses it to reset Kp/Kb or to fetch offline QP offsets
+* \param[in] *ps_rc_lap_out -> rc lap out of current frame; head of the ps_rc_lap_out_next_encode chain
+* \param[in] *pv_rc_ctxt -> rc context; i4_Kp_Kb_reset_flag -> non-zero: rc_reset_Kp_Kb, zero: set ai4_offsets
+* \return none
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_compute_temporal_complexity_reset_Kp_Kb(
+ rc_lap_out_params_t *ps_rc_lap_out, void *pv_rc_ctxt, WORD32 i4_Kp_Kb_reset_flag)
+{
+ rc_context_t *ps_rc_ctxt = (rc_context_t *)pv_rc_ctxt;
+ rc_lap_out_params_t *ps_cur_rc_lap_out_temporal_offset,
+ *ps_cur_rc_lap_out_temporal_offset_scd_detect;
+ picture_type_e curr_rc_pic_type;
+ LWORD64 i8_total_acc_coarse_me_sad = 0, i8_avg_acc_coarse_me_sad = 0;
+ WORD8 i1_num_frames_in_Sub_GOP = 0, i = 0, i1_no_reset = 0;
+ WORD32 i4_inter_frame_interval = rc_get_inter_frame_interval(ps_rc_ctxt->rc_hdl);
+ WORD32 i4_frame_qp = 0, i4_temp_frame_qp = 0;
+ WORD32 ai4_offsets[5] = { -3, -2, 2, 6, 7 }; /* QP offset indexed by curr_rc_pic_type; NOTE(review): confirm the pic type can never index past 4 */
+ ps_cur_rc_lap_out_temporal_offset = ps_rc_lap_out;
+ ps_cur_rc_lap_out_temporal_offset_scd_detect = ps_rc_lap_out; /* assigned here and below but not read in this function */
+
+ curr_rc_pic_type = ihevce_rc_conv_pic_type(
+ (IV_PICTURE_CODING_TYPE_T)ps_cur_rc_lap_out_temporal_offset->i4_rc_pic_type,
+ ps_rc_ctxt->i4_field_pic,
+ ps_cur_rc_lap_out_temporal_offset->i4_rc_temporal_lyr_id,
+ ps_cur_rc_lap_out_temporal_offset->i4_is_bottom_field,
+ ps_rc_ctxt->i4_top_field_first);
+
+ if(curr_rc_pic_type == I_PIC) /* for an I picture the analysis starts from the next frame in encode order */
+ {
+ ps_cur_rc_lap_out_temporal_offset_scd_detect =
+ (rc_lap_out_params_t *)ps_cur_rc_lap_out_temporal_offset->ps_rc_lap_out_next_encode;
+ ps_cur_rc_lap_out_temporal_offset =
+ (rc_lap_out_params_t *)ps_cur_rc_lap_out_temporal_offset->ps_rc_lap_out_next_encode;
+
+ if(NULL != ps_cur_rc_lap_out_temporal_offset)
+ {
+ curr_rc_pic_type = ihevce_rc_conv_pic_type(
+ (IV_PICTURE_CODING_TYPE_T)ps_cur_rc_lap_out_temporal_offset->i4_rc_pic_type,
+ ps_rc_ctxt->i4_field_pic,
+ ps_cur_rc_lap_out_temporal_offset->i4_rc_temporal_lyr_id,
+ ps_cur_rc_lap_out_temporal_offset->i4_is_bottom_field,
+ ps_rc_ctxt->i4_top_field_first);
+ }
+ else
+ return; /* no frame follows the I picture: nothing to analyse */
+ }
+
+ if(ps_cur_rc_lap_out_temporal_offset->i4_L1_qp == -1)
+ return; /* NOTE(review): -1 presumably means the L1 qp is not populated yet - confirm */
+
+ if(ps_cur_rc_lap_out_temporal_offset->i4_L0_qp == -1)
+ i4_frame_qp = ps_cur_rc_lap_out_temporal_offset->i4_L1_qp; /* fall back to L1 qp when L0 qp reads -1 */
+ else
+ i4_frame_qp = ps_cur_rc_lap_out_temporal_offset->i4_L0_qp;
+
+ i1_num_frames_in_Sub_GOP = 0;
+ i = 0;
+
+ i1_no_reset = 0;
+ do
+ {
+ if(ps_cur_rc_lap_out_temporal_offset != NULL)
+ {
+ if(curr_rc_pic_type != I_PIC)
+ i4_temp_frame_qp =
+ i4_frame_qp + ps_cur_rc_lap_out_temporal_offset->i4_rc_temporal_lyr_id + 1;
+
+ i4_temp_frame_qp += ai4_offsets[curr_rc_pic_type];
+ i4_temp_frame_qp = CLIP3(i4_temp_frame_qp, 1, 51); /* clipped value is used as index into ai8_frame_acc_coarse_me_sad */
+
+ {
+ if(curr_rc_pic_type != I_PIC)
+ {
+ i8_total_acc_coarse_me_sad +=
+ ps_cur_rc_lap_out_temporal_offset
+ ->ai8_frame_acc_coarse_me_sad[i4_temp_frame_qp];
+ i1_num_frames_in_Sub_GOP++;
+ i++;
+ }
+ else
+ {
+ break; /* an I picture ends the accumulation */
+ }
+ }
+
+ ps_cur_rc_lap_out_temporal_offset =
+ (rc_lap_out_params_t *)ps_cur_rc_lap_out_temporal_offset->ps_rc_lap_out_next_encode;
+
+ if(ps_cur_rc_lap_out_temporal_offset == NULL)
+ {
+ break; /* end of the chain: keep what has been accumulated so far */
+ }
+ curr_rc_pic_type = ihevce_rc_conv_pic_type(
+ (IV_PICTURE_CODING_TYPE_T)ps_cur_rc_lap_out_temporal_offset->i4_rc_pic_type,
+ ps_rc_ctxt->i4_field_pic,
+ ps_cur_rc_lap_out_temporal_offset->i4_rc_temporal_lyr_id,
+ ps_cur_rc_lap_out_temporal_offset->i4_is_bottom_field,
+ ps_rc_ctxt->i4_top_field_first);
+ }
+ else
+ {
+ i1_num_frames_in_Sub_GOP = 0;
+ break;
+ }
+ } while(
+ ((((curr_rc_pic_type != P_PIC) && ((curr_rc_pic_type != I_PIC))) ||
+ (curr_rc_pic_type == P_PIC)) &&
+ (i1_num_frames_in_Sub_GOP < i4_inter_frame_interval))); /* the pic type test reduces to curr_rc_pic_type != I_PIC */
+
+ if((i1_num_frames_in_Sub_GOP) && (i1_no_reset == 0)) /* i1_no_reset is never set non-zero in this function */
+ {
+ float f_hme_sad_per_pixel;
+ i8_avg_acc_coarse_me_sad = (i8_total_acc_coarse_me_sad / i1_num_frames_in_Sub_GOP);
+ f_hme_sad_per_pixel =
+ ((float)i8_avg_acc_coarse_me_sad /
+ (ps_rc_ctxt->i4_frame_height * ps_rc_ctxt->i4_frame_width));
+ f_hme_sad_per_pixel = CLIP3(f_hme_sad_per_pixel, 0.01f, 5.0f);
+ /*reset the QP offsets for the next sub GOP depending on the offline model based on the temporal complexity */
+ if(i4_Kp_Kb_reset_flag)
+ {
+ WORD32 i4_bin; /* output of rc_reset_Kp_Kb; not used afterwards */
+
+ rc_reset_Kp_Kb(
+ ps_rc_ctxt->rc_hdl,
+ 8.00,
+ ps_rc_ctxt->i4_num_active_pic_type,
+ f_hme_sad_per_pixel,
+ &i4_bin,
+ ps_rc_ctxt->i4_rc_pass);
+ }
+ else
+ {
+ rc_ba_get_qp_offset_offline_data(
+ ps_rc_ctxt->rc_hdl,
+ ps_rc_lap_out->ai4_offsets,
+ f_hme_sad_per_pixel,
+ ps_rc_ctxt->i4_num_active_pic_type,
+ &ps_rc_lap_out->i4_complexity_bin);
+
+ ps_cur_rc_lap_out_temporal_offset = ps_rc_lap_out;
+ ps_cur_rc_lap_out_temporal_offset->i4_offsets_set_flag = 1;
+
+ curr_rc_pic_type = ihevce_rc_conv_pic_type(
+ (IV_PICTURE_CODING_TYPE_T)ps_rc_lap_out->i4_rc_pic_type,
+ ps_rc_ctxt->i4_field_pic,
+ ps_rc_lap_out->i4_rc_temporal_lyr_id,
+ ps_rc_lap_out->i4_is_bottom_field,
+ ps_rc_ctxt->i4_top_field_first);
+
+ if((curr_rc_pic_type == I_PIC) &&
+ ((rc_lap_out_params_t *)ps_cur_rc_lap_out_temporal_offset->ps_rc_lap_out_next_encode)
+ ->i4_rc_pic_type == P_PIC)
+ i1_num_frames_in_Sub_GOP++; /* copy loop below starts from the I frame itself, so take one extra step */
+
+ for(i = 1; i < i1_num_frames_in_Sub_GOP; i++) /* propagate offsets and bin of the current frame along the chain */
+ {
+ ps_cur_rc_lap_out_temporal_offset =
+ (rc_lap_out_params_t *)
+ ps_cur_rc_lap_out_temporal_offset->ps_rc_lap_out_next_encode;
+ memmove(
+ ps_cur_rc_lap_out_temporal_offset->ai4_offsets,
+ ps_rc_lap_out->ai4_offsets,
+ sizeof(WORD32) * 5);
+ ps_cur_rc_lap_out_temporal_offset->i4_complexity_bin =
+ ps_rc_lap_out->i4_complexity_bin;
+ ps_cur_rc_lap_out_temporal_offset->i4_offsets_set_flag = 1;
+ }
+ }
+ }
+}
+
+/**
+******************************************************************************
+* @name ihevce_ebf_based_rc_correction_to_avoid_overflow
+* @brief function to get delta QP or in-frame RC bits estimate to avoid buffer overflow
+* @par Description compares (current ebf + bits expected for this frame - drain rate) with per-picture-type fractions of the vbv buffer size
+* @param[in] ps_rc_ctxt, ps_rc_lap_out; @param[in,out] pi4_tot_bits_estimated - bits estimate, may be overwritten; @return delta QP (0, 1 or 2)
+* NOTE(review): ebf presumably stands for encoder buffer fullness - confirm against the rate control library
+******************************************************************************
+*/
+
+WORD32 ihevce_ebf_based_rc_correction_to_avoid_overflow(
+ rc_context_t *ps_rc_ctxt, rc_lap_out_params_t *ps_rc_lap_out, WORD32 *pi4_tot_bits_estimated)
+{
+ WORD32 i4_modelQP, i4_clipQP, i4_maxEbfQP, i4_diffQP, i4_is_model_valid, i4_deltaQP = 0;
+ LWORD64 i8_bitsClipQP, i8_grwEbf; // i8_bitsComp;
+ WORD32 i4_is_offline_model_used;
+ WORD32 i4_vbv_buffer_size, i4_drain_rate, i4_currEbf, i4_maxEbf;
+ WORD32 i4_case = -1; /* records which branch fired; written but not read in this function */
+ float f_thrsh_i_pic_delta_qp_1, f_thrsh_i_pic_delta_qp_2, f_thrsh_p_pic_delta_qp_1,
+ f_thrsh_p_pic_delta_qp_2;
+ float f_thrsh_br_pic_delta_qp_1, f_thrsh_br_pic_delta_qp_2, f_thrsh_bnr_pic_delta_qp_1,
+ f_thrsh_bnr_pic_delta_qp_2;
+ float f_vbv_thrsh_delta_qp;
+
+ /*initialization of all the variables*/
+ rc_init_buffer_info(
+ ps_rc_ctxt->rc_hdl, &i4_vbv_buffer_size, &i4_currEbf, &i4_maxEbf, &i4_drain_rate);
+
+ i4_is_model_valid = ps_rc_lap_out->i4_is_model_valid;
+ i4_modelQP = ps_rc_ctxt->s_rc_high_lvl_stat.i4_modelQP;
+ i4_clipQP = ps_rc_ctxt->s_rc_high_lvl_stat.i4_finalQP;
+ i4_maxEbfQP = ps_rc_ctxt->s_rc_high_lvl_stat.i4_maxEbfQP; /* read here but not used further in this function */
+ i8_bitsClipQP = ps_rc_ctxt->s_rc_high_lvl_stat.i8_bits_from_finalQP;
+ i4_is_offline_model_used = ps_rc_ctxt->s_rc_high_lvl_stat.i4_is_offline_model_used;
+ ASSERT(i4_clipQP != INVALID_QP);
+
+ if(ps_rc_ctxt->i4_num_frame_parallel > 1) /* frame-parallel encoding uses its own set of thresholds */
+ {
+ f_thrsh_i_pic_delta_qp_1 = (float)VBV_THRSH_FRM_PRLL_I_PIC_DELTA_QP_1;
+ f_thrsh_i_pic_delta_qp_2 = (float)VBV_THRSH_FRM_PRLL_I_PIC_DELTA_QP_2;
+ f_thrsh_p_pic_delta_qp_1 = (float)VBV_THRSH_FRM_PRLL_P_PIC_DELTA_QP_1;
+ f_thrsh_p_pic_delta_qp_2 = (float)VBV_THRSH_FRM_PRLL_P_PIC_DELTA_QP_2;
+ f_thrsh_br_pic_delta_qp_1 = (float)VBV_THRSH_FRM_PRLL_BR_PIC_DELTA_QP_1;
+ f_thrsh_br_pic_delta_qp_2 = (float)VBV_THRSH_FRM_PRLL_BR_PIC_DELTA_QP_2;
+ f_thrsh_bnr_pic_delta_qp_1 = (float)VBV_THRSH_FRM_PRLL_BNR_PIC_DELTA_QP_1;
+ f_thrsh_bnr_pic_delta_qp_2 = (float)VBV_THRSH_FRM_PRLL_BNR_PIC_DELTA_QP_2;
+ f_vbv_thrsh_delta_qp = (float)VBV_THRSH_FRM_PRLL_DELTA_QP;
+ }
+ else
+ {
+ f_thrsh_i_pic_delta_qp_1 = (float)VBV_THRSH_I_PIC_DELTA_QP_1;
+ f_thrsh_i_pic_delta_qp_2 = (float)VBV_THRSH_I_PIC_DELTA_QP_2;
+ f_thrsh_p_pic_delta_qp_1 = (float)VBV_THRSH_P_PIC_DELTA_QP_1;
+ f_thrsh_p_pic_delta_qp_2 = (float)VBV_THRSH_P_PIC_DELTA_QP_2;
+ f_thrsh_br_pic_delta_qp_1 = (float)VBV_THRSH_BR_PIC_DELTA_QP_1;
+ f_thrsh_br_pic_delta_qp_2 = (float)VBV_THRSH_BR_PIC_DELTA_QP_2;
+ f_thrsh_bnr_pic_delta_qp_1 = (float)VBV_THRSH_BNR_PIC_DELTA_QP_1;
+ f_thrsh_bnr_pic_delta_qp_2 = (float)VBV_THRSH_BNR_PIC_DELTA_QP_2;
+ f_vbv_thrsh_delta_qp = (float)VBV_THRSH_DELTA_QP;
+ }
+
+ /* function logic starts */
+ if(i4_is_model_valid)
+ {
+ ASSERT(i4_modelQP != INVALID_QP);
+ i8_grwEbf = i8_bitsClipQP - (LWORD64)i4_drain_rate; /* net buffer growth if this frame consumes i8_bitsClipQP bits */
+ if(((i4_currEbf + i8_grwEbf) > (0.6*i4_vbv_buffer_size)) /*&&
+ i4_modelQP >= i4_clipQP*/)
+ {
+ /* part of existing scene (i.e. no new scene)
+ In which case this is not first I/P/Bref/Bnref etc
+ The models for I/P/Bref/Bnref are all valid*/
+ if(((i4_currEbf + i8_grwEbf) <
+ i4_maxEbf)) /* does not matter whether this is 2pass, 1 pass, VBR, CBR etc*/
+ {
+ /* clipQP has been determined keeping in view certain other quality constraints like pulsing etc.
+ So better to honour it if possible*/
+ //if (i8_bitsClipQP > i8_drain_rate)
+ {
+ LWORD64 i8_thrsh_for_deltaQP_2 = i4_vbv_buffer_size,
+ i8_thrsh_for_deltaQP_1 = i4_vbv_buffer_size;
+ /*even when (modelQP - clipQP) = 0, we intend to QP increase as expected ebf is above 60%*/
+ i4_diffQP = MAX(i4_modelQP - i4_clipQP, 1);
+ switch(ps_rc_lap_out->i4_rc_pic_type)
+ {
+ case IV_I_FRAME:
+ case IV_IDR_FRAME:
+ {
+ i8_thrsh_for_deltaQP_1 =
+ (LWORD64)(f_thrsh_i_pic_delta_qp_1 * i4_vbv_buffer_size);
+ i8_thrsh_for_deltaQP_2 =
+ (LWORD64)(f_thrsh_i_pic_delta_qp_2 * i4_vbv_buffer_size);
+ break;
+ }
+ case IV_P_FRAME:
+ {
+ i8_thrsh_for_deltaQP_1 =
+ (LWORD64)(f_thrsh_p_pic_delta_qp_1 * i4_vbv_buffer_size);
+ i8_thrsh_for_deltaQP_2 =
+ (LWORD64)(f_thrsh_p_pic_delta_qp_2 * i4_vbv_buffer_size);
+ break;
+ }
+ case IV_B_FRAME:
+ {
+ if(ps_rc_lap_out->i4_rc_is_ref_pic)
+ {
+ i8_thrsh_for_deltaQP_1 =
+ (LWORD64)(f_thrsh_br_pic_delta_qp_1 * i4_vbv_buffer_size);
+ i8_thrsh_for_deltaQP_2 =
+ (LWORD64)(f_thrsh_br_pic_delta_qp_2 * i4_vbv_buffer_size);
+ }
+ else
+ {
+ /*B non-ref uses the bnr thresholds; NOTE(review): original note says these equal the B reference ones - confirm, later may have to tune if required*/
+ i8_thrsh_for_deltaQP_1 =
+ (LWORD64)(f_thrsh_bnr_pic_delta_qp_1 * i4_vbv_buffer_size);
+ i8_thrsh_for_deltaQP_2 =
+ (LWORD64)(f_thrsh_bnr_pic_delta_qp_2 * i4_vbv_buffer_size);
+ }
+ break;
+ }
+ default:
+ break; /* other picture types keep both thresholds at the full vbv buffer size */
+ }
+
+ if((i4_currEbf + i8_grwEbf) > i8_thrsh_for_deltaQP_1)
+ {
+ /*For more than 2 QP change this means a larger scale issue and probably needs to be handled elsewhere ?*/
+ i4_deltaQP =
+ MIN(2, i4_diffQP); /* we dont intend to change QP by more than 2 */
+ i4_case = 0;
+ }
+ else if((i4_currEbf + i8_grwEbf) > i8_thrsh_for_deltaQP_2)
+ {
+ i4_deltaQP = MIN(1, i4_diffQP);
+ i4_case = 1;
+ }
+ }
+ /* else if (i8_bitsClipQP > i8_drain_rate)
+ {
+ we have no correction, buffer will be healthy after this.
+ However, there could be one problem if the currEbf is already close to say 80% of EBF.
+ This means we have not reacted well early - needs to be handled?
+
+ This could be the case where it is a simple scene immediately following a complex scene
+ and is the I picture (not the first I since model is valid).
+ Is this possible - maybe, what to do - dont know?
+ }
+ */
+ }
+ else /*(i4_clipQP < i4_maxEbfQP)*/
+ {
+ i4_deltaQP = 2; /* expected ebf is at or above i4_maxEbf */
+ i4_case = 2;
+ }
+ }
+ if((i4_currEbf + i8_grwEbf) < (0.6 * i4_vbv_buffer_size)) /* exactly 60% takes neither this branch nor the one above */
+ {
+ *pi4_tot_bits_estimated = i8_bitsClipQP; /* narrowed from LWORD64 to WORD32 */
+ }
+ }
+ else
+ {
+ if(i4_is_offline_model_used)
+ {
+ /* this can be only for non-I SCD, where we reset RC */
+ WORD32 i4_bits_est_for_in_frm_rc = *pi4_tot_bits_estimated;
+ i8_grwEbf = i4_bits_est_for_in_frm_rc - i4_drain_rate;
+ if((i4_currEbf + i8_grwEbf) > (f_vbv_thrsh_delta_qp * i4_vbv_buffer_size))
+ {
+ i4_bits_est_for_in_frm_rc =
+ i4_drain_rate + (WORD32)(0.85 * i4_vbv_buffer_size) - i4_currEbf;
+ /* if pi4_tot_bits_estimated becomes less than zero or less than drain rate this indicates that we are near or above 85% of the buffer */
+ /* this needs a reaction */
+ if(i4_bits_est_for_in_frm_rc < i4_drain_rate)
+ {
+ *pi4_tot_bits_estimated =
+ MAX((i4_drain_rate + (WORD32)(0.95 * i4_vbv_buffer_size) - i4_currEbf),
+ i4_drain_rate);
+ i4_deltaQP = 2; /* this needs some review, needs to be handled well */
+ }
+ }
+ i4_case = 3;
+ }
+ else
+ {
+ i8_bitsClipQP = *pi4_tot_bits_estimated; /* model not valid: use the caller's estimate as the expected bits */
+ i8_grwEbf = i8_bitsClipQP - i4_drain_rate;
+
+ if(((i4_currEbf + i8_grwEbf) <
+ i4_maxEbf)) /* does not matter whether this is 2pass, 1 pass, VBR, CBR etc*/
+ {
+ /* clipQP has been determined keeping in view certain other quality constraints like pulsing etc.
+ So better to honour it if possible*/
+ //if (i8_bitsClipQP > i8_drain_rate)
+ {
+ LWORD64 i8_thrsh_for_deltaQP_2 = i4_vbv_buffer_size,
+ i8_thrsh_for_deltaQP_1 = i4_vbv_buffer_size;
+
+ switch(ps_rc_lap_out->i4_rc_pic_type)
+ {
+ case IV_I_FRAME:
+ case IV_IDR_FRAME:
+ {
+ i8_thrsh_for_deltaQP_1 =
+ (LWORD64)(f_thrsh_i_pic_delta_qp_1 * i4_vbv_buffer_size);
+ i8_thrsh_for_deltaQP_2 =
+ (LWORD64)(f_thrsh_i_pic_delta_qp_2 * i4_vbv_buffer_size);
+ break;
+ }
+ case IV_P_FRAME:
+ {
+ i8_thrsh_for_deltaQP_1 =
+ (LWORD64)(f_thrsh_p_pic_delta_qp_1 * i4_vbv_buffer_size);
+ i8_thrsh_for_deltaQP_2 =
+ (LWORD64)(f_thrsh_p_pic_delta_qp_2 * i4_vbv_buffer_size);
+ break;
+ }
+ case IV_B_FRAME:
+ {
+ if(ps_rc_lap_out->i4_rc_is_ref_pic)
+ {
+ i8_thrsh_for_deltaQP_1 =
+ (LWORD64)(f_thrsh_br_pic_delta_qp_1 * i4_vbv_buffer_size);
+ i8_thrsh_for_deltaQP_2 =
+ (LWORD64)(f_thrsh_br_pic_delta_qp_2 * i4_vbv_buffer_size);
+ }
+ else
+ {
+ /*B non-ref uses the bnr thresholds; NOTE(review): original note says these equal the B reference ones - confirm, later may have to tune if required*/
+ i8_thrsh_for_deltaQP_1 =
+ (LWORD64)(f_thrsh_bnr_pic_delta_qp_1 * i4_vbv_buffer_size);
+ i8_thrsh_for_deltaQP_2 =
+ (LWORD64)(f_thrsh_bnr_pic_delta_qp_2 * i4_vbv_buffer_size);
+ }
+ break;
+ }
+ default:
+ break; /* other picture types keep both thresholds at the full vbv buffer size */
+ }
+
+ if((i4_currEbf + i8_grwEbf) > i8_thrsh_for_deltaQP_1)
+ {
+ /*For more than 2 QP change this means a larger scale issue and probably needs to be handled elsewhere ?*/
+ i4_deltaQP = 2; /* we dont intend to change QP by more than 2 */
+ i4_case = 5;
+ }
+ else if((i4_currEbf + i8_grwEbf) > i8_thrsh_for_deltaQP_2)
+ {
+ i4_deltaQP = 1;
+ i4_case = 6;
+ }
+ }
+ }
+ else
+ {
+ i4_deltaQP = 2; /* expected ebf is at or above i4_maxEbf */
+ i4_case = 7;
+ }
+ }
+ }
+ return i4_deltaQP;
+}
+
+/*###############################################*/
+/******* END OF RC UTILS FUNCTIONS ***************/
+/*###############################################*/
+
+/*########################################################*/
+/******* START OF VBV COMPLIANCE FUNCTIONS ***************/
+/*#######################################################*/
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_vbv_compliance_frame_level_update
+*
+* \brief
+* this function updates the hrd buffer level used for vbv compliance testing after a frame: the bits generated are removed and the drain for the elapsed cpb removal delay is added
+*
+* \param[in] *pv_rc_ctxt -> rc context
+* i4_bits_generated -> bits generated from entropy
+* i4_resolution_id -> info needed for log Dump (not used by this function)
+* i4_appln_bitrate_inst -> info needed for log Dump (not used by this function)
+* u4_cur_cpb_removal_delay_minus1 -> cbp removal delay of present frame
+* \return
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+
+void ihevce_vbv_compliance_frame_level_update(
+    void *pv_rc_ctxt,
+    WORD32 i4_bits_generated,
+    WORD32 i4_resolution_id,
+    WORD32 i4_appln_bitrate_inst,
+    UWORD32 u4_cur_cpb_removal_delay_minus1)
+{
+    rc_context_t *ps_rc_ctxt = (rc_context_t *)pv_rc_ctxt;
+    /* multiplier for the per-frame drain: 1 unless the delay advanced past a non-zero previous value */
+    WORD32 i4_cbp_removal_delay_diff = 1;
+
+    if((ps_rc_ctxt->s_vbv_compliance.u4_prev_cpb_removal_delay_minus1 > 0) &&
+       (u4_cur_cpb_removal_delay_minus1 >
+        ps_rc_ctxt->s_vbv_compliance.u4_prev_cpb_removal_delay_minus1))
+    {
+        i4_cbp_removal_delay_diff = (u4_cur_cpb_removal_delay_minus1 -
+                                     ps_rc_ctxt->s_vbv_compliance.u4_prev_cpb_removal_delay_minus1);
+    }
+
+    ps_rc_ctxt->s_vbv_compliance.f_curr_buffer_level =
+        ps_rc_ctxt->s_vbv_compliance.f_curr_buffer_level - (float)i4_bits_generated +
+        (i4_cbp_removal_delay_diff * ps_rc_ctxt->s_vbv_compliance.f_drain_rate);
+    ps_rc_ctxt->s_vbv_compliance.f_curr_buffer_level_unclip = /* raw level, before clamping */
+        ps_rc_ctxt->s_vbv_compliance.f_curr_buffer_level;
+
+    if(ps_rc_ctxt->s_vbv_compliance.f_curr_buffer_level < 0)
+    {
+        ps_rc_ctxt->s_vbv_compliance.f_curr_buffer_level = 0;
+    }
+
+    if(ps_rc_ctxt->s_vbv_compliance.f_curr_buffer_level >
+       ps_rc_ctxt->s_vbv_compliance.f_buffer_size)
+    {
+        ps_rc_ctxt->s_vbv_compliance.f_curr_buffer_level =
+            ps_rc_ctxt->s_vbv_compliance.f_buffer_size;
+        ps_rc_ctxt->s_vbv_compliance.f_curr_buffer_level_unclip -= /* keep only the excess over the size */
+            ps_rc_ctxt->s_vbv_compliance.f_buffer_size;
+    }
+    else if(ps_rc_ctxt->s_vbv_compliance.f_curr_buffer_level_unclip > 0)
+    {
+        ps_rc_ctxt->s_vbv_compliance.f_curr_buffer_level_unclip = 0;
+    }
+
+    if(ps_rc_ctxt->e_rate_control_type == VBR_STREAMING)
+    {
+        if(ps_rc_ctxt->s_vbv_compliance.f_curr_buffer_level_unclip > 0)
+            ps_rc_ctxt->s_vbv_compliance.f_curr_buffer_level_unclip = 0; /* excess is dropped in VBR_STREAMING */
+    }
+    ps_rc_ctxt->s_vbv_compliance.u4_prev_cpb_removal_delay_minus1 = u4_cur_cpb_removal_delay_minus1;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_vbv_complaince_init_level
+*
+* \brief
+* this function initializes the hrd buffer level to be used for vbv compliance testing using the parameters fed in VUI parameters: frame rate, bit rate, buffer size, drain rate; the buffer starts full
+*
+* \param[in] *pv_ctxt -> rc context
+* *ps_vui -> VUI parameters
+* \return
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+
+void ihevce_vbv_complaince_init_level(void *pv_ctxt, vui_t *ps_vui)
+{
+    rc_context_t *ps_rc_ctxt = (rc_context_t *)pv_ctxt;
+
+    ps_rc_ctxt->s_vbv_compliance.f_frame_rate =
+        (float)((float)ps_vui->u4_vui_time_scale / ps_vui->u4_vui_num_units_in_tick);
+
+    if(1 == ps_vui->s_vui_hrd_parameters.u1_sub_pic_cpb_params_present_flag)
+    {
+        /* sub-picture (du) values; widened to 64 bit before the shift so large value/scale pairs cannot wrap */
+
+        ps_rc_ctxt->s_vbv_compliance.f_bit_rate = (float)((
+            ((LWORD64)ps_vui->s_vui_hrd_parameters.as_sub_layer_hrd_params[0].au4_bit_rate_du_value_minus1[0] +
+             1)
+            << (6 + ps_vui->s_vui_hrd_parameters
+                        .u4_bit_rate_scale)));
+
+        ps_rc_ctxt->s_vbv_compliance.f_buffer_size = (float)((
+            ((LWORD64)ps_vui->s_vui_hrd_parameters.as_sub_layer_hrd_params[0].au4_cpb_size_du_value_minus1[0] +
+             1)
+            << (4 + ps_vui->s_vui_hrd_parameters
+                        .u4_cpb_size_du_scale)));
+    }
+    else
+    {
+        ps_rc_ctxt->s_vbv_compliance.f_bit_rate = (float)((
+            ((LWORD64)ps_vui->s_vui_hrd_parameters.as_sub_layer_hrd_params[0].au4_bit_rate_value_minus1[0] +
+             1)
+            << (6 + ps_vui->s_vui_hrd_parameters
+                        .u4_bit_rate_scale)));
+
+        ps_rc_ctxt->s_vbv_compliance.f_buffer_size = (float)((
+            ((LWORD64)ps_vui->s_vui_hrd_parameters.as_sub_layer_hrd_params[0].au4_cpb_size_value_minus1[0] +
+             1)
+            << (4 + ps_vui->s_vui_hrd_parameters
+                        .u4_cpb_size_scale)));
+    }
+    ps_rc_ctxt->s_vbv_compliance.f_curr_buffer_level = /* buffer level starts at the full buffer size */
+        (float)ps_rc_ctxt->s_vbv_compliance.f_buffer_size;
+
+    ps_rc_ctxt->s_vbv_compliance.f_drain_rate = /* bits per frame interval */
+        ((ps_rc_ctxt->s_vbv_compliance.f_bit_rate) / ps_rc_ctxt->s_vbv_compliance.f_frame_rate);
+
+    ps_rc_ctxt->s_vbv_compliance.u4_prev_cpb_removal_delay_minus1 = 0;
+}
+
+/*########################################################*/
+/******* END OF VBV COMPLIANCE FUNCTIONS *****************/
+/*#######################################################*/
+
+/*################################################################*/
+/******* START OF DYN CHANGE IN BITRATE FUNCTIONS *****************/
+/*################################################################*/
+/*!
+******************************************************************************
+* \if Function name : change_bitrate_vbv_complaince
+*
+* \brief
+* stores the new bitrate and buffer size for vbv compliance, limits the current buffer level to the new size and recomputes the drain rate from the stored frame rate
+*
+* \param[in] *pv_ctxt -> rc context; i8_new_bitrate -> new bitrate; i8_buffer_size -> new buffer size
+* \return
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void change_bitrate_vbv_complaince(void *pv_ctxt, LWORD64 i8_new_bitrate, LWORD64 i8_buffer_size)
+{
+    rc_context_t *ps_ctxt = (rc_context_t *)pv_ctxt;
+    ps_ctxt->s_vbv_compliance.f_bit_rate = (float)i8_new_bitrate;
+    ps_ctxt->s_vbv_compliance.f_drain_rate = /* new bit rate spread over the stored frame rate */
+        ps_ctxt->s_vbv_compliance.f_bit_rate / ps_ctxt->s_vbv_compliance.f_frame_rate;
+    ps_ctxt->s_vbv_compliance.f_buffer_size = (float)i8_buffer_size;
+    if(ps_ctxt->s_vbv_compliance.f_curr_buffer_level > i8_buffer_size)
+        ps_ctxt->s_vbv_compliance.f_curr_buffer_level = (float)i8_buffer_size;
+}
+/*!
+******************************************************************************
+* \if Function name : ihevce_rc_register_dyn_change_bitrate
+*
+* \brief
+* this function registers call to change bitrate dynamically: stores the pending average and peak bitrate in the rc context and raises i4_bitrate_changed
+*
+* \param[in] *pv_ctxt -> rc context; i8_new_bitrate, i8_new_peak_bitrate -> requested rates
+* i4_new_rate_factor -> used only when i4_rate_control_mode is 1; -1 in the relevant field means no request
+* \return
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+
+void ihevce_rc_register_dyn_change_bitrate(
+    void *pv_ctxt,
+    LWORD64 i8_new_bitrate,
+    LWORD64 i8_new_peak_bitrate,
+    WORD32 i4_new_rate_factor,
+    WORD32 i4_rate_control_mode)
+{
+    rc_context_t *ps_ctxt = (rc_context_t *)pv_ctxt;
+    /* mode 1 derives the average bitrate from the rate factor, all other modes take it directly */
+    WORD32 i4_use_rate_factor = (1 == i4_rate_control_mode);
+    WORD32 i4_is_request_set;
+
+    /* -1 in the field relevant to the mode means nothing is to be registered */
+    if(i4_use_rate_factor)
+        i4_is_request_set = (-1 != i4_new_rate_factor);
+    else
+        i4_is_request_set = (-1 != i8_new_bitrate);
+
+    if(i4_is_request_set)
+    {
+        /* the rate factor scales the peak bitrate and is divided by 1000 */
+        ps_ctxt->i8_new_bitrate = i4_use_rate_factor
+                                      ? (i8_new_peak_bitrate * i4_new_rate_factor) / 1000
+                                      : i8_new_bitrate;
+        ps_ctxt->i8_new_peak_bitrate = i8_new_peak_bitrate;
+        ps_ctxt->i4_bitrate_changed = 1;
+    }
+
+    /* both pending values are expected to be positive on exit, registered now or earlier */
+    ASSERT(ps_ctxt->i8_new_bitrate > 0);
+    ASSERT(ps_ctxt->i8_new_peak_bitrate > 0);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_rc_get_new_bitrate
+*
+* \brief
+* get new bitrate (the i8_new_bitrate field of the rc context)
+*
+* \param[in] *pv_ctxt -> rc context
+*
+* \return value of i8_new_bitrate
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+LWORD64 ihevce_rc_get_new_bitrate(void *pv_ctxt)
+{
+    /* plain read of the pending average bitrate, no state is modified */
+    return ((rc_context_t *)pv_ctxt)->i8_new_bitrate;
+}
+/*!
+******************************************************************************
+* \if Function name : ihevce_rc_get_new_peak_bitrate
+*
+* \brief
+* get new peak rate (the i8_new_peak_bitrate field of the rc context)
+*
+* \param[in] *pv_ctxt -> rc context
+*
+* \return value of i8_new_peak_bitrate
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+LWORD64 ihevce_rc_get_new_peak_bitrate(void *pv_ctxt)
+{
+    /* plain read of the pending peak bitrate, no state is modified */
+    return ((rc_context_t *)pv_ctxt)->i8_new_peak_bitrate;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_rc_change_avg_bitrate
+*
+* \brief
+* change average bitrate configured based on new bitrate; the pending values are passed to change_avg_bit_rate and then reset to -1
+*
+* \param[in] *pv_ctxt -> rc context
+*
+* \return value of rc_get_ebf sampled just before the bitrate change
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+LWORD64 ihevce_rc_change_avg_bitrate(void *pv_ctxt)
+{
+    rc_context_t *ps_ctxt = (rc_context_t *)pv_ctxt;
+    LWORD64 i8_ebf_before_change;
+    UWORD32 u4_avg_bitrate, u4_peak_bitrate;
+
+    /* a request must be pending: -1 marks "nothing registered" */
+    ASSERT(-1 != ps_ctxt->i8_new_bitrate);
+    ASSERT(-1 != ps_ctxt->i8_new_peak_bitrate);
+    /* sample the buffer level first: the caller gets the state before the change */
+    i8_ebf_before_change = (LWORD64)rc_get_ebf(ps_ctxt->rc_hdl);
+    u4_avg_bitrate = (UWORD32)ps_ctxt->i8_new_bitrate;
+    u4_peak_bitrate = (UWORD32)ps_ctxt->i8_new_peak_bitrate;
+    change_avg_bit_rate(ps_ctxt->rc_hdl, u4_avg_bitrate, u4_peak_bitrate);
+    /* request serviced: mark both values as consumed */
+    ps_ctxt->i8_new_bitrate = -1;
+    ps_ctxt->i8_new_peak_bitrate = -1;
+    return i8_ebf_before_change;
+}
+
+/*##############################################################*/
+/******* END OF DYN CHANGE IN BITRATE FUNCTIONS *****************/
+/*##############################################################*/
diff --git a/encoder/ihevce_rc_interface.h b/encoder/ihevce_rc_interface.h
new file mode 100644
index 0000000..646922a
--- /dev/null
+++ b/encoder/ihevce_rc_interface.h
@@ -0,0 +1,262 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*!
+******************************************************************************
+* \file ihevce_rc_interface.h
+*
+* \brief
+* This file contains interface definition of HEVC Rate control library
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************/
+
+#ifndef _IHEVCE_RC_INTERFACE_H_
+#define _IHEVCE_RC_INTERFACE_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+#define ES_TO_PQ_FACTOR (1.0)
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+/*call type to distinguish call from enc/ preenc stage; passed as call_type to ihevce_rc_get_pic_quant*/
+typedef enum
+{
+ ENC_GET_QP = 0, /* call made from the enc stage */
+ PRE_ENC_GET_QP /* call made from the pre-enc stage */
+} IHEVCE_RC_CALL_TYPE;
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Variable Declarations */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+WORD32 ihevce_rc_get_num_mem_recs(void);
+
+WORD32 ihevce_rc_get_mem_recs(
+ iv_mem_rec_t *ps_mem_tab,
+ ihevce_static_cfg_params_t *ps_init_prms,
+ WORD32 mem_space,
+ ihevce_sys_api_t *ps_sys_api);
+
+void *ihevce_rc_mem_init(
+ iv_mem_rec_t *ps_mem_tab,
+ ihevce_static_cfg_params_t *ps_init_prms,
+ WORD32 i4_bitrate_instance_id,
+ rc_quant_t *ps_rc_quant,
+ WORD32 i4_resolution_id,
+ WORD32 i4_look_ahead_frames_in_first_pass);
+
+void ihevce_rc_init(
+ void *pv_ctxt,
+ ihevce_src_params_t *ps_run_time_src_param,
+ ihevce_tgt_params_t *ps_tgt_params,
+ rc_quant_t *ps_rc_quant,
+ ihevce_sys_api_t *ps_sys_api,
+ ihevce_lap_params_t *ps_lap_prms,
+ WORD32 i4_num_frame_parallel);
+
+void ihevce_rc_update_pic_info(
+ void *pv_ctxt,
+ UWORD32 u4_total_bits_consumed,
+ UWORD32 u4_total_header_bits,
+ UWORD32 u4_frame_sad,
+ UWORD32 u4_frame_intra_sad,
+ IV_PICTURE_CODING_TYPE_T pic_type,
+ WORD32 i4_avg_frame_qp,
+ WORD32 i4_suppress_bpic_update,
+ WORD32 *pi4_qp_normalized_8x8_cu_sum,
+ WORD32 *pi4_8x8_cu_sum,
+ LWORD64 *pi8_sad_by_qscale,
+ ihevce_lap_output_params_t *ps_lap_out,
+ rc_lap_out_params_t *ps_rc_lap_out,
+ WORD32 i4_buf_id,
+ UWORD32 u4_open_loop_intra_sad,
+ LWORD64 i8_total_ssd_frame,
+ WORD32 i4_enc_frm_id);
+
+WORD32 ihevce_rc_get_pic_quant(
+ void *pv_ctxt,
+ rc_lap_out_params_t *ps_rc_lap_out,
+ IHEVCE_RC_CALL_TYPE call_type,
+ WORD32 i4_enc_frm_id,
+ WORD32 i4_update_delay,
+ WORD32 *pi4_curr_bits_estimated);
+
+WORD32 ihevce_rc_get_cu_quant(WORD32 i4_frame_qp, WORD32 i4_cu_size);
+
+void ihevce_rc_update_cur_frm_intra_satd(
+ void *pv_ctxt, LWORD64 i8_cur_frm_intra_satd, WORD32 i4_enc_frm_id);
+
+WORD32 ihevce_rc_get_scaled_mpeg2_qp(WORD32 i4_frame_qp, rc_quant_t *ps_rc_quant_ctxt);
+
+WORD32 ihevce_rc_get_scaled_mpeg2_qp_q6(WORD32 i4_frame_qp, UWORD8 u1_bit_depth);
+
+/*function to return hevce qp when input mpeg2 qp is in q6 format*/
+WORD32 ihevce_rc_get_scaled_hevce_qp_q6(WORD32 i4_frame_qp_q6, UWORD8 u1_bit_depth);
+
+WORD32 ihevce_rc_get_scaled_hevce_qp_q3(WORD32 i4_frame_qp, UWORD8 u1_bit_depth);
+
+WORD32 ihevce_rc_get_scaled_hevc_qp_from_qs_q3(WORD32 i4_frame_qs_q3, rc_quant_t *ps_rc_quant_ctxt);
+
+/* Functions dependent on lap input*/
+WORD32 ihevce_rc_lap_get_scene_type(rc_lap_out_params_t *ps_rc_lap_out);
+
+/*function that calculates scene change qp based on offline stat*/
+WORD32 ihevce_rc_get_new_scene_qp(
+ WORD32 i4_est_texture_bits,
+ LWORD64 i8_satd_by_act_accum,
+ WORD32 i4_variance,
+ WORD32 i4_num_pixel);
+/* Function to be called in entropy thread to account for error between rdopt bits
+ estimate and actual bits generated in entropy thread*/
+void ihevce_rc_rdopt_entropy_bit_correct(
+ void *pv_rc_ctxt, WORD32 i4_cur_entropy_consumption, WORD32 i4_cur_time_stamp_low);
+/*Function to get qp after L1 analysis using estimated L0 satd/act so that L1 can happen with closer qp as that will be used by enc*/
+WORD32 ihevce_get_L0_est_satd_based_scd_qp(
+ void *pv_rc_ctxt,
+ rc_lap_out_params_t *ps_rc_lap_out,
+ LWORD64 i8_est_L0_satd_act,
+ float i_to_avg_rest_ratio);
+
+/*Function to calculate L0 satd using L1 satd based on offline stat*/
+LWORD64 ihevce_get_L0_satd_based_on_L1(
+ LWORD64 i8_satd_by_act_L1, WORD32 i4_num_pixel, WORD32 i4_cur_q_scale);
+
+/*Function to get qp for Lap-1 to get qp for L1 analysis*/
+WORD32 ihevce_rc_get_bpp_based_frame_qp(void *pv_rc_ctxt, rc_lap_out_params_t *ps_rc_lap_out);
+
+/*L1 data is registered so that L1 qp can be computed assuming previous frame data*/
+void ihevce_rc_register_L1_analysis_data(
+ void *pv_rc_ctxt,
+ rc_lap_out_params_t *ps_rc_lap_out,
+ LWORD64 i8_est_L0_satd_by_act,
+ LWORD64 i8_pre_intra_sad,
+ LWORD64 i8_l1_hme_sad);
+
+/*populates qp for future frames for all possible pic type*/
+void ihevce_rc_cal_pre_enc_qp(void *pv_rc_ctxt);
+
+WORD32 ihevce_rc_pre_enc_qp_query(
+ void *pv_rc_ctxt, rc_lap_out_params_t *ps_rc_lap_out, WORD32 i4_update_delay);
+/*In flush mode L0 IPE has to wait till encoder populates pre-enc*/
+/*THIS FUNCTION IS MEANT TO BE CALLED WITHOUT MUTEX LOCK*/
+WORD32 ihevce_rc_check_is_pre_enc_qp_valid(void *pv_rc_ctxt, volatile WORD32 *pi4_force_end_flag);
+
+float ihevce_get_i_to_avg_ratio(
+ void *pv_rc_ctxt,
+ rc_lap_out_params_t *ps_rc_lap_out,
+ WORD32 i_to_p_qp_offset,
+ WORD32 i4_offset_flag,
+ WORD32 i4_call_type,
+ WORD32 ai4_qp_offsets[4],
+ WORD32 i4_update_delay);
+
+/*function to detect scene change inside LAP*/
+void ihevce_rc_check_non_lap_scd(void *pv_rc_ctxt, rc_lap_out_params_t *ps_rc_lap_out);
+
+void ihevce_get_dbf_buffer_size(
+ void *pv_rc_ctxt, UWORD32 *pi4_buffer_size, UWORD32 *pi4_dbf, UWORD32 *pi4_bit_rate);
+
+void ihevce_vbv_compliance_frame_level_update(
+ void *pv_rc_ctxt,
+ WORD32 i4_bits_generated,
+ WORD32 i4_resolution_id,
+ WORD32 i4_appln_bitrate_inst,
+ UWORD32 u4_cur_cpb_removal_delay_minus1);
+
+void ihevce_vbv_complaince_init_level(void *pv_ctxt, vui_t *ps_vui);
+
+void ihevce_rc_register_dyn_change_bitrate(
+ void *pv_ctxt,
+ LWORD64 i8_new_bitrate,
+ LWORD64 i8_new_peak_bitrate,
+ WORD32 i4_new_rate_factor,
+ WORD32 i4_rate_control_mode);
+LWORD64 ihevce_rc_get_new_bitrate(void *pv_ctxt);
+
+LWORD64 ihevce_rc_get_new_peak_bitrate(void *pv_ctxt);
+
+LWORD64 ihevce_rc_change_avg_bitrate(void *pv_ctxt);
+
+LWORD64 ihevce_rc_change_rate_factor(void *pv_ctxt);
+
+void get_avg_bitrate_bufsize(void *pv_ctxt, LWORD64 *pi8_bitrate, LWORD64 *pi8_ebf);
+
+void change_bitrate_vbv_complaince(void *pv_ctxt, LWORD64 i8_new_bitrate, LWORD64 i8_buffer_size);
+
+void ihevce_compute_temporal_complexity_reset_Kp_Kb(
+ rc_lap_out_params_t *ps_rc_lap_out, void *pv_rc_ctxt, WORD32 i4_Kp_Kb_reset_flag);
+
+/* SGI & Enc Loop Parallelism related changes*/
+void ihevce_rc_interface_update(
+ void *pv_ctxt,
+ IV_PICTURE_CODING_TYPE_T pic_type,
+ rc_lap_out_params_t *ps_rc_lap_out,
+ WORD32 i4_avg_frame_hevc_qp,
+ WORD32 i4_enc_frm_id);
+
+void ihevce_rc_store_retrive_update_info(
+ void *pv_ctxt,
+ rc_bits_sad_t *ps_rc_frame_stat,
+ WORD32 i4_enc_frm_id,
+ WORD32 bit_rate_id,
+ WORD32 i4_store_retrive,
+ WORD32 *pout_buf_id,
+ WORD32 *pi4_pic_type,
+ WORD32 *pcur_qp,
+ void *ps_lap_out,
+ void *ps_rc_lap_out);
+
+void ihevce_set_L0_scd_qp(void *pv_rc_ctxt, WORD32 i4_scd_qp);
+
+void rc_set_gop_inter_complexity(void *pv_rc_ctxt, rc_lap_out_params_t *ps_rc_lap_out);
+
+void rc_set_subgop_inter_complexity(void *pv_rc_ctxt, rc_lap_out_params_t *ps_rc_lap_out);
+
+void rc_set_gop_complexity_for_bit_allocation(void *pv_rc_ctxt, rc_lap_out_params_t *ps_rc_lap_out);
+
+float rc_get_buffer_level_unclip(void *pv_rc_ctxt);
+
+void ihevce_rc_populate_common_params(
+ ihevce_lap_output_params_t *ps_lap_out, rc_lap_out_params_t *ps_rc_lap_out);
+#endif
diff --git a/encoder/ihevce_rc_structs.h b/encoder/ihevce_rc_structs.h
new file mode 100644
index 0000000..ab6583a
--- /dev/null
+++ b/encoder/ihevce_rc_structs.h
@@ -0,0 +1,458 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+*
+* @file ihevce_rc_structs.h
+*
+* @brief
+* This file contains rc interface structures and prototypes
+*
+* @author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_RC_STRUCTS_H_
+#define _IHEVCE_RC_STRUCTS_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+#define MAX_NUM_TEMPORAL_LAYERS 4
+#define HALF_MAX_SCENE_ARRAY_QP (MAX_SCENE_NUM / 2) /* parenthesized so it expands as one operand */
+
+/*moderate value of fsim to be passed when LAP is not enabled*/
+#define MODERATE_FSIM_VALUE 110
+#define MODERATE_LAP2_COMPLEXITY_Q7 25
+
+/*also present in RATE CONTROL HEADER FILE with same name*/
+#define MAX_LAP_COMPLEXITY_Q7 90
+
+/*value of maximum variance in content used to generate offline model.*/
+#define MAX_LAP_VAR 1000
+#define AVG_LAP_VAR 400
+
+/*buffer to store bit consumption between rdopt and entropy to calculate correction in entropy thread*/
+#define NUM_BUF_RDOPT_ENT_CORRECT (NUM_FRMPROC_ENTCOD_BUFS + 1) //+(1<<FRAME_PARALLEL_LVL))
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+/**
+******************************************************************************
+ * @brief Enumeration of the memory records (RC_*) requested by the rc interface module
+******************************************************************************
+ */
+typedef enum
+{
+ RC_CTXT = 0,
+ RC_QSCALE_TO_QP,
+ RC_QP_TO_QSCALE,
+ RC_QP_TO_QSCALE_Q_FACTOR,
+ RC_MULTI_PASS_GOP_STAT,
+
+ /* should always be the last entry: it doubles as the number of records */
+ NUM_RC_MEM_RECS
+
+} IHEVCE_RC_MEM_TABS_T;
+
+/*****************************************************************************/
+/* Structures */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+ * @brief pre enc qp queue struct (element type of as_pre_enc_qp_queue in rc_context_t)
+******************************************************************************
+ */
+typedef struct
+{
+ WORD32 ai4_quant[NUM_RC_PIC_TYPE]; /* one quant value per rc picture type */
+ WORD32 i4_scd_qp; /* presumably the qp to use on a scene cut (scd) - TODO confirm */
+ WORD32 i4_is_qp_valid; /* validity flag for this queue entry */
+} pre_enc_qp_queue;
+
+typedef struct /* "previous picture" L1 metrics, one slot per picture type */
+{
+ LWORD64 ai8_L1_prev_I_intra_raw_satd[MAX_PIC_TYPE];
+ LWORD64 ai8_L1_prev_pic_coarse_me_cost[MAX_PIC_TYPE];
+ LWORD64 ai8_L1_prev_pic_coarse_me_sad[MAX_PIC_TYPE];
+ UWORD32 au4_prev_scene_num[MAX_PIC_TYPE];
+} rc_L1_state_t;
+
+/**
+******************************************************************************
+ * @brief vbv compliance testing struct
+******************************************************************************
+*/
+typedef struct vbv_compliance_t
+{
+ /** frame rate */
+ float f_frame_rate;
+
+ /** bit rate */
+ float f_bit_rate;
+
+ /** current buffer level */
+ float f_curr_buffer_level;
+
+ /** current buffer level, unclipped, for the current frame */
+ float f_curr_buffer_level_unclip;
+
+ /** total buffer size */
+ float f_buffer_size;
+
+ /** drain rate */
+ float f_drain_rate;
+ /** previous cpb_removal_delay minus 1 */
+ UWORD32 u4_prev_cpb_removal_delay_minus1;
+
+} vbv_compliance_t;
+
+/* structure defined to maintain the qp's of Non reference b pictures based on reference */
+/* b pictures of next layer to handle in steady state, SCD and Non_I_SCD's. The offset is */
+/* based on the temporal complexities of the sub GOP */
+typedef struct
+{
+ WORD32 i4_enc_order_num_rc;
+
+ WORD32 i4_non_ref_B_pic_qp;
+
+ UWORD32 u4_scene_num_rc;
+
+} non_ref_b_qp_store_t;
+/* structure to get high level stats from rc, used to adjust/clip the QP in case
+it causes encoder buffer overflow*/
+typedef struct
+{
+ /*online model valid flag*/
+ WORD32 i4_is_model_valid;
+
+ /*model given QP if model is valid (either offline or online),
+ else set to INVALID_QP*/
+ WORD32 i4_modelQP;
+
+ /*final RC QP, must always be valid*/
+ WORD32 i4_finalQP;
+
+ /* QP to reach maxEbf if model is valid*/
+ WORD32 i4_maxEbfQP;
+
+ /* bits for final QP if model is valid*/
+ LWORD64 i8_bits_from_finalQP;
+
+ /*offline model flag for I scd, non i scd, I only scd*/
+ WORD32 i4_is_offline_model_used;
+
+} rc_high_level_stat_t;
+
+typedef struct
+{
+ /* START of static parameters*/
+ rate_control_handle rc_hdl;
+ rc_type_e e_rate_control_type;
+ UWORD8 u1_is_mb_level_rc_on;
+ /* bit rate to be achieved across the entire file */
+ UWORD32 u4_avg_bit_rate;
+ /* max possible drain rate */
+ UWORD32 au4_peak_bit_rate[MAX_PIC_TYPE];
+ UWORD32 u4_min_bit_rate;
+ /* frames per 1000 seconds */
+ UWORD32 u4_max_frame_rate;
+ /* Buffer delay for CBR */
+ UWORD32 u4_max_delay;
+ /* Intraframe interval equal to GOP size */
+ UWORD32 u4_intra_frame_interval;
+ /* IDR period which indicates occurrence of open GOP */
+ UWORD32 u4_idr_period;
+ /* Initial Qp array for I and P frames */
+ WORD32 ai4_init_qp[MAX_PIC_TYPE];
+ //0x3fffffff; /* Max VBV buffer size */
+ UWORD32 u4_max_vbv_buff_size;
+ /* Max interval between I and P frame */
+ WORD32 i4_max_inter_frm_int;
+ /* Whether GOP is open or closed */
+ WORD32 i4_is_gop_closed;
+ WORD32 ai4_min_max_qp[MAX_PIC_TYPE * 2];
+ /* Whether to use estimated SAD or Previous I frame SAD */
+ WORD32 i4_use_est_intra_sad;
+ UWORD32 u4_src_ticks;
+ UWORD32 u4_tgt_ticks;
+
+ WORD32 i4_auto_generate_init_qp;
+
+ WORD32 i4_frame_width;
+ WORD32 i4_frame_height;
+
+ WORD32 i4_min_frame_qp;
+ WORD32 i4_max_frame_qp;
+
+ WORD32 i4_init_vbv_fullness;
+ /* Num frames in lap window*/
+ WORD32 i4_num_frame_in_lap_window;
+ /** Max temporal layer configured at init time*/
+ WORD32 i4_max_temporal_lyr;
+ /*Number of active picture type. Depends on max temporal reference*/
+ WORD32 i4_num_active_pic_type;
+ /* User defined constant qp or init qp to be used during scene cut*/
+ WORD32 i4_init_frame_qp_user;
+ /* To remember whether the pic type is field:1 or not:0*/
+ WORD32 i4_field_pic;
+ /*To convey whether top field is encoded first:1 or bottom field :0*/
+ WORD32 i4_top_field_first;
+ /** Quality preset to choose offline model coeff*/
+ WORD32 i4_quality_preset;
+ /*populate init pre enc qp based on bpp for all pictype*/
+ WORD32 ai4_init_pre_enc_qp[MAX_PIC_TYPE];
+ WORD32 i4_initial_decoder_delay_frames;
+
+ float f_vbr_max_peak_sustain_dur;
+ LWORD64 i8_num_frms_to_encode;
+
+ WORD32 i4_min_scd_hevc_qp;
+
+ UWORD8 u1_bit_depth;
+
+ rc_quant_t *ps_rc_quant_ctxt;
+
+ WORD32 i4_rc_pass;
+ /*Memory allocated for storing GOP level stat*/
+ void *pv_gop_stat;
+
+ LWORD64 i8_num_gop_mem_alloc;
+
+ WORD32 i4_is_infinite_gop;
+
+ WORD32 ai4_offsets[5];
+ /*End of static parameters */
+
+ /* Start of parameters updated and accessed during pre-enc*/
+ rc_L1_state_t s_l1_state_metric;
+ /*estimate of pre-enc header bits*/
+ LWORD64 i8_est_I_pic_header_bits;
+ /** previous frame estimated L0 SATD/act predicted using pre-enc intra SAD*/
+ LWORD64 ai8_prev_frame_est_L0_satd[MAX_PIC_TYPE];
+
+ LWORD64 ai8_prev_frame_pre_intra_sad[MAX_PIC_TYPE];
+
+ LWORD64 ai8_prev_frame_hme_sad[MAX_PIC_TYPE];
+
+ /** Is previous frame intra sad available. set = 1 when at least one frame of each picture type has been encoded*/
+ WORD32 i4_is_est_L0_intra_sad_available;
+
+ FILE *pf_stat_file;
+
+ /* END of parameters updated and accessed during pre-enc */
+
+ /* START of parameters updated during update call and accessed in other threads (pre enc/entropy)*/
+
+ /*variables related to creation of pre enc qp queue*/
+ pre_enc_qp_queue as_pre_enc_qp_queue[MAX_PRE_ENC_RC_DELAY];
+ /*Remember RDOPT bit consumption, and corresponding time stamp*/
+ WORD32 ai4_rdopt_bit_consumption_estimate[NUM_BUF_RDOPT_ENT_CORRECT];
+
+ WORD32 ai4_rdopt_bit_consumption_buf_id[NUM_BUF_RDOPT_ENT_CORRECT];
+
+ WORD32 i4_rdopt_bit_count;
+
+ /*Remember entropy bit consumption and corresponding time stamp*/
+ WORD32 ai4_entropy_bit_consumption[NUM_BUF_RDOPT_ENT_CORRECT];
+
+ WORD32 ai4_entropy_bit_consumption_buf_id[NUM_BUF_RDOPT_ENT_CORRECT];
+
+ WORD32 i4_entropy_bit_count;
+
+ WORD32 i4_pre_enc_qp_read_index;
+
+ WORD32 i4_pre_enc_qp_write_index;
+
+ WORD32 i4_use_qp_offset_pre_enc;
+
+ WORD32 i4_num_frms_from_reset;
+ /*Call back functions for print/write operations*/
+ ihevce_sys_api_t *ps_sys_rc_api;
+
+ LWORD64 i8_num_frame_read;
+
+ LWORD64 i8_num_bit_alloc_period;
+
+ vbv_compliance_t s_vbv_compliance;
+
+ WORD32 i4_next_sc_i_in_rc_look_ahead;
+
+ LWORD64 i8_new_bitrate;
+ /*Set to -1 when no request. Positive value indicates pending change in bitrate request*/ //FRAME
+
+ LWORD64 i8_new_peak_bitrate;
+
+ WORD32 i4_num_frames_subgop;
+
+ WORD32 i4_is_last_frame_scan;
+
+ LWORD64 i8_total_acc_coarse_me_sad;
+
+ WORD32 i4_L0_frame_qp;
+
+ /** prev pic scene num of same temporal id*/
+ UWORD32 au4_scene_num_temp_id[MAX_NUM_TEMPORAL_LAYERS];
+
+ /* END of parameters updated during update call and accessed in other threads (pre enc/entropy)*/
+
+ /* START of parameters to be updated at the query QP level(updation) */
+
+ /** Intra frame cost exported by pre-enc IPE for current frame*/
+ ULWORD64 ai8_cur_frm_intra_cost[MAX_NUM_ENC_LOOP_PARALLEL];
+ /** remember prev frame intra cost*/
+ ULWORD64 i8_prev_i_frm_cost;
+ /* Current frame inter cost from coarse ME*/
+ LWORD64 ai8_cur_frame_coarse_ME_cost[MAX_NUM_ENC_LOOP_PARALLEL];
+ /** Flag for first frame so that same logic as scd can be used(offline data)*/
+ WORD32 i4_is_first_frame_encoded;
+ /*Flag to remember to reset I model only based on SCD detection based on open loop SATD
+ of two consecutive I pic*/
+ WORD32 ai4_I_model_only_reset[MAX_NUM_ENC_LOOP_PARALLEL];
+ /** prev pic intra cost for I pic and coarse ME cost for rest of picture types
+ For intra L0 cost is available and HME cost is on L1 layer*/
+ LWORD64 ai8_prev_frm_pre_enc_cost[MAX_PIC_TYPE];
+ /*previous qp used to encode*/
+ WORD32 ai4_prev_pic_hevc_qp[MAX_SCENE_NUM][MAX_PIC_TYPE];
+
+ WORD32 ai4_scene_numbers[MAX_SCENE_NUM];
+
+ /* END of parameters to be updated at the query QP level */
+
+ /* START of parameters to be maintained array for Enc loop parallelism */
+
+ /** is scene cut frame at base layer*/
+ WORD32 ai4_is_frame_scd[MAX_NUM_ENC_LOOP_PARALLEL]; //ELP_RC
+ /*Flag to remember frames that are detected as scene cut but not made I due to another SCD following it immediately*/
+ WORD32 ai4_is_non_I_scd_pic[MAX_NUM_ENC_LOOP_PARALLEL]; //ELP_RC
+ /*Flag to remember pause to resume so that only P and B models can be reset*/
+ WORD32 ai4_is_pause_to_resume[MAX_NUM_ENC_LOOP_PARALLEL]; //ELP_RC
+ /*Frame similarity over look ahead window*/
+ WORD32 ai4_lap_f_sim[MAX_NUM_ENC_LOOP_PARALLEL]; //ELP_RC
+ /*Overall lap complexity including inter and intra in q7 format*/
+ WORD32 ai4_lap_complexity_q7[MAX_NUM_ENC_LOOP_PARALLEL]; //ELP_RC
+
+ float af_sum_weigh[MAX_NUM_ENC_LOOP_PARALLEL][MAX_PIC_TYPE][3];
+
+ WORD32 ai4_is_cmplx_change_reset_model[MAX_NUM_ENC_LOOP_PARALLEL];
+
+ WORD32 ai4_is_cmplx_change_reset_bits[MAX_NUM_ENC_LOOP_PARALLEL];
+
+ float ai_to_avg_bit_ratio[MAX_NUM_ENC_LOOP_PARALLEL];
+
+ WORD32 ai4_num_scd_in_lap_window[MAX_NUM_ENC_LOOP_PARALLEL];
+
+ WORD32 ai4_num_frames_b4_scd[MAX_NUM_ENC_LOOP_PARALLEL];
+
+ /* END of parameters to be maintained array for Enc loop parallelism */
+
+ UWORD32 u4_prev_scene_num;
+
+ WORD32 ai4_qp_for_previous_scene[MAX_PIC_TYPE];
+
+ UWORD32 au4_prev_scene_num_pre_enc[MAX_PIC_TYPE];
+
+ WORD32 ai4_qp_for_previous_scene_pre_enc[MAX_PIC_TYPE];
+
+ UWORD32 u4_scene_num_est_L0_intra_sad_available;
+
+ non_ref_b_qp_store_t as_non_ref_b_qp[MAX_NON_REF_B_PICS_IN_QUEUE_SGI];
+
+ UWORD32 au4_prev_scene_num_multi_scene[MAX_NON_REF_B_PICS_IN_QUEUE_SGI];
+
+ WORD32 ai4_qp_for_previous_scene_multi_scene[MAX_NON_REF_B_PICS_IN_QUEUE_SGI][MAX_PIC_TYPE];
+
+ WORD32 i4_prev_qp_ctr;
+
+ WORD32 i4_cur_scene_num;
+
+ WORD32 i4_non_ref_B_ctr;
+
+ float af_sum_weigh_2_pass[MAX_PIC_TYPE][3];
+
+ rc_bits_sad_t as_rc_frame_stat_store[MAX_NUM_ENC_LOOP_PARALLEL]
+ [IHEVCE_MAX_NUM_BITRATES]; //ELP_RC
+
+ WORD32 out_buf_id[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; //ELP_RC
+
+ WORD32 i4_pic_type[MAX_NUM_ENC_LOOP_PARALLEL]; //ELP_RC
+
+ WORD32 cur_qp[MAX_NUM_ENC_LOOP_PARALLEL][IHEVCE_MAX_NUM_BITRATES]; //ELP_RC
+
+ ihevce_lap_output_params_t as_lap_out[MAX_NUM_ENC_LOOP_PARALLEL]; //ELP_RC
+
+ rc_lap_out_params_t as_rc_lap_out[MAX_NUM_ENC_LOOP_PARALLEL]; //ELP_RC
+
+ WORD32 i4_complexity_bin;
+
+ WORD32 i4_last_p_or_i_frame_gop;
+
+ WORD32 i4_qp_at_I_frame_for_skip_sad;
+
+ WORD32 i4_denominator_i_to_avg;
+
+ WORD32 i4_no_more_set_rbip_for_cur_gop;
+
+ WORD32 i4_num_frm_scnd_fr_alloc;
+
+ WORD32 i4_last_disp_num_scanned;
+
+ LWORD64 i8_l1_analysis_lap_comp;
+
+ WORD32 i4_est_text_bits_ctr_get_qp; //ELP_RC
+
+ WORD32 i4_est_text_bits_ctr_update_qp; //ELP_RC
+
+ WORD32 i4_num_frame_parallel; //ELP_RC
+
+ WORD32 i4_scene_num_latest;
+
+ WORD32 i4_pre_enc_rc_delay;
+
+ /*Enable this flag to do bit allocation within a gop
+ in second pass based on first pass data*/
+ WORD32 i4_fp_bit_alloc_in_sp;
+
+ WORD32 i4_bitrate_changed;
+
+ /* Flag which shows that capped vbr mode is enabled */
+ WORD32 i4_capped_vbr_flag;
+
+ rc_high_level_stat_t s_rc_high_lvl_stat;
+
+ WORD32 i4_normal_inter_pic;
+
+ WORD32 i4_br_id_for_2pass;
+
+ WORD32 ai4_scene_num_last_pic[MAX_PIC_TYPE];
+
+ WORD32 ai4_last_tw0_lyr0_pic_qp[2];
+} rc_context_t;
+
+/* NOTE:: Please add any new parameters according to the categorization as specified in the comments of */
+/* the structure definition. Start and end of each category are marked in the definition */
+
+#endif
diff --git a/encoder/ihevce_rdoq_macros.h b/encoder/ihevce_rdoq_macros.h
new file mode 100644
index 0000000..63ea9a3
--- /dev/null
+++ b/encoder/ihevce_rdoq_macros.h
@@ -0,0 +1,229 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ihevce_rdoq_macros.h
+*
+* @brief
+* Macros used for RDOQ algorithm
+*
+* @author
+* Ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+#ifndef IHEVC_RDOQ_MACROS_H_
+#define IHEVC_RDOQ_MACROS_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+/*Used for calculating the distortion in the transform domain (every macro argument is parenthesized)*/
+#define CALC_SSD_IN_TRANS_DOMAIN(a, b, i4_round_val, i4_shift_val) \
+ (SHR_NEG(((((a) - (b)) * ((a) - (b))) + (i4_round_val)), (i4_shift_val)))
+#define CALC_CUMMUL_SSD_IN_TRANS_DOMAIN(a, b, i4_round_val, i4_shift_val) \
+ (SHR_NEG((((a) - (b)) + (i4_round_val)), (i4_shift_val)))
+
+#define MAX_INT 0x7FFFFFFF
+
+#define COMPUTE_RATE_COST_CLIP30_RDOQ(r, l, qshift) \
+ ((WORD32)CLIP30((((ULWORD64)(r)) * ((ULWORD64)(l))) >> (qshift)))
+
+/*This macro is required to test the RDOQ changes*/
+/*1 implies cabac context validation using the test-bench*/
+/*Also prints some debug information*/
+#define TEST_BENCH_RDOQ 0
+
+/*Macro to enable and disable coefficient RDOQ. When 1, coefficient RDOQ is enabled*/
+#define COEFF_RDOQ 0
+
+/*Macro to optimize the copying of cabac states across various temp/scratch cabac contexts
+ Should always be 0 when COEFF_RDOQ is 1*/
+#define OPT_MEMCPY 1
+
+/** Macro which accounts for (subtracts) 4096 bits from the total bits generated per TU in the RDOPT stage
+ if SBH is on*/
+#define ACCOUNT_SIGN_BITS 0
+
+/*Macro defining the maximum number of context elements in the cabac state*/
+//#define MAX_NUM_CONTEXT_ELEMENTS 5
+
+/*****************************************************************************/
+/* Enum */
+/*****************************************************************************/
+/*Enum to indicate which context element in the cabac state is currently being altered*/
+typedef enum
+{
+ LASTXY,
+ SUB_BLK_CODED_FLAG,
+ SIG_COEFF,
+ GRTR_THAN_1,
+ GRTR_THAN_2,
+ MAX_NUM_CONTEXT_ELEMENTS /* count of the entries above; sizes au1_ctxt_to_backup */
+} BACKUP_CTXT_ELEMENTS;
+
+/*****************************************************************************/
+/* Structures */
+/*****************************************************************************/
+
+/*Structure defined to optimize copying of cabac states across various temporary/scratch cabac states*/
+typedef struct
+{
+ // clang-format off
+ /**
+ au1_ctxt_to_backup[x] tells us if xth element has been altered. where
+ x context element Meaning
+ 0 IHEVC_CAB_COEFFX_PREFIX lastx last y has been coded
+ 1 IHEVC_CAB_CODED_SUBLK_IDX sub-blk coded or not flag has been coded
+ 2 IHEVC_CAB_COEFF_FLAG sigcoeff has been coded
+ 3 IHEVC_CAB_COEFABS_GRTR1_FLAG greater than 1 bin has been coded
+ 4 IHEVC_CAB_COEFABS_GRTR2_FLAG greater than 2 bin has been coded
+ */
+ // clang-format on
+ UWORD8 au1_ctxt_to_backup[MAX_NUM_CONTEXT_ELEMENTS];
+
+ /** Number of bits generated */
+ WORD32 i4_num_bits;
+} backup_ctxt_t;
+
+/**
+******************************************************************************
+* @brief Structure to store the position of the coefficient to be changed
+ through SBH (sign bit hiding)
+******************************************************************************
+ */
+typedef struct
+{
+ UWORD8 x; /* x position of the coefficient */
+ UWORD8 y; /* y position of the coefficient */
+ UWORD8 is_valid_pos; /* flag: presumably non-zero when x/y hold a usable position - TODO confirm */
+ WORD16 i2_old_coeff; /* presumably the coefficient value before alteration - TODO confirm */
+} s_sbh_coeff_pos_t;
+
+/**
+******************************************************************************
+ * @brief RDOQ SBH context for cabac bit estimation etc
+******************************************************************************
+ */
+
+typedef struct
+{
+ /** TU size */
+ WORD32 i4_trans_size;
+
+ /** Log 2 TU size */
+ WORD32 i4_log2_trans_size;
+
+ /**
+ * Boolean value representing if the current TU is luma or not.
+ * 1 => Luma
+ */
+ WORD32 i4_is_luma;
+
+ /**
+ * Rounding and shifting values required for normalizing original
+ * and inverse quantized transform coefficients (for calculation of SSD in
+ * transform domain)
+ */
+ WORD32 i4_round_val_ssd_in_td;
+ WORD32 i4_shift_val_ssd_in_td;
+
+ /** Matrix used in inverse quantization */
+ WORD32 quant_scale_mat_offset;
+
+ /** Index of the csb within the TU*/
+ WORD32 i4_trans_idx;
+
+ /** value of lambda used in the D+Rlambda metric*/
+ LWORD64 i8_cl_ssd_lambda_qf;
+
+ /** Used while inverse quantizing*/
+ WORD16 i2_qp_rem;
+ WORD32 i4_qp_div;
+
+ /** Scan index of the csbs within the TU */
+ WORD32 i4_scan_idx;
+
+ /** Pointer to the csbf buf. This buffer will contain 1 if the csb is coded
+ * and 0 if it is not*/
+ UWORD8 *pu1_csbf_buf;
+
+ /** Boolean value which is 1 if any of the csbs in the current TU are
+ * coded*/
+ UWORD8 i1_tu_is_coded;
+
+ /**
+ * Pointer to an array of pointer to store the scaling matrices for
+ * all transform sizes and qp % 6 (pre computed)
+ */
+ WORD16 *pi2_dequant_coeff;
+
+ /** Pointer to the quantized coeffs*/
+ WORD16 *pi2_quant_coeffs;
+
+ /** Pointer to the inverse quantized values*/
+ WORD16 *pi2_iquant_coeffs;
+
+ /** Pointer to the transformed values (before quantization) */
+ WORD16 *pi2_trans_values;
+
+ /** Stride of the inverse quant data*/
+ WORD32 i4_iq_data_strd;
+
+ /** Stride of the quant data*/
+ WORD32 i4_q_data_strd;
+
+ /** Intermediate array to store transform output for RDOQ*/
+ WORD16 ai2_trans_values[MAX_TRANS_SIZE];
+
+ /** Pointer to zero rows and zero cols*/
+ WORD32 *pi4_zero_row;
+ WORD32 *pi4_zero_col;
+
+ /** Array containing information about the position of the coefficient
+ * to be altered during SBH
+ */
+ s_sbh_coeff_pos_t s_best_pos[(MAX_TU_SIZE * MAX_TU_SIZE / 4 / 4) + 1];
+
+ /** SSD cost for this particular TU*/
+ LWORD64 i8_ssd_cost;
+
+ WORD32 i4_perform_all_cand_rdoq;
+ WORD32 i4_perform_best_cand_rdoq;
+ WORD32 i4_perform_all_cand_sbh;
+ WORD32 i4_perform_best_cand_sbh;
+
+ WORD32 i4_bit_depth;
+
+ WORD32 *pi4_subBlock2csbfId_map;
+
+} rdoq_sbh_ctxt_t;
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+void ihevce_sign_data_hiding(rdoq_sbh_ctxt_t *ps_rdoq_sbh_params);
+
+#endif /* IHEVC_RDOQ_MACROS_H_ */
diff --git a/encoder/ihevce_recur_bracketing.c b/encoder/ihevce_recur_bracketing.c
new file mode 100644
index 0000000..9438791
--- /dev/null
+++ b/encoder/ihevce_recur_bracketing.c
@@ -0,0 +1,3440 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*!
+******************************************************************************
+* \file ihevce_recur_bracketing.c
+*
+* \brief
+* This file contains interface functions of recursive bracketing
+* module
+* \date
+* 12/02/2012
+*
+* \author
+* Ittiam
+*
+* List of Functions
+*
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_entropy_structs.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+#include "ihevce_enc_loop_structs.h"
+#include "ihevce_ipe_instr_set_router.h"
+#include "ihevce_ipe_structs.h"
+#include "ihevce_ipe_pass.h"
+#include "ihevce_recur_bracketing.h"
+#include "ihevce_nbr_avail.h"
+#include "ihevc_common_tables.h"
+#include "ihevce_decomp_pre_intra_structs.h"
+#include "ihevce_decomp_pre_intra_pass.h"
+
+#include "cast_types.h"
+#include "osal.h"
+#include "osal_defaults.h"
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+#define IP_DBG_L1_l2 0
+#define CHILD_BIAS 12
+
+/*****************************************************************************/
+/* Globals */
+/*****************************************************************************/
+extern pf_intra_pred g_apf_lum_ip[10];
+
+extern WORD32 g_i4_ip_funcs[MAX_NUM_IP_MODES];
+
+UWORD8 gau1_cu_pos_x[64] = { 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1, 2, 3, 2, 3, 4, 5, 4, 5, 6, 7,
+ 6, 7, 4, 5, 4, 5, 6, 7, 6, 7, 0, 1, 0, 1, 2, 3, 2, 3, 0, 1, 0, 1,
+ 2, 3, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 4, 5, 4, 5, 6, 7, 6, 7 }; /* x (0..7) of entry i when an 8x8 grid is walked in z-scan order */
+
+UWORD8 gau1_cu_pos_y[64] = { 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3, 0, 0, 1, 1, 0, 0,
+ 1, 1, 2, 2, 3, 3, 2, 2, 3, 3, 4, 4, 5, 5, 4, 4, 5, 5, 6, 6, 7, 7,
+ 6, 6, 7, 7, 4, 4, 5, 5, 4, 4, 5, 5, 6, 6, 7, 7, 6, 6, 7, 7 }; /* y (0..7) of entry i when an 8x8 grid is walked in z-scan order */
+
+#define RESET_BIT(x, bit) ((x) = (x) & ~((WORD32)1 << (bit))) /* clears bit number 'bit' in lvalue x; arguments parenthesized */
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_update_cand_list \endif
+*
+* \brief
+* Writes the best intra mode of a finalised CU into the CTB mode map
+*
+* \param[in] ps_cu_node : pointer to cu node info buffer (u2_x0, u2_y0, u1_cu_size, best_mode)
+* \param[in] ps_ed_blk_l1 : pointer to level 1 and 2 decision buffer (unused)
+* \param[in,out] ps_ctxt : ipe context whose au1_ctb_mode_map is updated
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+* \note Map start is (((u2_x0 << 3) >> 2) + 1, ((u2_y0 << 3) >> 2) + 1), span is u1_cu_size >> 2
+*****************************************************************************
+*/
+void ihevce_update_cand_list(
+    ihevce_ipe_cu_tree_t *ps_cu_node, ihevce_ed_blk_t *ps_ed_blk_l1, ihevce_ipe_ctxt_t *ps_ctxt)
+{
+    /* Top-left map entry of the CU and the number of entries it covers per side */
+    WORD32 x_start = ((ps_cu_node->u2_x0 << 3) >> 2) + 1;
+    WORD32 y_start = ((ps_cu_node->u2_y0 << 3) >> 2) + 1;
+    WORD32 num_entries = ps_cu_node->u1_cu_size >> 2;
+    WORD32 x_end = x_start + num_entries;
+    WORD32 y_end = y_start + num_entries;
+    WORD32 map_x, map_y;
+
+    (void)ps_ed_blk_l1; /* kept in the signature only */
+
+    /* Stamp the CU's best mode over every map entry it covers */
+    for(map_y = y_start; map_y < y_end; map_y++)
+    {
+        for(map_x = x_start; map_x < x_end; map_x++)
+            ps_ctxt->au1_ctb_mode_map[map_y][map_x] = ps_cu_node->best_mode;
+    }
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_intra_populate_mode_bits_cost_bracketing \endif
+*
+* \brief
+* Derives the 3 MPM candidates from left/top modes and fills per-mode bit costs
+*
+* \param[in] top_intra_mode : Top available intra mode
+* \param[in] left_intra_mode : Left available intra mode
+* \param[in] available_top : Top availability flag
+* \param[in] available_left : Left availability flag
+* \param[in] cu_pos_y : cu position wrt to CTB
+* \param[out] mode_bits_cost : per-mode bit cost buffer (entries 0..34 written)
+* \param[out] mode_bits : per-mode bit count buffer (entries 0..34 written)
+* \param[in] lambda : Lambda value (SAD/SATD)
+* \param[out] cand_mode_list : candidate (MPM) list buffer (3 entries written)
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*****************************************************************************
+*/
+void ihevce_intra_populate_mode_bits_cost_bracketing(
+    WORD32 top_intra_mode,
+    WORD32 left_intra_mode,
+    WORD32 available_top,
+    WORD32 available_left,
+    WORD32 cu_pos_y,
+    UWORD16 *mode_bits_cost,
+    UWORD16 *mode_bits,
+    WORD32 lambda,
+    WORD32 *cand_mode_list)
+{
+    /* local variables */
+    WORD32 i;
+    WORD32 cand_intra_pred_mode_left, cand_intra_pred_mode_top;
+
+    UWORD16 one_bits_cost =
+        COMPUTE_RATE_COST_CLIP30(4, lambda, (LAMBDA_Q_SHIFT + 1)); //1.5 * lambda
+    UWORD16 two_bits_cost =
+        COMPUTE_RATE_COST_CLIP30(6, lambda, (LAMBDA_Q_SHIFT + 1)); //2.5 * lambda
+    UWORD16 five_bits_cost =
+        COMPUTE_RATE_COST_CLIP30(12, lambda, (LAMBDA_Q_SHIFT + 1)); //5.5 * lambda
+
+    for(i = 0; i < 35; i++)
+    {
+        mode_bits_cost[i] = five_bits_cost;
+        mode_bits[i] = 5;
+    }
+
+    /* EIID: set availability flag to zero if modes are invalid (skipped CU's carry
+    mode 255 (-1) though available). Valid modes are 0..34, matching the 35 entries
+    initialised above; 35 itself must be rejected or it would index past them */
+    if(34 < top_intra_mode || 0 > top_intra_mode)
+        available_top = 0;
+    if(34 < left_intra_mode || 0 > left_intra_mode)
+        available_left = 0;
+
+    /* Calculate cand_intra_pred_mode_N as per sec. 8.4.2 in JCTVC-J1003_d7 */
+    /* N = top */
+    if(0 == available_top)
+    {
+        cand_intra_pred_mode_top = INTRA_DC;
+    }
+    /* for neighbour != INTRA, setting DC is done outside */
+    else if(0 == cu_pos_y) /* It's on the CTB boundary */
+    {
+        cand_intra_pred_mode_top = INTRA_DC;
+    }
+    else
+    {
+        cand_intra_pred_mode_top = top_intra_mode;
+    }
+
+    /* N = left */
+    if(0 == available_left)
+    {
+        cand_intra_pred_mode_left = INTRA_DC;
+        //cand_intra_pred_mode_left = cand_intra_pred_mode_top;
+    }
+    /* for neighbour != INTRA, setting DC is done outside */
+    else
+    {
+        cand_intra_pred_mode_left = left_intra_mode;
+    }
+
+    /* Calculate cand_mode_list as per sec. 8.4.2 in JCTVC-J1003_d7 */
+    if(cand_intra_pred_mode_left == cand_intra_pred_mode_top)
+    {
+        if(cand_intra_pred_mode_left < 2)
+        {
+            cand_mode_list[0] = INTRA_PLANAR;
+            cand_mode_list[1] = INTRA_DC;
+            cand_mode_list[2] = INTRA_ANGULAR(26); /* angular 26 = Vertical */
+        }
+        else
+        {
+            cand_mode_list[0] = cand_intra_pred_mode_left;
+            cand_mode_list[1] = 2 + ((cand_intra_pred_mode_left + 29) % 32);
+            cand_mode_list[2] = 2 + ((cand_intra_pred_mode_left - 2 + 1) % 32);
+        }
+    }
+    else
+    {
+        if(0 == available_left)
+        {
+            cand_mode_list[0] = cand_intra_pred_mode_top;
+            cand_mode_list[1] = cand_intra_pred_mode_left;
+        }
+        else
+        {
+            cand_mode_list[0] = cand_intra_pred_mode_left;
+            cand_mode_list[1] = cand_intra_pred_mode_top;
+        }
+        if((cand_intra_pred_mode_left != INTRA_PLANAR) &&
+           (cand_intra_pred_mode_top != INTRA_PLANAR))
+        {
+            cand_mode_list[2] = INTRA_PLANAR;
+        }
+        else if((cand_intra_pred_mode_left != INTRA_DC) && (cand_intra_pred_mode_top != INTRA_DC))
+        {
+            cand_mode_list[2] = INTRA_DC;
+        }
+        else
+        {
+            cand_mode_list[2] = INTRA_ANGULAR(26);
+        }
+    }
+    mode_bits_cost[cand_mode_list[0]] = one_bits_cost;
+    mode_bits_cost[cand_mode_list[1]] = two_bits_cost;
+    mode_bits_cost[cand_mode_list[2]] = two_bits_cost;
+
+    mode_bits[cand_mode_list[0]] = 2;
+    mode_bits[cand_mode_list[1]] = 3;
+    mode_bits[cand_mode_list[2]] = 3;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_pu_calc_4x4_blk \endif
+*
+* \brief
+* 4x4 pu (8x8 CU) mode decision using step 8421 method
+*
+* \description
+* Coarse-to-fine intra mode search for a single 4x4 block:
+*  1. pf_ed_4x4_find_best_modes() supplies the starting angular mode and its
+*     SAD cost (the "level 4" result, per the in-code comments).
+*  2. The modes at +/-2 around it are tried with SAD (level 2).
+*  3. Planar (0) and DC (1) are always evaluated; when level 1 refinement is
+*     enabled the best angular mode and its +/-1 neighbours are evaluated as
+*     well, using SATD or SAD depending on ps_ctxt->u1_use_satd.
+*  4. The evaluated modes are sorted by cost and the best 3 are returned.
+*
+* When level 1 refinement is disabled only 2 modes are evaluated; the third
+* output entry is then INTRA_DC with cost MAX_INTRA_COST_IPE.
+*
+* \param[in] ps_ctxt : pointer to IPE context struct
+* \param[in,out] ps_cu_node : pointer to cu node info buffer; best_mode,
+*                             best_cost and best_satd are updated
+* \param[in] pu1_src : pointer to src pixels
+* \param[in] src_stride : frm source stride
+* \param[in] ref : pointer to reference pixels for prediction
+* \param[in] mode_bits_cost : pointer to mode bits cost buffer, indexed by mode
+* \param[out] best_costs_4x4 : pointer to 3 best cost buffer (ascending)
+* \param[out] best_modes_4x4 : pointer to 3 best mode buffer
+* \param[in] ps_func_selector : pointer to function selector (4x4 residue
+*                               transform used for SATD)
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_pu_calc_4x4_blk(
+    ihevce_ipe_ctxt_t *ps_ctxt,
+    ihevce_ipe_cu_tree_t *ps_cu_node,
+    UWORD8 *pu1_src,
+    WORD32 src_stride,
+    UWORD8 *ref,
+    UWORD16 *mode_bits_cost,
+    WORD32 *best_costs_4x4,
+    UWORD8 *best_modes_4x4,
+    func_selector_t *ps_func_selector)
+{
+    WORD16 *pi2_trans_tmp = ps_ctxt->pi2_trans_tmp;
+    WORD16 *pi2_trans_out = ps_ctxt->pi2_trans_out;
+    UWORD8 u1_use_satd = ps_ctxt->u1_use_satd;
+    UWORD8 u1_level_1_refine_on = ps_ctxt->u1_level_1_refine_on;
+
+    WORD32 i, j, i_end;
+    UWORD8 mode, best_amode;
+    UWORD8 pred[16];
+
+    UWORD16 sad;
+    WORD32 sad_cost;
+    WORD32 best_asad_cost;
+    WORD32 temp;
+
+    /* Every slot is pre-filled with a valid mode: with level 1 refinement  */
+    /* disabled only 2 candidates are evaluated, yet 3 entries are always   */
+    /* copied to best_modes_4x4[]. Without the pre-fill the third entry     */
+    /* would be read from an uninitialised slot.                            */
+    UWORD8 modes_to_eval[5] = { INTRA_DC, INTRA_DC, INTRA_DC, INTRA_DC, INTRA_DC };
+    WORD32 costs_4x4[5];
+    /* Index permutation that is sorted together with costs_4x4[] */
+    UWORD8 modes_4x4[5] = { 0, 1, 2, 3, 4 };
+
+    /* LO resolution hence low resolution disable */
+    WORD32 u1_low_resol = 0;
+    UWORD8 au1_best_modes[1] = { 0 };
+    WORD32 ai4_best_sad_costs[1] = { 0 };
+
+    ihevce_ipe_optimised_function_list_t *ps_ipe_optimised_function_list =
+        &ps_ctxt->s_ipe_optimised_function_list;
+
+    /* Slots that are never evaluated keep the max cost */
+    for(i = 0; i < 5; i++)
+    {
+        costs_4x4[i] = MAX_INTRA_COST_IPE;
+    }
+
+    ps_ipe_optimised_function_list->pf_ed_4x4_find_best_modes(
+        pu1_src,
+        src_stride,
+        ref,
+        mode_bits_cost,
+        au1_best_modes,
+        ai4_best_sad_costs,
+        u1_low_resol,
+        ps_ipe_optimised_function_list->pf_4x4_sad_computer);
+
+    best_amode = au1_best_modes[0];
+    best_asad_cost = ai4_best_sad_costs[0];
+
+    ASSERT(best_amode != 255);
+
+    /* Around best level 4 angular mode, search for best level 2 mode */
+    modes_to_eval[0] = best_amode - 2;
+    modes_to_eval[1] = best_amode + 2;
+    i = 0;
+    i_end = 2;
+    if(best_amode == 2)
+    {
+        /* mode - 2 would fall below the first angular mode */
+        i = 1;
+    }
+    else if(best_amode == 34)
+    {
+        /* mode + 2 would go past the last angular mode */
+        i_end = 1;
+    }
+    for(; i < i_end; i++)
+    {
+        mode = modes_to_eval[i];
+
+        g_apf_lum_ip[g_i4_ip_funcs[mode]](&ref[0], 0, &pred[0], 4, 4, mode);
+
+        sad = ps_ipe_optimised_function_list->pf_4x4_sad_computer(pu1_src, &pred[0], src_stride, 4);
+
+        sad_cost = sad;
+        sad_cost += mode_bits_cost[mode];
+
+        if(sad_cost < best_asad_cost)
+        {
+            best_amode = mode;
+            best_asad_cost = sad_cost;
+        }
+    }
+
+    /* Around best level 2 angular mode, search for best level 1 mode */
+    /* Also evaluate for non-angular mode */
+
+    i = 0;
+    /*Level 1 refinement is disabled for ES preset */
+    if(1 == u1_level_1_refine_on)
+    {
+        if(best_amode != 2)
+        {
+            modes_to_eval[i++] = best_amode - 1;
+        }
+        modes_to_eval[i++] = best_amode;
+    }
+
+    /* Planar and DC are always candidates */
+    modes_to_eval[i++] = 0;
+    modes_to_eval[i++] = 1;
+
+    if(1 == u1_level_1_refine_on)
+    {
+        if(best_amode != 34)
+        {
+            modes_to_eval[i++] = best_amode + 1;
+        }
+    }
+    i_end = i;
+
+    for(i = 0; i < i_end; i++)
+    {
+        mode = modes_to_eval[i];
+
+        g_apf_lum_ip[g_i4_ip_funcs[mode]](&ref[0], 0, &pred[0], 4, 4, mode);
+
+        if(u1_use_satd)
+        {
+            /* Transform the residue and measure the cost on the coefficients */
+            ps_func_selector->ihevc_resi_trans_4x4_ttype1_fptr(
+                pu1_src,
+                &pred[0],
+                (WORD32 *)pi2_trans_tmp,
+                pi2_trans_out,
+                src_stride,
+                4,
+                (4 << 16) | 0);
+
+            sad = ihevce_ipe_pass_satd(pi2_trans_out, 4, 4);
+        }
+        else
+        {
+            sad = ps_ipe_optimised_function_list->pf_4x4_sad_computer(
+                pu1_src, &pred[0], src_stride, 4);
+        }
+        sad_cost = sad;
+        sad_cost += mode_bits_cost[mode];
+
+        costs_4x4[i] = sad_cost;
+    }
+
+    /* Arrange the evaluated costs in ascending order; modes_4x4[] tracks */
+    /* which modes_to_eval[] slot each sorted cost came from               */
+    for(i = 0; i < (i_end - 1); i++)
+    {
+        for(j = i + 1; j < i_end; j++)
+        {
+            if(costs_4x4[i] > costs_4x4[j])
+            {
+                temp = costs_4x4[i];
+                costs_4x4[i] = costs_4x4[j];
+                costs_4x4[j] = temp;
+
+                temp = modes_4x4[i];
+                modes_4x4[i] = modes_4x4[j];
+                modes_4x4[j] = temp;
+            }
+        }
+    }
+
+    /* Always 3 outputs; entries beyond i_end carry MAX_INTRA_COST_IPE */
+    for(i = 0; i < 3; i++)
+    {
+        best_costs_4x4[i] = costs_4x4[i];
+        best_modes_4x4[i] = modes_to_eval[modes_4x4[i]];
+    }
+
+    ps_cu_node->best_mode = best_modes_4x4[0];
+    ps_cu_node->best_cost = best_costs_4x4[0];
+    /* Distortion alone: best cost with the mode bits cost removed */
+    ps_cu_node->best_satd = best_costs_4x4[0] - mode_bits_cost[ps_cu_node->best_mode];
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_pu_calc_8x8_blk \endif
+*
+* \brief
+* 4x4 pu (8x8 CU) mode decision loop using step 8421 method
+*
+* \description
+* Visits the four sub-blocks of the parent CU in raster order. For each one:
+* fetches neighbour availability, derives the mode bits cost from the left
+* and top modes stored in the CTB mode map, builds the intra reference
+* samples, runs ihevce_pu_calc_4x4_blk() and then records the chosen mode in
+* the neighbour map and the CTB mode map so that later sub-blocks see it.
+* NOTE(review): reference substitution and prediction use a fixed size of 4,
+* so the parent CU is presumably always 8x8 here - confirm against callers.
+*
+* \param[in] ps_curr_src : pointer to src pixels struct
+* \param[in] ps_ctxt : pointer to IPE context struct
+* \param[in] ps_cu_node : pointer to cu node info buffer
+* \param[in] ps_func_selector : pointer to function selector, forwarded to
+*                               ihevce_pu_calc_4x4_blk()
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_pu_calc_8x8_blk(
+    iv_enc_yuv_buf_t *ps_curr_src,
+    ihevce_ipe_ctxt_t *ps_ctxt,
+    ihevce_ipe_cu_tree_t *ps_cu_node,
+    func_selector_t *ps_func_selector)
+{
+    WORD32 src_strd = ps_curr_src->i4_y_strd;
+    WORD32 trans_size = ps_cu_node->ps_parent->u1_cu_size >> 1;
+    /* Sub-block size in the units used by the neighbour map calls */
+    WORD32 nbr_blk_size = trans_size >> 2;
+
+    /* Parent CU origin in neighbour map units */
+    WORD32 nbr_x0 = ps_cu_node->ps_parent->u2_x0 << 1;
+    WORD32 nbr_y0 = ps_cu_node->ps_parent->u2_y0 << 1;
+
+    /* Parent CU origin in CTB mode map units; the map entry of a block */
+    /* sits at (+1, +1) from this origin                                */
+    WORD32 map_x0 = (ps_cu_node->ps_parent->u2_x0 << 3) >> 2;
+    WORD32 map_y0 = (ps_cu_node->ps_parent->u2_y0 << 3) >> 2;
+
+    /* Top-left source pixel of the parent CU */
+    UWORD8 *pu1_cu_src = (UWORD8 *)(ps_curr_src->pv_y_buf) +
+                         ((ps_cu_node->ps_parent->u2_y0 << 3) * src_strd) +
+                         (ps_cu_node->ps_parent->u2_x0 << 3);
+
+    ihevc_intra_pred_luma_ref_substitution_ft *pf_ref_substitution =
+        ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr;
+
+    WORD32 blk;
+
+    for(blk = 0; blk < 4; blk++)
+    {
+        WORD32 row = blk >> 1;
+        WORD32 col = blk & 1;
+        WORD32 cand_mode_list[3];
+        nbr_avail_flags_t s_nbr;
+        WORD32 nbr_flags;
+        WORD32 left_intra_mode;
+        WORD32 top_intra_mode;
+
+        WORD32 nbr_x = nbr_x0 + (col * nbr_blk_size);
+        WORD32 nbr_y = nbr_y0 + (row * nbr_blk_size);
+        WORD32 map_x = map_x0 + 1 + col;
+        WORD32 map_y = map_y0 + 1 + row;
+
+        UWORD8 *pu1_src_4x4 = pu1_cu_src + (row * trans_size * src_strd) + (col * trans_size);
+
+        /* get the neighbour availability flags */
+        nbr_flags = ihevce_get_nbr_intra(
+            &s_nbr, ps_ctxt->pu1_ctb_nbr_map, ps_ctxt->i4_nbr_map_strd, nbr_x, nbr_y, nbr_blk_size);
+
+        /* modes of the block to the left and of the block above */
+        left_intra_mode = ps_ctxt->au1_ctb_mode_map[map_y][map_x - 1];
+        top_intra_mode = ps_ctxt->au1_ctb_mode_map[map_y - 1][map_x];
+
+        /* populate the mode bits cost for all the modes of this sub-block */
+        ihevce_intra_populate_mode_bits_cost_bracketing(
+            top_intra_mode,
+            left_intra_mode,
+            s_nbr.u1_top_avail,
+            s_nbr.u1_left_avail,
+            ps_cu_node->ps_parent->u2_y0,
+            &ps_ctxt->au2_mode_bits_cost_8x8pu[blk][0],
+            &ps_ctxt->au2_mode_bits_8x8_pu[0],
+            ps_ctxt->i4_ol_sad_lambda,
+            cand_mode_list);
+
+        /* populate the reference samples for intra prediction */
+        pf_ref_substitution(
+            pu1_src_4x4 - src_strd - 1,
+            pu1_src_4x4 - src_strd,
+            pu1_src_4x4 - 1,
+            src_strd,
+            4,
+            nbr_flags,
+            &ps_ctxt->au1_ref_8x8pu[blk][0],
+            0);
+
+        /* mode decision for this 4x4 sub-block */
+        ihevce_pu_calc_4x4_blk(
+            ps_ctxt,
+            ps_cu_node->ps_sub_cu[blk],
+            pu1_src_4x4,
+            src_strd,
+            &ps_ctxt->au1_ref_8x8pu[blk][0],
+            &ps_ctxt->au2_mode_bits_cost_8x8pu[blk][0],
+            &ps_cu_node->ps_sub_cu[blk]->au4_best_cost_1tu[0],
+            &ps_cu_node->ps_sub_cu[blk]->au1_best_mode_1tu[0],
+            ps_func_selector);
+
+        /* flag the sub-block in the neighbour map for the blocks that follow */
+        ihevce_set_nbr_map(
+            ps_ctxt->pu1_ctb_nbr_map, ps_ctxt->i4_nbr_map_strd, nbr_x, nbr_y, nbr_blk_size, 1);
+
+        /* publish the chosen mode and look up its mode bits */
+        ps_ctxt->au1_ctb_mode_map[map_y][map_x] = ps_cu_node->ps_sub_cu[blk]->best_mode;
+        ps_cu_node->ps_sub_cu[blk]->u2_mode_bits_cost =
+            ps_ctxt->au2_mode_bits_8x8_pu[ps_cu_node->ps_sub_cu[blk]->best_mode];
+    }
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_bracketing_analysis \endif
+*
+* \brief
+* Interface function that evaluates MAX cu and MAX - 1 cu, with MAX cu size
+* info decided coarse resolution mode decision. Compares the SATD/SAD cost
+* between the 2 CUs and determines the actual CU size and best 3 modes to be
+* given to rdopt
+*
+* \param[in] ps_ctxt : pointer to IPE context struct
+* \param[in] ps_cu_node : pointer to cu node info buffer
+* \param[in] ps_curr_src : pointer to src pixels struct
+* \param[in] ps_ctb_out : pointer to ip ctb out struct
+* \param[in] ps_row_cu : pointer to cu analyse struct
+* \param[in] ps_ed_l1_ctb : pointer to level 1 early deci struct
+* \param[in] ps_ed_l2_ctb : pointer to level 2 early deci struct
+* \param[in] ps_l0_ipe_out_ctb : pointer to ipe_l0_ctb_analyse_for_me_t struct
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_bracketing_analysis(
+ ihevce_ipe_ctxt_t *ps_ctxt,
+ ihevce_ipe_cu_tree_t *ps_cu_node,
+ iv_enc_yuv_buf_t *ps_curr_src,
+ ctb_analyse_t *ps_ctb_out,
+ //cu_analyse_t *ps_row_cu,
+ ihevce_ed_blk_t *ps_ed_l1_ctb,
+ ihevce_ed_blk_t *ps_ed_l2_ctb,
+ ihevce_ed_ctb_l1_t *ps_ed_ctb_l1,
+ ipe_l0_ctb_analyse_for_me_t *ps_l0_ipe_out_ctb)
+{
+ WORD32 cu_pos_x = 0;
+ WORD32 cu_pos_y = 0;
+
+ UWORD8 u1_curr_ctb_wdt = ps_cu_node->u1_width;
+ UWORD8 u1_curr_ctb_hgt = ps_cu_node->u1_height;
+ WORD32 num_8x8_blks_x = (u1_curr_ctb_wdt >> 3);
+ WORD32 num_8x8_blks_y = (u1_curr_ctb_hgt >> 3);
+
+ ihevce_ed_blk_t *ps_ed_blk_l1 = ps_ed_l1_ctb;
+ ihevce_ed_blk_t *ps_ed_blk_l2 = ps_ed_l2_ctb;
+
+ WORD32 i;
+ WORD32 cand_mode_list[3];
+ //cu_analyse_t *ps_curr_cu = ps_row_cu;
+ WORD32 blk_cnt = 0;
+ WORD32 j = 0;
+ WORD32 merge_32x32_l1, merge_32x32_l2;
+
+ WORD32 i4_skip_intra_eval_32x32_l1;
+ //EIID: flag indicating number of 16x16 blocks to be skipped for intra evaluation within 32x32 block
+
+ WORD32 parent_cost = 0;
+ WORD32 child_cost[4] = { 0 };
+ WORD32 child_cost_least = 0;
+ WORD32 child_satd[4] = { 0 };
+ WORD32 x, y, size;
+ WORD32 merge_64x64 = 1;
+ UWORD8 au1_best_32x32_modes[4];
+ WORD32 au4_best_32x32_cost[4];
+ WORD32 parent_best_mode;
+ UWORD8 best_mode;
+
+ WORD32 i4_quality_preset = ps_ctxt->i4_quality_preset;
+ /* flag to control 1CU-4TU modes based on quality preset */
+ /* if set 1CU-4TU are explicity evaluated else 1CU-1TU modes are copied */
+ WORD32 i4_enable_1cu_4tu = (i4_quality_preset == IHEVCE_QUALITY_P2) ||
+ (i4_quality_preset == IHEVCE_QUALITY_P0);
+
+ /* flag to control 4CU-16TU mode based on quality preset */
+ /* if set 4CU-16TU are explicity evaluated else 4CU-4TU modes are copied*/
+ WORD32 i4_enable_4cu_16tu = (i4_quality_preset == IHEVCE_QUALITY_P2) ||
+ (i4_quality_preset == IHEVCE_QUALITY_P0);
+
+ WORD32 i4_mod_factor_num, i4_mod_factor_den = QP_MOD_FACTOR_DEN; //2;
+ float f_strength;
+ /* Accumalte satd */
+ LWORD64 i8_frame_acc_satd_cost = 0, i8_frame_acc_satd_by_modqp_q10 = 0;
+ WORD32 i4_ctb_acc_satd = 0;
+
+ /* Accumalate Mode bits cost */
+ LWORD64 i8_frame_acc_mode_bits_cost = 0;
+
+ /* Step2 is bypassed for parent, uses children modes*/
+ WORD32 step2_bypass = 1;
+
+ if(1 == ps_ctxt->u1_disable_child_cu_decide)
+ step2_bypass = 0;
+
+ ps_cu_node->ps_parent = ps_ctxt->ps_ipe_cu_tree;
+ for(i = 0; i < 4; i++)
+ {
+ ps_cu_node->ps_sub_cu[i] = ps_ctxt->ps_ipe_cu_tree + 1 + i;
+ }
+
+ /* Loop for all 8x8 block in a CTB */
+ ps_ctb_out->u4_cu_split_flags = 0x1;
+
+ /* Initialize intra 64x64, 32x32 and 16x16 costs to max value */
+ for(i = 0; i < (MAX_CU_IN_CTB >> 4); i++)
+ {
+ ps_l0_ipe_out_ctb->ai4_best32x32_intra_cost[i] = MAX_INTRA_COST_IPE;
+ }
+
+ for(i = 0; i < (MAX_CU_IN_CTB >> 2); i++)
+ {
+ ps_l0_ipe_out_ctb->ai4_best16x16_intra_cost[i] = MAX_INTRA_COST_IPE;
+ }
+
+ for(i = 0; i < (MAX_CU_IN_CTB); i++)
+ {
+ ps_l0_ipe_out_ctb->ai4_best8x8_intra_cost[i] = MAX_INTRA_COST_IPE;
+ }
+
+ ps_l0_ipe_out_ctb->i4_best64x64_intra_cost = MAX_INTRA_COST_IPE;
+
+ /* by default 64x64 modes are set to default values DC and Planar */
+ ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[0] = 0;
+ ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[1] = 1;
+ ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[2] = 255;
+
+ /* by default 64x4 split is set to 1 */
+ ps_l0_ipe_out_ctb->u1_split_flag = 1;
+
+ /* Modulation factor calculated based on spatial variance instead of hardcoded val*/
+ i4_mod_factor_num = ps_ctxt->ai4_mod_factor_derived_by_variance[1]; //16;
+
+ f_strength = ps_ctxt->f_strength;
+
+ /* ------------------------------------------------ */
+ /* populate the early decisions done by L1 analysis */
+ /* ------------------------------------------------ */
+ {
+ ihevce_ed_blk_t *ps_ed_blk_l1_curr = ps_ed_l1_ctb;
+ WORD32 ctr_8x8;
+ WORD8 *pi1_ed_buf;
+
+ /* set all the decisions to invalid */
+ memset(
+ &ps_l0_ipe_out_ctb->ai1_early_intra_inter_decision[0],
+ 0,
+ sizeof(UWORD8) * MAX_CU_IN_CTB);
+
+ pi1_ed_buf = &ps_l0_ipe_out_ctb->ai1_early_intra_inter_decision[0];
+
+ for(ctr_8x8 = 0; ctr_8x8 < MAX_CTB_SIZE; ctr_8x8++)
+ {
+ WORD32 pos_x_8x8, pos_y_8x8;
+
+ pos_x_8x8 = gau1_cu_pos_x[ctr_8x8];
+ pos_y_8x8 = gau1_cu_pos_y[ctr_8x8];
+
+ pi1_ed_buf[pos_x_8x8 + (pos_y_8x8 * MAX_CU_IN_CTB_ROW)] =
+ ps_ed_blk_l1_curr->intra_or_inter;
+ ps_ed_blk_l1_curr++;
+ }
+
+ for(ctr_8x8 = 0; ctr_8x8 < (MAX_CU_IN_CTB >> 2); ctr_8x8++)
+ {
+ ps_l0_ipe_out_ctb->ai4_best_sad_8x8_l1_ipe[ctr_8x8] =
+ ps_ed_ctb_l1->i4_best_sad_8x8_l1_ipe[ctr_8x8];
+
+ ps_l0_ipe_out_ctb->ai4_best_sad_cost_8x8_l1_ipe[ctr_8x8] =
+ ps_ed_ctb_l1->i4_best_sad_cost_8x8_l1_ipe[ctr_8x8];
+
+ /*Earlier only me sad was getting populated, now best of ipe and me is populated*/
+ ps_l0_ipe_out_ctb->ai4_best_sad_8x8_l1_me[ctr_8x8] =
+ ps_ed_ctb_l1->i4_best_sad_8x8_l1_me[ctr_8x8];
+ //ps_ed_ctb_l1->i4_sad_me_for_ref[ctr_8x8];
+
+ ps_l0_ipe_out_ctb->ai4_best_sad_cost_8x8_l1_me[ctr_8x8] =
+ ps_ed_ctb_l1->i4_best_sad_cost_8x8_l1_me[ctr_8x8];
+ //ps_ed_ctb_l1->i4_sad_cost_me_for_ref[ctr_8x8];
+ }
+
+ /*Init CTB level accumalated SATD and MPM bits */
+ ps_l0_ipe_out_ctb->i4_ctb_acc_satd = 0;
+ ps_l0_ipe_out_ctb->i4_ctb_acc_mpm_bits = 0;
+ }
+
+ /* ------------------------------------------------ */
+ /* Loop over all the blocks in current CTB */
+ /* ------------------------------------------------ */
+
+ {
+ /* 64 8x8 blocks should be encountered for the do,while loop to exit */
+ do
+ {
+ intra32_analyse_t *ps_intra32_analyse;
+ intra16_analyse_t *ps_intra16_analyse;
+ WORD32 *pi4_intra_32_cost;
+ WORD32 *pi4_intra_16_cost;
+ WORD32 *pi4_intra_8_cost;
+ WORD32 merge_16x16_l1;
+
+ /* Given the blk_cnt, get the CU's top-left 8x8 block's x and y positions within the CTB */
+ cu_pos_x = gau1_cu_pos_x[blk_cnt];
+ cu_pos_y = gau1_cu_pos_y[blk_cnt];
+
+ /* default value for 32x32 best mode - blk_cnt increases by 16 for each 32x32 */
+ au1_best_32x32_modes[blk_cnt >> 4] = 255;
+
+ /* get the corresponding intra 32 analyse pointer use (blk_cnt / 16) */
+ /* blk cnt is in terms of 8x8 units so a 32x32 will have 16 8x8 units */
+ ps_intra32_analyse = &ps_l0_ipe_out_ctb->as_intra32_analyse[blk_cnt >> 4];
+
+ /* get the corresponding intra 16 analyse pointer use (blk_cnt & 0xF / 4)*/
+ /* blk cnt is in terms of 8x8 units so a 16x16 will have 4 8x8 units */
+ ps_intra16_analyse = &ps_intra32_analyse->as_intra16_analyse[(blk_cnt & 0xF) >> 2];
+
+ /* Line below assumes min_cu_size of 8 - checks whether CU starts are within picture */
+ if((cu_pos_x < num_8x8_blks_x) && (cu_pos_y < num_8x8_blks_y))
+ {
+ /* Reset to zero for every cu decision */
+ merge_32x32_l1 = 0;
+
+ child_cost_least = 0;
+
+ /* At L2, each 4x4 corresponds to 16x16 at L0. Every 4 16x16 stores a merge_success flag */
+ ps_ed_blk_l2 = ps_ed_l2_ctb + (blk_cnt >> 2);
+
+ pi4_intra_32_cost = &ps_l0_ipe_out_ctb->ai4_best32x32_intra_cost[blk_cnt >> 4];
+
+ /* by default 32x32 modes are set to default values DC and Planar */
+ ps_intra32_analyse->au1_best_modes_32x32_tu[0] = 0;
+ ps_intra32_analyse->au1_best_modes_32x32_tu[1] = 1;
+ ps_intra32_analyse->au1_best_modes_32x32_tu[2] = 255;
+
+ /* By default 32x32 split is set to 1 */
+ ps_intra32_analyse->b1_split_flag = 1;
+
+ ps_intra32_analyse->au1_best_modes_16x16_tu[0] = 0;
+ ps_intra32_analyse->au1_best_modes_16x16_tu[1] = 1;
+ ps_intra32_analyse->au1_best_modes_16x16_tu[2] = 255;
+
+ /* 16x16 cost & 8x8 cost are stored in Raster scan order */
+ /* stride of 16x16 buffer is MAX_CU_IN_CTB_ROW >> 1 */
+ /* stride of 8x8 buffer is MAX_CU_IN_CTB_ROW */
+ {
+ WORD32 pos_x_8x8, pos_y_8x8;
+
+ pos_x_8x8 = gau1_cu_pos_x[blk_cnt];
+ pos_y_8x8 = gau1_cu_pos_y[blk_cnt];
+
+ pi4_intra_16_cost = &ps_l0_ipe_out_ctb->ai4_best16x16_intra_cost[0];
+
+ pi4_intra_16_cost +=
+ ((pos_x_8x8 >> 1) + ((pos_y_8x8 >> 1) * (MAX_CU_IN_CTB_ROW >> 1)));
+
+ pi4_intra_8_cost = &ps_l0_ipe_out_ctb->ai4_best8x8_intra_cost[0];
+
+ pi4_intra_8_cost += (pos_x_8x8 + (pos_y_8x8 * MAX_CU_IN_CTB_ROW));
+ }
+
+ merge_32x32_l1 = 0;
+ merge_32x32_l2 = 0;
+ i4_skip_intra_eval_32x32_l1 = 0;
+
+ /* Enable 16x16 merge iff sufficient 8x8 blocks remain in the current CTB */
+ merge_16x16_l1 = 0;
+ if(((num_8x8_blks_x - cu_pos_x) >= 2) && ((num_8x8_blks_y - cu_pos_y) >= 2))
+ {
+#if !ENABLE_UNIFORM_CU_SIZE_8x8
+ merge_16x16_l1 = ps_ed_blk_l1->merge_success;
+#else
+ merge_16x16_l1 = 0;
+#endif
+ }
+
+ /* Enable 32x32 merge iff sufficient 8x8 blocks remain in the current CTB */
+ if(((num_8x8_blks_x - cu_pos_x) >= 4) && ((num_8x8_blks_y - cu_pos_y) >= 4))
+ {
+ /* Check 4 flags of L1(8x8) say merge */
+ for(i = 0; i < 4; i++)
+ {
+ merge_32x32_l1 += (ps_ed_blk_l1 + (i * 4))->merge_success;
+
+ //EIDD: num 16x16 blocks for which inter_intra flag says eval only inter, i.e. skip intra eval
+ i4_skip_intra_eval_32x32_l1 +=
+ ((ps_ed_blk_l1 + (i * 4))->intra_or_inter == 2) ? 1 : 0;
+ }
+
+#if !ENABLE_UNIFORM_CU_SIZE_8x8
+ /* Check 1 flag from L2(16x16) say merge */
+ merge_32x32_l2 = ps_ed_blk_l2->merge_success;
+#else
+ merge_32x32_l1 = 0;
+ merge_32x32_l2 = 0;
+#endif
+ }
+
+#if DISABLE_L2_IPE_IN_PB_L1_IN_B
+ if((i4_quality_preset == IHEVCE_QUALITY_P6) && (ps_ctxt->i4_slice_type != ISLICE))
+ {
+ merge_32x32_l2 = 0;
+ ps_ed_blk_l2->merge_success = 0;
+ }
+#endif
+
+ ps_intra32_analyse->b1_valid_cu = 1;
+
+ /* If Merge success from all 4 L1 and L2, max CU size 32x32 is chosen */
+ /* EIID: if all blocks to be skipped then skip entire 32x32 for intra eval,
+ if no blocks to be skipped then eval entire 32x32,
+ else break the merge and go to 16x16 level eval */
+ if((merge_32x32_l1 == 4) && merge_32x32_l2 &&
+ ((i4_skip_intra_eval_32x32_l1 == 0) ||
+ (i4_skip_intra_eval_32x32_l1 == 4)) //comment this line to disable break-merge
+ )
+ {
+#if IP_DBG_L1_l2
+ /* Populate params for 32x32 block analysis */
+ ps_cu_node->ps_parent->best_cost = MAX_INTRA_COST_IPE;
+
+ ps_cu_node->ps_parent->u1_cu_size = 32;
+ ps_cu_node->ps_parent->u2_x0 = gau1_cu_pos_x[blk_cnt]; /* Populate properly */
+ ps_cu_node->ps_parent->u2_y0 = gau1_cu_pos_y[blk_cnt]; /* Populate properly */
+ ps_cu_node->ps_parent->best_mode = ps_ed_blk_l2->best_merge_mode;
+ /* CU size 32x32 and fill the final cu params */
+
+ ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt);
+
+ /* Increment pointers */
+ ps_ed_blk_l1 += 16;
+ blk_cnt += 16;
+ ps_row_cu++;
+ merge_64x64 &= 1;
+#else
+
+ /* EIID: dont evaluate if all 4 blocks at L1 said inter is winning*/
+ if(4 == i4_skip_intra_eval_32x32_l1 && (ps_ctxt->i4_slice_type != ISLICE))
+ {
+ WORD32 i4_local_ctr1, i4_local_ctr2;
+
+ ps_cu_node->ps_parent->best_cost = MAX_INTRA_COST_IPE;
+
+ ps_cu_node->ps_parent->u1_cu_size = 32;
+ ps_cu_node->ps_parent->u2_x0 =
+ gau1_cu_pos_x[blk_cnt]; /* Populate properly */
+ ps_cu_node->ps_parent->u2_y0 =
+ gau1_cu_pos_y[blk_cnt]; /* Populate properly */
+ ps_cu_node->ps_parent->best_mode =
+ INTRA_DC; //ps_ed_blk_l2->best_merge_mode;
+ /* CU size 32x32 and fill the final cu params */
+
+ /* fill in the first modes as invalid */
+ ps_cu_node->ps_parent->au1_best_mode_1tu[0] = INTRA_DC;
+ ps_cu_node->ps_parent->au1_best_mode_1tu[1] =
+ INTRA_DC; //for safery. Since update_cand_list will set num_modes as 3
+ ps_cu_node->ps_parent->au1_best_mode_1tu[2] = INTRA_DC;
+
+ ps_cu_node->ps_parent->au1_best_mode_4tu[0] = INTRA_DC;
+ ps_cu_node->ps_parent->au1_best_mode_4tu[1] = INTRA_DC;
+ ps_cu_node->ps_parent->au1_best_mode_4tu[2] = INTRA_DC;
+
+ ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt);
+
+ //ps_row_cu->s_cu_intra_cand.b6_num_intra_cands = 0;
+ //ps_row_cu->u1_num_intra_rdopt_cands = 0;
+
+ ps_intra32_analyse->b1_valid_cu = 0;
+ ps_intra32_analyse->b1_split_flag = 0;
+ ps_intra32_analyse->b1_merge_flag = 0;
+ /*memset (&ps_intra32_analyse->au1_best_modes_32x32_tu,
+ 255,
+ NUM_BEST_MODES);
+ memset (&ps_intra32_analyse->au1_best_modes_16x16_tu,
+ 255,
+ NUM_BEST_MODES);*/
+ //set only first mode since if it's 255. it wont go ahead
+ ps_intra32_analyse->au1_best_modes_32x32_tu[0] = 255;
+ ps_intra32_analyse->au1_best_modes_16x16_tu[0] = 255;
+ ps_intra32_analyse->i4_best_intra_cost = MAX_INTRA_COST_IPE;
+
+ *pi4_intra_32_cost = MAX_INTRA_COST_IPE;
+
+ /*since ME will start evaluating from bottom up, set the lower
+ cu size data invalid */
+ for(i4_local_ctr1 = 0; i4_local_ctr1 < 4; i4_local_ctr1++)
+ {
+ WORD32 *pi4_intra_8_cost_curr16;
+
+ ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1]
+ .au1_best_modes_16x16_tu[0] = 255;
+ ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1]
+ .au1_best_modes_8x8_tu[0] = 255;
+ ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1]
+ .i4_best_intra_cost = MAX_INTRA_COST_IPE;
+ ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1].b1_merge_flag = 0;
+ ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1].b1_valid_cu = 0;
+ ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1].b1_split_flag = 0;
+
+ pi4_intra_16_cost
+ [(i4_local_ctr1 & 1) + ((MAX_CU_IN_CTB_ROW >> 1) *
+ (i4_local_ctr1 >> 1))] = MAX_INTRA_COST_IPE;
+
+ pi4_intra_8_cost_curr16 = pi4_intra_8_cost + ((i4_local_ctr1 & 1) << 1);
+ pi4_intra_8_cost_curr16 +=
+ ((i4_local_ctr1 >> 1) << 1) * MAX_CU_IN_CTB_ROW;
+
+ for(i4_local_ctr2 = 0; i4_local_ctr2 < 4; i4_local_ctr2++)
+ {
+ ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1]
+ .as_intra8_analyse[i4_local_ctr2]
+ .au1_4x4_best_modes[0][0] = 255;
+ ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1]
+ .as_intra8_analyse[i4_local_ctr2]
+ .au1_4x4_best_modes[1][0] = 255;
+ ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1]
+ .as_intra8_analyse[i4_local_ctr2]
+ .au1_4x4_best_modes[2][0] = 255;
+ ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1]
+ .as_intra8_analyse[i4_local_ctr2]
+ .au1_4x4_best_modes[3][0] = 255;
+ ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1]
+ .as_intra8_analyse[i4_local_ctr2]
+ .au1_best_modes_8x8_tu[0] = 255;
+ ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1]
+ .as_intra8_analyse[i4_local_ctr2]
+ .au1_best_modes_4x4_tu[0] = 255;
+ ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1]
+ .as_intra8_analyse[i4_local_ctr2]
+ .i4_best_intra_cost = MAX_INTRA_COST_IPE;
+ ps_intra32_analyse->as_intra16_analyse[i4_local_ctr1]
+ .as_intra8_analyse[i4_local_ctr2]
+ .b1_valid_cu = 0;
+
+ pi4_intra_8_cost_curr16
+ [(i4_local_ctr2 & 1) +
+ (MAX_CU_IN_CTB_ROW * (i4_local_ctr2 >> 1))] =
+ MAX_INTRA_COST_IPE;
+ }
+ }
+
+ /* set neighbours even if intra is not evaluated, since source is always available. */
+ ihevce_set_nbr_map(
+ ps_ctxt->pu1_ctb_nbr_map,
+ ps_ctxt->i4_nbr_map_strd,
+ ps_cu_node->ps_parent->u2_x0 << 1,
+ ps_cu_node->ps_parent->u2_y0 << 1,
+ (ps_cu_node->ps_parent->u1_cu_size >> 2),
+ 1);
+
+ /* cost accumalation of best cu size candiate */
+ /*i8_frame_acc_satd_cost += parent_cost;*/
+
+ /* Mode bits cost accumalation for best cu size and cu mode */
+ /*i8_frame_acc_mode_bits_cost += ps_cu_node->ps_parent->u2_mode_bits_cost;*/
+
+ /*satd/mod_qp accumulation of best cu */
+ /*i8_frame_acc_satd_by_modqp_q10 += ((LWORD64)ps_cu_node->ps_parent->best_satd << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3))/i4_q_scale_q3_mod;*/
+
+ /* Increment pointers */
+ ps_ed_blk_l1 += 16;
+ blk_cnt += 16;
+ //ps_row_cu++;
+ merge_64x64 = 0;
+
+ /* increment for stat purpose only. Increment is valid only on single thread */
+ ps_ctxt->u4_num_16x16_skips_at_L0_IPE += 4;
+ }
+ else
+ {
+ /* Revaluation of 4 16x16 blocks at 8x8 prediction level */
+ //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, sizeof(ps_ctxt->ai1_ctb_mode_map));
+
+ if((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6) &&
+ (ps_ctxt->i4_slice_type == PSLICE))
+ {
+ ps_ctxt->u1_disable_child_cu_decide = 1;
+ step2_bypass = 0;
+ }
+
+ /* Based on the flag, Child modes decision can be disabled*/
+ if(0 == ps_ctxt->u1_disable_child_cu_decide)
+ {
+ for(j = 0; j < 4; j++)
+ {
+ ps_cu_node->ps_sub_cu[j]->u2_x0 =
+ gau1_cu_pos_x[blk_cnt + (j * 4)]; /* Populate properly */
+ ps_cu_node->ps_sub_cu[j]->u2_y0 =
+ gau1_cu_pos_y[blk_cnt + (j * 4)]; /* Populate properly */
+ ps_cu_node->ps_sub_cu[j]->u1_cu_size = 16;
+
+ {
+ WORD32 best_ang_mode =
+ (ps_ed_blk_l1 + (j * 4))->best_merge_mode;
+
+ if(best_ang_mode < 2)
+ best_ang_mode = 26;
+
+ ihevce_mode_eval_filtering(
+ ps_cu_node->ps_sub_cu[j],
+ ps_cu_node,
+ ps_ctxt,
+ ps_curr_src,
+ best_ang_mode,
+ &ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0],
+ &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0],
+ !step2_bypass,
+ 1);
+
+ if(i4_enable_4cu_16tu)
+ {
+ ihevce_mode_eval_filtering(
+ ps_cu_node->ps_sub_cu[j],
+ ps_cu_node,
+ ps_ctxt,
+ ps_curr_src,
+ best_ang_mode,
+ &ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0],
+ &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0],
+ !step2_bypass,
+ 0);
+ }
+ else
+ {
+ /* 4TU not evaluated : 4tu modes set same as 1tu modes */
+ memcpy(
+ &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0],
+ &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0],
+ NUM_BEST_MODES);
+
+ /* 4TU not evaluated : currently 4tu cost set same as 1tu cost */
+ memcpy(
+ &ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0],
+ &ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0],
+ NUM_BEST_MODES * sizeof(WORD32));
+ }
+
+ child_cost[j] =
+ MIN(ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0],
+ ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0]);
+
+ /* Child cost is sum of costs at 16x16 level */
+ child_cost_least += child_cost[j];
+
+ /* Select the best mode to be populated as top and left nbr depending on the
+ 4tu and 1tu cost */
+ if(ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0] >
+ ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0])
+ {
+ ps_cu_node->ps_sub_cu[j]->best_mode =
+ ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0];
+ }
+ else
+ {
+ ps_cu_node->ps_sub_cu[j]->best_mode =
+ ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0];
+ }
+
+ { /* Update the CTB nodes only for MAX - 1 CU nodes */
+ WORD32 xA, yA, row, col;
+ xA = ((ps_cu_node->ps_sub_cu[j]->u2_x0 << 3) >> 2) + 1;
+ yA = ((ps_cu_node->ps_sub_cu[j]->u2_y0 << 3) >> 2) + 1;
+ size = ps_cu_node->ps_sub_cu[j]->u1_cu_size >> 2;
+ for(row = yA; row < (yA + size); row++)
+ {
+ for(col = xA; col < (xA + size); col++)
+ {
+ ps_ctxt->au1_ctb_mode_map[row][col] =
+ ps_cu_node->ps_sub_cu[j]->best_mode;
+ }
+ }
+ }
+ }
+
+ /*Child SATD cost*/
+ child_satd[j] = ps_cu_node->ps_sub_cu[j]->best_satd;
+
+ /* store the child 16x16 costs */
+ pi4_intra_16_cost[(j & 1) + ((MAX_CU_IN_CTB_ROW >> 1) * (j >> 1))] =
+ child_cost[j];
+
+ /* set the CU valid flag */
+ ps_intra16_analyse[j].b1_valid_cu = 1;
+
+ /* All 16x16 merge is valid, if Cu 32x32 is chosen */
+ /* To be reset, if CU 64x64 is chosen */
+ ps_intra16_analyse[j].b1_merge_flag = 1;
+
+ /* storing the modes to intra 16 analyse */
+ /* store the best 16x16 modes 8x8 tu */
+ memcpy(
+ &ps_intra16_analyse[j].au1_best_modes_8x8_tu[0],
+ &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0],
+ sizeof(UWORD8) * (NUM_BEST_MODES));
+ ps_intra16_analyse[j].au1_best_modes_8x8_tu[NUM_BEST_MODES] = 255;
+
+ /* store the best 16x16 modes 16x16 tu */
+ memcpy(
+ &ps_intra16_analyse[j].au1_best_modes_16x16_tu[0],
+ &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0],
+ sizeof(UWORD8) * (NUM_BEST_MODES));
+ ps_intra16_analyse[j].au1_best_modes_16x16_tu[NUM_BEST_MODES] = 255;
+
+ /* divide the 16x16 costs (pro rating) to 4 8x8 costs */
+ /* store the same 16x16 modes as 4 8x8 child modes */
+ {
+ WORD32 idx_8x8;
+ WORD32 *pi4_intra_8_cost_curr16;
+ intra8_analyse_t *ps_intra8_analyse;
+
+ pi4_intra_8_cost_curr16 = pi4_intra_8_cost + ((j & 1) << 1);
+ pi4_intra_8_cost_curr16 += ((j >> 1) << 1) * MAX_CU_IN_CTB_ROW;
+
+ for(idx_8x8 = 0; idx_8x8 < 4; idx_8x8++)
+ {
+ pi4_intra_8_cost_curr16
+ [(idx_8x8 & 1) + (MAX_CU_IN_CTB_ROW * (idx_8x8 >> 1))] =
+ (child_cost[j] + 3) >> 2;
+
+ ps_intra8_analyse =
+ &ps_intra16_analyse[j].as_intra8_analyse[idx_8x8];
+
+ ps_intra8_analyse->b1_enable_nxn = 0;
+ ps_intra8_analyse->b1_valid_cu = 1;
+
+ /* store the best 8x8 modes 8x8 tu */
+ memcpy(
+ &ps_intra8_analyse->au1_best_modes_8x8_tu[0],
+ &ps_intra16_analyse[j].au1_best_modes_8x8_tu[0],
+ sizeof(UWORD8) * (NUM_BEST_MODES + 1));
+
+ /* store the best 8x8 modes 4x4 tu */
+ memcpy(
+ &ps_intra8_analyse->au1_best_modes_4x4_tu[0],
+ &ps_intra16_analyse[j].au1_best_modes_8x8_tu[0],
+ sizeof(UWORD8) * (NUM_BEST_MODES + 1));
+
+ /* NXN modes not evaluated hence set to 0 */
+ memset(
+ &ps_intra8_analyse->au1_4x4_best_modes[0][0],
+ 255,
+ sizeof(UWORD8) * 4 * (NUM_BEST_MODES + 1));
+ }
+ }
+ }
+
+ ihevce_set_nbr_map(
+ ps_ctxt->pu1_ctb_nbr_map,
+ ps_ctxt->i4_nbr_map_strd,
+ ps_cu_node->ps_sub_cu[0]->u2_x0 << 1,
+ ps_cu_node->ps_sub_cu[0]->u2_y0 << 1,
+ (ps_cu_node->ps_sub_cu[0]->u1_cu_size >> 1),
+ 0);
+ }
+#if 1 //DISBLE_CHILD_CU_EVAL_L0_IPE //1
+ else
+ {
+ for(j = 0; j < 4; j++)
+ {
+ WORD32 idx_8x8;
+ intra8_analyse_t *ps_intra8_analyse;
+ ps_intra16_analyse[j].au1_best_modes_8x8_tu[0] = 255;
+ ps_intra16_analyse[j].au1_best_modes_16x16_tu[0] = 255;
+
+ ps_intra16_analyse[j].b1_valid_cu = 0;
+
+ for(idx_8x8 = 0; idx_8x8 < 4; idx_8x8++)
+ {
+ ps_intra8_analyse =
+ &ps_intra16_analyse[j].as_intra8_analyse[idx_8x8];
+
+ ps_intra8_analyse->au1_best_modes_8x8_tu[0] = 255;
+ ps_intra8_analyse->au1_best_modes_4x4_tu[0] = 255;
+
+ ps_intra8_analyse->b1_enable_nxn = 0;
+ ps_intra8_analyse->b1_valid_cu = 0;
+
+ /* NXN modes not evaluated hence set to 0 */
+ memset(
+ &ps_intra8_analyse->au1_4x4_best_modes[0][0],
+ 255,
+ sizeof(UWORD8) * 4 * (NUM_BEST_MODES + 1));
+ }
+ }
+
+ child_cost_least = MAX_INTRA_COST_IPE;
+ }
+#endif
+
+ /* Populate params for 32x32 block analysis */
+
+ ps_cu_node->ps_parent->u1_cu_size = 32;
+ ps_cu_node->ps_parent->u2_x0 =
+ gau1_cu_pos_x[blk_cnt]; /* Populate properly */
+ ps_cu_node->ps_parent->u2_y0 =
+ gau1_cu_pos_y[blk_cnt]; /* Populate properly */
+
+ /* Revaluation for 32x32 parent block at 16x16 prediction level */
+ //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, sizeof(ps_ctxt->ai1_ctb_mode_map));
+
+ {
+ /* Eval for TUSize = CuSize */
+ ihevce_mode_eval_filtering(
+ ps_cu_node->ps_parent,
+ ps_cu_node,
+ ps_ctxt,
+ ps_curr_src,
+ 26,
+ &ps_cu_node->ps_parent->au4_best_cost_1tu[0],
+ &ps_cu_node->ps_parent->au1_best_mode_1tu[0],
+ step2_bypass,
+ 1);
+
+ if(i4_enable_1cu_4tu)
+ {
+ /* Eval for TUSize = CuSize/2 */
+ ihevce_mode_eval_filtering(
+ ps_cu_node->ps_parent,
+ ps_cu_node,
+ ps_ctxt,
+ ps_curr_src,
+ 26,
+ &ps_cu_node->ps_parent->au4_best_cost_4tu[0],
+ &ps_cu_node->ps_parent->au1_best_mode_4tu[0],
+ step2_bypass,
+ 0);
+ }
+ else
+ {
+ /* 4TU not evaluated : 4tu modes set same as 1tu modes */
+ memcpy(
+ &ps_cu_node->ps_parent->au1_best_mode_4tu[0],
+ &ps_cu_node->ps_parent->au1_best_mode_1tu[0],
+ NUM_BEST_MODES);
+
+ /* 4TU not evaluated : currently 4tu cost set same as 1tu cost */
+ memcpy(
+ &ps_cu_node->ps_parent->au4_best_cost_4tu[0],
+ &ps_cu_node->ps_parent->au4_best_cost_1tu[0],
+ NUM_BEST_MODES * sizeof(WORD32));
+ }
+ }
+
+ ps_ctxt->u1_disable_child_cu_decide = 0;
+ step2_bypass = 1;
+
+ /* Update parent cost */
+ parent_cost =
+ MIN(ps_cu_node->ps_parent->au4_best_cost_4tu[0],
+ ps_cu_node->ps_parent->au4_best_cost_1tu[0]);
+
+ /* Select the best mode to be populated as top and left nbr depending on the
+ 4tu and 1tu cost */
+ if(ps_cu_node->ps_parent->au4_best_cost_4tu[0] >
+ ps_cu_node->ps_parent->au4_best_cost_1tu[0])
+ {
+ ps_cu_node->ps_parent->best_mode =
+ ps_cu_node->ps_parent->au1_best_mode_1tu[0];
+ }
+ else
+ {
+ ps_cu_node->ps_parent->best_mode =
+ ps_cu_node->ps_parent->au1_best_mode_4tu[0];
+ }
+
+ /* store the 32x32 cost */
+ *pi4_intra_32_cost = parent_cost;
+
+ /* set the CU valid flag */
+ ps_intra32_analyse->b1_valid_cu = 1;
+
+ ps_intra32_analyse->b1_merge_flag = 1;
+
+ /* storing the modes to intra 32 analyse */
+ {
+ /* store the best 32x32 modes 16x16 tu */
+ memcpy(
+ &ps_intra32_analyse->au1_best_modes_16x16_tu[0],
+ &ps_cu_node->ps_parent->au1_best_mode_4tu[0],
+ sizeof(UWORD8) * (NUM_BEST_MODES));
+ ps_intra32_analyse->au1_best_modes_16x16_tu[NUM_BEST_MODES] = 255;
+
+ /* store the best 32x32 modes 32x32 tu */
+ memcpy(
+ &ps_intra32_analyse->au1_best_modes_32x32_tu[0],
+ &ps_cu_node->ps_parent->au1_best_mode_1tu[0],
+ sizeof(UWORD8) * (NUM_BEST_MODES));
+ ps_intra32_analyse->au1_best_modes_32x32_tu[NUM_BEST_MODES] = 255;
+ }
+ parent_best_mode = ps_cu_node->ps_parent->best_mode;
+ if((parent_cost <=
+ child_cost_least + (ps_ctxt->i4_ol_satd_lambda * CHILD_BIAS >>
+ LAMBDA_Q_SHIFT))) //|| identical_modes)
+ {
+ WORD32 i4_q_scale_q3_mod;
+ UWORD8 u1_cu_possible_qp;
+ WORD32 i4_act_factor;
+
+ /* CU size 32x32 and fill the final cu params */
+
+ ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt);
+
+ if((IHEVCE_QUALITY_P3 > i4_quality_preset))
+ {
+ for(i = 0; i < 4; i++)
+ {
+ intra8_analyse_t *ps_intra8_analyse;
+ ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[i];
+ for(j = 0; j < 4; j++)
+ {
+ /* Populate best 3 nxn modes */
+ ps_intra8_analyse->au1_4x4_best_modes[j][0] =
+ ps_cu_node->ps_sub_cu[i]->au1_best_mode_4tu[0];
+ ps_intra8_analyse->au1_4x4_best_modes[j][1] =
+ ps_cu_node->ps_sub_cu[i]
+ ->au1_best_mode_4tu[1]; //(ps_ed + 1)->best_mode;
+ ps_intra8_analyse->au1_4x4_best_modes[j][2] =
+ ps_cu_node->ps_sub_cu[i]
+ ->au1_best_mode_4tu[2]; //(ps_ed + 2)->best_mode;
+ ps_intra8_analyse->au1_4x4_best_modes[j][3] = 255;
+ }
+ }
+ }
+ /* store the 32x32 non split flag */
+ ps_intra32_analyse->b1_split_flag = 0;
+ ps_intra32_analyse->as_intra16_analyse[0].b1_split_flag = 0;
+ ps_intra32_analyse->as_intra16_analyse[1].b1_split_flag = 0;
+ ps_intra32_analyse->as_intra16_analyse[2].b1_split_flag = 0;
+ ps_intra32_analyse->as_intra16_analyse[3].b1_split_flag = 0;
+
+ au1_best_32x32_modes[blk_cnt >> 4] =
+ ps_cu_node->ps_parent->au1_best_mode_1tu[0];
+
+ au4_best_32x32_cost[blk_cnt >> 4] =
+ ps_cu_node->ps_parent->au4_best_cost_1tu[0];
+ /*As 32*32 has won, pick L2 8x8 qp which maps
+ to L0 32x32 Qp*/
+ ASSERT(((blk_cnt >> 4) & 3) == (blk_cnt >> 4));
+ ASSERT(ps_ed_ctb_l1->i4_16x16_satd[blk_cnt >> 4][0] != -2);
+ u1_cu_possible_qp = ihevce_cu_level_qp_mod(
+ ps_ctxt->i4_qscale,
+ ps_ed_ctb_l1->i4_16x16_satd[blk_cnt >> 4][0],
+ ps_ctxt->ld_curr_frame_16x16_log_avg[0],
+ f_strength,
+ &i4_act_factor,
+ &i4_q_scale_q3_mod,
+ ps_ctxt->ps_rc_quant_ctxt);
+ /* cost accumalation of best cu size candiate */
+ i8_frame_acc_satd_cost += parent_cost;
+
+ /* satd and mpm bits accumalation of best cu size candiate */
+ i4_ctb_acc_satd += ps_cu_node->ps_parent->best_satd;
+
+ /* Mode bits cost accumalation for best cu size and cu mode */
+ i8_frame_acc_mode_bits_cost += ps_cu_node->ps_parent->u2_mode_bits_cost;
+
+ /*satd/mod_qp accumulation of best cu */
+ i8_frame_acc_satd_by_modqp_q10 +=
+ ((LWORD64)ps_cu_node->ps_parent->best_satd
+ << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) /
+ i4_q_scale_q3_mod;
+
+ /* Increment pointers */
+ ps_ed_blk_l1 += 16;
+ blk_cnt += 16;
+ //ps_row_cu++;
+ merge_64x64 &= 1;
+ }
+ else
+ {
+ /* store the 32x32 split flag */
+ ps_intra32_analyse->b1_split_flag = 1;
+
+ /* CU size 16x16 and fill the final cu params for all 4 blocks */
+ for(j = 0; j < 4; j++)
+ {
+ WORD32 i4_q_scale_q3_mod;
+ UWORD8 u1_cu_possible_qp;
+ WORD32 i4_act_factor;
+
+ /* Set CU split flag */
+ ASSERT(blk_cnt % 4 == 0);
+
+ ihevce_update_cand_list(
+ ps_cu_node->ps_sub_cu[j], ps_ed_blk_l1, ps_ctxt);
+
+ /* store the 16x16 non split flag */
+ ps_intra16_analyse[j].b1_split_flag = 0;
+
+ ASSERT(((blk_cnt >> 2) & 0xF) == (blk_cnt >> 2));
+ ASSERT(ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][0] != -2);
+ /*As 16*16 has won, pick L1 8x8 qp which maps
+ to L0 16x16 Qp*/
+ u1_cu_possible_qp = ihevce_cu_level_qp_mod(
+ ps_ctxt->i4_qscale,
+ ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][0],
+ ps_ctxt->ld_curr_frame_8x8_log_avg[0],
+ f_strength,
+ &i4_act_factor,
+ &i4_q_scale_q3_mod,
+ ps_ctxt->ps_rc_quant_ctxt);
+
+ /*accum satd/qp for all child block*/
+ i8_frame_acc_satd_by_modqp_q10 +=
+ ((LWORD64)child_satd[j]
+ << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) /
+ i4_q_scale_q3_mod;
+
+ /* Accumalate mode bits for all child blocks */
+ i8_frame_acc_mode_bits_cost +=
+ ps_cu_node->ps_sub_cu[j]->u2_mode_bits_cost;
+
+ /* satd and mpm bits accumalation of best cu size candiate */
+ i4_ctb_acc_satd += child_satd[j];
+
+ /* Increment pointers */
+ //ps_row_cu++;
+ ps_ed_blk_l1 += 4;
+ blk_cnt += 4;
+ }
+
+ /* cost accumalation of best cu size candiate */
+ i8_frame_acc_satd_cost += child_cost_least;
+
+ /* 64x64 merge is not possible */
+ merge_64x64 = 0;
+ }
+
+ //ps_ed_blk_l2 += 4;
+
+ } //end of EIID's else
+#endif
+ }
+ /* If Merge success for L1 max CU size 16x16 is chosen */
+ else if(merge_16x16_l1)
+ {
+#if IP_DBG_L1_l2
+ ps_cu_node->ps_parent->u1_cu_size = 16;
+ ps_cu_node->ps_parent->u2_x0 = gau1_cu_pos_x[blk_cnt]; /* Populate properly */
+ ps_cu_node->ps_parent->u2_y0 = gau1_cu_pos_y[blk_cnt]; /* Populate properly */
+ ps_cu_node->ps_parent->best_mode = ps_ed_blk_l1->best_merge_mode;
+ ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt);
+
+ blk_cnt += 4;
+ ps_ed_blk_l1 += 4;
+ ps_row_cu++;
+ merge_64x64 = 0;
+#else
+
+ /*EIID: evaluate only if L1 early-inter-intra decision is not favouring inter*/
+ /* enable this only in B pictures */
+ if(ps_ed_blk_l1->intra_or_inter == 2 && (ps_ctxt->i4_slice_type != ISLICE))
+ {
+ WORD32 i4_q_scale_q3_mod, i4_local_ctr;
+ WORD8 i1_cu_possible_qp;
+ WORD32 i4_act_factor;
+ /* make cost infinity. */
+ /* make modes invalid */
+ /* update loop variables */
+ /* set other output variales */
+ /* dont set neighbour flag so that next blocks wont access this cu */
+ /* what happens to ctb_mode_map?? */
+
+ ps_cu_node->ps_parent->u1_cu_size = 16;
+ ps_cu_node->ps_parent->u2_x0 =
+ gau1_cu_pos_x[blk_cnt]; /* Populate properly */
+ ps_cu_node->ps_parent->u2_y0 =
+ gau1_cu_pos_y[blk_cnt]; /* Populate properly */
+ ps_cu_node->ps_parent->best_mode =
+ INTRA_DC; //ps_ed_blk_l1->best_merge_mode;
+
+ /* fill in the first modes as invalid */
+
+ ps_cu_node->ps_parent->au1_best_mode_1tu[0] = INTRA_DC;
+ ps_cu_node->ps_parent->au1_best_mode_1tu[1] =
+ INTRA_DC; //for safery. Since update_cand_list will set num_modes as 3
+ ps_cu_node->ps_parent->au1_best_mode_1tu[2] = INTRA_DC;
+
+ ps_cu_node->ps_parent->au1_best_mode_4tu[0] = INTRA_DC;
+ ps_cu_node->ps_parent->au1_best_mode_4tu[1] = INTRA_DC;
+ ps_cu_node->ps_parent->au1_best_mode_4tu[2] = INTRA_DC;
+
+ ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt);
+
+ //ps_row_cu->s_cu_intra_cand.b6_num_intra_cands = 0;
+ //ps_row_cu->u1_num_intra_rdopt_cands = 0;
+
+ ps_intra32_analyse->b1_split_flag = 1;
+ ps_intra32_analyse->b1_merge_flag = 0;
+
+ ps_intra16_analyse->b1_valid_cu = 0;
+ ps_intra16_analyse->b1_split_flag = 0;
+ ps_intra16_analyse->b1_merge_flag = 1;
+ //memset (&ps_intra16_analyse->au1_best_modes_16x16_tu,
+ // 255,
+ // NUM_BEST_MODES);
+ //memset (&ps_intra16_analyse->au1_best_modes_8x8_tu,
+ // 255,
+ // NUM_BEST_MODES);
+ //set only first mode since if it's 255. it wont go ahead
+ ps_intra16_analyse->au1_best_modes_16x16_tu[0] = 255;
+ ps_intra16_analyse->au1_best_modes_8x8_tu[0] = 255;
+ ps_intra16_analyse->i4_best_intra_cost = MAX_INTRA_COST_IPE;
+ *pi4_intra_16_cost = MAX_INTRA_COST_IPE;
+
+ /*since ME will start evaluating from bottom up, set the lower
+ cu size data invalid */
+ for(i4_local_ctr = 0; i4_local_ctr < 4; i4_local_ctr++)
+ {
+ ps_intra16_analyse->as_intra8_analyse[i4_local_ctr]
+ .au1_4x4_best_modes[0][0] = 255;
+ ps_intra16_analyse->as_intra8_analyse[i4_local_ctr]
+ .au1_4x4_best_modes[1][0] = 255;
+ ps_intra16_analyse->as_intra8_analyse[i4_local_ctr]
+ .au1_4x4_best_modes[2][0] = 255;
+ ps_intra16_analyse->as_intra8_analyse[i4_local_ctr]
+ .au1_4x4_best_modes[3][0] = 255;
+ ps_intra16_analyse->as_intra8_analyse[i4_local_ctr]
+ .au1_best_modes_8x8_tu[0] = 255;
+ ps_intra16_analyse->as_intra8_analyse[i4_local_ctr]
+ .au1_best_modes_4x4_tu[0] = 255;
+ ps_intra16_analyse->as_intra8_analyse[i4_local_ctr].i4_best_intra_cost =
+ MAX_INTRA_COST_IPE;
+
+ pi4_intra_8_cost
+ [(i4_local_ctr & 1) + (MAX_CU_IN_CTB_ROW * (i4_local_ctr >> 1))] =
+ MAX_INTRA_COST_IPE;
+ }
+
+ /* set neighbours even if intra is not evaluated, since source is always available. */
+ ihevce_set_nbr_map(
+ ps_ctxt->pu1_ctb_nbr_map,
+ ps_ctxt->i4_nbr_map_strd,
+ ps_cu_node->ps_parent->u2_x0 << 1,
+ ps_cu_node->ps_parent->u2_y0 << 1,
+ (ps_cu_node->ps_parent->u1_cu_size >> 2),
+ 1);
+
+ //what happends to RC variables??
+ /* run only constant Qp */
+ ASSERT(((blk_cnt >> 2) & 0xF) == (blk_cnt >> 2));
+ ASSERT(ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][0] != -2);
+ i1_cu_possible_qp = ihevce_cu_level_qp_mod(
+ ps_ctxt->i4_qscale,
+ ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][0],
+ ps_ctxt->ld_curr_frame_8x8_log_avg[0],
+ f_strength,
+ &i4_act_factor,
+ &i4_q_scale_q3_mod,
+ ps_ctxt->ps_rc_quant_ctxt);
+
+ /* cost accumalation of best cu size candiate */
+ i8_frame_acc_satd_cost += 0; //parent_cost; //incorrect accumulation
+
+ /*satd/mod_qp accumulation of best cu */
+ i8_frame_acc_satd_by_modqp_q10 += 0; //incorrect accumulation
+ //((LWORD64)ps_cu_node->ps_parent->best_satd << SATD_BY_ACT_Q_FAC)/i4_q_scale_q3_mod;
+
+ /* Accumalate mode bits for all child blocks */
+ i8_frame_acc_mode_bits_cost +=
+ 0; //ps_cu_node->ps_parent->u2_mode_bits_cost;
+ //incoorect accumulation
+
+ blk_cnt += 4;
+ ps_ed_blk_l1 += 4;
+ //ps_row_cu++;
+ merge_64x64 = 0;
+
+ /* increment for stat purpose only. Increment is valid only on single thread */
+ ps_ctxt->u4_num_16x16_skips_at_L0_IPE += 1;
+ }
+ else
+ {
+ /* 64x64 merge is not possible */
+ merge_64x64 = 0;
+
+ /* set the 32x32 split flag to 1 */
+ ps_intra32_analyse->b1_split_flag = 1;
+
+ ps_intra32_analyse->b1_merge_flag = 0;
+
+ ps_intra16_analyse->b1_merge_flag = 1;
+
+ if((ps_ctxt->i4_quality_preset == IHEVCE_QUALITY_P6) &&
+ (ps_ctxt->i4_slice_type == PSLICE))
+ {
+ ps_ctxt->u1_disable_child_cu_decide = 1;
+ step2_bypass = 0;
+ }
+ //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, sizeof(ps_ctxt->ai1_ctb_mode_map));
+ /* Based on the flag, Child modes decision can be disabled*/
+ if(0 == ps_ctxt->u1_disable_child_cu_decide)
+ {
+ for(j = 0; j < 4; j++)
+ {
+ intra8_analyse_t *ps_intra8_analyse;
+ WORD32 best_ang_mode = (ps_ed_blk_l1 + j)->best_mode;
+
+ if(best_ang_mode < 2)
+ best_ang_mode = 26;
+
+ //ps_cu_node->ps_sub_cu[j]->best_cost = MAX_INTRA_COST_IPE;
+ //ps_cu_node->ps_sub_cu[j]->best_mode = (ps_ed_blk_l1 + j)->best_mode;
+
+ ps_cu_node->ps_sub_cu[j]->u2_x0 =
+ gau1_cu_pos_x[blk_cnt + j]; /* Populate properly */
+ ps_cu_node->ps_sub_cu[j]->u2_y0 =
+ gau1_cu_pos_y[blk_cnt + j]; /* Populate properly */
+ ps_cu_node->ps_sub_cu[j]->u1_cu_size = 8;
+
+ ihevce_mode_eval_filtering(
+ ps_cu_node->ps_sub_cu[j],
+ ps_cu_node,
+ ps_ctxt,
+ ps_curr_src,
+ best_ang_mode,
+ &ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0],
+ &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0],
+ !step2_bypass,
+ 1);
+
+ if(i4_enable_4cu_16tu)
+ {
+ ihevce_mode_eval_filtering(
+ ps_cu_node->ps_sub_cu[j],
+ ps_cu_node,
+ ps_ctxt,
+ ps_curr_src,
+ best_ang_mode,
+ &ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0],
+ &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0],
+ !step2_bypass,
+ 0);
+ }
+ else
+ {
+ /* 4TU not evaluated : 4tu modes set same as 1tu modes */
+ memcpy(
+ &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0],
+ &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0],
+ NUM_BEST_MODES);
+
+ /* 4TU not evaluated : currently 4tu cost set same as 1tu cost */
+ memcpy(
+ &ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0],
+ &ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0],
+ NUM_BEST_MODES * sizeof(WORD32));
+ }
+
+ child_cost[j] =
+ MIN(ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0],
+ ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0]);
+
+ child_cost_least += child_cost[j];
+
+ /* Select the best mode to be populated as top and left nbr depending on the
+ 4tu and 1tu cost */
+ if(ps_cu_node->ps_sub_cu[j]->au4_best_cost_4tu[0] >
+ ps_cu_node->ps_sub_cu[j]->au4_best_cost_1tu[0])
+ {
+ ps_cu_node->ps_sub_cu[j]->best_mode =
+ ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0];
+ }
+ else
+ {
+ ps_cu_node->ps_sub_cu[j]->best_mode =
+ ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0];
+ }
+ { /* Update the CTB nodes only for MAX - 1 CU nodes */
+ WORD32 xA, yA, row, col;
+ xA = ((ps_cu_node->ps_sub_cu[j]->u2_x0 << 3) >> 2) + 1;
+ yA = ((ps_cu_node->ps_sub_cu[j]->u2_y0 << 3) >> 2) + 1;
+ size = ps_cu_node->ps_sub_cu[j]->u1_cu_size >> 2;
+ for(row = yA; row < (yA + size); row++)
+ {
+ for(col = xA; col < (xA + size); col++)
+ {
+ ps_ctxt->au1_ctb_mode_map[row][col] =
+ ps_cu_node->ps_sub_cu[j]->best_mode;
+ }
+ }
+ }
+
+ /*collect individual child satd for final SATD/qp accum*/
+ child_satd[j] = ps_cu_node->ps_sub_cu[j]->best_satd;
+
+ ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[j];
+
+ /* store the child 8x8 costs */
+ pi4_intra_8_cost[(j & 1) + (MAX_CU_IN_CTB_ROW * (j >> 1))] =
+ child_cost[j];
+
+ /* set the CU valid flag */
+ ps_intra8_analyse->b1_valid_cu = 1;
+ ps_intra8_analyse->b1_enable_nxn = 0;
+
+ /* storing the modes to intra8 analyse */
+
+ /* store the best 8x8 modes 8x8 tu */
+ memcpy(
+ &ps_intra8_analyse->au1_best_modes_8x8_tu[0],
+ &ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0],
+ sizeof(UWORD8) * (NUM_BEST_MODES));
+ ps_intra8_analyse->au1_best_modes_8x8_tu[NUM_BEST_MODES] = 255;
+
+ /* store the best 8x8 modes 4x4 tu */
+ memcpy(
+ &ps_intra8_analyse->au1_best_modes_4x4_tu[0],
+ &ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0],
+ sizeof(UWORD8) * (NUM_BEST_MODES));
+ ps_intra8_analyse->au1_best_modes_4x4_tu[NUM_BEST_MODES] = 255;
+
+ /* NXN modes not evaluated hence set to 255 */
+ memset(
+ &ps_intra8_analyse->au1_4x4_best_modes[0][0],
+ 255,
+ sizeof(UWORD8) * 4 * (NUM_BEST_MODES + 1));
+ }
+
+ ihevce_set_nbr_map(
+ ps_ctxt->pu1_ctb_nbr_map,
+ ps_ctxt->i4_nbr_map_strd,
+ ps_cu_node->ps_sub_cu[0]->u2_x0 << 1,
+ ps_cu_node->ps_sub_cu[0]->u2_y0 << 1,
+ (ps_cu_node->ps_sub_cu[0]->u1_cu_size >> 1),
+ 0);
+ }
+#if 1 //DISBLE_CHILD_CU_EVAL_L0_IPE //1
+ else
+ {
+ for(j = 0; j < 4; j++)
+ {
+ intra8_analyse_t *ps_intra8_analyse;
+ ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[j];
+ ps_intra8_analyse->au1_best_modes_8x8_tu[0] = 255;
+ ps_intra8_analyse->au1_best_modes_4x4_tu[0] = 255;
+ /* NXN modes not evaluated hence set to 255 */
+ memset(
+ &ps_intra8_analyse->au1_4x4_best_modes[0][0],
+ 255,
+ sizeof(UWORD8) * 4 * (NUM_BEST_MODES + 1));
+
+ ps_intra8_analyse->b1_valid_cu = 0;
+ ps_intra8_analyse->b1_enable_nxn = 0;
+ }
+ child_cost_least = MAX_INTRA_COST_IPE;
+ }
+#endif
+ //ps_cu_node->ps_parent->best_mode = ps_ed_blk_l1->best_mode;
+ //ps_cu_node->ps_parent->best_cost = MAX_INTRA_COST_IPE;
+
+ ps_cu_node->ps_parent->u1_cu_size = 16;
+ ps_cu_node->ps_parent->u2_x0 =
+ gau1_cu_pos_x[blk_cnt]; /* Populate properly */
+ ps_cu_node->ps_parent->u2_y0 =
+ gau1_cu_pos_y[blk_cnt]; /* Populate properly */
+
+ //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, sizeof(ps_ctxt->ai1_ctb_mode_map));
+
+ /* Eval for TUSize = CuSize */
+ ihevce_mode_eval_filtering(
+ ps_cu_node->ps_parent,
+ ps_cu_node,
+ ps_ctxt,
+ ps_curr_src,
+ 26,
+ &ps_cu_node->ps_parent->au4_best_cost_1tu[0],
+ &ps_cu_node->ps_parent->au1_best_mode_1tu[0],
+ step2_bypass,
+ 1);
+
+ if(i4_enable_1cu_4tu)
+ {
+ /* Eval for TUSize = CuSize/2 */
+ ihevce_mode_eval_filtering(
+ ps_cu_node->ps_parent,
+ ps_cu_node,
+ ps_ctxt,
+ ps_curr_src,
+ 26,
+ &ps_cu_node->ps_parent->au4_best_cost_4tu[0],
+ &ps_cu_node->ps_parent->au1_best_mode_4tu[0],
+ step2_bypass,
+ 0);
+ }
+ else
+ {
+ /* 4TU not evaluated : 4tu modes set same as 1tu modes */
+ memcpy(
+ &ps_cu_node->ps_parent->au1_best_mode_4tu[0],
+ &ps_cu_node->ps_parent->au1_best_mode_1tu[0],
+ NUM_BEST_MODES);
+
+ /* 4TU not evaluated : currently 4tu cost set same as 1tu cost */
+ memcpy(
+ &ps_cu_node->ps_parent->au4_best_cost_4tu[0],
+ &ps_cu_node->ps_parent->au4_best_cost_1tu[0],
+ NUM_BEST_MODES * sizeof(WORD32));
+ }
+
+ ps_ctxt->u1_disable_child_cu_decide = 0;
+ step2_bypass = 1;
+
+ /* Update parent cost */
+ parent_cost =
+ MIN(ps_cu_node->ps_parent->au4_best_cost_4tu[0],
+ ps_cu_node->ps_parent->au4_best_cost_1tu[0]);
+
+ /* Select the best mode to be populated as top and left nbr depending on the
+ 4tu and 1tu cost */
+ if(ps_cu_node->ps_parent->au4_best_cost_4tu[0] >
+ ps_cu_node->ps_parent->au4_best_cost_1tu[0])
+ {
+ ps_cu_node->ps_parent->best_mode =
+ ps_cu_node->ps_parent->au1_best_mode_1tu[0];
+ }
+ else
+ {
+ ps_cu_node->ps_parent->best_mode =
+ ps_cu_node->ps_parent->au1_best_mode_4tu[0];
+ }
+
+ /* store the 16x16 cost */
+ *pi4_intra_16_cost = parent_cost;
+
+ /* accumulate the 32x32 cost */
+ if(MAX_INTRA_COST_IPE == *pi4_intra_32_cost)
+ {
+ *pi4_intra_32_cost = parent_cost;
+ }
+ else
+ {
+ *pi4_intra_32_cost += parent_cost;
+ }
+
+ /* set the CU valid flag */
+ ps_intra16_analyse->b1_valid_cu = 1;
+
+ /* storing the modes to intra 16 analyse */
+ {
+ /* store the best 16x16 modes 16x16 tu */
+ memcpy(
+ &ps_intra16_analyse->au1_best_modes_16x16_tu[0],
+ &ps_cu_node->ps_parent->au1_best_mode_1tu[0],
+ sizeof(UWORD8) * NUM_BEST_MODES);
+ ps_intra16_analyse->au1_best_modes_16x16_tu[NUM_BEST_MODES] = 255;
+
+ /* store the best 16x16 modes 8x8 tu */
+ memcpy(
+ &ps_intra16_analyse->au1_best_modes_8x8_tu[0],
+ &ps_cu_node->ps_parent->au1_best_mode_4tu[0],
+ sizeof(UWORD8) * NUM_BEST_MODES);
+ ps_intra16_analyse->au1_best_modes_8x8_tu[NUM_BEST_MODES] = 255;
+ }
+
+ parent_best_mode = ps_cu_node->ps_parent->best_mode;
+ if(parent_cost <=
+ child_cost_least + (ps_ctxt->i4_ol_satd_lambda * CHILD_BIAS >>
+ LAMBDA_Q_SHIFT)) //|| identical_modes)
+ {
+ WORD32 i4_q_scale_q3_mod;
+ WORD8 i1_cu_possible_qp;
+ WORD32 i4_act_factor;
+ //choose parent CU
+
+ ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt);
+
+ /* set the 16x16 non split flag */
+ ps_intra16_analyse->b1_split_flag = 0;
+
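+ /*As 16*16 has won, pick L1 8x8 qp which maps to L0 16x16 Qp.
+ NOTE(review): i4_16x16_satd[blk_cnt >> 4] is read below; other 16x16 paths read i4_8x8_satd[blk_cnt >> 2] - confirm*/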
+ ASSERT(((blk_cnt >> 4) & 3) == (blk_cnt >> 4));
+ ASSERT(ps_ed_ctb_l1->i4_16x16_satd[blk_cnt >> 4][0] != -2);
+ i1_cu_possible_qp = ihevce_cu_level_qp_mod(
+ ps_ctxt->i4_qscale,
+ ps_ed_ctb_l1->i4_16x16_satd[blk_cnt >> 4][0],
+ ps_ctxt->ld_curr_frame_8x8_log_avg[0],
+ f_strength,
+ &i4_act_factor,
+ &i4_q_scale_q3_mod,
+ ps_ctxt->ps_rc_quant_ctxt);
+
+ /* cost accumalation of best cu size candiate */
+ i8_frame_acc_satd_cost += parent_cost;
+
+ /* satd and mpm bits accumalation of best cu size candiate */
+ i4_ctb_acc_satd += ps_cu_node->ps_parent->best_satd;
+
+ /*satd/mod_qp accumulation of best cu */
+ i8_frame_acc_satd_by_modqp_q10 +=
+ ((LWORD64)ps_cu_node->ps_parent->best_satd
+ << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) /
+ i4_q_scale_q3_mod;
+
+ /* Accumulate mode bits of the selected parent CU */
+ i8_frame_acc_mode_bits_cost += ps_cu_node->ps_parent->u2_mode_bits_cost;
+
+ blk_cnt += 4;
+ ps_ed_blk_l1 += 4;
+ //ps_row_cu++;
+ }
+ else
+ {
+ //choose child CU
+ WORD8 i1_cu_possible_qp;
+ WORD32 i4_act_factor;
+ WORD32 i4_q_scale_q3_mod;
+
+ ASSERT(((blk_cnt >> 2) & 0xF) == (blk_cnt >> 2));
+ ASSERT(ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][1] != -2);
+ i1_cu_possible_qp = ihevce_cu_level_qp_mod(
+ ps_ctxt->i4_qscale,
+ ps_ed_ctb_l1->i4_8x8_satd[blk_cnt >> 2][1],
+ ps_ctxt->ld_curr_frame_8x8_log_avg[1],
+ f_strength,
+ &i4_act_factor,
+ &i4_q_scale_q3_mod,
+ ps_ctxt->ps_rc_quant_ctxt);
+
+ /* set the 16x16 split flag */
+ ps_intra16_analyse->b1_split_flag = 1;
+
+ for(j = 0; j < 4; j++)
+ {
+ ihevce_update_cand_list(
+ ps_cu_node->ps_sub_cu[j], ps_ed_blk_l1, ps_ctxt);
+
+ if((IHEVCE_QUALITY_P3 > i4_quality_preset))
+ {
+ WORD32 k;
+ intra8_analyse_t *ps_intra8_analyse;
+ ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[j];
+
+ for(k = 0; k < 4; k++)
+ {
+ /* Populate best 3 nxn modes */
+ ps_intra8_analyse->au1_4x4_best_modes[k][0] =
+ ps_cu_node->ps_sub_cu[j]->au1_best_mode_4tu[0];
+ ps_intra8_analyse->au1_4x4_best_modes[k][1] =
+ ps_cu_node->ps_sub_cu[j]
+ ->au1_best_mode_4tu[1]; //(ps_ed + 1)->best_mode;
+ ps_intra8_analyse->au1_4x4_best_modes[k][2] =
+ ps_cu_node->ps_sub_cu[j]
+ ->au1_best_mode_4tu[2]; //(ps_ed + 2)->best_mode;
+ ps_intra8_analyse->au1_4x4_best_modes[k][3] = 255;
+ }
+ }
+ /*accum satd/qp for all child block*/
+ i8_frame_acc_satd_by_modqp_q10 +=
+ ((LWORD64)child_satd[j]
+ << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) /
+ i4_q_scale_q3_mod;
+
+ /* Accumalate mode bits for all child blocks */
+ i8_frame_acc_mode_bits_cost +=
+ ps_cu_node->ps_sub_cu[j]->u2_mode_bits_cost;
+
+ /* satd and mpm bits accumalation of best cu size candiate */
+ i4_ctb_acc_satd += child_satd[j];
+
+ blk_cnt += 1;
+ ps_ed_blk_l1 += 1;
+ //ps_row_cu++;
+ }
+
+ /* cost accumalation of best cu size candiate */
+ i8_frame_acc_satd_cost += child_cost_least;
+ }
+
+ } //else of EIID
+#endif
+ } // if(merge_16x16_l1)
+ /* MAX CU SIZE 8x8 */
+ else
+ {
+#if IP_DBG_L1_l2
+ for(i = 0; i < 4; i++)
+ {
+ ps_cu_node->ps_parent->u1_cu_size = 8;
+ ps_cu_node->ps_parent->u2_x0 =
+ gau1_cu_pos_x[blk_cnt]; /* Populate properly */
+ ps_cu_node->ps_parent->u2_y0 =
+ gau1_cu_pos_y[blk_cnt]; /* Populate properly */
+ ps_cu_node->ps_parent->best_mode = ps_ed_blk_l1->best_mode;
+
+ ihevce_update_cand_list(ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt);
+ blk_cnt++;
+ ps_ed_blk_l1++;
+ ps_row_cu++;
+ merge_64x64 = 0;
+ }
+#else
+
+ /* EIID: Skip all 4 8x8 block if L1 decisions says skip intra */
+ if(ps_ed_blk_l1->intra_or_inter == 2 && (ps_ctxt->i4_slice_type != ISLICE))
+ {
+ WORD32 i4_q_scale_q3_mod;
+ WORD8 i1_cu_possible_qp;
+ WORD32 i4_act_factor;
+
+ merge_64x64 = 0;
+
+ ps_intra32_analyse->b1_merge_flag = 0;
+
+ ps_intra16_analyse->au1_best_modes_8x8_tu[0] = 255;
+ ps_intra16_analyse->au1_best_modes_8x8_tu[1] = 255;
+ ps_intra16_analyse->au1_best_modes_8x8_tu[2] = 255;
+
+ ps_intra16_analyse->au1_best_modes_16x16_tu[0] = 255;
+ ps_intra16_analyse->au1_best_modes_16x16_tu[1] = 255;
+ ps_intra16_analyse->au1_best_modes_16x16_tu[2] = 255;
+ ps_intra16_analyse->b1_split_flag = 1;
+ ps_intra16_analyse->b1_valid_cu = 0;
+ ps_intra16_analyse->b1_merge_flag = 0;
+
+ ps_intra16_analyse->i4_best_intra_cost = MAX_INTRA_COST_IPE;
+
+ for(i = 0; i < 4; i++)
+ {
+ intra8_analyse_t *ps_intra8_analyse;
+ WORD32 ctr_sub_cu;
+
+ cu_pos_x = gau1_cu_pos_x[blk_cnt];
+ cu_pos_y = gau1_cu_pos_y[blk_cnt];
+
+ if((cu_pos_x < num_8x8_blks_x) && (cu_pos_y < num_8x8_blks_y))
+ {
+ ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[i];
+
+ ps_intra8_analyse->b1_valid_cu = 0;
+ ps_intra8_analyse->b1_enable_nxn = 0;
+ ps_intra8_analyse->au1_4x4_best_modes[0][0] = 255;
+ ps_intra8_analyse->au1_4x4_best_modes[1][0] = 255;
+ ps_intra8_analyse->au1_4x4_best_modes[2][0] = 255;
+ ps_intra8_analyse->au1_4x4_best_modes[3][0] = 255;
+ ps_intra8_analyse->au1_best_modes_4x4_tu[0] = 255;
+ ps_intra8_analyse->au1_best_modes_8x8_tu[0] = 255;
+ ps_intra8_analyse->i4_best_intra_cost = MAX_INTRA_COST_IPE;
+
+ ps_cu_node->ps_parent->u1_cu_size = 8;
+ ps_cu_node->ps_parent->u2_x0 =
+ gau1_cu_pos_x[blk_cnt]; /* Populate properly */
+ ps_cu_node->ps_parent->u2_y0 =
+ gau1_cu_pos_y[blk_cnt]; /* Populate properly */
+ ps_cu_node->ps_parent->best_mode =
+ INTRA_DC; //ps_ed_blk_l1->best_mode;
+
+ /* fill in the first modes as invalid */
+
+ ps_cu_node->ps_parent->au1_best_mode_1tu[0] = INTRA_DC;
+ ps_cu_node->ps_parent->au1_best_mode_1tu[1] =
+ INTRA_DC; //for safery. Since update_cand_list will set num_modes as 3
+ ps_cu_node->ps_parent->au1_best_mode_1tu[2] = INTRA_DC;
+
+ ps_cu_node->ps_parent->au1_best_mode_4tu[0] = INTRA_DC;
+ ps_cu_node->ps_parent->au1_best_mode_4tu[1] = INTRA_DC;
+ ps_cu_node->ps_parent->au1_best_mode_4tu[2] = INTRA_DC;
+
+ ihevce_update_cand_list(
+ ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt);
+
+ //ps_row_cu->s_cu_intra_cand.b6_num_intra_cands = 0;
+ //ps_row_cu->u1_num_intra_rdopt_cands = 0;
+
+ for(ctr_sub_cu = 0; ctr_sub_cu < 4; ctr_sub_cu++)
+ {
+ ps_cu_node->ps_sub_cu[ctr_sub_cu]->au1_best_mode_1tu[0] =
+ INTRA_DC;
+ ps_cu_node->ps_sub_cu[ctr_sub_cu]->au1_best_mode_4tu[0] =
+ INTRA_DC;
+ ps_cu_node->ps_sub_cu[ctr_sub_cu]->au4_best_cost_1tu[0] =
+ MAX_INTRA_COST_IPE;
+
+ ps_cu_node->ps_sub_cu[ctr_sub_cu]->au4_best_cost_4tu[0] =
+ MAX_INTRA_COST_IPE;
+ ps_cu_node->ps_sub_cu[ctr_sub_cu]->best_cost =
+ MAX_INTRA_COST_IPE;
+ }
+
+ pi4_intra_8_cost[(i & 1) + (MAX_CU_IN_CTB_ROW * (i >> 1))] =
+ MAX_INTRA_COST_IPE;
+
+ ASSERT(((blk_cnt >> 2) & 0xF) == (blk_cnt >> 2));
+ ASSERT(ps_ed_ctb_l1->i4_8x8_satd[(blk_cnt >> 2)][1] != -2);
+ i1_cu_possible_qp = ihevce_cu_level_qp_mod(
+ ps_ctxt->i4_qscale,
+ ps_ed_ctb_l1->i4_8x8_satd[(blk_cnt >> 2)][1],
+ ps_ctxt->ld_curr_frame_8x8_log_avg[1],
+ f_strength,
+ &i4_act_factor,
+ &i4_q_scale_q3_mod,
+ ps_ctxt->ps_rc_quant_ctxt);
+
+ /* set neighbours even if intra is not evaluated, since source is always available. */
+ ihevce_set_nbr_map(
+ ps_ctxt->pu1_ctb_nbr_map,
+ ps_ctxt->i4_nbr_map_strd,
+ ps_cu_node->ps_parent->u2_x0 << 1,
+ ps_cu_node->ps_parent->u2_y0 << 1,
+ (ps_cu_node->ps_parent->u1_cu_size >> 2),
+ 1);
+
+ //ps_row_cu++;
+ }
+ blk_cnt++;
+ ps_ed_blk_l1++;
+ }
+ }
+ else
+ {
+ //cu_intra_cand_t *ps_cu_intra_cand;
+ WORD8 i1_cu_possible_qp;
+ WORD32 i4_act_factor;
+ WORD32 i4_q_scale_q3_mod;
+
+ ASSERT(((blk_cnt >> 2) & 0xF) == (blk_cnt >> 2));
+ ASSERT(ps_ed_ctb_l1->i4_8x8_satd[(blk_cnt >> 2)][1] != -2);
+ i1_cu_possible_qp = ihevce_cu_level_qp_mod(
+ ps_ctxt->i4_qscale,
+ ps_ed_ctb_l1->i4_8x8_satd[(blk_cnt >> 2)][1],
+ ps_ctxt->ld_curr_frame_8x8_log_avg[1],
+ f_strength,
+ &i4_act_factor,
+ &i4_q_scale_q3_mod,
+ ps_ctxt->ps_rc_quant_ctxt);
+
+ /* 64x64 merge is not possible */
+ merge_64x64 = 0;
+
+ ps_intra32_analyse->b1_merge_flag = 0;
+
+ ps_intra16_analyse->b1_merge_flag = 0;
+
+ /* by default 16x16 modes are set to default values DC and Planar */
+ ps_intra16_analyse->au1_best_modes_8x8_tu[0] = 0;
+ ps_intra16_analyse->au1_best_modes_8x8_tu[1] = 1;
+ ps_intra16_analyse->au1_best_modes_8x8_tu[2] = 255;
+
+ ps_intra16_analyse->au1_best_modes_16x16_tu[0] = 0;
+ ps_intra16_analyse->au1_best_modes_16x16_tu[1] = 1;
+ ps_intra16_analyse->au1_best_modes_16x16_tu[2] = 255;
+ ps_intra16_analyse->b1_split_flag = 1;
+ ps_intra16_analyse->b1_valid_cu = 1;
+
+ for(i = 0; i < 4; i++)
+ {
+ intra8_analyse_t *ps_intra8_analyse;
+ cu_pos_x = gau1_cu_pos_x[blk_cnt];
+ cu_pos_y = gau1_cu_pos_y[blk_cnt];
+ if((cu_pos_x < num_8x8_blks_x) && (cu_pos_y < num_8x8_blks_y))
+ {
+ //ps_cu_intra_cand = &ps_row_cu->s_cu_intra_cand;
+ //ps_cu_node->ps_parent->best_cost = MAX_INTRA_COST_IPE;
+
+ //ps_cu_node->ps_parent->best_mode = ps_ed_blk_l1->best_mode;
+
+ child_cost_least = 0;
+
+ ps_intra8_analyse = &ps_intra16_analyse->as_intra8_analyse[i];
+ ps_cu_node->ps_parent->u1_cu_size = 8;
+ ps_cu_node->ps_parent->u2_x0 =
+ gau1_cu_pos_x[blk_cnt]; /* Populate properly */
+ ps_cu_node->ps_parent->u2_y0 =
+ gau1_cu_pos_y[blk_cnt]; /* Populate properly */
+
+ //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, sizeof(ps_ctxt->ai1_ctb_mode_map));
+
+ /*EARLY DECISION 8x8 block */
+ ihevce_pu_calc_8x8_blk(
+ ps_curr_src, ps_ctxt, ps_cu_node, ps_ctxt->ps_func_selector);
+ for(j = 0; j < 4; j++)
+ {
+ child_cost_least += ps_cu_node->ps_sub_cu[j]->best_cost;
+ child_satd[j] = ps_cu_node->ps_sub_cu[j]->best_satd;
+ }
+
+ /* Based on the flag, CU = 4TU modes decision can be disabled, CU = 4PU is retained */
+ if(0 == ps_ctxt->u1_disable_child_cu_decide)
+ {
+ ihevce_set_nbr_map(
+ ps_ctxt->pu1_ctb_nbr_map,
+ ps_ctxt->i4_nbr_map_strd,
+ ps_cu_node->ps_parent->u2_x0 << 1,
+ ps_cu_node->ps_parent->u2_y0 << 1,
+ (ps_cu_node->ps_parent->u1_cu_size >> 2),
+ 0);
+
+ //memcpy(ps_ctxt->ai1_ctb_mode_map_temp, ps_ctxt->ai1_ctb_mode_map, sizeof(ps_ctxt->ai1_ctb_mode_map));
+
+ /* Eval for TUSize = CuSize */
+ ihevce_mode_eval_filtering(
+ ps_cu_node->ps_parent,
+ ps_cu_node,
+ ps_ctxt,
+ ps_curr_src,
+ 26,
+ &ps_cu_node->ps_parent->au4_best_cost_1tu[0],
+ &ps_cu_node->ps_parent->au1_best_mode_1tu[0],
+ step2_bypass,
+ 1);
+
+ if(i4_enable_1cu_4tu)
+ {
+ /* Eval for TUSize = CuSize/2 */
+ ihevce_mode_eval_filtering(
+ ps_cu_node->ps_parent,
+ ps_cu_node,
+ ps_ctxt,
+ ps_curr_src,
+ 26,
+ &ps_cu_node->ps_parent->au4_best_cost_4tu[0],
+ &ps_cu_node->ps_parent->au1_best_mode_4tu[0],
+ step2_bypass,
+ 0);
+ }
+ else
+ {
+ /* 4TU not evaluated : 4tu modes set same as 1tu modes */
+ memcpy(
+ &ps_cu_node->ps_parent->au1_best_mode_4tu[0],
+ &ps_cu_node->ps_parent->au1_best_mode_1tu[0],
+ NUM_BEST_MODES);
+
+ /* 4TU not evaluated : currently 4tu cost set same as 1tu cost */
+ memcpy(
+ &ps_cu_node->ps_parent->au4_best_cost_4tu[0],
+ &ps_cu_node->ps_parent->au4_best_cost_1tu[0],
+ NUM_BEST_MODES * sizeof(WORD32));
+ }
+
+ /* Update parent cost */
+ parent_cost =
+ MIN(ps_cu_node->ps_parent->au4_best_cost_4tu[0],
+ ps_cu_node->ps_parent->au4_best_cost_1tu[0]);
+
+ /* Select the best mode to be populated as top and left nbr depending on the
+ 4tu and 1tu cost */
+ if(ps_cu_node->ps_parent->au4_best_cost_4tu[0] >
+ ps_cu_node->ps_parent->au4_best_cost_1tu[0])
+ {
+ ps_cu_node->ps_parent->best_mode =
+ ps_cu_node->ps_parent->au1_best_mode_1tu[0];
+ }
+ else
+ {
+ ps_cu_node->ps_parent->best_mode =
+ ps_cu_node->ps_parent->au1_best_mode_4tu[0];
+ }
+ }
+
+ /* set the CU valid flag */
+ ps_intra8_analyse->b1_valid_cu = 1;
+ ps_intra8_analyse->b1_enable_nxn = 0;
+
+ /* storing the modes to intra 8 analyse */
+
+ /* store the best 8x8 modes 8x8 tu */
+ memcpy(
+ &ps_intra8_analyse->au1_best_modes_8x8_tu[0],
+ &ps_cu_node->ps_parent->au1_best_mode_1tu[0],
+ sizeof(UWORD8) * (NUM_BEST_MODES));
+ ps_intra8_analyse->au1_best_modes_8x8_tu[NUM_BEST_MODES] = 255;
+
+ /* store the best 8x8 modes 4x4 tu */
+ memcpy(
+ &ps_intra8_analyse->au1_best_modes_4x4_tu[0],
+ &ps_cu_node->ps_parent->au1_best_mode_4tu[0],
+ sizeof(UWORD8) * (NUM_BEST_MODES));
+ ps_intra8_analyse->au1_best_modes_4x4_tu[NUM_BEST_MODES] = 255;
+
+ /*As 8*8 has won, pick L1 4x4 qp which is equal to
+ L1 8x8 Qp*/
+ //ps_row_cu->u1_cu_possible_qp[0] = u1_cu_possible_qp;
+ //ps_row_cu->i4_act_factor[0][1] = i4_act_factor;
+
+ parent_best_mode = ps_cu_node->ps_parent->best_mode;
+ if(parent_cost <=
+ child_cost_least +
+ (ps_ctxt->i4_ol_satd_lambda * CHILD_BIAS >> LAMBDA_Q_SHIFT))
+ {
+ /*CU = 4TU */
+ ihevce_update_cand_list(
+ ps_cu_node->ps_parent, ps_ed_blk_l1, ps_ctxt);
+
+ /* store the child 8x8 costs */
+ pi4_intra_8_cost[(i & 1) + (MAX_CU_IN_CTB_ROW * (i >> 1))] =
+ parent_cost;
+
+ /* cost accumalation of best cu size candiate */
+ i8_frame_acc_satd_cost += parent_cost;
+
+ /*satd/mod_qp accumulation of best cu */
+ i8_frame_acc_satd_by_modqp_q10 +=
+ ((LWORD64)ps_cu_node->ps_parent->best_satd
+ << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) /
+ i4_q_scale_q3_mod;
+
+ /* Accumulate mode bits of the selected parent CU */
+ i8_frame_acc_mode_bits_cost +=
+ ps_cu_node->ps_parent->u2_mode_bits_cost;
+
+ /* satd and mpm bits accumalation of best cu size candiate */
+ i4_ctb_acc_satd += ps_cu_node->ps_parent->best_satd;
+
+ /* accumulate the 16x16 cost*/
+ if(MAX_INTRA_COST_IPE == *pi4_intra_16_cost)
+ {
+ *pi4_intra_16_cost = parent_cost;
+ }
+ else
+ {
+ *pi4_intra_16_cost += parent_cost;
+ }
+
+ /* accumulate the 32x32 cost*/
+ if(MAX_INTRA_COST_IPE == *pi4_intra_32_cost)
+ {
+ *pi4_intra_32_cost = parent_cost;
+ }
+ else
+ {
+ *pi4_intra_32_cost += parent_cost;
+ }
+ }
+ else
+ {
+ /*CU = 4PU*/
+ //ps_row_cu->b3_cu_pos_x = (UWORD8) ps_cu_node->ps_parent->u2_x0;
+ //ps_row_cu->b3_cu_pos_y = (UWORD8) ps_cu_node->ps_parent->u2_y0;
+ //ps_row_cu->u1_cu_size = ps_cu_node->ps_parent->u1_cu_size;
+
+ /* store the child 8x8 costs woth 4x4 pu summed cost */
+ pi4_intra_8_cost[(i & 1) + (MAX_CU_IN_CTB_ROW * (i >> 1))] =
+ (child_cost_least);
+
+ /* accumulate the 16x16 cost*/
+ if(MAX_INTRA_COST_IPE == *pi4_intra_16_cost)
+ {
+ *pi4_intra_16_cost = child_cost_least;
+ }
+ else
+ {
+ *pi4_intra_16_cost += child_cost_least;
+ }
+
+ /* cost accumalation of best cu size candiate */
+ i8_frame_acc_satd_cost += child_cost_least;
+
+ for(j = 0; j < 4; j++)
+ {
+ /*satd/qp accumualtion*/
+ i8_frame_acc_satd_by_modqp_q10 +=
+ ((LWORD64)child_satd[j]
+ << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) /
+ i4_q_scale_q3_mod;
+
+ /* Accumalate mode bits for all child blocks */
+ i8_frame_acc_mode_bits_cost +=
+ ps_cu_node->ps_sub_cu[j]->u2_mode_bits_cost;
+
+ /* satd and mpm bits accumalation of best cu size candiate */
+ i4_ctb_acc_satd += child_satd[j];
+ }
+
+ /* accumulate the 32x32 cost*/
+ if(MAX_INTRA_COST_IPE == *pi4_intra_32_cost)
+ {
+ *pi4_intra_32_cost = child_cost_least;
+ }
+ else
+ {
+ *pi4_intra_32_cost += child_cost_least;
+ }
+
+ ps_intra8_analyse->b1_enable_nxn = 1;
+
+ /* Insert the best 8x8 modes unconditionally */
+
+ x = ((ps_cu_node->u2_x0 << 3) >> 2) + 1;
+ y = ((ps_cu_node->u2_y0 << 3) >> 2) + 1;
+ size = ps_cu_node->u1_cu_size >> 2;
+
+ ps_ctxt->au1_ctb_mode_map[y][x] =
+ ps_cu_node->ps_sub_cu[0]->best_mode;
+ ps_ctxt->au1_ctb_mode_map[y][x + 1] =
+ ps_cu_node->ps_sub_cu[1]->best_mode;
+ ps_ctxt->au1_ctb_mode_map[y + 1][x] =
+ ps_cu_node->ps_sub_cu[2]->best_mode;
+ ps_ctxt->au1_ctb_mode_map[y + 1][x + 1] =
+ ps_cu_node->ps_sub_cu[3]->best_mode;
+ }
+ /* NXN mode population */
+ for(j = 0; j < 4; j++)
+ {
+ cand_mode_list[0] =
+ ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[0];
+ cand_mode_list[1] =
+ ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[1];
+ cand_mode_list[2] =
+ ps_cu_node->ps_sub_cu[j]->au1_best_mode_1tu[2];
+
+ if(1)
+ {
+ /* Populate best 3 nxn modes */
+ ps_intra8_analyse->au1_4x4_best_modes[j][0] =
+ cand_mode_list[0];
+ ps_intra8_analyse->au1_4x4_best_modes[j][1] =
+ cand_mode_list[1]; //(ps_ed + 1)->best_mode;
+ ps_intra8_analyse->au1_4x4_best_modes[j][2] =
+ cand_mode_list[2]; //(ps_ed + 2)->best_mode;
+ ps_intra8_analyse->au1_4x4_best_modes[j][3] = 255;
+
+ //memcpy(ps_intra8_analyse->au1_4x4_best_modes[j], ps_row_cu->s_cu_intra_cand.au1_intra_luma_modes_nxn[j], 4);
+ }
+ /* For HQ, all 35 modes to be used for RDOPT, removed from here for memory clean-up */
+
+ else /* IHEVCE_QUALITY_P0 == i4_quality_preset */
+ {
+ /* To indicate to enc loop that NXN is enabled in HIGH QUALITY fior CU 8x8*/
+ ps_intra8_analyse->au1_4x4_best_modes[j][0] = 0;
+ }
+
+ ps_intra8_analyse
+ ->au1_4x4_best_modes[j][MAX_INTRA_CU_CANDIDATES] = 255;
+ }
+
+ //ps_row_cu++;
+ }
+ else
+ {
+ /* For Incomplete CTB, 16x16 is not valid */
+ ps_intra16_analyse->b1_valid_cu = 0;
+ }
+ blk_cnt++;
+ ps_ed_blk_l1++;
+ }
+ //ps_ed_blk_l2 ++;
+ } //else of EIID
+#endif
+ }
+ }
+ else
+ {
+ /* For incomplete CTB, init valid CU to 0 */
+ ps_ed_blk_l1++;
+ ps_intra32_analyse->b1_valid_cu = 0;
+ ps_intra16_analyse[0].b1_valid_cu = 0;
+ blk_cnt++;
+ merge_64x64 = 0;
+ }
+ } while(blk_cnt != MAX_CTB_SIZE);
+ /* if 64x64 merge is possible then check for 32x32 having same best modes */
+ if(1 == merge_64x64)
+ {
+ WORD32 act_mode = au1_best_32x32_modes[0];
+
+ ps_ed_blk_l2 = ps_ed_l2_ctb;
+ best_mode = ps_ed_blk_l2->best_mode;
+ merge_64x64 =
+ ((act_mode == au1_best_32x32_modes[0]) + (act_mode == au1_best_32x32_modes[1]) +
+ (act_mode == au1_best_32x32_modes[2]) +
+ (act_mode == au1_best_32x32_modes[3]) ==
+ 4);
+ if(merge_64x64 == 1)
+ best_mode = au1_best_32x32_modes[0];
+ else
+ best_mode = ps_ed_blk_l2->best_mode;
+ /* All 32x32 costs are accumalated to 64x64 cost */
+ ps_l0_ipe_out_ctb->i4_best64x64_intra_cost = 0;
+ for(i = 0; i < 4; i++)
+ {
+ ps_l0_ipe_out_ctb->i4_best64x64_intra_cost +=
+ ps_l0_ipe_out_ctb->ai4_best32x32_intra_cost[i];
+ }
+
+ /* If all modes of 32x32 block is not same */
+ if(0 == merge_64x64)
+ {
+ /*Compute CHILD cost for 32x32 */
+ WORD32 child_cost_64x64 = au4_best_32x32_cost[0] + au4_best_32x32_cost[1] +
+ au4_best_32x32_cost[2] + au4_best_32x32_cost[3];
+ WORD32 cost = MAX_INTRA_COST_IPE;
+
+ WORD32 best_mode_temp = 0;
+ /*Compute 64x64 cost for each mode of 32x32*/
+ for(i = 0; i < 4; i++)
+ {
+ WORD32 mode = au1_best_32x32_modes[i];
+ if(mode < 2)
+ mode = 26;
+ ps_cu_node->ps_parent->u1_cu_size = 64;
+ ps_cu_node->ps_parent->u2_x0 = gau1_cu_pos_x[0]; /* Populate properly */
+ ps_cu_node->ps_parent->u2_y0 = gau1_cu_pos_y[0]; /* Populate properly */
+
+ ihevce_set_nbr_map(
+ ps_ctxt->pu1_ctb_nbr_map,
+ ps_ctxt->i4_nbr_map_strd,
+ (ps_cu_node->ps_parent->u2_x0 << 1),
+ (ps_cu_node->ps_parent->u2_y0 << 1),
+ (ps_cu_node->ps_parent->u1_cu_size >> 2),
+ 0);
+
+ ihevce_mode_eval_filtering(
+ ps_cu_node->ps_parent,
+ ps_cu_node,
+ ps_ctxt,
+ ps_curr_src,
+ mode,
+ &ps_cu_node->ps_parent->au4_best_cost_1tu[0],
+ &ps_cu_node->ps_parent->au1_best_mode_1tu[0],
+ !step2_bypass,
+ 0);
+
+ parent_cost = ps_cu_node->ps_parent->best_cost;
+ if(cost > parent_cost)
+ {
+ cost = parent_cost;
+ best_mode_temp = ps_cu_node->ps_parent->best_mode;
+ }
+ }
+ if(cost < child_cost_64x64)
+ {
+ merge_64x64 = 1;
+ best_mode = best_mode_temp;
+
+ /* Update 64x64 cost if CU 64x64 is chosen */
+ ps_l0_ipe_out_ctb->i4_best64x64_intra_cost = cost;
+
+ /* Accumalate the least cost for CU 64x64 */
+ i8_frame_acc_satd_cost = cost;
+ i8_frame_acc_mode_bits_cost = ps_cu_node->ps_parent->u2_mode_bits_cost;
+
+ /* satd and mpm bits accumalation of best cu size candiate */
+ i4_ctb_acc_satd = ps_cu_node->ps_parent->best_satd;
+ }
+ }
+ }
+
+ if(merge_64x64)
+ {
+ WORD32 i, j;
+ intra32_analyse_t *ps_intra32_analyse;
+ intra16_analyse_t *ps_intra16_analyse;
+ WORD32 row, col;
+ WORD32 i4_q_scale_q3_mod;
+ WORD8 i1_cu_possible_qp;
+ WORD32 i4_act_factor;
+ //ps_row_cu = ps_curr_cu;
+ ps_ctb_out->u4_cu_split_flags = 0x0;
+ ps_ed_blk_l1 = ps_ed_l1_ctb;
+ ps_ed_blk_l2 = ps_ed_l2_ctb;
+
+ ps_l0_ipe_out_ctb->u1_split_flag = 0;
+
+ /* If CU size of 64x64 is chosen, disbale all the 16x16 flag*/
+ for(i = 0; i < 4; i++)
+ {
+ /* get the corresponding intra 32 analyse pointer use (blk_cnt / 16) */
+ /* blk cnt is in terms of 8x8 units so a 32x32 will have 16 8x8 units */
+ ps_intra32_analyse = &ps_l0_ipe_out_ctb->as_intra32_analyse[i];
+
+ for(j = 0; j < 4; j++)
+ {
+ /* get the corresponding intra 16 analyse pointer use (blk_cnt & 0xF / 4)*/
+ /* blk cnt is in terms of 8x8 units so a 16x16 will have 4 8x8 units */
+ ps_intra16_analyse = &ps_intra32_analyse->as_intra16_analyse[j];
+ ps_intra16_analyse->b1_merge_flag = 0;
+ }
+ }
+
+ /* CU size 64x64 and fill the final cu params */
+ //ps_row_cu->b3_cu_pos_x = gau1_cu_pos_x[0];
+ //ps_row_cu->b3_cu_pos_y = gau1_cu_pos_y[0];
+ //ps_row_cu->u1_cu_size = 64;
+
+ /* Candidate mode Update */
+ cand_mode_list[0] = best_mode;
+ if(cand_mode_list[0] > 1)
+ {
+ if(cand_mode_list[0] == 2)
+ {
+ cand_mode_list[1] = 34;
+ cand_mode_list[2] = 3;
+ }
+ else if(cand_mode_list[0] == 34)
+ {
+ cand_mode_list[1] = 2;
+ cand_mode_list[2] = 33;
+ }
+ else
+ {
+ cand_mode_list[1] = cand_mode_list[0] - 1;
+ cand_mode_list[2] = cand_mode_list[0] + 1;
+ }
+ //cand_mode_list[1] = ps_ed_blk_l1->nang_attr.best_mode;
+ //cand_mode_list[2] = ps_ed_blk_l1->ang_attr.best_mode;
+ }
+ else
+ {
+ cand_mode_list[0] = 0;
+ cand_mode_list[1] = 1;
+ cand_mode_list[2] = 26;
+ //cand_mode_list[2] = ps_ed_blk_l1->nang_attr.best_mode;
+ }
+
+ /* All 32x32 costs are accumalated to 64x64 cost */
+ ps_l0_ipe_out_ctb->i4_best64x64_intra_cost = 0;
+ for(i = 0; i < 4; i++)
+ {
+ ps_l0_ipe_out_ctb->i4_best64x64_intra_cost +=
+ ps_l0_ipe_out_ctb->ai4_best32x32_intra_cost[i];
+ }
+ /* by default 64x64 modes are set to default values DC and Planar */
+ ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[0] = cand_mode_list[0];
+ ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[1] = cand_mode_list[1];
+ ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[2] = cand_mode_list[2];
+ ps_l0_ipe_out_ctb->au1_best_modes_32x32_tu[3] = 255;
+
+ /* Update CTB mode map for the finalised CU */
+ x = ((ps_cu_node->u2_x0 << 3) >> 2) + 1;
+ y = ((ps_cu_node->u2_y0 << 3) >> 2) + 1;
+ size = ps_cu_node->u1_cu_size >> 2;
+
+ for(row = y; row < (y + size); row++)
+ {
+ for(col = x; col < (x + size); col++)
+ {
+ ps_ctxt->au1_ctb_mode_map[row][col] = best_mode;
+ }
+ }
+
+ ihevce_set_nbr_map(
+ ps_ctxt->pu1_ctb_nbr_map,
+ ps_ctxt->i4_nbr_map_strd,
+ (ps_cu_node->u2_x0 << 1),
+ (ps_cu_node->u2_y0 << 1),
+ (ps_cu_node->u1_cu_size >> 2),
+ 1);
+
+ /*As 64*64 has won, pick L1 32x32 qp*/
+ //ASSERT(((blk_cnt>>6) & 0xF) == (blk_cnt>>6));
+ //ASSERT((blk_cnt>>6) == 0);
+ ASSERT(ps_ed_ctb_l1->i4_32x32_satd[0][0] != -2);
+ i1_cu_possible_qp = ihevce_cu_level_qp_mod(
+ ps_ctxt->i4_qscale,
+ ps_ed_ctb_l1->i4_32x32_satd[0][0],
+ ps_ctxt->ld_curr_frame_32x32_log_avg[0],
+ f_strength,
+ &i4_act_factor,
+ &i4_q_scale_q3_mod,
+ ps_ctxt->ps_rc_quant_ctxt);
+
+ i8_frame_acc_satd_by_modqp_q10 =
+ (i8_frame_acc_satd_cost << (SATD_BY_ACT_Q_FAC + QSCALE_Q_FAC_3)) /
+ i4_q_scale_q3_mod;
+ /* Increment pointers */
+ ps_ed_blk_l1 += 64;
+ ps_ed_blk_l2 += 16;
+ //ps_row_cu++;
+ }
+ }
+
+ //ps_ctb_out->u1_num_cus_in_ctb = (UWORD8)(ps_row_cu - ps_curr_cu);
+
+ {
+ WORD32 i4_i, i4_j;
+ WORD32 dummy;
+ WORD8 i1_cu_qp;
+ (void)i1_cu_qp;
+ /*MAM_VAR_L1*/
+ for(i4_j = 0; i4_j < 2; i4_j++)
+ {
+ i4_mod_factor_num = ps_ctxt->ai4_mod_factor_derived_by_variance[i4_j];
+ f_strength = ps_ctxt->f_strength;
+
+ //i4_mod_factor_num = 4;
+
+ ps_ed_blk_l1 = ps_ed_l1_ctb;
+ ps_ed_blk_l2 = ps_ed_l2_ctb;
+ //ps_row_cu = ps_curr_cu;
+
+ /*Valid only for complete CTB */
+ if((64 == u1_curr_ctb_wdt) && (64 == u1_curr_ctb_hgt))
+ {
+ ASSERT(ps_ed_ctb_l1->i4_32x32_satd[0][0] != -2);
+ ASSERT(ps_ed_ctb_l1->i4_32x32_satd[0][1] != -2);
+ ASSERT(ps_ed_ctb_l1->i4_32x32_satd[0][2] != -2);
+ ASSERT(ps_ed_ctb_l1->i4_32x32_satd[0][3] != -2);
+
+ i1_cu_qp = ihevce_cu_level_qp_mod(
+ ps_ctxt->i4_qscale,
+ ps_ed_ctb_l1->i4_32x32_satd[0][0],
+ ps_ctxt->ld_curr_frame_32x32_log_avg[0],
+ f_strength,
+ &ps_l0_ipe_out_ctb->i4_64x64_act_factor[0][i4_j],
+ &dummy,
+ ps_ctxt->ps_rc_quant_ctxt);
+
+ i1_cu_qp = ihevce_cu_level_qp_mod(
+ ps_ctxt->i4_qscale,
+ ps_ed_ctb_l1->i4_32x32_satd[0][1],
+ ps_ctxt->ld_curr_frame_32x32_log_avg[1],
+ f_strength,
+ &ps_l0_ipe_out_ctb->i4_64x64_act_factor[1][i4_j],
+ &dummy,
+ ps_ctxt->ps_rc_quant_ctxt);
+ i1_cu_qp = ihevce_cu_level_qp_mod(
+ ps_ctxt->i4_qscale,
+ ps_ed_ctb_l1->i4_32x32_satd[0][2],
+ ps_ctxt->ld_curr_frame_32x32_log_avg[2],
+ f_strength,
+ &ps_l0_ipe_out_ctb->i4_64x64_act_factor[2][i4_j],
+ &dummy,
+ ps_ctxt->ps_rc_quant_ctxt);
+
+ i1_cu_qp = ihevce_cu_level_qp_mod(
+ ps_ctxt->i4_qscale,
+ ps_ed_ctb_l1->i4_32x32_satd[0][3],
+ 2.0 + ps_ctxt->ld_curr_frame_16x16_log_avg[0],
+ f_strength,
+ &ps_l0_ipe_out_ctb->i4_64x64_act_factor[3][i4_j],
+ &dummy,
+ ps_ctxt->ps_rc_quant_ctxt);
+
+ ASSERT(ps_l0_ipe_out_ctb->i4_64x64_act_factor[3][i4_j] > 0);
+ }
+ else
+ {
+ ps_l0_ipe_out_ctb->i4_64x64_act_factor[0][i4_j] = 1024;
+ ps_l0_ipe_out_ctb->i4_64x64_act_factor[1][i4_j] = 1024;
+ ps_l0_ipe_out_ctb->i4_64x64_act_factor[2][i4_j] = 1024;
+ ps_l0_ipe_out_ctb->i4_64x64_act_factor[3][i4_j] = 1024;
+ }
+
+ /*Store the 8x8 Qps from L2 (in raster order) as output of intra prediction
+ for the usage by ME*/
+
+ {
+ WORD32 pos_x_32, pos_y_32, pos;
+ //WORD32 i4_incomplete_ctb_val_8;
+ pos_x_32 = u1_curr_ctb_wdt / 16;
+ pos_y_32 = u1_curr_ctb_hgt / 16;
+
+ pos = (pos_x_32 < pos_y_32) ? pos_x_32 : pos_y_32;
+
+ for(i4_i = 0; i4_i < 4; i4_i++)
+ {
+ if(i4_i < pos)
+ {
+ ASSERT(ps_ed_ctb_l1->i4_16x16_satd[i4_i][0] != -2);
+ ASSERT(ps_ed_ctb_l1->i4_16x16_satd[i4_i][1] != -2);
+ ASSERT(ps_ed_ctb_l1->i4_16x16_satd[i4_i][2] != -2);
+ i1_cu_qp = ihevce_cu_level_qp_mod(
+ ps_ctxt->i4_qscale,
+ ps_ed_ctb_l1->i4_16x16_satd[i4_i][0],
+ ps_ctxt->ld_curr_frame_16x16_log_avg[0],
+ f_strength,
+ &ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][0][i4_j],
+ &dummy,
+ ps_ctxt->ps_rc_quant_ctxt);
+ i1_cu_qp = ihevce_cu_level_qp_mod(
+ ps_ctxt->i4_qscale,
+ ps_ed_ctb_l1->i4_16x16_satd[i4_i][1],
+ ps_ctxt->ld_curr_frame_16x16_log_avg[1],
+ f_strength,
+ &ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][1][i4_j],
+ &dummy,
+ ps_ctxt->ps_rc_quant_ctxt);
+ i1_cu_qp = ihevce_cu_level_qp_mod(
+ ps_ctxt->i4_qscale,
+ ps_ed_ctb_l1->i4_16x16_satd[i4_i][2],
+ ps_ctxt->ld_curr_frame_16x16_log_avg[2],
+ f_strength,
+ &ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][2][i4_j],
+ &dummy,
+ ps_ctxt->ps_rc_quant_ctxt);
+ }
+ else
+ {
+ /*For incomplete CTB */
+ ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][0][i4_j] = 1024;
+ ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][1][i4_j] = 1024;
+ ps_l0_ipe_out_ctb->i4_32x32_act_factor[i4_i][2][i4_j] = 1024;
+ }
+ }
+ }
+
+ /*Store the 8x8 Qps from L1 (in raster order) as output of intra prediction
+ for the usage by ME*/
+ {
+ WORD32 pos_x_16, pos_y_16, pos;
+ //WORD32 i4_incomplete_ctb_val_8;
+ pos_x_16 = u1_curr_ctb_wdt / 4;
+ pos_y_16 = u1_curr_ctb_hgt / 4;
+
+ pos = (pos_x_16 < pos_y_16) ? pos_x_16 : pos_y_16;
+ for(i4_i = 0; i4_i < 16; i4_i++)
+ {
+ if(i4_i < pos)
+ {
+ ASSERT(ps_ed_ctb_l1->i4_8x8_satd[i4_i][0] != -2);
+ ASSERT(ps_ed_ctb_l1->i4_8x8_satd[i4_i][1] != -2);
+ i1_cu_qp = ihevce_cu_level_qp_mod(
+ ps_ctxt->i4_qscale,
+ ps_ed_ctb_l1->i4_8x8_satd[i4_i][0],
+ ps_ctxt->ld_curr_frame_8x8_log_avg[0],
+ f_strength,
+ &ps_l0_ipe_out_ctb->i4_16x16_act_factor[i4_i][0][i4_j],
+ &dummy,
+ ps_ctxt->ps_rc_quant_ctxt);
+ i1_cu_qp = ihevce_cu_level_qp_mod(
+ ps_ctxt->i4_qscale,
+ ps_ed_ctb_l1->i4_8x8_satd[i4_i][1],
+ ps_ctxt->ld_curr_frame_8x8_log_avg[1],
+ f_strength,
+ &ps_l0_ipe_out_ctb->i4_16x16_act_factor[i4_i][1][i4_j],
+ &dummy,
+ ps_ctxt->ps_rc_quant_ctxt);
+ }
+ else
+ {
+ /*For incomplete CTB */
+ ps_l0_ipe_out_ctb->i4_16x16_act_factor[i4_i][0][i4_j] = 1024;
+ ps_l0_ipe_out_ctb->i4_16x16_act_factor[i4_i][1][i4_j] = 1024;
+ }
+ }
+ }
+ } //for loop
+
+ /* Accumalate the cost of ctb to the total cost */
+ ps_ctxt->i8_frame_acc_satd_cost += i8_frame_acc_satd_cost;
+ ps_ctxt->i8_frame_acc_satd_by_modqp_q10 += i8_frame_acc_satd_by_modqp_q10;
+
+ ps_ctxt->i8_frame_acc_mode_bits_cost += i8_frame_acc_mode_bits_cost;
+
+ /* satd and mpm bits accumalation of best cu size candiate for the ctb */
+ ps_l0_ipe_out_ctb->i4_ctb_acc_satd = i4_ctb_acc_satd;
+ ps_l0_ipe_out_ctb->i4_ctb_acc_mpm_bits = i8_frame_acc_mode_bits_cost;
+
+ ps_ctxt->i8_frame_acc_satd += i4_ctb_acc_satd;
+ }
+
+ {
+ WORD32 ctr_8x8;
+ for(ctr_8x8 = 0; ctr_8x8 < (MAX_CU_IN_CTB >> 2); ctr_8x8++)
+ {
+ /*Accumalate activity factor for Intra and Inter*/
+ if(ps_l0_ipe_out_ctb->ai4_best_sad_cost_8x8_l1_ipe[ctr_8x8] <
+ ps_ed_ctb_l1->i4_sad_me_for_ref[ctr_8x8])
+ {
+ ps_l0_ipe_out_ctb->ai4_8x8_act_factor[ctr_8x8] =
+ ps_l0_ipe_out_ctb->i4_16x16_act_factor[ctr_8x8][1][0];
+ }
+ else
+ {
+ ps_l0_ipe_out_ctb->ai4_8x8_act_factor[ctr_8x8] =
+ ps_l0_ipe_out_ctb->i4_16x16_act_factor[ctr_8x8][1][0];
+ }
+
+ /*Accumalate activity factor at frame level*/
+ ps_ctxt->i8_frame_acc_act_factor += ps_l0_ipe_out_ctb->ai4_8x8_act_factor[ctr_8x8];
+ }
+ }
+ return;
+}
+
+WORD32 ihevce_nxn_sad_computer(
+    UWORD8 *pu1_inp, WORD32 i4_inp_stride, UWORD8 *pu1_ref, WORD32 i4_ref_stride, WORD32 trans_size)
+{
+    /* Sum of absolute differences between two square blocks of side trans_size; */
+    /* each buffer is walked row by row using its own stride */
+    WORD32 acc_sad = 0;
+    WORD32 row, col;
+
+    for(row = 0; row < trans_size; row++)
+    {
+        UWORD8 *pu1_inp_row = pu1_inp + (row * i4_inp_stride);
+        UWORD8 *pu1_ref_row = pu1_ref + (row * i4_ref_stride);
+
+        for(col = 0; col < trans_size; col++)
+        {
+            acc_sad += ABS((WORD32)pu1_inp_row[col] - (WORD32)pu1_ref_row[col]);
+        }
+    }
+
+    return acc_sad;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_mode_eval_filtering \endif
+*
+* \brief
+* Evaluates the best 3 intra modes for the given CU size using candidate modes
+* from the early decision structure, the MPM candidates, and modes 0 and 1
+*
+* \param[in,out] ps_cu_node : pointer to MAX cu node info buffer; best_mode, best_cost, best_satd and u2_mode_bits_cost are written
+* \param[in] ps_child_cu_node : pointer to (MAX - 1) cu node info buffer
+* \param[in] ps_ctxt : pointer to IPE context struct
+* \param[in] ps_curr_src : pointer to src pixels struct
+* \param[in] best_amode : best angular mode from l1 layer or
+ from (MAX - 1) CU mode
+* \param[out] best_costs_4x4 : pointer to 3 best cost buffer
+* \param[out] best_modes_4x4 : pointer to 3 best mode buffer
+* \param[in] step2_bypass : if 0, best_amode (and best_amode -/+ 2 for presets <= IHEVCE_QUALITY_P3) is evaluated
+* if non-zero, the best modes of the 4 sub CUs of ps_child_cu_node are evaluated
+* \param[in] tu_eq_cu : non-zero if tu size is same as cu, 0 if tu size is cu/2
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_mode_eval_filtering(
+ ihevce_ipe_cu_tree_t *ps_cu_node,
+ ihevce_ipe_cu_tree_t *ps_child_cu_node,
+ ihevce_ipe_ctxt_t *ps_ctxt,
+ iv_enc_yuv_buf_t *ps_curr_src,
+ WORD32 best_amode,
+ WORD32 *best_costs_4x4,
+ UWORD8 *best_modes_4x4,
+ WORD32 step2_bypass,
+ WORD32 tu_eq_cu)
+{
+ UWORD8 *pu1_origin, *pu1_orig;
+ WORD32 src_strd = ps_curr_src->i4_y_strd;
+ WORD32 nbr_flags;
+ nbr_avail_flags_t s_nbr;
+ WORD32 trans_size = tu_eq_cu ? ps_cu_node->u1_cu_size : ps_cu_node->u1_cu_size >> 1;
+ WORD32 num_tu_in_x = tu_eq_cu ? 1 : 2;
+ WORD32 num_tu_in_y = tu_eq_cu ? 1 : 2;
+ UWORD8 mode;
+
+ WORD32 cost_ang_mode = MAX_INTRA_COST_IPE;
+ WORD32 filter_flag;
+ WORD32 cost_amode_step2[7] = { 0 };
+ /*WORD32 best_sad[5]; // NOTE_A01: Not getting consumed at present */
+ WORD32 sad = 0;
+ WORD32 cu_pos_x, cu_pos_y;
+ WORD32 temp;
+ WORD32 i = 0, j, k, i_end, z;
+ //WORD32 row, col, size;
+ UWORD8 *pu1_ref;
+ WORD32 xA, yA, xB, yB;
+ WORD32 top_intra_mode;
+ WORD32 left_intra_mode;
+ UWORD8 *pu1_ref_orig = &ps_ctxt->au1_ref_samples[0];
+ UWORD8 *pu1_ref_filt = &ps_ctxt->au1_filt_ref_samples[0];
+
+ UWORD8 modes_4x4[5] = { 0, 1, 2, 3, 4 };
+ WORD32 count;
+
+ pf_ipe_res_trans_had apf_resd_trns_had[4];
+
+ WORD32 cand_mode_satd_list[3];
+ ihevc_intra_pred_luma_ref_substitution_ft *ihevc_intra_pred_luma_ref_substitution_fptr;
+
+ ihevc_intra_pred_luma_ref_substitution_fptr =
+ ps_ctxt->ps_func_selector->ihevc_intra_pred_luma_ref_substitution_fptr;
+
+ apf_resd_trns_had[0] = ps_ctxt->s_cmn_opt_func.pf_HAD_4x4_8bit;
+ apf_resd_trns_had[1] = ps_ctxt->s_cmn_opt_func.pf_HAD_8x8_8bit;
+ apf_resd_trns_had[2] = ps_ctxt->s_cmn_opt_func.pf_HAD_16x16_8bit;
+ apf_resd_trns_had[3] = ps_ctxt->s_cmn_opt_func.pf_HAD_32x32_8bit;
+
+ /* initialize modes_to_eval as zero */
+ memset(&ps_ctxt->au1_modes_to_eval, 0, MAX_NUM_IP_MODES);
+
+ /* Compute the Parent Cost */
+
+ /* Pointer to top-left of the CU - y0,x0 in 8x8 granularity */
+ pu1_orig = (UWORD8 *)(ps_curr_src->pv_y_buf) + ((ps_cu_node->u2_y0 << 3) * src_strd) +
+ (ps_cu_node->u2_x0 << 3);
+
+ /* Get position of CU within CTB at 4x4 granularity */
+ cu_pos_x = ps_cu_node->u2_x0 << 1;
+ cu_pos_y = ps_cu_node->u2_y0 << 1;
+
+ /* get the neighbour availability flags */
+ ihevce_get_only_nbr_flag(
+ &s_nbr,
+ ps_ctxt->pu1_ctb_nbr_map,
+ ps_ctxt->i4_nbr_map_strd,
+ cu_pos_x,
+ cu_pos_y,
+ trans_size >> 2,
+ trans_size >> 2);
+
+ /* Locate the left (xA, yA) and top (xB, yB) neighbour entries in the CTB mode map */
+ xA = (ps_cu_node->u2_x0 << 3) >> 2;
+ yA = ((ps_cu_node->u2_y0 << 3) >> 2) + 1;
+ xB = xA + 1;
+ yB = yA - 1;
+ left_intra_mode = ps_ctxt->au1_ctb_mode_map[yA][xA];
+ top_intra_mode = ps_ctxt->au1_ctb_mode_map[yB][xB];
+ /* call the function which populates the per-mode bits cost tables and cand_mode_satd_list */
+
+ ihevce_intra_populate_mode_bits_cost_bracketing(
+ top_intra_mode,
+ left_intra_mode,
+ s_nbr.u1_top_avail,
+ s_nbr.u1_left_avail,
+ ps_cu_node->u2_y0,
+ &ps_ctxt->au2_mode_bits_satd_cost[0],
+ &ps_ctxt->au2_mode_bits_satd[0],
+ ps_ctxt->i4_ol_satd_lambda,
+ cand_mode_satd_list);
+
+ for(k = 0; k < num_tu_in_y; k++)
+ {
+ for(j = 0; j < num_tu_in_x; j++)
+ {
+ /* get the neighbour availability flags */
+ nbr_flags = ihevce_get_nbr_intra(
+ &s_nbr,
+ ps_ctxt->pu1_ctb_nbr_map,
+ ps_ctxt->i4_nbr_map_strd,
+ cu_pos_x + ((j) * (trans_size >> 2)),
+ cu_pos_y + ((k) * (trans_size >> 2)),
+ trans_size >> 2);
+
+ pu1_origin = pu1_orig + (k * trans_size * src_strd) + (j * trans_size);
+
+ /* Create reference samples array */
+ ihevc_intra_pred_luma_ref_substitution_fptr(
+ pu1_origin - src_strd - 1,
+ pu1_origin - src_strd,
+ pu1_origin - 1,
+ src_strd,
+ trans_size,
+ nbr_flags,
+ pu1_ref_orig,
+ 0);
+
+ /* Perform reference samples filtering */
+ ihevce_intra_pred_ref_filtering(pu1_ref_orig, trans_size, pu1_ref_filt);
+
+ ihevce_set_nbr_map(
+ ps_ctxt->pu1_ctb_nbr_map,
+ ps_ctxt->i4_nbr_map_strd,
+ cu_pos_x + ((j) * (trans_size >> 2)),
+ cu_pos_y + ((k) * (trans_size >> 2)),
+ (trans_size >> 2),
+ 1);
+
+ pu1_ref_orig += (4 * MAX_CTB_SIZE + 1);
+ pu1_ref_filt += (4 * MAX_CTB_SIZE + 1);
+ }
+ }
+
+ /* Revaluation for angular mode */
+ //if(ps_ed_blk->ang_attr.mode_present == 1)
+ //if(((best_amode & 0x1) != 1))
+
+ {
+ WORD32 u1_trans_idx = trans_size >> 3;
+ if(trans_size == 32)
+ u1_trans_idx = 3;
+ //best_amode = ps_ed_blk->ang_attr.best_mode;
+
+ i = 0;
+ if(!step2_bypass)
+ {
+ /* Around best level 4 angular mode, search for best level 2 mode */
+ ASSERT((best_amode >= 2) && (best_amode <= 34));
+
+ if(ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P3)
+ {
+ if(best_amode >= 4)
+ ps_ctxt->au1_modes_to_eval_temp[i++] = best_amode - 2;
+ }
+
+ ps_ctxt->au1_modes_to_eval_temp[i++] = best_amode;
+
+ if(ps_ctxt->i4_quality_preset <= IHEVCE_QUALITY_P3)
+ {
+ if(best_amode <= 32)
+ ps_ctxt->au1_modes_to_eval_temp[i++] = best_amode + 2;
+ }
+ }
+ else
+ {
+ ps_ctxt->au1_modes_to_eval_temp[i++] = ps_child_cu_node->ps_sub_cu[0]->best_mode;
+ ps_ctxt->au1_modes_to_eval_temp[i++] = ps_child_cu_node->ps_sub_cu[1]->best_mode;
+ ps_ctxt->au1_modes_to_eval_temp[i++] = ps_child_cu_node->ps_sub_cu[2]->best_mode;
+ ps_ctxt->au1_modes_to_eval_temp[i++] = ps_child_cu_node->ps_sub_cu[3]->best_mode;
+ }
+
+ /* Add the left and top MPM modes for computation*/
+
+ ps_ctxt->au1_modes_to_eval_temp[i++] = cand_mode_satd_list[0];
+ ps_ctxt->au1_modes_to_eval_temp[i++] = cand_mode_satd_list[1];
+
+ i_end = i;
+ count = 0;
+
+ /*Copy the unique modes greater than 1 from modes_to_eval_temp[] into modes_to_eval[] */
+ for(j = 0; j < i_end; j++)
+ {
+ for(k = 0; k < count; k++)
+ {
+ if(ps_ctxt->au1_modes_to_eval_temp[j] == ps_ctxt->au1_modes_to_eval[k])
+ break;
+ }
+ if((k == count) && (ps_ctxt->au1_modes_to_eval_temp[j] > 1))
+ {
+ ps_ctxt->au1_modes_to_eval[count] = ps_ctxt->au1_modes_to_eval_temp[j];
+ count++;
+ }
+ }
+ i_end = count;
+ if(count == 0)
+ {
+ ps_ctxt->au1_modes_to_eval[0] = 26;
+ i_end = 1;
+ }
+
+ for(i = 0; i < i_end; i++)
+ {
+ pu1_ref_orig = &ps_ctxt->au1_ref_samples[0];
+ pu1_ref_filt = &ps_ctxt->au1_filt_ref_samples[0];
+
+ mode = ps_ctxt->au1_modes_to_eval[i];
+ ASSERT((mode >= 2) && (mode <= 34));
+ cost_amode_step2[i] = ps_ctxt->au2_mode_bits_satd_cost[mode];
+ filter_flag = gau1_intra_pred_ref_filter[mode] & (1 << (CTZ(trans_size) - 2));
+
+ for(k = 0; k < num_tu_in_y; k++)
+ {
+ for(j = 0; j < num_tu_in_x; j++)
+ {
+ pu1_origin = pu1_orig + (k * trans_size * src_strd) + (j * trans_size);
+
+ if(0 == filter_flag)
+ pu1_ref = pu1_ref_orig;
+ else
+ pu1_ref = pu1_ref_filt;
+
+ g_apf_lum_ip[g_i4_ip_funcs[mode]](
+ pu1_ref, 0, &ps_ctxt->au1_pred_samples[0], trans_size, trans_size, mode);
+
+ if(ps_ctxt->u1_use_satd)
+ {
+ sad = apf_resd_trns_had[u1_trans_idx](
+ pu1_origin,
+ ps_curr_src->i4_y_strd,
+ &ps_ctxt->au1_pred_samples[0],
+ trans_size,
+ NULL,
+ 0
+
+ );
+ }
+ else
+ {
+ sad = ps_ctxt->s_ipe_optimised_function_list.pf_nxn_sad_computer(
+ pu1_origin,
+ ps_curr_src->i4_y_strd,
+ &ps_ctxt->au1_pred_samples[0],
+ trans_size,
+ trans_size);
+ }
+
+ cost_amode_step2[i] += sad;
+
+ pu1_ref_orig += (4 * MAX_CTB_SIZE + 1);
+ pu1_ref_filt += (4 * MAX_CTB_SIZE + 1);
+ }
+ }
+ }
+ best_amode = ps_ctxt->au1_modes_to_eval[0];
+ /*Init cost indx */
+ cost_ang_mode = MAX_INTRA_COST_IPE; //cost_amode_step2[0];
+ for(z = 0; z < i_end; z++)
+ {
+ /* Track the least cost in cost_ang_mode; on equal cost keep the smaller mode number */
+ if(cost_ang_mode >= cost_amode_step2[z])
+ {
+ if(cost_ang_mode == cost_amode_step2[z])
+ {
+ if(best_amode > ps_ctxt->au1_modes_to_eval[z])
+ best_amode = ps_ctxt->au1_modes_to_eval[z];
+ }
+ else
+ {
+ best_amode = ps_ctxt->au1_modes_to_eval[z];
+ }
+ cost_ang_mode = cost_amode_step2[z];
+ }
+ }
+
+ /*Modify mode bits for the angular modes */
+ }
+
+ {
+ /* Step - I modification */
+ ASSERT((best_amode >= 2) && (best_amode <= 34));
+ i_end = 0;
+ z = 0;
+
+ /* Around best level 3 angular mode, search for best level 1 mode; modes 0 and 1 are always evaluated */
+ ps_ctxt->au1_modes_to_eval[i_end++] = 0;
+ ps_ctxt->au1_modes_to_eval[i_end++] = 1;
+
+ if(best_amode != 2)
+ ps_ctxt->au1_modes_to_eval[i_end++] = best_amode - 1;
+
+ ps_ctxt->au1_modes_to_eval[i_end++] = best_amode;
+
+ if(best_amode != 34)
+ ps_ctxt->au1_modes_to_eval[i_end++] = best_amode + 1;
+
+ /* Inserting step_2's best mode at last to avoid
+ recalculation of it's SATD cost */
+
+ //ps_ctxt->au1_modes_to_eval[i_end] = best_amode; //Bugfix: HSAD compared with SAD
+ //cost_amode_step2[i_end] = cost_ang_mode;
+
+ /*best_sad[i_end] = cost_ang_mode
+ - mode_bits_satd_cost[best_amode]; //See NOTE_A01 above */
+
+ cost_ang_mode = MAX_INTRA_COST_IPE; /* Init cost */
+
+ for(i = 0; i < i_end; i++)
+ {
+ WORD32 u1_trans_idx = trans_size >> 3;
+ if(trans_size == 32)
+ u1_trans_idx = 3;
+ pu1_ref_orig = &ps_ctxt->au1_ref_samples[0];
+ pu1_ref_filt = &ps_ctxt->au1_filt_ref_samples[0];
+
+ /*best_sad[i] = 0; //See NOTE_A01 above */
+ mode = ps_ctxt->au1_modes_to_eval[i];
+ cost_amode_step2[i] = ps_ctxt->au2_mode_bits_satd_cost[mode];
+ filter_flag = gau1_intra_pred_ref_filter[mode] & (1 << (CTZ(trans_size) - 2));
+
+ for(k = 0; k < num_tu_in_y; k++)
+ {
+ for(j = 0; j < num_tu_in_x; j++)
+ {
+ pu1_origin = pu1_orig + (k * trans_size * src_strd) + (j * trans_size);
+
+ if(0 == filter_flag)
+ pu1_ref = pu1_ref_orig;
+ else
+ pu1_ref = pu1_ref_filt;
+
+ g_apf_lum_ip[g_i4_ip_funcs[mode]](
+ pu1_ref, 0, &ps_ctxt->au1_pred_samples[0], trans_size, trans_size, mode);
+
+ //if(trans_size != 4)
+ {
+ sad = apf_resd_trns_had[u1_trans_idx](
+ pu1_origin,
+ ps_curr_src->i4_y_strd,
+ &ps_ctxt->au1_pred_samples[0],
+ trans_size,
+ NULL,
+ 0);
+ }
+
+ /* accumulating SATD though the variable is named sad */
+ cost_amode_step2[i] += sad;
+ /*best_sad[i] +=sad; //See NOTE_A01 above */
+ pu1_ref_orig += (4 * MAX_CTB_SIZE + 1);
+ pu1_ref_filt += (4 * MAX_CTB_SIZE + 1);
+ }
+ }
+ }
+ /* Updating i_end for the step_2's inserted mode*/
+ // i_end++;
+
+ /* Sort the costs in ascending order; modes_4x4[] carries the matching index into au1_modes_to_eval[] */
+
+ for(i = 0; i < (i_end - 1); i++)
+ {
+ for(j = i + 1; j < i_end; j++)
+ {
+ if(cost_amode_step2[i] > cost_amode_step2[j])
+ {
+ temp = cost_amode_step2[i];
+ cost_amode_step2[i] = cost_amode_step2[j];
+ cost_amode_step2[j] = temp;
+
+ temp = modes_4x4[i];
+ modes_4x4[i] = modes_4x4[j];
+ modes_4x4[j] = temp;
+ }
+ }
+ }
+
+ /* After sorting, the least cost is in cost_amode_step2[0] and modes_4x4[0] indexes its mode */
+ best_amode = ps_ctxt->au1_modes_to_eval[modes_4x4[0]];
+ cost_ang_mode = cost_amode_step2[0];
+ ps_cu_node->best_satd = cost_ang_mode - ps_ctxt->au2_mode_bits_satd_cost[best_amode]; /* NOTE(review): same value is stored again below */
+ ps_cu_node->best_cost = cost_amode_step2[0];
+ ps_cu_node->best_mode = ps_ctxt->au1_modes_to_eval[modes_4x4[0]];
+ ps_cu_node->best_satd =
+ ps_cu_node->best_cost - ps_ctxt->au2_mode_bits_satd_cost[ps_cu_node->best_mode];
+
+ /*Accumulate best mode bits cost for RC*/
+ ps_cu_node->u2_mode_bits_cost = ps_ctxt->au2_mode_bits_satd[ps_cu_node->best_mode];
+
+ /* Store the best three candidates */
+ for(i = 0; i < 3; i++)
+ {
+ best_costs_4x4[i] = cost_amode_step2[i];
+ best_modes_4x4[i] = ps_ctxt->au1_modes_to_eval[modes_4x4[i]];
+ }
+ }
+
+ return;
+}
diff --git a/encoder/ihevce_recur_bracketing.h b/encoder/ihevce_recur_bracketing.h
new file mode 100644
index 0000000..b8cedb9
--- /dev/null
+++ b/encoder/ihevce_recur_bracketing.h
@@ -0,0 +1,102 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_recur_bracketing.h
+*
+* \brief
+* This file contains interface definition of structs and functions for
+* recursive bracketing
+*
+* \date
+* 12/02/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_RECUR_BRACKETING_H_
+#define _IHEVCE_RECUR_BRACKETING_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Variable Declarations */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Function Declarations */
+/*****************************************************************************/
+void ihevce_recur_bracketing(
+ ihevce_ipe_ctxt_t *ps_ctxt,
+ ihevce_ipe_cu_tree_t *ps_cu_node,
+ iv_enc_yuv_buf_t *ps_curr_src,
+ ctb_analyse_t *ps_ctb_out,
+ cu_analyse_t *ps_row_cu);
+
+void ihevce_bracketing_analysis(
+ ihevce_ipe_ctxt_t *ps_ctxt,
+ ihevce_ipe_cu_tree_t *ps_cu_node,
+ iv_enc_yuv_buf_t *ps_curr_src,
+ ctb_analyse_t *ps_ctb_out,
+ //cu_analyse_t *ps_row_cu,
+ ihevce_ed_blk_t *ps_ed_l1_ctb,
+ ihevce_ed_blk_t *ps_ed_l2_ctb,
+ ihevce_ed_ctb_l1_t *ps_ed_ctb_l1,
+ ipe_l0_ctb_analyse_for_me_t *ps_l0_ipe_out_ctb);
+/* Evaluates candidate intra modes for a CU; the 3 best costs/modes are written to best_costs_4x4 / best_modes_4x4 */
+void ihevce_mode_eval_filtering(
+ ihevce_ipe_cu_tree_t *ps_cu_node,
+ ihevce_ipe_cu_tree_t *ps_child_cu_node,
+ ihevce_ipe_ctxt_t *ps_ctxt,
+ iv_enc_yuv_buf_t *ps_curr_src,
+ WORD32 best_amode,
+ WORD32 *best_costs_4x4,
+ UWORD8 *best_modes_4x4,
+ WORD32 step2_bypass,
+ WORD32 tu_eq_cu);
+
+void ihevce_update_cand_list(
+ ihevce_ipe_cu_tree_t *ps_cu_node, ihevce_ed_blk_t *ps_ed_blk_l1, ihevce_ipe_ctxt_t *ps_ctxt);
+/* NOTE(review): same signature as ihevce_nxn_sad_computer(); confirm a definition named sad_nxn_blk still exists */
+WORD32 sad_nxn_blk(
+ UWORD8 *pu1_inp, WORD32 i4_inp_stride, UWORD8 *pu1_ref, WORD32 i4_ref_stride, WORD32 trans_size);
+
+#endif /* _IHEVCE_RECUR_BRACKETING_H_ */
diff --git a/encoder/ihevce_sao.c b/encoder/ihevce_sao.c
new file mode 100644
index 0000000..4e7ea0a
--- /dev/null
+++ b/encoder/ihevce_sao.c
@@ -0,0 +1,1253 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ihevce_sao.c
+*
+* @brief
+* Contains definition for the ctb level sao function
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+* ihevce_sao_set_avilability()
+* ihevce_sao_ctb()
+* ihevce_sao_analyse()
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_entropy_structs.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+#include "ihevce_enc_loop_structs.h"
+#include "ihevce_cabac_rdo.h"
+#include "ihevce_sao.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+* Fills the 8-entry availability array that is passed to the SAO functions.
+*
+* @par Description:
+* All 8 entries are first set to 255, then cleared to 0 at tile edges: first
+* col clears [0],[4],[6]; last col [1],[5],[7]; first row [2],[4],[5]; last
+* row [3],[6],[7]. A CTB on several edges gets all matching entries cleared.
+* @param[out] pu1_avail: array of at least 8 bytes, fully written here
+* @param[in] ps_sao_ctxt: SAO context; only i4_ctb_x and i4_ctb_y are read
+* @param[in] ps_tile_params: first CTB x/y and width/height (in CTBs) of tile
+* @returns
+* None
+*
+*******************************************************************************
+*/
+void ihevce_sao_set_avilability(
+ UWORD8 *pu1_avail, sao_ctxt_t *ps_sao_ctxt, ihevce_tile_params_t *ps_tile_params)
+{
+ WORD32 i;
+
+ WORD32 ctb_x_pos = ps_sao_ctxt->i4_ctb_x;
+ WORD32 ctb_y_pos = ps_sao_ctxt->i4_ctb_y;
+
+ for(i = 0; i < 8; i++)
+ {
+ pu1_avail[i] = 255;
+ }
+
+ /* SAO_note_01: If the CTB lies on a tile or a slice boundary and
+ in-loop filtering is enabled at tile and slice boundary, then SAO must
+ be performed at tile/slice boundaries also.
+ Hence the boundary checks below use the CTB position (compared against
+ ps_tile_params) rather than s_ctb_nbr_avail_flags.u1_left_avail flags.
+ Search for <SAO_note_01> in workspace to know more */
+ /* Availability flags for first col */
+ if(ctb_x_pos == ps_tile_params->i4_first_ctb_x)
+ {
+ pu1_avail[0] = 0;
+ pu1_avail[4] = 0;
+ pu1_avail[6] = 0;
+ }
+
+ /* Availability flags for last col */
+ if((ctb_x_pos + 1) ==
+ (ps_tile_params->i4_first_ctb_x + ps_tile_params->i4_curr_tile_wd_in_ctb_unit))
+ {
+ pu1_avail[1] = 0;
+ pu1_avail[5] = 0;
+ pu1_avail[7] = 0;
+ }
+
+ /* Availability flags for first row */
+ if(ctb_y_pos == ps_tile_params->i4_first_ctb_y)
+ {
+ pu1_avail[2] = 0;
+ pu1_avail[4] = 0;
+ pu1_avail[5] = 0;
+ }
+
+ /* Availability flags for last row */
+ if((ctb_y_pos + 1) ==
+ (ps_tile_params->i4_first_ctb_y + ps_tile_params->i4_curr_tile_ht_in_ctb_unit))
+ {
+ pu1_avail[3] = 0;
+ pu1_avail[6] = 0;
+ pu1_avail[7] = 0;
+ }
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Sao CTB level function.
+*
+* @par Description:
+* Applies SAO to one CTB. Luma is processed when i1_slice_sao_luma_flag is
+* set and chroma when i1_slice_sao_chroma_flag is set; in each case the band
+* offset or one of the edge offset functions is selected from the type idx.
+* @param[in,out] ps_sao_ctxt: Pointer to SAO context (buffers, strides, ps_sao)
+* @param[in] ps_tile_params: Tile params, forwarded to
+* ihevce_sao_set_avilability() in the edge offset case
+* @returns
+* None
+* @remarks
+* Offset element [0] in ps_sao is zeroed for every component processed.
+*
+*******************************************************************************
+*/
+void ihevce_sao_ctb(sao_ctxt_t *ps_sao_ctxt, ihevce_tile_params_t *ps_tile_params)
+{
+ sao_enc_t *ps_sao;
+ UWORD8 u1_src_top_left_luma, u1_src_top_left_chroma[2];
+ UWORD8 *pu1_src_left_luma_buf, *pu1_src_top_luma_buf;
+ UWORD8 *pu1_src_left_chroma_buf, *pu1_src_top_chroma_buf;
+ UWORD8 *pu1_src_luma, *pu1_src_chroma;
+ WORD32 luma_src_stride, ctb_size;
+ WORD32 chroma_src_stride;
+ UWORD8 au1_avail_luma[8], au1_avail_chroma[8];
+ WORD32 sao_blk_wd, sao_blk_ht, sao_wd_chroma, sao_ht_chroma;
+ UWORD8 *pu1_top_left_luma, *pu1_top_left_chroma;
+ UWORD8 *pu1_src_bot_left_luma, *pu1_src_top_right_luma;
+ UWORD8 *pu1_src_bot_left_chroma, *pu1_src_top_right_chroma;
+ UWORD8 u1_is_422 = (ps_sao_ctxt->ps_sps->i1_chroma_format_idc == 2); /* presumably 4:2:2, going by the variable name */
+
+ ps_sao = ps_sao_ctxt->ps_sao;
+
+ ASSERT(
+ (abs(ps_sao->u1_y_offset[1]) <= 7) && (abs(ps_sao->u1_y_offset[2]) <= 7) &&
+ (abs(ps_sao->u1_y_offset[3]) <= 7) && (abs(ps_sao->u1_y_offset[4]) <= 7));
+ ASSERT(
+ (abs(ps_sao->u1_cb_offset[1]) <= 7) && (abs(ps_sao->u1_cb_offset[2]) <= 7) &&
+ (abs(ps_sao->u1_cb_offset[3]) <= 7) && (abs(ps_sao->u1_cb_offset[4]) <= 7));
+ ASSERT(
+ (abs(ps_sao->u1_cr_offset[1]) <= 7) && (abs(ps_sao->u1_cr_offset[2]) <= 7) &&
+ (abs(ps_sao->u1_cr_offset[3]) <= 7) && (abs(ps_sao->u1_cr_offset[4]) <= 7));
+ ASSERT(
+ (ps_sao->b5_y_band_pos <= 28) && (ps_sao->b5_cb_band_pos <= 28) &&
+ (ps_sao->b5_cr_band_pos <= 28));
+
+ if(ps_sao_ctxt->i1_slice_sao_luma_flag)
+ {
+ /* Stride of the current luma recon buffer */
+ luma_src_stride = ps_sao_ctxt->i4_cur_luma_recon_stride;
+
+ ctb_size = ps_sao_ctxt->i4_ctb_size;
+
+ /* 1 extra byte in top buf stride for top left of 1st ctb of every row*/
+ ps_sao->u1_y_offset[0] = 0; /* 0th element is not being used */
+ sao_blk_wd = ps_sao_ctxt->i4_sao_blk_wd;
+ sao_blk_ht = ps_sao_ctxt->i4_sao_blk_ht;
+
+ pu1_src_luma = ps_sao_ctxt->pu1_cur_luma_recon_buf;
+ /* Pointer to the top luma buffer corresponding to the current ctb row*/
+ pu1_src_top_luma_buf = ps_sao_ctxt->pu1_curr_sao_src_top_luma;
+
+ /* Pointer to left luma buffer corresponding to the current ctb row*/
+ pu1_src_left_luma_buf = ps_sao_ctxt->au1_left_luma_scratch;
+
+ /* Pointer to the top right luma buffer corresponding to the current ctb row*/
+ pu1_src_top_right_luma = pu1_src_top_luma_buf /*- top_buf_stide*/ + sao_blk_wd;
+
+ /* Pointer to the bottom left luma buffer corresponding to the current ctb row*/
+ pu1_src_bot_left_luma =
+ ps_sao_ctxt->pu1_frm_luma_recon_buf + ctb_size * ps_sao_ctxt->i4_frm_luma_recon_stride -
+ 1 + (ps_sao_ctxt->i4_frm_luma_recon_stride * ps_sao_ctxt->i4_ctb_y * ctb_size) +
+ (ps_sao_ctxt->i4_ctb_x * ctb_size); /* Bottom left*/
+
+ /* Back up the top left pixel for (x+1, y+1)th ctb*/
+ u1_src_top_left_luma = *(pu1_src_top_luma_buf + sao_blk_wd - 1);
+ pu1_top_left_luma = pu1_src_top_luma_buf - 1;
+
+ if(SAO_BAND == ps_sao->b3_y_type_idx)
+ {
+ ihevc_sao_band_offset_luma(
+ pu1_src_luma,
+ luma_src_stride,
+ pu1_src_left_luma_buf, /* Pass the pointer to the left luma buffer backed up in the (x-1,y)th ctb */
+ pu1_src_top_luma_buf, /* Pass the ptr to the top luma buf backed up in the (x,y-1)th ctb */
+ pu1_src_top_luma_buf - 1, /* Top left (same address as pu1_top_left_luma) */
+ ps_sao->b5_y_band_pos,
+ ps_sao->u1_y_offset,
+ sao_blk_wd,
+ sao_blk_ht);
+
+ if((ps_sao_ctxt->i4_ctb_y > 0)) /* put back the byte saved before the SAO call */
+ {
+ *(pu1_src_top_luma_buf + sao_blk_wd - 1) = u1_src_top_left_luma;
+ }
+ }
+ else if(ps_sao->b3_y_type_idx >= SAO_EDGE_0_DEG)
+ {
+ /*In case of edge offset, 1st and 2nd offsets are always inferred as offsets
+ * corresponding to EO category 1 and 2 which should be always positive
+ * And 3rd and 4th offsets are always inferred as offsets corresponding to
+ * EO category 3 and 4 which should be negative for all the EO classes(or EO typeidx)
+ */
+ // clang-format off
+ ASSERT((ps_sao->u1_y_offset[1] >= 0) && (ps_sao->u1_y_offset[2] >= 0));
+ ASSERT((ps_sao->u1_y_offset[3] <= 0) && (ps_sao->u1_y_offset[4] <= 0));
+ // clang-format on
+
+ ihevce_sao_set_avilability(au1_avail_luma, ps_sao_ctxt, ps_tile_params);
+
+ ps_sao_ctxt->apf_sao_luma[ps_sao->b3_y_type_idx - 2](
+ pu1_src_luma,
+ luma_src_stride,
+ pu1_src_left_luma_buf, /* Pass the pointer to the left luma buffer backed up in the (x-1,y)th ctb */
+ pu1_src_top_luma_buf, /* Pass the ptr to the top luma buf backed up in the (x,y-1)th ctb */
+ pu1_top_left_luma, /* Top left*/
+ pu1_src_top_right_luma, /* Top right*/
+ pu1_src_bot_left_luma, /* Bottom left*/
+ au1_avail_luma,
+ ps_sao->u1_y_offset,
+ sao_blk_wd,
+ sao_blk_ht);
+
+ if((ps_sao_ctxt->i4_ctb_y > 0)) /* put back the byte saved before the SAO call */
+ {
+ *(pu1_src_top_luma_buf + sao_blk_wd - 1) = u1_src_top_left_luma;
+ }
+ }
+ }
+
+ if(ps_sao_ctxt->i1_slice_sao_chroma_flag)
+ {
+ /* Stride of the current chroma recon buffer */
+ chroma_src_stride = ps_sao_ctxt->i4_cur_chroma_recon_stride;
+ ctb_size = ps_sao_ctxt->i4_ctb_size;
+
+ /* 1 extra byte in top buf stride for top left of 1st ctb of every row*/
+ //top_buf_stide = ps_sao_ctxt->u4_ctb_aligned_wd + 2;
+ ps_sao->u1_cb_offset[0] = 0; /* 0th element is not used */
+ ps_sao->u1_cr_offset[0] = 0;
+ sao_wd_chroma = ps_sao_ctxt->i4_sao_blk_wd;
+ sao_ht_chroma = ps_sao_ctxt->i4_sao_blk_ht / (!u1_is_422 + 1); /* halved unless u1_is_422 */
+
+ pu1_src_chroma = ps_sao_ctxt->pu1_cur_chroma_recon_buf;
+ /* Pointer to the top chroma buffer corresponding to the current ctb row*/
+ pu1_src_top_chroma_buf = ps_sao_ctxt->pu1_curr_sao_src_top_chroma;
+ // clang-format off
+ /* Pointer to left chroma buffer corresponding to the current ctb row*/
+ pu1_src_left_chroma_buf = ps_sao_ctxt->au1_left_chroma_scratch; //ps_sao_ctxt->au1_sao_src_left_chroma;
+ // clang-format on
+ /* Pointer to the top right chroma buffer corresponding to the current ctb row*/
+ pu1_src_top_right_chroma = pu1_src_top_chroma_buf /*- top_buf_stide*/ + sao_wd_chroma;
+
+ /* Pointer to the bottom left chroma buffer corresponding to the current ctb row*/
+ pu1_src_bot_left_chroma =
+ ps_sao_ctxt->pu1_frm_chroma_recon_buf +
+ (ctb_size >> !u1_is_422) * ps_sao_ctxt->i4_frm_chroma_recon_stride - 2 +
+ (ps_sao_ctxt->i4_frm_chroma_recon_stride * ps_sao_ctxt->i4_ctb_y *
+ (ctb_size >> !u1_is_422)) +
+ (ps_sao_ctxt->i4_ctb_x * ctb_size); /* Bottom left*/
+
+ /* Back up the top left pixel pair (2 bytes) for (x+1, y+1)th ctb*/
+ u1_src_top_left_chroma[0] = *(pu1_src_top_chroma_buf + sao_wd_chroma - 2);
+ u1_src_top_left_chroma[1] = *(pu1_src_top_chroma_buf + sao_wd_chroma - 1);
+ pu1_top_left_chroma = pu1_src_top_chroma_buf - 2;
+
+ if(SAO_BAND == ps_sao->b3_cb_type_idx)
+ {
+ ihevc_sao_band_offset_chroma(
+ pu1_src_chroma,
+ chroma_src_stride,
+ pu1_src_left_chroma_buf, /* Pass the pointer to the left chroma buffer backed up in the (x-1,y)th ctb */
+ pu1_src_top_chroma_buf, /* Pass the ptr to the top chroma buf backed up in the (x,y-1)th ctb */
+ pu1_top_left_chroma, /* Top left*/
+ ps_sao->b5_cb_band_pos,
+ ps_sao->b5_cr_band_pos,
+ ps_sao->u1_cb_offset,
+ ps_sao->u1_cr_offset,
+ sao_wd_chroma,
+ sao_ht_chroma);
+
+ if((ps_sao_ctxt->i4_ctb_y > 0)) /* put back the two bytes saved before the SAO call */
+ {
+ *(pu1_src_top_chroma_buf + sao_wd_chroma - 2) = u1_src_top_left_chroma[0];
+ *(pu1_src_top_chroma_buf + sao_wd_chroma - 1) = u1_src_top_left_chroma[1];
+ }
+ }
+ else if(ps_sao->b3_cb_type_idx >= SAO_EDGE_0_DEG)
+ {
+ /*In case of edge offset, 1st and 2nd offsets are always inferred as offsets
+ * corresponding to EO category 1 and 2 which should be always positive
+ * And 3rd and 4th offsets are always inferred as offsets corresponding to
+ * EO category 3 and 4 which should be negative for all the EO classes(or EO typeidx)
+ */
+ ASSERT((ps_sao->u1_cb_offset[1] >= 0) && (ps_sao->u1_cb_offset[2] >= 0));
+ ASSERT((ps_sao->u1_cb_offset[3] <= 0) && (ps_sao->u1_cb_offset[4] <= 0));
+
+ ASSERT((ps_sao->u1_cr_offset[1] >= 0) && (ps_sao->u1_cr_offset[2] >= 0));
+ ASSERT((ps_sao->u1_cr_offset[3] <= 0) && (ps_sao->u1_cr_offset[4] <= 0));
+
+ ihevce_sao_set_avilability(au1_avail_chroma, ps_sao_ctxt, ps_tile_params);
+
+ ps_sao_ctxt->apf_sao_chroma[ps_sao->b3_cb_type_idx - 2](
+ pu1_src_chroma,
+ chroma_src_stride,
+ pu1_src_left_chroma_buf, /* Pass the pointer to the left chroma buffer backed up in the (x-1,y)th ctb */
+ pu1_src_top_chroma_buf, /* Pass the ptr to the top chroma buf backed up in the (x,y-1)th ctb */
+ pu1_top_left_chroma, /* Top left*/
+ pu1_src_top_right_chroma, /* Top right*/
+ pu1_src_bot_left_chroma, /* Bottom left*/
+ au1_avail_chroma,
+ ps_sao->u1_cb_offset,
+ ps_sao->u1_cr_offset,
+ sao_wd_chroma,
+ sao_ht_chroma);
+
+ if((ps_sao_ctxt->i4_ctb_y > 0)) /* put back the two bytes saved before the SAO call */
+ {
+ *(pu1_src_top_chroma_buf + sao_wd_chroma - 2) = u1_src_top_left_chroma[0];
+ *(pu1_src_top_chroma_buf + sao_wd_chroma - 1) = u1_src_top_left_chroma[1];
+ }
+ }
+ }
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* CTB level function to do SAO analysis.
+*
+* @par Description:
+* For a given CTB, sao analysis is done for both luma and chroma.
+*
+*
+* @param[in]
+* ps_sao_ctxt: Pointer to SAO context
+* ps_ctb_enc_loop_out : pointer to ctb level output structure from enc loop
+*
+* @returns
+*
+* @remarks
+* None
+*
+* @Assumptions:
+* 1) Initial Cabac state for current ctb to be sao'ed (i.e (x-1,y-1)th ctb) is assumed to be
+* almost same as cabac state of (x,y)th ctb.
+* 2) Distortion is calculated in the spatial domain, but the lambda used to calculate the cost
+* is in the frequency domain.
+*******************************************************************************
+*/
+void ihevce_sao_analyse(
+ sao_ctxt_t *ps_sao_ctxt,
+ ctb_enc_loop_out_t *ps_ctb_enc_loop_out,
+ UWORD32 *pu4_frame_rdopt_header_bits,
+ ihevce_tile_params_t *ps_tile_params)
+{
+ UWORD8 *pu1_luma_scratch_buf;
+ UWORD8 *pu1_chroma_scratch_buf;
+ UWORD8 *pu1_src_luma, *pu1_recon_luma;
+ UWORD8 *pu1_src_chroma, *pu1_recon_chroma;
+ WORD32 luma_src_stride, luma_recon_stride, ctb_size, ctb_wd, ctb_ht;
+ WORD32 chroma_src_stride, chroma_recon_stride;
+ WORD32 i4_luma_scratch_buf_stride;
+ WORD32 i4_chroma_scratch_buf_stride;
+ sao_ctxt_t s_sao_ctxt;
+ UWORD32 ctb_bits = 0, distortion = 0, curr_cost = 0, best_cost = 0;
+ LWORD64 i8_cl_ssd_lambda_qf, i8_cl_ssd_lambda_chroma_qf;
+ WORD32 rdo_cand, num_luma_rdo_cand = 0, num_rdo_cand = 0;
+ WORD32 curr_buf_idx, best_buf_idx, best_cand_idx;
+ WORD32 row;
+ WORD32 edgeidx;
+ WORD32 acc_error_category[5] = { 0, 0, 0, 0, 0 }, category_count[5] = { 0, 0, 0, 0, 0 };
+ sao_enc_t s_best_luma_chroma_cand;
+ WORD32 best_ctb_sao_bits = 0;
+#if DISABLE_SAO_WHEN_NOISY && !defined(ENC_VER_v2)
+ UWORD8 u1_force_no_offset =
+ ps_sao_ctxt
+ ->ps_ctb_data
+ [ps_sao_ctxt->i4_ctb_x + ps_sao_ctxt->i4_ctb_data_stride * ps_sao_ctxt->i4_ctb_y]
+ .s_ctb_noise_params.i4_noise_present;
+#endif
+ UWORD8 u1_is_422 = (ps_sao_ctxt->ps_sps->i1_chroma_format_idc == 2);
+
+ *pu4_frame_rdopt_header_bits = 0;
+
+ ctb_size = ps_sao_ctxt->i4_ctb_size;
+ ctb_wd = ps_sao_ctxt->i4_sao_blk_wd;
+ ctb_ht = ps_sao_ctxt->i4_sao_blk_ht;
+
+ s_sao_ctxt = ps_sao_ctxt[0];
+
+ /* Memset the best luma_chroma_cand structure to avoid asserts in debug mode*/
+ memset(&s_best_luma_chroma_cand, 0, sizeof(sao_enc_t));
+
+ /* Initialize the pointer and strides for luma buffers*/
+ pu1_recon_luma = ps_sao_ctxt->pu1_cur_luma_recon_buf;
+ luma_recon_stride = ps_sao_ctxt->i4_cur_luma_recon_stride;
+
+ pu1_src_luma = ps_sao_ctxt->pu1_cur_luma_src_buf;
+ luma_src_stride = ps_sao_ctxt->i4_cur_luma_src_stride;
+ i4_luma_scratch_buf_stride = SCRATCH_BUF_STRIDE;
+
+ /* Initialize the pointer and strides for luma buffers*/
+ pu1_recon_chroma = ps_sao_ctxt->pu1_cur_chroma_recon_buf;
+ chroma_recon_stride = ps_sao_ctxt->i4_cur_chroma_recon_stride;
+
+ pu1_src_chroma = ps_sao_ctxt->pu1_cur_chroma_src_buf;
+ chroma_src_stride = ps_sao_ctxt->i4_cur_chroma_src_stride;
+ i4_chroma_scratch_buf_stride = SCRATCH_BUF_STRIDE;
+
+ i8_cl_ssd_lambda_qf = ps_sao_ctxt->i8_cl_ssd_lambda_qf;
+ i8_cl_ssd_lambda_chroma_qf = ps_sao_ctxt->i8_cl_ssd_lambda_chroma_qf;
+
+ /*****************************************************/
+ /********************RDO FOR LUMA CAND****************/
+ /*****************************************************/
+
+#if !DISABLE_SAO_WHEN_NOISY
+ if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
+#else
+ if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag && !u1_force_no_offset)
+#endif
+ {
+ /* Candidate for Edge offset SAO*/
+ /* Following is the convention for curr pixel and
+ * two neighbouring pixels for 0 deg, 90 deg, 135 deg and 45 deg */
+ /*
+ * 0 deg : a c b 90 deg: a 135 deg: a 45 deg: a
+ * c c c
+ * b b b
+ */
+
+ /* 0 deg SAO CAND*/
+ /* Reset the error and edge count*/
+ for(edgeidx = 0; edgeidx < 5; edgeidx++)
+ {
+ acc_error_category[edgeidx] = 0;
+ category_count[edgeidx] = 0;
+ }
+
+ /* Call the funciton to populate the EO parameter for this ctb for 0 deg EO class*/
+ // clang-format off
+ ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_get_luma_eo_sao_params(ps_sao_ctxt, SAO_EDGE_0_DEG,
+ acc_error_category, category_count);
+ // clang-format on
+ // clang-format off
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_y_type_idx = SAO_EDGE_0_DEG;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[1] = category_count[0]
+ ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7))
+ : 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[2] = category_count[1]
+ ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7))
+ : 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[3] = category_count[3]
+ ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0))
+ : 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[4] =category_count[4]
+ ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0))
+ : 0;
+ // clang-format on
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_y_band_pos = 0;
+ // clang-format off
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cb_type_idx = SAO_NONE;
+ // clang-format on
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[1] = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[2] = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[3] = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[4] = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cb_band_pos = 0;
+
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cr_type_idx = SAO_NONE;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[1] = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[2] = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[3] = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[4] = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cr_band_pos = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_left_flag = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_up_flag = 0;
+
+ num_luma_rdo_cand++;
+
+ /* 90 degree SAO CAND*/
+ for(edgeidx = 0; edgeidx < 5; edgeidx++)
+ {
+ acc_error_category[edgeidx] = 0;
+ category_count[edgeidx] = 0;
+ }
+
+ /* Call the funciton to populate the EO parameter for this ctb for 90 deg EO class*/
+ // clang-format off
+ ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_get_luma_eo_sao_params(ps_sao_ctxt, SAO_EDGE_90_DEG,
+ acc_error_category, category_count);
+
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_y_type_idx = SAO_EDGE_90_DEG;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[1] = category_count[0]
+ ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7))
+ : 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[2] = category_count[1]
+ ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7))
+ : 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[3] = category_count[3]
+ ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0))
+ : 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[4] = category_count[4]
+ ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0))
+ : 0;
+ // clang-format on
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_y_band_pos = 0;
+
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cb_type_idx = SAO_NONE;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[1] = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[2] = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[3] = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[4] = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cb_band_pos = 0;
+
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cr_type_idx = SAO_NONE;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[1] = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[2] = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[3] = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[4] = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cr_band_pos = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_left_flag = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_up_flag = 0;
+
+ num_luma_rdo_cand++;
+
+ /* 135 degree SAO CAND*/
+ for(edgeidx = 0; edgeidx < 5; edgeidx++)
+ {
+ acc_error_category[edgeidx] = 0;
+ category_count[edgeidx] = 0;
+ }
+
+ /* Call the funciton to populate the EO parameter for this ctb for 135 deg EO class*/
+ // clang-format off
+ ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_get_luma_eo_sao_params(ps_sao_ctxt, SAO_EDGE_135_DEG,
+ acc_error_category, category_count);
+
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_y_type_idx = SAO_EDGE_135_DEG;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[1] = category_count[0]
+ ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7))
+ : 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[2] = category_count[1]
+ ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7))
+ : 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[3] = category_count[3]
+ ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0))
+ : 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[4] = category_count[4]
+ ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0))
+ : 0;
+ // clang-format on
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_y_band_pos = 0;
+
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cb_type_idx = SAO_NONE;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[1] = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[2] = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[3] = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[4] = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cb_band_pos = 0;
+
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cr_type_idx = SAO_NONE;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[1] = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[2] = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[3] = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[4] = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cr_band_pos = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_left_flag = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_up_flag = 0;
+
+ num_luma_rdo_cand++;
+
+ /* 45 degree SAO CAND*/
+ for(edgeidx = 0; edgeidx < 5; edgeidx++)
+ {
+ acc_error_category[edgeidx] = 0;
+ category_count[edgeidx] = 0;
+ }
+
+ /* Call the funciton to populate the EO parameter for this ctb for 45 deg EO class*/
+ // clang-format off
+ ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_get_luma_eo_sao_params(ps_sao_ctxt, SAO_EDGE_45_DEG,
+ acc_error_category, category_count);
+
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_y_type_idx = SAO_EDGE_45_DEG;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[1] = category_count[0]
+ ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7))
+ : 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[2] = category_count[1]
+ ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7))
+ : 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[3] = category_count[3]
+ ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0))
+ : 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_y_offset[4] = category_count[4]
+ ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0))
+ : 0;
+ // clang-format on
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_y_band_pos = 0;
+
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cb_type_idx = SAO_NONE;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[1] = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[2] = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[3] = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cb_offset[4] = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cb_band_pos = 0;
+
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b3_cr_type_idx = SAO_NONE;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[1] = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[2] = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[3] = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].u1_cr_offset[4] = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b5_cr_band_pos = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_left_flag = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_luma_rdo_cand].b1_sao_merge_up_flag = 0;
+
+ num_luma_rdo_cand++;
+
+ /* First cand will be best cand after 1st iteration*/
+ curr_buf_idx = 0;
+ best_buf_idx = 1;
+ best_cost = 0xFFFFFFFF;
+ best_cand_idx = 0;
+
+ /*Back up the top pixels for (x,y+1)th ctb*/
+ if(!ps_sao_ctxt->i4_is_last_ctb_row)
+ {
+ memcpy(
+ ps_sao_ctxt->pu1_curr_sao_src_top_luma + ps_sao_ctxt->i4_frm_top_luma_buf_stride,
+ pu1_recon_luma + luma_recon_stride * (ctb_size - 1),
+ ps_sao_ctxt->i4_sao_blk_wd);
+ }
+
+ for(rdo_cand = 0; rdo_cand < num_luma_rdo_cand; rdo_cand++)
+ {
+ s_sao_ctxt.ps_sao = &ps_sao_ctxt->as_sao_rd_cand[rdo_cand];
+
+ /* This memcpy is required because cabac uses parameters from this structure
+ * to evaluate bits and this structure ptr is sent to cabac through
+ * "ihevce_cabac_rdo_encode_sao" function
+ */
+ memcpy(&ps_ctb_enc_loop_out->s_sao, s_sao_ctxt.ps_sao, sizeof(sao_enc_t));
+
+ /* Copy the left pixels to the scratch buffer for evry rdo cand because its
+ overwritten by the sao leaf level function for next ctb*/
+ memcpy(
+ s_sao_ctxt.au1_left_luma_scratch,
+ ps_sao_ctxt->au1_sao_src_left_luma,
+ ps_sao_ctxt->i4_sao_blk_ht);
+
+ /* Copy the top and top left pixels to the scratch buffer for evry rdo cand because its
+ overwritten by the sao leaf level function for next ctb*/
+ memcpy(
+ s_sao_ctxt.au1_top_luma_scratch,
+ ps_sao_ctxt->pu1_curr_sao_src_top_luma - 1,
+ ps_sao_ctxt->i4_sao_blk_wd + 2);
+ s_sao_ctxt.pu1_curr_sao_src_top_luma = s_sao_ctxt.au1_top_luma_scratch + 1;
+
+ pu1_luma_scratch_buf = ps_sao_ctxt->au1_sao_luma_scratch[curr_buf_idx];
+
+ ASSERT(
+ (abs(s_sao_ctxt.ps_sao->u1_y_offset[1]) <= 7) &&
+ (abs(s_sao_ctxt.ps_sao->u1_y_offset[2]) <= 7) &&
+ (abs(s_sao_ctxt.ps_sao->u1_y_offset[3]) <= 7) &&
+ (abs(s_sao_ctxt.ps_sao->u1_y_offset[4]) <= 7));
+ ASSERT(
+ (abs(s_sao_ctxt.ps_sao->u1_cb_offset[1]) <= 7) &&
+ (abs(s_sao_ctxt.ps_sao->u1_cb_offset[2]) <= 7) &&
+ (abs(s_sao_ctxt.ps_sao->u1_cb_offset[3]) <= 7) &&
+ (abs(s_sao_ctxt.ps_sao->u1_cb_offset[4]) <= 7));
+ ASSERT(
+ (abs(s_sao_ctxt.ps_sao->u1_cr_offset[1]) <= 7) &&
+ (abs(s_sao_ctxt.ps_sao->u1_cr_offset[2]) <= 7) &&
+ (abs(s_sao_ctxt.ps_sao->u1_cr_offset[3]) <= 7) &&
+ (abs(s_sao_ctxt.ps_sao->u1_cr_offset[4]) <= 7));
+ ASSERT(
+ (s_sao_ctxt.ps_sao->b5_y_band_pos <= 28) &&
+ (s_sao_ctxt.ps_sao->b5_cb_band_pos <= 28) &&
+ (s_sao_ctxt.ps_sao->b5_cr_band_pos <= 28));
+
+ /* Copy the deblocked recon data to scratch buffer to do sao*/
+
+ ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d(
+ pu1_luma_scratch_buf,
+ i4_luma_scratch_buf_stride,
+ pu1_recon_luma,
+ luma_recon_stride,
+ SCRATCH_BUF_STRIDE,
+ ctb_ht + 1);
+
+ s_sao_ctxt.pu1_cur_luma_recon_buf = pu1_luma_scratch_buf;
+ s_sao_ctxt.i4_cur_luma_recon_stride = i4_luma_scratch_buf_stride;
+
+ s_sao_ctxt.i1_slice_sao_luma_flag = s_sao_ctxt.ps_slice_hdr->i1_slice_sao_luma_flag;
+ s_sao_ctxt.i1_slice_sao_chroma_flag = 0;
+
+ ihevce_sao_ctb(&s_sao_ctxt, ps_tile_params);
+
+ /* Calculate the distortion between sao'ed ctb and original src ctb*/
+ // clang-format off
+ distortion =
+ ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_ssd_calculator(pu1_src_luma,
+ s_sao_ctxt.pu1_cur_luma_recon_buf, luma_src_stride,
+ s_sao_ctxt.i4_cur_luma_recon_stride, ctb_wd, ctb_ht);
+ // clang-format on
+
+ ps_sao_ctxt->ps_rdopt_entropy_ctxt->i4_curr_buf_idx = curr_buf_idx;
+ ctb_bits = ihevce_cabac_rdo_encode_sao(
+ ps_sao_ctxt->ps_rdopt_entropy_ctxt, ps_ctb_enc_loop_out);
+
+ /* Calculate the cost as D+(lamda)*R */
+ curr_cost = distortion +
+ COMPUTE_RATE_COST_CLIP30(ctb_bits, i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
+
+ if(curr_cost < best_cost)
+ {
+ best_cost = curr_cost;
+ best_buf_idx = ps_sao_ctxt->ps_rdopt_entropy_ctxt->i4_curr_buf_idx;
+ best_cand_idx = rdo_cand;
+ curr_buf_idx = !curr_buf_idx;
+ }
+ }
+
+ /* Copy the sao parameters of the best luma cand into the luma_chroma cnad structure for next stage of RDO
+ * between luma_chroma combined cand, NO SAO cand, LEFT and TOP merge cand
+ */
+ s_best_luma_chroma_cand.b3_y_type_idx =
+ ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].b3_y_type_idx;
+ s_best_luma_chroma_cand.u1_y_offset[1] =
+ ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].u1_y_offset[1];
+ s_best_luma_chroma_cand.u1_y_offset[2] =
+ ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].u1_y_offset[2];
+ s_best_luma_chroma_cand.u1_y_offset[3] =
+ ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].u1_y_offset[3];
+ s_best_luma_chroma_cand.u1_y_offset[4] =
+ ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].u1_y_offset[4];
+ s_best_luma_chroma_cand.b5_y_band_pos =
+ ps_sao_ctxt->as_sao_rd_cand[best_cand_idx].b5_y_band_pos;
+ }
+ else
+ {
+ /*Back up the top pixels for (x,y+1)th ctb*/
+ if(!ps_sao_ctxt->i4_is_last_ctb_row)
+ {
+ memcpy(
+ ps_sao_ctxt->pu1_curr_sao_src_top_luma + ps_sao_ctxt->i4_frm_top_luma_buf_stride,
+ pu1_recon_luma + luma_recon_stride * (ctb_size - 1),
+ ps_sao_ctxt->i4_sao_blk_wd);
+ }
+
+ s_best_luma_chroma_cand.b3_y_type_idx = SAO_NONE;
+ s_best_luma_chroma_cand.u1_y_offset[1] = 0;
+ s_best_luma_chroma_cand.u1_y_offset[2] = 0;
+ s_best_luma_chroma_cand.u1_y_offset[3] = 0;
+ s_best_luma_chroma_cand.u1_y_offset[4] = 0;
+ s_best_luma_chroma_cand.b5_y_band_pos = 0;
+ s_best_luma_chroma_cand.b1_sao_merge_left_flag = 0;
+ s_best_luma_chroma_cand.b1_sao_merge_up_flag = 0;
+
+ s_best_luma_chroma_cand.b3_cb_type_idx = SAO_NONE;
+ s_best_luma_chroma_cand.u1_cb_offset[1] = 0;
+ s_best_luma_chroma_cand.u1_cb_offset[2] = 0;
+ s_best_luma_chroma_cand.u1_cb_offset[3] = 0;
+ s_best_luma_chroma_cand.u1_cb_offset[4] = 0;
+ s_best_luma_chroma_cand.b5_cb_band_pos = 0;
+
+ s_best_luma_chroma_cand.b3_cr_type_idx = SAO_NONE;
+ s_best_luma_chroma_cand.u1_cr_offset[1] = 0;
+ s_best_luma_chroma_cand.u1_cr_offset[2] = 0;
+ s_best_luma_chroma_cand.u1_cr_offset[3] = 0;
+ s_best_luma_chroma_cand.u1_cr_offset[4] = 0;
+ s_best_luma_chroma_cand.b5_cr_band_pos = 0;
+ }
+ /*****************************************************/
+ /********************RDO FOR CHROMA CAND**************/
+ /*****************************************************/
+#if !DISABLE_SAO_WHEN_NOISY
+ if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
+#else
+ if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag && !u1_force_no_offset)
+#endif
+ {
+ /*Back up the top pixels for (x,y+1)th ctb*/
+ if(!ps_sao_ctxt->i4_is_last_ctb_row)
+ {
+ memcpy(
+ ps_sao_ctxt->pu1_curr_sao_src_top_chroma +
+ ps_sao_ctxt->i4_frm_top_chroma_buf_stride,
+ pu1_recon_chroma + chroma_recon_stride * ((ctb_size >> !u1_is_422) - 1),
+ ps_sao_ctxt->i4_sao_blk_wd);
+ }
+
+ /* Reset the error and edge count*/
+ for(edgeidx = 0; edgeidx < 5; edgeidx++)
+ {
+ acc_error_category[edgeidx] = 0;
+ category_count[edgeidx] = 0;
+ }
+ // clang-format off
+ ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_get_chroma_eo_sao_params(ps_sao_ctxt,
+ s_best_luma_chroma_cand.b3_y_type_idx, acc_error_category,
+ category_count);
+ // clang-format on
+
+ /* Copy the sao parameters of the best luma cand into the luma_chroma cnad structure for next stage of RDO
+ * between luma_chroma combined cand, NO SAO cand, LEFT and TOP merge cand
+ */
+ // clang-format off
+ s_best_luma_chroma_cand.b3_cb_type_idx = s_best_luma_chroma_cand.b3_y_type_idx;
+ s_best_luma_chroma_cand.u1_cb_offset[1] = category_count[0]
+ ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7))
+ : 0;
+ s_best_luma_chroma_cand.u1_cb_offset[2] = category_count[1]
+ ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7))
+ : 0;
+ s_best_luma_chroma_cand.u1_cb_offset[3] = category_count[3]
+ ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0))
+ : 0;
+ s_best_luma_chroma_cand.u1_cb_offset[4] = category_count[4]
+ ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0))
+ : 0;
+ s_best_luma_chroma_cand.b5_cb_band_pos = 0;
+
+ s_best_luma_chroma_cand.b3_cr_type_idx = s_best_luma_chroma_cand.b3_y_type_idx;
+ s_best_luma_chroma_cand.u1_cr_offset[1] = category_count[0]
+ ? (CLIP3(acc_error_category[0] / category_count[0], 0, 7))
+ : 0;
+ s_best_luma_chroma_cand.u1_cr_offset[2] = category_count[1]
+ ? (CLIP3(acc_error_category[1] / category_count[1], 0, 7))
+ : 0;
+ s_best_luma_chroma_cand.u1_cr_offset[3] = category_count[3]
+ ? (CLIP3(acc_error_category[3] / category_count[3], -7, 0))
+ : 0;
+ s_best_luma_chroma_cand.u1_cr_offset[4] = category_count[4]
+ ? (CLIP3(acc_error_category[4] / category_count[4], -7, 0))
+ : 0;
+ // clang-format on
+ s_best_luma_chroma_cand.b5_cr_band_pos = 0;
+ }
+ else
+ {
+ /*Back up the top pixels for (x,y+1)th ctb*/
+ if(!ps_sao_ctxt->i4_is_last_ctb_row)
+ {
+ memcpy(
+ ps_sao_ctxt->pu1_curr_sao_src_top_chroma +
+ ps_sao_ctxt->i4_frm_top_chroma_buf_stride,
+ pu1_recon_chroma + chroma_recon_stride * ((ctb_size >> !u1_is_422) - 1),
+ ps_sao_ctxt->i4_sao_blk_wd);
+ }
+
+ s_best_luma_chroma_cand.b3_cb_type_idx = SAO_NONE;
+ s_best_luma_chroma_cand.u1_cb_offset[1] = 0;
+ s_best_luma_chroma_cand.u1_cb_offset[2] = 0;
+ s_best_luma_chroma_cand.u1_cb_offset[3] = 0;
+ s_best_luma_chroma_cand.u1_cb_offset[4] = 0;
+ s_best_luma_chroma_cand.b5_cb_band_pos = 0;
+
+ s_best_luma_chroma_cand.b3_cr_type_idx = SAO_NONE;
+ s_best_luma_chroma_cand.u1_cr_offset[1] = 0;
+ s_best_luma_chroma_cand.u1_cr_offset[2] = 0;
+ s_best_luma_chroma_cand.u1_cr_offset[3] = 0;
+ s_best_luma_chroma_cand.u1_cr_offset[4] = 0;
+ s_best_luma_chroma_cand.b5_cr_band_pos = 0;
+
+ s_best_luma_chroma_cand.b1_sao_merge_left_flag = 0;
+ s_best_luma_chroma_cand.b1_sao_merge_up_flag = 0;
+ }
+
+ s_best_luma_chroma_cand.b1_sao_merge_left_flag = 0;
+ s_best_luma_chroma_cand.b1_sao_merge_up_flag = 0;
+
+ /*****************************************************/
+ /**RDO for Best Luma - Chroma combined, No SAO,*******/
+ /*************Left merge and Top merge****************/
+ /*****************************************************/
+
+ /* No SAO cand*/
+ ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_left_flag = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_up_flag = 0;
+
+ ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b3_y_type_idx = SAO_NONE;
+ ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_y_offset[1] = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_y_offset[2] = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_y_offset[3] = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_y_offset[4] = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b5_y_band_pos = 0;
+
+ ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b3_cb_type_idx = SAO_NONE;
+ ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cb_offset[1] = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cb_offset[2] = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cb_offset[3] = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cb_offset[4] = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b5_cb_band_pos = 0;
+
+ ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b3_cr_type_idx = SAO_NONE;
+ ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cr_offset[1] = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cr_offset[2] = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cr_offset[3] = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].u1_cr_offset[4] = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b5_cr_band_pos = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_left_flag = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_up_flag = 0;
+
+ num_rdo_cand++;
+
+ /* SAO_note_01: If the CTB lies on a tile or a slice boundary, then
+ the standard mandates that the merge candidates must be set to unavailable.
+ Hence, check for tile boundary condition by reading
+ s_ctb_nbr_avail_flags.u1_left_avail rather than frame position of CTB.
+ A special case: Merge-candidates should be available at dependent-slices boundaries.
+ Search for <SAO_note_01> in workspace to know more */
+
+#if !DISABLE_SAO_WHEN_NOISY
+ if(1)
+#else
+ if(!u1_force_no_offset)
+#endif
+ {
+ /* Merge left cand*/
+ if(ps_ctb_enc_loop_out->s_ctb_nbr_avail_flags.u1_left_avail)
+ {
+ memcpy(
+ &ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand],
+ &ps_sao_ctxt->s_left_ctb_sao,
+ sizeof(sao_enc_t));
+ ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_left_flag = 1;
+ ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_up_flag = 0;
+ num_rdo_cand++;
+ }
+
+ /* Merge top cand*/
+ if(ps_ctb_enc_loop_out->s_ctb_nbr_avail_flags.u1_top_avail)
+ {
+ memcpy(
+ &ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand],
+ (ps_sao_ctxt->ps_top_ctb_sao - ps_sao_ctxt->u4_num_ctbs_horz),
+ sizeof(sao_enc_t));
+ ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_left_flag = 0;
+ ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand].b1_sao_merge_up_flag = 1;
+ num_rdo_cand++;
+ }
+
+ /* Best luma-chroma candidate*/
+ memcpy(
+ &ps_sao_ctxt->as_sao_rd_cand[num_rdo_cand],
+ &s_best_luma_chroma_cand,
+ sizeof(sao_enc_t));
+ num_rdo_cand++;
+ }
+
+ {
+ UWORD32 luma_distortion = 0, chroma_distortion = 0;
+ /* First cand will be best cand after 1st iteration*/
+ curr_buf_idx = 0;
+ best_buf_idx = 1;
+ best_cost = 0xFFFFFFFF;
+ best_cand_idx = 0;
+
+ for(rdo_cand = 0; rdo_cand < num_rdo_cand; rdo_cand++)
+ {
+ s_sao_ctxt.ps_sao = &ps_sao_ctxt->as_sao_rd_cand[rdo_cand];
+
+ distortion = 0;
+
+ /* This memcpy is required because cabac uses parameters from this structure
+ * to evaluate bits and this structure ptr is sent to cabac through
+ * "ihevce_cabac_rdo_encode_sao" function
+ */
+ memcpy(&ps_ctb_enc_loop_out->s_sao, s_sao_ctxt.ps_sao, sizeof(sao_enc_t));
+
+ if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
+ {
+ /* Copy the left pixels to the scratch buffer for evry rdo cand because its
+ overwritten by the sao leaf level function for next ctb*/
+ memcpy(
+ s_sao_ctxt.au1_left_luma_scratch,
+ ps_sao_ctxt->au1_sao_src_left_luma,
+ ps_sao_ctxt->i4_sao_blk_ht);
+
+ /* Copy the top and top left pixels to the scratch buffer for evry rdo cand because its
+ overwritten by the sao leaf level function for next ctb*/
+ memcpy(
+ s_sao_ctxt.au1_top_luma_scratch,
+ ps_sao_ctxt->pu1_curr_sao_src_top_luma - 1,
+ ps_sao_ctxt->i4_sao_blk_wd + 2);
+ s_sao_ctxt.pu1_curr_sao_src_top_luma = s_sao_ctxt.au1_top_luma_scratch + 1;
+
+ pu1_luma_scratch_buf = ps_sao_ctxt->au1_sao_luma_scratch[curr_buf_idx];
+
+ /* Copy the deblocked recon data to scratch buffer to do sao*/
+
+ ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d(
+ pu1_luma_scratch_buf,
+ i4_luma_scratch_buf_stride,
+ pu1_recon_luma,
+ luma_recon_stride,
+ SCRATCH_BUF_STRIDE,
+ ctb_ht + 1);
+ s_sao_ctxt.pu1_cur_luma_recon_buf = pu1_luma_scratch_buf;
+ s_sao_ctxt.i4_cur_luma_recon_stride = i4_luma_scratch_buf_stride;
+
+ ASSERT(
+ (abs(s_sao_ctxt.ps_sao->u1_y_offset[1]) <= 7) &&
+ (abs(s_sao_ctxt.ps_sao->u1_y_offset[2]) <= 7) &&
+ (abs(s_sao_ctxt.ps_sao->u1_y_offset[3]) <= 7) &&
+ (abs(s_sao_ctxt.ps_sao->u1_y_offset[4]) <= 7));
+ }
+ if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
+ {
+ /* Copy the left pixels to the scratch buffer for evry rdo cand because its
+ overwritten by the sao leaf level function for next ctb*/
+ memcpy(
+ s_sao_ctxt.au1_left_chroma_scratch,
+ ps_sao_ctxt->au1_sao_src_left_chroma,
+ (ps_sao_ctxt->i4_sao_blk_ht >> !u1_is_422) * 2);
+
+ /* Copy the top and top left pixels to the scratch buffer for evry rdo cand because its
+ overwritten by the sao leaf level function for next ctb*/
+ memcpy(
+ s_sao_ctxt.au1_top_chroma_scratch,
+ ps_sao_ctxt->pu1_curr_sao_src_top_chroma - 2,
+ ps_sao_ctxt->i4_sao_blk_wd + 4);
+
+ s_sao_ctxt.pu1_curr_sao_src_top_chroma = s_sao_ctxt.au1_top_chroma_scratch + 2;
+
+ pu1_chroma_scratch_buf = ps_sao_ctxt->au1_sao_chroma_scratch[curr_buf_idx];
+
+ /* Copy the deblocked recon data to scratch buffer to do sao*/
+
+ ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d(
+ pu1_chroma_scratch_buf,
+ i4_chroma_scratch_buf_stride,
+ pu1_recon_chroma,
+ chroma_recon_stride,
+ SCRATCH_BUF_STRIDE,
+ (ctb_ht >> !u1_is_422) + 1);
+
+ s_sao_ctxt.pu1_cur_chroma_recon_buf = pu1_chroma_scratch_buf;
+ s_sao_ctxt.i4_cur_chroma_recon_stride = i4_chroma_scratch_buf_stride;
+
+ ASSERT(
+ (abs(s_sao_ctxt.ps_sao->u1_cb_offset[1]) <= 7) &&
+ (abs(s_sao_ctxt.ps_sao->u1_cb_offset[2]) <= 7) &&
+ (abs(s_sao_ctxt.ps_sao->u1_cb_offset[3]) <= 7) &&
+ (abs(s_sao_ctxt.ps_sao->u1_cb_offset[4]) <= 7));
+ ASSERT(
+ (abs(s_sao_ctxt.ps_sao->u1_cr_offset[1]) <= 7) &&
+ (abs(s_sao_ctxt.ps_sao->u1_cr_offset[2]) <= 7) &&
+ (abs(s_sao_ctxt.ps_sao->u1_cr_offset[3]) <= 7) &&
+ (abs(s_sao_ctxt.ps_sao->u1_cr_offset[4]) <= 7));
+ }
+
+ ASSERT(
+ (s_sao_ctxt.ps_sao->b5_y_band_pos <= 28) &&
+ (s_sao_ctxt.ps_sao->b5_cb_band_pos <= 28) &&
+ (s_sao_ctxt.ps_sao->b5_cr_band_pos <= 28));
+
+ s_sao_ctxt.i1_slice_sao_luma_flag = s_sao_ctxt.ps_slice_hdr->i1_slice_sao_luma_flag;
+ s_sao_ctxt.i1_slice_sao_chroma_flag = s_sao_ctxt.ps_slice_hdr->i1_slice_sao_chroma_flag;
+
+ ihevce_sao_ctb(&s_sao_ctxt, ps_tile_params);
+
+ if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
+ { // clang-format off
+ luma_distortion =
+ ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_ssd_calculator(pu1_src_luma,
+ s_sao_ctxt.pu1_cur_luma_recon_buf, luma_src_stride,
+ s_sao_ctxt.i4_cur_luma_recon_stride, ctb_wd,
+ ctb_ht);
+ } // clang-format on
+
+ if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
+ { // clang-format off
+ chroma_distortion =
+ ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_ssd_calculator(pu1_src_chroma,
+ s_sao_ctxt.pu1_cur_chroma_recon_buf,
+ chroma_src_stride,
+ s_sao_ctxt.i4_cur_chroma_recon_stride, ctb_wd,
+ (ctb_ht >> !u1_is_422));
+ } // clang-format on
+
+ /*chroma distortion is added after correction because of lambda difference*/
+ distortion =
+ luma_distortion +
+ (UWORD32)(chroma_distortion * (i8_cl_ssd_lambda_qf / i8_cl_ssd_lambda_chroma_qf));
+
+ ps_sao_ctxt->ps_rdopt_entropy_ctxt->i4_curr_buf_idx = curr_buf_idx;
+ ctb_bits = ihevce_cabac_rdo_encode_sao(
+ ps_sao_ctxt->ps_rdopt_entropy_ctxt, ps_ctb_enc_loop_out);
+
+ /* Calculate the cost as D+(lamda)*R */
+ curr_cost = distortion +
+ COMPUTE_RATE_COST_CLIP30(ctb_bits, i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT);
+
+ if(curr_cost < best_cost)
+ {
+ best_ctb_sao_bits = ctb_bits;
+ best_cost = curr_cost;
+ best_buf_idx = ps_sao_ctxt->ps_rdopt_entropy_ctxt->i4_curr_buf_idx;
+ best_cand_idx = rdo_cand;
+ curr_buf_idx = !curr_buf_idx;
+ }
+ }
+ /*Adding sao bits to header bits*/
+ *pu4_frame_rdopt_header_bits = best_ctb_sao_bits;
+
+ ihevce_update_best_sao_cabac_state(ps_sao_ctxt->ps_rdopt_entropy_ctxt, best_buf_idx);
+
+ /* store the sao parameters of curr ctb for top merge and left merge*/
+ memcpy(
+ ps_sao_ctxt->ps_top_ctb_sao,
+ &ps_sao_ctxt->as_sao_rd_cand[best_cand_idx],
+ sizeof(sao_enc_t));
+ memcpy(
+ &ps_sao_ctxt->s_left_ctb_sao,
+ &ps_sao_ctxt->as_sao_rd_cand[best_cand_idx],
+ sizeof(sao_enc_t));
+
+ /* Copy the sao parameters of winning candidate into the structure which will be sent to entropy thrd*/
+ memcpy(
+ &ps_ctb_enc_loop_out->s_sao,
+ &ps_sao_ctxt->as_sao_rd_cand[best_cand_idx],
+ sizeof(sao_enc_t));
+
+ if(!ps_sao_ctxt->i4_is_last_ctb_col)
+ {
+ /* Update left luma buffer for next ctb */
+ for(row = 0; row < ps_sao_ctxt->i4_sao_blk_ht; row++)
+ {
+ ps_sao_ctxt->au1_sao_src_left_luma[row] =
+ ps_sao_ctxt->pu1_cur_luma_recon_buf
+ [row * ps_sao_ctxt->i4_cur_luma_recon_stride +
+ (ps_sao_ctxt->i4_sao_blk_wd - 1)];
+ }
+ }
+
+ if(!ps_sao_ctxt->i4_is_last_ctb_col)
+ {
+ /* Update left chroma buffer for next ctb */
+ for(row = 0; row < (ps_sao_ctxt->i4_sao_blk_ht >> 1); row++)
+ {
+ *(UWORD16 *)(ps_sao_ctxt->au1_sao_src_left_chroma + row * 2) =
+ *(UWORD16 *)(ps_sao_ctxt->pu1_cur_chroma_recon_buf +
+ row * ps_sao_ctxt->i4_cur_chroma_recon_stride +
+ (ps_sao_ctxt->i4_sao_blk_wd - 2));
+ }
+ }
+
+ if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_luma_flag)
+ {
+ /* Copy the sao'ed output of the best candidate to the recon buffer*/
+
+ ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d(
+ ps_sao_ctxt->pu1_cur_luma_recon_buf,
+ ps_sao_ctxt->i4_cur_luma_recon_stride,
+ ps_sao_ctxt->au1_sao_luma_scratch[best_buf_idx],
+ i4_luma_scratch_buf_stride,
+ ctb_wd,
+ ctb_ht);
+ }
+ if(ps_sao_ctxt->ps_slice_hdr->i1_slice_sao_chroma_flag)
+ {
+ /* Copy the sao'ed output of the best candidate to the chroma recon buffer*/
+
+ ps_sao_ctxt->ps_cmn_utils_optimised_function_list->pf_copy_2d(
+ ps_sao_ctxt->pu1_cur_chroma_recon_buf,
+ ps_sao_ctxt->i4_cur_chroma_recon_stride,
+ ps_sao_ctxt->au1_sao_chroma_scratch[best_buf_idx],
+ i4_chroma_scratch_buf_stride,
+ ctb_wd,
+ ctb_ht >> !u1_is_422);
+ }
+ }
+}
diff --git a/encoder/ihevce_sao.h b/encoder/ihevce_sao.h
new file mode 100644
index 0000000..fb3db4c
--- /dev/null
+++ b/encoder/ihevce_sao.h
@@ -0,0 +1,76 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_sao.h
+*
+* \brief
+* This file contains the interface definition of the SAO CTB functions
+*
+* \date
+* 30/01/2014
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_SAO_H_
+#define _IHEVCE_SAO_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+#define BYTES_PER_PIXEL 2
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Variable Declarations */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+/* Runs SAO for one CTB. NOTE(review): the visible caller sets ps_sao, the recon buffer pointers and the slice SAO flags in the context before calling - confirm against the definition. */
+void ihevce_sao_ctb(sao_ctxt_t *ps_sao_ctxt, ihevce_tile_params_t *ps_tile_params);
+/* NOTE(review): presumably the per-CTB SAO rate-distortion analysis defined in ihevce_sao.c - confirm there what is written through pu4_frame_rdopt_header_bits. */
+void ihevce_sao_analyse(
+ sao_ctxt_t *ps_sao_ctxt,
+ ctb_enc_loop_out_t *ps_ctb_enc_loop_out,
+ UWORD32 *pu4_frame_rdopt_header_bits,
+ ihevce_tile_params_t *ps_tile_params);
+
+#endif /* _IHEVCE_SAO_H_ */
diff --git a/encoder/ihevce_stasino_helpers.c b/encoder/ihevce_stasino_helpers.c
new file mode 100644
index 0000000..6961a27
--- /dev/null
+++ b/encoder/ihevce_stasino_helpers.c
@@ -0,0 +1,923 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+*******************************************************************************
+* @file
+* ihevce_stasino_helpers.c
+*
+* @brief
+*
+* @author
+* Ittiam
+*
+* @par List of Functions:
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_entropy_structs.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+#include "ihevce_enc_loop_structs.h"
+#include "ihevce_stasino_helpers.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+*******************************************************************************
+*
+* @brief
+* Computes the mean and variance of a block of 8-bit samples in a single pass.
+*
+* @par Description:
+* Accumulates the sum and the sum of squares of the N = height * width samples
+* of a (possibly rectangular) block. When u1_is_hbd is non-zero nothing is
+* accumulated and both outputs are written as 0.
+*
+* @param[in] pv_input
+* Input buffer; read as UWORD8 samples when u1_is_hbd is 0
+*
+* @param[in] i4_stride
+* Offset, in samples, between the starts of consecutive rows of pv_input
+*
+* @param[out] pi4_mean
+* Receives sum / N, or the raw sum when normalization is disabled
+*
+* @param[out] pu4_variance
+* Receives (N * sq_sum - sum^2) / N^2, or only the numerator when disabled
+*
+* @param[in] u1_block_height
+* Number of rows in the block
+*
+* @param[in] u1_block_width
+* Number of samples per row in the block
+*
+* @param[in] u1_is_hbd
+* Non-zero for high bit depth data; that case is not processed here
+*
+* @param[in] u1_disable_normalization
+* Non-zero to skip the divisions by N (mean) and N^2 (variance)
+*
+*******************************************************************************
+*/
+void ihevce_calc_variance(
+ void *pv_input,
+ WORD32 i4_stride,
+ WORD32 *pi4_mean,
+ UWORD32 *pu4_variance,
+ UWORD8 u1_block_height,
+ UWORD8 u1_block_width,
+ UWORD8 u1_is_hbd,
+ UWORD8 u1_disable_normalization)
+{
+ UWORD8 *pui1_buffer; // read pointer used for the 8 bit case
+ WORD32 i, j;
+ WORD32 total_elements;
+
+ LWORD64 mean;
+ ULWORD64 variance;
+ ULWORD64 sum;
+ ULWORD64 sq_sum;
+
+ /* initialisation; these zeros are also the results returned for the hbd case */
+ total_elements = u1_block_height * u1_block_width;
+ mean = 0;
+ variance = 0;
+ sum = 0;
+ sq_sum = 0;
+
+ /* only the 8 bit path is implemented; hbd input leaves the zeroed results untouched */
+ if(!u1_is_hbd)
+ {
+ pui1_buffer = (UWORD8 *)pv_input;
+
+ /* loop over all the rows in the block */
+ for(i = 0; i < u1_block_height; i++)
+ {
+ /* accumulate the sum and the sum of squares along one row */
+ for(j = 0; j < u1_block_width; j++)
+ {
+ sum += pui1_buffer[i * i4_stride + j];
+ sq_sum += (pui1_buffer[i * i4_stride + j] * pui1_buffer[i * i4_stride + j]);
+ }
+ }
+ /* NOTE(review): total_elements is 0 if either dimension is 0, which would divide by zero below - callers presumably never pass 0 */
+ if(!u1_disable_normalization)
+ {
+ mean = sum / total_elements;
+ variance =
+ ((total_elements * sq_sum) - (sum * sum)) / (total_elements * (total_elements));
+ }
+ else
+ {
+ mean = sum;
+ variance = ((total_elements * sq_sum) - (sum * sum));
+ }
+ }
+
+ /* copy the results to the outputs; conversion to the output types is implicit */
+ *pi4_mean = mean;
+ *pu4_variance = variance;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Computes the unnormalised mean and variance terms of a block of WORD16 data
+*
+* @par Description:
+* Single pass accumulation of the sum and the sum of squares over the block.
+* Neither output is divided by the number of elements N = height * width.
+*
+* @param[in] pv_input
+* The input buffer of WORD16 samples.
+*
+*
+* @param[in] i4_stride
+* Offset, in samples, between the starts of consecutive rows of pv_input
+*
+* @param[out] pi4_mean
+* Receives the raw sum of the samples
+*
+* @param[out] pu4_variance
+* Receives N * sq_sum - sum^2
+*
+* @param[in] u1_block_height
+* Number of rows in the block
+*
+* @param[in] u1_block_width
+* Number of samples per row in the block
+*
+*
+* @remarks
+* Conversion of the LWORD64 results to the output types is implicit
+*
+*******************************************************************************/
+void ihevce_calc_variance_signed(
+ WORD16 *pv_input,
+ WORD32 i4_stride,
+ WORD32 *pi4_mean,
+ UWORD32 *pu4_variance,
+ UWORD8 u1_block_height,
+ UWORD8 u1_block_width)
+{
+ WORD16 *pi2_buffer; // read pointer over the signed 16 bit samples
+
+ WORD32 i, j;
+ WORD32 total_elements;
+
+ LWORD64 mean;
+ LWORD64 variance;
+ LWORD64 sum;
+ LWORD64 sq_sum;
+
+ /* initialisation */
+ total_elements = u1_block_height * u1_block_width;
+ mean = 0;
+ variance = 0;
+ sum = 0;
+ sq_sum = 0;
+
+ pi2_buffer = pv_input;
+
+ for(i = 0; i < u1_block_height; i++)
+ {
+ for(j = 0; j < u1_block_width; j++)
+ {
+ sum += pi2_buffer[i * i4_stride + j];
+ sq_sum += (pi2_buffer[i * i4_stride + j] * pi2_buffer[i * i4_stride + j]);
+ }
+ }
+
+ mean = sum; /* raw sum: not divided by total_elements */
+ variance = ((total_elements * sq_sum) - (sum * sum)); /* N * sq_sum - sum^2: not divided by N^2 */
+
+ /* copy back the values to the output variables */
+ *pi4_mean = mean;
+ *pu4_variance = variance;
+}
+
+/**
+*******************************************************************************
+*
+* @brief
+* Computes the mean and variance of one chroma plane of interleaved 8-bit data
+*
+* @par Description:
+* Single pass accumulation of the sum and the sum of squares over every second
+* byte of each row, starting e_chroma_plane bytes into the buffer. The block
+* may be rectangular. With u1_is_hbd non-zero both outputs are written as 0.
+*
+* @param[in] pv_input
+* Input buffer; read as UWORD8 when u1_is_hbd is 0
+*
+* @param[in] i4_stride
+* Offset, in bytes, between the starts of consecutive rows of pv_input
+*
+* @param[out] pi4_mean
+* Receives sum / N, where N = u1_block_height * u1_block_width
+*
+* @param[out] pu4_variance
+* Receives (N * sq_sum - sum^2) / N^2
+*
+* @param[in] u1_block_height
+* Number of rows in the block
+*
+* @param[in] u1_block_width
+* Number of samples of the selected plane per row
+*
+* @param[in] u1_is_hbd
+* Non-zero for high bit depth data; that case is not processed here
+*
+* @param[in] e_chroma_plane
+* Plane selector, added to the buffer pointer as a byte offset
+*
+* @remarks
+* NOTE(review): presumably e_chroma_plane is 0 for U and 1 for V - confirm
+*
+*******************************************************************************
+*/
+void ihevce_calc_chroma_variance(
+ void *pv_input,
+ WORD32 i4_stride,
+ WORD32 *pi4_mean,
+ UWORD32 *pu4_variance,
+ UWORD8 u1_block_height,
+ UWORD8 u1_block_width,
+ UWORD8 u1_is_hbd,
+ CHROMA_PLANE_ID_T e_chroma_plane)
+{
+ UWORD8 *pui1_buffer; // read pointer used for the 8 bit case
+ WORD32 i, j;
+ WORD32 total_elements;
+
+ LWORD64 mean;
+ ULWORD64 variance;
+ LWORD64 sum;
+ LWORD64 sq_sum;
+
+ /* initialisation; these zeros are also the results returned for the hbd case */
+ total_elements = u1_block_height * u1_block_width;
+ mean = 0;
+ variance = 0;
+ sum = 0;
+ sq_sum = 0;
+
+ /* only the 8 bit path is implemented; hbd input leaves the zeroed results untouched */
+ if(!u1_is_hbd)
+ {
+ pui1_buffer = (UWORD8 *)pv_input;
+
+ pui1_buffer += e_chroma_plane;
+
+ /* loop over all the rows in the block */
+ for(i = 0; i < u1_block_height; i++)
+ {
+ /* step by 2 along the row so only the selected plane's bytes are read */
+ for(j = 0; j < u1_block_width; j++)
+ {
+ sum += pui1_buffer[i * i4_stride + j * 2];
+ sq_sum += (pui1_buffer[i * i4_stride + j * 2] * pui1_buffer[i * i4_stride + j * 2]);
+ }
+ }
+ /* NOTE(review): total_elements is 0 if either dimension is 0, which would divide by zero below - callers presumably never pass 0 */
+ mean = sum / total_elements;
+ variance = ((total_elements * sq_sum) - (sum * sum)) / (total_elements * (total_elements));
+ }
+
+ /* copy the results to the outputs; conversion to the output types is implicit */
+ *pi4_mean = mean;
+ *pu4_variance = variance;
+}
+/* Modulates i8_distortion with a noise term computed from the source and prediction variances; returns it unchanged when u1_enable_psyRDOPT is non-zero. */
+LWORD64 ihevce_inject_stim_into_distortion(
+ void *pv_src, /* source block, passed to the variance helper */
+ WORD32 i4_src_stride, /* stride of pv_src */
+ void *pv_pred, /* prediction block, passed to the variance helper */
+ WORD32 i4_pred_stride, /* stride of pv_pred */
+ LWORD64 i8_distortion, /* distortion value to modulate */
+ WORD32 i4_alpha_stim_multiplier, /* forwarded to ihevce_compute_noise_term */
+ UWORD8 u1_blk_size, /* used as both the height and the width of the block */
+ UWORD8 u1_is_hbd, /* forwarded to the variance helpers */
+ UWORD8 u1_enable_psyRDOPT, /* non-zero bypasses the modulation */
+ CHROMA_PLANE_ID_T e_chroma_plane) /* NULL_PLANE selects the luma helper */
+{
+ if(!u1_enable_psyRDOPT) /* the noise term is applied only when psyRDOPT is off */
+ {
+ UWORD32 u4_src_variance;
+ UWORD32 u4_pred_variance;
+ WORD32 i4_mean; /* written by the variance helpers but never read here */
+ WORD32 i4_noise_term;
+
+ if(NULL_PLANE == e_chroma_plane)
+ {
+ ihevce_calc_variance(
+ pv_src,
+ i4_src_stride,
+ &i4_mean,
+ &u4_src_variance,
+ u1_blk_size,
+ u1_blk_size,
+ u1_is_hbd,
+ 0); /* u1_disable_normalization = 0: normalised variance */
+
+ ihevce_calc_variance(
+ pv_pred,
+ i4_pred_stride,
+ &i4_mean,
+ &u4_pred_variance,
+ u1_blk_size,
+ u1_blk_size,
+ u1_is_hbd,
+ 0); /* u1_disable_normalization = 0: normalised variance */
+ }
+ else
+ {
+ ihevce_calc_chroma_variance(
+ pv_src,
+ i4_src_stride,
+ &i4_mean,
+ &u4_src_variance,
+ u1_blk_size,
+ u1_blk_size,
+ u1_is_hbd,
+ e_chroma_plane);
+
+ ihevce_calc_chroma_variance(
+ pv_pred,
+ i4_pred_stride,
+ &i4_mean,
+ &u4_pred_variance,
+ u1_blk_size,
+ u1_blk_size,
+ u1_is_hbd,
+ e_chroma_plane);
+ }
+ /* derive the noise term from the multiplier and the two variances */
+ i4_noise_term =
+ ihevce_compute_noise_term(i4_alpha_stim_multiplier, u4_src_variance, u4_pred_variance);
+ /* NOTE(review): MULTIPLY_STIM_WITH_DISTORTION is presumably a macro that updates i8_distortion in place - confirm */
+ MULTIPLY_STIM_WITH_DISTORTION(i8_distortion, i4_noise_term, STIM_Q_FORMAT, ALPHA_Q_FORMAT);
+
+ return i8_distortion;
+ }
+ else
+ {
+ return i8_distortion;
+ }
+}
+/* Recursively classifies a CU as noisy from per-8x8 flags: an 8x8 CU returns its own flag, a larger CU returns 1 when at least two of its four quadrants return non-zero. */
+UWORD8 ihevce_determine_cu_noise_based_on_8x8Blk_data(
+ UWORD8 *pu1_is_8x8Blk_noisy, UWORD8 u1_cu_x_pos, UWORD8 u1_cu_y_pos, UWORD8 u1_cu_size)
+{
+ UWORD8 u1_num_noisy_children = 0;
+ UWORD8 u1_start_index = (u1_cu_x_pos / 8) + u1_cu_y_pos; /* NOTE(review): presumably a row-major index with 8 flags per row and y a multiple of 8 - confirm */
+ /* base case; NOTE(review): the recursion only ends if halving u1_cu_size reaches exactly 8 */
+ if(8 == u1_cu_size)
+ {
+ return pu1_is_8x8Blk_noisy[u1_start_index];
+ }
+ /* quadrant at (x, y) */
+ u1_num_noisy_children += ihevce_determine_cu_noise_based_on_8x8Blk_data(
+ pu1_is_8x8Blk_noisy, u1_cu_x_pos, u1_cu_y_pos, u1_cu_size / 2);
+ /* quadrant at (x + size / 2, y) */
+ u1_num_noisy_children += ihevce_determine_cu_noise_based_on_8x8Blk_data(
+ pu1_is_8x8Blk_noisy, u1_cu_x_pos + (u1_cu_size / 2), u1_cu_y_pos, u1_cu_size / 2);
+ /* quadrant at (x, y + size / 2) */
+ u1_num_noisy_children += ihevce_determine_cu_noise_based_on_8x8Blk_data(
+ pu1_is_8x8Blk_noisy, u1_cu_x_pos, u1_cu_y_pos + (u1_cu_size / 2), u1_cu_size / 2);
+ /* quadrant at (x + size / 2, y + size / 2) */
+ u1_num_noisy_children += ihevce_determine_cu_noise_based_on_8x8Blk_data(
+ pu1_is_8x8Blk_noisy,
+ u1_cu_x_pos + (u1_cu_size / 2),
+ u1_cu_y_pos + (u1_cu_size / 2),
+ u1_cu_size / 2);
+ /* the CU counts as noisy when two or more quadrants were reported noisy */
+ return (u1_num_noisy_children >= 2);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_psy_rd_cost_croma \endif
+*
+* \brief
+* Calculates the psyco visual cost for RD opt. This is
+*
+* \param[in] pui4_source_satd
+* This is the pointer to the array of 8x8 satd of the corresponding source CTB. This is pre calculated.
+* \param[in] *pui1_recon
+* This si the pointer to the pred data.
+* \param[in] recon_stride
+* This si the pred stride
+* \param[in] pic_type
+* Picture type.
+* \param[in] layer_id
+* Indicates the temporal layer.
+* \param[in] lambda
+* This is the weighting factor for the cost.
+* \param[in] is_hbd
+* This is the high bit depth flag which indicates if the bit depth of the pixels is 10 bit or 8 bit.
+* \param[in] sub_sampling_type
+* This is the chroma subsampling type. 11 - for 420 and 13 for 422
+* \return
+* the cost for the psyRDopt
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+LWORD64 ihevce_psy_rd_cost_croma(
+    LWORD64 *pui4_source_satd,
+    void *p_recon,
+    WORD32 recon_stride_vert,
+    WORD32 recond_stride_horz,
+    WORD32 cu_size_luma,
+    WORD32 pic_type,
+    WORD32 layer_id,
+    WORD32 lambda,
+    WORD32 start_index,
+    WORD32 is_hbd,
+    WORD32 sub_sampling_type,
+    ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list)
+{
+    /* accumulated psy cost and the lambda * PSY_STRENGTH_CHROMA weight applied to each block */
+    LWORD64 psy_rd_cost;
+    UWORD32 lambda_mod;
+    WORD32 psy_factor;
+
+    /* declare local variables */
+    WORD32 i;
+    WORD32 cu_total_size;
+    WORD32 num_comp_had_blocks;
+
+    UWORD8 *pu1_l0_block;
+    UWORD8 *pu1_l0_block_prev;
+    UWORD8 *pu1_recon;
+    WORD32 ht_offset;
+    WORD32 wd_offset;
+    WORD32 cu_ht;
+    WORD32 cu_wd;
+
+    WORD32 num_horz_blocks;
+
+    WORD16 pi2_residue_had[64];
+    /* this is used as a buffer with all values equal to 0. This emulates the case with
+       pred being zero in the HAD function */
+    UWORD8 ai1_zeros_buffer[64];
+
+    WORD32 had_block_size;
+    LWORD64 source_satd; // source satd entry of the current block
+    LWORD64 recon_satd; // recon satd of the current 4x4 block (assigned on the 8-bit path only)
+
+    WORD32 index_for_src_satd;
+
+    (void)recond_stride_horz;
+    (void)pic_type;
+    (void)layer_id;
+    if(!is_hbd)
+    {
+        pu1_recon = (UWORD8 *)p_recon;
+    }
+
+    /**** initialize the variables ****/
+    had_block_size = 4;
+
+    if(sub_sampling_type == 1) // 420
+    {
+        cu_ht = cu_size_luma / 2;
+        cu_wd = cu_size_luma / 2;
+    }
+    else
+    {
+        cu_ht = cu_size_luma;
+        cu_wd = cu_size_luma / 2;
+    }
+
+    num_horz_blocks = 2 * cu_wd / had_block_size; //ctb_width / had_block_size;
+    ht_offset = -had_block_size;
+    wd_offset = 0; //-had_block_size;
+
+    cu_total_size = cu_ht * cu_wd;
+    num_comp_had_blocks = 2 * cu_total_size / (had_block_size * had_block_size);
+
+    index_for_src_satd = start_index;
+
+    for(i = 0; i < 64; i++)
+    {
+        ai1_zeros_buffer[i] = 0;
+    }
+
+    psy_factor = PSY_STRENGTH_CHROMA;
+    psy_rd_cost = 0;
+    lambda_mod = lambda * psy_factor;
+
+    /************************************************************/
+    /* loop over every 4x4 block in the CU; the count covers two components' worth of blocks */
+    for(i = 0; i < num_comp_had_blocks; i++)
+    {
+        if(i % num_horz_blocks == 0)
+        {
+            wd_offset = -had_block_size;
+            ht_offset += had_block_size;
+        }
+        wd_offset += had_block_size;
+
+        /* source satd entry for the current block */
+        source_satd = pui4_source_satd[index_for_src_satd];
+
+        if(i % 2 != 0)
+        {
+            if(!is_hbd)
+            {
+                pu1_l0_block = pu1_l0_block_prev + 1;
+            }
+        }
+        else
+        {
+            if(!is_hbd)
+            {
+                /* even block: addressed from the row/column offsets; the next (odd) block starts one byte later */
+                pu1_l0_block = pu1_recon + recon_stride_vert * ht_offset + wd_offset;
+                pu1_l0_block_prev = pu1_l0_block;
+            }
+        }
+
+        if(had_block_size == 4)
+        {
+            if(!is_hbd)
+            {
+                recon_satd = ps_cmn_utils_optimised_function_list->pf_chroma_AC_HAD_4x4_8bit(
+                    pu1_l0_block,
+                    recon_stride_vert,
+                    ai1_zeros_buffer,
+                    had_block_size,
+                    pi2_residue_had,
+                    had_block_size);
+
+                /* cost from |source SATD - recon SATD|; skipped for HBD, where no recon SATD is computed */
+                psy_rd_cost += (lambda_mod * llabs(source_satd - recon_satd));
+            }
+
+            index_for_src_satd++;
+
+            if((i % num_horz_blocks) == (num_horz_blocks - 1))
+            {
+                index_for_src_satd -= num_horz_blocks;
+                index_for_src_satd +=
+                    (MAX_CU_SIZE / 8); /* Assuming CTB size = 64 and blocksize = 8 */
+            }
+
+        } // if had block size ==4
+    } // for loop for all 4x4 block in the cu
+
+    psy_rd_cost = psy_rd_cost >> (Q_PSY_STRENGTH_CHROMA + LAMBDA_Q_SHIFT);
+    /* return the additional cost for the psy RD opt (0 for HBD input, which adds no block cost) */
+    return (psy_rd_cost);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_psy_rd_cost \endif
+*
+* \brief
+* Calculates the psycho-visual cost for RD opt: the sum, over every 8x8 block
+* of the CU, of lambda * psy strength * |source SATD - recon SATD|, scaled down.
+*
+* \param[in] pui4_source_satd : pre-calculated 8x8 SATDs of the source CTB
+* \param[in] pv_recon : pointer to the recon (pred) data
+* \param[in] recon_stride_vert : stride between rows of the recon data
+* \param[in] recond_stride_horz : unused
+* \param[in] cu_size : width and height of the CU
+* \param[in] pic_type, layer_id : picture type and temporal layer (unused)
+* \param[in] lambda : weighting factor for the cost
+* \param[in] start_index : first entry of pui4_source_satd to read
+* \param[in] is_hbd : high bit depth flag; only the 8-bit path adds cost
+* \param[in] u4_psy_strength : psy strength that lambda is multiplied by
+* \param[in] ps_cmn_utils_optimised_function_list : provides pf_AC_HAD_8x8_8bit
+*
+* \return
+* the cost for the psyRDopt (0 when is_hbd is set)
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+LWORD64 ihevce_psy_rd_cost(
+    LWORD64 *pui4_source_satd,
+    void *pv_recon,
+    WORD32 recon_stride_vert,
+    WORD32 recond_stride_horz,
+    WORD32 cu_size,
+    WORD32 pic_type,
+    WORD32 layer_id,
+    WORD32 lambda,
+    WORD32 start_index,
+    WORD32 is_hbd,
+    UWORD32 u4_psy_strength,
+    ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list)
+{
+    /* accumulated psy cost and the lambda * psy strength weight applied to each block */
+    LWORD64 psy_rd_cost; // TODO : check if overflow is there.
+    UWORD32 lambda_mod;
+    WORD32 psy_factor;
+
+    /* declare local variables */
+    WORD32 i;
+    WORD32 cu_total_size;
+    WORD32 num_comp_had_blocks;
+
+    UWORD8 *pu1_l0_block;
+    UWORD8 *pu1_recon;
+
+    WORD32 ht_offset;
+    WORD32 wd_offset;
+    WORD32 cu_ht;
+    WORD32 cu_wd;
+
+    WORD32 num_horz_blocks;
+
+    /* output buffer handed to the HAD function; its contents are not read back here */
+    WORD16 pi2_residue_had_zscan[64];
+
+    /* this is used as a buffer with all values equal to 0. This emulates the case with
+       pred being zero in the HAD function */
+    UWORD8 ai1_zeros_buffer[64];
+
+    WORD32 had_block_size;
+    LWORD64 source_satd; // to hold source for current 8x8 block
+    LWORD64 recon_satd; // holds the current recon 8x8 satd (assigned on the 8-bit path only)
+
+    WORD32 index_for_src_satd;
+
+    (void)recond_stride_horz;
+    (void)pic_type;
+    (void)layer_id;
+    /***** initialize the variables ****/
+    had_block_size = 8;
+    cu_ht = cu_size;
+    cu_wd = cu_size;
+
+    num_horz_blocks = cu_wd / had_block_size; //ctb_width / had_block_size;
+
+    ht_offset = -had_block_size;
+    wd_offset = 0 - had_block_size;
+
+    cu_total_size = cu_ht * cu_wd;
+    num_comp_had_blocks = cu_total_size / (had_block_size * had_block_size);
+
+    index_for_src_satd = start_index;
+
+    for(i = 0; i < 64; i++)
+    {
+        ai1_zeros_buffer[i] = 0;
+    }
+    psy_factor = u4_psy_strength; //PSY_STRENGTH;
+    psy_rd_cost = 0;
+    lambda_mod = lambda * psy_factor;
+
+    if(!is_hbd)
+    {
+        pu1_recon = (UWORD8 *)pv_recon;
+    }
+
+    /**************************************************************/
+    /* loop over for every 8x8 blocks in the CU */
+    for(i = 0; i < num_comp_had_blocks; i++)
+    {
+        if(i % num_horz_blocks == 0)
+        {
+            wd_offset = -had_block_size;
+            ht_offset += had_block_size;
+        }
+        wd_offset += had_block_size;
+
+        /* source satd for the current 8x8 block */
+        source_satd = pui4_source_satd[index_for_src_satd];
+
+        if(had_block_size == 8)
+        {
+            /* Only an 8-bit HAD function is called here. For high bit depth input no
+             * recon SATD is produced, so the cost is accumulated on the 8-bit path
+             * only; accumulating it unconditionally would read an indeterminate
+             * recon_satd.
+             */
+            if(!is_hbd)
+            {
+                /* top-left sample of the current 8x8 recon block */
+                pu1_l0_block = pu1_recon + recon_stride_vert * ht_offset + wd_offset;
+
+                recon_satd = ps_cmn_utils_optimised_function_list->pf_AC_HAD_8x8_8bit(
+                    pu1_l0_block,
+                    recon_stride_vert,
+                    ai1_zeros_buffer,
+                    had_block_size,
+                    pi2_residue_had_zscan,
+                    had_block_size);
+
+                /* get the additional cost function based on the absolute SATD diff of source and recon. */
+                psy_rd_cost += (lambda_mod * llabs(source_satd - recon_satd));
+            }
+
+            index_for_src_satd++;
+            if((i % num_horz_blocks) == (num_horz_blocks - 1))
+            {
+                index_for_src_satd -= num_horz_blocks;
+                index_for_src_satd +=
+                    (MAX_CU_SIZE / 8); /* Assuming CTB size = 64 and blocksize = 8 */
+            }
+        } // if
+    } // for loop
+    psy_rd_cost = psy_rd_cost >> (Q_PSY_STRENGTH + LAMBDA_Q_SHIFT);
+
+    /* return the additional cost for the psy RD opt */
+    return (psy_rd_cost);
+}
+
+/* Derives the variance term of partition i4_part_id as
+ * sigmaXSquared - sigmaX * sigmaX, rescaled by the inverse weighted-prediction
+ * weight whenever that weight differs from the default one. The value is stored
+ * in *u8_var, shifted right when GETRANGE64 reports more than 27 bits, and the
+ * number of bits shifted out is returned (0 if no shift was applied).
+ */
+unsigned long ihevce_calc_stim_injected_variance(
+    ULWORD64 *pu8_sigmaX,
+    ULWORD64 *pu8_sigmaXSquared,
+    ULWORD64 *u8_var,
+    WORD32 i4_inv_wpred_wt,
+    WORD32 i4_inv_wt_shift_val,
+    WORD32 i4_wpred_log_wdc,
+    WORD32 i4_part_id)
+{
+    const WORD32 i4_default_src_wt = ((1 << 15) + (WGHT_DEFAULT >> 1)) / WGHT_DEFAULT;
+    ULWORD64 u8_sigma_x_sq = pu8_sigmaX[i4_part_id] * pu8_sigmaX[i4_part_id];
+    ULWORD64 u8_variance = pu8_sigmaXSquared[i4_part_id] - u8_sigma_x_sq;
+    WORD32 i4_bits_req;
+    WORD32 i4_shift;
+
+    /* a non-default weight: rescale by the square of the shifted inverse weight */
+    if(i4_inv_wpred_wt != i4_default_src_wt)
+    {
+        i4_inv_wpred_wt >>= i4_inv_wt_shift_val;
+
+        u8_variance = SHR_NEG(
+            (u8_variance * i4_inv_wpred_wt * i4_inv_wpred_wt),
+            (30 - (2 * i4_inv_wt_shift_val) - i4_wpred_log_wdc * 2));
+    }
+
+    GETRANGE64(i4_bits_req, u8_variance);
+
+    /* bits to drop from the value: everything GETRANGE64 reports beyond 27 */
+    i4_shift = (i4_bits_req > 27) ? (i4_bits_req - 27) : 0;
+    *u8_var = u8_variance >> i4_shift;
+
+    return i4_shift;
+}
+
+/* Computes N * (sigmaXSquared[0] + sigmaXSquared[1]) - (sigmaX[0] + sigmaX[1])^2 from the sums of
+ * u1_num_parts partitions, N being u1_cu_size squared. Each partition's sigmaXSquared is first
+ * divided by its pixel count; when u1_is_for_src is set and the inverse weight is not the default
+ * one, both sums are rescaled and pi4_inv_wt[] is overwritten with the shifted weight.
+ * The result goes to *u8_var, right-shifted when GETRANGE64 reports more than 27 bits;
+ * the applied shift is returned (0 when none was needed).
+ */
+unsigned long ihevce_calc_variance_for_diff_weights(
+    ULWORD64 *pu8_sigmaX,
+    ULWORD64 *pu8_sigmaXSquared,
+    ULWORD64 *u8_var,
+    WORD32 *pi4_inv_wt,
+    WORD32 *pi4_inv_wt_shift_val,
+    pu_result_t *ps_result,
+    WORD32 i4_wpred_log_wdc,
+    PART_ID_T *pe_part_id,
+    UWORD8 u1_cu_size,
+    UWORD8 u1_num_parts,
+    UWORD8 u1_is_for_src)
+{
+    WORD32 i4_k;
+    UWORD32 u4_wd, u4_ht;
+    UWORD32 u4_num_base_blks; /* 32-bit: a UWORD8 wraps to 0 once wd * ht reaches 256 (16 x 16) */
+    UWORD32 u4_num_pixels_in_part;
+    UWORD8 u1_index;
+    WORD32 i4_bits_req;
+
+    UWORD8 u1_base_blk_size = 4;
+    UWORD32 u4_tot_num_pixels = u1_cu_size * u1_cu_size;
+    ULWORD64 u8_temp_sigmaX[MAX_NUM_INTER_PARTS] = { 0, 0 };
+    ULWORD64 u8_temp_sigmaXsquared[MAX_NUM_INTER_PARTS] = { 0, 0 };
+    ULWORD64 u8_z;
+
+    const WORD32 i4_default_src_wt = ((1 << 15) + (WGHT_DEFAULT >> 1)) / WGHT_DEFAULT;
+
+    for(i4_k = 0; i4_k < u1_num_parts; i4_k++)
+    {
+        u4_wd = ps_result[i4_k].pu.b4_wd + 1;
+        u4_ht = ps_result[i4_k].pu.b4_ht + 1;
+        u4_num_base_blks = u4_wd * u4_ht;
+        u4_num_pixels_in_part = u4_num_base_blks * u1_base_blk_size * u1_base_blk_size;
+
+        u1_index = u1_is_for_src ? pe_part_id[i4_k] : i4_k;
+
+        u8_temp_sigmaXsquared[i4_k] = pu8_sigmaXSquared[u1_index] / u4_num_pixels_in_part;
+        u8_temp_sigmaX[i4_k] = pu8_sigmaX[u1_index];
+
+        if(u1_is_for_src)
+        {
+            if(pi4_inv_wt[i4_k] != i4_default_src_wt)
+            {
+                pi4_inv_wt[i4_k] = pi4_inv_wt[i4_k] >> pi4_inv_wt_shift_val[i4_k];
+                u8_temp_sigmaX[i4_k] = SHR_NEG(
+                    (u8_temp_sigmaX[i4_k] * pi4_inv_wt[i4_k]),
+                    (15 - pi4_inv_wt_shift_val[i4_k] - i4_wpred_log_wdc));
+                u8_temp_sigmaXsquared[i4_k] = SHR_NEG(
+                    (u8_temp_sigmaXsquared[i4_k] * pi4_inv_wt[i4_k] * pi4_inv_wt[i4_k]),
+                    (30 - (2 * pi4_inv_wt_shift_val[i4_k]) - i4_wpred_log_wdc * 2));
+            }
+        }
+    }
+
+    u8_z = (u4_tot_num_pixels * (u8_temp_sigmaXsquared[0] + u8_temp_sigmaXsquared[1])) -
+           ((u8_temp_sigmaX[0] + u8_temp_sigmaX[1]) * (u8_temp_sigmaX[0] + u8_temp_sigmaX[1]));
+
+    GETRANGE64(i4_bits_req, u8_z);
+
+    if(i4_bits_req > 27)
+    {
+        *u8_var = u8_z >> (i4_bits_req - 27);
+        return (i4_bits_req - 27);
+    }
+    else
+    {
+        *u8_var = u8_z;
+        return 0;
+    }
+}
diff --git a/encoder/ihevce_stasino_helpers.h b/encoder/ihevce_stasino_helpers.h
new file mode 100644
index 0000000..8c79ed5
--- /dev/null
+++ b/encoder/ihevce_stasino_helpers.h
@@ -0,0 +1,262 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* ihevce_stasino_helpers.h
+*
+* @brief
+*
+*
+* @author
+* Ittiam
+*
+* @remarks
+* None
+*
+*******************************************************************************
+*/
+
+#ifndef _IHEVCE_STASINO_HELPERS_H_
+#define _IHEVCE_STASINO_HELPERS_H_
+
+#include <math.h>
+/****************************************************************************/
+/* Constant Macros */
+/****************************************************************************/
+
+/****************************************************************************/
+/* Function Macros */
+/****************************************************************************/
+#define MULTIPLY_STIM_WITH_DISTORTION(dist, stimXalpha, stim_q_level, alpha_q_level) \
+    { \
+        ULWORD64 u8_pure_dist = (dist); \
+        WORD32 i4_q_level = (stim_q_level) + (alpha_q_level); \
+        /* dist * ((1 << q) - stimXalpha), plus half an LSB for rounding, then >> q */ \
+        u8_pure_dist *= ((1 << (i4_q_level)) - (stimXalpha)); \
+        u8_pure_dist += (1 << ((i4_q_level)-1)); \
+        (dist) = u8_pure_dist >> (i4_q_level); \
+    }
+
+/****************************************************************************/
+/* Typedefs */
+/****************************************************************************/
+
+/****************************************************************************/
+/* Enums */
+/****************************************************************************/
+
+/****************************************************************************/
+/* Structure */
+/****************************************************************************/
+
+/****************************************************************************/
+/* Function Prototypes */
+/****************************************************************************/
+
+void ihevce_calc_variance(
+ void *pv_input,
+ WORD32 i4_stride,
+ WORD32 *pi4_mean,
+ UWORD32 *pu4_variance,
+ UWORD8 u1_block_height,
+ UWORD8 u1_block_width,
+ UWORD8 u1_is_hbd,
+ UWORD8 u1_disable_normalization);
+
+void ihevce_calc_variance_signed(
+ WORD16 *pv_input,
+ WORD32 i4_stride,
+ WORD32 *pi4_mean,
+ UWORD32 *pu4_variance,
+ UWORD8 u1_block_height,
+ UWORD8 u1_block_width);
+
+void ihevce_calc_chroma_variance(
+ void *pv_input,
+ WORD32 i4_stride,
+ WORD32 *pi4_mean,
+ UWORD32 *pu4_variance,
+ UWORD8 u1_block_height,
+ UWORD8 u1_block_width,
+ UWORD8 u1_is_hbd,
+ CHROMA_PLANE_ID_T e_chroma_plane);
+
+static INLINE UWORD32 ihevce_compute_stim(UWORD32 u4_variance1, UWORD32 u4_variance2)
+{
+    const double d_v1 = (double)u4_variance1;
+    const double d_v2 = (double)u4_variance2;
+    return (u4_variance1 == u4_variance2)
+               ? (1 << STIM_Q_FORMAT) /* equal inputs: 1.0 in Q format, ratio is not evaluated */
+               : ((UWORD32)(
+                     ((2 * d_v1 * d_v2) / (pow(d_v1, 2) + pow(d_v2, 2))) * pow((double)2, STIM_Q_FORMAT)));
+}
+
+LWORD64 ihevce_inject_stim_into_distortion(
+ void *pv_src,
+ WORD32 i4_src_stride,
+ void *pv_pred,
+ WORD32 i4_pred_stride,
+ LWORD64 i8_distortion,
+ WORD32 i4_alpha_stim_multiplier,
+ UWORD8 u1_blk_size,
+ UWORD8 u1_is_hbd,
+ UWORD8 u1_enable_psyRDOPT,
+ CHROMA_PLANE_ID_T e_chroma_plane);
+
+static INLINE WORD32 ihevce_derive_noise_weighted_alpha_stim_multiplier(
+ WORD32 i4_alpha, UWORD32 u4SrcVar, UWORD32 u4PredVar, WORD32 i4_stim)
+{
+ (void)u4SrcVar; /* the variance and stim inputs are accepted but not used */
+ (void)u4PredVar;
+ (void)i4_stim;
+ return i4_alpha; /* alpha is handed back unchanged */
+}
+
+static INLINE WORD32 ihevce_compute_noise_term(WORD32 i4_alpha, UWORD32 u4SrcVar, UWORD32 u4PredVar)
+{
+    WORD32 i4_stim;
+
+    /* a zero alpha yields a zero noise term without computing the stim */
+    if(0 == i4_alpha)
+    {
+        return 0;
+    }
+
+    i4_stim = ihevce_compute_stim(u4SrcVar, u4PredVar);
+    ASSERT(i4_stim >= 0);
+
+    /* let the weighting hook adjust alpha, then scale the stim by it */
+    i4_alpha =
+        ihevce_derive_noise_weighted_alpha_stim_multiplier(i4_alpha, u4SrcVar, u4PredVar, i4_stim);
+    return i4_stim * i4_alpha;
+}
+
+UWORD8 ihevce_determine_cu_noise_based_on_8x8Blk_data(
+ UWORD8 *pu1_is_8x8Blk_noisy, UWORD8 u1_cu_x_pos, UWORD8 u1_cu_y_pos, UWORD8 u1_cu_size);
+
+LWORD64 ihevce_psy_rd_cost_croma(
+ LWORD64 *pui4_source_satd,
+ void *p_recon,
+ WORD32 recon_stride_vert,
+ WORD32 recond_stride_horz,
+ WORD32 cu_size_luma,
+ WORD32 pic_type,
+ WORD32 layer_id,
+ WORD32 lambda,
+ WORD32 start_index,
+ WORD32 is_hbd,
+ WORD32 sub_sampling_type,
+ ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list
+
+);
+
+LWORD64 ihevce_psy_rd_cost(
+ LWORD64 *pui4_source_satd,
+ void *pv_recon,
+ WORD32 recon_stride_vert,
+ WORD32 recond_stride_horz,
+ WORD32 cu_size,
+ WORD32 pic_type,
+ WORD32 layer_id,
+ WORD32 lambda,
+ WORD32 start_index,
+ WORD32 is_hbd,
+ UWORD32 u4_psy_strength,
+ ihevce_cmn_opt_func_t *ps_cmn_utils_optimised_function_list);
+
+WORD32 ihevce_ctb_noise_detect(
+ UWORD8 *pu1_l0_ctb,
+ WORD32 l0_stride,
+ UWORD8 *pu1_l1_ctb,
+ WORD32 l1_stride,
+ WORD32 had_block_size,
+ WORD32 ctb_width,
+ WORD32 ctb_height,
+ ihevce_ctb_noise_params *ps_ctb_noise_params,
+ WORD32 ctb_height_offset,
+ WORD32 ctb_width_offset,
+ WORD32 frame_height,
+ WORD32 frame_width);
+
+void ihevce_had4_4x4_noise_detect(
+ UWORD8 *pu1_src,
+ WORD32 src_strd,
+ UWORD8 *pu1_pred,
+ WORD32 pred_strd,
+ WORD16 *pi2_dst4x4,
+ WORD16 *pi2_residue,
+ WORD32 dst_strd,
+ WORD32 scaling_for_pred);
+
+WORD32 ihevce_had_16x16_r_noise_detect(
+ UWORD8 *pu1_src,
+ WORD32 src_strd,
+ UWORD8 *pu1_pred,
+ WORD32 pred_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd,
+ WORD32 pos_x_y_4x4,
+ WORD32 num_4x4_in_row,
+ WORD32 scaling_for_pred);
+
+UWORD32 ihevce_compute_8x8HAD_using_4x4_noise_detect(
+ WORD16 *pi2_4x4_had,
+ WORD32 had4_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd,
+ WORD32 i4_frm_qstep,
+ WORD32 *pi4_cbf);
+
+void ihevce_had_8x8_using_4_4x4_noise_detect(
+ UWORD8 *pu1_src,
+ WORD32 src_strd,
+ UWORD8 *pu1_pred,
+ WORD32 pred_strd,
+ WORD16 *pi2_dst,
+ WORD32 dst_strd,
+ WORD32 pos_x_y_4x4,
+ WORD32 num_4x4_in_row,
+ WORD32 scaling_for_pred);
+
+unsigned long ihevce_calc_stim_injected_variance(
+ ULWORD64 *pu8_sigmaX,
+ ULWORD64 *pu8_sigmaXSquared,
+ ULWORD64 *u8_var,
+ WORD32 i4_inv_wpred_wt,
+ WORD32 i4_inv_wt_shift_val,
+ WORD32 i4_wpred_log_wdc,
+ WORD32 i4_part_id);
+
+unsigned long ihevce_calc_variance_for_diff_weights(
+ ULWORD64 *pu8_sigmaX,
+ ULWORD64 *pu8_sigmaXSquared,
+ ULWORD64 *u8_var,
+ WORD32 *pi4_inv_wt,
+ WORD32 *pi4_inv_wt_shift_val,
+ pu_result_t *ps_result,
+ WORD32 i4_wpred_log_wdc,
+ PART_ID_T *pe_part_id,
+ UWORD8 u1_cu_size,
+ UWORD8 u1_num_parts,
+ UWORD8 u1_is_for_src);
+
+#endif /* _IHEVCE_STASINO_HELPERS_H_ */
diff --git a/encoder/ihevce_sub_pic_rc.c b/encoder/ihevce_sub_pic_rc.c
new file mode 100644
index 0000000..0760c77
--- /dev/null
+++ b/encoder/ihevce_sub_pic_rc.c
@@ -0,0 +1,530 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*!
+******************************************************************************
+* \file ihevce_sub_pic_rc.c
+*
+* \brief
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+* List of Functions
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_debug.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_buffer_que_interface.h"
+#include "ihevce_hle_interface.h"
+#include "ihevce_hle_q_func.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_checks.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_trace.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_global_tables.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_entropy_structs.h"
+#include "ihevce_entropy_interface.h"
+#include "ihevce_enc_loop_structs.h"
+#include "hme_datatype.h"
+#include "hme_interface.h"
+#include "hme_common_defs.h"
+#include "hme_defs.h"
+#include "ihevce_rc_enc_structs.h"
+#include "ihevce_rc_interface.h"
+#include "ihevce_sub_pic_rc.h"
+
+#include "cast_types.h"
+#include "osal.h"
+#include "osal_defaults.h"
+
+/*****************************************************************************/
+/* Globals */
+/*****************************************************************************/
+/* @brief : scale factor for a QP deviation d in [-6, 6], stored at index d + 6 (values follow 2^(d/6)) */
+float qp_scale_dev[13] = { 0.5, 0.56, 0.630, 0.707, 0.794, 0.891, 1.00,
+ 1.122, 1.259, 1.414, 1.587, 1.782, 2.00 };
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+#define IN_FRAME_RC_PRINT 0
+#define IN_FRAME_RC_FRAME_NUM 4
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_sub_pic_rc_in_data \endif
+*
+* \brief
+* Sub-pic RC thread interface function. Adds the statistics of the CTB just
+* coded by the calling thread to the shared accumulators and, each time
+* i4_num_ctb_for_out_scale CTBs have been counted across threads, compares the
+* bits consumed so far with an estimate scaled by the accumulated SATD/SAD.
+* A new scale is computed when consumed / estimated exceeds 1.075, or is below
+* 0.8 while the model is not valid or the threshold was already reached for
+* this frame; when i4_frame_mod_qp is above MIN_QP_NO_CLIP_DEV the ratio is
+* capped at 1.414. The scale is stored in ai4_curr_qp_estimated with
+* QP_LEVEL_MOD_ACT_FACTOR fractional bits.
+*
+* \param[in] pv_multi_thrd_ctxt : multi_thrd_ctxt_t shared by the threads
+* \param[in] pv_ctxt : ihevce_enc_loop_ctxt_t of the calling thread; its CU
+* bit counters are consumed and reset here
+* \param[in] pv_ctb_ipe_analyse : ipe_l0_ctb_analyse_for_me_t of the CTB
+* \param[in] pv_frm_ctb_prms : frm_ctb_ctxt_t holding the frame's CTB counts
+*
+* \remarks
+* Everything that touches the shared context runs under
+* pv_sub_pic_rc_mutex_lock_hdl, because the thread that evaluates a chunk sums
+* and resets the accumulator slots of every thread, not only its own.
+* ai4_prev_chunk_qp and ai4_curr_qp_estimated are written together under
+* pv_sub_pic_rc_for_qp_update_mutex_lock_hdl, the mutex that
+* ihevce_sub_pic_rc_scale_query() holds while reading them.
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_sub_pic_rc_in_data(
+    void *pv_multi_thrd_ctxt, void *pv_ctxt, void *pv_ctb_ipe_analyse, void *pv_frm_ctb_prms)
+{
+    multi_thrd_ctxt_t *ps_multi_thrd_ctxt = (multi_thrd_ctxt_t *)pv_multi_thrd_ctxt;
+    ihevce_enc_loop_ctxt_t *ps_ctxt = (ihevce_enc_loop_ctxt_t *)pv_ctxt;
+    ipe_l0_ctb_analyse_for_me_t *ps_ctb_ipe_analyse =
+        (ipe_l0_ctb_analyse_for_me_t *)pv_ctb_ipe_analyse;
+    frm_ctb_ctxt_t *ps_frm_ctb_prms = (frm_ctb_ctxt_t *)pv_frm_ctb_prms;
+
+    WORD32 j = 0;
+    WORD32 i4_frm_id = ps_ctxt->i4_enc_frm_id;
+    WORD32 i4_br_id = ps_ctxt->i4_bitrate_instance_num;
+    WORD32 i4_thrd_id = ps_ctxt->thrd_id;
+    WORD32 i4_ctb_count_flag = 0;
+    WORD32 i4_is_intra_pic = (ISLICE == ps_ctxt->i1_slice_type);
+
+    /* Lock first: another thread may be summing and resetting this thread's slots */
+    osal_mutex_lock(ps_multi_thrd_ctxt->pv_sub_pic_rc_mutex_lock_hdl);
+
+    /* Accumulate all the variables in shared memory */
+    for(j = 0; j < (MAX_CU_IN_CTB >> 2); j++)
+    {
+        ps_multi_thrd_ctxt->ai8_nctb_ipe_sad[i4_frm_id][i4_br_id][i4_thrd_id] +=
+            ps_ctb_ipe_analyse->ai4_best_sad_8x8_l1_ipe[j];
+        ps_multi_thrd_ctxt->ai8_nctb_me_sad[i4_frm_id][i4_br_id][i4_thrd_id] +=
+            ps_ctb_ipe_analyse->ai4_best_sad_8x8_l1_me[j];
+
+        ps_multi_thrd_ctxt->ai8_nctb_act_factor[i4_frm_id][i4_br_id][i4_thrd_id] +=
+            ps_ctb_ipe_analyse->ai4_8x8_act_factor[j];
+    }
+
+    ps_multi_thrd_ctxt->ai8_nctb_l0_ipe_sad[i4_frm_id][i4_br_id][i4_thrd_id] +=
+        ps_ctb_ipe_analyse->i4_ctb_acc_satd;
+
+    /* Accumulate L0 MPM bits for N CTB */
+    ps_multi_thrd_ctxt->ai8_nctb_mpm_bits_consumed[i4_frm_id][i4_br_id][i4_thrd_id] +=
+        ps_ctb_ipe_analyse->i4_ctb_acc_mpm_bits;
+
+    /* Accumulate the total bits and hdr bits for N CTBs */
+    ps_multi_thrd_ctxt->ai8_nctb_bits_consumed[i4_frm_id][i4_br_id][i4_thrd_id] +=
+        ps_ctxt->u4_total_cu_bits;
+    ps_multi_thrd_ctxt->ai8_acc_bits_consumed[i4_frm_id][i4_br_id][i4_thrd_id] +=
+        ps_ctxt->u4_total_cu_bits;
+    ps_multi_thrd_ctxt->ai8_acc_bits_mul_qs_consumed[i4_frm_id][i4_br_id][i4_thrd_id] +=
+        ps_ctxt->u4_total_cu_bits_mul_qs;
+    ps_multi_thrd_ctxt->ai8_nctb_hdr_bits_consumed[i4_frm_id][i4_br_id][i4_thrd_id] +=
+        ps_ctxt->u4_total_cu_hdr_bits;
+
+    /* Reset the total CU bits, accumulated for all CTBs */
+    ps_ctxt->u4_total_cu_bits = 0;
+    ps_ctxt->u4_total_cu_hdr_bits = 0;
+    ps_ctxt->u4_total_cu_bits_mul_qs = 0;
+
+    ps_multi_thrd_ctxt->ai4_acc_ctb_ctr[i4_frm_id][i4_br_id] += 1;
+    ps_multi_thrd_ctxt->ai4_ctb_ctr[i4_frm_id][i4_br_id] += 1;
+
+    /* Check if the acc ctb counter across thread has reached the required threshold */
+    if(ps_multi_thrd_ctxt->ai4_acc_ctb_ctr[i4_frm_id][i4_br_id] >=
+       ps_ctxt->i4_num_ctb_for_out_scale)
+    {
+        i4_ctb_count_flag = 1;
+        /* Reset the accumulated CTB counter */
+        ps_multi_thrd_ctxt->ai4_acc_ctb_ctr[i4_frm_id][i4_br_id] = 0;
+    }
+
+    /* Variables to be sent in the queue after required ctb count is reached */
+    if(1 == i4_ctb_count_flag)
+    {
+        WORD32 i4_temp_thrd_id;
+        LWORD64 i8_nctb_l1_me_sad = 0, i8_nctb_l1_ipe_sad = 0;
+        LWORD64 i8_nctb_l0_ipe_satd = 0, i8_nctb_l1_activity_fact = 0;
+        LWORD64 i8_nctb_hdr_bits_consumed = 0, i8_nctb_l0_mpm_bits = 0;
+        LWORD64 i8_nctb_bits_consumed = 0, i8_acc_bits_consumed = 0,
+                i8_acc_bits_mul_qs_consumed = 0;
+        LWORD64 i8_frame_l1_ipe_sad, i8_frame_l0_ipe_satd, i8_frame_l1_me_sad;
+        LWORD64 i8_frame_l1_activity_fact, i8_frame_bits_estimated;
+
+        for(i4_temp_thrd_id = 0; i4_temp_thrd_id < ps_ctxt->i4_num_proc_thrds; i4_temp_thrd_id++)
+        {
+            /* Accumulate only if thread id is valid */
+            if(ps_multi_thrd_ctxt->ai4_thrd_id_valid_flag[i4_frm_id][i4_br_id][i4_temp_thrd_id] ==
+               1)
+            {
+                /* store complexities for the ctbs across all threads till then */
+                i8_nctb_l1_me_sad +=
+                    ps_multi_thrd_ctxt->ai8_nctb_me_sad[i4_frm_id][i4_br_id][i4_temp_thrd_id];
+                i8_nctb_l1_ipe_sad +=
+                    ps_multi_thrd_ctxt->ai8_nctb_ipe_sad[i4_frm_id][i4_br_id][i4_temp_thrd_id];
+                i8_nctb_l0_ipe_satd +=
+                    ps_multi_thrd_ctxt->ai8_nctb_l0_ipe_sad[i4_frm_id][i4_br_id][i4_temp_thrd_id];
+                i8_nctb_l1_activity_fact +=
+                    ps_multi_thrd_ctxt->ai8_nctb_act_factor[i4_frm_id][i4_br_id][i4_temp_thrd_id];
+
+                /* Set encoder total and hdr bits and mpm bits for that N ctbs */
+                i8_nctb_hdr_bits_consumed +=
+                    ps_multi_thrd_ctxt
+                        ->ai8_nctb_hdr_bits_consumed[i4_frm_id][i4_br_id][i4_temp_thrd_id];
+                i8_nctb_l0_mpm_bits +=
+                    ps_multi_thrd_ctxt
+                        ->ai8_nctb_mpm_bits_consumed[i4_frm_id][i4_br_id][i4_temp_thrd_id];
+                i8_nctb_bits_consumed +=
+                    ps_multi_thrd_ctxt->ai8_nctb_bits_consumed[i4_frm_id][i4_br_id][i4_temp_thrd_id];
+
+                /* Set encoder total bits for ctbs till then */
+                i8_acc_bits_consumed +=
+                    ps_multi_thrd_ctxt->ai8_acc_bits_consumed[i4_frm_id][i4_br_id][i4_temp_thrd_id];
+                i8_acc_bits_mul_qs_consumed +=
+                    ps_multi_thrd_ctxt
+                        ->ai8_acc_bits_mul_qs_consumed[i4_frm_id][i4_br_id][i4_temp_thrd_id];
+
+                /* Reset NCTB total and hdr, mpm bits counter to zero */
+                ps_multi_thrd_ctxt->ai8_nctb_bits_consumed[i4_frm_id][i4_br_id][i4_temp_thrd_id] =
+                    0;
+                ps_multi_thrd_ctxt
+                    ->ai8_nctb_hdr_bits_consumed[i4_frm_id][i4_br_id][i4_temp_thrd_id] = 0;
+                ps_multi_thrd_ctxt
+                    ->ai8_nctb_mpm_bits_consumed[i4_frm_id][i4_br_id][i4_temp_thrd_id] = 0;
+            }
+        }
+
+        /* Store all frame level params */
+        i8_frame_l1_ipe_sad = ps_ctxt->i8_frame_l1_ipe_sad;
+        i8_frame_l0_ipe_satd = ps_ctxt->i8_frame_l0_ipe_satd;
+        i8_frame_l1_me_sad = ps_ctxt->i8_frame_l1_me_sad;
+        i8_frame_l1_activity_fact = ps_ctxt->i8_frame_l1_activity_fact;
+        i8_frame_bits_estimated = ps_ctxt->ai4_frame_bits_estimated[i4_frm_id][i4_br_id];
+
+        /* If CU level RC is disabled reset the nctb and frame level factor */
+        if(0 == ps_ctxt->i4_qp_mod)
+        {
+            i8_frame_l1_activity_fact = 0;
+        }
+
+        ASSERT(ps_ctxt->ai4_frame_bits_estimated[i4_frm_id][i4_br_id] != 0);
+
+        {
+            float bits_estimated, activity_ratio = 1;
+            WORD32 i8_ctb_bits_estimated;
+            float f_bit_deviation;
+            WORD32 i4_tot_frame_ctb =
+                ps_frm_ctb_prms->i4_num_ctbs_vert * ps_frm_ctb_prms->i4_num_ctbs_horz;
+
+            /* The QP limit can only increment/decrement by 3/1 */
+            float f_qp_increase_limit = (1.414);
+
+            /* Frame level activity is set to 0 for cu-level rc off */
+            if(i8_frame_l1_activity_fact != 0)
+            {
+                activity_ratio =
+                    (float)(i8_frame_l1_activity_fact) / (float)(i8_nctb_l1_activity_fact);
+            }
+
+            /* The activity ratio is currently forced to 1, so the value computed above
+             * does not influence the estimate. */
+            activity_ratio = 1;
+
+            /* Estimate the bits to be consumed based on the intra and inter complexity */
+            if(i4_is_intra_pic)
+            {
+                float sad_ratio = (float)(i8_nctb_l0_ipe_satd) / (float)(i8_frame_l0_ipe_satd);
+                bits_estimated = sad_ratio * activity_ratio * ((float)i8_frame_bits_estimated);
+            }
+            else
+            {
+                float sad_ratio = (float)(i8_nctb_l1_me_sad) / (float)(i8_frame_l1_me_sad);
+                bits_estimated = sad_ratio * activity_ratio * ((float)i8_frame_bits_estimated);
+            }
+
+            /* NOTE(review): computed but not read again in this function */
+            i8_ctb_bits_estimated = (i8_frame_bits_estimated / i4_tot_frame_ctb);
+
+            /* for better control on both sides */
+            f_bit_deviation = ((i8_acc_bits_consumed * 1.0) / bits_estimated);
+            /* if consumed bits is higher than 7.5% or consumed bits is lower by 20% */
+            if((f_bit_deviation > 1.075) ||
+               ((f_bit_deviation < 0.8) &&
+                (ps_ctxt->i4_is_model_valid == 0 ||
+                 (ps_multi_thrd_ctxt->ai4_threshold_reached[i4_frm_id][i4_br_id]))))
+            {
+                float f_qscale_avg_factor;
+                WORD32 i4_cu_qp_sub_pic_rc_curr;
+                /* get the Qscale of Frame QP */
+                WORD32 i4_frm_qs_q3 =
+                    (ps_ctxt->ps_rc_quant_ctxt->pi4_qp_to_qscale
+                         [ps_ctxt->i4_frame_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]);
+                WORD32 i4_prev_qp = ps_ctxt->i4_frame_mod_qp;
+
+                ps_multi_thrd_ctxt->ai4_threshold_reached[i4_frm_id][i4_br_id] = 1;
+
+                /* The scale factor is derived the same way for intra and inter pictures. */
+                /*In case of lower QP, Qscale increase at every step is very low, which doesn't allow QP increase
+                to meet the rate, hence disable deviation clip below QP 4 for all bitdepth*/
+                if(i4_prev_qp > MIN_QP_NO_CLIP_DEV)
+                {
+                    /* Clip the bits deviation such that it never cross +3 qp shifts from average QP so far coded with in-frame rc*/
+                    if(f_bit_deviation > f_qp_increase_limit)
+                    {
+                        f_bit_deviation = f_qp_increase_limit;
+                    }
+                }
+
+                /* NOTE(review): with i8_acc_bits_consumed == 0 the quotient below has a zero
+                 * divisor; confirm a chunk is never evaluated before any bits are accumulated. */
+                /*The current qscale should do not deviate +/- 3 QP from the previous qscale */
+                f_qscale_avg_factor =
+                    (((float)(i8_acc_bits_mul_qs_consumed * (1 << QSCALE_Q_FAC_3))) /
+                     (i8_acc_bits_consumed * i4_frm_qs_q3));
+                i4_cu_qp_sub_pic_rc_curr =
+                    f_qscale_avg_factor * f_bit_deviation * (1 << QP_LEVEL_MOD_ACT_FACTOR);
+
+                /* Publish the previous chunk QP and the new scale in one critical section,
+                 * under the mutex that ihevce_sub_pic_rc_scale_query() takes before reading
+                 * them, so neither value is read while it is being written.
+                 */
+                osal_mutex_lock(ps_multi_thrd_ctxt->pv_sub_pic_rc_for_qp_update_mutex_lock_hdl);
+                ps_multi_thrd_ctxt->ai4_prev_chunk_qp[i4_frm_id][i4_br_id] =
+                    ps_ctxt->i4_frame_mod_qp;
+                ps_multi_thrd_ctxt->ai4_curr_qp_estimated[i4_frm_id][i4_br_id] =
+                    i4_cu_qp_sub_pic_rc_curr;
+                osal_mutex_unlock(
+                    ps_multi_thrd_ctxt->pv_sub_pic_rc_for_qp_update_mutex_lock_hdl);
+            }
+        }
+    }
+    osal_mutex_unlock(ps_multi_thrd_ctxt->pv_sub_pic_rc_mutex_lock_hdl);
+    return;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_sub_pic_rc_scale_query \endif
+*
+* \brief
+* Sub-pic RC thread interface function
+*
+* \param[in] Frame process pointer
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void ihevce_sub_pic_rc_scale_query(void *pv_multi_thrd_ctxt, void *pv_ctxt)
+{
+ multi_thrd_ctxt_t *ps_multi_thrd_ctxt = (multi_thrd_ctxt_t *)pv_multi_thrd_ctxt;
+ ihevce_enc_loop_ctxt_t *ps_ctxt = (ihevce_enc_loop_ctxt_t *)pv_ctxt;
+ WORD32 i4_mod_qp, i4_prev_qs;
+ WORD32 i4_previous_chunk_qp;
+
+ WORD32 i4_qp_delata_max_limit, i4_qp_delata_min_limit;
+
+ osal_mutex_lock(ps_multi_thrd_ctxt->pv_sub_pic_rc_for_qp_update_mutex_lock_hdl);
+
+ i4_mod_qp =
+ (ps_ctxt->ps_rc_quant_ctxt
+ ->pi4_qp_to_qscale[ps_ctxt->i4_frame_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]);
+ i4_previous_chunk_qp =
+ ps_multi_thrd_ctxt
+ ->ai4_prev_chunk_qp[ps_ctxt->i4_enc_frm_id][ps_ctxt->i4_bitrate_instance_num];
+ i4_prev_qs =
+ (ps_ctxt->ps_rc_quant_ctxt
+ ->pi4_qp_to_qscale[i4_previous_chunk_qp + ps_ctxt->ps_rc_quant_ctxt->i1_qp_offset]);
+ /*Limit the qp_delta_scale if it exceeds the limit of QP51 and QP 1 */
+
+ i4_qp_delata_max_limit =
+ ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale * (1 << QP_LEVEL_MOD_ACT_FACTOR);
+ i4_qp_delata_max_limit = i4_qp_delata_max_limit / i4_mod_qp;
+
+ i4_qp_delata_min_limit =
+ ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale * (1 << QP_LEVEL_MOD_ACT_FACTOR);
+ i4_qp_delata_min_limit = i4_qp_delata_min_limit / i4_mod_qp;
+ {
+ /*For Non-I SCD and Frames after SCD*/
+ /*The scale is tweeked to only increase qp (increased by 6) if the bits consumed is higher than bits
+ estimated */
+ ps_ctxt->i4_cu_qp_sub_pic_rc =
+ ps_multi_thrd_ctxt
+ ->ai4_curr_qp_estimated[ps_ctxt->i4_enc_frm_id][ps_ctxt->i4_bitrate_instance_num];
+ /*Limit the Qscale */
+ if(ps_ctxt->i4_cu_qp_sub_pic_rc > i4_qp_delata_max_limit)
+ {
+ ps_ctxt->i4_cu_qp_sub_pic_rc = i4_qp_delata_max_limit;
+ }
+ else if(ps_ctxt->i4_cu_qp_sub_pic_rc < i4_qp_delata_min_limit)
+ {
+ ps_ctxt->i4_cu_qp_sub_pic_rc = i4_qp_delata_min_limit;
+ }
+
+ ps_multi_thrd_ctxt
+ ->ai4_curr_qp_estimated[ps_ctxt->i4_enc_frm_id][ps_ctxt->i4_bitrate_instance_num] =
+ ps_ctxt->i4_cu_qp_sub_pic_rc;
+ }
+
+ /*Accumalate the CTB level QP here and feed to rc as average qp*/
+ {
+ WORD32 i4_mod_cur_qp, i4_mod_prev_qp;
+
+ i4_mod_cur_qp =
+ ((i4_mod_qp * ps_ctxt->i4_cu_qp_sub_pic_rc) + (1 << (QP_LEVEL_MOD_ACT_FACTOR - 1))) >>
+ QP_LEVEL_MOD_ACT_FACTOR;
+
+ /*Limit the qscale and qp */
+ if(i4_mod_cur_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale)
+ {
+ i4_mod_cur_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale;
+ ASSERT(0);
+ }
+ else if(i4_mod_cur_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale)
+ {
+ i4_mod_cur_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale;
+ ASSERT(0);
+ }
+
+ i4_mod_cur_qp = ps_ctxt->ps_rc_quant_ctxt->pi4_qscale_to_qp[i4_mod_cur_qp];
+ /*limit the prev qs*/
+ if(i4_prev_qs > ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale)
+ {
+ i4_prev_qs = ps_ctxt->ps_rc_quant_ctxt->i2_max_qscale;
+ }
+ else if(i4_prev_qs < ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale)
+ {
+ i4_prev_qs = ps_ctxt->ps_rc_quant_ctxt->i2_min_qscale;
+ }
+
+ i4_mod_prev_qp = ps_ctxt->ps_rc_quant_ctxt->pi4_qscale_to_qp[i4_prev_qs];
+
+ /*cur qp < prev qp, then allow only -1*/
+ if(i4_mod_cur_qp < i4_mod_prev_qp)
+ {
+ i4_mod_cur_qp = i4_mod_prev_qp - 1;
+ if(i4_mod_cur_qp < (ps_ctxt->i4_frame_qp - 6))
+ {
+ i4_mod_cur_qp = (ps_ctxt->i4_frame_qp - 6);
+ }
+ }
+
+ /*In case of lower QP, Qscale increase at every step is very low, which doesn't allow QP increase
+ to meet the rate, hence disable deviation clip below QP 4 for all bitdepth*/
+ if(i4_mod_cur_qp > i4_mod_prev_qp)
+ {
+ i4_mod_cur_qp = MIN(i4_mod_prev_qp + 3, i4_mod_cur_qp);
+ }
+
+ /* CLIP to maintain Qp between user configured and min and max Qp values*/
+ if(i4_mod_cur_qp > ps_ctxt->ps_rc_quant_ctxt->i2_max_qp)
+ i4_mod_cur_qp = ps_ctxt->ps_rc_quant_ctxt->i2_max_qp;
+ else if(i4_mod_cur_qp < ps_ctxt->ps_rc_quant_ctxt->i2_min_qp)
+ i4_mod_cur_qp = ps_ctxt->ps_rc_quant_ctxt->i2_min_qp;
+
+ /*Modify the qp based on delta*/
+ ps_ctxt->i4_frame_mod_qp = i4_mod_cur_qp;
+ ps_ctxt->i4_is_first_query = 0;
+ if(ps_ctxt->i4_frame_mod_qp != ps_ctxt->i4_frame_qp)
+ {
+ ps_ctxt->i4_is_ctb_qp_modified = 1;
+ }
+ }
+
+ ps_multi_thrd_ctxt->ai4_curr_qp_acc[ps_ctxt->i4_enc_frm_id][ps_ctxt->i4_bitrate_instance_num] +=
+ ps_ctxt->i4_frame_mod_qp;
+
+ osal_mutex_unlock(ps_multi_thrd_ctxt->pv_sub_pic_rc_for_qp_update_mutex_lock_hdl);
+
+ return;
+}
diff --git a/encoder/ihevce_sub_pic_rc.h b/encoder/ihevce_sub_pic_rc.h
new file mode 100644
index 0000000..354b2dc
--- /dev/null
+++ b/encoder/ihevce_sub_pic_rc.h
@@ -0,0 +1,73 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_sub_pic_rc.h
+*
+* \brief
+* This file contains interface definition of SubPic level RC
+*
+* \date
+* 11/09/2015
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_SUB_PIC_RC_H_
+#define _IHEVCE_SUB_PIC_RC_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+#define MIN_QP_NO_CLIP_DEV 4
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Variable Declarations */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+void ihevce_sub_pic_rc_in_data(
+ void *pv_multi_thrd_ctxt, void *pv_ctxt, void *pv_ctb_ipe_analyse, void *pv_frm_ctb_prms);
+
+void ihevce_sub_pic_rc_scale_query(void *pv_multi_thrd_ctxt, void *pv_ctxt);
+
+#endif /* _IHEVCE_SUB_PIC_RC_H_ */
diff --git a/encoder/ihevce_sys_api.c b/encoder/ihevce_sys_api.c
new file mode 100644
index 0000000..c18e180
--- /dev/null
+++ b/encoder/ihevce_sys_api.c
@@ -0,0 +1,315 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+ ******************************************************************************
+ * \file ihevce_sys_api.c
+ *
+ * \brief
+ * This file contains wrapper utilities to use hevc encoder library
+ *
+ * \date
+ * 15/04/2014
+ *
+ * \author
+ * Ittiam
+ *
+ * List of Functions
+ *
+ *
+ ******************************************************************************
+ */
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "ihevc_macros.h"
+
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/*!
+ ******************************************************************************
+ * \if Function name : ihevce_printf \endif
+ *
+ * \brief
+ * Stub for the printf callback: the body only passes pv_handle and format to UNUSED()
+ * \return Always 0
+ *****************************************************************************
+ */
+WORD32 ihevce_printf(void *pv_handle, const char *format, ...)
+{
+ UNUSED(pv_handle);
+ UNUSED(format);
+ return 0;
+}
+
+/*!
+ ******************************************************************************
+ * \if Function name : ihevce_fopen \endif
+ *
+ * \brief
+ * Stub for the fopen callback: the body only passes its arguments to UNUSED(); no file is opened here
+ * \return Always NULL
+ *****************************************************************************
+ */
+FILE *ihevce_fopen(void *pv_handle, const char *filename, const char *mode)
+{
+ UNUSED(pv_handle);
+ UNUSED(filename);
+ UNUSED(mode);
+ return NULL;
+}
+
+/*!
+ ******************************************************************************
+ * \if Function name : ihevce_fclose \endif
+ *
+ * \brief
+ * Stub for the fclose callback: the body only passes its arguments to UNUSED()
+ * \return Always -1
+ *****************************************************************************
+ */
+int ihevce_fclose(void *pv_handle, FILE *file_ptr)
+{
+ UNUSED(pv_handle);
+ UNUSED(file_ptr);
+ return -1;
+}
+
+/*!
+ ******************************************************************************
+ * \if Function name : ihevce_fflush \endif
+ *
+ * \brief
+ * Stub for the fflush callback: the body only passes its arguments to UNUSED()
+ * \return Always -1
+ *****************************************************************************
+ */
+int ihevce_fflush(void *pv_handle, FILE *file_ptr)
+{
+ UNUSED(pv_handle);
+ UNUSED(file_ptr);
+ return -1;
+}
+
+/*!
+ ******************************************************************************
+ * \if Function name : ihevce_fseek \endif
+ *
+ * \brief
+ * Stub for the fseek callback: the body only passes its arguments to UNUSED()
+ * \return Always -1
+ *****************************************************************************
+ */
+int ihevce_fseek(void *pv_handle, FILE *file_ptr, long offset, int origin)
+{
+ UNUSED(pv_handle);
+ UNUSED(file_ptr);
+ UNUSED(offset);
+ UNUSED(origin);
+ return -1;
+}
+
+/*!
+ ******************************************************************************
+ * \if Function name : ihevce_fscanf \endif
+ *
+ * \brief
+ * Stub for the fscanf callback: the body only passes its arguments to UNUSED(); pv_dst is not written
+ * \return Always 0
+ *****************************************************************************
+ */
+int ihevce_fscanf(
+ void *pv_handle, IHEVCE_DATA_TYPE e_data_type, FILE *file_ptr, const char *format, void *pv_dst)
+{
+ UNUSED(pv_handle);
+ UNUSED(e_data_type);
+ UNUSED(file_ptr);
+ UNUSED(format);
+ UNUSED(pv_dst);
+ return 0;
+}
+
+/*!
+ ******************************************************************************
+ * \if Function name : ihevce_fgets \endif
+ *
+ * \brief
+ * Stub for the fgets callback: the body only passes its arguments to UNUSED(); pi1_str is not written
+ * \return Always NULL
+ *****************************************************************************
+ */
+char *ihevce_fgets(void *pv_handle, char *pi1_str, int i4_size, FILE *pf_stream)
+{
+ UNUSED(pv_handle);
+ UNUSED(pi1_str);
+ UNUSED(i4_size);
+ UNUSED(pf_stream);
+ return NULL;
+}
+
+/*!
+ ******************************************************************************
+ * \if Function name : ihevce_fread \endif
+ *
+ * \brief
+ * Stub for the fread callback: the body only passes its arguments to UNUSED(); pv_dst is not written
+ * \return Always 0
+ *****************************************************************************
+ */
+size_t
+ ihevce_fread(void *pv_handle, void *pv_dst, size_t element_size, size_t count, FILE *file_ptr)
+{
+ UNUSED(pv_handle);
+ UNUSED(pv_dst);
+ UNUSED(element_size);
+ UNUSED(count);
+ UNUSED(file_ptr);
+ return 0;
+}
+
+/*!
+ ******************************************************************************
+ * \if Function name : ihevce_sscanf \endif
+ *
+ * \brief
+ * Stub for the sscanf callback: the body only passes its arguments to UNUSED(); *p_dst_int is not written
+ * \return Always 0
+ *****************************************************************************
+ */
+int ihevce_sscanf(void *pv_handle, const char *pv_src, const char *format, int *p_dst_int)
+{
+ UNUSED(pv_handle);
+ UNUSED(pv_src);
+ UNUSED(format);
+ UNUSED(p_dst_int);
+ return 0;
+}
+
+/*!
+ ******************************************************************************
+ * \if Function name : ihevce_fprintf \endif
+ *
+ * \brief
+ * Stub for the fprintf callback: the body only passes pv_handle, file_ptr and format to UNUSED()
+ * \return Always 0
+ *****************************************************************************
+ */
+int ihevce_fprintf(void *pv_handle, FILE *file_ptr, const char *format, ...)
+{
+ UNUSED(pv_handle);
+ UNUSED(file_ptr);
+ UNUSED(format);
+ return 0;
+}
+
+/*!
+ ******************************************************************************
+ * \if Function name : ihevce_fwrite \endif
+ *
+ * \brief
+ * Stub for the fwrite callback: the body only passes its arguments to UNUSED()
+ * \return Always 0
+ *****************************************************************************
+ */
+size_t ihevce_fwrite(
+ void *pv_handle, const void *pv_src, size_t element_size, size_t count, FILE *file_ptr)
+{
+ UNUSED(pv_handle);
+ UNUSED(pv_src);
+ UNUSED(element_size);
+ UNUSED(count);
+ UNUSED(file_ptr);
+ return 0;
+}
+
+/*!
+ ******************************************************************************
+ * \if Function name : ihevce_sprintf \endif
+ *
+ * \brief
+ * Stub for the sprintf callback: the body only passes pv_handle, dst and format to UNUSED(); dst is not written
+ * \return Always 0
+ *****************************************************************************
+ */
+int ihevce_sprintf(void *pv_handle, char *dst, const char *format, ...)
+{
+ UNUSED(pv_handle);
+ UNUSED(dst);
+ UNUSED(format);
+ return 0;
+}
+
+/*!
+ ******************************************************************************
+ * \if Function name : ihevce_init_sys_api \endif
+ *
+ * \brief
+ * This function initialises the system call apis with the ihevce_* functions defined in this file
+ *
+ * \param[in]
+ * pv_cb_handle : Stored as-is in ps_sys_api->pv_cb_handle; not otherwise used here.
+ * ps_sys_api : Structure whose console IO and file IO function pointers are filled in
+ *
+ * \return
+ * None
+ *
+ * \author
+ * Ittiam
+ *
+ *****************************************************************************
+ */
+void ihevce_init_sys_api(void *pv_cb_handle, ihevce_sys_api_t *ps_sys_api)
+{
+ ps_sys_api->pv_cb_handle = pv_cb_handle;
+
+ /* Console IO APIs */
+ ps_sys_api->ihevce_printf = ihevce_printf;
+
+ ps_sys_api->ihevce_sscanf = ihevce_sscanf;
+ ps_sys_api->ihevce_sprintf = ihevce_sprintf;
+
+ /* File IO APIs */
+ ps_sys_api->s_file_io_api.ihevce_fopen = ihevce_fopen;
+ ps_sys_api->s_file_io_api.ihevce_fclose = ihevce_fclose;
+ ps_sys_api->s_file_io_api.ihevce_fflush = ihevce_fflush;
+ ps_sys_api->s_file_io_api.ihevce_fseek = ihevce_fseek;
+
+ ps_sys_api->s_file_io_api.ihevce_fscanf = ihevce_fscanf;
+ ps_sys_api->s_file_io_api.ihevce_fread = ihevce_fread;
+
+ ps_sys_api->s_file_io_api.ihevce_fprintf = ihevce_fprintf;
+ ps_sys_api->s_file_io_api.ihevce_fwrite = ihevce_fwrite;
+ ps_sys_api->s_file_io_api.ihevce_fgets = ihevce_fgets;
+}
diff --git a/encoder/ihevce_tile_interface.c b/encoder/ihevce_tile_interface.c
new file mode 100644
index 0000000..ba42665
--- /dev/null
+++ b/encoder/ihevce_tile_interface.c
@@ -0,0 +1,625 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*!
+******************************************************************************
+* \file ihevce_tile_interface.c
+*
+* \brief
+* This file contains functions related to tile interface
+*
+* \date
+* 24/10/2012
+*
+* \author
+* Ittiam
+*
+*
+* List of Functions
+*
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_entropy_structs.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+#include "ihevce_enc_loop_structs.h"
+#include "ihevce_tile_interface.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_update_tile_params \endif
+*
+* \brief
+* Updates the ps_tile_params structure based on the tile position in the frame; the tile is selected by ps_tile_params->i4_curr_tile_id (input).
+* Fills the tile counts and flags, the tile width/height (in samples and in CTB units) and the first CTB / first sample coordinates.
+*****************************************************************************
+*/
+void ihevce_update_tile_params(
+ ihevce_static_cfg_params_t *ps_static_cfg_prms,
+ ihevce_tile_params_t *ps_tile_params,
+ WORD32 i4_resolution_id)
+{
+ /* Tile parameters taken from the static config (s_app_tile_params) */
+ ihevce_app_tile_params_t *ps_app_tile_prms;
+ WORD32 i4_num_tiles;
+ WORD32 i4_cu_aligned_tgt_frame_ht,
+ i4_cu_aligned_tgt_frame_wd; //Frame width and height specific to target-resolution
+ WORD32 i4_ctb_aligned_tgt_frame_ht,
+ i4_ctb_aligned_tgt_frame_wd; //Frame width and height specific to target-resolution
+ WORD32 i4_x_y = 0;
+ WORD32 i4_pos;
+ WORD32 i4_i;
+
+ WORD32 i4_curr_tile_id;
+ WORD32 i4_max_log2_cu_size, i4_ctb_size;
+ WORD32 i4_pic_wd_in_ctb;
+ WORD32 i4_pic_ht_in_ctb;
+ WORD32 min_cu_size;
+ WORD32 i4_num_tile_cols = 1;
+ WORD32 i4_num_tile_rows = 1;
+
+ ps_app_tile_prms = &ps_static_cfg_prms->s_app_tile_params;
+
+ i4_max_log2_cu_size = ps_static_cfg_prms->s_config_prms.i4_max_log2_cu_size;
+ i4_ctb_size = 1 << i4_max_log2_cu_size;
+
+ min_cu_size = 1 << ps_static_cfg_prms->s_config_prms.i4_min_log2_cu_size;
+
+ /* Align the frame width to min CU size */
+ i4_cu_aligned_tgt_frame_wd =
+ ps_static_cfg_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width +
+ SET_CTB_ALIGN(
+ ps_static_cfg_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width,
+ min_cu_size);
+
+ /* Align the frame height to min CU size */
+ i4_cu_aligned_tgt_frame_ht =
+ ps_static_cfg_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height +
+ SET_CTB_ALIGN(
+ ps_static_cfg_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height,
+ min_cu_size);
+
+ if(1 == ps_app_tile_prms->i4_tiles_enabled_flag)
+ {
+ i4_num_tile_cols = ps_app_tile_prms->i4_num_tile_cols;
+ i4_num_tile_rows = ps_app_tile_prms->i4_num_tile_rows;
+ }
+
+ i4_num_tiles = i4_num_tile_cols * i4_num_tile_rows;
+
+ i4_ctb_aligned_tgt_frame_wd = i4_cu_aligned_tgt_frame_wd;
+ i4_ctb_aligned_tgt_frame_wd += SET_CTB_ALIGN(i4_ctb_aligned_tgt_frame_wd, MAX_CTB_SIZE);
+ i4_pic_wd_in_ctb = i4_ctb_aligned_tgt_frame_wd >> i4_max_log2_cu_size;
+
+ i4_ctb_aligned_tgt_frame_ht = i4_cu_aligned_tgt_frame_ht;
+ i4_ctb_aligned_tgt_frame_ht += SET_CTB_ALIGN(i4_ctb_aligned_tgt_frame_ht, MAX_CTB_SIZE);
+ i4_pic_ht_in_ctb = i4_ctb_aligned_tgt_frame_ht >> i4_max_log2_cu_size;
+
+ /* Update tile enable flag in each instance's tile struct */
+ ps_tile_params->i4_tiles_enabled_flag = ps_app_tile_prms->i4_tiles_enabled_flag;
+
+ ps_tile_params->i4_num_tile_cols = i4_num_tile_cols;
+ ps_tile_params->i4_num_tile_rows = i4_num_tile_rows;
+
+ i4_curr_tile_id = ps_tile_params->i4_curr_tile_id;
+
+ /* num tiles in frame */
+ ps_tile_params->i4_num_tiles = i4_num_tiles;
+
+ ps_tile_params->i4_uniform_spacing_flag = ps_app_tile_prms->i4_uniform_spacing_flag;
+
+ if(0 == ps_tile_params->i4_tiles_enabled_flag)
+ {
+ /* tiles disabled: the single tile spans the whole CU-aligned frame and starts at (0, 0) */
+ ps_tile_params->i4_curr_tile_width = i4_cu_aligned_tgt_frame_wd;
+ ps_tile_params->i4_curr_tile_height = i4_cu_aligned_tgt_frame_ht;
+
+ ps_tile_params->i4_first_ctb_x = 0;
+ ps_tile_params->i4_first_ctb_y = 0;
+
+ ps_tile_params->i4_first_sample_x = 0;
+ ps_tile_params->i4_first_sample_y = 0;
+ }
+ else
+ {
+ if(0 == ps_app_tile_prms->i4_uniform_spacing_flag)
+ {
+ /* curr tile width */
+ ps_tile_params->i4_curr_tile_width =
+ ps_app_tile_prms->ai4_column_width[i4_curr_tile_id % i4_num_tile_cols];
+
+ /* curr tile height */
+ ps_tile_params->i4_curr_tile_height =
+ ps_app_tile_prms->ai4_row_height[i4_curr_tile_id / i4_num_tile_cols];
+
+ /* first sample x / ctb x of the tile: sum of the widths of the tile columns to its left */
+ i4_pos = i4_curr_tile_id % i4_num_tile_cols;
+
+ for(i4_i = 0; i4_i < i4_pos; i4_i++)
+ {
+ i4_x_y += ps_app_tile_prms->ai4_column_width[i4_i];
+ }
+
+ ps_tile_params->i4_first_sample_x = i4_x_y;
+ ps_tile_params->i4_first_ctb_x = i4_x_y >> i4_max_log2_cu_size;
+
+ i4_pos = i4_curr_tile_id / i4_num_tile_cols;
+
+ i4_x_y = 0;
+
+ for(i4_i = 0; i4_i < i4_pos; i4_i++)
+ {
+ i4_x_y += ps_app_tile_prms->ai4_row_height[i4_i];
+ }
+
+ ps_tile_params->i4_first_sample_y = i4_x_y;
+ ps_tile_params->i4_first_ctb_y = i4_x_y >> i4_max_log2_cu_size;
+ }
+ else
+ {
+ /* below formula for tile width/height and start_x/start_y are derived from HM Decoder */
+ WORD32 i4_start = 0;
+ WORD32 i4_value = 0;
+ /* curr tile width and x start; the last column is trimmed when the CU-aligned width is not a CTB multiple */
+ for(i4_i = 0; i4_i < i4_num_tile_cols; i4_i++)
+ {
+ i4_value = ((i4_i + 1) * i4_pic_wd_in_ctb) / i4_num_tile_cols -
+ (i4_i * i4_pic_wd_in_ctb) / i4_num_tile_cols;
+
+ if(i4_i == (i4_curr_tile_id % i4_num_tile_cols))
+ {
+ ps_tile_params->i4_first_ctb_x = i4_start;
+ ps_tile_params->i4_first_sample_x = (i4_start << i4_max_log2_cu_size);
+ ps_tile_params->i4_curr_tile_width = (i4_value << i4_max_log2_cu_size);
+ if(i4_i == (i4_num_tile_cols - 1))
+ {
+ if(i4_cu_aligned_tgt_frame_wd % i4_ctb_size)
+ {
+ ps_tile_params->i4_curr_tile_width =
+ (ps_tile_params->i4_curr_tile_width - i4_ctb_size) +
+ (i4_cu_aligned_tgt_frame_wd % i4_ctb_size);
+ }
+ }
+ break;
+ }
+ i4_start += i4_value;
+ }
+
+ /* curr tile height and y start; the last row is trimmed when the CU-aligned height is not a CTB multiple */
+ i4_start = 0;
+ for(i4_i = 0; i4_i < i4_num_tile_rows; i4_i++)
+ {
+ i4_value = ((i4_i + 1) * i4_pic_ht_in_ctb) / i4_num_tile_rows -
+ (i4_i * i4_pic_ht_in_ctb) / i4_num_tile_rows;
+
+ if(i4_i == (i4_curr_tile_id / i4_num_tile_cols))
+ {
+ ps_tile_params->i4_first_ctb_y = i4_start;
+ ps_tile_params->i4_first_sample_y = (i4_start << i4_max_log2_cu_size);
+ ps_tile_params->i4_curr_tile_height = (i4_value << i4_max_log2_cu_size);
+ if(i4_i == (i4_num_tile_rows - 1))
+ {
+ if(i4_cu_aligned_tgt_frame_ht % i4_ctb_size)
+ {
+ ps_tile_params->i4_curr_tile_height =
+ (ps_tile_params->i4_curr_tile_height - i4_ctb_size) +
+ (i4_cu_aligned_tgt_frame_ht % i4_ctb_size);
+ }
+ }
+ break;
+ }
+ i4_start += i4_value;
+ }
+ }
+ }
+
+ /* Initialize i4_curr_tile_wd_in_ctb_unit and i4_curr_tile_ht_in_ctb_unit: CTB-aligned tile size divided by the CTB size */
+ ps_tile_params->i4_curr_tile_wd_in_ctb_unit =
+ ps_tile_params->i4_curr_tile_width +
+ SET_CTB_ALIGN(ps_tile_params->i4_curr_tile_width, i4_ctb_size);
+
+ ps_tile_params->i4_curr_tile_ht_in_ctb_unit =
+ ps_tile_params->i4_curr_tile_height +
+ SET_CTB_ALIGN(ps_tile_params->i4_curr_tile_height, i4_ctb_size);
+
+ ps_tile_params->i4_curr_tile_wd_in_ctb_unit /= i4_ctb_size;
+ ps_tile_params->i4_curr_tile_ht_in_ctb_unit /= i4_ctb_size;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_tiles_get_num_mem_recs \endif
+*
+* \brief
+* Returns the total no. of memory records needed for tile encoding
+*
+* \param
+* None
+*
+* \return
+* Number of memory records required: 2 (one for all tile parameters, one for the frame level tile-id map)
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+WORD32 ihevce_tiles_get_num_mem_recs(void)
+{
+ WORD32 i4_total_memtabs_req = 0;
+
+ /*------------------------------------------------------------------*/
+ /* Get number of memtabs */
+ /*------------------------------------------------------------------*/
+ /* Memory for keeping all tile's parameters */
+ i4_total_memtabs_req++;
+
+ /* Memory for keeping frame level tile_id map */
+ i4_total_memtabs_req++;
+
+ return (i4_total_memtabs_req);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_tiles_get_mem_recs \endif
+*
+* \brief
+* Fills each memory record attributes of tiles
+* \param[in,out] ps_memtab : pointer to memory descriptors table; entries [0] and [1] are filled
+* \param[in] ps_static_cfg_params : static config; supplies the tile counts and the target frame width / height
+* \param[out] ps_frm_ctb_prms : i4_tile_id_ctb_map_stride is set to the frame width in MAX_CTB_SIZE units
+* \param[in] i4_resolution_id : index into s_tgt_lyr_prms.as_tgt_params
+* \param[in] i4_mem_space : memspace in which memory request should be done
+* \return
+* total no. of mem records filled
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+WORD32 ihevce_tiles_get_mem_recs(
+ iv_mem_rec_t *ps_memtab,
+ ihevce_static_cfg_params_t *ps_static_cfg_params,
+ frm_ctb_ctxt_t *ps_frm_ctb_prms,
+ WORD32 i4_resolution_id,
+ WORD32 i4_mem_space)
+{
+ //WORD32 i4_frame_width, i4_frame_height;
+ WORD32 i4_num_tiles;
+ WORD32 i4_total_memtabs_filled = 0;
+ WORD32 i4_num_tile_cols = 1;
+ WORD32 i4_num_tile_rows = 1;
+ WORD32 ctb_aligned_frame_width, ctb_aligned_frame_height;
+ WORD32 u4_ctb_in_a_row, u4_ctb_rows_in_a_frame;
+
+ ihevce_app_tile_params_t *ps_app_tile_params = &ps_static_cfg_params->s_app_tile_params;
+ /*
+ i4_frame_width = ps_tile_master_prms->i4_frame_width;
+ i4_frame_height = ps_tile_master_prms->i4_frame_height;*/
+
+ if(1 == ps_app_tile_params->i4_tiles_enabled_flag)
+ {
+ i4_num_tile_cols = ps_app_tile_params->i4_num_tile_cols;
+ i4_num_tile_rows = ps_app_tile_params->i4_num_tile_rows;
+ }
+
+ i4_num_tiles = i4_num_tile_cols * i4_num_tile_rows;
+
+ /* -------- Memory for storing all tile params ---------*/
+ ps_memtab[0].i4_size = sizeof(iv_mem_rec_t);
+ ps_memtab[0].i4_mem_size = i4_num_tiles * sizeof(ihevce_tile_params_t);
+ ps_memtab[0].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
+ ps_memtab[0].i4_mem_alignment = 8;
+ i4_total_memtabs_filled++;
+
+ /* -------- Memory for CTB level tile-id map ---------*/
+ ctb_aligned_frame_width =
+ ps_static_cfg_params->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
+ ctb_aligned_frame_height =
+ ps_static_cfg_params->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height;
+
+ /*making the width and height a multiple of CTB size*/
+ ctb_aligned_frame_width += SET_CTB_ALIGN(
+ ps_static_cfg_params->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width,
+ MAX_CTB_SIZE);
+ ctb_aligned_frame_height += SET_CTB_ALIGN(
+ ps_static_cfg_params->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_height,
+ MAX_CTB_SIZE);
+
+ u4_ctb_in_a_row = (ctb_aligned_frame_width / MAX_CTB_SIZE);
+ u4_ctb_rows_in_a_frame = (ctb_aligned_frame_height / MAX_CTB_SIZE);
+
+ ps_frm_ctb_prms->i4_tile_id_ctb_map_stride = (ctb_aligned_frame_width / MAX_CTB_SIZE);
+
+ /* Memory for a frame level memory to store tile-id corresponding to each CTB of frame*/
+ /* (1 + u4_ctb_in_a_row + 1): Keeping an extra column on the left and on the right. Tile Id's will be set to -1 in them */
+ /* (1 + u4_ctb_rows_in_a_frame): Keeping an extra row on the top. Tile Id's will be set to -1 in it */
+ /* -1 -1 -1 -1 -1 -1 -1 ....... -1 -1
+ -1 0 0 1 1 2 2 ....... M -1
+ -1 0 0 1 1 2 2 ....... M -1
+ .. .. .. .. .. .. .. ....... M -1
+ .. .. .. .. .. .. .. ....... M -1
+ -1 N N N+1 N+1 N+2 N+2 ....... N+M -1
+ */
+ ps_memtab[1].i4_size = sizeof(iv_mem_rec_t);
+ ps_memtab[1].i4_mem_size =
+ (1 + u4_ctb_in_a_row + 1) * (1 + u4_ctb_rows_in_a_frame) * sizeof(WORD32);
+ ps_memtab[1].e_mem_type = (IV_MEM_TYPE_T)i4_mem_space;
+ ps_memtab[1].i4_mem_alignment = 8;
+ i4_total_memtabs_filled++;
+
+ return (i4_total_memtabs_filled);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_tiles_mem_init \endif
+*
+* \brief
+* Initialises the per-tile parameter array and the CTB level tile-id map from the two tile memory records
+* \param[in] ps_memtab : pointer to memory descriptors table; [0] holds the tile params, [1] the tile-id map
+* \param[in] ps_static_cfg_prms : static config; supplies the tile configuration and the target frame width
+* \param[in,out] ps_enc_ctxt : s_frm_ctb_prms (map pointer, stride) and the column width / row height arrays are written
+* \param[in] i4_resolution_id : index into s_tgt_lyr_prms.as_tgt_params
+* \return
+* Pointer to the base of the tile parameter array (pv_base of the first memory record)
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void *ihevce_tiles_mem_init(
+ iv_mem_rec_t *ps_memtab,
+ ihevce_static_cfg_params_t *ps_static_cfg_prms,
+ enc_ctxt_t *ps_enc_ctxt,
+ WORD32 i4_resolution_id)
+{
+ WORD32 i4_num_tiles, tile_ctr;
+ WORD32 ctb_row_ctr, ctb_col_ctr, i;
+ WORD32 tile_pos_x, tile_pos_y;
+ WORD32 tile_wd_in_ctb, tile_ht_in_ctb;
+ WORD32 *pi4_tile_id_map_temp, *pi4_tile_id_map_base;
+ WORD32 frame_width_in_ctb;
+ WORD32 i4_num_tile_cols = 1;
+ WORD32 i4_num_tile_rows = 1;
+
+ ihevce_tile_params_t *ps_tile_params_base;
+ frm_ctb_ctxt_t *ps_frm_ctb_prms = &ps_enc_ctxt->s_frm_ctb_prms;
+
+ if(1 == ps_static_cfg_prms->s_app_tile_params.i4_tiles_enabled_flag)
+ {
+ i4_num_tile_cols = ps_static_cfg_prms->s_app_tile_params.i4_num_tile_cols;
+ i4_num_tile_rows = ps_static_cfg_prms->s_app_tile_params.i4_num_tile_rows;
+ }
+
+ frame_width_in_ctb =
+ ps_static_cfg_prms->s_tgt_lyr_prms.as_tgt_params[i4_resolution_id].i4_width;
+ frame_width_in_ctb += SET_CTB_ALIGN(frame_width_in_ctb, MAX_CTB_SIZE);
+ frame_width_in_ctb /= MAX_CTB_SIZE;
+
+ /* -------- Memory for storing all tile params ---------*/
+ ps_tile_params_base = (ihevce_tile_params_t *)ps_memtab->pv_base;
+ ps_memtab++;
+
+ i4_num_tiles = i4_num_tile_cols * i4_num_tile_rows;
+
+ for(tile_ctr = 0; tile_ctr < i4_num_tiles; tile_ctr++)
+ {
+ WORD32 i4_i;
+ ihevce_tile_params_t *ps_tile_params = (ps_tile_params_base + tile_ctr);
+
+ /* Setting default values */
+ memset(ps_tile_params, 0, sizeof(ihevce_tile_params_t));
+
+ ps_tile_params->i4_curr_tile_id = tile_ctr; /* tile id */
+
+ /* update create time tile params in each encoder context */
+ ihevce_update_tile_params(ps_static_cfg_prms, ps_tile_params, i4_resolution_id);
+
+ if(0 == ps_static_cfg_prms->s_app_tile_params.i4_uniform_spacing_flag)
+ {
+ /* Storing column width array and row height array into enc ctxt (same copy repeated for every tile) */
+ for(i4_i = 0; i4_i < i4_num_tile_cols; i4_i++)
+ {
+ ps_enc_ctxt->ai4_column_width_array[i4_i] =
+ ps_static_cfg_prms->s_app_tile_params.ai4_column_width[i4_i];
+ }
+ for(i4_i = 0; i4_i < i4_num_tile_rows; i4_i++)
+ {
+ ps_enc_ctxt->ai4_row_height_array[i4_i] =
+ ps_static_cfg_prms->s_app_tile_params.ai4_row_height[i4_i];
+ }
+ }
+ }
+
+ /* -------- Memory for CTB level tile-id map ---------*/
+ pi4_tile_id_map_base = (WORD32 *)ps_memtab->pv_base;
+
+ // One extra row on top and one extra column on the left and right of the frame level map; these border entries are set to -1.
+ ps_frm_ctb_prms->i4_tile_id_ctb_map_stride = frame_width_in_ctb + 2;
+ ps_frm_ctb_prms->pi4_tile_id_map =
+ pi4_tile_id_map_base + ps_frm_ctb_prms->i4_tile_id_ctb_map_stride + 1;
+ ps_memtab++;
+
+ /* Filling -1 in top row */
+ for(i = 0; i < ps_frm_ctb_prms->i4_tile_id_ctb_map_stride; i++)
+ {
+ pi4_tile_id_map_base[i] = -1;
+ }
+
+ /* Now creating tile-id map: every CTB of a tile gets the tile's index */
+ for(tile_ctr = 0; tile_ctr < ps_tile_params_base->i4_num_tiles; tile_ctr++)
+ {
+ ihevce_tile_params_t *ps_tile_params = ps_tile_params_base + tile_ctr;
+
+ tile_pos_x = ps_tile_params->i4_first_ctb_x;
+ tile_pos_y = ps_tile_params->i4_first_ctb_y;
+ tile_wd_in_ctb = ps_tile_params->i4_curr_tile_wd_in_ctb_unit;
+ tile_ht_in_ctb = ps_tile_params->i4_curr_tile_ht_in_ctb_unit;
+
+ pi4_tile_id_map_temp = ps_frm_ctb_prms->pi4_tile_id_map +
+ tile_pos_y * ps_frm_ctb_prms->i4_tile_id_ctb_map_stride + tile_pos_x;
+
+ for(ctb_row_ctr = 0; (ctb_row_ctr < tile_ht_in_ctb); ctb_row_ctr++)
+ {
+ if(tile_pos_x == 0)
+ { /* Filling -1 in left column */
+ pi4_tile_id_map_temp[-1] = -1;
+ }
+
+ for(ctb_col_ctr = 0; (ctb_col_ctr < tile_wd_in_ctb); ctb_col_ctr++)
+ {
+ pi4_tile_id_map_temp[ctb_col_ctr] = tile_ctr;
+ }
+
+ if(frame_width_in_ctb == (tile_pos_x + tile_wd_in_ctb))
+ { /* Filling -1 in right column */
+ pi4_tile_id_map_temp[tile_wd_in_ctb] = -1;
+ }
+
+ pi4_tile_id_map_temp += ps_frm_ctb_prms->i4_tile_id_ctb_map_stride;
+ }
+ }
+
+ return (void *)ps_tile_params_base;
+}
+
+/*!
+******************************************************************************
+* \if Function name : update_last_coded_cu_qp \endif
+*
+* \brief Update *pi1_last_cu_qp based on CTB's position in tile
+* Nothing is written when the left CTB belongs to the same tile as the current CTB
+* \param[in] pi1_top_last_cu_qp
+* Pointer to the Qp copied to *pi1_last_cu_qp at the first CTB of a CTB row when entropy coding sync is not 1
+* \param[in] i1_entropy_coding_sync_enabled_flag
+* When 1, the first CTB of a CTB row inside a tile restarts from i1_frame_qp
+* \param[in] ps_frm_ctb_prms
+* Frame ctb parameters; supplies pi4_tile_id_map and i4_tile_id_ctb_map_stride
+* \param[in] i1_frame_qp
+* Frame qp
+* \param[in] vert_ctr
+* CTB row index used to address the tile-id map
+* \param[in] ctb_ctr
+* CTB column index used to address the tile-id map
+* \param[out] pi1_last_cu_qp
+* Last coded CU Qp, overwritten for the first CTB of a tile or of a CTB row
+*
+* \return
+* None
+*
+* \author
+* Ittiam
+*
+*****************************************************************************
+*/
+void update_last_coded_cu_qp(
+ WORD8 *pi1_top_last_cu_qp,
+ WORD8 i1_entropy_coding_sync_enabled_flag,
+ frm_ctb_ctxt_t *ps_frm_ctb_prms,
+ WORD8 i1_frame_qp,
+ WORD32 vert_ctr,
+ WORD32 ctb_ctr,
+ WORD8 *pi1_last_cu_qp)
+{
+ WORD32 i4_curr_ctb_tile_id, i4_left_ctb_tile_id, i4_top_ctb_tile_id;
+ WORD32 *pi4_tile_id_map_temp;
+
+ pi4_tile_id_map_temp = ps_frm_ctb_prms->pi4_tile_id_map +
+ vert_ctr * ps_frm_ctb_prms->i4_tile_id_ctb_map_stride + ctb_ctr;
+
+ i4_curr_ctb_tile_id = *(pi4_tile_id_map_temp);
+ i4_left_ctb_tile_id = *(pi4_tile_id_map_temp - 1);
+ i4_top_ctb_tile_id = *(pi4_tile_id_map_temp - ps_frm_ctb_prms->i4_tile_id_ctb_map_stride);
+
+ if(i4_curr_ctb_tile_id == i4_left_ctb_tile_id)
+ {
+ return;
+ }
+ else if(i4_curr_ctb_tile_id != i4_top_ctb_tile_id)
+ { /* First CTB of tile */
+ *pi1_last_cu_qp = i1_frame_qp;
+ }
+ else
+ { /* First CTB of CTB-row */
+ if(1 == i1_entropy_coding_sync_enabled_flag)
+ {
+ *pi1_last_cu_qp = i1_frame_qp;
+ }
+ else
+ {
+ *pi1_last_cu_qp = *(pi1_top_last_cu_qp);
+ }
+ }
+}
diff --git a/encoder/ihevce_tile_interface.h b/encoder/ihevce_tile_interface.h
new file mode 100644
index 0000000..2179427
--- /dev/null
+++ b/encoder/ihevce_tile_interface.h
@@ -0,0 +1,77 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*!
+******************************************************************************
+* \file ihevce_tile_interface.h
+*
+* \brief
+* This file contains functions prototypes, constants, enumerations and
+* structures related to tile interface
+*
+* \date
+* 03 july 2012
+*
+* \author
+* Ittiam
+*
+*
+* List of Functions
+*
+*
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_TILE_INTERFACE_H_
+#define _IHEVCE_TILE_INTERFACE_H_
+
+/****************************************************************************/
+/* Function Prototypes */
+/****************************************************************************/
+void ihevce_update_tile_params(
+ ihevce_static_cfg_params_t *ps_static_cfg_prms,
+ ihevce_tile_params_t *ps_tile_params,
+ WORD32 i4_resolution_id);
+
+WORD32 ihevce_tiles_get_num_mem_recs(void);
+
+WORD32 ihevce_tiles_get_mem_recs(
+ iv_mem_rec_t *ps_memtab,
+ ihevce_static_cfg_params_t *ps_static_cfg_params,
+ frm_ctb_ctxt_t *ps_frm_ctb_prms,
+ WORD32 i4_resolution_id,
+ WORD32 i4_mem_space);
+
+void *ihevce_tiles_mem_init(
+ iv_mem_rec_t *ps_memtab,
+ ihevce_static_cfg_params_t *ps_static_cfg_prms,
+ enc_ctxt_t *ps_enc_ctxt,
+ WORD32 i4_resolution_id);
+
+void update_last_coded_cu_qp(
+    WORD8 *pi1_top_last_cu_qp, /* same name as in the function definition */
+    WORD8 i1_entropy_coding_sync_enabled_flag,
+    frm_ctb_ctxt_t *ps_frm_ctb_prms, /* supplies the tile id map and its stride */
+    WORD8 i1_frame_qp,
+    WORD32 vert_ctr, /* CTB row index into the tile id map */
+    WORD32 ctb_ctr, /* CTB column index into the tile id map */
+    WORD8 *pi1_last_cu_qp); /* output: last coded CU Qp */
+
+#endif //_IHEVCE_TILE_INTERFACE_H_
diff --git a/encoder/ihevce_trace.c b/encoder/ihevce_trace.c
new file mode 100644
index 0000000..ea54ce3
--- /dev/null
+++ b/encoder/ihevce_trace.c
@@ -0,0 +1,187 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file ihevce_trace.c
+*
+* @brief
+* This file contains function definitions for implementing trace
+*
+* @author
+* Ittiam
+*
+* List of Functions
+* ihevce_trace_deinit()
+* ihevce_trace_init()
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "ihevc_debug.h"
+#include "ihevc_defs.h"
+#include "ihevc_macros.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_cabac_tables.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+#include "ihevce_defs.h"
+#include "ihevce_buffer_que_interface.h"
+#include "ihevce_hle_interface.h"
+
+#include "ihevce_error_codes.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_trace.h"
+
+/*****************************************************************************/
+/* Global Variable Definitions */
+/*****************************************************************************/
+
+/* Table of Prefix names, one for each bit of the debug_id mask */
+/* bits 12 - 16 are used only to indicate the layer id */
+/* add null strings to those locations */
+// clang-format off
+const char *g_api1_prefix_name[32] =
+{
+ SEQ_LEVEL, /* TRACE_SEQ = 0x00000001, !< sequence params dump enable */
+ PIC_LEVEL, /* TRACE_PIC = 0x00000002, !< picparams dump enable */
+ SLICE_LEVEL, /* TRACE_SLICE = 0x00000004, !< slice params dump enable */
+ MB_LEVEL, /* TRACE_MB_PARAMS = 0x00000008, !< mb level decoded dump enable */
+ MB_LEVEL, /* TRACE_MB_INF_PARAMS = 0x00000010, !< mb level inferred dumping enable */
+ ECD_DATA, /* TRACE_ECD_DATA = 0x00000020, !< ECD data dump */
+ LYR_COEFF_LEVEL, /* TRACE_LYR_COEFF_LEVEL = 0x00000040, !< Current layer coeff levels */
+ ACC_COEFF_LEVEL, /* TRACE_ACC_COEFF_LEVEL = 0x00000080, !< Accumulated coeffs/level */
+ ACC_COEFFS, /* TRACE_ACC_COEFFS = 0x00000100, !< Accumulated coeffs */
+ LYR_DIFF_SIG, /* TRACE_LYR_DIFF_SIG = 0x00000200, !< layer level differential signal */
+ LYR_IP_SIG, /* TRACE_LYR_IP_SIG = 0x00000400, !< layer level Intra pred signal */
+ MB_LEVEL, /* TRACE_INTRA_UPSMPL_SIG= 0x00000800, !< Intra upsampled data */
+ MB_LEVEL, /* TRACE_RES_UPSMPL_SIG = 0x00001000, !< Residual upsampled data */
+ MB_LEVEL, /* TRACE_BS_INFO = 0x00002000, !< BS information */
+ DUMMY, /* 0x00004000 */
+ DUMMY, /* 0x00008000 */
+ DUMMY, /* 0x00010000 */
+ DUMMY, /* 0x00020000 */
+ DUMMY, /* 0x00040000 */
+ DUMMY, /* 0x00080000 */
+ RES_CHANGE_SIG, /* TRACE_RES_CGE_MV = 0x00100000, !< Res change Motion vectors */
+ RES_CHANGE_SIG, /* TRACE_RES_CGE_MODE = 0x00200000, !< Res change MB modes */
+ RES_CHANGE_SIG, /* TRACE_RES_CGE_DATA = 0x00400000, !< Res change data */
+ TARGET_MC, /* TRACE_TGT_MC_PRED = 0x00800000, !< motion comp pred signal dump enable */
+ DUMMY, /* 0x01000000 */
+ DUMMY, /* 0x02000000 */
+ DUMMY, /* 0x04000000 */
+ TARGET_DEBLK, /* TRACE_TGT_LYR_DEBLK = 0x08000000, !< Input to target layer deblocking */
+ REF_BASE_DEBLK, /* TRACE_REF_BASE_DEBLK = 0x10000000, !< deblocked data dumping enable */
+ DUMMY, /* 0x20000000 */
+ DUMMY, /* 0x40000000 */
+ DUMMY /* 0x80000000 */
+};
+// clang-format on
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/**
+******************************************************************************
+*
+* @brief Dummy trace init, used when trace is disabled in encoder
+*
+* @par Description
+* This routine needs to be called at start of trace; this version does nothing
+*
+* @param[in] pu1_file_name
+* Name of file where trace outputs need to be stored (handle); unused here
+*
+* @return always IHEVCE_SUCCESS
+*
+******************************************************************************
+*/
+WORD32 ihevce_trace_init(UWORD8 *pu1_file_name)
+{
+ (void)pu1_file_name; /* cast to void marks the parameter as intentionally unused */
+ return IHEVCE_SUCCESS;
+}
+
+/**
+******************************************************************************
+*
+* @brief Dummy trace de-init function, used when trace is disabled
+*
+* @par Description
+* This routine needs to be called at end of trace; this version does nothing
+*
+* @return always IHEVCE_SUCCESS
+*
+******************************************************************************
+*/
+WORD32 ihevce_trace_deinit(void)
+{
+ return IHEVCE_SUCCESS;
+}
+
+/**
+ * \brief Returns the zero-based position of the single bit set in the input.
+ *        Both checks are asserts, so they vanish when NDEBUG is defined.
+ */
+WORD32 svcd_trace_get_bit_pos(UWORD32 u4_input)
+{
+    UWORD32 u4_remaining;
+    WORD32 i4_bit_pos = -1; /* remains -1 when the input is 0 */
+
+    /* the input may not have more than one of its 32 bits set */
+    assert(0 == (u4_input & (u4_input - 1)));
+
+    /* one step per right shift that is needed to empty the value */
+    for(u4_remaining = u4_input; 0 != u4_remaining; u4_remaining >>= 1)
+    {
+        i4_bit_pos++;
+    }
+
+    /* a zero input leaves the position at -1 and is rejected here */
+    assert((0 <= i4_bit_pos) && (i4_bit_pos <= 31));
+
+    return i4_bit_pos;
+}
+
+/** \brief Converts a trace prefix mask into the bit position of its set bit.
+ *
+ *  No dumping is done here; only the bit position of u4_prefix is returned.
+ */
+WORD32 ihevce_trace(UWORD32 u4_prefix)
+{
+    /* svcd_trace_get_bit_pos() asserts that u4_prefix has one bit set and */
+    /* that the resulting position lies in the range 0..31                 */
+    return svcd_trace_get_bit_pos(u4_prefix);
+}
diff --git a/encoder/ihevce_trace.h b/encoder/ihevce_trace.h
new file mode 100644
index 0000000..2eea611
--- /dev/null
+++ b/encoder/ihevce_trace.h
@@ -0,0 +1,114 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* @file ihevce_trace.h
+*
+* @brief
+* This file contains entropy and cabac trace related structures and macros
+*
+* @author
+* Ittiam
+******************************************************************************
+*/
+
+#ifndef _IHEVCE_TRACE_H_
+#define _IHEVCE_TRACE_H_
+
+#define ENABLE_TRACE 0
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+/* strings assigned for prefix names (several prefixes share one string) */
+// clang-format off
+#define SEQ_LEVEL "SEQ" /*!< prefix string for sequence params */
+#define HRD_LEVEL "HRD" /*!< prefix string for hrd params */
+#define PIC_LEVEL "PIC_INFO" /*!< prefix string for picture params */
+#define SLICE_LEVEL "SLICE" /*!< prefix string for slice params */
+#define MB_LEVEL "MB" /*!< prefix string for MB params */
+#define ECD_DATA "ECD" /*!< prefix string for ECD data */
+#define LYR_COEFF_LEVEL "LYR" /*!< prefix string for current layer tx levels */
+#define ACC_COEFF_LEVEL "LYR" /*!< prefix string for accumulated tx levels/coeffs */
+#define ACC_COEFFS "LYR" /*!< prefix string for accumulated coeffs */
+#define LYR_DIFF_SIG "LYR" /*!< prefix string for layer level differential signal */
+#define LYR_IP_SIG "LYR" /*!< prefix string for layer level intra pred signal */
+#define RES_CHANGE_SIG "RES CGE" /*!< prefix string for res change data */
+#define REF_BASE_DEBLK "REF BASE" /*!< prefix string for ref base parameters */
+#define TARGET_DEBLK "TGT" /*!< prefix string for target layer parameters */
+#define TARGET_MC "TGT" /*!< prefix string for target layer parameters */
+#define DUMMY "NOT VALID" /*!< placeholder string for mask bits without a prefix */
+// clang-format on
+
+#define TRACE(a) ihevce_trace((a)) /* forwards its argument to ihevce_trace() */
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+typedef enum /* boolean constants */
+{
+ IHEVCE_FALSE = 0,
+ IHEVCE_TRUE = 1
+} IHEVCE_BOOL_T;
+
+// clang-format off
+typedef enum /* one mask bit per trace prefix, plus TRACE_ALL */
+{
+ TRACE_SEQ = 0x00000001, /*!< sequence params dump enable */
+ TRACE_PIC = 0x00000002, /*!< picparams dump enable */
+ TRACE_SLICE = 0x00000004, /*!< slice params dump enable */
+ TRACE_MB_PARAMS = 0x00000008, /*!< mb level decoded dump enable */
+ TRACE_MB_INF_PARAMS = 0x00000010, /*!< mb level inferred dumping enable */
+ TRACE_ECD_DATA = 0x00000020, /*!< ECD data dump */
+ TRACE_LYR_COEFF_LEVEL = 0x00000040, /*!< Current layer coeff levels */
+ TRACE_ACC_COEFF_LEVEL = 0x00000080, /*!< Accumulated coeffs/level */
+ TRACE_ACC_COEFFS = 0x00000100, /*!< Accumulated coeffs */
+ TRACE_LYR_DIFF_SIG = 0x00000200, /*!< layer level differential signal */
+ TRACE_LYR_IP_SIG = 0x00000400, /*!< layer level Intra pred signal */
+ TRACE_INTRA_UPSMPL_SIG= 0x00000800, /*!< Intra upsampled data */
+ TRACE_RES_UPSMPL_SIG = 0x00001000, /*!< Residual upsampled data */
+ TRACE_BS_INFO = 0x00002000, /*!< BS information */
+ TRACE_RES_CGE_MV = 0x00100000, /*!< Res change Motion vectors */
+ TRACE_RES_CGE_MODE = 0x00200000, /*!< Res change MB modes */
+ TRACE_RES_CGE_DATA = 0x00400000, /*!< Res change data */
+ TRACE_TGT_MC_PRED = 0x00800000, /*!< motion comp pred signal dump enable */
+ TRACE_TGT_LYR_DEBLK = 0x08000000, /*!< Input to target layer deblocking */
+ TRACE_REF_BASE_DEBLK = 0x10000000, /*!< deblocked data dumping enable */
+ TRACE_ALL = 0xFFFFFFFF /*!< all params dumping enable. NOTE(review): not representable as a 32-bit int - confirm the compilers in use accept it */
+}TRACE_PREFIX_T;
+// clang-format on
+
+/*****************************************************************************/
+/* Globals */
+/*****************************************************************************/
+extern const char *g_api1_prefix_name[32];
+
+/* Dummy macros when trace is disabled: both expand to nothing */
+#define ENTROPY_TRACE(syntax_string, value)
+
+#define AEV_TRACE(string, value, range)
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+WORD32 ihevce_trace(UWORD32 u4_prefix);
+
+#endif //_IHEVCE_TRACE_H_
diff --git a/encoder/ihevce_tu_tree_selector.c b/encoder/ihevce_tu_tree_selector.c
new file mode 100644
index 0000000..c5f5cb4
--- /dev/null
+++ b/encoder/ihevce_tu_tree_selector.c
@@ -0,0 +1,2665 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+******************************************************************************
+* \file ihevce_tu_tree_selector.c
+*
+* \brief
+* Functions that facilitate selection of optimal TU tree
+*
+* \date
+* 20/04/2016
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdarg.h>
+#include <math.h>
+#include <limits.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+
+#include "rc_cntrl_param.h"
+#include "rc_frame_info_collector.h"
+#include "rc_look_ahead_params.h"
+
+#include "ihevc_defs.h"
+#include "ihevc_structs.h"
+#include "ihevc_platform_macros.h"
+#include "ihevc_deblk.h"
+#include "ihevc_itrans_recon.h"
+#include "ihevc_chroma_itrans_recon.h"
+#include "ihevc_chroma_intra_pred.h"
+#include "ihevc_intra_pred.h"
+#include "ihevc_inter_pred.h"
+#include "ihevc_mem_fns.h"
+#include "ihevc_padding.h"
+#include "ihevc_weighted_pred.h"
+#include "ihevc_sao.h"
+#include "ihevc_resi_trans.h"
+#include "ihevc_quant_iquant_ssd.h"
+#include "ihevc_cabac_tables.h"
+
+#include "ihevce_defs.h"
+#include "ihevce_lap_enc_structs.h"
+#include "ihevce_multi_thrd_structs.h"
+#include "ihevce_me_common_defs.h"
+#include "ihevce_had_satd.h"
+#include "ihevce_error_codes.h"
+#include "ihevce_bitstream.h"
+#include "ihevce_cabac.h"
+#include "ihevce_rdoq_macros.h"
+#include "ihevce_function_selector.h"
+#include "ihevce_enc_structs.h"
+#include "ihevce_entropy_structs.h"
+#include "ihevce_cmn_utils_instr_set_router.h"
+#include "ihevce_enc_loop_structs.h"
+#include "ihevce_enc_loop_utils.h"
+#include "ihevce_tu_tree_selector.h"
+
+/*****************************************************************************/
+/* Function Definitions */
+/*****************************************************************************/
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_tu_tree_coverage_in_cu \endif
+*
+* \brief
+*  Sums the luma area (size * size) of the valid nodes reachable from ps_node.
+*  Input : Pointer to a node of the TU tree
+*  Output : Area covered by the current TU or by its descendants
+*
+*****************************************************************************
+*/
+WORD32 ihevce_tu_tree_coverage_in_cu(tu_tree_node_t *ps_node)
+{
+    tu_tree_node_t *aps_children[4];
+    WORD32 i4_covered_area = 0;
+    WORD32 i4_child;
+
+    /* A valid node accounts for its complete luma block and ends the descent; */
+    /* whatever hangs below it is not visited                                  */
+    if(ps_node->u1_is_valid_node)
+    {
+        return ps_node->s_luma_data.u1_size * ps_node->s_luma_data.u1_size;
+    }
+
+    /* An invalid node covers only what its children cover. The children are */
+    /* visited in the order TL, TR, BL, BR                                    */
+    aps_children[0] = ps_node->ps_child_node_tl;
+    aps_children[1] = ps_node->ps_child_node_tr;
+    aps_children[2] = ps_node->ps_child_node_bl;
+    aps_children[3] = ps_node->ps_child_node_br;
+
+    for(i4_child = 0; i4_child < 4; i4_child++)
+    {
+        /* Absent children (NULL) contribute no area */
+        if(NULL == aps_children[i4_child])
+        {
+            continue;
+        }
+
+        i4_covered_area += ihevce_tu_tree_coverage_in_cu(aps_children[i4_child]);
+    }
+
+    return i4_covered_area;
+}
+
+/* Resets one TU data record: geometry from the arguments, statistics cleared */
+static void ihevce_tu_node_data_init(
+    tu_node_data_t *ps_tu_data, UWORD8 u1_size, UWORD8 u1_posx, UWORD8 u1_posy)
+{
+    ps_tu_data->u1_size = u1_size;
+    ps_tu_data->u1_posx = u1_posx;
+    ps_tu_data->u1_posy = u1_posy;
+    ps_tu_data->u1_reconBufId = UCHAR_MAX; /* presumably "no recon buffer yet" - TODO confirm */
+    ps_tu_data->u1_cbf = 0;
+    ps_tu_data->u4_sad = 0;
+    ps_tu_data->i8_ssd = ps_tu_data->i8_cost = 0;
+#if ENABLE_INTER_ZCU_COST
+    ps_tu_data->i8_not_coded_cost = 0;
+#endif
+    ps_tu_data->i4_bits = 0;
+    ps_tu_data->i4_num_bytes_used_for_ecd = 0;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_tu_node_init \endif
+*
+* \brief
+* This function initialises all nodes of the TU tree from the root up to and
+* including the nodes at the max tree depth. Only those nodes whose depth
+* lies between the min and max tree depths (both inclusive) are set as valid.
+* Everything else is invalid. The child pointers of the leaf-most nodes in
+* the tree are assigned NULL.
+* Input : Pointer to root of the tree containing TU info.
+* Output : The memory of this node and all its progeny shall be modified
+* returns Number of nodes of the TU tree that have been modified
+*
+*****************************************************************************
+*/
+static UWORD16 ihevce_tu_node_init(
+ tu_tree_node_t *ps_root,
+ UWORD8 u1_size,
+ UWORD8 u1_parent_posx,
+ UWORD8 u1_parent_posy,
+ UWORD8 u1_cur_depth,
+ UWORD8 u1_min_tree_depth,
+ UWORD8 u1_max_tree_depth,
+ UWORD8 u1_chroma_processing_enabled,
+ UWORD8 u1_is_422,
+ TU_POS_T e_tu_pos)
+{
+ tu_tree_node_t *ps_node;
+ tu_tree_node_t *ps_childNodeTL;
+ tu_tree_node_t *ps_childNodeTR;
+ tu_tree_node_t *ps_childNodeBL;
+ tu_tree_node_t *ps_childNodeBR;
+
+ UWORD8 u1_start_index_for_parent = 0; /* base index of the level holding the node initialised here */
+ UWORD8 u1_start_index_for_child = 0; /* base index of the level holding its children */
+ UWORD16 u2_parent_offset = 0;
+ UWORD16 u2_child_offset = 0;
+ UWORD8 u1_posx = 0;
+ UWORD8 u1_posy = 0;
+
+ const UWORD8 u1_nxn_tu_node_start_index = 0; /* levels are stored back to back: 1, 4, 16, 64 nodes, ... */
+ const UWORD8 u1_nBye2xnBye2_tu_node_start_index = 1;
+ const UWORD8 u1_nBye4xnBye4_tu_node_start_index = 1 + 4;
+ const UWORD8 u1_nBye8xnBye8_tu_node_start_index = 1 + 4 + 16;
+ const UWORD8 u1_nBye16xnBye16_tu_node_start_index = 1 + 4 + 16 + 64;
+ UWORD16 u2_num_nodes_initialised = 0;
+
+ ASSERT(u1_cur_depth <= u1_max_tree_depth);
+ ASSERT(u1_max_tree_depth >= u1_min_tree_depth);
+
+ switch(e_tu_pos) /* position of this node: parent position plus u1_size per quadrant */
+ {
+ case POS_TL:
+ {
+ u1_posx = u1_parent_posx;
+ u1_posy = u1_parent_posy;
+
+ break;
+ }
+ case POS_TR:
+ {
+ u1_posx = u1_parent_posx + u1_size;
+ u1_posy = u1_parent_posy;
+
+ break;
+ }
+ case POS_BL:
+ {
+ u1_posx = u1_parent_posx;
+ u1_posy = u1_parent_posy + u1_size;
+
+ break;
+ }
+ case POS_BR:
+ {
+ u1_posx = u1_parent_posx + u1_size;
+ u1_posy = u1_parent_posy + u1_size;
+
+ break;
+ }
+ default:
+ {
+ /* Unexpected quadrant value */
+ ASSERT(0);
+ }
+ }
+
+ switch(u1_cur_depth) /* index of this node and of its first child in the array at ps_root */
+ {
+ case 0:
+ {
+ u1_start_index_for_parent = u1_nxn_tu_node_start_index;
+ u1_start_index_for_child = u1_nBye2xnBye2_tu_node_start_index;
+
+ u2_parent_offset = 0;
+ u2_child_offset = 0;
+
+ break;
+ }
+ case 1:
+ {
+ u1_start_index_for_parent = u1_nBye2xnBye2_tu_node_start_index;
+ u1_start_index_for_child = u1_nBye4xnBye4_tu_node_start_index;
+
+ u2_parent_offset = e_tu_pos;
+ u2_child_offset = 4 * u1_posx / u1_size + 8 * u1_posy / u1_size;
+
+ break;
+ }
+ case 2:
+ {
+ u1_start_index_for_parent = u1_nBye4xnBye4_tu_node_start_index;
+ u1_start_index_for_child = u1_nBye8xnBye8_tu_node_start_index;
+
+ u2_parent_offset = 2 * u1_parent_posx / u1_size + 4 * u1_parent_posy / u1_size + e_tu_pos;
+ u2_child_offset = 4 * u1_posx / u1_size + 16 * u1_posy / u1_size;
+
+ break;
+ }
+ case 3:
+ {
+ u1_start_index_for_parent = u1_nBye8xnBye8_tu_node_start_index;
+ u1_start_index_for_child = u1_nBye16xnBye16_tu_node_start_index;
+
+ u2_parent_offset = 2 * u1_parent_posx / u1_size + 8 * u1_parent_posy / u1_size + e_tu_pos;
+ u2_child_offset = 4 * u1_posx / u1_size + 32 * u1_posy / u1_size;
+
+ break;
+ }
+ case 4:
+ {
+ u1_start_index_for_parent = u1_nBye16xnBye16_tu_node_start_index;
+ u1_start_index_for_child = 0;
+
+ u2_parent_offset = 2 * u1_parent_posx / u1_size + 16 * u1_parent_posy / u1_size + e_tu_pos;
+ u2_child_offset = 0;
+
+ break;
+ }
+ default:
+ {
+ /* Depths beyond 4 are not handled */
+ ASSERT(0);
+ }
+ }
+
+ ASSERT((u1_start_index_for_parent + u2_parent_offset) < (256 + 64 + 16 + 4 + 1)); /* 341 nodes over five levels */
+ ASSERT((u1_start_index_for_child + u2_child_offset + POS_BR) < (256 + 64 + 16 + 4 + 1));
+
+ ps_node = ps_root + u1_start_index_for_parent + u2_parent_offset; /* node initialised by this call */
+ ps_childNodeTL = ps_root + u1_start_index_for_child + u2_child_offset + POS_TL;
+ ps_childNodeTR = ps_root + u1_start_index_for_child + u2_child_offset + POS_TR;
+ ps_childNodeBL = ps_root + u1_start_index_for_child + u2_child_offset + POS_BL;
+ ps_childNodeBR = ps_root + u1_start_index_for_child + u2_child_offset + POS_BR;
+
+ ihevce_tu_node_data_init(&ps_node->s_luma_data, u1_size, u1_posx, u1_posy);
+
+ if(u1_chroma_processing_enabled)
+ {
+ UWORD8 i;
+
+ if(u1_size > 4) /* chroma records get half the luma size; two records per plane when u1_is_422 */
+ {
+ for(i = 0; i < (u1_is_422 + 1); i++)
+ {
+ ihevce_tu_node_data_init(
+ &ps_node->as_cb_data[i],
+ u1_size / 2,
+ u1_posx / 2,
+ !u1_is_422 ? u1_posy / 2 : u1_posy + i * u1_size / 2);
+
+ ihevce_tu_node_data_init(
+ &ps_node->as_cr_data[i],
+ u1_size / 2,
+ u1_posx / 2,
+ !u1_is_422 ? u1_posy / 2 : u1_posy + i * u1_size / 2);
+ }
+ }
+ else if(POS_TL == e_tu_pos) /* luma size <= 4, top-left quadrant: chroma records keep u1_size */
+ {
+ for(i = 0; i < (u1_is_422 + 1); i++)
+ {
+ ihevce_tu_node_data_init(
+ &ps_node->as_cb_data[i],
+ u1_size,
+ u1_posx / 2,
+ !u1_is_422 ? u1_posy / 2 : u1_posy + i * u1_size);
+
+ ihevce_tu_node_data_init(
+ &ps_node->as_cr_data[i],
+ u1_size,
+ u1_posx / 2,
+ !u1_is_422 ? u1_posy / 2 : u1_posy + i * u1_size);
+ }
+ }
+ else /* luma size <= 4, remaining quadrants: chroma records get u1_size / 2 */
+ {
+ for(i = 0; i < (u1_is_422 + 1); i++)
+ {
+ ihevce_tu_node_data_init(
+ &ps_node->as_cb_data[i],
+ u1_size / 2,
+ u1_posx / 2,
+ !u1_is_422 ? u1_posy / 2 : u1_posy + i * u1_size);
+
+ ihevce_tu_node_data_init(
+ &ps_node->as_cr_data[i],
+ u1_size / 2,
+ u1_posx / 2,
+ !u1_is_422 ? u1_posy / 2 : u1_posy + i * u1_size);
+ }
+ }
+ }
+
+ if((u1_cur_depth >= u1_min_tree_depth) && (u1_cur_depth <= u1_max_tree_depth)) /* valid only inside [min, max] depth */
+ {
+ ps_node->u1_is_valid_node = 1;
+ }
+ else
+ {
+ ps_node->u1_is_valid_node = 0;
+ }
+
+ u2_num_nodes_initialised++; /* counts the node handled by this call */
+
+ if((u1_cur_depth < u1_max_tree_depth) && (u1_size > MIN_TU_SIZE)) /* recurse into the four quadrants at half the size */
+ {
+ ps_node->ps_child_node_tl = ps_childNodeTL;
+ ps_node->ps_child_node_tr = ps_childNodeTR;
+ ps_node->ps_child_node_bl = ps_childNodeBL;
+ ps_node->ps_child_node_br = ps_childNodeBR;
+
+ u2_num_nodes_initialised += ihevce_tu_node_init(
+ ps_root,
+ u1_size / 2,
+ ps_node->s_luma_data.u1_posx,
+ ps_node->s_luma_data.u1_posy,
+ u1_cur_depth + 1,
+ u1_min_tree_depth,
+ u1_max_tree_depth,
+ u1_chroma_processing_enabled,
+ u1_is_422,
+ POS_TL);
+
+ u2_num_nodes_initialised += ihevce_tu_node_init(
+ ps_root,
+ u1_size / 2,
+ ps_node->s_luma_data.u1_posx,
+ ps_node->s_luma_data.u1_posy,
+ u1_cur_depth + 1,
+ u1_min_tree_depth,
+ u1_max_tree_depth,
+ u1_chroma_processing_enabled,
+ u1_is_422,
+ POS_TR);
+
+ u2_num_nodes_initialised += ihevce_tu_node_init(
+ ps_root,
+ u1_size / 2,
+ ps_node->s_luma_data.u1_posx,
+ ps_node->s_luma_data.u1_posy,
+ u1_cur_depth + 1,
+ u1_min_tree_depth,
+ u1_max_tree_depth,
+ u1_chroma_processing_enabled,
+ u1_is_422,
+ POS_BL);
+
+ u2_num_nodes_initialised += ihevce_tu_node_init(
+ ps_root,
+ u1_size / 2,
+ ps_node->s_luma_data.u1_posx,
+ ps_node->s_luma_data.u1_posy,
+ u1_cur_depth + 1,
+ u1_min_tree_depth,
+ u1_max_tree_depth,
+ u1_chroma_processing_enabled,
+ u1_is_422,
+ POS_BR);
+ }
+ else /* no further split: this node gets no children */
+ {
+ ps_node->ps_child_node_tl = NULL;
+ ps_node->ps_child_node_tr = NULL;
+ ps_node->ps_child_node_bl = NULL;
+ ps_node->ps_child_node_br = NULL;
+ }
+
+ return u2_num_nodes_initialised;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_tu_tree_init \endif
+*
+* \brief
+*  Initialises the nodes of a TU tree, starting from its root
+*  Input : Pointer to root of the tree containing TU info.
+*  Output : Returns the number of nodes initialised
+*
+*****************************************************************************
+*/
+UWORD16 ihevce_tu_tree_init(
+    tu_tree_node_t *ps_root,
+    UWORD8 u1_cu_size,
+    UWORD8 u1_min_tree_depth,
+    UWORD8 u1_max_tree_depth,
+    UWORD8 u1_chroma_processing_enabled,
+    UWORD8 u1_is_422)
+{
+    /* The root node starts at the origin (0, 0) and sits at depth 0 */
+    const UWORD8 u1_root_pos = 0;
+    const UWORD8 u1_root_depth = 0;
+
+    ASSERT(u1_max_tree_depth >= u1_min_tree_depth);
+
+    return ihevce_tu_node_init(
+        ps_root,
+        u1_cu_size,
+        u1_root_pos,
+        u1_root_pos,
+        u1_root_depth,
+        u1_min_tree_depth,
+        u1_max_tree_depth,
+        u1_chroma_processing_enabled,
+        u1_is_422,
+        POS_TL);
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_cabac_bins2Bits_converter_and_state_updater \endif
+*
+* \brief
+*  Looks up the bit cost of one bin and advances the context state it used
+*  Input : 1. Pointer to buffer which stores the current CABAC state. This
+*  buffer shall be modified by this function. 2. Index to the cabac state
+*  that corresponds to the bin. 3. bin value
+*  Output : Number of bits required to encode the bin
+*
+*****************************************************************************
+*/
+static INLINE UWORD32 ihevce_cabac_bins2Bits_converter_and_state_updater(
+    UWORD8 *pu1_cabac_ctxt, UWORD8 u1_cabac_state_idx, UWORD8 u1_bin_value)
+{
+    UWORD8 *pu1_state = pu1_cabac_ctxt + u1_cabac_state_idx;
+    /* The cost is looked up with the state as it is before the update */
+    const UWORD32 u4_bits = gau2_ihevce_cabac_bin_to_bits[*pu1_state ^ u1_bin_value];
+
+    /* The transition table is indexed by (state << 1) | bin */
+    *pu1_state = gau1_ihevc_next_state[(*pu1_state << 1) | u1_bin_value];
+    return u4_bits;
+}
+
+/* Returns the node one level above ps_leaf in the tree rooted at ps_root, or  */
+/* NULL when ps_leaf is as large as ps_root. TU positions are absolute in the */
+/* CU, so the quadrant test is made relative to the origin of ps_root.        */
+static tu_tree_node_t *
+    ihevce_tu_node_parent_finder(tu_tree_node_t *ps_root, tu_tree_node_t *ps_leaf)
+{
+    UWORD8 u1_depth_of_leaf;
+
+    /* GETRANGE of the size ratio, less one, is used as the number of levels */
+    /* that separate ps_leaf from ps_root                                    */
+    GETRANGE(u1_depth_of_leaf, ps_root->s_luma_data.u1_size / ps_leaf->s_luma_data.u1_size);
+    u1_depth_of_leaf--;
+
+    if(0 == u1_depth_of_leaf)
+    {
+        return NULL;
+    }
+    else if(1 == u1_depth_of_leaf)
+    {
+        return ps_root;
+    }
+    else
+    {
+        tu_tree_node_t *ps_next;
+        UWORD8 u1_child_size;
+        UWORD8 u1_in_right_half;
+        UWORD8 u1_in_bottom_half;
+
+        /* Check the children before any of them is dereferenced */
+        ASSERT(NULL != ps_root->ps_child_node_tl);
+        ASSERT(NULL != ps_root->ps_child_node_tr);
+        ASSERT(NULL != ps_root->ps_child_node_bl);
+        ASSERT(NULL != ps_root->ps_child_node_br);
+
+        u1_child_size = ps_root->ps_child_node_tl->s_luma_data.u1_size;
+        u1_in_right_half =
+            (ps_leaf->s_luma_data.u1_posx >= (ps_root->s_luma_data.u1_posx + u1_child_size));
+        u1_in_bottom_half =
+            (ps_leaf->s_luma_data.u1_posy >= (ps_root->s_luma_data.u1_posy + u1_child_size));
+
+        if(u1_in_bottom_half)
+        {
+            ps_next = u1_in_right_half ? ps_root->ps_child_node_br : ps_root->ps_child_node_bl;
+        }
+        else
+        {
+            ps_next = u1_in_right_half ? ps_root->ps_child_node_tr : ps_root->ps_child_node_tl;
+        }
+        return ihevce_tu_node_parent_finder(ps_next, ps_leaf);
+    }
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_compute_bits_for_TUSplit_and_cbf \endif
+*
+* \notes
+* 1. This function ought to be called before the call to 'ihevce_tu_tree_selector'
+* of children TU's in order to determine bits to encode splitFlag as 1.
+* This should also be called at the end of 'ihevce_tu_processor' in order
+* to determine bits required to encode cbf and splitFlag.
+* 2. When 'ENABLE_TOP_DOWN_TU_RECURSION' = 0 and 'INCLUDE_CHROMA_DURING_TU_RECURSION' = 1,
+* it shall be assumed that parent chroma cbf is 1.
+* 3. When 'INCLUDE_CHROMA_DURING_TU_RECURSION' = 0, this function works as
+* though no chroma related syntax was included in the HEVC syntax for coding
+* the transform tree
+* Input : 1. ps_root: Pointer to root of the tree containing TU info
+* 2. ps_leaf: Pointer to current node of the TU tree
+* 3. pu1_cabac_ctxt: Pointer to buffer which stores the current CABAC
+* state. This buffer shall be modified by this function
+* Output : Number of bits required to encode cbf and splitFlags
+*
+*****************************************************************************
+*/
+static WORD32 ihevce_compute_bits_for_TUSplit_and_cbf(
+ tu_tree_node_t *ps_root, /* node whose luma size, shifted right by u1_cur_depth, is the size expected at this depth */
+ tu_tree_node_t *ps_leaf, /* node whose split flag and cbf bins are costed */
+ UWORD8 *pu1_cabac_ctxt, /* CABAC state table; handed to the bins2Bits helper for every bin */
+ UWORD8 u1_max_tu_size,
+ UWORD8 u1_min_tu_size,
+ UWORD8 u1_cur_depth,
+ UWORD8 u1_max_depth,
+ UWORD8 u1_is_intra,
+ UWORD8 u1_is_intra_nxn_pu,
+ UWORD8 u1_chroma_processing_enabled, /* zero skips every chroma cbf bin */
+ UWORD8 u1_is_422) /* non-zero: two chroma sub-TUs (index 0 and 1) per plane */
+{
+ UWORD8 u1_cabac_state_idx;
+ UWORD8 u1_log2_tu_size;
+
+ UWORD32 u4_num_bits = 0; /* running total; implicitly converted to WORD32 on return */
+ UWORD8 u1_tu_size = ps_leaf->s_luma_data.u1_size;
+
+ ASSERT(u1_min_tu_size >= MIN_TU_SIZE);
+ ASSERT(u1_min_tu_size <= u1_max_tu_size);
+ ASSERT(u1_max_tu_size <= MAX_TU_SIZE);
+ ASSERT(u1_tu_size >= MIN_TU_SIZE);
+ ASSERT(u1_tu_size <= MAX_TU_SIZE);
+ ASSERT(u1_cur_depth <= u1_max_depth);
+
+ GETRANGE(u1_log2_tu_size, u1_tu_size); /* macro defined elsewhere; presumably yields log2 of the TU size - TODO confirm */
+
+ if((ps_root->s_luma_data.u1_size >> u1_cur_depth) == u1_tu_size) /* leaf has the size expected at this depth: split flag is costed as 0 */
+ {
+ if((u1_tu_size <= u1_max_tu_size) && (u1_tu_size > u1_min_tu_size) &&
+ (u1_cur_depth < u1_max_depth) && !(u1_is_intra_nxn_pu && !u1_cur_depth)) /* split flag is only costed when none of these rules out signalling it */
+ {
+ u1_cabac_state_idx = IHEVC_CAB_SPLIT_TFM + (5 - u1_log2_tu_size);
+ u4_num_bits += ihevce_cabac_bins2Bits_converter_and_state_updater(
+ pu1_cabac_ctxt, u1_cabac_state_idx, 0);
+ }
+
+ if(u1_chroma_processing_enabled && (u1_tu_size > 4))
+ {
+ tu_tree_node_t *ps_parent = ihevce_tu_node_parent_finder(ps_root, ps_leaf);
+
+ u1_cabac_state_idx = IHEVC_CAB_CBCR_IDX + u1_cur_depth; /* same context index is used for the Cb and Cr bins below */
+
+ if(!u1_cur_depth || ps_parent->as_cb_data[0].u1_cbf || ps_parent->as_cb_data[1].u1_cbf) /* at depth 0 the short-circuit avoids touching ps_parent */
+ {
+ if(u1_is_422)
+ {
+ u4_num_bits += ihevce_cabac_bins2Bits_converter_and_state_updater(
+ pu1_cabac_ctxt, u1_cabac_state_idx, ps_leaf->as_cb_data[0].u1_cbf);
+
+ u4_num_bits += ihevce_cabac_bins2Bits_converter_and_state_updater(
+ pu1_cabac_ctxt, u1_cabac_state_idx, ps_leaf->as_cb_data[1].u1_cbf);
+ }
+ else
+ {
+ u4_num_bits += ihevce_cabac_bins2Bits_converter_and_state_updater(
+ pu1_cabac_ctxt, u1_cabac_state_idx, ps_leaf->as_cb_data[0].u1_cbf);
+ }
+ }
+
+ if(!u1_cur_depth || ps_parent->as_cr_data[0].u1_cbf || ps_parent->as_cr_data[1].u1_cbf) /* Cr mirrors the Cb handling above */
+ {
+ if(u1_is_422)
+ {
+ u4_num_bits += ihevce_cabac_bins2Bits_converter_and_state_updater(
+ pu1_cabac_ctxt, u1_cabac_state_idx, ps_leaf->as_cr_data[0].u1_cbf);
+
+ u4_num_bits += ihevce_cabac_bins2Bits_converter_and_state_updater(
+ pu1_cabac_ctxt, u1_cabac_state_idx, ps_leaf->as_cr_data[1].u1_cbf);
+ }
+ else
+ {
+ u4_num_bits += ihevce_cabac_bins2Bits_converter_and_state_updater(
+ pu1_cabac_ctxt, u1_cabac_state_idx, ps_leaf->as_cr_data[0].u1_cbf);
+ }
+ }
+ }
+
+ if(u1_is_intra || u1_cur_depth) /* luma cbf is not costed for a non-intra node at depth 0 */
+ {
+ u1_cabac_state_idx = IHEVC_CAB_CBF_LUMA_IDX + !u1_cur_depth; /* context offset is 1 at depth 0, else 0 */
+ u4_num_bits += ihevce_cabac_bins2Bits_converter_and_state_updater(
+ pu1_cabac_ctxt, u1_cabac_state_idx, ps_leaf->s_luma_data.u1_cbf);
+ }
+ }
+ else /* leaf size differs from the size expected at this depth: split flag is costed as 1, no luma cbf */
+ {
+ if((u1_tu_size <= u1_max_tu_size) && (u1_tu_size > u1_min_tu_size) &&
+ (u1_cur_depth < u1_max_depth) && !(u1_is_intra_nxn_pu && !u1_cur_depth))
+ {
+ u1_cabac_state_idx = IHEVC_CAB_SPLIT_TFM + (5 - u1_log2_tu_size);
+ u4_num_bits += ihevce_cabac_bins2Bits_converter_and_state_updater(
+ pu1_cabac_ctxt, u1_cabac_state_idx, 1);
+ }
+
+ if(u1_chroma_processing_enabled && (u1_tu_size > 4))
+ {
+ tu_tree_node_t *ps_parent = ihevce_tu_node_parent_finder(ps_root, ps_leaf);
+
+ u1_cabac_state_idx = IHEVC_CAB_CBCR_IDX + u1_cur_depth;
+
+ if(!u1_cur_depth || ps_parent->as_cb_data[0].u1_cbf || ps_parent->as_cb_data[1].u1_cbf)
+ {
+ if(u1_is_422 && (8 == u1_tu_size)) /* only the 4:2:2, size-8 case costs one bin per sub-TU here */
+ {
+ u4_num_bits += ihevce_cabac_bins2Bits_converter_and_state_updater(
+ pu1_cabac_ctxt, u1_cabac_state_idx, ps_leaf->as_cb_data[0].u1_cbf);
+
+ u4_num_bits += ihevce_cabac_bins2Bits_converter_and_state_updater(
+ pu1_cabac_ctxt, u1_cabac_state_idx, ps_leaf->as_cb_data[1].u1_cbf);
+ }
+ else
+ {
+ u4_num_bits += ihevce_cabac_bins2Bits_converter_and_state_updater(
+ pu1_cabac_ctxt,
+ u1_cabac_state_idx,
+ ps_leaf->as_cb_data[0].u1_cbf || ps_leaf->as_cb_data[1].u1_cbf); /* single bin: OR of both sub-TU cbfs */
+ }
+ }
+
+ if(!u1_cur_depth || ps_parent->as_cr_data[0].u1_cbf || ps_parent->as_cr_data[1].u1_cbf)
+ {
+ if(u1_is_422 && (8 == u1_tu_size))
+ {
+ u4_num_bits += ihevce_cabac_bins2Bits_converter_and_state_updater(
+ pu1_cabac_ctxt, u1_cabac_state_idx, ps_leaf->as_cr_data[0].u1_cbf);
+
+ u4_num_bits += ihevce_cabac_bins2Bits_converter_and_state_updater(
+ pu1_cabac_ctxt, u1_cabac_state_idx, ps_leaf->as_cr_data[1].u1_cbf);
+ }
+ else
+ {
+ u4_num_bits += ihevce_cabac_bins2Bits_converter_and_state_updater(
+ pu1_cabac_ctxt,
+ u1_cabac_state_idx,
+ ps_leaf->as_cr_data[0].u1_cbf || ps_leaf->as_cr_data[1].u1_cbf); /* single bin: OR of both sub-TU cbfs */
+ }
+ }
+ }
+ }
+
+ return u4_num_bits;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_tu_processor \endif
+*
+* \notes
+* Input : 1. ps_ctxt: Pointer to enc-loop's context. This function writes
+* pi4_quant_round_factor_tu_0_1/_1_2[trans_size >> 3] and, when
+* ENABLE_INTER_ZCU_COST is set, i8_cu_not_coded_cost
+* 2. ps_node: Pointer to current node of the TU tree. Its luma (and, if
+* processed, Cb/Cr) cbf, cost and u1_reconBufId are written here
+* 3. ps_buffer_data: Luma/chroma src, pred and recon pointers and
+* strides, the IQ output buffers and ppu1_ecd[0] (entropy-coded data)
+* 4. pu1_cabac_ctxt: Not used by this function (explicitly voided)
+* 5. i4_pred_mode: Passed to the luma call; the chroma calls are only
+* told whether it equals PRED_MODE_SKIP
+* 6. u1_chroma_processing_enabled: If set, Cb and Cr are processed too
+* for nodes larger than 4x4 and 4x4 nodes at multiples of 8 in x and y
+* 7. u1_compute_spatial_ssd: Non-zero selects SPATIAL_DOMAIN_SSD,
+* zero selects FREQUENCY_DOMAIN_SSD
+* (i4_alpha_stim_multiplier / u1_is_cu_noisy are forwarded unchanged)
+* Output : NA
+*
+*****************************************************************************
+*/
+static void ihevce_tu_processor(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ tu_tree_node_t *ps_node,
+ buffer_data_for_tu_t *ps_buffer_data,
+ UWORD8 *pu1_cabac_ctxt,
+ WORD32 i4_pred_mode,
+#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ WORD32 i4_alpha_stim_multiplier,
+ UWORD8 u1_is_cu_noisy,
+#endif
+ UWORD8 u1_chroma_processing_enabled,
+ UWORD8 u1_compute_spatial_ssd)
+{
+ UWORD8 u1_is_recon_available; /* output flag of the luma/chroma coding calls; reused for every component */
+
+ void *pv_src = ps_buffer_data->s_src_pred_rec_buf_luma.pv_src;
+ void *pv_pred = ps_buffer_data->s_src_pred_rec_buf_luma.pv_pred;
+ void *pv_recon = ps_buffer_data->s_src_pred_rec_buf_luma.pv_recon;
+ WORD16 *pi2_deq_data = ps_buffer_data->pi2_deq_data;
+ UWORD8 *pu1_ecd = ps_buffer_data->ppu1_ecd[0]; /* local write cursor; ppu1_ecd[0] itself is not updated here */
+ WORD32 i4_src_stride = ps_buffer_data->s_src_pred_rec_buf_luma.i4_src_stride;
+ WORD32 i4_pred_stride = ps_buffer_data->s_src_pred_rec_buf_luma.i4_pred_stride;
+ WORD32 i4_recon_stride = ps_buffer_data->s_src_pred_rec_buf_luma.i4_recon_stride;
+ WORD32 i4_deq_data_stride = ps_buffer_data->i4_deq_data_stride;
+ UWORD8 u1_size = ps_node->s_luma_data.u1_size;
+ UWORD8 u1_posx = ps_node->s_luma_data.u1_posx;
+ UWORD8 u1_posy = ps_node->s_luma_data.u1_posy;
+ WORD32 trans_size = (64 == u1_size) ? 32 : u1_size; /* a size-64 node uses the 32 entry of the rounding-factor tables */
+ UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
+
+ (void)pu1_cabac_ctxt; /* parameter is intentionally unused */
+ {
+ pv_src = ((UWORD8 *)pv_src) + u1_posx + u1_posy * i4_src_stride; /* move to this TU's top-left sample */
+ pv_pred = ((UWORD8 *)pv_pred) + u1_posx + u1_posy * i4_pred_stride;
+ pv_recon = ((UWORD8 *)pv_recon) + u1_posx + u1_posy * i4_recon_stride;
+ }
+
+ pi2_deq_data += u1_posx + u1_posy * i4_deq_data_stride;
+
+ /*2 Multi-dimensional array based on trans size of rounding factor to be added here */
+ /* arrays are for rounding factor corr. to 0-1 decision and 1-2 decision */
+ /* Currently the complete array will contain only single value*/
+ /*The rounding factor is calculated with the formula
+ Deadzone val = (((R1 - R0) * (2^(-8/3)) * lamMod) + 1)/2
+ rounding factor = (1 - DeadZone Val)
+
+ Assumption: Cabac states of All the sub-blocks in the TU are considered independent
+ */
+ if((ps_ctxt->i4_quant_rounding_level == TU_LEVEL_QUANT_ROUNDING) &&
+ (ps_node->s_luma_data.u1_posx || ps_node->s_luma_data.u1_posy)) /* TU-level factors are regenerated only for TUs not at (0,0) */
+ {
+ double i4_lamda_modifier; /* note: a double despite the i4_ prefix */
+
+ if((BSLICE == ps_ctxt->i1_slice_type) && (ps_ctxt->i4_temporal_layer_id))
+ {
+ i4_lamda_modifier = ps_ctxt->i4_lamda_modifier *
+ CLIP3((((double)(ps_ctxt->i4_cu_qp - 12)) / 6.0), 2.00, 4.00);
+ }
+ else
+ {
+ i4_lamda_modifier = ps_ctxt->i4_lamda_modifier;
+ }
+ if(ps_ctxt->i4_use_const_lamda_modifier) /* overrides whichever value was chosen above */
+ {
+ if(ISLICE == ps_ctxt->i1_slice_type)
+ {
+ i4_lamda_modifier = ps_ctxt->f_i_pic_lamda_modifier;
+ }
+ else
+ {
+ i4_lamda_modifier = CONST_LAMDA_MOD_VAL;
+ }
+ }
+ ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] = &ps_ctxt->i4_quant_round_tu[0][0]; /* point the TU tables at the context's own scratch arrays */
+ ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] = &ps_ctxt->i4_quant_round_tu[1][0];
+
+ memset(
+ ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
+ 0,
+ trans_size * trans_size * sizeof(WORD32));
+ memset(
+ ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
+ 0,
+ trans_size * trans_size * sizeof(WORD32));
+
+ ihevce_quant_rounding_factor_gen(
+ trans_size,
+ 1,
+ &ps_ctxt->s_rdopt_entropy_ctxt,
+ ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3],
+ ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3],
+ i4_lamda_modifier,
+ 1);
+ }
+ else /* otherwise reuse the CU/CTB-level rounding factors for this transform size */
+ {
+ ps_ctxt->pi4_quant_round_factor_tu_0_1[trans_size >> 3] =
+ ps_ctxt->pi4_quant_round_factor_cu_ctb_0_1[trans_size >> 3];
+ ps_ctxt->pi4_quant_round_factor_tu_1_2[trans_size >> 3] =
+ ps_ctxt->pi4_quant_round_factor_cu_ctb_1_2[trans_size >> 3];
+ }
+
+#if ENABLE_INTER_ZCU_COST
+ ps_ctxt->i8_cu_not_coded_cost = 0;
+#endif
+
+ {
+ ps_node->s_luma_data.u1_cbf = ihevce_t_q_iq_ssd_scan_fxn( /* luma coding call; its return value is stored as the luma cbf */
+ ps_ctxt,
+ (UWORD8 *)pv_pred,
+ i4_pred_stride,
+ (UWORD8 *)pv_src,
+ i4_src_stride,
+ pi2_deq_data,
+ i4_deq_data_stride,
+ (UWORD8 *)pv_recon,
+ i4_recon_stride,
+ pu1_ecd,
+ ps_ctxt->au1_cu_csbf,
+ ps_ctxt->i4_cu_csbf_strd,
+ u1_size,
+ i4_pred_mode,
+ &ps_node->s_luma_data.i8_ssd,
+ &ps_node->s_luma_data.i4_num_bytes_used_for_ecd,
+ &ps_node->s_luma_data.i4_bits,
+ &ps_node->s_luma_data.u4_sad,
+ &ps_node->s_luma_data.i4_zero_col,
+ &ps_node->s_luma_data.i4_zero_row,
+ &u1_is_recon_available,
+ ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq,
+ ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh,
+#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ i4_alpha_stim_multiplier,
+ u1_is_cu_noisy,
+#endif
+ u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
+ 1);
+ }
+
+#if ENABLE_INTER_ZCU_COST
+ ps_node->s_luma_data.i8_not_coded_cost = ps_ctxt->i8_cu_not_coded_cost; /* read back the value left in the context after the call above */
+#endif
+
+ if(u1_compute_spatial_ssd && u1_is_recon_available)
+ {
+ ps_node->s_luma_data.u1_reconBufId = 0;
+ }
+ else
+ {
+ ps_node->s_luma_data.u1_reconBufId = UCHAR_MAX; /* UCHAR_MAX marks "no recon available" for this component */
+ }
+
+ ps_node->s_luma_data.i8_cost =
+ ps_node->s_luma_data.i8_ssd +
+ COMPUTE_RATE_COST_CLIP30(
+ ps_node->s_luma_data.i4_bits, ps_ctxt->i8_cl_ssd_lambda_qf, LAMBDA_Q_SHIFT); /* cost = ssd + rate term from the luma lambda */
+
+ pu1_ecd += ps_node->s_luma_data.i4_num_bytes_used_for_ecd; /* the next component's coded data follows the luma bytes */
+
+ if(u1_chroma_processing_enabled &&
+ ((!(u1_posx % 8) && !(u1_posy % 8) && (4 == u1_size)) || (u1_size > 4))) /* 4x4 nodes carry chroma only at positions that are multiples of 8 */
+ {
+ UWORD8 i;
+ void *pv_chroma_src;
+ void *pv_chroma_pred;
+ void *pv_chroma_recon;
+ WORD16 *pi2_deq_data_chroma;
+
+ WORD32 i4_chroma_src_stride = ps_buffer_data->s_src_pred_rec_buf_chroma.i4_src_stride;
+ WORD32 i4_chroma_pred_stride = ps_buffer_data->s_src_pred_rec_buf_chroma.i4_pred_stride;
+ WORD32 i4_chroma_recon_stride = ps_buffer_data->s_src_pred_rec_buf_chroma.i4_recon_stride;
+ WORD32 i4_deq_data_stride_chroma = ps_buffer_data->i4_deq_data_stride_chroma;
+
+ /* SubTU loop */
+ for(i = 0; i < u1_is_422 + 1; i++) /* Cb: one sub-TU, or two when u1_is_422 is 1 */
+ {
+ UWORD8 u1_chroma_size = ps_node->as_cb_data[i].u1_size;
+ UWORD8 u1_chroma_posx = ps_node->as_cb_data[i].u1_posx;
+ UWORD8 u1_chroma_posy = ps_node->as_cb_data[i].u1_posy;
+
+#if ENABLE_INTER_ZCU_COST
+ ps_ctxt->i8_cu_not_coded_cost = 0;
+#endif
+
+ pi2_deq_data_chroma = ps_buffer_data->pi2_deq_data_chroma + (u1_chroma_posx * 2) +
+ u1_chroma_posy * i4_deq_data_stride_chroma; /* x is doubled; presumably Cb and Cr share each row - TODO confirm layout */
+
+ {
+ pv_chroma_src = ((UWORD8 *)ps_buffer_data->s_src_pred_rec_buf_chroma.pv_src) +
+ (u1_chroma_posx * 2) + u1_chroma_posy * i4_chroma_src_stride;
+ pv_chroma_pred = ((UWORD8 *)ps_buffer_data->s_src_pred_rec_buf_chroma.pv_pred) +
+ (u1_chroma_posx * 2) + u1_chroma_posy * i4_chroma_pred_stride;
+ pv_chroma_recon = ((UWORD8 *)ps_buffer_data->s_src_pred_rec_buf_chroma.pv_recon) +
+ (u1_chroma_posx * 2) + u1_chroma_posy * i4_chroma_recon_stride;
+
+ ps_node->as_cb_data[i].u1_cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
+ ps_ctxt,
+ (UWORD8 *)pv_chroma_pred,
+ i4_chroma_pred_stride,
+ (UWORD8 *)pv_chroma_src,
+ i4_chroma_src_stride,
+ pi2_deq_data_chroma,
+ i4_deq_data_stride_chroma,
+ (UWORD8 *)pv_chroma_recon,
+ i4_chroma_recon_stride,
+ pu1_ecd,
+ ps_ctxt->au1_cu_csbf,
+ ps_ctxt->i4_cu_csbf_strd,
+ u1_chroma_size,
+ SCAN_DIAG_UPRIGHT,
+ 0,
+ &ps_node->as_cb_data[i].i4_num_bytes_used_for_ecd,
+ &ps_node->as_cb_data[i].i4_bits,
+ &ps_node->as_cb_data[i].i4_zero_col,
+ &ps_node->as_cb_data[i].i4_zero_row,
+ &u1_is_recon_available,
+ ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq,
+ ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh,
+ &ps_node->as_cb_data[i].i8_ssd,
+#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ i4_alpha_stim_multiplier,
+ u1_is_cu_noisy,
+#endif
+ i4_pred_mode == PRED_MODE_SKIP,
+ u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
+ U_PLANE);
+ }
+
+#if ENABLE_INTER_ZCU_COST
+ ps_node->as_cb_data[i].i8_not_coded_cost = ps_ctxt->i8_cu_not_coded_cost;
+#endif
+
+ if(u1_compute_spatial_ssd && u1_is_recon_available)
+ {
+ ps_node->as_cb_data[i].u1_reconBufId = 0;
+ }
+ else
+ {
+ ps_node->as_cb_data[i].u1_reconBufId = UCHAR_MAX;
+ }
+
+ ps_node->as_cb_data[i].i8_cost =
+ ps_node->as_cb_data[i].i8_ssd + COMPUTE_RATE_COST_CLIP30(
+ ps_node->as_cb_data[i].i4_bits,
+ ps_ctxt->i8_cl_ssd_lambda_chroma_qf,
+ LAMBDA_Q_SHIFT); /* chroma uses its own lambda */
+
+#if WEIGH_CHROMA_COST
+ ps_node->as_cb_data[i].i8_cost =
+ (ps_node->as_cb_data[i].i8_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
+ (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
+ CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT; /* scale by the weighing factor with round-to-nearest */
+#endif
+
+ pu1_ecd += ps_node->as_cb_data[i].i4_num_bytes_used_for_ecd;
+ }
+
+ for(i = 0; i < u1_is_422 + 1; i++) /* Cr: same steps as Cb, with V_PLANE and a shifted IQ-output pointer */
+ {
+ UWORD8 u1_chroma_size = ps_node->as_cr_data[i].u1_size;
+ UWORD8 u1_chroma_posx = ps_node->as_cr_data[i].u1_posx;
+ UWORD8 u1_chroma_posy = ps_node->as_cr_data[i].u1_posy;
+
+#if ENABLE_INTER_ZCU_COST
+ ps_ctxt->i8_cu_not_coded_cost = 0;
+#endif
+
+ pi2_deq_data_chroma = ps_buffer_data->pi2_deq_data_chroma + u1_chroma_size +
+ (u1_chroma_posx * 2) + u1_chroma_posy * i4_deq_data_stride_chroma; /* Cr IQ output sits u1_chroma_size elements after the Cb position */
+
+ {
+ pv_chroma_src = ((UWORD8 *)ps_buffer_data->s_src_pred_rec_buf_chroma.pv_src) +
+ (u1_chroma_posx * 2) + u1_chroma_posy * i4_chroma_src_stride; /* same addresses as for Cb; the plane is chosen by the V_PLANE argument */
+ pv_chroma_pred = ((UWORD8 *)ps_buffer_data->s_src_pred_rec_buf_chroma.pv_pred) +
+ (u1_chroma_posx * 2) + u1_chroma_posy * i4_chroma_pred_stride;
+ pv_chroma_recon = ((UWORD8 *)ps_buffer_data->s_src_pred_rec_buf_chroma.pv_recon) +
+ (u1_chroma_posx * 2) + u1_chroma_posy * i4_chroma_recon_stride;
+
+ ps_node->as_cr_data[i].u1_cbf = ihevce_chroma_t_q_iq_ssd_scan_fxn(
+ ps_ctxt,
+ (UWORD8 *)pv_chroma_pred,
+ i4_chroma_pred_stride,
+ (UWORD8 *)pv_chroma_src,
+ i4_chroma_src_stride,
+ pi2_deq_data_chroma,
+ i4_deq_data_stride_chroma,
+ (UWORD8 *)pv_chroma_recon,
+ i4_chroma_recon_stride,
+ pu1_ecd,
+ ps_ctxt->au1_cu_csbf,
+ ps_ctxt->i4_cu_csbf_strd,
+ u1_chroma_size,
+ SCAN_DIAG_UPRIGHT,
+ 0,
+ &ps_node->as_cr_data[i].i4_num_bytes_used_for_ecd,
+ &ps_node->as_cr_data[i].i4_bits,
+ &ps_node->as_cr_data[i].i4_zero_col,
+ &ps_node->as_cr_data[i].i4_zero_row,
+ &u1_is_recon_available,
+ ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_rdoq,
+ ps_ctxt->s_rdoq_sbh_ctxt.i4_perform_all_cand_sbh,
+ &ps_node->as_cr_data[i].i8_ssd,
+#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ i4_alpha_stim_multiplier,
+ u1_is_cu_noisy,
+#endif
+ i4_pred_mode == PRED_MODE_SKIP,
+ u1_compute_spatial_ssd ? SPATIAL_DOMAIN_SSD : FREQUENCY_DOMAIN_SSD,
+ V_PLANE);
+ }
+
+#if ENABLE_INTER_ZCU_COST
+ ps_node->as_cr_data[i].i8_not_coded_cost = ps_ctxt->i8_cu_not_coded_cost;
+#endif
+
+ if(u1_compute_spatial_ssd && u1_is_recon_available)
+ {
+ ps_node->as_cr_data[i].u1_reconBufId = 0;
+ }
+ else
+ {
+ ps_node->as_cr_data[i].u1_reconBufId = UCHAR_MAX;
+ }
+
+ ps_node->as_cr_data[i].i8_cost =
+ ps_node->as_cr_data[i].i8_ssd + COMPUTE_RATE_COST_CLIP30(
+ ps_node->as_cr_data[i].i4_bits,
+ ps_ctxt->i8_cl_ssd_lambda_chroma_qf,
+ LAMBDA_Q_SHIFT);
+
+#if WEIGH_CHROMA_COST
+ ps_node->as_cr_data[i].i8_cost =
+ (ps_node->as_cr_data[i].i8_cost * ps_ctxt->u4_chroma_cost_weighing_factor +
+ (1 << (CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT - 1))) >>
+ CHROMA_COST_WEIGHING_FACTOR_Q_SHIFT;
+#endif
+
+ pu1_ecd += ps_node->as_cr_data[i].i4_num_bytes_used_for_ecd;
+ }
+ }
+}
+
+/* Writes i4_cu_qp and u1_cbf into b8_qp / b1_y_cbf of every 4x4 entry covered by the TU at (u1_posx, u1_posy). */
+static INLINE void ihevce_nbr_data_copier(
+ nbr_4x4_t *ps_nbr_data_buf,
+ WORD32 i4_nbr_data_buf_stride,
+ WORD32 i4_cu_qp,
+ UWORD8 u1_cbf,
+ WORD32 u1_posx,
+ UWORD8 u1_posy,
+ UWORD8 u1_size)
+{
+ const UWORD8 u1_side_in_4x4 = u1_size / 4; /* TU edge length counted in 4x4 units */
+ nbr_4x4_t *ps_row = ps_nbr_data_buf + ((u1_posx / 4) + (u1_posy / 4) * i4_nbr_data_buf_stride);
+ WORD32 i4_rows_left;
+
+ for(i4_rows_left = u1_side_in_4x4; i4_rows_left > 0; i4_rows_left--)
+ {
+ nbr_4x4_t *ps_cur;
+ nbr_4x4_t *const ps_row_end = ps_row + u1_side_in_4x4;
+ for(ps_cur = ps_row; ps_cur != ps_row_end; ps_cur++)
+ {
+ ps_cur->b8_qp = i4_cu_qp;
+ ps_cur->b1_y_cbf = u1_cbf;
+ }
+ ps_row += i4_nbr_data_buf_stride; /* step down one row of 4x4 entries */
+ }
+}
+
+static INLINE void ihevce_debriefer_when_parent_wins( /* Commits the parent TU's results: copies its recon or IQ output, entropy-coded bytes and CABAC state from the src buffers to the dst buffers, stamps the neighbour data and NULLs the child pointers */
+ tu_tree_node_t *ps_node,
+ FT_COPY_2D *pf_copy_2d,
+ FT_CHROMA_INTERLEAVE_2D_COPY *pf_chroma_interleave_2d_copy,
+ nbr_4x4_t *ps_nbr_data_buf,
+ WORD16 *pi2_deq_data_src,
+ WORD16 *pi2_deq_data_dst,
+ WORD16 *pi2_deq_data_src_chroma,
+ WORD16 *pi2_deq_data_dst_chroma,
+ void *pv_recon_src,
+ void *pv_recon_dst,
+ void *pv_recon_src_chroma,
+ void *pv_recon_dst_chroma,
+ UWORD8 *pu1_cabac_ctxt_src,
+ UWORD8 *pu1_cabac_ctxt_dst,
+ UWORD8 *pu1_ecd_src,
+ UWORD8 *pu1_ecd_dst,
+ WORD32 i4_nbr_data_buf_stride,
+ WORD32 i4_deq_data_src_stride,
+ WORD32 i4_deq_data_dst_stride,
+ WORD32 i4_deq_data_src_stride_chroma,
+ WORD32 i4_deq_data_dst_stride_chroma,
+ WORD32 i4_recon_src_stride,
+ WORD32 i4_recon_dst_stride,
+ WORD32 i4_recon_src_stride_chroma,
+ WORD32 i4_recon_dst_stride_chroma,
+ WORD32 i4_cabac_state_table_size, /* byte count for the CABAC state memcpy */
+ WORD32 i4_cu_qp,
+ UWORD8 u1_chroma_processing_enabled, /* zero: only luma is copied and counted */
+ UWORD8 u1_is_422,
+ UWORD8 u1_is_hbd) /* non-zero doubles the recon strides and widths passed to the copy helpers */
+{
+ UWORD8 i;
+
+ UWORD32 u4_num_ecd_bytes = 0; /* total entropy-coded bytes of all copied components; used by the memmove below */
+
+ /* Y */
+ {
+ UWORD8 u1_posx = ps_node->s_luma_data.u1_posx;
+ UWORD8 u1_posy = ps_node->s_luma_data.u1_posy;
+ UWORD8 *pu1_deq_data_dst =
+ (UWORD8 *)(pi2_deq_data_dst + u1_posx + u1_posy * i4_deq_data_dst_stride);
+ UWORD8 *pu1_deq_data_src =
+ (UWORD8 *)(pi2_deq_data_src + u1_posx + u1_posy * i4_deq_data_src_stride);
+ UWORD8 *pu1_recon_dst;
+ UWORD8 *pu1_recon_src;
+
+ {
+ pu1_recon_dst = (((UWORD8 *)pv_recon_dst) + u1_posx + u1_posy * i4_recon_dst_stride);
+ pu1_recon_src = (((UWORD8 *)pv_recon_src) + u1_posx + u1_posy * i4_recon_src_stride);
+ }
+ u4_num_ecd_bytes += ps_node->s_luma_data.i4_num_bytes_used_for_ecd;
+
+ if(ps_node->s_luma_data.u1_reconBufId != UCHAR_MAX) /* a recon exists (ihevce_tu_processor stores UCHAR_MAX when it does not): copy pixels */
+ {
+ pf_copy_2d(
+ pu1_recon_dst,
+ i4_recon_dst_stride * (u1_is_hbd + 1),
+ pu1_recon_src,
+ i4_recon_src_stride * (u1_is_hbd + 1),
+ ps_node->s_luma_data.u1_size * (u1_is_hbd + 1),
+ ps_node->s_luma_data.u1_size);
+ }
+ else if(ps_node->s_luma_data.u1_cbf) /* no recon but coded coefficients: copy the IQ output instead */
+ {
+ pf_copy_2d(
+ pu1_deq_data_dst,
+ i4_deq_data_dst_stride * 2, /* x2: presumably converts WORD16 element counts to bytes - TODO confirm pf_copy_2d units */
+ pu1_deq_data_src,
+ i4_deq_data_src_stride * 2,
+ ps_node->s_luma_data.u1_size * 2,
+ ps_node->s_luma_data.u1_size);
+ }
+ }
+
+ /* Cb */
+ if(u1_chroma_processing_enabled)
+ {
+ for(i = 0; i < u1_is_422 + 1; i++)
+ {
+ UWORD8 u1_posx = ps_node->as_cb_data[i].u1_posx;
+ UWORD8 u1_posy = ps_node->as_cb_data[i].u1_posy;
+ UWORD8 *pu1_deq_data_dst =
+ (UWORD8
+ *)(pi2_deq_data_dst_chroma + (u1_posx * 2) + (u1_posy * i4_deq_data_dst_stride_chroma));
+ UWORD8 *pu1_deq_data_src =
+ (UWORD8
+ *)(pi2_deq_data_src_chroma + (u1_posx * 2) + (u1_posy * i4_deq_data_src_stride_chroma));
+ UWORD8 *pu1_recon_dst;
+ UWORD8 *pu1_recon_src;
+
+ {
+ pu1_recon_dst =
+ (((UWORD8 *)pv_recon_dst_chroma) + (u1_posx * 2) +
+ u1_posy * i4_recon_dst_stride_chroma);
+ pu1_recon_src =
+ (((UWORD8 *)pv_recon_src_chroma) + (u1_posx * 2) +
+ u1_posy * i4_recon_src_stride_chroma);
+ }
+ u4_num_ecd_bytes += ps_node->as_cb_data[i].i4_num_bytes_used_for_ecd;
+
+ if(ps_node->as_cb_data[i].u1_reconBufId != UCHAR_MAX)
+ {
+ {
+ pf_chroma_interleave_2d_copy( /* note: this helper is called with the source first, unlike pf_copy_2d above */
+ pu1_recon_src,
+ i4_recon_src_stride_chroma * (u1_is_hbd + 1),
+ pu1_recon_dst,
+ i4_recon_dst_stride_chroma * (u1_is_hbd + 1),
+ ps_node->as_cb_data[i].u1_size * (u1_is_hbd + 1),
+ ps_node->as_cb_data[i].u1_size,
+ U_PLANE);
+ }
+ }
+ else if(ps_node->as_cb_data[i].u1_cbf)
+ {
+ pf_copy_2d(
+ pu1_deq_data_dst,
+ i4_deq_data_dst_stride_chroma * 2,
+ pu1_deq_data_src,
+ i4_deq_data_src_stride_chroma * 2,
+ ps_node->as_cb_data[i].u1_size * 2,
+ ps_node->as_cb_data[i].u1_size);
+ }
+ }
+
+ /* Cr */
+ for(i = 0; i < u1_is_422 + 1; i++)
+ {
+ UWORD8 u1_posx = ps_node->as_cr_data[i].u1_posx;
+ UWORD8 u1_posy = ps_node->as_cr_data[i].u1_posy;
+ UWORD8 *pu1_deq_data_dst =
+ (UWORD8
+ *)(pi2_deq_data_dst_chroma + ps_node->as_cr_data[i].u1_size + (u1_posx * 2) + (u1_posy * i4_deq_data_dst_stride_chroma)); /* Cr IQ data is offset by the sub-TU size relative to Cb */
+ UWORD8 *pu1_deq_data_src =
+ (UWORD8
+ *)(pi2_deq_data_src_chroma + ps_node->as_cr_data[i].u1_size + (u1_posx * 2) + (u1_posy * i4_deq_data_src_stride_chroma));
+ UWORD8 *pu1_recon_dst;
+ UWORD8 *pu1_recon_src;
+
+ {
+ pu1_recon_dst =
+ (((UWORD8 *)pv_recon_dst_chroma) + (u1_posx * 2) +
+ u1_posy * i4_recon_dst_stride_chroma);
+ pu1_recon_src =
+ (((UWORD8 *)pv_recon_src_chroma) + (u1_posx * 2) +
+ u1_posy * i4_recon_src_stride_chroma);
+ }
+ u4_num_ecd_bytes += ps_node->as_cr_data[i].i4_num_bytes_used_for_ecd;
+
+ if(ps_node->as_cr_data[i].u1_reconBufId != UCHAR_MAX)
+ {
+ {
+ pf_chroma_interleave_2d_copy(
+ pu1_recon_src,
+ i4_recon_src_stride_chroma * (u1_is_hbd + 1),
+ pu1_recon_dst,
+ i4_recon_dst_stride_chroma * (u1_is_hbd + 1),
+ ps_node->as_cr_data[i].u1_size * (u1_is_hbd + 1),
+ ps_node->as_cr_data[i].u1_size,
+ V_PLANE);
+ }
+ }
+ else if(ps_node->as_cr_data[i].u1_cbf)
+ {
+ pf_copy_2d(
+ pu1_deq_data_dst,
+ i4_deq_data_dst_stride_chroma * 2,
+ pu1_deq_data_src,
+ i4_deq_data_src_stride_chroma * 2,
+ ps_node->as_cr_data[i].u1_size * 2,
+ ps_node->as_cr_data[i].u1_size);
+ }
+ }
+ }
+
+ if(pu1_ecd_dst != pu1_ecd_src)
+ {
+ memmove(pu1_ecd_dst, pu1_ecd_src, u4_num_ecd_bytes); /* memmove tolerates overlap; presumably src and dst can lie in the same ECD buffer - verify against callers */
+ }
+
+ memcpy(pu1_cabac_ctxt_dst, pu1_cabac_ctxt_src, i4_cabac_state_table_size);
+
+ ihevce_nbr_data_copier(
+ ps_nbr_data_buf,
+ i4_nbr_data_buf_stride,
+ i4_cu_qp,
+ ps_node->s_luma_data.u1_cbf,
+ ps_node->s_luma_data.u1_posx,
+ ps_node->s_luma_data.u1_posy,
+ ps_node->s_luma_data.u1_size);
+
+ ps_node->ps_child_node_tl = NULL; /* detach all four children from this node */
+ ps_node->ps_child_node_tr = NULL;
+ ps_node->ps_child_node_bl = NULL;
+ ps_node->ps_child_node_br = NULL;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_ecd_buffer_pointer_updater \endif
+*
+* \brief
+* Sets ppu1_ecd[0] to 'pu1_ecd_buf_ptr_at_t0' advanced by the entropy-coded
+* byte count of the node itself (u1_parent_has_won) or of its four children
+* Output : Number of byte positions 'pu1_ecd_buf_ptr_at_t0' is incremented by
+*****************************************************************************
+*/
+static INLINE UWORD32 ihevce_ecd_buffer_pointer_updater(
+ tu_tree_node_t *ps_node,
+ UWORD8 **ppu1_ecd,
+ UWORD8 *pu1_ecd_buf_ptr_at_t0,
+ UWORD8 u1_parent_has_won,
+ UWORD8 u1_chroma_processing_enabled,
+ UWORD8 u1_is_422)
+{
+ tu_tree_node_t *aps_winners[4];
+ UWORD8 u1_num_winners;
+ UWORD8 u1_node_idx;
+ UWORD32 u4_total_bytes = 0;
+
+ /* Gather the node(s) whose byte counts are summed */
+ if(u1_parent_has_won)
+ {
+ aps_winners[0] = ps_node;
+ u1_num_winners = 1;
+ }
+ else
+ {
+ aps_winners[0] = ps_node->ps_child_node_tl;
+ aps_winners[1] = ps_node->ps_child_node_tr;
+ aps_winners[2] = ps_node->ps_child_node_bl;
+ aps_winners[3] = ps_node->ps_child_node_br;
+ u1_num_winners = 4;
+ }
+
+ for(u1_node_idx = 0; u1_node_idx < u1_num_winners; u1_node_idx++)
+ {
+ tu_tree_node_t *ps_cur = aps_winners[u1_node_idx];
+
+ u4_total_bytes += ps_cur->s_luma_data.i4_num_bytes_used_for_ecd;
+
+ if(u1_chroma_processing_enabled)
+ {
+ UWORD8 u1_sub_tu;
+
+ /* One Cb/Cr sub-TU per plane, or two when u1_is_422 is 1 */
+ for(u1_sub_tu = 0; u1_sub_tu < u1_is_422 + 1; u1_sub_tu++)
+ {
+ u4_total_bytes += ps_cur->as_cb_data[u1_sub_tu].i4_num_bytes_used_for_ecd;
+ u4_total_bytes += ps_cur->as_cr_data[u1_sub_tu].i4_num_bytes_used_for_ecd;
+ }
+ }
+ }
+
+ /* Hand the advanced pointer back through ppu1_ecd */
+ *ppu1_ecd = pu1_ecd_buf_ptr_at_t0 + u4_total_bytes;
+
+ return u4_total_bytes;
+}
+
+/* Returns the node's luma i8_cost plus, if chroma processing is enabled, the i8_cost of each Cb and Cr sub-TU. */
+static INLINE LWORD64 ihevce_tu_node_cost_collator(
+ tu_tree_node_t *ps_node, UWORD8 u1_chroma_processing_enabled, UWORD8 u1_is_422)
+{
+ LWORD64 i8_total = ps_node->s_luma_data.i8_cost;
+
+ if(u1_chroma_processing_enabled)
+ {
+ UWORD8 u1_sub_tu = 0;
+
+ /* One sub-TU per plane, or two when u1_is_422 is 1 */
+ while(u1_sub_tu < u1_is_422 + 1)
+ {
+ i8_total += ps_node->as_cb_data[u1_sub_tu].i8_cost;
+ i8_total += ps_node->as_cr_data[u1_sub_tu].i8_cost;
+ u1_sub_tu++;
+ }
+ }
+ return i8_total;
+}
+
+#if !ENABLE_TOP_DOWN_TU_RECURSION
+/*!
+******************************************************************************
+* \if Function name : ihevce_tu_tree_selector \endif
+*
+* \notes
+* Determines RDO TU Tree using DFS. If the parent is the winner, then all
+* pointers to the children nodes are set to NULL
+* Input : 1. ps_ctxt: Pointer to enc-loop's context. Parts of this structure
+* shall be modified by this function. They include, au1_cu_csbf,
+* i8_cu_not_coded_cost, ai2_scratch, s_rdoq_sbh_ctxt,
+* pi4_quant_round_factor_tu_0_1, pi4_quant_round_factor_tu_1_2,
+* i4_quant_round_tu
+* 2. ps_node: Pointer to current node of the TU tree. This struct
+* shall be modified by this function
+* 3. ps_buffer_data: Holds the luma/chroma recon buffers, the
+* neighbour data buffer (ps_nbr_data_buf) used by succeeding CU's during
+* RDOPT, the IQ output buffers (pi2_deq_data) and the entropy-coding
+* data pointer (ppu1_ecd). The luma recon, neighbour data, IQ output and
+* ppu1_ecd[0] shall be modified by this function
+* 4. pu1_cabac_ctxt: Pointer to buffer which stores the current CABAC
+* state. This buffer shall be modified by this function
+* 5. u1_cur_depth, u1_max_depth: When equal, the node's split flag
+* is not costed; both are passed unchanged to the recursive calls
+* 6. u1_part_type, i4_pred_mode: Select the intra / intra-NxN rules
+* used while costing the split and cbf flags
+* Output : Cost of coding the current branch of the TU tree
+*****************************************************************************
+*/
+LWORD64 ihevce_tu_tree_selector(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ tu_tree_node_t *ps_node,
+ buffer_data_for_tu_t *ps_buffer_data,
+ UWORD8 *pu1_cabac_ctxt,
+ WORD32 i4_pred_mode,
+#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ WORD32 i4_alpha_stim_multiplier,
+ UWORD8 u1_is_cu_noisy,
+#endif
+ UWORD8 u1_cur_depth,
+ UWORD8 u1_max_depth,
+ UWORD8 u1_part_type,
+ UWORD8 u1_compute_spatial_ssd)
+{
+ UWORD8 au1_cabac_ctxt_backup[IHEVC_CAB_CTXT_END]; /* CABAC state as it was before the children were coded; only filled for valid nodes */
+ UWORD8 u1_are_children_available;
+ UWORD32 u4_tuSplitFlag_and_cbf_coding_bits;
+
+ nbr_4x4_t *ps_nbr_data_buf = ps_buffer_data->ps_nbr_data_buf;
+ void *pv_recon_chroma = ps_buffer_data->s_src_pred_rec_buf_chroma.pv_recon;
+ WORD16 *pi2_deq_data = ps_buffer_data->pi2_deq_data;
+ WORD16 *pi2_deq_data_chroma = ps_buffer_data->pi2_deq_data_chroma;
+ UWORD8 **ppu1_ecd = ps_buffer_data->ppu1_ecd;
+ WORD32 i4_nbr_data_buf_stride = ps_buffer_data->i4_nbr_data_buf_stride;
+ WORD32 i4_recon_stride = ps_buffer_data->s_src_pred_rec_buf_luma.i4_recon_stride;
+ WORD32 i4_recon_stride_chroma = ps_buffer_data->s_src_pred_rec_buf_chroma.i4_recon_stride;
+ WORD32 i4_deq_data_stride = ps_buffer_data->i4_deq_data_stride;
+ WORD32 i4_deq_data_stride_chroma = ps_buffer_data->i4_deq_data_stride_chroma;
+ UWORD8 *pu1_ecd_bPtr_backup_t1 = ppu1_ecd[0]; /* ECD write position on entry to this call */
+ UWORD8 *pu1_ecd_bPtr_backup_t2 = ppu1_ecd[0]; /* re-read after the children: where the parent's ECD bytes get written */
+ LWORD64 i8_winning_cost = 0;
+
+ ASSERT(ps_node != NULL);
+ ASSERT(
+ !(!ps_node->u1_is_valid_node &&
+ ((NULL == ps_node->ps_child_node_tl) || (NULL == ps_node->ps_child_node_tr) ||
+ (NULL == ps_node->ps_child_node_bl) || (NULL == ps_node->ps_child_node_br)))); /* an invalid node must have all four children */
+
+ u1_are_children_available =
+ !((NULL == ps_node->ps_child_node_tl) && (NULL == ps_node->ps_child_node_tr) &&
+ (NULL == ps_node->ps_child_node_bl) && (NULL == ps_node->ps_child_node_br)) &&
+ (ps_node->s_luma_data.u1_size > MIN_TU_SIZE);
+
+ if(u1_are_children_available)
+ {
+ if(ps_node->u1_is_valid_node)
+ {
+ memcpy(au1_cabac_ctxt_backup, pu1_cabac_ctxt, sizeof(au1_cabac_ctxt_backup)); /* the parent is later evaluated from this pre-children state */
+ }
+
+ if(i4_pred_mode != PRED_MODE_SKIP)
+ {
+ u4_tuSplitFlag_and_cbf_coding_bits = ihevce_compute_bits_for_TUSplit_and_cbf( /* cost of signalling the split (leaf = a child, so sizes differ) */
+ ps_node,
+ ps_node->ps_child_node_tl,
+ pu1_cabac_ctxt,
+ MAX_TU_SIZE,
+ MIN_TU_SIZE,
+ 0,
+ 1,
+ i4_pred_mode == PRED_MODE_INTRA,
+ (u1_part_type == PART_NxN) && (i4_pred_mode == PRED_MODE_INTRA),
+ 0,
+ 0);
+
+ i8_winning_cost += COMPUTE_RATE_COST_CLIP30(
+ u4_tuSplitFlag_and_cbf_coding_bits,
+ ps_ctxt->i8_cl_ssd_lambda_qf,
+ (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
+ }
+
+ i8_winning_cost += ihevce_tu_tree_selector( /* children are evaluated in tl, tr, bl, br order on the live buffers */
+ ps_ctxt,
+ ps_node->ps_child_node_tl,
+ ps_buffer_data,
+ pu1_cabac_ctxt,
+ i4_pred_mode,
+#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ i4_alpha_stim_multiplier,
+ u1_is_cu_noisy,
+#endif
+ u1_cur_depth,
+ u1_max_depth,
+ u1_part_type,
+ u1_compute_spatial_ssd);
+
+ i8_winning_cost += ihevce_tu_tree_selector(
+ ps_ctxt,
+ ps_node->ps_child_node_tr,
+ ps_buffer_data,
+ pu1_cabac_ctxt,
+ i4_pred_mode,
+#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ i4_alpha_stim_multiplier,
+ u1_is_cu_noisy,
+#endif
+ u1_cur_depth,
+ u1_max_depth,
+ u1_part_type,
+ u1_compute_spatial_ssd);
+
+ i8_winning_cost += ihevce_tu_tree_selector(
+ ps_ctxt,
+ ps_node->ps_child_node_bl,
+ ps_buffer_data,
+ pu1_cabac_ctxt,
+ i4_pred_mode,
+#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ i4_alpha_stim_multiplier,
+ u1_is_cu_noisy,
+#endif
+ u1_cur_depth,
+ u1_max_depth,
+ u1_part_type,
+ u1_compute_spatial_ssd);
+
+ i8_winning_cost += ihevce_tu_tree_selector(
+ ps_ctxt,
+ ps_node->ps_child_node_br,
+ ps_buffer_data,
+ pu1_cabac_ctxt,
+ i4_pred_mode,
+#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ i4_alpha_stim_multiplier,
+ u1_is_cu_noisy,
+#endif
+ u1_cur_depth,
+ u1_max_depth,
+ u1_part_type,
+ u1_compute_spatial_ssd);
+
+ if(ps_node->u1_is_valid_node)
+ {
+ WORD16 ai2_deq_data_backup[MAX_CU_SIZE * MAX_CU_SIZE]; /* scratch IQ output for the parent, so the children's data stays intact */
+ UWORD16 au2_recon_backup[MAX_CU_SIZE * MAX_CU_SIZE]; /* scratch luma recon for the parent */
+
+ buffer_data_for_tu_t s_buffer_data = ps_buffer_data[0]; /* local copy with luma IQ/recon redirected to the scratch arrays */
+
+ pu1_ecd_bPtr_backup_t2 = ppu1_ecd[0];
+ s_buffer_data.pi2_deq_data = ai2_deq_data_backup;
+ s_buffer_data.i4_deq_data_stride = MAX_CU_SIZE;
+ s_buffer_data.s_src_pred_rec_buf_luma.pv_recon = au2_recon_backup;
+ s_buffer_data.s_src_pred_rec_buf_luma.i4_recon_stride = MAX_CU_SIZE;
+
+ ihevce_tu_processor(
+ ps_ctxt,
+ ps_node,
+ &s_buffer_data,
+ au1_cabac_ctxt_backup,
+ i4_pred_mode,
+#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ i4_alpha_stim_multiplier,
+ u1_is_cu_noisy,
+#endif
+ 0, /* chroma processing disabled */
+ u1_compute_spatial_ssd);
+
+ if(i4_pred_mode != PRED_MODE_SKIP)
+ {
+ u4_tuSplitFlag_and_cbf_coding_bits = ihevce_compute_bits_for_TUSplit_and_cbf(
+ ps_node,
+ ps_node,
+ au1_cabac_ctxt_backup,
+ MAX_TU_SIZE,
+ MIN_TU_SIZE,
+ 0,
+ (u1_cur_depth == u1_max_depth) ? 0 : 1,
+ i4_pred_mode == PRED_MODE_INTRA,
+ (u1_part_type == PART_NxN) && (i4_pred_mode == PRED_MODE_INTRA),
+ 0,
+ 0);
+
+ ps_node->s_luma_data.i8_cost += COMPUTE_RATE_COST_CLIP30(
+ u4_tuSplitFlag_and_cbf_coding_bits,
+ ps_ctxt->i8_cl_ssd_lambda_qf,
+ (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
+ }
+
+ if(ps_node->s_luma_data.i8_cost <= i8_winning_cost) /* ties go to the parent */
+ {
+ ihevce_debriefer_when_parent_wins(
+ ps_node,
+ ps_ctxt->s_cmn_opt_func.pf_copy_2d,
+ ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy,
+ ps_nbr_data_buf,
+ ai2_deq_data_backup,
+ pi2_deq_data,
+ ai2_deq_data_backup + MAX_CU_SIZE * MAX_CU_SIZE, /* chroma src is never read: chroma is disabled (0) below */
+ pi2_deq_data_chroma,
+ au2_recon_backup,
+ ps_buffer_data->s_src_pred_rec_buf_luma.pv_recon, /* luma recon destination; must match i4_recon_stride (the luma stride) below */
+ au2_recon_backup + MAX_CU_SIZE * MAX_CU_SIZE,
+ pv_recon_chroma,
+ au1_cabac_ctxt_backup,
+ pu1_cabac_ctxt,
+ pu1_ecd_bPtr_backup_t2,
+ pu1_ecd_bPtr_backup_t1,
+ i4_nbr_data_buf_stride,
+ MAX_CU_SIZE,
+ i4_deq_data_stride,
+ MAX_CU_SIZE,
+ i4_deq_data_stride_chroma,
+ MAX_CU_SIZE,
+ i4_recon_stride,
+ MAX_CU_SIZE,
+ i4_recon_stride_chroma,
+ sizeof(au1_cabac_ctxt_backup),
+ ps_ctxt->i4_cu_qp,
+ 0,
+ ps_ctxt->u1_chroma_array_type == 2,
+ ps_ctxt->u1_bit_depth > 8);
+
+ ppu1_ecd[0] =
+ pu1_ecd_bPtr_backup_t1 + ps_node->s_luma_data.i4_num_bytes_used_for_ecd; /* the parent's bytes now start at the entry position */
+ i8_winning_cost = ps_node->s_luma_data.i8_cost;
+ }
+ else
+ {
+ ps_node->u1_is_valid_node = 0; /* children win: the parent is no longer a candidate */
+ }
+ }
+ }
+ else /* no children to compare against: this node is coded directly on the live buffers */
+ {
+ ASSERT(ps_node->u1_is_valid_node);
+
+ ihevce_tu_processor(
+ ps_ctxt,
+ ps_node,
+ ps_buffer_data,
+ pu1_cabac_ctxt,
+ i4_pred_mode,
+#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ i4_alpha_stim_multiplier,
+ u1_is_cu_noisy,
+#endif
+ 0, /* chroma processing disabled */
+ u1_compute_spatial_ssd);
+
+ if(i4_pred_mode != PRED_MODE_SKIP)
+ {
+ u4_tuSplitFlag_and_cbf_coding_bits = ihevce_compute_bits_for_TUSplit_and_cbf(
+ ps_node,
+ ps_node,
+ pu1_cabac_ctxt,
+ MAX_TU_SIZE,
+ MIN_TU_SIZE,
+ 0,
+ (u1_cur_depth == u1_max_depth) ? 0 : 1,
+ i4_pred_mode == PRED_MODE_INTRA,
+ (u1_part_type == PART_NxN) && (i4_pred_mode == PRED_MODE_INTRA),
+ 0,
+ 0);
+
+ ps_node->s_luma_data.i8_cost += COMPUTE_RATE_COST_CLIP30(
+ u4_tuSplitFlag_and_cbf_coding_bits,
+ ps_ctxt->i8_cl_ssd_lambda_qf,
+ (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
+ }
+
+ ppu1_ecd[0] = pu1_ecd_bPtr_backup_t1 + ps_node->s_luma_data.i4_num_bytes_used_for_ecd;
+
+ ihevce_nbr_data_copier(
+ ps_nbr_data_buf,
+ i4_nbr_data_buf_stride,
+ ps_ctxt->i4_cu_qp,
+ ps_node->s_luma_data.u1_cbf,
+ ps_node->s_luma_data.u1_posx,
+ ps_node->s_luma_data.u1_posy,
+ ps_node->s_luma_data.u1_size);
+
+ i8_winning_cost = ps_node->s_luma_data.i8_cost;
+ }
+
+ return i8_winning_cost;
+}
+#endif
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_topDown_tu_tree_selector \endif
+*
+* \notes
+* Determines RDO TU Tree using DFS. If the parent is the winner, then all
+* pointers to the children nodes are set to NULL
+* Input : 1. ps_ctxt: Pointer to enc-loop's context. Parts of this structure
+* shall be modified by this function. They include, au1_cu_csbf,
+* i8_cu_not_coded_cost, ai2_scratch, s_rdoq_sbh_ctxt,
+* pi4_quant_round_factor_tu_0_1, pi4_quant_round_factor_tu_1_2,
+* i4_quant_round_tu
+* 2. ps_node: Pointer to current node of the TU tree. This struct
+* shall be modified by this function
+* 3. pv_recon: Pointer to buffer which stores the recon
+* This buffer shall be modified by this function
+* 4. ps_nbr_data_buf: Pointer to struct used by succeeding CU's
+* during RDOPT. This buffer shall be modified by this function
+* 6. pi2_deq_data: Pointer to buffer which stores the output of IQ.
+* This buffer shall be modified by this function
+* 7. pu1_ecd: Pointer to buffer which stores the data output by
+* entropy coding. This buffer shall be modified by this function
+* 8. pu1_cabac_ctxt: Pointer to buffer which stores the current CABAC
+* state. This buffer shall be modified by this function
+* Output : Cost of coding the current branch of the TU tree
+*
+*****************************************************************************
+*/
+LWORD64 ihevce_topDown_tu_tree_selector( /* returns the cost of the winning configuration (parent TU or its children) for this node */
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ tu_tree_node_t *ps_node,
+ buffer_data_for_tu_t *ps_buffer_data,
+ UWORD8 *pu1_cabac_ctxt,
+ WORD32 i4_pred_mode,
+#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ WORD32 i4_alpha_stim_multiplier,
+ UWORD8 u1_is_cu_noisy,
+#endif
+ UWORD8 u1_cur_depth,
+ UWORD8 u1_max_depth,
+ UWORD8 u1_part_type,
+ UWORD8 u1_chroma_processing_enabled,
+ UWORD8 u1_compute_spatial_ssd)
+{
+ UWORD8 au1_cabac_ctxt_backup[IHEVC_CAB_CTXT_END]; /* private copy of the CABAC state used while trial-coding the parent TU */
+ UWORD8 u1_are_children_available;
+ UWORD32 u4_tuSplitFlag_and_cbf_coding_bits;
+
+ nbr_4x4_t *ps_nbr_data_buf = ps_buffer_data->ps_nbr_data_buf;
+
+ void *pv_recon = ps_buffer_data->s_src_pred_rec_buf_luma.pv_recon;
+ void *pv_recon_chroma = ps_buffer_data->s_src_pred_rec_buf_chroma.pv_recon;
+ WORD16 *pi2_deq_data = ps_buffer_data->pi2_deq_data;
+ WORD16 *pi2_deq_data_chroma = ps_buffer_data->pi2_deq_data_chroma;
+ UWORD8 **ppu1_ecd = ps_buffer_data->ppu1_ecd;
+ WORD32 i4_nbr_data_buf_stride = ps_buffer_data->i4_nbr_data_buf_stride;
+ WORD32 i4_recon_stride = ps_buffer_data->s_src_pred_rec_buf_luma.i4_recon_stride;
+ WORD32 i4_recon_stride_chroma = ps_buffer_data->s_src_pred_rec_buf_chroma.i4_recon_stride;
+ WORD32 i4_deq_data_stride = ps_buffer_data->i4_deq_data_stride;
+ WORD32 i4_deq_data_stride_chroma = ps_buffer_data->i4_deq_data_stride_chroma;
+ UWORD8 *pu1_ecd_bPtr_backup_t1 = ppu1_ecd[0]; /* entropy-data write position on entry to this node */
+ UWORD8 *pu1_ecd_bPtr_backup_t2 = ppu1_ecd[0]; /* re-sampled below, after the parent has been trial-coded */
+ LWORD64 i8_parent_cost = 0;
+ LWORD64 i8_child_cost = 0;
+ LWORD64 i8_winning_cost = 0;
+ UWORD8 u1_is_422 = (ps_ctxt->u1_chroma_array_type == 2);
+
+ ASSERT(ps_node != NULL);
+ ASSERT( /* an invalid node must have all four children, otherwise nothing could be coded for it */
+ !(!ps_node->u1_is_valid_node &&
+ ((NULL == ps_node->ps_child_node_tl) || (NULL == ps_node->ps_child_node_tr) ||
+ (NULL == ps_node->ps_child_node_bl) || (NULL == ps_node->ps_child_node_br))));
+
+ u1_are_children_available = /* at least one child pointer is set AND the TU is larger than MIN_TU_SIZE */
+ !((NULL == ps_node->ps_child_node_tl) && (NULL == ps_node->ps_child_node_tr) &&
+ (NULL == ps_node->ps_child_node_bl) && (NULL == ps_node->ps_child_node_br)) &&
+ (ps_node->s_luma_data.u1_size > MIN_TU_SIZE);
+
+ if(u1_are_children_available)
+ {
+ WORD16 ai2_deq_data_backup[MAX_CU_SIZE * MAX_CU_SIZE * 2]; /* first half is used for luma, second half for chroma (see pointer setup below) */
+ UWORD16 au2_recon_backup[MAX_CU_SIZE * MAX_CU_SIZE * 2]; /* same luma/chroma split as the dequant backup */
+
+ UWORD8 u1_is_tu_coded = 0;
+
+ if(ps_node->u1_is_valid_node)
+ {
+ buffer_data_for_tu_t s_buffer_data = ps_buffer_data[0]; /* local descriptor: the parent is trial-coded into the backup buffers, not the caller's */
+
+ memcpy(au1_cabac_ctxt_backup, pu1_cabac_ctxt, sizeof(au1_cabac_ctxt_backup));
+
+ s_buffer_data.pi2_deq_data = ai2_deq_data_backup;
+ s_buffer_data.i4_deq_data_stride = MAX_CU_SIZE;
+ s_buffer_data.pi2_deq_data_chroma = ai2_deq_data_backup + MAX_CU_SIZE * MAX_CU_SIZE;
+ s_buffer_data.i4_deq_data_stride_chroma = MAX_CU_SIZE;
+ s_buffer_data.s_src_pred_rec_buf_luma.pv_recon = au2_recon_backup;
+ s_buffer_data.s_src_pred_rec_buf_luma.i4_recon_stride = MAX_CU_SIZE;
+ s_buffer_data.s_src_pred_rec_buf_chroma.pv_recon =
+ au2_recon_backup + MAX_CU_SIZE * MAX_CU_SIZE;
+ s_buffer_data.s_src_pred_rec_buf_chroma.i4_recon_stride = MAX_CU_SIZE;
+
+ ihevce_tu_processor(
+ ps_ctxt,
+ ps_node,
+ &s_buffer_data,
+ au1_cabac_ctxt_backup,
+ i4_pred_mode,
+#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ i4_alpha_stim_multiplier,
+ u1_is_cu_noisy,
+#endif
+ u1_chroma_processing_enabled,
+ u1_compute_spatial_ssd);
+
+ if(i4_pred_mode != PRED_MODE_SKIP)
+ {
+ u4_tuSplitFlag_and_cbf_coding_bits = ihevce_compute_bits_for_TUSplit_and_cbf(
+ ps_node,
+ ps_node,
+ au1_cabac_ctxt_backup,
+ MAX_TU_SIZE,
+ MIN_TU_SIZE,
+ 0,
+ (u1_cur_depth == u1_max_depth) ? 0 : 1,
+ i4_pred_mode == PRED_MODE_INTRA,
+ (u1_part_type == PART_NxN) && (i4_pred_mode == PRED_MODE_INTRA),
+ u1_chroma_processing_enabled,
+ u1_is_422);
+
+ ps_node->s_luma_data.i8_cost += COMPUTE_RATE_COST_CLIP30( /* the flag/cbf rate cost is folded into the node's luma cost */
+ u4_tuSplitFlag_and_cbf_coding_bits,
+ ps_ctxt->i8_cl_ssd_lambda_qf,
+ (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
+ }
+
+ i8_parent_cost +=
+ ihevce_tu_node_cost_collator(ps_node, u1_chroma_processing_enabled, u1_is_422);
+
+ ihevce_ecd_buffer_pointer_updater( /* NOTE(review): presumably advances ppu1_ecd[0] past the parent's entropy data - confirm in the helper */
+ ps_node,
+ ppu1_ecd,
+ pu1_ecd_bPtr_backup_t1,
+ 1,
+ u1_chroma_processing_enabled,
+ u1_is_422);
+ }
+ else
+ {
+ ps_node->s_luma_data.i8_cost = i8_parent_cost = LLONG_MAX; /* parent is not a candidate: any finite child cost compares lower */
+ ps_node->s_luma_data.i4_num_bytes_used_for_ecd = 0;
+ }
+
+ u1_is_tu_coded |= ps_node->s_luma_data.u1_cbf; /* also read for an invalid parent; harmless because the test below checks validity first */
+
+ if(u1_chroma_processing_enabled)
+ {
+ UWORD8 i;
+
+ for(i = 0; i < u1_is_422 + 1; i++) /* two chroma sub-TUs per plane when u1_is_422 is set, otherwise one */
+ {
+ u1_is_tu_coded |= ps_node->as_cb_data[i].u1_cbf;
+ u1_is_tu_coded |= ps_node->as_cr_data[i].u1_cbf;
+ }
+ }
+
+ if(!ps_node->u1_is_valid_node || u1_is_tu_coded) /* children are only tried when the parent is unusable or has at least one non-zero cbf */
+ {
+ pu1_ecd_bPtr_backup_t2 = ppu1_ecd[0]; /* children's entropy data will be written starting here */
+
+ if(i4_pred_mode != PRED_MODE_SKIP)
+ {
+ u4_tuSplitFlag_and_cbf_coding_bits = ihevce_compute_bits_for_TUSplit_and_cbf(
+ ps_node,
+ ps_node->ps_child_node_tl,
+ pu1_cabac_ctxt,
+ MAX_TU_SIZE,
+ MIN_TU_SIZE,
+ 0,
+ 1,
+ i4_pred_mode == PRED_MODE_INTRA,
+ (u1_part_type == PART_NxN) && (i4_pred_mode == PRED_MODE_INTRA),
+ u1_chroma_processing_enabled,
+ u1_is_422);
+
+ i8_child_cost += COMPUTE_RATE_COST_CLIP30(
+ u4_tuSplitFlag_and_cbf_coding_bits,
+ ps_ctxt->i8_cl_ssd_lambda_qf,
+ (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
+ }
+
+ if(i8_child_cost < i8_parent_cost) /* early termination: each child is evaluated only while the running child cost is still below the parent's */
+ {
+ i8_child_cost += ihevce_topDown_tu_tree_selector(
+ ps_ctxt,
+ ps_node->ps_child_node_tl,
+ ps_buffer_data,
+ pu1_cabac_ctxt,
+ i4_pred_mode,
+#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ i4_alpha_stim_multiplier,
+ u1_is_cu_noisy,
+#endif
+ u1_cur_depth, /* NOTE(review): depth is forwarded unchanged to every child call - confirm this is intended */
+ u1_max_depth,
+ u1_part_type,
+ u1_chroma_processing_enabled,
+ u1_compute_spatial_ssd);
+
+ ps_node->ps_child_node_tl->s_luma_data.i8_cost += /* net effect: tl child's luma cost becomes i8_child_cost (split-flag rate + tl subtree cost) */
+ i8_child_cost - ps_node->ps_child_node_tl->s_luma_data.i8_cost;
+ }
+
+ if(i8_child_cost < i8_parent_cost)
+ {
+ i8_child_cost += ihevce_topDown_tu_tree_selector(
+ ps_ctxt,
+ ps_node->ps_child_node_tr,
+ ps_buffer_data,
+ pu1_cabac_ctxt,
+ i4_pred_mode,
+#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ i4_alpha_stim_multiplier,
+ u1_is_cu_noisy,
+#endif
+ u1_cur_depth,
+ u1_max_depth,
+ u1_part_type,
+ u1_chroma_processing_enabled,
+ u1_compute_spatial_ssd);
+ }
+
+ if(i8_child_cost < i8_parent_cost)
+ {
+ i8_child_cost += ihevce_topDown_tu_tree_selector(
+ ps_ctxt,
+ ps_node->ps_child_node_bl,
+ ps_buffer_data,
+ pu1_cabac_ctxt,
+ i4_pred_mode,
+#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ i4_alpha_stim_multiplier,
+ u1_is_cu_noisy,
+#endif
+ u1_cur_depth,
+ u1_max_depth,
+ u1_part_type,
+ u1_chroma_processing_enabled,
+ u1_compute_spatial_ssd);
+ }
+
+ if(i8_child_cost < i8_parent_cost)
+ {
+ i8_child_cost += ihevce_topDown_tu_tree_selector(
+ ps_ctxt,
+ ps_node->ps_child_node_br,
+ ps_buffer_data,
+ pu1_cabac_ctxt,
+ i4_pred_mode,
+#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ i4_alpha_stim_multiplier,
+ u1_is_cu_noisy,
+#endif
+ u1_cur_depth,
+ u1_max_depth,
+ u1_part_type,
+ u1_chroma_processing_enabled,
+ u1_compute_spatial_ssd);
+ }
+
+ if(i8_parent_cost > i8_child_cost) /* children win only when strictly cheaper; ties go to the parent */
+ {
+ UWORD32 u4_num_bytes = ihevce_ecd_buffer_pointer_updater(
+ ps_node,
+ ppu1_ecd,
+ pu1_ecd_bPtr_backup_t1,
+ 0,
+ u1_chroma_processing_enabled,
+ u1_is_422);
+
+ if(pu1_ecd_bPtr_backup_t2 != pu1_ecd_bPtr_backup_t1)
+ {
+ memmove(pu1_ecd_bPtr_backup_t1, pu1_ecd_bPtr_backup_t2, u4_num_bytes); /* slide the children's data down over the discarded parent data; regions may overlap, hence memmove */
+ }
+
+ ps_node->s_luma_data.i4_num_bytes_used_for_ecd = u4_num_bytes;
+ ps_node->as_cb_data[0].i4_num_bytes_used_for_ecd = 0;
+ ps_node->as_cb_data[1].i4_num_bytes_used_for_ecd = 0;
+ ps_node->as_cr_data[0].i4_num_bytes_used_for_ecd = 0;
+ ps_node->as_cr_data[1].i4_num_bytes_used_for_ecd = 0;
+
+ ps_node->u1_is_valid_node = 0; /* the split is kept: this node no longer represents a coded TU */
+
+ i8_winning_cost = i8_child_cost;
+ }
+ else
+ {
+ ihevce_debriefer_when_parent_wins( /* NOTE(review): arguments look like (backup, destination) buffer pairs followed by their strides; the helper itself is not visible here */
+ ps_node,
+ ps_ctxt->s_cmn_opt_func.pf_copy_2d,
+ ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy,
+ ps_nbr_data_buf,
+ ai2_deq_data_backup,
+ pi2_deq_data,
+ ai2_deq_data_backup + MAX_CU_SIZE * MAX_CU_SIZE,
+ pi2_deq_data_chroma,
+ au2_recon_backup,
+ pv_recon,
+ au2_recon_backup + MAX_CU_SIZE * MAX_CU_SIZE,
+ pv_recon_chroma,
+ au1_cabac_ctxt_backup,
+ pu1_cabac_ctxt,
+ NULL,
+ NULL,
+ i4_nbr_data_buf_stride,
+ MAX_CU_SIZE,
+ i4_deq_data_stride,
+ MAX_CU_SIZE,
+ i4_deq_data_stride_chroma,
+ MAX_CU_SIZE,
+ i4_recon_stride,
+ MAX_CU_SIZE,
+ i4_recon_stride_chroma,
+ sizeof(au1_cabac_ctxt_backup),
+ ps_ctxt->i4_cu_qp,
+ u1_chroma_processing_enabled,
+ u1_is_422,
+ ps_ctxt->u1_bit_depth > 8);
+
+ ihevce_ecd_buffer_pointer_updater(
+ ps_node,
+ ppu1_ecd,
+ pu1_ecd_bPtr_backup_t1,
+ 1,
+ u1_chroma_processing_enabled,
+ u1_is_422);
+
+ i8_winning_cost = i8_parent_cost;
+ }
+ }
+ else /* valid parent with every cbf zero: children are never evaluated and the parent wins outright */
+ {
+ ihevce_debriefer_when_parent_wins(
+ ps_node,
+ ps_ctxt->s_cmn_opt_func.pf_copy_2d,
+ ps_ctxt->s_cmn_opt_func.pf_chroma_interleave_2d_copy,
+ ps_nbr_data_buf,
+ ai2_deq_data_backup,
+ pi2_deq_data,
+ ai2_deq_data_backup + MAX_CU_SIZE * MAX_CU_SIZE,
+ pi2_deq_data_chroma,
+ au2_recon_backup,
+ pv_recon,
+ au2_recon_backup + MAX_CU_SIZE * MAX_CU_SIZE,
+ pv_recon_chroma,
+ au1_cabac_ctxt_backup,
+ pu1_cabac_ctxt,
+ NULL,
+ NULL,
+ i4_nbr_data_buf_stride,
+ MAX_CU_SIZE,
+ i4_deq_data_stride,
+ MAX_CU_SIZE,
+ i4_deq_data_stride_chroma,
+ MAX_CU_SIZE,
+ i4_recon_stride,
+ MAX_CU_SIZE,
+ i4_recon_stride_chroma,
+ sizeof(au1_cabac_ctxt_backup),
+ ps_ctxt->i4_cu_qp,
+ u1_chroma_processing_enabled,
+ u1_is_422,
+ ps_ctxt->u1_bit_depth > 8);
+
+ ihevce_ecd_buffer_pointer_updater(
+ ps_node,
+ ppu1_ecd,
+ pu1_ecd_bPtr_backup_t1,
+ 1,
+ u1_chroma_processing_enabled,
+ u1_is_422);
+
+ i8_winning_cost = i8_parent_cost;
+ }
+ }
+ else /* leaf of the candidate tree: code the TU directly into the caller's buffers, no backup needed */
+ {
+ ASSERT(ps_node->u1_is_valid_node);
+
+ ihevce_tu_processor(
+ ps_ctxt,
+ ps_node,
+ ps_buffer_data,
+ pu1_cabac_ctxt,
+ i4_pred_mode,
+#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ i4_alpha_stim_multiplier,
+ u1_is_cu_noisy,
+#endif
+ u1_chroma_processing_enabled,
+ u1_compute_spatial_ssd);
+
+ if(i4_pred_mode != PRED_MODE_SKIP)
+ {
+ u4_tuSplitFlag_and_cbf_coding_bits = ihevce_compute_bits_for_TUSplit_and_cbf(
+ ps_node,
+ ps_node,
+ pu1_cabac_ctxt,
+ MAX_TU_SIZE,
+ MIN_TU_SIZE,
+ 0,
+ (u1_cur_depth == u1_max_depth) ? 0 : 1,
+ i4_pred_mode == PRED_MODE_INTRA,
+ (u1_part_type == PART_NxN) && (i4_pred_mode == PRED_MODE_INTRA),
+ u1_chroma_processing_enabled,
+ u1_is_422);
+
+ ps_node->s_luma_data.i8_cost += COMPUTE_RATE_COST_CLIP30(
+ u4_tuSplitFlag_and_cbf_coding_bits,
+ ps_ctxt->i8_cl_ssd_lambda_qf,
+ (LAMBDA_Q_SHIFT + CABAC_FRAC_BITS_Q));
+ }
+
+ i8_winning_cost +=
+ ihevce_tu_node_cost_collator(ps_node, u1_chroma_processing_enabled, u1_is_422);
+
+ ihevce_ecd_buffer_pointer_updater(
+ ps_node, ppu1_ecd, pu1_ecd_bPtr_backup_t1, 1, u1_chroma_processing_enabled, u1_is_422);
+
+ ihevce_nbr_data_copier(
+ ps_nbr_data_buf,
+ i4_nbr_data_buf_stride,
+ ps_ctxt->i4_cu_qp,
+ ps_node->s_luma_data.u1_cbf,
+ ps_node->s_luma_data.u1_posx,
+ ps_node->s_luma_data.u1_posy,
+ ps_node->s_luma_data.u1_size);
+ }
+
+ return i8_winning_cost;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_tu_selector_debriefer \endif
+*
+* \notes
+* Conversion of TU Tree struct into TU info array. Collection of myriad CU
+* level data
+* Input : 1. ps_node: Pointer to current node of the TU tree. This struct
+* shall be modified by this function
+* 2. ps_final_prms: Pointer to struct that stores RDOPT output data.
+* This buffer shall be modified by this function
+* Output : 1. pi8_total_cost: Total CU-level cost
+* 2. pi8_total_non_coded_cost: Total CU level cost when no residue
+* is coded
+* 3. pi4_num_bytes_used_for_ecd: Number of bytes used for storing
+* entropy coding data
+* 4. pi4_num_bits_used_for_encoding: Number of bits used for encoding
+* 5. pu2_tu_ctr: Number of TU's in the CU
+*
+*****************************************************************************
+*/
+void ihevce_tu_selector_debriefer( /* walks the selected TU tree depth-first and appends one as_tu_enc_loop[] entry per valid (leaf) node */
+ tu_tree_node_t *ps_node,
+ enc_loop_cu_final_prms_t *ps_final_prms,
+ LWORD64 *pi8_total_cost,
+ LWORD64 *pi8_total_non_coded_cost,
+ WORD32 *pi4_num_bytes_used_for_ecd,
+ WORD32 *pi4_num_bits_used_for_encoding,
+ UWORD16 *pu2_tu_ctr,
+ WORD32 i4_cu_qp,
+ UWORD8 u1_cu_posx,
+ UWORD8 u1_cu_posy,
+ UWORD8 u1_chroma_processing_enabled,
+ UWORD8 u1_is_422,
+ TU_POS_T e_tu_pos)
+{
+ UWORD8 u1_is_chroma_tu_valid = 1; /* value written to b3_chroma_intra_mode_idx; overridden below for non-top-left 4x4 TUs */
+ WORD32 i4_log2_size;
+
+ ASSERT(ps_node != NULL);
+
+ if(ps_node->u1_is_valid_node) /* a valid node must be a leaf; an invalid node must have all four children */
+ {
+ ASSERT(
+ (NULL == ps_node->ps_child_node_tl) && (NULL == ps_node->ps_child_node_tr) &&
+ (NULL == ps_node->ps_child_node_bl) && (NULL == ps_node->ps_child_node_br));
+ }
+ else
+ {
+ ASSERT(
+ !((NULL == ps_node->ps_child_node_tl) || (NULL == ps_node->ps_child_node_tr) ||
+ (NULL == ps_node->ps_child_node_bl) || (NULL == ps_node->ps_child_node_br)));
+ }
+
+ if(ps_node->u1_is_valid_node)
+ {
+ if((4 == ps_node->s_luma_data.u1_size) && (POS_TL != e_tu_pos)) /* 4x4 luma TUs other than the top-left sibling are tagged as having no chroma */
+ {
+ u1_is_chroma_tu_valid = INTRA_PRED_CHROMA_IDX_NONE;
+ }
+
+ GETRANGE(i4_log2_size, ps_node->s_luma_data.u1_size); /* NOTE(review): macro not visible here; b3_size below subtracts 3 from its result - confirm the offset */
+
+ ps_final_prms->s_recon_datastore.au1_bufId_with_winning_LumaRecon[pu2_tu_ctr[0]] =
+ ps_node->s_luma_data.u1_reconBufId;
+ ps_final_prms->u4_cu_sad += ps_node->s_luma_data.u4_sad;
+ ps_final_prms->u1_is_cu_coded |= ps_node->s_luma_data.u1_cbf;
+ ps_final_prms->u4_cu_luma_res_bits += ps_node->s_luma_data.i4_bits;
+
+ ps_final_prms->as_tu_enc_loop[pu2_tu_ctr[0]].i4_luma_coeff_offset =
+ pi4_num_bytes_used_for_ecd[0]; /* offset = entropy-data bytes accumulated by the TUs emitted so far */
+ ps_final_prms->as_tu_enc_loop[pu2_tu_ctr[0]].s_tu.b1_y_cbf = ps_node->s_luma_data.u1_cbf;
+ ps_final_prms->as_tu_enc_loop[pu2_tu_ctr[0]].s_tu.b1_cb_cbf = 0; /* chroma cbfs default to 0; filled in below when chroma processing is enabled */
+ ps_final_prms->as_tu_enc_loop[pu2_tu_ctr[0]].s_tu.b1_cr_cbf = 0;
+ ps_final_prms->as_tu_enc_loop[pu2_tu_ctr[0]].s_tu.b1_cb_cbf_subtu1 = 0;
+ ps_final_prms->as_tu_enc_loop[pu2_tu_ctr[0]].s_tu.b1_cr_cbf_subtu1 = 0;
+ ps_final_prms->as_tu_enc_loop[pu2_tu_ctr[0]].s_tu.b3_chroma_intra_mode_idx =
+ u1_is_chroma_tu_valid;
+ ps_final_prms->as_tu_enc_loop[pu2_tu_ctr[0]].s_tu.b7_qp = i4_cu_qp;
+ ps_final_prms->as_tu_enc_loop[pu2_tu_ctr[0]].s_tu.b1_first_tu_in_cu =
+ (!ps_node->s_luma_data.u1_posx && !ps_node->s_luma_data.u1_posy); /* first TU sits at (0, 0) in the CU: both coordinates must be zero */
+ ps_final_prms->as_tu_enc_loop[pu2_tu_ctr[0]].s_tu.b1_transquant_bypass = 0;
+ ps_final_prms->as_tu_enc_loop[pu2_tu_ctr[0]].s_tu.b3_size = i4_log2_size - 3;
+ ps_final_prms->as_tu_enc_loop[pu2_tu_ctr[0]].s_tu.b4_pos_x =
+ (u1_cu_posx + ps_node->s_luma_data.u1_posx) / 4; /* position is stored in units of 4 samples */
+ ps_final_prms->as_tu_enc_loop[pu2_tu_ctr[0]].s_tu.b4_pos_y =
+ (u1_cu_posy + ps_node->s_luma_data.u1_posy) / 4;
+
+ ps_final_prms->as_tu_enc_loop_temp_prms[pu2_tu_ctr[0]].i2_luma_bytes_consumed =
+ ps_node->s_luma_data.i4_num_bytes_used_for_ecd;
+ ps_final_prms->as_tu_enc_loop_temp_prms[pu2_tu_ctr[0]].u4_luma_zero_col =
+ ps_node->s_luma_data.i4_zero_col;
+ ps_final_prms->as_tu_enc_loop_temp_prms[pu2_tu_ctr[0]].u4_luma_zero_row =
+ ps_node->s_luma_data.i4_zero_row;
+
+ pi8_total_cost[0] += ps_node->s_luma_data.i8_cost;
+ pi8_total_non_coded_cost[0] += ps_node->s_luma_data.i8_not_coded_cost;
+ pi4_num_bytes_used_for_ecd[0] += ps_node->s_luma_data.i4_num_bytes_used_for_ecd;
+ pi4_num_bits_used_for_encoding[0] += ps_node->s_luma_data.i4_bits;
+
+ if(u1_chroma_processing_enabled)
+ {
+ UWORD8 i;
+
+ for(i = 0; i < u1_is_422 + 1; i++) /* Cb: index 0 always, index 1 (sub-TU 1) only when u1_is_422 is set */
+ {
+ ps_final_prms->s_recon_datastore
+ .au1_bufId_with_winning_ChromaRecon[U_PLANE][pu2_tu_ctr[0]][i] =
+ ps_node->as_cb_data[i].u1_reconBufId;
+ ps_final_prms->u1_is_cu_coded |= ps_node->as_cb_data[i].u1_cbf;
+ ps_final_prms->u4_cu_chroma_res_bits += ps_node->as_cb_data[i].i4_bits;
+
+ ps_final_prms->as_tu_enc_loop[pu2_tu_ctr[0]].ai4_cb_coeff_offset[i] =
+ pi4_num_bytes_used_for_ecd[0];
+
+ if(!i)
+ {
+ ps_final_prms->as_tu_enc_loop[pu2_tu_ctr[0]].s_tu.b1_cb_cbf =
+ ps_node->as_cb_data[i].u1_cbf;
+ }
+ else
+ {
+ ps_final_prms->as_tu_enc_loop[pu2_tu_ctr[0]].s_tu.b1_cb_cbf_subtu1 =
+ ps_node->as_cb_data[i].u1_cbf;
+ }
+
+ ps_final_prms->as_tu_enc_loop_temp_prms[pu2_tu_ctr[0]].ai2_cb_bytes_consumed[i] =
+ ps_node->as_cb_data[i].i4_num_bytes_used_for_ecd;
+ ps_final_prms->as_tu_enc_loop_temp_prms[pu2_tu_ctr[0]].au4_cb_zero_col[i] =
+ ps_node->as_cb_data[i].i4_zero_col;
+ ps_final_prms->as_tu_enc_loop_temp_prms[pu2_tu_ctr[0]].au4_cb_zero_row[i] =
+ ps_node->as_cb_data[i].i4_zero_row;
+
+ pi8_total_cost[0] += ps_node->as_cb_data[i].i8_cost;
+ pi8_total_non_coded_cost[0] += ps_node->as_cb_data[i].i8_not_coded_cost;
+ pi4_num_bytes_used_for_ecd[0] += ps_node->as_cb_data[i].i4_num_bytes_used_for_ecd;
+ pi4_num_bits_used_for_encoding[0] += ps_node->as_cb_data[i].i4_bits;
+ }
+
+ for(i = 0; i < u1_is_422 + 1; i++) /* Cr: same bookkeeping as Cb, so all Cb offsets precede all Cr offsets */
+ {
+ ps_final_prms->s_recon_datastore
+ .au1_bufId_with_winning_ChromaRecon[V_PLANE][pu2_tu_ctr[0]][i] =
+ ps_node->as_cr_data[i].u1_reconBufId;
+ ps_final_prms->u1_is_cu_coded |= ps_node->as_cr_data[i].u1_cbf;
+ ps_final_prms->u4_cu_chroma_res_bits += ps_node->as_cr_data[i].i4_bits;
+
+ ps_final_prms->as_tu_enc_loop[pu2_tu_ctr[0]].ai4_cr_coeff_offset[i] =
+ pi4_num_bytes_used_for_ecd[0];
+
+ if(!i)
+ {
+ ps_final_prms->as_tu_enc_loop[pu2_tu_ctr[0]].s_tu.b1_cr_cbf =
+ ps_node->as_cr_data[i].u1_cbf;
+ }
+ else
+ {
+ ps_final_prms->as_tu_enc_loop[pu2_tu_ctr[0]].s_tu.b1_cr_cbf_subtu1 =
+ ps_node->as_cr_data[i].u1_cbf;
+ }
+
+ ps_final_prms->as_tu_enc_loop_temp_prms[pu2_tu_ctr[0]].ai2_cr_bytes_consumed[i] =
+ ps_node->as_cr_data[i].i4_num_bytes_used_for_ecd;
+ ps_final_prms->as_tu_enc_loop_temp_prms[pu2_tu_ctr[0]].au4_cr_zero_col[i] =
+ ps_node->as_cr_data[i].i4_zero_col;
+ ps_final_prms->as_tu_enc_loop_temp_prms[pu2_tu_ctr[0]].au4_cr_zero_row[i] =
+ ps_node->as_cr_data[i].i4_zero_row;
+
+ pi8_total_cost[0] += ps_node->as_cr_data[i].i8_cost;
+ pi8_total_non_coded_cost[0] += ps_node->as_cr_data[i].i8_not_coded_cost;
+ pi4_num_bytes_used_for_ecd[0] += ps_node->as_cr_data[i].i4_num_bytes_used_for_ecd;
+ pi4_num_bits_used_for_encoding[0] += ps_node->as_cr_data[i].i4_bits;
+ }
+ }
+
+ pu2_tu_ctr[0]++; /* one output TU entry consumed */
+ }
+ else /* split node: emit the four children in TL, TR, BL, BR order */
+ {
+ ihevce_tu_selector_debriefer(
+ ps_node->ps_child_node_tl,
+ ps_final_prms,
+ pi8_total_cost,
+ pi8_total_non_coded_cost,
+ pi4_num_bytes_used_for_ecd,
+ pi4_num_bits_used_for_encoding,
+ pu2_tu_ctr,
+ i4_cu_qp,
+ u1_cu_posx,
+ u1_cu_posy,
+ u1_chroma_processing_enabled,
+ u1_is_422,
+ POS_TL);
+
+ ihevce_tu_selector_debriefer(
+ ps_node->ps_child_node_tr,
+ ps_final_prms,
+ pi8_total_cost,
+ pi8_total_non_coded_cost,
+ pi4_num_bytes_used_for_ecd,
+ pi4_num_bits_used_for_encoding,
+ pu2_tu_ctr,
+ i4_cu_qp,
+ u1_cu_posx,
+ u1_cu_posy,
+ u1_chroma_processing_enabled,
+ u1_is_422,
+ POS_TR);
+
+ ihevce_tu_selector_debriefer(
+ ps_node->ps_child_node_bl,
+ ps_final_prms,
+ pi8_total_cost,
+ pi8_total_non_coded_cost,
+ pi4_num_bytes_used_for_ecd,
+ pi4_num_bits_used_for_encoding,
+ pu2_tu_ctr,
+ i4_cu_qp,
+ u1_cu_posx,
+ u1_cu_posy,
+ u1_chroma_processing_enabled,
+ u1_is_422,
+ POS_BL);
+
+ ihevce_tu_selector_debriefer(
+ ps_node->ps_child_node_br,
+ ps_final_prms,
+ pi8_total_cost,
+ pi8_total_non_coded_cost,
+ pi4_num_bytes_used_for_ecd,
+ pi4_num_bits_used_for_encoding,
+ pu2_tu_ctr,
+ i4_cu_qp,
+ u1_cu_posx,
+ u1_cu_posy,
+ u1_chroma_processing_enabled,
+ u1_is_422,
+ POS_BR);
+ }
+}
+
+static UWORD8 ihevce_get_curTUSplit_from_TUSplitArray( /* returns 1 if the TU of size u1_tu_size at (u1_posx, u1_posy) is flagged as split in ai4_tuSplitArray, else 0 */
+ WORD32 ai4_tuSplitArray[4], UWORD8 u1_cu_size, UWORD8 u1_tu_size, UWORD8 u1_posx, UWORD8 u1_posy)
+{
+ UWORD8 u1_is_split = 0; /* also the result for any CU/TU size not listed in the switches below */
+
+ UWORD8 u1_tuSplitArrayIndex = 0; /* which array word to read; only the 64x64 CU case selects a word other than 0 */
+ UWORD8 u1_bit_index = 0; /* bit 0 of a word holds the split flag of the largest TU that word describes */
+
+ switch(u1_cu_size)
+ {
+ case 8:
+ {
+ switch(u1_tu_size)
+ {
+ case 8:
+ {
+ u1_is_split = !!(ai4_tuSplitArray[u1_tuSplitArrayIndex] & BIT_EN(u1_bit_index));
+
+ break;
+ }
+ case 4: /* 4x4 TUs are never split further */
+ {
+ u1_is_split = 0;
+
+ break;
+ }
+ }
+
+ break;
+ }
+ case 16:
+ {
+ switch(u1_tu_size)
+ {
+ case 16:
+ {
+ u1_is_split = !!(ai4_tuSplitArray[u1_tuSplitArrayIndex] & BIT_EN(u1_bit_index));
+
+ break;
+ }
+ case 8:
+ {
+ u1_bit_index += ((u1_posx / 8) % 2) + 2 * ((u1_posy / 8) % 2) + 1; /* bits 1..4: the four 8x8 quadrants in raster order */
+ u1_is_split = !!(ai4_tuSplitArray[u1_tuSplitArrayIndex] & BIT_EN(u1_bit_index));
+
+ break;
+ }
+ case 4:
+ {
+ u1_is_split = 0;
+
+ break;
+ }
+ }
+
+ break;
+ }
+ case 32:
+ {
+ switch(u1_tu_size)
+ {
+ case 32:
+ {
+ u1_is_split = !!(ai4_tuSplitArray[u1_tuSplitArrayIndex] & BIT_EN(u1_bit_index));
+
+ break;
+ }
+ case 16:
+ {
+ u1_bit_index += 5 * ((u1_posx / 16) % 2) + 10 * ((u1_posy / 16) % 2) + 1; /* 16x16 quadrants sit at bits 1, 6, 11, 16 (5 bits per quadrant) */
+ u1_is_split = !!(ai4_tuSplitArray[u1_tuSplitArrayIndex] & BIT_EN(u1_bit_index));
+
+ break;
+ }
+ case 8:
+ {
+ u1_bit_index = 5 * ((u1_posx / 16) % 2) + 10 * ((u1_posy / 16) % 2) + 1; /* bit of the enclosing 16x16 quadrant ... */
+ u1_bit_index += ((u1_posx / 8) % 2) + 2 * ((u1_posy / 8) % 2) + 1; /* ... plus 1..4 for the 8x8 child inside it */
+ u1_is_split = !!(ai4_tuSplitArray[u1_tuSplitArrayIndex] & BIT_EN(u1_bit_index));
+
+ break;
+ }
+ case 4:
+ {
+ u1_is_split = 0;
+
+ break;
+ }
+ }
+
+ break;
+ }
+ case 64:
+ {
+ switch(u1_tu_size)
+ {
+ case 64: /* a 64x64 TU is always reported as split, without consulting the array */
+ {
+ u1_is_split = 1;
+
+ break;
+ }
+ case 32:
+ {
+ u1_tuSplitArrayIndex = ((u1_posx / 32) % 2) + 2 * ((u1_posy / 32) % 2); /* one array word per 32x32 quadrant, raster order */
+ u1_is_split = !!(ai4_tuSplitArray[u1_tuSplitArrayIndex] & BIT_EN(u1_bit_index));
+
+ break;
+ }
+ case 16:
+ {
+ u1_tuSplitArrayIndex = ((u1_posx / 32) % 2) + 2 * ((u1_posy / 32) % 2);
+ u1_bit_index += 5 * ((u1_posx / 16) % 2) + 10 * ((u1_posy / 16) % 2) + 1; /* same in-word layout as the 32x32 CU case */
+ u1_is_split = !!(ai4_tuSplitArray[u1_tuSplitArrayIndex] & BIT_EN(u1_bit_index));
+
+ break;
+ }
+ case 8:
+ {
+ u1_tuSplitArrayIndex = ((u1_posx / 32) % 2) + 2 * ((u1_posy / 32) % 2);
+ u1_bit_index += 5 * ((u1_posx / 16) % 2) + 10 * ((u1_posy / 16) % 2) + 1;
+ u1_bit_index += ((u1_posx / 8) % 2) + 2 * ((u1_posy / 8) % 2) + 1;
+ u1_is_split = !!(ai4_tuSplitArray[u1_tuSplitArrayIndex] & BIT_EN(u1_bit_index));
+
+ break;
+ }
+ case 4:
+ {
+ u1_is_split = 0;
+
+ break;
+ }
+ }
+
+ break;
+ }
+ }
+
+ return u1_is_split;
+}
+
+/*!
+******************************************************************************
+* \if Function name : ihevce_tuSplitArray_to_tuTree_mapper \endif
+*
+* \notes
+* This function assumes that 'ihevce_tu_tree_init' has been called already.
+* The pointers to the children nodes of the leaf-most nodes in the tree
+* are assigned NULL
+* Input : 1. ps_root: Pointer to root of the tree containing TU info.
+* This struct shall be modified by this function
+* 2. ai4_tuSplitArray: Array containing information about TU splits
+* Output : 1. TU tree is modified such that it reflects the information
+* coded in ai4_tuSplitArray
+*
+*****************************************************************************
+*/
+void ihevce_tuSplitArray_to_tuTree_mapper( /* recursively prunes the TU tree under ps_root so that it matches the split flags in ai4_tuSplitArray */
+ tu_tree_node_t *ps_root,
+ WORD32 ai4_tuSplitArray[4],
+ UWORD8 u1_cu_size,
+ UWORD8 u1_tu_size,
+ UWORD8 u1_min_tu_size,
+ UWORD8 u1_max_tu_size,
+ UWORD8 u1_is_skip)
+{
+ UWORD8 u1_is_split;
+
+ ASSERT(u1_min_tu_size >= MIN_TU_SIZE);
+ ASSERT(u1_max_tu_size <= MAX_TU_SIZE);
+ ASSERT(u1_min_tu_size <= u1_max_tu_size);
+
+ ASSERT(!u1_is_skip); /* the parameter is only asserted and forwarded; skip CUs are not expected here */
+
+ ASSERT(ps_root != NULL);
+ ASSERT(ps_root->s_luma_data.u1_size == u1_tu_size);
+
+ if(u1_tu_size <= u1_max_tu_size) /* expected input state: a node is valid exactly when its size does not exceed u1_max_tu_size */
+ {
+ ASSERT(ps_root->u1_is_valid_node);
+ }
+ else
+ {
+ ASSERT(!ps_root->u1_is_valid_node);
+ }
+
+ if(u1_tu_size > u1_min_tu_size) /* expected input state: a full tree, i.e. four valid half-size children above the minimum size */
+ {
+ ASSERT(ps_root->ps_child_node_tl != NULL);
+ ASSERT(ps_root->ps_child_node_tr != NULL);
+ ASSERT(ps_root->ps_child_node_bl != NULL);
+ ASSERT(ps_root->ps_child_node_br != NULL);
+ ASSERT(ps_root->ps_child_node_tl->s_luma_data.u1_size == (u1_tu_size / 2));
+ ASSERT(ps_root->ps_child_node_tr->s_luma_data.u1_size == (u1_tu_size / 2));
+ ASSERT(ps_root->ps_child_node_bl->s_luma_data.u1_size == (u1_tu_size / 2));
+ ASSERT(ps_root->ps_child_node_br->s_luma_data.u1_size == (u1_tu_size / 2));
+ ASSERT(ps_root->ps_child_node_tl->u1_is_valid_node);
+ ASSERT(ps_root->ps_child_node_tr->u1_is_valid_node);
+ ASSERT(ps_root->ps_child_node_bl->u1_is_valid_node);
+ ASSERT(ps_root->ps_child_node_br->u1_is_valid_node);
+ }
+ else
+ {
+ ASSERT(ps_root->ps_child_node_tl == NULL);
+ ASSERT(ps_root->ps_child_node_tr == NULL);
+ ASSERT(ps_root->ps_child_node_bl == NULL);
+ ASSERT(ps_root->ps_child_node_br == NULL);
+ }
+
+ u1_is_split = ihevce_get_curTUSplit_from_TUSplitArray(
+ ai4_tuSplitArray,
+ u1_cu_size,
+ u1_tu_size,
+ ps_root->s_luma_data.u1_posx,
+ ps_root->s_luma_data.u1_posy);
+
+ if(u1_tu_size == u1_min_tu_size)
+ {
+ ASSERT(!u1_is_split); /* a minimum-size TU has no children to descend into */
+ }
+
+ if(u1_is_split) /* split: this node stops being a TU and each child is mapped in turn */
+ {
+ ps_root->u1_is_valid_node = 0;
+
+ ihevce_tuSplitArray_to_tuTree_mapper(
+ ps_root->ps_child_node_tl,
+ ai4_tuSplitArray,
+ u1_cu_size,
+ ps_root->ps_child_node_tl->s_luma_data.u1_size,
+ u1_min_tu_size,
+ u1_max_tu_size,
+ u1_is_skip);
+
+ ihevce_tuSplitArray_to_tuTree_mapper(
+ ps_root->ps_child_node_tr,
+ ai4_tuSplitArray,
+ u1_cu_size,
+ ps_root->ps_child_node_tr->s_luma_data.u1_size,
+ u1_min_tu_size,
+ u1_max_tu_size,
+ u1_is_skip);
+
+ ihevce_tuSplitArray_to_tuTree_mapper(
+ ps_root->ps_child_node_bl,
+ ai4_tuSplitArray,
+ u1_cu_size,
+ ps_root->ps_child_node_bl->s_luma_data.u1_size,
+ u1_min_tu_size,
+ u1_max_tu_size,
+ u1_is_skip);
+
+ ihevce_tuSplitArray_to_tuTree_mapper(
+ ps_root->ps_child_node_br,
+ ai4_tuSplitArray,
+ u1_cu_size,
+ ps_root->ps_child_node_br->s_luma_data.u1_size,
+ u1_min_tu_size,
+ u1_max_tu_size,
+ u1_is_skip);
+ }
+ else /* not split: detach the children so this node becomes a leaf of the tree */
+ {
+ ps_root->ps_child_node_tl = NULL;
+ ps_root->ps_child_node_tr = NULL;
+ ps_root->ps_child_node_bl = NULL;
+ ps_root->ps_child_node_br = NULL;
+ }
+}
diff --git a/encoder/ihevce_tu_tree_selector.h b/encoder/ihevce_tu_tree_selector.h
new file mode 100644
index 0000000..5dd21a9
--- /dev/null
+++ b/encoder/ihevce_tu_tree_selector.h
@@ -0,0 +1,148 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file ihevce_tu_tree_selector.h
+*
+* \brief
+* This file contains definitions and declarations used for TU tree selection
+*
+* \date
+* 20/04/2016
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _TU_TREE_SELECTOR_
+#define _TU_TREE_SELECTOR_
+
+/*****************************************************************************/
+/* Structures */
+/*****************************************************************************/
+typedef struct /* source / prediction / reconstruction buffer pointers with one stride each, for a single plane */
+{
+ void *pv_src; /* presumably the source samples - not referenced in the visible code, confirm against callers */
+
+ void *pv_pred; /* presumably the prediction samples - not referenced in the visible code, confirm against callers */
+
+ void *pv_recon; /* reconstruction buffer; the TU tree selector redirects this to a backup buffer while trial-coding a parent TU */
+
+ WORD32 i4_src_stride; /* stride paired with pv_src */
+
+ WORD32 i4_pred_stride; /* stride paired with pv_pred */
+
+ WORD32 i4_recon_stride; /* stride paired with pv_recon */
+
+} src_pred_rec_buf_t;
+
+typedef struct /* bundle of working buffers handed to the TU tree selector and the TU processor */
+{
+ src_pred_rec_buf_t s_src_pred_rec_buf_luma; /* luma plane buffers */
+
+ src_pred_rec_buf_t s_src_pred_rec_buf_chroma; /* chroma plane buffers */
+
+ nbr_4x4_t *ps_nbr_data_buf; /* neighbour data buffer, passed to ihevce_nbr_data_copier by the selector */
+
+ WORD16 *pi2_deq_data; /* luma dequantised-coefficient buffer (documented in the selector as the output of IQ) */
+
+ WORD16 *pi2_deq_data_chroma; /* chroma counterpart of pi2_deq_data */
+
+ UWORD8 **ppu1_ecd; /* ppu1_ecd[0] is the current write position in the entropy-coding data buffer */
+
+ WORD32 i4_nbr_data_buf_stride; /* stride paired with ps_nbr_data_buf */
+
+ WORD32 i4_deq_data_stride; /* stride paired with pi2_deq_data */
+
+ WORD32 i4_deq_data_stride_chroma; /* stride paired with pi2_deq_data_chroma */
+
+} buffer_data_for_tu_t;
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+extern WORD32 ihevce_tu_tree_coverage_in_cu(tu_tree_node_t *ps_node);
+
+extern UWORD16 ihevce_tu_tree_init(
+ tu_tree_node_t *ps_root,
+ UWORD8 u1_cu_size,
+ UWORD8 u1_min_tree_depth,
+ UWORD8 u1_max_tree_depth,
+ UWORD8 u1_chroma_processing_enabled,
+ UWORD8 u1_is_422);
+#if !ENABLE_TOP_DOWN_TU_RECURSION
+extern LWORD64 ihevce_tu_tree_selector(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ tu_tree_node_t *ps_node,
+ buffer_data_for_tu_t *ps_buffer_data,
+ UWORD8 *pu1_cabac_ctxt,
+ WORD32 i4_pred_mode,
+#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ WORD32 i4_alpha_stim_multiplier,
+ UWORD8 u1_is_cu_noisy,
+#endif
+ UWORD8 u1_cur_depth,
+ UWORD8 u1_max_depth,
+ UWORD8 u1_part_type,
+ UWORD8 u1_compute_spatial_ssd);
+#endif
+extern LWORD64 ihevce_topDown_tu_tree_selector(
+ ihevce_enc_loop_ctxt_t *ps_ctxt,
+ tu_tree_node_t *ps_node,
+ buffer_data_for_tu_t *ps_buffer_data,
+ UWORD8 *pu1_cabac_ctxt,
+ WORD32 i4_pred_mode,
+#if USE_NOISE_TERM_IN_ZERO_CODING_DECISION_ALGORITHMS
+ WORD32 i4_alpha_stim_multiplier,
+ UWORD8 u1_is_cu_noisy,
+#endif
+ UWORD8 u1_cur_depth,
+ UWORD8 u1_max_depth,
+ UWORD8 u1_part_type,
+ UWORD8 u1_chroma_processing_enabled,
+ UWORD8 u1_compute_spatial_ssd);
+
+extern void ihevce_tu_selector_debriefer(
+ tu_tree_node_t *ps_node,
+ enc_loop_cu_final_prms_t *ps_final_prms,
+ LWORD64 *pi8_total_cost,
+ LWORD64 *pi8_total_non_coded_cost,
+ WORD32 *pi4_num_bytes_used_for_ecd,
+ WORD32 *pi4_num_bits_used_for_encoding,
+ UWORD16 *pu2_tu_ctr,
+ WORD32 i4_cu_qp,
+ UWORD8 u1_cu_posx,
+ UWORD8 u1_cu_posy,
+ UWORD8 u1_chroma_processing_enabled,
+ UWORD8 u1_is_422,
+ TU_POS_T e_tu_pos);
+
+extern void ihevce_tuSplitArray_to_tuTree_mapper(
+ tu_tree_node_t *ps_root,
+ WORD32 ai4_tuSplitArray[4],
+ UWORD8 u1_cu_size,
+ UWORD8 u1_tu_size,
+ UWORD8 u1_min_tu_size,
+ UWORD8 u1_max_tu_size,
+ UWORD8 u1_is_skip);
+
+#endif
diff --git a/encoder/init_qp.c b/encoder/init_qp.c
new file mode 100644
index 0000000..993f074
--- /dev/null
+++ b/encoder/init_qp.c
@@ -0,0 +1,198 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file init_qp.c
+*
+* \brief
+* This file contains qp initialization functions
+*
+* \date
+*
+* \author
+* ittiam
+*
+******************************************************************************
+*/
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* User include files */
+#include "ittiam_datatypes.h"
+#include "rc_cntrl_param.h"
+#include "var_q_operator.h"
+#include "mem_req_and_acq.h"
+#include "rc_common.h"
+#include "init_qp.h"
+
+typedef struct init_qp_t
+{
+ /* State used by get_init_qp_using_pels_bits_per_frame() to derive an initial QP */
+ WORD32 i4_max_qp; /* upper clamp for the returned QP; largest of the max-QP entries given at init */
+ WORD32 i4_num_pels_in_frame; /* pel count used when the caller passes 0 remaining pels; stored as 1 if 0 */
+ WORD32 i4_is_hbr; /* non-zero selects the HBR constants (QP_FOR_ONE_BPP_HBR, P_TO_B_RATIO_HBR) */
+} init_qp_t;
+
+#define BPP_Q_FACTOR (16)
+#define QP_FOR_ONE_BPP (3) /*(10)*/
+
+#if NON_STEADSTATE_CODE
+WORD32 init_qp_num_fill_use_free_memtab(
+    init_qp_handle *pps_init_qp, itt_memtab_t *ps_memtab, ITT_FUNC_TYPE_E e_func_type)
+{
+    /* Stand-in state: during the memtab count/fill passes no real state memory
+       exists yet, so the handle is pointed here to keep it dereferenceable */
+    static init_qp_t s_placeholder_state;
+    const WORD32 i4_num_memtabs = 1; /* this module uses exactly one memtab */
+    if((GET_NUM_MEMTAB == e_func_type) || (FILL_MEMTAB == e_func_type))
+    {
+        *pps_init_qp = &s_placeholder_state;
+    }
+
+    /* Every pass except the counting pass describes the state memtab and hands
+       it to use_or_fill_base() together with the handle */
+    if(GET_NUM_MEMTAB != e_func_type)
+    {
+        fill_memtab(&ps_memtab[0], sizeof(init_qp_t), MEM_TAB_ALIGNMENT, PERSISTENT, DDR);
+        use_or_fill_base(&ps_memtab[0], (void **)pps_init_qp, e_func_type);
+    }
+
+    return i4_num_memtabs;
+}
+
+/****************************************************************************
+Function Name : init_init_qp
+Description   : Stores the max QP clamp, the frame pel count and the HBR flag
+                in the init-QP state.
+Inputs        : ps_init_qp           - state to initialise
+                pi4_min_max_qp       - QP limits; entries [1], [3] and [5] are read
+                i4_num_pels_in_frame - pels per frame (0 is stored as 1)
+                i4_is_hbr            - high-bit-rate flag, stored as is
+ *****************************************************************************/
+void init_init_qp(
+    init_qp_handle ps_init_qp, WORD32 *pi4_min_max_qp, WORD32 i4_num_pels_in_frame, WORD32 i4_is_hbr)
+{
+    /* Max QP among the I, P and B frame entries */
+    WORD32 i4_largest_qp = pi4_min_max_qp[1];
+    WORD32 i4_idx;
+
+    for(i4_idx = 3; i4_idx <= 5; i4_idx += 2)
+    {
+        if(pi4_min_max_qp[i4_idx] > i4_largest_qp)
+            i4_largest_qp = pi4_min_max_qp[i4_idx];
+    }
+
+    ps_init_qp->i4_max_qp = i4_largest_qp;
+    /* Never store a zero pel count */
+    ps_init_qp->i4_num_pels_in_frame = (0 != i4_num_pels_in_frame) ? i4_num_pels_in_frame : 1;
+    ps_init_qp->i4_is_hbr = i4_is_hbr;
+}
+#endif /* #if NON_STEADSTATE_CODE */
+
+/* To ensure init_qp for high bit rates is low */
+#define QP_FOR_ONE_BPP_HBR (5)
+
+/****************************************************************************
+Function Name : get_init_qp_using_pels_bits_per_frame
+Description   : Derives an initial QP from the bits available per pel,
+                QP = qp_for_one_bpp * pels / bits, then scales it by picture
+                type and limits it to i4_max_qp (a result of 0 becomes 1).
+Inputs        : ps_init_qp                 - init-QP state
+                e_pic_type                 - picture type being coded
+                i4_bits_remaining_in_frame - bit budget used in the estimate
+                i4_rem_pels_in_frame       - pels to cover; when 0 the pel count stored at init is used
+Returns       : the estimated QP
+ *****************************************************************************/
+WORD32 get_init_qp_using_pels_bits_per_frame(
+    init_qp_handle ps_init_qp,
+    picture_type_e e_pic_type,
+    WORD32 i4_bits_remaining_in_frame,
+    WORD32 i4_rem_pels_in_frame)
+{
+    /* e_pic_type never changes below, so classify it once; the two tests stay independent */
+    const WORD32 i4_is_p_type = (e_pic_type == P_PIC || e_pic_type == P1_PIC);
+    const WORD32 i4_is_b_type = (e_pic_type >= B_PIC && e_pic_type != P1_PIC);
+    /* QP assigned to one bit per pel; the HBR constant is used when i4_is_hbr is set */
+    WORD32 i4_qp_for_one_bpp = ps_init_qp->i4_is_hbr ? QP_FOR_ONE_BPP_HBR : QP_FOR_ONE_BPP;
+    WORD32 i4_qp;
+
+    if(0 == i4_rem_pels_in_frame)
+    {
+        i4_rem_pels_in_frame = ps_init_qp->i4_num_pels_in_frame;
+    }
+
+    /* Scale the bit budget by the picture-type bit ratios before converting it to bits per pel */
+    if(i4_is_p_type)
+    {
+        i4_bits_remaining_in_frame = i4_bits_remaining_in_frame * I_TO_P_BIT_RATIO;
+    }
+    if(i4_is_b_type)
+    {
+        i4_bits_remaining_in_frame =
+            i4_bits_remaining_in_frame * (I_TO_P_BIT_RATIO * P_TO_B_BIT_RATIO);
+    }
+
+    /* bpp = bits / pels and QP is taken as inversely proportional to bpp, hence
+       QP = i4_qp_for_one_bpp * i4_rem_pels_in_frame / i4_bits_remaining_in_frame */
+    X_PROD_Y_DIV_Z(i4_qp_for_one_bpp, i4_rem_pels_in_frame, i4_bits_remaining_in_frame, i4_qp);
+
+    /* Picture-type dependent QP scaling; each ratio factor is followed by a right shift of K_Q */
+    if(i4_is_p_type)
+    {
+        i4_qp = ((i4_qp * I_TO_P_RATIO) >> K_Q);
+    }
+    if(i4_is_b_type)
+    {
+        i4_qp = ps_init_qp->i4_is_hbr
+                    ? ((i4_qp * P_TO_B_RATIO_HBR * I_TO_P_RATIO) >> (K_Q + K_Q))
+                    : ((i4_qp * P_TO_B_RATIO * I_TO_P_RATIO) >> (K_Q + K_Q));
+    }
+
+    /* Limit to the configured maximum; an exact 0 is bumped to 1 */
+    if(i4_qp > ps_init_qp->i4_max_qp)
+        i4_qp = ps_init_qp->i4_max_qp;
+    else if(0 == i4_qp)
+        i4_qp = 1;
+
+    return i4_qp;
+}
+
+#if NON_STEADSTATE_CODE
+/****************************************************************************
+Function Name : change_init_qp_max_qp
+Description   : Replaces the stored max QP clamp with the largest of entries
+                [1], [3] and [5] of pi4_min_max_qp.
+Inputs        : ps_init_qp     - init-QP state to update
+                pi4_min_max_qp - QP limits array
+ *****************************************************************************/
+void change_init_qp_max_qp(init_qp_handle ps_init_qp, WORD32 *pi4_min_max_qp)
+{
+    /* Max QP among the I, P and B frame entries, same selection as init_init_qp() */
+    const WORD32 i4_qp_a = pi4_min_max_qp[1];
+    const WORD32 i4_qp_b = pi4_min_max_qp[3];
+    const WORD32 i4_qp_c = pi4_min_max_qp[5];
+    WORD32 i4_largest_qp = (i4_qp_a < i4_qp_b) ? i4_qp_b : i4_qp_a;
+
+    if(i4_largest_qp < i4_qp_c)
+        i4_largest_qp = i4_qp_c;
+
+    ps_init_qp->i4_max_qp = i4_largest_qp;
+}
+#endif /* #if NON_STEADSTATE_CODE */
diff --git a/encoder/init_qp.h b/encoder/init_qp.h
new file mode 100644
index 0000000..32f9dca
--- /dev/null
+++ b/encoder/init_qp.h
@@ -0,0 +1,64 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file init_qp.h
+*
+* \brief
+* This file contains all the necessary declarations for
+* qp initialization functions
+*
+* \date
+*
+* \author
+* ittiam
+*
+******************************************************************************
+*/
+#ifndef _INIT_QP_H_
+#define _INIT_QP_H_
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+typedef struct init_qp_t *init_qp_handle;
+
+/*****************************************************************************/
+/* Function Declarations */
+/*****************************************************************************/
+/* Memtab count/fill/use entry point for the init-QP state; returns the number of memtabs (1) */
+WORD32 init_qp_num_fill_use_free_memtab(
+ init_qp_handle *pps_init_qp, itt_memtab_t *ps_memtab, ITT_FUNC_TYPE_E e_func_type);
+/* Stores the max QP (largest of pi4_min_max_qp[1], [3], [5]), the pel count (0 -> 1) and the HBR flag */
+void init_init_qp(
+ init_qp_handle ps_init_qp,
+ WORD32 *pi4_min_max_qp,
+ WORD32 i4_num_pels_in_frame,
+ WORD32 i4_is_hbr);
+
+/* If i4_rem_pels_in_frame is zero, the pel count stored at init time is used to compute the bits per pixel */
+WORD32 get_init_qp_using_pels_bits_per_frame(
+ init_qp_handle ps_init_qp,
+ picture_type_e e_pic_type,
+ WORD32 i4_bits_remaining_in_frame,
+ WORD32 i4_rem_pels_in_frame);
+/* Recomputes the stored max QP from pi4_min_max_qp[1], [3] and [5] */
+void change_init_qp_max_qp(init_qp_handle ps_init_qp, WORD32 *pi4_min_max_qp);
+#endif /* _INIT_QP_H_ */
diff --git a/encoder/itt_video_api.h b/encoder/itt_video_api.h
new file mode 100644
index 0000000..b475915
--- /dev/null
+++ b/encoder/itt_video_api.h
@@ -0,0 +1,466 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file itt_video_api.h
+*
+* \brief
+* This file contains the necessary structure and enumeration definitions
+* needed for the Application Program Interface(API)
+*
+* \date
+* 18 09 2010
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+#ifndef _ITT_VIDEO_API_H_
+#define _ITT_VIDEO_API_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+
+/** @brief IV_API_CALL_STATUS_T: Pass/fail status returned to the application
+ * for the current API call
+ */
+
+typedef enum
+{
+ IV_FAIL = 0xFFFFFFFF,
+ IV_SUCCESS = 0
+} IV_API_CALL_STATUS_T;
+
+typedef enum /* IV_ARCH_T: architecture / instruction-set identifiers */
+{
+ ARCH_NA = 0x7FFFFFFF, /* presumably "not applicable / unspecified" - confirm against users */
+ ARCH_ARM_NONEON = 0x0, /* the enumerators below take the consecutive values 1..10 */
+ ARCH_ARM_V8_NEON,
+ ARCH_ARM_A9Q,
+ ARCH_ARM_A7,
+ ARCH_ARM_A5,
+ ARCH_ARM_NEONINTR,
+ ARCH_X86_GENERIC,
+ ARCH_X86_SSSE3,
+ ARCH_X86_SSE4,
+ ARCH_X86_AVX,
+ ARCH_X86_AVX2
+} IV_ARCH_T;
+
+/** @brief IV_MEM_TYPE_T: This enumeration defines the type of memory
+ * (internal/external) along with the cacheable/non-cacheable attributes.
+ * Additional memtypes added (Normal, Numa_Node0, Numa_Node1)
+ */
+
+typedef enum
+{
+ IV_NA_MEM_TYPE = 0xFFFFFFFF,
+ IV_INTERNAL_CACHEABLE_PERSISTENT_MEM = 0x1,
+ IV_INTERNAL_CACHEABLE_SCRATCH_MEM = 0x2,
+ IV_EXTERNAL_CACHEABLE_PERSISTENT_MEM = 0x3,
+ IV_EXTERNAL_CACHEABLE_SCRATCH_MEM = 0x4,
+ IV_INTERNAL_NONCACHEABLE_PERSISTENT_MEM = 0x5,
+ IV_INTERNAL_NONCACHEABLE_SCRATCH_MEM = 0x6,
+ IV_EXTERNAL_NONCACHEABLE_PERSISTENT_MEM = 0x7,
+ IV_EXTERNAL_NONCACHEABLE_SCRATCH_MEM = 0x8,
+
+ IV_EXT_CACHEABLE_NORMAL_MEM = 0x9,
+ IV_EXT_CACHEABLE_NUMA_NODE0_MEM = 0xA,
+ IV_EXT_CACHEABLE_NUMA_NODE1_MEM = 0xB,
+
+} IV_MEM_TYPE_T;
+
+/** @brief IV_COLOR_FORMAT_T: This enumeration lists all the color formats which
+ * find usage in video/image codecs
+ */
+
+typedef enum
+{
+ IV_CHROMA_NA = 0xFFFFFFFF,
+ IV_YUV_420P = 0x1,
+ IV_YUV_422P = 0x2,
+ IV_420_UV_INTL = 0x3,
+ IV_YUV_422IBE = 0x4,
+ IV_YUV_422ILE = 0x5,
+ IV_YUV_444P = 0x6,
+ IV_YUV_411P = 0x7,
+ IV_GRAY = 0x8,
+ IV_RGB_565 = 0x9,
+ IV_RGB_24 = 0xa,
+ IV_YUV_420SP_UV = 0xb,
+ IV_YUV_420SP_VU = 0xc,
+ IV_YUV_422SP_UV = 0xd,
+ IV_YUV_422SP_VU = 0xe
+
+} IV_COLOR_FORMAT_T;
+
+/** @brief IV_PICTURE_CODING_TYPE_T: VOP/frame coding type enumeration */
+
+typedef enum
+{
+ IV_NA_FRAME = 0xFFFFFFFF,
+ IV_I_FRAME = 0x0,
+ IV_P_FRAME = 0x1,
+ IV_B_FRAME = 0x2,
+ IV_IDR_FRAME = 0x3,
+ IV_II_FRAME = 0x4,
+ IV_IP_FRAME = 0x5,
+ IV_IB_FRAME = 0x6,
+ IV_PI_FRAME = 0x7,
+ IV_PP_FRAME = 0x8,
+ IV_PB_FRAME = 0x9,
+ IV_BI_FRAME = 0xa,
+ IV_BP_FRAME = 0xb,
+ IV_BB_FRAME = 0xc,
+ IV_MBAFF_I_FRAME = 0xd,
+ IV_MBAFF_P_FRAME = 0xe,
+ IV_MBAFF_B_FRAME = 0xf,
+ IV_MBAFF_IDR_FRAME = 0x10,
+ IV_NOT_CODED_FRAME = 0x11,
+ IV_FRAMETYPE_DEFAULT = IV_I_FRAME /* alias: the default frame type is IV_I_FRAME (0x0) */
+} IV_PICTURE_CODING_TYPE_T;
+
+/** @brief IV_FLD_TYPE_T: Field type enumeration */
+
+typedef enum
+{
+ IV_NA_FLD = 0xFFFFFFFF,
+ IV_TOP_FLD = 0x0,
+ IV_BOT_FLD = 0x1,
+ IV_FLD_TYPE_DEFAULT = IV_TOP_FLD /* alias: the default field type is IV_TOP_FLD (0x0) */
+} IV_FLD_TYPE_T;
+
+/** @brief IV_CONTENT_TYPE_T: Video content type enumeration */
+
+typedef enum
+{
+ IV_CONTENTTYPE_NA = -1,
+ IV_PROGRESSIVE = 0x0,
+ IV_INTERLACED = 0x1,
+ IV_PROGRESSIVE_FRAME = 0x2,
+ IV_INTERLACED_FRAME = 0x3,
+ IV_INTERLACED_TOPFIELD = 0x4,
+ IV_INTERLACED_BOTTOMFIELD = 0x5,
+ IV_CONTENTTYPE_DEFAULT = IV_PROGRESSIVE, /* alias: the default content type is IV_PROGRESSIVE (0x0) */
+} IV_CONTENT_TYPE_T;
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+
+/** @brief iv_mem_rec_t: This structure defines the memory record holder which will
+ * be used by the modules to communicate their memory requirements to the
+ * memory manager through appropriate API functions
+ */
+
+typedef struct
+{
+ /** Size of the structure : used for version tracking */
+ WORD32 i4_size;
+
+ /** Pointer to the memory allocated by the memory manager */
+ void *pv_base;
+
+ /** Size of the memory to be allocated */
+ WORD32 i4_mem_size;
+
+ /** Alignment of the memory pointer */
+ WORD32 i4_mem_alignment;
+
+ /** Nature of the memory to be allocated */
+ IV_MEM_TYPE_T e_mem_type;
+
+} iv_mem_rec_t;
+
+/** @brief iv_input_bufs_req_t: This structure contains the parameters
+ * related to input (data and control) buffer requirements of the codec.
+ * Application can call the memory query API to get these requirements
+ */
+
+typedef struct
+{
+ /** Size of the structure : used for version tracking */
+ WORD32 i4_size;
+
+ /** Minimum sets of input buffers required for the codec */
+ WORD32 i4_min_num_yuv_bufs;
+
+ /** YUV format of the input */
+ WORD32 i4_yuv_format;
+
+ /** Minimum size in bytes of Luma input buffer */
+ WORD32 i4_min_size_y_buf;
+
+ /** Minimum size in bytes of Cb-Cr input buffer.
+ * If input format is semiplanar then size will include
+ * both Cb and Cr requirements
+ */
+ WORD32 i4_min_size_uv_buf;
+
+ /** Minimum sets of synchronous command buffers
+ * required for the codec
+ */
+ WORD32 i4_min_num_synch_ctrl_bufs;
+
+ /** Minimum size of the synchronous command buffer */
+ WORD32 i4_min_size_synch_ctrl_bufs;
+
+ /** Minimum sets of asynchronous command buffers
+ * required for the codec
+ */
+ WORD32 i4_min_num_asynch_ctrl_bufs;
+
+ /** Minimum size of the asynchronous command buffer */
+ WORD32 i4_min_size_asynch_ctrl_bufs;
+
+} iv_input_bufs_req_t;
+
+/** @brief iv_output_bufs_req_t: This structure contains the parameters
+ * related to output (data and control) buffer requirements for a
+ * given target resolution of the codec
+ */
+
+typedef struct
+{
+ /** Size of the structure : used for version tracking */
+ WORD32 i4_size;
+
+ /** Minimum sets of output buffers required for the codec */
+ WORD32 i4_min_num_out_bufs;
+
+ /** Minimum size in bytes of bitstream buffer */
+ WORD32 i4_min_size_bitstream_buf;
+
+} iv_output_bufs_req_t;
+
+/** @brief iv_recon_bufs_req_t: This structure contains the parameters
+ * related to recon buffer requirements for a
+ * given target resolution of the codec
+ */
+
+typedef struct
+{
+ /** Size of the structure : used for version tracking */
+ WORD32 i4_size;
+
+ /** Minimum sets of recon buffers required for the codec */
+ WORD32 i4_min_num_recon_bufs;
+
+ /** Minimum size in bytes of Luma recon buffer */
+ WORD32 i4_min_size_y_buf;
+
+ /** Minimum size in bytes of Cb-Cr recon buffer.
+ * If the format is semiplanar then size will include
+ * both Cb and Cr requirements
+ */
+ WORD32 i4_min_size_uv_buf;
+
+} iv_recon_bufs_req_t;
+
+/** @brief iv_input_data_ctrl_buffs_desc_t: This structure contains the parameters
+ * related to input (data and sync control) buffers.
+ * Application should allocate these buffers and pass them to the codec
+ */
+
+typedef struct
+{
+ /** Size of the structure : used for version tracking */
+ WORD32 i4_size;
+
+ /** Number of sets of input buffers allocated by application */
+ WORD32 i4_num_yuv_bufs;
+
+ /** Size in bytes of each Luma input buffer passed */
+ WORD32 i4_size_y_buf;
+
+ /** Pointer to array of input Luma buffer pointers */
+ void **ppv_y_buf;
+
+ /** Size in bytes of each Cb-Cr input buffer passed.
+ * If input format is semiplanar then size should include
+ * both Cb and Cr requirements
+ */
+ WORD32 i4_size_uv_buf;
+
+ /** Pointer to array of input Chroma Cb buffer pointers */
+ void **ppv_u_buf;
+
+ /** Pointer to array of input Chroma Cr buffer pointers.
+ * Applicable if input format is planar
+ */
+ void **ppv_v_buf;
+
+ /** Number of sets of sync control buffers allocated by application */
+ WORD32 i4_num_synch_ctrl_bufs;
+
+ /** Size of each synchronous command buffer passed */
+ WORD32 i4_size_synch_ctrl_bufs;
+
+ /** Pointer to array of input sync command buffer pointers */
+ void **ppv_synch_ctrl_bufs;
+
+} iv_input_data_ctrl_buffs_desc_t;
+
+/** @brief iv_input_asynch_ctrl_buffs_desc_t: This structure contains the parameters
+ * related to input async control buffers.
+ * Application should allocate these buffers and pass them to the codec
+ */
+
+typedef struct
+{
+ /** Size of the structure : used for version tracking */
+ WORD32 i4_size;
+
+ /** Number of sets of async control buffers allocated by application */
+ WORD32 i4_num_asynch_ctrl_bufs;
+
+ /** Size of each asynchronous command buffer */
+ WORD32 i4_size_asynch_ctrl_bufs;
+
+ /** Pointer to array of async command buffer pointers */
+ void **ppv_asynch_ctrl_bufs;
+
+} iv_input_asynch_ctrl_buffs_desc_t;
+
+/** @brief iv_output_data_buffs_desc_t: This structure contains the parameters
+ * related to output data buffers for a given resolution layer.
+ * Application should allocate these buffers and pass them to the codec
+ */
+
+typedef struct
+{
+ /** Size of the structure : used for version tracking */
+ WORD32 i4_size;
+
+ /** Number of sets of output buffers allocated by application */
+ WORD32 i4_num_bitstream_bufs;
+
+ /** Size in bytes of each bitstream buffer passed */
+ WORD32 i4_size_bitstream_buf;
+
+ /** Pointer to array of output buffer pointers */
+ void **ppv_bitstream_bufs;
+
+} iv_output_data_buffs_desc_t;
+
+/** @brief iv_output_status_buffs_desc_t: This structure contains the parameters
+ * related to output control acknowledgement buffers.
+ * Application should allocate these buffers and pass them to the codec
+ */
+
+typedef struct
+{
+ /** Size of the structure : used for version tracking */
+ WORD32 i4_size;
+
+ /** Number of sets of async control ack buffers allocated by application */
+ WORD32 i4_num_asynch_status_bufs;
+
+ /** Size of each asynchronous command acknowledge buffer passed */
+ WORD32 i4_size_asynch_status_bufs;
+
+ /** Pointer to array of async command ack buffer pointers */
+ void **ppv_asynch_status_bufs;
+
+} iv_output_status_buffs_desc_t;
+
+/** @brief iv_recon_data_buffs_desc_t: This structure contains the parameters
+ * related to recon data buffers.
+ * Application should allocate these buffers and pass them to the codec
+ */
+
+typedef struct
+{
+ /** Size of the structure : used for version tracking */
+ WORD32 i4_size;
+
+ /** Number of sets of recon buffers allocated by application */
+ WORD32 i4_num_recon_bufs;
+
+ /** Size in bytes of each Luma recon buffer passed */
+ WORD32 i4_size_y_buf;
+
+ /** Pointer to array of recon Luma buffer pointers */
+ void **ppv_y_buf;
+
+ /** Size in bytes of each Cb-Cr recon buffer passed.
+ * If the format is semiplanar then size should include
+ * both Cb and Cr requirements
+ */
+ WORD32 i4_size_uv_buf;
+
+ /** Pointer to array of recon Chroma Cb buffer pointers */
+ void **ppv_u_buf;
+
+ /** Pointer to array of recon Chroma Cr buffer pointers.
+ * Applicable if the format is planar
+ */
+ void **ppv_v_buf;
+
+} iv_recon_data_buffs_desc_t;
+
+/** @brief iv_yuv_buf_t: This structure defines attributes
+ * for the input yuv buffer
+ */
+typedef struct
+{
+ /** Size of the structure */
+ WORD32 i4_size;
+
+ /** Pointer to Luma (Y) Buffer */
+ void *pv_y_buf;
+
+ /** Pointer to Chroma (Cb) Buffer */
+ void *pv_u_buf;
+
+ /** Pointer to Chroma (Cr) Buffer */
+ void *pv_v_buf;
+
+ /** Width of the Luma (Y) Buffer in pixels */
+ WORD32 i4_y_wd;
+
+ /** Height of the Luma (Y) Buffer in pixels */
+ WORD32 i4_y_ht;
+
+ /** Stride/Pitch of the Luma (Y) Buffer */
+ WORD32 i4_y_strd;
+
+ /** Width of the Chroma (Cb / Cr) Buffer in pixels */
+ WORD32 i4_uv_wd;
+
+ /** Height of the Chroma (Cb / Cr) Buffer in pixels */
+ WORD32 i4_uv_ht;
+
+ /** Stride/Pitch of the Chroma (Cb / Cr) Buffer */
+ WORD32 i4_uv_strd;
+
+} iv_yuv_buf_t;
+
+#endif /* _ITT_VIDEO_API_H_ */
diff --git a/encoder/ittiam_datatypes.h b/encoder/ittiam_datatypes.h
new file mode 100644
index 0000000..193c8a5
--- /dev/null
+++ b/encoder/ittiam_datatypes.h
@@ -0,0 +1,69 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* */
+/* File Name : ittiam_datatypes.h */
+/* */
+/* Description : This file has the definitions of the data types used */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 06 04 2006 Malavika Draft */
+/* */
+/*****************************************************************************/
+
+#ifndef _ITTIAM_DATATYPES_H_
+#define _ITTIAM_DATATYPES_H_
+
+/*****************************************************************************/
+/* Unsigned data types (the name suffix gives the intended width in bits)    */
+/*****************************************************************************/
+typedef unsigned char UWORD8;
+typedef unsigned short UWORD16;
+typedef unsigned int UWORD32;
+typedef unsigned long long ULWORD64; /* long long, like LWORD64: plain long is only 32 bits on ILP32/LLP64 */
+
+typedef unsigned char UWord8;
+typedef unsigned short UWord16;
+typedef unsigned int UWord32;
+
+/*****************************************************************************/
+/* Signed data types (the name suffix gives the intended width in bits)      */
+/*****************************************************************************/
+typedef signed char WORD8;
+typedef short WORD16;
+typedef int WORD32;
+typedef long long LWORD64;
+
+typedef signed char Word8;
+typedef short Word16;
+typedef int Word32;
+
+/*****************************************************************************/
+/* Miscellaneous data types: BOOL is a plain char, pSTRING a char pointer    */
+/*****************************************************************************/
+typedef char BOOL;
+typedef char *pSTRING;
+
+#endif /*_ITTIAM_DATATYPES_H_*/
diff --git a/encoder/mb_model_based.c b/encoder/mb_model_based.c
new file mode 100644
index 0000000..1f339f9
--- /dev/null
+++ b/encoder/mb_model_based.c
@@ -0,0 +1,183 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file mb_model_based.c
+*
+* \brief
+* This file contains mb level API functions
+*
+* \date
+*
+* \author
+* ittiam
+*
+******************************************************************************
+*/
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* User include files */
+#include "ittiam_datatypes.h"
+#include "rc_common.h"
+#include "rc_cntrl_param.h"
+#include "var_q_operator.h"
+#include "mem_req_and_acq.h"
+#include "mb_model_based.h"
+
+typedef struct mb_rate_control_t
+{
+ /* Frame level QP: written by mb_init_frame_level(), starting point of get_mb_qp() */
+ UWORD8 u1_frm_qp;
+ /* Estimated average activity for the current frame (updated with the previous frame activity since
+ it is independent of picture type whether it is I or P); 0 disables modulation in get_mb_qp() */
+ WORD32 i4_avg_activity;
+} mb_rate_control_t;
+
+WORD32 mbrc_num_fill_use_free_memtab(
+    mb_rate_control_t **pps_mb_rate_control, itt_memtab_t *ps_memtab, ITT_FUNC_TYPE_E e_func_type)
+{
+    /* Stand-in state for the count/fill passes, when no real state memory exists yet */
+    static mb_rate_control_t s_placeholder_state;
+    const WORD32 i4_state_memtab_idx = 0;
+
+    switch(e_func_type)
+    {
+    case GET_NUM_MEMTAB:
+    case FILL_MEMTAB:
+        *pps_mb_rate_control = &s_placeholder_state;
+        break;
+    default:
+        break;
+    }
+
+    if(GET_NUM_MEMTAB != e_func_type)
+    {
+        fill_memtab(
+            &ps_memtab[i4_state_memtab_idx], sizeof(mb_rate_control_t), MEM_TAB_ALIGNMENT, PERSISTENT, DDR);
+        use_or_fill_base(&ps_memtab[i4_state_memtab_idx], (void **)pps_mb_rate_control, e_func_type);
+    }
+
+    return (i4_state_memtab_idx + 1);
+}
+
+/********************************************************************************
+ MB LEVEL API FUNCTIONS
+********************************************************************************/
+/******************************************************************************
+ Function Name   : init_mb_level_rc
+ Description     : Puts the average activity at its default of 0 (u1_frm_qp is left untouched)
+ Arguments       : ps_mb_rate_control - MB rate control state to initialise
+ Return Values   : void
+ Revision History:
+        13 03 2008   KJN  Creation
+*********************************************************************************/
+void init_mb_level_rc(mb_rate_control_t *ps_mb_rate_control)
+{
+    mb_rate_control_t *const ps_state = ps_mb_rate_control;
+    ps_state->i4_avg_activity = 0; /* default: no activity history */
+}
+/******************************************************************************
+ Function Name   : mb_init_frame_level
+ Description     : Records the frame level QP in the MB rate control state
+ Arguments       : ps_mb_rate_control - state; u1_frame_qp - frame level qp to store
+ Return Values   : void
+ Revision History:
+        13 03 2008   KJN  Creation
+*********************************************************************************/
+void mb_init_frame_level(mb_rate_control_t *ps_mb_rate_control, UWORD8 u1_frame_qp)
+{
+    mb_rate_control_t *const ps_state = ps_mb_rate_control;
+    ps_state->u1_frm_qp = u1_frame_qp; /* read by get_mb_qp() and get_frm_level_qp() */
+}
+/******************************************************************************
+ Function Name   : reset_mb_activity
+ Description     : Clears the stored average activity. Per the original note this
+                   is done whenever there is SCD.
+ Arguments       : ps_mb_rate_control - MB rate control state
+ Return Values   : void
+ Revision History: 13 03 2008   KJN  Creation
+*********************************************************************************/
+void reset_mb_activity(mb_rate_control_t *ps_mb_rate_control)
+{
+    mb_rate_control_t *const ps_state = ps_mb_rate_control;
+    ps_state->i4_avg_activity = 0; /* 0 also switches off modulation in get_mb_qp() */
+}
+
+/******************************************************************************
+ Function Name   : get_mb_qp
+ Description     : Calculates the mb level qp from the frame qp and the mb activity
+ Arguments       : ps_mb_rate_control - state; i4_cur_mb_activity - current mb activity
+                   pi4_mb_qp - out, 2 values: qp before and after activity modulation
+ Return Values   : void
+ Revision History: 13 03 2008   KJN  Creation
+*********************************************************************************/
+void get_mb_qp(mb_rate_control_t *ps_mb_rate_control, WORD32 i4_cur_mb_activity, WORD32 *pi4_mb_qp)
+{
+    const WORD32 i4_frm_qp = ps_mb_rate_control->u1_frm_qp;
+    WORD32 i4_mod_qp = i4_frm_qp;
+    WORD32 i4_avg_act;
+
+    /* Entry 0: frame (model based) QP, used for updating the rate control model */
+    pi4_mb_qp[0] = i4_frm_qp;
+
+    /* Modulation needs a non-zero average activity and a frame QP below 100 */
+    i4_avg_act = ps_mb_rate_control->i4_avg_activity;
+    if((0 != i4_avg_act) && (i4_frm_qp < 100))
+    {
+        const WORD32 i4_qp_cap = (3 * i4_frm_qp) >> 1; /* 1.5x the frame QP, rounded down */
+        const WORD32 i4_numer_wt = (2 * i4_cur_mb_activity) + i4_avg_act;
+        const WORD32 i4_denom_wt = i4_cur_mb_activity + (2 * i4_avg_act);
+
+        /* Half the divisor is added before dividing to round the result */
+        i4_mod_qp = ((i4_numer_wt * i4_frm_qp) + (i4_denom_wt >> 1)) / i4_denom_wt;
+        if(i4_mod_qp > i4_qp_cap)
+            i4_mod_qp = i4_qp_cap;
+    }
+
+    /* Entry 1: activity modulated QP, used for encoding the MB */
+    pi4_mb_qp[1] = i4_mod_qp;
+}
+/******************************************************************************
+ Function Name   : get_frm_level_qp
+ Description     : Returns the frame level QP held in the MB rate control state
+ Arguments       : ps_mb_rate_control - MB rate control state
+ Revision History: 13 03 2008   KJN  Creation
+*********************************************************************************/
+UWORD8 get_frm_level_qp(mb_rate_control_t *ps_mb_rate_control)
+{
+    const UWORD8 u1_stored_qp = ps_mb_rate_control->u1_frm_qp;
+    return u1_stored_qp;
+}
+/******************************************************************************
+ Function Name   : mb_update_frame_level
+ Description     : Stores the frame level average activity in the state
+ Arguments       : ps_mb_rate_control - MB rate control state
+                   i4_avg_activity    - average activity for the frame
+ Return Values   : void
+ Revision History:
+        13 03 2008   KJN  Creation
+*********************************************************************************/
+void mb_update_frame_level(mb_rate_control_t *ps_mb_rate_control, WORD32 i4_avg_activity)
+{
+    mb_rate_control_t *const ps_state = ps_mb_rate_control;
+
+    ps_state->i4_avg_activity = i4_avg_activity; /* read back by get_mb_qp() */
+}
diff --git a/encoder/mb_model_based.h b/encoder/mb_model_based.h
new file mode 100644
index 0000000..ed88f70
--- /dev/null
+++ b/encoder/mb_model_based.h
@@ -0,0 +1,67 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file mb_model_based.h
+*
+* \brief
+* This file contains all the necessary declarations for
+* mb level API functions
+*
+* \date
+*
+* \author
+* ittiam
+*
+******************************************************************************
+*/
+#ifndef _MB_MODEL_BASED_H_
+#define _MB_MODEL_BASED_H_
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+typedef struct mb_rate_control_t *mb_rate_control_handle;
+
+/*****************************************************************************/
+/* Function Declarations */
+/*****************************************************************************/
+WORD32 mbrc_num_fill_use_free_memtab(
+ mb_rate_control_handle *pps_mb_rate_control,
+ itt_memtab_t *ps_memtab,
+ ITT_FUNC_TYPE_E e_func_type);
+/* Initialising the state structure */
+void init_mb_level_rc(mb_rate_control_handle ps_mb_rate_control);
+/* MB parameters that are to be initialised at a frame level */
+void mb_init_frame_level(mb_rate_control_handle ps_mb_rate_control, UWORD8 u1_frame_qp);
+/* MB Level call to get the mb_level QP */
+void get_mb_qp(
+ mb_rate_control_handle ps_mb_rate_control, WORD32 i4_cur_mb_activity, WORD32 *pi4_mb_qp);
+/* MB Parameters that are to be updated at a frame level */
+void mb_update_frame_level(mb_rate_control_handle ps_mb_rate_control, WORD32 i4_avg_activity);
+/****************************************************************************
+ CONTROL FUNCTIONS FROM FRAME LEVEL
+****************************************************************************/
+/* Returns the stored frame level QP */
+UWORD8 get_frm_level_qp(mb_rate_control_handle ps_mb_rate_control);
+/* Disables activity based qp modulation */
+void reset_mb_activity(mb_rate_control_handle ps_mb_rate_control);
+
+#endif
diff --git a/encoder/mem_req_and_acq.h b/encoder/mem_req_and_acq.h
new file mode 100644
index 0000000..72aed0c
--- /dev/null
+++ b/encoder/mem_req_and_acq.h
@@ -0,0 +1,122 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/**
+ * @file mem_req_and_acq.h
+ * @brief Interface for mem request, acquiring and freeing
+ * @author K. J. Nitthilan
+ * @version 1.0
+ * @date 2009-04-21
+ */
+#ifndef _MEM_REQ_AND_ACQ_H_
+#define _MEM_REQ_AND_ACQ_H_
+
+typedef enum
+{
+ ALIGN_BYTE = 1,
+ ALIGN_WORD16 = 2,
+ ALIGN_WORD32 = 4,
+ ALIGN_WORD64 = 8,
+ ALIGN_128_BYTE = 128
+} ITT_MEM_ALIGNMENT_TYPE_E;
+
+typedef enum
+{
+ SCRATCH = 0,
+ PERSISTENT = 1,
+ WRITEONCE = 2
+} ITT_MEM_USAGE_TYPE_E;
+
+typedef enum
+{
+ L1D = 0,
+ SL2 = 1,
+ DDR = 3
+} ITT_MEM_REGION_E;
+
+typedef enum
+{
+ GET_NUM_MEMTAB = 0,
+ FILL_MEMTAB = 1,
+ USE_BASE = 2,
+ FILL_BASE = 3
+} ITT_FUNC_TYPE_E;
+
+/*NOTE : This should be an exact replica of IALG_MemRec, any change in IALG_MemRec
+ must be reflected here*/
+typedef struct
+{
+ UWORD32 u4_size; /* Size in bytes */
+ WORD32 i4_alignment; /* Alignment in bytes */
+ ITT_MEM_REGION_E
+ e_mem_region; /* decides which memory region to be placed */
+ ITT_MEM_USAGE_TYPE_E e_usage; /* memory is scratch or persistent */
+ void *pv_base; /* Base pointer for allocated memory */
+} itt_memtab_t;
+
+static __inline void fill_memtab(
+    itt_memtab_t *ps_mem_tab,
+    WORD32 u4_size,
+    WORD32 i4_alignment,
+    ITT_MEM_USAGE_TYPE_E e_usage,
+    ITT_MEM_REGION_E e_mem_region)
+{
+    /* Round the size up to the next multiple of the alignment. The mask */
+    /* form presumably expects a power-of-two alignment - TODO confirm. */
+    WORD32 i4_align_mask = i4_alignment - 1;
+    /* Describe the request in the memtab; pv_base is not written here */
+    ps_mem_tab->u4_size = (u4_size + i4_align_mask) & ~i4_align_mask;
+    ps_mem_tab->i4_alignment = i4_alignment;
+    ps_mem_tab->e_usage = e_usage;
+    ps_mem_tab->e_mem_region = e_mem_region;
+}
+
+static __inline WORD32
+    use_or_fill_base(itt_memtab_t *ps_mem_tab, void **ptr_to_be_filled, ITT_FUNC_TYPE_E e_func_type)
+{
+    switch(e_func_type)
+    {
+    case FILL_BASE:
+        /* Fill base for freeing the allocated memory: store the caller's */
+        /* pointer in the memtab. A null pointer is reported as an error. */
+        if(ptr_to_be_filled[0] == 0)
+        {
+            return (-1);
+        }
+        ps_mem_tab->pv_base = ptr_to_be_filled[0];
+        break;
+
+    case USE_BASE:
+        /* Obtain the allocated memory from the memtab's base pointer. */
+        /* A null base pointer is reported as an error. */
+        if(ps_mem_tab->pv_base == 0)
+        {
+            return (-1);
+        }
+        ptr_to_be_filled[0] = ps_mem_tab->pv_base;
+        break;
+
+    default:
+        /* Every other function type leaves both pointers untouched. */
+        break;
+    }
+    return (0);
+}
+
+#endif /* _MEM_REQ_AND_ACQ_H_*/
diff --git a/encoder/osal.c b/encoder/osal.c
new file mode 100644
index 0000000..92e9a8d
--- /dev/null
+++ b/encoder/osal.c
@@ -0,0 +1,217 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* */
+/* File Name : osal.c */
+/* */
+/* Description : This file contains all the API's of OSAL */
+/* initialization and closure */
+/* */
+/* List of Functions : osal_init */
+/* osal_register_callbacks */
+/* osal_close */
+/* osal_get_version */
+/* osal_print_status_log */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 19 04 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+
+#include <sys/types.h>
+#include <semaphore.h>
+#include <pthread.h>
+
+/* User include files */
+#include "cast_types.h"
+#include "osal.h"
+#include "osal_handle.h"
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+#define OSAL_VERSION "OSAL_v13.1"
+
+/*****************************************************************************/
+/* */
+/* Function Name : osal_init */
+/* */
+/* Description : This function creates and initializes the OSAL instance */
+/* */
+/* Inputs : Memory for OSAL handle */
+/* */
+/* Globals : None */
+/* */
+/* Processing : Initializes OSAL handle parameters to default values. */
+/* */
+/* Outputs : Status of OSAL handle initialization */
+/* */
+/* Returns : On SUCCESS - OSAL_SUCCESS */
+/* On FAILURE - OSAL_ERROR */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 19 04 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+WORD32 osal_init(IN void *osal_handle)
+{
+    osal_t *handle;
+
+    /* A null handle cannot be initialized */
+    if(0 == osal_handle)
+        return OSAL_ERROR;
+
+    /* Start with no memory manager and no allocation/free callbacks */
+    handle = (osal_t *)osal_handle;
+    handle->mmr_handle = 0;
+    handle->alloc = 0;
+    handle->free = 0;
+    return OSAL_SUCCESS;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : osal_register_callbacks */
+/* */
+/* Description : This function registers MMR handle and allocation and */
+/* freeing call back functions. */
+/* */
+/* Inputs : OSAL handle */
+/* OSAL callback attributes */
+/* */
+/* Globals : None */
+/* */
+/* Processing : This function initializes OSAL call back parameters. */
+/* */
+/* Outputs : Status of OSAL callback registration */
+/* */
+/* Returns : On SUCCESS - OSAL_SUCCESS */
+/* On FAILURE - OSAL_ERROR */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 10 05 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+WORD32 osal_register_callbacks(IN void *osal_handle, IN osal_cb_funcs_t *cb_funcs)
+{
+    osal_t *handle = (osal_t *)osal_handle;
+
+    /* Reject a missing handle or a missing callback table */
+    if(!(handle && cb_funcs))
+        return OSAL_ERROR;
+
+    /* Both callbacks are mandatory; mmr_handle itself is not checked */
+    if(!(cb_funcs->osal_alloc && cb_funcs->osal_free))
+        return OSAL_ERROR;
+
+    /* Copy the callbacks and the memory manager handle into the instance */
+    handle->alloc = cb_funcs->osal_alloc;
+    handle->free = cb_funcs->osal_free;
+    handle->mmr_handle = cb_funcs->mmr_handle;
+    return OSAL_SUCCESS;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : osal_close */
+/* */
+/* Description : This function closes the OSAL instance */
+/* */
+/* Inputs : OSAL handle */
+/* */
+/* Globals : None */
+/* */
+/* Processing : Validates the OSAL handle; no memory is freed here */
+/* */
+/* Outputs : Status of OSAL instance close */
+/* */
+/* Returns : On SUCCESS - 0 */
+/* On FAILURE - -1 */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 19 04 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+WORD32 osal_close(IN void *osal_handle)
+{
+    /* Nothing is released here: the call only reports whether a handle */
+    /* was supplied (OSAL_ERROR for a null handle, OSAL_SUCCESS otherwise). */
+    WORD32 status = (0 == osal_handle) ? OSAL_ERROR : OSAL_SUCCESS;
+
+    return status;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : osal_get_version */
+/* */
+/* Description : This function gets the version of OSAL library. */
+/* */
+/* Inputs : None */
+/* Globals : None */
+/* */
+/* Processing : Returns a NULL terminated string which has the version of */
+/* library being used. */
+/* */
+/* Outputs : Version of OSAL library. */
+/* */
+/* Returns : Pointer to a NULL terminated string */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 07 03 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+WORD8 *osal_get_version(void)
+{
+    return ((WORD8 *)OSAL_VERSION); /* string literal: callers must not modify it */
+}
diff --git a/encoder/osal.h b/encoder/osal.h
new file mode 100644
index 0000000..dcd3a57
--- /dev/null
+++ b/encoder/osal.h
@@ -0,0 +1,623 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* */
+/* File Name : osal.h */
+/* */
+/* Description : This file contains all the necessary OSAL Constants, */
+/* Enums, Structures and API declarations. */
+/* */
+/* List of Functions : None */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 03 03 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+#ifndef OSAL_H
+#define OSAL_H
+
+/* C linkage specifiers for C++ declarations. */
+#ifdef __cplusplus
+extern "C"
+{
+#endif /* __cplusplus */
+
+/*****************************************************************************/
+/* Constants */
+/*****************************************************************************/
+
+/* OSAL handle size */
+#define OSAL_HANDLE_SIZE 40
+
+/* Number of select entries */
+#define OSAL_SELECT_MAX 20
+
+/* OSAL Return Status */
+#define OSAL_SUCCESS 0
+#define OSAL_ERROR -1
+#define OSAL_NOT_SUPPORTED -2
+#define OSAL_TIMEOUT -3
+
+/* OSAL thread priority levels. */
+/* OSAL_PRIORITY_1 represents MINIMUM, */
+/* OSAL_PRIORITY_10 represents MAXIMUM, */
+/* OSAL_PRIORITY_DEFAULT represents DEFAULT SYSTEM PRIORITY LEVEL */
+#define OSAL_PRIORITY_DEFAULT 0
+#define OSAL_PRIORITY_1 1
+#define OSAL_PRIORITY_2 2
+#define OSAL_PRIORITY_3 3
+#define OSAL_PRIORITY_4 4
+#define OSAL_PRIORITY_5 5
+#define OSAL_PRIORITY_6 6
+#define OSAL_PRIORITY_7 7
+#define OSAL_PRIORITY_8 8
+#define OSAL_PRIORITY_9 9
+#define OSAL_PRIORITY_10 10
+
+/* OSAL socket option levels */
+#define OSAL_SOL_SOCKET 10000
+#define OSAL_IPPROTO_IP 10001
+
+/* OSAL socket options */
+#define OSAL_BROADCAST 1000
+#define OSAL_REUSEADDR 1001
+#define OSAL_KEEPALIVE 1002
+#define OSAL_LINGER 1003
+#define OSAL_OOBINLINE 1004
+#define OSAL_SNDBUF 1005
+#define OSAL_RCVBUF 1006
+#define OSAL_RCVTIMEO 1007
+#define OSAL_SNDTIMEO 1008
+#define OSAL_ADD_MEMBERSHIP 1009
+#define OSAL_DROP_MEMBERSHIP 1010
+#define OSAL_TTL 1011
+#define OSAL_DSCP 1012
+#define OSAL_MULTICAST_TTL 1013
+#define OSAL_ADDSRC_MEMBERSHIP 1014
+#define OSAL_DROPSRC_MEMBERSHIP 1015
+
+ /*****************************************************************************/
+ /* Enums */
+ /*****************************************************************************/
+
+ /* Protocols supported. */
+ typedef enum
+ {
+ OSAL_TCP, /* Address family = AF_INET, Type = SOCK_STREAM, Protocol = 0 */
+ OSAL_UDP /* Address family = AF_INET, Type = SOCK_DGRAM, Protocol = 0 */
+ } OSAL_PROTOCOL_T;
+
+ /* File Descriptor types. Used to specify the type of activity to check on */
+ /* a socket. */
+ typedef enum
+ {
+ OSAL_READ_FD,
+ OSAL_WRITE_FD,
+ OSAL_EXCEPT_FD
+ } OSAL_FD_TYPE_T;
+
+ /* Scheduling policies supported */
+ typedef enum
+ {
+ OSAL_SCHED_RR,
+ OSAL_SCHED_FIFO,
+ OSAL_SCHED_OTHER
+ } OSAL_SCHED_POLICY_TYPE_T;
+
+ /*****************************************************************************/
+ /* Structures */
+ /*****************************************************************************/
+
+ /* Structure to initialize OSAL */
+ typedef struct
+ {
+ /* Handle of memory manager being used. NULL is a valid argument.*/
+ void *mmr_handle;
+
+ /* Call back API to be called during allocation */
+ void *(*osal_alloc)(void *mmr_handle, UWORD32 size);
+
+ /* Call back API for freeing */
+ void (*osal_free)(void *mmr_handle, void *mem);
+ } osal_cb_funcs_t;
+
+ /* The structure (osal_mbox_attr_t) contains the attributes of the mail box */
+ /* which are passed to osal_mbox_create() API. The created Mail box has */
+ /* attributes specified using the structure variable. */
+ typedef struct
+ {
+ void *thread_handle; /* Thread to be associated with mail box. */
+ STRWORD8 *name; /* NULL terminated string name for mail box */
+ UWORD32 msg_size; /* Length of each message. */
+ UWORD32 mbx_len; /* Maximum number of messages. */
+ } osal_mbox_attr_t;
+
+ /* The structure (osal_sem_attr_t) contains the attributes of the semaphore */
+ /* which are passed to osal_sem_create() API. Currently the only attribute */
+ /* is the initial value of the semaphore. */
+ typedef struct
+ {
+ WORD32 value;
+ } osal_sem_attr_t;
+
+ /* The Structure (osal_thread_attr_t) contains the attributes of the thread */
+ /* which are passed to osal_thread_create() API. The created thread has */
+ /* attributes specified using the structure variable. */
+ typedef struct
+ {
+ /* Function from where thread execution starts */
+ void *thread_func;
+
+ /* Parameters for thread function. */
+ void *thread_param;
+
+ /* Stack size in bytes. For default value, set to '0' */
+ UWORD32 stack_size;
+
+ /* This attribute specifies a pre-allocated block of size 'stack_size' */
+ /* to be used for the task's private stack. For default value, set to */
+ /* 'NULL'. */
+ void *stack_addr;
+
+ /* NULL terminated string name for thread. For default value, set to */
+ /* 'NULL'. */
+ WORD8 *name;
+
+ /* Flag determining whether to use OSAL Thread priority mapping or not. */
+ /* Value set to 1 - use OSAL thread priority mapping. */
+ /* Value set to 0 - Direct value set as thread priority */
+ WORD32 priority_map_flag;
+
+ /* Priority range shall be considered + ve values for increasing */
+ /* priority and negative values for decreasing priority. The range shall */
+ /* be mapped to specific OS range internally through OSAL. For default */
+ /* value, set to '0'. */
+ WORD32 priority;
+
+ /* Exit return value on which thread shall exit */
+ WORD32 exit_code;
+
+ /* Scheduling policy of the thread */
+ OSAL_SCHED_POLICY_TYPE_T sched_policy;
+
+ /* Mask to specify on which cores the thread can run */
+ ULWORD64 core_affinity_mask;
+
+ /* Specifies on which group of processors the thread can run */
+ WORD16 group_num;
+
+ } osal_thread_attr_t;
+
+ /* The structure (osal_socket_attr_t) contains the attributes of the socket */
+ /* which are to be specified during socket creation. */
+ typedef struct
+ {
+ OSAL_PROTOCOL_T protocol;
+ } osal_socket_attr_t;
+
+ /* The structure (osal_sockaddr_t) is used to uniquely determine a socket in */
+ /* the network. The socket can be addressed using IP address and port number.*/
+ typedef struct
+ {
+ WORD8 ip_addr[16];
+ UWORD16 port;
+ } osal_sockaddr_t;
+
+ /* The structure contains the select engine thread parameters like thread */
+ /* name thread priority etc. */
+ typedef struct
+ {
+ /* Flag determining whether to use OSAL Thread priority mapping or not. */
+ /* Value set to 1 - use OSAL thread priority mapping. */
+ /* Value set to 0 - Direct value set as thread priority */
+ WORD32 priority_map_flag;
+
+ /* Priority range shall be considered + ve values for increasing */
+ /* priority and negative values for decreasing priority. The range shall */
+ /* be mapped to specific OS range internally through OSAL. For default */
+ /* value, set to '0'. */
+ WORD32 priority;
+
+ /* NULL terminated string name for thread. For default value, set to */
+ /* 'NULL'. */
+ WORD8 *name;
+
+ /* Timeout for thread sleep in micro seconds */
+ UWORD32 select_timeout;
+
+ /* Timeout for SELECT system called by osal library in micro seconds */
+ UWORD32 select_poll_interval;
+ } osal_select_engine_attr_t;
+
+ /* The structure used to register sockets to select engine. This structure */
+ /* has to be updated for each socket handle and select register has to be */
+ /* done. Currently registration is supported one at a time. */
+ /* Note: Function 'init' is assumed to return the socket handle. */
+ typedef struct osal_select_entry_t
+ {
+ /* Socket handle to be registered. */
+ void *socket_handle;
+
+ /* Activity to select for. */
+ OSAL_FD_TYPE_T type;
+
+ /* Call back called before doing select. The function init is assumed to */
+ /* return the socket handle. If this function returns NULL, the socket */
+ /* will be unregistered. */
+ void *(*init)(void *);
+
+ /* Argument to init function */
+ void *init_param;
+
+ /* Call back function on select success */
+ WORD32 (*call_back)(void *socket_handle, void *call_back_param);
+
+ /* Call back function parameters */
+ void *call_back_param;
+
+ /* Call back called when the socket is unregistered. If set to NULL, */
+ /* this will not be called. The socket that has been registered is the */
+ /* first argument, the second argument will be terminate_param */
+ void (*terminate)(void *, void *);
+
+ /* Argument to terminate callback */
+ void *terminate_param;
+
+ /* Exit code of the call back function. */
+ WORD32 exit_code;
+
+ /* Identifier. Do not initialize this. */
+ WORD32 id;
+ } osal_select_entry_t;
+
+ /* File descriptor structure. Used in osal_socket_select() API call. */
+ /* Currently the maximum number of sockets that can be set is fixed to */
+ /* OSAL_SELECT_MAX. */
+ /* Note : To initialize osal_fd_set structure variable, call API */
+ /* osal_socket_fd_zero() for Initialization. If initialization is not */
+ /* done, behaviour of osal_socket_select() and fd_set API's is */
+ /* undefined. */
+ typedef struct
+ {
+ void *array[OSAL_SELECT_MAX]; /* Array for holding the socket descriptors*/
+ WORD32 count; /* Number of socket descriptors in array */
+ } osal_fd_set_t;
+
+ /* Timeout value for osal_socket_select() API. */
+ typedef struct
+ {
+ WORD32 tv_sec; /* Time in seconds. */
+ WORD32 tv_usec; /* Time in micro seconds. */
+ } osal_timeval_t;
+
+ /* Attributes for setting Linger option for socket */
+ typedef struct
+ {
+ UWORD16 l_onoff;
+ UWORD16 l_linger;
+ } osal_sockopt_linger_t;
+
+ /* Attributes for Joining or dropping from a multicast group */
+ typedef struct
+ {
+ WORD8 imr_multiaddr[16];
+ WORD8 imr_interface[16];
+ WORD8 imr_srcaddr[16];
+ } osal_ip_mreq_t;
+
+ /*****************************************************************************/
+ /* Extern OSAL Initialization Function Declarations */
+ /*****************************************************************************/
+
+ /* Initializes the caller-supplied OSAL instance handle by clearing its */
+ /* memory manager handle and its allocation and free callbacks. */
+ extern WORD32 osal_init(IN void *osal_handle);
+
+ /* Releases all the resources held by the OSAL handle */
+ extern WORD32 osal_close(IN void *osal_handle);
+
+ /* This function registers MMR call backs for OSAL */
+ extern WORD32 osal_register_callbacks(IN void *osal_handle, IN osal_cb_funcs_t *cb_funcs);
+
+ /*****************************************************************************/
+ /* Extern Mail Box Function Declarations */
+ /*****************************************************************************/
+
+ /* Allocates memory for mail box handle. Creates a mail box which is */
+ /* associated with the thread and updates the mail box, which returned for */
+ /* further actions to be performed on the mail box. */
+ extern void *osal_mbox_create(IN void *osal_handle, IN osal_mbox_attr_t *attr);
+
+ /* Closes the mail box and frees the memory allocated for mail box handle. */
+ extern WORD32 osal_mbox_destroy(IN void *mbox_handle);
+
+ /* Posts a message to the mail box */
+ extern WORD32 osal_mbox_post(IN void *mbox_handle, IN void *buf, IN UWORD32 len);
+
+ /* Gets the message from the specified mail box. If there are no messages */
+ /* in mail box, it waits infinitely till a message arrives. */
+ extern WORD32 osal_mbox_get(IN void *mbox_handle, OUT void *buf, IN UWORD32 len);
+
+ /* Gets the message from the specified mail box within the timeout period. */
+ /* If no messages are present in specified time, error code is returned. The */
+ /* error can be got from osal_get_last_error() API */
+ extern WORD32
+ osal_mbox_get_timed(IN void *mbox_handle, OUT void *buf, IN UWORD32 len, IN UWORD32 timeout);
+
+ /*****************************************************************************/
+ /* Extern Custom Mail Box Function Declarations */
+ /*****************************************************************************/
+
+ /* Allocates memory for mail box handle. Creates a mail box which is */
+ /* associated with the thread and updates the mail box, which returned for */
+ /* further actions to be performed on the mail box. */
+ extern void *osal_custom_mbox_create(IN void *osal_handle, IN osal_mbox_attr_t *attr);
+
+ /* Closes the mail box and frees the memory allocated for mail box handle. */
+ extern WORD32 osal_custom_mbox_destroy(IN void *mbox_handle);
+
+ /* Posts a message to the mail box */
+ extern WORD32 osal_custom_mbox_post(IN void *cust_mbox_handle, IN void *buf, IN UWORD32 len);
+
+ /* Gets the message from the specified mail box. If there are no messages */
+ /* in mail box, it waits infinitely till a message arrives. */
+ extern WORD32 osal_custom_mbox_get(IN void *cust_mbox_handle, OUT void *buf, IN UWORD32 len);
+
+ /* Gets the message from the specified mail box within the timeout period. */
+ /* If no messages are present in specified time, error code is returned. The */
+ /* error can be got from osal_get_last_error() API */
+ extern WORD32 osal_custom_mbox_get_timed(
+ IN void *cust_mbox_handle, OUT void *buf, IN UWORD32 len, IN UWORD32 timeout);
+
+ /*****************************************************************************/
+ /* Extern Mutex Function Declarations */
+ /*****************************************************************************/
+
+ /* Creates a mutex and returns the handle to the mutex */
+ extern void *osal_mutex_create(IN void *osal_handle);
+
+ /* Closes the mutex. */
+ extern WORD32 osal_mutex_destroy(IN void *mutex_handle);
+
+ /* Waits infinitely till mutex lock is got. */
+ extern WORD32 osal_mutex_lock(IN void *mutex_handle);
+
+ /* Releases the lock held on the mutex. */
+ extern WORD32 osal_mutex_unlock(IN void *mutex_handle);
+
+ /*****************************************************************************/
+ /* Extern Semaphore Function Declarations */
+ /*****************************************************************************/
+
+ /* Creates a semaphore and returns the handle to semaphore. */
+ extern void *osal_sem_create(IN void *osal_handle, IN osal_sem_attr_t *attr);
+
+ /* Closes the semaphore. */
+ extern WORD32 osal_sem_destroy(IN void *sem_handle);
+
+ /* Waits infinitely till semaphore is zero. */
+ extern WORD32 osal_sem_wait(IN void *sem_handle);
+
+ /* Increments the value of semaphore by one. */
+ extern WORD32 osal_sem_post(IN void *sem_handle);
+
+ /* Returns the current value of semaphore. */
+ extern WORD32 osal_sem_count(IN void *sem_handle, OUT WORD32 *count);
+
+ /*****************************************************************************/
+ /* Extern Conditional Variable Function Declarations */
+ /*****************************************************************************/
+
+ /* Creates a conditional variable and returns the handle to it. */
+ extern void *osal_cond_var_create(IN void *osal_handle);
+
+ /* Destroys the conditional variable. */
+ extern WORD32 osal_cond_var_destroy(IN void *cond_var_handle);
+
+ /* Waits infinitely till conditional variable receives signal. */
+ extern WORD32 osal_cond_var_wait(IN void *cond_var_handle, IN void *mutex_handle);
+
+ /* Signals on conditional variable. */
+ extern WORD32 osal_cond_var_signal(IN void *cond_var_handle);
+
+ /*****************************************************************************/
+ /* Extern Thread Function Declarations */
+ /*****************************************************************************/
+
+ /* Creates a thread with specified parameters */
+ extern void *osal_thread_create(IN void *osal_handle, IN osal_thread_attr_t *attr);
+
+ /* Closes or halts the execution of thread specified by the handle. */
+ extern WORD32 osal_thread_destroy(IN void *thread_handle);
+
+ /* Makes the thread sleep for specified number of milliseconds */
+ extern WORD32 osal_thread_sleep(IN UWORD32 milli_seconds);
+
+ /* Yields the execution of thread. */
+ extern WORD32 osal_thread_yield(void);
+
+ /* Suspends the execution of thread until osal_thread_resume API is called. */
+ extern WORD32 osal_thread_suspend(IN void *thread_handle);
+
+ /* Resumes the execution of thread which was suspended by */
+ /* osal_thread_suspend API call. */
+ extern WORD32 osal_thread_resume(IN void *thread_handle);
+
+ /* Waits infinitely till the thread, whose handle is passed, completes */
+ /* execution. */
+ extern WORD32 osal_thread_wait(IN void *thread_handle);
+
+ /* Returns current thread handle */
+ extern void *osal_get_thread_handle(IN void *osal_handle);
+
+ /*****************************************************************************/
+ /* Extern Network Socket Function Declarations */
+ /*****************************************************************************/
+
+ /* Initializes network resources */
+ extern WORD32 osal_network_init(void);
+
+ /* Un-initializes all the network resources */
+ extern WORD32 osal_network_close(void);
+
+ /* Creates the socket and returns the socket descriptor. */
+ extern void *osal_socket_create(IN void *osal_handle, IN osal_socket_attr_t *attr);
+
+ /* Closes the open socket. */
+ extern WORD32 osal_socket_destroy(IN void *socket_handle);
+
+ /* Binds to the specified port number on the local machine. Socket_create */
+ /* API has to be called before calling socket_bind. */
+ extern WORD32 osal_socket_bind(IN void *socket_handle, IN osal_sockaddr_t *addr);
+
+ /* Starts listening at the specified port for any incoming connections. */
+ /* Socket descriptor should be bound before calling socket_listen */
+ extern WORD32 osal_socket_listen(IN void *socket_handle, IN WORD32 backlog);
+
+ /* Accepts incoming connection. If listen queue is empty it blocks till a */
+ /* successful connection is made. */
+ extern void *osal_socket_accept(IN void *socket_handle, OUT osal_sockaddr_t *addr);
+
+ /* Makes a connection request to the remote address specified. */
+ extern WORD32 osal_socket_connect(IN void *socket_handle, IN osal_sockaddr_t *addr);
+
+ /* Sends the specified number of bytes of data */
+ extern WORD32 osal_socket_send(
+ IN void *socket_handle, IN const WORD8 *buf, IN WORD32 len, IN WORD32 flags);
+
+ /* Receives data over TCP connection. */
+ extern WORD32
+ osal_socket_recv(IN void *socket_handle, OUT WORD8 *buf, IN WORD32 len, IN WORD32 flags);
+
+ /* Sends data over a datagram protocol */
+ extern WORD32 osal_socket_sendto(
+ IN void *socket_handle,
+ IN const WORD8 *buf,
+ IN WORD32 len,
+ IN WORD32 flags,
+ IN osal_sockaddr_t *to);
+
+ /* Receives packet over a UDP connection */
+ extern WORD32 osal_socket_recvfrom(
+ IN void *socket_handle,
+ OUT WORD8 *buf,
+ IN WORD32 len,
+ IN WORD32 flags,
+ OUT osal_sockaddr_t *from);
+
+ /* Polls the specified sockets for specified activity */
+ extern WORD32 osal_socket_select(
+ INOUT osal_fd_set_t *readfds,
+ INOUT osal_fd_set_t *writefds,
+ INOUT osal_fd_set_t *exceptfds,
+ INOUT osal_timeval_t *timeout);
+
+ /* Gets the socket options */
+ extern WORD32 osal_socket_getsockopt(
+ IN void *socket_handle,
+ IN WORD32 level,
+ IN WORD32 optname,
+ OUT WORD8 *optval,
+ INOUT WORD32 *optlen);
+
+ /* Sets the socket options to specified values */
+ extern WORD32 osal_socket_setsockopt(
+ IN void *socket_handle,
+ IN WORD32 level,
+ IN WORD32 optname,
+ IN const WORD8 *optval,
+ IN WORD32 optlen);
+
+ /* Adds the specified socket handle to the file descriptor set */
+ extern WORD32 osal_socket_fd_set(IN void *socket_handle, OUT osal_fd_set_t *set);
+
+ /* Checks the file descriptor set for the presence of socket handle. */
+ extern WORD32 osal_socket_fd_isset(IN void *socket_handle, IN osal_fd_set_t *set);
+
+ /* Resets the file descriptor set */
+ extern void osal_socket_fd_zero(INOUT osal_fd_set_t *set);
+
+ /* Removes the specified socket handle from the file descriptor set */
+ extern WORD32 osal_socket_fd_clr(IN void *socket_handle, OUT osal_fd_set_t *set);
+
+ /* To convert short integer from host byte order to network byte order */
+ extern UWORD16 osal_htons(IN UWORD16 hostshort);
+
+ /* To convert long integer from host to network byte order */
+ extern UWORD32 osal_htonl(IN UWORD32 hostlong);
+
+ /* To convert short integer from network to host byte order */
+ extern UWORD16 osal_ntohs(IN UWORD16 netshort);
+
+ /* To convert long integer from network to host byte order */
+ extern UWORD32 osal_ntohl(IN UWORD32 netlong);
+
+ /*****************************************************************************/
+ /* Extern Select Engine Function Declarations */
+ /*****************************************************************************/
+
+ /* Initializes the select engine. */
+ extern void *
+ osal_select_engine_init(IN void *osal_handle, IN osal_select_engine_attr_t *se_attr);
+
+ /* Closes the select engine. */
+ extern WORD32 osal_select_engine_close(IN void *select_engine);
+
+ /* Registers the socket handle specified in the entry. */
+ extern WORD32
+ osal_select_engine_register(IN void *select_engine, IN osal_select_entry_t *entry);
+
+ /* Un-registers the specified socket handle. */
+ extern WORD32 osal_select_engine_unregister(
+ IN void *select_engine, IN void *socket_handle, IN OSAL_FD_TYPE_T fd_type);
+ /*****************************************************************************/
+ /* Extern Other Function Declarations */
+ /*****************************************************************************/
+
+ /* Returns time in milliseconds */
+ extern UWORD32 osal_get_time(void);
+
+ /* For time in micro-second resolution */
+ extern WORD32 osal_get_time_usec(UWORD32 *sec, UWORD32 *usec);
+
+ /* Returns the last error code. 0 is no error */
+ extern UWORD32 osal_get_last_error(void);
+
+ /* Prints the last error code. 0 is no error */
+ extern void osal_print_last_error(IN const STRWORD8 *string);
+
+ /* Gets the version of library in NULL terminated string form. */
+ extern WORD8 *osal_get_version(void);
+
+ /* Gets the tid of the thread in whose context this call was made */
+ extern WORD32 osal_get_current_tid(void);
+
+/* C linkage specifiers for C++ declarations. */
+#ifdef __cplusplus
+}
+#endif /* __cplusplus */
+
+#endif /* OSAL_H */
diff --git a/encoder/osal_cond_var.c b/encoder/osal_cond_var.c
new file mode 100644
index 0000000..434763c
--- /dev/null
+++ b/encoder/osal_cond_var.c
@@ -0,0 +1,259 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* */
+/* File Name : osal_cond_var.c */
+/* */
+/* Description : This file contains all the necessary function */
+/* definitions required to operate on Conditional */
+/* Variable. */
+/* */
+/* List of Functions : osal_cond_var_create */
+/* osal_cond_var_destroy */
+/* osal_cond_var_wait */
+/* osal_cond_var_wait_timed */
+/* osal_cond_var_signal */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 05 09 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+
+#include <errno.h>
+#include <pthread.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <time.h>
+
+/* User include files */
+#include "cast_types.h"
+#include "osal.h"
+#include "osal_handle.h"
+#include "osal_mutex.h"
+#include "osal_cond_var.h"
+
+/*****************************************************************************/
+/* */
+/* Function Name : osal_cond_var_create */
+/* */
+/* Description : This function initializes the conditional variable and */
+/* returns the handle to it. */
+/* */
+/* Inputs : OSAL handle */
+/* Memory manager handle */
+/* */
+/* Globals : None */
+/* */
+/* Processing : Calls system specific API and returns handle to the */
+/* conditional variable. */
+/* */
+/* Outputs : Handle to Conditional Variable */
+/* */
+/* Returns : On SUCCESS - Handle to Conditional Variable */
+/* On FAILURE - NULL */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 05 09 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+void *osal_cond_var_create(IN void *osal_handle)
+{
+    osal_t *osal = osal_handle;
+    cond_var_handle_t *new_cond_var;
+    void *mem_mgr;
+
+    /* An OSAL handle carrying both memory callbacks is mandatory */
+    if(0 == osal)
+        return 0;
+
+    if(0 == osal->alloc || 0 == osal->free)
+        return 0;
+
+    /* The handle is carved out of the caller-supplied memory manager */
+    mem_mgr = osal->mmr_handle;
+    new_cond_var = osal->alloc(mem_mgr, sizeof(cond_var_handle_t));
+
+    if(0 == new_cond_var)
+    {
+        /* Allocation failed */
+        return 0;
+    }
+
+    /* Remember where the memory came from so it can be released later */
+    new_cond_var->mmr_handle = mem_mgr;
+    new_cond_var->hdl = osal;
+
+    /* Initialise the condition variable with default attributes */
+    if(0 == pthread_cond_init(&(new_cond_var->cond_var), 0))
+        return new_cond_var;
+
+    /* Initialisation failed: give the memory back and report failure */
+    osal->free(mem_mgr, new_cond_var);
+
+    return 0;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : osal_cond_var_destroy */
+/* */
+/* Description : This function destroys all the OS resources allocated by */
+/* 'osal_cond_var_create' API. */
+/* */
+/* Inputs : Conditional Variable handle */
+/* */
+/* Globals : None */
+/* */
+/* Processing : Validates the input and destroys all the OS allocated */
+/* resources. */
+/* */
+/* Outputs : Status of closure */
+/* */
+/* Returns : On SUCCESS - OSAL_SUCCESS */
+/* On FAILURE - OSAL_ERROR */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 10 05 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+WORD32 osal_cond_var_destroy(IN void *cond_var_handle)
+{
+    cond_var_handle_t *cond_var = (cond_var_handle_t *)cond_var_handle;
+
+    /* Nothing to destroy without a handle */
+    if(0 == cond_var)
+        return OSAL_ERROR;
+
+    /* The free callback of the owning OSAL handle is needed to release memory */
+    if(0 == cond_var->hdl)
+        return OSAL_ERROR;
+
+    if(0 == cond_var->hdl->free)
+        return OSAL_ERROR;
+
+    /* Tear down the condition variable; on failure the handle is not freed */
+    if(0 != pthread_cond_destroy(&(cond_var->cond_var)))
+        return OSAL_ERROR;
+
+    /* Hand the handle memory back through the stored memory manager handle */
+    cond_var->hdl->free(cond_var->mmr_handle, cond_var);
+    return OSAL_SUCCESS;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : osal_cond_var_wait */
+/* */
+/* Description : This function waits indefinitely on a conditional variable. */
+/* */
+/* Inputs : Conditional Variable handle */
+/* Mutex handle for lock */
+/* */
+/* Globals : None */
+/* */
+/* Processing : This function waits for a signal on the conditional variable. */
+/* Until the signal arrives, the lock on the mutex is relinquished. */
+/* */
+/* Outputs : Status of wait on conditional variable */
+/* */
+/* Returns : On SUCCESS - OSAL_SUCCESS */
+/* On FAILURE - OSAL_ERROR */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 10 05 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+WORD32 osal_cond_var_wait(IN void *cond_var_handle, IN void *mutex_handle)
+{
+    cond_var_handle_t *cond_var = (cond_var_handle_t *)cond_var_handle;
+    mutex_handle_t *mutex = (mutex_handle_t *)mutex_handle;
+    if(0 == cond_var || 0 == mutex)
+        return OSAL_ERROR;
+
+    /* pthread reports failure as a non-zero errno; map it to the documented OSAL codes */
+    if(0 != pthread_cond_wait(&(cond_var->cond_var), &(mutex->mutex_handle)))
+        return OSAL_ERROR;
+    return OSAL_SUCCESS;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : osal_cond_var_signal */
+/* */
+/* Description : This function signals on a conditional variable. */
+/* */
+/* Inputs : Conditional Variable handle */
+/* */
+/* Globals : None */
+/* */
+/* Processing : Calls the underlying API to signal on a conditional */
+/* variable. */
+/* */
+/* Outputs : Status of signalling */
+/* */
+/* Returns : On SUCCESS - OSAL_SUCCESS */
+/* On FAILURE - OSAL_ERROR */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 10 05 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+WORD32 osal_cond_var_signal(IN void *cond_var_handle)
+{
+    cond_var_handle_t *cond_var = (cond_var_handle_t *)cond_var_handle;
+
+    if(0 == cond_var)
+        return OSAL_ERROR;
+
+    /* pthread reports failure as a non-zero errno; map it to the documented OSAL codes */
+    return (0 == pthread_cond_signal(&(cond_var->cond_var))) ? OSAL_SUCCESS : OSAL_ERROR;
+}
diff --git a/encoder/osal_cond_var.h b/encoder/osal_cond_var.h
new file mode 100644
index 0000000..a83c421
--- /dev/null
+++ b/encoder/osal_cond_var.h
@@ -0,0 +1,49 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* */
+/* File Name : osal_cond_var.h */
+/* */
+/* Description : This file contains OSAL Conditional Variable handle */
+/* structure definition. */
+/* */
+/* List of Functions : None */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 05 09 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+#ifndef OSAL_COND_VAR_H
+#define OSAL_COND_VAR_H
+
+typedef struct /* Handle behind the void * used by the osal_cond_var_* functions */
+{
+ pthread_cond_t cond_var; /* Underlying pthread condition variable */
+ void *mmr_handle; /* Pointer to memory manager handle */
+ osal_t *hdl; /* Associated OSAL handle */
+} cond_var_handle_t;
+
+#endif /* OSAL_COND_VAR_H */
diff --git a/encoder/osal_defaults.h b/encoder/osal_defaults.h
new file mode 100644
index 0000000..09dbc51
--- /dev/null
+++ b/encoder/osal_defaults.h
@@ -0,0 +1,139 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* */
+/* File Name : osal_defaults.h */
+/* */
+/* Description : This file contains default values to initialize the */
+/* attributes required by components created through OSAL */
+/* */
+/* List of Functions : None */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 14 07 2007 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+#ifndef OSAL_DEFAULTS_H
+#define OSAL_DEFAULTS_H
+
+/*****************************************************************************/
+/* Constants */
+/*****************************************************************************/
+
+/* Default attributes for a mailbox (positional initializer: order is significant) */
+#define OSAL_DEFAULT_MBOX_ATTR \
+ { \
+ 0, /* Thread handle */ \
+ 0, /* Mbox name */ \
+ 0, /* Mbox length */ \
+ 0 /* Msg size */ \
+ }
+
+/* Default attributes for a semaphore (brace initializer with a single entry) */
+#define OSAL_DEFAULT_SEM_ATTR \
+ { \
+ 0 /* Initial value */ \
+ }
+
+/* Default attributes for a thread (positional initializer: order is significant) */
+#define OSAL_DEFAULT_THREAD_ATTR \
+ { \
+ 0, /* Thread function */ \
+ 0, /* Thread parameters */ \
+ 0, /* Stack size */ \
+ 0, /* Stack start address */ \
+ 0, /* Thread name */ \
+ 1, /* Use OSAL priorities */ \
+ OSAL_PRIORITY_DEFAULT, /* Thread priority */ \
+ 0, /* Exit code */ \
+ OSAL_SCHED_OTHER, /* Scheduling policy */ \
+ 0, /* Core affinity mask */ \
+ 0 /* Group number */ \
+ }
+
+/* Default attributes for a socket (protocol defaults to OSAL_UDP) */
+#define OSAL_DEFAULT_SOCKET_ATTR \
+ { \
+ OSAL_UDP /* Protocol */ \
+ }
+
+/* Default attributes for a socket address entry */
+#define OSAL_DEFAULT_SOCKADDR \
+ { \
+ 0 \
+ } /* The single 0 zero-initializes every member (IP and port) */
+
+/* Default attributes for the select engine (positional initializer: order is significant) */
+#define OSAL_DEFAULT_SELECT_ENGINE_ATTR \
+ { \
+ 1, /* Use OSAL priorities */ \
+ OSAL_PRIORITY_DEFAULT, /* Thread priority */ \
+ 0, /* Thread name */ \
+ 5000, /* Timeout for select call (units not visible here - TODO confirm) */ \
+ 10000 /* Poll interval (units not visible here - TODO confirm) */ \
+ }
+
+/* Default attributes for an entry in the select engine (positional initializer) */
+#define OSAL_DEFAULT_SELECT_ENTRY \
+ { \
+ 0, /* Socket Handle */ \
+ OSAL_READ_FD, /* Socket type */ \
+ 0, /* Init callback */ \
+ 0, /* Init callback parameters */ \
+ 0, /* Socket activity callback */ \
+ 0, /* Socket activity callback params */ \
+ 0, /* Terminate-time callback */ \
+ 0, /* Terminate-time callback params */ \
+ 0, /* Successful exit code */ \
+ 0 /* ID */ \
+ }
+
+/* Default attributes for FD set */
+#define OSAL_DEFAULT_FD_SET \
+ { \
+ 0 /* Initializes count to 0; any remaining members are zero-initialized too */ \
+ }
+
+/* Default attributes for time value structure (zero seconds, zero microseconds) */
+#define OSAL_DEFAULT_TIMEVAL \
+ { \
+ 0, /* Seconds */ \
+ 0 /* Microseconds */ \
+ }
+
+/* Default attributes for LINGER socket option structure (both entries 0) */
+#define OSAL_DEFAULT_SOCKOPT_LINGER \
+ { \
+ 0, /* On/Off */ \
+ 0 /* Linger */ \
+ }
+
+/* Default attributes for Multicast interface IP */
+#define OSAL_DEFAULT_IP_MREQ \
+ { \
+ 0 \
+ } /* The single 0 zero-initializes every member (all IPs) */
+
+#endif /* OSAL_DEFAULTS_H */
diff --git a/encoder/osal_errno.h b/encoder/osal_errno.h
new file mode 100644
index 0000000..153947e
--- /dev/null
+++ b/encoder/osal_errno.h
@@ -0,0 +1,207 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* */
+/* File Name : osal_errno.h */
+/* */
+/* Description : This file lists the error codes supported by OSAL */
+/* */
+/* List of Functions : None */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 30 03 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+#ifndef OSAL_ERRNO_H
+#define OSAL_ERRNO_H
+
+#define OSAL_SOCKERR_BASE 0x1000 /* every OSAL_* code in this file is this base plus an offset */
+
+#define OSAL_NOERROR (OSAL_SOCKERR_BASE + 0) /* equals the base value, i.e. it is NOT 0 */
+#define OSAL_INTR (OSAL_SOCKERR_BASE + 1)
+#define OSAL_BADF (OSAL_SOCKERR_BASE + 2)
+#define OSAL_ACCES (OSAL_SOCKERR_BASE + 3)
+#define OSAL_FAULT (OSAL_SOCKERR_BASE + 4)
+#define OSAL_INVAL (OSAL_SOCKERR_BASE + 5)
+#define OSAL_MFILE (OSAL_SOCKERR_BASE + 6)
+#define OSAL_WOULDBLOCK (OSAL_SOCKERR_BASE + 7)
+#define OSAL_INPROGRESS (OSAL_SOCKERR_BASE + 8)
+#define OSAL_ALREADY (OSAL_SOCKERR_BASE + 9)
+#define OSAL_NOTSOCK (OSAL_SOCKERR_BASE + 10)
+#define OSAL_DESTADDRREQ (OSAL_SOCKERR_BASE + 11)
+#define OSAL_MSGSIZE (OSAL_SOCKERR_BASE + 12)
+#define OSAL_PROTOTYPE (OSAL_SOCKERR_BASE + 13)
+#define OSAL_NOPROTOOPT (OSAL_SOCKERR_BASE + 14)
+#define OSAL_PROTONOSUPPORT (OSAL_SOCKERR_BASE + 15)
+#define OSAL_SOCKTNOSUPPORT (OSAL_SOCKERR_BASE + 16)
+#define OSAL_OPNOTSUPP (OSAL_SOCKERR_BASE + 17)
+#define OSAL_PFNOSUPPORT (OSAL_SOCKERR_BASE + 18)
+#define OSAL_AFNOSUPPORT (OSAL_SOCKERR_BASE + 19)
+#define OSAL_ADDRINUSE (OSAL_SOCKERR_BASE + 20)
+#define OSAL_ADDRNOTAVAIL (OSAL_SOCKERR_BASE + 21)
+#define OSAL_NETDOWN (OSAL_SOCKERR_BASE + 22)
+#define OSAL_NETUNREACH (OSAL_SOCKERR_BASE + 23)
+#define OSAL_NETRESET (OSAL_SOCKERR_BASE + 24)
+#define OSAL_CONNABORTED (OSAL_SOCKERR_BASE + 25)
+#define OSAL_CONNRESET (OSAL_SOCKERR_BASE + 26)
+#define OSAL_NOBUFS (OSAL_SOCKERR_BASE + 27)
+#define OSAL_ISCONN (OSAL_SOCKERR_BASE + 28)
+#define OSAL_NOTCONN (OSAL_SOCKERR_BASE + 29)
+#define OSAL_SHUTDOWN (OSAL_SOCKERR_BASE + 30)
+#define OSAL_TOOMANYREFS (OSAL_SOCKERR_BASE + 31)
+#define OSAL_TIMEDOUT (OSAL_SOCKERR_BASE + 32)
+#define OSAL_CONNREFUSED (OSAL_SOCKERR_BASE + 33)
+#define OSAL_LOOP (OSAL_SOCKERR_BASE + 34)
+#define OSAL_NAMETOOLONG (OSAL_SOCKERR_BASE + 35)
+#define OSAL_HOSTDOWN (OSAL_SOCKERR_BASE + 36)
+#define OSAL_HOSTUNREACH (OSAL_SOCKERR_BASE + 37)
+#define OSAL_NOTEMPTY (OSAL_SOCKERR_BASE + 38)
+#define OSAL_PROCLIM (OSAL_SOCKERR_BASE + 39)
+#define OSAL_USERS (OSAL_SOCKERR_BASE + 40)
+#define OSAL_DQUOT (OSAL_SOCKERR_BASE + 41)
+#define OSAL_STALE (OSAL_SOCKERR_BASE + 42)
+#define OSAL_REMOTE (OSAL_SOCKERR_BASE + 43)
+#define OSAL_SYSNOTREADY (OSAL_SOCKERR_BASE + 44)
+#define OSAL_VERNOTSUPPORTED (OSAL_SOCKERR_BASE + 45)
+#define OSAL_NOTINITIALISED (OSAL_SOCKERR_BASE + 46)
+#define OSAL_DISCON (OSAL_SOCKERR_BASE + 47)
+#define OSAL_NOMORE (OSAL_SOCKERR_BASE + 48)
+#define OSAL_CANCELLED (OSAL_SOCKERR_BASE + 49)
+#define OSAL_INVALIDPROCTABLE (OSAL_SOCKERR_BASE + 50)
+#define OSAL_INVALIDPROVIDER (OSAL_SOCKERR_BASE + 51)
+#define OSAL_PROVIDERFAILEDINIT (OSAL_SOCKERR_BASE + 52)
+#define OSAL_SYSCALLFAILURE (OSAL_SOCKERR_BASE + 53)
+#define OSAL_SERVICE_NOT_FOUND (OSAL_SOCKERR_BASE + 54)
+#define OSAL_TYPE_NOT_FOUND (OSAL_SOCKERR_BASE + 55)
+#define OSAL_E_NO_MORE (OSAL_SOCKERR_BASE + 56)
+#define OSAL_E_CANCELLED (OSAL_SOCKERR_BASE + 57)
+#define OSAL_REFUSED (OSAL_SOCKERR_BASE + 58)
+#define OSAL_HOST_NOT_FOUND (OSAL_SOCKERR_BASE + 59)
+#define OSAL_TRY_AGAIN (OSAL_SOCKERR_BASE + 60)
+#define OSAL_NO_RECOVERY (OSAL_SOCKERR_BASE + 61)
+#define OSAL_NO_DATA (OSAL_SOCKERR_BASE + 62)
+#define OSAL_NO_ADDRESS (OSAL_SOCKERR_BASE + 63)
+#define OSAL_QOS_RECEIVERS (OSAL_SOCKERR_BASE + 64)
+#define OSAL_QOS_SENDERS (OSAL_SOCKERR_BASE + 65)
+#define OSAL_QOS_NO_SENDERS (OSAL_SOCKERR_BASE + 66)
+#define OSAL_QOS_NO_RECEIVERS (OSAL_SOCKERR_BASE + 67)
+#define OSAL_QOS_REQUEST_CONFIRMED (OSAL_SOCKERR_BASE + 68)
+#define OSAL_QOS_ADMISSION_FAILURE (OSAL_SOCKERR_BASE + 69)
+#define OSAL_QOS_POLICY_FAILURE (OSAL_SOCKERR_BASE + 70)
+#define OSAL_QOS_BAD_STYLE (OSAL_SOCKERR_BASE + 71)
+#define OSAL_QOS_BAD_OBJECT (OSAL_SOCKERR_BASE + 72)
+#define OSAL_QOS_TRAFFIC_CTRL_ERROR (OSAL_SOCKERR_BASE + 73)
+#define OSAL_QOS_GENERIC_ERROR (OSAL_SOCKERR_BASE + 74)
+
+/* POSIX Error codes */
+#define OSAL_PERM (OSAL_SOCKERR_BASE + 75)
+#define OSAL_NOENT (OSAL_SOCKERR_BASE + 76)
+#define OSAL_SRCH (OSAL_SOCKERR_BASE + 77)
+#define OSAL_IO (OSAL_SOCKERR_BASE + 78)
+#define OSAL_NXIO (OSAL_SOCKERR_BASE + 79)
+#define OSAL_2BIG (OSAL_SOCKERR_BASE + 80)
+#define OSAL_NOEXEC (OSAL_SOCKERR_BASE + 81)
+#define OSAL_CHILD (OSAL_SOCKERR_BASE + 82)
+#define OSAL_AGAIN (OSAL_SOCKERR_BASE + 83)
+#define OSAL_NOMEM (OSAL_SOCKERR_BASE + 84)
+#define OSAL_NOTBLK (OSAL_SOCKERR_BASE + 85)
+#define OSAL_BUSY (OSAL_SOCKERR_BASE + 86)
+#define OSAL_EXIST (OSAL_SOCKERR_BASE + 87)
+#define OSAL_XDEV (OSAL_SOCKERR_BASE + 88)
+#define OSAL_NODEV (OSAL_SOCKERR_BASE + 89)
+#define OSAL_NOTDIR (OSAL_SOCKERR_BASE + 90)
+#define OSAL_ISDIR (OSAL_SOCKERR_BASE + 91)
+#define OSAL_NFILE (OSAL_SOCKERR_BASE + 92)
+#define OSAL_NOTTY (OSAL_SOCKERR_BASE + 93)
+#define OSAL_TXTBSY (OSAL_SOCKERR_BASE + 94)
+#define OSAL_FBIG (OSAL_SOCKERR_BASE + 95)
+#define OSAL_NOSPC (OSAL_SOCKERR_BASE + 96)
+#define OSAL_SPIPE (OSAL_SOCKERR_BASE + 97)
+#define OSAL_ROFS (OSAL_SOCKERR_BASE + 98)
+#define OSAL_MLINK (OSAL_SOCKERR_BASE + 99)
+#define OSAL_PIPE (OSAL_SOCKERR_BASE + 100)
+#define OSAL_DOM (OSAL_SOCKERR_BASE + 101)
+#define OSAL_RANGE (OSAL_SOCKERR_BASE + 102)
+#define OSAL_DEADLK (OSAL_SOCKERR_BASE + 103)
+#define OSAL_NOLCK (OSAL_SOCKERR_BASE + 104)
+#define OSAL_NOSYS (OSAL_SOCKERR_BASE + 105)
+#define OSAL_NOMSG (OSAL_SOCKERR_BASE + 106)
+#define OSAL_IDRM (OSAL_SOCKERR_BASE + 107)
+#define OSAL_CHRNG (OSAL_SOCKERR_BASE + 108)
+#define OSAL_L2NSYNC (OSAL_SOCKERR_BASE + 109)
+#define OSAL_L3HLT (OSAL_SOCKERR_BASE + 110)
+#define OSAL_L3RST (OSAL_SOCKERR_BASE + 111)
+#define OSAL_LNRNG (OSAL_SOCKERR_BASE + 112)
+#define OSAL_UNATCH (OSAL_SOCKERR_BASE + 113)
+#define OSAL_NOCSI (OSAL_SOCKERR_BASE + 114)
+#define OSAL_L2HLT (OSAL_SOCKERR_BASE + 115)
+#define OSAL_BADE (OSAL_SOCKERR_BASE + 116)
+#define OSAL_BADR (OSAL_SOCKERR_BASE + 117)
+#define OSAL_XFULL (OSAL_SOCKERR_BASE + 118)
+#define OSAL_NOANO (OSAL_SOCKERR_BASE + 119)
+#define OSAL_BADRQC (OSAL_SOCKERR_BASE + 120)
+#define OSAL_BADSLT (OSAL_SOCKERR_BASE + 121)
+#define OSAL_BFONT (OSAL_SOCKERR_BASE + 122)
+#define OSAL_NOSTR (OSAL_SOCKERR_BASE + 123)
+#define OSAL_NODATA (OSAL_SOCKERR_BASE + 124)
+#define OSAL_TIME (OSAL_SOCKERR_BASE + 125)
+#define OSAL_NOSR (OSAL_SOCKERR_BASE + 126)
+#define OSAL_NONET (OSAL_SOCKERR_BASE + 127)
+#define OSAL_NOPKG (OSAL_SOCKERR_BASE + 128)
+#define OSAL_NOLINK (OSAL_SOCKERR_BASE + 129)
+#define OSAL_ADV (OSAL_SOCKERR_BASE + 130)
+#define OSAL_SRMNT (OSAL_SOCKERR_BASE + 131)
+#define OSAL_COMM (OSAL_SOCKERR_BASE + 132)
+#define OSAL_PROTO (OSAL_SOCKERR_BASE + 133)
+#define OSAL_MULTIHOP (OSAL_SOCKERR_BASE + 134)
+#define OSAL_DOTDOT (OSAL_SOCKERR_BASE + 135)
+#define OSAL_BADMSG (OSAL_SOCKERR_BASE + 136)
+#define OSAL_OVERFLOW (OSAL_SOCKERR_BASE + 137)
+#define OSAL_NOTUNIQ (OSAL_SOCKERR_BASE + 138)
+#define OSAL_BADFD (OSAL_SOCKERR_BASE + 139)
+#define OSAL_REMCHG (OSAL_SOCKERR_BASE + 140)
+#define OSAL_LIBACC (OSAL_SOCKERR_BASE + 141)
+#define OSAL_LIBBAD (OSAL_SOCKERR_BASE + 142)
+#define OSAL_LIBSCN (OSAL_SOCKERR_BASE + 143)
+#define OSAL_LIBMAX (OSAL_SOCKERR_BASE + 144)
+#define OSAL_LIBEXEC (OSAL_SOCKERR_BASE + 145)
+#define OSAL_ILSEQ (OSAL_SOCKERR_BASE + 146)
+#define OSAL_RESTART (OSAL_SOCKERR_BASE + 147)
+#define OSAL_STRPIPE (OSAL_SOCKERR_BASE + 148)
+#define OSAL_UCLEAN (OSAL_SOCKERR_BASE + 149)
+#define OSAL_NOTNAM (OSAL_SOCKERR_BASE + 150)
+#define OSAL_NAVAIL (OSAL_SOCKERR_BASE + 151)
+#define OSAL_ISNAM (OSAL_SOCKERR_BASE + 152)
+#define OSAL_REMOTEIO (OSAL_SOCKERR_BASE + 153)
+#define OSAL_NOMEDIUM (OSAL_SOCKERR_BASE + 154)
+#define OSAL_MEDIUMTYPE (OSAL_SOCKERR_BASE + 155)
+#define OSAL_CANCELED (OSAL_SOCKERR_BASE + 156)
+#define OSAL_NOKEY (OSAL_SOCKERR_BASE + 157)
+#define OSAL_KEYEXPIRED (OSAL_SOCKERR_BASE + 158)
+#define OSAL_KEYREVOKED (OSAL_SOCKERR_BASE + 159)
+#define OSAL_KEYREJECTED (OSAL_SOCKERR_BASE + 160)
+
+#endif /* OSAL_ERRNO_H */
diff --git a/encoder/osal_error.c b/encoder/osal_error.c
new file mode 100644
index 0000000..a529af4
--- /dev/null
+++ b/encoder/osal_error.c
@@ -0,0 +1,224 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* */
+/* File Name : osal_error.c */
+/* */
+/* Description : This file contains all the error code mappings across*/
+/* platforms. */
+/* */
+/* List of Functions : get_windows_error */
+/* get_linux_error */
+/* get_ti_bios_error */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 30 03 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System includes files */
+
+#include <errno.h>
+
+/* User include files */
+#include "cast_types.h"
+#include "osal_errno.h"
+
+/*****************************************************************************/
+/* Global Variable Definitions */
+/*****************************************************************************/
+
+WORD32 osal_errno[] = { /* indexed by errno value (see get_linux_error); yields the OSAL_* code */
+ OSAL_NOERROR, /* 0: no error */
+ OSAL_PERM, /* EPERM */
+ OSAL_NOENT, /* ENOENT */
+ OSAL_SRCH, /* ESRCH */
+ OSAL_INTR, /* EINTR */
+ OSAL_IO, /* EIO */
+ OSAL_NXIO, /* ENXIO */
+ OSAL_2BIG, /* E2BIG */
+ OSAL_NOEXEC, /* ENOEXEC */
+ OSAL_BADF, /* EBADF */
+ OSAL_CHILD, /* ECHILD */
+ OSAL_AGAIN, /* EAGAIN, EWOULDBLOCK */
+ OSAL_NOMEM, /* ENOMEM */
+ OSAL_ACCES, /* EACCES */
+ OSAL_FAULT, /* EFAULT */
+ OSAL_NOTBLK, /* ENOTBLK */
+ OSAL_BUSY, /* EBUSY */
+ OSAL_EXIST, /* EEXIST */
+ OSAL_XDEV, /* EXDEV */
+ OSAL_NODEV, /* ENODEV */
+ OSAL_NOTDIR, /* ENOTDIR */
+ OSAL_ISDIR, /* EISDIR */
+ OSAL_INVAL, /* EINVAL */
+ OSAL_NFILE, /* ENFILE */
+ OSAL_MFILE, /* EMFILE */
+ OSAL_NOTTY, /* ENOTTY */
+ OSAL_TXTBSY, /* ETXTBSY */
+ OSAL_FBIG, /* EFBIG */
+ OSAL_NOSPC, /* ENOSPC */
+ OSAL_SPIPE, /* ESPIPE */
+ OSAL_ROFS, /* EROFS */
+ OSAL_MLINK, /* EMLINK */
+ OSAL_PIPE, /* EPIPE */
+ OSAL_DOM, /* EDOM */
+ OSAL_RANGE, /* ERANGE */
+ OSAL_DEADLK, /* EDEADLK, EDEADLOCK */
+ OSAL_NAMETOOLONG, /* ENAMETOOLONG */
+ OSAL_NOLCK, /* ENOLCK */
+ OSAL_NOSYS, /* ENOSYS */
+ OSAL_NOTEMPTY, /* ENOTEMPTY */
+ OSAL_LOOP, /* ELOOP */
+ OSAL_NOERROR, /* 41: no errno name listed for this slot; placeholder keeps later indices aligned */
+ OSAL_NOMSG, /* ENOMSG */
+ OSAL_IDRM, /* EIDRM */
+ OSAL_CHRNG, /* ECHRNG */
+ OSAL_L2NSYNC, /* EL2NSYNC */
+ OSAL_L3HLT, /* EL3HLT */
+ OSAL_L3RST, /* EL3RST */
+ OSAL_LNRNG, /* ELNRNG */
+ OSAL_UNATCH, /* EUNATCH */
+ OSAL_NOCSI, /* ENOCSI */
+ OSAL_L2HLT, /* EL2HLT */
+ OSAL_BADE, /* EBADE */
+ OSAL_BADR, /* EBADR */
+ OSAL_XFULL, /* EXFULL */
+ OSAL_NOANO, /* ENOANO */
+ OSAL_BADRQC, /* EBADRQC */
+ OSAL_BADSLT, /* EBADSLT */
+ OSAL_NOERROR, /* 58: no errno name listed for this slot; placeholder keeps later indices aligned */
+ OSAL_BFONT, /* EBFONT */
+ OSAL_NOSTR, /* ENOSTR */
+ OSAL_NODATA, /* ENODATA */
+ OSAL_TIME, /* ETIME */
+ OSAL_NOSR, /* ENOSR */
+ OSAL_NONET, /* ENONET */
+ OSAL_NOPKG, /* ENOPKG */
+ OSAL_REMOTE, /* EREMOTE */
+ OSAL_NOLINK, /* ENOLINK */
+ OSAL_ADV, /* EADV */
+ OSAL_SRMNT, /* ESRMNT */
+ OSAL_COMM, /* ECOMM */
+ OSAL_PROTO, /* EPROTO */
+ OSAL_MULTIHOP, /* EMULTIHOP */
+ OSAL_DOTDOT, /* EDOTDOT */
+ OSAL_BADMSG, /* EBADMSG */
+ OSAL_OVERFLOW, /* EOVERFLOW */
+ OSAL_NOTUNIQ, /* ENOTUNIQ */
+ OSAL_BADFD, /* EBADFD */
+ OSAL_REMCHG, /* EREMCHG */
+ OSAL_LIBACC, /* ELIBACC */
+ OSAL_LIBBAD, /* ELIBBAD */
+ OSAL_LIBSCN, /* ELIBSCN */
+ OSAL_LIBMAX, /* ELIBMAX */
+ OSAL_LIBEXEC, /* ELIBEXEC */
+ OSAL_ILSEQ, /* EILSEQ */
+ OSAL_RESTART, /* ERESTART */
+ OSAL_STRPIPE, /* ESTRPIPE */
+ OSAL_USERS, /* EUSERS */
+ OSAL_NOTSOCK, /* ENOTSOCK */
+ OSAL_DESTADDRREQ, /* EDESTADDRREQ */
+ OSAL_MSGSIZE, /* EMSGSIZE */
+ OSAL_PROTOTYPE, /* EPROTOTYPE */
+ OSAL_NOPROTOOPT, /* ENOPROTOOPT */
+ OSAL_PROTONOSUPPORT, /* EPROTONOSUPPORT */
+ OSAL_SOCKTNOSUPPORT, /* ESOCKTNOSUPPORT */
+ OSAL_OPNOTSUPP, /* EOPNOTSUPP */
+ OSAL_PFNOSUPPORT, /* EPFNOSUPPORT */
+ OSAL_AFNOSUPPORT, /* EAFNOSUPPORT */
+ OSAL_ADDRINUSE, /* EADDRINUSE */
+ OSAL_ADDRNOTAVAIL, /* EADDRNOTAVAIL */
+ OSAL_NETDOWN, /* ENETDOWN */
+ OSAL_NETUNREACH, /* ENETUNREACH */
+ OSAL_NETRESET, /* ENETRESET */
+ OSAL_CONNABORTED, /* ECONNABORTED */
+ OSAL_CONNRESET, /* ECONNRESET */
+ OSAL_NOBUFS, /* ENOBUFS */
+ OSAL_ISCONN, /* EISCONN */
+ OSAL_NOTCONN, /* ENOTCONN */
+ OSAL_SHUTDOWN, /* ESHUTDOWN */
+ OSAL_TOOMANYREFS, /* ETOOMANYREFS */
+ OSAL_TIMEDOUT, /* ETIMEDOUT */
+ OSAL_CONNREFUSED, /* ECONNREFUSED */
+ OSAL_HOSTDOWN, /* EHOSTDOWN */
+ OSAL_HOSTUNREACH, /* EHOSTUNREACH */
+ OSAL_ALREADY, /* EALREADY */
+ OSAL_INPROGRESS, /* EINPROGRESS */
+ OSAL_STALE, /* ESTALE */
+ OSAL_UCLEAN, /* EUCLEAN */
+ OSAL_NOTNAM, /* ENOTNAM */
+ OSAL_NAVAIL, /* ENAVAIL */
+ OSAL_ISNAM, /* EISNAM */
+ OSAL_REMOTEIO, /* EREMOTEIO */
+ OSAL_DQUOT, /* EDQUOT */
+ OSAL_NOMEDIUM, /* ENOMEDIUM */
+ OSAL_MEDIUMTYPE, /* EMEDIUMTYPE */
+ OSAL_CANCELED, /* ECANCELED */
+ OSAL_NOKEY, /* ENOKEY */
+ OSAL_KEYEXPIRED, /* EKEYEXPIRED */
+ OSAL_KEYREVOKED, /* EKEYREVOKED */
+ OSAL_KEYREJECTED, /* EKEYREJECTED */
+};
+
+/*****************************************************************************/
+/* */
+/* Function Name : get_linux_error */
+/* */
+/* Description : This function returns the error code for Redhat Linux */
+/* platform. */
+/* */
+/* Inputs : None */
+/* Globals : None */
+/* */
+/* Processing : Returns OSAL error code if error is a listed OSAL error */
+/* code. Or else returns platform dependent error code. */
+/* */
+/* Outputs : Error code */
+/* */
+/* Returns : If error is one of OSAL listed error code - OSAL_<ERROR> */
+/* Else system error code. */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 30 03 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+UWORD32 get_linux_error(void)
+{
+    const int err = errno; /* snapshot: the range check and the lookup must agree */
+    /* Translate only codes that index inside osal_errno[]; others pass through */
+    if(0 <= err && (int)(sizeof(osal_errno) / sizeof(osal_errno[0])) > err)
+        return osal_errno[err];
+    return err;
+}
diff --git a/encoder/osal_handle.h b/encoder/osal_handle.h
new file mode 100644
index 0000000..9a41618
--- /dev/null
+++ b/encoder/osal_handle.h
@@ -0,0 +1,94 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* */
+/* File Name : osal_handle.h */
+/* */
+/* Description : This file contains all the necessary structure */
+/* declarations used by the OSAL library. */
+/* */
+/* List of Functions : None */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 06 03 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+#ifndef OSAL_HANDLE_H
+#define OSAL_HANDLE_H
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+#define MAX_FDS 40 /* presumably the maximum number of descriptors tracked at once - TODO confirm */
+#define DEBUG_ORDER 100 /* capacity of each debug_handle_t array in osal_debug_t */
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+
+typedef enum /* presumably the values stored in debug_handle_t.state - TODO confirm */
+{
+ CREATED, /* = 0 */
+ DESTROYED, /* = 1 */
+ ERRORED /* = 2 */
+} DEBUG_STATE_T;
+
+/*****************************************************************************/
+/* Typedefs */
+/*****************************************************************************/
+
+typedef struct /* One tracked object: an opaque handle plus a state value */
+{
+ void *handle; /* Opaque handle of the tracked object */
+ WORD32 state; /* presumably a DEBUG_STATE_T value - TODO confirm */
+} debug_handle_t;
+
+typedef struct /* Per object kind: DEBUG_ORDER slots plus a counter */
+{
+ debug_handle_t thread_handle[DEBUG_ORDER]; /* Thread entries */
+ WORD32 thread_count; /* presumably the number of thread entries in use - TODO confirm */
+ debug_handle_t mutex_handle[DEBUG_ORDER]; /* Mutex entries */
+ WORD32 mutex_count; /* Counter paired with mutex_handle */
+ debug_handle_t mbox_handle[DEBUG_ORDER]; /* Mailbox entries */
+ WORD32 mbox_count; /* Counter paired with mbox_handle */
+ debug_handle_t socket_handle[DEBUG_ORDER]; /* Socket entries */
+ WORD32 socket_count; /* Counter paired with socket_handle */
+ debug_handle_t sem_handle[DEBUG_ORDER]; /* Semaphore entries */
+ WORD32 sem_count; /* Counter paired with sem_handle */
+ debug_handle_t select_engine_handle[DEBUG_ORDER]; /* Select engine entries */
+ WORD32 select_engine_count; /* Counter paired with select_engine_handle */
+} osal_debug_t;
+
+typedef struct /* OSAL handle: the memory manager and the callbacks used to allocate and free */
+{
+ void *mmr_handle; /* Handle to memory manager; passed as first argument to both callbacks */
+ void *(*alloc)(void *mmr_handle, UWORD32 size); /* Call back for allocation; 0 is treated as failure */
+ void (*free)(void *mmr_handle, void *mem); /* Call back for free */
+
+} osal_t;
+
+#endif /* OSAL_HANDLE_H */
diff --git a/encoder/osal_mbox.h b/encoder/osal_mbox.h
new file mode 100644
index 0000000..d4be0e1
--- /dev/null
+++ b/encoder/osal_mbox.h
@@ -0,0 +1,76 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* */
+/* File Name : osal_mbox.h */
+/* */
+/* Description : This file contains OSAL Mail box handle structure */
+/* definition. */
+/* */
+/* List of Functions : None */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 26 05 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+#ifndef OSAL_MBOX_H
+#define OSAL_MBOX_H
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Structures */
+/*****************************************************************************/
+
+/* Mail box handle structure. In WIN32, mail boxes are associated with each */
+/* thread, so the thread id represents the mail box in question. In POSIX, */
+/* the name distinguishes the mail boxes. */
+
+typedef struct
+{
+ WORD32 mq_id; /* Message queue identifier */
+ void *mmr_handle; /* Pointer to memory manager handle */
+ osal_t *hdl; /* Associated OSAL handle */
+} mbox_handle_t;
+
+typedef struct
+{
+ void *mmr_handle; /* Pointer to memory manager handle */
+ osal_t *hdl; /* Associated OSAL handle */
+
+ void *count_sem; /* Semaphore that handles a get on an empty queue */
+ void *sync_mutex; /* Mutex that keeps post msg and get msg in sync */
+ void *data; /* Messages posted to or fetched from the queue */
+
+ UWORD32 write_count; /* Post count */
+ UWORD32 read_count; /* Get count */
+ UWORD32 msg_size; /* Size of one message; NOTE(review): presumably in bytes - confirm */
+ UWORD32 mbox_len; /* Max number of messages the mailbox can handle */
+} custom_mbox_handle_t;
+
+#endif /* OSAL_MBOX_H */
diff --git a/encoder/osal_mutex.c b/encoder/osal_mutex.c
new file mode 100644
index 0000000..c0b61d0
--- /dev/null
+++ b/encoder/osal_mutex.c
@@ -0,0 +1,257 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* */
+/* File Name : osal_mutex.c */
+/* */
+/* Description : This file contains all the necessary function */
+/* definitions required to operate on mutex */
+/* */
+/* List of Functions : osal_get_mutex_handle_size (not defined in this file) */
+/* osal_mutex_create */
+/* osal_mutex_destroy */
+/* osal_mutex_lock */
+/* osal_mutex_lock_timed (not defined in this file) */
+/* osal_mutex_unlock */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 20 03 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+
+#include <errno.h>
+#include <semaphore.h>
+#include <pthread.h>
+#include <time.h>
+
+/* User include files */
+#include "cast_types.h"
+#include "osal.h"
+#include "osal_handle.h"
+#include "osal_mutex.h"
+
+/*****************************************************************************/
+/* */
+/* Function Name : osal_mutex_create */
+/* */
+/* Description : This function creates the mutex and returns the handle */
+/* to the user. */
+/* */
+/* Inputs : OSAL handle */
+/* Pointer to Memory manager handle */
+/* */
+/* Globals : None */
+/* */
+/* Processing : Allocates memory for Mutex handle and calls OS specific */
+/* mutex create API call. */
+/* */
+/* Outputs : Mutex handle */
+/* */
+/* Returns : On SUCCESS - Mutex handle */
+/* On FAILURE - NULL */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 20 03 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+void *osal_mutex_create(IN void *osal_handle)
+{
+    /* Builds a mutex handle in memory obtained through the OSAL handle's */
+    /* alloc callback and initialises the pthread mutex stored inside it. */
+    /* Returns the new handle, or 0 when the OSAL handle or one of its    */
+    /* callbacks is missing, the allocation fails, or                     */
+    /* pthread_mutex_init() reports an error.                             */
+
+    osal_t *osal = osal_handle;
+    mutex_handle_t *mutex;
+    void *mmr;
+
+    /* Both callbacks are needed: alloc below, free on the error path */
+    if(!osal || !osal->alloc || !osal->free)
+        return 0;
+
+    /* The memory manager handle is handed to every callback invocation */
+    mmr = osal->mmr_handle;
+
+    mutex = osal->alloc(mmr, sizeof(mutex_handle_t));
+    if(!mutex)
+        return 0;
+
+    /* osal_mutex_destroy() reads these two fields to release the handle */
+    mutex->mmr_handle = mmr;
+    mutex->hdl = osal;
+
+    /* NULL attributes; a zero result means the mutex is ready for use */
+    if(0 == pthread_mutex_init(&mutex->mutex_handle, NULL))
+        return mutex;
+
+    /* Initialisation failed: give the memory back */
+    osal->free(mmr, mutex);
+    return 0;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : osal_mutex_destroy */
+/* */
+/* Description : This function destroys the mutex. */
+/* */
+/* Inputs : Mutex Handle */
+/* */
+/* Globals : None */
+/* */
+/* Processing : This function destroys the mutex referenced by the handle */
+/* and frees the memory held by the handle. */
+/* */
+/* Outputs : Status of mutex destroy */
+/* */
+/* Returns : On SUCCESS - 0 */
+/* On FAILURE - -1 */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 22 03 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+WORD32 osal_mutex_destroy(IN void *mutex_handle)
+{
+    /* Destroys the pthread mutex inside the handle and then returns the */
+    /* handle memory through the owning OSAL handle's free callback.     */
+    /* Yields OSAL_SUCCESS, or OSAL_ERROR when the handle is null, its   */
+    /* OSAL handle or free callback is missing, or the destroy fails.    */
+    mutex_handle_t *mutex = (mutex_handle_t *)mutex_handle;
+    osal_t *owner;
+
+    if(!mutex)
+        return OSAL_ERROR;
+
+    owner = mutex->hdl;
+    if(!owner || !owner->free)
+        return OSAL_ERROR;
+
+    /* A non-zero result leaves the handle memory untouched */
+    if(pthread_mutex_destroy(&mutex->mutex_handle))
+        return OSAL_ERROR;
+
+    owner->free(mutex->mmr_handle, mutex);
+    return OSAL_SUCCESS;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : osal_mutex_lock */
+/* */
+/* Description : This function locks the mutex. */
+/* */
+/* Inputs : Mutex handle */
+/* */
+/* Globals : None */
+/* */
+/* Processing : Calls OS specific mutex lock API. */
+/* */
+/* Outputs : Status of mutex lock */
+/* */
+/* Returns : On SUCCESS - 0 */
+/* On FAILURE - -1 */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 22 03 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+WORD32 osal_mutex_lock(IN void *mutex_handle)
+{
+    /* Blocks until the mutex is acquired. pthread_mutex_lock() reports failure */
+    /* as a positive error number, so it is mapped to OSAL_ERROR here; success  */
+    /* yields OSAL_SUCCESS. A null handle is rejected with OSAL_ERROR.          */
+    mutex_handle_t *handle = (mutex_handle_t *)mutex_handle;
+
+    if(0 == handle)
+        return OSAL_ERROR;
+
+    return (0 == pthread_mutex_lock(&(handle->mutex_handle))) ? OSAL_SUCCESS : OSAL_ERROR;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : osal_mutex_unlock */
+/* */
+/* Description : This function unlocks the mutex */
+/* */
+/* Inputs : Mutex handle */
+/* */
+/* Globals : None */
+/* */
+/* Processing : Calls OS specific unlock mutex API. */
+/* */
+/* Outputs : Status of mutex unlock */
+/* */
+/* Returns : On SUCCESS - 0 */
+/* On FAILURE - -1 */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 22 03 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+WORD32 osal_mutex_unlock(IN void *mutex_handle)
+{
+    /* Releases the mutex held in the handle. A zero result from       */
+    /* pthread_mutex_unlock() becomes OSAL_SUCCESS; any other result,  */
+    /* and a null handle, become OSAL_ERROR.                           */
+    /* The handle itself is left intact.                               */
+    mutex_handle_t *mutex = (mutex_handle_t *)mutex_handle;
+    WORD32 result = OSAL_ERROR;
+
+    /* The unlock is attempted only when a handle was supplied */
+    if(mutex && !pthread_mutex_unlock(&mutex->mutex_handle))
+        result = OSAL_SUCCESS;
+
+    return result;
+}
diff --git a/encoder/osal_mutex.h b/encoder/osal_mutex.h
new file mode 100644
index 0000000..233c31b
--- /dev/null
+++ b/encoder/osal_mutex.h
@@ -0,0 +1,51 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* */
+/* File Name : osal_mutex.h */
+/* */
+/* Description : This file contains OSAL Mutex handle structure */
+/* definition. */
+/* */
+/* List of Functions : None */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 26 05 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+#ifndef OSAL_MUTEX_H
+#define OSAL_MUTEX_H
+
+/* Mutex handle: a pthread mutex plus the handles osal_mutex_destroy uses to free it. */
+typedef struct
+{
+ pthread_mutex_t mutex_handle; /* Mutex Identifier */
+ void *mmr_handle; /* Pointer to memory manager handle */
+ osal_t *hdl; /* Associated OSAL handle */
+
+} mutex_handle_t;
+
+#endif /* OSAL_MUTEX_H */
diff --git a/encoder/osal_network.h b/encoder/osal_network.h
new file mode 100644
index 0000000..684efe9
--- /dev/null
+++ b/encoder/osal_network.h
@@ -0,0 +1,51 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* */
+/* File Name : osal_network.h */
+/* */
+/* Description : This file contains OSAL Socket handle structure */
+/* definition. */
+/* */
+/* List of Functions : None */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 26 05 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+#ifndef OSAL_NETWORK_H
+#define OSAL_NETWORK_H
+
+/* Socket handle: a socket identifier plus the memory manager and OSAL handles. */
+typedef struct
+{
+ WORD32 s; /* Socket Identifier. NOTE(review): presumably the OS socket descriptor - confirm */
+ void *mmr_handle; /* Pointer to memory manager handle */
+ osal_t *hdl; /* Associated OSAL handle */
+
+} socket_handle_t;
+
+#endif /* OSAL_NETWORK_H */
diff --git a/encoder/osal_select_engine.h b/encoder/osal_select_engine.h
new file mode 100644
index 0000000..80f7ce1
--- /dev/null
+++ b/encoder/osal_select_engine.h
@@ -0,0 +1,76 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* */
+/* File Name : osal_select_engine.h */
+/* */
+/* Description : This file contains OSAL Select Engine handle */
+/* structure definition. */
+/* */
+/* List of Functions : None */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 26 05 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+#ifndef OSAL_SELECT_ENGINE_H
+#define OSAL_SELECT_ENGINE_H
+
+/*****************************************************************************/
+/* Constants */
+/*****************************************************************************/
+
+#define ACTIVE 1 /* NOTE(review): these three are presumably the values of select_engine_t.state - confirm */
+#define SHUTDOWN 2 /* See note on ACTIVE */
+#define DEAD 3 /* See note on ACTIVE */
+
+/*****************************************************************************/
+/* Structures */
+/*****************************************************************************/
+
+typedef struct
+{
+ osal_select_entry_t *readfds[MAX_FDS]; /* To check for read */
+ UWORD32 read_count; /* Count of read descriptors */
+ osal_select_entry_t *writefds[MAX_FDS]; /* To check for write */
+ UWORD32 write_count; /* Count of write descriptors */
+ osal_select_entry_t *exceptfds[MAX_FDS]; /* Check for errors */
+ UWORD32 except_count; /* Count of except descriptors */
+ WORD32 id; /* To generate id for each entry */
+ volatile WORD32 state; /* State of select engine; NOTE(review): presumably ACTIVE, SHUTDOWN or DEAD - confirm */
+ void *thread_handle; /* Select engine thread handle */
+ void *mutex_handle; /* Mutex for mutual exclusion. */
+ void *mmr_handle; /* Handle to memory manager */
+ osal_t *hdl; /* Associated OSAL handle */
+
+ /* Timeout for thread sleep */
+ UWORD32 select_timeout;
+
+ /* Timeout for the select system call made by the OSAL library */
+ UWORD32 select_poll_interval;
+} select_engine_t;
+
+#endif /* OSAL_SELECT_ENGINE_H */
diff --git a/encoder/osal_semaphore.c b/encoder/osal_semaphore.c
new file mode 100644
index 0000000..9875159
--- /dev/null
+++ b/encoder/osal_semaphore.c
@@ -0,0 +1,334 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* */
+/* File Name : osal_semaphore.c */
+/* */
+/* Description : This file contains all the necessary function */
+/* definitions required to operate on semaphore */
+/* */
+/* List of Functions : osal_sem_create */
+/* osal_sem_destroy */
+/* osal_sem_wait */
+/* osal_sem_wait_timed (not defined in this file) */
+/* osal_sem_post */
+/* osal_sem_count */
+/* query_semaphore (not defined in this file) */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 07 03 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+
+#include <semaphore.h>
+#include <errno.h>
+
+/* User include files */
+#include "cast_types.h"
+#include "osal.h"
+#include "osal_handle.h"
+#include "osal_semaphore.h"
+
+/*****************************************************************************/
+/* Static Function Declarations */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* */
+/* Function Name : osal_sem_create */
+/* */
+/* Description : This function creates the semaphore and returns the */
+/* handle to the user. */
+/* */
+/* Inputs : OSAL handle */
+/* Attributes of the semaphore */
+/* */
+/* Globals : None */
+/* */
+/* Processing : Allocates memory for handle and creates the semaphore */
+/* with specified initialized value by calling OS specific */
+/* API's. */
+/* */
+/* Outputs : Semaphore handle */
+/* */
+/* Returns : On SUCCESS - Semaphore handle */
+/* On FAILURE - NULL */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 07 03 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+void *osal_sem_create(IN void *osal_handle, IN osal_sem_attr_t *attr)
+{
+    /* Builds a semaphore handle in memory obtained through the OSAL     */
+    /* handle's alloc callback and initialises the semaphore with        */
+    /* attr->value. Returns the new handle, or 0 when the OSAL handle,   */
+    /* one of its callbacks or attr is missing, the allocation fails,    */
+    /* or sem_init() returns -1.                                         */
+
+    osal_t *osal = (osal_t *)osal_handle;
+    sem_handle_t *sem;
+    void *mmr;
+
+    /* Both callbacks are needed: alloc below, free on the error path */
+    if(!osal || !osal->alloc || !osal->free)
+        return 0;
+
+    /* The initial count comes from the attributes, so they are mandatory */
+    if(!attr)
+        return 0;
+
+    /* The memory manager handle is handed to every callback invocation */
+    mmr = osal->mmr_handle;
+
+    sem = osal->alloc(mmr, sizeof(sem_handle_t));
+    if(!sem)
+        return 0;
+
+    /* osal_sem_destroy() reads these two fields to release the handle */
+    sem->mmr_handle = mmr;
+    sem->hdl = osal;
+
+    /* Second argument 0: shared only between threads */
+    if(-1 != sem_init(&sem->sem_handle, 0, attr->value))
+        return sem;
+
+    /* Initialisation failed: give the memory back */
+    osal->free(mmr, sem);
+    return 0;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : osal_sem_destroy */
+/* */
+/* Description : This function closes the opened semaphore */
+/* */
+/* Inputs : Initialized Semaphore handle. */
+/* */
+/* Globals : None */
+/* */
+/* Processing : Calls OS specific API's to close the semaphore. */
+/* */
+/* Outputs : Status of Semaphore close */
+/* */
+/* Returns : On SUCCESS - 0 */
+/* On FAILURE - -1 */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 07 03 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+WORD32 osal_sem_destroy(IN void *sem_handle)
+{
+    /* Destroys the semaphore inside the handle and then returns the    */
+    /* handle memory through the owning OSAL handle's free callback.    */
+    /* Yields OSAL_SUCCESS, or OSAL_ERROR when the handle is null, its  */
+    /* OSAL handle or free callback is missing, or sem_destroy() fails. */
+    sem_handle_t *sem = (sem_handle_t *)sem_handle;
+    WORD32 status;
+
+    /* Handle, owning OSAL handle and its free callback must all exist */
+    if(!sem || !sem->hdl || !sem->hdl->free)
+        return OSAL_ERROR;
+
+    /* A non-zero result leaves the handle memory untouched */
+    status = sem_destroy(&sem->sem_handle);
+    if(0 != status)
+        return OSAL_ERROR;
+
+    /* Semaphore is gone; release the structure that held it */
+    sem->hdl->free(sem->mmr_handle, sem);
+    return OSAL_SUCCESS;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : osal_sem_wait */
+/* */
+/* Description : This function waits for semaphore to be unlocked and */
+/* then locks the semaphore and control returns back. */
+/* */
+/* Inputs : Initialized Semaphore handle */
+/* */
+/* Globals : None */
+/* */
+/* Processing : This function calls blocking semaphore lock API's which */
+/* block the caller till semaphore is locked by them or a */
+/* signal occurs which results in API function failure */
+/* */
+/* Outputs : Status of Semaphore wait */
+/* */
+/* Returns : On SUCCESS - 0 */
+/* On FAILURE - -1 */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 07 03 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+WORD32 osal_sem_wait(IN void *sem_handle)
+{
+    /* Blocks on the semaphore held in the handle and passes the value */
+    /* returned by sem_wait() straight back to the caller. A null      */
+    /* handle is rejected with OSAL_ERROR.                             */
+    sem_handle_t *sem = (sem_handle_t *)sem_handle;
+
+    if(!sem)
+        return OSAL_ERROR;
+
+    return sem_wait(&sem->sem_handle);
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : osal_sem_post */
+/* */
+/* Description : This function releases the lock on the semaphore */
+/* */
+/* Inputs : Initialized Semaphore handle */
+/* */
+/* Globals : None */
+/* */
+/* Processing : Calls OS specific API's to release the lock on Semaphore */
+/* */
+/* Outputs : Status of semaphore lock release */
+/* */
+/* Returns : On SUCCESS - 0 */
+/* On FAILURE - -1 */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 07 03 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+WORD32 osal_sem_post(IN void *sem_handle)
+{
+    /* Posts the semaphore held in the handle and hands back the value */
+    /* returned by sem_post(); a null handle yields OSAL_ERROR.        */
+    sem_handle_t *sem = (sem_handle_t *)sem_handle;
+    WORD32 status = OSAL_ERROR;
+
+    if(sem)
+        status = sem_post(&sem->sem_handle);
+
+    return status;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : osal_sem_count */
+/* */
+/* Description : This function returns the count of semaphore */
+/* */
+/* Inputs : Handle to Semaphore */
+/* Pointer to value holder */
+/* */
+/* Globals : None */
+/* */
+/* Processing : Calls OS specific API calls to query on semaphore */
+/* */
+/* Outputs : Status of Query */
+/* */
+/* Returns : On SUCCESS - 0 */
+/* On FAILURE - -1 */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 30 03 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+WORD32 osal_sem_count(IN void *sem_handle, OUT WORD32 *count)
+{
+    /* Stores the semaphore's current value in *count via sem_getvalue(). */
+    /* Yields OSAL_ERROR when either pointer is null or sem_getvalue()    */
+    /* returns -1, and OSAL_SUCCESS otherwise.                            */
+    sem_handle_t *sem = (sem_handle_t *)sem_handle;
+
+    /* Both the handle and the output location are mandatory */
+    if(!sem || !count)
+        return OSAL_ERROR;
+
+    /* Only a -1 result from the query is treated as failure */
+    return (-1 == sem_getvalue(&sem->sem_handle, count)) ? OSAL_ERROR : OSAL_SUCCESS;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : query_semaphore */
+/* */
+/* Description : This function calls NtQuerySemaphore() API call of */
+/* ntdll.dll */
+/* */
+/* Inputs : Handle to Semaphore */
+/* Pointer to value holder */
+/* */
+/* Globals : None */
+/* */
+/* Processing : This function calls NtQuerySemaphore() API call of */
+/* ntdll.dll */
+/* */
+/* Outputs : Status of Query */
+/* */
+/* Returns : On SUCCESS - 0 */
+/* On FAILURE - -1 */
+/* */
+/* Issues : No definition of query_semaphore follows in this file */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 30 03 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
diff --git a/encoder/osal_semaphore.h b/encoder/osal_semaphore.h
new file mode 100644
index 0000000..e42b9fa
--- /dev/null
+++ b/encoder/osal_semaphore.h
@@ -0,0 +1,65 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* */
+/* File Name : osal_semaphore.h */
+/* */
+/* Description : This file contains OSAL Semaphore handle structure */
+/* definition. */
+/* */
+/* List of Functions : None */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 26 05 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+#ifndef OSAL_SEMAPHORE_H
+#define OSAL_SEMAPHORE_H
+
+/*****************************************************************************/
+/* Constants */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* Structures */
+/*****************************************************************************/
+
+/* Semaphore handle structure. The semaphore itself is a POSIX sem_t stored */
+/* in the structure; mmr_handle and hdl are what osal_sem_destroy uses to */
+/* release the structure's memory. */
+typedef struct
+{
+ sem_t sem_handle; /* Semaphore handle */
+ void *mmr_handle; /* Pointer to memory manager handle */
+ osal_t *hdl; /* Associated OSAL handle */
+
+} sem_handle_t;
+
+#endif /* OSAL_SEMAPHORE_H */
diff --git a/encoder/osal_thread.c b/encoder/osal_thread.c
new file mode 100644
index 0000000..76b4495
--- /dev/null
+++ b/encoder/osal_thread.c
@@ -0,0 +1,707 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* */
+/* File Name : osal_thread.c */
+/* */
+/* Description : This file contains Thread API's implemented for */
+/* different platforms. */
+/* */
+/* List of Functions : osal_thread_create */
+/* osal_thread_destroy */
+/* osal_func */
+/* osal_set_thread_priority */
+/* osal_set_thread_core_affinity */
+/* osal_thread_sleep */
+/* osal_thread_yield */
+/* osal_thread_suspend */
+/* osal_thread_resume */
+/* osal_thread_wait */
+/* osal_get_thread_handle */
+/* osal_get_time */
+/* osal_get_time_usec */
+/* osal_get_last_error */
+/* osal_print_last_error */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 06 03 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <stdio.h>
+
+#include <semaphore.h>
+#include <pthread.h>
+#include <errno.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+
+#include <unistd.h>
+#include <math.h>
+#include <sched.h> /*for CPU_SET, etc.. */
+#include <linux/unistd.h>
+#include <sys/syscall.h>
+
+/* User include files */
+#include "cast_types.h"
+#include "osal.h"
+#include "osal_handle.h"
+#include "osal_thread.h"
+#include "osal_errno.h"
+
+/*****************************************************************************/
+/* Static Function Declarations */
+/*****************************************************************************/
+
+static void osal_func(void *param);
+
+/*****************************************************************************/
+/* */
+/* Function Name : osal_thread_create */
+/* */
+/* Description : This function create a new thread. */
+/* */
+/* Inputs : OSAL handle */
+/* Memory Manager Handle */
+/* Thread creation attributes */
+/* */
+/* Globals : None */
+/* */
+/* Processing : This function calls OS specific thread create API's and */
+/* creates a new thread with specified attributes. */
+/* */
+/* Outputs : Status of thread creation */
+/* */
+/* Returns : On SUCCESS - Pointer to the new thread handle */
+/* On FAILURE - 0 (NULL) */
+/* */
+/* Issues : Only supports creating threads with default attributes */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 06 03 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+void *osal_thread_create(IN void *osal_handle, IN osal_thread_attr_t *attr)
+{
+    /* Creates a thread running attr->thread_func(attr->thread_param) via */
+    /* osal_func. Returns the allocated thread handle, or 0 on any failure. */
+    osal_t *handle = (osal_t *)osal_handle;
+    WORD32 priority = 0;
+    void *mmr_handle = 0;
+
+    /* If Handle or attributes are not valid, return ERRORED. */
+    if(0 == attr)
+        return 0;
+
+    if(0 == handle || 0 == handle->alloc || 0 == handle->free)
+        return 0;
+
+    /* Initialize MMR handle */
+    mmr_handle = handle->mmr_handle;
+
+    {
+        pthread_attr_t tattr;
+        thread_handle_t *hdl = 0;
+
+        /* Overwrites the caller's policy; tattr itself keeps the defaults */
+        attr->sched_policy = OSAL_SCHED_RR;
+
+        /* Allocate memory for thread handle */
+        hdl = handle->alloc(mmr_handle, sizeof(thread_handle_t));
+        if(0 == hdl)
+            return 0;
+
+        /* Initialize thread handle parameters */
+        hdl->mmr_handle = mmr_handle;
+        hdl->hdl = handle;
+        hdl->exit_code = attr->exit_code;
+        hdl->priority = priority;
+        hdl->thread_func = attr->thread_func;
+        hdl->thread_param = attr->thread_param;
+
+        /* initialized with default attributes */
+        if(0 != pthread_attr_init(&tattr))
+        {
+            handle->free(hdl->mmr_handle, hdl);
+            return 0;
+        }
+
+        /* thread_id receives pthread_create()'s return code (0 on success) */
+        hdl->thread_id = pthread_create(
+            &(hdl->thread_handle), &tattr, (void *(*)(void *))osal_func, hdl);
+
+        /* tattr is only needed for the create call: release it on every path */
+        pthread_attr_destroy(&tattr);
+
+        /* On creation failure free the handle memory and report the error */
+        if(0 != hdl->thread_id)
+        {
+            handle->free(hdl->mmr_handle, hdl);
+            return 0;
+        }
+
+        return hdl;
+    }
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : osal_thread_destroy */
+/* */
+/* Description : This function calls OS specific API's to close a thread */
+/* which is represented by specified handle. */
+/* */
+/* Inputs : Initialized thread handle */
+/* */
+/* Globals : None */
+/* */
+/* Processing : Closing other threads is supported only in windows. So, */
+/* only windows platform supports this API. */
+/* */
+/* Outputs : Status of thread close */
+/* */
+/* Returns : On SUCCESS - 0 */
+/* On FAILURE - -1 */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 06 03 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+WORD32 osal_thread_destroy(IN void *thread_handle)
+{
+    thread_handle_t *ps_thread = (thread_handle_t *)thread_handle;
+    osal_t *ps_osal;
+
+    /* Nothing can be released for a null handle: report the error */
+    if(0 == ps_thread)
+        return OSAL_ERROR;
+
+    /* Return the handle memory; the thread itself is not joined here */
+    ps_osal = (osal_t *)ps_thread->hdl;
+    ps_osal->free(ps_thread->mmr_handle, ps_thread);
+
+    return OSAL_SUCCESS;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : osal_func */
+/* */
+/* Description : This function calls the registered threads calling */
+/* function */
+/* */
+/* Inputs : Thread Handle */
+/* */
+/* Globals : None */
+/* */
+/* Processing : Calls each registered thread function */
+/* */
+/* Outputs : None */
+/* Returns : None */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 10 05 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+void osal_func(IN void *param)
+{
+    thread_handle_t *ps_thread = (thread_handle_t *)param;
+    WORD32 i4_done = 0;
+
+    /* Re-invoke the registered thread function until its return value */
+    /* matches the exit code stored in the handle. */
+    while(!i4_done)
+    {
+        i4_done = (ps_thread->exit_code == ps_thread->thread_func(ps_thread->thread_param));
+    }
+
+    /* Terminate the calling thread explicitly; this call does not return */
+    pthread_exit(NULL);
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : osal_thread_sleep */
+/* */
+/* Description : This function calls OS specific API and makes thread */
+/* sleep for specified number of milli seconds. */
+/* */
+/* Inputs : Initialized thread handle */
+/* Time to sleep in milliseconds */
+/* */
+/* Globals : None */
+/* */
+/* Processing : Calls API to sleep for specified number of milli seconds */
+/* */
+/* Outputs : Status of sleep */
+/* */
+/* Returns : On SUCCESS - 0 */
+/* On FAILURE - -1 */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 06 03 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+WORD32 osal_thread_sleep(IN UWORD32 milli_seconds)
+{
+    struct timespec timer;
+
+    /* Split the delay into whole seconds and the sub-second remainder. */
+    /* The remainder is taken in unsigned arithmetic on the argument so */
+    /* that no signed time_t multiplication (which could overflow where */
+    /* time_t is 32 bits wide) is involved. */
+    timer.tv_sec = milli_seconds / 1000;
+    timer.tv_nsec = (milli_seconds % 1000) * MEGA_CONST;
+
+    /* Relative sleep (flags == 0) against the monotonic clock. Any */
+    /* non-zero result, including an interrupted sleep, is reported as */
+    /* OSAL_ERROR; the remaining time is not requested. */
+    if(0 != clock_nanosleep(CLOCK_MONOTONIC, 0, &timer, NULL))
+    {
+        return OSAL_ERROR;
+    }
+    return OSAL_SUCCESS;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : osal_thread_yield */
+/* */
+/* Description : This function causes the yield its execution. */
+/* */
+/* Inputs : Thread Handle */
+/* */
+/* Globals : None */
+/* */
+/* Processing : Calls OS specific yield calls. */
+/* */
+/* Outputs : Status of Thread Yield */
+/* */
+/* Returns : On SUCCESS - 0 */
+/* On FAILURE - -1 */
+/* */
+/* Issues : Yield in WIN32 (which is a 16 - bit API) is still present */
+/* only to maintain backward compatibility. Can get */
+/* deprecated in future. */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 06 03 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+WORD32 osal_thread_yield()
+{
+    /* sched_yield() returns 0 on success; anything else is a failure */
+    WORD32 i4_status = (0 == sched_yield()) ? OSAL_SUCCESS : OSAL_ERROR;
+
+    return i4_status;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : osal_thread_suspend */
+/* */
+/* Description : This function causes the suspension its execution. */
+/* */
+/* Inputs : Thread Handle */
+/* */
+/* Globals : None */
+/* */
+/* Processing : Calls OS specific suspend calls. */
+/* */
+/* Outputs : Status of Thread Suspend */
+/* */
+/* Returns : On SUCCESS - 0 */
+/* On FAILURE - -1 */
+/* */
+/* Issues : API not supported in Redhat Linux. Refer Redhat */
+/* documentation in: */
+/* http://www.redhat.com/docs/wp/solaris_port/c1347.html */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 30 03 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+WORD32 osal_thread_suspend(IN void *thread_handle)
+{
+    WORD32 i4_status = OSAL_NOT_SUPPORTED;
+
+    /* A null handle is rejected before anything else */
+    if(0 == thread_handle)
+    {
+        i4_status = OSAL_ERROR;
+    }
+
+    /* Valid handles always get OSAL_NOT_SUPPORTED: nothing is suspended */
+    return i4_status;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : osal_thread_resume */
+/* */
+/* Description : This function causes the resumption its execution. */
+/* */
+/* Inputs : Thread Handle */
+/* */
+/* Globals : None */
+/* */
+/* Processing : Calls OS specific resume calls. */
+/* */
+/* Outputs : Status of Thread Suspend */
+/* */
+/* Returns : On SUCCESS - 0 */
+/* On FAILURE - -1 */
+/* */
+/* Issues : API not supported in Redhat Linux. Refer Redhat */
+/* documentation in: */
+/* http://www.redhat.com/docs/wp/solaris_port/c1347.html */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 30 03 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+WORD32 osal_thread_resume(IN void *thread_handle)
+{
+    /* Resuming is not implemented in this port. The handle is only */
+    /* validated: */
+    /*   null handle   -> OSAL_ERROR */
+    /*   anything else -> OSAL_NOT_SUPPORTED */
+    if(0 != thread_handle)
+    {
+        return OSAL_NOT_SUPPORTED;
+    }
+
+    return OSAL_ERROR;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : osal_thread_wait */
+/* */
+/* Description : This function makes the caller wait until the given */
+/* thread finishes execution */
+/* */
+/* Inputs : Thread Handle */
+/* */
+/* Globals : None */
+/* */
+/* Processing : Calls OS specific wait call for wait on another thread */
+/* */
+/* Outputs : Status of Thread wait */
+/* */
+/* Returns : On SUCCESS - 0 */
+/* On FAILURE - OSAL_ERROR for a NULL handle, else the pthread_join() error code */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 30 03 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+WORD32 osal_thread_wait(IN void *thread_handle)
+{
+    thread_handle_t *ps_thread = (thread_handle_t *)thread_handle;
+    void *pv_exit_value = 0;
+
+    /* A null handle cannot be joined */
+    if(0 == ps_thread)
+    {
+        return OSAL_ERROR;
+    }
+
+    /* Block until the thread terminates. The value it exited with is */
+    /* collected but not used. The pthread_join() result is handed back */
+    /* unchanged: 0 on success, otherwise the error number reported by */
+    /* pthread_join(). */
+    return pthread_join(ps_thread->thread_handle, &pv_exit_value);
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : osal_get_thread_handle */
+/* */
+/* Description : This function gets current thread handle. Currently not */
+/* supported */
+/* */
+/* Inputs : OSAL handle. */
+/* */
+/* Globals : None */
+/* */
+/* Processing : Gets all the thread properties and constructs a new */
+/* thread handle . */
+/* */
+/* Outputs : Thread handle to current thread. */
+/* */
+/* Returns : On SUCCESS - Current thread handle */
+/* On FAILURE - NULL */
+/* */
+/* Issues : Not supported on Linux and BIOS platforms. */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 10 05 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+void *osal_get_thread_handle(IN void *osal_handle)
+{
+    osal_t *handle = (osal_t *)osal_handle;
+    thread_handle_t *hdl = 0;
+    struct sched_param schedparam;
+    int schedpolicy = 0;
+
+    /* Returns a newly allocated handle for the calling thread, or 0 */
+    if(0 == handle || 0 == handle->alloc)
+        return 0;
+
+    hdl = handle->alloc(handle->mmr_handle, sizeof(thread_handle_t));
+    if(0 == hdl)
+        return 0;
+    hdl->thread_handle = pthread_self();
+
+    /* Read the parameters before use; free the handle if that fails */
+    if(0 != pthread_getschedparam(hdl->thread_handle, &schedpolicy, &schedparam))
+    {
+        if(0 != handle->free)
+            handle->free(handle->mmr_handle, hdl);
+        return 0;
+    }
+    hdl->mmr_handle = handle->mmr_handle;
+    hdl->hdl = handle;
+    hdl->exit_code = 0;
+    hdl->thread_func = 0;
+    hdl->thread_param = 0;
+    hdl->thread_id = 0;
+    hdl->priority = schedparam.sched_priority;
+    hdl->policy = schedpolicy;
+    return hdl;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : osal_get_time */
+/* */
+/* Description : This function returns absolute time in milli seconds */
+/* */
+/* Inputs : None */
+/* Globals : None */
+/* */
+/* Processing : Gets the absolute time by calling OS specific API's. */
+/* */
+/* Outputs : Absolute time in milli seconds. */
+/* */
+/* Returns : +ve 32 bit value */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 06 03 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+UWORD32 osal_get_time()
+{
+    struct timespec time_val = { 0, 0 };
+    UWORD32 cur_time;
+
+    /* Get the Monotonic time; time_val stays zero if the call fails */
+    clock_gettime(CLOCK_MONOTONIC, &time_val);
+
+    /* Convert seconds and nanoseconds into milliseconds. The arithmetic */
+    /* is done in UWORD32 so the counter wraps instead of overflowing a */
+    /* signed intermediate once the seconds value grows large. */
+    cur_time = ((UWORD32)time_val.tv_sec * 1000u) + (UWORD32)(time_val.tv_nsec / 1000000);
+    return cur_time;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : osal_get_time_usec */
+/* */
+/* Description : This function returns absolute time in micro seconds */
+/* */
+/* Inputs : None */
+/* Globals : None */
+/* */
+/* Processing : Gets the absolute time by calling OS specific API's. */
+/* */
+/* Outputs : Seconds in *sec and remaining micro seconds in *usec. */
+/* */
+/* Returns : OSAL_SUCCESS, or OSAL_ERROR if sec or usec is NULL */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 06 03 2009 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+WORD32 osal_get_time_usec(UWORD32 *sec, UWORD32 *usec)
+{
+    struct timespec s_now;
+
+    /* Both output locations are mandatory */
+    if((0 == sec) || (0 == usec))
+    {
+        return OSAL_ERROR;
+    }
+
+    /* Sample the monotonic clock */
+    clock_gettime(CLOCK_MONOTONIC, &s_now);
+
+    /* Whole seconds are passed through; nanoseconds become microseconds */
+    *sec = s_now.tv_sec;
+    *usec = s_now.tv_nsec / 1000;
+    return OSAL_SUCCESS;
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : osal_get_last_error */
+/* */
+/* Description : This function gets the last error code. */
+/* */
+/* Inputs : None */
+/* Globals : None */
+/* */
+/* Processing : Gets the last occurred error code by calling OS specific */
+/* API call. */
+/* */
+/* Outputs : Error Number */
+/* */
+/* Returns : If no error - 0 */
+/* Else - +ve number */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 06 03 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+UWORD32 osal_get_last_error()
+{
+ UWORD32 get_linux_error(void); /* Block-scope declaration; the helper is not defined in this file */
+ return get_linux_error(); /* Its value is passed through unchanged */
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : osal_print_last_error */
+/* */
+/* Description : This function prints the last error message. */
+/* */
+/* Inputs : None */
+/* Globals : None */
+/* */
+/* Processing : Gets the last occurred error code by calling OS specific */
+/* API call. It prints argument string (if not NULL), */
+/* followed by ': ' then the error_string and <new_line>. */
+/* */
+/* Outputs : None */
+/* */
+/* Returns : None */
+/* */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 10 03 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+void osal_print_last_error(IN const STRWORD8 *string)
+{
+ perror(string); /* Writes string, ": " and the message for the current errno to stderr */
+}
+
+/*****************************************************************************/
+/* */
+/* Function Name : osal_get_current_tid */
+/* */
+/* Description : Gets the tid of the thread in whose context this call */
+/* was made */
+/* */
+/* Inputs : None */
+/* Globals : None */
+/* Processing : None */
+/* Outputs : None */
+/* Returns : Thread ID, as a WORD32 */
+/* Issues : None */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 07 05 2015 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+WORD32 osal_get_current_tid(void)
+{
+ return syscall(__NR_gettid); /* Raw gettid system call; the result is converted to WORD32 */
+}
diff --git a/encoder/osal_thread.h b/encoder/osal_thread.h
new file mode 100644
index 0000000..6c4a094
--- /dev/null
+++ b/encoder/osal_thread.h
@@ -0,0 +1,69 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*****************************************************************************/
+/* */
+/* File Name : osal_thread.h */
+/* */
+/* Description : This file contains OSAL Thread handle structure */
+/* definition. */
+/* */
+/* List of Functions : None */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 26 05 2006 Ittiam Draft */
+/* */
+/*****************************************************************************/
+
+#ifndef OSAL_THREAD_H
+#define OSAL_THREAD_H
+
+/*****************************************************************************/
+/* Constants */
+/*****************************************************************************/
+
+#define DIV_COEFF 10
+#define MEGA_CONST (1000 * 1000) /* Parenthesised so it expands as one value inside larger expressions */
+#define WAIT_INTERVAL 100
+
+/*****************************************************************************/
+/* Structures */
+/*****************************************************************************/
+
+/* Thread handle which stores the attributes related to a thread for the */
+/* platform it is being used under (POSIX threads here). */
+typedef struct
+{
+ pthread_t thread_handle; /* POSIX Thread handle */
+ WORD32 thread_id; /* Set from pthread_create()'s return code in osal_thread_create */
+ void *mmr_handle; /* Pointer to memory manager handle */
+ osal_t *hdl; /* Associated OSAL handle */
+ WORD32 priority; /* Thread priority */
+ WORD32 policy; /* Scheduling policy */
+ WORD32 exit_code; /* thread_func return value on which the thread shall exit */
+ WORD32 (*thread_func)(void *); /* Starting point of execution of thread */
+ void *thread_param; /* Thread function argument. */
+} thread_handle_t;
+
+#endif /* OSAL_THREAD_H */
diff --git a/encoder/picture_type.c b/encoder/picture_type.c
new file mode 100644
index 0000000..54a23da
--- /dev/null
+++ b/encoder/picture_type.c
@@ -0,0 +1,1842 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file picture_type.c
+*
+* \brief
+* This file contains the picture handling struct and functions
+*
+* \date
+*
+* \author
+* ittiam
+*
+******************************************************************************
+*/
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+
+/* User include files */
+#include "ittiam_datatypes.h"
+#include "rc_common.h"
+#include "rc_cntrl_param.h"
+#include "mem_req_and_acq.h"
+#include "picture_type.h"
+#include "trace_support.h"
+
+#define MAX_INTER_FRM_INT 10 /* Sizes the pic stack: as_pic_stack has MAX_INTER_FRM_INT + 2 entries */
+
+/* Pic_details: one picture entry, as stored in the pic stack of pic_handling_t */
+typedef struct
+{
+ WORD32 i4_pic_id; /* The id sent by the codec */
+ WORD32 i4_pic_disp_order_no; /* The pics come in, in this order */
+ picture_type_e e_pic_type; /* I,P,B */
+ WORD32 i4_is_scd; /* NOTE(review): presumably a scene-change-detected flag - confirm */
+} pic_details_t;
+
+/********** Pic_handling structure **********/
+typedef struct pic_handling_t
+{
+ /* Inputs from the codec */
+ WORD32
+ i4_intra_frm_int; /* Number of frames after which an I frame will repeat in display order */
+ WORD32 i4_inter_frm_int; /* (num_b_pics_in_subgop + 1) */
+ WORD32 i4_idr_period; /* IDR frame interval, HEVC specific implementation*/
+ WORD32
+ i4_max_inter_frm_int; /* After these many buffered frames, the pics are encoded */
+ WORD32 i4_is_gop_closed; /* OPEN or CLOSED */
+ WORD32
+ i4_num_gop_in_idr_period; /* number of open GOPs between two closed GOP*/
+ WORD32
+ i4_open_gop_count; /* when open GOP count == i4_num_open_gop then insert a closed GOP*/
+
+ /* The pic stack */
+ pic_details_t
+ as_pic_stack[MAX_INTER_FRM_INT + 2]; /* Stack used to store the input pics in encode order */
+
+ /* Counters */
+ WORD32 i4_buf_pic_no; /* Decides whether a B or ref pic */
+ WORD32
+ i4_pic_disp_order_no; /* Current pic's number in display order; gets reset after an I-frm */
+ WORD32
+ i4_p_count_in_gop; /* Number of P frms that have come, in the current gop, so far */
+ WORD32
+ i4_b_count_in_gop; /* Number of B frms that have come, in the current gop, so far */
+ WORD32
+ i4_b_count_in_subgop; /* Number of B frms that have come, in the current subgop, so far */
+
+ /* Indices to the pic stack (Since we store the pics in the encode order, these vars are modified to meet that) */
+ WORD32 i4_b_pic_idx; /* B_PIC index */
+ WORD32 i4_ref_pic_idx; /* I,P PIC index */
+
+ /* Variables operating on the input pics */
+ WORD32
+ i4_is_first_gop; /* Flag denoting whether it's the first gop or not */
+ WORD32 i4_b_in_incomp_subgop; /* Number of B_PICs in an incomplete subgop */
+ WORD32
+ i4_extra_p; /* In CLOSED_GOPs, even if inter_frm_int > 1, there can be 2 continuous
+ P_PICs at the GOP end. This takes values of 0 or 1 */
+ /* Arrays storing the number of frms in the gop */
+ WORD32 i4_frms_in_gop
+ [MAX_PIC_TYPE]; /* In the steady state, what's the pic distribution in display order */
+ WORD32 i4_frms_in_cur_gop
+ [MAX_PIC_TYPE]; /* In case of a change in inter frm int call, the pic distribution in that gop in display order */
+ WORD32 i4_actual_frms_in_gop
+ [MAX_PIC_TYPE]; /*HEVC_RC: This holds true number of pics in GOP ignoring ref and non ref B pic*/
+
+ /* WORD32 i4_rem_frms_in_gop[MAX_PIC_TYPE];*/ /* This is used to denote the number of frms remaining to be encoded in the current gop */
+ WORD32 i4_rem_frms_in_cur_gop;
+
+ /* Variables operating on the output pics */
+ WORD32 i4_coded_pic_no; /* Counts the frms encoded in a gop */
+ WORD32
+ i4_stack_count; /* Counts from the start of stack to the end repeatedly */
+
+ /* Tracking a change in the inputs from the codec */
+ WORD32
+ i4_change_in_inter_frm_int; /* A flag that is set when the codec calls for a change in inter_frm_int */
+ WORD32
+ i4_new_inter_frm_int; /* When a change_in_inter_frm_int is called, this stores the new inter_frm_int */
+ WORD32
+ i4_b_in_incomp_subgop_mix_gop; /* When a change_in_inter_frm_int is called in the middle of a gop, this stores
+ the B_PICs in the incomplete subgop of the mixed gop */
+ WORD32
+ i4_extra_p_mix_gop; /* For a CLOSED GOP, when a change_in_inter_frm_int is called in the middle of a gop,
+ this is a flag denoting if there is an extra P_PIC in the mixed gop */
+ WORD32
+ i4_change_in_intra_frm_int; /* A flag that is set when the codec calls for a change in intra_frm_int */
+ WORD32
+ i4_new_intra_frm_int; /* When a change_in_intra_frm_int is called, this stores the new intra_frm_int */
+
+ /* Previous pic_stack_indices & details */
+ pic_details_t s_prev_pic_details;
+ WORD32 i4_prev_b_pic_idx;
+
+ WORD32 i4_last_frm_in_gop;
+ WORD32 i4_first_gop_encoded;
+
+ picture_type_e e_previous_pic_type; /* NITT TBR */
+ WORD32 i4_force_I_frame;
+ WORD32 i4_sum_remaining_frm_in_gop;
+ WORD32 i4_mod_temp_ref_cnt;
+ WORD32 i4_frames_in_fif_gop;
+ WORD32 i4_prev_intra_frame_interval;
+ WORD32 i4_pic_order_cnt_base_offset;
+ WORD32 i4_enable_modulo;
+ WORD32 i4_change_inter_frm_interval_correction;
+ WORD32 i4_non_ref_B_pic_count;
+ WORD32 i4_num_active_pic_type;
+ WORD32 i4_field_pic;
+} pic_handling_t;
+
+static void update_pic_distbn(
+ pic_handling_t *ps_pic_handling,
+ WORD32 i4_intra_frm_int,
+ WORD32 i4_inter_frm_int,
+ WORD32 i4_gop_boundary);
+
+static void find_pic_distbn_in_gop(
+ WORD32 i4_frms_in_gop[MAX_PIC_TYPE],
+ WORD32 i4_actual_frms_in_gop[MAX_PIC_TYPE],
+ WORD32 i4_intra_frm_int,
+ WORD32 i4_inter_frm_int,
+ WORD32 i4_is_gop_closed,
+ WORD32 *pi4_b_in_incomp_subgop,
+ WORD32 *pi4_extra_p,
+ WORD32 i4_num_active_pic_type,
+ WORD32 i4_field_pic);
+
+#if NON_STEADSTATE_CODE
+WORD32 pic_handling_num_fill_use_free_memtab(
+    pic_handling_t **pps_pic_handling, itt_memtab_t *ps_memtab, ITT_FUNC_TYPE_E e_func_type)
+{
+    /* Placeholder state used while no real state memory exists yet */
+    static pic_handling_t s_pic_handling_temp;
+    WORD32 i4_num_memtabs = 0;
+
+    /* Count and fill passes: aim the caller's pointer at the placeholder */
+    if((GET_NUM_MEMTAB == e_func_type) || (FILL_MEMTAB == e_func_type))
+        *pps_pic_handling = &s_pic_handling_temp;
+
+    /* The single memtab for pic_handling_t is skipped on a pure count call */
+    if(GET_NUM_MEMTAB != e_func_type)
+    {
+        itt_memtab_t *ps_state_memtab = &ps_memtab[i4_num_memtabs];
+        fill_memtab(ps_state_memtab, sizeof(pic_handling_t), MEM_TAB_ALIGNMENT, PERSISTENT, DDR);
+        use_or_fill_base(ps_state_memtab, (void **)pps_pic_handling, e_func_type);
+    }
+
+    /* Exactly one memtab is reported for every function type */
+    return (i4_num_memtabs + 1);
+}
+
+/******************************************************************************
+ Function Name : init_pic_handling
+ Description : initializes the pic handling state struct
+ Arguments :
+ Return Values : void
+ Revision History:
+ Creation
+*****************************************************************************/
+void init_pic_handling(
+    pic_handling_t *ps_pic_handling,
+    WORD32 i4_intra_frm_int,
+    WORD32 i4_max_inter_frm_int,
+    WORD32 i4_is_gop_closed,
+    WORD32 i4_idr_period,
+    WORD32 i4_num_active_pic_type,
+    WORD32 i4_field_pic)
+{
+    /* Resets the whole pic handling state from the codec parameters */
+    WORD32 i;
+
+    ps_pic_handling->i4_num_active_pic_type = i4_num_active_pic_type;
+    ps_pic_handling->i4_idr_period = i4_idr_period;
+    /* Number of GOPs per IDR period. The value defaults to 1, which is */
+    /* what this function has always used when */
+    /*   - i4_intra_frm_int == i4_idr_period, or */
+    /*   - i4_idr_period == 0 (all GOPs are open GOPs). */
+    /* The same default now also applies to a negative i4_idr_period */
+    /* (the field used to stay uninitialised) and to a zero */
+    /* i4_intra_frm_int (the division below used to divide by zero). */
+    ps_pic_handling->i4_num_gop_in_idr_period = 1;
+    if((i4_idr_period > 0) && (i4_intra_frm_int != i4_idr_period) &&
+       (0 != i4_intra_frm_int))
+    {
+        ps_pic_handling->i4_num_gop_in_idr_period =
+            (i4_idr_period + i4_max_inter_frm_int - 1) / i4_intra_frm_int;
+    }
+
+    /* Checks */
+    /* Codec Parameters */
+    ps_pic_handling->i4_intra_frm_int = i4_intra_frm_int;
+    ps_pic_handling->i4_inter_frm_int = i4_max_inter_frm_int;
+    ps_pic_handling->i4_max_inter_frm_int = i4_max_inter_frm_int;
+    ps_pic_handling->i4_is_gop_closed = i4_is_gop_closed;
+    ps_pic_handling->i4_field_pic = i4_field_pic;
+    /* Pic_stack */
+    memset(ps_pic_handling->as_pic_stack, 0, sizeof(ps_pic_handling->as_pic_stack));
+    memset(&ps_pic_handling->s_prev_pic_details, 0, sizeof(ps_pic_handling->s_prev_pic_details));
+
+    /* Counters */
+    ps_pic_handling->i4_buf_pic_no = 0;
+    ps_pic_handling->i4_pic_disp_order_no = 0;
+
+    /* Indices to the pic_stack */
+    ps_pic_handling->i4_ref_pic_idx = 0;
+    ps_pic_handling->i4_b_pic_idx = 2;
+    ps_pic_handling->i4_prev_b_pic_idx = 2;
+
+    /* Variables working on the input frames */
+    ps_pic_handling->i4_is_first_gop = 1;
+    ps_pic_handling->i4_p_count_in_gop = 0;
+    ps_pic_handling->i4_b_count_in_gop = 0;
+    ps_pic_handling->i4_b_count_in_subgop = 0;
+
+    /* Variables working on the output frames */
+    ps_pic_handling->i4_coded_pic_no = -1;
+    ps_pic_handling->i4_stack_count = -1;
+
+    /* Tracks the changes in the Codec Parameters */
+    ps_pic_handling->i4_change_in_inter_frm_int = 0;
+    ps_pic_handling->i4_new_inter_frm_int = i4_max_inter_frm_int;
+
+    /* Tracks the changes in the Codec Parameters */
+    ps_pic_handling->i4_change_in_intra_frm_int = 0;
+    ps_pic_handling->i4_new_intra_frm_int = i4_intra_frm_int;
+    ps_pic_handling->i4_open_gop_count = 1;
+
+    /* Variables on which the bit allocation is dependent */
+    /* Get the pic distribution in the gop */
+    find_pic_distbn_in_gop(
+        ps_pic_handling->i4_frms_in_gop,
+        ps_pic_handling->i4_actual_frms_in_gop,
+        i4_intra_frm_int,
+        i4_max_inter_frm_int,
+        i4_is_gop_closed,
+        &ps_pic_handling->i4_b_in_incomp_subgop,
+        &ps_pic_handling->i4_extra_p,
+        ps_pic_handling->i4_num_active_pic_type,
+        ps_pic_handling->i4_field_pic);
+
+    ps_pic_handling->i4_rem_frms_in_cur_gop = 0;
+    for(i = 0; i < MAX_PIC_TYPE; i++)
+    {
+        ps_pic_handling->i4_frms_in_cur_gop[i] = ps_pic_handling->i4_frms_in_gop[i];
+        ps_pic_handling->i4_rem_frms_in_cur_gop += ps_pic_handling->i4_actual_frms_in_gop[i];
+    }
+    /*Since first GOP will be closed GOP in all condition make sure end of GOP flag is set before qp query is done for next I frame*/
+    /*HEVC_hierarchy*/
+    ps_pic_handling->i4_rem_frms_in_cur_gop -= i4_max_inter_frm_int - 1;
+
+    ps_pic_handling->e_previous_pic_type = I_PIC;
+    ps_pic_handling->i4_force_I_frame = 0;
+    ps_pic_handling->i4_sum_remaining_frm_in_gop = 0;
+    ps_pic_handling->i4_mod_temp_ref_cnt = 0;
+
+    ps_pic_handling->i4_b_in_incomp_subgop_mix_gop = ps_pic_handling->i4_b_in_incomp_subgop;
+    ps_pic_handling->i4_extra_p_mix_gop = ps_pic_handling->i4_extra_p;
+
+    ps_pic_handling->i4_last_frm_in_gop = 0;
+    ps_pic_handling->i4_first_gop_encoded = 0;
+    ps_pic_handling->i4_frames_in_fif_gop = 0;
+    ps_pic_handling->i4_pic_order_cnt_base_offset = 0;
+    ps_pic_handling->i4_enable_modulo = 0;
+    ps_pic_handling->i4_change_inter_frm_interval_correction = 0;
+    ps_pic_handling->i4_prev_intra_frame_interval = i4_intra_frm_int; /*i_only*/
+    ps_pic_handling->i4_non_ref_B_pic_count = 0;
+}
+#endif /* #if NON_STEADSTATE_CODE */
+
+/* ******************************************************************************/
+/**
+ * @brief Records a pending change of the intra frame interval
+ *
+ * Only latches the request: the new value is stored in i4_new_intra_frm_int
+ * and i4_change_in_intra_frm_int is raised. Within this file the flag is
+ * consumed by add_pic_to_stack(), which calls update_pic_distbn() with the
+ * stored value once i4_pic_disp_order_no equals 1.
+ *
+ * @param ps_pic_handling  picture handling state to update
+ * @param i4_intra_frm_int requested intra frame interval
+ */
+/* ******************************************************************************/
+void pic_handling_register_new_int_frm_interval(
+    pic_handling_t *ps_pic_handling, WORD32 i4_intra_frm_int)
+{
+    /* Store the requested value, then mark it as pending */
+    ps_pic_handling->i4_new_intra_frm_int = i4_intra_frm_int;
+    ps_pic_handling->i4_change_in_intra_frm_int = 1;
+
+    /* Historical note: an immediate update_pic_distbn(ps_pic_handling,
+     * i4_new_intra_frm_int, i4_inter_frm_int, 1) call (with the change flag
+     * cleared again) used to be made here for control calls arriving before
+     * the first frame is encoded. The original author judged it unnecessary,
+     * so the update stays deferred. */
+}
+/******************************************************************************
+  Function Name   : pic_handling_register_new_inter_frm_interval
+  Description     : Records a pending change of the inter frame interval.
+                    The value is only stored here; add_pic_to_stack() applies
+                    it through update_pic_distbn() when its conditions hold.
+  Arguments       : ps_pic_handling  - picture handling state to update
+                    i4_inter_frm_int - requested inter frame interval
+  Return Values   : void
+  Revision History:
+    Creation
+*****************************************************************************/
+void pic_handling_register_new_inter_frm_interval(
+    pic_handling_t *ps_pic_handling, WORD32 i4_inter_frm_int)
+{
+    /* Store the requested value, then mark it as pending */
+    ps_pic_handling->i4_new_inter_frm_int = i4_inter_frm_int;
+    ps_pic_handling->i4_change_in_inter_frm_int = 1;
+}
+/******************************************************************************
+  Function Name   : start_new_gop
+  Description     : Ends the current gop early and re-arms the state for a
+                    new one. Resets the input-side counters, records how many
+                    frames the interrupted gop had and how many were still
+                    outstanding, and reloads the per-type frame counts from
+                    the steady-state distribution (i4_frms_in_gop).
+  Arguments       : ps_pic_handling - picture handling state to update
+  Return Values   : void
+  Revision History:
+    Creation
+*****************************************************************************/
+static void start_new_gop(pic_handling_t *ps_pic_handling)
+{
+    WORD32 i4_type;
+    WORD32 i4_frames_total = 0;
+
+    /* Input-side counters start over with the new gop */
+    ps_pic_handling->i4_buf_pic_no = 0;
+    ps_pic_handling->i4_pic_disp_order_no = 0;
+    ps_pic_handling->i4_is_first_gop = 0;
+
+    /* The mixed-gop values fall back to the steady-state ones; the incomplete
+       subgop count is refreshed only for a closed gop */
+    ps_pic_handling->i4_extra_p_mix_gop = ps_pic_handling->i4_extra_p;
+    if(ps_pic_handling->i4_is_gop_closed)
+    {
+        ps_pic_handling->i4_b_in_incomp_subgop_mix_gop = ps_pic_handling->i4_b_in_incomp_subgop;
+    }
+
+    /* Frames seen in the interrupted gop: the B and P pictures counted so far
+       plus one */
+    ps_pic_handling->i4_frames_in_fif_gop =
+        ps_pic_handling->i4_b_count_in_gop + ps_pic_handling->i4_p_count_in_gop + 1;
+
+    /* Remember what was still outstanding before the count is rebuilt */
+    ps_pic_handling->i4_sum_remaining_frm_in_gop = ps_pic_handling->i4_rem_frms_in_cur_gop;
+
+    /* Reload the per-type counts and rebuild the remaining-frames total */
+    for(i4_type = 0; i4_type < MAX_PIC_TYPE; i4_type++)
+    {
+        const WORD32 i4_frames = ps_pic_handling->i4_frms_in_gop[i4_type];
+
+        ps_pic_handling->i4_frms_in_cur_gop[i4_type] = i4_frames;
+        i4_frames_total += i4_frames;
+    }
+    ps_pic_handling->i4_rem_frms_in_cur_gop = i4_frames_total;
+}
+
+/* ******************************************************************************/
+/**
+ * @brief Fills the pic_stack with the incoming pics in encode order
+ *
+ * Steps performed on every call, in this order:
+ *  1) If i4_rc_in_pic is I_PIC or I_PIC_SCD, set_force_I_frame_flag() is called.
+ *  2) While i4_force_I_frame is set, the type this picture would normally get
+ *     is predicted and start_new_gop() is called when the gop structure allows
+ *     it. The picture is marked as a scene change when i4_rc_in_pic is
+ *     I_PIC_SCD, or is I_PIC while the predicted type is not I_PIC.
+ *  3) A pending intra and/or inter frame interval change is applied through
+ *     update_pic_distbn().
+ *  4) The picture id, type and display order number are written to
+ *     as_pic_stack[] at i4_b_pic_idx (B_PIC) or i4_ref_pic_idx (I_PIC/P_PIC),
+ *     the entry is mirrored into s_prev_pic_details and the used index is
+ *     advanced modulo (i4_max_inter_frm_int + 1).
+ *  5) The input-side counters are advanced, reset at the end of the gop, and
+ *     the modified locals are written back to the state structure.
+ *
+ * @param ps_pic_handling picture handling state, read and updated
+ * @param i4_enc_pic_id   id stored with the picture in the stack
+ * @param i4_rc_in_pic    picture type reported for the input frame; only
+ *                        compared against I_PIC and I_PIC_SCD here
+ */
+/* ******************************************************************************/
+void add_pic_to_stack(pic_handling_t *ps_pic_handling, WORD32 i4_enc_pic_id, WORD32 i4_rc_in_pic)
+{
+ /* Declarations */
+ WORD32 i4_inter_frm_int, i4_max_inter_frm_int, i4_intra_frm_int, i4_new_inter_frm_int;
+ WORD32 i4_is_gop_closed;
+ WORD32 i4_buf_pic_no, i4_pic_disp_order_no;
+ WORD32 i4_b_pic_idx, i4_ref_pic_idx;
+ WORD32 i4_is_first_gop, i4_b_in_incomp_subgop, i4_p_count_in_gop, i4_b_count_in_gop,
+ i4_b_count_in_subgop;
+ WORD32 i, i4_p_frms_in_prd, i4_b_frms_in_prd, i4_num_b_in_subgop, i4_extra_p;
+ WORD32 i4_condn_for_change_in_inter_frm_int;
+ picture_type_e e_previous_pic_type, e_cur_pic_type;
+ WORD32 i4_force_I_frame;
+ WORD32 i4_is_scd = 0; /* only ever set inside the force-I-frame section below */
+
+ /* Just force an I picture if the input frame is an I frame. Normal I picture will anyway be taken care
+ inside add_pic_to_stack(). And inside add_pic_to_stack() let us take care of U(nexpected)I frame
+ for resetting the model */
+ if(i4_rc_in_pic == I_PIC || i4_rc_in_pic == I_PIC_SCD)
+ {
+ set_force_I_frame_flag(ps_pic_handling);
+ }
+
+ /* Initialize the local vars with the state struct values needed by the change calls */
+ i4_intra_frm_int = ps_pic_handling->i4_intra_frm_int;
+ i4_inter_frm_int = ps_pic_handling->i4_inter_frm_int;
+ i4_max_inter_frm_int = ps_pic_handling->i4_max_inter_frm_int;
+ i4_is_gop_closed = ps_pic_handling->i4_is_gop_closed;
+
+ i4_buf_pic_no = ps_pic_handling->i4_buf_pic_no;
+ i4_pic_disp_order_no = ps_pic_handling->i4_pic_disp_order_no;
+ i4_b_count_in_gop = ps_pic_handling->i4_b_count_in_gop;
+ i4_b_frms_in_prd = ps_pic_handling->i4_frms_in_cur_gop[B_PIC];
+ i4_is_first_gop = ps_pic_handling->i4_is_first_gop;
+ i4_new_inter_frm_int = ps_pic_handling->i4_new_inter_frm_int;
+ e_previous_pic_type = ps_pic_handling->e_previous_pic_type;
+ i4_force_I_frame = ps_pic_handling->i4_force_I_frame; /* read after the set_force_I_frame_flag() call above */
+ /******************************* Force I frame ******************************/
+ /* Two different cases
+ 1)OPEN_GOP:
+ New GOP is started after number of B pictures in the last sub gop of a gop to mimic the
+ GOP structure.
+
+ 2)Closed GOP:Wait till P frame at input and The frame after a P frame a new GOP is started
+ to mimic the GOP structure.
+ */
+ if(i4_force_I_frame)
+ {
+ WORD32 i4_temp_is_gop_closed;
+ WORD32 i4_codn = 0;
+ /* A special case of Open GOP where the it behaves like Closed GOP*/
+ if((i4_intra_frm_int % i4_inter_frm_int) == 1) /* NOTE(review): undefined if i4_inter_frm_int is 0 - confirm callers guarantee >= 1 */
+ {
+ i4_temp_is_gop_closed = 1;
+ }
+ else
+ {
+ i4_temp_is_gop_closed = i4_is_gop_closed;
+ }
+ /* Get the current picture type to aid decision to force an I frame*/
+ if((i4_buf_pic_no % i4_inter_frm_int) &&
+ !(i4_is_gop_closed && (i4_b_count_in_gop == i4_b_frms_in_prd)))
+ {
+ e_cur_pic_type = B_PIC;
+ }
+ else
+ {
+ if(i4_pic_disp_order_no == 0)
+ {
+ e_cur_pic_type = I_PIC;
+ }
+ else
+ {
+ e_cur_pic_type = P_PIC;
+ }
+ }
+ if((i4_intra_frm_int % i4_inter_frm_int) == 0)
+ {
+ i4_codn = (e_cur_pic_type == P_PIC);
+ }
+ else
+ {
+ i4_codn =
+ (ps_pic_handling->i4_b_count_in_subgop == ps_pic_handling->i4_b_in_incomp_subgop);
+ }
+ if(e_cur_pic_type == I_PIC)
+ {
+ /*Don't do anything. Resetting the force I frame flag since the current picture
+ type is already a I frame */
+ i4_force_I_frame = 0;
+ }
+ else if(i4_inter_frm_int == 1)
+ {
+ /*IPP case , Force I frame immediately*/
+ start_new_gop(ps_pic_handling);
+ }
+ else if((!i4_temp_is_gop_closed) && i4_codn)
+ {
+ start_new_gop(ps_pic_handling);
+ if(ps_pic_handling->i4_b_count_in_subgop)
+ {
+ ps_pic_handling->i4_b_pic_idx += 1;
+ ps_pic_handling->i4_b_pic_idx %= (i4_max_inter_frm_int + 1);
+ }
+ }
+ else if(i4_temp_is_gop_closed && (e_previous_pic_type == P_PIC) && (e_cur_pic_type != P_PIC))
+ {
+ start_new_gop(ps_pic_handling);
+ ps_pic_handling->i4_b_pic_idx++;
+ ps_pic_handling->i4_b_pic_idx %= (i4_max_inter_frm_int + 1);
+ }
+ i4_is_first_gop = ps_pic_handling->i4_is_first_gop; /* start_new_gop() clears this flag in the state */
+
+ /* Check for unexpected I frame and assume its a scene change. If so, reset the model */
+ if(((e_cur_pic_type != I_PIC) && (i4_rc_in_pic == I_PIC)) || (i4_rc_in_pic == I_PIC_SCD))
+ {
+ /* Set the SCD flag */
+ i4_is_scd = 1;
+ }
+ }
+
+ /******************************* CHANGE_INTRA_FRM_INTERVAL ******************************/
+ /* Call the update_pic_distbn if
+ 1)Change in intra frm interval flag is set
+ 2)It's the first B_PIC of a gop
+ Note: i4_pic_disp_order_no here is the local copy taken before the force
+ I frame section, so a start_new_gop() reset is not reflected in it.
+ */
+ if((ps_pic_handling->i4_change_in_intra_frm_int == 1) && ((i4_pic_disp_order_no == 1)))
+ {
+ update_pic_distbn(
+ ps_pic_handling,
+ ps_pic_handling->i4_new_intra_frm_int,
+ ps_pic_handling->i4_inter_frm_int,
+ 1);
+
+ ps_pic_handling->i4_change_in_intra_frm_int = 0;
+
+ if(ps_pic_handling->i4_new_intra_frm_int == 1)
+ {
+ ps_pic_handling->i4_pic_disp_order_no = 0;
+ }
+ }
+ /******************************* CHANGE_INTER_FRM_INTERVAL ******************************/
+
+ /* Call update_pic_distbn if
+ 1)Change in inter frm interval flag is set
+ 2)It's the first B_PIC after gop/subgop start, and
+ 3)The new inter-frm-interval won't cross the intra_frm_interval
+ */
+
+ if((ps_pic_handling->i4_change_in_inter_frm_int == 1) &&
+ ((i4_buf_pic_no % i4_inter_frm_int == 1) || (i4_pic_disp_order_no == 1) ||
+ (i4_inter_frm_int == 1)))
+ {
+ /* Condn which checks if the new inter_frm_int will cross the intra_frm_int */
+ i4_condn_for_change_in_inter_frm_int =
+ ((i4_pic_disp_order_no + i4_new_inter_frm_int - 1) < i4_intra_frm_int);
+
+ if(i4_condn_for_change_in_inter_frm_int)
+ {
+ /* If the inter_frm_int = 1, then the b_pic_idx needs to be modified */
+ if(i4_inter_frm_int == 1)
+ {
+ ps_pic_handling->i4_b_pic_idx =
+ (1 + ps_pic_handling->i4_ref_pic_idx) % (i4_max_inter_frm_int + 1);
+ }
+ /* Store a correction factor for calculating the picture display order */
+ if(i4_inter_frm_int != i4_new_inter_frm_int)
+ {
+ ps_pic_handling->i4_change_inter_frm_interval_correction =
+ i4_inter_frm_int - i4_new_inter_frm_int;
+ /* ps_pic_handling->i4_change_inter_frm_interval_correction = 0; */
+ }
+
+ /* Depending on the gop/subgop boundary, call the change_inter_frm_int */
+ /* TO DO: make a single call, change the name of the fxn to update_state,
+ where state = frms_in_gop + b_incomp_subgop + extra_p */
+
+ if(i4_pic_disp_order_no == 1) /*GOP boundary*/
+ {
+ update_pic_distbn(
+ ps_pic_handling,
+ ps_pic_handling->i4_intra_frm_int,
+ ps_pic_handling->i4_new_inter_frm_int,
+ 1);
+ }
+ else /*Subgop boundary*/
+ {
+ update_pic_distbn(
+ ps_pic_handling,
+ ps_pic_handling->i4_intra_frm_int,
+ ps_pic_handling->i4_new_inter_frm_int,
+ 0);
+ }
+
+ ps_pic_handling->i4_change_in_inter_frm_int = 0;
+ ps_pic_handling->i4_new_inter_frm_int = ps_pic_handling->i4_inter_frm_int;
+ }
+ }
+
+ /* Initialize the local vars with the state struct values
+ (re-read because start_new_gop() and update_pic_distbn() above may have
+ modified the state structure) */
+ i4_buf_pic_no = ps_pic_handling->i4_buf_pic_no;
+ i4_pic_disp_order_no = ps_pic_handling->i4_pic_disp_order_no;
+ i4_b_pic_idx = ps_pic_handling->i4_b_pic_idx;
+ i4_ref_pic_idx = ps_pic_handling->i4_ref_pic_idx;
+ i4_b_in_incomp_subgop = ps_pic_handling->i4_b_in_incomp_subgop_mix_gop;
+ i4_p_count_in_gop = ps_pic_handling->i4_p_count_in_gop;
+ i4_b_count_in_gop = ps_pic_handling->i4_b_count_in_gop;
+ i4_b_count_in_subgop = ps_pic_handling->i4_b_count_in_subgop;
+ i4_p_frms_in_prd = ps_pic_handling->i4_frms_in_cur_gop[P_PIC];
+ i4_b_frms_in_prd = ps_pic_handling->i4_frms_in_cur_gop[B_PIC];
+ i4_extra_p = ps_pic_handling->i4_extra_p_mix_gop;
+ i4_inter_frm_int = ps_pic_handling->i4_inter_frm_int;
+ i4_intra_frm_int = ps_pic_handling->i4_intra_frm_int;
+
+ /* Initializing the prev_state vars */
+ ps_pic_handling->i4_prev_b_pic_idx = ps_pic_handling->i4_b_pic_idx;
+
+ i4_num_b_in_subgop = (i4_inter_frm_int - 1); /* B pics between two reference pics of a complete subgop */
+
+ /***************************************** Fill the stack ***************************************/
+ /* The next part of the code is organized as
+
+ if(B_PIC conditions satisfied)
+ {
+ Fill the pic_stack using the b_pic_index
+ Update the b_pic_index and the other b_pic related vars for the next B_PIC
+ }
+ else
+ {
+ if(I_PIC conditions are satisfied)
+ {
+ Fill the pic_stack using the ref_pic_index
+ Update the ref_pic_index and the other ref_pic related vars for the next I_PIC/P_PIC
+ }
+ else
+ {
+ Fill the pic_stack using the ref_pic_index
+ Update the ref_pic_index and the other ref_pic related vars for the next I_PIC/P_PIC
+ }
+ }
+ */
+ /* Condition for a B_PIC -
+ 1) Other than the first I_PIC and the periodically appearing P_PICs, after every inter_frm_int,
+ rest all pics are B_PICs
+ 2) In case of CLOSED_GOP, the last frame of the gop has to be a P_PIC */
+
+ if(ps_pic_handling->i4_intra_frm_int ==
+ 1) /*i only case insert the pic only at first location of stack*/
+ {
+ i4_ref_pic_idx = 0;
+ i4_b_pic_idx = 0;
+ }
+ if((i4_buf_pic_no % i4_inter_frm_int) &&
+ !(i4_is_gop_closed && (i4_b_count_in_gop == i4_b_frms_in_prd))) /**** B_PIC ****/
+ {
+ /* Fill the pic_stack */
+ ps_pic_handling->as_pic_stack[i4_b_pic_idx].i4_pic_id = i4_enc_pic_id;
+ ps_pic_handling->as_pic_stack[i4_b_pic_idx].e_pic_type = B_PIC;
+ ps_pic_handling->as_pic_stack[i4_b_pic_idx].i4_pic_disp_order_no = i4_pic_disp_order_no;
+ ps_pic_handling->as_pic_stack[i4_b_pic_idx].i4_is_scd = 0; /* B pics never carry the scene change flag */
+
+ /* Store Pic type*/
+ e_previous_pic_type = B_PIC;
+
+ /* Update the prev_pic_details */
+ memcpy(
+ &ps_pic_handling->s_prev_pic_details,
+ &ps_pic_handling->as_pic_stack[i4_b_pic_idx],
+ sizeof(pic_details_t));
+
+ i4_b_count_in_gop++;
+ i4_b_count_in_subgop++;
+
+ /* Update the i4_b_pic_idx */
+ if(!i4_is_gop_closed)
+ {
+ /* If this B_PIC features in one of the complete subgops */
+ if((i4_b_count_in_subgop < i4_num_b_in_subgop) &&
+ !(i4_b_count_in_gop == i4_b_frms_in_prd))
+ {
+ i4_b_pic_idx++;
+ }
+ else /* Else if this B_PIC is the last one in a subgop or gop */
+ {
+ /* If this is the last B_PIC of a GOP, depending on the number of incomp B_pics in
+ the subgop, there can be either only I or I,P pics between this and the next B_PIC */
+ if(i4_b_count_in_gop == i4_b_frms_in_prd)
+ {
+ i4_b_pic_idx += (2 + (!i4_b_in_incomp_subgop)); /*Prev*/
+ i4_b_count_in_gop = 0;
+ }
+ else /* For the last B_PIC of a subgop, there's always a P b/w this & the next B_PIC */
+ {
+ i4_b_pic_idx += 2;
+ }
+ i4_b_count_in_subgop = 0;
+ }
+ }
+ else
+ {
+ /* For the last B_PIC of a gop
+ Normally,there will be 3 pics (P,I,P) between this and the next B_PIC for a CLOSED gop,
+ except when
+ 1)Number of P_pics in the gop = 1
+ 2)There is an extra P at the end of the gop
+ */
+ if(i4_b_count_in_gop == i4_b_frms_in_prd)
+ {
+ i4_b_pic_idx +=
+ (3 + ((i4_b_in_incomp_subgop == 0) && (i4_p_frms_in_prd > 1) &&
+ (i4_pic_disp_order_no != (i4_p_frms_in_prd + i4_b_frms_in_prd - 1))));
+
+ i4_b_count_in_subgop = 0;
+ }
+ else if(
+ i4_b_count_in_subgop <
+ i4_num_b_in_subgop) /* For a B_PIC which is not the last one in a subgop */
+ {
+ i4_b_pic_idx++;
+ }
+ else /* For the last B_PIC of a subgop */
+ {
+ i4_b_pic_idx += 2;
+ i4_b_count_in_subgop = 0;
+ }
+ }
+ i4_b_pic_idx %= (i4_max_inter_frm_int + 1); /* indices wrap over i4_max_inter_frm_int + 1 stack slots */
+ }
+ else /*********** I or P pic *********/
+ {
+ ps_pic_handling->as_pic_stack[i4_ref_pic_idx].i4_pic_id = i4_enc_pic_id;
+ ps_pic_handling->as_pic_stack[i4_ref_pic_idx].i4_pic_disp_order_no = i4_pic_disp_order_no;
+ ps_pic_handling->as_pic_stack[i4_ref_pic_idx].i4_is_scd = i4_is_scd;
+ /* Store Pic type*/
+ e_previous_pic_type = I_PIC; /* replaced by P_PIC in the P_PIC branch below */
+ if(i4_pic_disp_order_no == 0) /**** I_PIC ****/
+ {
+ ps_pic_handling->as_pic_stack[i4_ref_pic_idx].e_pic_type = I_PIC;
+
+ /* Update the prev_pic_details */
+ memcpy(
+ &ps_pic_handling->s_prev_pic_details,
+ &ps_pic_handling->as_pic_stack[i4_ref_pic_idx],
+ sizeof(pic_details_t));
+
+ /* In case of an I-frame depending on OPEN or CLOSED gop, the ref_pic_idx changes */
+ if((!i4_is_gop_closed) && (i4_is_first_gop == 0))
+ {
+ if((i4_p_frms_in_prd <= 1) && (i4_b_in_incomp_subgop == 0))
+ {
+ i4_ref_pic_idx++;
+ }
+ else /* From the 2nd gop onwards, the I and first P frame are separated by the num_b_in_incomp_subgop */
+ {
+ i4_ref_pic_idx += (i4_b_in_incomp_subgop + 1);
+ }
+
+ ps_pic_handling->i4_b_in_incomp_subgop_mix_gop =
+ ps_pic_handling->i4_b_in_incomp_subgop;
+ }
+ else
+ {
+ i4_ref_pic_idx++;
+ }
+
+ i4_b_count_in_gop = 0;
+ i4_p_count_in_gop = 0;
+ i4_b_count_in_subgop = 0;
+ }
+ else /**** P_PIC ****/
+ {
+ ps_pic_handling->as_pic_stack[i4_ref_pic_idx].e_pic_type = P_PIC;
+ /* Store Pic type*/
+ e_previous_pic_type = P_PIC;
+
+ /* Update the prev_pic_details */
+ memcpy(
+ &ps_pic_handling->s_prev_pic_details,
+ &ps_pic_handling->as_pic_stack[i4_ref_pic_idx],
+ sizeof(pic_details_t));
+
+ i4_p_count_in_gop++;
+ ps_pic_handling->i4_prev_intra_frame_interval = i4_intra_frm_int;
+
+ /* In case of an P-frame depending on OPEN or CLOSED gop, the ref_pic_idx changes */
+ if(i4_is_gop_closed && (i4_p_count_in_gop == i4_p_frms_in_prd))
+ {
+ /* For the last P_PIC in a gop, if extra_p or incomp_b are present, the
+ number of such pics between this and the next ref_pic is (i4_b_in_incomp_subgop + 1) */
+ if((i4_p_count_in_gop > 1) && (i4_b_in_incomp_subgop || i4_extra_p))
+ {
+ i4_ref_pic_idx += (i4_b_in_incomp_subgop + 1);
+ }
+ else
+ {
+ i4_ref_pic_idx += i4_inter_frm_int;
+ }
+ }
+ else
+ {
+ i4_ref_pic_idx += i4_inter_frm_int;
+ }
+ }
+
+ i4_ref_pic_idx %= (i4_max_inter_frm_int + 1);
+ }
+
+ /* Update those variables working on the input frames */
+ i4_pic_disp_order_no++;
+ i4_buf_pic_no++;
+
+ /* For any gop
+ (this test reads the state value of i4_pic_disp_order_no, which does not
+ yet include the increment applied to the local copy just above) */
+ if(ps_pic_handling->i4_pic_disp_order_no ==
+ (i4_max_inter_frm_int - 1 -
+ ((!i4_is_gop_closed) * ps_pic_handling->i4_b_in_incomp_subgop_mix_gop)))
+ {
+ /* NITT DEBUG : COULD BE REMOVED. Replace i4_rem_frms_in_gop with a single variable thus getting rid of
+ the requirement to store rem frms in gop */
+ if((!i4_is_gop_closed) && (i4_is_first_gop) &&
+ (ps_pic_handling->i4_frms_in_cur_gop[B_PIC] >
+ ps_pic_handling->i4_b_in_incomp_subgop_mix_gop))
+ {
+ ps_pic_handling->i4_rem_frms_in_cur_gop -=
+ ps_pic_handling->i4_b_in_incomp_subgop_mix_gop;
+ }
+ }
+
+ /* End of GOP updates */
+ if(i4_pic_disp_order_no == (i4_p_frms_in_prd + i4_b_frms_in_prd + 1))
+ {
+ /* Now, the end of gop updates */
+ i4_pic_disp_order_no = 0;
+ i4_buf_pic_no = 0;
+ i4_is_first_gop = 0;
+ ps_pic_handling->i4_extra_p_mix_gop = ps_pic_handling->i4_extra_p;
+
+ if(i4_is_gop_closed)
+ {
+ ps_pic_handling->i4_b_in_incomp_subgop_mix_gop = ps_pic_handling->i4_b_in_incomp_subgop;
+ }
+
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ ps_pic_handling->i4_frms_in_cur_gop[i] = ps_pic_handling->i4_frms_in_gop[i];
+ }
+ }
+
+ /* Updating the vars which work on the encoded pics */
+ /* For the first gop
+ (both i4_is_first_gop and i4_pic_disp_order_no are read from the state
+ structure here, i.e. before the write-back below) */
+ if(((ps_pic_handling->i4_is_first_gop) &&
+ (ps_pic_handling->i4_pic_disp_order_no == (i4_max_inter_frm_int - 1))) ||
+ (i4_intra_frm_int == 1))
+ {
+ ps_pic_handling->i4_coded_pic_no = 0;
+ ps_pic_handling->i4_stack_count = 0;
+ }
+
+ /* Update the state struct with the modifiable local vars */
+ ps_pic_handling->i4_buf_pic_no = i4_buf_pic_no;
+ ps_pic_handling->i4_pic_disp_order_no = i4_pic_disp_order_no;
+ ps_pic_handling->i4_b_pic_idx = i4_b_pic_idx;
+ ps_pic_handling->i4_ref_pic_idx = i4_ref_pic_idx;
+ ps_pic_handling->i4_is_first_gop = i4_is_first_gop;
+ ps_pic_handling->i4_p_count_in_gop = i4_p_count_in_gop;
+ ps_pic_handling->i4_b_count_in_gop = i4_b_count_in_gop;
+ ps_pic_handling->i4_b_count_in_subgop = i4_b_count_in_subgop;
+ ps_pic_handling->e_previous_pic_type = e_previous_pic_type;
+ ps_pic_handling->i4_force_I_frame = i4_force_I_frame;
+}
+
+/* ******************************************************************************/
+/**
+ * @brief Returns the picture id, display order number, picture type and scene
+ *        change flag for the frame to be encoded
+ *
+ * While i4_stack_count is negative a placeholder is reported: BUF_PIC with
+ * id -1, display order -1 and no scene change. Otherwise the entry at
+ * as_pic_stack[i4_stack_count] is reported. For an open gop (after
+ * i4_first_gop_encoded is set, and unless the intra frame interval is 1) the
+ * reported display order number is offset by i4_pic_order_cnt_base_offset and
+ * optionally wrapped with a modulo so it can serve as a temporal reference.
+ *
+ * Side effects: may clear i4_force_I_frame, set i4_first_gop_encoded, and
+ * update i4_mod_temp_ref_cnt, i4_pic_order_cnt_base_offset, i4_enable_modulo
+ * and i4_change_inter_frm_interval_correction.
+ *
+ * @param ps_pic_handling       picture handling state, read and updated
+ * @param pi4_pic_id            [out] picture id
+ * @param pi4_pic_disp_order_no [out] display order number / temporal reference
+ * @param pe_pic_type           [out] picture type
+ * @param pi4_is_scd            [out] scene change flag stored with the picture
+ */
+/* ******************************************************************************/
+void get_pic_from_stack(
+    pic_handling_t *ps_pic_handling,
+    WORD32 *pi4_pic_id,
+    WORD32 *pi4_pic_disp_order_no,
+    picture_type_e *pe_pic_type,
+    WORD32 *pi4_is_scd)
+{
+    pic_details_t s_out;
+
+    if(ps_pic_handling->i4_stack_count >= 0)
+    {
+        memcpy(
+            &s_out,
+            &ps_pic_handling->as_pic_stack[ps_pic_handling->i4_stack_count],
+            sizeof(pic_details_t));
+
+        /* A forced I frame has reached the output side: clear the request */
+        if((1 == ps_pic_handling->i4_force_I_frame) && (I_PIC == s_out.e_pic_type))
+        {
+            ps_pic_handling->i4_force_I_frame = 0;
+            /* Number of pictures whose temporal reference has to be modified
+               in the new GOP */
+            ps_pic_handling->i4_mod_temp_ref_cnt = ps_pic_handling->i4_b_in_incomp_subgop + 1;
+            ps_pic_handling->i4_first_gop_encoded = 1;
+        }
+
+        /* In MPEG2 the temporal reference of the first displayed frame of a
+           gop is 0. With an open gop the B pictures of the last subgop may be
+           coded as part of the next gop, so the display order number is
+           adjusted to reflect the temporal reference. Skipped for the I-only
+           case (intra frame interval of 1), where it is always 0. */
+        if((!ps_pic_handling->i4_is_gop_closed) && (ps_pic_handling->i4_first_gop_encoded) &&
+           (ps_pic_handling->i4_intra_frm_int != 1))
+        {
+            WORD32 i4_temporal_ref;
+
+            if(I_PIC == s_out.e_pic_type)
+            {
+                ps_pic_handling->i4_pic_order_cnt_base_offset =
+                    ps_pic_handling->i4_b_in_incomp_subgop;
+                ps_pic_handling->i4_enable_modulo = 1;
+            }
+            else if(P_PIC == s_out.e_pic_type)
+            {
+                ps_pic_handling->i4_enable_modulo = 0;
+                ps_pic_handling->i4_change_inter_frm_interval_correction = 0;
+            }
+
+            i4_temporal_ref =
+                ps_pic_handling->as_pic_stack[ps_pic_handling->i4_stack_count].i4_pic_disp_order_no +
+                ps_pic_handling->i4_pic_order_cnt_base_offset;
+
+            if(ps_pic_handling->i4_enable_modulo)
+            {
+                if(ps_pic_handling->i4_mod_temp_ref_cnt)
+                {
+                    /* After a forced I frame the interrupted gop only had
+                       i4_frames_in_fif_gop frames.
+                       NOTE(review): i4_frames_in_fif_gop is set by
+                       start_new_gop(); confirm it is non-zero whenever this
+                       path is taken. */
+                    i4_temporal_ref %= ps_pic_handling->i4_frames_in_fif_gop;
+                    ps_pic_handling->i4_mod_temp_ref_cnt--;
+                }
+                else
+                {
+                    i4_temporal_ref %=
+                        (ps_pic_handling->i4_prev_intra_frame_interval +
+                         ps_pic_handling->i4_change_inter_frm_interval_correction);
+                }
+            }
+            s_out.i4_pic_disp_order_no = i4_temporal_ref;
+        }
+    }
+    else
+    {
+        /* Nothing is ready to be encoded yet */
+        s_out.e_pic_type = BUF_PIC;
+        s_out.i4_pic_disp_order_no = -1;
+        s_out.i4_pic_id = -1;
+        s_out.i4_is_scd = 0;
+    }
+
+    /* Giving this to the Codec */
+    *pi4_pic_id = s_out.i4_pic_id;
+    *pi4_pic_disp_order_no = s_out.i4_pic_disp_order_no;
+    *pe_pic_type = s_out.e_pic_type;
+    *pi4_is_scd = s_out.i4_is_scd;
+}
+
+/* ******************************************************************************/
+/**
+ * @brief Updates the picture handling state whenever there is a change in the
+ *        intra or inter frame interval
+ *
+ * Recomputes the steady-state picture distribution with
+ * find_pic_distbn_in_gop(), derives the distribution of the current gop
+ * (a "mixed" gop when the change happens inside a gop), adjusts the
+ * remaining-frames count used for bit allocation and stores the new
+ * intervals in the state structure.
+ *
+ * Invalid requests fall back to the value currently held in the state:
+ * an intra frame interval <= 0, or an inter frame interval that is <= 0 or
+ * larger than i4_max_inter_frm_int. Zero is rejected for the inter frame
+ * interval because it is used as a divisor in find_pic_distbn_in_gop() and
+ * as a modulus in add_pic_to_stack().
+ *
+ * @param ps_pic_handling  picture handling state, read and updated
+ * @param i4_intra_frm_int requested intra frame interval
+ * @param i4_inter_frm_int requested inter frame interval
+ * @param i4_gop_boundary  non-zero when the change takes effect at a gop
+ *                         boundary, 0 when it happens inside a gop
+ */
+/* ******************************************************************************/
+static void update_pic_distbn(
+    pic_handling_t *ps_pic_handling,
+    WORD32 i4_intra_frm_int,
+    WORD32 i4_inter_frm_int,
+    WORD32 i4_gop_boundary)
+{
+    /* Declarations */
+    WORD32 i4_is_gop_closed;
+    WORD32 i, i4_prev_inter_frm_int, i4_max_inter_frm_int, i4_pic_disp_order_no;
+    WORD32 i4_b_in_incomp_subgop, i4_extra_p, i4_b_in_incomp_subgop_mix_gop, i4_extra_p_mix_gop;
+    WORD32 i4_pb_frms_till_prev_p;
+    WORD32 ai4_diff_in_frms[MAX_PIC_TYPE];
+
+    /* Initialize the local vars from the state struct */
+    i4_is_gop_closed = ps_pic_handling->i4_is_gop_closed;
+    i4_prev_inter_frm_int = ps_pic_handling->i4_inter_frm_int;
+    i4_max_inter_frm_int = ps_pic_handling->i4_max_inter_frm_int;
+    i4_b_in_incomp_subgop = ps_pic_handling->i4_b_in_incomp_subgop;
+    i4_extra_p = ps_pic_handling->i4_extra_p;
+    i4_b_in_incomp_subgop_mix_gop = ps_pic_handling->i4_b_in_incomp_subgop_mix_gop;
+    i4_extra_p_mix_gop = ps_pic_handling->i4_extra_p_mix_gop;
+    i4_pic_disp_order_no = ps_pic_handling->i4_pic_disp_order_no;
+
+    /* Frames consumed in this gop up to the previous P picture, counted with
+       the old inter frame interval */
+    i4_pb_frms_till_prev_p = (ps_pic_handling->i4_p_count_in_gop * i4_prev_inter_frm_int);
+
+    /* Check for the validity of the intra_frm_int */
+    if(i4_intra_frm_int <= 0)
+    {
+        i4_intra_frm_int = ps_pic_handling->i4_intra_frm_int;
+    }
+    /* Check for the validity of the inter_frm_int. Zero must be rejected as
+       well: it would be used as a divisor / modulus further down the line */
+    if((i4_inter_frm_int > i4_max_inter_frm_int) || (i4_inter_frm_int <= 0))
+    {
+        i4_inter_frm_int = ps_pic_handling->i4_inter_frm_int;
+    }
+
+    /* Keep a copy of the older frms_in_gop */
+    for(i = 0; i < MAX_PIC_TYPE; i++)
+    {
+        ai4_diff_in_frms[i] = ps_pic_handling->i4_frms_in_cur_gop[i];
+    }
+
+    /******* Update all the variables which are calculated from the inter_frm_int *******/
+
+    /* Get the new pic distribution in the gop */
+    find_pic_distbn_in_gop(
+        ps_pic_handling->i4_frms_in_gop,
+        ps_pic_handling->i4_actual_frms_in_gop,
+        i4_intra_frm_int,
+        i4_inter_frm_int,
+        i4_is_gop_closed,
+        &i4_b_in_incomp_subgop,
+        &i4_extra_p,
+        ps_pic_handling->i4_num_active_pic_type,
+        ps_pic_handling->i4_field_pic);
+
+    /* Find the other related variables */
+    if(i4_gop_boundary == 0)
+    {
+        /* Since, the inter frame interval has changed between a gop the current gop will
+           be a mixed gop. So, we need to find the values of the related variables */
+        find_pic_distbn_in_gop(
+            ps_pic_handling->i4_frms_in_cur_gop,
+            ps_pic_handling->i4_actual_frms_in_gop,
+            (i4_intra_frm_int - i4_pb_frms_till_prev_p),
+            i4_inter_frm_int,
+            i4_is_gop_closed,
+            &i4_b_in_incomp_subgop_mix_gop,
+            &i4_extra_p_mix_gop,
+            ps_pic_handling->i4_num_active_pic_type,
+            ps_pic_handling->i4_field_pic);
+
+        /* Add back the pictures already handled in this gop */
+        ps_pic_handling->i4_frms_in_cur_gop[P_PIC] += ps_pic_handling->i4_p_count_in_gop;
+        ps_pic_handling->i4_frms_in_cur_gop[B_PIC] += ps_pic_handling->i4_b_count_in_gop;
+    }
+    else
+    {
+        /* Since, the inter_frm_interval has changed at a gop boundary, the new gop will have
+           all the subgops with the new inter_frm_interval */
+        for(i = 0; i < MAX_PIC_TYPE; i++)
+        {
+            ps_pic_handling->i4_frms_in_cur_gop[i] = ps_pic_handling->i4_frms_in_gop[i];
+        }
+
+        i4_b_in_incomp_subgop_mix_gop = i4_b_in_incomp_subgop;
+        i4_extra_p_mix_gop = i4_extra_p;
+    }
+
+    /* For bit-allocation the rem_frms_in_gop need to be updated */
+    /* Checks needed:
+       1) If the encoding is happening on the same gop as that of the buffering.
+       The comparison uses the mixed-gop value still held in the state
+       structure, i.e. the one from before this update. */
+    if(ps_pic_handling->i4_pic_disp_order_no >=
+       (i4_max_inter_frm_int - 1 -
+        ((!i4_is_gop_closed) * ps_pic_handling->i4_b_in_incomp_subgop_mix_gop)))
+    {
+        for(i = 0; i < MAX_PIC_TYPE; i++)
+        {
+            ps_pic_handling->i4_rem_frms_in_cur_gop +=
+                (ps_pic_handling->i4_frms_in_cur_gop[i] - ai4_diff_in_frms[i]);
+        }
+        /* If gop is not closed then the difference in previous to next is to be added */
+        if(!i4_is_gop_closed)
+        {
+            ps_pic_handling->i4_rem_frms_in_cur_gop += (i4_prev_inter_frm_int - i4_inter_frm_int);
+        }
+    }
+
+    /* Update the vars which will affect the proper filling of the pic_stack */
+    if(i4_pic_disp_order_no == 0) /*Check if redundant*/
+    {
+        ps_pic_handling->i4_buf_pic_no = 0;
+    }
+    else
+    {
+        ps_pic_handling->i4_buf_pic_no = 1;
+    }
+
+    ps_pic_handling->i4_b_count_in_subgop = 0;
+
+    /* Update the state struct with the new inter_frm_int */
+    ps_pic_handling->i4_inter_frm_int = i4_inter_frm_int;
+    ps_pic_handling->i4_intra_frm_int = i4_intra_frm_int;
+    ps_pic_handling->i4_b_in_incomp_subgop = i4_b_in_incomp_subgop;
+    ps_pic_handling->i4_extra_p = i4_extra_p;
+    ps_pic_handling->i4_b_in_incomp_subgop_mix_gop = i4_b_in_incomp_subgop_mix_gop;
+    ps_pic_handling->i4_extra_p_mix_gop = i4_extra_p_mix_gop;
+}
+
+/* ******************************************************************************/
+/**
+ * @brief Distributes the frames of a gop over the picture types based on the
+ *        intra/inter frame interval
+ *
+ * i4_frms_in_gop[] is cleared, one I picture is always counted, and for an
+ * intra frame interval other than 1 the P count is
+ * (i4_intra_frm_int - 1) / i4_inter_frm_int. The entries from index 2 up to
+ * i4_num_active_pic_type - 1 receive the layer-1 B count scaled by
+ * 2^(index - 2). For field pictures (i4_field_pic == 1) P1_PIC and the
+ * entries at FIELD_OFFSET are filled as well. The result is finally copied
+ * to i4_actual_frms_gop[] and traced.
+ *
+ * *pi4_b_in_incomp_subgop and *pi4_extra_p are written (with 0) only when
+ * i4_intra_frm_int is 1; otherwise they are left untouched.
+ *
+ * @param i4_frms_in_gop[MAX_PIC_TYPE]     [out] frames per picture type
+ * @param i4_actual_frms_gop[MAX_PIC_TYPE] [out] copy of i4_frms_in_gop
+ * @param i4_intra_frm_int                 intra frame interval
+ * @param i4_inter_frm_int                 inter frame interval (used as divisor)
+ * @param i4_is_gop_closed                 non-zero for a closed gop
+ * @param pi4_b_in_incomp_subgop           [out] see above
+ * @param pi4_extra_p                      [out] see above
+ * @param i4_num_active_pic_type           upper bound (exclusive) of the
+ *                                         hierarchy entries that are filled
+ * @param i4_field_pic                     0 for frame, 1 for field pictures
+ */
+/* ******************************************************************************/
+static void find_pic_distbn_in_gop(
+    WORD32 i4_frms_in_gop[MAX_PIC_TYPE],
+    WORD32 i4_actual_frms_gop[MAX_PIC_TYPE],
+    WORD32 i4_intra_frm_int,
+    WORD32 i4_inter_frm_int,
+    WORD32 i4_is_gop_closed,
+    WORD32 *pi4_b_in_incomp_subgop,
+    WORD32 *pi4_extra_p,
+    WORD32 i4_num_active_pic_type,
+    WORD32 i4_field_pic)
+{
+    WORD32 i4_type;
+    WORD32 i4_lyr1_b_cnt = 0;
+
+    /* Start from an empty distribution with the one mandatory I picture */
+    for(i4_type = 0; i4_type < MAX_PIC_TYPE; i4_type++)
+    {
+        i4_frms_in_gop[i4_type] = 0;
+    }
+    i4_frms_in_gop[I_PIC] = 1;
+
+    if(i4_intra_frm_int != 1)
+    {
+        /* The P count is the same for open and closed gops */
+        i4_frms_in_gop[P_PIC] = ((i4_intra_frm_int - 1) / i4_inter_frm_int);
+
+        /* B pictures of the first temporal layer: a closed gop has one less */
+        i4_lyr1_b_cnt = i4_is_gop_closed
+                            ? (i4_frms_in_gop[P_PIC] - 1 + i4_frms_in_gop[I_PIC])
+                            : (i4_frms_in_gop[P_PIC] + i4_frms_in_gop[I_PIC]);
+
+        if((0 == i4_field_pic) || (1 == i4_field_pic))
+        {
+            const WORD32 i4_is_field = (1 == i4_field_pic);
+
+            if(i4_is_field)
+            {
+                i4_frms_in_gop[P1_PIC] = i4_frms_in_gop[P_PIC];
+                i4_frms_in_gop[P1_PIC] += 1;
+            }
+
+            /*HEVC_hierarchy: every further layer doubles the picture count*/
+            for(i4_type = 2; i4_type < i4_num_active_pic_type; i4_type++)
+            {
+                i4_frms_in_gop[i4_type] = (WORD32)(i4_lyr1_b_cnt * pow(2, (i4_type - 2)));
+                if(i4_is_field)
+                {
+                    i4_frms_in_gop[i4_type + FIELD_OFFSET] = i4_frms_in_gop[i4_type];
+                }
+            }
+        }
+    }
+    else /* All I frames */
+    {
+        i4_frms_in_gop[P_PIC] = 0;
+        i4_frms_in_gop[B_PIC] = 0;
+        *pi4_b_in_incomp_subgop = 0;
+        *pi4_extra_p = 0;
+    }
+
+    /* Keep a copy of the computed distribution for the caller */
+    for(i4_type = 0; i4_type < MAX_PIC_TYPE; i4_type++)
+    {
+        i4_actual_frms_gop[i4_type] = i4_frms_in_gop[i4_type];
+        trace_printf("PIC TYPES IN GOP of %d type = %d\n", i4_type, i4_frms_in_gop[i4_type]);
+    }
+}
+
+/******************************************************************************
+  Function Name   : pic_type_get_intra_frame_interval
+  Description     : Reads the intra frame interval currently held in the
+                    picture handling state
+  Arguments       : ps_pic_handling - picture handling state
+  Return Values   : intra_frm_int
+  Revision History:
+    Creation
+*****************************************************************************/
+WORD32 pic_type_get_intra_frame_interval(pic_handling_t *ps_pic_handling)
+{
+    const WORD32 i4_interval = ps_pic_handling->i4_intra_frm_int;
+
+    return i4_interval;
+}
+
+/******************************************************************************
+  Function Name   : pic_type_get_actual_intra_frame_interval
+  Description     : Adds up i4_actual_frms_in_gop over all picture types
+  Arguments       : ps_pic_handling - picture handling state
+  Return Values   : sum of the i4_actual_frms_in_gop[] entries
+  Revision History:
+    Creation
+*****************************************************************************/
+WORD32 pic_type_get_actual_intra_frame_interval(pic_handling_t *ps_pic_handling)
+{
+    WORD32 i4_type;
+    WORD32 i4_total = 0;
+
+    for(i4_type = 0; i4_type < MAX_PIC_TYPE; i4_type++)
+    {
+        i4_total += ps_pic_handling->i4_actual_frms_in_gop[i4_type];
+    }
+
+    return i4_total;
+}
+/******************************************************************************
+  Function Name   : pic_type_get_inter_frame_interval
+  Description     : Reads the inter frame interval currently held in the
+                    picture handling state
+  Arguments       : ps_pic_handling - picture handling state
+  Return Values   : inter_frm_int
+  Revision History:
+    Creation
+*****************************************************************************/
+WORD32 pic_type_get_inter_frame_interval(pic_handling_t *ps_pic_handling)
+{
+    const WORD32 i4_interval = ps_pic_handling->i4_inter_frm_int;
+
+    return i4_interval;
+}
+
+/******************************************************************************
+ Function Name : pic_type_get_field_pic
+ Description   : Reads back the field picture setting held in the picture
+                 handling state (member i4_field_pic)
+ Arguments     : ps_pic_handling - picture handling state to query
+ Return Values : current value of ps_pic_handling->i4_field_pic
+ Revision History:
+ Creation
+
+Assumptions - ps_pic_handling is dereferenced without a NULL check
+*****************************************************************************/
+WORD32 pic_type_get_field_pic(pic_handling_t *ps_pic_handling)
+{
+    const WORD32 i4_field_pic = ps_pic_handling->i4_field_pic;
+    return i4_field_pic;
+}
+
+/******************************************************************************
+ Function Name : pic_type_is_gop_closed
+ Description   : Reads back the i4_is_gop_closed member of the state
+ Arguments     : ps_pic_handling - picture handling state to query
+ Return Values : current value of ps_pic_handling->i4_is_gop_closed
+ Revision History: Creation
+*****************************************************************************/
+WORD32 pic_type_is_gop_closed(pic_handling_t *ps_pic_handling)
+{
+    const WORD32 i4_is_gop_closed = ps_pic_handling->i4_is_gop_closed;
+    return i4_is_gop_closed;
+}
+
+/******************************************************************************
+ Function Name : pic_type_get_rem_frms_in_gop
+ Description   : Reads back the number of frames still remaining in the
+                 current GOP (member i4_rem_frms_in_cur_gop)
+ Arguments     : ps_pic_handling - picture handling state to query
+ Return Values : current value of ps_pic_handling->i4_rem_frms_in_cur_gop
+ Revision History:
+ Creation
+
+Assumptions - ps_pic_handling is dereferenced without a NULL check
+Checks - a single total is returned; no per picture type array is filled in
+*****************************************************************************/
+WORD32 pic_type_get_rem_frms_in_gop(pic_handling_t *ps_pic_handling)
+{
+    const WORD32 i4_rem_frms = ps_pic_handling->i4_rem_frms_in_cur_gop;
+    return i4_rem_frms;
+}
+/******************************************************************************
+ Function Name : pic_type_get_frms_in_gop_force_I_frm
+ Description   : Reads back the i4_frames_in_fif_gop member of the state
+ Arguments     : ps_pic_handling - picture handling state to query
+ Return Values : current value of ps_pic_handling->i4_frames_in_fif_gop
+ Revision History: Creation
+*****************************************************************************/
+WORD32 pic_type_get_frms_in_gop_force_I_frm(pic_handling_t *ps_pic_handling)
+{
+    const WORD32 i4_frames_in_fif_gop = ps_pic_handling->i4_frames_in_fif_gop;
+    return i4_frames_in_fif_gop;
+}
+/******************************************************************************
+ Function Name : pic_type_get_frms_in_gop
+ Description   : Copies the per picture type frame counts of the current GOP
+                 (member i4_frms_in_cur_gop) into a caller supplied array
+ Arguments     : ps_pic_handling - picture handling state to query
+                 ai4_frms_in_gop - destination array, overwritten
+ Return Values : void
+ Revision History:
+ Creation
+
+Assumptions - destination holds at least sizeof(i4_frms_in_cur_gop) bytes
+*****************************************************************************/
+void pic_type_get_frms_in_gop(pic_handling_t *ps_pic_handling, WORD32 ai4_frms_in_gop[MAX_PIC_TYPE])
+{
+    const void *pv_src = ps_pic_handling->i4_frms_in_cur_gop;
+
+    /* Copy length is the size of the state member, not of the destination */
+    memcpy(ai4_frms_in_gop, pv_src, sizeof(ps_pic_handling->i4_frms_in_cur_gop));
+}
+/******************************************************************************
+ Function Name : pic_type_get_actual_frms_in_gop
+ Description   : Copies the per picture type frame counts stored in member
+                 i4_actual_frms_in_gop into a caller supplied array
+ Arguments     : ps_pic_handling - picture handling state to query
+                 ai4_frms_in_gop - destination array, overwritten
+ Return Values : void
+ Revision History:
+ Creation
+
+Assumptions - destination holds at least sizeof(i4_actual_frms_in_gop) bytes
+*****************************************************************************/
+void pic_type_get_actual_frms_in_gop(
+    pic_handling_t *ps_pic_handling, WORD32 ai4_frms_in_gop[MAX_PIC_TYPE])
+{
+    const void *pv_src = ps_pic_handling->i4_actual_frms_in_gop;
+
+    /* Copy length is the size of the state member, not of the destination */
+    memcpy(ai4_frms_in_gop, pv_src, sizeof(ps_pic_handling->i4_actual_frms_in_gop));
+}
+
+/******************************************************************************
+ Function Name : pic_type_get_disp_order_no
+ Description   : Reads back the display order counter held in the picture
+                 handling state (member i4_pic_disp_order_no)
+ Arguments     : ps_pic_handling - picture handling state to query
+ Return Values : current value of ps_pic_handling->i4_pic_disp_order_no
+ Revision History:
+ Creation
+
+Assumptions - ps_pic_handling is dereferenced without a NULL check
+*****************************************************************************/
+WORD32 pic_type_get_disp_order_no(pic_handling_t *ps_pic_handling)
+{
+    const WORD32 i4_disp_order_no = ps_pic_handling->i4_pic_disp_order_no;
+    return i4_disp_order_no;
+}
+
+/******************************************************************************
+ Function Name : set_force_I_frame_flag
+ Description   : Sets the i4_force_I_frame member of the picture handling
+                 state to 1
+ Arguments     : ps_pic_handling - picture handling state to update
+ Return Values : void
+ Revision History:
+ Creation
+
+Assumptions - ps_pic_handling is dereferenced without a NULL check
+*****************************************************************************/
+void set_force_I_frame_flag(pic_handling_t *ps_pic_handling)
+{
+    enum { FORCE_I_FRAME_SET = 1 };
+    ps_pic_handling->i4_force_I_frame = FORCE_I_FRAME_SET;
+}
+/******************************************************************************
+ Function Name : get_is_scd
+ Description   : Reads i4_is_scd of the as_pic_stack entry at i4_stack_count
+ Arguments     : ps_pic_handling - picture handling state to query
+ Return Values : i4_is_scd of as_pic_stack[i4_stack_count]
+ Revision History: Creation
+*****************************************************************************/
+WORD32 get_is_scd(pic_handling_t *ps_pic_handling)
+{
+    const WORD32 i4_slot = ps_pic_handling->i4_stack_count;
+    return ps_pic_handling->as_pic_stack[i4_slot].i4_is_scd;
+}
+/******************************************************************************/
+/* Functions that work on the encoded frames */
+/******************************************************************************/
+/* Returns the sum of i4_actual_frms_in_gop[] over all MAX_PIC_TYPE entries */
+static WORD32 pic_handling_sum_actual_frms(const pic_handling_t *ps_pic_handling)
+{
+    WORD32 i4_total = 0;
+    WORD32 i4_type;
+
+    for(i4_type = 0; i4_type < MAX_PIC_TYPE; i4_type++)
+    {
+        i4_total += ps_pic_handling->i4_actual_frms_in_gop[i4_type];
+    }
+    return i4_total;
+}
+
+/******************************************************************************
+ Function Name : update_pic_handling
+ Description   : Will be called only for the frames to be encoded. Steps the
+                 stack index (wrapping to 0), reloads the remaining-frame
+                 counter on an I picture or scene-cut I frame, consumes one
+                 frame from that counter and flags the last frame of a GOP
+ Arguments     : ps_pic_handling   - picture handling state to update
+                 e_pic_type        - picture type of the encoded frame
+                 i4_is_non_ref_pic - non-zero for a non reference picture
+                 i4_is_scd_I_frame - non-zero for a scene-cut I frame
+ Return Values : void
+ Revision History:
+ Creation
+
+Assumptions - ps_pic_handling is dereferenced without a NULL check
+Checks - i4_last_frm_in_gop is rewritten (0 or 1) on every call
+*****************************************************************************/
+void update_pic_handling(
+    pic_handling_t *ps_pic_handling,
+    picture_type_e e_pic_type,
+    WORD32 i4_is_non_ref_pic,
+    WORD32 i4_is_scd_I_frame)
+{
+    const WORD32 i4_wrap_at = ps_pic_handling->i4_max_inter_frm_int + 1;
+    const WORD32 i4_restart_gop = (i4_is_scd_I_frame || (e_pic_type == I_PIC));
+
+    /* Step the stack index; it wraps at max_inter_frm_int + 1 and stays 0 when intra_frm_int is 1 */
+    ps_pic_handling->i4_stack_count++;
+    if((ps_pic_handling->i4_intra_frm_int == 1) || (ps_pic_handling->i4_stack_count == i4_wrap_at))
+    {
+        ps_pic_handling->i4_stack_count = 0;
+    }
+
+    if(i4_is_non_ref_pic)
+    {
+        ps_pic_handling->i4_non_ref_B_pic_count++;
+    }
+
+    /* If scd frame (or I picture), assume one frame has been encoded: reload the counter */
+    if(i4_restart_gop)
+    {
+        ps_pic_handling->i4_rem_frms_in_cur_gop = pic_handling_sum_actual_frms(ps_pic_handling);
+    }
+
+    /* One frame is consumed on every call; the weightage based condition that once
+       guarded this (HEVC_RC, non reference pictures) is disabled, so no test here */
+    ps_pic_handling->i4_rem_frms_in_cur_gop--;
+    ps_pic_handling->i4_non_ref_B_pic_count = 0;
+
+    if(ps_pic_handling->i4_rem_frms_in_cur_gop != 0)
+    {
+        ps_pic_handling->i4_last_frm_in_gop = 0;
+        return;
+    }
+
+    /* Counter ran out: reload it from the actual GOP totals and flag the GOP boundary */
+    ps_pic_handling->i4_rem_frms_in_cur_gop = pic_handling_sum_actual_frms(ps_pic_handling);
+    ps_pic_handling->i4_last_frm_in_gop = 1;
+    ps_pic_handling->i4_first_gop_encoded = 1;
+}
+/******************************************************************************
+ Function Name : is_last_frame_in_gop
+ Description   : Reads back the i4_last_frm_in_gop member of the state
+ Arguments     : ps_pic_handling - picture handling handle to query
+ Return Values : current value of ps_pic_handling->i4_last_frm_in_gop
+ Revision History: Creation
+*****************************************************************************/
+WORD32 is_last_frame_in_gop(pic_handling_handle ps_pic_handling)
+{
+    const WORD32 i4_is_last = ps_pic_handling->i4_last_frm_in_gop;
+    return i4_is_last;
+}
+
+/******************************************************************************
+ Function Name : skip_encoded_frame
+ Description : Needs to go to the current pic in the pic_stack.
+ If it's a B_PIC, don't do anything.
+ If it's a reference picture, push all but the last B_PIC
+ in the current subgop one place down (i.e. just copy their pic_details)
+ and move the last B_PIC in that subgop to the slot after the
+ skipped picture, converting its pic_type to P_PIC (only the pic type,
+ display order number and pic id of that slot are overwritten).
+ as_pic_stack is indexed modulo (i4_max_inter_frm_int + 1).
+ Arguments : ps_pic_handling - picture handling state whose stack is edited
+ e_pic_type - type of the skipped picture; only I_PIC / P_PIC act
+ Return Values : void
+ Revision History:
+ Creation
+
+Assumptions - i4_ref_pic_idx is the stack slot of the next reference picture
+Checks - nothing is changed when the next stack slot is already i4_ref_pic_idx
+*****************************************************************************/
+void skip_encoded_frame(pic_handling_t *ps_pic_handling, picture_type_e e_pic_type)
+{
+ pic_details_t s_pic_details;
+ WORD32 i4_stack_count, i4_next_ref_pic_idx, i4_pic_idx;
+ WORD32 i4_max_inter_frm_int, i4_last_b_pic_idx, i4_first_b_pic_idx;
+ WORD32 i4_next_pic_idx;
+
+ /* State variables used to initialize the local vars (Not to be changed) */
+ i4_stack_count = ps_pic_handling->i4_stack_count;
+ i4_next_ref_pic_idx = ps_pic_handling->i4_ref_pic_idx;
+ i4_max_inter_frm_int = ps_pic_handling->i4_max_inter_frm_int;
+
+ i4_next_pic_idx = ((i4_stack_count + 1) % (i4_max_inter_frm_int + 1));
+
+ /* Check what the encoded frm_type is.
+ Changing a B_PIC to a ref_pic is not required if
+ there are no B_PICs referring from the skipped ref_pic */
+ if(((e_pic_type == P_PIC) || (e_pic_type == I_PIC)) && (i4_next_pic_idx != i4_next_ref_pic_idx))
+ {
+ /* Go to the last B_PIC before the next_ref_pic (index wraps to i4_max_inter_frm_int) */
+ if(i4_next_ref_pic_idx == 0)
+ {
+ i4_last_b_pic_idx = i4_max_inter_frm_int;
+ }
+ else
+ {
+ i4_last_b_pic_idx = (i4_next_ref_pic_idx - 1);
+ }
+
+ /* Keep a copy of the last B_PIC pic_details */
+ memcpy(
+ &s_pic_details,
+ &ps_pic_handling->as_pic_stack[i4_last_b_pic_idx],
+ sizeof(pic_details_t));
+
+ i4_pic_idx = i4_last_b_pic_idx;
+ i4_first_b_pic_idx = (i4_stack_count + 1) % (i4_max_inter_frm_int + 1);
+
+ /* All the B_PICs other than the last one need to be shifted one place in the stack */
+ while((i4_pic_idx != i4_stack_count) && (i4_first_b_pic_idx != i4_last_b_pic_idx))
+ {
+ if(i4_pic_idx == 0)
+ {
+ i4_pic_idx = i4_max_inter_frm_int;
+ }
+ else
+ {
+ i4_pic_idx--;
+ }
+
+ memcpy(
+ &ps_pic_handling->as_pic_stack[(i4_pic_idx + 1) % (i4_max_inter_frm_int + 1)],
+ &ps_pic_handling->as_pic_stack[i4_pic_idx],
+ sizeof(pic_details_t));
+ }
+
+ /* Check what type of ref_pic it is */
+ /*if(ps_pic_handling->i4_p_count_in_gop >= ps_pic_handling->i4_frms_in_cur_gop[P_PIC])
+ {
+ e_ref_pic_type = I_PIC;
+ }
+ else
+ {
+ e_ref_pic_type = P_PIC;
+ }*/
+
+ /* Copy the last B_PIC pic_details to the first B_PIC place and change its pic type to P_PIC */
+ ps_pic_handling->as_pic_stack[i4_first_b_pic_idx].e_pic_type = P_PIC; /*e_ref_pic_type*/
+ ;
+ ps_pic_handling->as_pic_stack[i4_first_b_pic_idx].i4_pic_disp_order_no =
+ s_pic_details.i4_pic_disp_order_no;
+ ps_pic_handling->as_pic_stack[i4_first_b_pic_idx].i4_pic_id = s_pic_details.i4_pic_id;
+ }
+}
+
+/******************************************************************************
+ Function Name : flush_frame_from_pic_stack
+ Description   : Since when a flush frame is called, there will be no valid
+                 frames after it, the last frame cannot be a B_PIC, as there
+                 will be no reference frame for it (input in display order).
+
+                 The last added picture (s_prev_pic_details) is inspected:
+                 - B_PIC: its display order number and pic id are written
+                   to the slot at i4_ref_pic_idx, which becomes a P_PIC,
+                   and the slot at i4_prev_b_pic_idx is invalidated
+                 - otherwise: the slot at i4_ref_pic_idx, and the slot at
+                   i4_b_pic_idx when i4_inter_frm_int != 1, are invalidated
+                 An invalidated slot holds e_pic_type = MAX_PIC_TYPE with
+                 i4_pic_id = -1 and i4_pic_disp_order_no = -1
+
+ Arguments     : ps_pic_handling - picture handling state to update
+ Return Values : void
+ Revision History:
+ Creation
+*****************************************************************************/
+void flush_frame_from_pic_stack(pic_handling_t *ps_pic_handling)
+{
+    /* Private snapshot of the last added picture; the state member is only read */
+    const pic_details_t s_last_added = ps_pic_handling->s_prev_pic_details;
+    pic_details_t *const ps_ref_slot =
+        &ps_pic_handling->as_pic_stack[ps_pic_handling->i4_ref_pic_idx];
+
+    if(s_last_added.e_pic_type != B_PIC)
+    {
+        /* Modify the next pic_type details in the stack, so that codec gets to know
+           when all the buffered frames are flushed */
+        ps_ref_slot->e_pic_type = MAX_PIC_TYPE;
+        ps_ref_slot->i4_pic_id = -1;
+        ps_ref_slot->i4_pic_disp_order_no = -1;
+
+        if(ps_pic_handling->i4_inter_frm_int != 1)
+        {
+            pic_details_t *const ps_b_slot =
+                &ps_pic_handling->as_pic_stack[ps_pic_handling->i4_b_pic_idx];
+
+            ps_b_slot->e_pic_type = MAX_PIC_TYPE;
+            ps_b_slot->i4_pic_id = -1;
+            ps_b_slot->i4_pic_disp_order_no = -1;
+        }
+        return;
+    }
+
+    /* Copy the last B_PIC details to the next reference pic in display order */
+    ps_ref_slot->i4_pic_disp_order_no = s_last_added.i4_pic_disp_order_no;
+    ps_ref_slot->i4_pic_id = s_last_added.i4_pic_id;
+    ps_ref_slot->e_pic_type = P_PIC;
+
+    /* Invalidate the last B_PIC slot so the codec can tell when the flush is complete */
+    {
+        pic_details_t *const ps_old_b_slot =
+            &ps_pic_handling->as_pic_stack[ps_pic_handling->i4_prev_b_pic_idx];
+
+        ps_old_b_slot->e_pic_type = MAX_PIC_TYPE;
+        ps_old_b_slot->i4_pic_id = -1;
+        ps_old_b_slot->i4_pic_disp_order_no = -1;
+    }
+}
+
+/********************************************************************************************/
+/******************************************************************************
+ Function Name : add_pic_to_stack_re_enc
+ Description : In case of a re-enc, we can assume the pictures to be coming
+ in the encode order.
+ In case of a re-encoder there are basically 3 problematic cases.
+ 1)Inter_frm_int is not known to start with
+ 2)Inter_frm_int can keep changing
+ 3)Intra_frm_int set by the application and the one actually in the
+ decoded bitstream may be different
+
+ Arguments : ps_pic_handling - picture handling state to update
+ i4_enc_pic_id - id stored with the picture in as_pic_stack
+ e_pic_type - picture type of the incoming picture
+ Return Values : -1 if the B_PIC run exceeds i4_max_inter_frm_int, else 0
+ Revision History:
+ Creation
+
+Assumptions - pictures are supplied in encode order (see Description)
+*****************************************************************************/
+WORD32 add_pic_to_stack_re_enc(
+ pic_handling_t *ps_pic_handling, WORD32 i4_enc_pic_id, picture_type_e e_pic_type)
+{
+ WORD32 i4_b_count_in_subgop;
+ WORD32 i4_max_inter_frm_int, i4_inter_frm_int, i4_intra_frm_int;
+ WORD32 i4_pic_disp_order_no;
+ WORD32 i4_is_gop_closed;
+ picture_type_e e_out_pic_type;
+ WORD32 i4_b_in_incomp_subgop;
+
+ /* Check if a change in intra_frm_int call has been made */
+ if(ps_pic_handling->i4_change_in_intra_frm_int == 1)
+ {
+ update_pic_distbn(
+ ps_pic_handling,
+ ps_pic_handling->i4_new_intra_frm_int,
+ ps_pic_handling->i4_inter_frm_int,
+ 1);
+ ps_pic_handling->i4_change_in_intra_frm_int = 0;
+ }
+
+ /* Check if a change in inter_frm_int call has been made */
+ if(ps_pic_handling->i4_change_in_inter_frm_int == 1)
+ {
+ update_pic_distbn(
+ ps_pic_handling,
+ ps_pic_handling->i4_intra_frm_int,
+ ps_pic_handling->i4_new_inter_frm_int,
+ 1);
+
+ ps_pic_handling->i4_change_in_inter_frm_int = 0;
+ }
+
+ /* Initialize the local vars with the state vars (read after any update_pic_distbn call above) */
+ i4_b_count_in_subgop = ps_pic_handling->i4_b_count_in_subgop;
+ i4_max_inter_frm_int = ps_pic_handling->i4_max_inter_frm_int;
+ i4_inter_frm_int = ps_pic_handling->i4_inter_frm_int;
+ i4_intra_frm_int = ps_pic_handling->i4_intra_frm_int;
+ i4_pic_disp_order_no = ps_pic_handling->i4_pic_disp_order_no;
+ i4_is_gop_closed = ps_pic_handling->i4_is_gop_closed;
+ i4_b_in_incomp_subgop = ps_pic_handling->i4_b_in_incomp_subgop;
+
+ e_out_pic_type = e_pic_type;
+
+ /* Initially the rate_control assumes an IPP sequence */
+ if(e_pic_type == B_PIC)
+ {
+ /* Update the number of B_PICs in a subgop */
+ i4_b_count_in_subgop++;
+
+ if(i4_b_count_in_subgop > i4_max_inter_frm_int)
+ {
+ return (-1);
+ }
+
+ /* If the number of B_PICs exceeds the set inter_frm_int then
+ change the inter_frm_int */
+ if(i4_b_count_in_subgop > (i4_inter_frm_int - 1))
+ {
+ i4_inter_frm_int = (i4_b_count_in_subgop + 1);
+
+ update_pic_distbn(ps_pic_handling, i4_intra_frm_int, i4_inter_frm_int, 0);
+ }
+ }
+ else if((e_pic_type == I_PIC) || (e_pic_type == P_PIC))
+ {
+ /* If the B_PICs in the prev subgop were fewer than the current (inter_frm_int-1)
+ and none of these conditions occur, it means inter_frm_int has decreased:
+ 1)End of a GOP
+ 2)Beginning of an OPEN_GOP
+ */
+ if((i4_b_count_in_subgop < (i4_inter_frm_int - 1)) &&
+ !((!i4_is_gop_closed) && (i4_b_count_in_subgop >= i4_b_in_incomp_subgop)) &&
+ !((i4_pic_disp_order_no + (i4_inter_frm_int - 1 - i4_b_count_in_subgop)) >
+ i4_intra_frm_int))
+ {
+ i4_inter_frm_int = (i4_b_count_in_subgop + 1);
+
+ update_pic_distbn(ps_pic_handling, i4_intra_frm_int, i4_inter_frm_int, 0);
+ }
+
+ /* Reset the number of B_PICs in a subgop */
+ i4_b_count_in_subgop = 0;
+ }
+
+ /* Update the frame level vars */
+ i4_pic_disp_order_no++;
+
+ /* End of gop condition
+ Two cases can arise :
+ 1) The intra_frm_int set by the application is greater than the actual bitstream intra_frm_int
+ (i.e. we will get an I frame before pic_disp_order_no goes to intra_frm_int)
+ 2) The intra_frm_int set by the application is smaller than the actual bitstream intra_frm_int
+ (i.e. we won't get an I_PIC even if pic_disp_order_no goes to intra_frm_int)
+ Constraints :
+ 1) I_PIC cannot be changed to B_PIC
+ 2) B_PIC cannot be changed to I_PIC */
+ if(i4_pic_disp_order_no >= i4_intra_frm_int)
+ {
+ if(e_pic_type != B_PIC)
+ {
+ e_out_pic_type = I_PIC;
+ }
+ else
+ {
+ e_out_pic_type = B_PIC;
+ ps_pic_handling->i4_rem_frms_in_cur_gop++;
+ ps_pic_handling->i4_frms_in_cur_gop[B_PIC]++;
+ ps_pic_handling->i4_frms_in_gop[B_PIC]++;
+ }
+ }
+ else
+ {
+ if((e_pic_type == I_PIC) && (!ps_pic_handling->i4_is_first_gop))
+ {
+ e_out_pic_type = P_PIC;
+ ps_pic_handling->i4_rem_frms_in_cur_gop++;
+ ps_pic_handling->i4_frms_in_cur_gop[P_PIC]++;
+ ps_pic_handling->i4_frms_in_gop[P_PIC]++;
+ }
+ else
+ {
+ e_out_pic_type = e_pic_type;
+ }
+ }
+
+ /* Update the frm_vars at the end of the gop */
+ if(i4_pic_disp_order_no == (ps_pic_handling->i4_frms_in_cur_gop[P_PIC] +
+ ps_pic_handling->i4_frms_in_cur_gop[B_PIC] + 1))
+ {
+ i4_pic_disp_order_no = 0;
+ ps_pic_handling->i4_is_first_gop = 0;
+ }
+
+ /* Update the vars working on the encoded pics */
+ if((ps_pic_handling->i4_is_first_gop) && (ps_pic_handling->i4_stack_count == -1))
+ {
+ ps_pic_handling->i4_coded_pic_no = 0;
+ ps_pic_handling->i4_stack_count = 0;
+ }
+
+ /* Add the pic_details to the pic_stack (disp order is read from the state struct, not the local copy) */
+ ps_pic_handling->as_pic_stack[ps_pic_handling->i4_stack_count].e_pic_type = e_out_pic_type;
+ ps_pic_handling->as_pic_stack[ps_pic_handling->i4_stack_count].i4_pic_disp_order_no =
+ ps_pic_handling->i4_pic_disp_order_no;
+ ps_pic_handling->as_pic_stack[ps_pic_handling->i4_stack_count].i4_pic_id = i4_enc_pic_id;
+
+ /* Writing back those values which need to be updated */
+ ps_pic_handling->i4_inter_frm_int = i4_inter_frm_int;
+ ps_pic_handling->i4_pic_disp_order_no = i4_pic_disp_order_no;
+ ps_pic_handling->i4_b_count_in_subgop = i4_b_count_in_subgop;
+
+ return (0);
+}
+
+/******************************************************************************
+ Function Name : pic_type_update_frms_in_gop
+ Description   : Update current gop from lap data: overwrites both
+                 i4_frms_in_cur_gop and i4_actual_frms_in_gop with the
+                 caller supplied per picture type counts
+ Arguments     : ps_pic_handling, ai4_frms_in_gop (source counts)
+ Return Values : void
+*****************************************************************************/
+
+void pic_type_update_frms_in_gop(
+    pic_handling_t *ps_pic_handling, WORD32 ai4_frms_in_gop[MAX_PIC_TYPE])
+{
+    const void *pv_new_counts = ai4_frms_in_gop;
+
+    /* Both tables are filled from the same caller array; each copy length is
+       the size of its own destination member */
+    memmove(ps_pic_handling->i4_frms_in_cur_gop, pv_new_counts,
+            sizeof(ps_pic_handling->i4_frms_in_cur_gop));
+    memmove(ps_pic_handling->i4_actual_frms_in_gop, pv_new_counts,
+            sizeof(ps_pic_handling->i4_actual_frms_in_gop));
+}
+/******************************************************************************
+ Function Name : get_default_intra_period
+ Description   : Reads back i4_intra_frm_int, the same member that
+                 pic_type_get_intra_frame_interval() returns
+ Arguments     : ps_pic_handling - picture handling state to query
+ Return Values : current value of ps_pic_handling->i4_intra_frm_int
+*****************************************************************************/
+WORD32 get_default_intra_period(pic_handling_t *ps_pic_handling)
+{
+    const WORD32 i4_intra_period = ps_pic_handling->i4_intra_frm_int;
+    return i4_intra_period;
+}
diff --git a/encoder/picture_type.h b/encoder/picture_type.h
new file mode 100644
index 0000000..b78e90a
--- /dev/null
+++ b/encoder/picture_type.h
@@ -0,0 +1,123 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file picture_type.h
+*
+* \brief
+* This file contains all the necessary declarations for
+* picture info handling functions
+*
+* \date
+*
+* \author
+* ittiam
+*
+******************************************************************************
+*/
+#ifndef _PIC_HANDLING_H_
+#define _PIC_HANDLING_H_
+
+/* Basic Understanding:
+ * add_pic_to_stack(_re_enc):
+ * These functions convert the input (or display) order to encoding order
+ * (NOTE(review): the _re_enc definition says its input is in encode order)
+ * */
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+typedef struct pic_handling_t *pic_handling_handle;
+
+/*****************************************************************************/
+/* Function Declarations */
+/*****************************************************************************/
+WORD32 pic_handling_num_fill_use_free_memtab(
+ pic_handling_handle *pps_pic_handling, itt_memtab_t *ps_memtab, ITT_FUNC_TYPE_E e_func_type);
+void init_pic_handling(
+ pic_handling_handle ps_pic_handling,
+ WORD32 i4_intra_frm_int,
+ WORD32 i4_max_inter_frm_int,
+ WORD32 i4_is_gop_closed,
+ WORD32 i4_idr_period,
+ WORD32 i4_num_active_pic_type,
+ WORD32 i4_field_pic);
+
+void add_pic_to_stack(
+ pic_handling_handle ps_pic_handling, WORD32 i4_enc_pic_id, WORD32 i4_rc_in_pic);
+WORD32 add_pic_to_stack_re_enc(
+ pic_handling_handle ps_pic_handling, WORD32 i4_enc_pic_id, picture_type_e e_pic_type);
+
+void get_pic_from_stack(
+ pic_handling_handle ps_pic_handling,
+ WORD32 *pi4_pic_id,
+ WORD32 *pi4_pic_disp_order_no,
+ picture_type_e *pe_pic_type,
+ WORD32 *pi4_is_scd);
+
+WORD32 is_last_frame_in_gop(pic_handling_handle ps_pic_handling);
+void flush_frame_from_pic_stack(pic_handling_handle ps_pic_handling);
+
+/* NITT TBR: the two functions below should be merged into a single function */
+void skip_encoded_frame(pic_handling_handle ps_pic_handling, picture_type_e e_pic_type);
+void update_pic_handling(
+ pic_handling_handle ps_pic_handling,
+ picture_type_e e_pic_type,
+ WORD32 i4_is_non_ref_pic,
+ WORD32 i4_is_scd);
+
+/* Function returns the number of frames that have been encoded in the GOP in
+ * which the force I frame takes effect */
+WORD32 pic_type_get_frms_in_gop_force_I_frm(pic_handling_handle ps_pic_handling);
+void set_force_I_frame_flag(pic_handling_handle ps_pic_handling);
+WORD32 get_forced_I_frame_cur_frm_flag(pic_handling_handle ps_pic_handling);
+void reset_forced_I_frame_cur_frm_flag(pic_handling_handle ps_pic_handling);
+
+/* Plain getters and setters on the picture handling state */
+WORD32 pic_type_get_inter_frame_interval(pic_handling_handle ps_pic_handling);
+
+WORD32 pic_type_get_intra_frame_interval(pic_handling_handle ps_pic_handling);
+
+WORD32 pic_type_get_disp_order_no(pic_handling_handle ps_pic_handling);
+
+WORD32 pic_type_get_field_pic(pic_handling_handle ps_pic_handling);
+
+WORD32 pic_type_is_gop_closed(pic_handling_handle ps_pic_handling);
+
+void pic_handling_register_new_int_frm_interval(
+ pic_handling_handle ps_pic_handling, WORD32 i4_intra_frm_int);
+
+void pic_handling_register_new_inter_frm_interval(
+ pic_handling_handle ps_pic_handling, WORD32 i4_inter_frm_int);
+
+WORD32 pic_type_get_rem_frms_in_gop(pic_handling_handle ps_pic_handling);
+
+void pic_type_get_frms_in_gop(
+ pic_handling_handle ps_pic_handling, WORD32 ai4_frms_in_gop[MAX_PIC_TYPE]);
+
+void pic_type_get_actual_frms_in_gop(
+ pic_handling_handle ps_pic_handling, WORD32 ai4_frms_in_gop[MAX_PIC_TYPE]);
+
+void pic_type_update_frms_in_gop(
+ pic_handling_handle ps_pic_handling, WORD32 ai4_frms_in_gop[MAX_PIC_TYPE]);
+
+WORD32 get_default_intra_period(pic_handling_handle ps_pic_handling);
+
+WORD32 pic_type_get_actual_intra_frame_interval(pic_handling_handle ps_pic_handling);
+#endif /* _PIC_HANDLING_H_ */
diff --git a/encoder/rate_control_api.c b/encoder/rate_control_api.c
new file mode 100644
index 0000000..6503513
--- /dev/null
+++ b/encoder/rate_control_api.c
@@ -0,0 +1,4241 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file rate_control_api.c
+*
+* \brief
+* This file contains the rate control API functions
+*
+* \date
+*
+* \author
+* ittiam
+*
+******************************************************************************
+*/
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include <math.h>
+
+/* User include files */
+#include "ittiam_datatypes.h"
+/* Lower module include files. These inclusions could be removed by having
+ forward declarations for each and every module */
+#include "rc_common.h"
+#include "rc_cntrl_param.h"
+#include "mem_req_and_acq.h"
+#include "var_q_operator.h"
+#include "rc_rd_model.h"
+#include "est_sad.h"
+#include "fixed_point_error_bits.h"
+#include "vbr_storage_vbv.h"
+#include "picture_type.h"
+#include "cbr_buffer_control.h"
+#include "bit_allocation.h"
+#include "mb_model_based.h"
+#include "vbr_str_prms.h"
+#include "init_qp.h"
+#include "rc_sad_acc.h"
+#include "rc_frame_info_collector.h"
+#include "rate_control_api.h"
+#include "rate_control_api_structs.h"
+#include "trace_support.h"
+
+/** Macros: fully parenthesised so they compose safely; arguments may be evaluated more than once **/
+#define MIN(x, y) (((x) < (y)) ? (x) : (y))
+#define MAX(x, y) (((x) < (y)) ? (y) : (x))
+#define CLIP3RC(x, min, max) (((x) > (max)) ? (max) : (((x) < (min)) ? (min) : (x)))
+
+#define DEV_Q 4 /*Q format(Shift) for Deviation range factor */
+#define HI_DEV_FCTR 26 //23//32 /* 1.4*16 */
+#define LO_DEV_B_FCTR 10 //temp change to avoid stuffing12 /* 0.75*16 */
+#define LO_DEV_FCTR_1B 14 //8 /* 0.75*16 */
+//#define LO_DEV_FCTR_7B 10//8 /* 0.75*16 */
+#define LO_DEV_FCTR_3B 12 //8 /* 0.75*16 */
+#define LO_DEV_FCTR_7B 12 //8 /* 0.75*16 */
+#define GET_HI_DEV_QP(Qprev) ((((WORD32)Qprev) * HI_DEV_FCTR + (1 << (DEV_Q - 1))) >> DEV_Q)
+/* Rounded lower deviation bound of Qprev; the factor is selected by the number of active picture types */
+#define GET_LO_DEV_QP(Qprev, i4_num_active_pic_types) (((i4_num_active_pic_types) <= B1_PIC) ? ((((WORD32)(Qprev)) * LO_DEV_FCTR_1B + (1 << (DEV_Q - 1))) >> DEV_Q) : \
+    (((i4_num_active_pic_types) == B2_PIC) ? ((((WORD32)(Qprev)) * LO_DEV_FCTR_3B + (1 << (DEV_Q - 1))) >> DEV_Q) : \
+    ((((WORD32)(Qprev)) * LO_DEV_FCTR_7B + (1 << (DEV_Q - 1))) >> DEV_Q)))
+
+#define GET_LO_DEV_QP_B(Qprev) ((((WORD32)Qprev) * LO_DEV_B_FCTR + (1 << (DEV_Q - 1))) >> DEV_Q)
+#define CLIP_QP(Qc, hi_d, lo_d) (((Qc) < (lo_d)) ? ((lo_d)) : (((Qc) > (hi_d)) ? (hi_d) : (Qc)))
+
+/* The macros below are used when the QP is already in Q format, so adding 0.5 for rounding is not required */
+#define GET_HI_DEV_QP_QFAC(Qprev) ((((WORD32)Qprev) * HI_DEV_FCTR) >> DEV_Q)
+#define GET_LO_DEV_QP_QFAC(Qprev, i4_num_active_pic_types) \
+ ((i4_num_active_pic_types <= B1_PIC) \
+ ? ((((WORD32)Qprev) * LO_DEV_FCTR_1B) >> DEV_Q) \
+ : ((i4_num_active_pic_types == B2_PIC) ? ((((WORD32)Qprev) * LO_DEV_FCTR_3B) >> DEV_Q) \
+ : ((((WORD32)Qprev) * LO_DEV_FCTR_7B) >> DEV_Q)))
+
+#define GET_LO_DEV_QP_QFAC_B_PIC(Qprev) ((((WORD32)Qprev) * LO_DEV_FCTR_3B) >> DEV_Q)
+
+#define GET_LO_DEV_QP_B_QFAC(Qprev) ((((WORD32)Qprev) * LO_DEV_B_FCTR) >> DEV_Q)
+
+#define P_TO_I_RATIO_Q_FACTOR (9)
+#define MULT_FACTOR_SATD (4)
+#define GET_L0_SATD_BY_ACT_MAX_PER_PIXEL(i4_num_pixel) \
+ ((5.4191f * i4_num_pixel + 4000000.0f) / i4_num_pixel)
+#define GET_WEIGH_FACTOR_FOR_MIN_SCD_Q_SCALE(normal_satd_act, f_satd_by_Act_norm) \
+ (MULT_FACTOR_SATD * normal_satd_act + f_satd_by_Act_norm) / \
+ (normal_satd_act + MULT_FACTOR_SATD * f_satd_by_Act_norm)
+
+void SET_NETRA_TRACE(UWORD8 tag[], WORD32 value);
+#define NETRA_TRACE (0)
+#if NETRA_TRACE
+#else
+#define SET_NETRA_TRACE(x, y)
+#endif
+
+/*****************************************************************************/
+/* Restricts the quantisation parameter variation within delta */
+/*****************************************************************************/
+/* static WORD32 restrict_swing(WORD32 cur_qp, WORD32 prev_qp, WORD32 delta_qp)
+{
+ if((cur_qp) - (prev_qp) > (delta_qp)) (cur_qp) = (prev_qp) + (delta_qp) ;
+ if((prev_qp) - (cur_qp) > (delta_qp)) (cur_qp) = (prev_qp) - (delta_qp) ;
+ return cur_qp;
+}*/
+
+/*****************************************************************************
+Function Name : rate_control_num_fill_use_free_memtab
+Description : Counts, fills or uses the memtabs of the RC api and its lower modules
+Inputs : pps_rate_control_api - pointer to RC api pointer
+ ps_memtab - Memtab pointer
+ e_func_type - operation to perform; for GET_NUM_MEMTAB and
+ FILL_MEMTAB a static scratch structure stands in for the state
+Globals : s_temp_rc_api (function-local static scratch structure)
+Processing : First memtab is for rate_control_api_t, then each lower module in turn
+Outputs : ps_memtab[0] is filled unless e_func_type is GET_NUM_MEMTAB
+Returns : 1 + the sum of the values returned by the lower modules' memtab functions
+Issues : NOTE(review): the shared static scratch looks non-reentrant - confirm
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+
+*****************************************************************************/
+#if NON_STEADSTATE_CODE
+WORD32 rate_control_num_fill_use_free_memtab(
+ rate_control_handle *pps_rate_control_api, itt_memtab_t *ps_memtab, ITT_FUNC_TYPE_E e_func_type)
+{
+ WORD32 i4_mem_tab_idx = 0, i;
+ static rate_control_api_t s_temp_rc_api;
+
+ /* Hack for the alloc-time calls, during which we don't have any state memory.
+ Dereferencing the caller's pointer could cause issues, so point it at the scratch */
+ if(e_func_type == GET_NUM_MEMTAB || e_func_type == FILL_MEMTAB)
+ (*pps_rate_control_api) = &s_temp_rc_api;
+
+ /* memtab for the rate control state structure itself (index 0 at this point) */
+ if(e_func_type != GET_NUM_MEMTAB)
+ {
+ fill_memtab(
+ &ps_memtab[i4_mem_tab_idx],
+ sizeof(rate_control_api_t),
+ MEM_TAB_ALIGNMENT,
+ PERSISTENT,
+ DDR);
+ use_or_fill_base(&ps_memtab[0], (void **)pps_rate_control_api, e_func_type);
+ }
+ i4_mem_tab_idx++;
+
+ /* Get the memory requirement of lower modules; each call advances the memtab index by its return value */
+ i4_mem_tab_idx += bit_allocation_num_fill_use_free_memtab(
+ &pps_rate_control_api[0]->ps_bit_allocation, &ps_memtab[i4_mem_tab_idx], e_func_type);
+
+ i4_mem_tab_idx += cbr_buffer_num_fill_use_free_memtab(
+ &pps_rate_control_api[0]->ps_cbr_buffer, &ps_memtab[i4_mem_tab_idx], e_func_type);
+
+ i4_mem_tab_idx += est_sad_num_fill_use_free_memtab(
+ &pps_rate_control_api[0]->ps_est_sad, &ps_memtab[i4_mem_tab_idx], e_func_type);
+
+ i4_mem_tab_idx += mbrc_num_fill_use_free_memtab(
+ &pps_rate_control_api[0]->ps_mb_rate_control, &ps_memtab[i4_mem_tab_idx], e_func_type);
+
+ i4_mem_tab_idx += vbr_vbv_num_fill_use_free_memtab(
+ &pps_rate_control_api[0]->ps_vbr_storage_vbv, &ps_memtab[i4_mem_tab_idx], e_func_type);
+
+ i4_mem_tab_idx += init_qp_num_fill_use_free_memtab(
+ &pps_rate_control_api[0]->ps_init_qp, &ps_memtab[i4_mem_tab_idx], e_func_type);
+
+ i4_mem_tab_idx += sad_acc_num_fill_use_free_memtab(
+ &pps_rate_control_api[0]->ps_sad_acc, &ps_memtab[i4_mem_tab_idx], e_func_type);
+
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ i4_mem_tab_idx += rc_rd_model_num_fill_use_free_memtab(
+ &pps_rate_control_api[0]->aps_rd_model[i], &ps_memtab[i4_mem_tab_idx], e_func_type);
+ }
+ i4_mem_tab_idx += pic_handling_num_fill_use_free_memtab(
+ &pps_rate_control_api[0]->ps_pic_handling, &ps_memtab[i4_mem_tab_idx], e_func_type);
+ return (i4_mem_tab_idx);
+}
+
+/*****************************************************************************
+Function Name : initialise_rate_control
+Description : Initialise the rate control structure and its lower modules
+Inputs : ps_rate_control_api - api struct
+ e_rate_control_type - VBR, CBR (NLDRC/LDRC), VBR_STREAMING; CBR_NLDRC_HBR is stored as CBR_NLDRC with i4_is_hbr set
+ u1_is_mb_level_rc_on - enabling mb level RC
+ u4_avg_bit_rate - bit rate to be achieved across the entire file size
+ pu4_peak_bit_rate - max possible drain rates (MAX_NUM_DRAIN_RATES entries are read)
+ u4_frame_rate - number of frames in 1000 seconds
+ u4_intra_frame_interval - num frames between two I frames (1 is treated as i only)
+ pi4_init_qp - init_qp for I,P,B (not read in this function)
+ pi4_min_max_qp - min QP at index 2*i and max QP at index 2*i + 1 for picture type i
+ i4_pass_num - pv_gop_stat is retained only when this is 2
+Globals :
+Processing : Stores the configuration, then initialises pic handling, init qp, buffer, SAD, bit allocation and mb level modules
+Outputs : ps_rate_control_api fields and the lower module states it points to
+Returns : void
+Issues :
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+
+*****************************************************************************/
+void initialise_rate_control(
+ rate_control_api_t *ps_rate_control_api,
+ rc_type_e e_rate_control_type,
+ UWORD8 u1_is_mb_level_rc_on,
+ UWORD32 u4_avg_bit_rate,
+ UWORD32 *pu4_peak_bit_rate,
+ UWORD32 u4_min_bit_rate,
+ UWORD32 u4_frame_rate,
+ UWORD32 u4_max_delay,
+ UWORD32 u4_intra_frame_interval,
+ UWORD32 u4_idr_period,
+ WORD32 *pi4_init_qp,
+ UWORD32 u4_max_vbv_buff_size,
+ WORD32 i4_max_inter_frm_int,
+ WORD32 i4_is_gop_closed,
+ WORD32 *pi4_min_max_qp,
+ WORD32 i4_use_est_intra_sad,
+ UWORD32 u4_src_ticks,
+ UWORD32 u4_tgt_ticks,
+ WORD32 i4_frame_height,
+ WORD32 i4_frame_width,
+ WORD32 i4_num_active_pic_type,
+ WORD32 i4_field_pic,
+ WORD32 i4_quality_preset,
+ WORD32 i4_lap_window,
+ WORD32 i4_initial_decoder_delay_frames,
+ float f_max_peak_rate_sustain_dur,
+ LWORD64 i8_num_frames_to_encode,
+ UWORD32 u4_min_scd_hevc_qp,
+ UWORD8 u1_bit_depth,
+ FILE *pf_rc_stat_file,
+ WORD32 i4_pass_num,
+ void *pv_gop_stat,
+ LWORD64 i8_num_gop_mem_alloc,
+ WORD32 i4_is_infinite_gop,
+ WORD32 i4_size_of_lap_out,
+ WORD32 i4_size_of_rc_lap_out,
+ void *pv_sys_rc_api,
+ WORD32 i4_fp_bit_alloc_in_sp,
+ WORD32 i4_num_frame_parallel,
+ WORD32 i4_capped_vbr_flag)
+{
+ WORD32 i, i4_temp;
+ UWORD32 u4_frms_in_delay_prd = (u4_frame_rate * u4_max_delay) / 1000000; /* NOTE(review): product is formed in UWORD32 and may wrap for large delays - confirm input range */
+ UWORD32 i4_cbr_bit_alloc_period;
+ float f_bit_depth_based_max_qp;
+ UWORD32 u4_bit_depth_based_max_qp;
+ WORD32 i4_pels_in_frame = (3 * (i4_frame_height * i4_frame_width) >> 1); /* 1.5 * width * height */
+
+ if(u4_intra_frame_interval ==
+ 1) /*i_only: bit allocation period is u4_frame_rate / 1000 (currently not configurable) for i only mode*/
+ {
+ i4_cbr_bit_alloc_period = u4_frame_rate / 1000; /*changed */
+ }
+ else
+ {
+ i4_cbr_bit_alloc_period = 1;
+ }
+
+ /* The HBR variant is handled as CBR_NLDRC from here on, remembered through i4_is_hbr */
+ if(CBR_NLDRC_HBR == e_rate_control_type)
+ {
+ e_rate_control_type = CBR_NLDRC;
+ ps_rate_control_api->i4_is_hbr = 1;
+ }
+ else
+ {
+ ps_rate_control_api->i4_is_hbr = 0;
+ }
+ ps_rate_control_api->e_rc_type = e_rate_control_type;
+ ps_rate_control_api->i4_capped_vbr_flag = i4_capped_vbr_flag;
+ ps_rate_control_api->u1_is_mb_level_rc_on = u1_is_mb_level_rc_on;
+ ps_rate_control_api->i4_num_active_pic_type = i4_num_active_pic_type;
+ ps_rate_control_api->i4_quality_preset = i4_quality_preset;
+ ps_rate_control_api->i4_scd_I_frame_estimated_tot_bits = 0;
+ ps_rate_control_api->i4_I_frame_qp_model = 0;
+ ps_rate_control_api->u4_min_scd_hevc_qp = u4_min_scd_hevc_qp;
+ ps_rate_control_api->pf_rc_stat_file = pf_rc_stat_file;
+ ps_rate_control_api->i4_rc_pass = i4_pass_num;
+ ps_rate_control_api->i4_max_frame_height = i4_frame_height;
+ ps_rate_control_api->i4_max_frame_width = i4_frame_width;
+ ps_rate_control_api->i4_underflow_warning = 0;
+ ps_rate_control_api->f_p_to_i_comp_ratio = 1.0f;
+ ps_rate_control_api->i4_scd_in_period_2_pass = 0;
+ ps_rate_control_api->i4_is_infinite_gop = i4_is_infinite_gop;
+ ps_rate_control_api->i4_frames_since_last_scd = 0;
+ ps_rate_control_api->i4_num_frame_parallel = i4_num_frame_parallel;
+
+ /*The memory for gop level summary struct is stored only for 2 pass*/
+ if(i4_pass_num == 2)
+ {
+ ps_rate_control_api->pv_2pass_gop_summary = pv_gop_stat;
+ }
+ else
+ {
+ ps_rate_control_api->pv_2pass_gop_summary = NULL;
+ }
+ /*Initialize the call back functions for file related operations*/
+ ps_rate_control_api->pv_rc_sys_api = pv_sys_rc_api;
+
+ ps_rate_control_api->u1_bit_depth = u1_bit_depth;
+
+ /* Max qscale is 2 ^ ((max QP for the bit depth - 4) / 6), truncated to an integer */
+ f_bit_depth_based_max_qp = (float)((51 + (6 * (u1_bit_depth - 8))) - 4) / 6;
+ u4_bit_depth_based_max_qp = (UWORD32)pow(2.0f, f_bit_depth_based_max_qp);
+
+ ps_rate_control_api->u4_bit_depth_based_max_qp = u4_bit_depth_based_max_qp;
+
+ trace_printf("RC type = %d\n", e_rate_control_type);
+
+ /* Set the avg_bitrate_changed flag for each pic_type to 0 */
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ ps_rate_control_api->au1_avg_bitrate_changed[i] = 0;
+ }
+
+ /* Initialize the pic_handling module */
+ init_pic_handling(
+ ps_rate_control_api->ps_pic_handling, /*ps_pic_handling*/
+ (WORD32)u4_intra_frame_interval, /*i4_intra_frm_int,*/
+ i4_max_inter_frm_int, /*i4_max_inter_frm_int,*/
+ i4_is_gop_closed,
+ (WORD32)u4_idr_period,
+ ps_rate_control_api->i4_num_active_pic_type,
+ i4_field_pic); /*gop_struct_e*/
+
+ /* initialise the init Qp module */
+ init_init_qp(
+ ps_rate_control_api->ps_init_qp,
+ pi4_min_max_qp,
+ i4_pels_in_frame,
+ ps_rate_control_api->i4_is_hbr);
+
+ /*** Initialize the rate control modules ***/
+ if(ps_rate_control_api->e_rc_type != CONST_QP)
+ {
+ UWORD32 au4_num_pics_in_delay_prd[MAX_PIC_TYPE] = { 0 };
+
+ /* Initialise the model parameter structures */
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ init_frm_rc_rd_model(ps_rate_control_api->aps_rd_model[i], MAX_FRAMES_MODELLED);
+ }
+
+ /* Initialize the buffer mechanism */
+ if((ps_rate_control_api->e_rc_type == VBR_STORAGE) ||
+ (ps_rate_control_api->e_rc_type == VBR_STORAGE_DVD_COMP))
+ {
+ /* Assuming both the peak bit rates are same for a VBR_STORAGE and
+ VBR_STORAGE_DVD_COMP */
+ if(pu4_peak_bit_rate[0] != pu4_peak_bit_rate[1])
+ {
+ trace_printf("For VBR_STORAGE and VBR_STORAGE_DVD_COMP the peak bit "
+ "rates should be same\n");
+ }
+ init_vbr_vbv(
+ ps_rate_control_api->ps_vbr_storage_vbv,
+ (WORD32)pu4_peak_bit_rate[0],
+ (WORD32)u4_frame_rate,
+ (WORD32)u4_max_vbv_buff_size);
+ }
+ else if(ps_rate_control_api->e_rc_type == CBR_NLDRC)
+ {
+ UWORD32 u4_avg_bit_rate_copy[MAX_NUM_DRAIN_RATES]; /* NOTE(review): filled below but never read in this function */
+ for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+ {
+ u4_avg_bit_rate_copy[i] = u4_avg_bit_rate;
+ }
+
+ init_cbr_buffer(
+ ps_rate_control_api->ps_cbr_buffer,
+ u4_max_delay,
+ u4_frame_rate,
+ u4_avg_bit_rate,
+ au4_num_pics_in_delay_prd,
+ u4_max_vbv_buff_size,
+ u4_intra_frame_interval,
+ ps_rate_control_api->e_rc_type,
+ pu4_peak_bit_rate[0],
+ i4_initial_decoder_delay_frames,
+ f_max_peak_rate_sustain_dur,
+ i8_num_frames_to_encode,
+ i4_max_inter_frm_int,
+ i4_pass_num,
+ 0 /*capped vbr off */);
+ }
+ else if(ps_rate_control_api->e_rc_type == VBR_STREAMING)
+ {
+ init_vbv_str_prms(
+ &ps_rate_control_api->s_vbr_str_prms,
+ u4_intra_frame_interval,
+ u4_src_ticks,
+ u4_tgt_ticks,
+ u4_frms_in_delay_prd);
+
+ init_cbr_buffer(
+ ps_rate_control_api->ps_cbr_buffer,
+ u4_max_delay,
+ u4_frame_rate,
+ u4_avg_bit_rate,
+ au4_num_pics_in_delay_prd,
+ u4_max_vbv_buff_size,
+ u4_intra_frame_interval,
+ ps_rate_control_api->e_rc_type,
+ pu4_peak_bit_rate[0],
+ i4_initial_decoder_delay_frames,
+ f_max_peak_rate_sustain_dur,
+ i8_num_frames_to_encode,
+ i4_max_inter_frm_int,
+ i4_pass_num,
+ ps_rate_control_api->i4_capped_vbr_flag);
+ }
+
+ /* Initialise the SAD estimation module */
+ init_est_sad(ps_rate_control_api->ps_est_sad, i4_use_est_intra_sad);
+
+ /* Initialise the bit allocation module according to VBR or CBR */
+ if((ps_rate_control_api->e_rc_type == VBR_STORAGE) ||
+ (ps_rate_control_api->e_rc_type == VBR_STREAMING) ||
+ (ps_rate_control_api->e_rc_type == VBR_STORAGE_DVD_COMP))
+ {
+ /*UWORD32 u4_scaled_avg_bit_rate;*/
+ /*X_PROD_Y_DIV_Z (u4_avg_bit_rate,1126,1024,u4_scaled_avg_bit_rate);*/
+ init_bit_allocation(
+ ps_rate_control_api->ps_bit_allocation,
+ ps_rate_control_api->ps_pic_handling,
+ i4_cbr_bit_alloc_period,
+ u4_avg_bit_rate /*u4_scaled_avg_bit_rate*/,
+ u4_frame_rate,
+ (WORD32 *)pu4_peak_bit_rate,
+ u4_min_bit_rate,
+ i4_pels_in_frame,
+ ps_rate_control_api->i4_is_hbr,
+ ps_rate_control_api->i4_num_active_pic_type,
+ i4_lap_window,
+ i4_field_pic,
+ i4_pass_num,
+ (i4_frame_height * i4_frame_width),
+ i4_fp_bit_alloc_in_sp);
+ }
+ else if(ps_rate_control_api->e_rc_type == CBR_NLDRC)
+ {
+ init_bit_allocation(
+ ps_rate_control_api->ps_bit_allocation,
+ ps_rate_control_api->ps_pic_handling,
+ i4_cbr_bit_alloc_period, //i_onlyCBR_BIT_ALLOC_PERIOD,
+ u4_avg_bit_rate,
+ u4_frame_rate,
+ (WORD32 *)pu4_peak_bit_rate,
+ u4_min_bit_rate,
+ i4_pels_in_frame,
+ ps_rate_control_api->i4_is_hbr,
+ ps_rate_control_api->i4_num_active_pic_type,
+ i4_lap_window,
+ i4_field_pic,
+ i4_pass_num,
+ (i4_frame_height * i4_frame_width),
+ i4_fp_bit_alloc_in_sp);
+ }
+ }
+ else
+ {
+ /* CONST_QP: only the cbr buffer is initialised, with an all-zero delay period table */
+ UWORD32 au4_num_pics_in_delay_prd[MAX_PIC_TYPE];
+
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ au4_num_pics_in_delay_prd[i] = 0;
+
+ init_cbr_buffer(
+ ps_rate_control_api->ps_cbr_buffer,
+ u4_max_delay,
+ u4_frame_rate,
+ u4_avg_bit_rate,
+ au4_num_pics_in_delay_prd,
+ u4_max_vbv_buff_size,
+ u4_intra_frame_interval,
+ ps_rate_control_api->e_rc_type,
+ pu4_peak_bit_rate[0],
+ i4_initial_decoder_delay_frames,
+ f_max_peak_rate_sustain_dur,
+ i8_num_frames_to_encode,
+ i4_max_inter_frm_int,
+ i4_pass_num,
+ 0 /*capped vbr off */);
+ }
+
+ /* Mark the previous frame QPs of every scene as unset (0x7FFFFFFF) and store the min/max QP per picture type */
+ for(i4_temp = 0; i4_temp < MAX_SCENE_NUM_RC; i4_temp++)
+ {
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ ps_rate_control_api->ai4_prev_frm_qp[i4_temp][i] = 0x7FFFFFFF; //pi4_init_qp[i];
+ ps_rate_control_api->ai4_prev_frm_qp_q6[i4_temp][i] =
+ 0x7FFFFFFF; //pi4_init_qp[i] << QSCALE_Q_FAC;
+ ps_rate_control_api->ai4_min_qp[i] = pi4_min_max_qp[(i << 1)];
+ ps_rate_control_api->ai4_max_qp[i] = pi4_min_max_qp[(i << 1) + 1];
+ }
+ }
+ /*init min and max qp in qscale*/
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ ps_rate_control_api->ai4_min_qp_q6[i] = MIN_QSCALE_Q6;
+ //ps_rate_control_api->ai4_max_qp_q6[i] = (228 << QSCALE_Q_FAC);
+ ps_rate_control_api->ai4_max_qp_q6[i] = (u4_bit_depth_based_max_qp << QSCALE_Q_FAC);
+ }
+
+ /* Initialize the is_first_frm_encoded */
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ ps_rate_control_api->au1_is_first_frm_coded[i] = 0;
+ }
+ ps_rate_control_api->u1_is_first_frm = 1;
+ ps_rate_control_api->i4_prev_ref_is_scd = 0;
+
+ /* Seed every estimated-total-bits slot with the value from get_buf_max_drain_rate() */
+ for(i = 0; i < MAX_NUM_FRAME_PARALLEL; i++)
+ {
+ ps_rate_control_api->ai4_est_tot_bits[i] =
+ get_buf_max_drain_rate(ps_rate_control_api->ps_cbr_buffer);
+ }
+
+ /* Control flag for delayed impact after a change in peak bitrate has been made */
+ ps_rate_control_api->u4_frms_in_delay_prd_for_peak_bit_rate_change = 0;
+ for(i = 0; i < MAX_NUM_DRAIN_RATES; i++)
+ {
+ ps_rate_control_api->au4_new_peak_bit_rate[i] = pu4_peak_bit_rate[i];
+ }
+
+ /* initialise the mb level rate control module */
+ init_mb_level_rc(ps_rate_control_api->ps_mb_rate_control);
+ ps_rate_control_api->i4_prev_frm_est_bits = u4_avg_bit_rate / (u4_frame_rate / 1000); /* NOTE(review): divides by zero when u4_frame_rate < 1000 - confirm callers */
+
+ ps_rate_control_api->prev_ref_pic_type = I_PIC;
+ ps_rate_control_api->i4_P_to_I_ratio = (1 << (P_TO_I_RATIO_Q_FACTOR + K_Q)) / I_TO_P_RATIO;
+
+ /* Initialise sad accumulator */
+ init_sad_acc(ps_rate_control_api->ps_sad_acc);
+
+ rc_get_max_hme_sad_per_pixel(ps_rate_control_api, i4_frame_height * i4_frame_width);
+}
+#endif /* #if NON_STEADSTATE_CODE */
+
+/****************************************************************************
+*Function Name : add_picture_to_stack
+*Description : Thin wrapper that calls add_pic_to_stack on the pic handling module
+*Inputs : rate_control_api (its ps_pic_handling is used), i4_enc_pic_id, i4_rc_in_pic
+*Globals :
+*Processing : Both integer arguments are forwarded unchanged
+*Outputs :
+*Returns : void
+*Issues :
+*Revision History:
+*DD MM YYYY Author(s) Changes (Describe the changes made)
+*
+*****************************************************************************/
+void add_picture_to_stack(
+ rate_control_api_t *rate_control_api, WORD32 i4_enc_pic_id, WORD32 i4_rc_in_pic)
+{
+ /* Call the routine to add the pic to stack in encode order */
+ add_pic_to_stack(rate_control_api->ps_pic_handling, i4_enc_pic_id, i4_rc_in_pic);
+}
+
+/****************************************************************************
+Function Name : add_picture_to_stack_re_enc
+Description : Thin wrapper that calls add_pic_to_stack_re_enc on the pic handling module
+Inputs : rate_control_api (its ps_pic_handling is used), i4_enc_pic_id, e_pic_type
+Globals :
+Processing : The picture id and picture type are forwarded unchanged
+Outputs :
+Returns : void
+Issues :
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+
+*****************************************************************************/
+void add_picture_to_stack_re_enc(
+ rate_control_api_t *rate_control_api, WORD32 i4_enc_pic_id, picture_type_e e_pic_type)
+{
+ /* In case of a re-encoder, the pics will come in the encode order itself.
+ So, there is no need to buffer the pics up */
+ add_pic_to_stack_re_enc(rate_control_api->ps_pic_handling, i4_enc_pic_id, e_pic_type);
+}
+
+/****************************************************************************
+Function Name : get_picture_details
+Description : decides the picture type based on the state
+Inputs : rate_control_api (its ps_pic_handling is used)
+Globals :
+Processing : Thin wrapper around get_pic_from_stack; all pointers are forwarded as is
+Outputs : pi4_pic_id, pi4_pic_disp_order_no, pe_pic_type, pi4_is_scd (passed to get_pic_from_stack)
+Returns : void
+Issues :
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+
+*****************************************************************************/
+void get_picture_details(
+ rate_control_handle rate_control_api,
+ WORD32 *pi4_pic_id,
+ WORD32 *pi4_pic_disp_order_no,
+ picture_type_e *pe_pic_type,
+ WORD32 *pi4_is_scd)
+{
+ /* Call to get the pic_details */
+ get_pic_from_stack(
+ rate_control_api->ps_pic_handling,
+ pi4_pic_id,
+ pi4_pic_disp_order_no,
+ pe_pic_type,
+ pi4_is_scd);
+}
+
+/****************************************************************************
+Function Name : get_min_max_bits_based_on_buffer
+Description : Finds the min and max bits the picture may consume based on the buffer condition
+Inputs : ps_rate_control_api, e_pic_type
+ i4_get_error - forwarded to cbr_modify_ebf_estimate() before the checks
+Globals :
+Processing : Dispatches on e_rc_type; the two VBR_STORAGE types only compute a max
+Outputs : pi4_min_bits[0] (forced to 0 when i4_capped_vbr_flag is 1), pi4_max_bits[0]
+Returns : void
+Issues :
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+
+*****************************************************************************/
+static void get_min_max_bits_based_on_buffer(
+ rate_control_api_t *ps_rate_control_api,
+ picture_type_e e_pic_type,
+ WORD32 *pi4_min_bits,
+ WORD32 *pi4_max_bits,
+ WORD32 i4_get_error)
+{
+ WORD32 i4_min_bits = 0, i4_max_bits = 0;
+
+ cbr_modify_ebf_estimate(ps_rate_control_api->ps_cbr_buffer, i4_get_error); //ELP_RC
+
+ /* Find the min and max bits that can be consumed based on the buffer condition */
+ if(ps_rate_control_api->e_rc_type == VBR_STORAGE)
+ {
+ i4_max_bits = get_max_target_bits(ps_rate_control_api->ps_vbr_storage_vbv);
+ }
+ else if(ps_rate_control_api->e_rc_type == VBR_STORAGE_DVD_COMP)
+ {
+ WORD32 i4_rem_bits_in_gop, i4_rem_frms_in_gop;
+ /* WORD32 ai4_rem_frms_in_gop[MAX_PIC_TYPE]; */
+ i4_rem_frms_in_gop = pic_type_get_rem_frms_in_gop(ps_rate_control_api->ps_pic_handling);
+ i4_rem_bits_in_gop = rc_get_rem_bits_in_period(ps_rate_control_api);
+
+ i4_max_bits = get_max_tgt_bits_dvd_comp(
+ ps_rate_control_api->ps_vbr_storage_vbv,
+ i4_rem_bits_in_gop,
+ i4_rem_frms_in_gop,
+ e_pic_type);
+ }
+ else if(ps_rate_control_api->e_rc_type == CBR_NLDRC)
+ {
+ cbr_buffer_constraint_check(
+ ps_rate_control_api->ps_cbr_buffer, 0, e_pic_type, &i4_max_bits, &i4_min_bits);
+ }
+ else /* if(ps_rate_control_api->e_rc_type == VBR_STREAMING); any other rc type also lands here */
+ {
+ vbr_stream_buffer_constraint_check(
+ ps_rate_control_api->ps_cbr_buffer, 0, e_pic_type, &i4_max_bits, &i4_min_bits);
+ }
+ /* Fill the min and max bits consumed */
+ if(1 != ps_rate_control_api->i4_capped_vbr_flag)
+ {
+ pi4_min_bits[0] = i4_min_bits;
+ }
+ else
+ {
+ /* Capped VBR case: no lower limit is reported */
+ pi4_min_bits[0] = 0;
+ }
+ pi4_max_bits[0] = i4_max_bits;
+}
+
+/****************************************************************************
+Function Name : is_first_frame_coded
+Description : ANDs the au1_is_first_frm_coded flags of the picture types in use
+Inputs : ps_rate_control_api
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+
+*****************************************************************************/
+WORD32 is_first_frame_coded(rate_control_handle ps_rate_control_api)
+{
+    WORD32 i4_all_coded = 1;
+    WORD32 i4_type_idx;
+    WORD32 i4_is_field_pic;
+
+    /* Intra frame interval of 1: only the I picture flag decides the result */
+    if(1 == pic_type_get_intra_frame_interval(ps_rate_control_api->ps_pic_handling))
+    {
+        return ps_rate_control_api->au1_is_first_frm_coded[I_PIC];
+    }
+
+    /* HEVC hierarchy: fold the flag of every active picture type into the result */
+    i4_is_field_pic = pic_type_get_field_pic(ps_rate_control_api->ps_pic_handling);
+    if(!i4_is_field_pic)
+    {
+        i4_type_idx = 0;
+    }
+    else
+    {
+        /* Field pictures: I is checked alone, other types also check index + FIELD_OFFSET */
+        i4_all_coded &= ps_rate_control_api->au1_is_first_frm_coded[I_PIC];
+        i4_type_idx = 1;
+    }
+    for(; i4_type_idx < ps_rate_control_api->i4_num_active_pic_type; i4_type_idx++)
+    {
+        i4_all_coded &= ps_rate_control_api->au1_is_first_frm_coded[i4_type_idx];
+        if(i4_is_field_pic)
+            i4_all_coded &= ps_rate_control_api->au1_is_first_frm_coded[i4_type_idx + FIELD_OFFSET];
+    }
+
+    return i4_all_coded;
+}
+
+/****************************************************************************
+Function Name : get_min_max_qp
+Description : Derives the upper and lower QP swing limits (q6 values) for the current picture
+Inputs : ps_rate_control_api, e_pic_type, i4_complexity_bin, i4_scene_num
+Outputs : pi4_hi_dev_qp_q6[0] and pi4_lo_dev_qp_q6[0] receive the two limits
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+
+static void get_min_max_qp(
+ rate_control_api_t *ps_rate_control_api,
+ picture_type_e e_pic_type,
+ WORD32 *pi4_hi_dev_qp_q6,
+ WORD32 *pi4_lo_dev_qp_q6,
+ WORD32 i4_complexity_bin,
+ WORD32 i4_scene_num)
+{
+ WORD32 prev_qp_q6, prev_I_qp_q6;
+ WORD32 hi_dev_qp_q6, lo_dev_qp_q6, hi_dev_qp_temp_q6;
+ WORD32 i4_intra_frm_int, prev_qp_for_high_dev_q6,
+ use_I_frame_qp_high_dev = 0; /*i_only : to detect i only case*/
+ float per_pixel_p_hme_sad =
+ (float)ps_rate_control_api->i8_per_pixel_p_frm_hme_sad_q10 / (1 << 10);
+
+ i4_intra_frm_int = pic_type_get_intra_frame_interval(ps_rate_control_api->ps_pic_handling);
+
+ /* Restricting the Quant swing: start from the QP of the previous reference picture type */
+ prev_qp_q6 = ps_rate_control_api
+ ->ai4_prev_frm_qp_q6[i4_scene_num][ps_rate_control_api->prev_ref_pic_type];
+ prev_qp_for_high_dev_q6 = prev_qp_q6;
+ prev_I_qp_q6 = ps_rate_control_api->ai4_prev_frm_qp_q6[i4_scene_num][I_PIC];
+ if(ps_rate_control_api->prev_ref_pic_type != e_pic_type)
+ {
+ if(e_pic_type == I_PIC)
+ {
+ /* Constrain I-frame QP to be within specified limit of prev_ref_qp/Kp */
+ // SS - suppressing this assuming re-encode will take care
+ /* prev_qp = i4_frame_qp; */
+ prev_qp_q6 = (ps_rate_control_api->i4_P_to_I_ratio * (LWORD64)prev_qp_q6) >>
+ P_TO_I_RATIO_Q_FACTOR;
+ }
+ else if(e_pic_type == P_PIC || e_pic_type == P1_PIC)
+ {
+ /* Constrain P-frame QP to be within specified limit of Kp*prev_ref_qp */
+ prev_qp_q6 = (I_TO_P_RATIO * (LWORD64)prev_qp_q6) >> K_Q;
+ use_I_frame_qp_high_dev = 1;
+ }
+ else if(ps_rate_control_api->prev_ref_pic_type == P_PIC)
+ {
+ /* current frame is B-pic */
+ /* Constrain B-frame QP to be within specified limit of prev_ref_qp/Kb */
+ if(!ps_rate_control_api->i4_is_hbr)
+ {
+ prev_qp_q6 = (P_TO_B_RATIO * (LWORD64)prev_qp_q6) >> (K_Q);
+ }
+ else
+ {
+ prev_qp_q6 = (P_TO_B_RATIO_HBR * (LWORD64)prev_qp_q6) >> (K_Q);
+ }
+ }
+ else /* if(ps_rate_control_api->prev_ref_pic_type == I_PIC) */
+ {
+ /* current frame is B-pic */
+ /* Constrain B-frame QP to be within specified limit of prev_ref_qp/Kb */
+ if(!ps_rate_control_api->i4_is_hbr)
+ {
+ prev_qp_q6 = (P_TO_B_RATIO * I_TO_P_RATIO * (LWORD64)prev_qp_q6) >> (K_Q + K_Q);
+ }
+ else
+ {
+ prev_qp_q6 = (P_TO_B_RATIO_HBR * I_TO_P_RATIO * (LWORD64)prev_qp_q6) >> (K_Q + K_Q);
+ }
+ }
+ }
+
+ /*if (1)//e_pic_type != B_PIC)*/
+ {
+ if(use_I_frame_qp_high_dev)
+ {
+ /*For a P pic the upper limit is derived from the unscaled previous reference QP, so that P does not drift too far above the previous I*/
+ hi_dev_qp_q6 = GET_HI_DEV_QP_QFAC(prev_qp_for_high_dev_q6);
+ }
+ else
+ {
+ hi_dev_qp_q6 = GET_HI_DEV_QP_QFAC(prev_qp_q6);
+ }
+
+ if(e_pic_type == I_PIC || e_pic_type == P_PIC || e_pic_type == P1_PIC)
+ {
+ lo_dev_qp_q6 =
+ GET_LO_DEV_QP_QFAC(prev_qp_q6, ps_rate_control_api->i4_num_active_pic_type);
+ }
+ else
+ {
+ lo_dev_qp_q6 = GET_LO_DEV_QP_QFAC_B_PIC(prev_qp_q6);
+ }
+ }
+ /* For lower QPs due to scale factor and fixed point arithmetic, the
+ hi_dev_qp can be same as that of the prev qp and in which case it gets stuck
+ in the lower most qp and thus not allowing QPs to change. To avoid this,
+ for lower qps the hi_dev_qp should be made slightly more than prev_qp */
+ if(prev_qp_q6 == hi_dev_qp_q6)
+ {
+ hi_dev_qp_q6 = ((LWORD64)hi_dev_qp_q6 * 18) >> 4;
+ }
+ /*minimum qp should atleast be 1 less than previous*/
+ if(prev_qp_q6 == lo_dev_qp_q6 && lo_dev_qp_q6 > (1 << QSCALE_Q_FAC))
+ {
+ lo_dev_qp_q6 = ((LWORD64)lo_dev_qp_q6 * 14) >> 4;
+ }
+ /*for shorter GOP make sure the P does not get better than I , NEED TO BE REVIEWED as gains seen in bq terrace after this change was with wrong config*/
+ /*Anything with per pixel sad < 1 is considered static. Since the hme sad is at L1 resolution, the threshold chosen is 0.25*/
+ if((per_pixel_p_hme_sad < 0.25f) && (ps_rate_control_api->i4_is_infinite_gop != 1))
+ {
+ if(e_pic_type == P_PIC && ps_rate_control_api->i4_I_frame_qp_model)
+ {
+ /*P is not allowed to get too much better than the previous I in static content; 64-bit product matches the assignment below*/
+ if(lo_dev_qp_q6 < (((LWORD64)prev_I_qp_q6 * 14) >> 4))
+ lo_dev_qp_q6 = ((LWORD64)prev_I_qp_q6 * 14) >> 4;
+ /*If previous reference is I then it cannot get better than I in static case*/
+ if(lo_dev_qp_q6 < prev_I_qp_q6)
+ lo_dev_qp_q6 = prev_I_qp_q6;
+ }
+ }
+ if(e_pic_type == I_PIC &&
+ i4_intra_frm_int !=
+ 1) /*i_only: In this case P frame Qp will be arbitrary value hence avoiding max_dev_qp to be independent of it*/
+ {
+ //WORD32 i4_p_qp = ps_rate_control_api->ai4_prev_frm_qp[P_PIC];
+ WORD32 i4_p_qp_q6 = ps_rate_control_api->ai4_prev_frm_qp_q6[i4_scene_num][P_PIC];
+ /* Cap for the I picture upper limit: previous P QP scaled by a factor picked per complexity bin */
+ switch(i4_complexity_bin)
+ {
+ case 0:
+ hi_dev_qp_temp_q6 = (WORD32)(
+ ((LWORD64)i4_p_qp_q6 * I_TO_P_RATIO * I_TO_P_RATIO * I_TO_P_RATIO) >>
+ (K_Q + K_Q + K_Q));
+ break;
+ case 1:
+ hi_dev_qp_temp_q6 =
+ (WORD32)(((LWORD64)i4_p_qp_q6 * I_TO_P_RATIO * I_TO_P_RATIO) >> (K_Q + K_Q));
+ break;
+ case 2:
+ hi_dev_qp_temp_q6 = (WORD32)(((LWORD64)i4_p_qp_q6 * I_TO_P_RATIO) >> (K_Q));
+ break;
+ case 3:
+ hi_dev_qp_temp_q6 = i4_p_qp_q6;
+ break;
+ default:
+ hi_dev_qp_temp_q6 = (WORD32)(((LWORD64)i4_p_qp_q6 * P_TO_I_RATIO) >> (K_Q));
+ break;
+ }
+ hi_dev_qp_q6 = (hi_dev_qp_q6 > hi_dev_qp_temp_q6) ? hi_dev_qp_temp_q6 : hi_dev_qp_q6;
+ }
+ pi4_hi_dev_qp_q6[0] = hi_dev_qp_q6;
+ pi4_lo_dev_qp_q6[0] = lo_dev_qp_q6;
+}
+
+/****************************************************************************
+Function Name : get_min
+Description : Returns the smallest of four values
+Inputs : a, b, c, d - candidate values
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+
+*****************************************************************************/
+static WORD32 get_min(WORD32 a, WORD32 b, WORD32 c, WORD32 d)
+{
+    /* Reduce pairwise, then pick the smaller of the two partial minima */
+    const WORD32 i4_lo_ab = (b < a) ? b : a;
+    const WORD32 i4_lo_cd = (d < c) ? d : c;
+
+    if(i4_lo_cd < i4_lo_ab)
+        return i4_lo_cd;
+
+    return i4_lo_ab;
+}
+
+/****************************************************************************
+Function Name : get_max
+Description : Returns the largest of three values
+Inputs : a, b, c - candidate values
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+
+*****************************************************************************/
+static WORD32 get_max(WORD32 a, WORD32 b, WORD32 c)
+{
+    /* Largest of the first pair, then compared against the third value */
+    const WORD32 i4_hi_ab = (b > a) ? b : a;
+
+    if(c > i4_hi_ab)
+        return c;
+    return i4_hi_ab;
+}
+/****************************************************************************
+Function Name : rc_modify_est_tot
+Description : Adds the latest estimated total bits to the ai4_est_tot_bits history
+Inputs : ps_rate_control_api, i4_tot_est_bits - value to store
+Globals :
+Processing : Moves entries 1 .. n-2 down by one slot, then stores the new value at (i - 1)
+Outputs : ps_rate_control_api->ai4_est_tot_bits
+Returns : void
+Issues : Nothing is stored when i4_num_frame_parallel is 0
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+
+*****************************************************************************/
+void rc_modify_est_tot(rate_control_api_t *ps_rate_control_api, WORD32 i4_tot_est_bits) //ELP_RC
+{
+ WORD32 i4_num_frm_parallel, i;
+ i4_num_frm_parallel = ps_rate_control_api->i4_num_frame_parallel;
+
+ if(i4_num_frm_parallel) //for CPU i4_num_frm_parallel=0
+ {
+ for(i = 1; i < (i4_num_frm_parallel - 1); i++)
+ {
+ ps_rate_control_api->ai4_est_tot_bits[i - 1] = ps_rate_control_api->ai4_est_tot_bits[i];
+ }
+ ps_rate_control_api->ai4_est_tot_bits[i - 1] = i4_tot_est_bits; /* i holds the loop exit value here (1 if the loop never ran) */
+ }
+}
+/****************************************************************************
+Function Name : rc_get_estimate_bit_error
+Description   : Returns the accumulated bit error of the frames tracked in
+                ai4_est_tot_bits[] for the Enc Loop Parallelism based encoder.
+                Each of the first (i4_num_frame_parallel - 1) entries
+                contributes (estimated total bits - value returned by
+                get_buf_max_drain_rate()).
+Inputs        : ps_rate_control_api - rate control state
+Returns       : Sum of the per-frame differences; 0 when
+                i4_num_frame_parallel is not positive
+*****************************************************************************/
+static WORD32 rc_get_estimate_bit_error(rate_control_api_t *ps_rate_control_api)
+{
+    WORD32 i4_accum_err = 0;
+    WORD32 i4_frm_idx = 0;
+    const WORD32 i4_drain_per_frm = get_buf_max_drain_rate(ps_rate_control_api->ps_cbr_buffer);
+
+    /* Per the original note, i4_num_frame_parallel is 0 on the CPU path: nothing to accumulate */
+    if(ps_rate_control_api->i4_num_frame_parallel <= 0)
+    {
+        return i4_accum_err;
+    }
+
+    while(i4_frm_idx < (ps_rate_control_api->i4_num_frame_parallel - 1))
+    {
+        i4_accum_err += (ps_rate_control_api->ai4_est_tot_bits[i4_frm_idx] - i4_drain_per_frm);
+        i4_frm_idx++;
+    }
+    return i4_accum_err;
+}
+
+/****************************************************************************
+Function Name : get_est_hdr_bits
+Description   : Returns the estimated header bits for the given picture type,
+                as reported by get_cur_frm_est_header_bits() on the bit
+                allocation module.
+Inputs        : ps_rate_control_api - rate control state
+                e_pic_type          - picture type being queried
+Returns       : Value returned by get_cur_frm_est_header_bits()
+*****************************************************************************/
+WORD32 get_est_hdr_bits(rate_control_api_t *ps_rate_control_api, picture_type_e e_pic_type)
+{
+    WORD32 i4_hdr_bits =
+        get_cur_frm_est_header_bits(ps_rate_control_api->ps_bit_allocation, e_pic_type);
+
+    return i4_hdr_bits;
+}
+
+/****************************************************************************
+Function Name : model_availability
+Description   : Reports whether the RD model of the given picture type is
+                valid, as decided by is_model_valid().
+Inputs        : ps_rate_control_api - rate control state
+                e_pic_type          - picture type whose model is queried
+Returns       : Value returned by is_model_valid()
+*****************************************************************************/
+WORD32 model_availability(rate_control_api_t *ps_rate_control_api, picture_type_e e_pic_type)
+{
+    WORD32 i4_model_ok = is_model_valid(ps_rate_control_api->aps_rd_model[e_pic_type]);
+
+    return i4_model_ok;
+}
+
+/****************************************************************************
+Function Name : clip_qp_based_on_prev_ref
+Description   : Derives a frame qp (Q6) from previously stored qps.
+                - I_PIC     : returns the stored I picture qp of the scene
+                              (asserts if none is stored yet and returns 0).
+                - otherwise : scales the stored qp of the previous reference
+                              picture type by Kp_Kb(e_pic_type) /
+                              Kp_Kb(prev_ref_pic_type).
+Inputs        : ps_rate_control_api - rate control state
+                e_pic_type          - picture type of the current frame
+                i4_call_type        - the Kp/Kb factor clamps are applied only
+                                      when this is 1
+                i4_scene_num        - scene index into the stored qp tables
+Outputs       : When the previous reference picture type has no stored Q6 qp
+                (0x7FFFFFFF), that entry is overwritten with the I picture
+                value of the same scene.
+Returns       : Frame qp in Q6
+*****************************************************************************/
+WORD32 clip_qp_based_on_prev_ref(
+    rate_control_api_t *ps_rate_control_api,
+    picture_type_e e_pic_type,
+    WORD32 i4_call_type,
+    WORD32 i4_scene_num)
+{
+    WORD32 i4_frame_qp_q6 = 0, i4_min_Kp_Kb_factor = 0;
+    WORD32 Kp_kb_factor = get_Kp_Kb(ps_rate_control_api->ps_bit_allocation, e_pic_type);
+    WORD32 kp_kb_ref_ref =
+        get_Kp_Kb(ps_rate_control_api->ps_bit_allocation, ps_rate_control_api->prev_ref_pic_type);
+
+    {
+        WORD32 i4_drain_bits_per_frame = get_buf_max_drain_rate(ps_rate_control_api->ps_cbr_buffer),
+               i4_ebf;
+        WORD32 i4_delay = cbr_get_delay_frames(ps_rate_control_api->ps_cbr_buffer),
+               max_buffer_level = 0, rc_type = get_rc_type(ps_rate_control_api->ps_cbr_buffer);
+
+        if(rc_type == VBR_STREAMING)
+            max_buffer_level = i4_drain_bits_per_frame * i4_delay;
+        else
+            max_buffer_level = get_cbr_buffer_size(ps_rate_control_api->ps_cbr_buffer);
+
+        i4_ebf = get_cbr_ebf(ps_rate_control_api->ps_cbr_buffer);
+
+        /* Buffer above 90% of its maximum level: pick a minimum Kp/Kb factor per picture type */
+        if(i4_ebf > (WORD32)(0.9f * max_buffer_level))
+        {
+            switch(e_pic_type)
+            {
+            case P_PIC:
+            case P1_PIC:
+                i4_min_Kp_Kb_factor = I_TO_P_RATIO;
+                break;
+            case B_PIC:
+            case BB_PIC:
+                i4_min_Kp_Kb_factor = I_TO_B_RATIO;
+                break;
+            case B1_PIC:
+            case B11_PIC:
+                i4_min_Kp_Kb_factor = I_TO_B1_RATIO;
+                break;
+            default:
+                i4_min_Kp_Kb_factor = I_TO_B2_RATIO;
+                break;
+            }
+        }
+    }
+
+    if(e_pic_type == I_PIC)
+    {
+        if(ps_rate_control_api->ai4_prev_frm_qp[i4_scene_num][I_PIC] == 0x7FFFFFFF)
+        {
+            /*Is this a valid case?*/
+            ASSERT(0);
+        }
+        else
+        {
+            /*If there is a scene cut I frame followed by a scene cut I frame, non scene cut I frame
+            better assume the Qp of the I frame same as before instead of using bpp based qp*/
+            i4_frame_qp_q6 = ps_rate_control_api->ai4_prev_frm_qp_q6[i4_scene_num][I_PIC];
+        }
+    }
+    else /*! ISlice*/
+    {
+        if((Kp_kb_factor < i4_min_Kp_Kb_factor) && (i4_call_type == 1))
+        {
+            Kp_kb_factor = i4_min_Kp_Kb_factor;
+            trace_printf("Kp_kb_factor %d", Kp_kb_factor);
+        }
+        if((kp_kb_ref_ref > Kp_kb_factor) && (i4_call_type == 1))
+        {
+            kp_kb_ref_ref = Kp_kb_factor;
+        }
+
+        if(ps_rate_control_api
+               ->ai4_prev_frm_qp_q6[i4_scene_num][ps_rate_control_api->prev_ref_pic_type] ==
+           0x7FFFFFFF)
+        {
+            /* No qp stored for the previous reference type: fall back to the I picture qp */
+            ps_rate_control_api
+                ->ai4_prev_frm_qp_q6[i4_scene_num][ps_rate_control_api->prev_ref_pic_type] =
+                ps_rate_control_api->ai4_prev_frm_qp_q6[i4_scene_num][I_PIC];
+            /* NOTE(review): 16 is presumably the unity Kp/Kb factor - confirm against get_Kp_Kb() */
+            kp_kb_ref_ref = 16;
+        }
+
+        /* Multiply in 64 bits: the stored qp may still be the 0x7FFFFFFF sentinel, and a
+           32-bit signed product would then overflow */
+        i4_frame_qp_q6 = (WORD32)(
+            ((LWORD64)ps_rate_control_api
+                 ->ai4_prev_frm_qp_q6[i4_scene_num][ps_rate_control_api->prev_ref_pic_type] *
+             Kp_kb_factor) /
+            kp_kb_ref_ref);
+    }
+    return i4_frame_qp_q6;
+}
+
+/****************************************************************************
+Function Name : get_frame_level_qp
+Description   : Computes the frame level qp (Q6) for the picture to be coded.
+                CONST_QP returns the stored qp of the picture type. For the
+                other supported rate control types the texture bits from the
+                bit allocation module are clipped against qp based, buffer
+                based and user defined limits, and the qp is then obtained
+                either from the RD model or, when the model is not usable,
+                by scaling previously stored qps.
+Inputs        : ps_rate_control_api - rate control state
+                e_pic_type          - picture type of the current frame
+                i4_ud_max_bits      - user defined max bits (ignored for I_PIC)
+                af_sum_weigh        - complexity coefficients handed to
+                                      assign_complexity_coeffs()
+                i4_call_type        - when 1, the history of estimated bits is
+                                      updated and the optional outputs below
+                                      are written
+                i_to_avg_ratio      - forwarded to get_cur_frm_est_texture_bits()
+                ps_frame_stat       - not used by this function
+                i4_complexity_bin   - forwarded to get_min_max_qp()
+                i4_scene_num        - scene index into the stored qp tables
+Outputs       : pi4_cur_est_texture_bits[0], *pi4_tot_bits_estimated and
+                *pi4_is_model_valid are written for every non CONST_QP call.
+                *pi4_vbv_buf_max_bits, *pi4_est_tex_bits,
+                *pi4_cur_est_header_bits, *pi4_maxEbfQP, *pi4_modelQP and
+                *pi4_estimate_to_calc_frm_error are written only when
+                i4_call_type == 1 (all zero / INVALID_QP for CONST_QP).
+Returns       : Frame qp in Q6, clipped to the min/max qp of the picture
+                type; 0 when the rate control type is not supported
+*****************************************************************************/
+WORD32 get_frame_level_qp(
+    rate_control_api_t *ps_rate_control_api,
+    picture_type_e e_pic_type,
+    WORD32 i4_ud_max_bits,
+    WORD32 *pi4_cur_est_texture_bits,
+    float af_sum_weigh[MAX_PIC_TYPE][3],
+    WORD32 i4_call_type,
+    float i_to_avg_ratio,
+    frame_info_t *ps_frame_stat,
+    WORD32 i4_complexity_bin,
+    WORD32 i4_scene_num,
+    WORD32 *pi4_tot_bits_estimated,
+    WORD32 *pi4_is_model_valid,
+    WORD32 *pi4_vbv_buf_max_bits,
+    WORD32 *pi4_est_tex_bits,
+    WORD32 *pi4_cur_est_header_bits,
+    WORD32 *pi4_maxEbfQP,
+    WORD32 *pi4_modelQP,
+    WORD32 *pi4_estimate_to_calc_frm_error)
+{
+    WORD32 i4_frame_qp;
+    WORD32 i4_max_frame_qp_q6 = (MAX_MPEG2_QP << QSCALE_Q_FAC),
+           i4_min_frame_qp_q6 = MIN_QSCALE_Q6; /*0.707 in q6 corresponds to hevc qp = 1*/
+    WORD32 i4_is_first_frame_coded = 1;
+    WORD32 i4_is_model_valid = 0;
+    /* i4_cur_est_header_bits, i4_est_tex_bits and i4_buf_based_max_bits start at 0 because the
+       CONST_QP path never computes them, yet they are reported back when i4_call_type == 1 */
+    WORD32 i4_frame_qp_q6, i4_cur_est_header_bits = 0, i4_frame_qp_q6_based_max_vbv_bits;
+    WORD32 i4_bit_alloc_est_tex_bits = 0, i4_bit_alloc_est_tex_bits_for_invalid_model = 0,
+           i4_est_tex_bits = 0, i4_qp_based_min_est_tex_bits, i4_qp_based_max_est_tex_bits,
+           i4_buf_based_min_bits, i4_buf_based_max_bits = 0;
+    UWORD32 u4_estimated_sad;
+    WORD32 i4_buffer_based_max_qp_clip_flag = 0;
+    WORD32 i4_min_Kp_Kb_factor = 0;
+    WORD32 i4_steady_state_texture_case = 0;
+
+    if(i4_call_type == 1)
+    {
+        *pi4_maxEbfQP = INVALID_QP;
+        *pi4_modelQP = INVALID_QP;
+    }
+
+    /* Only these rate control types are handled here */
+    if((ps_rate_control_api->e_rc_type != VBR_STORAGE) &&
+       (ps_rate_control_api->e_rc_type != VBR_STORAGE_DVD_COMP) &&
+       (ps_rate_control_api->e_rc_type != CBR_NLDRC) &&
+       (ps_rate_control_api->e_rc_type != CONST_QP) &&
+       (ps_rate_control_api->e_rc_type != VBR_STREAMING))
+    {
+        return (0);
+    }
+
+    i4_is_first_frame_coded = is_first_frame_coded(ps_rate_control_api);
+
+    assign_complexity_coeffs(ps_rate_control_api->ps_bit_allocation, af_sum_weigh);
+
+    if(ps_rate_control_api->e_rc_type == CONST_QP)
+    {
+        i4_frame_qp = ps_rate_control_api->ai4_prev_frm_qp[i4_scene_num][e_pic_type];
+        /* Integer qp -> Q6 is a left shift, as everywhere else in this function */
+        i4_frame_qp_q6 =
+            (ps_rate_control_api->ai4_prev_frm_qp[i4_scene_num][e_pic_type] << QSCALE_Q_FAC);
+    }
+    else
+    {
+        i4_cur_est_header_bits =
+            get_cur_frm_est_header_bits(ps_rate_control_api->ps_bit_allocation, e_pic_type);
+        u4_estimated_sad = get_est_sad(ps_rate_control_api->ps_est_sad, e_pic_type);
+        /* Constraining the qp variations based on bits allocated */
+        /* Step 1: Getting the bits based on bit allocation module */
+        /* Check if the model has at least one data point, otherwise go with the default qp */
+        i4_is_model_valid = is_model_valid(ps_rate_control_api->aps_rd_model[e_pic_type]);
+
+        if(i4_is_model_valid == 1)
+        {
+            i4_bit_alloc_est_tex_bits = get_cur_frm_est_texture_bits(
+                ps_rate_control_api->ps_bit_allocation,
+                ps_rate_control_api->aps_rd_model,
+                ps_rate_control_api->ps_est_sad,
+                ps_rate_control_api->ps_pic_handling,
+                ps_rate_control_api->ps_cbr_buffer,
+                e_pic_type,
+                i4_is_first_frame_coded,
+                0,
+                i4_call_type,
+                i_to_avg_ratio,
+                i4_is_model_valid);
+            if(i4_call_type == 1)
+            {
+                *pi4_estimate_to_calc_frm_error =
+                    i4_bit_alloc_est_tex_bits + i4_cur_est_header_bits;
+            }
+
+            /* vbv buffer position based error correction to keep the encoder buffer from overflowing at layer 0 pictures */
+            if(e_pic_type == I_PIC || e_pic_type == P_PIC || e_pic_type == P1_PIC)
+            {
+                WORD32 i4_cur_ebf = get_cbr_ebf(ps_rate_control_api->ps_cbr_buffer);
+                WORD32 i4_vbv_size = get_cbr_buffer_size(ps_rate_control_api->ps_cbr_buffer);
+                WORD32 i4_max_ebf = (WORD32)(i4_vbv_size * MAX_THRESHOLD_VBV_FRM_ERROR);
+                WORD32 i4_drain_rate = get_buf_max_drain_rate(ps_rate_control_api->ps_cbr_buffer);
+                WORD32 i4_total_bits_allocted = i4_bit_alloc_est_tex_bits + i4_cur_est_header_bits;
+                WORD32 i4_total_bits_to_be_alloc;
+                WORD32 i4_expected_ebf = (i4_cur_ebf + i4_total_bits_allocted - i4_drain_rate);
+                /* If the expected ebf is greater than the max threshold, correct the allocation so that
+                it never crosses the max; but never allocate less than the drain rate */
+                if(i4_expected_ebf > i4_max_ebf)
+                {
+                    i4_total_bits_to_be_alloc = MAX(
+                        i4_drain_rate, (i4_total_bits_allocted - (i4_expected_ebf - i4_max_ebf)));
+                    i4_bit_alloc_est_tex_bits = i4_total_bits_to_be_alloc - i4_cur_est_header_bits;
+                }
+            }
+        }
+        else
+        {
+            i4_bit_alloc_est_tex_bits_for_invalid_model = get_cur_frm_est_texture_bits(
+                ps_rate_control_api->ps_bit_allocation,
+                ps_rate_control_api->aps_rd_model,
+                ps_rate_control_api->ps_est_sad,
+                ps_rate_control_api->ps_pic_handling,
+                ps_rate_control_api->ps_cbr_buffer,
+                e_pic_type,
+                i4_is_first_frame_coded,
+                0,
+                i4_call_type,
+                i_to_avg_ratio,
+                i4_is_model_valid);
+            if(i4_call_type == 1)
+            {
+                *pi4_estimate_to_calc_frm_error =
+                    i4_bit_alloc_est_tex_bits_for_invalid_model + i4_cur_est_header_bits;
+            }
+        }
+
+#if 1 /*model_low_bitrate_bug*/
+        /* This condition is added to use the model for cases when the estimated bits is less than zero.
+        We assume some bits of the header are used for texture and calculate the qp */
+        if(i4_bit_alloc_est_tex_bits <= (i4_cur_est_header_bits >> 3))
+        {
+            i4_bit_alloc_est_tex_bits = (i4_cur_est_header_bits >> 3);
+        }
+#endif
+
+        /* Step 2: Getting the min and max texture bits based on min and max qp */
+        if(i4_is_model_valid && ps_rate_control_api->au1_avg_bitrate_changed[e_pic_type] == 0)
+        {
+            WORD32 i4_max_qp_q6, i4_min_qp_q6;
+            number_t s_lin_coeff_wo_int =
+                get_linear_coefficient(ps_rate_control_api->aps_rd_model[e_pic_type]);
+
+            if(s_lin_coeff_wo_int.sm != 0)
+            {
+                /* Get the min and max qp deviation allowed based on prev frame qp */
+                get_min_max_qp(
+                    ps_rate_control_api,
+                    e_pic_type,
+                    &i4_max_qp_q6,
+                    &i4_min_qp_q6,
+                    i4_complexity_bin,
+                    i4_scene_num);
+
+                /* Estimate the min bits based on max qp */
+                i4_qp_based_min_est_tex_bits = estimate_bits_for_qp(
+                    ps_rate_control_api->aps_rd_model[e_pic_type], u4_estimated_sad, i4_max_qp_q6);
+                /* Estimate the max bits based on min qp */
+                i4_qp_based_max_est_tex_bits = estimate_bits_for_qp(
+                    ps_rate_control_api->aps_rd_model[e_pic_type], u4_estimated_sad, i4_min_qp_q6);
+                /* Disable the qp based min and max swing restriction: both estimates above are
+                   overridden with the bit allocation value, only the qp limits are kept */
+                i4_min_frame_qp_q6 = i4_min_qp_q6;
+                i4_max_frame_qp_q6 = i4_max_qp_q6;
+                i4_qp_based_max_est_tex_bits = i4_bit_alloc_est_tex_bits;
+                i4_qp_based_min_est_tex_bits = i4_bit_alloc_est_tex_bits;
+            }
+            else
+            {
+                i4_qp_based_min_est_tex_bits = i4_bit_alloc_est_tex_bits;
+                i4_qp_based_max_est_tex_bits = i4_bit_alloc_est_tex_bits;
+            }
+        }
+        else
+        {
+            i4_qp_based_min_est_tex_bits = i4_bit_alloc_est_tex_bits_for_invalid_model;
+            i4_qp_based_max_est_tex_bits = i4_bit_alloc_est_tex_bits_for_invalid_model;
+            ps_rate_control_api->au1_avg_bitrate_changed[e_pic_type] = 0;
+        }
+
+        /* Step 3: Getting the min and max texture bits based on buffer fullness */
+
+        if(i4_call_type == 1)
+        {
+            WORD32 i4_get_error;
+
+            i4_get_error = rc_get_estimate_bit_error(ps_rate_control_api);
+
+            get_min_max_bits_based_on_buffer(
+                ps_rate_control_api,
+                e_pic_type,
+                &i4_buf_based_min_bits,
+                &i4_buf_based_max_bits,
+                i4_get_error);
+
+            /*In case buffer limitation will come, no need to reduce the QP further because of warning flag*/
+            if(i4_bit_alloc_est_tex_bits < (i4_buf_based_min_bits - i4_cur_est_header_bits))
+                ps_rate_control_api->i4_underflow_warning = 0;
+
+            if(i4_buf_based_max_bits < (i4_bit_alloc_est_tex_bits + i4_cur_est_header_bits))
+            {
+                i4_buffer_based_max_qp_clip_flag = 1;
+            }
+            trace_printf(
+                "i4_buf_based_min_bits %d i4_buf_based_max_bits %d",
+                i4_buf_based_min_bits,
+                i4_buf_based_max_bits);
+            /* The qp table is indexed [scene][pic type]; pass the ints, not a table row */
+            trace_printf(
+                "Prev I frame qp q6 %d P frame qp q6 %d",
+                ps_rate_control_api->ai4_prev_frm_qp_q6[i4_scene_num][I_PIC],
+                ps_rate_control_api->ai4_prev_frm_qp_q6[i4_scene_num][P_PIC]);
+        }
+        else
+        {
+            i4_buf_based_min_bits = i4_qp_based_min_est_tex_bits;
+            i4_buf_based_max_bits = i4_qp_based_max_est_tex_bits;
+        }
+        /* for I frame the max bits is not restricted based on the user input */
+        if(e_pic_type == I_PIC)
+        {
+            i4_ud_max_bits = 0x7fffffff;
+        }
+
+        /* Step 4: Clip the bits allocated based on
+        1) FinalBits = Max of (BitallocBits, MinBitsMaxQp, MinBufferBits)
+        2) FinalBits = Min of (MaxBitsMinQp, MaxBufferBits, MaxUserDefBits, FinalBits)
+        Note that the min is applied after the max to prevent over-consumption */
+        /* Finding the max of all the minimum bits */
+        i4_est_tex_bits = get_max(
+            i4_bit_alloc_est_tex_bits,
+            i4_qp_based_min_est_tex_bits,
+            (i4_buf_based_min_bits - i4_cur_est_header_bits));
+        /* Finding the min of all the maximum bits */
+        i4_est_tex_bits = get_min(
+            i4_est_tex_bits,
+            i4_qp_based_max_est_tex_bits,
+            (i4_ud_max_bits - i4_cur_est_header_bits),
+            (i4_buf_based_max_bits - i4_cur_est_header_bits));
+
+        /*Highest priority given to min and max qp followed by buffer based min and max to prevent overconsumption in process of preventing stuffing*/
+        CLIP(
+            i4_est_tex_bits,
+            i4_buf_based_max_bits - i4_cur_est_header_bits,
+            i4_buf_based_min_bits - i4_cur_est_header_bits);
+
+        {
+            WORD32 i4_drain_bits_per_frame =
+                       get_buf_max_drain_rate(ps_rate_control_api->ps_cbr_buffer),
+                   i4_ebf;
+            WORD32 i4_delay = cbr_get_delay_frames(ps_rate_control_api->ps_cbr_buffer),
+                   max_buffer_level = 0, rc_type = get_rc_type(ps_rate_control_api->ps_cbr_buffer);
+
+            if(rc_type == VBR_STREAMING)
+                max_buffer_level = i4_drain_bits_per_frame * i4_delay;
+            else
+                max_buffer_level = get_cbr_buffer_size(ps_rate_control_api->ps_cbr_buffer);
+
+            i4_ebf = get_cbr_ebf(ps_rate_control_api->ps_cbr_buffer);
+
+            /* Buffer above 90% of its maximum level: pick a minimum Kp/Kb factor per picture type */
+            if(i4_ebf > (WORD32)(0.9f * max_buffer_level))
+            {
+                i4_buffer_based_max_qp_clip_flag = 1;
+                switch(e_pic_type)
+                {
+                case P_PIC:
+                case P1_PIC:
+                    i4_min_Kp_Kb_factor = I_TO_P_RATIO;
+                    break;
+                case B_PIC:
+                case BB_PIC:
+                    i4_min_Kp_Kb_factor = I_TO_B_RATIO;
+                    break;
+                case B1_PIC:
+                case B11_PIC:
+                    i4_min_Kp_Kb_factor = I_TO_B1_RATIO;
+                    break;
+                default:
+                    i4_min_Kp_Kb_factor = I_TO_B2_RATIO;
+                    break;
+                }
+            }
+        }
+        /*i4_is_first_frame_coded will be considered only in 2 pass, since 2 pass precise I to rest is calculated considering first sub-gop and full sub-gop complexity separately. Using offset based
+        qp instead of single frame model(with default bit allocation)*/
+        /* Step 6: Estimate the qp generated for the given texture bits */
+        if((!i4_is_first_frame_coded /* && ps_rate_control_api->i4_rc_pass == 2*/) ||
+           !i4_is_model_valid) //ELP_RC
+        {
+            WORD32 Kp_kb_factor = get_Kp_Kb(ps_rate_control_api->ps_bit_allocation, e_pic_type);
+            WORD32 kp_kb_ref_ref = get_Kp_Kb(
+                ps_rate_control_api->ps_bit_allocation, ps_rate_control_api->prev_ref_pic_type);
+
+            if(e_pic_type == I_PIC &&
+               ps_rate_control_api->ai4_prev_frm_qp[i4_scene_num][I_PIC] == 0x7FFFFFFF)
+            {
+                /*Is this a valid case?*/
+                ASSERT(0);
+                i4_frame_qp = get_init_qp_using_pels_bits_per_frame(
+                    ps_rate_control_api->ps_init_qp, e_pic_type, i4_est_tex_bits, 0);
+                i4_frame_qp_q6 = i4_frame_qp << QSCALE_Q_FAC;
+            }
+            /*If there is a scene cut I frame followed by a scene cut I frame, non scene cut I frame
+            better assume the Qp of the I frame same as before instead of using bpp based qp*/
+            else if(
+                e_pic_type == I_PIC &&
+                ps_rate_control_api->ai4_prev_frm_qp[i4_scene_num][I_PIC] != 0x7FFFFFFF)
+            {
+                i4_frame_qp = ps_rate_control_api->ai4_prev_frm_qp[i4_scene_num][I_PIC];
+                i4_frame_qp_q6 = ps_rate_control_api->ai4_prev_frm_qp_q6[i4_scene_num][I_PIC];
+            }
+            else /*! ISlice*/
+            {
+                if((Kp_kb_factor < i4_min_Kp_Kb_factor) && (i4_call_type == 1))
+                {
+                    Kp_kb_factor = i4_min_Kp_Kb_factor;
+                    trace_printf("Kp_kb_factor %d", Kp_kb_factor);
+                }
+                if((kp_kb_ref_ref > Kp_kb_factor) && (i4_call_type == 1))
+                {
+                    kp_kb_ref_ref = Kp_kb_factor;
+                }
+
+                if(ps_rate_control_api
+                       ->ai4_prev_frm_qp_q6[i4_scene_num][ps_rate_control_api->prev_ref_pic_type] ==
+                   0x7FFFFFFF)
+                {
+                    /* No qp stored for the previous reference type: fall back to the I picture qp */
+                    ps_rate_control_api
+                        ->ai4_prev_frm_qp_q6[i4_scene_num][ps_rate_control_api->prev_ref_pic_type] =
+                        ps_rate_control_api->ai4_prev_frm_qp_q6[i4_scene_num][I_PIC];
+                    kp_kb_ref_ref = 16;
+                }
+
+                /* Multiply in 64 bits: the stored qp may still be the 0x7FFFFFFF sentinel, and a
+                   32-bit signed product would then overflow */
+                i4_frame_qp_q6 = (WORD32)(
+                    ((LWORD64)ps_rate_control_api
+                         ->ai4_prev_frm_qp_q6[i4_scene_num][ps_rate_control_api->prev_ref_pic_type] *
+                     Kp_kb_factor) /
+                    kp_kb_ref_ref);
+            }
+
+            /*HEVC_hierarchy: Breaks pause to resume logic if any and also the HBR mode concept as bit ratios are not known. It is now guaranteed that all frames
+            encoded after scene cut will belong to new scene(B pic of first sub-gop)Hence the below logic of using max of either current estimate
+            or previous B frame qp is not required*/
+            /* Since precise SCD position at B-pic level is not known, take the MAX of earlier B-QP and scaled I_QP after SCD */
+            /*HEVC_RC : Since precise SCD location is known and it is guaranteed that pic encoded after I pic belongs to new scene*/
+
+            {
+                WORD32 i4_bits_per_frame;
+                i4_bits_per_frame = get_buf_max_drain_rate(ps_rate_control_api->ps_cbr_buffer);
+                if(i4_call_type == 1)
+                {
+                    rc_modify_est_tot(ps_rate_control_api, i4_bits_per_frame);
+                }
+            }
+        }
+        /* The check is because the model gives a negative QP when the
+        i4_est_tex_bits is less than or equal to 0
+        [This is a bug in the model]. As a temporary fix, the frame QP
+        is being set to the max QP allowed */
+        else if(i4_est_tex_bits > 0)
+        {
+            if(i4_call_type == 1)
+            {
+                rc_modify_est_tot(ps_rate_control_api, (i4_est_tex_bits + i4_cur_est_header_bits));
+            }
+            i4_steady_state_texture_case = 1;
+            /* Query the model for the Qp that spends the buffer based max texture bits */
+            i4_frame_qp_q6_based_max_vbv_bits = find_qp_for_target_bits(
+                ps_rate_control_api->aps_rd_model[e_pic_type],
+                i4_buf_based_max_bits - i4_cur_est_header_bits,
+                u4_estimated_sad,
+                (ps_rate_control_api->ai4_max_qp_q6[e_pic_type]),
+                (ps_rate_control_api->ai4_min_qp_q6[e_pic_type]));
+            if(i4_call_type == 1)
+            {
+                *pi4_maxEbfQP = ihevce_rc_get_scaled_hevce_qp_q6(
+                    i4_frame_qp_q6_based_max_vbv_bits, ps_rate_control_api->u1_bit_depth);
+            }
+            /* Query the model for the Qp for the corresponding frame*/
+            i4_frame_qp_q6 = find_qp_for_target_bits(
+                ps_rate_control_api->aps_rd_model[e_pic_type],
+                i4_est_tex_bits,
+                u4_estimated_sad,
+                (ps_rate_control_api->ai4_max_qp_q6[e_pic_type]),
+                (ps_rate_control_api->ai4_min_qp_q6[e_pic_type]));
+            i4_frame_qp = ((i4_frame_qp_q6 + (1 << (QSCALE_Q_FAC - 1))) >> QSCALE_Q_FAC);
+        }
+        else
+        {
+            {
+                WORD32 i4_bits_per_frame;
+                i4_bits_per_frame = get_buf_max_drain_rate(ps_rate_control_api->ps_cbr_buffer);
+                if(i4_call_type == 1)
+                {
+                    rc_modify_est_tot(ps_rate_control_api, i4_bits_per_frame);
+                }
+            }
+            i4_frame_qp = ps_rate_control_api->ai4_max_qp[e_pic_type];
+            i4_frame_qp_q6 = ps_rate_control_api->ai4_max_qp_q6[e_pic_type];
+        }
+        if(i4_call_type == 1)
+        {
+            *pi4_modelQP =
+                ihevce_rc_get_scaled_hevce_qp_q6(i4_frame_qp_q6, ps_rate_control_api->u1_bit_depth);
+        }
+        {
+            /*This clip is added to prevent the change in qp close to scene cuts i.e even though the buffer
+            allows the qp to go low the bit alloc model has a problem of having the denominator considering
+            the previous subgop complexity and giving bits*/
+            WORD32 i4_clip_flag =
+                ((i4_call_type == 1) && (i4_is_model_valid == 1) &&
+                 (ps_rate_control_api->i4_rc_pass == 2) &&
+                 (i4_buf_based_max_bits > i4_est_tex_bits));
+            WORD32 i4_ebf = rc_get_ebf(ps_rate_control_api),
+                   i4_max_ebf = i4_ebf + i4_buf_based_max_bits;
+            WORD32 i4_inter_frame_interval =
+                pic_type_get_inter_frame_interval(ps_rate_control_api->ps_pic_handling);
+            float f_buffer_fullness = (float)i4_ebf / i4_max_ebf;
+            i4_clip_flag = i4_clip_flag && (ps_rate_control_api->i4_scd_in_period_2_pass == 1);
+            i4_clip_flag = i4_clip_flag && (i4_ebf < (i4_max_ebf * 0.5f));
+            i4_clip_flag = i4_clip_flag && (ps_rate_control_api->e_rc_type == VBR_STREAMING);
+
+            i4_clip_flag = i4_clip_flag && (ps_rate_control_api->i4_frames_since_last_scd >
+                                            i4_inter_frame_interval);
+
+            if(i4_clip_flag == 1)
+            {
+                /* Clip only when the previous frame consumed close to what was estimated for it */
+                WORD32 i4_prev_frame_tot_est_bits = ba_get_prev_frame_tot_est_bits(
+                    ps_rate_control_api->ps_bit_allocation, (WORD32)ps_rate_control_api->e_rc_type);
+                WORD32 i4_prev_frame_tot_bits = ba_get_prev_frame_tot_bits(
+                    ps_rate_control_api->ps_bit_allocation, (WORD32)ps_rate_control_api->e_rc_type);
+                float i4_consumption_ratio =
+                    (float)i4_prev_frame_tot_bits / i4_prev_frame_tot_est_bits;
+                if(i4_consumption_ratio > 0.7f && i4_consumption_ratio < 1.5f)
+                    i4_clip_flag = 1;
+                else
+                    i4_clip_flag = 0;
+            }
+            if(i4_clip_flag == 1)
+            {
+                trace_printf("Clipped");
+                trace_printf("Before %d", i4_frame_qp_q6);
+                if(af_sum_weigh[e_pic_type][0] > 1.0f)
+                {
+                    /*Complex followed by simple*/
+                    if(i4_frame_qp_q6 >
+                       ps_rate_control_api->ai4_prev_frm_qp_q6[i4_scene_num][e_pic_type])
+                    {
+                        if(f_buffer_fullness < 0.3f)
+                        {
+                            i4_frame_qp_q6 =
+                                ps_rate_control_api->ai4_prev_frm_qp_q6[i4_scene_num][e_pic_type];
+                        }
+                        else
+                        {
+                            /* NOTE(review): the factor 72 * 3 (= 216) looks unusually large for a qp
+                               clip; a one-step limit would be 72/64 (1.125, the ratio used by the
+                               underflow handling below) - confirm the intended bound */
+                            if(i4_frame_qp_q6 >
+                               (ps_rate_control_api->ai4_prev_frm_qp_q6[i4_scene_num][e_pic_type] *
+                                72 * 3))
+                                i4_frame_qp_q6 =
+                                    (ps_rate_control_api
+                                         ->ai4_prev_frm_qp_q6[i4_scene_num][e_pic_type] *
+                                     72 * 3);
+                        }
+                    }
+                }
+                if(af_sum_weigh[e_pic_type][0] < 1.0f)
+                {
+                    /*Simple followed by complex: intentionally no clip at present*/
+                    if(i4_frame_qp_q6 <
+                       ps_rate_control_api->ai4_prev_frm_qp_q6[i4_scene_num][e_pic_type])
+                    {
+                        /*i4_frame_qp_q6 = ps_rate_control_api->ai4_prev_frm_qp_q6[e_pic_type];*/
+                    }
+                }
+                trace_printf("After %d", i4_frame_qp_q6);
+            }
+        }
+
+        /*swing restriction based on previous frame qp swing*/
+        {
+            if(i4_call_type == 1)
+            {
+                trace_printf(
+                    "Before i4_frame_qp_q6 = %d min qp = %d  max_qp = %d "
+                    "bufclip %d",
+                    i4_frame_qp_q6,
+                    (i4_min_frame_qp_q6),
+                    (i4_max_frame_qp_q6),
+                    i4_buffer_based_max_qp_clip_flag);
+            }
+            if(i4_frame_qp_q6 < i4_min_frame_qp_q6)
+                i4_frame_qp_q6 = i4_min_frame_qp_q6;
+
+            /*removed low side clipping to avoid HRD compliance issue*/
+            if(i4_steady_state_texture_case)
+            {
+                /* Above the swing limit: clip to the larger of the swing limit and the qp that
+                   spends the buffer based max bits */
+                if(i4_frame_qp_q6 > i4_max_frame_qp_q6)
+                {
+                    if(i4_max_frame_qp_q6 > (i4_frame_qp_q6_based_max_vbv_bits))
+                    {
+                        i4_frame_qp_q6 = i4_max_frame_qp_q6;
+                    }
+                    else
+                    {
+                        i4_frame_qp_q6 = i4_frame_qp_q6_based_max_vbv_bits;
+                    }
+                }
+            }
+        }
+        if(i4_call_type == 1)
+        {
+            trace_printf("After i4_frame_qp_q6 = %d", i4_frame_qp_q6);
+        }
+
+        /* SS - Following done to restore this after pause to resume detect - 0.25 is for syntax bits */
+        ps_rate_control_api->i4_orig_frm_est_bits = (i4_est_tex_bits * 5) >> 2;
+        ps_rate_control_api->i4_prev_frm_est_bits = (i4_est_tex_bits + i4_cur_est_header_bits);
+        pi4_cur_est_texture_bits[0] = i4_est_tex_bits;
+
+        /*For frames after SCD, when neither online or offline model can estimate the bits,
+        use the remaining bits in period as max bits*/
+        *pi4_is_model_valid = i4_is_model_valid;
+
+        if(0 == i4_is_model_valid)
+        {
+            *pi4_tot_bits_estimated =
+                i4_bit_alloc_est_tex_bits_for_invalid_model; //(i4_buf_based_max_bits * 0.80);
+        }
+        else
+        {
+            *pi4_tot_bits_estimated = i4_est_tex_bits + i4_cur_est_header_bits;
+        }
+
+        /*For B pics assigning a non-zero value to avoid assert */
+        if(*pi4_tot_bits_estimated == 0)
+        {
+            *pi4_tot_bits_estimated = 1;
+        }
+        ASSERT(*pi4_tot_bits_estimated != 0);
+        /*Underflow prevention*/
+        if((ps_rate_control_api->i4_underflow_warning == 1) &&
+           (i4_est_tex_bits < (i4_buf_based_max_bits - i4_cur_est_header_bits)) &&
+           (i4_call_type == 1))
+        {
+            /*Decrement the hevc_qp by 1 for underflow prevention*/
+            i4_frame_qp_q6 = (WORD32)((float)i4_frame_qp_q6 / (float)1.125f);
+            ps_rate_control_api->i4_underflow_warning = 0;
+            if(i4_call_type == 1)
+            {
+                trace_printf("\nUnderflow warning");
+            }
+        }
+    }
+
+    /* Clip the frame qp within Min and Max QP */
+    if(i4_frame_qp_q6 < ps_rate_control_api->ai4_min_qp_q6[e_pic_type])
+    {
+        i4_frame_qp_q6 = ps_rate_control_api->ai4_min_qp_q6[e_pic_type];
+    }
+    else if(i4_frame_qp_q6 > ps_rate_control_api->ai4_max_qp_q6[e_pic_type])
+    {
+        i4_frame_qp_q6 = ps_rate_control_api->ai4_max_qp_q6[e_pic_type];
+    }
+    if(i4_call_type == 1)
+    {
+        *pi4_vbv_buf_max_bits = i4_buf_based_max_bits;
+        *pi4_est_tex_bits = i4_est_tex_bits;
+        *pi4_cur_est_header_bits = i4_cur_est_header_bits;
+    }
+    return (i4_frame_qp_q6);
+}
+
+/****************************************************************************
+Function Name : get_bits_for_final_qp
+Description   : Estimates the total bits (texture + header) that correspond
+                to the final, clipped frame qp.
+                - If the model qp and the max-ebf qp are both valid and the
+                  clipped qp is above the max-ebf qp, the texture bits are
+                  found by a 30 step bisection between i4_est_tex_bits and
+                  the buffer based max texture bits, keeping the candidate
+                  whose model qp is closest to i4_frame_qp_q6 (or directly
+                  the estimated bits when the model qp equals the clipped qp).
+                - Otherwise the estimated bits are used when the max-ebf qp
+                  is invalid and the model qp equals the clipped qp, else the
+                  buffer based max bits.
+Inputs        : ps_rate_control_api, pi4_modelQP, pi4_maxEbfQP, i4_clipQP,
+                i4_frame_qp_q6, i4_cur_est_header_bits, i4_est_tex_bits,
+                i4_buf_based_max_bits, e_pic_type
+                i4_display_num - not used
+Outputs       : *pi8_bits_from_finalQP
+Returns       : None
+*****************************************************************************/
+
+void get_bits_for_final_qp(
+    rate_control_api_t *ps_rate_control_api,
+    WORD32 *pi4_modelQP,
+    WORD32 *pi4_maxEbfQP,
+    LWORD64 *pi8_bits_from_finalQP,
+    WORD32 i4_clipQP,
+    WORD32 i4_frame_qp_q6,
+    WORD32 i4_cur_est_header_bits,
+    WORD32 i4_est_tex_bits,
+    WORD32 i4_buf_based_max_bits,
+    picture_type_e e_pic_type,
+    WORD32 i4_display_num)
+{
+    UWORD32 u4_sad_estimate = get_est_sad(ps_rate_control_api->ps_est_sad, e_pic_type);
+    WORD32 i4_lo_bits, i4_hi_bits, i4_probe_bits;
+    WORD32 i4_best_err = 0x7FFFFFFF;
+    WORD32 i4_iter;
+
+    /* No search possible/needed: one of the qps is invalid or the clip did not raise the qp
+       above the max-ebf qp */
+    if((*pi4_modelQP == INVALID_QP) || (*pi4_maxEbfQP == INVALID_QP) ||
+       (i4_clipQP <= *pi4_maxEbfQP))
+    {
+        /* when est bits is less than 0 , max ebfQP is not updated, hence invalid
+        as estimated bits are less it will not cause any buffer trouble*/
+        if((*pi4_maxEbfQP == INVALID_QP) && (*pi4_modelQP == i4_clipQP))
+        {
+            *pi8_bits_from_finalQP = i4_est_tex_bits + i4_cur_est_header_bits;
+        }
+        else
+        {
+            *pi8_bits_from_finalQP = i4_buf_based_max_bits;
+        }
+        return;
+    }
+
+    /* The clipped qp is the model qp itself: the estimate already matches */
+    if(*pi4_modelQP == i4_clipQP)
+    {
+        *pi8_bits_from_finalQP = i4_est_tex_bits + i4_cur_est_header_bits;
+        return;
+    }
+
+    /* Bisection over texture bits to find the bits corresponding to the final (clipped) qp */
+    i4_lo_bits = i4_est_tex_bits;
+    i4_hi_bits = i4_buf_based_max_bits - i4_cur_est_header_bits;
+    i4_probe_bits = (i4_lo_bits + i4_hi_bits) >> 1;
+
+    for(i4_iter = 0; i4_iter < 30; i4_iter++)
+    {
+        WORD32 i4_probe_qp_q6 = find_qp_for_target_bits(
+            ps_rate_control_api->aps_rd_model[e_pic_type],
+            i4_probe_bits,
+            u4_sad_estimate,
+            (ps_rate_control_api->ai4_max_qp_q6[e_pic_type]),
+            (ps_rate_control_api->ai4_min_qp_q6[e_pic_type]));
+        WORD32 i4_err = abs(i4_probe_qp_q6 - i4_frame_qp_q6);
+
+        /* Remember the candidate whose qp is closest to the target so far */
+        if(i4_err < i4_best_err)
+        {
+            *pi8_bits_from_finalQP = i4_probe_bits + i4_cur_est_header_bits;
+            i4_best_err = i4_err;
+        }
+
+        /* A qp below the target means too many bits were tried: lower the upper bound */
+        if(i4_probe_qp_q6 < i4_frame_qp_q6)
+        {
+            i4_hi_bits = i4_probe_bits;
+        }
+        else
+        {
+            i4_lo_bits = i4_probe_bits;
+        }
+        i4_probe_bits = (i4_lo_bits + i4_hi_bits) >> 1;
+    }
+    return;
+}
+/****************************************************************************
+Function Name : get_buffer_status
+Description   : Gets the state of the VBV buffer for the given frame size.
+Inputs        : ps_rate_control_api - rate control state
+                i4_total_frame_bits - total frame bits consumed
+                e_pic_type          - picture type (used by the CBR buffer
+                                      query only)
+Outputs       : pi4_num_bits_to_prevent_vbv_underflow - filled by the buffer
+                module; for VBR_STORAGE it is set to the max vbv buffer size.
+                It is left untouched for rate control types not listed below.
+Returns       : vbv_buf_status_e (per the original note: 0 = normal,
+                1 = underflow, 2 = overflow); VBV_NORMAL for VBR_STORAGE and
+                for rate control types not handled here
+*****************************************************************************/
+vbv_buf_status_e get_buffer_status(
+    rate_control_api_t *ps_rate_control_api,
+    WORD32 i4_total_frame_bits, /* Total frame bits consumed */
+    picture_type_e e_pic_type,
+    WORD32 *pi4_num_bits_to_prevent_vbv_underflow)
+{
+    vbv_buf_status_e e_status = VBV_NORMAL;
+
+    /* Get the buffer status for the current total consumed bits and error bits*/
+    switch(ps_rate_control_api->e_rc_type)
+    {
+    case VBR_STORAGE_DVD_COMP:
+        e_status = get_vbv_buffer_status(
+            ps_rate_control_api->ps_vbr_storage_vbv,
+            i4_total_frame_bits,
+            pi4_num_bits_to_prevent_vbv_underflow);
+        break;
+
+    case VBR_STORAGE:
+        /* For VBR case since there is not underflow returning the max value */
+        pi4_num_bits_to_prevent_vbv_underflow[0] =
+            get_max_vbv_buf_size(ps_rate_control_api->ps_vbr_storage_vbv);
+        e_status = VBV_NORMAL;
+        break;
+
+    case CBR_NLDRC:
+    case VBR_STREAMING:
+        /* Both types query the CBR buffer. For VBR_streaming the error bits are computed
+           according to peak bitrate */
+        e_status = get_cbr_buffer_status(
+            ps_rate_control_api->ps_cbr_buffer,
+            i4_total_frame_bits,
+            pi4_num_bits_to_prevent_vbv_underflow,
+            e_pic_type,
+            ps_rate_control_api->e_rc_type);
+        break;
+
+    default:
+        break;
+    }
+    return e_status;
+}
+/****************************************************************************
+Function Name : update_pic_handling_state
+Description   : Advances the picture handling module for the given picture
+                type by calling update_pic_handling(). The picture is always
+                reported as a reference picture (is_non_ref_pic = 0).
+Inputs        : ps_rate_control_api - rate control state
+                e_pic_type          - picture type of the current frame
+Returns       : None
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+           KJN       Original
+*****************************************************************************/
+void update_pic_handling_state(rate_control_api_t *ps_rate_control_api, picture_type_e e_pic_type)
+{
+    update_pic_handling(
+        ps_rate_control_api->ps_pic_handling, e_pic_type, 0 /* i4_is_non_ref_pic */, 0);
+}
+/* Returns the GOP bits reported by the bit allocation module (ba_get_gop_bits()). */
+LWORD64 get_gop_bits(rate_control_api_t *ps_rate_control_api)
+{
+    LWORD64 i8_gop_bits = ba_get_gop_bits(ps_rate_control_api->ps_bit_allocation);
+
+    return i8_gop_bits;
+}
+/* Returns the GOP SAD reported by the bit allocation module (ba_get_gop_sad()). */
+LWORD64 get_gop_sad(rate_control_api_t *ps_rate_control_api)
+{
+    LWORD64 i8_gop_sad = ba_get_gop_sad(ps_rate_control_api->ps_bit_allocation);
+
+    return i8_gop_sad;
+}
+/* Returns 1 when the current GOP is treated as simple, 0 otherwise.
+   The GOP is not simple only when its buffer play bits are non-zero and, added to the
+   current encoder buffer fullness, exceed 60% of the CBR buffer size. */
+WORD32 check_if_current_GOP_is_simple(rate_control_api_t *ps_rate_control_api)
+{
+    LWORD64 i8_play_bits =
+        ba_get_buffer_play_bits_for_cur_gop(ps_rate_control_api->ps_bit_allocation);
+
+    /* No buffer play bits recorded: treat the GOP as simple */
+    if(0 == i8_play_bits)
+    {
+        return 1;
+    }
+
+    return ((i8_play_bits + get_cbr_ebf(ps_rate_control_api->ps_cbr_buffer)) >
+            (0.6 * get_cbr_buffer_size(ps_rate_control_api->ps_cbr_buffer)))
+               ? 0
+               : 1;
+}
+/****************************************************************************
+Function Name : rc_get_rbip_and_num_frames
+Description   : API wrapper around ba_get_rbip_and_num_frames(); hands it the
+                bit allocation and picture handling modules of this context.
+Inputs        : ps_rate_control_api - rate control context
+                pi4_num_frames      - passed through unchanged to
+                                      ba_get_rbip_and_num_frames()
+Returns       : Result of ba_get_rbip_and_num_frames()
+*****************************************************************************/
+LWORD64 rc_get_rbip_and_num_frames(rate_control_api_t *ps_rate_control_api, WORD32 *pi4_num_frames)
+{
+    LWORD64 i8_rbip = ba_get_rbip_and_num_frames(
+        ps_rate_control_api->ps_bit_allocation,
+        ps_rate_control_api->ps_pic_handling,
+        pi4_num_frames);
+
+    return i8_rbip;
+}
+/****************************************************************************
+Function Name : update_frame_level_info
+Description   : Feeds the statistics of a just-encoded frame back into the
+                rate control state: picture handling, bit allocation, the
+                VBV / CBR buffer model, the RD model and the SAD estimator.
+                Model updation and buffer updation take separate bit counts
+                so that an encoder following strict VBV compliance, which
+                skips a picture after encoding it, can update the model from
+                the discarded picture's statistics and the buffer from the
+                bits actually consumed by the skipped picture.
+Inputs        : Documented per parameter on the signature below.
+                i1_qp_offset and i4_num_frm_enc_in_scene are accepted but are
+                not referenced in this function.
+Globals       : None
+Processing    : - SCD is honoured only for I_PIC and P_PIC; pause-to-resume is
+                  dropped when SCD is also signalled.
+                - The previous-frame QP entries of the scene slot
+                  (i4_scene_num + HALF_MAX_SCENE_NUM_RC) % MAX_SCENE_NUM_RC
+                  are set to 0x7FFFFFFF.
+                - CONST_QP: only the frame info and the CBR buffer are updated.
+                - Other RC types: bit allocation, buffer model, RD model and
+                  SAD estimation are updated as commented in the body.
+Outputs       : ps_frame_info and the state reachable from ps_rate_control_api
+Returns       : None
+Issues        : The sum of pi4_tot_mb_in_type is used as a divisor without a
+                zero check, so it has to be non-zero for non-skipped frames.
+Revision History:
+DD MM YYYY   Author(s)   Changes (Describe the changes made)
+             KJN         Original
+25 04 2008   Sushmita    Added support to get different bits for model
+                         updation & buffer updation.
+*****************************************************************************/
+void update_frame_level_info(
+    rate_control_api_t *ps_rate_control_api,
+    picture_type_e e_pic_type,
+    LWORD64 *pi8_mb_type_sad, /* Frame level SAD for each type of MB[Intra/Inter] */
+    WORD32 i4_total_frame_bits, /* Total frame bits actually consumed */
+    WORD32 i4_model_updation_hdr_bits, /* Header bits for model updation */
+    WORD32 *
+        pi4_mb_type_tex_bits, /* Total texture bits consumed for each type of MB[Intra/Inter] used for model */
+    LWORD64 *pi8_tot_mb_type_qp_q6, /* Total qp (Q6) of all MBs based on mb type */
+    WORD32 *pi4_tot_mb_in_type, /* Total number of mbs in each mb type */
+    WORD32 i4_avg_activity, /* Average mb activity in frame */
+    UWORD8 u1_is_scd, /* Is a scene change detected at the current frame */
+    WORD32 i4_is_it_a_skip,
+    WORD32 i4_intra_frm_cost,
+    WORD32
+        i4_is_pic_handling_done, /* If picture handling is not done then update pic handling module. Special case for staggered encoding */
+    WORD32 i4_suppress_bpic_update,
+    WORD32 i4_bits_to_be_stuffed,
+    WORD32 i4_is_pause_to_resume,
+    WORD32 i4_lap_window_comp,
+    WORD32 i4_is_end_of_period,
+    WORD32 i4_lap_based_comp_reset,
+    frame_info_t *ps_frame_info,
+    WORD32 i4_is_rc_model_needs_to_be_updated,
+    WORD8 i1_qp_offset,
+    WORD32 i4_scene_num,
+    WORD32 i4_num_frm_enc_in_scene,
+    WORD32 i4_est_text_bits_ctr_update_qp)
+{
+    UWORD8 u1_num_skips = 0;
+    WORD32 i;
+    LWORD64 i8_frame_sad = 0; /* Frame level SAD */
+    WORD32 i4_tot_texture_bits = 0; /* Total texture bits consumed */
+    WORD32 i4_tot_mbs = 0; /* Total number of mbs in frame */
+    LWORD64 i8_avg_qp = 0, i8_avg_qp_q6 = 0;
+    WORD32 i4_flag_rc_model_update = (i4_is_rc_model_needs_to_be_updated == 1);
+    WORD32 i4_gop_correction = 0, i4_new_correction = 0;
+
+    ps_frame_info->i4_flag_rc_model_update = i4_flag_rc_model_update;
+    ps_frame_info->i4_num_entries++;
+    trace_printf(
+        "update pic_type = %d tbc = %d hbc = %d\n",
+        e_pic_type,
+        (i4_total_frame_bits - i4_model_updation_hdr_bits),
+        i4_model_updation_hdr_bits);
+    /* NOTE KJN: SCD not supported in case of B Frames */
+    if(u1_is_scd && (e_pic_type != I_PIC && e_pic_type != P_PIC))
+    {
+        u1_is_scd = 0;
+    }
+
+    /* If both pause to resume and scene cut are signalled then ignore the
+       pause to resume flag */
+    if(u1_is_scd && i4_is_pause_to_resume)
+        i4_is_pause_to_resume = 0;
+
+    if(!i4_is_it_a_skip && !i4_is_pic_handling_done)
+    {
+        /* Update the pic_handling struct. i4_suppress_bpic_update is passed
+           through so that pic handling is not updated for a non-reference
+           B-PIC */
+        update_pic_handling(
+            ps_rate_control_api->ps_pic_handling, e_pic_type, i4_suppress_bpic_update, u1_is_scd);
+    }
+    {
+        /* Mark every previous-frame QP entry of the scene slot that lies
+           HALF_MAX_SCENE_NUM_RC ahead of the current one with 0x7FFFFFFF;
+           update_frame_rc_get_frame_qp_info() tests for this value */
+        WORD32 *pi4_qp_array =
+            ps_rate_control_api
+                ->ai4_prev_frm_qp[(i4_scene_num + HALF_MAX_SCENE_NUM_RC) % MAX_SCENE_NUM_RC];
+        WORD32 *pi4_qp_array_q6 =
+            ps_rate_control_api
+                ->ai4_prev_frm_qp_q6[(i4_scene_num + HALF_MAX_SCENE_NUM_RC) % MAX_SCENE_NUM_RC];
+        WORD32 i4_i;
+        for(i4_i = 0; i4_i < MAX_PIC_TYPE; i4_i++)
+        {
+            pi4_qp_array[i4_i] = 0x7FFFFFFF;
+            pi4_qp_array_q6[i4_i] = 0x7FFFFFFF;
+        }
+    }
+
+    if(ps_rate_control_api->e_rc_type == CONST_QP)
+    {
+        if(!i4_is_it_a_skip)
+        {
+            /******************************************************************
+            Calculate the total values from the individual values
+            ******************************************************************/
+            for(i = 0; i < MAX_MB_TYPE; i++)
+                i8_frame_sad += pi8_mb_type_sad[i];
+            for(i = 0; i < MAX_MB_TYPE; i++)
+                i4_tot_texture_bits += pi4_mb_type_tex_bits[i];
+            for(i = 0; i < MAX_MB_TYPE; i++)
+                i8_avg_qp += (pi8_tot_mb_type_qp_q6[i] >> 6);
+
+            for(i = 0; i < MAX_MB_TYPE; i++)
+                i8_avg_qp_q6 += pi8_tot_mb_type_qp_q6[i];
+            for(i = 0; i < MAX_MB_TYPE; i++)
+                i4_tot_mbs += pi4_tot_mb_in_type[i];
+            /* NOTE(review): i4_tot_mbs is assumed to be non-zero here */
+            i8_avg_qp /= i4_tot_mbs; /* Calculate the average QP */
+            i8_avg_qp_q6 /= i4_tot_mbs;
+
+            if(ps_rate_control_api->u1_is_mb_level_rc_on)
+            {
+                /* The model needs to take into consideration the average activity of the
+                entire frame while estimating the QP. Thus the frame sad values are scaled by
+                the average activity before updating it into the model.*/
+                if(!i4_avg_activity)
+                    i4_avg_activity = 1;
+                i4_intra_frm_cost /= i4_avg_activity;
+                i8_frame_sad /= i4_avg_activity;
+            }
+
+            ps_frame_info->i8_frame_num = get_num_frms_encoded(ps_rate_control_api->ps_cbr_buffer);
+            ps_frame_info->i4_num_entries++;
+
+            update_cbr_buffer(
+                ps_rate_control_api->ps_cbr_buffer,
+                (i4_total_frame_bits + i4_bits_to_be_stuffed),
+                e_pic_type);
+        }
+    }
+
+    if(ps_rate_control_api->e_rc_type != CONST_QP)
+    {
+        /* For improving CBR streams quality */
+        WORD32 i4_buffer_based_bit_error = 0;
+
+        if(!i4_is_it_a_skip)
+        {
+            WORD32 i4_new_period_flag;
+            /******************************************************************
+            Calculate the total values from the individual values
+            ******************************************************************/
+            for(i = 0; i < MAX_MB_TYPE; i++)
+                i8_frame_sad += pi8_mb_type_sad[i];
+            for(i = 0; i < MAX_MB_TYPE; i++)
+                i4_tot_texture_bits += pi4_mb_type_tex_bits[i];
+            for(i = 0; i < MAX_MB_TYPE; i++)
+                i8_avg_qp += (pi8_tot_mb_type_qp_q6[i] >> 6);
+
+            for(i = 0; i < MAX_MB_TYPE; i++)
+                i8_avg_qp_q6 += pi8_tot_mb_type_qp_q6[i];
+            for(i = 0; i < MAX_MB_TYPE; i++)
+                i4_tot_mbs += pi4_tot_mb_in_type[i];
+            /* NOTE(review): i4_tot_mbs is assumed to be non-zero here */
+            i8_avg_qp /= i4_tot_mbs; /* Calculate the average QP */
+            i8_avg_qp_q6 /= i4_tot_mbs;
+
+            if(ps_rate_control_api->u1_is_mb_level_rc_on)
+            {
+                /* The model needs to take into consideration the average activity of the
+                entire frame while estimating the QP. Thus the frame sad values are scaled by
+                the average activity before updating it into the model.*/
+                if(!i4_avg_activity)
+                    i4_avg_activity = 1;
+                i4_intra_frm_cost /= i4_avg_activity;
+                i8_frame_sad /= i4_avg_activity;
+            }
+
+            ps_frame_info->i8_frame_num = get_num_frms_encoded(ps_rate_control_api->ps_cbr_buffer);
+            ps_frame_info->i4_num_entries++;
+            /******************************************************************
+            Update the bit allocation module
+            NOTE: For bit allocation module, the pic_type should not be modified
+            to that of 'I', in case of a SCD.
+            ******************************************************************/
+            i4_new_period_flag = is_last_frame_in_gop(ps_rate_control_api->ps_pic_handling);
+
+            /* i4_total_frame_bits is passed without the stuffing bits; the
+               stuffing bits go in separately as i4_bits_to_be_stuffed */
+            update_cur_frm_consumed_bits(
+                ps_rate_control_api->ps_bit_allocation,
+                ps_rate_control_api->ps_pic_handling,
+                ps_rate_control_api->ps_cbr_buffer,
+                i4_total_frame_bits,
+                i4_model_updation_hdr_bits,
+                e_pic_type,
+                u1_is_scd,
+                i4_is_end_of_period,
+                i4_lap_based_comp_reset,
+                i4_suppress_bpic_update,
+                i4_buffer_based_bit_error,
+                i4_bits_to_be_stuffed,
+                i4_lap_window_comp,
+                ps_rate_control_api->e_rc_type,
+                ps_rate_control_api->i4_num_gop,
+                i4_is_pause_to_resume,
+                i4_est_text_bits_ctr_update_qp,
+                &i4_gop_correction,
+                &i4_new_correction);
+            if(1 == i4_new_period_flag &&
+               ((ps_rate_control_api->e_rc_type == VBR_STORAGE) ||
+                (ps_rate_control_api->e_rc_type == VBR_STORAGE_DVD_COMP)))
+            {
+                check_and_update_bit_allocation(
+                    ps_rate_control_api->ps_bit_allocation,
+                    ps_rate_control_api->ps_pic_handling,
+                    get_max_bits_inflow_per_frm_periode(ps_rate_control_api->ps_vbr_storage_vbv));
+            }
+        }
+
+        /******************************************************************
+        Update the buffer status
+        ******************************************************************/
+        /* This updation is done after overflow and underflow handling to
+        account for the actual bits dumped*/
+        if((ps_rate_control_api->e_rc_type == VBR_STORAGE) ||
+           (ps_rate_control_api->e_rc_type == VBR_STORAGE_DVD_COMP))
+        {
+            update_vbr_vbv(ps_rate_control_api->ps_vbr_storage_vbv, i4_total_frame_bits);
+        }
+        else if(
+            ps_rate_control_api->e_rc_type == CBR_NLDRC ||
+            ps_rate_control_api->e_rc_type == VBR_STREAMING)
+        {
+            update_cbr_buffer(
+                ps_rate_control_api->ps_cbr_buffer,
+                (i4_total_frame_bits + i4_bits_to_be_stuffed),
+                e_pic_type);
+        }
+
+        /* Clear the "previous reference was a scene cut" flag only when the
+           current picture is none of the B picture types listed here. The
+           three inequalities used to be chained with ||, which holds for every
+           picture type and therefore cleared the flag on B pictures as well.
+           NOTE(review): BB_PIC, B11_PIC and B22_PIC are not excluded, as in
+           the original list - confirm whether they should be. */
+        if(e_pic_type != B_PIC && e_pic_type != B1_PIC && e_pic_type != B2_PIC)
+        {
+            ps_rate_control_api->i4_prev_ref_is_scd = 0;
+        }
+
+        if(!i4_is_it_a_skip)
+        {
+            /******************************************************************
+            Handle the SCENE CHANGE DETECTED
+            1) Make the picture type as I, so that updation happens as if it is
+            a I frame
+            2) Reset model, SAD and flag to restart the estimation process
+            ******************************************************************/
+            if(u1_is_scd || ps_rate_control_api->u1_is_first_frm)
+            {
+                e_pic_type = I_PIC;
+
+                /* Reset the SAD estimation module */
+                reset_est_sad(ps_rate_control_api->ps_est_sad);
+
+                /* Remember the previous reference as SCD. This is required to trigger
+                 * querying the model for B frames with a delay of one sub-gop */
+                ps_rate_control_api->i4_prev_ref_is_scd = 1;
+
+                /* Reset the MB Rate control */
+                init_mb_level_rc(ps_rate_control_api->ps_mb_rate_control);
+
+                /* Initialize the I picture QP of this scene with the average QP of
+                   the SCD pic */
+                ps_rate_control_api->ai4_prev_frm_qp[i4_scene_num][I_PIC] = (WORD32)i8_avg_qp;
+
+                ps_rate_control_api->ai4_prev_frm_qp_q6[i4_scene_num][I_PIC] = (WORD32)i8_avg_qp_q6;
+
+                ps_rate_control_api->i4_frames_since_last_scd = 0;
+
+                ps_rate_control_api->f_p_to_i_comp_ratio = 1.0f;
+            }
+            else if(i4_is_pause_to_resume)
+            {
+                reset_frm_rc_rd_model(ps_rate_control_api->aps_rd_model[e_pic_type]); //ELP_RC
+            }
+            if(i8_frame_sad && (!i4_suppress_bpic_update))
+            {
+                /********************************************************************
+                Update the model of the corresponding picture type
+                NOTE: For SCD, we force the frame type from 'P' to that of a 'I'
+                *********************************************************************/
+                /* For very simple sequences no bits are consumed by texture. These frames
+                do not add any information to the model and so are not added. */
+                WORD32 i4_tot_texture_bits_added_to_model = i4_tot_texture_bits;
+
+                /* The model is updated only when texture bits were consumed and the
+                 * caller asked for a model update */
+                if(i4_tot_texture_bits_added_to_model > 0 && (i4_flag_rc_model_update == 1))
+                {
+                    add_frame_to_rd_model(
+                        ps_rate_control_api->aps_rd_model[e_pic_type],
+                        i4_tot_texture_bits_added_to_model,
+                        (WORD32)i8_avg_qp_q6,
+                        i8_frame_sad,
+                        u1_num_skips);
+
+                    {
+                        number_t temp =
+                            get_linear_coefficient(ps_rate_control_api->aps_rd_model[e_pic_type]);
+                        ps_frame_info->model_coeff_a_lin_wo_int.e = temp.e;
+                        ps_frame_info->model_coeff_a_lin_wo_int.sm = temp.sm;
+                    }
+                }
+
+                /******************************************************************
+                Update the sad estimation module
+                NOTE: For SCD, we force the frame type from 'P' to that of a 'I'
+                ******************************************************************/
+                update_actual_sad(
+                    ps_rate_control_api->ps_est_sad, (UWORD32)i8_frame_sad, e_pic_type);
+                /* The I pic intra SAD is not updated here: for non I-PIC the intra sad
+                 * comes out the same as the best sad and would corrupt the intra frame
+                 * sad. I frame SAD is updated only at I pic */
+
+                /* At least one proper frame is added into the model. Until then
+                keep using the initial QP */
+
+                /* B frames immediately encoded after a scene cut may still belong to the
+                 * previous content, while B frames encoded after one P frame after SCD are
+                 * guaranteed to belong to the new scene. Modeling these frames w.r.t.
+                 * previous B frames might give wrong results, so the model for B frames is
+                 * not queried unless one B frame has been modeled with new content. */
+                /* In HEVC implementation it is guaranteed to encode new scene after scene cut I pic */
+                ps_rate_control_api->au1_is_first_frm_coded[e_pic_type] = 1;
+            }
+
+            if(i4_avg_activity)
+            {
+                /* Update the mb_level model */
+                mb_update_frame_level(ps_rate_control_api->ps_mb_rate_control, i4_avg_activity);
+            }
+            /* Update the variable which denotes that a frame has been encountered */
+            ps_rate_control_api->u1_is_first_frm = 0;
+            ps_rate_control_api->i4_frames_since_last_scd++;
+        }
+    }
+    return;
+}
+/* SGI & Enc Loop Parallelism related changes*/
+/****************************************************************************
+Function Name : update_frame_rc_get_frame_qp_info
+Description   : Records the average QP of a frame in the previous-frame QP
+                tables of its scene and updates the scene change / first frame
+                bookkeeping of the rate control context.
+Inputs        : ps_rate_control_api     - rate control context
+                e_pic_type              - picture type of the frame
+                i4_is_scd               - scene change flag; ignored unless the
+                                          picture is I_PIC or P_PIC
+                i4_is_pause_to_resume   - ignored when a scene change is taken
+                i4_avg_frame_qp_q6      - average frame QP in Q6
+                i4_suppress_bpic_update - when set, the plain (no SCD, no
+                                          pause-to-resume) path stores nothing
+                i4_scene_num            - row of the previous-frame QP tables
+                i4_num_frm_enc_in_scene - when 1, table entries still holding
+                                          0x7FFFFFFF are filled with this QP
+Returns       : None
+Revision History:
+DD MM YYYY   Author(s)   Changes (Describe the changes made)
+
+*****************************************************************************/
+void update_frame_rc_get_frame_qp_info(
+    rate_control_api_t *ps_rate_control_api,
+    picture_type_e e_pic_type,
+    WORD32 i4_is_scd,
+    WORD32 i4_is_pause_to_resume,
+    WORD32 i4_avg_frame_qp_q6,
+    WORD32 i4_suppress_bpic_update,
+    WORD32 i4_scene_num,
+    WORD32 i4_num_frm_enc_in_scene)
+{
+    /* Average frame QP in integer and in Q6 precision */
+    const WORD32 i4_frm_qp = (i4_avg_frame_qp_q6 >> 6);
+    const WORD32 i4_frm_qp_q6 = i4_avg_frame_qp_q6;
+    /* Previous-frame QP rows of the current scene */
+    WORD32 *pi4_prev_qp = ps_rate_control_api->ai4_prev_frm_qp[i4_scene_num];
+    WORD32 *pi4_prev_qp_q6 = ps_rate_control_api->ai4_prev_frm_qp_q6[i4_scene_num];
+
+    /* A scene change is honoured only on I and P pictures */
+    if((e_pic_type != I_PIC) && (e_pic_type != P_PIC))
+    {
+        i4_is_scd = 0;
+    }
+
+    if(I_PIC == e_pic_type)
+    {
+        ps_rate_control_api->i4_I_frame_qp_model = is_first_frame_coded(ps_rate_control_api);
+    }
+
+    /* A scene change takes precedence over pause to resume */
+    if(i4_is_scd)
+    {
+        i4_is_pause_to_resume = 0;
+    }
+
+    if(i4_is_scd || ps_rate_control_api->u1_is_first_frm)
+    {
+        /* From here on the frame is treated as an I picture */
+        e_pic_type = I_PIC;
+
+        /* Reset the SAD estimation module */
+        reset_est_sad(ps_rate_control_api->ps_est_sad);
+
+        /* Remember that the previous reference was a scene change; needed to
+         * trigger querying the model for B frames with a delay of one sub-gop */
+        ps_rate_control_api->i4_prev_ref_is_scd = 1;
+
+        /* Reset the MB Rate control */
+        init_mb_level_rc(ps_rate_control_api->ps_mb_rate_control);
+
+        /* Seed the I picture QP of this scene with the average QP of this frame */
+        pi4_prev_qp[I_PIC] = i4_frm_qp;
+        pi4_prev_qp_q6[I_PIC] = i4_frm_qp_q6;
+    }
+    else if(i4_is_pause_to_resume)
+    {
+        /* Pause to resume is guaranteed to be P_PIC */
+        ASSERT(e_pic_type != I_PIC);
+
+        /* Only the state of the current picture type is reset, not that of all
+           picture types.
+           TO DO: i4_is_pause_to_resume is a misnomer, as non I scd are also
+           handled in a similar way */
+        ps_rate_control_api->au1_is_first_frm_coded[e_pic_type] = 0;
+        ps_rate_control_api->i4_frames_since_last_scd = 0;
+
+        pi4_prev_qp[e_pic_type] = i4_frm_qp;
+        pi4_prev_qp_q6[e_pic_type] = i4_frm_qp_q6;
+
+        /* Also reset the previous I pic QP, since the I frame QP is used for QP
+           determination when the model is reset. P_TO_I_RATIO (Q format K_Q)
+           is applied once per level: P, then B, then B1, then B2 */
+        if(I_PIC == e_pic_type)
+        {
+            pi4_prev_qp[I_PIC] = i4_frm_qp;
+            pi4_prev_qp_q6[I_PIC] = i4_frm_qp_q6;
+        }
+        else if((P_PIC == e_pic_type) || (P1_PIC == e_pic_type))
+        {
+            pi4_prev_qp[I_PIC] = (WORD32)(((LWORD64)i4_frm_qp * P_TO_I_RATIO) >> K_Q);
+            pi4_prev_qp_q6[I_PIC] = (WORD32)(((LWORD64)i4_frm_qp_q6 * P_TO_I_RATIO) >> K_Q);
+        }
+        else if((B_PIC == e_pic_type) || (BB_PIC == e_pic_type))
+        {
+            pi4_prev_qp[I_PIC] =
+                (WORD32)(((LWORD64)i4_frm_qp * P_TO_I_RATIO * P_TO_I_RATIO) >> (K_Q + K_Q));
+            pi4_prev_qp_q6[I_PIC] =
+                (WORD32)(((LWORD64)i4_frm_qp_q6 * P_TO_I_RATIO * P_TO_I_RATIO) >> (K_Q + K_Q));
+        }
+        else if((B1_PIC == e_pic_type) || (B11_PIC == e_pic_type))
+        {
+            pi4_prev_qp[I_PIC] =
+                (WORD32)(((LWORD64)i4_frm_qp * P_TO_I_RATIO * P_TO_I_RATIO * P_TO_I_RATIO) >>
+                         (K_Q + K_Q + K_Q));
+            pi4_prev_qp_q6[I_PIC] =
+                (WORD32)(((LWORD64)i4_frm_qp_q6 * P_TO_I_RATIO * P_TO_I_RATIO * P_TO_I_RATIO) >>
+                         (K_Q + K_Q + K_Q));
+        }
+        else if((B2_PIC == e_pic_type) || (B22_PIC == e_pic_type))
+        {
+            pi4_prev_qp[I_PIC] =
+                (WORD32)(((LWORD64)i4_frm_qp * P_TO_I_RATIO * P_TO_I_RATIO * P_TO_I_RATIO *
+                          P_TO_I_RATIO) >>
+                         (K_Q + K_Q + K_Q + K_Q));
+            pi4_prev_qp_q6[I_PIC] =
+                (WORD32)(((LWORD64)i4_frm_qp_q6 * P_TO_I_RATIO * P_TO_I_RATIO * P_TO_I_RATIO *
+                          P_TO_I_RATIO) >>
+                         (K_Q + K_Q + K_Q + K_Q));
+        }
+    }
+    else if(!i4_suppress_bpic_update)
+    {
+        /******************************************************************
+        Update the Qp used by the current frame
+        ******************************************************************/
+        pi4_prev_qp[e_pic_type] = i4_frm_qp;
+        pi4_prev_qp_q6[e_pic_type] = i4_frm_qp_q6;
+        trace_printf("Prev frame qp q6 update %d pic type %d", i4_frm_qp_q6, e_pic_type);
+    }
+
+    /* First frame of the scene: every picture type whose QP entry still holds
+       0x7FFFFFFF inherits the QP of this frame */
+    if(1 == i4_num_frm_enc_in_scene)
+    {
+        WORD32 i4_type;
+        for(i4_type = 0; i4_type < MAX_PIC_TYPE; i4_type++)
+        {
+            if(0x7FFFFFFF == pi4_prev_qp[i4_type])
+            {
+                pi4_prev_qp[i4_type] = i4_frm_qp;
+                pi4_prev_qp_q6[i4_type] = i4_frm_qp_q6;
+            }
+        }
+    }
+
+    /* au1_is_first_frm_coded[e_pic_type] is deliberately not set here
+       (KISH_ELP); update_frame_level_info() sets it once a frame of that type
+       has gone through the model update path */
+
+    /* Update the variable which denotes that a frame has been encountered */
+    ps_rate_control_api->u1_is_first_frm = 0;
+
+    /* Store the prev encoded picture type for restricting Qp swing */
+    if((I_PIC == e_pic_type) || (P_PIC == e_pic_type))
+    {
+        ps_rate_control_api->prev_ref_pic_type = e_pic_type;
+    }
+
+    return;
+}
+
+/*update previous frame intra sad */
+/****************************************************************************
+Function Name : rc_update_prev_frame_intra_sad
+Description   : API wrapper: forwards the intra SAD of a frame to
+                update_prev_frame_intra_sad() on the SAD estimation module.
+Inputs        : ps_rate_control_api - rate control context
+                i4_intra_frame_sad  - value handed to the SAD estimation module
+Returns       : None
+Revision History:
+DD MM YYYY   Author(s)   Changes (Describe the changes made)
+
+*****************************************************************************/
+void rc_update_prev_frame_intra_sad(
+    rate_control_api_t *ps_rate_control_api, WORD32 i4_intra_frame_sad)
+{
+    update_prev_frame_intra_sad(
+        ps_rate_control_api->ps_est_sad,
+        i4_intra_frame_sad);
+
+    return;
+}
+/****************************************************************************
+Function Name : rc_get_prev_frame_intra_sad
+Description   : API wrapper: reads back the value reported by
+                get_prev_frame_intra_sad() for the SAD estimation module.
+Inputs        : ps_rate_control_api - rate control context
+Returns       : Result of get_prev_frame_intra_sad()
+Revision History:
+DD MM YYYY   Author(s)   Changes (Describe the changes made)
+
+*****************************************************************************/
+WORD32 rc_get_prev_frame_intra_sad(rate_control_api_t *ps_rate_control_api)
+{
+    WORD32 i4_intra_sad = get_prev_frame_intra_sad(ps_rate_control_api->ps_est_sad);
+
+    return i4_intra_sad;
+}
+/*update previous frame sad */
+/****************************************************************************
+Function Name : rc_update_prev_frame_sad
+Description   : API wrapper: forwards the SAD of a frame and its picture type
+                to update_prev_frame_sad() on the SAD estimation module.
+Inputs        : ps_rate_control_api - rate control context
+                i4_frame_sad        - value handed to the SAD estimation module
+                e_pic_type          - picture type the value belongs to
+Returns       : None
+Revision History:
+DD MM YYYY   Author(s)   Changes (Describe the changes made)
+
+*****************************************************************************/
+void rc_update_prev_frame_sad(
+    rate_control_api_t *ps_rate_control_api, WORD32 i4_frame_sad, picture_type_e e_pic_type)
+{
+    update_prev_frame_sad(
+        ps_rate_control_api->ps_est_sad,
+        i4_frame_sad,
+        e_pic_type);
+
+    return;
+}
+/****************************************************************************
+Function Name : rc_get_prev_frame_sad
+Description   : API wrapper: reads back the value reported by
+                get_prev_frame_sad() for the given picture type.
+Inputs        : ps_rate_control_api - rate control context
+                e_pic_type          - picture type to query
+Returns       : Result of get_prev_frame_sad()
+Revision History:
+DD MM YYYY   Author(s)   Changes (Describe the changes made)
+
+*****************************************************************************/
+WORD32 rc_get_prev_frame_sad(rate_control_api_t *ps_rate_control_api, picture_type_e e_pic_type)
+{
+    WORD32 i4_frame_sad = get_prev_frame_sad(ps_rate_control_api->ps_est_sad, e_pic_type);
+
+    return i4_frame_sad;
+}
+
+/****************************************************************************
+Function Name : reset_rc_for_pause_to_play_transition
+Description   : Resets the RD models and the "first frame coded" flags of
+                every picture type from index 1 up to MAX_PIC_TYPE - 1. Index 0
+                is left untouched: the sequence has not changed, only the
+                motion related statistics are expected to be affected.
+Inputs        : ps_rate_control_api - rate control context
+Returns       : None
+Revision History:
+DD MM YYYY   Author(s)   Changes (Describe the changes made)
+
+*****************************************************************************/
+void reset_rc_for_pause_to_play_transition(rate_control_api_t *ps_rate_control_api)
+{
+    WORD32 i4_pic_type;
+
+    /* First pass: reset the RD model of every picture type except index 0 */
+    i4_pic_type = 1;
+    while(i4_pic_type < MAX_PIC_TYPE)
+    {
+        reset_frm_rc_rd_model(ps_rate_control_api->aps_rd_model[i4_pic_type]);
+        i4_pic_type++;
+    }
+
+    /* Second pass: clear the "first frame coded" flag of the same types */
+    i4_pic_type = 1;
+    while(i4_pic_type < MAX_PIC_TYPE)
+    {
+        ps_rate_control_api->au1_is_first_frm_coded[i4_pic_type] = 0;
+        i4_pic_type++;
+    }
+}
+/****************************************************************************
+Function Name : get_rc_target_bits
+Description   : Accessor for i4_prev_frm_est_bits of the rate control context.
+                get_prev_frm_est_bits() reads the same field.
+Inputs        : ps_rate_control_api - rate control context
+Returns       : ps_rate_control_api->i4_prev_frm_est_bits
+Revision History:
+DD MM YYYY   Author(s)   Changes (Describe the changes made)
+
+*****************************************************************************/
+WORD32 get_rc_target_bits(rate_control_api_t *ps_rate_control_api)
+{
+    WORD32 i4_target_bits = ps_rate_control_api->i4_prev_frm_est_bits;
+
+    return i4_target_bits;
+}
+/****************************************************************************
+Function Name : get_orig_rc_target_bits
+Description   : Accessor for i4_orig_frm_est_bits of the rate control context.
+Inputs        : ps_rate_control_api - rate control context
+Returns       : ps_rate_control_api->i4_orig_frm_est_bits
+Revision History:
+DD MM YYYY   Author(s)   Changes (Describe the changes made)
+
+*****************************************************************************/
+WORD32 get_orig_rc_target_bits(rate_control_api_t *ps_rate_control_api)
+{
+    WORD32 i4_orig_target_bits = ps_rate_control_api->i4_orig_frm_est_bits;
+
+    return i4_orig_target_bits;
+}
+
+#if NON_STEADSTATE_CODE
+/******************************************************************************
+MB Level API functions
+******************************************************************************/
+/****************************************************************************
+Function Name : init_mb_rc_frame_level
+Description   : Initialises the frame level details required by the MB level
+                rate control by calling mb_init_frame_level().
+Inputs        : ps_rate_control_api - rate control context
+                u1_frame_qp         - frame QP to be used for the current frame
+Globals       : None
+Processing    : Single call to mb_init_frame_level()
+Outputs       : State behind ps_rate_control_api->ps_mb_rate_control
+Returns       : None
+Issues        :
+Revision History:
+DD MM YYYY   Author(s)   Changes (Describe the changes made)
+
+*****************************************************************************/
+
+void init_mb_rc_frame_level(rate_control_api_t *ps_rate_control_api, UWORD8 u1_frame_qp)
+{
+    mb_init_frame_level(
+        ps_rate_control_api->ps_mb_rate_control,
+        u1_frame_qp);
+
+    return;
+}
+#endif /* #if NON_STEADSTATE_CODE */
+
+/****************************************************************************
+Function Name : get_bits_to_stuff
+Description   : Gets the number of bits to stuff to prevent underflow of the
+                encoder buffer, as computed by get_cbr_bits_to_stuff().
+Inputs        : ps_rate_control_api  - rate control context
+                i4_tot_consumed_bits - total bits consumed by the frame
+                e_pic_type           - picture type of the frame
+Globals       : None
+Processing    : Single call to get_cbr_bits_to_stuff() on the CBR buffer
+Outputs       : None
+Returns       : Number of bits to stuff
+Issues        :
+Revision History:
+DD MM YYYY   Author(s)   Changes (Describe the changes made)
+
+*****************************************************************************/
+WORD32 get_bits_to_stuff(
+    rate_control_api_t *ps_rate_control_api, WORD32 i4_tot_consumed_bits, picture_type_e e_pic_type)
+{
+    return get_cbr_bits_to_stuff(
+        ps_rate_control_api->ps_cbr_buffer,
+        i4_tot_consumed_bits,
+        e_pic_type);
+}
+
+/****************************************************************************
+Function Name : get_prev_frm_est_bits
+Description   : Returns the previous frame estimated bits stored in the rate
+                control context. get_rc_target_bits() reads the same field.
+Inputs        : ps_rate_control_api - rate control context
+Globals       : None
+Processing    : Field read only
+Outputs       : None
+Returns       : ps_rate_control_api->i4_prev_frm_est_bits
+Issues        :
+Revision History:
+DD MM YYYY   Author(s)   Changes (Describe the changes made)
+
+*****************************************************************************/
+WORD32 get_prev_frm_est_bits(rate_control_api_t *ps_rate_control_api)
+{
+    WORD32 i4_est_bits = ps_rate_control_api->i4_prev_frm_est_bits;
+
+    return i4_est_bits;
+}
+
+/****************************************************************************
+Function Name : change_frm_rate_for_bit_alloc
+Description   : Does the necessary changes only in the bit allocation module
+                when there is a change in frame rate. Nothing is done when the
+                rate control type is CONST_QP.
+Inputs        : ps_rate_control_api - rate control context
+                u4_frame_rate       - new frame rate to be used
+Globals       : None
+Processing    : Calls change_remaining_bits_in_period() with the bit rate
+                currently reported by ba_get_bit_rate(), the new frame rate and
+                the stored peak bit rates
+Outputs       : State behind ps_rate_control_api->ps_bit_allocation
+Returns       : None
+Issues        :
+Revision History:
+DD MM YYYY   Author(s)   Changes (Describe the changes made)
+
+*****************************************************************************/
+void change_frm_rate_for_bit_alloc(rate_control_api_t *ps_rate_control_api, UWORD32 u4_frame_rate)
+{
+    if(CONST_QP == ps_rate_control_api->e_rc_type)
+    {
+        return;
+    }
+
+    /* Bit Allocation Module: distribute the excess/deficit bits between the
+    old and the new frame rate to all the remaining frames */
+    change_remaining_bits_in_period(
+        ps_rate_control_api->ps_bit_allocation,
+        ba_get_bit_rate(ps_rate_control_api->ps_bit_allocation),
+        u4_frame_rate,
+        (WORD32 *)(ps_rate_control_api->au4_new_peak_bit_rate));
+}
+
+/****************************************************************************
+Function Name : rc_get_rem_bits_in_period
+Description   : API call to get the remaining bits in the current period, as
+                reported by get_rem_bits_in_period().
+Inputs        : ps_rate_control_api - rate control context
+Returns       : Result of get_rem_bits_in_period()
+*****************************************************************************/
+WORD32 rc_get_rem_bits_in_period(rate_control_api_t *ps_rate_control_api)
+{
+    WORD32 i4_rem_bits = get_rem_bits_in_period(
+        ps_rate_control_api->ps_bit_allocation, ps_rate_control_api->ps_pic_handling);
+
+    return i4_rem_bits;
+}
+
+/****************************************************************************
+Function Name : flush_buf_frames
+Description   : API call to flush the buffered up frames; forwards to
+                flush_frame_from_pic_stack() on the picture handling module.
+Inputs        : ps_rate_control_api - rate control context
+Globals       : None
+Processing    : Single call to flush_frame_from_pic_stack()
+Outputs       : State behind ps_rate_control_api->ps_pic_handling
+Returns       : None
+Issues        :
+Revision History:
+DD MM YYYY   Author(s)   Changes (Describe the changes made)
+*****************************************************************************/
+void flush_buf_frames(rate_control_api_t *ps_rate_control_api)
+{
+    flush_frame_from_pic_stack(ps_rate_control_api->ps_pic_handling);
+
+    return;
+}
+
+/****************************************************************************
+Function Name : rc_get_prev_header_bits
+Description   : API wrapper: returns the value reported by
+                get_prev_header_bits() for the given picture type.
+Inputs        : ps_rate_control_api - rate control context
+                pic_type            - picture type to query
+Returns       : Result of get_prev_header_bits()
+Revision History:
+DD MM YYYY   Author(s)   Changes (Describe the changes made)
+
+*****************************************************************************/
+WORD32 rc_get_prev_header_bits(rate_control_api_t *ps_rate_control_api, WORD32 pic_type)
+{
+    WORD32 i4_hdr_bits = get_prev_header_bits(ps_rate_control_api->ps_bit_allocation, pic_type);
+
+    return i4_hdr_bits;
+}
+/****************************************************************************
+Function Name : rc_get_prev_P_QP
+Description   : Reads the previous P picture QP of a scene and scales it by
+                i4_P_to_I_ratio, a fixed point factor with
+                P_TO_I_RATIO_Q_FACTOR fractional bits. Half an LSB is added
+                before the shift so that the result is rounded.
+Inputs        : ps_rate_control_api - rate control context
+                i4_scene_num        - row of the previous-frame QP table
+Returns       : Scaled and rounded previous P picture QP
+Revision History:
+DD MM YYYY   Author(s)   Changes (Describe the changes made)
+
+*****************************************************************************/
+WORD32 rc_get_prev_P_QP(rate_control_api_t *ps_rate_control_api, WORD32 i4_scene_num)
+{
+    WORD32 i4_p_qp = ps_rate_control_api->ai4_prev_frm_qp[i4_scene_num][P_PIC];
+    WORD32 i4_scaled_qp;
+
+    i4_scaled_qp =
+        (ps_rate_control_api->i4_P_to_I_ratio * i4_p_qp + (1 << (P_TO_I_RATIO_Q_FACTOR - 1))) >>
+        P_TO_I_RATIO_Q_FACTOR;
+
+    return i4_scaled_qp;
+}
+/****************************************************************************
+Function Name : rc_put_sad
+Description   : API wrapper: hands the intra SAD, the SAD and the picture type
+                of the current frame to sad_acc_put_sad().
+Inputs        : ps_rate_control_api - rate control context
+                i4_cur_intra_sad    - intra SAD of the current frame
+                i4_cur_sad          - SAD of the current frame
+                i4_cur_pic_type     - picture type of the current frame
+Returns       : None
+Revision History:
+DD MM YYYY   Author(s)   Changes (Describe the changes made)
+
+*****************************************************************************/
+void rc_put_sad(
+    rate_control_api_t *ps_rate_control_api,
+    WORD32 i4_cur_intra_sad,
+    WORD32 i4_cur_sad,
+    WORD32 i4_cur_pic_type)
+{
+    sad_acc_put_sad(
+        ps_rate_control_api->ps_sad_acc,
+        i4_cur_intra_sad,
+        i4_cur_sad,
+        i4_cur_pic_type);
+
+    return;
+}
+/****************************************************************************
+Function Name : rc_get_sad
+Description   : API wrapper: asks sad_acc_get_sad() to fill pi4_sad from the
+                SAD accumulator of this rate control context.
+Inputs        : ps_rate_control_api - rate control context
+                pi4_sad             - destination, passed through unchanged
+Returns       : None
+Revision History:
+DD MM YYYY   Author(s)   Changes (Describe the changes made)
+
+*****************************************************************************/
+void rc_get_sad(rate_control_api_t *ps_rate_control_api, WORD32 *pi4_sad)
+{
+    sad_acc_get_sad(
+        ps_rate_control_api->ps_sad_acc,
+        pi4_sad);
+
+    return;
+}
+/****************************************************************************
+Function Name : rc_update_ppic_sad
+Description   : API wrapper around update_ppic_sad() on the SAD estimation
+                module.
+Inputs        : ps_rate_control_api - rate control context
+                i4_est_sad          - passed through to update_ppic_sad()
+                i4_prev_ppic_sad    - passed through to update_ppic_sad()
+Returns       : Result of update_ppic_sad()
+Revision History:
+DD MM YYYY   Author(s)   Changes (Describe the changes made)
+
+*****************************************************************************/
+WORD32 rc_update_ppic_sad(
+    rate_control_api_t *ps_rate_control_api, WORD32 i4_est_sad, WORD32 i4_prev_ppic_sad)
+{
+    WORD32 i4_status =
+        update_ppic_sad(ps_rate_control_api->ps_est_sad, i4_est_sad, i4_prev_ppic_sad);
+
+    return i4_status;
+}
+/****************************************************************************
+Function Name : change_avg_bit_rate
+Description   : Whenever the average bit rate changes, the excess bits between
+                the changed bit rate and the old one are re-distributed in the
+                bit allocation module, and the CBR buffer is told about the
+                new rates.
+Inputs        : ps_rate_control_api - rate control context
+                u4_average_bit_rate - new average bit rate to be used
+                u4_peak_bit_rate    - new peak bit rate to be used; replaced
+                                      by the average bit rate for CBR_NLDRC
+Globals       : None
+Processing    : 1) Unless the RC type is CONST_QP, store the new peak bit rate
+                   and update the bit allocation module.
+                2) For every RC type, update the CBR buffer bit rates.
+                3) Unless the first frame is still pending, flag the bit rate
+                   change for every picture type.
+Outputs       : State reachable from ps_rate_control_api
+Returns       : None
+Issues        :
+Revision History:
+DD MM YYYY   Author(s)   Changes (Describe the changes made)
+
+*****************************************************************************/
+void change_avg_bit_rate(
+    rate_control_api_t *ps_rate_control_api, UWORD32 u4_average_bit_rate, UWORD32 u4_peak_bit_rate)
+{
+    int idx;
+    UWORD32 au4_drain_rate[MAX_NUM_DRAIN_RATES];
+
+    if(ps_rate_control_api->e_rc_type != CONST_QP)
+    {
+        /* For CBR_NLDRC the peak bit rate is the average bit rate itself */
+        UWORD32 u4_new_peak = (ps_rate_control_api->e_rc_type == CBR_NLDRC) ? u4_average_bit_rate
+                                                                            : u4_peak_bit_rate;
+
+        ps_rate_control_api->au4_new_peak_bit_rate[0] = u4_new_peak;
+        ps_rate_control_api->au4_new_peak_bit_rate[1] = u4_new_peak;
+
+        /* Bit Allocation Module: distribute the excess/deficit bits between the
+        old and the new bit rate to all the remaining frames */
+        change_remaining_bits_in_period(
+            ps_rate_control_api->ps_bit_allocation,
+            u4_average_bit_rate,
+            ba_get_frame_rate(ps_rate_control_api->ps_bit_allocation),
+            (WORD32 *)(ps_rate_control_api->au4_new_peak_bit_rate));
+    }
+
+    /* DYNAMIC_RC: done for every RC type. Every drain rate entry is set to the
+       new average bit rate; the peak passed along is whatever
+       au4_new_peak_bit_rate[0] currently holds */
+    idx = 0;
+    while(idx < MAX_NUM_DRAIN_RATES)
+    {
+        au4_drain_rate[idx] = u4_average_bit_rate;
+        idx++;
+    }
+    change_cbr_vbv_bit_rate(
+        ps_rate_control_api->ps_cbr_buffer,
+        (WORD32 *)(au4_drain_rate),
+        (WORD32)ps_rate_control_api->au4_new_peak_bit_rate[0]);
+
+    /* The flags below are raised only when the bit rate changes after the
+       first frame has been encountered. Without this check a change made
+       before the first frame, while the model has not stabilised, would leave
+       the QP unrestricted and it could move to very bad values after the
+       init QP. It is assumed the user does not make this call within the
+       first few frames. */
+    if(ps_rate_control_api->u1_is_first_frm != 0)
+    {
+        return;
+    }
+
+    for(idx = 0; idx < MAX_PIC_TYPE; idx++)
+    {
+        /* This also makes sure the qp swing restrictions won't be applied at
+           the boundary of a bitrate change */
+        ps_rate_control_api->au1_avg_bitrate_changed[idx] = 1;
+    }
+}
+
+#if NON_STEADSTATE_CODE
+/******************************************************************************
+Control Level API functions
+Logic: The control call sets the state structure of the rate control api
+accordingly such that the next process call would implement the same.
+******************************************************************************/
+
+/******************************************************************************
+Function Name : change_inter_frm_int_call
+Description   : Registers a new inter frame interval with the picture
+                handling module via
+                pic_handling_register_new_inter_frm_interval().
+Arguments     : ps_rate_control_api - rate control state (reads
+                                      ps_pic_handling)
+                i4_inter_frm_int    - new inter frame interval, forwarded
+                                      as is
+Return Values : void
+*****************************************************************************/
+void change_inter_frm_int_call(rate_control_api_t *ps_rate_control_api, WORD32 i4_inter_frm_int)
+{
+    pic_handling_register_new_inter_frm_interval(
+        ps_rate_control_api->ps_pic_handling,
+        i4_inter_frm_int);
+}
+/******************************************************************************
+Function Name : change_intra_frm_int_call
+Description   : Registers a new intra frame interval with the picture
+                handling module and, in VBR_STREAMING mode only, also
+                updates the VBR streaming parameters through
+                change_vsp_ifi().
+Arguments     : ps_rate_control_api - rate control state
+                i4_intra_frm_int    - new intra frame interval
+Return Values : void
+*****************************************************************************/
+void change_intra_frm_int_call(rate_control_api_t *ps_rate_control_api, WORD32 i4_intra_frm_int)
+{
+    pic_handling_register_new_int_frm_interval(
+        ps_rate_control_api->ps_pic_handling, i4_intra_frm_int);
+
+    /* change_vsp_ifi() is only invoked for the VBR_STREAMING rc type */
+    if(ps_rate_control_api->e_rc_type != VBR_STREAMING)
+    {
+        return;
+    }
+    change_vsp_ifi(&ps_rate_control_api->s_vbr_str_prms, i4_intra_frm_int);
+}
+
+/****************************************************************************
+Function Name : change_frame_rate
+Description : Does the necessary changes whenever there is a change in
+frame rate. Nothing is done when the rc type is CONST_QP.
+Inputs : u4_frame_rate - new frame rate to be used
+ : u4_src_ticks, u4_tgt_ticks - forwarded to the VBR streaming params
+Globals : none
+Processing : updates the VBV / CBR buffer model that matches the rc type, then
+ calls change_remaining_bits_in_period() with the current bit rate
+Outputs : none besides the state reached through ps_rate_control_api
+Returns : void
+Issues : NOTE(review): in the VBR_STREAMING branch au4_num_pics_in_delay_prd
+ is handed on without being written in this function - confirm and fix
+*****************************************************************************/
+void change_frame_rate(
+ rate_control_api_t *ps_rate_control_api,
+ UWORD32 u4_frame_rate,
+ UWORD32 u4_src_ticks,
+ UWORD32 u4_tgt_ticks)
+{
+ if(ps_rate_control_api->e_rc_type != CONST_QP)
+ {
+ UWORD32 u4_frms_in_delay_prd = /* only consumed by the VBR_STREAMING branch; 32-bit product */
+ ((u4_frame_rate * get_cbr_buffer_delay(ps_rate_control_api->ps_cbr_buffer)) / 1000000);
+ if((ps_rate_control_api->e_rc_type == VBR_STORAGE) ||
+ (ps_rate_control_api->e_rc_type == VBR_STORAGE_DVD_COMP))
+ {
+ change_vbr_vbv_frame_rate(ps_rate_control_api->ps_vbr_storage_vbv, u4_frame_rate);
+ }
+ else if(ps_rate_control_api->e_rc_type == CBR_NLDRC)
+ {
+ change_cbr_vbv_tgt_frame_rate(ps_rate_control_api->ps_cbr_buffer, u4_frame_rate);
+ }
+ else if(ps_rate_control_api->e_rc_type == VBR_STREAMING)
+ {
+ UWORD32 au4_num_pics_in_delay_prd[MAX_PIC_TYPE]; /* NOTE(review): never assigned below */
+ change_vsp_tgt_ticks(&ps_rate_control_api->s_vbr_str_prms, u4_tgt_ticks);
+ change_vsp_src_ticks(&ps_rate_control_api->s_vbr_str_prms, u4_src_ticks);
+ change_vsp_fidp(&ps_rate_control_api->s_vbr_str_prms, u4_frms_in_delay_prd);
+
+ change_cbr_vbv_tgt_frame_rate(ps_rate_control_api->ps_cbr_buffer, u4_frame_rate);
+ change_cbr_vbv_num_pics_in_delay_period(
+ ps_rate_control_api->ps_cbr_buffer, au4_num_pics_in_delay_prd);
+ }
+
+ /* Bit Allocation Module: distribute the excess/deficit bits between the
+ old and the new frame rate to all the remaining frames */
+ change_remaining_bits_in_period(
+ ps_rate_control_api->ps_bit_allocation,
+ ba_get_bit_rate(ps_rate_control_api->ps_bit_allocation),
+ u4_frame_rate,
+ (WORD32 *)(ps_rate_control_api->au4_new_peak_bit_rate));
+ }
+}
+/****************************************************************************
+Function Name : change_init_qp
+Description   : Copies MAX_PIC_TYPE caller supplied qps into the row of
+                ai4_prev_frm_qp selected by i4_scene_num (not range checked).
+Inputs        : ps_rate_control_api, pi4_init_qp, i4_scene_num
+*****************************************************************************/
+void change_init_qp(
+    rate_control_api_t *ps_rate_control_api, WORD32 *pi4_init_qp, WORD32 i4_scene_num)
+{
+    WORD32 i4_pic_type = 0;
+
+    while(i4_pic_type < MAX_PIC_TYPE)
+    {
+        ps_rate_control_api->ai4_prev_frm_qp[i4_scene_num][i4_pic_type] =
+            pi4_init_qp[i4_pic_type];
+        i4_pic_type++;
+    }
+}
+
+/****************************************************************************
+Function Name : change_min_max_qp
+Description   : Loads per picture type min/max qp from an interleaved array
+                (min at even index 2*i, max at odd index 2*i+1) and hands
+                the same array to change_init_qp_max_qp().
+Inputs        : ps_rate_control_api, pi4_min_max_qp (2 * MAX_PIC_TYPE values)
+*****************************************************************************/
+void change_min_max_qp(rate_control_api_t *ps_rate_control_api, WORD32 *pi4_min_max_qp)
+{
+    WORD32 i4_pic_type;
+
+    for(i4_pic_type = 0; i4_pic_type < MAX_PIC_TYPE; i4_pic_type++)
+    {
+        WORD32 *pi4_pair = &pi4_min_max_qp[2 * i4_pic_type];
+        ps_rate_control_api->ai4_min_qp[i4_pic_type] = pi4_pair[0];
+        ps_rate_control_api->ai4_max_qp[i4_pic_type] = pi4_pair[1];
+    }
+    change_init_qp_max_qp(ps_rate_control_api->ps_init_qp, pi4_min_max_qp);
+}
+/****************************************************************************
+Function Name : rc_get_frame_rate
+Description   : Getter: reports the frame rate currently held by the bit
+                allocation module (ba_get_frame_rate()).
+Inputs        : ps_rate_control_api - rate control state
+Returns       : value of ba_get_frame_rate(), converted to UWORD32
+*****************************************************************************/
+/* Getter functions to get the current rate control parameters */
+UWORD32 rc_get_frame_rate(rate_control_api_t *ps_rate_control_api)
+{
+    UWORD32 u4_frame_rate = ba_get_frame_rate(ps_rate_control_api->ps_bit_allocation);
+    return u4_frame_rate;
+}
+/****************************************************************************
+Function Name : rc_get_bit_rate
+Description   : Getter: reports the bit rate currently held by the bit
+                allocation module (ba_get_bit_rate()).
+Inputs        : ps_rate_control_api - rate control state
+Returns       : value of ba_get_bit_rate(), converted to UWORD32
+*****************************************************************************/
+UWORD32 rc_get_bit_rate(rate_control_api_t *ps_rate_control_api)
+{
+    UWORD32 u4_bit_rate = ba_get_bit_rate(ps_rate_control_api->ps_bit_allocation);
+    return u4_bit_rate;
+}
+/****************************************************************************
+Function Name : rc_get_peak_bit_rate
+Description   : Reads one entry of the stored peak bit rates.
+Inputs        : ps_rate_control_api - rate control state
+                i4_index - entry of au4_new_peak_bit_rate to read; the
+                           index is used as is, without a range check
+*****************************************************************************/
+UWORD32 rc_get_peak_bit_rate(rate_control_api_t *ps_rate_control_api, WORD32 i4_index)
+{
+    UWORD32 u4_peak = ps_rate_control_api->au4_new_peak_bit_rate[i4_index];
+    return u4_peak;
+}
+/****************************************************************************
+Function Name : rc_get_intra_frame_interval
+Description   : Getter: asks the picture handling module for the intra frame
+                interval through pic_type_get_intra_frame_interval().
+Inputs        : ps_rate_control_api - rate control state
+Returns       : that interval, converted to UWORD32
+*****************************************************************************/
+UWORD32 rc_get_intra_frame_interval(rate_control_api_t *ps_rate_control_api)
+{
+    return pic_type_get_intra_frame_interval(
+        ps_rate_control_api->ps_pic_handling);
+}
+/****************************************************************************
+Function Name : rc_get_inter_frame_interval
+Description   : Getter: asks the picture handling module for the inter frame
+                interval through pic_type_get_inter_frame_interval().
+Inputs        : ps_rate_control_api - rate control state
+Returns       : that interval, converted to UWORD32
+*****************************************************************************/
+UWORD32 rc_get_inter_frame_interval(rate_control_api_t *ps_rate_control_api)
+{
+    return pic_type_get_inter_frame_interval(
+        ps_rate_control_api->ps_pic_handling);
+}
+/****************************************************************************
+Function Name : rc_get_rc_type
+Description   : Getter: reports the rate control type stored in the rate
+                control state (the e_rc_type field).
+Inputs        : ps_rate_control_api - rate control state
+Returns       : ps_rate_control_api->e_rc_type
+*****************************************************************************/
+rc_type_e rc_get_rc_type(rate_control_api_t *ps_rate_control_api)
+{
+    rc_type_e e_type = ps_rate_control_api->e_rc_type;
+    return e_type;
+}
+/****************************************************************************
+Function Name : rc_get_bits_per_frame
+Description   : Derives the bits per frame from the bit rate and frame rate
+                held by the bit allocation module, using X_PROD_Y_DIV_Z
+                (presumably bit_rate * 1000 / frame_rate - confirm against
+                the macro definition).
+Inputs        : ps_rate_control_api - rate control state
+Returns       : the value the macro stores in its output argument
+*****************************************************************************/
+WORD32 rc_get_bits_per_frame(rate_control_api_t *ps_rate_control_api)
+{
+    WORD32 i4_frm_bits;
+
+    X_PROD_Y_DIV_Z(
+        ba_get_bit_rate(ps_rate_control_api->ps_bit_allocation),
+        (UWORD32)1000,
+        ba_get_frame_rate(ps_rate_control_api->ps_bit_allocation),
+        i4_frm_bits);
+    return i4_frm_bits;
+}
+/****************************************************************************
+Function Name : rc_get_max_delay
+Description   : Getter: reports the delay of the CBR buffer model as given
+                by get_cbr_buffer_delay().
+Inputs        : ps_rate_control_api - rate control state
+Returns       : value of get_cbr_buffer_delay(), converted to UWORD32
+*****************************************************************************/
+UWORD32 rc_get_max_delay(rate_control_api_t *ps_rate_control_api)
+{
+    UWORD32 u4_delay = get_cbr_buffer_delay(ps_rate_control_api->ps_cbr_buffer);
+    return u4_delay;
+}
+/****************************************************************************
+Function Name : rc_get_seq_no
+Description   : Getter: reports the display order number kept by the picture
+                handling module (pic_type_get_disp_order_no()).
+Inputs        : ps_rate_control_api - rate control state
+Returns       : that number, converted to UWORD32
+*****************************************************************************/
+UWORD32 rc_get_seq_no(rate_control_api_t *ps_rate_control_api)
+{
+    return pic_type_get_disp_order_no(
+        ps_rate_control_api->ps_pic_handling);
+}
+/****************************************************************************
+Function Name : rc_get_rem_frames_in_gop
+Description   : Getter: reports the number of frames remaining in the gop as
+                given by pic_type_get_rem_frms_in_gop().
+Inputs        : ps_rate_control_api - rate control state
+Returns       : that count, converted to UWORD32
+*****************************************************************************/
+UWORD32 rc_get_rem_frames_in_gop(rate_control_api_t *ps_rate_control_api)
+{
+    /* The count is read from the pic_type (picture handling) state struct */
+    UWORD32 u4_rem_frms = pic_type_get_rem_frms_in_gop(ps_rate_control_api->ps_pic_handling);
+    return u4_rem_frms;
+}
+
+/****************************************************************************
+Function Name : post_encode_frame_skip
+Description   : Forwards the picture type to skip_encoded_frame() of the
+                picture handling module (presumably called once a frame has
+                been skipped after encoding - confirm with the callers).
+Inputs        : ps_rate_control_api - rate control state
+                e_pic_type          - picture type passed to the callee
+Globals       : none
+Outputs       : none
+Returns       : void
+*****************************************************************************/
+void post_encode_frame_skip(rate_control_api_t *ps_rate_control_api, picture_type_e e_pic_type)
+{
+    skip_encoded_frame(
+        ps_rate_control_api->ps_pic_handling, e_pic_type);
+}
+
+/* Function Name : force_I_frame
+   Description   : API call to force an I frame; it only calls
+                   set_force_I_frame_flag() on the picture handling module. */
+void force_I_frame(rate_control_api_t *ps_rate_control_api)
+{
+    set_force_I_frame_flag(
+        ps_rate_control_api->ps_pic_handling);
+}
+
+/* Function Name : rc_get_vbv_buf_fullness
+   Description   : API call to get VBV buffer fullness; the value comes from
+                   get_cur_vbv_buf_size() on the VBR storage VBV. */
+WORD32 rc_get_vbv_buf_fullness(rate_control_api_t *ps_rate_control_api)
+{
+    WORD32 i4_fullness = get_cur_vbv_buf_size(ps_rate_control_api->ps_vbr_storage_vbv);
+    return i4_fullness;
+}
+/* Function Name : rc_get_cur_peak_factor_2pass
+   Description   : API call to get the current peak factor, as reported by
+                   get_cur_peak_factor_2pass() of the bit allocation module. */
+float rc_get_cur_peak_factor_2pass(rate_control_api_t *ps_rate_control_api)
+{
+    return get_cur_peak_factor_2pass(
+        ps_rate_control_api->ps_bit_allocation);
+}
+/* Function Name : rc_get_min_complexity_factor_2pass
+   Description   : API call to get the minimum complexity factor, as reported
+                   by get_cur_min_complexity_factor_2pass() (bit allocation). */
+float rc_get_min_complexity_factor_2pass(rate_control_api_t *ps_rate_control_api)
+{
+    return get_cur_min_complexity_factor_2pass(
+        ps_rate_control_api->ps_bit_allocation);
+}
+/* Function Name : rc_get_vbv_buf_size
+   Description   : API call to get VBV buffer size; the value comes from
+                   get_cbr_buffer_size() on the CBR buffer model. */
+WORD32 rc_get_vbv_buf_size(rate_control_api_t *ps_rate_control_api)
+{
+    WORD32 i4_buf_size = get_cbr_buffer_size(ps_rate_control_api->ps_cbr_buffer);
+    return i4_buf_size;
+}
+/* Function Name : rc_get_vbv_fulness_with_cur_bits
+   Description   : API call to get VBV buffer fullness with current bits;
+                   forwards u4_bits to get_vbv_buf_fullness(). */
+WORD32 rc_get_vbv_fulness_with_cur_bits(rate_control_api_t *ps_rate_control_api, UWORD32 u4_bits)
+{
+    return get_vbv_buf_fullness(
+        ps_rate_control_api->ps_vbr_storage_vbv, u4_bits);
+}
+/****************************************************************************
+ * Function Name : rc_set_avg_mb_act
+ * Description   : Hands the average activity to mb_update_frame_level() of
+ *                 the MB rate control module held in the rate control state.
+ ******************************************************************************/
+void rc_set_avg_mb_act(rate_control_api_t *ps_rate_control_api, WORD32 i4_avg_activity)
+{
+    mb_update_frame_level(ps_rate_control_api->ps_mb_rate_control, i4_avg_activity);
+}
+/* Function Name : rc_init_set_ebf
+   Description   : API call to set EBF; forwards i32_init_ebf to
+                   set_cbr_ebf() on the CBR buffer model. */
+void rc_init_set_ebf(rate_control_api_t *ps_rate_control_api, WORD32 i32_init_ebf)
+{
+    set_cbr_ebf(
+        ps_rate_control_api->ps_cbr_buffer, i32_init_ebf);
+}
+#endif /* #if NON_STEADSTATE_CODE */
+
+/****************************************************************************
+Function Name : rc_get_qp_scene_change_bits
+Description : HEVC specific function to get scene change qp at scene cut location.
+ Tries every QP in [i4_min_Qp, i4_max_Qp), models the texture bits with the
+ cubic in offline_model_coeff and keeps the q-scale whose bits are closest
+ to i4_total_bits. Returns that q-scale, clipped, times (1 << QSCALE_Q_FAC_3).
+Inputs : ps_rate_control_api; offline_model_coeff is read as four doubles
+ *****************************************************************************/
+
+WORD32 rc_get_qp_scene_change_bits(
+ rate_control_handle ps_rate_control_api,
+ WORD32 i4_total_bits,
+ LWORD64 i8_satd_by_act_accum,
+ WORD32 i4_num_pixel,
+ void *offline_model_coeff,
+ float f_i_to_average_rest,
+ WORD32 i4_call_type)
+{
+ float f_trial_q_scale;
+ WORD32 i4_tex_bits = 0, i4_header_bits = 0;
+ WORD32 error = 0, min_error = 0x7FFFFFFF, i4_is_high_bitrate = 0;
+ double *model_coeff, min_error_q_scale = (double)127;
+ double min_scd_qscale, max_scd_q_scale;
+ WORD32 i4_QP, i4_max_Qp, i4_min_Qp, i4_qp_selection_flag = 0;
+ WORD32 i4_prev_best = -1;
+
+ /*The qp calculation here is based on offline generated stats for around 30 frames belonging to different scenes.
+ The I only mode of encode was done for the above sequences for qp range {8,51}. A quadratic and a cubic curve were obtained
+ based on the stats generated.
+ Equation coefficients and operands of the fitted curve follow*/
+ float coeff_a, coeff_b, coeff_c, coeff_d, X, tex_bpp;
+ float min_qp_qscale_multiplier =
+ 1; /*For fade-in fade-out case where scene starts with blank frame have higher min frame qp*/
+ /* satd/act per pixel; assigned after the optional bit depth scaling below */
+ float normal_satd_act;
+ float bpp = (float)get_bits_per_frame(ps_rate_control_api->ps_bit_allocation) / i4_num_pixel;
+
+ if(i4_num_pixel > 5000000) /*UHD*/
+ {
+ if(bpp > 0.12) /*30mbp 2160 30p*/
+ i4_is_high_bitrate = 1;
+ else if(bpp > 0.06)
+ i4_is_high_bitrate = 2;
+ else if(bpp > 0.03)
+ i4_is_high_bitrate = 3;
+ }
+ else
+ {
+ if(bpp > 0.16) /*10mbps 1080 30p*/
+ i4_is_high_bitrate = 1;
+ else if(bpp > 0.08)
+ i4_is_high_bitrate = 2;
+ else if(bpp > 0.04)
+ i4_is_high_bitrate = 3;
+ }
+ /*Min qp and Max qp at scene cut is critical since offline models are not always reliable*/
+ /*During fade-in fade-out when LAP places I frame on blank pictures but the content slowly changes to complicated content, due to low
+ spatial complexity of I pic a very low SCD qp will be allocated, qp swing restriction will not give enough frames to increase qp to high value
+ to encode such fast motion inter pictures. Hence whenever temporal complexity is very high assume some least spatial complexity so that very low qp
+ is not chosen*/
+ if(f_i_to_average_rest < I_TO_REST_VVFAST &&
+ (i4_is_high_bitrate !=
+ 1)) /*The I_TO_AVERAGE RATIO generally comes very low, hence this won't be a measure of the extent of motion in inter pictures*/
+ {
+ WORD32 i4_min_num_pixel = i4_num_pixel;
+
+ if(i4_num_pixel > 5000000)
+ {
+ i4_min_num_pixel = i4_min_num_pixel / 2;
+ }
+
+ if(i8_satd_by_act_accum <
+ i4_num_pixel) /*In very fast motion case have min threshold for I frame, assume at least one unit per pixel sad*/
+ {
+ if(i4_is_high_bitrate == 2)
+ {
+ i8_satd_by_act_accum = (LWORD64)(i4_min_num_pixel / 2);
+ }
+ else if(i4_is_high_bitrate == 3)
+ {
+ i8_satd_by_act_accum = (LWORD64)(i4_min_num_pixel * 3.0f / 4.0f);
+ }
+ else
+ i8_satd_by_act_accum = (LWORD64)(i4_min_num_pixel);
+
+ min_qp_qscale_multiplier = (float)pow(
+ (float)1.125f,
+ (WORD32)6); //this will make min qp for simple frame with high motion 24 instead of 18
+ }
+ }
+ min_scd_qscale = pow(2, (double)(ps_rate_control_api->u4_min_scd_hevc_qp - 4) / 6) *
+ min_qp_qscale_multiplier;
+ max_scd_q_scale = pow(2, (double)(SCD_MAX_HEVC_QP - 4) / 6);
+ i4_max_Qp = MAX_HEVC_QP;
+ i4_min_Qp = ps_rate_control_api->u4_min_scd_hevc_qp;
+ if((ps_rate_control_api->u1_bit_depth > 8) && (i4_call_type == 1))
+ {
+ i8_satd_by_act_accum = i8_satd_by_act_accum << (ps_rate_control_api->u1_bit_depth - 8);
+ i4_max_Qp = i4_max_Qp + (6 * (ps_rate_control_api->u1_bit_depth - 8));
+ i4_min_Qp = i4_min_Qp + (6 * (ps_rate_control_api->u1_bit_depth - 8));
+ max_scd_q_scale = max_scd_q_scale * (1 << (ps_rate_control_api->u1_bit_depth - 8));
+ }
+
+ normal_satd_act = (float)i8_satd_by_act_accum / i4_num_pixel;
+
+ {
+ /* Max satd/act at L0 was taken at qp 18 for
+ 480p - 4410520
+ 720p - 9664235
+ 1080p - 15735650
+ 4k - 50316472
+ A curve was generated using these points
+ */
+
+ float f_satd_by_Act_norm = GET_L0_SATD_BY_ACT_MAX_PER_PIXEL(i4_num_pixel);
+ float f_weigh_factor = 0.0f;
+ f_satd_by_Act_norm = f_satd_by_Act_norm * 0.75f;
+ f_weigh_factor = GET_WEIGH_FACTOR_FOR_MIN_SCD_Q_SCALE(normal_satd_act, f_satd_by_Act_norm);
+ CLIP(f_weigh_factor, 1.0f, 1.0f / MULT_FACTOR_SATD);
+ min_scd_qscale = min_scd_qscale * f_weigh_factor;
+ CLIP(min_scd_qscale, max_scd_q_scale, 1);
+ }
+
+ /*coeff value based on input resolution
+ 1920x1080 -> 2073600,1280x720->921600,720x480->345600(unlike for I_REST_AVG_BIT_RATIO here 720x480 was considered as low resolution)
+ ultra high res = num_pixel > 5000000
+ high_res = num_pixel > 1500000
+ mid res = num_pixel > 600000
+ low_res = num_pixel < 600000
+ The fit is based on HEVC qp value between 18 and 48 inclusive
+ */
+ /*adding coeff for ultra HD resolution*/
+ /*
+ High quality bpp vs nor satd/act/qp
+ --------------------------------------
+ 480p y = -0.1823x3 + 0.5258x2 + 1.7707x - 0.0394
+ 720p y = -0.1458x3 + 0.4039x2 + 1.8817x - 0.0648
+ 1080p y = -0.4712x3 + 1.3818x2 + 1.2797x - 0.0262
+ 2160p y = -1.1234x3 + 2.6328x2 + 0.8817x - 0.0047
+
+
+ Medium speed
+ ------------
+ 480p y = -0.1567x3 + 0.4222x2 + 1.8899x - 0.0537
+ 720p y = -0.1417x3 + 0.3699x2 + 1.9611x - 0.0766
+ 1080p y = -0.4841x3 + 1.4123x2 + 1.2981x - 0.0321
+ 2160p y = -1.1989x3 + 2.7935x2 + 0.8648x - 0.0074
+
+ High speed
+ -------------
+ 480p y = -0.1611x3 + 0.4418x2 + 1.8754x - 0.0524
+ 720p y = -0.1455x3 + 0.3854x2 + 1.951x - 0.0753
+ 1080p y = -0.4908x3 + 1.4344x2 + 1.2848x - 0.031
+ 2160p y = -1.2037x3 + 2.8062x2 + 0.8551x - 0.0067
+ */
+ model_coeff = (double *)offline_model_coeff;
+ coeff_a = (float)model_coeff[0];
+ coeff_b = (float)model_coeff[1];
+ coeff_c = (float)model_coeff[2];
+ coeff_d = (float)model_coeff[3];
+ for(i4_QP = i4_min_Qp; i4_QP < i4_max_Qp; i4_QP++)
+ {
+ /*needs to use the array for qp to qscale */
+
+ f_trial_q_scale = (float)(pow(2.0, (i4_QP - 4.0) / 6.0));
+ /* curve fit for texture bits*/
+ X = (float)normal_satd_act / f_trial_q_scale;
+ tex_bpp = ((coeff_a * X * X * X) + (coeff_b * X * X) + (coeff_c * X) + coeff_d);
+ if(tex_bpp < (float)((1 << 30)) / i4_num_pixel) /* cap the modelled bits at 1 << 30 */
+ i4_tex_bits = (tex_bpp * i4_num_pixel);
+ else
+ i4_tex_bits = (1 << 30);
+ i4_header_bits = 0;
+ if(i4_tex_bits > 0)
+ {
+ /*QP increase can't cause increase in bits; if it does, restart the search from this QP*/
+ if(i4_prev_best != -1 && (i4_tex_bits > i4_prev_best))
+ {
+ min_error = 0x7FFFFFFF;
+ i4_qp_selection_flag = 0;
+ }
+ /*header bits are fixed at 0 here, so the error compares the target against the modelled texture bits only*/
+ error = i4_total_bits - (i4_tex_bits + i4_header_bits);
+ if(abs(error) < abs(min_error))
+ {
+ min_error = error;
+ min_error_q_scale = f_trial_q_scale;
+ i4_qp_selection_flag = 1;
+ i4_prev_best = i4_tex_bits;
+ }
+ }
+ }
+ if(!i4_qp_selection_flag) /* no QP was selected: fall back to the rounded minimum q-scale */
+ {
+ min_error_q_scale = (WORD32)(min_scd_qscale + 0.5);
+ }
+ //if((ps_rate_control_api->u1_bit_depth > 8)&& (i4_call_type == 1))
+ // min_error_q_scale = min_error_q_scale / (1 << (ps_rate_control_api->u1_bit_depth - 8));
+
+ /*offline stat generation range considered is mpeg2qp 5 to 161 or hevc qp 18 to 48*/
+ CLIP(min_error_q_scale, (WORD32)(max_scd_q_scale + 0.5), (WORD32)(min_scd_qscale + .5));
+ return ((WORD32)(min_error_q_scale * (1 << QSCALE_Q_FAC_3)));
+}
+
+/****************************************************************************
+Function Name : rc_get_qp_for_scd_frame
+Description   : Get the q-scale (Q3) for a scene cut frame: estimates the total
+                frame bits, constrains them by the buffer state, removes the
+                header bits and maps the rest through the offline model.
+Outputs       : *pi4_tot_bits_estimated, *pi4_cur_est_tot_bits and, when
+                i4_call_type is 1, *pi4_estimate_to_calc_frm_error
+*****************************************************************************/
+WORD32 rc_get_qp_for_scd_frame(
+    rate_control_api_t *ps_rate_control_api,
+    picture_type_e e_pic_type,
+    LWORD64 i8_satd_act_accum,
+    WORD32 i4_num_pels_in_frame,
+    WORD32 i4_est_I_pic_head_bits,
+    WORD32 i4_f_sim_lap_avg,
+    void *offline_model_coeff,
+    float i_to_avg_ratio,
+    WORD32 i4_true_scd,
+    float af_sum_weigh[MAX_PIC_TYPE][3],
+    frame_info_t *ps_frame_stat,
+    WORD32 i4_rc_2_pass,
+    WORD32 i4_is_not_an_I_pic,
+    WORD32 i4_ref_first_pass,
+    WORD32 i4_call_type,
+    WORD32 *pi4_cur_est_tot_bits,
+    WORD32 *pi4_tot_bits_estimated,
+    WORD32 i4_use_offline_model_2pass,
+    LWORD64 *pi8_i_tex_bits,
+    float *pf_i_qs,
+    WORD32 i4_best_br_id,
+    WORD32 *pi4_estimate_to_calc_frm_error)
+{
+    WORD32 i4_qs_q3, i4_buf_based_min_bits, i4_buf_based_max_bits, i4_cur_est_tot_bits,
+        i4_est_texture_bits, i4_get_error = 0;
+    /* ps_frame_stat, i4_rc_2_pass, i4_ref_first_pass and the 2-pass outputs are not referenced here */
+
+    assign_complexity_coeffs(ps_rate_control_api->ps_bit_allocation, af_sum_weigh);
+
+    i4_cur_est_tot_bits = get_scene_change_tot_frm_bits(
+        ps_rate_control_api->ps_bit_allocation,
+        ps_rate_control_api->ps_pic_handling,
+        ps_rate_control_api->ps_cbr_buffer,
+        i4_num_pels_in_frame,
+        i4_f_sim_lap_avg,
+        i_to_avg_ratio,
+        i4_call_type,
+        i4_is_not_an_I_pic,
+        ps_rate_control_api->i4_is_infinite_gop);
+    if(i4_call_type == 1)
+    {
+        *pi4_estimate_to_calc_frm_error = i4_cur_est_tot_bits;
+    }
+
+    /* vbv buffer position based error correction to keep away encoder buffer overflow at layer 0 pictures*/
+    if(e_pic_type == I_PIC || e_pic_type == P_PIC || e_pic_type == P1_PIC)
+    {
+        WORD32 i4_cur_ebf = get_cbr_ebf(ps_rate_control_api->ps_cbr_buffer);
+        WORD32 i4_vbv_size = get_cbr_buffer_size(ps_rate_control_api->ps_cbr_buffer);
+        WORD32 i4_max_ebf = (WORD32)(i4_vbv_size * MAX_THRESHOLD_VBV_FRM_ERROR);
+        WORD32 i4_drain_rate = get_buf_max_drain_rate(ps_rate_control_api->ps_cbr_buffer);
+        WORD32 i4_total_bits_allocted = i4_cur_est_tot_bits;
+        WORD32 i4_total_bits_to_be_alloc;
+        WORD32 i4_expected_ebf = (i4_cur_ebf + i4_total_bits_allocted - i4_drain_rate);
+        /*if expected ebf is greater than max threshold, correct the allocation such that it never crosses max
+        but if it is less than drain rate, at least give drain rate bits*/
+        if(i4_expected_ebf > i4_max_ebf)
+        {
+            i4_total_bits_to_be_alloc =
+                MAX(i4_drain_rate, (i4_total_bits_allocted - (i4_expected_ebf - i4_max_ebf)));
+            i4_cur_est_tot_bits = i4_total_bits_to_be_alloc;
+        }
+    }
+    if(i4_call_type == 1)
+    {
+        i4_get_error = rc_get_estimate_bit_error(ps_rate_control_api);
+    }
+    if(i4_est_I_pic_head_bits != -1)
+    /*get constraints from buffer*/
+    {
+        get_min_max_bits_based_on_buffer(
+            ps_rate_control_api,
+            e_pic_type,
+            &i4_buf_based_min_bits,
+            &i4_buf_based_max_bits,
+            i4_get_error);
+        if(i4_cur_est_tot_bits > i4_buf_based_max_bits)
+            i4_cur_est_tot_bits = i4_buf_based_max_bits;
+        if((i4_cur_est_tot_bits < i4_buf_based_min_bits) && (i_to_avg_ratio > 8.0))
+            i4_cur_est_tot_bits = i4_buf_based_min_bits;
+    }
+    if(i4_est_I_pic_head_bits <
+       0) //indicates header bits data is not available. Assume default ratio
+    {
+        i4_est_texture_bits = (i4_cur_est_tot_bits * DEFAULT_TEX_PERCENTAGE_Q5) >> 5;
+        i4_est_I_pic_head_bits = i4_cur_est_tot_bits - i4_est_texture_bits;
+    }
+    if((i4_cur_est_tot_bits - i4_est_I_pic_head_bits) < 0)
+        i4_cur_est_tot_bits = i4_est_I_pic_head_bits;
+
+    *pi4_tot_bits_estimated = i4_cur_est_tot_bits;
+
+    if(i4_true_scd)
+    {
+        /*total bits are raised to at least 1.25 times the header bits*/
+        if(i4_cur_est_tot_bits < (1.25 * i4_est_I_pic_head_bits))
+            i4_cur_est_tot_bits = (WORD32)(1.25 * i4_est_I_pic_head_bits);
+
+        ps_rate_control_api->i4_scd_I_frame_estimated_tot_bits = i4_cur_est_tot_bits;
+    }
+
+    /* Get qp for scene cut frame based on offline generated data*/
+
+    i4_qs_q3 = rc_get_qp_scene_change_bits(
+        ps_rate_control_api,
+        (i4_cur_est_tot_bits - i4_est_I_pic_head_bits),
+        i8_satd_act_accum,
+        i4_num_pels_in_frame,
+        offline_model_coeff,
+        i_to_avg_ratio,
+        i4_call_type);
+    /* Trace the chosen q-scale. NOTE(review): %I64d is an MSVC-only length modifier - confirm trace_printf accepts it */
+    if(i4_call_type)
+        trace_printf(
+            "i4_qp %d, i8_satd_act_accum %I64d,i_to_avg_ratio %f, "
+            "i4_est_I_pic_head_bits %d i4_cur_est_tot_bits %d\n",
+            i4_qs_q3,
+            i8_satd_act_accum,
+            i_to_avg_ratio,
+            i4_est_I_pic_head_bits,
+            i4_cur_est_tot_bits);
+
+    *pi4_cur_est_tot_bits = i4_cur_est_tot_bits;
+
+    return (i4_qs_q3);
+}
+
+/****************************************************************************
+Function Name : rc_set_num_scd_in_lap_window
+Description   : Forwards both counts to the bit allocation module through
+                bit_allocation_set_num_scd_lap_window().
+Inputs        : ps_rate_control_api, i4_num_scd_in_lap_window, i4_num_frames_b4_scd
+*****************************************************************************/
+void rc_set_num_scd_in_lap_window(
+    rate_control_api_t *ps_rate_control_api,
+    WORD32 i4_num_scd_in_lap_window,
+    WORD32 i4_num_frames_b4_scd)
+{
+    bit_allocation_set_num_scd_lap_window(
+        ps_rate_control_api->ps_bit_allocation,
+        i4_num_scd_in_lap_window,
+        i4_num_frames_b4_scd);
+}
+/****************************************************************************
+Function Name : rc_set_next_sc_i_in_rc_look_ahead
+Description   : Forwards the value to the bit allocation module through
+                bit_allocation_set_sc_i_in_rc_look_ahead().
+Inputs        : ps_rate_control_api - rate control state
+                i4_next_sc_i_in_rc_look_ahead - passed on unchanged
+*****************************************************************************/
+void rc_set_next_sc_i_in_rc_look_ahead(
+    rate_control_api_t *ps_rate_control_api, WORD32 i4_next_sc_i_in_rc_look_ahead)
+{
+    bit_allocation_set_sc_i_in_rc_look_ahead(
+        ps_rate_control_api->ps_bit_allocation,
+        i4_next_sc_i_in_rc_look_ahead);
+}
+
+/****************************************************************************
+ * Function Name : rc_update_mismatch_error
+ * Description   : Feeds the mismatch between the rdopt bit estimate and the
+ *                 bits actually produced by entropy coding to both the bit
+ *                 allocation module and the CBR buffer model.
+ ****************************************************************************/
+void rc_update_mismatch_error(rate_control_api_t *ps_rate_control_api, WORD32 i4_error_bits)
+{
+    bit_allocation_update_gop_level_bit_error(ps_rate_control_api->ps_bit_allocation, i4_error_bits);
+    /* error = rdopt - entropy, so the encoder buffer fullness is adjusted
+       by the same amount as well */
+    update_cbr_buf_mismatch_bit(ps_rate_control_api->ps_cbr_buffer, i4_error_bits);
+}
+/****************************************************************************
+Function Name : rc_set_estimate_status
+Description   : Passes the texture bits, header bits and the counter value
+                to update_estimate_status() of the bit allocation module.
+Inputs        : ps_rate_control_api - rate control state
+                i4_tex_bits, i4_hdr_bits - forwarded unchanged
+                i4_est_text_bits_ctr_get_qp - forwarded unchanged
+Returns       : i4_tex_bits, exactly as it was passed in
+
+*****************************************************************************/
+WORD32 rc_set_estimate_status(
+    rate_control_api_t *ps_rate_control_api,
+    WORD32 i4_tex_bits,
+    WORD32 i4_hdr_bits,
+    WORD32 i4_est_text_bits_ctr_get_qp)
+{
+    update_estimate_status(
+        ps_rate_control_api->ps_bit_allocation, i4_tex_bits, i4_hdr_bits, i4_est_text_bits_ctr_get_qp);
+
+    /* The texture bit count is echoed back untouched */
+    return i4_tex_bits;
+}
+/****************************************************************************
+Function Name : rc_get_bpp_based_scene_cut_qp
+Description   : bpp based qp for a scene cut frame. The frame budget comes from
+                get_scene_change_tot_frm_bits() (clamped to the buffer limits
+                when i4_call_type is 1), its texture share is converted to a
+                qp by the init qp module, and the qp is limited to the
+                [ai4_min_qp, ai4_max_qp] range of e_pic_type.
+*****************************************************************************/
+WORD32 rc_get_bpp_based_scene_cut_qp(
+    rate_control_api_t *ps_rate_control_api,
+    picture_type_e e_pic_type,
+    WORD32 i4_num_pels_in_frame,
+    WORD32 i4_f_sim_lap,
+    float af_sum_weigh[MAX_PIC_TYPE][3],
+    WORD32 i4_call_type)
+{
+    WORD32 i4_frm_bits, i4_tex_bits, i4_hdr_bits, i4_scd_qp;
+    WORD32 i4_min_bits_by_buf, i4_max_bits_by_buf;
+
+    assign_complexity_coeffs(ps_rate_control_api->ps_bit_allocation, af_sum_weigh);
+
+    /* Header estimate for the current frame; the value is replaced further
+       down by the remainder of a fixed percentage split */
+    i4_hdr_bits =
+        get_cur_frm_est_header_bits(ps_rate_control_api->ps_bit_allocation, e_pic_type);
+
+    /* Total bits the frame may take according to the offline generated data */
+    i4_frm_bits = get_scene_change_tot_frm_bits(
+        ps_rate_control_api->ps_bit_allocation,
+        ps_rate_control_api->ps_pic_handling,
+        ps_rate_control_api->ps_cbr_buffer,
+        i4_num_pels_in_frame,
+        i4_f_sim_lap,
+        (float)8.00,
+        0,
+        0,
+        ps_rate_control_api->i4_is_infinite_gop);
+
+    /* The min/max bits derived from buffer fullness constrain the budget
+       only for call type 1; the upper limit is applied first */
+    if(1 == i4_call_type)
+    {
+        get_min_max_bits_based_on_buffer(
+            ps_rate_control_api, e_pic_type, &i4_min_bits_by_buf, &i4_max_bits_by_buf, 0);
+        i4_frm_bits = (i4_frm_bits > i4_max_bits_by_buf) ? i4_max_bits_by_buf : i4_frm_bits;
+        i4_frm_bits = (i4_frm_bits < i4_min_bits_by_buf) ? i4_min_bits_by_buf : i4_frm_bits;
+    }
+
+    /* Split the budget: texture gets DEFAULT_TEX_PERCENTAGE_Q5 / 32 of it,
+       the header estimate is whatever is left over */
+    i4_tex_bits = (i4_frm_bits * DEFAULT_TEX_PERCENTAGE_Q5) >> 5;
+    i4_hdr_bits = i4_frm_bits - i4_tex_bits;
+    if(i4_tex_bits < 0)
+    {
+        i4_tex_bits = 0;
+    }
+
+    /* qp for the texture bits allocated to the frame; the lookup is always
+       made with I_PIC, whatever e_pic_type is */
+    i4_scd_qp = get_init_qp_using_pels_bits_per_frame(
+        ps_rate_control_api->ps_init_qp, I_PIC, i4_tex_bits, i4_num_pels_in_frame);
+
+    /* Make sure the qp is within range; the lower bound is tested first and
+       at most one of the two limits is applied */
+    if(i4_scd_qp < ps_rate_control_api->ai4_min_qp[e_pic_type])
+    {
+        return ps_rate_control_api->ai4_min_qp[e_pic_type];
+    }
+    if(i4_scd_qp > ps_rate_control_api->ai4_max_qp[e_pic_type])
+    {
+        return ps_rate_control_api->ai4_max_qp[e_pic_type];
+    }
+
+    return i4_scd_qp;
+}
+/****************************************************************************
+Function Name : rc_reset_pic_model
+Description : Calls reset_frm_rc_rd_model() on the rd model held for pic_type
+Inputs : ps_rate_control_api
+ pic_type - index into aps_rd_model[]
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+ *****************************************************************************/
+void rc_reset_pic_model(rate_control_api_t *ps_rate_control_api, picture_type_e pic_type)
+{
+ reset_frm_rc_rd_model(ps_rate_control_api->aps_rd_model[pic_type]);
+}
+/****************************************************************************
+Function Name : rc_reset_first_frame_coded_flag
+Description : Sets au1_is_first_frm_coded[pic_type] back to 0
+Inputs : ps_rate_control_api
+ pic_type - picture type whose flag is cleared
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+ *****************************************************************************/
+void rc_reset_first_frame_coded_flag(
+ rate_control_api_t *ps_rate_control_api, picture_type_e pic_type)
+{
+ ps_rate_control_api->au1_is_first_frm_coded[pic_type] = 0;
+}
+/****************************************************************************
+Function Name : rc_get_scene_change_est_header_bits
+Description   : Header bit estimate for a scene change frame: the total from
+                get_scene_change_tot_frm_bits() minus the default texture share
+Inputs        : ps_rate_control_api
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+ *****************************************************************************/
+WORD32 rc_get_scene_change_est_header_bits(
+    rate_control_api_t *ps_rate_control_api,
+    WORD32 i4_num_pixels,
+    WORD32 i4_fsim_lap,
+    float af_sum_weigh[MAX_PIC_TYPE][3],
+    float i_to_avg_ratio)
+{
+    WORD32 i4_est_tot_bits, i4_est_tex_bits;
+    assign_complexity_coeffs(ps_rate_control_api->ps_bit_allocation, af_sum_weigh);
+    i4_est_tot_bits = get_scene_change_tot_frm_bits(
+        ps_rate_control_api->ps_bit_allocation,
+        ps_rate_control_api->ps_pic_handling,
+        ps_rate_control_api->ps_cbr_buffer,
+        i4_num_pixels,
+        i4_fsim_lap,
+        i_to_avg_ratio,
+        0,
+        0,
+        ps_rate_control_api->i4_is_infinite_gop);
+    /* Q5 product is formed in 64 bit so that a large estimate cannot overflow WORD32 */
+    i4_est_tex_bits = (WORD32)(((LWORD64)i4_est_tot_bits * DEFAULT_TEX_PERCENTAGE_Q5) >> 5);
+    /*return header bits based on default percentage*/
+    return (i4_est_tot_bits - i4_est_tex_bits);
+}
+/****************************************************************************
+Function Name : rc_put_temp_comp_lap
+Description   : Stores the lap f_sim in the rate control state. For a P_PIC the
+                per pixel hme sad (q10) of the frame is stored as well.
+Inputs        : ps_rate_control_api
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+ *****************************************************************************/
+void rc_put_temp_comp_lap(
+    rate_control_api_t *ps_rate_control_api,
+    WORD32 i4_lap_fsim,
+    LWORD64 i8_per_pixel_frm_hme_sad_q10,
+    picture_type_e e_pic_type)
+{
+    ps_rate_control_api->i4_lap_f_sim = i4_lap_fsim;
+    /* Only P pictures refresh the stored per pixel hme sad */
+    if(e_pic_type != P_PIC)
+        return;
+    ps_rate_control_api->i8_per_pixel_p_frm_hme_sad_q10 = i8_per_pixel_frm_hme_sad_q10;
+}
+/****************************************************************************
+Function Name : rc_get_pic_distribution
+Description : Hands ai4_pic_type to pic_type_get_frms_in_gop() of pic handling
+Inputs : ps_rate_control_api
+ ai4_pic_type - presumably receives the per picture type frame counts (confirm in callee)
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+ *****************************************************************************/
+void rc_get_pic_distribution(rate_control_api_t *ps_rate_control_api, WORD32 *ai4_pic_type)
+{
+ pic_type_get_frms_in_gop(ps_rate_control_api->ps_pic_handling, ai4_pic_type);
+}
+/****************************************************************************
+Function Name : rc_get_actual_pic_distribution
+Description : Hands ai4_pic_type to pic_type_get_actual_frms_in_gop() of pic handling
+Inputs : ps_rate_control_api
+ ai4_pic_type - presumably receives the actual per picture type frame counts (confirm in callee)
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+ *****************************************************************************/
+void rc_get_actual_pic_distribution(rate_control_api_t *ps_rate_control_api, WORD32 *ai4_pic_type)
+{
+ pic_type_get_actual_frms_in_gop(ps_rate_control_api->ps_pic_handling, ai4_pic_type);
+}
+/****************************************************************************
+Function Name : rc_reset_Kp_Kb
+Description : Calls reset_Kp_Kb() of bit allocation. f_max_hme_sad_per_pixel is taken
+ from ps_rate_control_api, all other arguments are passed through.
+Inputs : ps_rate_control_api
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+ *****************************************************************************/
+void rc_reset_Kp_Kb(
+ rate_control_api_t *ps_rate_control_api,
+ float f_i_to_avg_rest,
+ WORD32 i4_num_active_pic_type,
+ float f_curr_hme_sad_per_pixel,
+ WORD32 *pi4_complexity_bin,
+ WORD32 i4_rc_pass)
+{
+ reset_Kp_Kb(
+ ps_rate_control_api->ps_bit_allocation,
+ f_i_to_avg_rest,
+ i4_num_active_pic_type,
+ f_curr_hme_sad_per_pixel,
+ ps_rate_control_api->f_max_hme_sad_per_pixel,
+ pi4_complexity_bin,
+ i4_rc_pass);
+}
+
+/****************************************************************************
+Function Name : rc_get_kp_kb
+Description : Get Kp and Kb values for offset at scene cut
+Inputs : ps_rate_control_api
+ e_pic_type - picture type passed to get_Kp_Kb()
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+ *****************************************************************************/
+
+WORD32 rc_get_kp_kb(rate_control_api_t *ps_rate_control_api, picture_type_e e_pic_type)
+{
+ return get_Kp_Kb(ps_rate_control_api->ps_bit_allocation, e_pic_type);
+}
+/****************************************************************************
+Function Name : rc_get_ebf
+Description : Returns the value of get_cbr_ebf() for the cbr buffer state
+Inputs : ps_rate_control_api
+
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+ *****************************************************************************/
+WORD32 rc_get_ebf(rate_control_api_t *ps_rate_control_api)
+{
+ return (get_cbr_ebf(ps_rate_control_api->ps_cbr_buffer));
+}
+
+/****************************************************************************
+Function Name : rc_get_offline_normalized_complexity
+Description   : The complexities of L1 are normalized with the highest offline
+                global complexity
+Inputs        : i4_intra_period        - 1 selects the intra only normalizers
+                i4_luma_pels           - picks the class (> 1500000, > 700000, rest)
+                f_per_pixel_complexity - value to be normalized
+                i4_pass_number         - not used by this function
+Globals       : None
+Processing    : Divides the complexity by the normalizer of its resolution class
+                and saturates the result at 1
+Outputs       : None
+Returns       : Normalized per pixel complexity (never more than 1)
+Issues        : Normalizers are hard coded offline observations
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+
+*****************************************************************************/
+float rc_get_offline_normalized_complexity(
+    WORD32 i4_intra_period, WORD32 i4_luma_pels, float f_per_pixel_complexity, WORD32 i4_pass_number)
+{
+    /* Offline normalizers, one row per resolution class. Column 0 is used for
+       i4_intra_period == 1, column 1 for every other intra period */
+    static const float af_max_complexity[3][2] = {
+        /*Full HD and above: Based on running few content, exact data needs to be plugged in*/
+        { (float)3.69, (float)2.25 },
+        /* 2.6109: the max complexity observed for 720p content of netflix_fountain */
+        { (float)4.28, (float)2.6109 },
+        /* all smaller pictures */
+        { (float)4.91, (float)3 }
+    };
+    WORD32 i4_res_class, i4_col;
+
+    /* Resolution class from the luma pel count */
+    if(i4_luma_pels > 1500000)
+    {
+        i4_res_class = 0;
+    }
+    else if(i4_luma_pels > 700000)
+    {
+        i4_res_class = 1;
+    }
+    else
+    {
+        i4_res_class = 2;
+    }
+
+    i4_col = (i4_intra_period == 1) ? 0 : 1;
+    f_per_pixel_complexity /= af_max_complexity[i4_res_class][i4_col];
+
+    /* Saturate the normalized value at 1 */
+    if(f_per_pixel_complexity > 1.0)
+        f_per_pixel_complexity = 1;
+    return f_per_pixel_complexity;
+}
+
+/****************************************************************************
+Function Name : rc_bit_alloc_detect_ebf_stuff_scenario
+Description   : To estimate whether there will be a case of underflow based on
+                estimated bit consumption and drain rate. If there is probability
+                of underflow, the warning flag is raised so that the HEVC qp's
+                can be lowered by 1 based on it.
+Inputs        : ps_rate_control_api, i4_num_frm_bef_scd_lap2,
+                i8_total_bits_est_consu_lap2, i4_max_inter_frm_int
+Globals       : None
+Processing    : est ebf = ebf - (frames * peak drain rate) + estimated bits; warn when
+                it is below i4_max_inter_frm_int * peak drain rate (64 bit maths)
+Outputs       : i4_underflow_warning of ps_rate_control_api (0 or 1)
+Returns       : None
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+*****************************************************************************/
+
+void rc_bit_alloc_detect_ebf_stuff_scenario(
+    rate_control_api_t *ps_rate_control_api,
+    WORD32 i4_num_frm_bef_scd_lap2,
+    LWORD64 i8_total_bits_est_consu_lap2,
+    WORD32 i4_max_inter_frm_int)
+{
+    /* Held in 64 bit so that the frame count * drain rate products below are
+       not evaluated (and possibly overflowed) in WORD32 */
+    LWORD64 i8_peak_drain_rate = get_buf_max_drain_rate(ps_rate_control_api->ps_cbr_buffer);
+    LWORD64 i8_ebf = rc_get_ebf(ps_rate_control_api);
+    LWORD64 i8_estimate_ebf_at_end =
+        i8_ebf - (i4_num_frm_bef_scd_lap2 * i8_peak_drain_rate) + i8_total_bits_est_consu_lap2;
+
+    ps_rate_control_api->i4_underflow_warning = 0;
+
+    if(i8_estimate_ebf_at_end < (i4_max_inter_frm_int * i8_peak_drain_rate))
+    {
+        /*If underflow is imminent give a flag*/
+        ps_rate_control_api->i4_underflow_warning = 1;
+    }
+}
+
+/****************************************************************************
+Function Name : bit_alloc_get_estimated_bits_for_pic
+Description   : Scales the previous frame's non header bits by the ratio of the
+                current to the previous frame sad and adds the previous header bits
+Inputs        : ps_rate_contro_api
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+ *****************************************************************************/
+
+WORD32 bit_alloc_get_estimated_bits_for_pic(
+    rate_control_api_t *ps_rate_contro_api,
+    WORD32 i4_cur_frm_est_cl_sad,
+    WORD32 i4_prev_frm_cl_sad,
+    picture_type_e e_pic_type)
+{
+    WORD32 i4_prev_frame_bits, i4_prev_frame_header_bits;
+    get_prev_frame_total_header_bits(
+        ps_rate_contro_api->ps_bit_allocation, &i4_prev_frame_bits, &i4_prev_frame_header_bits,
+        e_pic_type);
+    /* With a zero previous sad the ratio below is a division by zero and the float to
+       WORD32 conversion is undefined; fall back to the previous frame's total bits */
+    if(i4_prev_frm_cl_sad == 0)
+        return i4_prev_frame_bits;
+    return (WORD32)(
+        ((float)(i4_prev_frame_bits - i4_prev_frame_header_bits) * (float)i4_cur_frm_est_cl_sad /
+         (float)i4_prev_frm_cl_sad) +
+        i4_prev_frame_header_bits);
+}
+
+/****************************************************************************
+Function Name : rc_get_max_hme_sad_per_pixel
+Description   : At init time based on parameters we pick the max hme sad per pixel.
+Inputs        : ps_rate_control_api - i4_num_active_pic_type and ps_bit_allocation are read
+                i4_total_pixels     - number of pixels in the picture
+Globals       : None
+Processing    : 1. bpp = bits per frame / i4_total_pixels
+                2. resolution = entry of ai4_pixels_res closest to i4_total_pixels
+                3. bitrate index = 1 when bpp is nearer to the second offline bpp
+                   of that resolution, else 0
+                4. look up the offline max hme sad for (resolution, temporal
+                   layers, bitrate index)
+Outputs       : f_max_hme_sad_per_pixel of ps_rate_control_api
+Returns       : None
+Issues        :
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+
+*****************************************************************************/
+void rc_get_max_hme_sad_per_pixel(rate_control_api_t *ps_rate_control_api, WORD32 i4_total_pixels)
+{
+    /*Max hme sad per pixel based on resolutions, num. of temporal layers (0-3) and also bpp-> whether low bitrate or high bitrate*/
+    /* Layout: [resolution][temporal layers][bitrate index] */
+    static const float af_offline_hme_sad_per_pixel[4][4][2] = {
+        /* 480p */
+        { { 2.94f, 2.63f },
+          { 2.96f, 2.44f },
+          { 2.72f, 1.94f },
+          { 2.70f, 2.04f } },
+        /* 720p */
+        { { 3.37f, 2.97f },
+          { 3.35f, 2.77f },
+          { 3.18f, 2.40f },
+          { 2.94f, 1.83f } },
+        /* 1080p */
+        { { 3.24f, 2.78f },
+          { 3.17f, 2.46f },
+          { 2.91f, 1.98f },
+          { 2.75f, 1.65f } },
+        /* 2160p */
+        { { 2.56f, 2.11f },
+          { 2.47f, 1.92f },
+          { 2.19f, 1.46f },
+          { 2.00f, 1.21f } }
+    };
+
+    /*Low BR or HBR is decided by comparing the bpp values as below*/
+    static const float af_offline_bpp[4][2] = {
+        { 0.30f, 0.09f }, /* 480p */
+        { 0.25f, 0.06f }, /* 720p */
+        { 0.16f, 0.04f }, /* 1080p */
+        { 0.12f, 0.02f } /* 2160p */
+    };
+
+    /*Number of pixels in the picture for picking the closest resolution*/
+    static const WORD32 ai4_pixels_res[4] = {
+        307200, /* 480p */
+        921600, /* 720p */
+        2073600, /* 1080p */
+        8294400 /* 2160p */
+    };
+
+    WORD32 i4_res, i4_closest_res = 0, i4_br = 0;
+    WORD32 i4_min_dist = 0x7FFFFFFF;
+    WORD32 i4_layers;
+    float f_bpp =
+        (float)get_bits_per_frame(ps_rate_control_api->ps_bit_allocation) / i4_total_pixels;
+
+    /* Temporal layer row: active picture types minus 2, passed through CLIP(.., 3, 0) */
+    i4_layers = ps_rate_control_api->i4_num_active_pic_type - 2;
+    CLIP(i4_layers, 3, 0);
+
+    /*Pick the closest resolution based on error*/
+    for(i4_res = 0; i4_res < 4; i4_res++)
+    {
+        WORD32 i4_dist = abs(i4_total_pixels - ai4_pixels_res[i4_res]);
+
+        if(i4_dist < i4_min_dist)
+        {
+            i4_min_dist = i4_dist;
+            i4_closest_res = i4_res;
+        }
+    }
+
+    /*Decide whether LBR or HBR*/
+    /* Column 1 is used when f_bpp is nearer to the second bpp value of the row */
+    if(fabs(af_offline_bpp[i4_closest_res][0] - f_bpp) >
+       fabs(af_offline_bpp[i4_closest_res][1] - f_bpp))
+    {
+        i4_br = 1;
+    }
+
+    /*After that pick the max hme sad*/
+    /* i4_closest_res is always 0..3 here, so one lookup covers every resolution */
+    ps_rate_control_api->f_max_hme_sad_per_pixel =
+        af_offline_hme_sad_per_pixel[i4_closest_res][i4_layers][i4_br];
+}
+
+/****************************************************************************
+Function Name : rc_update_pic_distn_lap_to_rc
+Description : Hands ai4_num_pic_type to pic_type_update_frms_in_gop() of pic handling
+Inputs : ps_rate_control_api
+ ai4_num_pic_type - one entry per picture type (MAX_PIC_TYPE entries)
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+ *****************************************************************************/
+void rc_update_pic_distn_lap_to_rc(
+ rate_control_api_t *ps_rate_contro_api, WORD32 ai4_num_pic_type[MAX_PIC_TYPE])
+{
+ pic_type_update_frms_in_gop(ps_rate_contro_api->ps_pic_handling, ai4_num_pic_type);
+}
+
+/****************************************************************************
+Function Name : rc_set_bits_based_on_complexity
+Description : Calls set_bit_allocation_i_frames() with the bit allocation, cbr buffer
+ and pic handling states plus the two values passed in
+Inputs : ps_rate_control_api
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+ *****************************************************************************/
+void rc_set_bits_based_on_complexity(
+ rate_control_api_t *ps_rate_contro_api, WORD32 i4_lap_window_comp, WORD32 i4_num_frames)
+{
+ set_bit_allocation_i_frames(
+ ps_rate_contro_api->ps_bit_allocation,
+ ps_rate_contro_api->ps_cbr_buffer,
+ ps_rate_contro_api->ps_pic_handling,
+ i4_lap_window_comp,
+ i4_num_frames);
+}
+/****************************************************************************
+Function Name : rc_set_avg_qscale_first_pass
+Description : Set the average qscale from first pass
+ (handed to ba_set_avg_qscale_first_pass() of bit allocation)
+Inputs : ps_rate_control_api
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+ *****************************************************************************/
+
+void rc_set_avg_qscale_first_pass(
+ rate_control_api_t *ps_rate_control_api, float f_average_qscale_1st_pass)
+{
+ ba_set_avg_qscale_first_pass(ps_rate_control_api->ps_bit_allocation, f_average_qscale_1st_pass);
+}
+/****************************************************************************
+Function Name : rc_set_max_avg_qscale_first_pass
+Description : Set the maximum average Qscale in second pass as average Qscale
+ of first pass + 6. This is for simple contents.
+Inputs : ps_rate_control_api
+ f_max_average_qscale_1st_pass - handed to ba_set_max_avg_qscale_first_pass()
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+ *****************************************************************************/
+
+void rc_set_max_avg_qscale_first_pass(
+ rate_control_api_t *ps_rate_control_api, float f_max_average_qscale_1st_pass)
+{
+ ba_set_max_avg_qscale_first_pass(
+ ps_rate_control_api->ps_bit_allocation, f_max_average_qscale_1st_pass);
+}
+/****************************************************************************
+Function Name : rc_set_i_to_sum_api_ba
+Description : Hands f_curr_i_to_sum to bit_alloc_set_curr_i_to_sum_i() of bit allocation
+Inputs : ps_rate_control_api
+
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+ *****************************************************************************/
+void rc_set_i_to_sum_api_ba(rate_control_api_t *ps_rate_control_api, float f_curr_i_to_sum)
+{
+ bit_alloc_set_curr_i_to_sum_i(ps_rate_control_api->ps_bit_allocation, f_curr_i_to_sum);
+}
+/****************************************************************************
+Function Name : rc_set_p_to_i_complexity_ratio
+Description : Stores f_p_to_i_ratio in f_p_to_i_comp_ratio of the rate control state
+Inputs : ps_rate_control_api
+
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+ *****************************************************************************/
+void rc_set_p_to_i_complexity_ratio(rate_control_api_t *ps_rate_control_api, float f_p_to_i_ratio)
+{
+ ps_rate_control_api->f_p_to_i_comp_ratio = f_p_to_i_ratio;
+}
+/****************************************************************************
+Function Name : rc_set_scd_in_period
+Description : Stores i4_scd_in_period in i4_scd_in_period_2_pass of the rate control state
+Inputs : ps_rate_control_api
+
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+ *****************************************************************************/
+void rc_set_scd_in_period(rate_control_api_t *ps_rate_control_api, WORD32 i4_scd_in_period)
+{
+ ps_rate_control_api->i4_scd_in_period_2_pass = i4_scd_in_period;
+}
+/****************************************************************************
+Function Name : rc_ba_get_qp_offset_offline_data
+Description   : Passes max hme sad / current hme sad (integer and float) to bit allocation
+Inputs        : ps_rate_control_api
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+ *****************************************************************************/
+void rc_ba_get_qp_offset_offline_data(
+    rate_control_api_t *ps_rate_control_api,
+    WORD32 ai4_offsets[5],
+    float f_hme_sad_per_pixel,
+    WORD32 i4_num_active_pic_type,
+    WORD32 *pi4_complexity_bin)
+{
+    float f_max_to_cur_sad;
+    WORD32 i4_max_to_cur_sad;
+
+    /* The sad goes through CLIP (bound arguments: max hme sad, 0.01f) before it is used as divisor */
+    CLIP(f_hme_sad_per_pixel, ps_rate_control_api->f_max_hme_sad_per_pixel, 0.01f);
+    f_max_to_cur_sad = ps_rate_control_api->f_max_hme_sad_per_pixel / f_hme_sad_per_pixel;
+    i4_max_to_cur_sad = (WORD32)(ps_rate_control_api->f_max_hme_sad_per_pixel / f_hme_sad_per_pixel);
+
+    ba_get_qp_offset_offline_data(
+        ai4_offsets, i4_max_to_cur_sad, f_max_to_cur_sad, i4_num_active_pic_type,
+        pi4_complexity_bin);
+}
+/****************************************************************************
+Function Name : rc_api_gop_level_averagae_q_scale_without_offset
+Description   : Find the GOP level average of the Qscale
+Inputs        : ps_rate_control_api
+Returns       : Value of ba_gop_info_average_qscale_gop_without_offset() for the
+                bit allocation state
+
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+ *****************************************************************************/
+
+float rc_api_gop_level_averagae_q_scale_without_offset(rate_control_api_t *ps_rate_control_api)
+{
+    /* The bit allocation module owns the gop level qscale bookkeeping */
+    return ba_gop_info_average_qscale_gop_without_offset(ps_rate_control_api->ps_bit_allocation);
+}
+
+/****************************************************************************
+Function Name : rc_getprev_ref_pic_type
+Description : Returns prev_ref_pic_type stored in the rate control state
+Inputs : ps_rate_control_api
+
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+ *****************************************************************************/
+picture_type_e rc_getprev_ref_pic_type(rate_control_api_t *ps_rate_control_api)
+{
+ return (ps_rate_control_api->prev_ref_pic_type);
+}
+/****************************************************************************
+Function Name : rc_get_actual_intra_frame_int
+Description   : Queries the pic handling module through
+                pic_type_get_actual_intra_frame_interval()
+Inputs        : ps_rate_control_api
+Returns       : Value reported by pic handling
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+ *****************************************************************************/
+WORD32 rc_get_actual_intra_frame_int(rate_control_api_t *ps_rate_control_api)
+{
+    return pic_type_get_actual_intra_frame_interval(ps_rate_control_api->ps_pic_handling);
+}
+/****************************************************************************
+Function Name : rc_get_qscale_max_clip_in_second_pass
+Description   : Get maximum qscale that is allowed based on average Qp for simple contents
+Inputs        : ps_rate_control_api
+Returns       : Value of ba_get_qscale_max_clip_in_second_pass() for the bit
+                allocation state
+
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+ *****************************************************************************/
+
+float rc_get_qscale_max_clip_in_second_pass(rate_control_api_t *ps_rate_control_api)
+{
+    return ba_get_qscale_max_clip_in_second_pass(ps_rate_control_api->ps_bit_allocation);
+}
+/****************************************************************************
+Function Name : rc_set_2pass_total_frames
+Description : Set the total number of frames in the stream
+ (handed to bit_alloc_set_2pass_total_frames() of bit allocation)
+Inputs : ps_rate_control_api
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+ *****************************************************************************/
+
+void rc_set_2pass_total_frames(rate_control_api_t *ps_rate_control_api, WORD32 i4_total_2pass_frames)
+{
+ bit_alloc_set_2pass_total_frames(ps_rate_control_api->ps_bit_allocation, i4_total_2pass_frames);
+}
+/****************************************************************************
+Function Name : rc_set_2pass_avg_bit_rate
+Description : Set the average bit-rate based on consumption so far
+ (handed to ba_set_2pass_avg_bit_rate() of bit allocation)
+Inputs : ps_rate_control_api
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+ *****************************************************************************/
+
+void rc_set_2pass_avg_bit_rate(
+ rate_control_api_t *ps_rate_control_api, LWORD64 i8_2pass_avg_bit_rate)
+{
+ ba_set_2pass_avg_bit_rate(ps_rate_control_api->ps_bit_allocation, i8_2pass_avg_bit_rate);
+}
+/****************************************************************************
+Function Name : rc_set_enable_look_ahead
+Description : Hands i4_enable_look_ahead to ba_set_enable_look_ahead() of bit allocation
+Inputs : ps_rate_control_api
+
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+ *****************************************************************************/
+void rc_set_enable_look_ahead(rate_control_api_t *ps_rate_control_api, WORD32 i4_enable_look_ahead)
+{
+ ba_set_enable_look_ahead(ps_rate_control_api->ps_bit_allocation, i4_enable_look_ahead);
+}
+/****************************************************************************
+Function Name : rc_add_est_tot
+Description : Passes i4_tot_tex_bits on to rc_modify_est_tot()
+Inputs : ps_rate_control_api
+
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+ *****************************************************************************/
+void rc_add_est_tot(rate_control_api_t *ps_rate_control_api, WORD32 i4_tot_tex_bits)
+{
+ rc_modify_est_tot(ps_rate_control_api, i4_tot_tex_bits);
+}
+/****************************************************************************
+Function Name : rc_init_buffer_info
+Description : Writes the cbr buffer size, the ebf plus rc_get_estimate_bit_error(),
+ the max ebf and the max drain rate to the four output pointers
+Inputs : ps_rate_control_api
+Revision History:
+DD MM YYYY Author(s) Changes (Describe the changes made)
+ *****************************************************************************/
+void rc_init_buffer_info(
+ rate_control_api_t *ps_rate_control_api,
+ WORD32 *pi4_vbv_buffer_size,
+ WORD32 *pi4_currEbf,
+ WORD32 *pi4_maxEbf,
+ WORD32 *pi4_drain_rate)
+{
+ *pi4_vbv_buffer_size = get_cbr_buffer_size(ps_rate_control_api->ps_cbr_buffer);
+ *pi4_currEbf = get_cbr_ebf(ps_rate_control_api->ps_cbr_buffer) +
+ rc_get_estimate_bit_error(ps_rate_control_api);
+ *pi4_maxEbf = get_cbr_max_ebf(ps_rate_control_api->ps_cbr_buffer);
+ *pi4_drain_rate = get_buf_max_drain_rate(ps_rate_control_api->ps_cbr_buffer);
+ return;
+}
diff --git a/encoder/rate_control_api.h b/encoder/rate_control_api.h
new file mode 100644
index 0000000..a76f20e
--- /dev/null
+++ b/encoder/rate_control_api.h
@@ -0,0 +1,492 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file rate_control_api.h
+*
+* \brief
+* This file should only contain RC API function declarations
+*
+* \date
+*
+* \author
+* ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _RATE_CONTROL_API_H_
+#define _RATE_CONTROL_API_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+#define RC_OK 0
+#define RC_FAIL -1
+#define RC_BENIGN_ERR -2
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+
+typedef struct rate_control_api_t *rate_control_handle;
+
+WORD32 rate_control_num_fill_use_free_memtab(
+ rate_control_handle *pps_rate_control_api,
+ itt_memtab_t *ps_memtab,
+ ITT_FUNC_TYPE_E e_func_type);
+
+void initialise_rate_control(
+ rate_control_handle ps_rate_control_api,
+ rc_type_e e_rate_control_type,
+ UWORD8 u1_is_mb_level_rc_on,
+ UWORD32 u4_avg_bit_rate,
+ UWORD32 *pu4_peak_bit_rate,
+ UWORD32 u4_min_bit_rate,
+ UWORD32 u4_frame_rate,
+ UWORD32 u4_max_delay,
+ UWORD32 u4_intra_frame_interval,
+ UWORD32 u4_idr_period,
+ WORD32 *pi4_init_qp,
+ UWORD32 u4_max_vbv_buff_size,
+ WORD32 i4_max_inter_frm_int,
+ WORD32 i4_is_gop_closed,
+ WORD32 *pi4_min_max_qp,
+ WORD32 i4_use_est_intra_sad,
+ UWORD32 u4_src_ticks,
+ UWORD32 u4_tgt_ticks,
+ WORD32 i4_frame_height,
+ WORD32 i4_frame_width,
+ WORD32 i4_num_active_pic_type,
+ WORD32 i4_field_pic,
+ WORD32 i4_quality_preset,
+ WORD32 i4_lap_window,
+ WORD32 i4_initial_decoder_delay_frames,
+ float f_max_peak_rate_sustain_dur,
+ LWORD64 i8_num_frames_to_encode,
+ UWORD32 u4_min_scd_hevc_qp,
+ UWORD8 u1_bit_depth,
+ FILE *pf_rc_stat_file,
+ WORD32 i4_rc_pass,
+ void *pv_gop_stat,
+ LWORD64 i8_num_gop_mem_alloc,
+ WORD32 i4_is_infinite_gop,
+ WORD32 i4_size_of_lap_out,
+ WORD32 i4_size_of_rc_lap_out,
+ void *pv_sys_api,
+ WORD32 i4_fp_bit_alloc_in_sp,
+ WORD32 i4_num_frame_parallel,
+ WORD32 i4_capped_vbr_flag);
+
+/*****************************************************************************
+ Process level API functions (FRAME LEVEL)
+*****************************************************************************/
+void flush_buf_frames(rate_control_handle ps_rate_control_api);
+
+void post_encode_frame_skip(rate_control_handle ps_rate_control_api, picture_type_e e_pic_type);
+
+void add_picture_to_stack(
+ rate_control_handle rate_control_api, WORD32 i4_enc_pic_id, WORD32 i4_rc_in_pic);
+
+void add_picture_to_stack_re_enc(
+ rate_control_handle rate_control_api, WORD32 i4_enc_pic_id, picture_type_e e_pic_type);
+
+void get_picture_details(
+ rate_control_handle rate_control_api,
+ WORD32 *pi4_pic_id,
+ WORD32 *pi4_pic_disp_order_no,
+ picture_type_e *pe_pic_type,
+ WORD32 *pi4_is_scd);
+
+WORD32 ihevce_rc_get_scaled_hevce_qp_q6(WORD32 i4_frame_qp_q6, UWORD8 u1_bit_depth);
+
+void get_bits_for_final_qp(
+ rate_control_handle ps_rate_control_api,
+ WORD32 *pi4_modelQP,
+ WORD32 *pi4_maxEbfQP,
+ LWORD64 *pi8_bits_from_finalQP,
+ WORD32 i4_clipQP,
+ WORD32 i4_frame_qp_q6,
+ WORD32 i4_cur_est_header_bits,
+ WORD32 i4_est_tex_bits,
+ WORD32 i4_buf_based_max_bits,
+ picture_type_e e_pic_type,
+ WORD32 i4_display_num);
+
+WORD32 model_availability(rate_control_handle rate_control_api, picture_type_e e_pic_type);
+
+WORD32 get_est_hdr_bits(rate_control_handle rate_control_api, picture_type_e e_pic_type);
+
+/* Gets the frame level Qp (q scale in q6 format)*/
+WORD32 get_frame_level_qp(
+ rate_control_handle rate_control_api,
+ picture_type_e pic_type,
+ WORD32 i4_max_frm_bits,
+ WORD32 *pi4_cur_est_texture_bits,
+ float af_sum_weigh[MAX_PIC_TYPE][3],
+ WORD32 i4_call_type,
+ float i_to_avg_ratio,
+ frame_info_t *ps_frame_stat,
+ WORD32 i4_complexity_bin,
+ WORD32 i4_scene_num,
+ WORD32 *i4_curr_bits_estimated,
+ WORD32 *pi4_is_model_valid,
+ WORD32 *pi4_vbv_buf_max_bits,
+ WORD32 *pi4_est_tex_bits,
+ WORD32 *pi4_cur_est_header_bits,
+ WORD32 *pi4_maxEbfQP,
+ WORD32 *pi4_modelQP,
+ WORD32 *pi4_estimate_to_calc_frm_error);
+
+WORD32 clip_qp_based_on_prev_ref(
+ rate_control_handle rate_control_api,
+ picture_type_e e_pic_type,
+ WORD32 i4_call_type,
+ WORD32 i4_scene_num);
+
+/* Obtain the VBV buffer status information */
+vbv_buf_status_e get_buffer_status(
+ rate_control_handle rate_control_api,
+ WORD32 i4_total_frame_bits, /* Total frame bits consumed */
+ picture_type_e e_pic_type,
+ WORD32 *pi4_num_bits_to_prevent_vbv_underflow);
+
+/* Returns previous frame estimated bits for SCD validation*/
+WORD32 get_prev_frm_est_bits(rate_control_handle ps_rate_control_api);
+
+WORD32 rc_set_estimate_status(
+ rate_control_handle ps_rate_control_api,
+ WORD32 i4_tex_bits,
+ WORD32 i4_hdr_bits,
+ WORD32 i4_est_text_bits_ctr_get_qp);
+
+void rc_reset_pic_model(rate_control_handle ps_rate_control_api, picture_type_e pic_type);
+
+/*reset the flag at qp query stage itself to differentiate scd frame for qp offset*/
+void rc_reset_first_frame_coded_flag(
+ rate_control_handle ps_rate_control_api, picture_type_e pic_type);
+
+/* get an estimate of total bits to find estimate of header bits after L1 stage in pre-enc*/
+WORD32 rc_get_scene_change_est_header_bits(
+ rate_control_handle ps_rate_control_api,
+ WORD32 i4_num_pixels,
+ WORD32 i4_fsim_lap_avg,
+ float af_sum_weigh[MAX_PIC_TYPE][3],
+ float i_to_avg_rest_ratio);
+
+/* Used when the picture handling module needs to move to the next frame type. This happens
+when the get frame qp and update frame qp do not happen within a frame and when there can be
+multiple get frame qps before an update. If this function is called then the
+i4_is_pic_handling_done argument of update_frame_level_info should be set to 1, else 0 */
+void update_pic_handling_state(rate_control_handle ps_rate_control_api, picture_type_e e_pic_type);
+
+LWORD64 get_gop_sad(rate_control_handle ps_rate_control_api);
+
+LWORD64 get_gop_bits(rate_control_handle ps_rate_control_api);
+
+WORD32 check_if_current_GOP_is_simple(rate_control_handle ps_rate_control_api);
+
+/* Updates the frame level changes in the Rate control */
+void update_frame_level_info(
+ rate_control_handle ps_rate_control_api,
+ picture_type_e e_pic_type,
+ LWORD64 *pi8_mb_type_sad, /* Frame level SAD for each type of MB[Intra/Inter] */
+ WORD32 i4_total_frame_bits, /* Total frame bits actually consumed */
+ WORD32 i4_model_updation_hdr_bits, /*header bits for model updation*/
+ WORD32 *
+ pi4_mb_type_tex_bits, /* Total texture bits consumed for each type of MB[Intra/Inter] used for model */
+ LWORD64 *pi8_tot_mb_type_qp, /* Total qp of all MBs based on mb type */
+ WORD32 *pi4_tot_mb_in_type, /* total number of mbs in each mb type */
+ WORD32 i4_avg_activity, /* Average mb activity in frame */
+ UWORD8 u1_is_scd, /* Is a scene change detected at the current frame */
+ WORD32 i4_is_it_a_skip, /* If it's a pre-encode skip */
+ WORD32 i4_intra_frm_cost, /* Sum of Intra cost for each frame */
+ WORD32
+ i4_is_pic_handling_done, /* Is pic handling [update_pic_handling_state] done before update */
+ WORD32 i4_suppress_bpic_update,
+ WORD32 i4_bits_to_be_stuffed,
+ WORD32 i4_is_pause_to_resume,
+ WORD32 i4_lap_window_comp,
+ WORD32 i4_is_end_of_gop,
+ WORD32 i4_lap_based_bits_reset,
+ frame_info_t *ps_frame_info,
+ WORD32 i4_is_rc_model_needs_to_be_updated,
+ WORD8 i1_qp_offset,
+ WORD32 i4_scene_num,
+ WORD32 i4_num_frm_enc_in_scene,
+ WORD32
+ i4_est_text_bits_ctr_update_qp); /*complexity of future lap window used to set target buffer level at end if GOP*/
+
+void update_frame_rc_get_frame_qp_info(
+ rate_control_handle ps_rate_control_api,
+ picture_type_e rc_pic_type,
+ WORD32 i4_is_scd,
+ WORD32 i4_is_pause_to_resume,
+ WORD32 i4_avg_frame_qp_q6,
+ WORD32 i4_suppress_bpic_update,
+ WORD32 i4_scene_num,
+ WORD32 i4_num_frm_enc_in_scene);
+
+void reset_rc_for_pause_to_play_transition(rate_control_handle ps_rate_control_api);
+
+WORD32 is_first_frame_coded(rate_control_handle ps_rate_control_api);
+
+void rc_put_sad(
+ rate_control_handle ps_rate_control_api,
+ WORD32 i4_cur_intra_sad,
+ WORD32 i4_cur_sad,
+ WORD32 i4_cur_pic_type);
+
+WORD32 rc_get_qp_for_scd_frame(
+ rate_control_handle ps_rate_control_api,
+ picture_type_e e_pic_type,
+ LWORD64 i8_satd_act_accum,
+ WORD32 i4_num_pels_in_frame,
+ WORD32 i4_est_I_pic_head_bits,
+ WORD32 i4_f_sim_lap_avg,
+ void *offline_model_coeff,
+ float i_to_avg_ratio,
+ WORD32 i4_true_scd,
+ float af_sum_weigh[MAX_PIC_TYPE][3],
+ frame_info_t *ps_frame_stat,
+ WORD32 i4_rc_2_pass,
+ WORD32 i4_is_not_an_I_pic,
+ WORD32 i4_ref_first_pass,
+ WORD32 i4_call_type,
+ WORD32 *pi4_total_bits,
+ WORD32 *i4_curr_bits_estimated,
+ WORD32 i4_use_offline_model_2pass,
+ LWORD64 *pi8_i_tex_bits,
+ float *pf_i_qs,
+ WORD32 i4_best_br_id,
+ WORD32 *pi4_estimate_to_calc_frm_error);
+
+void rc_set_num_scd_in_lap_window(
+ rate_control_handle ps_rate_control_api,
+ WORD32 i4_num_scd_in_lap_window,
+ WORD32 i4_num_frames_b4_scd);
+
+void rc_set_next_sc_i_in_rc_look_ahead(
+ rate_control_handle ps_rate_control_api, WORD32 i4_next_sc_i_in_rc_look_ahead);
+
+void rc_update_mismatch_error(rate_control_handle ps_rate_control_api, WORD32 i4_error_bits);
+
+/*temp function to verify I only model*/
+WORD32 rc_get_qp_scene_change_bits(
+ rate_control_handle ps_rate_control_api,
+ WORD32 i4_total_bits,
+ LWORD64 i8_satd_by_act_accum,
+ WORD32 i4_num_pixel,
+ void *offline_model_coeff,
+ float f_i_to_average_rest,
+ WORD32 i4_call_type);
+
+WORD32 rc_get_bpp_based_scene_cut_qp(
+ rate_control_handle ps_rate_control_api,
+ picture_type_e e_pic_type,
+ WORD32 i4_num_pels_in_frame,
+ WORD32 i4_f_sim_lap,
+ float af_sum_weigh[MAX_PIC_TYPE][3],
+ WORD32 i4_call_type);
+
+/*****************************************************************************
+ MB LEVEL API (just wrapper functions)
+*****************************************************************************/
+/* Initialises frame level information for mb level qp */
+void init_mb_rc_frame_level(
+ rate_control_handle ps_rate_control_api, UWORD8 u1_frame_qp); /* Current frame qp*/
+
+WORD32 get_bits_to_stuff(
+ rate_control_handle ps_rate_control_api,
+ WORD32 i4_tot_consumed_bits,
+ picture_type_e e_pic_type);
+
+/******************************************************************************
+ Control Level API functions
+Logic: The control call sets the state structure of the rate control api
+accordingly such that the next process call would implement the same.
+******************************************************************************/
+/* Re-initialise the rate control module with the same old parameters */
+/* void re_init_rate_control(rate_control_handle ps_rate_control_api); */
+
+/* RC API call to change the inter frame interval */
+void change_inter_frm_int_call(rate_control_handle ps_rate_control_api, WORD32 i4_inter_frm_int);
+
+/* RC API call to change the intra frame interval */
+void change_intra_frm_int_call(rate_control_handle ps_rate_control_api, WORD32 i4_intra_frm_int);
+
+/* Sets the necessary changes for the new average bit rate */
+void change_avg_bit_rate(
+ rate_control_handle ps_rate_control_api, UWORD32 u4_average_bit_rate, UWORD32 u4_peak_bit_rate);
+
+/* This is used for SOURCE FRAME RATE change from the application
+ use case. Target frame rate change is taken care using the
+ change_frm_rate_for_bit_alloc interface and modify frame rate
+ module */
+void change_frame_rate(
+ rate_control_handle ps_rate_control_api,
+ UWORD32 u4_frame_rate,
+ UWORD32 u4_src_ticks,
+ UWORD32 u4_target_ticks);
+
+/* Used when the change in frame rate should affect only the bit_allocation.
+ This makes sense when the target frame rate changes. This change
+ is done gradually with the use of modify frame rate. Refer to the
+ test application for a better use case */
+void change_frm_rate_for_bit_alloc(rate_control_handle ps_rate_control_api, UWORD32 u4_frame_rate);
+
+/* Set the init Qp values */
+void change_init_qp(
+ rate_control_handle ps_rate_control_api, WORD32 *pi4_init_qp, WORD32 i4_scene_num);
+
+/* Sets the necessary changes for the new peak bit rate */
+
+void force_I_frame(rate_control_handle ps_rate_control_api);
+
+void change_min_max_qp(rate_control_handle ps_rate_control_api, WORD32 *pi4_min_max_qp);
+
+/********************************************************************************
+ Getter functions
+For getting the current state of the rate control structures
+********************************************************************************/
+UWORD32 rc_get_frame_rate(rate_control_handle ps_rate_control_api);
+UWORD32 rc_get_bit_rate(rate_control_handle ps_rate_control_api);
+UWORD32 rc_get_intra_frame_interval(rate_control_handle ps_rate_control_api);
+UWORD32 rc_get_inter_frame_interval(rate_control_handle ps_rate_control_api);
+rc_type_e rc_get_rc_type(rate_control_handle ps_rate_control_api);
+WORD32 rc_get_bits_per_frame(rate_control_handle ps_rate_control_api);
+
+UWORD32 rc_get_peak_bit_rate(rate_control_handle ps_rate_control_api, WORD32 i4_index);
+UWORD32 rc_get_max_delay(rate_control_handle ps_rate_control_api);
+UWORD32 rc_get_seq_no(rate_control_handle ps_rate_control_api);
+
+WORD32 rc_get_rem_bits_in_period(rate_control_handle ps_rate_control_api);
+WORD32 rc_get_vbv_buf_fullness(rate_control_handle ps_rate_control_api);
+WORD32 rc_get_vbv_buf_size(rate_control_handle ps_rate_control_api);
+WORD32 rc_get_vbv_fulness_with_cur_bits(rate_control_handle ps_rate_control_api, UWORD32 u4_bits);
+WORD32 get_rc_target_bits(rate_control_handle ps_rate_control_api);
+WORD32 get_orig_rc_target_bits(rate_control_handle ps_rate_control_api);
+WORD32 rc_get_prev_header_bits(rate_control_handle ps_rate_control_api, WORD32 pic_type);
+WORD32 rc_get_prev_P_QP(rate_control_handle ps_rate_control_api, WORD32 i4_scene_num);
+WORD32 rc_update_ppic_sad(
+ rate_control_handle ps_rate_control_api, WORD32 i4_est_sad, WORD32 i4_prev_ppic_sad);
+void rc_get_sad(rate_control_handle ps_rate_control_api, WORD32 *pi4_sad);
+WORD32 rc_get_ebf(rate_control_handle ps_rate_control_api);
+void rc_init_set_ebf(rate_control_handle ps_rate_control_api, WORD32 i32_init_ebf);
+void rc_update_prev_frame_intra_sad(
+ rate_control_handle ps_rate_control_api, WORD32 i4_intra_frame_sad);
+WORD32 rc_get_prev_frame_intra_sad(rate_control_handle ps_rate_control_api);
+/*TO DO: previous frame intra SAD update function can also be replaced by below function*/
+void rc_update_prev_frame_sad(
+ rate_control_handle ps_rate_control_api, WORD32 i4_intra_frame_sad, picture_type_e e_pic_type);
+WORD32 rc_get_prev_frame_sad(rate_control_handle ps_rate_control_api, picture_type_e e_pic_type);
+
+/*update fsim of lap whenever fsim is updated in rc context*/
+void rc_put_temp_comp_lap(
+ rate_control_handle ps_rate_control_api,
+ WORD32 i4_lap_fsim,
+ LWORD64 i8_per_pixel_p_frm_hme_sad_q10,
+ picture_type_e e_pic_type);
+
+void rc_get_pic_distribution(
+ rate_control_handle ps_rate_control_api, WORD32 ai4_pic_type[MAX_PIC_TYPE]);
+
+void rc_get_actual_pic_distribution(
+ rate_control_handle ps_rate_control_api, WORD32 ai4_pic_type[MAX_PIC_TYPE]);
+
+void rc_reset_Kp_Kb(
+ rate_control_handle ps_rate_control_api,
+ float f_i_to_avg_rest,
+ WORD32 i4_num_active_pic_type,
+ float f_curr_hme_sad_per_pixel,
+ WORD32 *pi4_complexity_bin,
+ WORD32 i4_rc_pass);
+
+WORD32 rc_get_kp_kb(rate_control_handle ps_rate_control_api, picture_type_e e_pic_type);
+WORD32 rc_get_ebf(rate_control_handle ps_rate_control_api);
+
+float rc_get_cur_peak_factor_2pass(rate_control_handle ps_rate_control_api);
+float rc_get_offline_normalized_complexity(
+ WORD32 i4_intra_int, WORD32 i4_luma_pels, float f_per_pixel_complexity, WORD32 i4_pass_number);
+
+void rc_bit_alloc_detect_ebf_stuff_scenario(
+ rate_control_handle ps_rate_control_api,
+ WORD32 i4_num_frm_bef_scd_lap2,
+ LWORD64 i4_total_bits_est_consu_lap2,
+ WORD32 i4_max_inter_frm_int);
+
+LWORD64 rc_get_rbip_and_num_frames(rate_control_handle ps_rate_contro_api, WORD32 *pi4_num_frames);
+
+WORD32 bit_alloc_get_estimated_bits_for_pic(
+ rate_control_handle ps_rate_contro_api,
+ WORD32 i4_cur_frm_est_cl_sad,
+ WORD32 i4_prev_frm_cl_sad,
+ picture_type_e e_pic_type);
+
+void rc_get_max_hme_sad_per_pixel(rate_control_handle ps_rate_control_api, WORD32 i4_total_pixels);
+
+void rc_update_pic_distn_lap_to_rc(
+ rate_control_handle ps_rate_contro_api, WORD32 ai4_num_pic_type[MAX_PIC_TYPE]);
+
+void rc_set_bits_based_on_complexity(
+ rate_control_handle ps_rate_contro_api, WORD32 i4_lap_window_comp, WORD32 i4_num_frames);
+
+void rc_set_avg_qscale_first_pass(
+ rate_control_handle ps_rate_contro_api, float f_average_qscale_1st_pass);
+
+void rc_set_max_avg_qscale_first_pass(
+ rate_control_handle ps_rate_control_api, float f_max_average_qscale_1st_pass);
+
+void rc_set_i_to_sum_api_ba(rate_control_handle ps_rate_contro_api, float f_curr_i_to_sum);
+
+float rc_get_min_complexity_factor_2pass(rate_control_handle ps_rate_contro_api);
+
+void rc_set_p_to_i_complexity_ratio(
+ rate_control_handle ps_rate_contro_api, float f_p_to_i_comp_ratio);
+
+void rc_set_scd_in_period(rate_control_handle ps_rate_contro_api, WORD32 i4_scd_in_period);
+
+void rc_ba_get_qp_offset_offline_data(
+ rate_control_handle ps_rate_contro_api,
+ WORD32 ai4_offsets[5],
+ float f_hme_sad_per_pixel,
+ WORD32 i4_num_active_pic_type,
+ WORD32 *pi4_complexity_bin);
+
+float rc_api_gop_level_averagae_q_scale_without_offset(rate_control_handle ps_rate_control_api);
+picture_type_e rc_getprev_ref_pic_type(rate_control_handle ps_rate_control_api);
+WORD32 rc_get_actual_intra_frame_int(rate_control_handle ps_rate_control_api);
+float rc_get_qscale_max_clip_in_second_pass(rate_control_handle ps_rate_control_api);
+void rc_set_2pass_total_frames(
+ rate_control_handle ps_rate_control_api, WORD32 i4_total_2pass_frames);
+void rc_set_2pass_avg_bit_rate(
+ rate_control_handle ps_rate_control_api, LWORD64 i8_2pass_avg_bit_rate);
+
+void rc_set_enable_look_ahead(rate_control_handle ps_rate_control_api, WORD32 i4_enable_look_ahead);
+
+void rc_add_est_tot(rate_control_handle ps_rate_control_api, WORD32 i4_tot_tex_bits);
+void rc_init_buffer_info(
+ rate_control_handle ps_rate_control_api,
+ WORD32 *pi4_vbv_buffer_size,
+ WORD32 *pi4_currEbf,
+ WORD32 *pi4_maxEbf,
+ WORD32 *pi4_drain_rate);
+
+#endif
diff --git a/encoder/rate_control_api_structs.h b/encoder/rate_control_api_structs.h
new file mode 100644
index 0000000..e9f5044
--- /dev/null
+++ b/encoder/rate_control_api_structs.h
@@ -0,0 +1,122 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file rate_control_api_structs.h
+*
+* \brief
+* This file contains rate_control API struct and constant macro
+*
+* \date
+*
+* \author
+* ittiam
+*
+******************************************************************************
+*/
+#ifndef _RATE_CONTROL_API_STRUCTS_H_
+#define _RATE_CONTROL_API_STRUCTS_H_
+
+/* The following definitions were present in rc_cntrl_param.h, moved to this file
+ as it is used by rate_control_api.c*/
+/*#define VBR_BIT_ALLOC_PERIOD 3 num_frm_in_period = BIT_ALLOC_PERIOD*intra_frame_interval */
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+#define CBR_BIT_ALLOC_PERIOD 1
+#define MAX_SCENE_NUM_RC 30
+#define HALF_MAX_SCENE_NUM_RC (MAX_SCENE_NUM_RC / 2) /* parenthesized so the division binds before any surrounding operator */
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+/* Rate control state structure */
+typedef struct rate_control_api_t
+{
+ rc_type_e e_rc_type; /* RC Algorithm */
+ UWORD8 u1_is_mb_level_rc_on; /* Whether MB level rc is enabled or not */
+ /* rate_control_param_t s_rate_control_param; Store a copy of input parameters for re-initialisation */
+ pic_handling_handle ps_pic_handling; /* Picture handling struct */
+ rc_rd_model_handle aps_rd_model[MAX_PIC_TYPE]; /* Model struct for I and P frms */
+ vbr_storage_vbv_handle ps_vbr_storage_vbv; /* VBR storage VBV structure */
+ est_sad_handle ps_est_sad; /* Calculate the estimated SAD */
+ bit_allocation_handle ps_bit_allocation; /* Allocation of bits for each frame */
+ mb_rate_control_handle ps_mb_rate_control; /* MB Level rate control state structure */
+ sad_acc_handle ps_sad_acc; /* Sad accumulator */
+ UWORD8 au1_is_first_frm_coded[MAX_PIC_TYPE];
+ WORD32 ai4_prev_frm_qp[MAX_SCENE_NUM_RC][MAX_PIC_TYPE];
+ WORD32 ai4_prev_frm_qp_q6[MAX_SCENE_NUM_RC][MAX_PIC_TYPE];
+
+ cbr_buffer_handle ps_cbr_buffer;
+ UWORD8 au1_avg_bitrate_changed[MAX_PIC_TYPE];
+ UWORD8 u1_is_first_frm;
+ /* UWORD8 au1_min_max_qp[(MAX_PIC_TYPE << 1)]; */
+ WORD32 ai4_min_qp[MAX_PIC_TYPE];
+ WORD32 ai4_max_qp[MAX_PIC_TYPE];
+ WORD32 ai4_max_qp_q6[MAX_PIC_TYPE];
+ WORD32 ai4_min_qp_q6[MAX_PIC_TYPE];
+
+ WORD32 i4_prev_frm_est_bits;
+ WORD32 i4_orig_frm_est_bits;
+ vbr_str_prms_t s_vbr_str_prms;
+ init_qp_handle ps_init_qp;
+ /* Store the values which are to be impacted after a delay */
+ UWORD32 u4_frms_in_delay_prd_for_peak_bit_rate_change;
+ UWORD32 au4_new_peak_bit_rate[MAX_NUM_DRAIN_RATES];
+ picture_type_e prev_ref_pic_type;
+ WORD32 i4_P_to_I_ratio;
+ WORD32 ai4_min_texture_bits[MAX_PIC_TYPE];
+ /* Complexity based buffer movement */
+ WORD32 i4_prev_ref_is_scd;
+ WORD32 i4_is_hbr; /*Flag to indicate CBR_NLDRC_HBR*/
+ WORD32 i4_num_active_pic_type;
+ WORD32 i4_lap_f_sim;
+ WORD32 i4_quality_preset;
+ WORD32 i4_scd_I_frame_estimated_tot_bits;
+ WORD32 i4_I_frame_qp_model; /*offline = 0, online = 1*/
+ LWORD64 i8_per_pixel_p_frm_hme_sad_q10;
+ UWORD32 u4_min_scd_hevc_qp;
+ UWORD32 u4_bit_depth_based_max_qp;
+ UWORD8 u1_bit_depth;
+ FILE *pf_rc_stat_file;
+ WORD32 i4_rc_pass; /*variable to differentiate first pass and second pass*/
+ WORD32 i4_max_frame_width;
+ WORD32 i4_max_frame_height;
+ void *pv_2pass_gop_summary;
+ WORD32 i4_num_gop;
+ void *pv_rc_sys_api;
+ /*In static cases signal the future underflow warning to lower the qp*/
+ WORD32 i4_underflow_warning;
+ float f_max_hme_sad_per_pixel;
+ /*f_p_to_i_comp_ratio is for comparison of pre intra complexity of i & p frames
+ It is used for jacking up of p frame qp if i frame was
+ extremely simple to avoid overconsumption of bits in p frame*/
+ float f_p_to_i_comp_ratio;
+ /*i4_scd_in_period_2_pass is used to signal the scd in period for 2 pass
+ this signal is one of the criteria for clipping the sudden increase of qp*/
+ WORD32 i4_scd_in_period_2_pass;
+ WORD32 i4_is_infinite_gop;
+ WORD32 i4_frames_since_last_scd;
+ WORD32 i4_num_frame_parallel;
+ WORD32 ai4_est_tot_bits[MAX_NUM_FRAME_PARALLEL];
+ WORD32 i4_capped_vbr_flag;
+} rate_control_api_t;
+
+#endif /*_RATE_CONTROL_API_STRUCTS_H_*/
diff --git a/encoder/rc_cntrl_param.h b/encoder/rc_cntrl_param.h
new file mode 100644
index 0000000..a734a0b
--- /dev/null
+++ b/encoder/rc_cntrl_param.h
@@ -0,0 +1,128 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file rc_cntrl_param.h
+*
+* \brief
+* This file should contain only enumerations and macros exported to codec
+* by RC
+*
+* \date
+*
+* \author
+* ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _RC_CNTRL_PARAM_H_
+#define _RC_CNTRL_PARAM_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+#define QSCALE_Q_FAC 6
+#define QSCALE_Q_FAC_3 3
+
+/*corresponds to hevc qp = 4, not letting it go below 4 for better stability*/
+#define MIN_QSCALE_Q6 64
+
+#define INVALID_QP (-55)
+#define SCD_MIN_HEVC_QP 18
+
+/*For very high bitrate reduce min qp limit*/
+#define SCD_MIN_HEVC_QP_HBR 14
+#define SCD_MIN_HEVC_QP_VHBR 10
+
+#define SCD_MAX_HEVC_QP 48
+#define MAX_HEVC_QP 51
+
+/*classification based on i_to_average rest ratio so that appropriate qp offset can be chosen*/
+#define I_TO_REST_EXTREME_FAST (1.5)
+#define I_TO_REST_VVFAST (3)
+#define I_TO_REST_VFAST (5)
+#define I_TO_REST_FAST (8)
+#define I_TO_REST_MEDI (10)
+#define I_TO_REST_SLOW (14)
+
+#define MAX_NUM_FRAME_PARALLEL 8
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+/* RC algo type */
+typedef enum
+{
+ VBR_STORAGE = 0,
+ VBR_STORAGE_DVD_COMP = 1,
+ VBR_STREAMING = 2,
+ CONST_QP = 3,
+ CBR_LDRC = 4,
+ CBR_NLDRC = 5,
+ CBR_NLDRC_HBR = 6,
+ MAX_RC_TYPE
+} rc_type_e;
+
+/* Field offset constant (single enumerator, value 4); its use is not visible in this header - TODO confirm against callers */
+typedef enum
+{
+ FIELD_OFFSET = 4
+} field_offset_e;
+
+/* Picture type enumeration: BUF_PIC is -1, I_PIC is 0 and the remaining types follow sequentially */
+typedef enum
+{
+ BUF_PIC = -1,
+ I_PIC = 0,
+ P_PIC,
+ B_PIC,
+ B1_PIC,
+ B2_PIC,
+ P1_PIC,
+ BB_PIC,
+ B11_PIC,
+ B22_PIC,
+ MAX_PIC_TYPE /* number of picture types counted from I_PIC (evaluates to 9); used as an array dimension */
+} picture_type_e;
+
+typedef enum
+{
+ I_PIC_SCD = 0x100,
+ NA_PIC
+} picture_type_SCD;
+
+/* MB Type structure*/
+typedef enum
+{
+ MB_TYPE_INTRA,
+ MB_TYPE_INTER,
+ MAX_MB_TYPE /* Based on MB TYPES added the array size increases */
+} mb_type_e;
+
+typedef enum
+{
+ VBV_NORMAL,
+ VBV_UNDERFLOW,
+ VBV_OVERFLOW,
+ VBR_CAUTION
+} vbv_buf_status_e;
+
+#endif
diff --git a/encoder/rc_common.h b/encoder/rc_common.h
new file mode 100644
index 0000000..9c55863
--- /dev/null
+++ b/encoder/rc_common.h
@@ -0,0 +1,154 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file rc_common.h
+*
+* \brief
+* This file contains common macro used by rate control module
+*
+* \date
+*
+* \author
+* ittiam
+*
+******************************************************************************
+*/
+#ifndef _RC_COMMON_H_
+#define _RC_COMMON_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+/****************************************************************************
+ NOTE : Put only those things into this file which are common across many
+ files, say I_TO_P_BIT_RATIO macro is used across bit_allocation.c
+ and rate_control_api.c.If anything is exclusive only to one file,
+ define it in the same file
+
+ This file is an RC private file. It should not be exported to Codec
+ ****************************************************************************/
+
+/* Defines the maximum and the minimum quantizer allowed in the stream.*/
+#define MAX_MPEG2_QP (4095) /*255*/ /* 127*/
+#define ERR_2PASS_DISTR_GOP 5
+
+#define ENABLE_SSD_CALC_RC 0
+
+#ifdef ARM9
+/* Mem tab alignment to 4 bytes */
+#define MEM_TAB_ALIGNMENT 32 /*ALIGN_WORD32*/
+#else /* ARM968 */
+/* Mem tab alignment to 128 bytes */
+#define MEM_TAB_ALIGNMENT ALIGN_128_BYTE
+#endif /* ARM968 */
+
+#define COMP_TO_BITS_MAP(X, factor) /* quartic polynomial in X scaled by factor; X is expanded several times, so avoid side effects */ \
+ ((-1.7561f * ((X) * (X) * (X) * (X)) + (2.5547f * (X) * (X) * (X)) - 0.3408f * ((X) * (X)) + \
+ (0.5343f * (X)) - 0.003f) * \
+ (factor))
+
+#define COMP_TO_BITS_MAP_2_PASS(X, factor) /* same quartic as COMP_TO_BITS_MAP; X is expanded several times, so avoid side effects */ \
+ ((-1.7561f * ((X) * (X) * (X) * (X)) + (2.5547f * (X) * (X) * (X)) - 0.3408f * ((X) * (X)) + \
+ (0.5343f * (X)) - 0.003f) * \
+ (factor))
+/* Calculates P1 = (X1 * Y1) / Z1 (assuming all four are integers). Expands to a braced block that declares temporaries vq_a/vq_b/vq_c, so it is a statement, not an expression; P1 must be an lvalue because its address is passed to number_t_to_word32 */
+#define X_PROD_Y_DIV_Z(X1, Y1, Z1, P1) \
+ { \
+ number_t vq_a, vq_b, vq_c; \
+ SET_VAR_Q(vq_a, (X1), 0); \
+ SET_VAR_Q(vq_b, (Y1), 0); \
+ SET_VAR_Q(vq_c, (Z1), 0); \
+ mult32_var_q(vq_a, vq_b, &vq_a); \
+ div32_var_q(vq_a, vq_c, &vq_a); \
+ number_t_to_word32(vq_a, &(P1)); \
+ }
+
+/* Maximum number of drain-rates supported. Currently a maximum of only 2
+ drain-rates supported. One for
+ I pictures and the other for P & B pictures */
+#define MAX_NUM_DRAIN_RATES 2
+
+/* The ratios between I to P and P to B Qp is specified here */
+#define K_Q 4
+#define I_TO_P_RATIO (18) /*(16)*/ /* In K_Q Q factor */
+#define P_TO_B_RATIO (18) /* In K_Q Q factor */
+#define B_TO_B1_RATIO (18)
+#define B1_TO_B2_RATIO (18)
+#define P_TO_I_RATIO (14)
+#define I_TO_B_RATIO ((P_TO_B_RATIO * I_TO_P_RATIO) >> K_Q)
+#define I_TO_B1_RATIO ((B_TO_B1_RATIO * P_TO_B_RATIO * I_TO_P_RATIO) >> (K_Q + K_Q))
+#define I_TO_B2_RATIO \
+ ((B1_TO_B2_RATIO * B_TO_B1_RATIO * P_TO_B_RATIO * I_TO_P_RATIO) >> (K_Q + K_Q + K_Q))
+
+#define P_TO_B_RATIO_HBR (16)
+#define I_TO_P_RATIO_LOW_MOTION (20)
+#define I_TO_P_RATIO_VLOW_MOTION (23)
+#define I_TO_P_RATIO_VVLOW_MOTION (26)
+
+/* #define I_TO_P_RATIO (16) In K_Q Q factor */
+/* #define P_TO_B_RATIO (16) In K_Q Q factor */
+
+/*Ratio of I frame bit consumption vs average bit consumption for the rest of the GOP.
+ * This is based on experimental runs over different sequences (same resolution scaled)*/
+#define I_TO_AVG_REST_GOP_BIT (8)
+#define MINIMUM_VISIBILITY_B4_STATIC_I \
+ (18) //assumes this minimum lap window for bit allocation of static frame
+#define MINIMUM_FRM_I_TO_REST_LAP_ENABLED (8)
+#define I_TO_AVG_REST_GOP_BIT_MIN (1)
+#define I_TO_AVG_REST_GOP_BIT_MAX (20)
+#define I_TO_AVG_REST_GOP_BIT_MAX_INFINITE (80)
+#define I_TO_AVG_REST_GOP_BIT_MAX_2_PASS (40)
+#define I_TO_AVG_REST_GOP_BIT_MIN_2_PASS (0.5f)
+
+#define UPPER_THRESHOLD_EBF_Q4 (15)
+#define STATIC_I_TO_REST_MULTIPLIER (6)
+
+/*also present in encoder header file with same name*/
+#define MAX_LAP_COMPLEXITY_Q7 (90)
+#define DEFAULT_TEX_PERCENTAGE_Q5 24
+
+#ifdef DISABLE_NON_STEADY_STATE_CODE
+#define NON_STEADSTATE_CODE (0)
+#else
+#define NON_STEADSTATE_CODE (1)
+#endif
+
+/*HEVC_hierarchy*/
+#define I_TO_P_BIT_RATIO (6)
+#define P_TO_B_BIT_RATIO (2)
+#define B_TO_B1_BIT_RATO0 (2)
+#define B1_TO_B2_BIT_RATIO (2)
+
+/*define static I_TO_P ratio for all pic types*/
+/* Trying to detect static content and fix the quality for that content. The trigger for such
+content is the ratio between the estimated I frame and that of P being more than 18 times. If such
+simple content is detected then the bit distribution is fixed to a ratio of 36:2:1 (I:P:B) (NOTE(review): the macros below define 100:2:1:1 - confirm which is intended) */
+#define STATIC_I_TO_B2_RATIO (100) //(24)
+#define STATIC_P_TO_B2_RATIO (2)
+#define STATIC_B_TO_B2_RATIO (1)
+#define STATIC_B1_TO_B2_RATIO (1)
+
+/*Fsim limits*/
+#define RC_FSIM_LOW_THR_SCD 64
+
+#define RC_FSIM_HIGH_THR_STATIC 115
+
+#endif /* _RC_COMMON_H_ */
diff --git a/encoder/rc_frame_info_collector.h b/encoder/rc_frame_info_collector.h
new file mode 100644
index 0000000..9738f5b
--- /dev/null
+++ b/encoder/rc_frame_info_collector.h
@@ -0,0 +1,179 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file rc_frame_info_collector.h
+*
+* \brief
+* This file contains structs used by encoder to pass information to RC lib
+*
+* \date
+*
+* \author
+* ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _RC_FRAME_INFO_COLLECTOR_H_
+#define _RC_FRAME_INFO_COLLECTOR_H_
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+
+#define MAX_NUM_FRAME_IN_GOP 300
+#define MAX_CHAR_IN_LINE 250
+#define MAX_MEM_FOR_LINE (MAX_CHAR_IN_LINE + 1)
+
+// Minimum allocate memory for 10 bit allocation period
+#define MIN_GOP_FOR_MEM_ALLOC 10
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+typedef struct
+{
+ WORD32 sm; /* MSB 1 bit sign & rest magnitude */
+ WORD32 e; /* Q-format */
+} number_t_frame;
+
+typedef struct
+{
+ LWORD64 i8_frame_num;
+ WORD32 i4_poc;
+ picture_type_e e_pic_type;
+ WORD32 i4_rc_hevc_qp;
+ WORD32 i4_scene_type; /*Should be in sync with what lap signals*/
+ float f_8bit_q_scale;
+ float f_8bit_q_scale_without_offset;
+ float f_hbd_q_scale;
+ float f_hbd_q_scale_without_offset;
+ LWORD64 i8_cl_sad;
+ LWORD64 i8_tex_bits;
+ LWORD64 i8_header_bits;
+ LWORD64 i8_L1_me_sad;
+ LWORD64 i8_L1_ipe_raw_sad;
+ LWORD64 i8_L1_me_or_ipe_raw_sad;
+ LWORD64 i8_L0_open_cost;
+ LWORD64 i8_est_texture_bits;
+ WORD32 i4_num_scd_in_lap_window;
+ WORD32 i4_num_frames_b4_scd;
+ WORD32 i4_num_entries;
+ LWORD64 i8_frame_acc_coarse_me_cost;
+ WORD32 i4_lap_f_sim;
+ float i_to_avg_bit_ratio;
+ WORD32 i4_lap_complexity_q7;
+ WORD32 i4_lap_var;
+ LWORD64 i8_num_bit_alloc_period;
+ WORD8 i1_is_complexity_based_bits_reset;
+ float af_sum_weigh[MAX_PIC_TYPE][3];
+ number_t_frame model_coeff_a_lin_wo_int;
+ WORD32 i4_flag_rc_model_update;
+ WORD32 i4_non_i_scd;
+} frame_info_t;
+
+typedef struct
+{
+ LWORD64 i4_gop_count;
+ WORD32 i4_tot_frm_in_gop;
+ //WORD32 ai4_pic_dist_in_cur_gop[MAX_PIC_TYPE];
+ float f_bits_complexity_l1_based;
+ float f_bits_complexity_l1_based_peak_factor;
+ float f_complexity_l1_based;
+ LWORD64 i8_bits_allocated_to_gop;
+ LWORD64 i8_tot_bits_consumed_first_pass;
+ float f_tot_bits_into_qscale_first_pass;
+ LWORD64 i8_L1_complexity_sad[MAX_NUM_FRAME_IN_GOP];
+ WORD8 ai1_is_complexlity_reset_bits[MAX_NUM_FRAME_IN_GOP];
+ WORD8 ai1_scene_type[MAX_NUM_FRAME_IN_GOP];
+ float f_den_wt_bits;
+ WORD32 ai4_pic_type[MAX_NUM_FRAME_IN_GOP];
+ LWORD64 ai8_head_bits_consumed[MAX_NUM_FRAME_IN_GOP];
+ LWORD64 ai8_tex_bits_consumed[MAX_NUM_FRAME_IN_GOP];
+ WORD32 ai4_first_pass_qscale[MAX_NUM_FRAME_IN_GOP];
+ WORD32 ai4_q6_frame_offsets[MAX_NUM_FRAME_IN_GOP];
+ float f_gop_level_buffer_play_factor;
+ //WORD32 i4_flag_query_buffer_excess;
+ float f_hbd_avg_q_scale_gop_without_offset;
+ WORD32 i4_num_scene_cuts;
+ LWORD64 i8_minimum_gop_bits;
+ WORD32 i4_is_below_avg_rate_gop_frame;
+ LWORD64 i8_cur_gop_bit_consumption;
+ LWORD64 i8_actual_bits_allocated_to_gop;
+ //LWORD64 i8_buffer_play_bits;
+ LWORD64 i8_buffer_play_bits_allocated_to_gop;
+ WORD32 i4_peak_br_clip;
+ float f_buffer_play_complexity;
+ float f_avg_complexity_factor;
+ //float f_gop_level_complexity_sum;
+ LWORD64 i8_max_bit_for_gop;
+ LWORD64 i8_acc_gop_sad;
+} gop_level_stat_t;
+/*This should be exact order in which data should be dumped and read back from stat file*/
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+typedef enum
+{
+ S_FRAME_NUM = 0,
+ S_POC,
+ S_PIC_TYPE,
+ S_HEVCQP,
+ S_SCENE_TYPE,
+ S_QSCALE,
+ S_CL_SAD,
+ S_HEAD_BITS,
+ S_TEXT_BITS,
+ S_EST_TEX_BITS,
+ S_L1_ME_SAD,
+ S_L1_IPE_SAD,
+ MAX_PARAM_DUMP
+} DUMP_PARAM_TYPE_E;
+
+/*****************************************************************************/
+/* Extern Function Declarations */
+/*****************************************************************************/
+void init_frame_info(frame_info_t *frame_info);
+
+void multi_pass_dump_frame_level_stat_binary(
+ FILE *pf_stat_file,
+ frame_info_t *ps_finfo,
+ void *pv_lap_out,
+ WORD32 i4_size_of_lap_out,
+ void *pv_rc_lap_out,
+ WORD32 i4_size_of_rc_lap_out,
+ void *pv_sys_rc_api,
+ WORD32 i4_br_id_for_2pass);
+
+WORD32 multi_pass_extract_frame_data_binary(
+ FILE *pf_stat_file,
+ frame_info_t *ps_finfo,
+ void *pv_lap_out,
+ WORD32 i4_sizeof_lap_out,
+ void *pv_rc_lap_out,
+ WORD32 i4_sizeof_rc_lap_out,
+ LWORD64 i8_frame_offset,
+ WORD32 i4_flag,
+ void *pv_sys_rc_api,
+ WORD32 i4_br_id_for_2pass);
+
+#endif
diff --git a/encoder/rc_look_ahead_params.h b/encoder/rc_look_ahead_params.h
new file mode 100644
index 0000000..d80ef06
--- /dev/null
+++ b/encoder/rc_look_ahead_params.h
@@ -0,0 +1,146 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file rc_look_ahead_params.h
+*
+* \brief
+* Declares the rc_picture_metrics_t and rc_lap_out_params_t structures
+*
+* \date
+*
+* \author
+* ittiam
+*
+******************************************************************************
+*/
+
+#ifndef _RC_LOOK_AHEAD_PARAMS_H_
+#define _RC_LOOK_AHEAD_PARAMS_H_
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+/*
+* Frame metrics
+*/
+typedef struct /* Picture metrics; embedded in rc_lap_out_params_t as s_pic_metrics */
+{
+ /* Frame variance, a spatial property. NOTE(review): name suggests luma variance over 8x8 blocks -- confirm */
+ LWORD64 i8_8x8_var_lum;
+
+ /* Histogram similarity (three values) and frame similarity */
+ WORD32 ai4_hsim[3];
+ WORD32 i4_fsim;
+
+} rc_picture_metrics_t;
+
+typedef struct
+{
+ /* Parameters common to both lap_out and rc_lap_out */
+
+ WORD32 i4_rc_pic_type;
+ WORD32 i4_rc_poc;
+ WORD32 i4_rc_temporal_lyr_id;
+ WORD32 i4_rc_is_ref_pic;
+ WORD32 i4_rc_scene_type;
+ UWORD32 u4_rc_scene_num;
+ WORD32 i4_rc_display_num;
+ WORD32 i4_rc_quality_preset;
+ WORD32 i4_rc_first_field;
+
+ /* Parameters specific to rc_lap_out */
+
+ /**
+ * Untyped pointer (declared void *) used to reach the rc_lap_out_params_t
+ * entries, and thereby the picture metrics, of future pictures in capture
+ * order, up to the look-ahead frames
+ */
+ void *ps_rc_lap_out_next_encode;
+
+ WORD32 i4_next_pic_type;
+
+ WORD32 i4_is_I_only_scd;
+ WORD32 i4_is_non_I_scd;
+
+ LWORD64 i8_frame_satd_act_accum;
+ LWORD64 i8_est_I_pic_header_bits;
+
+ /* Number of pixels in the frame considered while accumulating the above SATD metric */
+ WORD32 i4_num_pels_in_frame_considered;
+ /* Field type, i.e. whether this is the bottom or the top field, is conveyed here */
+ WORD32 i4_is_bottom_field;
+ /* Coarse ME accumulated cost for the entire frame */
+ LWORD64 i8_frame_acc_coarse_me_cost;
+ /* Coarse ME accumulated SAD for the entire frame, 52 entries (NOTE(review): presumably one per QP -- confirm) */
+ LWORD64 ai8_frame_acc_coarse_me_sad[52];
+ /* L1 intra SATD */
+ LWORD64 i8_pre_intra_satd;
+ /* L1 intra SAD per the field name (the earlier note said SATD), 52 entries -- NOTE(review): confirm */
+ LWORD64 ai8_pre_intra_sad[52];
+ /* L1 IPE SAD */
+ LWORD64 i8_raw_pre_intra_sad;
+ /* Frame-level L1 ME SAD */
+ LWORD64 i8_raw_l1_coarse_me_sad;
+ /** Frame-level L1 satd/act accumulation */
+ LWORD64 i8_frame_satd_by_act_L1_accum;
+ /** Frame-level L1 satd/act accumulation, "evaluated" variant (NOTE(review): difference to the field above is not visible here) */
+ LWORD64 i8_satd_by_act_L1_accum_evaluated;
+ /* Frame satd/act accumulated for L0, predicted from the L1 satd and the QP used for L0 processing */
+ LWORD64 i8_frm_satd_act_accum_L0_frm_L1;
+
+ /* Validity of the online/offline model for this frame (NOTE(review): polarity is not visible here -- confirm) */
+ WORD32 i4_is_model_valid;
+ /* Steady-state frame flag */
+ WORD32 i4_is_steady_state;
+
+ LWORD64 i8_est_text_bits;
+ LWORD64 i8_frame_num;
+
+ frame_info_t *ps_frame_info;
+ /* Complexity metrics from LAP */
+ rc_picture_metrics_t s_pic_metrics;
+
+ WORD32 i4_is_cmplx_change_reset_model;
+ WORD32 i4_is_cmplx_change_reset_bits;
+ WORD32 i4_is_rc_model_needs_to_be_updated;
+ WORD32 i4_next_sc_i_in_rc_look_ahead;
+ WORD32 ai4_num_pic_type[MAX_PIC_TYPE];
+ WORD32 ai4_offsets[5];
+ WORD32 i4_offsets_set_flag;
+ WORD32 i4_complexity_bin;
+ WORD32 i4_ignore_for_rc_update;
+ WORD32 i4_L1_qp;
+ WORD32 i4_L0_qp;
+ WORD32 i4_enable_lookahead;
+ WORD32 i4_orig_rc_qp;
+ WORD32 i4_use_offline_model_2pass;
+ WORD32 i4_next_scene_type;
+ WORD32 i4_perc_dc_blks;
+
+ /* Used only in the ix and vx versions */
+ LWORD64 i8_frame_acc_satd_cost;
+ WORD32 i4_l1_update_done;
+ WORD32 i4_rc_i_pic_lamda_offset;
+ float f_rc_pred_factor;
+
+} rc_lap_out_params_t;
+
+#endif
diff --git a/encoder/rc_rd_model.c b/encoder/rc_rd_model.c
new file mode 100644
index 0000000..a9165fb
--- /dev/null
+++ b/encoder/rc_rd_model.c
@@ -0,0 +1,1022 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/****************************************************************************/
+/* File Name : rc_rd_model.c */
+/* */
+/* Description : Implements all the Functions to Model the */
+/* Rate Distortion Behaviour of the Codec over the Last */
+/* Few Frames. */
+/* */
+/* List of Functions : update_frame_rd_model */
+/* estimate_mpeg2_qp_for_resbits */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 21 06 2006 ittiam Initial Version */
+/****************************************************************************/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdarg.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+
+/* User include files */
+#include "ittiam_datatypes.h"
+#include "rc_common.h"
+#include "var_q_operator.h"
+#include "mem_req_and_acq.h"
+#include "rc_rd_model.h"
+#include "rc_rd_model_struct.h"
+
+#if !(RC_FIXED_POINT)
+
+#if NON_STEADSTATE_CODE
+WORD32 rc_rd_model_num_fill_use_free_memtab(
+    rc_rd_model_t **pps_rc_rd_model, itt_memtab_t *ps_memtab, ITT_FUNC_TYPE_E e_func_type)
+{
+    /* Stand-in state object: no real state memory exists during the query/fill calls */
+    static rc_rd_model_t s_placeholder_state;
+    WORD32 i4_num_memtabs = 0;
+
+    /* Point the caller's handle at the stand-in so that it is never left dangling */
+    if((GET_NUM_MEMTAB == e_func_type) || (FILL_MEMTAB == e_func_type))
+        *pps_rc_rd_model = &s_placeholder_state;
+
+    /* Entry 0 of ps_memtab carries the rc_rd_model_t state structure */
+    if(GET_NUM_MEMTAB != e_func_type)
+    {
+        itt_memtab_t *ps_state_memtab = &ps_memtab[0];
+        fill_memtab(ps_state_memtab, sizeof(rc_rd_model_t), MEM_TAB_ALIGNMENT, PERSISTENT, DDR);
+        use_or_fill_base(ps_state_memtab, (void **)pps_rc_rd_model, e_func_type);
+    }
+    i4_num_memtabs++;
+
+    return i4_num_memtabs;
+}
+
+void init_frm_rc_rd_model(rc_rd_model_t *ps_rd_model, UWORD8 u1_max_frames_modelled)
+{
+    /* Upper bound that add_frame_to_rd_model applies to u1_num_frms_in_model */
+    ps_rd_model->u1_max_frms_to_model = u1_max_frames_modelled;
+
+    /* Start empty: no frames in the model, frame counter at 0 */
+    ps_rd_model->u1_curr_frm_counter = 0;
+    ps_rd_model->u1_num_frms_in_model = 0;
+
+    /* Quadratic model coefficients */
+    ps_rd_model->model_coeff_c_quad = 0;
+    ps_rd_model->model_coeff_b_quad = 0;
+    ps_rd_model->model_coeff_a_quad = 0;
+
+    /* Linear model (with intercept) coefficients */
+    ps_rd_model->model_coeff_c_lin = 0;
+    ps_rd_model->model_coeff_b_lin = 0;
+    ps_rd_model->model_coeff_a_lin = 0;
+
+    /* Linear model without intercept coefficients */
+    ps_rd_model->model_coeff_c_lin_wo_int = 0;
+    ps_rd_model->model_coeff_b_lin_wo_int = 0;
+    ps_rd_model->model_coeff_a_lin_wo_int = 0;
+}
+
+void reset_frm_rc_rd_model(rc_rd_model_t *ps_rd_model)
+{
+    /* Mark the model as holding no frames; u1_max_frms_to_model is left untouched */
+    ps_rd_model->u1_curr_frm_counter = 0;
+    ps_rd_model->u1_num_frms_in_model = 0;
+
+    /* Clear all three coefficient sets: quadratic, linear, linear without intercept */
+    ps_rd_model->model_coeff_c_quad = 0;
+    ps_rd_model->model_coeff_b_quad = 0;
+    ps_rd_model->model_coeff_a_quad = 0;
+
+    ps_rd_model->model_coeff_c_lin = 0;
+    ps_rd_model->model_coeff_b_lin = 0;
+    ps_rd_model->model_coeff_a_lin = 0;
+    ps_rd_model->model_coeff_c_lin_wo_int = 0;
+    ps_rd_model->model_coeff_b_lin_wo_int = 0;
+    ps_rd_model->model_coeff_a_lin_wo_int = 0;
+}
+#endif /* #if NON_STEADSTATE_CODE */
+
+#if ENABLE_QUAD_MODEL
+/*
+ * find_model_coeffs: least-squares fit of bits (y) against x = SAD / avg_qp over the frames
+ * listed in pi1_frame_index[0 .. u1_num_frms - 1]; entries equal to -1 are skipped.
+ *
+ * pi4_res_bits, pi4_sad_h264, pui_avg_mpeg2_qp: per-frame data, indexed by frame index.
+ * pu1_num_skips: not used here.
+ * u1_model_used: requested model. When QUAD is 0 it is forced to LIN_MODEL; otherwise
+ *   QUAD_MODEL is demoted to LIN_MODEL if at most MIN_FRAMES_FOR_QUAD_MODEL frames were used.
+ * pmc_model_coeff: written only for QUAD_MODEL; [0] = x^2 coefficient, [1] = x coefficient,
+ *   [2] = constant (all zero when the fit is degenerate).
+ * pmc_model_coeff_lin: may be NULL; written only when the variance of x is non-zero;
+ *   [0] = [1] = slope, [2] = intercept.
+ * pmc_model_coeff_lin_wo_int: may be NULL; [1] = bits * qp / sad of the slot preceding
+ *   ps_rd_model->u1_curr_frm_counter (0 when that sad is 0), [0] = [2] = 0.
+ * Returns u1_model_used after the adjustments described above.
+ */
+static UWORD8 find_model_coeffs(
+    UWORD32 *pi4_res_bits,
+    UWORD32 *pi4_sad_h264,
+    UWORD8 *pu1_num_skips,
+    UWORD8 *pui_avg_mpeg2_qp,
+    UWORD8 u1_num_frms,
+    UWORD8 u1_model_used,
+    WORD8 *pi1_frame_index,
+    model_coeff *pmc_model_coeff,
+    model_coeff *pmc_model_coeff_lin,
+    model_coeff *pmc_model_coeff_lin_wo_int,
+    rc_rd_model_t *ps_rd_model)
+{
+    UWORD32 i;
+    UWORD8 u1_num_frms_used = 0;
+    UWORD8 u1_frm_indx;
+    float sum_y = 0, sum_x_y = 0, sum_x2_y = 0;
+    float sum_x = 0, sum_x2 = 0, sum_x3 = 0, sum_x4 = 0;
+    float var_x2_y = 0, var_x_y = 0, var_x2_x = 0, var_x2_x2 = 0, var_x_x = 0;
+    float x0, y0;
+    /* Zero-initialised so that a degenerate quadratic fit (zero denominator) stores defined values */
+    float model_coeff_a = 0, model_coeff_b = 0, model_coeff_c = 0, model_coeff_den;
+
+    /* Accumulate the moments of x and y over the selected frames */
+    for(i = 0; i < u1_num_frms; i++)
+    {
+        if(-1 == pi1_frame_index[i])
+            continue;
+
+        u1_frm_indx = (UWORD8)pi1_frame_index[i];
+        y0 = (float)(pi4_res_bits[u1_frm_indx]);
+        x0 = (float)(pi4_sad_h264[u1_frm_indx] / (float)pui_avg_mpeg2_qp[u1_frm_indx]);
+        sum_y += y0;
+        sum_x_y += x0 * y0;
+        sum_x2_y += x0 * x0 * y0;
+        sum_x += x0;
+        sum_x2 += x0 * x0;
+        sum_x3 += x0 * x0 * x0;
+        sum_x4 += x0 * x0 * x0 * x0;
+        u1_num_frms_used++;
+    }
+
+    /* Turn the sums into means; skipped when no frame was selected, to avoid 0 / 0 */
+    if(0 != u1_num_frms_used)
+    {
+        sum_y /= u1_num_frms_used;
+        sum_x_y /= u1_num_frms_used;
+        sum_x2_y /= u1_num_frms_used;
+        sum_x /= u1_num_frms_used;
+        sum_x2 /= u1_num_frms_used;
+        sum_x3 /= u1_num_frms_used;
+        sum_x4 /= u1_num_frms_used;
+    }
+
+#if !QUAD
+    u1_model_used = LIN_MODEL;
+#endif
+    if((QUAD_MODEL == u1_model_used) && (u1_num_frms_used <= MIN_FRAMES_FOR_QUAD_MODEL))
+    {
+        u1_model_used = LIN_MODEL;
+    }
+
+    if(QUAD_MODEL == u1_model_used)
+    {
+        var_x2_y = sum_x2_y - sum_x2 * sum_y;
+        var_x_y = sum_x_y - sum_x * sum_y;
+        var_x2_x = sum_x3 - sum_x2 * sum_x;
+        var_x2_x2 = sum_x4 - sum_x2 * sum_x2;
+        var_x_x = sum_x2 - sum_x * sum_x;
+        model_coeff_den = (var_x2_x * var_x2_x - var_x2_x2 * var_x_x);
+
+        if(0 != model_coeff_den)
+        {
+            model_coeff_b = (var_x_y * var_x2_x - var_x2_y * var_x_x);
+            model_coeff_b /= model_coeff_den;
+            model_coeff_a = (var_x2_y * var_x2_x - var_x_y * var_x2_x2);
+            model_coeff_a /= model_coeff_den;
+            model_coeff_c = sum_y - (model_coeff_a * sum_x) - (model_coeff_b * sum_x2);
+        }
+
+        pmc_model_coeff[0] = model_coeff_b;
+        pmc_model_coeff[1] = model_coeff_a;
+        pmc_model_coeff[2] = model_coeff_c;
+    }
+
+    if(NULL != pmc_model_coeff_lin)
+    {
+        var_x_y = sum_x_y - sum_x * sum_y;
+        var_x_x = sum_x2 - sum_x * sum_x;
+        if(0 != var_x_x)
+        {
+            model_coeff_a = (var_x_y / var_x_x);
+            model_coeff_c = sum_y - (model_coeff_a * sum_x);
+            model_coeff_b = model_coeff_a; /* slot [0] duplicates the slope rather than holding 0 */
+            pmc_model_coeff_lin[0] = model_coeff_b;
+            pmc_model_coeff_lin[1] = model_coeff_a;
+            pmc_model_coeff_lin[2] = model_coeff_c;
+        }
+    }
+
+    if(NULL != pmc_model_coeff_lin_wo_int)
+    {
+        UWORD8 u1_curr_frame_index;
+        UWORD8 u1_avgqp_prvfrm;
+        UWORD32 u4_prevfrm_bits, u4_prevfrm_sad;
+        /* Step back one slot from the frame counter, wrapping at MAX_FRAMES_MODELLED */
+        u1_curr_frame_index = ps_rd_model->u1_curr_frm_counter;
+        if(0 == u1_curr_frame_index)
+            u1_curr_frame_index = (MAX_FRAMES_MODELLED - 1);
+        else
+            u1_curr_frame_index--;
+
+        u1_avgqp_prvfrm = ps_rd_model->pu1_avg_qp[u1_curr_frame_index];
+        u4_prevfrm_bits = ps_rd_model->pi4_res_bits[u1_curr_frame_index];
+        u4_prevfrm_sad = ps_rd_model->pi4_sad[u1_curr_frame_index];
+        if(0 != u4_prevfrm_sad)
+            model_coeff_a = (float)(u4_prevfrm_bits * u1_avgqp_prvfrm) / u4_prevfrm_sad;
+        else
+            model_coeff_a = 0;
+        model_coeff_b = 0;
+        model_coeff_c = 0;
+        pmc_model_coeff_lin_wo_int[0] = model_coeff_b;
+        pmc_model_coeff_lin_wo_int[1] = model_coeff_a;
+        pmc_model_coeff_lin_wo_int[2] = model_coeff_c;
+    }
+
+    return u1_model_used;
+}
+
+/*
+ * refine_set_of_points: measures how well the quadratic model in pmc_model_coeff
+ * ([0] * x^2 + [1] * x + [2], with x = SAD / avg_qp) predicts the bits of the frames listed
+ * in pi1_frame_index, then sets to -1 every entry whose squared relative deviation exceeds
+ * the average.  pu1_num_skips is not used.
+ * *pfl_avg_deviation receives the average squared relative deviation.
+ * Returns 0 if that average is above 0.0625, 2 if it is below 0.0225, 1 otherwise.
+ */
+static WORD8 refine_set_of_points(
+    UWORD32 *pi4_res_bits,
+    UWORD32 *pi4_sad_h264,
+    UWORD8 *pu1_num_skips,
+    UWORD8 *pui_avg_mpeg2_qp,
+    UWORD8 u1_num_frms,
+    WORD8 *pi1_frame_index,
+    model_coeff *pmc_model_coeff,
+    float *pfl_avg_deviation)
+{
+    float fl_avg_deviation = 0, fl_estimated_bits, fl_deviation, x_val;
+    UWORD8 u1_return_value = 1;
+    UWORD32 i;
+    UWORD8 u1_num_frms_used = 0, u1_frm_indx;
+
+    /* Pass 1: average squared relative deviation over the valid entries */
+    for(i = 0; i < u1_num_frms; i++)
+    {
+        if(-1 == pi1_frame_index[i])
+            continue;
+
+        u1_frm_indx = (UWORD8)pi1_frame_index[i];
+        x_val = pi4_sad_h264[u1_frm_indx] / (float)pui_avg_mpeg2_qp[u1_frm_indx];
+
+        fl_estimated_bits = (pmc_model_coeff[0] * x_val * x_val) + (pmc_model_coeff[1] * x_val) +
+                            (pmc_model_coeff[2]);
+        fl_deviation =
+            fabs(pi4_res_bits[u1_frm_indx] - fl_estimated_bits) / (float)pi4_res_bits[u1_frm_indx];
+        fl_deviation = fl_deviation * fl_deviation;
+        fl_avg_deviation += fl_deviation;
+        u1_num_frms_used++;
+    }
+    fl_avg_deviation /= u1_num_frms_used;
+
+    /* Pass 2: drop the outliers.  Entries already marked -1 are skipped for every i; letting
+       i == 0 through, as before, turned -1 into frame index 255 and read out of bounds */
+    for(i = 0; i < u1_num_frms; i++)
+    {
+        if(-1 == pi1_frame_index[i])
+            continue;
+
+        u1_frm_indx = (UWORD8)pi1_frame_index[i];
+        x_val = pi4_sad_h264[u1_frm_indx] / (float)pui_avg_mpeg2_qp[u1_frm_indx];
+
+        fl_estimated_bits = (pmc_model_coeff[0] * x_val * x_val) + (pmc_model_coeff[1] * x_val) +
+                            (pmc_model_coeff[2]);
+        fl_deviation =
+            fabs(pi4_res_bits[u1_frm_indx] - fl_estimated_bits) / (float)pi4_res_bits[u1_frm_indx];
+        fl_deviation = fl_deviation * fl_deviation;
+
+        if(fl_deviation > (fl_avg_deviation))
+            pi1_frame_index[i] = -1;
+    }
+
+    if(fl_avg_deviation > 0.0625)
+        u1_return_value = 0;
+    if(fl_avg_deviation < 0.0225)
+        u1_return_value = 2;
+
+    *pfl_avg_deviation = fl_avg_deviation;
+
+    return (u1_return_value);
+}
+/*
+ * calc_avg_sqr_dev_for_model: average squared relative deviation between the actual bits and
+ * the bits predicted by the linear model pmc_model_coeff[1] * x + pmc_model_coeff[2]
+ * (x = SAD / avg_qp), taken over the frames listed in pi1_frame_index[0 .. u1_num_frms - 1];
+ * entries equal to -1 are skipped.  pu1_num_skips is not used.
+ * The result is written to *pfl_avg_deviation.
+ */
+static void calc_avg_sqr_dev_for_model(
+    UWORD32 *pi4_res_bits,
+    UWORD32 *pi4_sad_h264,
+    UWORD8 *pu1_num_skips,
+    UWORD8 *pui_avg_mpeg2_qp,
+    UWORD8 u1_num_frms,
+    WORD8 *pi1_frame_index,
+    model_coeff *pmc_model_coeff,
+    float *pfl_avg_deviation)
+{
+    float fl_avg_deviation, fl_estimated_bits, fl_deviation, x_val;
+    UWORD32 i;
+    UWORD8 u1_num_frms_used = 0, u1_frm_indx;
+
+    fl_avg_deviation = 0;
+    for(i = 0; i < u1_num_frms; i++)
+    {
+        if(-1 == pi1_frame_index[i])
+            continue;
+
+        /* Use the selected frame's slot; this index used to be overwritten by the loop counter */
+        u1_frm_indx = (UWORD8)pi1_frame_index[i];
+        x_val = pi4_sad_h264[u1_frm_indx] / (float)pui_avg_mpeg2_qp[u1_frm_indx];
+
+        fl_estimated_bits = (pmc_model_coeff[1] * x_val) + (pmc_model_coeff[2]);
+
+        fl_deviation =
+            fabs(pi4_res_bits[u1_frm_indx] - fl_estimated_bits) / (float)pi4_res_bits[u1_frm_indx];
+        fl_deviation = fl_deviation * fl_deviation;
+        fl_avg_deviation += fl_deviation;
+        u1_num_frms_used++;
+    }
+
+    fl_avg_deviation /= u1_num_frms_used;
+    *pfl_avg_deviation = fl_avg_deviation;
+}
+/*
+ * update_frame_rd_model: re-fits the rate model of ps_rd_model from its stored frames.
+ * Walks the circular per-frame arrays backwards from the slot preceding u1_curr_frm_counter,
+ * picks the regression points, calls find_model_coeffs / refine_set_of_points and finally
+ * stores u1_model_used (QUAD_MODEL, LIN_MODEL or PREV_FRAME_MODEL) and all coefficient sets.
+ */
+static void update_frame_rd_model(rc_rd_model_t *ps_rd_model)
+{
+    WORD8 pi1_frame_index[MAX_FRAMES_MODELLED], pi1_frame_index_initial[MAX_FRAMES_MODELLED];
+    UWORD8 u1_num_skips_temp;
+    UWORD8 u1_avg_mpeg2_qp_temp, u1_min_mpeg2_qp, u1_max_mpeg2_qp;
+    UWORD8 u1_num_frms_input, u1_num_active_frames, u1_reject_frame;
+    UWORD32 u4_num_skips;
+    UWORD8 u1_min2_mpeg2_qp, u1_max2_mpeg2_qp;
+    UWORD8 u1_min_qp_frame_indx, u1_max_qp_frame_indx;
+    UWORD8 pu1_num_frames[MPEG2_QP_ELEM];
+    /* Zero-initialised: not every path below writes all three coefficient sets, yet all of
+       them are copied into ps_rd_model at the end */
+    model_coeff model_coeff_array[3] = { 0 };
+    model_coeff model_coeff_array_lin[3] = { 0 };
+    model_coeff model_coeff_array_lin_wo_int[3] = { 0 };
+    UWORD32 i;
+    UWORD8 u1_curr_frame_index;
+    UWORD8 u1_lin_model_valid;
+    float fl_quad_avg_dev = 0, fl_lin_avg_dev = 0;
+    UWORD8 u1_check_model;
+
+    /* Start from the slot just before the frame counter, wrapping at MAX_FRAMES_MODELLED */
+    u1_curr_frame_index = ps_rd_model->u1_curr_frm_counter;
+    if(0 == u1_curr_frame_index)
+        u1_curr_frame_index = (MAX_FRAMES_MODELLED - 1);
+    else
+        u1_curr_frame_index--;
+
+    ps_rd_model->u1_model_used = QUAD_MODEL;
+
+    /************************************************************************/
+    /* Rearrange data to be fed into a Linear Regression Module             */
+    /* Module finds a,b,c such that                                         */
+    /*      y = ax + bx^2 + c                                               */
+    /************************************************************************/
+    u4_num_skips = 0; /* never changed below, so the filters only pass frames without skips */
+    u1_num_frms_input = 0;
+    memset(pu1_num_frames, 0, MPEG2_QP_ELEM);
+    memset(pi1_frame_index, -1, MAX_FRAMES_MODELLED);
+    u1_min_mpeg2_qp = MAX_MPEG2_QP;
+    u1_max_mpeg2_qp = 0;
+
+    u1_num_active_frames = ps_rd_model->u1_num_frms_in_model;
+    if(u1_num_active_frames > MAX_ACTIVE_FRAMES)
+        u1_num_active_frames = MAX_ACTIVE_FRAMES;
+
+    /************************************************************************/
+    /* Choose the set of Points to be used for MSE fit of Quadratic model   */
+    /* Points chosen are spread across the Qp range. Max of 2 points are    */
+    /* chosen for a Qp.                                                     */
+    /************************************************************************/
+    for(i = 0; i < u1_num_active_frames; i++)
+    {
+        u1_reject_frame = 0;
+        u1_num_skips_temp = ps_rd_model->pu1_num_skips[u1_curr_frame_index];
+        u1_avg_mpeg2_qp_temp = ps_rd_model->pu1_avg_qp[u1_curr_frame_index];
+
+        if((0 == u4_num_skips) && (0 != u1_num_skips_temp))
+            u1_reject_frame = 1;
+        if((1 == u4_num_skips) && (u1_num_skips_temp > 1))
+            u1_reject_frame = 1;
+        if(pu1_num_frames[u1_avg_mpeg2_qp_temp] >= 2)
+            u1_reject_frame = 1;
+        /* The first frame visited is always part of the data set */
+        if(0 == i)
+            u1_reject_frame = 0;
+
+        if(0 == u1_reject_frame)
+        {
+            pi1_frame_index[u1_num_frms_input] = (WORD8)u1_curr_frame_index;
+            pu1_num_frames[u1_avg_mpeg2_qp_temp] += 1;
+
+            if(u1_min_mpeg2_qp > u1_avg_mpeg2_qp_temp)
+                u1_min_mpeg2_qp = u1_avg_mpeg2_qp_temp;
+            if(u1_max_mpeg2_qp < u1_avg_mpeg2_qp_temp)
+                u1_max_mpeg2_qp = u1_avg_mpeg2_qp_temp;
+
+            u1_num_frms_input++;
+        }
+
+        if(0 == u1_curr_frame_index)
+            u1_curr_frame_index = (MAX_FRAMES_MODELLED - 1);
+        else
+            u1_curr_frame_index--;
+    }
+
+    /************************************************************************/
+    /* Add Pivot Points to the Data set to be used for finding Quadratic    */
+    /* Model Coeffs. These will help in constraining the shape of Quadratic */
+    /* to adapt too much to the Local deviations.                           */
+    /************************************************************************/
+    u1_min2_mpeg2_qp = u1_min_mpeg2_qp;
+    u1_max2_mpeg2_qp = u1_max_mpeg2_qp;
+    u1_min_qp_frame_indx = INVALID_FRAME_INDEX;
+    u1_max_qp_frame_indx = INVALID_FRAME_INDEX;
+
+    /* Loop running over the remaining stored frames
+       to find frames of MinQp and MaxQp */
+    for(; i < ps_rd_model->u1_num_frms_in_model; i++)
+    {
+        u1_num_skips_temp = ps_rd_model->pu1_num_skips[u1_curr_frame_index];
+        u1_avg_mpeg2_qp_temp = ps_rd_model->pu1_avg_qp[u1_curr_frame_index];
+        /* Same skip filter as above; the slot index still has to step back for a filtered frame */
+        if(!(((0 == u4_num_skips) && (0 != u1_num_skips_temp)) ||
+             ((1 == u4_num_skips) && (u1_num_skips_temp > 1))))
+        {
+            if(u1_min2_mpeg2_qp > u1_avg_mpeg2_qp_temp)
+            {
+                u1_min2_mpeg2_qp = u1_avg_mpeg2_qp_temp;
+                u1_min_qp_frame_indx = u1_curr_frame_index;
+            }
+            if(u1_max2_mpeg2_qp < u1_avg_mpeg2_qp_temp)
+            {
+                u1_max2_mpeg2_qp = u1_avg_mpeg2_qp_temp;
+                u1_max_qp_frame_indx = u1_curr_frame_index;
+            }
+        }
+        if(0 == u1_curr_frame_index)
+            u1_curr_frame_index = (MAX_FRAMES_MODELLED - 1);
+        else
+            u1_curr_frame_index--;
+    }
+
+    /* Add the Chosen Points to the regression data set, never writing past the index array */
+    if((INVALID_FRAME_INDEX != u1_min_qp_frame_indx) && (u1_num_frms_input < MAX_FRAMES_MODELLED))
+    {
+        pi1_frame_index[u1_num_frms_input] = (WORD8)u1_min_qp_frame_indx;
+        u1_num_frms_input++;
+    }
+    if((INVALID_FRAME_INDEX != u1_max_qp_frame_indx) && (u1_num_frms_input < MAX_FRAMES_MODELLED))
+    {
+        pi1_frame_index[u1_num_frms_input] = (WORD8)u1_max_qp_frame_indx;
+        u1_num_frms_input++;
+    }
+    memcpy(pi1_frame_index_initial, pi1_frame_index, MAX_FRAMES_MODELLED);
+
+    if(QUAD_MODEL == ps_rd_model->u1_model_used)
+    {
+        if(u1_num_frms_input < (MIN_FRAMES_FOR_QUAD_MODEL))
+            ps_rd_model->u1_model_used = LIN_MODEL;
+        if((WORD32)u1_max_mpeg2_qp < ((WORD32)(21 * u1_min_mpeg2_qp) >> 4))
+            ps_rd_model->u1_model_used = LIN_MODEL;
+    }
+
+    if(LIN_MODEL == ps_rd_model->u1_model_used)
+    {
+        if(u1_num_frms_input < MIN_FRAMES_FOR_LIN_MODEL)
+            ps_rd_model->u1_model_used = PREV_FRAME_MODEL;
+        if((WORD32)u1_max_mpeg2_qp < ((WORD32)(19 * u1_min_mpeg2_qp) >> 4))
+            ps_rd_model->u1_model_used = PREV_FRAME_MODEL;
+    }
+
+    /***** Call the Module to Return the Coeffs for the Fed Data *****/
+    ps_rd_model->u1_model_used = find_model_coeffs(
+        ps_rd_model->pi4_res_bits,
+        ps_rd_model->pi4_sad,
+        ps_rd_model->pu1_num_skips,
+        ps_rd_model->pu1_avg_qp,
+        u1_num_frms_input,
+        ps_rd_model->u1_model_used,
+        pi1_frame_index,
+        model_coeff_array,
+        model_coeff_array_lin,
+        model_coeff_array_lin_wo_int,
+        ps_rd_model);
+
+    if((model_coeff_array_lin[2] > 0) || (model_coeff_array_lin[0] < 0))
+        u1_lin_model_valid = 0;
+    else
+    {
+        u1_lin_model_valid = 1;
+        /* lin deviation calculation, on the point set as it was before any refinement */
+        calc_avg_sqr_dev_for_model(
+            ps_rd_model->pi4_res_bits,
+            ps_rd_model->pi4_sad,
+            ps_rd_model->pu1_num_skips,
+            ps_rd_model->pu1_avg_qp,
+            u1_num_frms_input,
+            pi1_frame_index_initial,
+            model_coeff_array_lin,
+            &fl_lin_avg_dev);
+    }
+
+    if(QUAD_MODEL == ps_rd_model->u1_model_used)
+    {
+        u1_check_model = refine_set_of_points(
+            ps_rd_model->pi4_res_bits,
+            ps_rd_model->pi4_sad,
+            ps_rd_model->pu1_num_skips,
+            ps_rd_model->pu1_avg_qp,
+            u1_num_frms_input,
+            pi1_frame_index,
+            model_coeff_array,
+            &fl_quad_avg_dev);
+
+        if(2 == u1_check_model)
+        {
+            ps_rd_model->u1_model_used = QUAD_MODEL;
+        }
+        else
+        {
+            /*******************************************************************/
+            /* Refit the quadratic on the refined point set and measure it     */
+            /* again (only the quadratic coefficients are requested, the two   */
+            /* linear outputs are passed as NULL)                              */
+            /*******************************************************************/
+
+            ps_rd_model->u1_model_used = find_model_coeffs(
+                ps_rd_model->pi4_res_bits,
+                ps_rd_model->pi4_sad,
+                ps_rd_model->pu1_num_skips,
+                ps_rd_model->pu1_avg_qp,
+                u1_num_frms_input,
+                ps_rd_model->u1_model_used,
+                pi1_frame_index,
+                model_coeff_array,
+                NULL,
+                NULL,
+                ps_rd_model);
+
+            u1_check_model = refine_set_of_points(
+                ps_rd_model->pi4_res_bits,
+                ps_rd_model->pi4_sad,
+                ps_rd_model->pu1_num_skips,
+                ps_rd_model->pu1_avg_qp,
+                u1_num_frms_input,
+                pi1_frame_index,
+                model_coeff_array,
+                &fl_quad_avg_dev);
+
+            if((0 == u1_check_model))
+            {
+#if RC_MODEL_USED_BUG_FIX
+                if((fl_lin_avg_dev < fl_quad_avg_dev) && (1 == u1_lin_model_valid))
+#endif
+                    ps_rd_model->u1_model_used = LIN_MODEL;
+            }
+        }
+    }
+
+    if(QUAD_MODEL == ps_rd_model->u1_model_used)
+    {
+        /* model_coeff_array[0] is the x^2 coefficient: a negative value is rejected and the
+           linear model is used instead */
+        if(model_coeff_array[0] < 0)
+            ps_rd_model->u1_model_used = LIN_MODEL;
+    }
+    if(LIN_MODEL == ps_rd_model->u1_model_used)
+    {
+        if((model_coeff_array_lin[2] > 0) || (model_coeff_array_lin[0] < 0))
+            ps_rd_model->u1_model_used = PREV_FRAME_MODEL;
+    }
+
+#if RC_MODEL_USED_BUG_FIX
+    /* Another threshold of .25 on deviation i.e. deviation greater than 25% */
+    if((QUAD_MODEL == ps_rd_model->u1_model_used) && (fl_quad_avg_dev > .25))
+        ps_rd_model->u1_model_used = PREV_FRAME_MODEL;
+
+    if((LIN_MODEL == ps_rd_model->u1_model_used) && (fl_lin_avg_dev > .25))
+        ps_rd_model->u1_model_used = PREV_FRAME_MODEL;
+#endif /* #if RC_MODEL_USED_BUG_FIX */
+
+    ps_rd_model->model_coeff_b_quad = model_coeff_array[0];
+    ps_rd_model->model_coeff_a_quad = model_coeff_array[1];
+    ps_rd_model->model_coeff_c_quad = model_coeff_array[2];
+
+    ps_rd_model->model_coeff_b_lin = model_coeff_array_lin[0];
+    ps_rd_model->model_coeff_a_lin = model_coeff_array_lin[1];
+    ps_rd_model->model_coeff_c_lin = model_coeff_array_lin[2];
+
+    ps_rd_model->model_coeff_b_lin_wo_int = model_coeff_array_lin_wo_int[0];
+    ps_rd_model->model_coeff_a_lin_wo_int = model_coeff_array_lin_wo_int[1];
+    ps_rd_model->model_coeff_c_lin_wo_int = model_coeff_array_lin_wo_int[2];
+
+    /*ps_rd_model->u1_model_used = PREV_FRAME_MODEL;*/
+}
+#endif /* ENABLE_QUAD_MODEL */
+
+/* Bits predicted by the linear model without intercept for the given SAD and QP:
+   model_coeff_a_lin_wo_int * (SAD / QP), converted to UWORD32 */
+UWORD32 estimate_bits_for_qp(rc_rd_model_t *ps_rd_model, UWORD32 u4_estimated_sad, UWORD8 u1_avg_qp)
+{
+    float fl_sad_by_qp;
+    float fl_num_bits;
+
+    /* Divide in floating point, as the model fit does, so the fraction of SAD / QP is kept */
+    fl_sad_by_qp = (float)u4_estimated_sad / (float)u1_avg_qp;
+    fl_num_bits = ps_rd_model->model_coeff_a_lin_wo_int * fl_sad_by_qp;
+    return ((UWORD32)fl_num_bits);
+}
+
+/*
+ * find_qp_for_target_bits: inverts the model selected by ps_rd_model->u1_model_used to get
+ * x = SAD / QP for u4_target_res_bits, then returns u4_estimated_sad / x, limited to 255,
+ * clamped to [u1_min_qp, u1_max_qp] and rounded to the nearest integer.
+ * May change ps_rd_model->u1_model_used to PREV_FRAME_MODEL.
+ */
+UWORD8 find_qp_for_target_bits(
+    rc_rd_model_t *ps_rd_model,
+    UWORD32 u4_target_res_bits,
+    UWORD32 u4_estimated_sad,
+    UWORD8 u1_min_qp,
+    UWORD8 u1_max_qp)
+{
+    UWORD8 u1_qp;
+    /* x_value starts at 0: a u1_model_used matching no branch then takes the f_qp = 255 path */
+    float x_value = 0, f_qp;
+#if ENABLE_QUAD_MODEL
+    if(QUAD_MODEL == ps_rd_model->u1_model_used)
+    {
+        float det;
+        det = (ps_rd_model->model_coeff_a_quad * ps_rd_model->model_coeff_a_quad) -
+              (4 * (ps_rd_model->model_coeff_b_quad) *
+               (ps_rd_model->model_coeff_c_quad - u4_target_res_bits));
+        if(det > 0)
+        {
+            x_value = sqrt(det);
+            x_value =
+                (x_value - ps_rd_model->model_coeff_a_quad) / (2 * ps_rd_model->model_coeff_b_quad);
+        }
+        else
+            ps_rd_model->u1_model_used = PREV_FRAME_MODEL;
+    }
+
+    if(LIN_MODEL == ps_rd_model->u1_model_used)
+    {
+        x_value = ((float)u4_target_res_bits - ps_rd_model->model_coeff_c_lin) /
+                  (ps_rd_model->model_coeff_b_lin);
+    }
+#else
+    ps_rd_model->u1_model_used = PREV_FRAME_MODEL;
+#endif
+
+    if(PREV_FRAME_MODEL == ps_rd_model->u1_model_used)
+    {
+        x_value = (float)u4_target_res_bits / ps_rd_model->model_coeff_a_lin_wo_int;
+    }
+    if(0 != x_value)
+        f_qp = u4_estimated_sad / x_value;
+    else
+        f_qp = 255;
+    if(f_qp > 255)
+        f_qp = 255;
+
+    /* Truncating the QP to the Max and Min Qp values possible */
+    if(f_qp < u1_min_qp)
+        f_qp = u1_min_qp;
+    if(f_qp > u1_max_qp)
+        f_qp = u1_max_qp;
+    u1_qp = (UWORD8)(f_qp + 0.5);
+    return u1_qp;
+}
+
+void add_frame_to_rd_model(
+    rc_rd_model_t *ps_rd_model,
+    UWORD32 i4_res_bits,
+    UWORD8 u1_avg_mp2qp,
+    UWORD32 i4_sad_h264,
+    UWORD8 u1_num_skips)
+{
+    /* Slot of the circular per-frame arrays that receives this frame */
+    const UWORD8 u1_slot = ps_rd_model->u1_curr_frm_counter;
+
+    /* Record the frame's statistics in that slot */
+    ps_rd_model->pu1_avg_qp[u1_slot] = u1_avg_mp2qp;
+    ps_rd_model->pu1_num_skips[u1_slot] = u1_num_skips;
+    ps_rd_model->pi4_sad[u1_slot] = i4_sad_h264;
+    ps_rd_model->pi4_res_bits[u1_slot] = i4_res_bits;
+
+    /* Advance the write position, wrapping at MAX_FRAMES_MODELLED */
+    ps_rd_model->u1_curr_frm_counter++;
+    if(MAX_FRAMES_MODELLED == ps_rd_model->u1_curr_frm_counter)
+        ps_rd_model->u1_curr_frm_counter = 0;
+
+    /* The frame count saturates at u1_max_frms_to_model */
+    if(ps_rd_model->u1_num_frms_in_model < ps_rd_model->u1_max_frms_to_model)
+        ps_rd_model->u1_num_frms_in_model++;
+    update_frame_rd_model(ps_rd_model); /* refit with the new frame included */
+}
+
+WORD32 calc_per_frm_bits(
+ rc_rd_model_t *ps_rd_model, /* array of model structs, indexed by pic-type */
+ UWORD16 *pu2_num_pics_of_a_pic_type, /* N1, N2,...Nk */
+ UWORD8 *
+ pu1_update_pic_type_model, /* flag which tells whether or not to update model coefficients of a particular pic-type */
+ UWORD8 u1_num_pic_types, /* value of k */
+ UWORD32 *
+ pu4_num_skip_of_a_pic_type, /* the number of skips of that pic-type. It "may" be used to update the model coefficients at a later point. Right now it is not being used at all. */
+ UWORD8 u1_base_pic_type, /* base pic type index wrt which alpha & beta are calculated (not referenced in the body) */
+ float *pfl_gamma, /* gamma_i = beta_i / alpha_i */
+ float *pfl_eta, /* per pic-type factor; multiplied with gamma when scaling sad/qp for the current pic-type */
+ UWORD8
+ u1_curr_pic_type, /* the current pic-type for which the targetted bits need to be computed */
+ UWORD32
+ u4_bits_for_sub_gop, /* the number of bits to be consumed for the remaining part of sub-gop */
+ UWORD32 u4_curr_estimated_sad, /* divided by the solved sad/qp of the current frame to obtain the QP */
+ UWORD8 *pu1_curr_pic_type_qp) /* output of this function: the QP, limited to 255 */
+{
+ WORD32 i4_per_frm_bits_Ti;
+ UWORD8 u1_i;
+ rc_rd_model_t *ps_rd_model_of_pic_type;
+
+ /* first part of this function updates all the model coefficients */
+ /* for all the pic-types that have a non-zero picture count and their update flag set to 1 */
+ {
+ for(u1_i = 0; u1_i < u1_num_pic_types; u1_i++)
+ {
+ if((0 != pu2_num_pics_of_a_pic_type[u1_i]) && (1 == pu1_update_pic_type_model[u1_i]))
+ {
+ /* ps_rd_model_of_pic_type = ps_rd_model + u1_i; */
+
+ update_frame_rd_model(&ps_rd_model[u1_i]);
+ }
+ }
+ }
+
+ /* The second part of this function deals with solving the
+ equation using all the pic-types models */
+
+ {
+ UWORD8 u1_combined_model_used;
+
+ /* first choose the model to be used: QUAD_MODEL only if every pic-type in use has it */
+ u1_combined_model_used = QUAD_MODEL;
+
+ for(u1_i = 0; u1_i < u1_num_pic_types; u1_i++)
+ {
+ ps_rd_model_of_pic_type = ps_rd_model + u1_i;
+
+ if((0 != pu2_num_pics_of_a_pic_type[u1_i]) &&
+ (QUAD_MODEL != ps_rd_model_of_pic_type->u1_model_used))
+ {
+ u1_combined_model_used = LIN_MODEL;
+ break;
+ }
+ }
+
+ if(u1_combined_model_used == LIN_MODEL)
+ {
+ for(u1_i = 0; u1_i < u1_num_pic_types; u1_i++)
+ {
+ ps_rd_model_of_pic_type = ps_rd_model + u1_i;
+
+ if((0 != pu2_num_pics_of_a_pic_type[u1_i]) &&
+ (QUAD_MODEL != ps_rd_model_of_pic_type->u1_model_used) &&
+ (LIN_MODEL != ps_rd_model_of_pic_type->u1_model_used))
+ {
+ u1_combined_model_used = PREV_FRAME_MODEL;
+ break;
+ }
+ }
+ }
+
+ /* solve the equation for the chosen combined model */
+ {
+ model_coeff eff_A;
+ model_coeff eff_B;
+ model_coeff eff_C;
+ float fl_determinant;
+ float fl_sad_by_qp_base;
+ float fl_sad_by_qp_curr_frm;
+ float fl_qp_curr_frm;
+ float fl_bits_for_curr_frm;
+
+ /* If the combined chosen model is quad model */
+ if(QUAD_MODEL == u1_combined_model_used)
+ {
+ eff_A = 0.0;
+ eff_B = 0.0;
+ eff_C = 0.0;
+ for(u1_i = 0; u1_i < u1_num_pic_types; u1_i++)
+ {
+ ps_rd_model_of_pic_type = ps_rd_model + u1_i;
+
+ eff_A +=
+ ((pfl_eta[u1_i] + pu2_num_pics_of_a_pic_type[u1_i] - 1) *
+ ps_rd_model_of_pic_type->model_coeff_a_quad * pfl_gamma[u1_i]);
+ eff_B +=
+ ((pfl_eta[u1_i] * pfl_eta[u1_i] + pu2_num_pics_of_a_pic_type[u1_i] - 1) *
+ ps_rd_model_of_pic_type->model_coeff_b_quad * pfl_gamma[u1_i] *
+ pfl_gamma[u1_i]);
+ eff_C +=
+ (pu2_num_pics_of_a_pic_type[u1_i] *
+ ps_rd_model_of_pic_type->model_coeff_c_quad);
+ }
+ eff_C -= u4_bits_for_sub_gop;
+
+ fl_determinant = eff_A * eff_A - 4 * eff_B * eff_C;
+
+ if(fl_determinant < 0)
+ {
+ u1_combined_model_used =
+ PREV_FRAME_MODEL; /* TO BE replaced by LIN_MODEL later */
+ }
+ else
+ {
+ fl_determinant = sqrt(fl_determinant);
+
+ fl_sad_by_qp_base = fl_determinant - eff_A;
+ fl_sad_by_qp_base = fl_sad_by_qp_base / (2 * eff_B);
+
+ fl_sad_by_qp_curr_frm =
+ fl_sad_by_qp_base * pfl_gamma[u1_curr_pic_type] * pfl_eta[u1_curr_pic_type];
+
+ ps_rd_model_of_pic_type = ps_rd_model + u1_curr_pic_type;
+
+ fl_bits_for_curr_frm =
+ ps_rd_model_of_pic_type->model_coeff_a_quad * fl_sad_by_qp_curr_frm +
+ ps_rd_model_of_pic_type->model_coeff_b_quad * fl_sad_by_qp_curr_frm *
+ fl_sad_by_qp_curr_frm +
+ ps_rd_model_of_pic_type->model_coeff_c_quad;
+ }
+ }
+
+ /* If the combined chosen model is linear model with an intercept */
+ if(LIN_MODEL == u1_combined_model_used)
+ {
+ eff_A = 0.0;
+ eff_B = 0.0;
+ eff_C = 0.0;
+ for(u1_i = 0; u1_i < u1_num_pic_types; u1_i++)
+ {
+ ps_rd_model_of_pic_type = ps_rd_model + u1_i;
+
+ eff_A +=
+ ((pfl_eta[u1_i] + pu2_num_pics_of_a_pic_type[u1_i] - 1) *
+ ps_rd_model_of_pic_type->model_coeff_a_lin * pfl_gamma[u1_i]);
+
+ eff_C +=
+ (pu2_num_pics_of_a_pic_type[u1_i] *
+ ps_rd_model_of_pic_type->model_coeff_c_lin);
+ }
+ eff_C -= u4_bits_for_sub_gop;
+
+ fl_determinant = (-(eff_C / eff_A));
+
+ if((fl_determinant) <= 0)
+ {
+ u1_combined_model_used = PREV_FRAME_MODEL;
+ }
+ else
+ {
+ fl_sad_by_qp_base = fl_determinant;
+
+ fl_sad_by_qp_curr_frm =
+ fl_sad_by_qp_base * pfl_gamma[u1_curr_pic_type] * pfl_eta[u1_curr_pic_type];
+
+ ps_rd_model_of_pic_type = ps_rd_model + u1_curr_pic_type;
+
+ fl_bits_for_curr_frm =
+ ps_rd_model_of_pic_type->model_coeff_a_lin * fl_sad_by_qp_curr_frm +
+ ps_rd_model_of_pic_type->model_coeff_c_lin;
+ }
+ }
+
+ /* If the combined chosen model is linear model without an intercept (also the fallback when a solve above was rejected) */
+ if(PREV_FRAME_MODEL == u1_combined_model_used)
+ {
+ eff_A = 0.0;
+ eff_B = 0.0;
+ eff_C = 0.0;
+ for(u1_i = 0; u1_i < u1_num_pic_types; u1_i++)
+ {
+ ps_rd_model_of_pic_type = ps_rd_model + u1_i;
+
+ eff_A +=
+ ((pfl_eta[u1_i] + pu2_num_pics_of_a_pic_type[u1_i] - 1) *
+ ps_rd_model_of_pic_type->model_coeff_a_lin_wo_int * pfl_gamma[u1_i]);
+ }
+
+ fl_sad_by_qp_base = u4_bits_for_sub_gop / eff_A;
+
+ fl_sad_by_qp_curr_frm =
+ fl_sad_by_qp_base * pfl_gamma[u1_curr_pic_type] * pfl_eta[u1_curr_pic_type];
+
+ ps_rd_model_of_pic_type = ps_rd_model + u1_curr_pic_type;
+
+ fl_bits_for_curr_frm =
+ ps_rd_model_of_pic_type->model_coeff_a_lin_wo_int * fl_sad_by_qp_curr_frm;
+ }
+
+ /* store the model that was finally used to calculate Qp.
+ This is so that the same model is used in further calculations for this picture. */
+ ps_rd_model_of_pic_type = ps_rd_model + u1_curr_pic_type;
+ ps_rd_model_of_pic_type->u1_model_used = u1_combined_model_used;
+
+ i4_per_frm_bits_Ti = (WORD32)(fl_bits_for_curr_frm + 0.5);
+
+ if(fl_sad_by_qp_curr_frm > 0)
+ fl_qp_curr_frm = (float)u4_curr_estimated_sad / fl_sad_by_qp_curr_frm;
+ else
+ fl_qp_curr_frm = 255;
+
+ if(fl_qp_curr_frm > 255)
+ fl_qp_curr_frm = 255;
+
+ *pu1_curr_pic_type_qp = (fl_qp_curr_frm + 0.5);
+ }
+ }
+ return (i4_per_frm_bits_Ti);
+}
+
+model_coeff get_linear_coefficient(rc_rd_model_t *ps_rd_model)
+{
+    /* Coefficient of the linear model without intercept, as currently stored in the state */
+    const model_coeff linear_coeff = ps_rd_model->model_coeff_a_lin_wo_int;
+    return linear_coeff;
+}
+#endif /* !(RC_FIXED_POINT) */
+WORD32 rc_rd_model_dummy_for_avoiding_warnings(
+    rc_rd_model_t **pps_rc_rd_model, itt_memtab_t *ps_memtab, ITT_FUNC_TYPE_E e_func_type)
+{
+    /* Stand-in state object: no real state memory exists during the query/fill calls */
+    static rc_rd_model_t s_placeholder_state;
+    WORD32 i4_num_memtabs = 0;
+
+    /* Point the caller's handle at the stand-in so that it is never left dangling */
+    if((GET_NUM_MEMTAB == e_func_type) || (FILL_MEMTAB == e_func_type))
+        *pps_rc_rd_model = &s_placeholder_state;
+
+    /* Entry 0 of ps_memtab carries the rc_rd_model_t state structure */
+    if(GET_NUM_MEMTAB != e_func_type)
+    {
+        itt_memtab_t *ps_state_memtab = &ps_memtab[0];
+        fill_memtab(ps_state_memtab, sizeof(rc_rd_model_t), MEM_TAB_ALIGNMENT, PERSISTENT, DDR);
+        use_or_fill_base(ps_state_memtab, (void **)pps_rc_rd_model, e_func_type);
+    }
+    i4_num_memtabs++;
+
+    return i4_num_memtabs;
+}
diff --git a/encoder/rc_rd_model.h b/encoder/rc_rd_model.h
new file mode 100644
index 0000000..956d715
--- /dev/null
+++ b/encoder/rc_rd_model.h
@@ -0,0 +1,87 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file rc_rd_model.h
+*
+* \brief
+* This file contains all the necessary declarations for
+* Rate Distortion related functions
+*
+* \date
+*
+* \author
+* ittiam
+*
+******************************************************************************
+*/
+#ifndef RC_RD_MODEL
+#define RC_RD_MODEL
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+#define RC_FIXED_POINT 1 /* non-zero: model_coeff is number_t; zero: model_coeff is float */
+#define MAX_FRAMES_MODELLED 16
+
+#if !RC_FIXED_POINT
+typedef float model_coeff;
+#else
+typedef number_t model_coeff;
+#endif
+typedef struct rc_rd_model_t *rc_rd_model_handle; /* struct rc_rd_model_t is not defined in this header */
+
+/*****************************************************************************/
+/* Function Declarations */
+/*****************************************************************************/
+WORD32 rc_rd_model_num_fill_use_free_memtab(
+ rc_rd_model_handle *pps_rc_rd_model, itt_memtab_t *ps_memtab, ITT_FUNC_TYPE_E e_func_type);
+/* Interface Functions */
+/* Initialise the rate distortion model */
+void init_frm_rc_rd_model(rc_rd_model_handle ps_rd_model, UWORD8 u1_max_frames_modelled);
+
+/* Reset the rate distortion model */
+void reset_frm_rc_rd_model(rc_rd_model_handle ps_rd_model);
+
+/* Returns the Qp to be used for the given bits and SAD */
+WORD32 find_qp_for_target_bits(
+ rc_rd_model_handle ps_rd_model,
+ UWORD32 u4_target_res_bits,
+ UWORD32 u4_estimated_sad,
+ WORD32 i4_max_qp_q6,
+ WORD32 i4_min_qp_q6);
+/* Updates the frame level statistics after encoding a frame */
+void add_frame_to_rd_model(
+ rc_rd_model_handle ps_rd_model,
+ UWORD32 i4_res_bits, /* NOTE: declared unsigned despite the i4_ prefix */
+ WORD32 i4_avg_mp2qp_q6,
+ LWORD64 i8_sad_h264,
+ UWORD8 u1_num_skips);
+
+UWORD32 estimate_bits_for_qp(
+ rc_rd_model_handle ps_rd_model, UWORD32 u4_estimated_sad, WORD32 i4_avg_qp_q6);
+/* Get the Linear model coefficient */
+model_coeff get_linear_coefficient(rc_rd_model_handle ps_rd_model);
+
+void set_linear_coefficient(rc_rd_model_handle ps_rd_model, number_t model_coeff_a_lin_wo_int);
+
+WORD32 is_model_valid(rc_rd_model_handle ps_rd_model);
+
+#endif /* RC_RD_MODEL */
diff --git a/encoder/rc_rd_model_fix.c b/encoder/rc_rd_model_fix.c
new file mode 100644
index 0000000..7e7e9ca
--- /dev/null
+++ b/encoder/rc_rd_model_fix.c
@@ -0,0 +1,1505 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/****************************************************************************/
+/* File Name : rc_rd_model_fix.c */
+/* */
+/* Description : Implements all the Functions to Model the */
+/* Rate Distortion Behaviour of the Codec over the Last */
+/* Few Frames. */
+/* */
+/* List of Functions : update_frame_rd_model */
+/* estimate_mpeg2_qp_for_resbits */
+/* */
+/* Issues / Problems : None */
+/* */
+/* Revision History : */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 21 06 2006 ittiam Initial Version */
+/****************************************************************************/
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdarg.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <math.h>
+
+/* User include files */
+#include "ittiam_datatypes.h"
+#include "var_q_operator.h"
+#include "rc_common.h"
+#include "rc_cntrl_param.h"
+#include "mem_req_and_acq.h"
+#include "rc_rd_model.h"
+#include "rc_rd_model_struct.h"
+
+#if RC_FIXED_POINT
+WORD32 rc_rd_model_num_fill_use_free_memtab(
+    rc_rd_model_t **pps_rc_rd_model, itt_memtab_t *ps_memtab, ITT_FUNC_TYPE_E e_func_type)
+{
+    static rc_rd_model_t s_scratch_model;
+    WORD32 i4_memtab_count = 0;
+
+    /* During the counting and filling passes no state memory has been handed over yet;
+       point the handle at a static scratch object so that it can still be dereferenced. */
+    if((GET_NUM_MEMTAB == e_func_type) || (FILL_MEMTAB == e_func_type))
+    {
+        *pps_rc_rd_model = &s_scratch_model;
+    }
+    /* Memtab entry for the rate-distortion model state; not touched when only counting. */
+    if(GET_NUM_MEMTAB != e_func_type)
+    {
+        fill_memtab(
+            &ps_memtab[i4_memtab_count], sizeof(rc_rd_model_t), MEM_TAB_ALIGNMENT, PERSISTENT, DDR);
+        use_or_fill_base(&ps_memtab[0], (void **)pps_rc_rd_model, e_func_type);
+    }
+    i4_memtab_count++;
+    return i4_memtab_count;
+}
+/******************************************************************************
+  Function Name   : init_frm_rc_rd_model
+  Description     : Zeroes the frame counters and every model coefficient, and
+                    stores u1_max_frames_modelled in u1_max_frms_to_model.
+  Arguments       : ps_rd_model            - model state to initialise
+                    u1_max_frames_modelled - value kept in u1_max_frms_to_model
+  Return Values   : void
+*****************************************************************************/
+void init_frm_rc_rd_model(rc_rd_model_t *ps_rd_model, UWORD8 u1_max_frames_modelled)
+{
+    /* Frame bookkeeping: both counters start at zero and the caller's limit is
+       remembered in u1_max_frms_to_model. */
+    ps_rd_model->u1_max_frms_to_model = u1_max_frames_modelled;
+    ps_rd_model->u1_curr_frm_counter = 0;
+    ps_rd_model->u1_num_frms_in_model = 0;
+
+    /* *_quad coefficients: clear the .sm and .e parts of each one */
+    ps_rd_model->model_coeff_a_quad.sm = 0;
+    ps_rd_model->model_coeff_a_quad.e = 0;
+    ps_rd_model->model_coeff_b_quad.sm = 0;
+    ps_rd_model->model_coeff_b_quad.e = 0;
+    ps_rd_model->model_coeff_c_quad.sm = 0;
+    ps_rd_model->model_coeff_c_quad.e = 0;
+
+    /* *_lin coefficients */
+    ps_rd_model->model_coeff_a_lin.sm = 0;
+    ps_rd_model->model_coeff_a_lin.e = 0;
+    ps_rd_model->model_coeff_b_lin.sm = 0;
+    ps_rd_model->model_coeff_b_lin.e = 0;
+    ps_rd_model->model_coeff_c_lin.sm = 0;
+    ps_rd_model->model_coeff_c_lin.e = 0;
+
+    /* *_lin_wo_int coefficients */
+    ps_rd_model->model_coeff_a_lin_wo_int.sm = 0;
+    ps_rd_model->model_coeff_a_lin_wo_int.e = 0;
+    ps_rd_model->model_coeff_b_lin_wo_int.sm = 0;
+    ps_rd_model->model_coeff_b_lin_wo_int.e = 0;
+    ps_rd_model->model_coeff_c_lin_wo_int.sm = 0;
+    ps_rd_model->model_coeff_c_lin_wo_int.e = 0;
+}
+/******************************************************************************
+  Function Name   : reset_frm_rc_rd_model
+  Description     : Zeroes the frame counters and every model coefficient of an
+                    existing model; u1_max_frms_to_model is left as it is.
+  Arguments       : ps_rd_model - model state to reset
+  Return Values   : void
+  Revision History: Creation
+*****************************************************************************/
+void reset_frm_rc_rd_model(rc_rd_model_t *ps_rd_model)
+{
+    ps_rd_model->u1_curr_frm_counter = 0;
+    ps_rd_model->u1_num_frms_in_model = 0;
+
+    /* *_quad coefficients: clear the .sm and .e parts of each one */
+    ps_rd_model->model_coeff_a_quad.sm = 0;
+    ps_rd_model->model_coeff_a_quad.e = 0;
+    ps_rd_model->model_coeff_b_quad.sm = 0;
+    ps_rd_model->model_coeff_b_quad.e = 0;
+    ps_rd_model->model_coeff_c_quad.sm = 0;
+    ps_rd_model->model_coeff_c_quad.e = 0;
+
+    /* *_lin coefficients */
+    ps_rd_model->model_coeff_a_lin.sm = 0;
+    ps_rd_model->model_coeff_a_lin.e = 0;
+    ps_rd_model->model_coeff_b_lin.sm = 0;
+    ps_rd_model->model_coeff_b_lin.e = 0;
+    ps_rd_model->model_coeff_c_lin.sm = 0;
+    ps_rd_model->model_coeff_c_lin.e = 0;
+
+    /* *_lin_wo_int coefficients */
+    ps_rd_model->model_coeff_a_lin_wo_int.sm = 0;
+    ps_rd_model->model_coeff_a_lin_wo_int.e = 0;
+    ps_rd_model->model_coeff_b_lin_wo_int.sm = 0;
+    ps_rd_model->model_coeff_b_lin_wo_int.e = 0;
+    ps_rd_model->model_coeff_c_lin_wo_int.sm = 0;
+    ps_rd_model->model_coeff_c_lin_wo_int.e = 0;
+}
+
+#if ENABLE_QUAD_MODEL
+/******************************************************************************
+ Function Name : find_model_coeffs
+ Description : Fits bits (y) against SAD / average QP (x) over the frames listed
+ in pi1_frame_index and writes up to three coefficient triples.
+ Arguments : index-list entries equal to -1 are skipped; NULL output arrays
+ pmc_model_coeff_lin / pmc_model_coeff_lin_wo_int are skipped.
+ Return Values : the model type used (QUAD_MODEL may be replaced by LIN_MODEL)
+*****************************************************************************/
+static UWORD8 find_model_coeffs(
+ UWORD32 *pi4_res_bits,
+ LWORD64 *pi8_sad_h264,
+ WORD32 *pi4_avg_mpeg2_qp_q6,
+ UWORD8 u1_num_frms,
+ UWORD8 u1_model_used,
+ WORD8 *pi1_frame_index,
+ number_t *pmc_model_coeff,
+ number_t *pmc_model_coeff_lin,
+ number_t *pmc_model_coeff_lin_wo_int,
+ rc_rd_model_t *ps_rd_model)
+{
+ UWORD32 i;
+ UWORD8 u1_num_frms_used = 0;
+ UWORD8 u1_frm_indx;
+
+ number_t sum_y;
+ number_t sum_x_y;
+ number_t sum_x2_y;
+ number_t sum_x;
+ number_t sum_x2;
+ number_t sum_x3;
+ number_t sum_x4;
+ number_t var_x2_y;
+ number_t var_x_y;
+ number_t var_x2_x;
+ number_t var_x2_x2;
+ number_t var_x_x;
+ number_t x0, y0;
+ number_t s_res_bits, s_sad_h264, s_avg_mpeg2_qp;
+ number_t temp, temp1;
+
+ number_t model_coeff_a, model_coeff_b, model_coeff_c, model_coeff_den;
+
+ number_t s_num_frms_used;
+
+ /* initialising */
+ model_coeff_a.sm = 0;
+ model_coeff_a.e = 0;
+ model_coeff_b.sm = 0;
+ model_coeff_b.e = 0;
+ model_coeff_c.sm = 0;
+ model_coeff_c.e = 0;
+
+ sum_y.sm = 0;
+ sum_x_y.sm = 0;
+ sum_x2_y.sm = 0;
+ sum_x.sm = 0;
+ sum_x2.sm = 0;
+ sum_x3.sm = 0;
+ sum_x4.sm = 0;
+ var_x2_y.sm = 0;
+ var_x_y.sm = 0;
+ var_x2_x.sm = 0;
+ var_x2_x2.sm = 0;
+ var_x_x.sm = 0;
+
+ sum_y.e = 0;
+ sum_x_y.e = 0;
+ sum_x2_y.e = 0;
+ sum_x.e = 0;
+ sum_x2.e = 0;
+ sum_x3.e = 0;
+ sum_x4.e = 0;
+ var_x2_y.e = 0;
+ var_x_y.e = 0;
+ var_x2_x.e = 0;
+ var_x2_x2.e = 0;
+ var_x_x.e = 0;
+
+ for(i = 0; i < u1_num_frms; i++)
+ {
+ LWORD64 i8_local_sad_sm = 0;
+ WORD32 i4_local_e = 0;
+ if(-1 == pi1_frame_index[i])
+ continue;
+
+ u1_frm_indx = (UWORD8)pi1_frame_index[i];
+
+ s_res_bits.sm = pi4_res_bits[u1_frm_indx];
+ s_res_bits.e = 0;
+
+ /* The SAD is 64 bit: halve it until it is no larger than 0x7FFFFFFF, counting the
+ halvings; the negated count is the last argument of SET_VARQ_FRM_FIXQ below. */
+ i8_local_sad_sm = pi8_sad_h264[u1_frm_indx];
+
+ while(i8_local_sad_sm > 0x7FFFFFFF)
+ {
+ i8_local_sad_sm = i8_local_sad_sm / 2;
+ i4_local_e++;
+ }
+ SET_VARQ_FRM_FIXQ(((WORD32)i8_local_sad_sm), s_sad_h264, -i4_local_e);
+
+ /*fract_quant*/
+ SET_VARQ_FRM_FIXQ(pi4_avg_mpeg2_qp_q6[u1_frm_indx], s_avg_mpeg2_qp, QSCALE_Q_FAC);
+
+ y0 = s_res_bits;
+ /*x0 = (float) (pi4_sad_h264[u1_frm_indx] /
+ (float)pui_avg_mpeg2_qp[u1_frm_indx]); */
+ div32_var_q(s_sad_h264, s_avg_mpeg2_qp, &x0);
+
+ /*
+ sum_y += y0;
+ sum_x_y += x0 * y0;
+ sum_x2_y += x0 * x0 * y0;
+ sum_x += x0;
+ sum_x2 += x0 * x0;
+ sum_x3 += x0 * x0 * x0;
+ sum_x4 += x0 * x0 * x0 * x0;
+ */
+ /* sum_y += y0; */
+ add32_var_q(sum_y, y0, &sum_y);
+ /* sum_x_y += x0 * y0; */
+ mult32_var_q(x0, y0, &temp);
+ add32_var_q(sum_x_y, temp, &sum_x_y);
+
+ /* sum_x2_y += x0 * x0 * y0; */
+ mult32_var_q(x0, temp, &temp);
+ add32_var_q(sum_x2_y, temp, &sum_x2_y);
+
+ /* sum_x += x0; */
+ add32_var_q(x0, sum_x, &sum_x);
+
+ /* sum_x2 += x0 * x0; */
+ mult32_var_q(x0, x0, &temp);
+ add32_var_q(temp, sum_x2, &sum_x2);
+
+ /* sum_x3 += x0 * x0 * x0; */
+ mult32_var_q(x0, temp, &temp);
+ add32_var_q(temp, sum_x3, &sum_x3);
+
+ /* sum_x4 += x0 * x0 * x0 * x0; */
+ mult32_var_q(x0, temp, &temp);
+ add32_var_q(temp, sum_x4, &sum_x4);
+
+ u1_num_frms_used++;
+ }
+
+ s_num_frms_used.sm = u1_num_frms_used;
+ s_num_frms_used.e = 0;
+
+ /* sum_y /= u1_num_frms_used; */
+ div32_var_q(sum_y, s_num_frms_used, &sum_y);
+ /* sum_x_y /= u1_num_frms_used; */
+ div32_var_q(sum_x_y, s_num_frms_used, &sum_x_y);
+ /* sum_x2_y /= u1_num_frms_used; */
+ div32_var_q(sum_x2_y, s_num_frms_used, &sum_x2_y);
+
+ /* sum_x /= u1_num_frms_used; */
+ div32_var_q(sum_x, s_num_frms_used, &sum_x);
+
+ /* sum_x2 /= u1_num_frms_used; */
+ div32_var_q(sum_x2, s_num_frms_used, &sum_x2);
+
+ /* sum_x3 /= u1_num_frms_used; */
+ div32_var_q(sum_x3, s_num_frms_used, &sum_x3);
+
+ /* sum_x4 /= u1_num_frms_used; */
+ div32_var_q(sum_x4, s_num_frms_used, &sum_x4);
+
+#if !QUAD
+ u1_model_used = LIN_MODEL;
+#endif
+
+ if((QUAD_MODEL == u1_model_used) && (u1_num_frms_used <= MIN_FRAMES_FOR_QUAD_MODEL))
+ {
+ u1_model_used = LIN_MODEL;
+ }
+
+ if(QUAD_MODEL == u1_model_used)
+ {
+ /* var_x2_y = sum_x2_y - sum_x2 * sum_y; */
+ mult32_var_q(sum_x2, sum_y, &temp);
+ sub32_var_q(sum_x2_y, temp, &var_x2_y);
+
+ /* var_x_y = sum_x_y - sum_x * sum_y; */
+ mult32_var_q(sum_x, sum_y, &temp);
+ sub32_var_q(sum_x_y, temp, &var_x_y);
+
+ /* var_x2_x = sum_x3 - sum_x2 * sum_x; */
+ mult32_var_q(sum_x2, sum_x, &temp);
+ sub32_var_q(sum_x3, temp, &var_x2_x);
+
+ /* var_x2_x2 = sum_x4 - sum_x2 * sum_x2; */
+ mult32_var_q(sum_x2, sum_x2, &temp);
+ sub32_var_q(sum_x4, temp, &var_x2_x2);
+
+ /* var_x_x = sum_x2 - sum_x * sum_x; */
+ mult32_var_q(sum_x, sum_x, &temp);
+ sub32_var_q(sum_x2, temp, &var_x_x);
+
+ /* model_coeff_den = (var_x2_x * var_x2_x - var_x2_x2 * var_x_x); */
+ mult32_var_q(var_x2_x, var_x2_x, &temp);
+ mult32_var_q(var_x2_x2, var_x_x, &temp1);
+ sub32_var_q(temp, temp1, &model_coeff_den);
+
+ if(0 != model_coeff_den.sm)
+ {
+ /* model_coeff_b = (var_x_y * var_x2_x - var_x2_y * var_x_x); */
+ mult32_var_q(var_x_y, var_x2_x, &temp);
+ mult32_var_q(var_x2_y, var_x_x, &temp1);
+ sub32_var_q(temp, temp1, &model_coeff_b);
+
+ /* model_coeff_b /= model_coeff_den; */
+ div32_var_q(model_coeff_b, model_coeff_den, &model_coeff_b);
+
+ /* model_coeff_a = (var_x2_y * var_x2_x - var_x_y * var_x2_x2); */
+ mult32_var_q(var_x2_y, var_x2_x, &temp);
+ mult32_var_q(var_x_y, var_x2_x2, &temp1);
+ sub32_var_q(temp, temp1, &model_coeff_a);
+
+ /* model_coeff_a /= model_coeff_den; */
+ div32_var_q(model_coeff_a, model_coeff_den, &model_coeff_a);
+
+ /*model_coeff_c = sum_y - (model_coeff_a * sum_x) -
+ (model_coeff_b * sum_x2); */
+ mult32_var_q(model_coeff_a, sum_x, &temp);
+ mult32_var_q(model_coeff_b, sum_x2, &temp1);
+ sub32_var_q(sum_y, temp, &model_coeff_c);
+ sub32_var_q(model_coeff_c, temp1, &model_coeff_c);
+ /* end of the quadratic coefficient computation */
+ }
+
+ pmc_model_coeff[0] = model_coeff_b;
+ /* pmc_model_coeff[0] = (float)(model_coeff_b.sm /pow(2,model_coeff_b.e)); */
+ pmc_model_coeff[1] = model_coeff_a;
+ /* pmc_model_coeff[1] = (float)(model_coeff_a.sm /pow(2,model_coeff_a.e)); */
+ pmc_model_coeff[2] = model_coeff_c;
+ /* pmc_model_coeff[2] = (float)(model_coeff_c.sm /pow(2,model_coeff_c.e)); */
+ }
+
+ if(NULL != pmc_model_coeff_lin)
+ {
+ /* var_x_y = sum_x_y - sum_x * sum_y; */
+ mult32_var_q(sum_x, sum_y, &temp);
+ sub32_var_q(sum_x_y, temp, &var_x_y);
+
+ /* var_x_x = sum_x2 - sum_x * sum_x; */
+ mult32_var_q(sum_x, sum_x, &temp);
+ sub32_var_q(sum_x2, temp, &var_x_x);
+
+ if((0 != var_x_x.sm) && (u1_num_frms > 1))
+ {
+ /* model_coeff_b = (var_x_y / var_x_x); */
+ div32_var_q(var_x_y, var_x_x, &model_coeff_b);
+
+ /* model_coeff_c = sum_y - (model_coeff_b * sum_x); */
+ mult32_var_q(model_coeff_b, sum_x, &temp);
+ sub32_var_q(sum_y, temp, &model_coeff_c);
+
+ model_coeff_a = model_coeff_b;
+
+ pmc_model_coeff_lin[0] = model_coeff_b;
+ /* pmc_model_coeff_lin[0] = (float)(model_coeff_b.sm /pow(2,model_coeff_b.e)); */
+
+ pmc_model_coeff_lin[1] = model_coeff_a;
+ /* pmc_model_coeff_lin[1] = (float)(model_coeff_a.sm /pow(2,model_coeff_a.e)); */
+
+ pmc_model_coeff_lin[2] = model_coeff_c;
+ /* pmc_model_coeff_lin[2] = (float)(model_coeff_c.sm /pow(2,model_coeff_c.e)); */
+ }
+ }
+
+ /* TO DO : FLOAT_TO_FIX */
+ if(NULL != pmc_model_coeff_lin_wo_int)
+ {
+ UWORD8 u1_curr_frame_index;
+ /* UWORD8 u1_avgqp_prvfrm; */
+ number_t s_avgqp_prvfrm;
+ /* UWORD32 u4_prevfrm_bits, u4_prevfrm_sad; */
+ number_t s_prevfrm_bits, s_prevfrm_sad;
+ WORD32 i4_local_e = 0;
+ LWORD64 i8_local_sad_sm = 0;
+ u1_curr_frame_index = ps_rd_model->u1_curr_frm_counter;
+ if(0 == u1_curr_frame_index)
+ u1_curr_frame_index = (UWORD8)(ps_rd_model->u1_max_frms_to_model - 1);
+ else
+ u1_curr_frame_index--;
+
+ /* u1_avgqp_prvfrm = ps_rd_model->pu1_avg_mp2qp[u1_curr_frame_index]; */
+ /*fract_quant*/
+ SET_VARQ_FRM_FIXQ(
+ ps_rd_model->ai4_avg_qp_q6[u1_curr_frame_index], s_avgqp_prvfrm, QSCALE_Q_FAC);
+
+ /* u4_prevfrm_bits = ps_rd_model->pi4_res_bits[u1_curr_frame_index]; */
+ s_prevfrm_bits.sm = ps_rd_model->pi4_res_bits[u1_curr_frame_index];
+ s_prevfrm_bits.e = 0;
+
+ /* u4_prevfrm_sad = ps_rd_model->pi4_sad_h264[u1_curr_frame_index]; */
+ /* As in the loop above, the 64 bit SAD is halved until it is no larger than
+ 0x7FFFFFFF and the negated halving count goes to SET_VARQ_FRM_FIXQ. */
+ i8_local_sad_sm = ps_rd_model->pi8_sad[u1_curr_frame_index];
+ while(i8_local_sad_sm > 0x7FFFFFFF)
+ {
+ i8_local_sad_sm = i8_local_sad_sm / 2;
+ i4_local_e++;
+ }
+ SET_VARQ_FRM_FIXQ(((WORD32)i8_local_sad_sm), s_prevfrm_sad, -i4_local_e);
+
+ if(0 != s_prevfrm_sad.sm)
+ {
+ /* model_coeff_a = (float)(u4_prevfrm_bits * u1_avgqp_prvfrm) / u4_prevfrm_sad; */
+ mult32_var_q(s_prevfrm_bits, s_avgqp_prvfrm, &model_coeff_a);
+ div32_var_q(model_coeff_a, s_prevfrm_sad, &model_coeff_a);
+ }
+ else
+ {
+ model_coeff_a.sm = 0;
+ model_coeff_a.e = 0;
+ }
+
+ model_coeff_b.sm = 0;
+ model_coeff_b.e = 0;
+ model_coeff_c.sm = 0;
+ model_coeff_c.e = 0;
+
+ pmc_model_coeff_lin_wo_int[0] = model_coeff_b;
+ pmc_model_coeff_lin_wo_int[1] = model_coeff_a;
+ pmc_model_coeff_lin_wo_int[2] = model_coeff_c;
+ }
+ /* end of "TO DO : FLOAT_TO_FIX" */
+
+ return u1_model_used;
+}
+
+/******************************************************************************
+ Function Name : refine_set_of_points
+ Description : Evaluates ps_model_coeff on every frame listed in pi1_frame_index
+ and writes -1 over entries whose squared relative error in bits
+ is above the average; the average goes to *ps_avg_deviation.
+ Return Values : 0 if that average is above the UP_THR threshold, 2 if it is
+ below the LO_THR threshold, 1 otherwise
+*****************************************************************************/
+static WORD8 refine_set_of_points(
+ UWORD32 *pi4_res_bits,
+ LWORD64 *pi8_sad_h264,
+ WORD32 *pi4_avg_mpeg2_qp_q6,
+ UWORD8 u1_num_frms,
+ WORD8 *pi1_frame_index,
+ number_t *ps_model_coeff,
+ number_t *ps_avg_deviation)
+{
+ /* float fl_avg_deviation, fl_estimated_bits, fl_deviation, x_val; */
+ number_t s_avg_deviation, s_estimated_bits, s_deviation, x_val;
+ /* number_t ps_model_coeff[3]; */
+ number_t s_sad_h264, s_avg_mpeg2_qp, s_res_bits;
+ number_t temp, temp1;
+ UWORD8 u1_return_value = 1;
+ UWORD32 i;
+ UWORD8 u1_num_frms_used, u1_frm_indx;
+ number_t s_num_frms_used;
+
+ /*
+ convert_float_to_fix(pmc_model_coeff[0],&ps_model_coeff[0]);
+ convert_float_to_fix(pmc_model_coeff[1],&ps_model_coeff[1]);
+ convert_float_to_fix(pmc_model_coeff[2],&ps_model_coeff[2]);
+ */
+
+ u1_num_frms_used = 0;
+ /* fl_avg_deviation = 0; */
+ s_avg_deviation.sm = 0;
+ s_avg_deviation.e = 0;
+
+ for(i = 0; i < u1_num_frms; i++)
+ {
+ LWORD64 i8_local_sad_sm = 0;
+ WORD32 i4_local_e = 0;
+ if(-1 == pi1_frame_index[i])
+ continue;
+
+ u1_frm_indx = (UWORD8)pi1_frame_index[i];
+ /*x_val = pi4_sad_h264[u1_frm_indx] /
+ (float) pui_avg_mpeg2_qp[u1_frm_indx]; */
+ /* The SAD is 64 bit: halve it until it is no larger than 0x7FFFFFFF, counting the
+ halvings; the negated count is the last argument of SET_VARQ_FRM_FIXQ below. */
+
+ i8_local_sad_sm = pi8_sad_h264[u1_frm_indx];
+ while(i8_local_sad_sm > 0x7FFFFFFF)
+ {
+ i8_local_sad_sm = i8_local_sad_sm / 2;
+ i4_local_e++;
+ }
+ SET_VARQ_FRM_FIXQ(((WORD32)i8_local_sad_sm), s_sad_h264, -i4_local_e);
+
+ /*fract_quant*/
+ SET_VARQ_FRM_FIXQ(pi4_avg_mpeg2_qp_q6[u1_frm_indx], s_avg_mpeg2_qp, QSCALE_Q_FAC);
+
+ div32_var_q(s_sad_h264, s_avg_mpeg2_qp, &x_val);
+
+ /*
+ fl_estimated_bits = (pmc_model_coeff[0] * x_val * x_val ) +
+ (pmc_model_coeff[1] * x_val) +
+ (pmc_model_coeff[2]);
+ */
+ mult32_var_q(x_val, x_val, &temp);
+ mult32_var_q(temp, ps_model_coeff[0], &temp);
+ mult32_var_q(x_val, ps_model_coeff[1], &temp1);
+ add32_var_q(temp, temp1, &s_estimated_bits);
+ add32_var_q(s_estimated_bits, ps_model_coeff[2], &s_estimated_bits);
+
+ /*
+ fl_deviation = fabs(pi4_res_bits[u1_frm_indx] - fl_estimated_bits) /
+ (float) pi4_res_bits[u1_frm_indx];
+ */
+ s_res_bits.sm = pi4_res_bits[u1_frm_indx];
+ s_res_bits.e = 0;
+ sub32_var_q(s_res_bits, s_estimated_bits, &temp);
+ temp.sm = (temp.sm > 0) ? temp.sm : (-temp.sm);
+ div32_var_q(temp, s_res_bits, &s_deviation);
+
+ /* fl_deviation = fl_deviation * fl_deviation; */
+ mult32_var_q(s_deviation, s_deviation, &s_deviation);
+
+ /* fl_avg_deviation += fl_deviation;*/
+ add32_var_q(s_avg_deviation, s_deviation, &s_avg_deviation);
+
+ u1_num_frms_used++;
+ }
+
+ /* fl_avg_deviation /= u1_num_frms_used; */
+ s_num_frms_used.sm = u1_num_frms_used;
+ s_num_frms_used.e = 0;
+ div32_var_q(s_avg_deviation, s_num_frms_used, &s_avg_deviation);
+
+ /* No square root is taken: s_avg_deviation stays a mean of squared deviations */
+ /* and is compared with squared per-frame deviations in the loop below. */
+
+ for(i = 0; i < u1_num_frms; i++)
+ {
+ LWORD64 i8_local_sad_sm = 0;
+ WORD32 i4_local_e = 0;
+ if ((-1 == pi1_frame_index[i]) /*&&
+ (i != 0)*/)
+ continue;
+
+ u1_frm_indx = (UWORD8)pi1_frame_index[i];
+
+ /*
+ x_val = pi4_sad_h264[u1_frm_indx] /
+ (float) pui_avg_mpeg2_qp[u1_frm_indx];
+ */
+
+ /* Same 64 bit SAD scaling as in the first loop: halve until no larger than
+ 0x7FFFFFFF and pass the negated halving count to SET_VARQ_FRM_FIXQ. */
+
+ i8_local_sad_sm = pi8_sad_h264[u1_frm_indx];
+ while(i8_local_sad_sm > 0x7FFFFFFF)
+ {
+ i8_local_sad_sm = i8_local_sad_sm / 2;
+ i4_local_e++;
+ }
+ SET_VARQ_FRM_FIXQ(((WORD32)i8_local_sad_sm), s_sad_h264, -i4_local_e);
+
+ /*fract_quant*/
+ SET_VARQ_FRM_FIXQ(pi4_avg_mpeg2_qp_q6[u1_frm_indx], s_avg_mpeg2_qp, QSCALE_Q_FAC);
+
+ div32_var_q(s_sad_h264, s_avg_mpeg2_qp, &x_val);
+
+ /*
+ fl_estimated_bits = (pmc_model_coeff[0] * x_val * x_val ) +
+ (pmc_model_coeff[1] * x_val) +
+ (pmc_model_coeff[2]);
+ */
+ mult32_var_q(x_val, x_val, &temp);
+ mult32_var_q(temp, ps_model_coeff[0], &temp);
+ mult32_var_q(x_val, ps_model_coeff[1], &temp1);
+ add32_var_q(temp, temp1, &s_estimated_bits);
+ add32_var_q(s_estimated_bits, ps_model_coeff[2], &s_estimated_bits);
+
+ /*
+ fl_deviation = fabs(pi4_res_bits[u1_frm_indx] - fl_estimated_bits) /
+ (float) pi4_res_bits[u1_frm_indx];
+ */
+ s_res_bits.sm = pi4_res_bits[u1_frm_indx];
+ s_res_bits.e = 0;
+ sub32_var_q(s_res_bits, s_estimated_bits, &temp);
+ temp.sm = (temp.sm > 0) ? temp.sm : (-temp.sm);
+ div32_var_q(temp, s_res_bits, &s_deviation);
+
+ /* to remove the sqrt function */
+ /*fl_deviation = fl_deviation * fl_deviation; */
+ mult32_var_q(s_deviation, s_deviation, &s_deviation);
+
+ /*
+ if (fl_deviation > (fl_avg_deviation))
+ {
+ pi1_frame_index[i] = -1;
+ }
+ */
+ sub32_var_q(s_deviation, s_avg_deviation, &temp);
+ if(temp.sm > 0)
+ {
+ pi1_frame_index[i] = -1;
+ }
+ }
+
+ {
+ number_t up_thr, lo_thr;
+
+ /*
+ if (fl_avg_deviation > 0.0625)
+ u1_return_value = 0;
+ */
+ up_thr.sm = UP_THR_SM;
+ up_thr.e = UP_THR_E;
+ sub32_var_q(s_avg_deviation, up_thr, &temp);
+ if(temp.sm > 0)
+ {
+ u1_return_value = 0;
+ }
+
+ /*
+ if (fl_avg_deviation < 0.0225)
+ u1_return_value = 2;
+ */
+ lo_thr.sm = LO_THR_SM;
+ lo_thr.e = LO_THR_E;
+ sub32_var_q(s_avg_deviation, lo_thr, &temp);
+ if(temp.sm < 0)
+ {
+ u1_return_value = 2;
+ }
+ }
+ *ps_avg_deviation = s_avg_deviation;
+ return (u1_return_value);
+}
+/******************************************************************************
+  Function Name   : calc_avg_sqr_dev_for_model
+  Description     : Averages, over the frames listed in pi1_frame_index, the squared
+                    relative error between actual bits and the bits predicted by
+                    coefficients [1] and [2] of ps_model_coeff.
+  Arguments       : entries of pi1_frame_index equal to -1 are skipped
+  Return Values   : void; the average is written to *ps_avg_deviation
+*****************************************************************************/
+/* TO DO : FLOAT_TO_FIX */
+static void calc_avg_sqr_dev_for_model(
+    UWORD32 *pi4_res_bits,
+    LWORD64 *pi8_sad_h264,
+    WORD32 *pi4_avg_mpeg2_qp_q6,
+    UWORD8 u1_num_frms,
+    WORD8 *pi1_frame_index,
+    number_t *ps_model_coeff,
+    number_t *ps_avg_deviation)
+{
+    /* float fl_avg_deviation, fl_estimated_bits, fl_deviation, x_val; */
+    number_t s_avg_deviation, s_estimated_bits, s_deviation, x_val;
+    /* i walks pi1_frame_index; u1_frm_indx is the per-frame array slot it names */
+    UWORD32 i;
+    UWORD8 u1_num_frms_used, u1_frm_indx;
+
+    number_t s_sad_h264;
+    number_t s_avg_mpeg2_qp;
+    number_t s_res_bits;
+    number_t temp;
+    number_t s_num_frms_used;
+
+    u1_num_frms_used = 0;
+    /* fl_avg_deviation = 0; */
+    s_deviation.sm = 0;
+    s_deviation.e = 0;
+
+    s_avg_deviation.sm = 0;
+    s_avg_deviation.e = 0;
+
+    for(i = 0; i < u1_num_frms; i++)
+    {
+        LWORD64 i8_local_sad_sm;
+        WORD32 i4_local_e = 0;
+        if(-1 == pi1_frame_index[i])
+            continue;
+
+        /* Map the list position to the slot in the per-frame arrays. (The slot used
+           to be overwritten with the loop counter i, ignoring the index list.) */
+        u1_frm_indx = (UWORD8)pi1_frame_index[i];
+        /*
+        x_val = pi4_sad_h264[u1_frm_indx] /
+        (float) pui_avg_mpeg2_qp[u1_frm_indx];
+        */
+        /* The SAD is 64 bit: halve it until it is no larger than 0x7FFFFFFF, counting the
+           halvings; the negated count is the last argument of SET_VARQ_FRM_FIXQ below. */
+        i8_local_sad_sm = pi8_sad_h264[u1_frm_indx];
+        while(i8_local_sad_sm > 0x7FFFFFFF)
+        {
+            i8_local_sad_sm = i8_local_sad_sm / 2;
+            i4_local_e++;
+        }
+        SET_VARQ_FRM_FIXQ(((WORD32)i8_local_sad_sm), s_sad_h264, -i4_local_e);
+        /*fract_quant*/
+        SET_VARQ_FRM_FIXQ(pi4_avg_mpeg2_qp_q6[u1_frm_indx], s_avg_mpeg2_qp, QSCALE_Q_FAC);
+
+        div32_var_q(s_sad_h264, s_avg_mpeg2_qp, &x_val);
+
+        /*fl_estimated_bits = (pmc_model_coeff[1] * x_val) +
+        (pmc_model_coeff[2]); */
+        mult32_var_q(x_val, ps_model_coeff[1], &s_estimated_bits);
+        add32_var_q(s_estimated_bits, ps_model_coeff[2], &s_estimated_bits);
+
+        /*fl_deviation = fabs(pi4_res_bits[u1_frm_indx] - fl_estimated_bits) /
+        (float) pi4_res_bits[u1_frm_indx]; */
+        s_res_bits.sm = pi4_res_bits[u1_frm_indx];
+        s_res_bits.e = 0;
+        sub32_var_q(s_res_bits, s_estimated_bits, &temp);
+        temp.sm = (temp.sm > 0) ? temp.sm : (-temp.sm);
+        div32_var_q(temp, s_res_bits, &s_deviation);
+
+        /* fl_deviation = fl_deviation * fl_deviation; */
+        mult32_var_q(s_deviation, s_deviation, &s_deviation);
+
+        /* fl_avg_deviation += fl_deviation; */
+        add32_var_q(s_avg_deviation, s_deviation, &s_avg_deviation);
+
+        u1_num_frms_used++;
+    }
+
+    /* fl_avg_deviation /= u1_num_frms_used; */
+    s_num_frms_used.sm = u1_num_frms_used;
+    s_num_frms_used.e = 0;
+    div32_var_q(s_avg_deviation, s_num_frms_used, &s_avg_deviation);
+    *ps_avg_deviation = s_avg_deviation;
+}
+/* end of "TO DO : FLOAT_TO_FIX" */
+/******************************************************************************
+  Function Name   : is_qp_available
+  Description     : Checks whether the QP stored at u1_curr_frame_index also occurs
+                    in at least two of the next i4_num_frames_to_check entries.
+  Arguments       : ps_rd_model, u1_curr_frame_index, i4_num_frames_to_check
+  Return Values   : 1 if two or more of those entries match, else 0
+  Revision History: Creation
+*****************************************************************************/
+static WORD32 is_qp_available(
+    rc_rd_model_t *ps_rd_model, UWORD8 u1_curr_frame_index, WORD32 i4_num_frames_to_check)
+{
+    /* Q6 average QP of the frame the caller points at, read before the index moves on */
+    const WORD32 i4_ref_qp_q6 = ps_rd_model->ai4_avg_qp_q6[u1_curr_frame_index];
+    WORD32 i4_num_matches = 0;
+    WORD32 i4_remaining;
+    for(i4_remaining = i4_num_frames_to_check; i4_remaining > 0; i4_remaining--)
+    {
+        /* Step to the next entry, wrapping to 0 on reaching u1_max_frms_to_model */
+        u1_curr_frame_index++;
+        if(u1_curr_frame_index == ps_rd_model->u1_max_frms_to_model)
+        {
+            u1_curr_frame_index = 0;
+        }
+        if(i4_ref_qp_q6 == ps_rd_model->ai4_avg_qp_q6[u1_curr_frame_index])
+        {
+            i4_num_matches++;
+        }
+    }
+    return (i4_num_matches >= 2) ? 1 : 0;
+}
+/****************************************************************************/
+/* */
+/* Function Name : update_frame_rd_model */
+/* */
+/* Description : Prepares the per-frame data stored in ps_rd_model (bits, */
+/* SAD, average Qp, skips) for fitting the rate-distortion */
+/* model and selects the frames to fit on. */
+/* Inputs : ps_rd_model - model state, read and updated in place */
+/* Globals : None noted */
+/* Processing : Starts from the most recently stored frame and walks */
+/* backwards through the stored frames, rejecting some */
+/* Outputs : Fields of ps_rd_model (for example u1_model_used) */
+/* Returns : None */
+/* */
+/* Issues : None recorded */
+/* */
+/* Revision History: */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes made) */
+/* 13 07 2002 Ittiam Draft */
+/* */
+/****************************************************************************/
+static void update_frame_rd_model(rc_rd_model_t *ps_rd_model)
+{
+ WORD8 pi1_frame_index[MAX_FRAMES_MODELLED];
+ WORD8 pi1_frame_index_initial[MAX_FRAMES_MODELLED];
+ UWORD32 u4_num_skips;
+
+ UWORD8 u1_num_skips_temp;
+ /*UWORD8 u1_avg_mpeg2_qp_temp, u1_min_mpeg2_qp, u1_max_mpeg2_qp; */
+ /*WORD32 i4_avg_mpeg2_qp_temp, i4_min_mpeg2_qp, i4_max_mpeg2_qp;*/
+ WORD32 i4_avg_mpeg2_qp_temp_q6, i4_min_mpeg2_qp_q6, i4_max_mpeg2_qp_q6;
+ UWORD8 u1_num_frms_input, u1_num_active_frames, u1_reject_frame;
+
+ /* UWORD8 u1_min2_mpeg2_qp, u1_max2_mpeg2_qp; */
+ /* WORD32 i4_min2_mpeg2_qp, i4_max2_mpeg2_qp;*/
+ WORD32 i4_min2_mpeg2_qp_q6, i4_max2_mpeg2_qp_q6;
+ UWORD8 u1_min_qp_frame_indx, u1_max_qp_frame_indx;
+
+ number_t model_coeff_array[3], model_coeff_array_lin[3];
+ number_t model_coeff_array_lin_wo_int[3];
+ WORD32 i;
+ UWORD8 u1_curr_frame_index;
+
+#if RC_MODEL_USED_BUG_FIX
+ UWORD8 u1_lin_model_valid;
+#endif
+
+ number_t s_quad_avg_sqr_dev, s_lin_avg_sqr_dev;
+
+ UWORD8 u1_check_model;
+
+ model_coeff_array[0].sm = 0;
+ model_coeff_array[0].e = 0;
+ model_coeff_array[1].sm = 0;
+ model_coeff_array[1].e = 0;
+ model_coeff_array[2].sm = 0;
+ model_coeff_array[2].e = 0;
+
+ model_coeff_array_lin[0].sm = 0;
+ model_coeff_array_lin[0].e = 0;
+ model_coeff_array_lin[1].sm = 0;
+ model_coeff_array_lin[1].e = 0;
+ model_coeff_array_lin[2].sm = 0;
+ model_coeff_array_lin[2].e = 0;
+
+ model_coeff_array_lin_wo_int[0].sm = 0;
+ model_coeff_array_lin_wo_int[0].e = 0;
+ model_coeff_array_lin_wo_int[1].sm = 0;
+ model_coeff_array_lin_wo_int[1].e = 0;
+ model_coeff_array_lin_wo_int[2].sm = 0;
+ model_coeff_array_lin_wo_int[2].e = 0;
+
+ /* ps_rd_model += u1_pic_type; */
+
+ u1_curr_frame_index = ps_rd_model->u1_curr_frm_counter;
+
+ ps_rd_model->u1_model_used = QUAD_MODEL;
+
+ if(0 == u1_curr_frame_index)
+ u1_curr_frame_index = (UWORD8)(ps_rd_model->u1_max_frms_to_model - 1);
+ else
+ u1_curr_frame_index--;
+
+ /************************************************************************/
+ /* Rearrange data to be fed into a Linear Regression Module */
+ /* Module finds a,b,c such that */
+ /* y = ax + bx^2 + c */
+ /************************************************************************/
+ u4_num_skips = 0;
+ u1_num_frms_input = 0;
+ /*memset(ps_rd_model->au1_num_frames, 0, MPEG2_QP_ELEM);*/
+ memset(pi1_frame_index, -1, MAX_FRAMES_MODELLED);
+ /*i4_min_mpeg2_qp = MAX_MPEG2_QP;
+ i4_max_mpeg2_qp = 0;*/
+
+ i4_min_mpeg2_qp_q6 = (MAX_MPEG2_QP << QSCALE_Q_FAC);
+ i4_max_mpeg2_qp_q6 = MIN_QSCALE_Q6;
+
+ u1_num_active_frames = ps_rd_model->u1_num_frms_in_model;
+ if(u1_num_active_frames > MAX_ACTIVE_FRAMES)
+ u1_num_active_frames = MAX_ACTIVE_FRAMES;
+
+ /************************************************************************/
+ /* Choose the set of Points to be used for MSE fit of Quadratic model */
+ /* Points chosen are spread across the Qp range. Max of 2 points are */
+ /* chosen for a Qp. */
+ /************************************************************************/
+ for(i = 0; i < u1_num_active_frames; i++)
+ {
+ /* WORD32 i4_test1 = 0, i4_test2 = 0; NITT TBD */
+ u1_reject_frame = 0;
+ u1_num_skips_temp = ps_rd_model->pu1_num_skips[u1_curr_frame_index];
+ /*fract_quant*/
+ /*i4_avg_mpeg2_qp_temp = (ps_rd_model->ai4_avg_qp_q6[u1_curr_frame_index] >> QSCALE_Q_FAC);*/
+ i4_avg_mpeg2_qp_temp_q6 = ps_rd_model->ai4_avg_qp_q6[u1_curr_frame_index];
+
+ if((0 == u4_num_skips) && (0 != u1_num_skips_temp))
+ u1_reject_frame = 1;
+ if((1 == u4_num_skips) && (u1_num_skips_temp > 1))
+ u1_reject_frame = 1;
+ /* If there is already a frame having same qp reject the current frame */
+ if(is_qp_available(ps_rd_model, u1_curr_frame_index, i))
+ u1_reject_frame = 1;
+ /*if (ps_rd_model->au1_num_frames[i4_avg_mpeg2_qp_temp] >= 2)
+ {
+ u1_reject_frame = 1;
+ i4_test2 = 1;
+ }
+ if(i4_test2 != i4_test1)
+ {
+ printf("Why am I here??\n");
+ }*/
+
+ if(0 == i)
+ u1_reject_frame = 0;
+
+ if(0 == u1_reject_frame)
+ {
+ pi1_frame_index[u1_num_frms_input] = (WORD8)u1_curr_frame_index;
+ /* ps_rd_model->au1_num_frames[i4_avg_mpeg2_qp_temp] += 1; */
+
+ /*if (i4_min_mpeg2_qp > i4_avg_mpeg2_qp_temp) i4_min_mpeg2_qp = i4_avg_mpeg2_qp_temp;
+ if (i4_max_mpeg2_qp < i4_avg_mpeg2_qp_temp) i4_max_mpeg2_qp = i4_avg_mpeg2_qp_temp;*/
+
+ if(i4_min_mpeg2_qp_q6 > i4_avg_mpeg2_qp_temp_q6)
+ i4_min_mpeg2_qp_q6 = i4_avg_mpeg2_qp_temp_q6;
+ if(i4_max_mpeg2_qp_q6 < i4_avg_mpeg2_qp_temp_q6)
+ i4_max_mpeg2_qp_q6 = i4_avg_mpeg2_qp_temp_q6;
+ u1_num_frms_input++;
+ }
+
+ if(0 == u1_curr_frame_index)
+ u1_curr_frame_index = (UWORD8)(ps_rd_model->u1_max_frms_to_model - 1);
+ else
+ u1_curr_frame_index--;
+ }
+
+ /************************************************************************/
+ /* Add Pivot Points to the Data set to be used for finding Quadratic */
+ /* Model Coeffs. These will help in constraining the shape of Quadratic*/
+ /* to adapt too much to the Local deviations. */
+ /************************************************************************/
+ /*i4_min2_mpeg2_qp = i4_min_mpeg2_qp;
+ i4_max2_mpeg2_qp = i4_max_mpeg2_qp;*/
+
+ i4_min2_mpeg2_qp_q6 = i4_min_mpeg2_qp_q6;
+ i4_max2_mpeg2_qp_q6 = i4_max_mpeg2_qp_q6;
+
+ u1_min_qp_frame_indx = INVALID_FRAME_INDEX;
+ u1_max_qp_frame_indx = INVALID_FRAME_INDEX;
+
+ /* Loop runnning over the Stored Frame Level Data
+ to find frames of MinQp and MaxQp */
+ for(; i < ps_rd_model->u1_num_frms_in_model; i++)
+ {
+ u1_num_skips_temp = ps_rd_model->pu1_num_skips[u1_curr_frame_index];
+ /*fract_quant*/
+ //i4_avg_mpeg2_qp_temp = ps_rd_model->ai4_avg_qp[u1_curr_frame_index];
+
+ //i4_avg_mpeg2_qp_temp = (ps_rd_model->ai4_avg_qp_q6[u1_curr_frame_index] >> QSCALE_Q_FAC);
+
+ i4_avg_mpeg2_qp_temp_q6 = ps_rd_model->ai4_avg_qp_q6[u1_curr_frame_index];
+
+ if(((0 == u4_num_skips) && (0 != u1_num_skips_temp)) ||
+ ((1 == u4_num_skips) && (u1_num_skips_temp > 1)))
+ continue;
+ /*
+ if (i4_min2_mpeg2_qp > i4_avg_mpeg2_qp_temp)
+ {
+ i4_min2_mpeg2_qp = i4_avg_mpeg2_qp_temp;
+ u1_min_qp_frame_indx = u1_curr_frame_index;
+ }
+ if (i4_max2_mpeg2_qp < i4_avg_mpeg2_qp_temp)
+ {
+ i4_max2_mpeg2_qp = i4_avg_mpeg2_qp_temp;
+ u1_max_qp_frame_indx = u1_curr_frame_index;
+ }
+*/
+
+ if(i4_min2_mpeg2_qp_q6 > i4_avg_mpeg2_qp_temp_q6)
+ {
+ i4_min2_mpeg2_qp_q6 = i4_avg_mpeg2_qp_temp_q6;
+ u1_min_qp_frame_indx = u1_curr_frame_index;
+ }
+ if(i4_max2_mpeg2_qp_q6 < i4_avg_mpeg2_qp_temp_q6)
+ {
+ i4_max2_mpeg2_qp_q6 = i4_avg_mpeg2_qp_temp_q6;
+ u1_max_qp_frame_indx = u1_curr_frame_index;
+ }
+
+ if(0 == u1_curr_frame_index)
+ u1_curr_frame_index = (UWORD8)(ps_rd_model->u1_max_frms_to_model - 1);
+ else
+ u1_curr_frame_index--;
+ }
+
+ /* Add the Chosen Points to the regression data set */
+ if(INVALID_FRAME_INDEX != u1_min_qp_frame_indx)
+ {
+ pi1_frame_index[u1_num_frms_input] = (WORD8)u1_min_qp_frame_indx;
+ u1_num_frms_input++;
+ }
+ if(INVALID_FRAME_INDEX != u1_max_qp_frame_indx)
+ {
+ pi1_frame_index[u1_num_frms_input] = (WORD8)u1_max_qp_frame_indx;
+ u1_num_frms_input++;
+ }
+
+ /* memcpy(pi1_frame_index_initial, pi1_frame_index, MAX_FRAMES_MODELLED); */
+ {
+ UWORD8 u1_k;
+ for(u1_k = 0; u1_k < MAX_FRAMES_MODELLED; u1_k++)
+ {
+ pi1_frame_index_initial[u1_k] = pi1_frame_index[u1_k];
+ }
+ }
+
+ if(QUAD_MODEL == ps_rd_model->u1_model_used)
+ {
+ if(u1_num_frms_input < (MIN_FRAMES_FOR_QUAD_MODEL))
+ ps_rd_model->u1_model_used = LIN_MODEL;
+ if((WORD32)i4_max_mpeg2_qp_q6 < ((WORD32)(21 * i4_min_mpeg2_qp_q6) >> 4))
+ ps_rd_model->u1_model_used = LIN_MODEL;
+ }
+
+ if(LIN_MODEL == ps_rd_model->u1_model_used)
+ {
+ if(u1_num_frms_input < MIN_FRAMES_FOR_LIN_MODEL)
+ ps_rd_model->u1_model_used = PREV_FRAME_MODEL;
+ if((WORD32)i4_max_mpeg2_qp_q6 < ((WORD32)(19 * i4_min_mpeg2_qp_q6) >> 4))
+ ps_rd_model->u1_model_used = PREV_FRAME_MODEL;
+ }
+
+ /***** Call the Module to Return the Coeffs for the Fed Data *****/
+ ps_rd_model->u1_model_used = find_model_coeffs(
+ ps_rd_model->pi4_res_bits,
+ ps_rd_model->pi8_sad,
+ ps_rd_model->ai4_avg_qp_q6,
+ u1_num_frms_input,
+ ps_rd_model->u1_model_used,
+ pi1_frame_index,
+ model_coeff_array,
+ model_coeff_array_lin,
+ model_coeff_array_lin_wo_int,
+ ps_rd_model);
+
+ if((model_coeff_array_lin[2].sm > 0) || (model_coeff_array_lin[0].sm < 0))
+ {
+#if RC_MODEL_USED_BUG_FIX
+ u1_lin_model_valid = 0;
+#endif
+ }
+ else
+ {
+#if RC_MODEL_USED_BUG_FIX
+ u1_lin_model_valid = 1;
+#endif
+ /* lin deviation calculation */
+ calc_avg_sqr_dev_for_model(
+ ps_rd_model->pi4_res_bits,
+ ps_rd_model->pi8_sad,
+ ps_rd_model->ai4_avg_qp_q6,
+ u1_num_frms_input,
+ pi1_frame_index_initial,
+ model_coeff_array_lin,
+ &s_lin_avg_sqr_dev);
+ }
+
+ if(QUAD_MODEL == ps_rd_model->u1_model_used)
+ {
+ u1_check_model = refine_set_of_points(
+ ps_rd_model->pi4_res_bits,
+ ps_rd_model->pi8_sad,
+ ps_rd_model->ai4_avg_qp_q6,
+ u1_num_frms_input,
+ pi1_frame_index,
+ model_coeff_array,
+ &s_quad_avg_sqr_dev);
+
+ if(2 == u1_check_model)
+ {
+ ps_rd_model->u1_model_used = QUAD_MODEL;
+ }
+ else
+ {
+ /*******************************************************************/
+ /* Make sure that some of the Pivot Points are used in the Refined */
+ /* data set. 1. Previous Frame */
+ /*******************************************************************/
+ /* pi1_frame_index[0] = ps_rd_model->u1_curr_frm_counter; */
+
+ ps_rd_model->u1_model_used = find_model_coeffs(
+ ps_rd_model->pi4_res_bits,
+ ps_rd_model->pi8_sad,
+ ps_rd_model->ai4_avg_qp_q6,
+ u1_num_frms_input,
+ ps_rd_model->u1_model_used,
+ pi1_frame_index,
+ model_coeff_array,
+ NULL,
+ NULL,
+ ps_rd_model);
+
+ u1_check_model = refine_set_of_points(
+ ps_rd_model->pi4_res_bits,
+ ps_rd_model->pi8_sad,
+ ps_rd_model->ai4_avg_qp_q6,
+ u1_num_frms_input,
+ pi1_frame_index,
+ model_coeff_array,
+ &s_quad_avg_sqr_dev);
+
+ if((0 == u1_check_model))
+ {
+#if RC_MODEL_USED_BUG_FIX
+ if((s_lin_avg_sqr_dev < s_quad_avg_sqr_dev) && (1 == u1_lin_model_valid))
+#endif
+ ps_rd_model->u1_model_used = LIN_MODEL;
+ }
+ }
+ }
+
+ if(QUAD_MODEL == ps_rd_model->u1_model_used)
+ {
+ /* min_res_bits = model_coeff_c - */
+ /* ((model_coeff_a * model_coeff_a) / (4 * model_coeff_b)); */
+
+ if(model_coeff_array[0].sm < 0)
+ ps_rd_model->u1_model_used = LIN_MODEL;
+
+ /* if ((model_coeff_a * model_coeff_b) > 0) */
+ /* u1_model_used = LIN_MODEL; */
+
+ ps_rd_model->model_coeff_b_quad = model_coeff_array[0];
+ ps_rd_model->model_coeff_a_quad = model_coeff_array[1];
+ ps_rd_model->model_coeff_c_quad = model_coeff_array[2];
+ }
+ if(LIN_MODEL == ps_rd_model->u1_model_used)
+ {
+ if((model_coeff_array_lin[2].sm > 0) || (model_coeff_array_lin[0].sm < 0))
+ ps_rd_model->u1_model_used = PREV_FRAME_MODEL;
+ }
+/* TO DO : FLOAT_TO_FIX */
+#if RC_MODEL_USED_BUG_FIX
+ {
+ number_t s_quad_dev_thr;
+ number_t s_lin_dev_thr;
+ number_t s_diff;
+
+ s_quad_dev_thr.sm = QUAD_DEV_THR_SM;
+ s_quad_dev_thr.e = QUAD_DEV_THR_E;
+
+ /* (s_quad_avg_sqr_dev > .25) */
+ sub32_var_q(s_quad_avg_sqr_dev, s_quad_dev_thr, &s_diff);
+
+ /* Another threshold of .25 on deviation i.e. deviation greater than 25% */
+ if((QUAD_MODEL == ps_rd_model->u1_model_used) && (s_diff.sm > 0))
+ ps_rd_model->u1_model_used = PREV_FRAME_MODEL;
+
+ s_lin_dev_thr.sm = LIN_DEV_THR_SM;
+ s_lin_dev_thr.e = LIN_DEV_THR_E;
+
+ /* (s_lin_avg_sqr_dev > .25) */
+ sub32_var_q(s_lin_avg_sqr_dev, s_lin_dev_thr, &s_diff);
+
+ if((LIN_MODEL == ps_rd_model->u1_model_used) && (s_diff.sm > 0))
+ ps_rd_model->u1_model_used = PREV_FRAME_MODEL;
+ }
+#endif /* #if RC_MODEL_USED_BUG_FIX */
+ /* end of "TO DO : FLOAT_TO_FIX" */
+ ps_rd_model->model_coeff_b_lin = model_coeff_array_lin[0];
+ ps_rd_model->model_coeff_a_lin = model_coeff_array_lin[1];
+ ps_rd_model->model_coeff_c_lin = model_coeff_array_lin[2];
+ ps_rd_model->model_coeff_b_lin_wo_int = model_coeff_array_lin_wo_int[0];
+ ps_rd_model->model_coeff_a_lin_wo_int = model_coeff_array_lin_wo_int[1];
+ ps_rd_model->model_coeff_c_lin_wo_int = model_coeff_array_lin_wo_int[2];
+ /* ps_rd_model->u1_model_used = PREV_FRAME_MODEL; */
+}
+#endif
+
+/******************************************************************************
+ Function Name : estimate_bits_for_qp
+ Description : Estimates bits as model_coeff_a_lin_wo_int times (sad / avg qp)
+ Arguments : ps_rd_model, u4_estimated_sad, i4_avg_qp_q6 (QSCALE_Q_FAC fixed pt)
+ Return Values : Estimated number of bits; negative estimates are returned as 0
+ Revision History:
+ Creation
+*****************************************************************************/
+UWORD32
+ estimate_bits_for_qp(rc_rd_model_t *ps_rd_model, UWORD32 u4_estimated_sad, WORD32 i4_avg_qp_q6)
+{
+ /* float fl_num_bits; */
+ number_t s_num_bits;
+ number_t s_estimated_sad, s_avg_qp;
+
+ /* number_t s_model_coeff_a, s_model_coeff_b, s_model_coeff_c; */
+ WORD32 i4_temp;
+ number_t x_val;
+
+ /* ps_rd_model += u1_curr_pic_type; */
+ s_estimated_sad.sm = u4_estimated_sad;
+ s_estimated_sad.e = 0;
+ /* fract_quant: presumably converts the fixed point qp to a number_t - confirm macro */
+ SET_VARQ_FRM_FIXQ(i4_avg_qp_q6, s_avg_qp, QSCALE_Q_FAC);
+ /* initialising s_num_bits */
+ s_num_bits.sm = 0;
+ s_num_bits.e = 0;
+
+ /*
+ convert_float_to_fix(ps_rd_model->model_coeff_a, &s_model_coeff_a);
+ convert_float_to_fix(ps_rd_model->model_coeff_b, &s_model_coeff_b);
+ convert_float_to_fix(ps_rd_model->model_coeff_c, &s_model_coeff_c);
+ */
+ div32_var_q(s_estimated_sad, s_avg_qp, &x_val); /* x_val = sad / avg qp */
+ {
+ /* TO DO : FLOAT_TO_FIX */
+ /* fl_num_bits = ps_rd_model->model_coeff_a_lin_wo_int * x_val; */
+ mult32_var_q(ps_rd_model->model_coeff_a_lin_wo_int, x_val, &s_num_bits);
+ /* end of "TO DO : FLOAT_TO_FIX" */
+ }
+
+ /* return ((UWORD32) fl_num_bits); */
+ number_t_to_word32(s_num_bits, &i4_temp);
+ if(i4_temp < 0) /* clamp so that the unsigned return value never wraps */
+ i4_temp = 0;
+ return ((UWORD32)i4_temp);
+}
+
+/******************************************************************************
+ Function Name : find_qp_for_target_bits
+ Description : Finds the qp for a bit target; may set u1_model_used to PREV_FRAME_MODEL
+ Arguments : ps_rd_model, u4_target_res_bits, u4_estimated_sad, max and min qp
+ Return Values : qp in QSCALE_Q_FAC fixed point, limited by i4_min_qp_q6 and i4_max_qp_q6
+ Revision History:
+ Creation
+*****************************************************************************/
+WORD32 find_qp_for_target_bits(
+ rc_rd_model_handle ps_rd_model,
+ UWORD32 u4_target_res_bits,
+ UWORD32 u4_estimated_sad,
+ WORD32 i4_max_qp_q6,
+ WORD32 i4_min_qp_q6)
+{
+ WORD32 i4_qp_q6;
+ /* float x_value, f_qp; */
+ number_t x_value, s_qp;
+ /* number_t s_model_coeff_a, s_model_coeff_b, s_model_coeff_c; */
+ number_t s_target_res_bits;
+ number_t s_estimated_sad;
+ number_t temp, temp3;
+ number_t temp2, temp1;
+
+ /* ps_rd_model += u1_curr_pic_type; */
+
+ s_target_res_bits.sm = u4_target_res_bits;
+ s_target_res_bits.e = 0;
+
+ s_estimated_sad.sm = u4_estimated_sad;
+ s_estimated_sad.e = 0;
+
+ /* initialising default value */
+ x_value.sm = 0;
+ x_value.e = 0;
+
+ /*
+ convert_float_to_fix(ps_rd_model->model_coeff_a, &(ps_rd_model->s_model_coeff_a));
+ convert_float_to_fix(ps_rd_model->model_coeff_b, &(ps_rd_model->s_model_coeff_b));
+ convert_float_to_fix(ps_rd_model->model_coeff_c, &(ps_rd_model->s_model_coeff_c));
+ */
+
+#if ENABLE_QUAD_MODEL
+ if(QUAD_MODEL == ps_rd_model->u1_model_used)
+ {
+ /* float det; */
+ number_t det;
+
+ /*
+ det = (ps_rd_model->model_coeff_a * ps_rd_model->model_coeff_a) -
+ (4 * (ps_rd_model->model_coeff_b) *
+ (ps_rd_model->model_coeff_c - u4_target_res_bits));
+ */
+ mult32_var_q(ps_rd_model->model_coeff_a_quad, ps_rd_model->model_coeff_a_quad, &temp);
+ temp3.sm = 4;
+ temp3.e = 0;
+ mult32_var_q(temp3, ps_rd_model->model_coeff_b_quad, &temp1);
+ sub32_var_q(ps_rd_model->model_coeff_c_quad, s_target_res_bits, &temp2);
+ mult32_var_q(temp1, temp2, &temp1);
+ sub32_var_q(temp, temp1, &det);
+
+ /* x_value = sqrt(det); NOTE(review): det < 0 is only checked after this call */
+ sqrt32_var_q(det, &x_value);
+
+ /* x_value = (x_value - ps_rd_model->model_coeff_a) /
+ (2 * ps_rd_model->model_coeff_b);
+ */
+ sub32_var_q(x_value, ps_rd_model->model_coeff_a_quad, &temp);
+ temp3.sm = 2;
+ temp3.e = 0;
+ mult32_var_q(temp3, ps_rd_model->model_coeff_b_quad, &temp1);
+ div32_var_q(temp, temp1, &x_value);
+
+ if(det.sm < 0 || x_value.sm < 0)
+ {
+ /* No usable root: x_value is recomputed by the PREV_FRAME_MODEL branch below */
+ ps_rd_model->u1_model_used = PREV_FRAME_MODEL;
+ }
+ }
+
+ if(LIN_MODEL == ps_rd_model->u1_model_used)
+ {
+ /*
+ x_value = ((float)u4_target_res_bits - ps_rd_model->model_coeff_c) /
+ (ps_rd_model->model_coeff_b);
+ */
+ sub32_var_q(s_target_res_bits, ps_rd_model->model_coeff_c_lin, &temp);
+ div32_var_q(temp, ps_rd_model->model_coeff_b_lin, &x_value);
+ if(x_value.sm < 0)
+ {
+ /* Negative solution: x_value is recomputed by the PREV_FRAME_MODEL branch below */
+ ps_rd_model->u1_model_used = PREV_FRAME_MODEL;
+ }
+ }
+#else
+ ps_rd_model->u1_model_used = PREV_FRAME_MODEL;
+#endif
+ if(PREV_FRAME_MODEL == ps_rd_model->u1_model_used)
+ {
+ /* TO DO : FLOAT_TO_FIX */
+ /* x_value = (float) u4_target_res_bits / ps_rd_model->model_coeff_a_lin_wo_int; */
+ div32_var_q(s_target_res_bits, ps_rd_model->model_coeff_a_lin_wo_int, &x_value);
+ /* end of "TO DO : FLOAT_TO_FIX" */
+ }
+
+ if(0 != x_value.sm)
+ {
+ /* f_qp = u4_estimated_sad / x_value; */
+ div32_var_q(s_estimated_sad, x_value, &s_qp);
+ }
+ else
+ {
+ s_qp.sm = MAX_MPEG2_QP; /* avoid dividing by a zero x_value: use the largest qp */
+ s_qp.e = 0;
+ }
+
+ /*
+ if (f_qp > MAX_MPEG2_QP)
+ f_qp = MAX_MPEG2_QP;
+ */
+ temp3.sm = MAX_MPEG2_QP;
+ temp3.e = 0;
+ sub32_var_q(s_qp, temp3, &temp);
+ if(temp.sm > 0)
+ {
+ s_qp = temp3;
+ }
+ convert_varq_to_fixq(s_qp, &i4_qp_q6, (WORD32)QSCALE_Q_FAC);
+ /* Limiting the QP to the Min and Max Qp values; the max limit is applied last */
+ if(i4_qp_q6 < i4_min_qp_q6)
+ {
+ i4_qp_q6 = i4_min_qp_q6;
+ }
+ if(i4_qp_q6 > i4_max_qp_q6)
+ {
+ i4_qp_q6 = i4_max_qp_q6;
+ }
+ return (i4_qp_q6);
+}
+/******************************************************************************
+ Function Name : add_frame_to_rd_model
+ Description : Stores one frame's data and calls update_frame_rd_model, unless
+ Arguments : (cont.) i4_res_bits is already stored 3 or more times. All args are inputs
+ Return Values : void
+ Revision History:
+ Creation
+*****************************************************************************/
+void add_frame_to_rd_model(
+ rc_rd_model_t *ps_rd_model,
+ UWORD32 i4_res_bits,
+ WORD32 i4_avg_mp2qp_q6,
+ LWORD64 i8_sad_h264,
+ UWORD8 u1_num_skips)
+{
+ UWORD8 u1_curr_frame_index, i4_same_bit_count = 0; /* NOTE: the counter is a UWORD8 */
+ /* ps_rd_model += u1_curr_pic_type; */
+ u1_curr_frame_index = ps_rd_model->u1_curr_frm_counter;
+
+ {
+ WORD32 i;
+
+ i = ps_rd_model->u1_num_frms_in_model - 1;
+ while(i >= 0) /* count the stored frames whose bits equal i4_res_bits */
+ {
+ if(ps_rd_model->pi4_res_bits[i] == i4_res_bits)
+ i4_same_bit_count++;
+ i--;
+ }
+ }
+ /* - the condition check is a temporary fix to avoid feeding zero into model.
+ The change should be done so that 0 is not at all fed into model. When texture bit consumption becomes zero next frame qp should be explicitly decreased so that finite amount of texture
+ bits is consumed and feeds valid data to model to come out of deadlock*/
+
+ if(i4_same_bit_count < 3)
+ {
+ /*** Insert the Present Frame Data into the RD Model State Memory ***/
+ ps_rd_model->pi4_res_bits[u1_curr_frame_index] = i4_res_bits;
+ ps_rd_model->pi8_sad[u1_curr_frame_index] = i8_sad_h264;
+ ps_rd_model->pu1_num_skips[u1_curr_frame_index] = u1_num_skips;
+ ps_rd_model->ai4_avg_qp[u1_curr_frame_index] = (i4_avg_mp2qp_q6 >> QSCALE_Q_FAC);
+ ps_rd_model->ai4_avg_qp_q6[u1_curr_frame_index] = i4_avg_mp2qp_q6;
+
+ ps_rd_model->u1_curr_frm_counter++; /* advance the write index, wrapping below */
+ if(ps_rd_model->u1_max_frms_to_model == ps_rd_model->u1_curr_frm_counter)
+ ps_rd_model->u1_curr_frm_counter = 0;
+
+ if(ps_rd_model->u1_num_frms_in_model < ps_rd_model->u1_max_frms_to_model)
+ {
+ ps_rd_model->u1_num_frms_in_model++;
+ }
+ update_frame_rd_model(ps_rd_model);
+ }
+}
+/******************************************************************************
+ Function Name : get_linear_coefficient
+ Description : Getter for the model_coeff_a_lin_wo_int coefficient
+ Arguments : ps_rd_model
+ Return Values : ps_rd_model->model_coeff_a_lin_wo_int, returned by value
+ Revision History:
+ Creation
+*****************************************************************************/
+number_t get_linear_coefficient(rc_rd_model_t *ps_rd_model)
+{
+ return (ps_rd_model->model_coeff_a_lin_wo_int);
+}
+/******************************************************************************
+ Function Name : set_linear_coefficient
+ Description : Sets model_coeff_a_lin_wo_int and switches to PREV_FRAME_MODEL
+ Arguments : ps_rd_model, model_coeff_a_lin_wo_int (new coefficient value)
+ Return Values : void
+ Revision History:
+ Creation
+*****************************************************************************/
+void set_linear_coefficient(rc_rd_model_t *ps_rd_model, number_t model_coeff_a_lin_wo_int)
+{
+ ps_rd_model->model_coeff_a_lin_wo_int = model_coeff_a_lin_wo_int;
+ ps_rd_model->u1_model_used = PREV_FRAME_MODEL;
+}
+/******************************************************************************
+ Function Name : is_model_valid
+ Description : Tells whether the model holds at least one frame
+ Arguments : ps_rd_model
+ Return Values : 1 if u1_num_frms_in_model is greater than 0, otherwise 0
+ Revision History:
+ Creation
+*****************************************************************************/
+WORD32 is_model_valid(rc_rd_model_t *ps_rd_model)
+{
+ /* Return 1 if at least one data point is available: this is required because frames with zero texture consumption are not updated in the model */
+ if(ps_rd_model->u1_num_frms_in_model > 0)
+ {
+ return 1;
+ }
+ else
+ {
+ return 0;
+ }
+}
+
+#endif /* #if RC_FIXED_POINT */
diff --git a/encoder/rc_rd_model_struct.h b/encoder/rc_rd_model_struct.h
new file mode 100644
index 0000000..c537699
--- /dev/null
+++ b/encoder/rc_rd_model_struct.h
@@ -0,0 +1,106 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file rc_rd_model_struct.h
+*
+* \brief
+* This file contains rate control rd model struct and constant macro
+*
+* \date
+*
+* \author
+* ittiam
+*
+******************************************************************************
+*/
+#ifndef RC_RD_MODEL_STRUCT
+#define RC_RD_MODEL_STRUCT
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+/* Tool Set Switch */
+#define ENABLE_QUAD_MODEL 1
+
+/* Number of elements for QP */
+/* #define MPEG2_QP_ELEM (MAX_MPEG2_QP + 1) */
+
+/*#define MAX_NUM_PIC_TYPES_FOR_RC_MODEL 3*/ /* how many types of pictures will the rate-control handle */
+
+#define QUAD 1
+#define MIN_FRAMES_FOR_QUAD_MODEL 5
+#define MAX_ACTIVE_FRAMES 16
+#define MIN_FRAMES_FOR_LIN_MODEL 3
+#define INVALID_FRAME_INDEX 255
+
+#define UP_THR_SM 1 /* (1 / pow(2,4)) = 0.0625 */
+#define UP_THR_E 4
+
+#define LO_THR_SM 368 /* (368.64 / pow(2,14)) = 0.0225 */
+#define LO_THR_E 14
+
+#define QUAD_DEV_THR_SM 1 /* (1 / pow(2,2)) = .25 */
+#define QUAD_DEV_THR_E 2
+
+#define LIN_DEV_THR_SM 1 /* (1 / pow(2,2)) = .25 */
+#define LIN_DEV_THR_E 2
+
+#define QUAD_MODEL 0
+#define LIN_MODEL 1
+#define PREV_FRAME_MODEL 2
+
+/* Q Factors used for fixed point calculation */
+#define Q_FORMAT_GAMMA 8
+#define Q_FORMAT_ETA 8
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+typedef struct rc_rd_model_t
+{
+ UWORD8 u1_curr_frm_counter; /* index at which the next frame's data is written */
+ UWORD8 u1_num_frms_in_model; /* frames stored so far, at most u1_max_frms_to_model */
+ UWORD8 u1_max_frms_to_model; /* u1_curr_frm_counter wraps to 0 on reaching this */
+ UWORD8 u1_model_used; /* QUAD_MODEL, LIN_MODEL or PREV_FRAME_MODEL */
+
+ UWORD32 pi4_res_bits[MAX_FRAMES_MODELLED]; /* res bits of each stored frame */
+ LWORD64 pi8_sad[MAX_FRAMES_MODELLED]; /* sad of each stored frame */
+
+ UWORD8 pu1_num_skips[MAX_FRAMES_MODELLED]; /* skip count of each stored frame */
+ /* UWORD8 pu1_avg_qp[MAX_FRAMES_MODELLED]; */
+ WORD32 ai4_avg_qp[MAX_FRAMES_MODELLED]; /* ai4_avg_qp_q6 >> QSCALE_Q_FAC */
+ WORD32 ai4_avg_qp_q6[MAX_FRAMES_MODELLED]; /* average qp in QSCALE_Q_FAC fixed point */
+ /* UWORD8 au1_num_frames[MPEG2_QP_ELEM]; */
+
+ model_coeff model_coeff_a_quad; /* quadratic model: bits = b * x * x + a * x + c */
+ model_coeff model_coeff_b_quad;
+ model_coeff model_coeff_c_quad;
+
+ model_coeff model_coeff_a_lin; /* find_qp_for_target_bits solves bits = b * x + c */
+ model_coeff model_coeff_b_lin;
+ model_coeff model_coeff_c_lin;
+
+ model_coeff model_coeff_a_lin_wo_int; /* PREV_FRAME_MODEL: bits = a * x */
+ model_coeff model_coeff_b_lin_wo_int;
+ model_coeff model_coeff_c_lin_wo_int;
+} rc_rd_model_t;
+
+#endif /* RC_RD_MODEL_STRUCT*/
diff --git a/encoder/rc_sad_acc.c b/encoder/rc_sad_acc.c
new file mode 100644
index 0000000..6bb5992
--- /dev/null
+++ b/encoder/rc_sad_acc.c
@@ -0,0 +1,135 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file rc_sad_acc.c
+*
+* \brief
+* This file contains sad accumulator related functions
+*
+* \date
+*
+* \author
+* ittiam
+*
+******************************************************************************
+*/
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <string.h>
+
+/* User include files */
+#include "ittiam_datatypes.h"
+#include "mem_req_and_acq.h"
+#include "rc_common.h"
+#include "rc_cntrl_param.h"
+#include "var_q_operator.h"
+#include "trace_support.h"
+#include "rc_sad_acc.h"
+
+/* State structure for sad accumulator: one sad value per picture type */
+typedef struct
+{
+ WORD32 ai4_sad[MAX_PIC_TYPE]; /* set to -1 by init_sad_acc, written by sad_acc_put_sad */
+
+} sad_acc_t;
+
+#if NON_STEADSTATE_CODE
+WORD32 sad_acc_num_fill_use_free_memtab(
+ sad_acc_handle *pps_sad_acc_handle, itt_memtab_t *ps_memtab, ITT_FUNC_TYPE_E e_func_type)
+{ /* Handles the single memtab of the sad accumulator and returns the memtab count */
+ WORD32 i4_mem_tab_idx = 0;
+ sad_acc_t **pps_sad_acc = (sad_acc_t **)pps_sad_acc_handle;
+ static sad_acc_t s_sad_acc; /* dummy state used by the hack below */
+
+ /* Hack for alloc, during which we don't have any state memory: the handle is
+ pointed at the static dummy because dereferencing can cause issues */
+ if(e_func_type == GET_NUM_MEMTAB || e_func_type == FILL_MEMTAB)
+ (*pps_sad_acc) = &s_sad_acc;
+
+ /* Memtab for the sad accumulator state structure (sad_acc_t) */
+ if(e_func_type != GET_NUM_MEMTAB)
+ {
+ fill_memtab(
+ &ps_memtab[i4_mem_tab_idx], sizeof(sad_acc_t), MEM_TAB_ALIGNMENT, PERSISTENT, DDR);
+ use_or_fill_base(&ps_memtab[0], (void **)pps_sad_acc, e_func_type); /* index 0 equals i4_mem_tab_idx here */
+ }
+ i4_mem_tab_idx++;
+
+ return (i4_mem_tab_idx); /* always 1 */
+}
+/******************************************************************************
+ Function Name : init_sad_acc
+ Description : Sets the stored sad of every picture type to -1
+ Arguments : ps_sad_acc_handle
+ Return Values : void
+ Revision History:
+ Creation
+*****************************************************************************/
+void init_sad_acc(sad_acc_handle ps_sad_acc_handle)
+{
+ sad_acc_t *ps_sad_acc = (sad_acc_t *)ps_sad_acc_handle;
+ WORD32 i;
+ /* Initialize all MAX_PIC_TYPE entries of the array */
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ ps_sad_acc->ai4_sad[i] = -1;
+ }
+}
+#endif /* #if NON_STEADSTATE_CODE */
+/******************************************************************************
+ Function Name : sad_acc_put_sad
+ Description : Stores the intra sad under I_PIC and the sad of the current pic type
+ Arguments : ps_sad_acc_handle, i4_cur_intra_sad, i4_cur_sad, i4_cur_pic_type
+ Return Values : void
+ Revision History:
+ Creation
+*****************************************************************************/
+void sad_acc_put_sad(
+ sad_acc_handle ps_sad_acc_handle,
+ WORD32 i4_cur_intra_sad,
+ WORD32 i4_cur_sad,
+ WORD32 i4_cur_pic_type)
+{
+ sad_acc_t *ps_sad_acc = (sad_acc_t *)ps_sad_acc_handle;
+ ps_sad_acc->ai4_sad[I_PIC] = i4_cur_intra_sad; /* overwritten below when the pic type is I_PIC */
+ ps_sad_acc->ai4_sad[i4_cur_pic_type] = i4_cur_sad; /* NOTE(review): index is not range checked */
+}
+/******************************************************************************
+ Function Name : sad_acc_get_sad
+ Description : Copies the stored sad of every picture type into pi4_sad
+ Arguments : ps_sad_acc_handle, pi4_sad (output, MAX_PIC_TYPE entries are written)
+ Return Values : void
+ Revision History:
+ Creation
+*****************************************************************************/
+void sad_acc_get_sad(sad_acc_handle ps_sad_acc_handle, WORD32 *pi4_sad)
+{
+ sad_acc_t *ps_sad_acc = (sad_acc_t *)ps_sad_acc_handle;
+ WORD32 i;
+ /* Copy the stored values out to the caller's array */
+ for(i = 0; i < MAX_PIC_TYPE; i++)
+ {
+ pi4_sad[i] = ps_sad_acc->ai4_sad[i];
+ }
+}
diff --git a/encoder/rc_sad_acc.h b/encoder/rc_sad_acc.h
new file mode 100644
index 0000000..9a7fd69
--- /dev/null
+++ b/encoder/rc_sad_acc.h
@@ -0,0 +1,55 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file rc_sad_acc.h
+*
+* \brief
+* This file contains all the necessary declarations for
+* sad accumulate functions
+*
+* \date
+*
+* \author
+* ittiam
+*
+******************************************************************************
+*/
+#ifndef _RC_SAD_ACC_H_
+#define _RC_SAD_ACC_H_
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+typedef struct sad_acc_t *sad_acc_handle;
+
+/*****************************************************************************/
+/* Function Declarations */
+/*****************************************************************************/
+WORD32 sad_acc_num_fill_use_free_memtab(
+ sad_acc_handle *pps_sad_acc, itt_memtab_t *ps_memtab, ITT_FUNC_TYPE_E e_func_type);
+void init_sad_acc(sad_acc_handle ps_sad_acc);
+void sad_acc_put_sad(
+ sad_acc_handle ps_sad_acc_handle,
+ WORD32 i4_cur_intra_sad,
+ WORD32 i4_cur_sad,
+ WORD32 i4_cur_pic_type);
+void sad_acc_get_sad(sad_acc_handle ps_sad_acc_handle, WORD32 *pi4_sad);
+#endif /* _RC_SAD_ACC_H_ */
diff --git a/encoder/sqrt_interp.c b/encoder/sqrt_interp.c
new file mode 100644
index 0000000..3991ebb
--- /dev/null
+++ b/encoder/sqrt_interp.c
@@ -0,0 +1,115 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file sqrt_interp.c
+*
+* \brief
+* This file contains the square root interpolation functions
+*
+* \date
+*
+* \author
+* ittiam
+*
+******************************************************************************
+*/
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+#include <stdlib.h>
+
+/* User include files */
+#include "ia_type_def.h"
+#include "defs.h"
+/* #include "constants.h" */
+#include "ia_basic_ops32.h"
+/* #include "ia_basic_ops16.h" */
+#include "ia_basic_ops40.h"
+#include "sqrt_interp.h"
+/* #include "ia_enhaacplus_enc_basicops.h" */
+#include "common_rom.h"
+
+WORD32 sqrtFix_interpolate(WORD32 num, WORD *q, const WORD32 *sqrt_tab)
+{ /* Table based square root with interpolation between neighbouring entries */
+ WORD32 index, answer, temp, delta, step_size;
+ WORD q_temp = *q; /* working copy of the Q format passed in through q */
+ WORD k;
+
+ if(num == 0)
+ return 0; /* *q is left unchanged for a zero input */
+
+ k = norm32(num); /* presumably the normalising left shift of num - confirm */
+ temp = shr32(shl32(num, k), 21);
+ q_temp += k;
+ index = temp & 0x1FF; /* Leave leading 1; index is in the range 0 to 0x1FF */
+ {
+ delta = shl32((shl32(num, k) - shl32(temp, 21)), 10);
+ step_size = sub32(sqrt_tab[index + 1], sqrt_tab[index]); /* reads up to sqrt_tab[0x200] */
+ step_size = mult32_shl_sat(step_size, delta); /* Q:Q_SQRT_TAB + 21 + 10 - 31 */
+ answer = add32(sqrt_tab[index], step_size);
+ }
+ if(q_temp & 1) /* odd Q: make it even and scale the answer by INV_SQRT_2_Q31 */
+ {
+ q_temp -= 1;
+ answer = mult32_shl_sat(answer, INV_SQRT_2_Q31);
+ }
+
+ q_temp = q_temp >> 1; /* the even Q is halved */
+ q_temp += (Q_SQRT_TAB);
+ *q = q_temp;
+
+ answer >>= 1; /* one bit is dropped from the answer and from its Q together */
+ *q -= 1;
+
+ return answer;
+}
+
+WORD32 sqrtFix(WORD32 num, WORD *q, const WORD32 *sqrt_tab)
+{ /* Table based square root without interpolation; *q receives the result Q */
+ WORD32 index, answer, temp;
+ WORD k;
+ WORD q_temp = *q; /* working copy of the Q format passed in through q */
+
+ if(num == 0)
+ return 0; /* *q is left unchanged for a zero input */
+
+ k = norm32(num); /* presumably the normalising left shift of num - confirm */
+ temp = shr32(shl32(num, k), 21);
+ q_temp += k;
+ index = temp & 0x1FF; /* Leave leading 1; index is in the range 0 to 0x1FF */
+ answer = sqrt_tab[index];
+ if(q_temp & 1) /* odd Q: make it even and scale the answer by INV_SQRT_2_Q15 */
+ {
+ q_temp -= 1;
+ answer = mult32x16in32_shl(answer, INV_SQRT_2_Q15);
+ }
+ q_temp = q_temp >> 1; /* the even Q is halved */
+ q_temp += Q_SQRT_TAB;
+ *q = q_temp;
+ return answer;
+}
+
+#ifdef ITT_C6678
+#pragma CODE_SECTION(sqrtFix_interpolate, "itt_varq_l1pram");
+#pragma CODE_SECTION(sqrtFix, "itt_varq_l1pram");
+#endif
diff --git a/encoder/sqrt_interp.h b/encoder/sqrt_interp.h
new file mode 100644
index 0000000..af597ed
--- /dev/null
+++ b/encoder/sqrt_interp.h
@@ -0,0 +1,44 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file sqrt_interp.h
+*
+* \brief
+* This file contains declarations for square root interpolate functions
+*
+* \date
+*
+* \author
+* ittiam
+*
+******************************************************************************
+*/
+#ifndef SQRT_INTERP_H
+#define SQRT_INTERP_H
+
+/*****************************************************************************/
+/* Function Declarations */
+/*****************************************************************************/
+WORD32 sqrtFix_interpolate(WORD32 num, WORD *q, const WORD32 *sqrt_tab);
+
+WORD32 sqrtFix(WORD32 num, WORD *q, const WORD32 *sqrt_tab);
+
+#endif
diff --git a/encoder/trace_support.h b/encoder/trace_support.h
new file mode 100644
index 0000000..37dd70b
--- /dev/null
+++ b/encoder/trace_support.h
@@ -0,0 +1,64 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/****************************************************************************/
+/* */
+/* File Name : trace_support.h */
+/* */
+/* Description : Defines the functions for trace_support.c */
+/* */
+/* List of Functions : */
+/* */
+/* Issues / Problems : */
+/* */
+/* Revision History : */
+/* */
+/* DD MM YYYY Author(s) Changes (Describe the changes) */
+/* 24 03 2008 DPKA Creation */
+/****************************************************************************/
+#ifndef TRACE_SUPPORT_H
+#define TRACE_SUPPORT_H
+
+#define TRACE_SUPPORT 0
+
+#define RC_DEBUG_LEVEL_1 0
+
+#define RC_2PASS_GOP_DEBUG 0
+
+#define HEVC_RC 1
+
+typedef struct
+{
+ WORD8 *pu1_buf; /* presumably the trace buffer handed to init_trace_support - TODO confirm */
+ WORD32 i4_offset; /* presumably the current position inside pu1_buf - TODO confirm */
+ WORD32 i4_max_size; /* presumably the capacity of pu1_buf - TODO confirm */
+} trace_support_t;
+
+void init_trace_support(WORD8 *pu1_buf, WORD32 i4_size);
+/* trace_printf forwards to printf only when TRACE_SUPPORT is non-zero; otherwise it expands to nothing. */
+#if TRACE_SUPPORT
+#define trace_printf(...) printf(__VA_ARGS__)
+#else
+#define trace_printf(...)
+#endif
+/* ASSERT wraps assert(); this header includes neither <assert.h> nor <stdio.h>, so including files must provide them. */
+#define ASSERT(x) assert((x))
+//#define ASSERT(x) ihevcd_debug_assert((x))
+
+#endif
diff --git a/encoder/var_q_operator.c b/encoder/var_q_operator.c
new file mode 100644
index 0000000..21670b1
--- /dev/null
+++ b/encoder/var_q_operator.c
@@ -0,0 +1,209 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file var_q_operator.c
+*
+* \brief
+* This file implements the basic variable Q-point (fixed point) functions
+*
+* \date
+*
+* \author
+* ittiam
+*
+******************************************************************************
+*/
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* User include files */
+#include "ia_type_def.h"
+#include "defs.h"
+#include "ia_basic_ops32.h"
+#include "ia_basic_ops40.h"
+/* #include "num_struct.h" */
+#include "var_q_operator.h"
+#include "sqrt_interp.h"
+#include "common_rom.h"
+
+#define NUM_BITS_MAG 32
+
+/************************************************************************************/
+/* The files to be used for basic fixed Q-point functions : */
+/* */
+/* audio/ia_standards/c64x/include/ia_basic_ops32.h cvs_version : FULL_V1_16 */
+/* audio/ia_standards/c64x/include/ia_basic_ops40.h cvs_version : FULL_V1_16 */
+/* */
+/************************************************************************************/
+
+/* Multiply: *c = a * b for variable-Q numbers (sm = mantissa, e = Q-format). */
+/* Both mantissas are normalised with norm32/shl32_sat before mult32 is used. */
+void mult32_var_q(number_t a, number_t b, number_t *c)
+{
+    WORD32 Q_a;
+    WORD32 Q_b;
+    WORD32 norm_a;
+    WORD32 norm_b;
+
+    /* shift counts used to normalise each mantissa */
+    norm_a = norm32(a.sm); /* norm32 defined in ia_basic_ops32.h */
+    norm_b = norm32(b.sm);
+
+    /* Q-format of each operand after normalisation */
+    Q_a = norm_a + a.e;
+    Q_b = norm_b + b.e;
+    a.sm = shl32_sat(a.sm, norm_a);
+    b.sm = shl32_sat(b.sm, norm_b);
+
+    c->sm = mult32(a.sm, b.sm); /* mult32 defined in ia_basic_ops40.h */
+    c->e = Q_a + Q_b - 32; /* mult32 decreases the Q-format by 32 */
+}
+
+/* Divide: *c = a / b. div32 (ia_basic_ops32.h) returns the quotient mantissa */
+/* and reports a Q-format through its third argument.                        */
+void div32_var_q(number_t a, number_t b, number_t *c)
+{
+    WORD32 q_from_div32;
+    WORD32 q_diff = a.e - b.e; /* Q-format difference of the two operands */
+    c->sm = div32(a.sm, b.sm, &q_from_div32);
+    c->e = q_diff + q_from_div32;
+}
+
+/* Addition: *c = a + b (sm = mantissa, e = Q-format).                       */
+/* Each operand is normalised only to bit 30 instead of bit 31, and both are */
+/* brought to the smaller of the two resulting Q-formats before add32.       */
+void add32_var_q(number_t a, number_t b, number_t *c)
+{
+    /* norm32 defined in ia_basic_ops32.h; the -1 stops one bit short of bit 31 */
+    WORD32 shift_a = norm32(a.sm) - 1;
+    WORD32 shift_b = norm32(b.sm) - 1;
+
+    /* Q-format each operand would have after its own normalisation */
+    WORD32 q_norm_a = shift_a + a.e;
+    WORD32 q_norm_b = shift_b + b.e;
+    WORD32 common_q;
+
+    if(q_norm_a > q_norm_b)
+    {
+        /* b decides the common Q-format; a is shifted to match it */
+        common_q = q_norm_b;
+        b.sm = shl32_dir_sat(b.sm, shift_b);
+        a.sm = shr32_dir_sat(a.sm, ((a.e - b.e) - shift_b));
+    }
+    else if(q_norm_a < q_norm_b)
+    {
+        /* a decides the common Q-format; b is shifted to match it */
+        common_q = q_norm_a;
+        a.sm = shl32_dir_sat(a.sm, shift_a);
+        b.sm = shr32_dir_sat(b.sm, ((b.e - a.e) - shift_a));
+    }
+    else
+    {
+        /* equal Q-formats after normalisation: shift each by its own count */
+        common_q = q_norm_a;
+        a.sm = shl32_dir_sat(a.sm, shift_a);
+        b.sm = shl32_dir_sat(b.sm, shift_b);
+    }
+
+    /* add32 is applied directly to the aligned mantissas, so the sum is */
+    /* reported in the common Q-format                                   */
+    c->sm = add32(a.sm, b.sm);
+    c->e = common_q;
+}
+
+/* Subtraction: *c = a - b (sm = mantissa, e = Q-format).                    */
+/* Each operand is normalised only to bit 30 instead of bit 31, and both are */
+/* brought to the smaller of the two resulting Q-formats before sub32.       */
+void sub32_var_q(number_t a, number_t b, number_t *c)
+{
+    /* norm32 defined in ia_basic_ops32.h; the -1 stops one bit short of bit 31 */
+    WORD32 shift_a = norm32(a.sm) - 1;
+    WORD32 shift_b = norm32(b.sm) - 1;
+
+    /* Q-format each operand would have after its own normalisation */
+    WORD32 q_norm_a = shift_a + a.e;
+    WORD32 q_norm_b = shift_b + b.e;
+    WORD32 common_q;
+
+    if(q_norm_a > q_norm_b)
+    {
+        /* b decides the common Q-format; a is shifted to match it */
+        common_q = q_norm_b;
+        b.sm = shl32_dir_sat(b.sm, shift_b);
+        a.sm = shr32_dir_sat(a.sm, ((a.e - b.e) - shift_b));
+    }
+    else if(q_norm_a < q_norm_b)
+    {
+        /* a decides the common Q-format; b is shifted to match it */
+        common_q = q_norm_a;
+        a.sm = shl32_dir_sat(a.sm, shift_a);
+        b.sm = shr32_dir_sat(b.sm, ((b.e - a.e) - shift_a));
+    }
+    else
+    {
+        /* equal Q-formats after normalisation: shift each by its own count */
+        common_q = q_norm_a;
+        a.sm = shl32_dir_sat(a.sm, shift_a);
+        b.sm = shl32_dir_sat(b.sm, shift_b);
+    }
+
+    /* sub32 is applied directly to the aligned mantissas, so the difference */
+    /* is reported in the common Q-format                                    */
+    c->sm = sub32(a.sm, b.sm);
+    c->e = common_q;
+}
+
+/* Square root: stores the root of a in *c. a.e is handed to sqrtFix_interpolate */
+/* through result_q and the value written back is used as the root's Q-format.  */
+void sqrt32_var_q(number_t a, number_t *c)
+{
+    WORD32 result_q = a.e;
+
+    /* sqrtFix() has the same signature; the interpolating variant is used */
+    c->sm = sqrtFix_interpolate(a.sm, &result_q, gi4_sqrt_tab);
+    c->e = result_q;
+}
+
+void number_t_to_word32(number_t num_a, WORD32 *a)
+{
+    a[0] = shr32_dir_sat(num_a.sm, num_a.e); /* mantissa shifted right by its Q-format */
+}
+
+/*
+convert_float_to_fix(float a_f,
+ number_t *a)
+{
+ double log_a_f;
+ log_a_f = log(ABS(a_f))/log(2);
+
+ a->e = 30 - (WORD32)ceil(log_a_f);
+ a->sm = (WORD32) (a_f * pow(2, a->e));
+}
+*/
+
+#ifdef ITT_C6678 /* ITT_C6678 builds only: name code section "itt_varq_l1pram" for all six functions of this file */
+#pragma CODE_SECTION(number_t_to_word32, "itt_varq_l1pram");
+#pragma CODE_SECTION(sqrt32_var_q, "itt_varq_l1pram");
+#pragma CODE_SECTION(sub32_var_q, "itt_varq_l1pram");
+#pragma CODE_SECTION(add32_var_q, "itt_varq_l1pram");
+#pragma CODE_SECTION(div32_var_q, "itt_varq_l1pram");
+#pragma CODE_SECTION(mult32_var_q, "itt_varq_l1pram");
+#endif /* ITT_C6678 */
diff --git a/encoder/var_q_operator.h b/encoder/var_q_operator.h
new file mode 100644
index 0000000..5a80688
--- /dev/null
+++ b/encoder/var_q_operator.h
@@ -0,0 +1,74 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+#ifndef VAR_Q_OPERATOR_H
+#define VAR_Q_OPERATOR_H
+
+typedef struct
+{
+ WORD32 sm; /* MSB 1 bit sign & rest magnitude */
+ WORD32 e; /* Q-format */
+} number_t;
+/* The arithmetic helpers below take their operands by value and write the result to *c. */
+void mult32_var_q(number_t a, number_t b, number_t *c); /* *c = a * b */
+
+void div32_var_q(number_t a, number_t b, number_t *c); /* *c = a / b */
+
+void add32_var_q(number_t a, number_t b, number_t *c); /* *c = a + b */
+
+void sub32_var_q(number_t a, number_t b, number_t *c); /* *c = a - b */
+
+void sqrt32_var_q(number_t a, number_t *c); /* *c = square root of a */
+
+void number_t_to_word32(number_t num_a, WORD32 *a); /* *a = num_a.sm shifted right by num_a.e */
+
+void convert_float_to_fix(float a_f, number_t *a); /* not defined in var_q_operator.c */
+
+void convert_fix_to_float(number_t a, float *a_f); /* not defined in var_q_operator.c */
+
+#define SET_VAR_Q(a, b, c) /* a.sm = b (mantissa), a.e = c (Q-format) */ \
+ { \
+ (a).sm = (b); \
+ (a).e = (c); \
+ }
+
+/* A right shift of 32 bits or more on a WORD32 is undefined, so that case stores 0 instead. */
+#define convert_varq_to_fixq(varq, a, q_fact) /* *a = varq.sm rescaled from Q varq.e to Q q_fact */ \
+    { \
+        if((varq).e > (q_fact)) /* fewer fractional bits wanted: shift right */ \
+        { \
+            if(((varq).e - (q_fact)) >= (WORD32)(sizeof(WORD32) * 8)) \
+            { \
+                *(a) = 0; \
+            } \
+            else \
+            { \
+                *(a) = (varq).sm >> ((varq).e - (q_fact)); \
+            } \
+        } \
+        else /* NOTE(review): this left shift is not range-checked */ \
+            *(a) = (varq).sm << ((q_fact) - (varq).e); \
+    }
+
+#define SET_VARQ_FRM_FIXQ(fixq, var_q, q_fact) /* var_q.sm = fixq, var_q.e = q_fact */ \
+    { \
+        (var_q).sm = (fixq); \
+        (var_q).e = (q_fact); \
+    }
+#endif
diff --git a/encoder/vbr_storage_vbv.c b/encoder/vbr_storage_vbv.c
new file mode 100644
index 0000000..6c5ae09
--- /dev/null
+++ b/encoder/vbr_storage_vbv.c
@@ -0,0 +1,404 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file vbr_storage_vbv.c
+*
+* \brief
+* This file contains functions related to the VBV buffer
+*
+* \date
+*
+* \author
+* ittiam
+*
+******************************************************************************
+*/
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+
+/* User include files */
+#include "ittiam_datatypes.h"
+#include "mem_req_and_acq.h"
+#include "rc_common.h"
+#include "rc_cntrl_param.h"
+#include "var_q_operator.h"
+#include "fixed_point_error_bits.h"
+#include "cbr_buffer_control.h"
+#include "rc_rd_model.h"
+#include "est_sad.h"
+#include "cbr_buffer_control.h"
+#include "picture_type.h"
+#include "bit_allocation.h"
+#include "vbr_storage_vbv.h"
+#include "trace_support.h"
+
+#define MAX(x, y) ((x) > (y) ? (x) : (y)) /* note: evaluates the larger argument twice */
+/* State of the VBR storage VBV model; callers refer to it through vbr_storage_vbv_handle (vbr_storage_vbv.h). */
+typedef struct vbr_storage_vbv_t
+{
+ WORD32 i4_max_buf_size; /* buffer size given to init_vbr_vbv; upper limit for the level */
+ WORD32 i4_cur_buf_size; /* current buffer level; starts at i4_max_buf_size */
+ WORD32 i4_max_bits_inflow_per_frm_period; /* bits entering the buffer per frame period */
+ /* Storing input variables */
+ WORD32 i4_max_bit_rate;
+ WORD32 i4_max_frame_rate;
+ /* Error bits calculation module */
+ error_bits_handle ps_error_bits;
+} vbr_storage_vbv_t;
+
+#if NON_STEADSTATE_CODE
+
+/* Handles the memtab request selected by e_func_type for this VBV state and */
+/* its error-bits sub-module; returns the number of memtab entries covered.  */
+WORD32 vbr_vbv_num_fill_use_free_memtab(
+    vbr_storage_vbv_t **pps_vbr_storage_vbv, itt_memtab_t *ps_memtab, ITT_FUNC_TYPE_E e_func_type)
+{
+    /* Stand-in state: while memtabs are counted or filled there is no state
+       memory yet, and the handle is dereferenced further down. */
+    static vbr_storage_vbv_t s_vbr_storage_vbv_temp;
+    WORD32 i4_num_memtabs;
+
+    if(e_func_type == GET_NUM_MEMTAB || e_func_type == FILL_MEMTAB)
+    {
+        *pps_vbr_storage_vbv = &s_vbr_storage_vbv_temp;
+    }
+
+    /* entry 0 describes the vbr_storage_vbv_t state structure itself */
+    if(e_func_type != GET_NUM_MEMTAB)
+    {
+        fill_memtab(&ps_memtab[0], sizeof(vbr_storage_vbv_t), MEM_TAB_ALIGNMENT, PERSISTENT, DDR);
+        use_or_fill_base(&ps_memtab[0], (void **)pps_vbr_storage_vbv, e_func_type);
+    }
+    i4_num_memtabs = 1;
+
+    /* the entries after it are handled by the error-bits module */
+    i4_num_memtabs += error_bits_num_fill_use_free_memtab(
+        &(*pps_vbr_storage_vbv)->ps_error_bits, &ps_memtab[1], e_func_type);
+    return i4_num_memtabs;
+}
+/******************************************************************************
+  Function Name   : init_vbr_vbv
+  Description     : Sets up the VBV state: the buffer starts full, the per-frame
+                    inflow is derived from bit rate and frame rate, and the
+                    error-bits module is initialised.
+  Arguments       : ps_vbr_storage_vbv   - state to initialise
+                    i4_max_bit_rate      - stored and passed to init_error_bits
+                    i4_frm_rate          - stored and passed to init_error_bits
+                    i4_max_vbv_buff_size - initial and maximum buffer level
+  Return Values   : void
+*****************************************************************************/
+void init_vbr_vbv(
+    vbr_storage_vbv_t *ps_vbr_storage_vbv,
+    WORD32 i4_max_bit_rate,
+    WORD32 i4_frm_rate,
+    WORD32 i4_max_vbv_buff_size)
+{
+    vbr_storage_vbv_t *ps_vbv = ps_vbr_storage_vbv;
+
+    /* remember the inputs; the buffer is considered full at start-up */
+    ps_vbv->i4_max_bit_rate = i4_max_bit_rate;
+    ps_vbv->i4_max_frame_rate = i4_frm_rate;
+    ps_vbv->i4_max_buf_size = i4_max_vbv_buff_size;
+    ps_vbv->i4_cur_buf_size = i4_max_vbv_buff_size;
+
+    /* max number of bits that flow into the decoder in the interval of two frames */
+    X_PROD_Y_DIV_Z(i4_max_bit_rate, 1000, i4_frm_rate, ps_vbv->i4_max_bits_inflow_per_frm_period);
+    init_error_bits(ps_vbv->ps_error_bits, i4_frm_rate, i4_max_bit_rate);
+}
+#endif /* #if NON_STEADSTATE_CODE */
+/******************************************************************************
+  Function Name   : update_vbr_vbv
+  Description     : Advances the buffer model by one frame: adds one frame
+                    period of inflow (plus error bits), limits the level to
+                    the buffer size, then removes the bits of the frame.
+  Arguments       : ps_vbr_storage_vbv    - VBV state
+                    i4_total_bits_decoded - bits taken out by this frame
+  Return Values   : void
+*****************************************************************************/
+void update_vbr_vbv(vbr_storage_vbv_t *ps_vbr_storage_vbv, WORD32 i4_total_bits_decoded)
+{
+    WORD32 i4_inflow;
+    WORD32 i4_level = ps_vbr_storage_vbv->i4_cur_buf_size;
+
+    /* the buffer fills during the interval between two decoded frames */
+    i4_inflow = ps_vbr_storage_vbv->i4_max_bits_inflow_per_frm_period +
+                get_error_bits(ps_vbr_storage_vbv->ps_error_bits);
+    overflow_avoided_summation(&i4_level, i4_inflow);
+    if(i4_level > ps_vbr_storage_vbv->i4_max_buf_size)
+    {
+        i4_level = ps_vbr_storage_vbv->i4_max_buf_size;
+    }
+
+    /* remove the frame, then update the error bits state */
+    ps_vbr_storage_vbv->i4_cur_buf_size = i4_level - i4_total_bits_decoded;
+    update_error_bits(ps_vbr_storage_vbv->ps_error_bits);
+
+#define PRINT_UNDERFLOW 0
+#if PRINT_UNDERFLOW
+    if(ps_vbr_storage_vbv->i4_cur_buf_size < 0)
+        printf("The buffer underflows \n");
+#endif
+}
+/******************************************************************************
+  Function Name   : get_max_target_bits
+  Description     : Buffer level expected when the next frame is decoded, i.e.
+                    the most bits that frame can consume without underflow.
+                    The fields of ps_vbr_storage_vbv are not written.
+  Arguments       : ps_vbr_storage_vbv
+  Return Values   : WORD32 - projected level, limited to the max buffer size
+*****************************************************************************/
+WORD32 get_max_target_bits(vbr_storage_vbv_t *ps_vbr_storage_vbv)
+{
+    WORD32 i4_error_bits = get_error_bits(ps_vbr_storage_vbv->ps_error_bits);
+    WORD32 i4_limit = ps_vbr_storage_vbv->i4_max_buf_size;
+    WORD32 i4_level = ps_vbr_storage_vbv->i4_cur_buf_size;
+    WORD32 i4_inflow;
+
+    /* one frame period of inflow plus the error bits */
+    i4_inflow = ps_vbr_storage_vbv->i4_max_bits_inflow_per_frm_period + i4_error_bits;
+    overflow_avoided_summation(&i4_level, i4_inflow);
+
+    /* the reported level never exceeds the buffer size */
+    if(i4_level > i4_limit)
+        return i4_limit;
+
+    return i4_level;
+}
+
+/****************************************************************************
+Function Name : get_vbv_buffer_status
+Description   : Classifies the VBV buffer level that would result from
+                removing i4_total_frame_bits after one frame period of inflow.
+                The fields of ps_vbr_storage_vbv are not written.
+Inputs        : ps_vbr_storage_vbv, i4_total_frame_bits
+Outputs       : *pi4_num_bits_to_prevent_vbv_underflow - limited level before
+                the frame bits are removed
+Returns       : VBV_UNDERFLOW if the resulting level is negative,
+                VBV_OVERFLOW if it exceeds the buffer size,
+                VBR_CAUTION if it is below a quarter of the buffer size,
+                VBV_NORMAL otherwise
+*****************************************************************************/
+vbv_buf_status_e get_vbv_buffer_status(
+    vbr_storage_vbv_t *ps_vbr_storage_vbv,
+    WORD32 i4_total_frame_bits, /* Total frame bits consumed */
+    WORD32 *pi4_num_bits_to_prevent_vbv_underflow) /* level before the frame is removed */
+{
+    WORD32 i4_error_bits = get_error_bits(ps_vbr_storage_vbv->ps_error_bits);
+    WORD32 i4_limit = ps_vbr_storage_vbv->i4_max_buf_size;
+    WORD32 i4_level = ps_vbr_storage_vbv->i4_cur_buf_size;
+
+    /* error bits due to fixed point computation of drain rate */
+    overflow_avoided_summation(
+        &i4_level, (ps_vbr_storage_vbv->i4_max_bits_inflow_per_frm_period + i4_error_bits));
+
+    if(i4_level > i4_limit)
+    {
+        i4_level = i4_limit;
+    }
+
+    pi4_num_bits_to_prevent_vbv_underflow[0] = i4_level;
+
+    /* level once this frame has been taken out of the buffer */
+    i4_level -= i4_total_frame_bits;
+
+    /* the checks are ordered: underflow first, then overflow, then the
+       low-level warning; anything else is the normal case */
+    if(i4_level < 0)
+    {
+        return VBV_UNDERFLOW;
+    }
+    if(i4_level > i4_limit)
+    {
+        return VBV_OVERFLOW;
+    }
+    if(i4_level < (i4_limit >> 2))
+    {
+        return VBR_CAUTION;
+    }
+
+    return VBV_NORMAL;
+}
+/******************************************************************************
+  Function Name   : get_max_vbv_buf_size
+  Description     : Accessor for the VBV buffer size held in the state.
+  Arguments       : ps_vbr_storage_vbv
+  Return Values   : WORD32 - i4_max_buf_size of the state
+*****************************************************************************/
+WORD32 get_max_vbv_buf_size(vbr_storage_vbv_t *ps_vbr_storage_vbv)
+{
+    WORD32 i4_max_buf_size = ps_vbr_storage_vbv->i4_max_buf_size;
+
+    return i4_max_buf_size;
+}
+/******************************************************************************
+  Function Name   : get_cur_vbv_buf_size
+  Description     : Accessor for the current VBV buffer level held in the state.
+  Arguments       : ps_vbr_storage_vbv
+  Return Values   : WORD32 - i4_cur_buf_size of the state
+*****************************************************************************/
+WORD32 get_cur_vbv_buf_size(vbr_storage_vbv_t *ps_vbr_storage_vbv)
+{
+    WORD32 i4_cur_buf_size = ps_vbr_storage_vbv->i4_cur_buf_size;
+
+    return i4_cur_buf_size;
+}
+/******************************************************************************
+  Function Name   : get_max_bits_inflow_per_frm_periode
+  Description     : Accessor for the per-frame-period inflow held in the state.
+  Arguments       : ps_vbr_storage_vbv
+  Return Values   : WORD32 - i4_max_bits_inflow_per_frm_period of the state
+*****************************************************************************/
+WORD32 get_max_bits_inflow_per_frm_periode(vbr_storage_vbv_t *ps_vbr_storage_vbv)
+{
+    WORD32 i4_inflow = ps_vbr_storage_vbv->i4_max_bits_inflow_per_frm_period;
+
+    return i4_inflow;
+}
+
+/******************************************************************************
+  Function Name   : get_vbv_buf_fullness
+  Description     : Buffer level that would remain if u4_bits were removed after
+                    one frame period of inflow. The fields of the state are
+                    not written.
+  Arguments       : ps_vbr_storage_vbv, u4_bits - bits to remove
+  Return Values   : WORD32 - resulting level
+*****************************************************************************/
+WORD32 get_vbv_buf_fullness(vbr_storage_vbv_t *ps_vbr_storage_vbv, UWORD32 u4_bits)
+{
+    WORD32 i4_error_bits = get_error_bits(ps_vbr_storage_vbv->ps_error_bits);
+    WORD32 i4_limit = ps_vbr_storage_vbv->i4_max_buf_size;
+    WORD32 i4_level = ps_vbr_storage_vbv->i4_cur_buf_size;
+
+    /* projected level before removal, limited to the buffer size */
+    overflow_avoided_summation(
+        &i4_level, (ps_vbr_storage_vbv->i4_max_bits_inflow_per_frm_period + i4_error_bits));
+    i4_level = (i4_level > i4_limit) ? i4_limit : i4_level;
+
+    /* u4_bits is a UWORD32, so the operands are of mixed signedness here */
+    i4_level -= u4_bits;
+
+#if PRINT_UNDERFLOW
+    if(i4_level < 0)
+        printf("The buffer underflows \n");
+#endif
+
+    return i4_level;
+}
+/******************************************************************************
+  Function Name   : get_max_tgt_bits_dvd_comp
+  Description     : Max bits for the next picture so that the buffer level after
+                    it is at least the desired one: 10% of the buffer for I_PIC,
+                    else the current level moved by 1/rem_frms of its gap to 95%.
+  Arguments       : negative i4_rem_bits_in_gop -> 0, i4_rem_frms_in_gop <= 0 -> 1
+  Return Values   : WORD32 - maximum target bits, never negative
+*****************************************************************************/
+WORD32 get_max_tgt_bits_dvd_comp(
+    vbr_storage_vbv_t *ps_vbr_storage_vbv,
+    WORD32 i4_rem_bits_in_gop,
+    WORD32 i4_rem_frms_in_gop,
+    picture_type_e e_pic_type)
+{
+    WORD32 i4_dbf_max, i4_dbf_min, i4_dbf_prev, i4_vbv_size, i4_dbf_desired;
+    WORD32 i4_max_tgt_bits;
+
+    i4_vbv_size = ps_vbr_storage_vbv->i4_max_buf_size;
+    /* 64-bit intermediates: 95 * size overflows 32 bits above ~22.6 Mbit */
+    i4_dbf_max = (WORD32)(((long long)95 * i4_vbv_size) / 100);
+    i4_dbf_min = (WORD32)(((long long)10 * i4_vbv_size) / 100);
+    i4_dbf_prev = ps_vbr_storage_vbv->i4_cur_buf_size;
+
+    if(i4_rem_bits_in_gop < 0)
+        i4_rem_bits_in_gop = 0;
+    if(i4_rem_frms_in_gop <= 0)
+        i4_rem_frms_in_gop = 1;
+
+    if(e_pic_type == I_PIC)
+    {
+        i4_dbf_desired = i4_dbf_min;
+    }
+    else
+    {
+        i4_dbf_desired = (i4_dbf_max - i4_rem_bits_in_gop / i4_rem_frms_in_gop - i4_dbf_prev) /
+                         i4_rem_frms_in_gop;
+        i4_dbf_desired += i4_dbf_prev;
+    }
+
+    /* level just before the picture is removed: one frame period of inflow, limited */
+    i4_dbf_prev += ps_vbr_storage_vbv->i4_max_bits_inflow_per_frm_period;
+    if(i4_dbf_prev > ps_vbr_storage_vbv->i4_max_buf_size)
+        i4_dbf_prev = ps_vbr_storage_vbv->i4_max_buf_size;
+
+    i4_max_tgt_bits = MAX(0, (i4_dbf_prev - i4_dbf_desired));
+    return (i4_max_tgt_bits);
+}
+
+#if NON_STEADSTATE_CODE
+/******************************************************************************
+  Function Name   : change_vbr_vbv_frame_rate
+  Description     : Run-time frame rate change: recomputes the per-frame inflow
+                    from the stored bit rate, informs the error-bits module
+                    and stores the new frame rate in the state.
+  Arguments       : ps_vbr_storage_vbv, i4_frm_rate - new frame rate
+  Return Values   : void
+*****************************************************************************/
+void change_vbr_vbv_frame_rate(vbr_storage_vbv_t *ps_vbr_storage_vbv, WORD32 i4_frm_rate)
+{
+    vbr_storage_vbv_t *ps_vbv = ps_vbr_storage_vbv;
+
+    /* keep the new input value */
+    ps_vbv->i4_max_frame_rate = i4_frm_rate;
+
+    /* max number of bits that flow into the decoder in the interval of two frames */
+    X_PROD_Y_DIV_Z(
+        ps_vbv->i4_max_bit_rate, 1000, i4_frm_rate, ps_vbv->i4_max_bits_inflow_per_frm_period);
+
+    /* pass the change on to the error-bits module */
+    change_frm_rate_in_error_bits(ps_vbv->ps_error_bits, i4_frm_rate);
+
+}
+/******************************************************************************
+  Function Name   : change_vbr_vbv_bit_rate
+  Description     : Run-time bit rate change: recomputes the per-frame inflow
+                    with the stored frame rate, informs the error-bits module
+                    and stores the new bit rate in the state.
+  Arguments       : ps_vbr_storage_vbv
+                    i4_max_bit_rate - new maximum bit rate
+  Return Values   : void
+*****************************************************************************/
+void change_vbr_vbv_bit_rate(vbr_storage_vbv_t *ps_vbr_storage_vbv, WORD32 i4_max_bit_rate)
+{
+    vbr_storage_vbv_t *ps_vbv = ps_vbr_storage_vbv;
+
+    /* keep the new input value */
+    ps_vbv->i4_max_bit_rate = i4_max_bit_rate;
+
+    /* max number of bits that flow into the decoder in the interval of two frames */
+    X_PROD_Y_DIV_Z(
+        i4_max_bit_rate, 1000, ps_vbv->i4_max_frame_rate, ps_vbv->i4_max_bits_inflow_per_frm_period);
+
+    /* pass the change on to the error-bits module */
+    change_bitrate_in_error_bits(ps_vbv->ps_error_bits, i4_max_bit_rate);
+
+}
+#endif /* #if NON_STEADSTATE_CODE */
diff --git a/encoder/vbr_storage_vbv.h b/encoder/vbr_storage_vbv.h
new file mode 100644
index 0000000..4b64440
--- /dev/null
+++ b/encoder/vbr_storage_vbv.h
@@ -0,0 +1,124 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file vbr_storage_vbv.h
+*
+* \brief
+* This file contains all the necessary declarations for
+* vbr buffer control functions
+*
+* \date
+*
+* \author
+* ittiam
+*
+******************************************************************************
+*/
+#ifndef _VBR_STORAGE_VBV_H_
+#define _VBR_STORAGE_VBV_H_
+/******************************************************************************
+VBR STORAGE (VBV):
+Max. buffer filling rate: Rmax
+Max. buffer size: Bmax (as specified by level and profile)
+Current Buffer Level: Bcur
+Frame Rate: F
+
+For a storage scenario, the initial buffer size is assumed to be max. For every
+frame the Maximum bits filled in to the buffer is given by Rmaxfrm = Rmax/F. If
+the buffer overflows then the buffer is thresholded to the max buffer size.
+
+ (overflow)
+ B(0) /|
+---|--------------/-|------------------------------ Bmax
+ | / |
+ | /|/ |
+ | /| / |
+ | / | /|/ |
+ |/ | / | /|
+ |/ |/ |
+ |
+ |
+-----------------------|---------------------------
+ |<->| |
+(1/F)=>1/frame_rate (underflow)
+
+
+ B"(i) - Bits in buffer just before decoding a frame.
+ B'(i) - Bits in buffer just after decoding a frame.
+
+
+ B(0) (initBuffer size) = Bmax.
+ B'(i) = B"(i) - bits_decoded
+ B"(i) = Min( Bmax, B'(i-1) + Rmaxfrm)
+
+Overflow Scenario: In VBR case, since we have only a max filling rate (or input bit rate)
+buffer overflow is not a issue (since the buffer filling rate can be reduced to any value
+below this rate)
+
+Underflow Scenario: B'(i) should always be > 0. If not then, the buffer underflows. To
+prevent this condition the number bits that needs to be decoded must be equal to B"(i)
+which is equal to Min( Bmax, B'(i-1) + Rmaxfrm)
+****************************************************************************************/
+
+/*****************************************************************************/
+/* Function Declarations */
+/*****************************************************************************/
+
+typedef struct vbr_storage_vbv_t *vbr_storage_vbv_handle;
+/* Memtab handling for the VBV state; returns the number of memtab entries covered */
+WORD32 vbr_vbv_num_fill_use_free_memtab(
+ vbr_storage_vbv_handle *pps_vbr_storage_vbv,
+ itt_memtab_t *ps_memtab,
+ ITT_FUNC_TYPE_E e_func_type);
+
+/* Initialises the vbv buffer status; the buffer level starts at i4_max_vbv_buff_size */
+void init_vbr_vbv(
+ vbr_storage_vbv_handle ps_vbr_storage_vbv,
+ WORD32 max_bit_rate, /* In bits/sec*/
+ WORD32 max_frm_rate, /* In frames/1000 sec*/
+ WORD32 i4_max_vbv_buff_size); /* in bits*/
+
+/* Updates the buffer after decoding a frame */
+void update_vbr_vbv(vbr_storage_vbv_handle ps_vbr_storage_vbv, WORD32 i4_total_bits_decoded);
+
+/* gets the max_number of bits that can be decoded out of the VBV without underflow */
+WORD32 get_max_target_bits(vbr_storage_vbv_handle ps_vbr_storage_vbv);
+WORD32 get_max_bits_inflow_per_frm_periode(vbr_storage_vbv_handle ps_vbr_storage_vbv); /* stored per-frame inflow */
+WORD32 get_cur_vbv_buf_size(vbr_storage_vbv_handle ps_vbr_storage_vbv); /* stored current buffer level */
+
+/* Queries the VBV buffer for the buffer status; the stored buffer level is not changed */
+vbv_buf_status_e get_vbv_buffer_status(
+ vbr_storage_vbv_handle ps_vbr_storage_vbv,
+ WORD32 i4_total_frame_bits, /* Total frame bits consumed */
+ WORD32 *
+ pi4_num_bits_to_prevent_vbv_underflow); /* out: buffer level before the frame bits are removed */
+
+WORD32 get_max_vbv_buf_size(vbr_storage_vbv_handle ps_vbr_storage_vbv); /* stored buffer size */
+WORD32 get_vbv_buf_fullness(vbr_storage_vbv_handle ps_vbr_storage_vbv, UWORD32 u4_bits); /* level left after removing u4_bits */
+WORD32 get_max_tgt_bits_dvd_comp(
+ vbr_storage_vbv_handle ps_vbr_storage_vbv,
+ WORD32 i4_rem_bits_in_gop,
+ WORD32 i4_rem_frms_in_gop,
+ picture_type_e e_pic_type);
+/* Changing input values at run time */
+void change_vbr_vbv_bit_rate(vbr_storage_vbv_handle ps_vbr_storage_vbv, WORD32 i4_max_bit_rate);
+void change_vbr_vbv_frame_rate(vbr_storage_vbv_handle ps_vbr_storage_vbv, WORD32 i4_frm_rate);
+#endif
diff --git a/encoder/vbr_str_prms.c b/encoder/vbr_str_prms.c
new file mode 100644
index 0000000..cabaef8
--- /dev/null
+++ b/encoder/vbr_str_prms.c
@@ -0,0 +1,153 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file vbr_str_prms.c
+*
+* \brief
+* This file contains functions that set and change the VBR stream parameters
+*
+* \date
+*
+* \author
+* ittiam
+*
+******************************************************************************
+*/
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+/* System include files */
+#include <stdio.h>
+
+/* User include files */
+#include "ittiam_datatypes.h"
+#include "rc_cntrl_param.h"
+#include "var_q_operator.h"
+#include "rc_common.h"
+#include "vbr_str_prms.h"
+
+/******************************************************************************
+  Function Name   : init_vbv_str_prms
+  Description     : Stores the intra frame interval, the source and target
+                    ticks and the number of frames in the delay period in
+                    p_vbr_str_prms. No other field is written.
+  Arguments       : p_vbr_str_prms and the four values to store
+  Return Values   : void
+*****************************************************************************/
+#if NON_STEADSTATE_CODE
+void init_vbv_str_prms(
+    vbr_str_prms_t *p_vbr_str_prms,
+    UWORD32 u4_intra_frm_interval,
+    UWORD32 u4_src_ticks,
+    UWORD32 u4_tgt_ticks,
+    UWORD32 u4_frms_in_delay_period)
+{
+    vbr_str_prms_t *p_prms = p_vbr_str_prms;
+    p_prms->u4_intra_frame_int = u4_intra_frm_interval;
+    p_prms->u4_src_ticks = u4_src_ticks;
+    p_prms->u4_tgt_ticks = u4_tgt_ticks;
+    p_prms->u4_frms_in_delay_prd = u4_frms_in_delay_period;
+}
+#endif /* #if NON_STEADSTATE_CODE */
+
+/*********************************************************************************
+  Function Name   : change_vsp_ifi
+  Description     : Replaces the intra frame interval; the other three stream
+                    parameters are passed back to init_vbv_str_prms unchanged.
+  Arguments       : p_vbr_str_prms
+                    u4_intra_frame_int - new intra frame interval
+  Return Values   : void
+***********************************************************************************/
+#if NON_STEADSTATE_CODE
+void change_vsp_ifi(vbr_str_prms_t *p_vbr_str_prms, UWORD32 u4_intra_frame_int)
+{
+    /* current values of the parameters that stay as they are */
+    UWORD32 u4_cur_src_ticks = p_vbr_str_prms->u4_src_ticks;
+    UWORD32 u4_cur_tgt_ticks = p_vbr_str_prms->u4_tgt_ticks;
+    UWORD32 u4_cur_fidp = p_vbr_str_prms->u4_frms_in_delay_prd;
+
+    init_vbv_str_prms(
+        p_vbr_str_prms, u4_intra_frame_int, u4_cur_src_ticks, u4_cur_tgt_ticks, u4_cur_fidp);
+}
+/******************************************************************************
+  Function Name   : change_vsp_tgt_ticks
+  Description     : Rescales the distance to the next intra period position
+                    from the previous target ticks to u4_tgt_ticks.
+                    LIMITATION: currently no support is available for a
+                    dynamic change in source frame rate.
+                    NOTE(review): u4_tgt_ticks itself is not stored here and a
+                    previous value of 0 would divide by zero - confirm intent.
+  Arguments       : p_vbr_str_prms, u4_tgt_ticks - new target ticks
+  Return Values   : void
+*****************************************************************************/
+void change_vsp_tgt_ticks(vbr_str_prms_t *p_vbr_str_prms, UWORD32 u4_tgt_ticks)
+{
+    UWORD32 u4_cur_pos = p_vbr_str_prms->u4_cur_pos_in_src_ticks;
+    UWORD32 u4_old_tgt_ticks = p_vbr_str_prms->u4_tgt_ticks;
+    UWORD32 u4_remaining;
+
+    /* distance from the current position to the next intra period position */
+    u4_remaining = p_vbr_str_prms->u4_intra_prd_pos_in_tgt_ticks - u4_cur_pos;
+
+    /* whole units of the old target ticks, re-expressed in the new ones;
+       the integer division truncates before the multiplication */
+    u4_remaining /= u4_old_tgt_ticks;
+    u4_remaining *= u4_tgt_ticks;
+
+    /* new intra period position, measured from the current position */
+    p_vbr_str_prms->u4_intra_prd_pos_in_tgt_ticks = u4_remaining + u4_cur_pos;
+
+}
+/******************************************************************************
+  Function Name   : change_vsp_src_ticks
+  Description     : Replaces the source ticks; the other three stream parameters
+                    are passed back to init_vbv_str_prms unchanged.
+  Arguments       : p_vbr_str_prms
+                    u4_src_ticks - new source ticks
+  Return Values   : void
+*****************************************************************************/
+void change_vsp_src_ticks(vbr_str_prms_t *p_vbr_str_prms, UWORD32 u4_src_ticks)
+{
+    /* current values of the parameters that stay as they are */
+    UWORD32 u4_cur_ifi = p_vbr_str_prms->u4_intra_frame_int;
+    UWORD32 u4_cur_tgt_ticks = p_vbr_str_prms->u4_tgt_ticks;
+    UWORD32 u4_cur_fidp = p_vbr_str_prms->u4_frms_in_delay_prd;
+
+    init_vbv_str_prms(p_vbr_str_prms, u4_cur_ifi, u4_src_ticks, u4_cur_tgt_ticks, u4_cur_fidp);
+}
+/******************************************************************************
+  Function Name   : change_vsp_fidp
+  Description     : Replaces the number of frames in the delay period; the other
+                    three stream parameters are passed back to
+                    init_vbv_str_prms unchanged.
+  Arguments       : p_vbr_str_prms, u4_frms_in_delay_period - new value
+  Return Values   : void
+*****************************************************************************/
+void change_vsp_fidp(vbr_str_prms_t *p_vbr_str_prms, UWORD32 u4_frms_in_delay_period)
+{
+    /* current values of the parameters that stay as they are */
+    UWORD32 u4_cur_ifi = p_vbr_str_prms->u4_intra_frame_int;
+    UWORD32 u4_cur_src_ticks = p_vbr_str_prms->u4_src_ticks;
+    UWORD32 u4_cur_tgt_ticks = p_vbr_str_prms->u4_tgt_ticks;
+    init_vbv_str_prms(
+        p_vbr_str_prms, u4_cur_ifi, u4_cur_src_ticks, u4_cur_tgt_ticks, u4_frms_in_delay_period);
+}
+#endif /* #if NON_STEADSTATE_CODE */
diff --git a/encoder/vbr_str_prms.h b/encoder/vbr_str_prms.h
new file mode 100644
index 0000000..4919970
--- /dev/null
+++ b/encoder/vbr_str_prms.h
@@ -0,0 +1,68 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+/*!
+******************************************************************************
+* \file vbr_str_prms.h
+*
+* \brief
+* This file contains all the necessary declarations for
+* vbr params functions
+*
+* \date
+*
+* \author
+* ittiam
+*
+******************************************************************************
+*/
+#ifndef _VBR_STR_PRMS_H_
+#define _VBR_STR_PRMS_H_
+
+/*****************************************************************************/
+/* Structure */
+/*****************************************************************************/
+typedef struct
+{
+ UWORD32 u4_num_pics_in_delay_prd[MAX_PIC_TYPE]; /* sized by MAX_PIC_TYPE; presumably one count per picture type - TODO confirm */
+ UWORD32 u4_pic_num;
+ UWORD32 u4_intra_prd_pos_in_tgt_ticks; /* next intra position; rewritten by change_vsp_tgt_ticks() */
+ UWORD32 u4_cur_pos_in_src_ticks; /* current position; base used by change_vsp_tgt_ticks() */
+ UWORD32 u4_intra_frame_int; /* intra frame interval handed back to init_vbv_str_prms() */
+ UWORD32 u4_src_ticks; /* source ticks handed back to init_vbv_str_prms() */
+ UWORD32 u4_tgt_ticks; /* target ticks; read as the previous value on a target tick change */
+ UWORD32 u4_frms_in_delay_prd; /* frames in delay period handed back to init_vbv_str_prms() */
+} vbr_str_prms_t;
+
+/*****************************************************************************/
+/* Function Declarations */
+/*****************************************************************************/
+void init_vbv_str_prms(
+ vbr_str_prms_t *p_vbr_str_prms,
+ UWORD32 u4_intra_frm_interval,
+ UWORD32 u4_src_ticks,
+ UWORD32 u4_tgt_ticks,
+ UWORD32 u4_frms_in_delay_period);
+
+void change_vsp_ifi(vbr_str_prms_t *p_vbr_str_prms, UWORD32 u4_intra_frame_int);
+void change_vsp_tgt_ticks(vbr_str_prms_t *p_vbr_str_prms, UWORD32 u4_tgt_ticks);
+void change_vsp_src_ticks(vbr_str_prms_t *p_vbr_str_prms, UWORD32 u4_src_ticks);
+void change_vsp_fidp(vbr_str_prms_t *p_vbr_str_prms, UWORD32 u4_frms_in_delay_period);
+
+#endif
diff --git a/cfi_blacklist.txt b/libhevc_blacklist.txt
similarity index 89%
rename from cfi_blacklist.txt
rename to libhevc_blacklist.txt
index 4193e81..4328f1c 100644
--- a/cfi_blacklist.txt
+++ b/libhevc_blacklist.txt
@@ -1,3 +1,4 @@
+[cfi]
# CFI blacklist for external/libhevc
# assembly functions where CFI has issues
@@ -457,3 +458,47 @@
fun:ihevc_weighted_pred_uni_sse42
fun:ihevc_weighted_pred_uni_ssse3
+#encoder files
+fun:ihevce_ed_calc_8x8_blk
+fun:ihevce_ed_calc_4x4_blk
+fun:ihevce_ed_4x4_find_best_modes
+fun:ihevce_pu_calc_4x4_blk
+fun:ihevce_mode_eval_filtering
+fun:ihevce_intra_rdopt_cu_ntu
+fun:ihevce_final_rdopt_mode_prcs
+fun:ihevce_it_recon_fxn
+fun:ihevce_deblk_ctb
+fun:ihevce_pad_interp_recon_ctb
+fun:ihevce_luma_interpolate_8bit_dxdy
+fun:ihevce_luma_interpolate_16bit_dxdy
+fun:ihevce_chroma_interpolate_8bit_dxdy
+fun:ihevce_chroma_interpolate_16bit_dxdy
+fun:ihevce_chroma_cu_prcs_rdopt
+fun:ihevce_intra_chroma_pred_mode_selector
+fun:ihevce_sao_ctb
+fun:ihevce_distortion_based_intra_chroma_mode_selector
+fun:ihevce_luma_inter_pred_pu
+
+[integer]
+# decoder/ihevcd_bitstream.c:250: 0 - 16 cannot be represented in type 'unsigned int'
+# decoder/ihevcd_bitstream.c:251: 4294967280 + 32 cannot be represented in type 'unsigned int'
+fun:ihevcd_bits_seek
+# decoder/ihevcd_decode.c:190: 0 - 22 cannot be represented in type 'unsigned int'
+fun:ihevcd_fill_outargs
+# decoder/ihevcd_nal.c:304: 0 - 1 cannot be represented in type 'unsigned int'
+fun:ihevcd_nal_unit_header
+# fun:ihevcd_parse_coding_unit
+# decoder/ihevcd_parse_residual.c:724: 5 - 32 cannot be represented in type 'unsigned int'
+# decoder/ihevcd_parse_residual.c:886: 5 - 32 cannot be represented in type 'unsigned int'
+fun:ihevcd_parse_residual_coding
+# decoder/ihevcd_sao.c:672:36: 8 - 248 cannot be represented in type 'unsigned int'
+# decoder/ihevcd_sao.c:751:36: 8 - 248 cannot be represented in type 'unsigned int'
+# decoder/ihevcd_sao.c:3395:36: 8 - 248 cannot be represented in type 'unsigned int'
+# decoder/ihevcd_sao.c:3473:36: 8 - 248 cannot be represented in type 'unsigned int'
+fun:ihevcd_sao_shift_ctb
+# decoder/ihevcd_bitstream.c:517: 0 - 1 cannot be represented in type 'unsigned int'
+fun:ihevcd_uev
+
+# Performance related blacklists
+fun:ihevcd_deblk_ctb
+fun:ihevcd_get_mv_ctb
diff --git a/test/Android.mk b/test/Android.mk
deleted file mode 100644
index 7807003..0000000
--- a/test/Android.mk
+++ /dev/null
@@ -1,5 +0,0 @@
-LOCAL_PATH := $(call my-dir)
-include $(CLEAR_VARS)
-
-# decoder
-include $(LOCAL_PATH)/decoder.mk
diff --git a/test/decoder.mk b/test/decoder.mk
deleted file mode 100644
index 6b343a0..0000000
--- a/test/decoder.mk
+++ /dev/null
@@ -1,15 +0,0 @@
-LOCAL_PATH := $(call my-dir)
-
-include $(CLEAR_VARS)
-
-LOCAL_MODULE := hevcdec
-LOCAL_MODULE_TAGS := optional
-
-LOCAL_CFLAGS := \
- -DPROFILE_ENABLE -DARM -fPIC -DMD5_DISABLE \
- -Wall -Werror
-LOCAL_C_INCLUDES += $(LOCAL_PATH)/../decoder $(LOCAL_PATH)/../common $(LOCAL_PATH)/
-LOCAL_SRC_FILES := decoder/main.c
-LOCAL_STATIC_LIBRARIES := libhevcdec
-
-include $(BUILD_EXECUTABLE)
diff --git a/test/encoder/app.h b/test/encoder/app.h
new file mode 100644
index 0000000..26959a9
--- /dev/null
+++ b/test/encoder/app.h
@@ -0,0 +1,229 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/**
+*******************************************************************************
+* @file
+* app.h
+*
+* @brief
+* This file contains all the necessary structure and enumeration definitions
+* needed for the Application
+*
+* @author
+* ittiam
+*
+* @remarks
+* none
+*
+*******************************************************************************
+*/
+
+#ifndef _APP_H_
+#define _APP_H_
+
+/*****************************************************************************/
+/* Function Macros */
+/*****************************************************************************/
+#define MAX(a, b) (((a) > (b)) ? (a) : (b)) /* whole expansion parenthesised; an argument may be evaluated twice */
+#define MIN(a, b) (((a) < (b)) ? (a) : (b)) /* whole expansion parenthesised; an argument may be evaluated twice */
+
+#define STR_LEN 512
+
+/*****************************************************************************/
+/* Enums */
+/*****************************************************************************/
+typedef enum
+{
+ INVALID, /* returned by get_argument() when the option string is not recognised */
+ HELP, /* "-h" / "--help": parse_argument() prints the usage text */
+ VERSION,
+ INPUT_YUV,
+ OUTPUT,
+ STAT_FILE,
+ STAT_FILE_BLK,
+ SAVE_RECON,
+ RECON_YUV,
+ NUM_FRAMES_TO_ENCODE,
+ START_FRM_OFFSET,
+ LOG_DUMP_LEVEL,
+ PERF_MODE,
+ ENABLE_CSV_DUMP,
+ CSV_FILE_PATH,
+ ENABLE_LOOPBACK,
+ ENABLE_LOGO,
+ RES_CHNG_INTRVL,
+ SRC_WIDTH,
+ SRC_HEIGHT,
+ SRC_FRAME_RATE_NUM,
+ SRC_FRAME_RATE_DENOM,
+ SRC_INTERLACED,
+ INPUT_CHROMA_FORMAT,
+ INPUT_BIT_DEPTH,
+ TOPFIELD_FIRST,
+ NUM_RESOLUTIONS,
+ MRES_SINGLE_OUT,
+ START_RES_ID,
+ MBR_QUALITY_SETTING,
+ TGT_WIDTH,
+ TGT_HEIGHT,
+ CODEC_LEVEL,
+ NUM_BITRATES,
+ TGT_BITRATE,
+ FRAME_QP,
+ OUTPUT_BIT_DEPTH,
+ ENABLE_TEMPORAL_SCALABILITY,
+ MAX_CLOSED_GOP_PERIOD,
+ MIN_CLOSED_GOP_PERIOD,
+ MAX_CRA_OPEN_GOP_PERIOD,
+ MAX_I_OPEN_GOP_PERIOD,
+ MAX_TEMPORAL_LAYERS,
+ QUALITY_PRESET,
+ DEBLOCKING_TYPE,
+ USE_DEFAULT_SC_MTX,
+ ENABLE_ENTROPY_SYNC,
+ MAX_TR_TREE_DEPTH_I,
+ MAX_TR_TREE_DEPTH_NI,
+ MAX_SEARCH_RANGE_HORZ,
+ MAX_SEARCH_RANGE_VERT,
+ VISUAL_QUALITY_ENHANCEMENTS_TOGGLER,
+ ARCH_TYPE,
+ NUM_CORES,
+ ENABLE_THREAD_AFFINITY,
+ RATE_CONTROL_MODE,
+ CU_LEVEL_RC,
+ PASS,
+ MAX_VBV_BUFFER_SIZE,
+ PEAK_BITRATE,
+ RATE_FACTOR,
+ VBR_MAX_PEAK_RATE_DUR,
+ MAX_FRAME_QP,
+ MIN_FRAME_QP,
+ ENABLE_LOOK_AHEAD,
+ RC_LOOK_AHEAD_PICS,
+ ENABLE_WEIGHTED_PREDICTION,
+ CODEC_TYPE,
+ CODEC_PROFILE,
+ CODEC_TIER,
+ AUD_ENABLE_FLAGS,
+ INTEROP_FLAGS,
+ SPS_AT_CDR_ENABLE,
+ SEI_VUI_INFO_CFG,
+ VUI_ENABLE,
+ SEI_ENABLE_FLAGS,
+ SEI_PAYLOAD_ENABLE_FLAGS,
+ SEI_PAYLOAD_PATH,
+ FORCE_IDR_LOCS_ENABLE,
+ FORCE_IDR_LOCS_FILENAME,
+ SEI_BUFFER_PERIOD_FLAGS,
+ SEI_PIC_TIMING_FLAGS,
+ SEI_RECOVERY_POINT_FLAGS,
+ SEI_HASH_FLAGS,
+ SEI_MASTERING_DISP_COLOUR_VOL_FLAGS,
+ DISPLAY_PRIMARIES_X,
+ DISPLAY_PRIMARIES_Y,
+ WHITE_POINT_X,
+ WHITE_POINT_Y,
+ MAX_DISPLAY_MASTERING_LUMINANCE,
+ MIN_DISPLAY_MASTERING_LUMINANCE,
+ SEI_CLL_INFO_ENABLE,
+ SEI_MAX_CLL,
+ SEI_AVG_CLL,
+ TILES_ENABLED_FLAG,
+ UNIFORM_SPACING_FLAG,
+ NUM_TILE_COLS,
+ NUM_TILE_ROWS,
+ COLUMN_WIDTH_ARRAY,
+ ROW_HEIGHT_ARRAY,
+ SLICE_SEGMENT_MODE,
+ SLICE_SEGMENT_ARGUMENT,
+ ASPECT_RATIO_INFO_PRESENT_FLAG,
+ ASPECT_RATIO_IDC,
+ SAR_WIDTH,
+ SAR_HEIGHT,
+ OVERSCAN_INFO_PRESENT_FLAG,
+ OVERSCAN_APPROPRIATE_FLAG,
+ VIDEO_SIGNAL_TYPE_PRESENT_FLAG,
+ VIDEO_FORMAT,
+ VIDEO_FULL_RANGE_FLAG,
+ COLOUR_DESCRIPTION_PRESENT_FLAG,
+ COLOUR_PRIMARIES,
+ TRANSFER_CHARACTERISTICS,
+ MATRIX_COEFFICIENTS,
+ CHROMA_LOC_INFO_PRESENT_FLAG,
+ CHROMA_SAMPLE_LOC_TYPE_TOP_FIELD,
+ CHROMA_SAMPLE_LOC_TYPE_BOTTOM_FIELD,
+ TIMING_INFO_PRESENT_FLAG,
+ VUI_HRD_PARAMETERS_PRESENT_FLAG,
+ NAL_HRD_PARAMETERS_PRESENT_FLAG,
+ CONFIG, /* "-c" / "--config": input config file */
+ GRPINFO /* used by option-table rows that only carry a section heading for the usage text */
+} ARGUMENT_T;
+
+/*****************************************************************************/
+/* Structure definitions */
+/*****************************************************************************/
+
+typedef struct
+{
+ void *ihevceHdl; /*!< opaque handle; presumably the encoder instance - TODO confirm */
+
+ char au1_in_file[STR_LEN]; /*!< input yuv file name (set from -i / --input)
+ */
+ char au1_out_file[IHEVCE_MAX_NUM_RESOLUTIONS][IHEVCE_MAX_NUM_BITRATES]
+ [STR_LEN]; /*!< output bitstream filename; -o / --output fills entry [0][0]
+ */
+ char au1_recon_file[IHEVCE_MAX_NUM_RESOLUTIONS][IHEVCE_MAX_NUM_BITRATES]
+ [STR_LEN]; /*!< Recon yuv filename
+ */
+ char au1_stat_file[IHEVCE_MAX_NUM_RESOLUTIONS][IHEVCE_MAX_NUM_BITRATES]
+ [STR_LEN]; /*!< stat filename from pass1
+ */
+ char au1_stat_blk_file[IHEVCE_MAX_NUM_RESOLUTIONS][IHEVCE_MAX_NUM_BITRATES]
+ [STR_LEN]; /*!< presumably the block-level stat filename from pass1 - TODO confirm
+ */
+ char au1_csv_file[IHEVCE_MAX_NUM_RESOLUTIONS][IHEVCE_MAX_NUM_BITRATES][STR_LEN]; /*!< presumably the csv dump filename - TODO confirm */
+
+ ihevce_static_cfg_params_t s_static_cfg_prms; /*!< static encoder configuration filled in by parse_argument() */
+
+ char ai1_sei_payload_path[STR_LEN]; /*!< SEI payload path (set from -seipayloadpath / --sei_payload_path) */
+
+} appl_ctxt_t;
+
+typedef struct
+{
+ /** Application context, embedded by value (not a pointer) */
+ appl_ctxt_t s_app_ctxt;
+} main_ctxt_t;
+
+typedef struct
+{
+ char argument_shortname[25]; /* short option string, e.g. "-h" */
+ char argument_name[128]; /* long option string, e.g. "--help" */
+ ARGUMENT_T argument; /* identifier this option maps to */
+ char description[512]; /* help text shown next to the long option name in the usage output */
+} argument_t;
+
+/*****************************************************************************/
+/* Function Declarations */
+/*****************************************************************************/
+void codec_exit(CHAR *pc_err_message);
+
+#endif /* _APP_H_ */
diff --git a/test/encoder/main.c b/test/encoder/main.c
new file mode 100644
index 0000000..975b2ac
--- /dev/null
+++ b/test/encoder/main.c
@@ -0,0 +1,1229 @@
+/******************************************************************************
+ *
+ * Copyright (C) 2018 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+*/
+
+/*!
+******************************************************************************
+* \file main.c
+*
+* \brief
+* This file contains sample application for HEVC Encoder
+*
+* \date
+* 18/09/2012
+*
+* \author
+* Ittiam
+*
+******************************************************************************
+*/
+
+/*****************************************************************************/
+/* File Includes */
+/*****************************************************************************/
+
+/* System include files */
+#include <assert.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <stdint.h>
+#include <malloc.h>
+#include <limits.h>
+
+/* User include files */
+#include "ihevc_typedefs.h"
+#include "itt_video_api.h"
+#include "ihevce_api.h"
+#include "ihevce_plugin.h"
+#include "ihevce_profile.h"
+#include "app.h"
+
+/*****************************************************************************/
+/* Constant Macros */
+/*****************************************************************************/
+#define DYN_BITRATE_TEST 0
+
+/*****************************************************************************/
+/* Global definitions */
+/*****************************************************************************/
+
+/*!
+*******************************************************************************
+* \brief
+* list of supported arguments: {short name, long name, ARGUMENT_T id, help text}.
+* Help text must end in '\n' (print_usage() adds none); GRPINFO rows are headings.
+*****************************************************************************
+*/
+// clang-format off
+static const argument_t argument_mapping[] =
+{
+ {"-h", "--help", HELP, "Print help \n"},
+ {"-c", "--config", CONFIG, "Input Config file \n" },
+ {"-v", "--version", VERSION, "Encoder version \n"},
+ {"", "", GRPINFO, "\n " " File I/O Parameters \n" " ---------------------\n"},
+ {"-i", "--input", INPUT_YUV, "Input yuv file {mandatory} \n"},
+ {"-o", "--output", OUTPUT, "Output bitstream file {mandatory}\n"},
+ {"-frames", "--num_frames_to_encode", NUM_FRAMES_TO_ENCODE, "Number of frames to encode \n"},
+ {"-log", "--log_dump_level", LOG_DUMP_LEVEL, "0- [No log/prints] 1- [BitsGenerated, POC, Qp, Pic-type]\n"
+ " 2- [1 + PSNR + Seq Summary] 3- [2 + SSIM + Frame Summary] {0}\n"},
+ {"", "", GRPINFO, "\n " " Source Parameters \n" " ---------------------\n"},
+ {"-sw", "--src_width", SRC_WIDTH, "Input Source Width {mandatory}[240:4096]\n"},
+ {"-sh", "--src_height", SRC_HEIGHT, "Input Source Height {mandatory}[128:2176]\n"},
+ {"-fNum", "--src_frame_rate_num", SRC_FRAME_RATE_NUM, "Frame rate numerator {30000}[7500:120000]\n"},
+ {"-fDen", "--src_frame_rate_denom", SRC_FRAME_RATE_DENOM, "Frame rate denominator {1000}[1000,1001]\n"},
+ {"-pixfmt", "--input_chroma_format", INPUT_CHROMA_FORMAT, "11- YUV_420P; 13- YUV_422P {11}\n"},
+ {"", "", GRPINFO, "\n " " Target Parameters (for all the layers of multi-resolution encoding) \n" " ------------------------------------------------------------------------\n"},
+ {"-level", "--codec_level", CODEC_LEVEL, "Coded Level multiplied by 30 {153}[0:153]\n"},
+ {"-b", "--tgt_bitrate", TGT_BITRATE, "Target bitrates in bps{5000000}."
+ " For MRESxMBR comma seperated BR1,BR2,BR3...\n"},
+ {"-qp", "--frame_qp", FRAME_QP, "Initial QP values.Dependes on bit depth {38},"
+ " For MRESxMBR comma seperated QP1,QP2,QP3...\n"},
+ {"-obd", "--output_bit_depth", OUTPUT_BIT_DEPTH, "Output bit depth common for all Res.{-ibd}[8,10,12] \n"},
+ {"", "", GRPINFO, "\n " " GOP structure Parameters \n" " ----------------------------\n"},
+ {"-maxCgop", "--max_closed_gop_period", MAX_CLOSED_GOP_PERIOD, "Max IDR Pic distance- Closed GOP {0}[0:300] \n"},
+ {"-minCgop", "--min_closed_gop_period", MIN_CLOSED_GOP_PERIOD, "Min IDR Pic distance- Closed GOP {0}[0:300]\n"},
+ {"-craOgop", "--max_cra_open_gop_period", MAX_CRA_OPEN_GOP_PERIOD, "Max CRA Pic distance- Open GOP {60}[0:300]\n"},
+ {"-maxIgop", "--max_i_open_gop_period", MAX_I_OPEN_GOP_PERIOD, "Max I (non CRA, non IDR) Pic distance {0}[0:300]\n"},
+ {"-bpicTL", "--max_temporal_layers", MAX_TEMPORAL_LAYERS, "B pyramid layers {3}[0:3] \n"},
+ {"", "", GRPINFO, "\n " " Coding tools Parameters \n" " ---------------------------\n"},
+ {"-preset", "--quality_preset", QUALITY_PRESET, "0- PQ, 2- HQ, 3- MS, 4- HS, 5- ES {3}\n"},
+ {"-lfd", "--deblocking_type", DEBLOCKING_TYPE, "Debocking 0- enabled, 1- disabled {0}\n"},
+ {"-scm", "--use_default_sc_mtx", USE_DEFAULT_SC_MTX, "0- disabled, 1- enabled {0}\n"},
+ {"-wpp", "--enable_entropy_sync", ENABLE_ENTROPY_SYNC, "Entropy sync 1- enabled, 0- disabled {0}\n"},
+ {"-intraTD", "--max_tr_tree_depth_I", MAX_TR_TREE_DEPTH_I, "Max transform tree depth for intra {3}[1,2,3]\n"},
+ {"-interTD", "--max_tr_tree_depth_nI", MAX_TR_TREE_DEPTH_NI, "Max transform tree depth for inter {3}[2,3,4]\n"},
+ {"-hrange", "--max_search_range_horz", MAX_SEARCH_RANGE_HORZ, "Horizontal search range {512}[64:512]\n"},
+ {"-vrange", "--max_search_range_vert", MAX_SEARCH_RANGE_VERT, "Vertical search range {256}[32:256]\n"},
+ {"-arch", "--archType", ARCH_TYPE, "0 => Automatic, 4 => ARM(No neon)\n"},
+
+ {"", "", GRPINFO, "\n " " Multi Core parameters \n" " -------------------------\n"},
+ {"-core", "--num_cores", NUM_CORES, "#Logical cores (Include hyperthreads){auto}[1:80] \n"},
+ {"", "", GRPINFO, "\n " " Rate Control parameters \n" " -------------------------\n"},
+ {"-rc", "--rate_control_mode", RATE_CONTROL_MODE, "1 -Capped VBR,2- VBR ,3- CQP, 5- CBR {5} \n"},
+ {"-aq", "--cu_level_rc", CU_LEVEL_RC, "CU Qp Modulation 0- Disable 1-Spatial QP modulation \n"},
+ {"-maxqp", "--max_frame_qp", MAX_FRAME_QP, "Max frame Qp for I frame {51}[51] \n"},
+ {"-minqp", "--min_frame_qp", MIN_FRAME_QP, "Min frame Qp for I frame. Depends on Bit depth {1}[1/-12/-24] \n"},
+ {"", "", GRPINFO, "\n " " Look Ahead Processing Parameters \n" " ----------------------------------\n"},
+
+ {"-lapwindow", "--rc_look_ahead_pics", RC_LOOK_AHEAD_PICS, "RC look ahead window {60}[0:120] \n"},
+ {"", "", GRPINFO, "\n " " Output stream Parameters \n" " ----------------------------------\n"},
+ {"-codec", "--codec_type", CODEC_TYPE, "0- HEVC {0}\n"},
+ {"-profile", "--codec_profile", CODEC_PROFILE, "1- Main 2- Main10 4- RExt {1} \n"},
+ {"-tier", "--codec_tier", CODEC_TIER, "0- Main 1- High {1} \n"},
+ {"-sps", "--sps_at_cdr_enable", SPS_AT_CDR_ENABLE, "1- enable, 0- disable {1}\n"},
+ {"", "", GRPINFO, "\n " " Tile Parameters \n" " --------------------------\n"},
+ {"-tiles", "--tiles_enabled_flag", TILES_ENABLED_FLAG, "Tile encoding 0- disable 1-enable {0} \n"},
+ {"", "", GRPINFO, "\n " " Slice Parameters \n" " --------------------------\n"},
+ {"-slicemode", "--slice_segment_mode", SLICE_SEGMENT_MODE, "Flag to control dependent slice generation {0}[0,1,2]\n"
+ " 0- Disable slices\n"
+ " 1- CTB/Slice\n"
+ " 2- Bytes/Slice \n"},
+ {"", "", GRPINFO, "\n " " SEI parameters \n" " ---------------------------\n"},
+ {"-sei", "--sei_enable_flags", SEI_ENABLE_FLAGS, "1- enable, 0- disable {0}\n"},
+ {"-seipayload", "--sei_payload_enable_flags", SEI_PAYLOAD_ENABLE_FLAGS, "1- enable, 0- disable {0}\n"},
+ {"-seipayloadpath", "--sei_payload_path", SEI_PAYLOAD_PATH, "Input SEI Payload Path (optional)\n" },
+ {"-seibuf", "--sei_buffer_period_flags", SEI_BUFFER_PERIOD_FLAGS, "1- enable, 0- disable {0}\n"},
+ {"-seipictime", "--sei_pic_timing_flags", SEI_PIC_TIMING_FLAGS, "1- enable, 0- disable {0}\n"},
+ {"-seirecpt", "--sei_recovery_point_flags", SEI_RECOVERY_POINT_FLAGS, "1- enable, 0- disable {0}\n"},
+ {"-seihash", "--sei_hash_flags", SEI_HASH_FLAGS, "3- Checksum, 2- CRC, 1- MD5, 0- disable {0}\n"},
+ {"-seidispcol", "--sei_mastering_disp_colour_vol_flags", SEI_MASTERING_DISP_COLOUR_VOL_FLAGS, "1: enable, 0: disable {0}\n"},
+ {"-seiprimx", "--display_primaries_x", DISPLAY_PRIMARIES_X, "X-Primaries: comma separated R,G,B values {}[0:50000] \n"},
+ {"-seiprimy", "--display_primaries_y", DISPLAY_PRIMARIES_Y, "Y-Primaries: comma separated R,G,B values {}[0:50000] \n"},
+ {"-seiwhiteptx", "--white_point_x", WHITE_POINT_X, "X White point value {}[0:50000] \n"},
+ {"-seiwhitepty", "--white_point_y", WHITE_POINT_Y, "Y White point value {}[0:50000] \n"},
+ {"-seidisplummax", "--max_display_mastering_luminance", MAX_DISPLAY_MASTERING_LUMINANCE, "Max mastering Luminance. In units of 0.0001 Candelas/sqmtr {} \n"},
+ {"-seidisplummin", "--min_display_mastering_luminance", MIN_DISPLAY_MASTERING_LUMINANCE, "Min mastering Luminance. In units of 0.0001 Candelas/sqmtr {}\n"},
+ {"-seicllinfo", "--sei_content_light_level_info", SEI_CLL_INFO_ENABLE, "1- enable, 0- disable {0}\n"},
+ {"-seimaxcll", "--max_content_light_level", SEI_MAX_CLL, "16bit unsigned number indicating max pixel intensity\n"},
+ {"-seiavgcll", "--max_frame_average_light_level", SEI_AVG_CLL, "16bit unsigned number indicating max avg pixel intensity\n"},
+ {"", "", GRPINFO, "\n " " VUI Parameters \n" " ------------------------\n"},
+ {"-vui", "--vui_enable", VUI_ENABLE, "1- enable, 0- disable {0}\n"},
+ {"-arFlag", "--aspect_ratio_info_present_flag", ASPECT_RATIO_INFO_PRESENT_FLAG, "Aspect Ratio 1-enable 0-diable {0} \n"},
+ {"-arIdc", "--aspect_ratio_idc", ASPECT_RATIO_IDC, "Aspect Ration IDC {255}[0:255]\n"},
+ {"-sarw", "--sar_width", SAR_WIDTH, "SAR Width {4}[0:65535]\n"},
+ {"-sarh", "--sar_height", SAR_HEIGHT, "SAR Height {3}[0:65535] \n"},
+ {"-overscan", "--overscan_info_present_flag", OVERSCAN_INFO_PRESENT_FLAG, "Overscan Info. 1-enable 0-disable {0}\n"},
+ {"-overscanValid", "--overscan_appropriate_flag", OVERSCAN_APPROPRIATE_FLAG, "Overscan Appropriate 1-enable 0-disable {0}\n"},
+ {"-vidsigp", "--video_signal_type_present_flag", VIDEO_SIGNAL_TYPE_PRESENT_FLAG, "Video Signal Type Present. 1-enable 0-diable {1} \n"},
+ {"-vidfmt", "--video_format", VIDEO_FORMAT, "Video Format {5}[0:5]\n"},
+ {"-fullrange", "--video_full_range_flag", VIDEO_FULL_RANGE_FLAG, "Video Full Range. 1-enable 0-diable {1}\n"},
+ {"-colorDesc", "--colour_description_present_flag", COLOUR_DESCRIPTION_PRESENT_FLAG, "Colour description.1-enable 0-diable {0}\n"},
+ {"-colorPrim", "--colour_primaries", COLOUR_PRIMARIES, "Colour Primaries {2}[0:255] \n"},
+ {"-xferCh", "--transfer_characteristics", TRANSFER_CHARACTERISTICS, "Transfer Characteristic {2}[0:255]\n"},
+ {"-mxcoeff", "--matrix_coefficients", MATRIX_COEFFICIENTS, "Matrix Coefficients {2}[0:255]\n"},
+ {"-chloc", "--chroma_loc_info_present_flag", CHROMA_LOC_INFO_PRESENT_FLAG, "Presence of chroma_sample_loc_type_top_field and "
+ "chroma_sample_loc_type_bottom_field.1-enable 0-diable {0}\n"},
+ {"-chtf", "--chroma_sample_loc_type_top_field", CHROMA_SAMPLE_LOC_TYPE_TOP_FIELD, "Location of Chroma samples for Top field.{0}[0,1] \n"},
+ {"-chbf", "--chroma_sample_loc_type_bottom_field", CHROMA_SAMPLE_LOC_TYPE_BOTTOM_FIELD, "Location of Chroma samples for Bottom field..{0}[0,1] \n"},
+ {"-timinginfo", "--timing_info_present_flag", TIMING_INFO_PRESENT_FLAG, "Timing info.1-enable 0-diable {0}\n"},
+ {"-vuihrdparam", "--vui_hrd_parameters_present_flag", VUI_HRD_PARAMETERS_PRESENT_FLAG, "HRD parameters.1-enable 0-diable {0} \n"},
+ {"-nalhrdparam", "--nal_hrd_parameters_present_flag", NAL_HRD_PARAMETERS_PRESENT_FLAG, "NAL HRD parameters.1-enable 0-diable {0}\n"}
+};
+// clang-format on
+
+/*!
+******************************************************************************
+* \if Function name : print_usage \endif
+*
+* \brief
+* Writes "Usage:" to stdout, then the long name and help text of every
+* entry of argument_mapping, in table order
+*****************************************************************************
+*/
+void print_usage(void)
+{
+    const WORD32 total = sizeof(argument_mapping) / sizeof(argument_t);
+    const argument_t *ps_entry = argument_mapping;
+    const argument_t *const ps_end = argument_mapping + total;
+
+    printf("\nUsage:\n");
+    for(; ps_entry < ps_end; ps_entry++)
+    {
+        printf("%-32s\t %s", ps_entry->argument_name, ps_entry->description);
+    }
+}
+
+/*!
+******************************************************************************
+* \if Function name : get_argument \endif
+*
+* \brief
+* Looks up an option string in argument_mapping. The long name is compared
+* first, then the short name (a short name of "--" is never accepted).
+* Returns the mapped ARGUMENT_T, or INVALID when nothing matches
+*****************************************************************************
+*/
+ARGUMENT_T get_argument(CHAR *name)
+{
+    const argument_t *ps_entry = argument_mapping;
+    const argument_t *const ps_end = ps_entry + (sizeof(argument_mapping) / sizeof(argument_t));
+    for(; ps_entry != ps_end; ps_entry++)
+    {
+        /* long name wins outright; a short-name hit counts unless that short name is "--" */
+        if(0 == strcmp(ps_entry->argument_name, name))
+            return ps_entry->argument;
+        if(0 != strcmp(ps_entry->argument_shortname, name))
+            continue;
+        if(0 != strcmp(ps_entry->argument_shortname, "--"))
+            return ps_entry->argument;
+    }
+    return INVALID;
+}
+
+/*!
+******************************************************************************
+* \if Function name : codec_exit \endif
+*
+* \brief
+* handles unrecoverable errors. Prints pc_err_message and a newline to stdout,
+* then ends the process with exit(-1); it does not return to the caller
+*****************************************************************************
+*/
+void codec_exit(CHAR *pc_err_message)
+{
+ printf("%s\n", pc_err_message);
+ exit(-1);
+}
+
+/*!
+******************************************************************************
+* \if Function name : parse_argument \endif
+*
+* \brief
+* Parse input argument
+*
+*****************************************************************************
+*/
+IHEVCE_PLUGIN_STATUS_T parse_argument(appl_ctxt_t *ps_ctxt, CHAR *argument, CHAR *value)
+{
+ ihevce_static_cfg_params_t *ps_static_prms = &ps_ctxt->s_static_cfg_prms;
+ ARGUMENT_T arg = get_argument(argument);
+ WORD32 i4_value = 0;
+ UWORD8 au1_keywd_str[STR_LEN];
+ UWORD8 *pu1_keywd_str = au1_keywd_str;
+
+ switch(arg)
+ {
+ case HELP:
+ print_usage();
+ return IHEVCE_EFAIL;
+
+ case VERSION:
+ break;
+
+ case INPUT_YUV:
+ sscanf(value, "%s", ps_ctxt->au1_in_file);
+ assert(strlen((char *)ps_ctxt->au1_in_file) < STR_LEN);
+ break;
+
+ case OUTPUT:
+ sscanf(value, "%s", ps_ctxt->au1_out_file[0][0]);
+ assert(strlen((char *)ps_ctxt->au1_out_file[0][0]) < STR_LEN);
+ break;
+ case NUM_FRAMES_TO_ENCODE:
+ sscanf(value, "%d", &i4_value);
+ if(i4_value < 0)
+ ps_static_prms->s_config_prms.i4_num_frms_to_encode = INT32_MAX - 1;
+ else
+ ps_static_prms->s_config_prms.i4_num_frms_to_encode = i4_value;
+ break;
+
+ case LOG_DUMP_LEVEL:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->i4_log_dump_level = i4_value;
+ break;
+ case SRC_WIDTH:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_src_prms.i4_width = i4_value;
+ break;
+
+ case SRC_HEIGHT:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_src_prms.i4_height = i4_value;
+ break;
+
+ case SRC_FRAME_RATE_NUM:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_src_prms.i4_frm_rate_num = i4_value;
+ break;
+
+ case SRC_FRAME_RATE_DENOM:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_src_prms.i4_frm_rate_denom = i4_value;
+ break;
+ case INPUT_CHROMA_FORMAT:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_src_prms.inp_chr_format = (IV_COLOR_FORMAT_T)i4_value;
+ break;
+ case CODEC_LEVEL:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_tgt_lyr_prms.as_tgt_params[0].i4_codec_level = i4_value;
+ break;
+ case TGT_BITRATE:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_tgt_lyr_prms.as_tgt_params[0].ai4_tgt_bitrate[0] = i4_value;
+ break;
+
+ case FRAME_QP:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_tgt_lyr_prms.as_tgt_params[0].ai4_frame_qp[0] = i4_value;
+ break;
+ case MAX_CLOSED_GOP_PERIOD:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_coding_tools_prms.i4_max_closed_gop_period = i4_value;
+ break;
+
+ case MIN_CLOSED_GOP_PERIOD:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_coding_tools_prms.i4_min_closed_gop_period = i4_value;
+ break;
+
+ case MAX_CRA_OPEN_GOP_PERIOD:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_coding_tools_prms.i4_max_cra_open_gop_period = i4_value;
+ break;
+
+ case MAX_I_OPEN_GOP_PERIOD:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_coding_tools_prms.i4_max_i_open_gop_period = i4_value;
+ break;
+
+ case MAX_TEMPORAL_LAYERS:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_coding_tools_prms.i4_max_temporal_layers = i4_value;
+ break;
+
+ case QUALITY_PRESET:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_tgt_lyr_prms.as_tgt_params[0].i4_quality_preset =
+ (IHEVCE_QUALITY_CONFIG_T)i4_value;
+ break;
+
+ case DEBLOCKING_TYPE:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_coding_tools_prms.i4_deblocking_type = i4_value;
+ break;
+
+ case USE_DEFAULT_SC_MTX:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_coding_tools_prms.i4_use_default_sc_mtx = i4_value;
+ break;
+
+ case ENABLE_ENTROPY_SYNC:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_coding_tools_prms.i4_enable_entropy_sync = i4_value;
+ break;
+
+ case MAX_TR_TREE_DEPTH_I:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_config_prms.i4_max_tr_tree_depth_I = i4_value;
+ break;
+
+ case MAX_TR_TREE_DEPTH_NI:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_config_prms.i4_max_tr_tree_depth_nI = i4_value;
+ break;
+
+ case MAX_SEARCH_RANGE_HORZ:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_config_prms.i4_max_search_range_horz = i4_value;
+ break;
+
+ case MAX_SEARCH_RANGE_VERT:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_config_prms.i4_max_search_range_vert = i4_value;
+ break;
+ case ARCH_TYPE:
+ sscanf(value, "%d", &i4_value);
+ switch(i4_value)
+ {
+ case 0:
+ ps_static_prms->e_arch_type = ARCH_NA;
+ break;
+ case 4:
+ ps_static_prms->e_arch_type = ARCH_ARM_NONEON;
+ break;
+ default:
+ ps_static_prms->e_arch_type = ARCH_ARM_NONEON;
+ break;
+ }
+ break;
+
+ case NUM_CORES:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_multi_thrd_prms.i4_max_num_cores = i4_value;
+ if((i4_value > MAX_NUM_CORES) || (i4_value < 1))
+ {
+ printf("APLN ERROR >> Number of cores per CPU configured is "
+ "unsupported \n");
+ return IHEVCE_EFAIL;
+ }
+ break;
+ case RATE_CONTROL_MODE:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_config_prms.i4_rate_control_mode = i4_value;
+ break;
+ case CU_LEVEL_RC:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_config_prms.i4_cu_level_rc = i4_value;
+ break;
+ case MAX_FRAME_QP:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_config_prms.i4_max_frame_qp = i4_value;
+ break;
+
+ case MIN_FRAME_QP:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_config_prms.i4_min_frame_qp = i4_value;
+ break;
+
+ case RC_LOOK_AHEAD_PICS:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_lap_prms.i4_rc_look_ahead_pics = i4_value;
+ break;
+
+ case CODEC_TYPE:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_out_strm_prms.i4_codec_type = i4_value;
+ break;
+
+ case CODEC_PROFILE:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_out_strm_prms.i4_codec_profile = i4_value;
+ break;
+
+ case CODEC_TIER:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_out_strm_prms.i4_codec_tier = i4_value;
+ break;
+
+ case SPS_AT_CDR_ENABLE:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_out_strm_prms.i4_sps_at_cdr_enable = i4_value;
+ break;
+
+ case VUI_ENABLE:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_out_strm_prms.i4_vui_enable = i4_value;
+ break;
+
+ case SEI_ENABLE_FLAGS:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_out_strm_prms.i4_sei_enable_flag = i4_value;
+ break;
+
+ case SEI_PAYLOAD_ENABLE_FLAGS:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_out_strm_prms.i4_sei_payload_enable_flag = i4_value;
+ break;
+
+ case SEI_PAYLOAD_PATH:
+ sscanf(value, "%s", ps_ctxt->ai1_sei_payload_path);
+ assert(strlen((char *)ps_ctxt->ai1_sei_payload_path) < STR_LEN);
+ break;
+
+ case SEI_BUFFER_PERIOD_FLAGS:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_out_strm_prms.i4_sei_buffer_period_flags = i4_value;
+ break;
+
+ case SEI_PIC_TIMING_FLAGS:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_out_strm_prms.i4_sei_pic_timing_flags = i4_value;
+ break;
+
+ case SEI_RECOVERY_POINT_FLAGS:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_out_strm_prms.i4_sei_recovery_point_flags = i4_value;
+ break;
+
+ case SEI_HASH_FLAGS:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_out_strm_prms.i4_decoded_pic_hash_sei_flag = i4_value;
+ break;
+
+ case SEI_MASTERING_DISP_COLOUR_VOL_FLAGS:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_out_strm_prms.i4_sei_mastering_disp_colour_vol_flags = i4_value;
+ break;
+
+ case DISPLAY_PRIMARIES_X:
+ {
+ char *token;
+ char *str;
+ const char s[2] = ",";
+ WORD32 i;
+
+ if(0 == ps_static_prms->s_out_strm_prms.i4_sei_mastering_disp_colour_vol_flags)
+ {
+ break;
+ }
+ sscanf(value, "%s", pu1_keywd_str);
+
+ str = (char *)pu1_keywd_str;
+ token = strtok(str, s);
+
+ for(i = 0; i < 3; i++)
+ {
+ if(token != NULL)
+ {
+ sscanf(token, "%d", &i4_value);
+ ps_static_prms->s_out_strm_prms.au2_display_primaries_x[i] = i4_value;
+ token = strtok(NULL, s);
+ }
+ else if((token == NULL) && (i != 2))
+ {
+ printf("APLN ERROR >> Insufficient number of display_primary_x "
+ "values entered \n");
+ return IHEVCE_EFAIL;
+ }
+ }
+ }
+ break;
+
+ case DISPLAY_PRIMARIES_Y:
+ {
+ char *token;
+ char *str;
+ const char s[2] = ",";
+ WORD32 i;
+
+ if(0 == ps_static_prms->s_out_strm_prms.i4_sei_mastering_disp_colour_vol_flags)
+ {
+ break;
+ }
+ sscanf(value, "%s", pu1_keywd_str);
+
+ str = (char *)pu1_keywd_str;
+ token = strtok(str, s);
+
+ for(i = 0; i < 3; i++)
+ {
+ if(token != NULL)
+ {
+ sscanf(token, "%d", &i4_value);
+ ps_static_prms->s_out_strm_prms.au2_display_primaries_y[i] = i4_value;
+ token = strtok(NULL, s);
+ }
+ else if((token == NULL) && (i != 2))
+ {
+ printf("APLN ERROR >> Insufficient number of display_primary_x "
+ "values entered \n");
+ return IHEVCE_EFAIL;
+ }
+ }
+ }
+ break;
+
+ case WHITE_POINT_X:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_out_strm_prms.u2_white_point_x = i4_value;
+ break;
+
+ case WHITE_POINT_Y:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_out_strm_prms.u2_white_point_y = i4_value;
+ break;
+
+ case MAX_DISPLAY_MASTERING_LUMINANCE:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_out_strm_prms.u4_max_display_mastering_luminance = i4_value;
+ break;
+
+ case MIN_DISPLAY_MASTERING_LUMINANCE:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_out_strm_prms.u4_min_display_mastering_luminance = i4_value;
+ break;
+
+ case SEI_CLL_INFO_ENABLE:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_out_strm_prms.i4_sei_cll_enable = i4_value;
+ break;
+
+ case SEI_MAX_CLL:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_out_strm_prms.u2_sei_max_cll = i4_value;
+ break;
+
+ case SEI_AVG_CLL:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_out_strm_prms.u2_sei_avg_cll = i4_value;
+ break;
+
+ case TILES_ENABLED_FLAG:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_app_tile_params.i4_tiles_enabled_flag = i4_value;
+ break;
+ case SLICE_SEGMENT_MODE:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_slice_params.i4_slice_segment_mode = i4_value;
+ break;
+ case ASPECT_RATIO_INFO_PRESENT_FLAG:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_vui_sei_prms.u1_aspect_ratio_info_present_flag = i4_value;
+ break;
+
+ case ASPECT_RATIO_IDC:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_vui_sei_prms.au1_aspect_ratio_idc[0] = i4_value;
+ break;
+
+ case SAR_WIDTH:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_vui_sei_prms.au2_sar_width[0] = i4_value;
+ break;
+
+ case SAR_HEIGHT:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_vui_sei_prms.au2_sar_height[0] = i4_value;
+ break;
+
+ case OVERSCAN_INFO_PRESENT_FLAG:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_vui_sei_prms.u1_overscan_info_present_flag = i4_value;
+ break;
+
+ case OVERSCAN_APPROPRIATE_FLAG:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_vui_sei_prms.u1_overscan_appropriate_flag = i4_value;
+ break;
+
+ case VIDEO_SIGNAL_TYPE_PRESENT_FLAG:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_vui_sei_prms.u1_video_signal_type_present_flag = i4_value;
+ break;
+
+ case VIDEO_FORMAT:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_vui_sei_prms.u1_video_format = i4_value;
+ break;
+
+ case VIDEO_FULL_RANGE_FLAG:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_vui_sei_prms.u1_video_full_range_flag = i4_value;
+ break;
+
+ case COLOUR_DESCRIPTION_PRESENT_FLAG:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_vui_sei_prms.u1_colour_description_present_flag = i4_value;
+ break;
+
+ case COLOUR_PRIMARIES:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_vui_sei_prms.u1_colour_primaries = i4_value;
+ break;
+
+ case TRANSFER_CHARACTERISTICS:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_vui_sei_prms.u1_transfer_characteristics = i4_value;
+ break;
+
+ case MATRIX_COEFFICIENTS:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_vui_sei_prms.u1_matrix_coefficients = i4_value;
+ break;
+
+ case CHROMA_LOC_INFO_PRESENT_FLAG:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_vui_sei_prms.u1_chroma_loc_info_present_flag = i4_value;
+ break;
+
+ case CHROMA_SAMPLE_LOC_TYPE_TOP_FIELD:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_vui_sei_prms.u1_chroma_sample_loc_type_top_field = i4_value;
+ break;
+
+ case CHROMA_SAMPLE_LOC_TYPE_BOTTOM_FIELD:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_vui_sei_prms.u1_chroma_sample_loc_type_bottom_field = i4_value;
+ break;
+
+ case TIMING_INFO_PRESENT_FLAG:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_vui_sei_prms.u1_timing_info_present_flag = i4_value;
+ break;
+
+ case VUI_HRD_PARAMETERS_PRESENT_FLAG:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_vui_sei_prms.u1_vui_hrd_parameters_present_flag = i4_value;
+ break;
+
+ case NAL_HRD_PARAMETERS_PRESENT_FLAG:
+ sscanf(value, "%d", &i4_value);
+ ps_static_prms->s_vui_sei_prms.u1_nal_hrd_parameters_present_flag = i4_value;
+ break;
+
+ case INVALID:
+ default:
+ printf("APLN ERROR >> Argument %s is invalid, ignoring \n", argument);
+ break;
+ }
+
+ return IHEVCE_EOK;
+}
+
+/*!
+******************************************************************************
+* \if Function name : read_cfg_file \endif
+*
+* \brief
+*    Parse config file
+*
+* \param[in,out] ps_ctxt : application context forwarded to parse_argument()
+* \param[in] fp_cfg : open configuration file, consumed line by line
+*
+* \return
+*    IHEVCE_EOK once fgets() returns NULL (end of file or read error),
+*    otherwise the first non IHEVCE_EOK status reported by parse_argument()
+*
+*****************************************************************************
+*/
+IHEVCE_PLUGIN_STATUS_T read_cfg_file(appl_ctxt_t *ps_ctxt, FILE *fp_cfg)
+{
+    while(1)
+    {
+        CHAR line[STR_LEN] = { '\0' };
+        CHAR argument[STR_LEN] = { '\0' };
+        /* Zero filled so that a line carrying only a keyword hands an empty */
+        /* string, and not indeterminate stack bytes, to parse_argument()    */
+        CHAR value[STR_LEN] = { '\0' };
+        CHAR description[STR_LEN] = { '\0' };
+        IHEVCE_PLUGIN_STATUS_T status;
+
+        if(NULL == fgets(line, STR_LEN, fp_cfg))
+            return IHEVCE_EOK;
+
+        /* split string on whitespace; fgets() stores at most STR_LEN - 1   */
+        /* characters, so no token can outgrow its STR_LEN sized target     */
+        sscanf(line, "%s %s %s", argument, value, description);
+
+        /* blank lines leave argument empty, '#' marks a comment line */
+        if(argument[0] == '\0' || argument[0] == '#')
+            continue;
+
+        status = parse_argument(ps_ctxt, argument, value);
+        if(status != IHEVCE_EOK)
+        {
+            return status;
+        }
+    }
+}
+
+/*!
+******************************************************************************
+* \if Function name : libihevce_encode_init \endif
+*
+* \brief
+*    Calls ihevce_init() with the static configuration stored in the
+*    application context; the encoder handle is written to
+*    ps_ctxt->ihevceHdl through the pointer passed to ihevce_init()
+*
+* \param[in,out] ps_ctxt : application context
+*
+* \return
+*    IHEVCE_EFAIL when ihevce_init() returns IHEVCE_EFAIL, else IHEVCE_EOK
+*
+*****************************************************************************
+*/
+IHEVCE_PLUGIN_STATUS_T libihevce_encode_init(appl_ctxt_t *ps_ctxt)
+{
+    ihevce_static_cfg_params_t *params = &ps_ctxt->s_static_cfg_prms;
+
+    /* call the function to initialise encoder*/
+    if(IHEVCE_EFAIL == ihevce_init(params, (void *)&ps_ctxt->ihevceHdl))
+    {
+        /* make the cause visible instead of formatting it into a buffer */
+        /* that is discarded on return                                   */
+        fprintf(stderr, "Unable to initialise libihevce encoder\n");
+        return IHEVCE_EFAIL;
+    }
+
+    return IHEVCE_EOK;
+}
+
+/*!
+******************************************************************************
+* \if Function name : allocate_input \endif
+*
+* \brief
+*    allocate input buffers
+*
+* \description
+*    Allocates one 64 byte aligned buffer of width * height * 3 / 2 bytes and
+*    fills the plane pointers, strides and sizes of inp_pic for the
+*    IV_YUV_420P and IV_YUV_420SP_UV input formats. Bitrate, peak bitrate,
+*    rate factor and a zero PTS are copied into inp_pic as well.
+*
+* \param[in] ps_ctxt : application context holding the static configuration
+* \param[out] inp_pic : input buffer descriptor to populate
+*
+* \return
+*    IHEVCE_EFAIL when the allocation fails, else IHEVCE_EOK
+*
+*****************************************************************************
+*/
+IHEVCE_PLUGIN_STATUS_T allocate_input(appl_ctxt_t *ps_ctxt, ihevce_inp_buf_t *inp_pic)
+{
+    ihevce_static_cfg_params_t *params = &ps_ctxt->s_static_cfg_prms;
+    WORD32 y_sz = params->s_src_prms.i4_width * params->s_src_prms.i4_height;
+    WORD32 uv_sz = y_sz >> 1;
+    WORD32 pic_size = y_sz + uv_sz;
+    void *pv_buf = NULL;
+    UWORD8 *pu1_buf;
+
+#ifdef X86_MINGW
+    pv_buf = _aligned_malloc(pic_size, 64);
+#else
+    /* posix_memalign() reports failure through its return value and does */
+    /* not promise to set the output pointer, so test the return code     */
+    if(0 != posix_memalign(&pv_buf, 64, pic_size))
+    {
+        pv_buf = NULL;
+    }
+#endif
+    if(NULL == pv_buf)
+    {
+        return (IHEVCE_EFAIL);
+    }
+    pu1_buf = (UWORD8 *)pv_buf;
+
+    /* NOTE(review): for any other chroma format the plane fields are left */
+    /* untouched and the buffer is not recorded in inp_pic                 */
+    if(IV_YUV_420P == params->s_src_prms.inp_chr_format)
+    {
+        /* planar: luma followed by two chroma planes of uv_sz / 2 bytes */
+        inp_pic->apv_inp_planes[0] = pu1_buf;
+        inp_pic->apv_inp_planes[1] = pu1_buf + y_sz;
+        inp_pic->apv_inp_planes[2] = pu1_buf + y_sz + (uv_sz >> 1);
+
+        inp_pic->ai4_inp_strd[0] = params->s_src_prms.i4_width;
+        inp_pic->ai4_inp_strd[1] = params->s_src_prms.i4_width >> 1;
+        inp_pic->ai4_inp_strd[2] = params->s_src_prms.i4_width >> 1;
+
+        inp_pic->ai4_inp_size[0] = y_sz;
+        inp_pic->ai4_inp_size[1] = (uv_sz >> 1);
+        inp_pic->ai4_inp_size[2] = (uv_sz >> 1);
+    }
+    else if(IV_YUV_420SP_UV == params->s_src_prms.inp_chr_format)
+    {
+        /* semi planar: luma followed by a single chroma plane of uv_sz bytes */
+        inp_pic->apv_inp_planes[0] = pu1_buf;
+        inp_pic->apv_inp_planes[1] = pu1_buf + y_sz;
+        inp_pic->apv_inp_planes[2] = NULL;
+
+        inp_pic->ai4_inp_strd[0] = params->s_src_prms.i4_width;
+        inp_pic->ai4_inp_strd[1] = params->s_src_prms.i4_width;
+        inp_pic->ai4_inp_strd[2] = 0;
+
+        inp_pic->ai4_inp_size[0] = y_sz;
+        inp_pic->ai4_inp_size[1] = uv_sz;
+        inp_pic->ai4_inp_size[2] = 0;
+    }
+
+    inp_pic->i4_curr_bitrate = params->s_tgt_lyr_prms.as_tgt_params[0].ai4_tgt_bitrate[0];
+    inp_pic->i4_curr_peak_bitrate = params->s_tgt_lyr_prms.as_tgt_params[0].ai4_peak_bitrate[0];
+    inp_pic->i4_curr_rate_factor = params->s_config_prms.i4_rate_factor;
+    inp_pic->u8_pts = 0;
+
+    return IHEVCE_EOK;
+}
+
+/*!
+******************************************************************************
+* \if Function name : read_input \endif
+*
+* \brief
+*    read input from a file
+*
+* \description
+*    Reads one picture row by row: each row of a plane is fetched with a
+*    single fread() of the plane width and the destination then advances by
+*    the stride stored in inp_pic for that plane.
+*
+* \param[in] ps_ctxt : application context holding the source dimensions
+* \param[in] fp : input file
+* \param[in,out] inp_pic : descriptor whose plane buffers receive the data
+*
+* \return
+*    IHEVCE_EFAIL as soon as a row cannot be read completely, else IHEVCE_EOK
+*
+*****************************************************************************
+*/
+IHEVCE_PLUGIN_STATUS_T read_input(appl_ctxt_t *ps_ctxt, FILE *fp, ihevce_inp_buf_t *inp_pic)
+{
+    ihevce_static_cfg_params_t *ps_cfg = &ps_ctxt->s_static_cfg_prms;
+    WORD32 ai4_plane_wd[3] = { 0, 0, 0 };
+    WORD32 ai4_plane_ht[3] = { 0, 0, 0 };
+    WORD32 i4_num_planes = 3;
+    WORD32 i4_plane;
+
+    if(IV_YUV_420P == ps_cfg->s_src_prms.inp_chr_format)
+    {
+        /* full size luma plus two chroma planes halved in both directions */
+        ai4_plane_wd[0] = ps_cfg->s_src_prms.i4_width;
+        ai4_plane_ht[0] = ps_cfg->s_src_prms.i4_height;
+        ai4_plane_wd[1] = ps_cfg->s_src_prms.i4_width >> 1;
+        ai4_plane_ht[1] = ps_cfg->s_src_prms.i4_height >> 1;
+        ai4_plane_wd[2] = ai4_plane_wd[1];
+        ai4_plane_ht[2] = ai4_plane_ht[1];
+    }
+    else if(IV_YUV_420SP_UV == ps_cfg->s_src_prms.inp_chr_format)
+    {
+        /* full size luma plus one chroma plane of full width, half height */
+        ai4_plane_wd[0] = ps_cfg->s_src_prms.i4_width;
+        ai4_plane_ht[0] = ps_cfg->s_src_prms.i4_height;
+        ai4_plane_wd[1] = ps_cfg->s_src_prms.i4_width;
+        ai4_plane_ht[1] = ps_cfg->s_src_prms.i4_height >> 1;
+        i4_num_planes = 2;
+    }
+
+    for(i4_plane = 0; i4_plane < i4_num_planes; i4_plane++)
+    {
+        const WORD32 i4_row_bytes = ai4_plane_wd[i4_plane];
+        const WORD32 i4_stride = inp_pic->ai4_inp_strd[i4_plane];
+        UWORD8 *pu1_row = inp_pic->apv_inp_planes[i4_plane];
+        WORD32 i4_rows_left = ai4_plane_ht[i4_plane];
+
+        while(i4_rows_left > 0)
+        {
+            WORD32 i4_got = fread(pu1_row, sizeof(UWORD8), i4_row_bytes, fp);
+
+            /* a short read is reported as failure */
+            if(i4_got != i4_row_bytes)
+            {
+                return (IHEVCE_EFAIL);
+            }
+            pu1_row += i4_stride;
+            i4_rows_left--;
+        }
+    }
+
+    return IHEVCE_EOK;
+}
+
+/*!
+******************************************************************************
+* \if Function name : write_output \endif
+*
+* \brief
+*    Write bitstream buffers to a file
+*
+* \param[in] fp : output file
+* \param[in] out_pic : descriptor giving the buffer and the byte count to write
+*
+* \return
+*    IHEVCE_EOK when fwrite() reports exactly i4_bytes_generated bytes,
+*    else IHEVCE_EFAIL
+*
+*****************************************************************************
+*/
+IHEVCE_PLUGIN_STATUS_T write_output(FILE *fp, ihevce_out_buf_t *out_pic)
+{
+    const WORD32 i4_written =
+        fwrite(out_pic->pu1_output_buf, sizeof(UWORD8), out_pic->i4_bytes_generated, fp);
+
+    return (i4_written == out_pic->i4_bytes_generated) ? IHEVCE_EOK : IHEVCE_EFAIL;
+}
+
+/*!
+******************************************************************************
+* \if Function name : free_input \endif
+*
+* \brief
+*    free input buffers
+*
+* \description
+*    Releases the buffer referenced by plane 0 of inp_pic, if any. Only
+*    plane 0 is passed to the deallocator.
+*
+* \param[in] inp_pic : input buffer descriptor
+*
+*****************************************************************************
+*/
+void free_input(ihevce_inp_buf_t *inp_pic)
+{
+    void *pv_base = inp_pic->apv_inp_planes[0];
+
+    /* nothing was allocated, nothing to release */
+    if(!pv_base)
+    {
+        return;
+    }
+
+#ifdef X86_MINGW
+    _aligned_free(pv_base);
+#else
+    free(pv_base);
+#endif
+}
+
+/*!
+******************************************************************************
+* \if Function name : libihevce_encode_close \endif
+*
+* \brief
+*    Calls ihevce_close() on the encoder handle stored in the application
+*    context when that handle is set
+*
+* \param[in] ps_ctxt : application context
+*
+* \return
+*    Always IHEVCE_EOK; the result of ihevce_close() is not inspected
+*
+*****************************************************************************
+*/
+IHEVCE_PLUGIN_STATUS_T libihevce_encode_close(appl_ctxt_t *ps_ctxt)
+{
+    /* no handle stored, so there is no encoder to close */
+    if(!ps_ctxt->ihevceHdl)
+    {
+        return IHEVCE_EOK;
+    }
+
+    /* encoder close */
+    ihevce_close(ps_ctxt->ihevceHdl);
+
+    return IHEVCE_EOK;
+}
+
+/*!
+******************************************************************************
+* \if Function name : libihevce_encode_frame \endif
+*
+* \brief
+*    Reads pictures from the input file, passes them to ihevce_encode() and
+*    writes every generated bitstream chunk to the output file
+*
+* \description
+*    Once the configured number of frames has been read, or a read fails, a
+*    NULL input is passed to ihevce_encode(). The loop stops when the encoder
+*    sets i4_end_flag in the output descriptor. The input picture buffer is
+*    released on every exit path that follows its allocation.
+*
+* \param[in] ps_ctxt : application context (configuration, encoder handle)
+* \param[in] pf_inp_yuv : input file
+* \param[in] pf_out : output file
+*
+* \return
+*    IHEVCE_EOK on success, IHEVCE_EFAIL when allocation, encoding or writing
+*    fails
+*
+*****************************************************************************
+*/
+IHEVCE_PLUGIN_STATUS_T libihevce_encode_frame(appl_ctxt_t *ps_ctxt, FILE *pf_inp_yuv, FILE *pf_out)
+{
+    ihevce_static_cfg_params_t *params = &ps_ctxt->s_static_cfg_prms;
+    IHEVCE_PLUGIN_STATUS_T status = IHEVCE_EOK;
+    /* stays IHEVCE_EFAIL until the encode loop has finished cleanly */
+    IHEVCE_PLUGIN_STATUS_T ret_status = IHEVCE_EFAIL;
+    WORD32 i4_num_frames = 0;
+    ihevce_inp_buf_t inp_pic;
+    ihevce_out_buf_t out_pic;
+    profile_database_t s_profile_data;
+#if HEADER_MODE
+    ihevce_out_buf_t out_pic_hdr;
+#endif
+
+    (void)s_profile_data;
+    memset(&inp_pic, 0, sizeof(inp_pic));
+    memset(&out_pic, 0, sizeof(out_pic));
+#if HEADER_MODE
+    memset(&out_pic_hdr, 0, sizeof(out_pic_hdr));
+#endif
+
+    status = allocate_input(ps_ctxt, &inp_pic);
+    if(status != IHEVCE_EOK)
+    {
+        fprintf(stderr, "Unable to allocate input\n");
+        return IHEVCE_EFAIL;
+    }
+
+#if HEADER_MODE
+    status = ihevce_encode_header(ps_ctxt->ihevceHdl, &out_pic_hdr);
+    if(status != IHEVCE_EOK)
+    {
+        fprintf(stderr, "encode header call failed\n");
+        goto cleanup;
+    }
+    if(out_pic_hdr.i4_bytes_generated)
+    {
+        status = write_output(pf_out, &out_pic_hdr);
+        if(status != IHEVCE_EOK)
+        {
+            fprintf(stderr, "Unable to write output\n");
+            goto cleanup;
+        }
+    }
+#endif
+
+    PROFILE_INIT(&s_profile_data);
+
+    while(1)
+    {
+        ihevce_inp_buf_t *ps_inp_pic = &inp_pic;
+
+        if(i4_num_frames < params->s_config_prms.i4_num_frms_to_encode)
+        {
+            /* a failed read is handled like the end of the input */
+            status = read_input(ps_ctxt, pf_inp_yuv, &inp_pic);
+            if(status != IHEVCE_EOK)
+            {
+                ps_inp_pic = NULL;
+            }
+        }
+        else
+        {
+            ps_inp_pic = NULL;
+        }
+#if DYN_BITRATE_TEST
+        if((i4_num_frames == 200) && (ps_inp_pic != NULL))
+        {
+            ps_inp_pic->i4_curr_bitrate = ps_inp_pic->i4_curr_bitrate << 1;
+        }
+#endif
+        /* call encoder process frame */
+        PROFILE_START(&s_profile_data);
+        status = ihevce_encode(ps_ctxt->ihevceHdl, ps_inp_pic, &out_pic);
+        PROFILE_STOP(&s_profile_data, NULL);
+        if(status != IHEVCE_EOK)
+        {
+            fprintf(stderr, "Unable to process encode\n");
+            goto cleanup;
+        }
+
+        if(out_pic.i4_bytes_generated)
+        {
+            status = write_output(pf_out, &out_pic);
+            if(status != IHEVCE_EOK)
+            {
+                fprintf(stderr, "Unable to write output\n");
+                goto cleanup;
+            }
+        }
+
+        if(out_pic.i4_end_flag)
+            break;
+
+        i4_num_frames++;
+        /* advance the timestamp by one frame duration (1000000 * denom / num) */
+        inp_pic.u8_pts +=
+            (1000000 * params->s_src_prms.i4_frm_rate_denom) / params->s_src_prms.i4_frm_rate_num;
+    }
+
+    PROFILE_END(&s_profile_data, "encode API call");
+    ret_status = IHEVCE_EOK;
+
+cleanup:
+    /* single release point so that error exits no longer leak the buffer */
+    free_input(&inp_pic);
+
+    return ret_status;
+}
+
+/*!
+******************************************************************************
+* \if Function name : copy_cfg_fname \endif
+*
+* \brief
+*    Copies a configuration file name into a STR_LEN sized buffer; a name
+*    that does not fit is reported through codec_exit() and never written
+*    past the end of the buffer
+*
+*****************************************************************************
+*/
+static void copy_cfg_fname(CHAR *pc_dst, const char *pc_src)
+{
+    if(strlen(pc_src) >= STR_LEN)
+    {
+        CHAR ac_error[STR_LEN];
+
+        snprintf(ac_error, sizeof(ac_error), "Configuration file name is too long");
+        codec_exit(ac_error);
+    }
+    snprintf(pc_dst, STR_LEN, "%s", pc_src);
+}
+
+/*!
+******************************************************************************
+* \if Function name : main \endif
+*
+* \brief
+*    Application to demonstrate codec API. Shows how to use create,
+*    process, control and delete
+*
+* \description
+*    With no argument "enc.cfg" is read, with one argument that argument is
+*    the configuration file ("--help" prints the usage instead). With more
+*    arguments they are consumed as keyword / value pairs; a CONFIG keyword
+*    reads the named configuration file, every other pair is passed to
+*    parse_argument().
+*
+* \return
+*    0 on success, IHEVCE_EFAIL when closing the encoder fails
+*
+*****************************************************************************
+*/
+int main(int argc, char *argv[])
+{
+    /* Main context */
+    main_ctxt_t s_main_ctxt;
+
+    /* app ctxt */
+    appl_ctxt_t *ps_ctxt = &s_main_ctxt.s_app_ctxt;
+
+    /* cfg params */
+    ihevce_static_cfg_params_t *params = &ps_ctxt->s_static_cfg_prms;
+
+    /* error string */
+    CHAR ac_error[STR_LEN];
+
+    /* config file name */
+    CHAR ac_cfg_fname[STR_LEN];
+
+    WORD32 i;
+    FILE *fp_cfg = NULL;
+    FILE *pf_inp_yuv = NULL;
+    FILE *pf_out = NULL;
+
+    /* error status */
+    IHEVCE_PLUGIN_STATUS_T status = IHEVCE_EOK;
+
+    /* process exit code */
+    int i4_exit_code = 0;
+
+    /* start from an all zero context so that file names which the          */
+    /* configuration never sets are empty strings and not indeterminate     */
+    memset(&s_main_ctxt, 0, sizeof(s_main_ctxt));
+
+    /* call the function to set default params */
+    if(IHEVCE_EFAIL == ihevce_set_def_params(params))
+    {
+        snprintf(ac_error, sizeof(ac_error), "Unable to set default parameters\n");
+        codec_exit(ac_error);
+    }
+
+    /* Usage */
+    if(argc < 2)
+    {
+        printf("Using enc.cfg as configuration file \n");
+        copy_cfg_fname(ac_cfg_fname, "enc.cfg");
+    }
+    else if(argc == 2)
+    {
+        if(!strcmp(argv[1], "--help"))
+        {
+            print_usage();
+            exit(-1);
+        }
+        copy_cfg_fname(ac_cfg_fname, argv[1]);
+    }
+
+    /*************************************************************************/
+    /* Parse arguments                                                       */
+    /*************************************************************************/
+    /* Read command line arguments */
+    if(argc > 2)
+    {
+        /* arguments are consumed as keyword / value pairs */
+        for(i = 1; i + 1 < argc; i += 2)
+        {
+            if(CONFIG == get_argument(argv[i]))
+            {
+                copy_cfg_fname(ac_cfg_fname, argv[i + 1]);
+                if((fp_cfg = fopen(ac_cfg_fname, "r")) == NULL)
+                {
+                    /* bounded: the file name alone may fill STR_LEN bytes */
+                    snprintf(
+                        ac_error,
+                        sizeof(ac_error),
+                        "Could not open Configuration file %s",
+                        ac_cfg_fname);
+                    codec_exit(ac_error);
+                }
+                status = read_cfg_file(ps_ctxt, fp_cfg);
+                if(status != IHEVCE_EOK)
+                {
+                    snprintf(ac_error, sizeof(ac_error), "Encountered error in cfg file");
+                    codec_exit(ac_error);
+                }
+                fclose(fp_cfg);
+            }
+            else
+            {
+                status = parse_argument(ps_ctxt, argv[i], argv[i + 1]);
+                if(status != IHEVCE_EOK)
+                {
+                    snprintf(ac_error, sizeof(ac_error), "Encountered error in cfg file");
+                    codec_exit(ac_error);
+                }
+            }
+        }
+    }
+    else
+    {
+        if((fp_cfg = fopen(ac_cfg_fname, "r")) == NULL)
+        {
+            /* bounded: the file name alone may fill STR_LEN bytes */
+            snprintf(
+                ac_error, sizeof(ac_error), "Could not open Configuration file %s", ac_cfg_fname);
+            codec_exit(ac_error);
+        }
+        status = read_cfg_file(ps_ctxt, fp_cfg);
+        if(status != IHEVCE_EOK)
+        {
+            snprintf(ac_error, sizeof(ac_error), "Unable to set Configuration parameter");
+            codec_exit(ac_error);
+        }
+        fclose(fp_cfg);
+    }
+
+    pf_inp_yuv = fopen(ps_ctxt->au1_in_file, "rb");
+    printf("Input file %s \n", ps_ctxt->au1_in_file);
+    if(NULL == pf_inp_yuv)
+    {
+        snprintf(ac_error, sizeof(ac_error), "Could not open input file");
+        codec_exit(ac_error);
+    }
+
+    pf_out = fopen(ps_ctxt->au1_out_file[0][0], "wb");
+    printf("Output file %s \n", ps_ctxt->au1_out_file[0][0]);
+    if(NULL == pf_out)
+    {
+        snprintf(ac_error, sizeof(ac_error), "Could not open output file");
+        codec_exit(ac_error);
+    }
+
+    status = libihevce_encode_init(ps_ctxt);
+    if(status != IHEVCE_EOK)
+    {
+        snprintf(ac_error, sizeof(ac_error), "Unable to init encoder");
+        codec_exit(ac_error);
+    }
+
+    status = libihevce_encode_frame(ps_ctxt, pf_inp_yuv, pf_out);
+    if(status != IHEVCE_EOK)
+    {
+        snprintf(ac_error, sizeof(ac_error), "Unable to encode frame");
+        codec_exit(ac_error);
+    }
+
+    status = libihevce_encode_close(ps_ctxt);
+    if(status != IHEVCE_EOK)
+    {
+        /* remember the failure but still close both files below */
+        fprintf(stderr, "Unable to close encoder\n");
+        i4_exit_code = IHEVCE_EFAIL;
+    }
+
+    if(NULL != pf_inp_yuv)
+        fclose(pf_inp_yuv);
+
+    if(NULL != pf_out)
+        fclose(pf_out);
+
+    return i4_exit_code;
+}
diff --git a/test/encoder/vid_enc_cfg.txt b/test/encoder/vid_enc_cfg.txt
new file mode 100644
index 0000000..d0bad5f
--- /dev/null
+++ b/test/encoder/vid_enc_cfg.txt
@@ -0,0 +1,140 @@
+######################################################################################
+# ITTIAM HEVC ENCODER CONFIGURATION FILE (c) ITTIAM SYSTEMS #
+# The following rules / restrictions apply to this config file #
+# All the parameters are recognized using the keywords at the start of each line #
+# The value which follows the keyword, separated by whitespace, is the valid value #
+# Every configuration parameter must be assigned a valid value. #
+# The encoder does not assign default values for any missing parameter #
+# The order of parameters can be changed #
+# Each entry must be present only once #
+######################################################################################
+
+######################################################################################
+# Component Name : HEVC Encoder on x86 #
+######################################################################################
+
+######################################################################################
+# File I/O Parameters #
+######################################################################################
+
+--input 720p_basketballdrive.yuv /* Input yuv file {mandatory} */
+--output out.265 /* Output bitstream file (mandatory) */
+--num_frames_to_encode -1
+--log_dump_level 1 /* 0- [No log/prints] 1- [Bits Generated, POC, Qp, Pic-type] 2- [1 + PSNR + Seq Summary] 3- [2 + SSIM + Frame Summary] */
+
+######################################################################################
+# Source Parameters #
+######################################################################################
+
+--src_width 1280 /* Input Source Width {mandatory}[320:4096] */
+--src_height 720 /* Input Source Height {mandatory}[128:2304] */
+--src_frame_rate_num 30000 /* Frame rate numerator {30000}[7500:120000] */
+--src_frame_rate_denom 1000 /* Frame rate denominator {1000}[1000,1001] */
+--input_chroma_format 1 /* 1- YUV_420P,11- YUV_420SP; {1, 11} */
+
+######################################################################################
+# Target Parameters (for all the layers of multi-resolution encoding) #
+######################################################################################
+
+--codec_level 156 /* Coded Level multiplied by 30 */
+--tgt_bitrate 4000000 /* Target bitrates in bps{5000000} */
+--frame_qp 32 /* Initial QP values {32} */
+
+######################################################################################
+# GOP structure Parameters #
+######################################################################################
+
+--max_closed_gop_period 0 /* Max IDR Pic distance- Closed GOP {0} */
+--min_closed_gop_period 0 /* Min IDR Pic distance- Closed GOP {0} */
+--max_cra_open_gop_period 60 /* Max CRA Pic distance- Open GOP {60} */
+--max_i_open_gop_period 60 /* Max I (non CRA, non IDR) Pic distance {0} */
+--max_temporal_layers 0 /* B pyramid layers {3}[0:3] */
+
+######################################################################################
+# Coding tools Parameters #
+######################################################################################
+
+--quality_preset 5 /* 0->P0(Best Quality), 2->P2, 3->P3, 4->P4, 5->P5, 6->P6(Best Speed) {5} */
+--deblocking_type 0 /* Deblocking 0- enabled, 1- disabled {0} */
+--use_default_sc_mtx 0 /* 0- disabled, 1- enabled {0} */
+--enable_entropy_sync 0 /* Entropy sync 1- enabled, 0- disabled {0} */
+--max_tr_tree_depth_I 1 /* Max transform tree depth for intra {3}[1,2,3] */
+--max_tr_tree_depth_nI 3 /* Max transform tree depth for inter {3}[1,2,3,4] */
+--max_search_range_horz 512 /* Horizontal search range {512}[64:512] */
+--max_search_range_vert 256 /* Vertical search range {256}[32:256] */
+--archType 0 /* 0 => Automatic, 4 => No Neon */
+
+######################################################################################
+# Multi Core parameters #
+######################################################################################
+
+--num_cores 4 /* [1:4] */
+
+######################################################################################
+# Rate Control parameters #
+######################################################################################
+
+--rate_control_mode 2 /* 2- VBR 3- CQP, 5- CBR {2} */
+--cu_level_rc 1 /* CU QP Modulation 0-disable, 1-spatial qp modulation {1} */
+--max_frame_qp 51 /* Max frame Qp for I frame {51}[51]*/
+--min_frame_qp 1 /* Min frame Qp for I frame. {1}[1] */
+
+######################################################################################
+# Look Ahead Processing Parameters #
+######################################################################################
+
+--rc_look_ahead_pics 0 /* RC look ahead window {0}[0:120] */
+
+######################################################################################
+# Output stream Parameters #
+######################################################################################
+
+--codec_type 0 /* 0- HEVC {0} */
+--codec_profile 1 /* 1- Main */
+--codec_tier 0 /* 0- Main 1- High {1} */
+--sps_at_cdr_enable 0 /* 1- enable, 0- disable {1} */
+
+######################################################################################
+# SEI and VUI parameters #
+######################################################################################
+
+--sei_enable_flags 0 /* 1- enable, 0- disable {0} */
+--sei_buffer_period_flags 0 /* 1- enable, 0- disable {0} */
+--sei_pic_timing_flags 0 /* 1- enable, 0- disable {0} */
+--sei_recovery_point_flags 0 /* 1- enable, 0- disable {0} */
+--sei_hash_flags 0 /* 3- Checksum, 2- CRC, 0- disable {0} */
+--sei_mastering_disp_colour_vol_flags 0 /* 1: enable, 0: disable {0} */
+--display_primaries_x 0,0,0 /* X-Primaries: comma separated R,G,B values {}[0:50000] */
+--display_primaries_y 0,0,0 /* Y-Primaries: comma separated R,G,B values {}[0:50000] */
+--white_point_x 0 /* X White point value {}[0:50000] */
+--white_point_y 0 /* Y White point value {}[0:50000] */
+--max_display_mastering_luminance 1 /* Max mastering Luminance. In units of 0.0001 Candelas/sqmtr {} */
+--min_display_mastering_luminance 0 /* Min mastering Luminance. In units of 0.0001 Candelas/sqmtr {} */
+--sei_content_light_level_info 0 /* 0-disable,1-enable */
+--max_content_light_level 20 /* 16bit unsigned number indicating max pixel intensity*/
+--max_frame_average_light_level 10 /* 16bit unsigned number indicating max avg pixel intensity*/
+
+######################################################################################
+# VUI Parameters #
+######################################################################################
+
+--vui_enable 0 /* 1- enable, 0- disable {0} */
+--aspect_ratio_info_present_flag 0 /* Aspect Ratio 1-enable 0-disable {0} */
+--aspect_ratio_idc 3 /* Aspect Ratio IDC {255}[0:255] */
+--sar_width 4 /* SAR Width {4}[0:65535] */
+--sar_height 3 /* SAR Height {3}[0:65535]*/
+--overscan_info_present_flag 0 /* Overscan Info. 1-enable 0-disable {0} */
+--overscan_appropriate_flag 1 /* Overscan Appropriate 1-enable 0-disable {0} */
+--video_signal_type_present_flag 0 /* Video Signal Type Present. 1-enable 0-disable {1} */
+--video_format 5 /* Video Format {5}[0:5] */
+--video_full_range_flag 0 /* Video Full Range. 1-enable 0-disable {1} */
+--colour_description_present_flag 0 /* Colour description. 1-enable 0-disable {0} */
+--colour_primaries 2 /* Colour Primaries {2}[0:255] */
+--transfer_characteristics 2 /* Transfer Characteristic {2}[0:255] */
+--matrix_coefficients 2 /* Matrix Coefficients {2}[0:255] */
+--chroma_loc_info_present_flag 0 /* Presence of chroma_sample_loc_type_top_field and chroma_sample_loc_type_bottom_field. 1-enable 0-disable {0} */
+--chroma_sample_loc_type_top_field 0 /* Location of Chroma samples for Top field.{0}[0,1] */
+--chroma_sample_loc_type_bottom_field 0 /* Location of Chroma samples for Bottom field..{0}[0,1] */
+--timing_info_present_flag 1 /* Timing info. 1-enable 0-disable {0} */
+--vui_hrd_parameters_present_flag 1 /* HRD parameters. 1-enable 0-disable {0} */
+--nal_hrd_parameters_present_flag 1 /* NAL HRD parameters. 1-enable 0-disable {0} */